From 98ec919fbc24e85c20818b472131687ba42ae6ab Mon Sep 17 00:00:00 2001
From: Torbjorn Granlund <tege@gmplib.org>
Date: Mon, 7 Nov 2011 18:42:27 +0100
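Subject: Change how mpn_redc_1 works, use more broadly.

mpn_redc_1 no longer takes a separate result pointer and no longer performs
the final add and conditional subtract; it only reduces up[] in place.
Callers now do the final step themselves: powm.c through a new MPN_REDC_1
macro (mpn_add_n followed by a branching mpn_sub_n) and powm_sec.c through
a new MPN_REDC_1_SEC macro (mpn_add_n followed by mpn_subcnd_n).  With that,
mpn_powm_sec uses mpn_redc_1 as well, and mpn/generic/redc_1_sec.c, the
x86_64 add_n/sub_n tail and the matching declarations are removed.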

---
 configure.in             |  2 +-
 gmp-impl.h               |  5 +---
 mpn/generic/powm.c       | 44 +++++++++++++++++++------------
 mpn/generic/powm_sec.c   | 18 +++++++++----
 mpn/generic/redc_1.c     |  5 +---
 mpn/generic/redc_1_sec.c | 45 --------------------------------
 mpn/x86_64/redc_1.asm    | 68 ++++++++++--------------------------------------
 tests/refmpn.c           |  7 ++---
 tune/speed.h             |  6 ++---
 9 files changed, 61 insertions(+), 139 deletions(-)
 delete mode 100644 mpn/generic/redc_1_sec.c

diff --git a/configure.in b/configure.in
index 9c0092427..79367c210 100644
--- a/configure.in
+++ b/configure.in
@@ -2638,7 +2638,7 @@ gmp_mpn_functions="$extra_functions					   \
   mu_bdiv_q mu_bdiv_qr							   \
   bdiv_q bdiv_qr							   \
   divexact bdiv_dbm1c redc_1 redc_2 redc_n powm powlo powm_sec		   \
-  redc_1_sec trialdiv remove						   \
+  trialdiv remove							   \
   and_n andn_n nand_n ior_n iorn_n nior_n xor_n xnor_n			   \
   copyi copyd zero							   \
   $gmp_mpn_functions_optional"
diff --git a/gmp-impl.h b/gmp-impl.h
index e918c31ed..c0ed63791 100644
--- a/gmp-impl.h
+++ b/gmp-impl.h
@@ -1063,7 +1063,7 @@ __GMP_DECLSPEC void mpn_mulmid __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_sr
 __GMP_DECLSPEC mp_limb_t mpn_submul_1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
 
 #define mpn_redc_1 __MPN(redc_1)
-__GMP_DECLSPEC void mpn_redc_1 __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
+__GMP_DECLSPEC void mpn_redc_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
 
 #define mpn_redc_2 __MPN(redc_2)
 __GMP_DECLSPEC void mpn_redc_2 __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
@@ -1471,9 +1471,6 @@ __GMP_DECLSPEC void      mpn_powm_sec __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t
 __GMP_DECLSPEC mp_size_t mpn_powm_sec_itch __GMP_PROTO ((mp_size_t, mp_size_t, mp_size_t));
 #define   mpn_tabselect __MPN(tabselect)
 __GMP_DECLSPEC void      mpn_tabselect __GMP_PROTO ((volatile mp_limb_t *, volatile mp_limb_t *, mp_size_t, mp_size_t, mp_size_t));
-#define mpn_redc_1_sec __MPN(redc_1_sec)
-__GMP_DECLSPEC void mpn_redc_1_sec __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
-
 #define   mpn_addcnd_n __MPN(addcnd_n)
 __GMP_DECLSPEC mp_limb_t mpn_addcnd_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
 #define   mpn_subcnd_n __MPN(subcnd_n)
diff --git a/mpn/generic/powm.c b/mpn/generic/powm.c
index 57edfd4f6..fa92362ad 100644
--- a/mpn/generic/powm.c
+++ b/mpn/generic/powm.c
@@ -6,7 +6,7 @@
    SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
    GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
 
-Copyright 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+Copyright 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
@@ -74,6 +74,16 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 #include "gmp-impl.h"
 #include "longlong.h"
 
+#undef MPN_REDC_1	/* rp[] <- up[]/R^n mod mp[]; up[] (2n limbs) clobbered */
+#define MPN_REDC_1(rp, up, mp, n, invm)					\
+  do {									\
+    mp_limb_t cy;							\
+    mpn_redc_1 (up, mp, n, invm);	/* in place, no final fold */	\
+    cy = mpn_add_n (rp, (up) + (n), up, n); /* high half + carries */	\
+    if (cy != 0)			/* carry out: subtract mp */	\
+      mpn_sub_n (rp, rp, mp, n);					\
+  } while (0)
+
 #if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2
 #define WANT_REDC_2 1
 #endif
@@ -212,12 +222,12 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
   mpn_sqr (tp, this_pp, n);
 #if WANT_REDC_2
   if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
-    mpn_redc_1 (rp, tp, mp, n, mip[0]);
+    MPN_REDC_1 (rp, tp, mp, n, mip[0]);
   else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
     mpn_redc_2 (rp, tp, mp, n, mip);
 #else
   if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
-    mpn_redc_1 (rp, tp, mp, n, mip[0]);
+    MPN_REDC_1 (rp, tp, mp, n, mip[0]);
 #endif
   else
     mpn_redc_n (rp, tp, mp, n, mip);
@@ -229,12 +239,12 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
       this_pp += n;
 #if WANT_REDC_2
       if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
-	mpn_redc_1 (this_pp, tp, mp, n, mip[0]);
+	MPN_REDC_1 (this_pp, tp, mp, n, mip[0]);
       else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
 	mpn_redc_2 (this_pp, tp, mp, n, mip);
 #else
       if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
-	mpn_redc_1 (this_pp, tp, mp, n, mip[0]);
+	MPN_REDC_1 (this_pp, tp, mp, n, mip[0]);
 #endif
       else
 	mpn_redc_n (this_pp, tp, mp, n, mip);
@@ -309,7 +319,7 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
 #undef MPN_REDUCE
 #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
 #define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_1 (rp, tp, mp, n, mip[0])
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
 	      INNERLOOP;
 	    }
 	  else
@@ -319,7 +329,7 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
 #undef MPN_REDUCE
 #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
 #define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_1 (rp, tp, mp, n, mip[0])
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
 	      INNERLOOP;
 	    }
 	}
@@ -380,7 +390,7 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
 #undef MPN_REDUCE
 #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
 #define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_1 (rp, tp, mp, n, mip[0])
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
 	      INNERLOOP;
 	    }
 	  else
@@ -390,7 +400,7 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
 #undef MPN_REDUCE
 #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
 #define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_1 (rp, tp, mp, n, mip[0])
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
 	      INNERLOOP;
 	    }
 	}
@@ -401,7 +411,7 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
 #undef MPN_REDUCE
 #define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
 #define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_1 (rp, tp, mp, n, mip[0])
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
 	  INNERLOOP;
 	}
       else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
@@ -440,7 +450,7 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
 #undef MPN_REDUCE
 #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
 #define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_1 (rp, tp, mp, n, mip[0])
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
 	      INNERLOOP;
 	    }
 	  else
@@ -450,7 +460,7 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
 #undef MPN_REDUCE
 #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
 #define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_1 (rp, tp, mp, n, mip[0])
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
 	      INNERLOOP;
 	    }
 	}
@@ -501,7 +511,7 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
 #undef MPN_REDUCE
 #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
 #define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_1 (rp, tp, mp, n, mip[0])
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
 	      INNERLOOP;
 	    }
 	  else
@@ -511,7 +521,7 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
 #undef MPN_REDUCE
 #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
 #define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_1 (rp, tp, mp, n, mip[0])
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
 	      INNERLOOP;
 	    }
 	}
@@ -522,7 +532,7 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
 #undef MPN_REDUCE
 #define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
 #define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_1 (rp, tp, mp, n, mip[0])
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
 	  INNERLOOP;
 	}
       else
@@ -545,12 +555,12 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
 
 #if WANT_REDC_2
   if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
-    mpn_redc_1 (rp, tp, mp, n, mip[0]);
+    MPN_REDC_1 (rp, tp, mp, n, mip[0]);
   else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
     mpn_redc_2 (rp, tp, mp, n, mip);
 #else
   if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
-    mpn_redc_1 (rp, tp, mp, n, mip[0]);
+    MPN_REDC_1 (rp, tp, mp, n, mip[0]);
 #endif
   else
     mpn_redc_n (rp, tp, mp, n, mip);
diff --git a/mpn/generic/powm_sec.c b/mpn/generic/powm_sec.c
index 315ae6e5e..3a6f55403 100644
--- a/mpn/generic/powm_sec.c
+++ b/mpn/generic/powm_sec.c
@@ -7,7 +7,7 @@
    SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
    GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
 
-Copyright 2007, 2008, 2009 Free Software Foundation, Inc.
+Copyright 2007, 2008, 2009, 2011 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
@@ -56,6 +56,14 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #define WANT_CACHE_SECURITY 1
 
+#undef MPN_REDC_1_SEC	/* like the powm.c fold, but with no branch on cy here */
+#define MPN_REDC_1_SEC(rp, up, mp, n, invm)				\
+  do {									\
+    mp_limb_t cy;							\
+    mpn_redc_1 (up, mp, n, invm);	/* in place; clobbers up[] */	\
+    cy = mpn_add_n (rp, (up) + (n), up, n); /* high half + carries */	\
+    mpn_subcnd_n (rp, rp, mp, n, cy);	/* cy selects the subtract */	\
+  } while (0)
 
 /* Define our own mpn squaring function.  We do this since we cannot use a
    native mpn_sqr_basecase over TUNE_SQR_TOOM2_MAX, or a non-native one over
@@ -252,7 +260,7 @@ mpn_powm_sec (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
     {
       mpn_mul_basecase (tp, this_pp, n, pp + n, n);
       this_pp += n;
-      mpn_redc_1_sec (this_pp, tp, mp, n, minv);
+      MPN_REDC_1_SEC (this_pp, tp, mp, n, minv);
     }
 
   expbits = getbits (ep, ebi, windowsize);
@@ -278,7 +286,7 @@ mpn_powm_sec (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
       do
 	{
 	  mpn_local_sqr (tp, rp, n, tp + 2 * n);
-	  mpn_redc_1_sec (rp, tp, mp, n, minv);
+	  MPN_REDC_1_SEC (rp, tp, mp, n, minv);
 	  this_windowsize--;
 	}
       while (this_windowsize != 0);
@@ -289,12 +297,12 @@ mpn_powm_sec (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
 #else
       mpn_mul_basecase (tp, rp, n, pp + n * expbits, n);
 #endif
-      mpn_redc_1_sec (rp, tp, mp, n, minv);
+      MPN_REDC_1_SEC (rp, tp, mp, n, minv);
     }
 
   MPN_COPY (tp, rp, n);
   MPN_ZERO (tp + n, n);
-  mpn_redc_1_sec (rp, tp, mp, n, minv);
+  MPN_REDC_1_SEC (rp, tp, mp, n, minv);
   cnd = mpn_sub_n (tp, rp, mp, n);	/* we need just retval */
   mpn_subcnd_n (rp, rp, mp, n, !cnd);
   TMP_FREE;
diff --git a/mpn/generic/redc_1.c b/mpn/generic/redc_1.c
index 177f3932f..3567414eb 100644
--- a/mpn/generic/redc_1.c
+++ b/mpn/generic/redc_1.c
@@ -25,7 +25,7 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 #include "gmp-impl.h"
 
 void
-mpn_redc_1 (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_limb_t invm)
+mpn_redc_1 (mp_ptr up, mp_srcptr mp, mp_size_t n, mp_limb_t invm)
 {
   mp_size_t j;
   mp_limb_t cy;
@@ -40,7 +40,4 @@ mpn_redc_1 (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_limb_t invm)
       up[0] = cy;
       up++;
     }
-  cy = mpn_add_n (rp, up, up - n, n);
-  if (cy != 0)
-    mpn_sub_n (rp, rp, mp, n);
 }
diff --git a/mpn/generic/redc_1_sec.c b/mpn/generic/redc_1_sec.c
deleted file mode 100644
index 3d914381c..000000000
--- a/mpn/generic/redc_1_sec.c
+++ /dev/null
@@ -1,45 +0,0 @@
-/* mpn_redc_1_sec.  Set cp[] <- up[]/R^n mod mp[].  Clobber up[].
-   mp[] is n limbs; up[] is 2n limbs.
-
-   THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
-   SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
-
-Copyright (C) 2000, 2001, 2002, 2004, 2008, 2009 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-void
-mpn_redc_1_sec (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_limb_t invm)
-{
-  mp_size_t j;
-  mp_limb_t cy;
-
-  ASSERT (n > 0);
-  ASSERT_MPN (up, 2*n);
-
-  for (j = n - 1; j >= 0; j--)
-    {
-      cy = mpn_addmul_1 (up, mp, n, (up[0] * invm) & GMP_NUMB_MASK);
-      ASSERT (up[0] == 0);
-      up[0] = cy;
-      up++;
-    }
-  cy = mpn_add_n (rp, up, up - n, n);
-  mpn_subcnd_n (rp, rp, mp, n, cy);
-}
diff --git a/mpn/x86_64/redc_1.asm b/mpn/x86_64/redc_1.asm
index 976cab2bc..8d731c68c 100644
--- a/mpn/x86_64/redc_1.asm
+++ b/mpn/x86_64/redc_1.asm
@@ -1,6 +1,6 @@
 dnl  AMD64 mpn_redc_1 -- Montgomery reduction with a one-limb modular inverse.
 
-dnl  Copyright 2004, 2008 Free Software Foundation, Inc.
+dnl  Copyright 2004, 2008, 2011 Free Software Foundation, Inc.
 dnl
 dnl  This file is part of the GNU MP Library.
 dnl
@@ -34,22 +34,18 @@ C TODO
 C  * Handle certain sizes, e.g., 1, 2, 3, 4, 8, with single-loop code.
 C    The code for 1, 2, 3, 4 should perhaps be completely register based.
 C  * Perhaps align outer loops.
-C  * The sub_n at the end leaks side-channel data.  How do we fix that?
-C  * Write mpn_add_n_sub_n computing R = A + B - C.  It should run at 2 c/l.
 C  * We could software pipeline the IMUL stuff, by putting it before the
 C    outer loops and before the end of the outer loops.  The last outer
 C    loop iteration would then compute an unneeded product, but it is at
 C    least not a stray read from up[], since it is at up[n].
-C  * Can we combine both the add_n and sub_n into the loops, somehow?
 
 C INPUT PARAMETERS
-define(`rp',	  `%rdi')
-define(`up',	  `%rsi')
-define(`param_mp',`%rdx')
-define(`n',	  `%rcx')
-define(`invm',	  `%r8')
+define(`up',	  `%rdi')
+define(`mp',	  `%rsi')
+define(`n_param', `%rdx')
+define(`invm',	  `%rcx')
 
-define(`mp',	  `%r13')
+define(`n',	  `%r13')
 define(`i',	  `%r11')
 define(`nneg',	  `%r12')
 
@@ -62,13 +58,12 @@ PROLOGUE(mpn_redc_1)
 	push	%r12
 	push	%r13
 	push	%r14
-	push	n
-	sub	$8, %rsp		C maintain ABI required rsp alignment
 
-	lea	(param_mp,n,8), mp	C mp += n
-	lea	(up,n,8), up		C up += n
+	lea	(mp,n_param,8), mp	C mp += n
+	lea	(up,n_param,8), up	C up += n
 
-	mov	n, nneg
+	mov	n_param, nneg
+	mov	n_param, n
 	neg	nneg
 
 	mov	R32(n), R32(%rax)
@@ -136,9 +131,7 @@ L(n1):	mov	%r14, 16(up,nneg,8)	C up[0]
 	add	$8, up
 	dec	n
 	jnz	L(o1)
-C	lea	(mp), mp
-	lea	16(up), up
-	jmp	L(common)
+	jmp	L(ret)
 
 L(b0):	C lea	(mp), mp
 	lea	-16(up), up
@@ -190,10 +183,7 @@ L(ed0):	add	%r10, (up)
 	add	$8, up
 	dec	n
 	jnz	L(o0)
-C	lea	(mp), mp
-	lea	16(up), up
-	jmp	L(common)
-
+	jmp	L(ret)
 
 L(b3):	lea	-8(mp), mp
 	lea	-24(up), up
@@ -244,9 +234,7 @@ L(ed3):	add	%r10, 8(up)
 	add	$8, up
 	dec	n
 	jnz	L(o3)
-	lea	8(mp), mp
-	lea	24(up), up
-	jmp	L(common)
+	jmp	L(ret)
 
 L(b2):	lea	-16(mp), mp
 	lea	-32(up), up
@@ -299,36 +287,8 @@ L(ed2):	add	%r10, 16(up)
 	add	$8, up
 	dec	n
 	jnz	L(o2)
-	lea	16(mp), mp
-	lea	32(up), up
-
-
-L(common):
-	lea	(mp,nneg,8), mp		C restore entry mp
-
-C   cy = mpn_add_n (rp, up, up - n, n);
-C		    rdi rsi  rdx    rcx
-	lea	(up,nneg,8), up		C up -= n
-	lea	(up,nneg,8), %rdx	C rdx = up - n [up entry value]
-	mov	rp, nneg		C preserve rp over first call
-	mov	8(%rsp), %rcx		C pass entry n
-C	mov	rp, %rdi
-	CALL(	mpn_add_n)
-	test	R32(%rax), R32(%rax)
-	jz	L(ret)
-
-C     mpn_sub_n (rp, rp, mp, n);
-C		 rdi rsi rdx rcx
-	mov	nneg, %rdi
-	mov	nneg, %rsi
-	mov	mp, %rdx
-	mov	8(%rsp), %rcx		C pass entry n
-	CALL(	mpn_sub_n)
 
-L(ret):
-	add	$8, %rsp
-	pop	n			C just increment rsp
-	pop	%r14
+L(ret):	pop	%r14
 	pop	%r13
 	pop	%r12
 	pop	%rbx
diff --git a/tests/refmpn.c b/tests/refmpn.c
index fbcc602d6..7ace7ebce 100644
--- a/tests/refmpn.c
+++ b/tests/refmpn.c
@@ -2,7 +2,7 @@
    of the normal gmp code.  Speed isn't a consideration.
 
 Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
-2007, 2008, 2009 Free Software Foundation, Inc.
+2007, 2008, 2009, 2011 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
@@ -2303,12 +2303,9 @@ refmpn_redc_1 (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_limb_t invm)
 
   for (j = n - 1; j >= 0; j--)
     {
-      up[0] = mpn_addmul_1 (up, mp, n, (up[0] * invm) & GMP_NUMB_MASK);
+      up[0] = refmpn_addmul_1 (up, mp, n, (up[0] * invm) & GMP_NUMB_MASK);
       up++;
     }
-  cy = mpn_add_n (rp, up, up - n, n);
-  if (cy != 0)
-    mpn_sub_n (rp, rp, mp, n);
 }
 
 size_t
diff --git a/tune/speed.h b/tune/speed.h
index c017a8ec2..08c01a5dc 100644
--- a/tune/speed.h
+++ b/tune/speed.h
@@ -2193,7 +2193,7 @@ int speed_routine_count_zeros_setup
 #define SPEED_ROUTINE_REDC_1(function)					\
   {									\
     unsigned   i;							\
-    mp_ptr     cp, mp, tp, ap;						\
+    mp_ptr     mp, tp, ap;						\
     mp_limb_t  inv;							\
     double     t;							\
     TMP_DECL;								\
@@ -2203,7 +2203,6 @@ int speed_routine_count_zeros_setup
     TMP_MARK;								\
     SPEED_TMP_ALLOC_LIMBS (ap, 2*s->size+1, s->align_xp);		\
     SPEED_TMP_ALLOC_LIMBS (mp, s->size,     s->align_yp);		\
-    SPEED_TMP_ALLOC_LIMBS (cp, s->size,     s->align_wp);		\
     SPEED_TMP_ALLOC_LIMBS (tp, 2*s->size+1, s->align_wp2);		\
 									\
     MPN_COPY (ap,         s->xp, s->size);				\
@@ -2218,14 +2217,13 @@ int speed_routine_count_zeros_setup
     speed_operand_src (s, ap, 2*s->size+1);				\
     speed_operand_dst (s, tp, 2*s->size+1);				\
     speed_operand_src (s, mp, s->size);					\
-    speed_operand_dst (s, cp, s->size);					\
     speed_cache_fill (s);						\
 									\
     speed_starttime ();							\
     i = s->reps;							\
     do {								\
       MPN_COPY (tp, ap, 2*s->size);					\
-      function (cp, tp, mp, s->size, inv);				\
+      function (tp, mp, s->size, inv);					\
     } while (--i != 0);							\
     t = speed_endtime ();						\
 									\
-- 
cgit v1.2.1