summary refs log tree commit diff
diff options
context:
space:
mode:
author Torbjorn Granlund <tege@gmplib.org> 2011-11-07 18:42:27 +0100
committer Torbjorn Granlund <tege@gmplib.org> 2011-11-07 18:42:27 +0100
commit 98ec919fbc24e85c20818b472131687ba42ae6ab (patch)
tree 5a402cf11ffb5ad59a3ddbfa09eba510b2592bdd
parent 9e346a7777b9c5576e5a4758a1701ed114a9e977 (diff)
download gmp-98ec919fbc24e85c20818b472131687ba42ae6ab.tar.gz
Change how mpn_redc_1 works, use more broadly.
-rw-r--r-- configure.in 2
-rw-r--r-- gmp-impl.h 5
-rw-r--r-- mpn/generic/powm.c 44
-rw-r--r-- mpn/generic/powm_sec.c 18
-rw-r--r-- mpn/generic/redc_1.c 5
-rw-r--r-- mpn/generic/redc_1_sec.c 45
-rw-r--r-- mpn/x86_64/redc_1.asm 68
-rw-r--r-- tests/refmpn.c 7
-rw-r--r-- tune/speed.h 6
9 files changed, 61 insertions, 139 deletions
diff --git a/configure.in b/configure.in
index 9c0092427..79367c210 100644
--- a/configure.in
+++ b/configure.in
@@ -2638,7 +2638,7 @@ gmp_mpn_functions="$extra_functions \
mu_bdiv_q mu_bdiv_qr \
bdiv_q bdiv_qr \
divexact bdiv_dbm1c redc_1 redc_2 redc_n powm powlo powm_sec \
- redc_1_sec trialdiv remove \
+ trialdiv remove \
and_n andn_n nand_n ior_n iorn_n nior_n xor_n xnor_n \
copyi copyd zero \
$gmp_mpn_functions_optional"
diff --git a/gmp-impl.h b/gmp-impl.h
index e918c31ed..c0ed63791 100644
--- a/gmp-impl.h
+++ b/gmp-impl.h
@@ -1063,7 +1063,7 @@ __GMP_DECLSPEC void mpn_mulmid __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_sr
__GMP_DECLSPEC mp_limb_t mpn_submul_1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
#define mpn_redc_1 __MPN(redc_1)
-__GMP_DECLSPEC void mpn_redc_1 __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
+__GMP_DECLSPEC void mpn_redc_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
#define mpn_redc_2 __MPN(redc_2)
__GMP_DECLSPEC void mpn_redc_2 __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
@@ -1471,9 +1471,6 @@ __GMP_DECLSPEC void mpn_powm_sec __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t
__GMP_DECLSPEC mp_size_t mpn_powm_sec_itch __GMP_PROTO ((mp_size_t, mp_size_t, mp_size_t));
#define mpn_tabselect __MPN(tabselect)
__GMP_DECLSPEC void mpn_tabselect __GMP_PROTO ((volatile mp_limb_t *, volatile mp_limb_t *, mp_size_t, mp_size_t, mp_size_t));
-#define mpn_redc_1_sec __MPN(redc_1_sec)
-__GMP_DECLSPEC void mpn_redc_1_sec __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
-
#define mpn_addcnd_n __MPN(addcnd_n)
__GMP_DECLSPEC mp_limb_t mpn_addcnd_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
#define mpn_subcnd_n __MPN(subcnd_n)
diff --git a/mpn/generic/powm.c b/mpn/generic/powm.c
index 57edfd4f6..fa92362ad 100644
--- a/mpn/generic/powm.c
+++ b/mpn/generic/powm.c
@@ -6,7 +6,7 @@
SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+Copyright 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -74,6 +74,16 @@ along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp-impl.h"
#include "longlong.h"
+#undef MPN_REDC_1
+#define MPN_REDC_1(rp, up, mp, n, invm) \
+ do { \
+ mp_limb_t cy; \
+ mpn_redc_1 (up, mp, n, invm); \
+ cy = mpn_add_n (rp, up + n, up, n); \
+ if (cy != 0) \
+ mpn_sub_n (rp, rp, mp, n); \
+ } while (0)
+
#if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2
#define WANT_REDC_2 1
#endif
@@ -212,12 +222,12 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
mpn_sqr (tp, this_pp, n);
#if WANT_REDC_2
if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
- mpn_redc_1 (rp, tp, mp, n, mip[0]);
+ MPN_REDC_1 (rp, tp, mp, n, mip[0]);
else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
mpn_redc_2 (rp, tp, mp, n, mip);
#else
if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
- mpn_redc_1 (rp, tp, mp, n, mip[0]);
+ MPN_REDC_1 (rp, tp, mp, n, mip[0]);
#endif
else
mpn_redc_n (rp, tp, mp, n, mip);
@@ -229,12 +239,12 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
this_pp += n;
#if WANT_REDC_2
if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
- mpn_redc_1 (this_pp, tp, mp, n, mip[0]);
+ MPN_REDC_1 (this_pp, tp, mp, n, mip[0]);
else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
mpn_redc_2 (this_pp, tp, mp, n, mip);
#else
if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
- mpn_redc_1 (this_pp, tp, mp, n, mip[0]);
+ MPN_REDC_1 (this_pp, tp, mp, n, mip[0]);
#endif
else
mpn_redc_n (this_pp, tp, mp, n, mip);
@@ -309,7 +319,7 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
#define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_1 (rp, tp, mp, n, mip[0])
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
INNERLOOP;
}
else
@@ -319,7 +329,7 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_1 (rp, tp, mp, n, mip[0])
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
INNERLOOP;
}
}
@@ -380,7 +390,7 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
#define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_1 (rp, tp, mp, n, mip[0])
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
INNERLOOP;
}
else
@@ -390,7 +400,7 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_1 (rp, tp, mp, n, mip[0])
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
INNERLOOP;
}
}
@@ -401,7 +411,7 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
#define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_1 (rp, tp, mp, n, mip[0])
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
INNERLOOP;
}
else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
@@ -440,7 +450,7 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
#define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_1 (rp, tp, mp, n, mip[0])
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
INNERLOOP;
}
else
@@ -450,7 +460,7 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_1 (rp, tp, mp, n, mip[0])
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
INNERLOOP;
}
}
@@ -501,7 +511,7 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
#define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_1 (rp, tp, mp, n, mip[0])
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
INNERLOOP;
}
else
@@ -511,7 +521,7 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_1 (rp, tp, mp, n, mip[0])
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
INNERLOOP;
}
}
@@ -522,7 +532,7 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
#define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_1 (rp, tp, mp, n, mip[0])
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
INNERLOOP;
}
else
@@ -545,12 +555,12 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
#if WANT_REDC_2
if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
- mpn_redc_1 (rp, tp, mp, n, mip[0]);
+ MPN_REDC_1 (rp, tp, mp, n, mip[0]);
else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
mpn_redc_2 (rp, tp, mp, n, mip);
#else
if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
- mpn_redc_1 (rp, tp, mp, n, mip[0]);
+ MPN_REDC_1 (rp, tp, mp, n, mip[0]);
#endif
else
mpn_redc_n (rp, tp, mp, n, mip);
diff --git a/mpn/generic/powm_sec.c b/mpn/generic/powm_sec.c
index 315ae6e5e..3a6f55403 100644
--- a/mpn/generic/powm_sec.c
+++ b/mpn/generic/powm_sec.c
@@ -7,7 +7,7 @@
SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 2007, 2008, 2009 Free Software Foundation, Inc.
+Copyright 2007, 2008, 2009, 2011 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -56,6 +56,14 @@ along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#define WANT_CACHE_SECURITY 1
+#undef MPN_REDC_1_SEC
+#define MPN_REDC_1_SEC(rp, up, mp, n, invm) \
+ do { \
+ mp_limb_t cy; \
+ mpn_redc_1 (up, mp, n, invm); \
+ cy = mpn_add_n (rp, up + n, up, n); \
+ mpn_subcnd_n (rp, rp, mp, n, cy); \
+ } while (0)
/* Define our own mpn squaring function. We do this since we cannot use a
native mpn_sqr_basecase over TUNE_SQR_TOOM2_MAX, or a non-native one over
@@ -252,7 +260,7 @@ mpn_powm_sec (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
{
mpn_mul_basecase (tp, this_pp, n, pp + n, n);
this_pp += n;
- mpn_redc_1_sec (this_pp, tp, mp, n, minv);
+ MPN_REDC_1_SEC (this_pp, tp, mp, n, minv);
}
expbits = getbits (ep, ebi, windowsize);
@@ -278,7 +286,7 @@ mpn_powm_sec (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
do
{
mpn_local_sqr (tp, rp, n, tp + 2 * n);
- mpn_redc_1_sec (rp, tp, mp, n, minv);
+ MPN_REDC_1_SEC (rp, tp, mp, n, minv);
this_windowsize--;
}
while (this_windowsize != 0);
@@ -289,12 +297,12 @@ mpn_powm_sec (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
#else
mpn_mul_basecase (tp, rp, n, pp + n * expbits, n);
#endif
- mpn_redc_1_sec (rp, tp, mp, n, minv);
+ MPN_REDC_1_SEC (rp, tp, mp, n, minv);
}
MPN_COPY (tp, rp, n);
MPN_ZERO (tp + n, n);
- mpn_redc_1_sec (rp, tp, mp, n, minv);
+ MPN_REDC_1_SEC (rp, tp, mp, n, minv);
cnd = mpn_sub_n (tp, rp, mp, n); /* we need just retval */
mpn_subcnd_n (rp, rp, mp, n, !cnd);
TMP_FREE;
diff --git a/mpn/generic/redc_1.c b/mpn/generic/redc_1.c
index 177f3932f..3567414eb 100644
--- a/mpn/generic/redc_1.c
+++ b/mpn/generic/redc_1.c
@@ -25,7 +25,7 @@ along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp-impl.h"
void
-mpn_redc_1 (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_limb_t invm)
+mpn_redc_1 (mp_ptr up, mp_srcptr mp, mp_size_t n, mp_limb_t invm)
{
mp_size_t j;
mp_limb_t cy;
@@ -40,7 +40,4 @@ mpn_redc_1 (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_limb_t invm)
up[0] = cy;
up++;
}
- cy = mpn_add_n (rp, up, up - n, n);
- if (cy != 0)
- mpn_sub_n (rp, rp, mp, n);
}
diff --git a/mpn/generic/redc_1_sec.c b/mpn/generic/redc_1_sec.c
deleted file mode 100644
index 3d914381c..000000000
--- a/mpn/generic/redc_1_sec.c
+++ /dev/null
@@ -1,45 +0,0 @@
-/* mpn_redc_1_sec. Set cp[] <- up[]/R^n mod mp[]. Clobber up[].
- mp[] is n limbs; up[] is 2n limbs.
-
- THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS ONLY
- SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
-
-Copyright (C) 2000, 2001, 2002, 2004, 2008, 2009 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-void
-mpn_redc_1_sec (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_limb_t invm)
-{
- mp_size_t j;
- mp_limb_t cy;
-
- ASSERT (n > 0);
- ASSERT_MPN (up, 2*n);
-
- for (j = n - 1; j >= 0; j--)
- {
- cy = mpn_addmul_1 (up, mp, n, (up[0] * invm) & GMP_NUMB_MASK);
- ASSERT (up[0] == 0);
- up[0] = cy;
- up++;
- }
- cy = mpn_add_n (rp, up, up - n, n);
- mpn_subcnd_n (rp, rp, mp, n, cy);
-}
diff --git a/mpn/x86_64/redc_1.asm b/mpn/x86_64/redc_1.asm
index 976cab2bc..8d731c68c 100644
--- a/mpn/x86_64/redc_1.asm
+++ b/mpn/x86_64/redc_1.asm
@@ -1,6 +1,6 @@
dnl AMD64 mpn_redc_1 -- Montgomery reduction with a one-limb modular inverse.
-dnl Copyright 2004, 2008 Free Software Foundation, Inc.
+dnl Copyright 2004, 2008, 2011 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
@@ -34,22 +34,18 @@ C TODO
C * Handle certain sizes, e.g., 1, 2, 3, 4, 8, with single-loop code.
C The code for 1, 2, 3, 4 should perhaps be completely register based.
C * Perhaps align outer loops.
-C * The sub_n at the end leaks side-channel data. How do we fix that?
-C * Write mpn_add_n_sub_n computing R = A + B - C. It should run at 2 c/l.
C * We could software pipeline the IMUL stuff, by putting it before the
C outer loops and before the end of the outer loops. The last outer
C loop iteration would then compute an unneeded product, but it is at
C least not a stray read from up[], since it is at up[n].
-C * Can we combine both the add_n and sub_n into the loops, somehow?
C INPUT PARAMETERS
-define(`rp', `%rdi')
-define(`up', `%rsi')
-define(`param_mp',`%rdx')
-define(`n', `%rcx')
-define(`invm', `%r8')
+define(`up', `%rdi')
+define(`mp', `%rsi')
+define(`n_param', `%rdx')
+define(`invm', `%rcx')
-define(`mp', `%r13')
+define(`n', `%r13')
define(`i', `%r11')
define(`nneg', `%r12')
@@ -62,13 +58,12 @@ PROLOGUE(mpn_redc_1)
push %r12
push %r13
push %r14
- push n
- sub $8, %rsp C maintain ABI required rsp alignment
- lea (param_mp,n,8), mp C mp += n
- lea (up,n,8), up C up += n
+ lea (mp,n_param,8), mp C mp += n
+ lea (up,n_param,8), up C up += n
- mov n, nneg
+ mov n_param, nneg
+ mov n_param, n
neg nneg
mov R32(n), R32(%rax)
@@ -136,9 +131,7 @@ L(n1): mov %r14, 16(up,nneg,8) C up[0]
add $8, up
dec n
jnz L(o1)
-C lea (mp), mp
- lea 16(up), up
- jmp L(common)
+ jmp L(ret)
L(b0): C lea (mp), mp
lea -16(up), up
@@ -190,10 +183,7 @@ L(ed0): add %r10, (up)
add $8, up
dec n
jnz L(o0)
-C lea (mp), mp
- lea 16(up), up
- jmp L(common)
-
+ jmp L(ret)
L(b3): lea -8(mp), mp
lea -24(up), up
@@ -244,9 +234,7 @@ L(ed3): add %r10, 8(up)
add $8, up
dec n
jnz L(o3)
- lea 8(mp), mp
- lea 24(up), up
- jmp L(common)
+ jmp L(ret)
L(b2): lea -16(mp), mp
lea -32(up), up
@@ -299,36 +287,8 @@ L(ed2): add %r10, 16(up)
add $8, up
dec n
jnz L(o2)
- lea 16(mp), mp
- lea 32(up), up
-
-
-L(common):
- lea (mp,nneg,8), mp C restore entry mp
-
-C cy = mpn_add_n (rp, up, up - n, n);
-C rdi rsi rdx rcx
- lea (up,nneg,8), up C up -= n
- lea (up,nneg,8), %rdx C rdx = up - n [up entry value]
- mov rp, nneg C preserve rp over first call
- mov 8(%rsp), %rcx C pass entry n
-C mov rp, %rdi
- CALL( mpn_add_n)
- test R32(%rax), R32(%rax)
- jz L(ret)
-
-C mpn_sub_n (rp, rp, mp, n);
-C rdi rsi rdx rcx
- mov nneg, %rdi
- mov nneg, %rsi
- mov mp, %rdx
- mov 8(%rsp), %rcx C pass entry n
- CALL( mpn_sub_n)
-L(ret):
- add $8, %rsp
- pop n C just increment rsp
- pop %r14
+L(ret): pop %r14
pop %r13
pop %r12
pop %rbx
diff --git a/tests/refmpn.c b/tests/refmpn.c
index fbcc602d6..7ace7ebce 100644
--- a/tests/refmpn.c
+++ b/tests/refmpn.c
@@ -2,7 +2,7 @@
of the normal gmp code. Speed isn't a consideration.
Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
-2007, 2008, 2009 Free Software Foundation, Inc.
+2007, 2008, 2009, 2011 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -2303,12 +2303,9 @@ refmpn_redc_1 (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_limb_t invm)
for (j = n - 1; j >= 0; j--)
{
- up[0] = mpn_addmul_1 (up, mp, n, (up[0] * invm) & GMP_NUMB_MASK);
+ up[0] = refmpn_addmul_1 (up, mp, n, (up[0] * invm) & GMP_NUMB_MASK);
up++;
}
- cy = mpn_add_n (rp, up, up - n, n);
- if (cy != 0)
- mpn_sub_n (rp, rp, mp, n);
}
size_t
diff --git a/tune/speed.h b/tune/speed.h
index c017a8ec2..08c01a5dc 100644
--- a/tune/speed.h
+++ b/tune/speed.h
@@ -2193,7 +2193,7 @@ int speed_routine_count_zeros_setup
#define SPEED_ROUTINE_REDC_1(function) \
{ \
unsigned i; \
- mp_ptr cp, mp, tp, ap; \
+ mp_ptr mp, tp, ap; \
mp_limb_t inv; \
double t; \
TMP_DECL; \
@@ -2203,7 +2203,6 @@ int speed_routine_count_zeros_setup
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (ap, 2*s->size+1, s->align_xp); \
SPEED_TMP_ALLOC_LIMBS (mp, s->size, s->align_yp); \
- SPEED_TMP_ALLOC_LIMBS (cp, s->size, s->align_wp); \
SPEED_TMP_ALLOC_LIMBS (tp, 2*s->size+1, s->align_wp2); \
\
MPN_COPY (ap, s->xp, s->size); \
@@ -2218,14 +2217,13 @@ int speed_routine_count_zeros_setup
speed_operand_src (s, ap, 2*s->size+1); \
speed_operand_dst (s, tp, 2*s->size+1); \
speed_operand_src (s, mp, s->size); \
- speed_operand_dst (s, cp, s->size); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do { \
MPN_COPY (tp, ap, 2*s->size); \
- function (cp, tp, mp, s->size, inv); \
+ function (tp, mp, s->size, inv); \
} while (--i != 0); \
t = speed_endtime (); \
\