From 98ec919fbc24e85c20818b472131687ba42ae6ab Mon Sep 17 00:00:00 2001 From: Torbjorn Granlund Date: Mon, 7 Nov 2011 18:42:27 +0100 Subject: Change how mpn_redc_1 works, use more broadly. --- configure.in | 2 +- gmp-impl.h | 5 +--- mpn/generic/powm.c | 44 +++++++++++++++++++------------ mpn/generic/powm_sec.c | 18 +++++++++---- mpn/generic/redc_1.c | 5 +--- mpn/generic/redc_1_sec.c | 45 -------------------------------- mpn/x86_64/redc_1.asm | 68 ++++++++++-------------------------------------- tests/refmpn.c | 7 ++--- tune/speed.h | 6 ++--- 9 files changed, 61 insertions(+), 139 deletions(-) delete mode 100644 mpn/generic/redc_1_sec.c diff --git a/configure.in b/configure.in index 9c0092427..79367c210 100644 --- a/configure.in +++ b/configure.in @@ -2638,7 +2638,7 @@ gmp_mpn_functions="$extra_functions \ mu_bdiv_q mu_bdiv_qr \ bdiv_q bdiv_qr \ divexact bdiv_dbm1c redc_1 redc_2 redc_n powm powlo powm_sec \ - redc_1_sec trialdiv remove \ + trialdiv remove \ and_n andn_n nand_n ior_n iorn_n nior_n xor_n xnor_n \ copyi copyd zero \ $gmp_mpn_functions_optional" diff --git a/gmp-impl.h b/gmp-impl.h index e918c31ed..c0ed63791 100644 --- a/gmp-impl.h +++ b/gmp-impl.h @@ -1063,7 +1063,7 @@ __GMP_DECLSPEC void mpn_mulmid __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_sr __GMP_DECLSPEC mp_limb_t mpn_submul_1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)); #define mpn_redc_1 __MPN(redc_1) -__GMP_DECLSPEC void mpn_redc_1 __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)); +__GMP_DECLSPEC void mpn_redc_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)); #define mpn_redc_2 __MPN(redc_2) __GMP_DECLSPEC void mpn_redc_2 __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr)); @@ -1471,9 +1471,6 @@ __GMP_DECLSPEC void mpn_powm_sec __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t __GMP_DECLSPEC mp_size_t mpn_powm_sec_itch __GMP_PROTO ((mp_size_t, mp_size_t, mp_size_t)); #define mpn_tabselect __MPN(tabselect) __GMP_DECLSPEC void mpn_tabselect __GMP_PROTO ((volatile mp_limb_t *, volatile mp_limb_t *, mp_size_t, mp_size_t, mp_size_t)); -#define mpn_redc_1_sec __MPN(redc_1_sec) -__GMP_DECLSPEC void mpn_redc_1_sec __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)); - #define mpn_addcnd_n __MPN(addcnd_n) __GMP_DECLSPEC mp_limb_t mpn_addcnd_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t)); #define mpn_subcnd_n __MPN(subcnd_n) diff --git a/mpn/generic/powm.c b/mpn/generic/powm.c index 57edfd4f6..fa92362ad 100644 --- a/mpn/generic/powm.c +++ b/mpn/generic/powm.c @@ -6,7 +6,7 @@ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. -Copyright 2007, 2008, 2009, 2010 Free Software Foundation, Inc. +Copyright 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc. This file is part of the GNU MP Library. @@ -74,6 +74,16 @@ along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #include "gmp-impl.h" #include "longlong.h" +#undef MPN_REDC_1 +#define MPN_REDC_1(rp, up, mp, n, invm) \ + do { \ + mp_limb_t cy; \ + mpn_redc_1 (up, mp, n, invm); \ + cy = mpn_add_n (rp, up + n, up, n); \ + if (cy != 0) \ + mpn_sub_n (rp, rp, mp, n); \ + } while (0) + #if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2 #define WANT_REDC_2 1 #endif @@ -212,12 +222,12 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn, mpn_sqr (tp, this_pp, n); #if WANT_REDC_2 if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD)) - mpn_redc_1 (rp, tp, mp, n, mip[0]); + MPN_REDC_1 (rp, tp, mp, n, mip[0]); else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD)) mpn_redc_2 (rp, tp, mp, n, mip); #else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD)) - mpn_redc_1 (rp, tp, mp, n, mip[0]); + MPN_REDC_1 (rp, tp, mp, n, mip[0]); #endif else mpn_redc_n (rp, tp, mp, n, mip); @@ -229,12 +239,12 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn, this_pp += n; #if WANT_REDC_2 if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD)) - mpn_redc_1 (this_pp, tp, mp, n, mip[0]); + MPN_REDC_1 (this_pp, tp, mp, n, mip[0]); else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD)) mpn_redc_2 (this_pp, tp, mp, n, mip); #else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD)) - mpn_redc_1 (this_pp, tp, mp, n, mip[0]); + MPN_REDC_1 (this_pp, tp, mp, n, mip[0]); #endif else mpn_redc_n (this_pp, tp, mp, n, mip); @@ -309,7 +319,7 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn, #undef MPN_REDUCE #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n) #define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n) -#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_1 (rp, tp, mp, n, mip[0]) +#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0]) INNERLOOP; } else @@ -319,7 +329,7 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn, #undef MPN_REDUCE #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n) #define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n) -#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_1 (rp, tp, mp, n, mip[0]) +#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0]) INNERLOOP; } } @@ -380,7 +390,7 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn, #undef MPN_REDUCE #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n) #define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n) -#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_1 (rp, tp, mp, n, mip[0]) +#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0]) INNERLOOP; } else @@ -390,7 +400,7 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn, #undef MPN_REDUCE #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n) #define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n) -#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_1 (rp, tp, mp, n, mip[0]) +#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0]) INNERLOOP; } } @@ -401,7 +411,7 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn, #undef MPN_REDUCE #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n) #define MPN_SQR(r,a,n) mpn_sqr (r,a,n) -#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_1 (rp, tp, mp, n, mip[0]) +#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0]) INNERLOOP; } else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD)) @@ -440,7 +450,7 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn, #undef MPN_REDUCE #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n) #define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n) -#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_1 (rp, tp, mp, n, mip[0]) +#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0]) INNERLOOP; } else @@ -450,7 +460,7 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn, #undef MPN_REDUCE #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n) #define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n) -#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_1 (rp, tp, mp, n, mip[0]) +#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0]) INNERLOOP; } } @@ -501,7 +511,7 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn, #undef MPN_REDUCE #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n) #define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n) -#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_1 (rp, tp, mp, n, mip[0]) +#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0]) INNERLOOP; } else @@ -511,7 +521,7 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn, #undef MPN_REDUCE #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n) #define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n) -#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_1 (rp, tp, mp, n, mip[0]) +#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0]) INNERLOOP; } } @@ -522,7 +532,7 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn, #undef MPN_REDUCE #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n) #define MPN_SQR(r,a,n) mpn_sqr (r,a,n) -#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_1 (rp, tp, mp, n, mip[0]) +#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0]) INNERLOOP; } else @@ -545,12 +555,12 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn, #if WANT_REDC_2 if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD)) - mpn_redc_1 (rp, tp, mp, n, mip[0]); + MPN_REDC_1 (rp, tp, mp, n, mip[0]); else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD)) mpn_redc_2 (rp, tp, mp, n, mip); #else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD)) - mpn_redc_1 (rp, tp, mp, n, mip[0]); + MPN_REDC_1 (rp, tp, mp, n, mip[0]); #endif else mpn_redc_n (rp, tp, mp, n, mip); diff --git a/mpn/generic/powm_sec.c b/mpn/generic/powm_sec.c index 315ae6e5e..3a6f55403 100644 --- a/mpn/generic/powm_sec.c +++ b/mpn/generic/powm_sec.c @@ -7,7 +7,7 @@ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. -Copyright 2007, 2008, 2009 Free Software Foundation, Inc. +Copyright 2007, 2008, 2009, 2011 Free Software Foundation, Inc. This file is part of the GNU MP Library. @@ -56,6 +56,14 @@ along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #define WANT_CACHE_SECURITY 1 +#undef MPN_REDC_1_SEC +#define MPN_REDC_1_SEC(rp, up, mp, n, invm) \ + do { \ + mp_limb_t cy; \ + mpn_redc_1 (up, mp, n, invm); \ + cy = mpn_add_n (rp, up + n, up, n); \ + mpn_subcnd_n (rp, rp, mp, n, cy); \ + } while (0) /* Define our own mpn squaring function. We do this since we cannot use a native mpn_sqr_basecase over TUNE_SQR_TOOM2_MAX, or a non-native one over @@ -252,7 +260,7 @@ mpn_powm_sec (mp_ptr rp, mp_srcptr bp, mp_size_t bn, { mpn_mul_basecase (tp, this_pp, n, pp + n, n); this_pp += n; - mpn_redc_1_sec (this_pp, tp, mp, n, minv); + MPN_REDC_1_SEC (this_pp, tp, mp, n, minv); } expbits = getbits (ep, ebi, windowsize); @@ -278,7 +286,7 @@ mpn_powm_sec (mp_ptr rp, mp_srcptr bp, mp_size_t bn, do { mpn_local_sqr (tp, rp, n, tp + 2 * n); - mpn_redc_1_sec (rp, tp, mp, n, minv); + MPN_REDC_1_SEC (rp, tp, mp, n, minv); this_windowsize--; } while (this_windowsize != 0); @@ -289,12 +297,12 @@ mpn_powm_sec (mp_ptr rp, mp_srcptr bp, mp_size_t bn, #else mpn_mul_basecase (tp, rp, n, pp + n * expbits, n); #endif - mpn_redc_1_sec (rp, tp, mp, n, minv); + MPN_REDC_1_SEC (rp, tp, mp, n, minv); } MPN_COPY (tp, rp, n); MPN_ZERO (tp + n, n); - mpn_redc_1_sec (rp, tp, mp, n, minv); + MPN_REDC_1_SEC (rp, tp, mp, n, minv); cnd = mpn_sub_n (tp, rp, mp, n); /* we need just retval */ mpn_subcnd_n (rp, rp, mp, n, !cnd); TMP_FREE; diff --git a/mpn/generic/redc_1.c b/mpn/generic/redc_1.c index 177f3932f..3567414eb 100644 --- a/mpn/generic/redc_1.c +++ b/mpn/generic/redc_1.c @@ -25,7 +25,7 @@ along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #include "gmp-impl.h" void -mpn_redc_1 (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_limb_t invm) +mpn_redc_1 (mp_ptr up, mp_srcptr mp, mp_size_t n, mp_limb_t invm) { mp_size_t j; mp_limb_t cy; @@ -40,7 +40,4 @@ mpn_redc_1 (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_limb_t invm) up[0] = cy; up++; } - cy = mpn_add_n (rp, up, up - n, n); - if (cy != 0) - mpn_sub_n (rp, rp, mp, n); } diff --git a/mpn/generic/redc_1_sec.c b/mpn/generic/redc_1_sec.c deleted file mode 100644 index 3d914381c..000000000 --- a/mpn/generic/redc_1_sec.c +++ /dev/null @@ -1,45 +0,0 @@ -/* mpn_redc_1_sec. Set cp[] <- up[]/R^n mod mp[]. Clobber up[]. - mp[] is n limbs; up[] is 2n limbs. - - THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS ONLY - SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES. - -Copyright (C) 2000, 2001, 2002, 2004, 2008, 2009 Free Software Foundation, Inc. - -This file is part of the GNU MP Library. - -The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of the GNU Lesser General Public License as published by -the Free Software Foundation; either version 3 of the License, or (at your -option) any later version. - -The GNU MP Library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -License for more details. - -You should have received a copy of the GNU Lesser General Public License -along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ - -#include "gmp.h" -#include "gmp-impl.h" - -void -mpn_redc_1_sec (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_limb_t invm) -{ - mp_size_t j; - mp_limb_t cy; - - ASSERT (n > 0); - ASSERT_MPN (up, 2*n); - - for (j = n - 1; j >= 0; j--) - { - cy = mpn_addmul_1 (up, mp, n, (up[0] * invm) & GMP_NUMB_MASK); - ASSERT (up[0] == 0); - up[0] = cy; - up++; - } - cy = mpn_add_n (rp, up, up - n, n); - mpn_subcnd_n (rp, rp, mp, n, cy); -} diff --git a/mpn/x86_64/redc_1.asm b/mpn/x86_64/redc_1.asm index 976cab2bc..8d731c68c 100644 --- a/mpn/x86_64/redc_1.asm +++ b/mpn/x86_64/redc_1.asm @@ -1,6 +1,6 @@ dnl AMD64 mpn_redc_1 -- Montgomery reduction with a one-limb modular inverse. -dnl Copyright 2004, 2008 Free Software Foundation, Inc. +dnl Copyright 2004, 2008, 2011 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl @@ -34,22 +34,18 @@ C TODO C * Handle certain sizes, e.g., 1, 2, 3, 4, 8, with single-loop code. C The code for 1, 2, 3, 4 should perhaps be completely register based. C * Perhaps align outer loops. -C * The sub_n at the end leaks side-channel data. How do we fix that? -C * Write mpn_add_n_sub_n computing R = A + B - C. It should run at 2 c/l. C * We could software pipeline the IMUL stuff, by putting it before the C outer loops and before the end of the outer loops. The last outer C loop iteration would then compute an unneeded product, but it is at C least not a stray read from up[], since it is at up[n]. -C * Can we combine both the add_n and sub_n into the loops, somehow? C INPUT PARAMETERS -define(`rp', `%rdi') -define(`up', `%rsi') -define(`param_mp',`%rdx') -define(`n', `%rcx') -define(`invm', `%r8') +define(`up', `%rdi') +define(`mp', `%rsi') +define(`n_param', `%rdx') +define(`invm', `%rcx') -define(`mp', `%r13') +define(`n', `%r13') define(`i', `%r11') define(`nneg', `%r12') @@ -62,13 +58,12 @@ PROLOGUE(mpn_redc_1) push %r12 push %r13 push %r14 - push n - sub $8, %rsp C maintain ABI required rsp alignment - lea (param_mp,n,8), mp C mp += n - lea (up,n,8), up C up += n + lea (mp,n_param,8), mp C mp += n + lea (up,n_param,8), up C up += n - mov n, nneg + mov n_param, nneg + mov n_param, n neg nneg mov R32(n), R32(%rax) @@ -136,9 +131,7 @@ L(n1): mov %r14, 16(up,nneg,8) C up[0] add $8, up dec n jnz L(o1) -C lea (mp), mp - lea 16(up), up - jmp L(common) + jmp L(ret) L(b0): C lea (mp), mp lea -16(up), up @@ -190,10 +183,7 @@ L(ed0): add %r10, (up) add $8, up dec n jnz L(o0) -C lea (mp), mp - lea 16(up), up - jmp L(common) - + jmp L(ret) L(b3): lea -8(mp), mp lea -24(up), up @@ -244,9 +234,7 @@ L(ed3): add %r10, 8(up) add $8, up dec n jnz L(o3) - lea 8(mp), mp - lea 24(up), up - jmp L(common) + jmp L(ret) L(b2): lea -16(mp), mp lea -32(up), up @@ -299,36 +287,8 @@ L(ed2): add %r10, 16(up) add $8, up dec n jnz L(o2) - lea 16(mp), mp - lea 32(up), up - - -L(common): - lea (mp,nneg,8), mp C restore entry mp - -C cy = mpn_add_n (rp, up, up - n, n); -C rdi rsi rdx rcx - lea (up,nneg,8), up C up -= n - lea (up,nneg,8), %rdx C rdx = up - n [up entry value] - mov rp, nneg C preserve rp over first call - mov 8(%rsp), %rcx C pass entry n -C mov rp, %rdi - CALL( mpn_add_n) - test R32(%rax), R32(%rax) - jz L(ret) - -C mpn_sub_n (rp, rp, mp, n); -C rdi rsi rdx rcx - mov nneg, %rdi - mov nneg, %rsi - mov mp, %rdx - mov 8(%rsp), %rcx C pass entry n - CALL( mpn_sub_n) -L(ret): - add $8, %rsp - pop n C just increment rsp - pop %r14 +L(ret): pop %r14 pop %r13 pop %r12 pop %rbx diff --git a/tests/refmpn.c b/tests/refmpn.c index fbcc602d6..7ace7ebce 100644 --- a/tests/refmpn.c +++ b/tests/refmpn.c @@ -2,7 +2,7 @@ of the normal gmp code. Speed isn't a consideration. Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, -2007, 2008, 2009 Free Software Foundation, Inc. +2007, 2008, 2009, 2011 Free Software Foundation, Inc. This file is part of the GNU MP Library. @@ -2303,12 +2303,9 @@ refmpn_redc_1 (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_limb_t invm) for (j = n - 1; j >= 0; j--) { - up[0] = mpn_addmul_1 (up, mp, n, (up[0] * invm) & GMP_NUMB_MASK); + up[0] = refmpn_addmul_1 (up, mp, n, (up[0] * invm) & GMP_NUMB_MASK); up++; } - cy = mpn_add_n (rp, up, up - n, n); - if (cy != 0) - mpn_sub_n (rp, rp, mp, n); } size_t diff --git a/tune/speed.h b/tune/speed.h index c017a8ec2..08c01a5dc 100644 --- a/tune/speed.h +++ b/tune/speed.h @@ -2193,7 +2193,7 @@ int speed_routine_count_zeros_setup #define SPEED_ROUTINE_REDC_1(function) \ { \ unsigned i; \ - mp_ptr cp, mp, tp, ap; \ + mp_ptr mp, tp, ap; \ mp_limb_t inv; \ double t; \ TMP_DECL; \ @@ -2203,7 +2203,6 @@ int speed_routine_count_zeros_setup TMP_MARK; \ SPEED_TMP_ALLOC_LIMBS (ap, 2*s->size+1, s->align_xp); \ SPEED_TMP_ALLOC_LIMBS (mp, s->size, s->align_yp); \ - SPEED_TMP_ALLOC_LIMBS (cp, s->size, s->align_wp); \ SPEED_TMP_ALLOC_LIMBS (tp, 2*s->size+1, s->align_wp2); \ \ MPN_COPY (ap, s->xp, s->size); \ @@ -2218,14 +2217,13 @@ int speed_routine_count_zeros_setup speed_operand_src (s, ap, 2*s->size+1); \ speed_operand_dst (s, tp, 2*s->size+1); \ speed_operand_src (s, mp, s->size); \ - speed_operand_dst (s, cp, s->size); \ speed_cache_fill (s); \ \ speed_starttime (); \ i = s->reps; \ do { \ MPN_COPY (tp, ap, 2*s->size); \ - function (cp, tp, mp, s->size, inv); \ + function (tp, mp, s->size, inv); \ } while (--i != 0); \ t = speed_endtime (); \ \ -- cgit v1.2.1