diff options
Diffstat (limited to 'sec-modinv.c')
-rw-r--r-- | sec-modinv.c | 170 |
1 files changed, 170 insertions, 0 deletions
diff --git a/sec-modinv.c b/sec-modinv.c new file mode 100644 index 00000000..f674a8fb --- /dev/null +++ b/sec-modinv.c @@ -0,0 +1,170 @@ +/* sec-modinv.c */ + +/* nettle, low-level cryptographics library + * + * Copyright (C) 2013 Niels Möller + * + * The nettle library is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or (at your + * option) any later version. + * + * The nettle library is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with the nettle library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02111-1301, USA. + */ + +/* Development of Nettle's ECC support was funded by Internetfonden. */ + +#if HAVE_CONFIG_H +# include "config.h" +#endif + +#include <assert.h> + +#include "ecc-internal.h" + +static void +cnd_neg (int cnd, mp_limb_t *rp, const mp_limb_t *ap, mp_size_t n) +{ + mp_limb_t cy = (cnd != 0); + mp_limb_t mask = -cy; + mp_size_t i; + + for (i = 0; i < n; i++) + { + mp_limb_t r = (ap[i] ^ mask) + cy; + cy = r < cy; + rp[i] = r; + } +} + +static void +cnd_swap (int cnd, mp_limb_t *ap, mp_limb_t *bp, mp_size_t n) +{ + mp_limb_t mask = - (mp_limb_t) (cnd != 0); + mp_size_t i; + for (i = 0; i < n; i++) + { + mp_limb_t a, b, t; + a = ap[i]; + b = bp[i]; + t = (a ^ b) & mask; + ap[i] = a ^ t; + bp[i] = b ^ t; + } +} + +/* Compute a^{-1} mod m, with running time depending only on the size. + Also needs (m+1)/2, and m must be odd. */ +void +sec_modinv (mp_limb_t *vp, mp_limb_t *ap, mp_size_t n, + const mp_limb_t *mp, const mp_limb_t *mp1h, mp_size_t bit_size, + mp_limb_t *scratch) +{ +#define bp scratch +#define dp (scratch + n) +#define up (scratch + 2*n) + + mp_bitcnt_t i; + + /* Maintain + + a = u * orig_a (mod m) + b = v * orig_a (mod m) + + and b odd at all times. Initially, + + a = a_orig, u = 1 + b = m, v = 0 + */ + + assert (ap != vp); + + up[0] = 1; + mpn_zero (up+1, n - 1); + mpn_copyi (bp, mp, n); + mpn_zero (vp, n); + + for (i = bit_size + GMP_NUMB_BITS * n; i-- > 0; ) + { + mp_limb_t odd, swap, cy; + + /* Always maintain b odd. The logic of the iteration is as + follows. For a, b: + + odd = a & 1 + a -= odd * b + if (underflow from a-b) + { + b += a, assigns old a + a = B^n-a + } + + a /= 2 + + For u, v: + + if (underflow from a - b) + swap u, v + u -= odd * v + if (underflow from u - v) + u += m + + u /= 2 + if (a one bit was shifted out) + u += (m+1)/2 + + As long as a > 0, the quantity + + (bitsize of a) + (bitsize of b) + + is reduced by at least one bit per iteration, hence after + (bit_size of orig_a) + (bit_size of m) - 1 iterations we + surely have a = 0. Then b = gcd(orig_a, m) and if b = 1 then + also v = orig_a^{-1} (mod m) + */ + + assert (bp[0] & 1); + odd = ap[0] & 1; + + /* Which variant is fastest depends on the speed of the various + cnd_* functions. Assembly implementation would help. */ +#if 1 + swap = cnd_sub_n (odd, ap, bp, n); + cnd_add_n (swap, bp, ap, n); + cnd_neg (swap, ap, ap, n); +#else + swap = odd & mpn_sub_n (dp, ap, bp, n); + cnd_copy (swap, bp, ap, n); + cnd_neg (swap, dp, dp, n); + cnd_copy (odd, ap, dp, n); +#endif + +#if 1 + cnd_swap (swap, up, vp, n); + cy = cnd_sub_n (odd, up, vp, n); + cy -= cnd_add_n (cy, up, mp, n); +#else + cy = cnd_sub_n (odd, up, vp, n); + cnd_add_n (swap, vp, up, n); + cnd_neg (swap, up, up, n); + cnd_add_n (cy ^ swap, up, mp, n); +#endif + cy = mpn_rshift (ap, ap, n, 1); + assert (cy == 0); + cy = mpn_rshift (up, up, n, 1); + cy = cnd_add_n (cy, up, mp1h, n); + assert (cy == 0); + } + assert ( (ap[0] | ap[n-1]) == 0); +#undef bp +#undef dp +#undef up +} |