diff options
-rw-r--r-- | ChangeLog | 12 | ||||
-rw-r--r-- | Makefile.in | 2 | ||||
-rw-r--r-- | curve25519-mul.c | 111 | ||||
-rw-r--r-- | curve448-mul.c | 111 | ||||
-rw-r--r-- | ecc-internal.h | 9 | ||||
-rw-r--r-- | ecc-mul-m.c | 135 |
6 files changed, 182 insertions, 198 deletions
@@ -1,3 +1,15 @@ +2019-12-14 Niels Möller <nisse@lysator.liu.se> + + * ecc-mul-m.c (ecc_mul_m): New file and function. Implements + multiplication for curves in Montgomery representation, as used for + curve25519 and curve448. Extracted from curve25519_mul. + * ecc-internal.h (ecc_mul_m): Declare. + (ECC_MUL_M_ITCH): New macro. + * Makefile.in (hogweed_SOURCES): Add ecc-mul-m.c. + + * curve25519-mul.c (curve25519_mul): Use ecc_mul_m. + * curve448-mul.c (curve448_mul): Likewise. + 2019-12-13 Niels Möller <nisse@lysator.liu.se> * Merge curve448 implementation. diff --git a/Makefile.in b/Makefile.in index 036a3a1d..8d06149f 100644 --- a/Makefile.in +++ b/Makefile.in @@ -180,7 +180,7 @@ hogweed_SOURCES = sexp.c sexp-format.c \ ecc-dup-jj.c ecc-add-jja.c ecc-add-jjj.c \ ecc-eh-to-a.c \ ecc-dup-eh.c ecc-add-eh.c ecc-add-ehh.c \ - ecc-mul-g-eh.c ecc-mul-a-eh.c \ + ecc-mul-g-eh.c ecc-mul-a-eh.c ecc-mul-m.c \ ecc-mul-g.c ecc-mul-a.c ecc-hash.c ecc-random.c \ ecc-point.c ecc-scalar.c ecc-point-mul.c ecc-point-mul-g.c \ ecc-ecdsa-sign.c ecdsa-sign.c \ diff --git a/curve25519-mul.c b/curve25519-mul.c index ba76bc0b..aff2b293 100644 --- a/curve25519-mul.c +++ b/curve25519-mul.c @@ -44,104 +44,21 @@ void curve25519_mul (uint8_t *q, const uint8_t *n, const uint8_t *p) { - const struct ecc_curve *ecc = &_nettle_curve25519; + const struct ecc_modulo *m = &_nettle_curve25519.p; mp_size_t itch; - mp_limb_t *scratch; - int i; - mp_limb_t cy; - - /* FIXME: Could save some more scratch space, e.g., by letting BB - overlap C, D, and CB overlap A, D. And possibly reusing some of - x2, z2, x3, z3. 
*/ -#define x1 scratch -#define x2 (scratch + ecc->p.size) -#define z2 (scratch + 2*ecc->p.size) -#define x3 (scratch + 3*ecc->p.size) -#define z3 (scratch + 4*ecc->p.size) - -#define A (scratch + 5*ecc->p.size) -#define B (scratch + 6*ecc->p.size) -#define C (scratch + 7*ecc->p.size) -#define D (scratch + 8*ecc->p.size) -#define AA (scratch + 9*ecc->p.size) -#define BB (scratch +10*ecc->p.size) -#define E (scratch + 10*ecc->p.size) /* Overlap BB */ -#define DA (scratch + 9*ecc->p.size) /* Overlap AA */ -#define CB (scratch + 10*ecc->p.size) /* Overlap BB */ - - itch = ecc->p.size * 12; - scratch = gmp_alloc_limbs (itch); - - /* Note that 255 % GMP_NUMB_BITS == 0 isn't supported, so x1 always + mp_limb_t *x; + + itch = m->size + ECC_MUL_M_ITCH(m->size); + x = gmp_alloc_limbs (itch); + + /* Note that 255 % GMP_NUMB_BITS == 0 isn't supported, so x always holds at least 256 bits. */ - mpn_set_base256_le (x1, ecc->p.size, p, CURVE25519_SIZE); + mpn_set_base256_le (x, m->size, p, CURVE25519_SIZE); /* Clear bit 255, as required by RFC 7748. */ - x1[255/GMP_NUMB_BITS] &= ~((mp_limb_t) 1 << (255 % GMP_NUMB_BITS)); - - /* Initialize, x2 = x1, z2 = 1 */ - mpn_copyi (x2, x1, ecc->p.size); - z2[0] = 1; - mpn_zero (z2+1, ecc->p.size - 1); - - /* Get x3, z3 from doubling. Since bit 254 is forced to 1. */ - ecc_modp_add (ecc, A, x2, z2); - ecc_modp_sub (ecc, B, x2, z2); - ecc_modp_sqr (ecc, AA, A); - ecc_modp_sqr (ecc, BB, B); - ecc_modp_mul (ecc, x3, AA, BB); - ecc_modp_sub (ecc, E, AA, BB); - ecc_modp_addmul_1 (ecc, AA, E, 121665); - ecc_modp_mul (ecc, z3, E, AA); - - for (i = 253; i >= 3; i--) - { - int bit = (n[i/8] >> (i & 7)) & 1; - - cnd_swap (bit, x2, x3, 2*ecc->p.size); - - /* Formulas from draft-turner-thecurve25519function-00-Mont. We - compute new coordinates in memory-address order, since mul - and sqr clobbers higher limbs. 
*/ - ecc_modp_add (ecc, A, x2, z2); - ecc_modp_sub (ecc, B, x2, z2); - ecc_modp_sqr (ecc, AA, A); - ecc_modp_sqr (ecc, BB, B); - ecc_modp_mul (ecc, x2, AA, BB); /* Last use of BB */ - ecc_modp_sub (ecc, E, AA, BB); - ecc_modp_addmul_1 (ecc, AA, E, 121665); - ecc_modp_add (ecc, C, x3, z3); - ecc_modp_sub (ecc, D, x3, z3); - ecc_modp_mul (ecc, z2, E, AA); /* Last use of E and AA */ - ecc_modp_mul (ecc, DA, D, A); /* Last use of D, A. FIXME: could - let CB overlap. */ - ecc_modp_mul (ecc, CB, C, B); - - ecc_modp_add (ecc, C, DA, CB); - ecc_modp_sqr (ecc, x3, C); - ecc_modp_sub (ecc, C, DA, CB); - ecc_modp_sqr (ecc, DA, C); - ecc_modp_mul (ecc, z3, DA, x1); - - /* FIXME: Could be combined with the loop's initial cnd_swap. */ - cnd_swap (bit, x2, x3, 2*ecc->p.size); - } - /* Do the 3 low zero bits, just duplicating x2 */ - for ( ; i >= 0; i--) - { - ecc_modp_add (ecc, A, x2, z2); - ecc_modp_sub (ecc, B, x2, z2); - ecc_modp_sqr (ecc, AA, A); - ecc_modp_sqr (ecc, BB, B); - ecc_modp_mul (ecc, x2, AA, BB); - ecc_modp_sub (ecc, E, AA, BB); - ecc_modp_addmul_1 (ecc, AA, E, 121665); - ecc_modp_mul (ecc, z2, E, AA); - } - ecc->p.invert (&ecc->p, x3, z2, z3 + ecc->p.size); - ecc_modp_mul (ecc, z3, x2, x3); - cy = mpn_sub_n (x2, z3, ecc->p.m, ecc->p.size); - cnd_copy (cy, x2, z3, ecc->p.size); - mpn_get_base256_le (q, CURVE25519_SIZE, x2, ecc->p.size); - - gmp_free_limbs (scratch, itch); + x[255/GMP_NUMB_BITS] &= ~((mp_limb_t) 1 << (255 % GMP_NUMB_BITS)); + + ecc_mul_m (m, 121665, 3, 253, x, n, x, x + m->size); + mpn_get_base256_le (q, CURVE25519_SIZE, x, m->size); + + gmp_free_limbs (x, itch); } diff --git a/curve448-mul.c b/curve448-mul.c index 59cf7664..2090c8a5 100644 --- a/curve448-mul.c +++ b/curve448-mul.c @@ -46,105 +46,16 @@ void curve448_mul (uint8_t *q, const uint8_t *n, const uint8_t *p) { - const struct ecc_curve *ecc = &_nettle_curve448; + const struct ecc_modulo *m = &_nettle_curve448.p; mp_size_t itch; - mp_limb_t *scratch; - int i; - mp_limb_t cy; - - /* FIXME: 
Could save some more scratch space, e.g., by letting BB - overlap C, D, and CB overlap A, D. And possibly reusing some of - x2, z2, x3, z3. */ -#define x1 scratch -#define x2 (scratch + ecc->p.size) -#define z2 (scratch + 2*ecc->p.size) -#define x3 (scratch + 3*ecc->p.size) -#define z3 (scratch + 4*ecc->p.size) - -#define A (scratch + 5*ecc->p.size) -#define B (scratch + 6*ecc->p.size) -#define C (scratch + 7*ecc->p.size) -#define D (scratch + 8*ecc->p.size) -#define AA (scratch + 9*ecc->p.size) -#define BB (scratch + 10*ecc->p.size) -#define E (scratch + 10*ecc->p.size) /* Overlap BB */ -#define DA (scratch + 9*ecc->p.size) /* Overlap AA */ -#define CB (scratch + 10*ecc->p.size) /* Overlap BB */ - -#define a24 39081 - - itch = ecc->p.size * 12; - assert (ecc->p.invert_itch + 5*ecc->p.size <= itch); - scratch = gmp_alloc_limbs (itch); - - /* Note that 255 % GMP_NUMB_BITS == 0 isn't supported, so x1 always - holds at least 256 bits. */ - mpn_set_base256_le (x1, ecc->p.size, p, CURVE448_SIZE); - - /* Initialize, x2 = x1, z2 = 1 */ - mpn_copyi (x2, x1, ecc->p.size); - z2[0] = 1; - mpn_zero (z2+1, ecc->p.size - 1); - - /* Get x3, z3 from doubling. Since bit 447 is forced to 1. */ - ecc_modp_add (ecc, A, x2, z2); - ecc_modp_sub (ecc, B, x2, z2); - ecc_modp_sqr (ecc, AA, A); - ecc_modp_sqr (ecc, BB, B); - ecc_modp_mul (ecc, x3, AA, BB); - ecc_modp_sub (ecc, E, AA, BB); - ecc_modp_addmul_1 (ecc, AA, E, a24); - ecc_modp_mul (ecc, z3, E, AA); - - for (i = 446; i >= 2; i--) - { - int bit = (n[i/8] >> (i & 7)) & 1; - - cnd_swap (bit, x2, x3, 2*ecc->p.size); - - /* Formulas from RFC 7748. We compute new coordinates in - memory-address order, since mul and sqr clobbers higher - limbs. 
*/ - ecc_modp_add (ecc, A, x2, z2); - ecc_modp_sub (ecc, B, x2, z2); - ecc_modp_sqr (ecc, AA, A); - ecc_modp_sqr (ecc, BB, B); - ecc_modp_mul (ecc, x2, AA, BB); - ecc_modp_sub (ecc, E, AA, BB); /* Last use of BB */ - ecc_modp_addmul_1 (ecc, AA, E, a24); - ecc_modp_add (ecc, C, x3, z3); - ecc_modp_sub (ecc, D, x3, z3); - ecc_modp_mul (ecc, z2, E, AA); /* Last use of E and AA */ - ecc_modp_mul (ecc, DA, D, A); /* Last use of D, A. FIXME: could - let CB overlap. */ - ecc_modp_mul (ecc, CB, C, B); - - ecc_modp_add (ecc, C, DA, CB); - ecc_modp_sqr (ecc, x3, C); - ecc_modp_sub (ecc, C, DA, CB); - ecc_modp_sqr (ecc, DA, C); - ecc_modp_mul (ecc, z3, DA, x1); - - /* FIXME: Could be combined with the loop's initial cnd_swap. */ - cnd_swap (bit, x2, x3, 2*ecc->p.size); - } - /* Do the 2 low zero bits, just duplicating x2 */ - for ( ; i >= 0; i--) - { - ecc_modp_add (ecc, A, x2, z2); - ecc_modp_sub (ecc, B, x2, z2); - ecc_modp_sqr (ecc, AA, A); - ecc_modp_sqr (ecc, BB, B); - ecc_modp_mul (ecc, x2, AA, BB); - ecc_modp_sub (ecc, E, AA, BB); - ecc_modp_addmul_1 (ecc, AA, E, a24); - ecc_modp_mul (ecc, z2, E, AA); - } - ecc->p.invert (&ecc->p, x3, z2, z3 + ecc->p.size); - ecc_modp_mul (ecc, z3, x2, x3); - cy = mpn_sub_n (x2, z3, ecc->p.m, ecc->p.size); - cnd_copy (cy, x2, z3, ecc->p.size); - mpn_get_base256_le (q, CURVE448_SIZE, x2, ecc->p.size); - - gmp_free_limbs (scratch, itch); + mp_limb_t *x; + + itch = m->size + ECC_MUL_M_ITCH(m->size); + x = gmp_alloc_limbs (itch); + + mpn_set_base256_le (x, m->size, p, CURVE448_SIZE); + ecc_mul_m (m, 39081, 2, 446, x, n, x, x + m->size); + mpn_get_base256_le (q, CURVE448_SIZE, x, m->size); + + gmp_free_limbs (x, itch); } diff --git a/ecc-internal.h b/ecc-internal.h index a7c7fa15..cd1a1573 100644 --- a/ecc-internal.h +++ b/ecc-internal.h @@ -69,6 +69,7 @@ #define ecc_mul_a _nettle_ecc_mul_a #define ecc_mul_g_eh _nettle_ecc_mul_g_eh #define ecc_mul_a_eh _nettle_ecc_mul_a_eh +#define ecc_mul_m _nettle_ecc_mul_m #define cnd_copy 
_nettle_cnd_copy #define sec_add_1 _nettle_sec_add_1 #define sec_sub_1 _nettle_sec_sub_1 @@ -394,6 +395,13 @@ ecc_mul_a_eh (const struct ecc_curve *ecc, mp_limb_t *scratch); void +ecc_mul_m (const struct ecc_modulo *m, + mp_limb_t a24, + unsigned bit_low, unsigned bit_high, + mp_limb_t *qx, const uint8_t *n, const mp_limb_t *px, + mp_limb_t *scratch); + +void cnd_copy (int cnd, mp_limb_t *rp, const mp_limb_t *ap, mp_size_t n); mp_limb_t @@ -439,6 +447,7 @@ curve448_eh_to_x (mp_limb_t *xp, const mp_limb_t *p, #define ECC_MUL_A_EH_ITCH(size) \ (((3 << ECC_MUL_A_EH_WBITS) + 10) * (size)) #endif +#define ECC_MUL_M_ITCH(size) (11*(size)) #define ECC_ECDSA_SIGN_ITCH(size) (12*(size)) #define ECC_MOD_RANDOM_ITCH(size) (size) #define ECC_HASH_ITCH(size) (1+(size)) diff --git a/ecc-mul-m.c b/ecc-mul-m.c new file mode 100644 index 00000000..68bdd16e --- /dev/null +++ b/ecc-mul-m.c @@ -0,0 +1,135 @@ +/* ecc-mul-m.c + + Point multiplication using Montgomery curve representation. + + Copyright (C) 2014 Niels Möller + + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. 
+*/ + +#if HAVE_CONFIG_H +# include "config.h" +#endif + +#include <assert.h> + +#include "ecc.h" +#include "ecc-internal.h" + +void +ecc_mul_m (const struct ecc_modulo *m, + mp_limb_t a24, + unsigned bit_low, unsigned bit_high, + mp_limb_t *qx, const uint8_t *n, const mp_limb_t *px, + mp_limb_t *scratch) +{ + unsigned i; + mp_limb_t cy; + + /* FIXME: Could save some more scratch space, e.g., by letting BB + overlap C, D, and CB overlap A, D. And possibly reusing some of + x2, z2, x3, z3. */ +#define x2 (scratch) +#define z2 (scratch + m->size) +#define x3 (scratch + 2*m->size) +#define z3 (scratch + 3*m->size) + +#define A (scratch + 4*m->size) +#define B (scratch + 5*m->size) +#define C (scratch + 6*m->size) +#define D (scratch + 7*m->size) +#define AA (scratch + 8*m->size) +#define BB (scratch +9*m->size) +#define E (scratch + 9*m->size) /* Overlap BB */ +#define DA (scratch + 8*m->size) /* Overlap AA */ +#define CB (scratch + 9*m->size) /* Overlap BB */ + + /* Initialize, x2 = px, z2 = 1 */ + mpn_copyi (x2, px, m->size); + z2[0] = 1; + mpn_zero (z2+1, m->size - 1); + + /* Get x3, z3 from doubling. Since most significant bit is forced to 1. */ + ecc_mod_add (m, A, x2, z2); + ecc_mod_sub (m, B, x2, z2); + ecc_mod_sqr (m, AA, A); + ecc_mod_sqr (m, BB, B); + ecc_mod_mul (m, x3, AA, BB); + ecc_mod_sub (m, E, AA, BB); + ecc_mod_addmul_1 (m, AA, E, a24); + ecc_mod_mul (m, z3, E, AA); + + for (i = bit_high; i >= bit_low; i--) + { + int bit = (n[i/8] >> (i & 7)) & 1; + + cnd_swap (bit, x2, x3, 2*m->size); + + /* Formulas from RFC 7748. We compute new coordinates in + memory-address order, since mul and sqr clobbers higher + limbs. 
*/ + ecc_mod_add (m, A, x2, z2); + ecc_mod_sub (m, B, x2, z2); + ecc_mod_sqr (m, AA, A); + ecc_mod_sqr (m, BB, B); + ecc_mod_mul (m, x2, AA, BB); /* Last use of BB */ + ecc_mod_sub (m, E, AA, BB); + ecc_mod_addmul_1 (m, AA, E, a24); + ecc_mod_add (m, C, x3, z3); + ecc_mod_sub (m, D, x3, z3); + ecc_mod_mul (m, z2, E, AA); /* Last use of E and AA */ + ecc_mod_mul (m, DA, D, A); /* Last use of D, A. FIXME: could + let CB overlap. */ + ecc_mod_mul (m, CB, C, B); + + ecc_mod_add (m, C, DA, CB); + ecc_mod_sqr (m, x3, C); + ecc_mod_sub (m, C, DA, CB); + ecc_mod_sqr (m, DA, C); + ecc_mod_mul (m, z3, DA, px); + + /* FIXME: Could be combined with the loop's initial cnd_swap. */ + cnd_swap (bit, x2, x3, 2*m->size); + } + /* Do the low zero bits, just duplicating x2 */ + for (i = 0; i < bit_low; i++) + { + ecc_mod_add (m, A, x2, z2); + ecc_mod_sub (m, B, x2, z2); + ecc_mod_sqr (m, AA, A); + ecc_mod_sqr (m, BB, B); + ecc_mod_mul (m, x2, AA, BB); + ecc_mod_sub (m, E, AA, BB); + ecc_mod_addmul_1 (m, AA, E, a24); + ecc_mod_mul (m, z2, E, AA); + } + assert (m->invert_itch <= 7 * m->size); + m->invert (m, x3, z2, z3 + m->size); + ecc_mod_mul (m, z3, x2, x3); + cy = mpn_sub_n (qx, z3, m->m, m->size); + cnd_copy (cy, qx, z3, m->size); +} |