author     Niels Möller <nisse@lysator.liu.se>   2019-12-14 10:00:23 +0100
committer  Niels Möller <nisse@lysator.liu.se>   2019-12-14 10:00:23 +0100
commit     b33eea3b4a3eb5ea07c9615f195f84d43cda5178 (patch)
tree       31418e79b695a373688f2a674a245b53e56ff0d0
parent     39af4a6ad4245aefe1e39e75972ec7d542fcde9f (diff)
download   nettle-b33eea3b4a3eb5ea07c9615f195f84d43cda5178.tar.gz
New function ecc_mul_m.

* curve25519-mul.c (curve25519_mul): Use ecc_mul_m.
* curve448-mul.c (curve448_mul): Likewise.
* ecc-mul-m.c (ecc_mul_m): New file and function. Implements
  multiplication for curves in Montgomery representation, as used for
  curve25519 and curve448. Extracted from curve25519_mul.
* ecc-internal.h (ecc_mul_m): Declare.
  (ECC_MUL_M_ITCH): New macro.
* Makefile.in (hogweed_SOURCES): Add ecc-mul-m.c.
-rw-r--r--  ChangeLog         |  12
-rw-r--r--  Makefile.in       |   2
-rw-r--r--  curve25519-mul.c  | 111
-rw-r--r--  curve448-mul.c    | 111
-rw-r--r--  ecc-internal.h    |   9
-rw-r--r--  ecc-mul-m.c       | 135
6 files changed, 182 insertions(+), 198 deletions(-)
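For orientation, here is a minimal sketch of the new calling convention, condensed from the updated curve25519-mul.c in this diff. The wrapper name x25519_via_ecc_mul_m is hypothetical; everything it calls (ecc_mul_m, ECC_MUL_M_ITCH, _nettle_curve25519, and the gmp-glue helpers) is introduced or used in the changes below, and it needs Nettle's internal headers, so it only builds inside the Nettle source tree.

/* Hypothetical wrapper mirroring the new curve25519_mul body below. */
#include "curve25519.h"
#include "ecc.h"
#include "ecc-internal.h"

static void
x25519_via_ecc_mul_m (uint8_t *q, const uint8_t *n, const uint8_t *p)
{
  const struct ecc_modulo *m = &_nettle_curve25519.p;
  /* One block for the x coordinate, plus the ladder's scratch. */
  mp_size_t itch = m->size + ECC_MUL_M_ITCH (m->size);
  mp_limb_t *x = gmp_alloc_limbs (itch);

  /* Decode the little-endian point. */
  mpn_set_base256_le (x, m->size, p, CURVE25519_SIZE);
  /* Clear bit 255, as required by RFC 7748. */
  x[255/GMP_NUMB_BITS] &= ~((mp_limb_t) 1 << (255 % GMP_NUMB_BITS));

  /* a24 = 121665; the ladder walks scalar bits 253..3, and the three
     low zero bits are handled as plain doublings inside ecc_mul_m. */
  ecc_mul_m (m, 121665, 3, 253, x, n, x, x + m->size);

  /* Encode the result. */
  mpn_get_base256_le (q, CURVE25519_SIZE, x, m->size);
  gmp_free_limbs (x, itch);
}

The curve448 path is identical apart from the constants: a24 = 39081, ladder bits 446..2, and CURVE448_SIZE for the encoding, as the updated curve448-mul.c below shows.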
diff --git a/ChangeLog b/ChangeLog
index eb66f3aa..0516a040 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,15 @@
+2019-12-14 Niels Möller <nisse@lysator.liu.se>
+
+ * ecc-mul-m.c (ecc_mul_m): New file and function. Implements
+ multiplication for curves in Montgomery representation, as used for
+ curve25519 and curve448. Extracted from curve25519_mul.
+ * ecc-internal.h (ecc_mul_m): Declare.
+ (ECC_MUL_M_ITCH): New macro.
+ * Makefile.in (hogweed_SOURCES): Add ecc-mul-m.c.
+
+ * curve25519-mul.c (curve25519_mul): Use ecc_mul_m.
+ * curve448-mul.c (curve448_mul): Likewise.
+
2019-12-13 Niels Möller <nisse@lysator.liu.se>

* Merge curve448 implementation.
diff --git a/Makefile.in b/Makefile.in
index 036a3a1d..8d06149f 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -180,7 +180,7 @@ hogweed_SOURCES = sexp.c sexp-format.c \
ecc-dup-jj.c ecc-add-jja.c ecc-add-jjj.c \
ecc-eh-to-a.c \
ecc-dup-eh.c ecc-add-eh.c ecc-add-ehh.c \
- ecc-mul-g-eh.c ecc-mul-a-eh.c \
+ ecc-mul-g-eh.c ecc-mul-a-eh.c ecc-mul-m.c \
ecc-mul-g.c ecc-mul-a.c ecc-hash.c ecc-random.c \
ecc-point.c ecc-scalar.c ecc-point-mul.c ecc-point-mul-g.c \
ecc-ecdsa-sign.c ecdsa-sign.c \
diff --git a/curve25519-mul.c b/curve25519-mul.c
index ba76bc0b..aff2b293 100644
--- a/curve25519-mul.c
+++ b/curve25519-mul.c
@@ -44,104 +44,21 @@
void
curve25519_mul (uint8_t *q, const uint8_t *n, const uint8_t *p)
{
- const struct ecc_curve *ecc = &_nettle_curve25519;
+ const struct ecc_modulo *m = &_nettle_curve25519.p;
mp_size_t itch;
- mp_limb_t *scratch;
- int i;
- mp_limb_t cy;
-
- /* FIXME: Could save some more scratch space, e.g., by letting BB
- overlap C, D, and CB overlap A, D. And possibly reusing some of
- x2, z2, x3, z3. */
-#define x1 scratch
-#define x2 (scratch + ecc->p.size)
-#define z2 (scratch + 2*ecc->p.size)
-#define x3 (scratch + 3*ecc->p.size)
-#define z3 (scratch + 4*ecc->p.size)
-
-#define A (scratch + 5*ecc->p.size)
-#define B (scratch + 6*ecc->p.size)
-#define C (scratch + 7*ecc->p.size)
-#define D (scratch + 8*ecc->p.size)
-#define AA (scratch + 9*ecc->p.size)
-#define BB (scratch +10*ecc->p.size)
-#define E (scratch + 10*ecc->p.size) /* Overlap BB */
-#define DA (scratch + 9*ecc->p.size) /* Overlap AA */
-#define CB (scratch + 10*ecc->p.size) /* Overlap BB */
-
- itch = ecc->p.size * 12;
- scratch = gmp_alloc_limbs (itch);
-
- /* Note that 255 % GMP_NUMB_BITS == 0 isn't supported, so x1 always
+ mp_limb_t *x;
+
+ itch = m->size + ECC_MUL_M_ITCH(m->size);
+ x = gmp_alloc_limbs (itch);
+
+ /* Note that 255 % GMP_NUMB_BITS == 0 isn't supported, so x always
holds at least 256 bits. */
- mpn_set_base256_le (x1, ecc->p.size, p, CURVE25519_SIZE);
+ mpn_set_base256_le (x, m->size, p, CURVE25519_SIZE);
/* Clear bit 255, as required by RFC 7748. */
- x1[255/GMP_NUMB_BITS] &= ~((mp_limb_t) 1 << (255 % GMP_NUMB_BITS));
-
- /* Initialize, x2 = x1, z2 = 1 */
- mpn_copyi (x2, x1, ecc->p.size);
- z2[0] = 1;
- mpn_zero (z2+1, ecc->p.size - 1);
-
- /* Get x3, z3 from doubling. Since bit 254 is forced to 1. */
- ecc_modp_add (ecc, A, x2, z2);
- ecc_modp_sub (ecc, B, x2, z2);
- ecc_modp_sqr (ecc, AA, A);
- ecc_modp_sqr (ecc, BB, B);
- ecc_modp_mul (ecc, x3, AA, BB);
- ecc_modp_sub (ecc, E, AA, BB);
- ecc_modp_addmul_1 (ecc, AA, E, 121665);
- ecc_modp_mul (ecc, z3, E, AA);
-
- for (i = 253; i >= 3; i--)
- {
- int bit = (n[i/8] >> (i & 7)) & 1;
-
- cnd_swap (bit, x2, x3, 2*ecc->p.size);
-
- /* Formulas from draft-turner-thecurve25519function-00-Mont. We
- compute new coordinates in memory-address order, since mul
- and sqr clobbers higher limbs. */
- ecc_modp_add (ecc, A, x2, z2);
- ecc_modp_sub (ecc, B, x2, z2);
- ecc_modp_sqr (ecc, AA, A);
- ecc_modp_sqr (ecc, BB, B);
- ecc_modp_mul (ecc, x2, AA, BB); /* Last use of BB */
- ecc_modp_sub (ecc, E, AA, BB);
- ecc_modp_addmul_1 (ecc, AA, E, 121665);
- ecc_modp_add (ecc, C, x3, z3);
- ecc_modp_sub (ecc, D, x3, z3);
- ecc_modp_mul (ecc, z2, E, AA); /* Last use of E and AA */
- ecc_modp_mul (ecc, DA, D, A); /* Last use of D, A. FIXME: could
- let CB overlap. */
- ecc_modp_mul (ecc, CB, C, B);
-
- ecc_modp_add (ecc, C, DA, CB);
- ecc_modp_sqr (ecc, x3, C);
- ecc_modp_sub (ecc, C, DA, CB);
- ecc_modp_sqr (ecc, DA, C);
- ecc_modp_mul (ecc, z3, DA, x1);
-
- /* FIXME: Could be combined with the loop's initial cnd_swap. */
- cnd_swap (bit, x2, x3, 2*ecc->p.size);
- }
- /* Do the 3 low zero bits, just duplicating x2 */
- for ( ; i >= 0; i--)
- {
- ecc_modp_add (ecc, A, x2, z2);
- ecc_modp_sub (ecc, B, x2, z2);
- ecc_modp_sqr (ecc, AA, A);
- ecc_modp_sqr (ecc, BB, B);
- ecc_modp_mul (ecc, x2, AA, BB);
- ecc_modp_sub (ecc, E, AA, BB);
- ecc_modp_addmul_1 (ecc, AA, E, 121665);
- ecc_modp_mul (ecc, z2, E, AA);
- }
- ecc->p.invert (&ecc->p, x3, z2, z3 + ecc->p.size);
- ecc_modp_mul (ecc, z3, x2, x3);
- cy = mpn_sub_n (x2, z3, ecc->p.m, ecc->p.size);
- cnd_copy (cy, x2, z3, ecc->p.size);
- mpn_get_base256_le (q, CURVE25519_SIZE, x2, ecc->p.size);
-
- gmp_free_limbs (scratch, itch);
+ x[255/GMP_NUMB_BITS] &= ~((mp_limb_t) 1 << (255 % GMP_NUMB_BITS));
+
+ ecc_mul_m (m, 121665, 3, 253, x, n, x, x + m->size);
+ mpn_get_base256_le (q, CURVE25519_SIZE, x, m->size);
+
+ gmp_free_limbs (x, itch);
}
diff --git a/curve448-mul.c b/curve448-mul.c
index 59cf7664..2090c8a5 100644
--- a/curve448-mul.c
+++ b/curve448-mul.c
@@ -46,105 +46,16 @@
void
curve448_mul (uint8_t *q, const uint8_t *n, const uint8_t *p)
{
- const struct ecc_curve *ecc = &_nettle_curve448;
+ const struct ecc_modulo *m = &_nettle_curve448.p;
mp_size_t itch;
- mp_limb_t *scratch;
- int i;
- mp_limb_t cy;
-
- /* FIXME: Could save some more scratch space, e.g., by letting BB
- overlap C, D, and CB overlap A, D. And possibly reusing some of
- x2, z2, x3, z3. */
-#define x1 scratch
-#define x2 (scratch + ecc->p.size)
-#define z2 (scratch + 2*ecc->p.size)
-#define x3 (scratch + 3*ecc->p.size)
-#define z3 (scratch + 4*ecc->p.size)
-
-#define A (scratch + 5*ecc->p.size)
-#define B (scratch + 6*ecc->p.size)
-#define C (scratch + 7*ecc->p.size)
-#define D (scratch + 8*ecc->p.size)
-#define AA (scratch + 9*ecc->p.size)
-#define BB (scratch + 10*ecc->p.size)
-#define E (scratch + 10*ecc->p.size) /* Overlap BB */
-#define DA (scratch + 9*ecc->p.size) /* Overlap AA */
-#define CB (scratch + 10*ecc->p.size) /* Overlap BB */
-
-#define a24 39081
-
- itch = ecc->p.size * 12;
- assert (ecc->p.invert_itch + 5*ecc->p.size <= itch);
- scratch = gmp_alloc_limbs (itch);
-
- /* Note that 255 % GMP_NUMB_BITS == 0 isn't supported, so x1 always
- holds at least 256 bits. */
- mpn_set_base256_le (x1, ecc->p.size, p, CURVE448_SIZE);
-
- /* Initialize, x2 = x1, z2 = 1 */
- mpn_copyi (x2, x1, ecc->p.size);
- z2[0] = 1;
- mpn_zero (z2+1, ecc->p.size - 1);
-
- /* Get x3, z3 from doubling. Since bit 447 is forced to 1. */
- ecc_modp_add (ecc, A, x2, z2);
- ecc_modp_sub (ecc, B, x2, z2);
- ecc_modp_sqr (ecc, AA, A);
- ecc_modp_sqr (ecc, BB, B);
- ecc_modp_mul (ecc, x3, AA, BB);
- ecc_modp_sub (ecc, E, AA, BB);
- ecc_modp_addmul_1 (ecc, AA, E, a24);
- ecc_modp_mul (ecc, z3, E, AA);
-
- for (i = 446; i >= 2; i--)
- {
- int bit = (n[i/8] >> (i & 7)) & 1;
-
- cnd_swap (bit, x2, x3, 2*ecc->p.size);
-
- /* Formulas from RFC 7748. We compute new coordinates in
- memory-address order, since mul and sqr clobbers higher
- limbs. */
- ecc_modp_add (ecc, A, x2, z2);
- ecc_modp_sub (ecc, B, x2, z2);
- ecc_modp_sqr (ecc, AA, A);
- ecc_modp_sqr (ecc, BB, B);
- ecc_modp_mul (ecc, x2, AA, BB);
- ecc_modp_sub (ecc, E, AA, BB); /* Last use of BB */
- ecc_modp_addmul_1 (ecc, AA, E, a24);
- ecc_modp_add (ecc, C, x3, z3);
- ecc_modp_sub (ecc, D, x3, z3);
- ecc_modp_mul (ecc, z2, E, AA); /* Last use of E and AA */
- ecc_modp_mul (ecc, DA, D, A); /* Last use of D, A. FIXME: could
- let CB overlap. */
- ecc_modp_mul (ecc, CB, C, B);
-
- ecc_modp_add (ecc, C, DA, CB);
- ecc_modp_sqr (ecc, x3, C);
- ecc_modp_sub (ecc, C, DA, CB);
- ecc_modp_sqr (ecc, DA, C);
- ecc_modp_mul (ecc, z3, DA, x1);
-
- /* FIXME: Could be combined with the loop's initial cnd_swap. */
- cnd_swap (bit, x2, x3, 2*ecc->p.size);
- }
- /* Do the 2 low zero bits, just duplicating x2 */
- for ( ; i >= 0; i--)
- {
- ecc_modp_add (ecc, A, x2, z2);
- ecc_modp_sub (ecc, B, x2, z2);
- ecc_modp_sqr (ecc, AA, A);
- ecc_modp_sqr (ecc, BB, B);
- ecc_modp_mul (ecc, x2, AA, BB);
- ecc_modp_sub (ecc, E, AA, BB);
- ecc_modp_addmul_1 (ecc, AA, E, a24);
- ecc_modp_mul (ecc, z2, E, AA);
- }
- ecc->p.invert (&ecc->p, x3, z2, z3 + ecc->p.size);
- ecc_modp_mul (ecc, z3, x2, x3);
- cy = mpn_sub_n (x2, z3, ecc->p.m, ecc->p.size);
- cnd_copy (cy, x2, z3, ecc->p.size);
- mpn_get_base256_le (q, CURVE448_SIZE, x2, ecc->p.size);
-
- gmp_free_limbs (scratch, itch);
+ mp_limb_t *x;
+
+ itch = m->size + ECC_MUL_M_ITCH(m->size);
+ x = gmp_alloc_limbs (itch);
+
+ mpn_set_base256_le (x, m->size, p, CURVE448_SIZE);
+ ecc_mul_m (m, 39081, 2, 446, x, n, x, x + m->size);
+ mpn_get_base256_le (q, CURVE448_SIZE, x, m->size);
+
+ gmp_free_limbs (x, itch);
}
diff --git a/ecc-internal.h b/ecc-internal.h
index a7c7fa15..cd1a1573 100644
--- a/ecc-internal.h
+++ b/ecc-internal.h
@@ -69,6 +69,7 @@
#define ecc_mul_a _nettle_ecc_mul_a
#define ecc_mul_g_eh _nettle_ecc_mul_g_eh
#define ecc_mul_a_eh _nettle_ecc_mul_a_eh
+#define ecc_mul_m _nettle_ecc_mul_m
#define cnd_copy _nettle_cnd_copy
#define sec_add_1 _nettle_sec_add_1
#define sec_sub_1 _nettle_sec_sub_1
@@ -394,6 +395,13 @@ ecc_mul_a_eh (const struct ecc_curve *ecc,
mp_limb_t *scratch);
void
+ecc_mul_m (const struct ecc_modulo *m,
+ mp_limb_t a24,
+ unsigned bit_low, unsigned bit_high,
+ mp_limb_t *qx, const uint8_t *n, const mp_limb_t *px,
+ mp_limb_t *scratch);
+
+void
cnd_copy (int cnd, mp_limb_t *rp, const mp_limb_t *ap, mp_size_t n);
mp_limb_t
@@ -439,6 +447,7 @@ curve448_eh_to_x (mp_limb_t *xp, const mp_limb_t *p,
#define ECC_MUL_A_EH_ITCH(size) \
(((3 << ECC_MUL_A_EH_WBITS) + 10) * (size))
#endif
+#define ECC_MUL_M_ITCH(size) (11*(size))
#define ECC_ECDSA_SIGN_ITCH(size) (12*(size))
#define ECC_MOD_RANDOM_ITCH(size) (size)
#define ECC_HASH_ITCH(size) (1+(size))
diff --git a/ecc-mul-m.c b/ecc-mul-m.c
new file mode 100644
index 00000000..68bdd16e
--- /dev/null
+++ b/ecc-mul-m.c
@@ -0,0 +1,135 @@
+/* ecc-mul-m.c
+
+ Point multiplication using Montgomery curve representation.
+
+ Copyright (C) 2014 Niels Möller
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+*/
+
+#if HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <assert.h>
+
+#include "ecc.h"
+#include "ecc-internal.h"
+
+void
+ecc_mul_m (const struct ecc_modulo *m,
+ mp_limb_t a24,
+ unsigned bit_low, unsigned bit_high,
+ mp_limb_t *qx, const uint8_t *n, const mp_limb_t *px,
+ mp_limb_t *scratch)
+{
+ unsigned i;
+ mp_limb_t cy;
+
+ /* FIXME: Could save some more scratch space, e.g., by letting BB
+ overlap C, D, and CB overlap A, D. And possibly reusing some of
+ x2, z2, x3, z3. */
+#define x2 (scratch)
+#define z2 (scratch + m->size)
+#define x3 (scratch + 2*m->size)
+#define z3 (scratch + 3*m->size)
+
+#define A (scratch + 4*m->size)
+#define B (scratch + 5*m->size)
+#define C (scratch + 6*m->size)
+#define D (scratch + 7*m->size)
+#define AA (scratch + 8*m->size)
+#define BB (scratch + 9*m->size)
+#define E (scratch + 9*m->size) /* Overlap BB */
+#define DA (scratch + 8*m->size) /* Overlap AA */
+#define CB (scratch + 9*m->size) /* Overlap BB */
+
+ /* Initialize, x2 = px, z2 = 1 */
+ mpn_copyi (x2, px, m->size);
+ z2[0] = 1;
+ mpn_zero (z2+1, m->size - 1);
+
+ /* Get x3, z3 from doubling. Since most significant bit is forced to 1. */
+ ecc_mod_add (m, A, x2, z2);
+ ecc_mod_sub (m, B, x2, z2);
+ ecc_mod_sqr (m, AA, A);
+ ecc_mod_sqr (m, BB, B);
+ ecc_mod_mul (m, x3, AA, BB);
+ ecc_mod_sub (m, E, AA, BB);
+ ecc_mod_addmul_1 (m, AA, E, a24);
+ ecc_mod_mul (m, z3, E, AA);
+
+ for (i = bit_high; i >= bit_low; i--)
+ {
+ int bit = (n[i/8] >> (i & 7)) & 1;
+
+ cnd_swap (bit, x2, x3, 2*m->size);
+
+ /* Formulas from RFC 7748. We compute new coordinates in
+ memory-address order, since mul and sqr clobbers higher
+ limbs. */
+ ecc_mod_add (m, A, x2, z2);
+ ecc_mod_sub (m, B, x2, z2);
+ ecc_mod_sqr (m, AA, A);
+ ecc_mod_sqr (m, BB, B);
+ ecc_mod_mul (m, x2, AA, BB); /* Last use of BB */
+ ecc_mod_sub (m, E, AA, BB);
+ ecc_mod_addmul_1 (m, AA, E, a24);
+ ecc_mod_add (m, C, x3, z3);
+ ecc_mod_sub (m, D, x3, z3);
+ ecc_mod_mul (m, z2, E, AA); /* Last use of E and AA */
+ ecc_mod_mul (m, DA, D, A); /* Last use of D, A. FIXME: could
+ let CB overlap. */
+ ecc_mod_mul (m, CB, C, B);
+
+ ecc_mod_add (m, C, DA, CB);
+ ecc_mod_sqr (m, x3, C);
+ ecc_mod_sub (m, C, DA, CB);
+ ecc_mod_sqr (m, DA, C);
+ ecc_mod_mul (m, z3, DA, px);
+
+ /* FIXME: Could be combined with the loop's initial cnd_swap. */
+ cnd_swap (bit, x2, x3, 2*m->size);
+ }
+ /* Do the low zero bits, just duplicating x2 */
+ for (i = 0; i < bit_low; i++)
+ {
+ ecc_mod_add (m, A, x2, z2);
+ ecc_mod_sub (m, B, x2, z2);
+ ecc_mod_sqr (m, AA, A);
+ ecc_mod_sqr (m, BB, B);
+ ecc_mod_mul (m, x2, AA, BB);
+ ecc_mod_sub (m, E, AA, BB);
+ ecc_mod_addmul_1 (m, AA, E, a24);
+ ecc_mod_mul (m, z2, E, AA);
+ }
+ assert (m->invert_itch <= 7 * m->size);
+ m->invert (m, x3, z2, z3 + m->size);
+ ecc_mod_mul (m, z3, x2, x3);
+ cy = mpn_sub_n (qx, z3, m->m, m->size);
+ cnd_copy (cy, qx, z3, m->size);
+}
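The scratch bound ECC_MUL_M_ITCH(size) = 11*size is not derived in the commit itself; the following worked count, inferred from the layout and assert in ecc-mul-m.c above, is a rough sanity check rather than part of the change.

/* Scratch layout of ecc_mul_m, in units of m->size limbs:
     offsets 0..3   x2, z2, x3, z3                       ->  4 blocks
     offsets 4..9   A, B, C, D, AA/DA, BB/E/CB (overlaps) -> 6 blocks
   so the ladder itself peaks at 10*size.  The final inversion places
   its scratch at z3 + m->size, i.e. offset 4*size, and the assert
   allows invert_itch <= 7*size there, so the overall requirement is
   max (10, 4 + 7) * size = 11*size, matching ECC_MUL_M_ITCH(size).  */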