summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNiels Möller <nisse@lysator.liu.se>2022-10-07 20:37:58 +0200
committerNiels Möller <nisse@lysator.liu.se>2022-11-28 19:30:04 +0100
commit39ed186b89bae0db3fe8a443b09ef8b493c483b3 (patch)
tree42b537c84f52f7823826779ccc22f1aa659e4bbe
parent3fbce8821d1d6c1c7a2a25c8e5ad9b186125bcdb (diff)
downloadnettle-39ed186b89bae0db3fe8a443b09ef8b493c483b3.tar.gz
Reimplement modular inversion.
New implementation based on the algorithm by Daniel J. Bernstein and Bo-Yin Yang, see https://eprint.iacr.org/2019/266.pdf.
-rw-r--r--ChangeLog22
-rw-r--r--curve25519-eh-to-x.c2
-rw-r--r--curve25519-mul-g.c5
-rw-r--r--curve25519-mul.c2
-rw-r--r--curve448-eh-to-x.c2
-rw-r--r--curve448-mul-g.c2
-rw-r--r--curve448-mul.c2
-rw-r--r--ecc-curve25519.c35
-rw-r--r--ecc-curve448.c26
-rw-r--r--ecc-gost-gc256b.c8
-rw-r--r--ecc-gost-gc512a.c8
-rw-r--r--ecc-internal.h15
-rw-r--r--ecc-mod-inv.c355
-rw-r--r--ecc-secp192r1.c8
-rw-r--r--ecc-secp224r1.c40
-rw-r--r--ecc-secp256r1.c73
-rw-r--r--ecc-secp384r1.c53
-rw-r--r--ecc-secp521r1.c64
-rw-r--r--eccdata.c38
-rw-r--r--testsuite/ecc-modinv-test.c13
20 files changed, 420 insertions, 353 deletions
diff --git a/ChangeLog b/ChangeLog
index f1e5537d..0727f8d1 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,25 @@
+2022-10-07 Niels Möller <nisse@lysator.liu.se>
+
+ * ecc-mod-inv.c: Implement Bernstein-Yang algorithm. It
+ outperforms older inversion code, except except secp192r1 mod p
+ inversion.
+ * eccdata.c (output_modulo): Output iteration count for new
+ modular inversion algorithm, and needed power of two for redc
+ inversion.
+ * ecc-curve25519.c (ecc_curve25519_inv): Deleted.
+ * ecc-curve448.c (ecc_curve448_inv): Deleted.
+ * ecc-secp224r1.c (ecc_secp224r1_inv): Deleted.
+ * ecc-secp256r1.c (ecc_secp256r1_inv): Deleted.
+ * ecc-secp384r1.c (ecc_secp384r1_inv): Deleted.
+ * ecc-secp521r1.c (ecc_secp521r1_inv): Deleted.
+
+2022-10-03 Niels Möller <nisse@lysator.liu.se>
+
+ * ecc-internal.h (struct ecc_modulo): Delete (m+1)/2 constant, and
+ update all curve structs.
+ * eccdata.c (output_curve): Delete corresponding code to generate
+ ecc_pp1h and ecc_qp1h.
+
2022-11-09 Niels Möller <nisse@lysator.liu.se>
From Mamone Tarsha:
diff --git a/curve25519-eh-to-x.c b/curve25519-eh-to-x.c
index d90defda..8b6e8d1a 100644
--- a/curve25519-eh-to-x.c
+++ b/curve25519-eh-to-x.c
@@ -64,7 +64,7 @@ curve25519_eh_to_x (mp_limb_t *xp, const mp_limb_t *p,
x = 0, and we should be fine, since ecc_mod_inv for ecc->p returns 0
in this case. */
ecc_mod_sub (&ecc->p, t0, wp, vp);
- /* Needs a total of 6*size storage. */
+ /* Needs a total of 2*size + scratch for inversion. */
ecc->p.invert (&ecc->p, t1, t0, tp);
ecc_mod_add (&ecc->p, t0, wp, vp);
diff --git a/curve25519-mul-g.c b/curve25519-mul-g.c
index 000e0987..3f12af73 100644
--- a/curve25519-mul-g.c
+++ b/curve25519-mul-g.c
@@ -33,6 +33,7 @@
# include "config.h"
#endif
+#include <assert.h>
#include <string.h>
#include "curve25519.h"
@@ -57,7 +58,9 @@ curve25519_mul_g (uint8_t *r, const uint8_t *n)
t[0] &= ~7;
t[CURVE25519_SIZE-1] = (t[CURVE25519_SIZE-1] & 0x3f) | 0x40;
- itch = 4*ecc->p.size + ecc->mul_g_itch;
+ assert (ecc->mul_g_itch <= 2*ecc->p.size + ecc->p.invert_itch);
+ itch = 6*ecc->p.size + ecc->p.invert_itch;
+
scratch = gmp_alloc_limbs (itch);
mpn_set_base256_le (x, ecc->p.size, t, CURVE25519_SIZE);
diff --git a/curve25519-mul.c b/curve25519-mul.c
index aff2b293..d8023fc3 100644
--- a/curve25519-mul.c
+++ b/curve25519-mul.c
@@ -48,7 +48,7 @@ curve25519_mul (uint8_t *q, const uint8_t *n, const uint8_t *p)
mp_size_t itch;
mp_limb_t *x;
- itch = m->size + ECC_MUL_M_ITCH(m->size);
+ itch = m->size + ECC_MUL_M_ITCH(m->size, m->invert_itch);
x = gmp_alloc_limbs (itch);
/* Note that 255 % GMP_NUMB_BITS == 0 isn't supported, so x always
diff --git a/curve448-eh-to-x.c b/curve448-eh-to-x.c
index 3b9bf3ec..29c38b27 100644
--- a/curve448-eh-to-x.c
+++ b/curve448-eh-to-x.c
@@ -58,7 +58,7 @@ curve448_eh_to_x (mp_limb_t *xp, const mp_limb_t *p, mp_limb_t *scratch)
x = v^2 / u^2 = (V/W)^2 / (U/W)^2 = (V/U)^2
*/
- /* Needs a total of 5*size storage. */
+ /* Needs a total of size + scratch for inversion. */
ecc->p.invert (&ecc->p, t0, up, tp);
ecc_mod_mul (&ecc->p, t0, t0, vp, tp);
ecc_mod_sqr_canonical (&ecc->p, xp, t0, tp);
diff --git a/curve448-mul-g.c b/curve448-mul-g.c
index a396595a..71af4547 100644
--- a/curve448-mul-g.c
+++ b/curve448-mul-g.c
@@ -58,7 +58,7 @@ curve448_mul_g (uint8_t *r, const uint8_t *n)
t[0] &= ~3;
t[CURVE448_SIZE-1] = (t[CURVE448_SIZE-1] & 0x7f) | 0x80;
- itch = 5*ecc->p.size + ecc->mul_g_itch;
+ itch = 4*ecc->p.size + ecc->mul_g_itch;
scratch = gmp_alloc_limbs (itch);
mpn_set_base256_le (x, ecc->p.size, t, CURVE448_SIZE);
diff --git a/curve448-mul.c b/curve448-mul.c
index 2090c8a5..c8b13e90 100644
--- a/curve448-mul.c
+++ b/curve448-mul.c
@@ -50,7 +50,7 @@ curve448_mul (uint8_t *q, const uint8_t *n, const uint8_t *p)
mp_size_t itch;
mp_limb_t *x;
- itch = m->size + ECC_MUL_M_ITCH(m->size);
+ itch = m->size + ECC_MUL_M_ITCH(m->size, m->invert_itch);
x = gmp_alloc_limbs (itch);
mpn_set_base256_le (x, m->size, p, CURVE448_SIZE);
diff --git a/ecc-curve25519.c b/ecc-curve25519.c
index 539bff22..bb419ad2 100644
--- a/ecc-curve25519.c
+++ b/ecc-curve25519.c
@@ -149,27 +149,6 @@ ecc_mod_pow_252m3 (const struct ecc_modulo *m,
#undef tp
}
-/* Scratch as for ecc_mod_pow_252m3 above. */
-#define ECC_25519_INV_ITCH (4*ECC_LIMB_SIZE)
-
-static void
-ecc_curve25519_inv (const struct ecc_modulo *p,
- mp_limb_t *rp, const mp_limb_t *ap,
- mp_limb_t *scratch)
-{
- /* Addition chain
-
- p - 2 = 2^{255} - 21
- = 1 + 2 (1 + 4 (2^{252}-3))
- */
- ecc_mod_pow_252m3 (p, rp, ap, scratch);
- ecc_mod_sqr (p, rp, rp, scratch);
- ecc_mod_sqr (p, rp, rp, scratch);
- ecc_mod_mul (p, rp, ap, rp, scratch);
- ecc_mod_sqr (p, rp, rp, scratch);
- ecc_mod_mul (p, rp, ap, rp, scratch);
-}
-
static int
ecc_curve25519_zero_p (const struct ecc_modulo *p, mp_limb_t *xp)
{
@@ -259,20 +238,22 @@ const struct ecc_curve _nettle_curve25519 =
ECC_LIMB_SIZE,
ECC_BMODP_SIZE,
0,
- ECC_25519_INV_ITCH,
+ ECC_MOD_INV_ITCH(ECC_LIMB_SIZE),
0,
ECC_25519_SQRT_RATIO_ITCH,
+ ECC_INVP_COUNT,
+ ECC_BINVP,
ecc_p,
ecc_Bmodp,
ecc_Bmodp_shifted,
ecc_Bm2p,
NULL,
- ecc_pp1h,
+ NULL,
ecc_curve25519_modp,
ecc_curve25519_modp,
- ecc_curve25519_inv,
+ ecc_mod_inv,
NULL,
ecc_curve25519_sqrt_ratio,
},
@@ -284,13 +265,15 @@ const struct ecc_curve _nettle_curve25519 =
ECC_MOD_INV_ITCH (ECC_LIMB_SIZE),
0,
0,
+ ECC_INVQ_COUNT,
+ ECC_BINVQ,
ecc_q,
ecc_Bmodq,
ecc_mBmodq_shifted, /* Use q - 2^{252} instead. */
ecc_Bm2q,
NULL,
- ecc_qp1h,
+ NULL,
ecc_curve25519_modq,
ecc_curve25519_modq,
@@ -308,7 +291,7 @@ const struct ecc_curve _nettle_curve25519 =
ECC_DUP_TH_ITCH (ECC_LIMB_SIZE),
ECC_MUL_A_EH_ITCH (ECC_LIMB_SIZE),
ECC_MUL_G_EH_ITCH (ECC_LIMB_SIZE),
- ECC_EH_TO_A_ITCH (ECC_LIMB_SIZE, ECC_25519_INV_ITCH),
+ ECC_EH_TO_A_ITCH (ECC_LIMB_SIZE, ECC_MOD_INV_ITCH(ECC_LIMB_SIZE)),
ecc_add_th,
ecc_add_thh,
diff --git a/ecc-curve448.c b/ecc-curve448.c
index daef56cc..8d1258fe 100644
--- a/ecc-curve448.c
+++ b/ecc-curve448.c
@@ -142,18 +142,6 @@ ecc_mod_pow_446m224m1 (const struct ecc_modulo *p,
#undef tp
}
-#define ECC_CURVE448_INV_ITCH (4*ECC_LIMB_SIZE)
-
-static void ecc_curve448_inv (const struct ecc_modulo *p,
- mp_limb_t *rp, const mp_limb_t *ap,
- mp_limb_t *tp)
-{
- ecc_mod_pow_446m224m1 (p, rp, ap, tp);/* a^{2^446-2^222-1} */
- ecc_mod_sqr (p, rp, rp, tp); /* a^{2^447-2^223-2} */
- ecc_mod_sqr (p, rp, rp, tp); /* a^{2^448-2^224-4} */
- ecc_mod_mul (p, rp, ap, rp, tp); /* a^{2^448-2^224-3} */
-}
-
/* To guarantee that inputs to ecc_mod_zero_p are in the required range. */
#if ECC_LIMB_SIZE * GMP_NUMB_BITS != 448
#error Unsupported limb size
@@ -213,20 +201,22 @@ const struct ecc_curve _nettle_curve448 =
ECC_LIMB_SIZE,
ECC_BMODP_SIZE,
0,
- ECC_CURVE448_INV_ITCH,
+ ECC_MOD_INV_ITCH(ECC_LIMB_SIZE),
0,
ECC_CURVE448_SQRT_RATIO_ITCH,
+ ECC_INVP_COUNT,
+ ECC_BINVP,
ecc_p,
ecc_Bmodp,
ecc_Bmodp_shifted,
ecc_Bm2p,
NULL,
- ecc_pp1h,
+ NULL,
ecc_curve448_modp,
ecc_curve448_modp,
- ecc_curve448_inv,
+ ecc_mod_inv,
NULL,
ecc_curve448_sqrt_ratio,
},
@@ -238,13 +228,15 @@ const struct ecc_curve _nettle_curve448 =
ECC_MOD_INV_ITCH (ECC_LIMB_SIZE),
0,
0,
+ ECC_INVQ_COUNT,
+ ECC_BINVQ,
ecc_q,
ecc_Bmodq,
ecc_Bmodq_shifted,
ecc_Bm2q,
NULL,
- ecc_qp1h,
+ NULL,
ecc_mod,
ecc_mod,
@@ -262,7 +254,7 @@ const struct ecc_curve _nettle_curve448 =
ECC_DUP_EH_ITCH (ECC_LIMB_SIZE),
ECC_MUL_A_EH_ITCH (ECC_LIMB_SIZE),
ECC_MUL_G_EH_ITCH (ECC_LIMB_SIZE),
- ECC_EH_TO_A_ITCH (ECC_LIMB_SIZE, ECC_CURVE448_INV_ITCH),
+ ECC_EH_TO_A_ITCH (ECC_LIMB_SIZE, ECC_MOD_INV_ITCH (ECC_LIMB_SIZE)),
ecc_add_eh,
ecc_add_ehh,
diff --git a/ecc-gost-gc256b.c b/ecc-gost-gc256b.c
index df9cbb58..10c7ae8c 100644
--- a/ecc-gost-gc256b.c
+++ b/ecc-gost-gc256b.c
@@ -67,14 +67,16 @@ const struct ecc_curve _nettle_gost_gc256b =
ECC_MOD_INV_ITCH (ECC_LIMB_SIZE),
0,
0,
+ ECC_INVP_COUNT,
+ ECC_BINVP,
ecc_p,
ecc_Bmodp,
ecc_Bmodp_shifted,
ecc_Bm2p,
ecc_redc_ppm1,
+ NULL,
- ecc_pp1h,
ecc_gost_gc256b_modp,
ecc_gost_gc256b_modp,
ecc_mod_inv,
@@ -89,13 +91,15 @@ const struct ecc_curve _nettle_gost_gc256b =
ECC_MOD_INV_ITCH (ECC_LIMB_SIZE),
0,
0,
+ ECC_INVQ_COUNT,
+ ECC_BINVQ,
ecc_q,
ecc_Bmodq,
ecc_Bmodq_shifted,
ecc_Bm2q,
NULL,
- ecc_qp1h,
+ NULL,
ecc_gost_gc256b_modq,
ecc_gost_gc256b_modq,
diff --git a/ecc-gost-gc512a.c b/ecc-gost-gc512a.c
index 3807b57e..0ba58d64 100644
--- a/ecc-gost-gc512a.c
+++ b/ecc-gost-gc512a.c
@@ -67,14 +67,16 @@ const struct ecc_curve _nettle_gost_gc512a =
ECC_MOD_INV_ITCH (ECC_LIMB_SIZE),
0,
0,
+ ECC_INVP_COUNT,
+ ECC_BINVP,
ecc_p,
ecc_Bmodp,
ecc_Bmodp_shifted,
ecc_Bm2p,
ecc_redc_ppm1,
+ NULL,
- ecc_pp1h,
ecc_gost_gc512a_modp,
ecc_gost_gc512a_modp,
ecc_mod_inv,
@@ -89,13 +91,15 @@ const struct ecc_curve _nettle_gost_gc512a =
ECC_MOD_INV_ITCH (ECC_LIMB_SIZE),
0,
0,
+ ECC_INVQ_COUNT,
+ ECC_BINVQ,
ecc_q,
ecc_Bmodq,
ecc_Bmodq_shifted,
ecc_Bm2q,
NULL,
- ecc_qp1h,
+ NULL,
ecc_gost_gc512a_modq,
ecc_gost_gc512a_modq,
diff --git a/ecc-internal.h b/ecc-internal.h
index be02de5f..cd4d5ea1 100644
--- a/ecc-internal.h
+++ b/ecc-internal.h
@@ -170,6 +170,13 @@ struct ecc_modulo
unsigned short sqrt_itch;
unsigned short sqrt_ratio_itch;
+ /* Needed iterations for the Bernstein-Yang inversion algorithm, for
+ input size bit_size, or bit_size + 1 in case the most significant
+ bit of the most significant modulo limb is zero. */
+ unsigned short invert_count;
+ /* m^{-1} mod 2^{GMP_NUMB_BITS} */
+ mp_limb_t binv;
+
const mp_limb_t *m;
/* B^size mod m. Expected to have at least 32 leading zeros
(equality for secp_256r1). */
@@ -184,8 +191,8 @@ struct ecc_modulo
/* m +/- 1, for redc, excluding redc_size low limbs. */
const mp_limb_t *redc_mpm1;
- /* (m+1)/2 */
- const mp_limb_t *mp1h;
+
+ const mp_limb_t *invert_power;
ecc_mod_func *mod;
ecc_mod_func *reduce;
@@ -482,7 +489,7 @@ curve448_eh_to_x (mp_limb_t *xp, const mp_limb_t *p,
mp_limb_t *scratch);
/* Current scratch needs: */
-#define ECC_MOD_INV_ITCH(size) (3*(size))
+#define ECC_MOD_INV_ITCH(size) (5*(size)+4)
#define ECC_J_TO_A_ITCH(size, inv) ((size)+(inv))
#define ECC_EH_TO_A_ITCH(size, inv) ((size)+(inv))
#define ECC_DUP_JJ_ITCH(size) (4*(size))
@@ -508,7 +515,7 @@ curve448_eh_to_x (mp_limb_t *xp, const mp_limb_t *p,
#define ECC_MUL_A_EH_ITCH(size) \
(((3 << ECC_MUL_A_EH_WBITS) + 7) * (size))
#endif
-#define ECC_MUL_M_ITCH(size) (8*(size))
+#define ECC_MUL_M_ITCH(size, inv) (4*(size) + (inv))
#define ECC_ECDSA_SIGN_ITCH(size) (11*(size))
#define ECC_GOSTDSA_SIGN_ITCH(size) (11*(size))
#define ECC_MOD_RANDOM_ITCH(size) (size)
diff --git a/ecc-mod-inv.c b/ecc-mod-inv.c
index 254fb697..68d6c339 100644
--- a/ecc-mod-inv.c
+++ b/ecc-mod-inv.c
@@ -1,6 +1,6 @@
/* ecc-mod-inv.c
- Copyright (C) 2013, 2014 Niels Möller
+ Copyright (C) 2013, 2014, 2022 Niels Möller
This file is part of GNU Nettle.
@@ -29,8 +29,6 @@
not, see http://www.gnu.org/licenses/.
*/
-/* Development of Nettle's ECC support was funded by the .SE Internet Fund. */
-
#if HAVE_CONFIG_H
# include "config.h"
#endif
@@ -39,7 +37,11 @@
#include "ecc-internal.h"
-static void
+#if GMP_LIMB_BITS != GMP_NUMB_BITS
+#error Unsupported configuration
+#endif
+
+static mp_limb_t
cnd_neg (int cnd, mp_limb_t *rp, const mp_limb_t *ap, mp_size_t n)
{
mp_limb_t cy = (cnd != 0);
@@ -52,109 +54,282 @@ cnd_neg (int cnd, mp_limb_t *rp, const mp_limb_t *ap, mp_size_t n)
cy = r < cy;
rp[i] = r;
}
+ /* Return cnd except R = A = 0. */
+ return cnd & (1-cy);
}
-/* Compute a^{-1} mod m, with running time depending only on the size.
- Returns zero if a == 0 (mod m), to be consistent with a^{phi(m)-1}.
- Also needs (m+1)/2, and m must be odd.
+#define BITCNT_BITS (sizeof(mp_bitcnt_t) * CHAR_BIT)
+
+#define STEPS (GMP_NUMB_BITS - 2)
+
+struct matrix {
+ /* Matrix elements interpreted as signed two's complement. Absolute
+ value of elements is at most 2^STEPS. */
+ mp_limb_t a[2][2];
+};
+
+/* Conditionally set (a, b) <-- (b, -a) */
+#define CND_NEG_SWAP_LIMB(cnd, a, b) do {\
+ mp_limb_t __cnd_sum = (a) + (b); \
+ mp_limb_t __cnd_diff = (a) - (b); \
+ (a) -= __cnd_diff & -(cnd); \
+ (b) -= __cnd_sum & -(cnd); \
+ } while (0)
+
+/* Perform K elementary reduction steps on (delta, f, g). Only the
+ least significant K bits of f, g matter. Note that delta has an
+ unsigned type, but is used as two's complement. */
+static mp_bitcnt_t
+steps(struct matrix *M, unsigned k, mp_bitcnt_t delta, mp_limb_t f, mp_limb_t g)
+{
+ mp_limb_t a00, a01, a10, a11;
+ assert (f & 1);
+
+ /* Identity matrix. */
+ a00 = a11 = 1;
+ a01 = a10 = 0;
+
+ /* Preserve invariant (f ; g) = 2^{-i} M (f_orig, g_orig) */
+ for (; k-- > 0; delta++)
+ {
+ mp_limb_t odd = g & 1;
+ mp_limb_t swap = odd & ~(delta >> (BITCNT_BITS-1));
+
+ /* Swap; (f'; g') = (g; -f) = (0,1;-1,0) (g;f) */
+ CND_NEG_SWAP_LIMB(swap, f, g);
+ CND_NEG_SWAP_LIMB(swap, a00, a10);
+ CND_NEG_SWAP_LIMB(swap, a01, a11);
+
+ /* Conditional negation. */
+ delta = (delta ^ - (mp_bitcnt_t) swap) + swap;
+
+ /* Cancel low bit and shift. */
+ g += f & -odd;
+ a10 += a00 & -odd;
+ a11 += a01 & -odd;
+
+ g >>= 1;
+ a00 <<= 1;
+ a01 <<= 1;
+ }
+ M->a[0][0] = a00; M->a[0][1] = a01;
+ M->a[1][0] = a10; M->a[1][1] = a11;
+ return delta;
+}
+
+/* Set R = (u * F + v * G), treating all numbers as two's complement.
+ No overlap allowed. */
+static mp_limb_t
+add_add_mul (mp_ptr rp, const mp_limb_t *fp, const mp_limb_t *gp, mp_size_t n,
+ mp_limb_t u, mp_limb_t v) {
+ mp_limb_t f_sign = fp[n-1] >> (GMP_LIMB_BITS - 1);
+ mp_limb_t g_sign = gp[n-1] >> (GMP_LIMB_BITS - 1);
+ mp_limb_t u_sign = u >> (GMP_LIMB_BITS - 1);
+ mp_limb_t v_sign = v >> (GMP_LIMB_BITS - 1);
+ mp_limb_t hi = mpn_mul_1 (rp, fp, n, u) - ((-f_sign) & u);
+ hi -= ((-u_sign) & fp[n-1]) + mpn_cnd_sub_n (u_sign, rp + 1, rp + 1, fp, n-1);
+
+ hi += mpn_addmul_1 (rp, gp, n, v) - ((-g_sign) & v);
+ hi -= ((-v_sign) & gp[n-1]) + mpn_cnd_sub_n (v_sign, rp + 1, rp + 1, gp, n-1);
+
+ return hi;
+}
+
+/* Update (f'; g') = M (f; g) / 2^{shift}, where all numbers are two's complement. */
+static void
+matrix_vector_mul (const struct matrix *M, unsigned shift,
+ mp_size_t n, mp_limb_t *fp, mp_limb_t *gp, mp_limb_t *tp)
+{
+ mp_limb_t f_hi = add_add_mul (tp, fp, gp, n, M->a[0][0], M->a[0][1]);
+ mp_limb_t g_hi = add_add_mul (tp + n, fp, gp, n, M->a[1][0], M->a[1][1]);
+ mp_limb_t lo = mpn_rshift (fp, tp, n, shift);
+ assert (lo == 0);
+ fp[n-1] += (f_hi << (GMP_LIMB_BITS - shift));
+ lo = mpn_rshift (gp, tp + n, n, shift);
+ assert (lo == 0);
+ gp[n-1] += (g_hi << (GMP_LIMB_BITS - shift));
+}
+
+/* Set R = (u * F + v * G) (mod M), treating u, v as two's complement,
+ but F, G, R unsigned. No overlap allowed. n limb inputs, n+1 limb
+ output.
+
+ Input can be allowed in the range 0 <= F, G < min (2 M, B^n),
+ output always in the range 0 <= R < B M.
+*/
+static void
+add_add_mul_mod (mp_ptr rp, const mp_limb_t *fp, const mp_limb_t *gp,
+ const mp_limb_t *mp, mp_size_t n,
+ mp_limb_t u, mp_limb_t v) {
+ mp_limb_t u_sign = u >> (GMP_LIMB_BITS - 1);
+ mp_limb_t v_sign = v >> (GMP_LIMB_BITS - 1);
+ mp_limb_t r_sign;
+
+ assert ((fp[n-1] >> 1) <= mp[n-1]);
+ assert ((gp[n-1] >> 1) <= mp[n-1]);
+
+ rp[n] = mpn_mul_1 (rp, fp, n, u);
+ mpn_cnd_sub_n (u_sign, rp + 1, rp + 1, fp, n);
+
+ rp[n] += mpn_addmul_1 (rp, gp, n, v);
+ mpn_cnd_sub_n (v_sign, rp + 1, rp + 1, gp, n);
+
+ /* Row sums of the matrix have absolute value <= B/4. With inputs F,
+ G < 2 M, at this point we have
+
+ |R| < B M/2.
+
+ If R < 0, then R + B M is positive negative, adding B M makes the
+ result positive.
+ */
+ r_sign = rp[n] >> (GMP_LIMB_BITS - 1);
+ r_sign -= mpn_cnd_add_n (r_sign, rp + 1, rp + 1, mp, n);
+ assert (r_sign == 0);
+ assert (rp[n] <= mp[n-1]);
+}
+
+/* Input in range 0 <= T < B M, output in range 0 <= R < M. */
+static void
+redc_1 (mp_limb_t *rp, mp_limb_t *tp,
+ const mp_limb_t *mp, mp_size_t n, mp_limb_t m_binv)
+{
+ mp_limb_t cy, hi;
+ /* If we knew that 2M < B^n, we could allow result value in the
+ range 0 <= R < 2M, and use mpn_add_mul_1 without any adjustment
+ step. */
+ hi = mpn_submul_1 (tp, mp, n, m_binv * tp[0]);
+ assert (tp[0] == 0);
+ cy = tp[n] < hi;
+ tp[n] -= hi;
+
+ cy -= mpn_cnd_add_n (cy, rp, tp + 1, mp, n);
+ assert (cy == 0);
+}
+
+/* Update (u'; v') = M (u; v) B^{-1} (mod m), where u, v, m are
+ unsigned n limbs, M has signed elements, and B is the bignum
+ base.
- Needs 3n limbs of scratch space.
+ Output are canonically reduced, 0 <= U, V < M, but
+ inputs are only required to be < 2 M.
*/
+static void
+matrix_vector_mul_mod (const struct matrix *M, const mp_limb_t *mp,
+ mp_limb_t m_binv,
+ mp_size_t n, mp_limb_t *up, mp_limb_t *vp, mp_limb_t *tp)
+{
+ add_add_mul_mod (tp, up, vp, mp, n, M->a[0][0], M->a[0][1]);
+ add_add_mul_mod (tp + n + 1, up, vp, mp, n, M->a[1][0], M->a[1][1]);
+
+ /* Reduce to n limbs, by multiplying with B^-1 (mod m) */
+ redc_1 (up, tp, mp, n, m_binv);
+ redc_1 (vp, tp + n + 1, mp, n, m_binv);
+}
+
+/* Input in range 0 <= T < 2M, output in range 0 <= R < 2M. */
+static void
+redc_bits (mp_limb_t *rp, mp_limb_t *tp, const mp_limb_t *mp,
+ mp_size_t n, mp_limb_t m_binv, mp_bitcnt_t k)
+{
+ mp_limb_t hi;
+
+ for (; k >= GMP_NUMB_BITS; k -= GMP_NUMB_BITS)
+ {
+ hi = mpn_addmul_1 (tp, mp, n, -m_binv * tp[0]);
+ mpn_copyi (tp, tp + 1, n - 1);
+ tp[n-1] = hi;
+ }
+ if (k > 0)
+ {
+ mp_limb_t mask = ((mp_limb_t) 2 << (k-1)) - 1;
+ mp_limb_t q = (-m_binv * tp[0]) & mask;
+ hi = mpn_addmul_1 (tp, mp, n, q);
+ mpn_rshift (rp, tp, n, k);
+ rp[n-1] |= hi << (GMP_NUMB_BITS - k);
+ }
+}
+
+static int
+one_p (const mp_limb_t *p, mp_size_t n)
+{
+ mp_limb_t diff = p[0] ^ 1;
+ mp_size_t i;
+ for (i = 1; i < n; i++)
+ diff |= p[i];
+
+ return diff == 0;
+}
-/* FIXME: Could use mpn_sec_invert (in GMP-6), but with a bit more
- scratch need since it doesn't precompute (m+1)/2. */
+/* Compute a^{-1} mod m, with running time depending only on the size.
+ Returns zero if a == 0 (mod m), to be consistent with a^{phi(m)-1}.
+ Based on the algorithm in https://eprint.iacr.org/2019/266.pdf */
void
ecc_mod_inv (const struct ecc_modulo *m,
- mp_limb_t *vp, const mp_limb_t *in_ap,
+ mp_limb_t *up, const mp_limb_t *ap,
mp_limb_t *scratch)
{
-#define ap scratch
-#define bp (scratch + n)
-#define up (scratch + 2*n)
-
mp_size_t n = m->size;
- /* Avoid the mp_bitcnt_t type for compatibility with older GMP
- versions. */
- unsigned i;
+ mp_bitcnt_t shift, delta, count;
+ mp_limb_t cy;
+ struct matrix M;
+
+ /* Total scratch need 4*(n+1) for fp, gp, tp, n for v, total 5*n+4 */
+#define fp scratch
+#define gp (scratch + n + 1)
+#define vp (scratch + 2*n + 2)
+#define tp (scratch + 3*n + 2)
+
+#define mp (m->m)
+
+ /* Input should satisfy a < 2m, since that is what the iteration
+ count is based on. */
+ assert ((ap[n-1] >> 1) <= mp[n-1]);
- /* Maintain
+ mpn_copyi (fp, mp, n); fp[n] = 0; gp[n] = 0;
- a = u * orig_a (mod m)
- b = v * orig_a (mod m)
+ if (m->invert_power)
+ /* Multiply by suitable pre-computed power to get inverse in redc
+ representation. */
+ ecc_mod_mul (m, gp, ap, m->invert_power, tp);
+ else {
+ mpn_copyi (gp, ap, n);
- and b odd at all times. Initially,
+ shift = GMP_NUMB_BITS*((m->invert_count + STEPS - 1) / STEPS) - m->invert_count;
- a = a_orig, u = 1
- b = m, v = 0
- */
+ /* Premultiply a by 2^{- shift} mod m */
+ redc_bits (gp, gp, mp, n, m->binv, shift);
+ }
- assert (ap != vp);
+ /* Maintain invariant
- up[0] = 1;
- mpn_zero (up+1, n - 1);
- mpn_copyi (bp, m->m, n);
- mpn_zero (vp, n);
- mpn_copyi (ap, in_ap, n);
+ a * u = 2^{-count} B^{ceil(count/STEPS)} f (mod m)
+ a * v = 2^{-count} B^{ceil(count/STEPS)} g (mod m)
+ */
+ mpn_zero (up, n);
+ mpn_zero (vp+1, n-1); vp[0] = 1;
- for (i = m->bit_size + GMP_NUMB_BITS * n; i-- > 0; )
+ for (delta = 1, count = m->invert_count; count > STEPS; count -= STEPS)
{
- mp_limb_t odd, swap, cy;
-
- /* Always maintain b odd. The logic of the iteration is as
- follows. For a, b:
-
- odd = a & 1
- a -= odd * b
- if (underflow from a-b)
- {
- b += a, assigns old a
- a = B^n-a
- }
-
- a /= 2
-
- For u, v:
-
- if (underflow from a - b)
- swap u, v
- u -= odd * v
- if (underflow from u - v)
- u += m
-
- u /= 2
- if (a one bit was shifted out)
- u += (m+1)/2
-
- As long as a > 0, the quantity
-
- (bitsize of a) + (bitsize of b)
-
- is reduced by at least one bit per iteration, hence after
- (bit_size of orig_a) + (bit_size of m) - 1 iterations we
- surely have a = 0. Then b = gcd(orig_a, m) and if b = 1 then
- also v = orig_a^{-1} (mod m)
- */
-
- assert (bp[0] & 1);
- odd = ap[0] & 1;
-
- swap = mpn_cnd_sub_n (odd, ap, ap, bp, n);
- mpn_cnd_add_n (swap, bp, bp, ap, n);
- cnd_neg (swap, ap, ap, n);
-
- mpn_cnd_swap (swap, up, vp, n);
- cy = mpn_cnd_sub_n (odd, up, up, vp, n);
- cy -= mpn_cnd_add_n (cy, up, up, m->m, n);
- assert (cy == 0);
-
- cy = mpn_rshift (ap, ap, n, 1);
- assert (cy == 0);
- cy = mpn_rshift (up, up, n, 1);
- cy = mpn_cnd_add_n (cy, up, up, m->mp1h, n);
- assert (cy == 0);
+ delta = steps (&M, STEPS, delta, fp[0], gp[0]);
+ matrix_vector_mul (&M, STEPS, n+1, fp, gp, tp);
+ matrix_vector_mul_mod (&M, mp, m->binv, n, up, vp, tp);
}
- assert ( (ap[0] | ap[n-1]) == 0);
-#undef ap
-#undef bp
-#undef up
+ delta = steps (&M, count, delta, fp[0], gp[0]);
+ matrix_vector_mul (&M, count, n+1, fp, gp, tp);
+ /* Only compute u, we don't need v. */
+ add_add_mul_mod (tp, up, vp, mp, n, M.a[0][0], M.a[0][1]);
+ redc_1 (up, tp, mp, n, m->binv);
+
+ assert (sec_zero_p (gp, n+1));
+
+ /* Now f = ±1 (if the inverse exists), and a * u = f (mod m) */
+ cy = cnd_neg (fp[n] >> (GMP_LIMB_BITS - 1), up, up, n);
+ /* Make u non-negative */
+ cy -= mpn_cnd_add_n (cy, up, up, mp, n);
+
+ cnd_neg (fp[n] >> (GMP_LIMB_BITS - 1), fp, fp, n + 1);
+ cy = one_p (fp, n+1);
+ /* Set to zero if invert doesn't exist. */
+ while (n > 0)
+ up[--n] &= - cy;
}
diff --git a/ecc-secp192r1.c b/ecc-secp192r1.c
index 4a07bca3..00ff87b1 100644
--- a/ecc-secp192r1.c
+++ b/ecc-secp192r1.c
@@ -244,13 +244,15 @@ const struct ecc_curve _nettle_secp_192r1 =
ECC_SECP192R1_INV_ITCH,
ECC_SECP192R1_SQRT_ITCH,
0,
+ ECC_INVP_COUNT,
+ ECC_BINVP,
ecc_p,
ecc_Bmodp,
ecc_Bmodp_shifted,
ecc_Bm2p,
ecc_redc_ppm1,
- ecc_pp1h,
+ NULL,
ecc_secp192r1_modp,
ecc_secp192r1_modp,
@@ -266,13 +268,15 @@ const struct ecc_curve _nettle_secp_192r1 =
ECC_MOD_INV_ITCH (ECC_LIMB_SIZE),
0,
0,
+ ECC_INVQ_COUNT,
+ ECC_BINVQ,
ecc_q,
ecc_Bmodq,
ecc_Bmodq_shifted,
ecc_Bm2q,
NULL,
- ecc_qp1h,
+ NULL,
ecc_mod,
ecc_mod,
diff --git a/ecc-secp224r1.c b/ecc-secp224r1.c
index b2a335ec..341aee7b 100644
--- a/ecc-secp224r1.c
+++ b/ecc-secp224r1.c
@@ -111,32 +111,6 @@ ecc_mod_pow_127m1 (const struct ecc_modulo *m,
#undef tp
}
-#define ECC_SECP224R1_INV_ITCH (4*ECC_LIMB_SIZE)
-
-static void
-ecc_secp224r1_inv (const struct ecc_modulo *p,
- mp_limb_t *rp, const mp_limb_t *ap,
- mp_limb_t *scratch)
-{
-#define a96m1 scratch
-#define tp (scratch + ECC_LIMB_SIZE)
-
- /* Compute a^{p - 2}, with
-
- p-2 = 2^{224} - 2^{96} - 1
- = 2^{97}(2^{127} - 1) + 2^{96} - 1
-
- This addition chain needs 97 squarings and one multiply in
- addition to ecc_mod_pow_127m1, for a total of 223 squarings and
- 13 multiplies.
- */
- ecc_mod_pow_127m1 (p, rp, a96m1, ap, tp);
- ecc_mod_pow_2k_mul (p, rp, rp, 97, a96m1, tp); /* a^{2^{224} - 2^{96} - 1 */
-
-#undef a96m1
-#undef tp
-}
-
#define ECC_SECP224R1_SQRT_ITCH (5*ECC_LIMB_SIZE)
static int
@@ -216,20 +190,22 @@ const struct ecc_curve _nettle_secp_224r1 =
ECC_LIMB_SIZE,
ECC_BMODP_SIZE,
-ECC_REDC_SIZE,
- ECC_SECP224R1_INV_ITCH,
+ ECC_MOD_INV_ITCH(ECC_LIMB_SIZE),
ECC_SECP224R1_SQRT_ITCH,
0,
+ ECC_INVP_COUNT,
+ ECC_BINVP,
ecc_p,
ecc_Bmodp,
ecc_Bmodp_shifted,
ecc_Bm2p,
ecc_redc_ppm1,
- ecc_pp1h,
+ USE_REDC ? ecc_invp_redc_power : NULL,
ecc_secp224r1_modp,
USE_REDC ? ecc_secp224r1_redc : ecc_secp224r1_modp,
- ecc_secp224r1_inv,
+ ecc_mod_inv,
ecc_secp224r1_sqrt,
NULL,
},
@@ -241,13 +217,15 @@ const struct ecc_curve _nettle_secp_224r1 =
ECC_MOD_INV_ITCH (ECC_LIMB_SIZE),
0,
0,
+ ECC_INVQ_COUNT,
+ ECC_BINVQ,
ecc_q,
ecc_Bmodq,
ecc_Bmodq_shifted,
ecc_Bm2q,
NULL,
- ecc_qp1h,
+ NULL,
ecc_mod,
ecc_mod,
@@ -265,7 +243,7 @@ const struct ecc_curve _nettle_secp_224r1 =
ECC_DUP_JJ_ITCH (ECC_LIMB_SIZE),
ECC_MUL_A_ITCH (ECC_LIMB_SIZE),
ECC_MUL_G_ITCH (ECC_LIMB_SIZE),
- ECC_J_TO_A_ITCH(ECC_LIMB_SIZE, ECC_SECP224R1_INV_ITCH),
+ ECC_J_TO_A_ITCH(ECC_LIMB_SIZE, ECC_MOD_INV_ITCH (ECC_LIMB_SIZE)),
ecc_add_jja,
ecc_add_jjj,
diff --git a/ecc-secp256r1.c b/ecc-secp256r1.c
index 4848dfe3..8c852aaa 100644
--- a/ecc-secp256r1.c
+++ b/ecc-secp256r1.c
@@ -213,64 +213,6 @@ ecc_secp256r1_modq (const struct ecc_modulo *q, mp_limb_t *rp, mp_limb_t *xp)
#error Unsupported parameters
#endif
-#define ECC_SECP256R1_INV_ITCH (4*ECC_LIMB_SIZE)
-
-static void
-ecc_secp256r1_inv (const struct ecc_modulo *p,
- mp_limb_t *rp, const mp_limb_t *ap,
- mp_limb_t *scratch)
-{
-#define a5m1 scratch
-#define t0 (scratch + ECC_LIMB_SIZE)
-#define a15m1 t0
-#define a32m1 a5m1
-#define tp (scratch + 2*ECC_LIMB_SIZE)
-/*
- Addition chain for p - 2 = 2^{256} - 2^{224} + 2^{192} + 2^{96} - 3
-
- 2^5 - 1 = 1 + 2 (2^4 - 1) = 1 + 2 (2^2+1)(2 + 1) 4 S + 3 M
- 2^{15} - 1 = (2^5 - 1) (1 + 2^5 (1 + 2^5) 10 S + 2 M
- 2^{16} - 1 = 1 + 2 (2^{15} - 1) S + M
- 2^{32} - 1 = (2^{16} + 1) (2^{16} - 1) 16 S + M
- 2^{64} - 2^{32} + 1 = 2^{32} (2^{32} - 1) + 1 32 S + M
- 2^{192} - 2^{160} + 2^{128} + 2^{32} - 1
- = 2^{128} (2^{64} - 2^{32} + 1) + 2^{32} - 1 128 S + M
- 2^{224} - 2^{192} + 2^{160} + 2^{64} - 1
- = 2^{32} (...) + 2^{32} - 1 32 S + M
- 2^{239} - 2^{207} + 2^{175} + 2^{79} - 1
- = 2^{15} (...) + 2^{15} - 1 15 S + M
- 2^{254} - 2^{222} + 2^{190} + 2^{94} - 1
- = 2^{15} (...) + 2^{15} - 1 15 S + M
- p - 2 = 2^2 (...) + 1 2 S M
- ---------------
- 255 S + 13 M
- */
- ecc_mod_sqr (p, rp, ap, tp); /* a^2 */
- ecc_mod_mul (p, rp, rp, ap, tp); /* a^3 */
- ecc_mod_pow_2kp1 (p, t0, rp, 2, tp); /* a^{2^4 - 1} */
- ecc_mod_sqr (p, rp, t0, tp); /* a^{2^5 - 2} */
- ecc_mod_mul (p, a5m1, rp, ap, tp); /* a^{2^5 - 1}, a5m1 */
-
- ecc_mod_pow_2kp1 (p, rp, a5m1, 5, tp); /* a^{2^{10} - 1, a5m1*/
- ecc_mod_pow_2k_mul (p, a15m1, rp, 5, a5m1, tp); /* a^{2^{15} - 1}, a5m1 a15m1 */
- ecc_mod_sqr (p, rp, a15m1, tp); /* a^{2^{16} - 2}, a15m1 */
- ecc_mod_mul (p, rp, rp, ap, tp); /* a^{2^{16} - 1}, a15m1 */
- ecc_mod_pow_2kp1 (p, a32m1, rp, 16, tp); /* a^{2^{32} - 1}, a15m1, a32m1 */
-
- ecc_mod_pow_2k_mul (p, rp, a32m1, 32, ap, tp);/* a^{2^{64} - 2^{32} + 1 */
- ecc_mod_pow_2k_mul (p, rp, rp, 128, a32m1, tp); /* a^{2^{192} - 2^{160} + 2^{128} + 2^{32} - 1} */
- ecc_mod_pow_2k_mul (p, rp, rp, 32, a32m1, tp);/* a^{2^{224} - 2^{192} + 2^{160} + 2^{64} - 1} */
- ecc_mod_pow_2k_mul (p, rp, rp, 15, a15m1, tp);/* a^{2^{239} - 2^{207} + 2^{175} + 2^{79} - 1} */
- ecc_mod_pow_2k_mul (p, rp, rp, 15, a15m1, tp);/* a^{2^{254} - 2^{222} + 2^{190} + 2^{94} - 1} */
- ecc_mod_pow_2k_mul (p, rp, rp, 2, ap, tp); /* a^{2^{256} - 2^{224} + 2^{192} + 2^{96} - 3} */
-
-#undef a5m1
-#undef t0
-#undef a15m1
-#undef a32m1
-#undef tp
-}
-
/* To guarantee that inputs to ecc_mod_zero_p are in the required range. */
#if ECC_LIMB_SIZE * GMP_NUMB_BITS != 256
#error Unsupported limb size
@@ -336,20 +278,22 @@ const struct ecc_curve _nettle_secp_256r1 =
ECC_LIMB_SIZE,
ECC_BMODP_SIZE,
ECC_REDC_SIZE,
- ECC_SECP256R1_INV_ITCH,
+ ECC_MOD_INV_ITCH (ECC_LIMB_SIZE),
ECC_SECP256R1_SQRT_ITCH,
0,
+ ECC_INVP_COUNT,
+ ECC_BINVP,
ecc_p,
ecc_Bmodp,
ecc_Bmodp_shifted,
ecc_Bm2p,
ecc_redc_ppm1,
- ecc_pp1h,
+ USE_REDC ? ecc_invp_redc_power : NULL,
ecc_secp256r1_modp,
USE_REDC ? ecc_secp256r1_redc : ecc_secp256r1_modp,
- ecc_secp256r1_inv,
+ ecc_mod_inv,
ecc_secp256r1_sqrt,
NULL,
},
@@ -361,14 +305,15 @@ const struct ecc_curve _nettle_secp_256r1 =
ECC_MOD_INV_ITCH (ECC_LIMB_SIZE),
0,
0,
+ ECC_INVQ_COUNT,
+ ECC_BINVQ,
ecc_q,
ecc_Bmodq,
ecc_Bmodq_shifted,
ecc_Bm2q,
NULL,
- ecc_qp1h,
-
+ NULL,
ecc_secp256r1_modq,
ecc_secp256r1_modq,
ecc_mod_inv,
@@ -385,7 +330,7 @@ const struct ecc_curve _nettle_secp_256r1 =
ECC_DUP_JJ_ITCH (ECC_LIMB_SIZE),
ECC_MUL_A_ITCH (ECC_LIMB_SIZE),
ECC_MUL_G_ITCH (ECC_LIMB_SIZE),
- ECC_J_TO_A_ITCH(ECC_LIMB_SIZE, ECC_SECP256R1_INV_ITCH),
+ ECC_J_TO_A_ITCH(ECC_LIMB_SIZE, ECC_MOD_INV_ITCH (ECC_LIMB_SIZE)),
ecc_add_jja,
ecc_add_jjj,
diff --git a/ecc-secp384r1.c b/ecc-secp384r1.c
index abac5e6d..9c015297 100644
--- a/ecc-secp384r1.c
+++ b/ecc-secp384r1.c
@@ -210,45 +210,6 @@ ecc_mod_pow_288m32m1 (const struct ecc_modulo *m,
#undef tp
}
-#define ECC_SECP384R1_INV_ITCH (6*ECC_LIMB_SIZE)
-
-static void
-ecc_secp384r1_inv (const struct ecc_modulo *p,
- mp_limb_t *rp, const mp_limb_t *ap,
- mp_limb_t *scratch)
-{
- /*
- Addition chain for
-
- p - 2 = 2^{384} - 2^{128} - 2^{96} + 2^{32} - 3
-
- Start with
-
- a^{2^{288} - 2^{32} - 1}
-
- and then use
-
- 2^{382} - 2^{126} - 2^{94} + 2^{30} - 1
- = 2^{94} (2^{288} - 2^{32} - 1) + 2^{30} - 1
-
- 2^{384} - 2^{128} - 2^{96} + 2^{32} - 3
- = 2^2 (2^{382} - 2^{126} - 2^{94} + 2^{30} - 1) + 1
-
- This addition chain needs 96 additional squarings and 2
- multiplies, for a total of 383 squarings and 14 multiplies.
- */
-
-#define a30m1 scratch
-#define tp (scratch + ECC_LIMB_SIZE)
-
- ecc_mod_pow_288m32m1 (p, rp, a30m1, ap, tp);
- ecc_mod_pow_2k_mul (p, rp, rp, 94, a30m1, tp); /* a^{2^{382} - 2^{126} - 2^{94} + 2^{30} - 1 */
- ecc_mod_pow_2k_mul (p, rp, rp, 2, ap, tp);
-
-#undef a30m1
-#undef tp
-}
-
/* To guarantee that inputs to ecc_mod_zero_p are in the required range. */
#if ECC_LIMB_SIZE * GMP_NUMB_BITS != 384
#error Unsupported limb size
@@ -307,20 +268,22 @@ const struct ecc_curve _nettle_secp_384r1 =
ECC_LIMB_SIZE,
ECC_BMODP_SIZE,
ECC_REDC_SIZE,
- ECC_SECP384R1_INV_ITCH,
+ ECC_MOD_INV_ITCH(ECC_LIMB_SIZE),
ECC_SECP384R1_SQRT_ITCH,
0,
+ ECC_INVP_COUNT,
+ ECC_BINVP,
ecc_p,
ecc_Bmodp,
ecc_Bmodp_shifted,
ecc_Bm2p,
ecc_redc_ppm1,
- ecc_pp1h,
+ NULL,
ecc_secp384r1_modp,
ecc_secp384r1_modp,
- ecc_secp384r1_inv,
+ ecc_mod_inv,
ecc_secp384r1_sqrt,
NULL,
},
@@ -332,13 +295,15 @@ const struct ecc_curve _nettle_secp_384r1 =
ECC_MOD_INV_ITCH (ECC_LIMB_SIZE),
0,
0,
+ ECC_INVQ_COUNT,
+ ECC_BINVQ,
ecc_q,
ecc_Bmodq,
ecc_Bmodq_shifted,
ecc_Bm2q,
NULL,
- ecc_qp1h,
+ NULL,
ecc_mod,
ecc_mod,
@@ -356,7 +321,7 @@ const struct ecc_curve _nettle_secp_384r1 =
ECC_DUP_JJ_ITCH (ECC_LIMB_SIZE),
ECC_MUL_A_ITCH (ECC_LIMB_SIZE),
ECC_MUL_G_ITCH (ECC_LIMB_SIZE),
- ECC_J_TO_A_ITCH(ECC_LIMB_SIZE, ECC_SECP384R1_INV_ITCH),
+ ECC_J_TO_A_ITCH(ECC_LIMB_SIZE, ECC_MOD_INV_ITCH (ECC_LIMB_SIZE)),
ecc_add_jja,
ecc_add_jjj,
diff --git a/ecc-secp521r1.c b/ecc-secp521r1.c
index 8ab7b4bf..c47b7b81 100644
--- a/ecc-secp521r1.c
+++ b/ecc-secp521r1.c
@@ -75,53 +75,6 @@ ecc_secp521r1_modp (const struct ecc_modulo *m UNUSED, mp_limb_t *rp, mp_limb_t
}
#endif
-#define ECC_SECP521R1_INV_ITCH (3*ECC_LIMB_SIZE)
-
-static void
-ecc_secp521r1_inv (const struct ecc_modulo *p,
- mp_limb_t *rp, const mp_limb_t *ap,
- mp_limb_t *scratch)
-{
-#define t0 scratch
-#define tp (scratch + ECC_LIMB_SIZE)
-
- /* Addition chain for p - 2:
-
- 2^{521} - 3
- = 1 + 2^2(2^519 - 1)
- = 1 + 2^2(1 + 2 (2^518 - 1)
- = 1 + 2^2(1 + 2 (2^259 + 1) (1 + 2(2^258 - 1)))
- = 1 + 2^2(1 + 2 (2^259 + 1) (1 + 2(2^129 + 1) (2^129 - 1)))
- = 1 + 2^2(1 + 2 (2^259 + 1) (1 + 2(2^129 + 1) (1 + 2 (2^128 - 1))))
-
- where
-
- 2^{128} - 1 = (2^64 + 1) (2^32+1) (2^16 + 1) (2^8 + 1) (2^4 + 1) (2^2 + 1) (2 + 1)
-
- This addition chain needs 520 squarings and 13 multiplies.
- */
-
- ecc_mod_sqr (p, rp, ap, tp); /* a^2 */
- ecc_mod_mul (p, rp, ap, rp, tp); /* a^3 = a^{2^2 - 1} */
- ecc_mod_pow_2kp1 (p, t0, rp, 2, tp); /* a^15 = a^{2^4 - 1} */
- ecc_mod_pow_2kp1 (p, rp, t0, 4, tp); /* a^{2^8 - 1} */
- ecc_mod_pow_2kp1 (p, t0, rp, 8, tp); /* a^{2^16 - 1} */
- ecc_mod_pow_2kp1 (p, rp, t0, 16, tp); /* a^{2^32 - 1} */
- ecc_mod_pow_2kp1 (p, t0, rp, 32, tp); /* a^{2^64 - 1} */
- ecc_mod_pow_2kp1 (p, rp, t0, 64, tp); /* a^{2^128 - 1} */
- ecc_mod_sqr (p, rp, rp, tp); /* a^{2^129 - 2} */
- ecc_mod_mul (p, rp, rp, ap, tp); /* a^{2^129 - 1} */
- ecc_mod_pow_2kp1 (p, t0, rp, 129, tp);/* a^{2^258 - 1} */
- ecc_mod_sqr (p, rp, t0, tp); /* a^{2^259 - 2} */
- ecc_mod_mul (p, rp, rp, ap, tp); /* a^{2^259 - 1} */
- ecc_mod_pow_2kp1 (p, t0, rp, 259, tp);/* a^{2^518 - 1} */
- ecc_mod_sqr (p, rp, t0, tp); /* a^{2^519 - 2} */
- ecc_mod_mul (p, rp, rp, ap, tp); /* a^{2^519 - 1} */
- ecc_mod_sqr (p, rp, rp, tp); /* a^{2^520 - 2} */
- ecc_mod_sqr (p, rp, rp, tp); /* a^{2^521 - 4} */
- ecc_mod_mul (p, rp, rp, ap, tp); /* a^{2^521 - 3} */
-}
-
#define ECC_SECP521R1_SQRT_ITCH (2*ECC_LIMB_SIZE)
static int
@@ -162,20 +115,22 @@ const struct ecc_curve _nettle_secp_521r1 =
ECC_LIMB_SIZE,
ECC_BMODP_SIZE,
ECC_REDC_SIZE,
- ECC_SECP521R1_INV_ITCH,
+ ECC_MOD_INV_ITCH(ECC_LIMB_SIZE),
ECC_SECP521R1_SQRT_ITCH,
0,
+ ECC_INVP_COUNT,
+ ECC_BINVP,
ecc_p,
ecc_Bmodp,
ecc_Bmodp_shifted,
ecc_Bm2p,
ecc_redc_ppm1,
- ecc_pp1h,
+ NULL,
ecc_secp521r1_modp,
ecc_secp521r1_modp,
- ecc_secp521r1_inv,
+ ecc_mod_inv,
ecc_secp521r1_sqrt,
NULL,
},
@@ -187,13 +142,15 @@ const struct ecc_curve _nettle_secp_521r1 =
ECC_MOD_INV_ITCH (ECC_LIMB_SIZE),
0,
0,
+ ECC_INVQ_COUNT,
+ ECC_BINVQ,
ecc_q,
ecc_Bmodq,
ecc_Bmodq_shifted,
ecc_Bm2q,
NULL,
- ecc_qp1h,
+ NULL,
ecc_mod,
ecc_mod,
@@ -211,8 +168,11 @@ const struct ecc_curve _nettle_secp_521r1 =
ECC_DUP_JJ_ITCH (ECC_LIMB_SIZE),
ECC_MUL_A_ITCH (ECC_LIMB_SIZE),
ECC_MUL_G_ITCH (ECC_LIMB_SIZE),
+#if 0
ECC_J_TO_A_ITCH(ECC_LIMB_SIZE, ECC_SECP521R1_INV_ITCH),
-
+#else
+ ECC_J_TO_A_ITCH(ECC_LIMB_SIZE, ECC_MOD_INV_ITCH(ECC_LIMB_SIZE)),
+#endif
ecc_add_jja,
ecc_add_jjj,
ecc_dup_jj,
diff --git a/eccdata.c b/eccdata.c
index e0726e8d..1d2e1993 100644
--- a/eccdata.c
+++ b/eccdata.c
@@ -1167,11 +1167,19 @@ string_toupper (char *buf, size_t size, const char *s)
abort();
}
+static unsigned
+invert_count (unsigned bit_size) {
+ /* See https://eprint.iacr.org/2019/266.pdf, Theorem 11.2 */
+ return (49 * bit_size + 57) / 17;
+}
+
static void
output_modulo (const char *name, const mpz_t x,
unsigned size, unsigned bits_per_limb)
{
unsigned bit_size;
+ unsigned count;
+ unsigned invert_iterations;
int shift;
char buf[20];
mpz_t t;
@@ -1193,9 +1201,29 @@ output_modulo (const char *name, const mpz_t x,
/ bits_per_limb));
bit_size = mpz_sizeinbase (x, 2);
-
shift = size * bits_per_limb - bit_size;
assert (shift >= 0);
+
+ /* If most significant bit of top limb is zero, input may be one bit
+ larger than the modulo. */
+ count = invert_count (bit_size + (shift > 0));
+ printf("#define ECC_INV%s_COUNT %u\n", buf, count);
+
+ mpz_set_ui (t, 0);
+ mpz_setbit (t, bits_per_limb);
+ mpz_invert (t, x, t);
+
+ printf ("#define ECC_BINV%s 0x", buf);
+ mpz_out_str (stdout, 16, t);
+ printf("%s\n", bits_per_limb > 32 ? "ULL" : "UL");
+
+ invert_iterations = (count + bits_per_limb - 3) / (bits_per_limb - 2);
+ mpz_set_ui (t, 2);
+ mpz_powm_ui (t, t, bits_per_limb * (invert_iterations + size) - count, x);
+ mpz_invert (t, t, x);
+ snprintf (buf, sizeof(buf), "ecc_inv%s_redc_power", name);
+ output_bignum (buf, t, size, bits_per_limb);
+
if (shift > 0)
{
mpz_set_ui (t, 0);
@@ -1292,14 +1320,6 @@ output_curve (const struct ecc_curve *ecc, unsigned bits_per_limb)
output_bignum ("ecc_b", ecc->b, limb_size, bits_per_limb);
- mpz_add_ui (t, ecc->p, 1);
- mpz_fdiv_q_2exp (t, t, 1);
- output_bignum ("ecc_pp1h", t, limb_size, bits_per_limb);
-
- mpz_add_ui (t, ecc->q, 1);
- mpz_fdiv_q_2exp (t, t, 1);
- output_bignum ("ecc_qp1h", t, limb_size, bits_per_limb);
-
/* Trailing zeros in p+1 correspond to trailing ones in p. */
redc_limbs = mpz_scan0 (ecc->p, 0) / bits_per_limb;
if (redc_limbs > 0)
diff --git a/testsuite/ecc-modinv-test.c b/testsuite/ecc-modinv-test.c
index aecfb43b..147896a6 100644
--- a/testsuite/ecc-modinv-test.c
+++ b/testsuite/ecc-modinv-test.c
@@ -51,9 +51,12 @@ test_modulo (gmp_randstate_t rands, const char *name,
mp_limb_t *scratch;
unsigned j;
mpz_t r;
+ mp_bitcnt_t bits;
mpz_init (r);
+ bits = m->bit_size + (m->size * GMP_NUMB_BITS > m->bit_size);
+
a = xalloc_limbs (m->size);
ai = xalloc_limbs (m->size);
ref = xalloc_limbs (m->size);;
@@ -90,15 +93,17 @@ test_modulo (gmp_randstate_t rands, const char *name,
fprintf (stderr, " (bad)\n");
abort ();
}
-
+
for (j = 0; j < COUNT; j++)
{
if (j & 1)
- mpz_rrandomb (r, rands, m->size * GMP_NUMB_BITS);
+ mpz_rrandomb (r, rands, bits);
else
- mpz_urandomb (r, rands, m->size * GMP_NUMB_BITS);
+ mpz_urandomb (r, rands, bits);
mpz_limbs_copy (a, r, m->size);
+ if ((a[m->size - 1] >> 1) > m->m[m->size - 1])
+ a[m->size - 1] = 2*m->m[m->size - 1] + 1;
if (!ref_modinv (ref, a, m->m, m->size, use_redc))
{
@@ -113,7 +118,7 @@ test_modulo (gmp_randstate_t rands, const char *name,
fprintf (stderr, "%s->invert failed (test %u, bit size %u):\n",
name, j, m->bit_size);
fprintf (stderr, "a = ");
- mpz_out_str (stderr, 16, r);
+ mpn_out_str (stderr, 16, a, m->size);
fprintf (stderr, "\np = ");
mpn_out_str (stderr, 16, m->m, m->size);
fprintf (stderr, "\nt = ");