diff options
author | Niels Möller <nisse@lysator.liu.se> | 2020-10-20 22:20:02 +0200 |
---|---|---|
committer | Niels Möller <nisse@lysator.liu.se> | 2020-10-20 22:20:02 +0200 |
commit | 138c60b5c0a7e7a0e6b0977328751f6fd08d5158 (patch) | |
tree | 5a9c28f34ad45428811e632d3eb3e22506cf4767 | |
parent | 4312f84030c6bf0adc54e901bb7746d25423cbf8 (diff) | |
download | nettle-138c60b5c0a7e7a0e6b0977328751f6fd08d5158.tar.gz |
Optimize modular inversion for secp224r1 and secp256r1
* ecc-secp224r1.c (ecc_secp224r1_inv): New function, modular
inverse using powering.
(_nettle_secp_224r1): Analogous updates. Increases signing
performance roughly 17% on x86_64.
* ecc-secp256r1.c (ecc_secp256r1_inv): New function, modular
inverse using powering.
(_nettle_secp_256r1): Analogous updates. Increases signing
performance roughly 6% on x86_64.
-rw-r--r-- | ChangeLog | 12 | ||||
-rw-r--r-- | ecc-secp224r1.c | 61 | ||||
-rw-r--r-- | ecc-secp256r1.c | 59 |
3 files changed, 126 insertions, 6 deletions
@@ -1,3 +1,15 @@ +2020-10-20 Niels Möller <nisse@lysator.liu.se> + + * ecc-secp256r1.c (ecc_secp256r1_inv): New function, modular + inverse using powering. + (_nettle_secp_256r1): Analogous updates. Increases signing + performance roughly 6% on x86_64. + + * ecc-secp224r1.c (ecc_secp224r1_inv): New function, modular + inverse using powering. + (_nettle_secp_224r1): Analogous updates. Increases signing + performance roughly 17% on x86_64. + 2020-10-19 Niels Möller <nisse@lysator.liu.se> * ecc-secp521r1.c (ecc_secp521r1_inv): New function, modular diff --git a/ecc-secp224r1.c b/ecc-secp224r1.c index c8d4d40e..2575c27f 100644 --- a/ecc-secp224r1.c +++ b/ecc-secp224r1.c @@ -62,6 +62,61 @@ ecc_secp224r1_modp (const struct ecc_modulo *m, mp_limb_t *rp); # error Configuration error #endif +#define ECC_SECP224R1_INV_ITCH (6*ECC_LIMB_SIZE) + +static void +ecc_secp224r1_inv (const struct ecc_modulo *p, + mp_limb_t *rp, const mp_limb_t *ap, + mp_limb_t *scratch) +{ +#define a7 scratch +#define a31m1 (scratch + ECC_LIMB_SIZE) +#define a96m1 (scratch + 2*ECC_LIMB_SIZE) + /* t0 overlaps a96m1. */ +#define t0 (scratch + 2*ECC_LIMB_SIZE) +#define t1 (scratch + 4*ECC_LIMB_SIZE) + /* t2 overlaps a7 and a31m1, and is used when those values are no + longer needed. */ +#define t2 scratch + + /* Addition chain for p - 2 = 2^{224} - 2^{96} - 1 + + 7 = 1 + 2 (2+1) 2 S + 2 M + 2^{31} - 1 = 1 + 2 (2^{15} + 1)(1 + 2 (2^7 + 1) (1 + 2 (2^3+1) * 7)) + 28 S + 6 M + 2^{34} - 1 = 2^3 (2^{31} - 1) + 7 3 S + M + 2^{65} - 1 = 2^{31}(2^{34} - 1) + 2^{31} - 1 31 S + M + 2^{96} - 1 = 2^{31}(2^{65} - 1) + 2^{31} - 1 31 S + M + 2^{127} - 1 = 2^{31}(2^{96} - 1) + 2^{31} - 1 31 S + M + + 2^{224} - 2^{96} - 1 97 S + M + = 2^{97}(2^{127} - 1) + 2^{96} - 1 + + This addition chain needs 223 squarings and 13 multiplies. + */ + ecc_mod_sqr (p, rp, ap); /* a^2 */ + ecc_mod_mul (p, t0, ap, rp); /* a^3 */ + ecc_mod_sqr (p, rp, t0); /* a^6 */ + ecc_mod_mul (p, a7, ap, rp); /* a^{2^3-1} a7 */ + + ecc_mod_pow_2kp1 (p, t0, a7, 3, rp); /* a^{2^6 - 1} */ + ecc_mod_sqr (p, rp, t0); /* a^{2^7 - 2} */ + ecc_mod_mul (p, t0, rp, ap); /* a^{2^7 - 1} */ + ecc_mod_pow_2kp1 (p, rp, t0, 7, t1); /* a^{2^14 - 1} */ + ecc_mod_sqr (p, t0, rp); /* a^{2^15 - 2} */ + ecc_mod_mul (p, rp, t0, ap); /* a^{2^15 - 1} */ + ecc_mod_pow_2kp1 (p, t0, rp, 15, t1); /* a^{2^30 - 1} */ + ecc_mod_sqr (p, rp, t0); /* a^{2^31 - 2} */ + ecc_mod_mul (p, a31m1, rp, ap); /* a^{2^31 - 1} a7, a31m1 */ + + ecc_mod_pow_2k_mul (p, rp, a31m1, 3, a7, t0); /* a^{2^34 - 1} a31m1 */ + ecc_mod_pow_2k_mul (p, t1, rp, 31, a31m1, t0); /* a^{2^65 - 1} a31m1 */ + ecc_mod_pow_2k_mul (p, a96m1, t1, 31, a31m1, rp); /* a^{2^96 - 1} a31m1, a96m1 */ + ecc_mod_pow_2k_mul (p, t1, a96m1, 31, a31m1, rp); /* a^{2^{127} - 1} a96m1 */ + ecc_mod_pow_2k_mul (p, rp, t1, 97, a96m1, t2); /* a^{2^{224} - 2^{96} - 1 */ +} + + const struct ecc_curve _nettle_secp_224r1 = { { @@ -69,7 +124,7 @@ const struct ecc_curve _nettle_secp_224r1 = ECC_LIMB_SIZE, ECC_BMODP_SIZE, -ECC_REDC_SIZE, - ECC_MOD_INV_ITCH (ECC_LIMB_SIZE), + ECC_SECP224R1_INV_ITCH, 0, ecc_p, @@ -80,7 +135,7 @@ const struct ecc_curve _nettle_secp_224r1 = ecc_secp224r1_modp, USE_REDC ? ecc_secp224r1_redc : ecc_secp224r1_modp, - USE_REDC ? ecc_mod_inv_redc : ecc_mod_inv, + ecc_secp224r1_inv, NULL, }, { @@ -112,7 +167,7 @@ const struct ecc_curve _nettle_secp_224r1 = ECC_DUP_JJ_ITCH (ECC_LIMB_SIZE), ECC_MUL_A_ITCH (ECC_LIMB_SIZE), ECC_MUL_G_ITCH (ECC_LIMB_SIZE), - ECC_J_TO_A_ITCH (ECC_LIMB_SIZE), + 2*ECC_LIMB_SIZE + ECC_SECP224R1_INV_ITCH, ecc_add_jja, ecc_add_jjj, diff --git a/ecc-secp256r1.c b/ecc-secp256r1.c index adab8d90..e624b020 100644 --- a/ecc-secp256r1.c +++ b/ecc-secp256r1.c @@ -145,6 +145,59 @@ ecc_secp256r1_modp (const struct ecc_modulo *p, mp_limb_t *rp) rp[3] = u1; } +#define ECC_SECP256R1_INV_ITCH (7*ECC_LIMB_SIZE) + +static void +ecc_secp256r1_inv (const struct ecc_modulo *p, + mp_limb_t *rp, const mp_limb_t *ap, + mp_limb_t *scratch) +{ +#define a5m1 scratch +#define a15m1 (scratch + ECC_LIMB_SIZE) + /* Overlaps first half of t0 */ +#define a32m1 (scratch + 2*ECC_LIMB_SIZE) +#define t0 (scratch + 3*ECC_LIMB_SIZE) +#define t1 (scratch + 5*ECC_LIMB_SIZE) +/* + Addition chain for p - 2 = 2^{256} - 2^{224} + 2^{192} + 2^{96} - 3 + + 2^5 - 1 = 1 + 2 (2^4 - 1) = 1 + 2 (2^2+1)(2 + 1) 4 S + 3 M + 2^{15} - 1 = (2^5 - 1) (1 + 2^5 (1 + 2^5) 10 S + 2 M + 2^{16} - 1 = 1 + 2 (2^{15} - 1) S + M + 2^{32} - 1 = (2^{16} + 1) (2^{16} - 1) 16 S + M + 2^{64} - 2^{32} + 1 = 2^{32} (2^{32} - 1) + 1 32 S + M + 2^{192} - 2^{160} + 2^{128} + 2^{32} - 1 + = 2^{128} (2^{64} - 2^{32} + 1) + 2^{32} - 1 128 S + M + 2^{224} - 2^{192} + 2^{160} + 2^{64} - 1 + = 2^{32} (...) + 2^{32} - 1 32 S + M + 2^{239} - 2^{207} + 2^{175} + 2^{79} - 1 + = 2^{15} (...) + 2^{15} - 1 15 S + M + 2^{254} - 2^{222} + 2^{190} + 2^{94} - 1 + = 2^{15} (...) + 2^{15} - 1 15 S + M + p - 2 = 2^2 (...) + 1 2 S M + --------------- + 255 S + 13 M + */ + ecc_mod_sqr (p, rp, ap); /* a^2 */ + ecc_mod_mul (p, t1, ap, rp); /* a^3 */ + ecc_mod_pow_2kp1 (p, rp, t1, 2, t0); /* a^{2^4 - 1} */ + ecc_mod_sqr (p, t0, rp); /* a^{2^5 - 2} */ + ecc_mod_mul (p, a5m1, ap, t0); /* a^{2^5 - 1}, a5m1 */ + + ecc_mod_pow_2kp1 (p, rp, a5m1, 5, t0); /* a^{2^{10} - 1, a5m1*/ + ecc_mod_pow_2k_mul (p, a15m1, rp, 5, a5m1, t0); /* a^{2^{15} - 1}, a5m1 a15m1 */ + ecc_mod_sqr (p, rp, a15m1); /* a^{2^{16} - 2}, a15m1 */ + ecc_mod_mul (p, t1, ap, rp); /* a^{2^{16} - 1}, a15m1 */ + ecc_mod_pow_2kp1 (p, a32m1, t1, 16, rp); /* a^{2^{32} - 1}, a15m1, a32m1 */ + + ecc_mod_pow_2k_mul (p, t0, a32m1, 32, ap, t1); /* a^{2^{64} - 2^{32} + 1 */ + ecc_mod_pow_2k_mul (p, rp, t0, 128, a32m1, t1); /* a^{2^{192} - 2^{160} + 2^{128} + 2^{32} - 1} */ + ecc_mod_pow_2k_mul (p, t0, rp, 32, a32m1, t1); /* a^{2^{224} - 2^{192} + 2^{160} + 2^{64} - 1} */ + ecc_mod_pow_2k_mul (p, rp, t0, 15, a15m1, t1); /* a^{2^{239} - 2^{207} + 2^{175} + 2^{79} - 1} */ + ecc_mod_pow_2k_mul (p, t0, rp, 15, a15m1, t1); /* a^{2^{254} - 2^{222} + 2^{190} + 2^{94} - 1} */ + ecc_mod_pow_2k_mul (p, rp, t0, 2, ap, t0); /* a^{2^{256} - 2^{224} + 2^{192} + 2^{96} - 3} */ +} + static void ecc_secp256r1_modq (const struct ecc_modulo *q, mp_limb_t *rp) { @@ -246,7 +299,7 @@ const struct ecc_curve _nettle_secp_256r1 = ECC_LIMB_SIZE, ECC_BMODP_SIZE, ECC_REDC_SIZE, - ECC_MOD_INV_ITCH (ECC_LIMB_SIZE), + ECC_SECP256R1_INV_ITCH, 0, ecc_p, @@ -257,7 +310,7 @@ const struct ecc_curve _nettle_secp_256r1 = ecc_secp256r1_modp, USE_REDC ? ecc_secp256r1_redc : ecc_secp256r1_modp, - USE_REDC ? ecc_mod_inv_redc : ecc_mod_inv, + ecc_secp256r1_inv, NULL, }, { @@ -289,7 +342,7 @@ const struct ecc_curve _nettle_secp_256r1 = ECC_DUP_JJ_ITCH (ECC_LIMB_SIZE), ECC_MUL_A_ITCH (ECC_LIMB_SIZE), ECC_MUL_G_ITCH (ECC_LIMB_SIZE), - ECC_J_TO_A_ITCH (ECC_LIMB_SIZE), + 2*ECC_LIMB_SIZE + ECC_SECP256R1_INV_ITCH, ecc_add_jja, ecc_add_jjj, |