diff options
author | Niels Möller <nisse@lysator.liu.se> | 2020-10-21 20:28:29 +0200 |
---|---|---|
committer | Niels Möller <nisse@lysator.liu.se> | 2020-10-21 20:38:46 +0200 |
commit | 9d1d358a7e5cc1b36c80e4be3cde5be0e5c2d143 (patch) | |
tree | fc24ce068c0afee0a403320a86e23f01b5601e20 | |
parent | e512e22185e25515bd8db7cd1da679d3ff51fc1d (diff) | |
download | nettle-optimize-ecc-invert.tar.gz |
Optimize modular inversion for secp384r1.optimize-ecc-invert
* ecc-secp384r1.c (ecc_secp384r1_inv): New function, modular
inverse using powering.
(_nettle_secp_384r1): Analogous updates. Increases signing
performance roughly 15% on x86_64.
-rw-r--r-- | ChangeLog | 7 | ||||
-rw-r--r-- | ecc-secp384r1.c | 66 |
2 files changed, 70 insertions, 3 deletions
@@ -1,3 +1,10 @@ +2020-10-21 Niels Möller <nisse@lysator.liu.se> + + * ecc-secp384r1.c (ecc_secp384r1_inv): New function, modular + inverse using powering. + (_nettle_secp_384r1): Analogous updates. Increases signing + performance roughly 15% on x86_64. + 2020-10-20 Niels Möller <nisse@lysator.liu.se> * ecc-mod-inv.c (ecc_mod_inv_redc): Deleted, no longer needed. diff --git a/ecc-secp384r1.c b/ecc-secp384r1.c index 54bcd112..53017a15 100644 --- a/ecc-secp384r1.c +++ b/ecc-secp384r1.c @@ -146,7 +146,67 @@ ecc_secp384r1_modp (const struct ecc_modulo *p, mp_limb_t *rp) #else #define ecc_secp384r1_modp ecc_mod #endif + +#define ECC_SECP384R1_INV_ITCH (7*ECC_LIMB_SIZE) + +static void +ecc_secp384r1_inv (const struct ecc_modulo *p, + mp_limb_t *rp, const mp_limb_t *ap, + mp_limb_t *scratch) +{ +#define a3 scratch +#define a5m1 (scratch + ECC_LIMB_SIZE) + /* Using as destination clobbers t0 */ +#define a15m1 (scratch + 2*ECC_LIMB_SIZE) + /* Overlaps a5m1 */ +#define a30m1 (scratch + ECC_LIMB_SIZE) +#define t0 (scratch + 3*ECC_LIMB_SIZE) +#define t1 (scratch + 5*ECC_LIMB_SIZE) + /* + Addition chain for + + p - 2 = 2^{384} - 2^{128} - 2^{96} + 2^{32} - 3 + + 3 = 1 + 2 + 2^4 - 1 = 15 = (2^2 + 1) * 3 + 2^5 - 1 = 1 + 2(2^4 - 1) + 2^{15} - 1 = (1 + 2^5(1 + 2^5)) (2^5-1) + 2^{30} - 1 = (2^{15} + 1) (2^{15} - 1) + 2^{60} - 1 = (2^{30} + 1) (2^{30} - 1) + 2^{120} - 1 = (2^{60} + 1) (2^{60} - 1) + 2^{240} - 1 = (2^{120} + 1)(2^{120} - 1) + 2^{255} - 1 = 2^{15} (2^{240} - 1) + 2^{15} - 1 + 2^{286} - 2^{30} - 1 = 2^{31} (2^{255} - 1) + 2^{30} - 1 + + 2^{288} - 2^{32} - 1 = 2^2 (2^{286} - 2^{30} - 1) + 3 + 2^{382} - 2^{126} - 2^{94} + 2^{30} - 1 + = 2^{94} (2^{288} - 2^{32} - 1) + 2^{30} - 1 + + This addition chain needs 383 squarings and 14 multiplies. + + */ + ecc_mod_sqr (p, rp, ap); /* a^2 */ + ecc_mod_mul (p, a3, ap, rp); /* a^3 */ + ecc_mod_pow_2kp1 (p, rp, a3, 2, t0); /* a^{2^4 - 1}, a3 */ + ecc_mod_sqr (p, t0, rp); /* a^{2^5 - 2} */ + ecc_mod_mul (p, a5m1, ap, t0); /* a^{2^5 - 1}, a3 a5m1 */ + + ecc_mod_pow_2kp1 (p, rp, a5m1, 5, t0); /* a^{2^{10} - 1, a3, a5m1*/ + ecc_mod_pow_2k_mul (p, a15m1, rp, 5, a5m1, t1); /* a^{2^{15} - 1}, a3, a5m1 a15m1 */ + ecc_mod_pow_2kp1 (p, t0, a15m1, 15, rp); /* a^{2^{30} - 1}, a3 a15m1 a30m1 */ + mpn_copyi (a30m1, t0, ECC_LIMB_SIZE); + ecc_mod_pow_2kp1 (p, t0, a30m1, 30, rp); /* a^{2^{60} - 1, a3 a15m1 a30m1 */ + ecc_mod_pow_2kp1 (p, rp, t0, 60, t1); /* a^{2^{120} - 1, a3 a15m1 a30m1 */ + ecc_mod_pow_2kp1 (p, t0, rp, 120, t1); /* a^{2^{240} - 1, a3 a15m1 a30m1 */ + ecc_mod_pow_2k_mul (p, rp, t0, 15, a15m1, t1); /* a^{2^{255} - 1, a3 a30m1 */ + ecc_mod_pow_2k_mul (p, t0, rp, 31, a30m1, t1); /* a^{2^{286} - 2^{30} - 1}, a3 a30m1 */ + + ecc_mod_pow_2k_mul (p, rp, t0, 2, a3, t1); /* a^{2^{288} - 2^{32} - 1, a30m1 */ + ecc_mod_pow_2k_mul (p, t0, rp, 94, a30m1, t1); /* a^{2^{392} - 2^{126} - 2^{94} + 2^{30} - 1 */ + ecc_mod_pow_2k_mul (p, rp, t0, 2, ap, t1); +} + const struct ecc_curve _nettle_secp_384r1 = { { @@ -154,7 +214,7 @@ const struct ecc_curve _nettle_secp_384r1 = ECC_LIMB_SIZE, ECC_BMODP_SIZE, ECC_REDC_SIZE, - ECC_MOD_INV_ITCH (ECC_LIMB_SIZE), + ECC_SECP384R1_INV_ITCH, 0, ecc_p, @@ -165,7 +225,7 @@ const struct ecc_curve _nettle_secp_384r1 = ecc_secp384r1_modp, ecc_secp384r1_modp, - ecc_mod_inv, + ecc_secp384r1_inv, NULL, }, { @@ -197,7 +257,7 @@ const struct ecc_curve _nettle_secp_384r1 = ECC_DUP_JJ_ITCH (ECC_LIMB_SIZE), ECC_MUL_A_ITCH (ECC_LIMB_SIZE), ECC_MUL_G_ITCH (ECC_LIMB_SIZE), - ECC_J_TO_A_ITCH (ECC_LIMB_SIZE), + 2*ECC_LIMB_SIZE + ECC_SECP384R1_INV_ITCH, ecc_add_jja, ecc_add_jjj, |