From 6cd014b2658f2a709f22b16d06982ff5444cd6db Mon Sep 17 00:00:00 2001 From: Nicolas Boichat Date: Fri, 3 Mar 2017 10:12:07 -0800 Subject: rsa: Optimization of multiplications for Cortex-M0 We multiply 2 32-bit numbers (and not 64-bit numbers), and then add another 32-bit number, which makes it possible to optimize the assembly and save a few instructions. With -O3, 3072-bit exponent, lower verification time from 122 ms to 104 ms on STM32F072 @48 MHz. Optimized mac function from Dmitry Grinberg. BRANCH=poppy BUG=b:35647963 BUG=b:77608104 TEST=On staff, flash, verification successful TEST=make test-rsa, make test-rsa3 TEST=Flash test-utils and test-rsa to hammer => pass Change-Id: I584c54c631a3f59f691849a279b308e8d4b4b22d Signed-off-by: Nicolas Boichat Reviewed-on: https://chromium-review.googlesource.com/449024 Reviewed-by: Vincent Palatin Reviewed-on: https://chromium-review.googlesource.com/1080583 --- common/rsa.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'common/rsa.c') diff --git a/common/rsa.c b/common/rsa.c index e4e85c3374..3d8518e9ef 100644 --- a/common/rsa.c +++ b/common/rsa.c @@ -50,14 +50,14 @@ static void mont_mul_add(const struct rsa_public_key *key, const uint32_t a, const uint32_t *b) { - uint64_t A = (uint64_t)a * b[0] + c[0]; + uint64_t A = mula32(a, b[0], c[0]); uint32_t d0 = (uint32_t)A * key->n0inv; - uint64_t B = (uint64_t)d0 * key->n[0] + (uint32_t)A; + uint64_t B = mula32(d0, key->n[0], A); uint32_t i; for (i = 1; i < RSANUMWORDS; ++i) { - A = (A >> 32) + (uint64_t)a * b[i] + c[i]; - B = (B >> 32) + (uint64_t)d0 * key->n[i] + (uint32_t)A; + A = (A >> 32) + mula32(a, b[i], c[i]); + B = (B >> 32) + mula32(d0, key->n[i], A); c[i - 1] = (uint32_t)B; } @@ -78,11 +78,11 @@ static void mont_mul_add_0(const struct rsa_public_key *key, const uint32_t *b) { uint32_t d0 = c[0] * key->n0inv; - uint64_t B = (uint64_t)d0 * key->n[0] + c[0]; + uint64_t B = mula32(d0, key->n[0], c[0]); uint32_t i; for (i = 1; i < 
RSANUMWORDS; ++i) { - B = (B >> 32) + (uint64_t)d0 * key->n[i] + c[i]; + B = (B >> 32) + mula32(d0, key->n[i], c[i]); c[i - 1] = (uint32_t)B; } -- cgit v1.2.1