diff options
author | Vadim Sukhomlinov <sukhomlinov@google.com> | 2021-07-20 09:28:38 -0700 |
---|---|---|
committer | Commit Bot <commit-bot@chromium.org> | 2021-07-21 20:11:03 +0000 |
commit | 7c1e430fa0cfd8a46030c3ef69fd3ab99ebc133a (patch) | |
tree | 9d324b1d4de82f23a0e5fb86b06ab9b8ba137a01 | |
parent | afa1e4cddef431500ae1763135c4ace60289931d (diff) | |
download | chrome-ec-7c1e430fa0cfd8a46030c3ef69fd3ab99ebc133a.tar.gz |
cr50: replace call to __aeabi_uldivmod with better implementation
FIPS module should be self-contained. Adding __aeabi_uldivmod is
challenging as it comes from the compiler builtins and is used by other parts.
In this CL we implement udiv32(), which divides a 64-bit number by a 32-bit
number. Since it doesn't compute the remainder and doesn't have to support
real 64-bit by 64-bit division, it's faster. Also, we can use an ARM
instruction to count leading zeros instead of doing it manually.
This code is reused from Ti50 cryptolib code as is.
Exhaustive bn_div() test is provided by test/tpm_test/bn_test.c, which
can now be built with the board/cr50/dcrypto version.
BUG=b:138578318
TEST=make BOARD=cr50
test/tpm_test/make CR50=1
build/tpm_test/bn_test
Signed-off-by: Vadim Sukhomlinov <sukhomlinov@google.com>
Change-Id: I19cebc5c11d3a80bc50732350b7c598bfa374348
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/ec/+/3042138
Reviewed-by: Vadim Sukhomlinov <sukhomlinov@chromium.org>
Reviewed-by: Vadim Bendebury <vbendeb@chromium.org>
Tested-by: Vadim Sukhomlinov <sukhomlinov@chromium.org>
Auto-Submit: Vadim Sukhomlinov <sukhomlinov@chromium.org>
Commit-Queue: Vadim Bendebury <vbendeb@chromium.org>
-rw-r--r-- | board/cr50/dcrypto/bn.c | 126 | ||||
-rw-r--r-- | test/tpm_test/Makefile | 11 |
2 files changed, 128 insertions, 9 deletions
diff --git a/board/cr50/dcrypto/bn.c b/board/cr50/dcrypto/bn.c index 94aafa1799..7b68c10d89 100644 --- a/board/cr50/dcrypto/bn.c +++ b/board/cr50/dcrypto/bn.c @@ -491,6 +491,121 @@ static void bn_mul_ex(struct LITE_BIGNUM *c, BN_DIGIT(c, i + b->dmax - 1) = carry; } +/* Functions to convert between uint32_t and uint64_t */ +static inline uint32_t lo32(uint64_t v) +{ + return (uint32_t)v; +} +static inline uint32_t hi32(uint64_t v) +{ + return (uint32_t)(v >> 32); +} +static inline uint64_t make64(uint32_t hi, uint32_t lo) +{ + return (((uint64_t)hi) << 32) | lo; +} + +static inline uint32_t lo16(uint32_t v) +{ + return (uint32_t)(v)&0xffff; +} + +static inline uint32_t hi16(uint32_t v) +{ + return (uint32_t)(v >> 16); +} + +/* make Clang's host behavior of clz match Soteria and avoid UBSAN error */ +static inline int clz(unsigned int x) +{ + return (x) ? __builtin_clz(x) : 32; +} + +/** + * Unsigned division of 64-bit integer with 32-bit divisor, used to implement + * Knuth's long division algorithm. For platforms which don't support hardware + * 64 by 32 division we have to either rely on compiler builtins (__udivdi3, + * __aeabi_uldivmod) or implement this code explicitly. + * Due to potential build issues with dependency on compiler run-time libs, + * use our own implementation. + * + * Algorithm is adapted from GNU's libgcc and optimized for the use case. 
+ * + */ +#define udiv_qrnnd(q, r, n1, n0, d) \ + { \ + uint32_t __d1, __d0, __q1, __q0, __r1, __r0, __m; \ + __d1 = hi16(d); \ + __d0 = lo16(d); \ + \ + __q1 = (n1) / __d1; \ + __r1 = (n1) - (__q1 * __d1); \ + __m = __q1 * __d0; \ + __r1 = (__r1 << 16) | hi16(n0); \ + if (__r1 < __m) { \ + __q1--; \ + __r1 += (d); \ + if (__r1 >= (d)) \ + if (__r1 < __m) \ + __q1--, __r1 += (d); \ + } \ + __r1 -= __m; \ + __q0 = __r1 / __d1; \ + __r0 = __r1 - (__q0 * __d1); \ + __m = __q0 * __d0; \ + __r0 = (__r0 << 16) | lo16(n0); \ + if (__r0 < __m) { \ + __q0--; \ + __r0 += (d); \ + if (__r0 >= (d)) \ + if (__r0 < __m) \ + __q0--, __r0 += (d); \ + } \ + __r0 -= __m; \ + \ + (q) = (__q1 << 16) | __q0; \ + (r) = __r0; \ + } + +uint64_t udiv32(uint64_t n, uint32_t d0) +{ + uint32_t n0, n1, n2, q0, q1, bm; + + n0 = lo32(n); + n1 = hi32(n); + + /* if it's 32-bit division or division by zero, use hardware directly */ + if (d0 == 0 || n1 == 0) + return n0 / d0; + + bm = clz(d0); + if (d0 > n1) { /* 0q = nn / 0D */ + /* make the most significant bit of the denominator set. */ + if (bm != 0) { + d0 = d0 << bm; + n1 = (n1 << bm) | (n0 >> (32 - bm)); + n0 = n0 << bm; + } + q1 = 0; + } else { + /* qq = NN / 0d */ + if (bm == 0) { + n1 -= d0; + q1 = 1; + } else { + /* Normalize. 
*/ + d0 = d0 << bm; + n2 = n1 >> (32 - bm); + n1 = (n1 << bm) | (n0 >> (32 - bm)); + n0 = n0 << bm; + udiv_qrnnd(q1, n1, n2, n1, d0); + } + } + udiv_qrnnd(q0, n0, n1, n0, d0); + /* Remainder in n0 >> bm, but we don't use it */ + return make64(q1, q0); +} + static int bn_div_word_ex(struct LITE_BIGNUM *q, struct LITE_BIGNUM *r, const struct LITE_BIGNUM *u, int m, @@ -501,7 +616,7 @@ static int bn_div_word_ex(struct LITE_BIGNUM *q, for (i = m - 1; i >= 0; --i) { uint64_t tmp = ((uint64_t)rem << 32) + BN_DIGIT(u, i); - uint32_t qd = tmp / div; + uint32_t qd = udiv32(tmp, div); BN_DIGIT(q, i) = qd; rem = tmp - (uint64_t)qd * div; @@ -544,11 +659,8 @@ static int bn_div_ex(struct LITE_BIGNUM *q, return bn_div_word_ex(q, r, u, m, vtop); /* Compute shift factor to make v have high bit set */ - s = 0; - while ((vtop & 0x80000000) == 0) { - s = s + 1; - vtop = vtop << 1; - } + s = clz(vtop); + vtop <<= s; /* Normalize u and v into un and vn. * Note un always gains a leading digit @@ -586,7 +698,7 @@ static int bn_div_ex(struct LITE_BIGNUM *q, uint64_t rhat = ((uint64_t)un[j + n] << 32) + un[j + n - 1]; - qd = rhat / vn[n - 1]; + qd = udiv32(rhat, vn[n - 1]); rhat = rhat - (uint64_t)qd * vn[n - 1]; while ((rhat >> 32) == 0 && (uint64_t)qd * vn[n - 2] > diff --git a/test/tpm_test/Makefile b/test/tpm_test/Makefile index 23f66317f9..29d3e229bc 100644 --- a/test/tpm_test/Makefile +++ b/test/tpm_test/Makefile @@ -22,14 +22,21 @@ SWIG = /usr/bin/swig PYTHON_INCLUDE = $(shell python3 -c 'import sysconfig; \ print(sysconfig.get_paths().get("include"))') +ifeq ($(CR50),) vpath %c $(src) ../../chip/g/dcrypto $(src)/testlib +CFLAGS += -I../../chip/g/dcrypto +else -CFLAGS = -fPIC +# Use BOARD=cr50 specific implementation +vpath %c $(src) ../../board/cr50/dcrypto $(src)/testlib +CFLAGS += -I../../board/cr50/dcrypto +endif + +CFLAGS += -fPIC CFLAGS += -I ${PYTHON_INCLUDE} CFLAGS += -I../../../../third_party/cryptoc/include CFLAGS += -I../../board/cr50 CFLAGS += -I../../chip/g -CFLAGS += 
-I../../chip/g/dcrypto CFLAGS += -I../../fuzz CFLAGS += -I../../include CFLAGS += -I.. |