diff options
author | Niels Möller <nisse@lysator.liu.se> | 2022-01-25 20:06:14 +0100 |
---|---|---|
committer | Niels Möller <nisse@lysator.liu.se> | 2022-01-25 20:06:14 +0100 |
commit | 2c0d307d6126c8ff5a6e1e1a2e533d7d492d6ed0 (patch) | |
tree | 6471ca142a8d22c323999552ffd0c491179b1020 | |
parent | b7268727a11bce0a350345c2671493d2ddd28b45 (diff) | |
download | nettle-poly1305-radix32.tar.gz |
Complete rewrite of C implementation of poly1305. (branch: poly1305-radix32)
Use radix 32 (32-bit limbs) rather than radix 26, and exploit the fact
that certain key bits are always zero.
* poly1305-internal.c (_nettle_poly1305_set_key): Rewritten.
(_nettle_poly1305_block): Rewritten.
(_nettle_poly1305_digest): Rewritten.
* poly1305.h (struct poly1305_ctx): Rearrange internal unions, but
with size and alignment unchanged.
-rw-r--r-- | ChangeLog | 8 | ||||
-rw-r--r-- | poly1305-internal.c | 203 | ||||
-rw-r--r-- | poly1305.h | 11 |
3 files changed, 103 insertions, 119 deletions
@@ -1,5 +1,13 @@ 2022-01-25 Niels Möller <nisse@lysator.liu.se> + Complete rewrite of C implementation of poly1305. Radix 32 rather + than radix 26, and use that certain key bits are always zero. + * poly1305-internal.c (_nettle_poly1305_set_key): Rewritten. + (_nettle_poly1305_block): Rewritten. + (_nettle_poly1305_digest): Rewritten. + * poly1305.h (struct poly1305_ctx): Rearrange internal unions, but + with size and alignment unchanged. + Chacha implementation for arm64, contributed by Mamone Tarsha. * arm64/chacha-core-internal.asm: New file. * arm64/chacha-2core.asm: New file. diff --git a/poly1305-internal.c b/poly1305-internal.c index 490fdf71..17ad7998 100644 --- a/poly1305-internal.c +++ b/poly1305-internal.c @@ -1,8 +1,7 @@ /* poly1305-internal.c - Copyright: 2012-2013 Andrew M. (floodyberry) Copyright: 2013 Nikos Mavrogiannopoulos - Copyright: 2013 Niels Möller + Copyright: 2013, 2022 Niels Möller This file is part of GNU Nettle. @@ -31,30 +30,6 @@ not, see http://www.gnu.org/licenses/. */ -/* Based on https://github.com/floodyberry/poly1305-donna. - * Modified for nettle by Nikos Mavrogiannopoulos and Niels Möller. - * Original license notice: - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - #if HAVE_CONFIG_H #include "config.h" #endif @@ -67,44 +42,41 @@ #include "macros.h" -#define mul32x32_64(a,b) ((uint64_t)(a) * (b)) +#define M32(a,b) ((uint64_t)(a) * (b)) #define r0 r.r32[0] #define r1 r.r32[1] #define r2 r.r32[2] #define r3 r.r32[3] -#define r4 r.r32[4] +#define s0 r.r32[4] #define s1 r.r32[5] -#define s2 s32[0] -#define s3 s32[1] -#define s4 s32[2] +#define s2 r.r32[6] +#define s3 r.r32[7] #define h0 h.h32[0] #define h1 h.h32[1] #define h2 h.h32[2] #define h3 h.h32[3] -#define h4 hh +#define h4 h.h32[4] void _nettle_poly1305_set_key(struct poly1305_ctx *ctx, const uint8_t key[16]) { - uint32_t t0,t1,t2,t3; - - t0 = LE_READ_UINT32(key); - t1 = LE_READ_UINT32(key+4); - t2 = LE_READ_UINT32(key+8); - t3 = LE_READ_UINT32(key+12); - - ctx->r0 = t0 & 0x3ffffff; t0 >>= 26; t0 |= t1 << 6; - ctx->r1 = t0 & 0x3ffff03; t1 >>= 20; t1 |= t2 << 12; - ctx->r2 = t1 & 0x3ffc0ff; t2 >>= 14; t2 |= t3 << 18; - ctx->r3 = t2 & 0x3f03fff; t3 >>= 8; - ctx->r4 = t3 & 0x00fffff; - - ctx->s1 = ctx->r1 * 5; - ctx->s2 = ctx->r2 * 5; - ctx->s3 = ctx->r3 * 5; - ctx->s4 = ctx->r4 * 5; + uint32_t t0, t1, t2, t3; + t0 = LE_READ_UINT32 (key); + t1 = LE_READ_UINT32 (key+4); + t2 = LE_READ_UINT32 (key+8); + t3 = LE_READ_UINT32 (key+12); + + ctx->r0 = t0 & 0x0fffffff; + ctx->r1 = t1 & 0x0ffffffc; + ctx->r2 = t2 & 0x0ffffffc; + ctx->r3 = t3 & 0x0ffffffc; + + ctx->s0 = 5*ctx->r0; + ctx->s1 = 5*(ctx->r1 >> 2); + ctx->s2 = 5*(ctx->r2 >> 2); + ctx->s3 = 5*(ctx->r3 >> 2); ctx->h0 = 0; ctx->h1 = 0; @@ -114,82 +86,89 @@ _nettle_poly1305_set_key(struct poly1305_ctx *ctx, const uint8_t key[16]) } void -_nettle_poly1305_block (struct poly1305_ctx *ctx, const uint8_t *m, unsigned t4) +_nettle_poly1305_block (struct 
poly1305_ctx *ctx, const uint8_t *m, unsigned m128) { - uint32_t t0,t1,t2,t3; - uint32_t b; - uint64_t t[5]; - uint64_t c; - - t0 = LE_READ_UINT32(m); - t1 = LE_READ_UINT32(m+4); - t2 = LE_READ_UINT32(m+8); - t3 = LE_READ_UINT32(m+12); - - ctx->h0 += t0 & 0x3ffffff; - ctx->h1 += ((((uint64_t)t1 << 32) | t0) >> 26) & 0x3ffffff; - ctx->h2 += ((((uint64_t)t2 << 32) | t1) >> 20) & 0x3ffffff; - ctx->h3 += ((((uint64_t)t3 << 32) | t2) >> 14) & 0x3ffffff; - ctx->h4 += (t3 >> 8) | ((uint32_t) t4 << 24); - - /* poly1305_donna_mul: */ - t[0] = mul32x32_64(ctx->h0,ctx->r0) + mul32x32_64(ctx->h1,ctx->s4) + mul32x32_64(ctx->h2,ctx->s3) + mul32x32_64(ctx->h3,ctx->s2) + mul32x32_64(ctx->h4,ctx->s1); - t[1] = mul32x32_64(ctx->h0,ctx->r1) + mul32x32_64(ctx->h1,ctx->r0) + mul32x32_64(ctx->h2,ctx->s4) + mul32x32_64(ctx->h3,ctx->s3) + mul32x32_64(ctx->h4,ctx->s2); - t[2] = mul32x32_64(ctx->h0,ctx->r2) + mul32x32_64(ctx->h1,ctx->r1) + mul32x32_64(ctx->h2,ctx->r0) + mul32x32_64(ctx->h3,ctx->s4) + mul32x32_64(ctx->h4,ctx->s3); - t[3] = mul32x32_64(ctx->h0,ctx->r3) + mul32x32_64(ctx->h1,ctx->r2) + mul32x32_64(ctx->h2,ctx->r1) + mul32x32_64(ctx->h3,ctx->r0) + mul32x32_64(ctx->h4,ctx->s4); - t[4] = mul32x32_64(ctx->h0,ctx->r4) + mul32x32_64(ctx->h1,ctx->r3) + mul32x32_64(ctx->h2,ctx->r2) + mul32x32_64(ctx->h3,ctx->r1) + mul32x32_64(ctx->h4,ctx->r0); - - ctx->h0 = (uint32_t)t[0] & 0x3ffffff; c = (t[0] >> 26); - t[1] += c; ctx->h1 = (uint32_t)t[1] & 0x3ffffff; b = (uint32_t)(t[1] >> 26); - t[2] += b; ctx->h2 = (uint32_t)t[2] & 0x3ffffff; b = (uint32_t)(t[2] >> 26); - t[3] += b; ctx->h3 = (uint32_t)t[3] & 0x3ffffff; b = (uint32_t)(t[3] >> 26); - t[4] += b; ctx->h4 = (uint32_t)t[4] & 0x3ffffff; b = (uint32_t)(t[4] >> 26); - ctx->h0 += b * 5; + uint32_t t0, t1, t2, t3, t4; + uint64_t s, f0, f1, f2, f3; + + /* Add in message block */ + t0 = ctx->h0 + LE_READ_UINT32(m); + s = (uint64_t) ctx->h1 + (t0 < ctx->h0) + LE_READ_UINT32(m+4); + t1 = s; + s = ctx->h2 + (s >> 32) + LE_READ_UINT32(m+8); + t2 
= s; + s = ctx->h3 + (s >> 32) + LE_READ_UINT32(m+12); + t3 = s; + t4 = ctx->h4 + (s >> 32) + m128; + + /* Key constants are bounded by rk < 2^28, sk < 5*2^26, therefore + all the fk sums fit in 64 bits without overflow, with at least + one bit margin. */ + f0 = M32(t0, ctx->r0) + M32(t1, ctx->s3) + M32(t2, ctx->s2) + M32(t3, ctx->s1) + + M32(t4 >> 2, ctx->s0); + f1 = M32(t0, ctx->r1) + M32(t1, ctx->r0) + M32(t2, ctx->s3) + M32(t3, ctx->s2) + + M32(t4, ctx->s1); + f2 = M32(t0, ctx->r2) + M32(t1, ctx->r1) + M32(t2, ctx->r0) + M32(t3, ctx->s3) + + M32(t4, ctx->s2); + f3 = M32(t0, ctx->r3) + M32(t1, ctx->r2) + M32(t2, ctx->r1) + M32(t3, ctx->r0) + + M32(t4, ctx->s3) + ((uint64_t)((t4 & 3)*ctx->r0) << 32); + + ctx->h0 = f0; + f1 += f0 >> 32; + ctx->h1 = f1; + f2 += f1 >> 32; + ctx->h2 = f2; + f3 += f2 >> 32; + ctx->h3 = f3; + ctx->h4 = f3 >> 32; } /* Adds digest to the nonce */ void _nettle_poly1305_digest (struct poly1305_ctx *ctx, union nettle_block16 *s) { - uint32_t b, nb; - uint64_t f0,f1,f2,f3; - uint32_t g0,g1,g2,g3,g4; - - b = ctx->h0 >> 26; ctx->h0 = ctx->h0 & 0x3ffffff; - ctx->h1 += b; b = ctx->h1 >> 26; ctx->h1 = ctx->h1 & 0x3ffffff; - ctx->h2 += b; b = ctx->h2 >> 26; ctx->h2 = ctx->h2 & 0x3ffffff; - ctx->h3 += b; b = ctx->h3 >> 26; ctx->h3 = ctx->h3 & 0x3ffffff; - ctx->h4 += b; b = ctx->h4 >> 26; ctx->h4 = ctx->h4 & 0x3ffffff; - ctx->h0 += b * 5; b = ctx->h0 >> 26; ctx->h0 = ctx->h0 & 0x3ffffff; - ctx->h1 += b; - - g0 = ctx->h0 + 5; b = g0 >> 26; g0 &= 0x3ffffff; - g1 = ctx->h1 + b; b = g1 >> 26; g1 &= 0x3ffffff; - g2 = ctx->h2 + b; b = g2 >> 26; g2 &= 0x3ffffff; - g3 = ctx->h3 + b; b = g3 >> 26; g3 &= 0x3ffffff; - g4 = ctx->h4 + b - (1 << 26); - - b = (g4 >> 31) - 1; - nb = ~b; - ctx->h0 = (ctx->h0 & nb) | (g0 & b); - ctx->h1 = (ctx->h1 & nb) | (g1 & b); - ctx->h2 = (ctx->h2 & nb) | (g2 & b); - ctx->h3 = (ctx->h3 & nb) | (g3 & b); - ctx->h4 = (ctx->h4 & nb) | (g4 & b); + uint32_t t0, t1, t2, t3, t4, c0, c1, c2, c3, mask; + uint64_t f0, f1, f2; + + t0 = 
ctx->h0; + t1 = ctx->h1; + t2 = ctx->h2; + t3 = ctx->h3; + t4 = ctx->h4; + + /* Fold high part of t4 */ + c0 = 5 * (t4 >> 2); + t4 &= 3; + t0 += c0; c1 = (t0 < c0); + t1 += c1; c2 = (t1 < c1); + t2 += c2; c3 = (t2 < c2); + t3 += c3; + t4 += (t3 < c3); + + /* Compute resulting carries when adding 5. */ + c1 = (t0 >= 0xfffffffb); + c2 = (t1 + c1 < c1); + c3 = (t2 + c2 < t2); + t4 += (t3 + c3 < t3); + + /* Set if H >= 2^130 - 5 */ + mask = - (t4 >> 2); + + t0 += mask & 5; + t1 += mask & c1; + t2 += mask & c2; + t3 += mask & c3; /* FIXME: Take advantage of s being aligned as an unsigned long. */ - f0 = ((ctx->h0 )|(ctx->h1<<26)) + (uint64_t)LE_READ_UINT32(s->b); - f1 = ((ctx->h1>> 6)|(ctx->h2<<20)) + (uint64_t)LE_READ_UINT32(s->b+4); - f2 = ((ctx->h2>>12)|(ctx->h3<<14)) + (uint64_t)LE_READ_UINT32(s->b+8); - f3 = ((ctx->h3>>18)|(ctx->h4<< 8)) + (uint64_t)LE_READ_UINT32(s->b+12); + f0 = (uint64_t) t0 + LE_READ_UINT32(s->b); + f1 = t1 + (f0 >> 32) + LE_READ_UINT32(s->b+4); + f2 = t2 + (f1 >> 32) + LE_READ_UINT32(s->b+8); + t3 += (f2 >> 32) + LE_READ_UINT32(s->b+12); LE_WRITE_UINT32(s->b, f0); - f1 += (f0 >> 32); LE_WRITE_UINT32(s->b+4, f1); - f2 += (f1 >> 32); LE_WRITE_UINT32(s->b+8, f2); - f3 += (f2 >> 32); - LE_WRITE_UINT32(s->b+12, f3); + LE_WRITE_UINT32(s->b+12, t3); ctx->h0 = 0; ctx->h1 = 0; @@ -55,18 +55,15 @@ struct poly1305_ctx { /* Key, 128-bit value and some cached multiples. */ union { - uint32_t r32[6]; - uint64_t r64[3]; + uint32_t r32[8]; + uint64_t r64[4]; } r; - uint32_t s32[3]; /* State, represented as words of 26, 32 or 64 bits, depending on implementation. */ - /* High bits first, to maintain alignment. */ - uint32_t hh; union { - uint32_t h32[4]; - uint64_t h64[2]; + uint32_t h32[6]; + uint64_t h64[3]; } h; }; |