diff options
author | Niels Möller <nisse@lysator.liu.se> | 2018-01-08 22:28:52 +0100 |
---|---|---|
committer | Niels Möller <nisse@lysator.liu.se> | 2018-01-09 07:31:54 +0100 |
commit | 0325eaf3ac938322b964d52716b15b106adeae0e (patch) | |
tree | 6ab286900a5ca9b0dd37417976c2e1251ae4b212 | |
parent | e09ed92e51c349f2ea495a9a921cb682332700de (diff) | |
download | nettle-ctr-opt.tar.gz |
CTR mode optimizations for 16-byte block size. (branch: ctr-opt)
-rw-r--r-- | ChangeLog | 7 | ||||
-rw-r--r-- | ctr.c | 132 |
2 files changed, 119 insertions(+), 20 deletions(-)
@@ -9,6 +9,13 @@ 2018-01-08 Niels Möller <nisse@lysator.liu.se> + * ctr.c (ctr_crypt16): New function, with optimizations specific + to 16-byte block size. + (ctr_fill16): New helper function, definition depending on + WORDS_BIGENDIAN, and little endian version requiring + HAVE_BUILTIN_BSWAP64. + (ctr_crypt): Use ctr_crypt16, when appropriate. + * nettle-types.h (union nettle_block16): Add uint64_t field. * configure.ac: Check for __builtin_bswap64, define @@ -62,36 +62,128 @@ ctr_fill (size_t block_size, uint8_t *ctr, size_t length, uint8_t *buffer) return i; } +#if WORDS_BIGENDIAN +# define USE_CTR_CRYPT16 1 +static void +ctr_fill16(uint8_t *ctr, size_t blocks, uint64_t *buffer) +{ + uint64_t hi, lo; + hi = READ_UINT64(ctr); + lo = READ_UINT64(ctr + 8); + + while (blocks-- > 0) + { + *buffer++ = hi; + *buffer++ = lo; + hi += !(++lo); + } + WRITE_UINT64(ctr, hi); + WRITE_UINT64(ctr + 8, lo); +} +#else /* !WORDS_BIGENDIAN */ +# if HAVE_BUILTIN_BSWAP64 +# define USE_CTR_CRYPT16 1 +static void +ctr_fill16(uint8_t *ctr, size_t blocks, uint64_t *buffer) +{ + uint64_t hi, lo; + /* Read hi in native endianness */ + hi = LE_READ_UINT64(ctr); + lo = READ_UINT64(ctr + 8); + + while (blocks-- > 0) + { + *buffer++ = hi; + *buffer++ = __builtin_bswap64(lo); + if (!++lo) + hi = __builtin_bswap64(__builtin_bswap64(hi) + 1); + } + LE_WRITE_UINT64(ctr, hi); + WRITE_UINT64(ctr + 8, lo); +} +# else /* ! 
HAVE_BUILTIN_BSWAP64 */ +# define USE_CTR_CRYPT16 0 +# endif +#endif /* !WORDS_BIGENDIAN */ + +#if USE_CTR_CRYPT16 +static size_t +ctr_crypt16(const void *ctx, nettle_cipher_func *f, + uint8_t *ctr, + size_t length, uint8_t *dst, + const uint8_t *src) +{ + if (dst != src && !((uintptr_t) dst % sizeof(uint64_t))) + { + size_t blocks = length / 16u; + ctr_fill16 (ctr, blocks, (uint64_t *) dst); + f(ctx, blocks * 16, dst, dst); + memxor (dst, src, blocks * 16); + return blocks * 16; + } + else + { + /* Construct an aligned buffer of consecutive counter values, of + size at most CTR_BUFFER_LIMIT. */ + TMP_DECL(buffer, union nettle_block16, CTR_BUFFER_LIMIT / 16); + size_t blocks = (length + 15) / 16u; + size_t i; + TMP_ALLOC(buffer, MIN(blocks, CTR_BUFFER_LIMIT / 16)); + + for (i = 0; blocks >= CTR_BUFFER_LIMIT / 16; + i += CTR_BUFFER_LIMIT, blocks -= CTR_BUFFER_LIMIT / 16) + { + ctr_fill16 (ctr, CTR_BUFFER_LIMIT / 16, buffer->u64); + f(ctx, CTR_BUFFER_LIMIT, buffer->b, buffer->b); + if (length - i < CTR_BUFFER_LIMIT) + goto done; + memxor3 (dst, src, buffer->b, CTR_BUFFER_LIMIT); + } + + if (blocks > 0) + { + assert (length - i < CTR_BUFFER_LIMIT); + ctr_fill16 (ctr, blocks, buffer->u64); + f(ctx, blocks * 16, buffer->b, buffer->b); + done: + memxor3 (dst + i, src + i, buffer->b, length - i); + } + return length; + } +} +#endif /* USE_CTR_CRYPT16 */ + void ctr_crypt(const void *ctx, nettle_cipher_func *f, size_t block_size, uint8_t *ctr, size_t length, uint8_t *dst, const uint8_t *src) { - if (src != dst) +#if USE_CTR_CRYPT16 + if (block_size == 16) { - if (length == block_size) - { - f(ctx, block_size, dst, ctr); - INCREMENT(block_size, ctr); - memxor(dst, src, block_size); - } - else - { - size_t filled = ctr_fill (block_size, ctr, length, dst); + size_t done = ctr_crypt16(ctx, f, ctr, length, dst, src); + length -= done; + src += done; + dst += done; + } +#endif - f(ctx, filled, dst, dst); - memxor(dst, src, filled); + if(src != dst) + { + size_t filled = ctr_fill 
(block_size, ctr, length, dst); + + f(ctx, filled, dst, dst); + memxor(dst, src, filled); - if (filled < length) - { - TMP_DECL(buffer, uint8_t, NETTLE_MAX_CIPHER_BLOCK_SIZE); - TMP_ALLOC(buffer, block_size); + if (filled < length) + { + TMP_DECL(block, uint8_t, NETTLE_MAX_CIPHER_BLOCK_SIZE); + TMP_ALLOC(block, block_size); - f(ctx, block_size, buffer, ctr); - INCREMENT(block_size, ctr); - memxor3(dst + filled, src + filled, buffer, length - filled); - } + f(ctx, block_size, block, ctr); + INCREMENT(block_size, ctr); + memxor3(dst + filled, src + filled, block, length - filled); } } else |