diff options
author | Niels Möller <nisse@lysator.liu.se> | 2018-01-08 08:45:17 +0100 |
---|---|---|
committer | Niels Möller <nisse@lysator.liu.se> | 2018-01-09 07:31:54 +0100 |
commit | 54b2d297afa86a84fc3dc23e0529fb5120ef5a99 (patch) | |
tree | a0f6937c492e6fef7214e6f748df3943c16786b1 | |
parent | c380faf73a1cb46ccb25ee19902663a165739171 (diff) | |
download | nettle-54b2d297afa86a84fc3dc23e0529fb5120ef5a99.tar.gz |
Increase buffer size for in-place CTR.
-rw-r--r-- | ChangeLog | 4 |
-rw-r--r-- | ctr.c | 65 |
2 files changed, 35 insertions, 34 deletions
```diff
--- a/ChangeLog
+++ b/ChangeLog
@@ -9,6 +9,10 @@
 
 2018-01-08  Niels Möller  <nisse@lysator.liu.se>
 
+        * ctr.c (ctr_crypt): For in-place operation, increase max buffer
+        size from 4 blocks to 512 bytes, similarly to CBC and CFB.
+        Improves in-place aes128 CTR performance by 25% on x86_64.
+
         * examples/nettle-benchmark.c (time_cipher): Benchmark in-place
         operation separately, for cbc_decrypt and ctr_crypt.
 
--- a/ctr.c
+++ b/ctr.c
@@ -45,7 +45,8 @@
 #include "memxor.h"
 #include "nettle-internal.h"
 
-#define NBLOCKS 4
+/* Don't allocate any more space than this on the stack */
+#define CTR_BUFFER_LIMIT 512
 
 void
 ctr_crypt(const void *ctx, nettle_cipher_func *f,
@@ -90,47 +91,43 @@ ctr_crypt(const void *ctx, nettle_cipher_func *f,
     }
   else
     {
-      if (length > block_size)
-        {
-          TMP_DECL(buffer, uint8_t, NBLOCKS * NETTLE_MAX_CIPHER_BLOCK_SIZE);
-          size_t chunk = NBLOCKS * block_size;
-
-          TMP_ALLOC(buffer, chunk);
+      /* For in-place CTR, construct a buffer of consecutive counter
+         values, of size at most CTR_BUFFER_LIMIT. */
+      TMP_DECL(buffer, uint8_t, CTR_BUFFER_LIMIT);
+
+      size_t buffer_size;
+      if (length < block_size)
+        buffer_size = block_size;
+      else if (length <= CTR_BUFFER_LIMIT)
+        buffer_size = length;
+      else
+        buffer_size = CTR_BUFFER_LIMIT;
 
-          for (; length >= chunk;
-               length -= chunk, src += chunk, dst += chunk)
-            {
-              unsigned n;
-              uint8_t *p;
-              for (n = 0, p = buffer; n < NBLOCKS; n++, p += block_size)
-                {
-                  memcpy (p, ctr, block_size);
-                  INCREMENT(block_size, ctr);
-                }
-              f(ctx, chunk, buffer, buffer);
-              memxor(dst, buffer, chunk);
-            }
+      TMP_ALLOC(buffer, buffer_size);
 
-          if (length > 0)
+      while (length >= block_size)
+        {
+          size_t i;
+          for (i = 0;
+               i + block_size <= buffer_size && i + block_size <= length;
+               i += block_size)
             {
-              /* Final, possibly partial, blocks */
-              for (chunk = 0; chunk < length; chunk += block_size)
-                {
-                  memcpy (buffer + chunk, ctr, block_size);
-                  INCREMENT(block_size, ctr);
-                }
-              f(ctx, chunk, buffer, buffer);
-              memxor3(dst, src, buffer, length);
+              memcpy (buffer + i, ctr, block_size);
+              INCREMENT(block_size, ctr);
             }
+          assert (i > 0);
+          f(ctx, i, buffer, buffer);
+          memxor(dst, buffer, i);
+          length -= i;
+          dst += i;
         }
-      else if (length > 0)
-        {
-          TMP_DECL(buffer, uint8_t, NETTLE_MAX_CIPHER_BLOCK_SIZE);
-          TMP_ALLOC(buffer, block_size);
 
+      /* Final, possibly partial, block. */
+      if (length > 0)
+        {
           f(ctx, block_size, buffer, ctr);
           INCREMENT(block_size, ctr);
-          memxor3(dst, src, buffer, length);
+          memxor(dst, buffer, length);
         }
     }
 }
```