summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Niels Möller <nisse@lysator.liu.se> 2018-01-08 08:45:17 +0100
committer: Niels Möller <nisse@lysator.liu.se> 2018-01-09 07:31:54 +0100
commit: 54b2d297afa86a84fc3dc23e0529fb5120ef5a99 (patch)
tree: a0f6937c492e6fef7214e6f748df3943c16786b1
parent: c380faf73a1cb46ccb25ee19902663a165739171 (diff)
download: nettle-54b2d297afa86a84fc3dc23e0529fb5120ef5a99.tar.gz
Increase buffer size for in-place CTR.
-rw-r--r-- ChangeLog 4
-rw-r--r-- ctr.c 65
2 files changed, 35 insertions, 34 deletions
diff --git a/ChangeLog b/ChangeLog
index c927848c..aa9608d7 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -9,6 +9,10 @@
2018-01-08 Niels Möller <nisse@lysator.liu.se>
+ * ctr.c (ctr_crypt): For in-place operation, increase max buffer
+ size from 4 blocks to 512 bytes, similarly to CBC and CFB.
+ Improves in-place aes128 CTR performance by 25% on x86_64.
+
* examples/nettle-benchmark.c (time_cipher): Benchmark in-place
operation separately, for cbc_decrypt and ctr_crypt.
diff --git a/ctr.c b/ctr.c
index f81f74ad..42048833 100644
--- a/ctr.c
+++ b/ctr.c
@@ -45,7 +45,8 @@
#include "memxor.h"
#include "nettle-internal.h"
-#define NBLOCKS 4
+/* Don't allocate any more space than this on the stack */
+#define CTR_BUFFER_LIMIT 512
void
ctr_crypt(const void *ctx, nettle_cipher_func *f,
@@ -90,47 +91,43 @@ ctr_crypt(const void *ctx, nettle_cipher_func *f,
}
else
{
- if (length > block_size)
- {
- TMP_DECL(buffer, uint8_t, NBLOCKS * NETTLE_MAX_CIPHER_BLOCK_SIZE);
- size_t chunk = NBLOCKS * block_size;
-
- TMP_ALLOC(buffer, chunk);
+ /* For in-place CTR, construct a buffer of consecutive counter
+ values, of size at most CTR_BUFFER_LIMIT. */
+ TMP_DECL(buffer, uint8_t, CTR_BUFFER_LIMIT);
+
+ size_t buffer_size;
+ if (length < block_size)
+ buffer_size = block_size;
+ else if (length <= CTR_BUFFER_LIMIT)
+ buffer_size = length;
+ else
+ buffer_size = CTR_BUFFER_LIMIT;
- for (; length >= chunk;
- length -= chunk, src += chunk, dst += chunk)
- {
- unsigned n;
- uint8_t *p;
- for (n = 0, p = buffer; n < NBLOCKS; n++, p += block_size)
- {
- memcpy (p, ctr, block_size);
- INCREMENT(block_size, ctr);
- }
- f(ctx, chunk, buffer, buffer);
- memxor(dst, buffer, chunk);
- }
+ TMP_ALLOC(buffer, buffer_size);
- if (length > 0)
+ while (length >= block_size)
+ {
+ size_t i;
+ for (i = 0;
+ i + block_size <= buffer_size && i + block_size <= length;
+ i += block_size)
{
- /* Final, possibly partial, blocks */
- for (chunk = 0; chunk < length; chunk += block_size)
- {
- memcpy (buffer + chunk, ctr, block_size);
- INCREMENT(block_size, ctr);
- }
- f(ctx, chunk, buffer, buffer);
- memxor3(dst, src, buffer, length);
+ memcpy (buffer + i, ctr, block_size);
+ INCREMENT(block_size, ctr);
}
+ assert (i > 0);
+ f(ctx, i, buffer, buffer);
+ memxor(dst, buffer, i);
+ length -= i;
+ dst += i;
}
- else if (length > 0)
- {
- TMP_DECL(buffer, uint8_t, NETTLE_MAX_CIPHER_BLOCK_SIZE);
- TMP_ALLOC(buffer, block_size);
+ /* Final, possibly partial, block. */
+ if (length > 0)
+ {
f(ctx, block_size, buffer, ctr);
INCREMENT(block_size, ctr);
- memxor3(dst, src, buffer, length);
+ memxor(dst, buffer, length);
}
}
}