summary refs log tree commit diff
diff options
context:
space:
mode:
authorNiels Möller <nisse@lysator.liu.se>2018-01-08 22:28:52 +0100
committerNiels Möller <nisse@lysator.liu.se>2018-01-09 07:31:54 +0100
commit0325eaf3ac938322b964d52716b15b106adeae0e (patch)
tree6ab286900a5ca9b0dd37417976c2e1251ae4b212
parente09ed92e51c349f2ea495a9a921cb682332700de (diff)
downloadnettle-ctr-opt.tar.gz
CTR mode optimizations for 16-byte block size. (branch: ctr-opt)
-rw-r--r--ChangeLog7
-rw-r--r--ctr.c132
2 files changed, 119 insertions, 20 deletions
diff --git a/ChangeLog b/ChangeLog
index 5975cab8..f31a3017 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -9,6 +9,13 @@
2018-01-08 Niels Möller <nisse@lysator.liu.se>
+ * ctr.c (ctr_crypt16): New function, with optimizations specific
+ to 16-byte block size.
+ (ctr_fill16): New helper function, definition depending on
+ WORDS_BIGENDIAN, and little endian version requiring
+ HAVE_BUILTIN_BSWAP64.
+ (ctr_crypt): Use ctr_crypt16, when appropriate.
+
* nettle-types.h (union nettle_block16): Add uint64_t field.
* configure.ac: Check for __builtin_bswap64, define
diff --git a/ctr.c b/ctr.c
index f4164466..8295e1af 100644
--- a/ctr.c
+++ b/ctr.c
@@ -62,36 +62,128 @@ ctr_fill (size_t block_size, uint8_t *ctr, size_t length, uint8_t *buffer)
return i;
}
+#if WORDS_BIGENDIAN
+# define USE_CTR_CRYPT16 1
+static void
+ctr_fill16(uint8_t *ctr, size_t blocks, uint64_t *buffer)
+{
+ uint64_t hi, lo;
+ hi = READ_UINT64(ctr);
+ lo = READ_UINT64(ctr + 8);
+
+ while (blocks-- > 0)
+ {
+ *buffer++ = hi;
+ *buffer++ = lo;
+ hi += !(++lo);
+ }
+ WRITE_UINT64(ctr, hi);
+ WRITE_UINT64(ctr + 8, lo);
+}
+#else /* !WORDS_BIGENDIAN */
+# if HAVE_BUILTIN_BSWAP64
+# define USE_CTR_CRYPT16 1
/* Fill BUFFER with BLOCKS consecutive 128-bit counter values and
   advance the 16-byte big-endian counter CTR past them.
   Little-endian variant (requires __builtin_bswap64): hi is kept in
   native (byte-swapped) order so it can be stored to BUFFER as-is,
   while lo is kept in numeric order for a cheap increment and is
   swapped on each store. */
static void
ctr_fill16(uint8_t *ctr, size_t blocks, uint64_t *buffer)
{
  uint64_t hi, lo;
  /* Read hi in native endianness: storing it back to memory
     reproduces the counter's big-endian byte sequence unchanged. */
  hi = LE_READ_UINT64(ctr);
  /* lo holds the numeric value of the low 64 bits of the counter. */
  lo = READ_UINT64(ctr + 8);

  while (blocks-- > 0)
    {
      *buffer++ = hi;
      *buffer++ = __builtin_bswap64(lo);
      if (!++lo)
	/* Carry into the high word: swap to numeric order,
	   increment, swap back to stored order. */
	hi = __builtin_bswap64(__builtin_bswap64(hi) + 1);
    }
  LE_WRITE_UINT64(ctr, hi);
  WRITE_UINT64(ctr + 8, lo);
}
+# else /* ! HAVE_BUILTIN_BSWAP64 */
+# define USE_CTR_CRYPT16 0
+# endif
+#endif /* !WORDS_BIGENDIAN */
+
+#if USE_CTR_CRYPT16
/* CTR-mode encryption/decryption specialized for 16-byte block size.
   F is the block cipher, CTR the 16-byte big-endian counter.
   Returns the number of bytes processed, which may be LESS than
   LENGTH on the fast path below (the caller handles the remainder;
   see ctr_crypt).  The buffered path always processes all of LENGTH,
   including a final partial block. */
static size_t
ctr_crypt16(const void *ctx, nettle_cipher_func *f,
	    uint8_t *ctr,
	    size_t length, uint8_t *dst,
	    const uint8_t *src)
{
  /* Fast path: when dst and src don't overlap and dst is 64-bit
     aligned, write the counter values directly into dst, encrypt
     them in place, then xor in the source.  Only whole blocks are
     handled here; any trailing partial block is left to the caller. */
  if (dst != src && !((uintptr_t) dst % sizeof(uint64_t)))
    {
      size_t blocks = length / 16u;
      ctr_fill16 (ctr, blocks, (uint64_t *) dst);
      f(ctx, blocks * 16, dst, dst);
      memxor (dst, src, blocks * 16);
      return blocks * 16;
    }
  else
    {
      /* Construct an aligned buffer of consecutive counter values, of
	 size at most CTR_BUFFER_LIMIT. */
      TMP_DECL(buffer, union nettle_block16, CTR_BUFFER_LIMIT / 16);
      /* Round up: a final partial block still consumes one counter. */
      size_t blocks = (length + 15) / 16u;
      size_t i;
      TMP_ALLOC(buffer, MIN(blocks, CTR_BUFFER_LIMIT / 16));

      /* Process full CTR_BUFFER_LIMIT-sized chunks.  i tracks bytes
	 already xor:ed into dst. */
      for (i = 0; blocks >= CTR_BUFFER_LIMIT / 16;
	   i += CTR_BUFFER_LIMIT, blocks -= CTR_BUFFER_LIMIT / 16)
	{
	  ctr_fill16 (ctr, CTR_BUFFER_LIMIT / 16, buffer->u64);
	  f(ctx, CTR_BUFFER_LIMIT, buffer->b, buffer->b);
	  /* If the remaining input ends inside this chunk, jump to
	     the partial-length memxor3 below. */
	  if (length - i < CTR_BUFFER_LIMIT)
	    goto done;
	  memxor3 (dst, src, buffer->b, CTR_BUFFER_LIMIT);
	}

      if (blocks > 0)
	{
	  assert (length - i < CTR_BUFFER_LIMIT);
	  ctr_fill16 (ctr, blocks, buffer->u64);
	  f(ctx, blocks * 16, buffer->b, buffer->b);
	done:
	  /* xor the (possibly partial) tail; length - i may not be a
	     multiple of 16. */
	  memxor3 (dst + i, src + i, buffer->b, length - i);
	}
      return length;
    }
}
+#endif /* USE_CTR_CRYPT16 */
+
void
ctr_crypt(const void *ctx, nettle_cipher_func *f,
size_t block_size, uint8_t *ctr,
size_t length, uint8_t *dst,
const uint8_t *src)
{
- if (src != dst)
+#if USE_CTR_CRYPT16
+ if (block_size == 16)
{
- if (length == block_size)
- {
- f(ctx, block_size, dst, ctr);
- INCREMENT(block_size, ctr);
- memxor(dst, src, block_size);
- }
- else
- {
- size_t filled = ctr_fill (block_size, ctr, length, dst);
+ size_t done = ctr_crypt16(ctx, f, ctr, length, dst, src);
+ length -= done;
+ src += done;
+ dst += done;
+ }
+#endif
- f(ctx, filled, dst, dst);
- memxor(dst, src, filled);
+ if(src != dst)
+ {
+ size_t filled = ctr_fill (block_size, ctr, length, dst);
+
+ f(ctx, filled, dst, dst);
+ memxor(dst, src, filled);
- if (filled < length)
- {
- TMP_DECL(buffer, uint8_t, NETTLE_MAX_CIPHER_BLOCK_SIZE);
- TMP_ALLOC(buffer, block_size);
+ if (filled < length)
+ {
+ TMP_DECL(block, uint8_t, NETTLE_MAX_CIPHER_BLOCK_SIZE);
+ TMP_ALLOC(block, block_size);
- f(ctx, block_size, buffer, ctr);
- INCREMENT(block_size, ctr);
- memxor3(dst + filled, src + filled, buffer, length - filled);
- }
+ f(ctx, block_size, block, ctr);
+ INCREMENT(block_size, ctr);
+ memxor3(dst + filled, src + filled, block, length - filled);
}
}
else