summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Niels Möller <nisse@lysator.liu.se> 2023-01-30 20:27:01 +0100
committer: Niels Möller <nisse@lysator.liu.se> 2023-02-06 20:23:42 +0100
commit: 8096e71bd6ab672f055709221fa81f361f2730f2 (patch)
tree: 8f44f2c7002d9ec862cb4aab0755fced749780a9
parent: 6ce57b20f20d02b865c02f4604350a36dc50c5b3 (diff)
download: nettle-8096e71bd6ab672f055709221fa81f361f2730f2.tar.gz
Rework with ocb_fill_n, 16 blocks at a time.
-rw-r--r-- block-internal.h 8
-rw-r--r-- ocb.c 164
2 files changed, 159 insertions, 13 deletions
diff --git a/block-internal.h b/block-internal.h
index e9c26ff6..b927f352 100644
--- a/block-internal.h
+++ b/block-internal.h
@@ -51,6 +51,14 @@ block16_zero (union nettle_block16 *r)
}
+/* Copy both 64-bit words of the block *x into *r. */
static inline void
+block16_set (union nettle_block16 *r,
+ const union nettle_block16 *x)
+{
+ r->u64[0] = x->u64[0];
+ r->u64[1] = x->u64[1];
+}
+
+static inline void
block16_xor (union nettle_block16 *r,
const union nettle_block16 *x)
{
diff --git a/ocb.c b/ocb.c
index 74993e19..8dee6442 100644
--- a/ocb.c
+++ b/ocb.c
@@ -42,6 +42,8 @@
#include "bswap-internal.h"
#include "memops.h"
+#define OCB_MAX_BLOCKS 16
+
/* Returns 64 bits from the concatenation (u0, u1), starting from bit offset. */
static inline uint64_t
extract(uint64_t u0, uint64_t u1, unsigned offset)
@@ -159,29 +161,84 @@ ocb_update (struct ocb_ctx *ctx, const struct ocb_key *key,
}
}
-/* Kept temporarily for comparison purposes */
+/* Store the offsets for the next n message blocks in o[0], ...,
+ o[n-1], advancing ctx->message_count by n and leaving ctx->offset
+ equal to the last offset produced. Requires n > 0, and room for n
+ blocks at o. */
static void
-ocb_crypt_n_old (struct ocb_ctx *ctx, const struct ocb_key *key,
- const void *cipher, nettle_cipher_func *f,
- size_t n, uint8_t *dst, const uint8_t *src)
+ocb_fill_n (struct ocb_ctx *ctx, const struct ocb_key *key,
+ size_t n, union nettle_block16 *o)
{
- for (; n > 0; n--, src += OCB_BLOCK_SIZE, dst += OCB_BLOCK_SIZE)
+ assert (n > 0);
+ /* prev points at the offset that the next pair is chained from. */
+ union nettle_block16 *prev;
+ if (ctx->message_count & 1)
+ prev = &ctx->offset;
+ else
{
- union nettle_block16 block;
- update_offset (key, &ctx->offset, ++ctx->message_count);
+ /* Do a single block to align block count. */
+ ++ctx->message_count; /* Always odd. */
+ block16_xor (&ctx->offset, &key->L[2]);
+ block16_set (&o[0], &ctx->offset);
+ prev = o;
+ n--; o++;
+ }
- memxor3 (block.b, ctx->offset.b, src, OCB_BLOCK_SIZE);
- f (cipher, OCB_BLOCK_SIZE, block.b, block.b);
- memxor3 (dst, ctx->offset.b, block.b, OCB_BLOCK_SIZE);
+ /* Two offsets per iteration; the block count is odd on entry and
+ stays odd after each pair. */
+ for (; n >= 2; n -= 2, o += 2)
+ {
+ size_t i;
+ ctx->message_count += 2; /* Always odd. */
+
+ /* Based on trailing zeros of ctx->message_count - 1, the
+ initial shift below discards a one bit. */
+ block16_mulx_be (&o[0], &key->L[2]);
+ for (i = ctx->message_count >> 1; !(i&1); i >>= 1)
+ block16_mulx_be (&o[0], &o[0]);
+
+ block16_xor (&o[0], prev);
+ block16_xor3 (&o[1], &o[0], &key->L[2]);
+ prev = &o[1];
+ }
+ /* Record the last offset computed so far in the context. */
+ block16_set(&ctx->offset, prev);
+
+ /* One block left over; step once with update_offset. */
+ if (n > 0)
+ {
+ update_offset (key, &ctx->offset, ++ctx->message_count);
+ block16_set (o, &ctx->offset);
}
}
-/* Process n complete blocks (encrypt or decrypt, checksum left to caller). */
+/* Process n complete blocks (encrypt or decrypt, checksum left to
+ caller). The offsets are generated with ocb_fill_n and the cipher
+ function f is called on at most OCB_MAX_BLOCKS blocks at a time. */
static void
ocb_crypt_n (struct ocb_ctx *ctx, const struct ocb_key *key,
const void *cipher, nettle_cipher_func *f,
size_t n, uint8_t *dst, const uint8_t *src)
{
+ union nettle_block16 o[OCB_MAX_BLOCKS], block[OCB_MAX_BLOCKS];
+ size_t size;
+ if (n == 0)
+ return;
+
+ while (n > OCB_MAX_BLOCKS)
+ {
+ /* Choose the chunk length so that ctx->message_count is odd
+ afterwards; ocb_fill_n then needs no single-block alignment
+ step for the following chunks. */
+ size_t blocks = OCB_MAX_BLOCKS - 1 + (ctx->message_count & 1);
+ ocb_fill_n (ctx, key, blocks, o);
+
+ size = blocks * OCB_BLOCK_SIZE;
+ memxor3 (block[0].b, o[0].b, src, size);
+ f (cipher, size, block[0].b, block[0].b);
+ memxor3 (dst, block[0].b, o[0].b, size);
+
+ /* Advance both the input and the output past this chunk. */
+ n -= blocks; src += size; dst += size;
+ }
+ /* Between 1 and OCB_MAX_BLOCKS blocks remain here. */
+ ocb_fill_n (ctx, key, n, o);
+ size = n * OCB_BLOCK_SIZE;
+ memxor3 (block[0].b, o[0].b, src, size);
+ f (cipher, size, block[0].b, block[0].b);
+ memxor3 (dst, block[0].b, o[0].b, size);
+}
+
+#if 0
+/* Process n complete blocks (encrypt or decrypt, checksum left to caller). */
+static void
+ocb_crypt_n_2way (struct ocb_ctx *ctx, const struct ocb_key *key,
+ const void *cipher, nettle_cipher_func *f,
+ size_t n, uint8_t *dst, const uint8_t *src)
+{
if (n == 0)
return;
@@ -218,8 +275,8 @@ ocb_crypt_n (struct ocb_ctx *ctx, const struct ocb_key *key,
memxor3 (block[0].b, o[0].b, src, 2*OCB_BLOCK_SIZE);
f (cipher, 2*OCB_BLOCK_SIZE, block[0].b, block[0].b);
-
memxor3 (dst, o[0].b, block[0].b, 2*OCB_BLOCK_SIZE);
+
ctx->offset.u64[0] = o[1].u64[0]; ctx->offset.u64[1] = o[1].u64[1];
}
if (n > 0)
@@ -233,6 +290,87 @@ ocb_crypt_n (struct ocb_ctx *ctx, const struct ocb_key *key,
}
}
+/* Variant of ocb_crypt_n that computes offsets and calls the cipher
+ function f on four blocks at a time, after first aligning
+ ctx->message_count to 3 (mod 4). Fewer than three blocks, and any
+ blocks left after the 4-way loop, are handled with update_offset. */
+static void
+ocb_crypt_n_4way (struct ocb_ctx *ctx, const struct ocb_key *key,
+ const void *cipher, nettle_cipher_func *f,
+ size_t n, uint8_t *dst, const uint8_t *src)
+{
+ union nettle_block16 l3, o[4], block[4];
+ size_t i;
+ if (n >= 3)
+ {
+ if (!(ctx->message_count & 1))
+ {
+ /* Do a single block to align block count. */
+ ++ctx->message_count; /* Always odd. */
+ block16_xor (&ctx->offset, &key->L[2]);
+
+ memxor3 (block[0].b, ctx->offset.b, src, OCB_BLOCK_SIZE);
+ f (cipher, OCB_BLOCK_SIZE, block[0].b, block[0].b);
+
+ memxor3 (dst, ctx->offset.b, block[0].b, OCB_BLOCK_SIZE);
+
+ n--; src += OCB_BLOCK_SIZE; dst += OCB_BLOCK_SIZE;
+ }
+ if (!(ctx->message_count & 2))
+ {
+ /* Do two more blocks, to align block count to a multiple of 4. */
+ ctx->message_count += 2; /* Always = 3 (mod 4). */
+
+ /* Based on trailing zeros of ctx->message_count - 1, the
+ initial shift below discards a one bit. */
+ block16_mulx_be (&o[0], &key->L[2]);
+ for (i = ctx->message_count >> 1; !(i&1); i >>= 1)
+ block16_mulx_be (&o[0], &o[0]);
+
+ block16_xor (&o[0], &ctx->offset);
+ block16_xor3 (&o[1], &o[0], &key->L[2]);
+
+ memxor3 (block[0].b, o[0].b, src, 2*OCB_BLOCK_SIZE);
+ f (cipher, 2*OCB_BLOCK_SIZE, block[0].b, block[0].b);
+
+ memxor3 (dst, o[0].b, block[0].b, 2*OCB_BLOCK_SIZE);
+ ctx->offset.u64[0] = o[1].u64[0]; ctx->offset.u64[1] = o[1].u64[1];
+
+ n -= 2; src += 2*OCB_BLOCK_SIZE; dst += 2*OCB_BLOCK_SIZE;
+ }
+ /* l3 is key->L[2] multiplied by x once; it is the value xored in
+ for a block index with exactly one trailing zero bit. */
+ block16_mulx_be (&l3, &key->L[2]);
+
+ for (; n >= 4; n -= 4, src += 4*OCB_BLOCK_SIZE, dst += 4*OCB_BLOCK_SIZE)
+ {
+ ctx->message_count += 4; /* Always = 3 (mod 4). */
+
+ /* Based on trailing zeros of ctx->message_count - 3, the
+ initial shift below discards two one bits. */
+ block16_mulx_be (&o[0], &l3);
+ for (i = ctx->message_count >> 2; !(i&1); i >>= 1)
+ block16_mulx_be (&o[0], &o[0]);
+ block16_xor (&o[0], &ctx->offset);
+ /* Each offset is chained from the previous one: the block
+ indices are 0, 1, 2, 3 (mod 4), so the last three have 0, 1
+ and 0 trailing zero bits, respectively. */
+ block16_xor3 (&o[1], &o[0], &key->L[2]);
+ block16_xor3 (&o[2], &o[1], &l3);
+ block16_xor3 (&o[3], &o[2], &key->L[2]);
+
+ memxor3 (block[0].b, o[0].b, src, 4*OCB_BLOCK_SIZE);
+ f (cipher, 4*OCB_BLOCK_SIZE, block[0].b, block[0].b);
+ memxor3 (dst, o[0].b, block[0].b, 4*OCB_BLOCK_SIZE);
+
+ ctx->offset.u64[0] = o[3].u64[0]; ctx->offset.u64[1] = o[3].u64[1];
+ }
+ }
+ if (!n)
+ return;
+
+ /* At most three blocks remain; compute their offsets one by one and
+ process them with a single call to f. */
+ for (i = 0; i < n; i++)
+ {
+ update_offset (key, &ctx->offset, ++ctx->message_count);
+ o[i].u64[0] = ctx->offset.u64[0]; o[i].u64[1] = ctx->offset.u64[1];
+ }
+ memxor3 (block[0].b, o[0].b, src, n * OCB_BLOCK_SIZE);
+ f (cipher, n * OCB_BLOCK_SIZE, block[0].b, block[0].b);
+ memxor3 (dst, o[0].b, block[0].b, n * OCB_BLOCK_SIZE);
+}
+#endif
+
/* Checksum of n complete blocks. */
static void
ocb_checksum_n (union nettle_block16 *checksum,
@@ -283,7 +421,7 @@ ocb_decrypt (struct ocb_ctx *ctx, const struct ocb_key *key,
if (ctx->message_count == 0)
ctx->offset = ctx->initial;
- ocb_crypt_n_old (ctx, key, decrypt_ctx, decrypt, n, dst, src);
+ ocb_crypt_n (ctx, key, decrypt_ctx, decrypt, n, dst, src);
ocb_checksum_n (&ctx->checksum, n, dst);
length &= 15;