summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Niels Möller <nisse@lysator.liu.se> 2022-10-30 11:11:53 +0100
committer Niels Möller <nisse@lysator.liu.se> 2022-10-30 11:11:53 +0100
commit f72c33d46a9c4e5c3d205562cf4b2b86109c7d9e (patch)
tree 9ad8b3c3506572d714eaa725cfac5d2413039d0a
parent 672b76f6a36440cb8a8417db56bdf2cc3f2c5d4f (diff)
download nettle-f72c33d46a9c4e5c3d205562cf4b2b86109c7d9e.tar.gz
New file poly1305-blocks.asm
-rw-r--r-- configure.ac 5
-rw-r--r-- poly1305-internal.c 9
-rw-r--r-- poly1305-update.c 6
-rw-r--r-- x86_64/poly1305-blocks.asm 128
4 files changed, 135 insertions, 13 deletions
diff --git a/configure.ac b/configure.ac
index 040b772f..5ee7239e 100644
--- a/configure.ac
+++ b/configure.ac
@@ -612,7 +612,7 @@ asm_nettle_optional_list="cpuid.asm cpu-facility.asm \
aes256-encrypt-2.asm aes256-decrypt-2.asm \
cbc-aes128-encrypt-2.asm cbc-aes192-encrypt-2.asm cbc-aes256-encrypt-2.asm \
chacha-2core.asm chacha-3core.asm chacha-4core.asm chacha-core-internal-2.asm \
- poly1305-internal-2.asm \
+ poly1305-blocks.asm poly1305-internal-2.asm \
ghash-set-key-2.asm ghash-update-2.asm \
salsa20-2core.asm salsa20-core-internal-2.asm \
sha1-compress-2.asm sha256-compress-n-2.asm \
@@ -759,10 +759,7 @@ AH_VERBATIM([HAVE_NATIVE],
#undef HAVE_NATIVE_ecc_secp384r1_redc
#undef HAVE_NATIVE_ecc_secp521r1_modp
#undef HAVE_NATIVE_ecc_secp521r1_redc
-#undef HAVE_NATIVE_poly1305_set_key
-#undef HAVE_NATIVE_poly1305_block
#undef HAVE_NATIVE_poly1305_blocks
-#undef HAVE_NATIVE_poly1305_digest
#undef HAVE_NATIVE_ghash_set_key
#undef HAVE_NATIVE_ghash_update
#undef HAVE_NATIVE_salsa20_core
diff --git a/poly1305-internal.c b/poly1305-internal.c
index cd9583f5..380b934e 100644
--- a/poly1305-internal.c
+++ b/poly1305-internal.c
@@ -169,15 +169,6 @@ _nettle_poly1305_block (struct poly1305_ctx *ctx, const uint8_t *m, unsigned t4)
ctx->h0 += b * 5;
}
-const uint8_t *
-_nettle_poly1305_blocks (struct poly1305_ctx *ctx, size_t blocks, const uint8_t *m)
-{
- for (; blocks > 0; blocks--, m += POLY1305_BLOCK_SIZE)
- _nettle_poly1305_block (ctx, m, 1);
-
- return m;
-}
-
/* Adds digest to the nonce */
void
_nettle_poly1305_digest (struct poly1305_ctx *ctx, union nettle_block16 *s)
diff --git a/poly1305-update.c b/poly1305-update.c
index aa391859..fdc72558 100644
--- a/poly1305-update.c
+++ b/poly1305-update.c
@@ -49,8 +49,14 @@ _nettle_poly1305_update (struct poly1305_ctx *ctx,
length, m);
_nettle_poly1305_block(ctx, block, 1);
}
+#if HAVE_NATIVE_poly1305_blocks
m = _nettle_poly1305_blocks (ctx, length >> 4, m);
length &= 15;
+#else
+ for (; length >= POLY1305_BLOCK_SIZE;
+ length -= POLY1305_BLOCK_SIZE, m += POLY1305_BLOCK_SIZE)
+ _nettle_poly1305_block (ctx, m, 1);
+#endif
memcpy (block, m, length);
return length;
diff --git a/x86_64/poly1305-blocks.asm b/x86_64/poly1305-blocks.asm
new file mode 100644
index 00000000..63bfed3e
--- /dev/null
+++ b/x86_64/poly1305-blocks.asm
@@ -0,0 +1,128 @@
+C x86_64/poly1305-blocks.asm
+
+ifelse(`
+ Copyright (C) 2022 Niels Möller
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+')
+
+ .file "poly1305-blocks.asm"
+
+define(`CTX', `%rdi') C First argument to all functions
+define(`BLOCKS', `%rsi') C Second argument: block count, decremented once per loop iteration
+define(`MP_PARAM', `%rdx') C Moved to MP, to not collide with mul instruction.
+
+define(`MP', `%r8') C May clobber, both with unix and windows conventions.
+define(`T0', `%rbx') C Scratch: low word of the block, later reused as a mul operand. Pushed on entry.
+define(`T1', `%rcx') C Scratch: high word of the block
+define(`H0', `%rbp') C State word kept at P1305_H0. Pushed on entry.
+define(`H1', `%r9') C State word kept at P1305_H1
+define(`H2', `%r10') C State word kept at P1305_H2
+define(`F0', `%r11') C Low word of the partial sum that is added to H1 at the end of each iteration
+define(`F1', `%r12') C High word of that partial sum, added to H2. Pushed on entry.
+
+C const uint8_t *
+C _nettle_poly1305_blocks (struct poly1305_ctx *ctx, size_t blocks, const uint8_t *m)
+C Consumes blocks 16-byte blocks from m, updating H0..H2 in ctx; returns m advanced past the consumed input.
+PROLOGUE(_nettle_poly1305_blocks)
+ W64_ENTRY(3, 0) C Defined elsewhere; presumably maps the 3 windows arguments to the unix registers - confirm
+ mov MP_PARAM, MP C Free %rdx, which every mul below overwrites
+ test BLOCKS, BLOCKS
+ jz .Lend C Zero blocks: state is neither loaded nor stored, m is returned unchanged
+ C Save the callee-saved registers that serve as T0, H0 and F1
+ push %rbx
+ push %rbp
+ push %r12
+ mov P1305_H0 (CTX), H0 C Load the three state words once, before the loop
+ mov P1305_H1 (CTX), H1
+ mov P1305_H2 (CTX), H2
+ ALIGN(16)
+.Loop:
+ mov (MP), T0 C First 8 bytes of the block
+ mov 8(MP), T1 C Last 8 bytes of the block
+ add $16, MP
+ C T0, T1, H2 = state + block, plus 1 at the H2 position (presumably the block padding bit - confirm)
+ add H0, T0
+ adc H1, T1
+ adc $1, H2
+ C F1:F0 collects the products that are later added in one word above H0
+ mov P1305_R1 (CTX), %rax
+ mul T0 C R1*T0
+ mov %rax, F0
+ mov %rdx, F1
+ C H1:H0 is restarted from R0*T0; T0 holds R0 from here until S1 is loaded
+ mov T0, %rax C Last use of T0 input
+ mov P1305_R0 (CTX), T0
+ mul T0 C R0*T0
+ mov %rax, H0
+ mov %rdx, H1
+ C F1:F0 += R0*T1
+ mov T1, %rax
+ mul T0 C R0*T1
+ add %rax, F0
+ adc %rdx, F1
+ C H1:H0 += S1*T1; T0 holds S1 for this product and the next
+ mov P1305_S1 (CTX), T0
+ mov T1, %rax C Last use of T1 input
+ mul T0 C S1*T1
+ add %rax, H0
+ adc %rdx, H1
+ C F1:F0 += S1*H2, using the full H2 produced by the adc above
+ mov H2, %rax
+ mul T0 C S1*H2
+ add %rax, F0
+ adc %rdx, F1
+ C Split H2: the low two bits stay in H2, the remaining bits go to T0
+ mov H2, T0
+ and $3, H2
+
+ shr $2, T0
+ mov P1305_S0 (CTX), %rax
+ mul T0 C S0*(H2 >> 2)
+ add %rax, H0
+ adc %rdx, H1
+ C New H2 starts as the low 64 bits of the imul product; F1:F0 is then added at H1
+ imul P1305_R0 (CTX), H2 C R0*(H2 & 3)
+ add F0, H1
+ adc F1, H2
+ C NOTE(review): carry out of the adc steps into H1, F1 and H2 is not propagated; presumably the key bounds rule it out - confirm
+ dec BLOCKS
+ jnz .Loop
+ C Write the updated state back to the context
+ mov H0, P1305_H0 (CTX)
+ mov H1, P1305_H1 (CTX)
+ mov H2, P1305_H2 (CTX)
+ C Restore the saved registers in reverse push order
+ pop %r12
+ pop %rbp
+ pop %rbx
+
+.Lend:
+ mov MP, %rax C Return value: input pointer, advanced by 16 bytes per processed block
+ W64_EXIT(3, 0) C Defined elsewhere; presumably undoes W64_ENTRY - confirm
+ ret
+EPILOGUE(_nettle_poly1305_blocks)