diff options
author | Niels Möller <nisse@lysator.liu.se> | 2022-10-30 11:11:53 +0100 |
---|---|---|
committer | Niels Möller <nisse@lysator.liu.se> | 2022-10-30 11:11:53 +0100 |
commit | f72c33d46a9c4e5c3d205562cf4b2b86109c7d9e (patch) | |
tree | 9ad8b3c3506572d714eaa725cfac5d2413039d0a | |
parent | 672b76f6a36440cb8a8417db56bdf2cc3f2c5d4f (diff) | |
download | nettle-f72c33d46a9c4e5c3d205562cf4b2b86109c7d9e.tar.gz |
New file poly1305-blocks.asm
-rw-r--r-- | configure.ac | 5 |
-rw-r--r-- | poly1305-internal.c | 9 |
-rw-r--r-- | poly1305-update.c | 6 |
-rw-r--r-- | x86_64/poly1305-blocks.asm | 128 |
4 files changed, 135 insertions, 13 deletions
diff --git a/configure.ac b/configure.ac index 040b772f..5ee7239e 100644 --- a/configure.ac +++ b/configure.ac @@ -612,7 +612,7 @@ asm_nettle_optional_list="cpuid.asm cpu-facility.asm \ aes256-encrypt-2.asm aes256-decrypt-2.asm \ cbc-aes128-encrypt-2.asm cbc-aes192-encrypt-2.asm cbc-aes256-encrypt-2.asm \ chacha-2core.asm chacha-3core.asm chacha-4core.asm chacha-core-internal-2.asm \ - poly1305-internal-2.asm \ + poly1305-blocks.asm poly1305-internal-2.asm \ ghash-set-key-2.asm ghash-update-2.asm \ salsa20-2core.asm salsa20-core-internal-2.asm \ sha1-compress-2.asm sha256-compress-n-2.asm \ @@ -759,10 +759,7 @@ AH_VERBATIM([HAVE_NATIVE], #undef HAVE_NATIVE_ecc_secp384r1_redc #undef HAVE_NATIVE_ecc_secp521r1_modp #undef HAVE_NATIVE_ecc_secp521r1_redc -#undef HAVE_NATIVE_poly1305_set_key -#undef HAVE_NATIVE_poly1305_block #undef HAVE_NATIVE_poly1305_blocks -#undef HAVE_NATIVE_poly1305_digest #undef HAVE_NATIVE_ghash_set_key #undef HAVE_NATIVE_ghash_update #undef HAVE_NATIVE_salsa20_core diff --git a/poly1305-internal.c b/poly1305-internal.c index cd9583f5..380b934e 100644 --- a/poly1305-internal.c +++ b/poly1305-internal.c @@ -169,15 +169,6 @@ _nettle_poly1305_block (struct poly1305_ctx *ctx, const uint8_t *m, unsigned t4) ctx->h0 += b * 5; } -const uint8_t * -_nettle_poly1305_blocks (struct poly1305_ctx *ctx, size_t blocks, const uint8_t *m) -{ - for (; blocks > 0; blocks--, m += POLY1305_BLOCK_SIZE) - _nettle_poly1305_block (ctx, m, 1); - - return m; -} - /* Adds digest to the nonce */ void _nettle_poly1305_digest (struct poly1305_ctx *ctx, union nettle_block16 *s) diff --git a/poly1305-update.c b/poly1305-update.c index aa391859..fdc72558 100644 --- a/poly1305-update.c +++ b/poly1305-update.c @@ -49,8 +49,14 @@ _nettle_poly1305_update (struct poly1305_ctx *ctx, length, m); _nettle_poly1305_block(ctx, block, 1); } +#if HAVE_NATIVE_poly1305_blocks m = _nettle_poly1305_blocks (ctx, length >> 4, m); length &= 15; +#else + for (; length >= POLY1305_BLOCK_SIZE; + length 
-= POLY1305_BLOCK_SIZE, m += POLY1305_BLOCK_SIZE) + _nettle_poly1305_block (ctx, m, 1); +#endif memcpy (block, m, length); return length; diff --git a/x86_64/poly1305-blocks.asm b/x86_64/poly1305-blocks.asm new file mode 100644 index 00000000..63bfed3e --- /dev/null +++ b/x86_64/poly1305-blocks.asm @@ -0,0 +1,128 @@ +C x86_64/poly1305-blocks.asm + +ifelse(` + Copyright (C) 2022 Niels Möller + + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. +') + + .file "poly1305-blocks.asm" + +define(`CTX', `%rdi') C First argument to all functions +define(`BLOCKS', `%rsi') +define(`MP_PARAM', `%rdx') C Moved to MP, to not collide with mul instruction. + +define(`MP', `%r8') C May clobber, both with unix and windows conventions. 
+define(`T0', `%rbx') +define(`T1', `%rcx') +define(`H0', `%rbp') +define(`H1', `%r9') +define(`H2', `%r10') +define(`F0', `%r11') +define(`F1', `%r12') + +C const uint8_t * +C _nettle_poly1305_blocks (struct poly1305_ctx *ctx, size_t blocks, const uint8_t *m) + +PROLOGUE(_nettle_poly1305_blocks) + W64_ENTRY(3, 0) + mov MP_PARAM, MP + test BLOCKS, BLOCKS + jz .Lend + + push %rbx + push %rbp + push %r12 + mov P1305_H0 (CTX), H0 + mov P1305_H1 (CTX), H1 + mov P1305_H2 (CTX), H2 + ALIGN(16) +.Loop: + mov (MP), T0 + mov 8(MP), T1 + add $16, MP + + add H0, T0 + adc H1, T1 + adc $1, H2 + + mov P1305_R1 (CTX), %rax + mul T0 C R1*T0 + mov %rax, F0 + mov %rdx, F1 + + mov T0, %rax C Last use of T0 input + mov P1305_R0 (CTX), T0 + mul T0 C R0*T0 + mov %rax, H0 + mov %rdx, H1 + + mov T1, %rax + mul T0 C R0*T1 + add %rax, F0 + adc %rdx, F1 + + mov P1305_S1 (CTX), T0 + mov T1, %rax C Last use of T1 input + mul T0 C S1*T1 + add %rax, H0 + adc %rdx, H1 + + mov H2, %rax + mul T0 C S1*H2 + add %rax, F0 + adc %rdx, F1 + + mov H2, T0 + and $3, H2 + + shr $2, T0 + mov P1305_S0 (CTX), %rax + mul T0 C S0*(H2 >> 2) + add %rax, H0 + adc %rdx, H1 + + imul P1305_R0 (CTX), H2 C R0*(H2 & 3) + add F0, H1 + adc F1, H2 + + dec BLOCKS + jnz .Loop + + mov H0, P1305_H0 (CTX) + mov H1, P1305_H1 (CTX) + mov H2, P1305_H2 (CTX) + + pop %r12 + pop %rbp + pop %rbx + +.Lend: + mov MP, %rax + W64_EXIT(3, 0) + ret +EPILOGUE(_nettle_poly1305_blocks) |