diff options
author | Niels Möller <nisse@lysator.liu.se> | 2022-10-31 19:18:24 +0100 |
---|---|---|
committer | Niels Möller <nisse@lysator.liu.se> | 2022-10-31 19:18:24 +0100 |
commit | 57dce2ba98b2fc2dda44353e4eebdc3b7c932200 (patch) | |
tree | 8d536d722784e562e1299da53bc5a091acf13579 | |
parent | bb9c0a1ed4e6b3be4c5a259d3a6192960bc0c432 (diff) | |
download | nettle-57dce2ba98b2fc2dda44353e4eebdc3b7c932200.tar.gz |
x86_64: Implement _nettle_poly1305_blocks.
-rw-r--r-- | ChangeLog | 4 | ||||
-rw-r--r-- | configure.ac | 3 | ||||
-rw-r--r-- | x86_64/poly1305-blocks.asm | 128 |
3 files changed, 134 insertions, 1 deletions
@@ -1,5 +1,9 @@ 2022-10-31 Niels Möller <nisse@lysator.liu.se> + * configure.ac: (asm_file_list): Add HAVE_NATIVE_poly1305_blocks. + (asm_nettle_optional_list): Add poly1305-blocks.asm. + * x86_64/poly1305-blocks.asm: New file. + * md-internal.h (MD_FILL_OR_RETURN_INDEX): New macro. * poly1305-update.c (_nettle_poly1305_update): New file and function. diff --git a/configure.ac b/configure.ac index 59f68b00..4e9dceda 100644 --- a/configure.ac +++ b/configure.ac @@ -612,7 +612,7 @@ asm_nettle_optional_list="cpuid.asm cpu-facility.asm \ aes256-encrypt-2.asm aes256-decrypt-2.asm \ cbc-aes128-encrypt-2.asm cbc-aes192-encrypt-2.asm cbc-aes256-encrypt-2.asm \ chacha-2core.asm chacha-3core.asm chacha-4core.asm chacha-core-internal-2.asm \ - poly1305-internal-2.asm \ + poly1305-blocks.asm poly1305-internal-2.asm \ ghash-set-key-2.asm ghash-update-2.asm \ salsa20-2core.asm salsa20-core-internal-2.asm \ sha1-compress-2.asm sha256-compress-n-2.asm \ @@ -762,6 +762,7 @@ AH_VERBATIM([HAVE_NATIVE], #undef HAVE_NATIVE_poly1305_set_key #undef HAVE_NATIVE_poly1305_block #undef HAVE_NATIVE_poly1305_digest +#undef HAVE_NATIVE_poly1305_blocks #undef HAVE_NATIVE_ghash_set_key #undef HAVE_NATIVE_ghash_update #undef HAVE_NATIVE_salsa20_core diff --git a/x86_64/poly1305-blocks.asm b/x86_64/poly1305-blocks.asm new file mode 100644 index 00000000..63bfed3e --- /dev/null +++ b/x86_64/poly1305-blocks.asm @@ -0,0 +1,128 @@ +C x86_64/poly1305-blocks.asm + +ifelse(` + Copyright (C) 2022 Niels Möller + + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. +') + + .file "poly1305-blocks.asm" + +define(`CTX', `%rdi') C First argument to all functions +define(`BLOCKS', `%rsi') +define(`MP_PARAM', `%rdx') C Moved to MP, to not collide with mul instruction. + +define(`MP', `%r8') C May clobber, both with unix and windows conventions. +define(`T0', `%rbx') +define(`T1', `%rcx') +define(`H0', `%rbp') +define(`H1', `%r9') +define(`H2', `%r10') +define(`F0', `%r11') +define(`F1', `%r12') + +C const uint8_t * +C _nettle_poly1305_blocks (struct poly1305_ctx *ctx, size_t blocks, const uint8_t *m) + +PROLOGUE(_nettle_poly1305_blocks) + W64_ENTRY(3, 0) + mov MP_PARAM, MP + test BLOCKS, BLOCKS + jz .Lend + + push %rbx + push %rbp + push %r12 + mov P1305_H0 (CTX), H0 + mov P1305_H1 (CTX), H1 + mov P1305_H2 (CTX), H2 + ALIGN(16) +.Loop: + mov (MP), T0 + mov 8(MP), T1 + add $16, MP + + add H0, T0 + adc H1, T1 + adc $1, H2 + + mov P1305_R1 (CTX), %rax + mul T0 C R1*T0 + mov %rax, F0 + mov %rdx, F1 + + mov T0, %rax C Last use of T0 input + mov P1305_R0 (CTX), T0 + mul T0 C R0*T0 + mov %rax, H0 + mov %rdx, H1 + + mov T1, %rax + mul T0 C R0*T1 + add %rax, F0 + adc %rdx, F1 + + mov P1305_S1 (CTX), T0 + mov T1, %rax C Last use of T1 input + mul T0 C S1*T1 + add %rax, H0 + adc %rdx, H1 + + mov H2, %rax + mul T0 C S1*H2 + add %rax, F0 + adc %rdx, F1 + + mov H2, T0 + and $3, H2 + + shr $2, T0 + mov P1305_S0 (CTX), %rax + mul T0 C S0*(H2 >> 2) + add %rax, H0 + adc %rdx, H1 + + imul P1305_R0 (CTX), H2 C R0*(H2 & 3) + add F0, H1 + adc F1, H2 + + dec BLOCKS + jnz .Loop + + mov H0, P1305_H0 (CTX) + mov H1, P1305_H1 (CTX) + mov H2, P1305_H2 (CTX) + + pop %r12 + pop %rbp + pop %rbx + +.Lend: + mov MP, %rax + W64_EXIT(3, 0) + ret +EPILOGUE(_nettle_poly1305_blocks) |