From 25e4316f0e75fb16cc99111bfecd4bd8f4f2c812 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niels=20M=C3=B6ller?= Date: Wed, 8 Jul 2020 10:46:45 +0200 Subject: Rearrange salsa20, enabling ARM fat builds to use salsa20_2core. --- ChangeLog | 21 +++++++++ Makefile.in | 2 +- arm/fat/salsa20-2core.asm | 36 +++++++++++++++ configure.ac | 1 + fat-arm.c | 13 ++++++ fat-setup.h | 5 +++ salsa20-crypt-internal.c | 111 ++++++++++++++++++++++++++++++++++++++++++++++ salsa20-crypt.c | 51 +-------------------- salsa20-internal.h | 19 ++++++++ salsa20r12-crypt.c | 51 +-------------------- 10 files changed, 209 insertions(+), 101 deletions(-) create mode 100644 arm/fat/salsa20-2core.asm create mode 100644 salsa20-crypt-internal.c diff --git a/ChangeLog b/ChangeLog index f7b2e939..84ed4923 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,24 @@ +2020-07-08 Niels Möller + + Rearrange salsa20, enabling ARM fat builds to use salsa20_2core. + * salsa20-crypt-internal.c (_salsa20_crypt_2core) + (_salsa20_crypt_1core): New file, new functions. One or the other + is used for implementing salsa20_crypt and salsa20r12_crypt, + depending on availability of salsa20_2core. + * salsa20-crypt.c (salsa20_crypt): Call _salsa20_crypt. + * salsa20r12-crypt.c (salsa20r12_crypt): Likewise. + * salsa20-internal.h: Declare new internal functions. + * Makefile.in (nettle_SOURCES): Add salsa20-crypt-internal.c. + * fat-setup.h (salsa20_crypt_func): New typedef. + * fat-arm.c (_salsa20_crypt): Select _salsa20_crypt + implementation, use 2core version when Neon instructions are + available. + * arm/fat/salsa20-2core.asm: New file, including Neon + implementation. Trigger configure's HAVE_NATIVE_fat_salsa20_2core. + * configure.ac: Add HAVE_NATIVE_fat_salsa20_2core, to identify the + case that salsa20_2core is defined, but runtime checks are needed + to determine if it is usable. 
+ 2020-07-06 Niels Möller * testsuite/salsa20-test.c (test_salsa20_core): New function, test diff --git a/Makefile.in b/Makefile.in index 77efb5c9..042ebe5f 100644 --- a/Makefile.in +++ b/Makefile.in @@ -123,7 +123,7 @@ nettle_SOURCES = aes-decrypt-internal.c aes-decrypt.c \ poly1305-aes.c poly1305-internal.c \ realloc.c \ ripemd160.c ripemd160-compress.c ripemd160-meta.c \ - salsa20-core-internal.c \ + salsa20-core-internal.c salsa20-crypt-internal.c \ salsa20-crypt.c salsa20r12-crypt.c salsa20-set-key.c \ salsa20-set-nonce.c \ salsa20-128-set-key.c salsa20-256-set-key.c \ diff --git a/arm/fat/salsa20-2core.asm b/arm/fat/salsa20-2core.asm new file mode 100644 index 00000000..43d9a1d0 --- /dev/null +++ b/arm/fat/salsa20-2core.asm @@ -0,0 +1,36 @@ +C arm/fat/salsa20-2core.asm + + +ifelse(< + Copyright (C) 2020 Niels Möller + + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. 
+>) + +dnl PROLOGUE(_nettle_fat_salsa20_2core) picked up by configure + +include_src() diff --git a/configure.ac b/configure.ac index 3f6c2f3b..a89f3ec3 100644 --- a/configure.ac +++ b/configure.ac @@ -574,6 +574,7 @@ AH_VERBATIM([HAVE_NATIVE], #undef HAVE_NATIVE_gcm_hash8 #undef HAVE_NATIVE_salsa20_core #undef HAVE_NATIVE_salsa20_2core +#undef HAVE_NATIVE_fat_salsa20_2core #undef HAVE_NATIVE_sha1_compress #undef HAVE_NATIVE_sha256_compress #undef HAVE_NATIVE_sha512_compress diff --git a/fat-arm.c b/fat-arm.c index 48feb5d4..a3f0f860 100644 --- a/fat-arm.c +++ b/fat-arm.c @@ -43,6 +43,7 @@ #include "nettle-types.h" #include "aes-internal.h" +#include "salsa20-internal.h" #include "fat-setup.h" struct arm_features @@ -147,6 +148,10 @@ DECLARE_FAT_FUNC(_nettle_salsa20_core, salsa20_core_func) DECLARE_FAT_FUNC_VAR(salsa20_core, salsa20_core_func, c) DECLARE_FAT_FUNC_VAR(salsa20_core, salsa20_core_func, neon) +DECLARE_FAT_FUNC(_nettle_salsa20_crypt, salsa20_crypt_func) +DECLARE_FAT_FUNC_VAR(salsa20_crypt, salsa20_crypt_func, 1core) +DECLARE_FAT_FUNC_VAR(salsa20_crypt, salsa20_crypt_func, 2core) + DECLARE_FAT_FUNC(nettle_sha1_compress, sha1_compress_func) DECLARE_FAT_FUNC_VAR(sha1_compress, sha1_compress_func, c) DECLARE_FAT_FUNC_VAR(sha1_compress, sha1_compress_func, armv6) @@ -212,6 +217,7 @@ fat_init (void) if (verbose) fprintf (stderr, "libnettle: enabling neon code.\n"); _nettle_salsa20_core_vec = _nettle_salsa20_core_neon; + _nettle_salsa20_crypt_vec = _nettle_salsa20_crypt_2core; _nettle_sha512_compress_vec = _nettle_sha512_compress_neon; nettle_sha3_permute_vec = _nettle_sha3_permute_neon; _nettle_umac_nh_vec = _nettle_umac_nh_neon; @@ -223,6 +229,7 @@ fat_init (void) if (verbose) fprintf (stderr, "libnettle: not enabling neon code.\n"); _nettle_salsa20_core_vec = _nettle_salsa20_core_c; + _nettle_salsa20_crypt_vec = _nettle_salsa20_crypt_1core; _nettle_sha512_compress_vec = _nettle_sha512_compress_c; nettle_sha3_permute_vec = _nettle_sha3_permute_c; 
_nettle_umac_nh_vec = _nettle_umac_nh_c; @@ -249,6 +256,12 @@ DEFINE_FAT_FUNC(_nettle_salsa20_core, void, (uint32_t *dst, const uint32_t *src, unsigned rounds), (dst, src, rounds)) +DEFINE_FAT_FUNC(_nettle_salsa20_crypt, void, + (struct salsa20_ctx *ctx, unsigned rounds, + size_t length, uint8_t *dst, + const uint8_t *src), + (ctx, rounds, length, dst, src)) + DEFINE_FAT_FUNC(nettle_sha1_compress, void, (uint32_t *state, const uint8_t *input), (state, input)) diff --git a/fat-setup.h b/fat-setup.h index b623ebf9..58b687fd 100644 --- a/fat-setup.h +++ b/fat-setup.h @@ -93,6 +93,8 @@ #define ENV_VERBOSE "NETTLE_FAT_VERBOSE" #define ENV_OVERRIDE "NETTLE_FAT_OVERRIDE" +struct salsa20_ctx; + /* DECLARE_FAT_FUNC(name, ftype) * * name is the public function, e.g., _nettle_aes_encrypt. @@ -162,6 +164,9 @@ typedef void aes_crypt_internal_func (unsigned rounds, const uint32_t *keys, typedef void *(memxor_func)(void *dst, const void *src, size_t n); typedef void salsa20_core_func (uint32_t *dst, const uint32_t *src, unsigned rounds); +typedef void salsa20_crypt_func (struct salsa20_ctx *ctx, unsigned rounds, + size_t length, uint8_t *dst, + const uint8_t *src); typedef void sha1_compress_func(uint32_t *state, const uint8_t *input); typedef void sha256_compress_func(uint32_t *state, const uint8_t *input, const uint32_t *k); diff --git a/salsa20-crypt-internal.c b/salsa20-crypt-internal.c new file mode 100644 index 00000000..cc46d024 --- /dev/null +++ b/salsa20-crypt-internal.c @@ -0,0 +1,111 @@ +/* salsa20-crypt-internal.c + + The Salsa20 stream cipher. + + Copyright (C) 2012 Simon Josefsson + Copyright (C) 2020 Niels Möller + + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. 
+ + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. +*/ + +#if HAVE_CONFIG_H +# include "config.h" +#endif + +#include <string.h> + +#include "salsa20.h" +#include "salsa20-internal.h" + +#include "macros.h" +#include "memxor.h" + +#if HAVE_NATIVE_salsa20_2core +#define _salsa20_crypt_2core _salsa20_crypt +#elif !HAVE_NATIVE_fat_salsa20_2core +#define _salsa20_crypt_1core _salsa20_crypt +#endif + +#if HAVE_NATIVE_salsa20_2core || HAVE_NATIVE_fat_salsa20_2core +void +_salsa20_crypt_2core(struct salsa20_ctx *ctx, unsigned rounds, + size_t length, uint8_t *dst, + const uint8_t *src) +{ + uint32_t x[2*_SALSA20_INPUT_LENGTH]; + while (length > SALSA20_BLOCK_SIZE) + { + _salsa20_2core (x, ctx->input, rounds); + ctx->input[8] += 2; + ctx->input[9] += (ctx->input[8] < 2); + if (length <= 2 * SALSA20_BLOCK_SIZE) /* final chunk; an exact two-block tail must end here, or the code below would consume an extra block */ + { + memxor3 (dst, src, x, length); + return; + } + memxor3 (dst, src, x, 2*SALSA20_BLOCK_SIZE); + + length -= 2*SALSA20_BLOCK_SIZE; + dst += 2*SALSA20_BLOCK_SIZE; + src += 2*SALSA20_BLOCK_SIZE; + } + _salsa20_core (x, ctx->input, rounds); + ctx->input[9] += (++ctx->input[8] == 0); + memxor3 (dst, src, x, length); +} +#endif + +#if !HAVE_NATIVE_salsa20_2core +void +_salsa20_crypt_1core(struct salsa20_ctx *ctx, unsigned rounds, + size_t length, + uint8_t *dst, + const uint8_t *src) +{ + for (;;) + { + uint32_t x[_SALSA20_INPUT_LENGTH]; + + _salsa20_core (x, ctx->input, rounds); + + ctx->input[9] += (++ctx->input[8] 
== 0); + + /* stopping at 2^70 length per nonce is user's responsibility */ + + if (length <= SALSA20_BLOCK_SIZE) + { + memxor3 (dst, src, x, length); + return; + } + memxor3 (dst, src, x, SALSA20_BLOCK_SIZE); + + length -= SALSA20_BLOCK_SIZE; + dst += SALSA20_BLOCK_SIZE; + src += SALSA20_BLOCK_SIZE; + } +} +#endif diff --git a/salsa20-crypt.c b/salsa20-crypt.c index b25cfc3d..2031d42d 100644 --- a/salsa20-crypt.c +++ b/salsa20-crypt.c @@ -41,14 +41,9 @@ # include "config.h" #endif -#include - #include "salsa20.h" #include "salsa20-internal.h" -#include "macros.h" -#include "memxor.h" - void salsa20_crypt(struct salsa20_ctx *ctx, size_t length, @@ -58,49 +53,5 @@ salsa20_crypt(struct salsa20_ctx *ctx, if (!length) return; -#if HAVE_NATIVE_salsa20_2core - uint32_t x[2*_SALSA20_INPUT_LENGTH]; - while (length > SALSA20_BLOCK_SIZE) - { - _salsa20_2core (x, ctx->input, 20); - ctx->input[8] += 2; - ctx->input[9] += (ctx->input[8] < 2); - if (length < 2 * SALSA20_BLOCK_SIZE) - { - memxor3 (c, m, x, length); - return; - } - memxor3 (c, m, x, 2*SALSA20_BLOCK_SIZE); - - length -= 2*SALSA20_BLOCK_SIZE; - c += 2*SALSA20_BLOCK_SIZE; - m += 2*SALSA20_BLOCK_SIZE; - } - _salsa20_core (x, ctx->input, 20); - ctx->input[9] += (++ctx->input[8] == 0); - memxor3 (c, m, x, length); - return; -#else - for (;;) - { - uint32_t x[_SALSA20_INPUT_LENGTH]; - - _salsa20_core (x, ctx->input, 20); - - ctx->input[9] += (++ctx->input[8] == 0); - - /* stopping at 2^70 length per nonce is user's responsibility */ - - if (length <= SALSA20_BLOCK_SIZE) - { - memxor3 (c, m, x, length); - return; - } - memxor3 (c, m, x, SALSA20_BLOCK_SIZE); - - length -= SALSA20_BLOCK_SIZE; - c += SALSA20_BLOCK_SIZE; - m += SALSA20_BLOCK_SIZE; - } -#endif + _salsa20_crypt (ctx, 20, length, c, m); } diff --git a/salsa20-internal.h b/salsa20-internal.h index fc1bb310..8d7684e0 100644 --- a/salsa20-internal.h +++ b/salsa20-internal.h @@ -36,14 +36,33 @@ #define NETTLE_SALSA20_INTERNAL_H_INCLUDED #include "nettle-types.h" 
+#include "salsa20.h" #define _salsa20_core _nettle_salsa20_core #define _salsa20_2core _nettle_salsa20_2core +#define _salsa20_crypt _nettle_salsa20_crypt +#define _salsa20_crypt_1core _nettle_salsa20_crypt_1core +#define _salsa20_crypt_2core _nettle_salsa20_crypt_2core void _salsa20_core(uint32_t *dst, const uint32_t *src, unsigned rounds); +void +_salsa20_crypt(struct salsa20_ctx *ctx, unsigned rounds, + size_t length, uint8_t *dst, + const uint8_t *src); + +/* Functions available only in some configurations */ void _salsa20_2core(uint32_t *dst, const uint32_t *src, unsigned rounds); +void +_salsa20_crypt_1core(struct salsa20_ctx *ctx, unsigned rounds, + size_t length, uint8_t *dst, + const uint8_t *src); +void +_salsa20_crypt_2core(struct salsa20_ctx *ctx, unsigned rounds, + size_t length, uint8_t *dst, + const uint8_t *src); + #endif /* NETTLE_SALSA20_INTERNAL_H_INCLUDED */ diff --git a/salsa20r12-crypt.c b/salsa20r12-crypt.c index 41e32d8b..9515251a 100644 --- a/salsa20r12-crypt.c +++ b/salsa20r12-crypt.c @@ -41,14 +41,9 @@ # include "config.h" #endif -#include - #include "salsa20.h" #include "salsa20-internal.h" -#include "macros.h" -#include "memxor.h" - void salsa20r12_crypt(struct salsa20_ctx *ctx, size_t length, @@ -57,50 +52,6 @@ salsa20r12_crypt(struct salsa20_ctx *ctx, { if (!length) return; - -#if HAVE_NATIVE_salsa20_2core - uint32_t x[2*_SALSA20_INPUT_LENGTH]; - while (length > SALSA20_BLOCK_SIZE) - { - _salsa20_2core (x, ctx->input, 12); - ctx->input[8] += 2; - ctx->input[9] += (ctx->input[8] < 2); - if (length < 2 * SALSA20_BLOCK_SIZE) - { - memxor3 (c, m, x, length); - return; - } - memxor3 (c, m, x, 2*SALSA20_BLOCK_SIZE); - - length -= 2*SALSA20_BLOCK_SIZE; - c += 2*SALSA20_BLOCK_SIZE; - m += 2*SALSA20_BLOCK_SIZE; - } - _salsa20_core (x, ctx->input, 12); - ctx->input[9] += (++ctx->input[8] == 0); - memxor3 (c, m, x, length); - return; -#else - for (;;) - { - uint32_t x[_SALSA20_INPUT_LENGTH]; - - _salsa20_core (x, ctx->input, 12); - 
ctx->input[9] += (++ctx->input[8] == 0); - - /* stopping at 2^70 length per nonce is user's responsibility */ - - if (length <= SALSA20_BLOCK_SIZE) - { - memxor3 (c, m, x, length); - return; - } - memxor3 (c, m, x, SALSA20_BLOCK_SIZE); - - length -= SALSA20_BLOCK_SIZE; - c += SALSA20_BLOCK_SIZE; - m += SALSA20_BLOCK_SIZE; - } -#endif + _salsa20_crypt (ctx, 12, length, c, m); } -- cgit v1.2.1