diff options
-rw-r--r-- | ChangeLog | 4 | ||||
-rw-r--r-- | arm/fat/chacha-core-internal-2.asm | 37 | ||||
-rw-r--r-- | arm/neon/chacha-core-internal.asm | 144 | ||||
-rw-r--r-- | fat-arm.c | 10 |
4 files changed, 3 insertions, 192 deletions
@@ -8,8 +8,10 @@ implementation on some platforms, including the Raspberry Pi 4. With the introduction of salsa20-2core.asm, performance of this function is also less important. + * arm/neon/chacha-core-internal.asm: Deleted file, for analogous reasons. * arm/fat/salsa20-core-internal-2.asm: Deleted file. - * fat-arm.c: Delete fat setup for _nettle_salsa20_core. + * arm/fat/chacha-core-internal-2.asm: Deleted file. + * fat-arm.c (_nettle_salsa20_core, _nettle_chacha_core): Delete fat setup. 2021-01-20 Niels Möller <nisse@lysator.liu.se> diff --git a/arm/fat/chacha-core-internal-2.asm b/arm/fat/chacha-core-internal-2.asm deleted file mode 100644 index 3715471c..00000000 --- a/arm/fat/chacha-core-internal-2.asm +++ /dev/null @@ -1,37 +0,0 @@ -C arm/fat/chacha-core-internal-2.asm - - -ifelse(` - Copyright (C) 2015 Niels Möller - - This file is part of GNU Nettle. - - GNU Nettle is free software: you can redistribute it and/or - modify it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - - or - - * the GNU General Public License as published by the Free - Software Foundation; either version 2 of the License, or (at your - option) any later version. - - or both in parallel, as here. - - GNU Nettle is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received copies of the GNU General Public License and - the GNU Lesser General Public License along with this program. If - not, see http://www.gnu.org/licenses/. -') - -dnl PROLOGUE(_nettle_chacha_core) picked up by configure - -define(`fat_transform', `$1_neon') -include_src(`arm/neon/chacha-core-internal.asm') diff --git a/arm/neon/chacha-core-internal.asm b/arm/neon/chacha-core-internal.asm deleted file mode 100644 index 5095be6a..00000000 --- a/arm/neon/chacha-core-internal.asm +++ /dev/null @@ -1,144 +0,0 @@ -C arm/neon/chacha-core-internal.asm - -ifelse(` - Copyright (C) 2013, 2015 Niels Möller - - This file is part of GNU Nettle. - - GNU Nettle is free software: you can redistribute it and/or - modify it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - - or - - * the GNU General Public License as published by the Free - Software Foundation; either version 2 of the License, or (at your - option) any later version. - - or both in parallel, as here. - - GNU Nettle is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received copies of the GNU General Public License and - the GNU Lesser General Public License along with this program. If - not, see http://www.gnu.org/licenses/. -') - - .file "chacha-core-internal.asm" - .fpu neon - -define(`DST', `r0') -define(`SRC', `r1') -define(`ROUNDS', `r2') - -define(`X0', `q0') -define(`X1', `q1') -define(`X2', `q2') -define(`X3', `q3') -define(`T0', `q8') -define(`S0', `q12') -define(`S1', `q13') -define(`S2', `q14') -define(`S3', `q15') - -define(`QROUND', ` - C x0 += x1, x3 ^= x0, x3 lrot 16 - C x2 += x3, x1 ^= x2, x1 lrot 12 - C x0 += x1, x3 ^= x0, x3 lrot 8 - C x2 += x3, x1 ^= x2, x1 lrot 7 - - vadd.i32 $1, $1, $2 - veor $4, $4, $1 - vshl.i32 T0, $4, #16 - vshr.u32 $4, $4, #16 - veor $4, $4, T0 - - vadd.i32 $3, $3, $4 - veor $2, $2, $3 - vshl.i32 T0, $2, #12 - vshr.u32 $2, $2, #20 - veor $2, $2, T0 - - vadd.i32 $1, $1, $2 - veor $4, $4, $1 - vshl.i32 T0, $4, #8 - vshr.u32 $4, $4, #24 - veor $4, $4, T0 - - vadd.i32 $3, $3, $4 - veor $2, $2, $3 - vshl.i32 T0, $2, #7 - vshr.u32 $2, $2, #25 - veor $2, $2, T0 -') - - .text - .align 4 - C _chacha_core(uint32_t *dst, const uint32_t *src, unsigned rounds) - -PROLOGUE(_nettle_chacha_core) - C loads using vld1.32 to be endianness-neutral wrt consecutive 32-bit words - vld1.32 {X0,X1}, [SRC]! C SRC changed! - vld1.32 {X2,X3}, [SRC] - - vmov S0, X0 - vmov S1, X1 - vmov S2, X2 - vmov S3, X3 - - C Input rows little-endian: - C 0 1 2 3 X0 - C 4 5 6 7 X1 - C 8 9 10 11 X2 - C 12 13 14 15 X3 - -.Loop: - QROUND(X0, X1, X2, X3) - - C In little-endian rotate rows, to get - C 0 1 2 3 - C 5 6 7 4 >>> 3 - C 10 11 8 9 >>> 2 - C 15 12 13 14 >>> 1 - vext.32 X1, X1, X1, #1 - vext.32 X2, X2, X2, #2 - vext.32 X3, X3, X3, #3 - - QROUND(X0, X1, X2, X3) - - subs ROUNDS, ROUNDS, #2 - C Inverse rotation - vext.32 X1, X1, X1, #3 - vext.32 X2, X2, X2, #2 - vext.32 X3, X3, X3, #1 - - bhi .Loop - - vadd.u32 X0, X0, S0 - vadd.u32 X1, X1, S1 - - C vst1.8 because caller expects results little-endian - C use vstm when little-endian for some additional speedup -IF_BE(` vst1.8 {X0,X1}, [DST]!') - - vadd.u32 X2, X2, S2 - vadd.u32 X3, X3, S3 - -IF_BE(` vst1.8 {X2,X3}, [DST]') -IF_LE(` vstm DST, {X0,X1,X2,X3}') - bx lr -EPILOGUE(_nettle_chacha_core) - -divert(-1) -define chachastate -p/x $q0.u32 -p/x $q1.u32 -p/x $q2.u32 -p/x $q3.u32 -end @@ -173,10 +173,6 @@ DECLARE_FAT_FUNC(_nettle_umac_nh_n, umac_nh_n_func) DECLARE_FAT_FUNC_VAR(umac_nh_n, umac_nh_n_func, c); DECLARE_FAT_FUNC_VAR(umac_nh_n, umac_nh_n_func, neon); -DECLARE_FAT_FUNC(_nettle_chacha_core, chacha_core_func) -DECLARE_FAT_FUNC_VAR(chacha_core, chacha_core_func, c); -DECLARE_FAT_FUNC_VAR(chacha_core, chacha_core_func, neon); - DECLARE_FAT_FUNC(nettle_chacha_crypt, chacha_crypt_func) DECLARE_FAT_FUNC_VAR(chacha_crypt, chacha_crypt_func, 1core) DECLARE_FAT_FUNC_VAR(chacha_crypt, chacha_crypt_func, 3core) @@ -226,7 +222,6 @@ fat_init (void) nettle_sha3_permute_vec = _nettle_sha3_permute_neon; _nettle_umac_nh_vec = _nettle_umac_nh_neon; _nettle_umac_nh_n_vec = _nettle_umac_nh_n_neon; - _nettle_chacha_core_vec = _nettle_chacha_core_neon; nettle_chacha_crypt_vec = _nettle_chacha_crypt_3core; nettle_chacha_crypt32_vec = _nettle_chacha_crypt32_3core; } @@ -239,7 +234,6 @@ fat_init (void) nettle_sha3_permute_vec = _nettle_sha3_permute_c; _nettle_umac_nh_vec = _nettle_umac_nh_c; _nettle_umac_nh_n_vec = _nettle_umac_nh_n_c; - _nettle_chacha_core_vec = _nettle_chacha_core_c; nettle_chacha_crypt_vec = _nettle_chacha_crypt_1core; nettle_chacha_crypt32_vec = _nettle_chacha_crypt32_1core; } @@ -289,10 +283,6 @@ DEFINE_FAT_FUNC(_nettle_umac_nh_n, void, unsigned length, const uint8_t *msg), (out, n, key, length, msg)) -DEFINE_FAT_FUNC(_nettle_chacha_core, void, - (uint32_t *dst, const uint32_t *src, unsigned rounds), - (dst, src, rounds)) - DEFINE_FAT_FUNC(nettle_chacha_crypt, void, (struct chacha_ctx *ctx, size_t length, |