diff options
author | Niels Möller <nisse@lysator.liu.se> | 2020-11-07 11:16:35 +0100 |
---|---|---|
committer | Niels Möller <nisse@lysator.liu.se> | 2020-11-07 11:16:35 +0100 |
commit | 611abe02e3a1fde17697ab70e7c2805b6cfc0eee (patch) | |
tree | 1f01e194c68915c1e49ab18438dadae8ebbbfb05 | |
parent | 4c8b0cdd97ffec3ae3f8d995afdfccbc261b3c79 (diff) | |
parent | 19ba206d46d8558bc3af6ab14f7a770a94df57b9 (diff) | |
download | nettle-611abe02e3a1fde17697ab70e7c2805b6cfc0eee.tar.gz |
Merge branch 'ppc-chacha-core'
-rw-r--r-- | ChangeLog | 15 | ||||
-rw-r--r-- | Makefile.in | 2 | ||||
-rw-r--r-- | configure.ac | 15 | ||||
-rw-r--r-- | fat-ppc.c | 33 | ||||
-rw-r--r-- | powerpc64/fat/chacha-core-internal-2.asm | 37 | ||||
-rw-r--r-- | powerpc64/p7/chacha-core-internal.asm | 160 |
6 files changed, 256 insertions, 6 deletions
@@ -1,3 +1,18 @@ +2020-11-07 Niels Möller <nisse@lysator.liu.se> + + Merged initial powerpc64 implementation of chacha. + * configure.ac: New command line option --enable-power-altivec. + Update asm_path logic, and add altivec to FAT_TEST_LIST. + * fat-ppc.c (get_ppc_features): Add logic to check for altivec and + vsx support, and select either C or altivec implementation of + chacha_core. + * powerpc64/p7/chacha-core-internal.asm: New file. + +2020-09-25 Niels Möller <nisse@lysator.liu.se> + + * powerpc64/p7/chacha-core-internal.asm: New file. + * Makefile.in (distdir): Add powerpc64/p7. + 2020-10-29 Niels Möller <nisse@lysator.liu.se> * blowfish.c (blowfish_set_key): Add casts to uint32_t. Avoids diff --git a/Makefile.in b/Makefile.in index c10f3e9d..d955774d 100644 --- a/Makefile.in +++ b/Makefile.in @@ -616,7 +616,7 @@ distdir: $(DISTFILES) set -e; for d in sparc32 sparc64 x86 \ x86_64 x86_64/aesni x86_64/sha_ni x86_64/fat \ arm arm/neon arm/v6 arm/fat \ - powerpc64 powerpc64/p8 powerpc64/fat ; do \ + powerpc64 powerpc64/p7 powerpc64/p8 powerpc64/fat ; do \ mkdir "$(distdir)/$$d" ; \ find "$(srcdir)/$$d" -maxdepth 1 '(' -name '*.asm' -o -name '*.m4' ')' \ -exec cp '{}' "$(distdir)/$$d" ';' ; \ diff --git a/configure.ac b/configure.ac index e9983697..2a47f940 100644 --- a/configure.ac +++ b/configure.ac @@ -93,6 +93,10 @@ AC_ARG_ENABLE(power-crypto-ext, AC_HELP_STRING([--enable-power-crypto-ext], [Enable POWER crypto extensions. (default=no)]),, [enable_power_crypto_ext=no]) +AC_ARG_ENABLE(power-altivec, + AC_HELP_STRING([--enable-power-altivec], [Enable POWER altivec and vsx extensions. 
(default=no)]),, + [enable_altivec=no]) + AC_ARG_ENABLE(mini-gmp, AC_HELP_STRING([--enable-mini-gmp], [Enable mini-gmp, used instead of libgmp.]),, [enable_mini_gmp=no]) @@ -462,9 +466,14 @@ if test "x$enable_assembler" = xyes ; then if test "x$enable_fat" = xyes ; then asm_path="powerpc64/fat $asm_path" OPT_NETTLE_SOURCES="fat-ppc.c $OPT_NETTLE_SOURCES" - FAT_TEST_LIST="none crypto_ext" - elif test "x$enable_power_crypto_ext" = xyes ; then - asm_path="powerpc64/p8 $asm_path" + FAT_TEST_LIST="none crypto_ext altivec" + else + if test "$enable_power_crypto_ext" = yes ; then + asm_path="powerpc64/p8 $asm_path" + fi + if test "$enable_power_altivec" = yes ; then + asm_path="powerpc64/p7 $asm_path" + fi fi fi ;; @@ -66,6 +66,7 @@ struct ppc_features { int have_crypto_ext; + int have_altivec; }; #define MATCH(s, slen, literal, llen) \ @@ -76,6 +77,7 @@ get_ppc_features (struct ppc_features *features) { const char *s; features->have_crypto_ext = 0; + features->have_altivec = 0; s = secure_getenv (ENV_OVERRIDE); if (s) @@ -86,6 +88,8 @@ get_ppc_features (struct ppc_features *features) if (MATCH (s, length, "crypto_ext", 10)) features->have_crypto_ext = 1; + else if (MATCH(s, length, "altivec", 7)) + features->have_altivec = 1; if (!sep) break; s = sep + 1; @@ -95,8 +99,10 @@ get_ppc_features (struct ppc_features *features) #if defined(_AIX) && defined(__power_8_andup) features->have_crypto_ext = __power_8_andup() != 0 ? 1 : 0; #else + unsigned long hwcap = 0; unsigned long hwcap2 = 0; # if defined(__linux__) + hwcap = getauxval(AT_HWCAP); hwcap2 = getauxval(AT_HWCAP2); # elif defined(__FreeBSD__) # if __FreeBSD__ >= 12 @@ -106,8 +112,13 @@ get_ppc_features (struct ppc_features *features) sysctlbyname("hw.cpu_features2", &hwcap2, &len, NULL, 0); # endif # endif - features->have_crypto_ext = - (hwcap2 & PPC_FEATURE2_VEC_CRYPTO) == PPC_FEATURE2_VEC_CRYPTO ? 
1 : 0; + features->have_crypto_ext + = ((hwcap2 & PPC_FEATURE2_VEC_CRYPTO) == PPC_FEATURE2_VEC_CRYPTO); + + /* We also need VSX instructions, mainly for load and store. */ + features->have_altivec + = ((hwcap & (PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_VSX)) + == (PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_VSX)); #endif } } @@ -120,6 +131,10 @@ DECLARE_FAT_FUNC(_nettle_aes_decrypt, aes_crypt_internal_func) DECLARE_FAT_FUNC_VAR(aes_decrypt, aes_crypt_internal_func, c) DECLARE_FAT_FUNC_VAR(aes_decrypt, aes_crypt_internal_func, ppc64) +DECLARE_FAT_FUNC(_nettle_chacha_core, chacha_core_func) +DECLARE_FAT_FUNC_VAR(chacha_core, chacha_core_func, c); +DECLARE_FAT_FUNC_VAR(chacha_core, chacha_core_func, altivec); + static void CONSTRUCTOR fat_init (void) { @@ -145,6 +160,16 @@ fat_init (void) _nettle_aes_encrypt_vec = _nettle_aes_encrypt_c; _nettle_aes_decrypt_vec = _nettle_aes_decrypt_c; } + if (features.have_altivec) + { + if (verbose) + fprintf (stderr, "libnettle: enabling altivec code.\n"); + _nettle_chacha_core_vec = _nettle_chacha_core_altivec; + } + else + { + _nettle_chacha_core_vec = _nettle_chacha_core_c; + } } DEFINE_FAT_FUNC(_nettle_aes_encrypt, void, @@ -160,3 +185,7 @@ DEFINE_FAT_FUNC(_nettle_aes_decrypt, void, size_t length, uint8_t *dst, const uint8_t *src), (rounds, keys, T, length, dst, src)) + +DEFINE_FAT_FUNC(_nettle_chacha_core, void, + (uint32_t *dst, const uint32_t *src, unsigned rounds), + (dst, src, rounds)) diff --git a/powerpc64/fat/chacha-core-internal-2.asm b/powerpc64/fat/chacha-core-internal-2.asm new file mode 100644 index 00000000..35c059ea --- /dev/null +++ b/powerpc64/fat/chacha-core-internal-2.asm @@ -0,0 +1,37 @@ +C powerpc64/fat/chacha-core-internal-2.asm + + +ifelse(` + Copyright (C) 2020 Niels Möller + + This file is part of GNU Nettle. 
+ + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. +') + +dnl PROLOGUE(_nettle_chacha_core) picked up by configure + +define(`fat_transform', `$1_altivec') +include_src(`powerpc64/p7/chacha-core-internal.asm') diff --git a/powerpc64/p7/chacha-core-internal.asm b/powerpc64/p7/chacha-core-internal.asm new file mode 100644 index 00000000..6eb1066f --- /dev/null +++ b/powerpc64/p7/chacha-core-internal.asm @@ -0,0 +1,160 @@ +C powerpc64/p7/chacha-core-internal.asm + +ifelse(` + Copyright (C) 2020 Niels Möller and Torbjörn Granlund + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. 
+ + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. +') + +C Register usage: + +C Argments +define(`DST', `r3') +define(`SRC', `r4') +define(`ROUNDS', `r5') + +C Working state +define(`X0', `v0') +define(`X1', `v1') +define(`X2', `v2') +define(`X3', `v3') + +define(`ROT16', `v4') +define(`ROT12', `v5') +define(`ROT8', `v6') +define(`ROT7', `v7') + +C Original input state +define(`S0', `v8') +define(`S1', `v9') +define(`S2', `v10') +define(`S3', `v11') + +C Big-endian working state +define(`LE_MASK', `v12') +define(`LE_TEMP', `v13') + +C QROUND(X0, X1, X2, X3) +define(`QROUND', ` + C x0 += x1, x3 ^= x0, x3 lrot 16 + C x2 += x3, x1 ^= x2, x1 lrot 12 + C x0 += x1, x3 ^= x0, x3 lrot 8 + C x2 += x3, x1 ^= x2, x1 lrot 7 + + vadduwm $1, $1, $2 + vxor $4, $4, $1 + vrlw $4, $4, ROT16 + + vadduwm $3, $3, $4 + vxor $2, $2, $3 + vrlw $2, $2, ROT12 + + vadduwm $1, $1, $2 + vxor $4, $4, $1 + vrlw $4, $4, ROT8 + + vadduwm $3, $3, $4 + vxor $2, $2, $3 + vrlw $2, $2, ROT7 +') + +C LE_SWAP32(X0, X1, X2, X3) +define(`LE_SWAP32', `IF_BE(` + vperm X0, X0, X0, LE_MASK + vperm X1, X1, X1, LE_MASK + vperm X2, X2, X2, LE_MASK + vperm X3, X3, X3, LE_MASK +')') + + .text + C _chacha_core(uint32_t *dst, const uint32_t *src, unsigned rounds) + +define(`FUNC_ALIGN', `5') +PROLOGUE(_nettle_chacha_core) + + li r6, 0x10 C set up some... + li r7, 0x20 C ...useful... + li r8, 0x30 C ...offsets + + vspltisw ROT16, -16 C -16 instead of 16 actually works! + vspltisw ROT12, 12 + vspltisw ROT8, 8 + vspltisw ROT7, 7 +IF_BE(` + li r9, 0 + lvsl LE_MASK, r9, r9 C 00 01 02 03 ... 0c 0d 0e 0f + vspltisb LE_TEMP, 0x03 C 03 03 03 03 ... 
03 03 03 03 + vxor LE_MASK, LE_MASK, LE_TEMP C 03 02 01 00 ... 0f 0e 0d 0c +') + + lxvw4x VSR(X0), 0, SRC + lxvw4x VSR(X1), r6, SRC + lxvw4x VSR(X2), r7, SRC + lxvw4x VSR(X3), r8, SRC + + vor S0, X0, X0 + vor S1, X1, X1 + vor S2, X2, X2 + vor S3, X3, X3 + + srdi ROUNDS, ROUNDS, 1 + mtctr ROUNDS + +.Loop: + QROUND(X0, X1, X2, X3) + C Rotate rows, to get + C 0 1 2 3 + C 5 6 7 4 <<< 1 + C 10 11 8 9 <<< 2 + C 15 12 13 14 <<< 3 + + vsldoi X1, X1, X1, 4 + vsldoi X2, X2, X2, 8 + vsldoi X3, X3, X3, 12 + + QROUND(X0, X1, X2, X3) + + C Inverse rotation + vsldoi X1, X1, X1, 12 + vsldoi X2, X2, X2, 8 + vsldoi X3, X3, X3, 4 + + bdnz .Loop + + vadduwm X0, X0, S0 + vadduwm X1, X1, S1 + vadduwm X2, X2, S2 + vadduwm X3, X3, S3 + + LE_SWAP32(X0, X1, X2, X3) + + stxvw4x VSR(X0), 0, DST + stxvw4x VSR(X1), r6, DST + stxvw4x VSR(X2), r7, DST + stxvw4x VSR(X3), r8, DST + + blr +EPILOGUE(_nettle_chacha_core) |