summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNiels Möller <nisse@lysator.liu.se>2022-08-06 19:45:01 +0000
committerNiels Möller <nisse@lysator.liu.se>2022-08-06 19:45:01 +0000
commit693820e1bad41f640159d8556b171e9a4f282c5e (patch)
tree9b952d85013718a0e594bf35dd504b827ae453b5
parentfae01ac059343d153610ec7a8334172e302d495c (diff)
parent8c3c21180a9eadba6f946a193556832d1cdb6efa (diff)
downloadnettle-693820e1bad41f640159d8556b171e9a4f282c5e.tar.gz
Merge branch 'ppc-r64-44' into 'master'
[PowerPC] Implement Poly1305 single block update based on radix 2^64 See merge request nettle/nettle!47
-rw-r--r--Makefile.in2
-rw-r--r--configure.ac15
-rw-r--r--fat-ppc.c54
-rw-r--r--fat-setup.h6
-rw-r--r--poly1305-internal.c22
-rw-r--r--powerpc64/fat/poly1305-internal-2.asm39
-rw-r--r--powerpc64/p9/poly1305-internal.asm238
7 files changed, 373 insertions, 3 deletions
diff --git a/Makefile.in b/Makefile.in
index 65911e2a..11c88114 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -607,7 +607,7 @@ distdir: $(DISTFILES)
x86_64 x86_64/aesni x86_64/sha_ni x86_64/pclmul x86_64/fat \
arm arm/neon arm/v6 arm/fat \
arm64 arm64/crypto arm64/fat \
- powerpc64 powerpc64/p7 powerpc64/p8 powerpc64/fat \
+ powerpc64 powerpc64/p7 powerpc64/p8 powerpc64/p9 powerpc64/fat \
s390x s390x/vf s390x/msa s390x/msa_x1 s390x/msa_x2 s390x/msa_x4 s390x/fat ; do \
mkdir "$(distdir)/$$d" ; \
find "$(srcdir)/$$d" -maxdepth 1 '(' -name '*.asm' -o -name '*.m4' -o -name README ')' \
diff --git a/configure.ac b/configure.ac
index 39b5b148..7a046f11 100644
--- a/configure.ac
+++ b/configure.ac
@@ -105,6 +105,10 @@ AC_ARG_ENABLE(power-altivec,
AC_HELP_STRING([--enable-power-altivec], [Enable POWER altivec and vsx extensions. (default=no)]),,
[enable_altivec=no])
+AC_ARG_ENABLE(power9,
+ AC_HELP_STRING([--enable-power9], [Enable POWER ISA v3.0. (default=no)]),,
+ [enable_power9=no])
+
AC_ARG_ENABLE(s390x-vf,
AC_HELP_STRING([--enable-s390x-vf], [Enable vector facility on z/Architecture. (default=no)]),,
[enable_s390x_vf=no])
@@ -539,9 +543,12 @@ if test "x$enable_assembler" = xyes ; then
if test "x$enable_fat" = xyes ; then
asm_path="powerpc64/fat $asm_path"
OPT_NETTLE_SOURCES="fat-ppc.c $OPT_NETTLE_SOURCES"
- FAT_TEST_LIST="none crypto_ext altivec"
+ FAT_TEST_LIST="none crypto_ext altivec power9"
else
- if test "$enable_power_crypto_ext" = yes ; then
+ if test "$enable_power9" = yes ; then
+ asm_path="powerpc64/p9 $asm_path"
+ fi
+ if test "$enable_power_crypto_ext" = yes ; then
asm_path="powerpc64/p8 $asm_path"
fi
if test "$enable_power_altivec" = yes ; then
@@ -605,6 +612,7 @@ asm_nettle_optional_list="cpuid.asm cpu-facility.asm \
aes256-encrypt-2.asm aes256-decrypt-2.asm \
cbc-aes128-encrypt-2.asm cbc-aes192-encrypt-2.asm cbc-aes256-encrypt-2.asm \
chacha-2core.asm chacha-3core.asm chacha-4core.asm chacha-core-internal-2.asm \
+ poly1305-internal-2.asm \
ghash-set-key-2.asm ghash-update-2.asm \
salsa20-2core.asm salsa20-core-internal-2.asm \
sha1-compress-2.asm sha256-compress-2.asm \
@@ -751,6 +759,9 @@ AH_VERBATIM([HAVE_NATIVE],
#undef HAVE_NATIVE_ecc_secp384r1_redc
#undef HAVE_NATIVE_ecc_secp521r1_modp
#undef HAVE_NATIVE_ecc_secp521r1_redc
+#undef HAVE_NATIVE_poly1305_set_key
+#undef HAVE_NATIVE_poly1305_block
+#undef HAVE_NATIVE_poly1305_digest
#undef HAVE_NATIVE_ghash_set_key
#undef HAVE_NATIVE_ghash_update
#undef HAVE_NATIVE_salsa20_core
diff --git a/fat-ppc.c b/fat-ppc.c
index bf622cf5..7569e44d 100644
--- a/fat-ppc.c
+++ b/fat-ppc.c
@@ -65,6 +65,7 @@
#include "aes-internal.h"
#include "chacha-internal.h"
#include "ghash-internal.h"
+#include "poly1305.h"
#include "fat-setup.h"
/* Defines from arch/powerpc/include/uapi/asm/cputable.h in Linux kernel */
@@ -77,11 +78,15 @@
#ifndef PPC_FEATURE2_VEC_CRYPTO
#define PPC_FEATURE2_VEC_CRYPTO 0x02000000
#endif
+#ifndef PPC_FEATURE2_ARCH_3_00
+#define PPC_FEATURE2_ARCH_3_00 0x00800000
+#endif
struct ppc_features
{
int have_crypto_ext;
int have_altivec;
+ int have_power9;
};
#define MATCH(s, slen, literal, llen) \
@@ -93,6 +98,7 @@ get_ppc_features (struct ppc_features *features)
const char *s;
features->have_crypto_ext = 0;
features->have_altivec = 0;
+ features->have_power9 = 0;
s = secure_getenv (ENV_OVERRIDE);
if (s)
@@ -105,6 +111,8 @@ get_ppc_features (struct ppc_features *features)
features->have_crypto_ext = 1;
else if (MATCH(s, length, "altivec", 7))
features->have_altivec = 1;
+ else if (MATCH(s, length, "power9", 6))
+ features->have_power9 = 1;
if (!sep)
break;
s = sep + 1;
@@ -136,6 +144,9 @@ get_ppc_features (struct ppc_features *features)
features->have_crypto_ext
= ((hwcap2 & PPC_FEATURE2_VEC_CRYPTO) == PPC_FEATURE2_VEC_CRYPTO);
+ features->have_power9
+ = ((hwcap2 & PPC_FEATURE2_ARCH_3_00) == PPC_FEATURE2_ARCH_3_00);
+
/* We also need VSX instructions, mainly for load and store. */
features->have_altivec
= ((hwcap & (PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_VSX))
@@ -172,6 +183,18 @@ DECLARE_FAT_FUNC(nettle_chacha_crypt32, chacha_crypt_func)
DECLARE_FAT_FUNC_VAR(chacha_crypt32, chacha_crypt_func, 1core)
DECLARE_FAT_FUNC_VAR(chacha_crypt32, chacha_crypt_func, 3core)
+DECLARE_FAT_FUNC(_nettle_poly1305_set_key, poly1305_set_key_func) /* dispatched entry point; fat_init selects the _c or _ppc64 variant */
+DECLARE_FAT_FUNC_VAR(poly1305_set_key, poly1305_set_key_func, c) /* portable C variant */
+DECLARE_FAT_FUNC_VAR(poly1305_set_key, poly1305_set_key_func, ppc64) /* assembly variant, used when have_power9 is set */
+
+DECLARE_FAT_FUNC(_nettle_poly1305_block, poly1305_block_func) /* same pattern for the single block update */
+DECLARE_FAT_FUNC_VAR(poly1305_block, poly1305_block_func, c)
+DECLARE_FAT_FUNC_VAR(poly1305_block, poly1305_block_func, ppc64)
+
+DECLARE_FAT_FUNC(_nettle_poly1305_digest, poly1305_digest_func) /* same pattern for the digest step */
+DECLARE_FAT_FUNC_VAR(poly1305_digest, poly1305_digest_func, c)
+DECLARE_FAT_FUNC_VAR(poly1305_digest, poly1305_digest_func, ppc64)
+
static void CONSTRUCTOR
fat_init (void)
{
@@ -220,6 +243,21 @@ fat_init (void)
nettle_chacha_crypt_vec = _nettle_chacha_crypt_1core;
nettle_chacha_crypt32_vec = _nettle_chacha_crypt32_1core;
}
+
+ if (features.have_power9)
+ {
+ if (verbose)
+ fprintf (stderr, "libnettle: enabling arch 3.00 code.\n");
+ _nettle_poly1305_set_key_vec = _nettle_poly1305_set_key_ppc64;
+ _nettle_poly1305_block_vec = _nettle_poly1305_block_ppc64;
+ _nettle_poly1305_digest_vec = _nettle_poly1305_digest_ppc64;
+ }
+ else
+ {
+ _nettle_poly1305_set_key_vec = _nettle_poly1305_set_key_c;
+ _nettle_poly1305_block_vec = _nettle_poly1305_block_c;
+ _nettle_poly1305_digest_vec = _nettle_poly1305_digest_c;
+ }
}
DEFINE_FAT_FUNC(_nettle_aes_encrypt, void,
@@ -261,3 +299,19 @@ DEFINE_FAT_FUNC(nettle_chacha_crypt32, void,
uint8_t *dst,
const uint8_t *src),
(ctx, length, dst, src))
+
+DEFINE_FAT_FUNC(_nettle_poly1305_set_key, void, /* name and return type of the dispatched function */
+ (struct poly1305_ctx *ctx,
+ const uint8_t *key),
+ (ctx, key)) /* argument names; presumably forwarded through the _vec pointer set in fat_init (macro body not shown here) */
+
+DEFINE_FAT_FUNC(_nettle_poly1305_block, void,
+ (struct poly1305_ctx *ctx,
+ const uint8_t *m,
+ unsigned high),
+ (ctx, m, high)) /* same parameter list as poly1305_block_func */
+
+DEFINE_FAT_FUNC(_nettle_poly1305_digest, void,
+ (struct poly1305_ctx *ctx,
+ union nettle_block16 *s),
+ (ctx, s)) /* same parameter list as poly1305_digest_func */
diff --git a/fat-setup.h b/fat-setup.h
index e77cce02..ad3c10f0 100644
--- a/fat-setup.h
+++ b/fat-setup.h
@@ -196,6 +196,12 @@ typedef void chacha_crypt_func(struct chacha_ctx *ctx,
uint8_t *dst,
const uint8_t *src);
+struct poly1305_ctx; /* forward declaration; only pointers to it are used below */
+typedef void poly1305_set_key_func(struct poly1305_ctx *ctx, const uint8_t *key); /* function type used for the set_key variants in fat-ppc.c */
+typedef void poly1305_digest_func(struct poly1305_ctx *ctx, union nettle_block16 *s); /* function type used for the digest variants */
+typedef void poly1305_block_func(struct poly1305_ctx *ctx, const uint8_t *m,
+ unsigned high); /* function type used for the single block update variants */
+
struct aes128_ctx;
typedef void aes128_set_key_func (struct aes128_ctx *ctx, const uint8_t *key);
typedef void aes128_invert_key_func (struct aes128_ctx *dst, const struct aes128_ctx *src);
diff --git a/poly1305-internal.c b/poly1305-internal.c
index 490fdf71..380b934e 100644
--- a/poly1305-internal.c
+++ b/poly1305-internal.c
@@ -85,6 +85,28 @@
#define h3 h.h32[3]
#define h4 hh
+/* For fat builds: when a native version of a function is configured, the C definition in this file is compiled under a _c suffixed name. */
+#if HAVE_NATIVE_poly1305_set_key /* declare the _c name, then rename the definition that follows */
+void
+_nettle_poly1305_set_key_c(struct poly1305_ctx *ctx,
+ const uint8_t key[16]);
+# define _nettle_poly1305_set_key _nettle_poly1305_set_key_c
+#endif
+
+#if HAVE_NATIVE_poly1305_block /* same renaming for the block update */
+void
+_nettle_poly1305_block_c(struct poly1305_ctx *ctx, const uint8_t *m,
+ unsigned t4);
+# define _nettle_poly1305_block _nettle_poly1305_block_c
+#endif
+
+#if HAVE_NATIVE_poly1305_digest /* same renaming for the digest step */
+void
+_nettle_poly1305_digest_c(struct poly1305_ctx *ctx,
+ union nettle_block16 *s);
+# define _nettle_poly1305_digest _nettle_poly1305_digest_c
+#endif
+
void
_nettle_poly1305_set_key(struct poly1305_ctx *ctx, const uint8_t key[16])
{
diff --git a/powerpc64/fat/poly1305-internal-2.asm b/powerpc64/fat/poly1305-internal-2.asm
new file mode 100644
index 00000000..177a4563
--- /dev/null
+++ b/powerpc64/fat/poly1305-internal-2.asm
@@ -0,0 +1,39 @@
+C powerpc64/fat/poly1305-internal-2.asm
+
+ifelse(`
+ Copyright (C) 2022 Mamone Tarsha
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+')
+
+dnl picked up by configure
+dnl PROLOGUE(_nettle_poly1305_set_key)
+dnl PROLOGUE(_nettle_poly1305_block)
+dnl PROLOGUE(_nettle_poly1305_digest)
+
+define(`fat_transform', `$1_ppc64')
+include_src(`powerpc64/p9/poly1305-internal.asm')
diff --git a/powerpc64/p9/poly1305-internal.asm b/powerpc64/p9/poly1305-internal.asm
new file mode 100644
index 00000000..a082fed2
--- /dev/null
+++ b/powerpc64/p9/poly1305-internal.asm
@@ -0,0 +1,238 @@
+C powerpc64/p9/poly1305-internal.asm
+
+ifelse(`
+ Copyright (C) 2013, 2022 Niels Möller
+ Copyright (C) 2022 Mamone Tarsha
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+')
+
+C Register usage:
+
+define(`SP', `r1')
+define(`TOCP', `r2')
+
+C Arguments
+define(`CTX', `r3')
+define(`M', `r4')
+define(`M128', `r5')
+
+C Working state (general purpose registers; several names share one register)
+define(`H0', `r6')
+define(`H1', `r7')
+define(`H2', `r8')
+define(`T0', `r9')
+define(`T1', `r10')
+define(`T2', `r8') C Same GPR as H2
+define(`T2A', `r9') C Same GPR as T0
+define(`T2S', `r10') C Same GPR as T1
+define(`IDX', `r6') C Same GPR as H0
+define(`RZ', `r7') C Same GPR as H1
+
+define(`ZERO', `v0')
+define(`F0', `v1')
+define(`F1', `v2')
+define(`F0S', `v3')
+define(`T', `v4')
+
+define(`R', `v5')
+define(`S', `v6')
+
+define(`T00', `v7')
+define(`T10', `v8')
+define(`T11', `v9')
+define(`MU0', `v10')
+define(`MU1', `v11')
+define(`TMP', `v12')
+
+.text
+
+C void _nettle_poly1305_set_key(struct poly1305_ctx *ctx, const uint8_t key[16])
+define(`FUNC_ALIGN', `5')
+PROLOGUE(_nettle_poly1305_set_key)
+ li r9, 0 C Zero, used below to reset the state
+ addis r5, TOCP, .key_mask@got@ha C Fetch the address of .key_mask through the GOT
+ ld r5, .key_mask@got@l(r5)
+ ld r8, 0(r5) C r8 = 0x0FFFFFFC0FFFFFFC
+ ori r7, r8, 3 C r7 = 0x0FFFFFFC0FFFFFFF
+
+ C Load R_0 and R_1
+IF_LE(`
+ ld r5, 0(r4)
+ ld r6, 8(r4)
+')
+IF_BE(`
+ ldbrx r5, 0, r4 C Byte-reversed loads, so the key bytes are read as little-endian words
+ addi r4, r4, 8
+ ldbrx r6, 0, r4
+')
+ and r5, r5, r7 C R_0 &= 0x0FFFFFFC0FFFFFFF
+ and r6, r6, r8 C R_1 &= 0x0FFFFFFC0FFFFFFC
+
+ srdi r10, r6, 2 C r10 = R_1 >> 2
+ sldi r7, r5, 2
+ sldi r8, r10, 2
+ add r7, r7, r5 C r7 = 5 * R_0
+ add r8, r8, r10 C r8 = 5 * (R_1 >> 2)
+
+ C Store key
+ std r5, P1305_R0 (r3)
+ std r6, P1305_R1 (r3)
+ std r7, P1305_S0 (r3)
+ std r8, P1305_S1 (r3)
+ C Reset state
+ std r9, P1305_H0 (r3)
+ std r9, P1305_H1 (r3)
+ std r9, P1305_H2 (r3)
+
+ blr
+EPILOGUE(_nettle_poly1305_set_key)
+
+C void _nettle_poly1305_block(struct poly1305_ctx *ctx, const uint8_t *m, unsigned m128)
+define(`FUNC_ALIGN', `5')
+PROLOGUE(_nettle_poly1305_block)
+ ld H0, P1305_H0 (CTX) C Load current state
+ ld H1, P1305_H1 (CTX)
+ ld H2, P1305_H2 (CTX)
+IF_LE(`
+ ld T0, 0(M) C Load the 16 byte block as two 64-bit words
+ ld T1, 8(M)
+')
+IF_BE(`
+ ldbrx T0, 0, M C Byte-reversed loads give the same word values as the LE path
+ addi M, M, 8
+ ldbrx T1, 0, M C Upper word must go to T1, not overwrite T0
+')
+
+ addc T0, T0, H0 C Add block to state, carry propagates upward
+ adde T1, T1, H1
+ adde T2, M128, H2 C T2 = H2 + m128 + carry
+
+ mtvsrdd VSR(T), T0, T1 C Pack the two low words into one vector register
+
+ li IDX, P1305_S0
+ lxvd2x VSR(R), 0, CTX C 16 bytes at offset 0 of ctx
+ lxvd2x VSR(S), IDX, CTX C 16 bytes at offset P1305_S0 of ctx
+
+ andi. T2A, T2, 3 C Low 2 bits of T2
+ srdi T2S, T2, 2 C Remaining high bits of T2
+
+ li RZ, 0
+ vxor ZERO, ZERO, ZERO
+
+ xxpermdi VSR(MU0), VSR(R), VSR(S), 0b01 C One doubleword taken from R and one from S
+ xxswapd VSR(MU1), VSR(R) C R with its doublewords swapped
+
+ mtvsrdd VSR(T11), 0, T2A
+ mtvsrdd VSR(T00), T2S, RZ
+ mtvsrdd VSR(T10), 0, T2
+
+ vmsumudm F0, T, MU0, ZERO C Multiply-sum of doubleword pairs into 128-bit results
+ vmsumudm F1, T, MU1, ZERO
+ vmsumudm TMP, T11, MU1, ZERO
+
+ vmsumudm F0, T00, S, F0 C Accumulate onto the previous F0 and F1
+ vmsumudm F1, T10, MU0, F1
+
+ xxmrgld VSR(TMP), VSR(TMP), VSR(ZERO)
+ xxswapd VSR(F0S), VSR(F0)
+ vadduqm F1, F1, TMP
+ stxsd F0S, P1305_H0 (CTX) C Store new H0
+
+ li IDX, P1305_H1
+ xxmrghd VSR(F0), VSR(ZERO), VSR(F0)
+ vadduqm F1, F1, F0 C Add the high doubleword of F0 into F1
+ xxswapd VSR(F1), VSR(F1)
+ stxvd2x VSR(F1), IDX, CTX C Store 16 bytes starting at P1305_H1
+
+ blr
+EPILOGUE(_nettle_poly1305_block)
+
+C void _nettle_poly1305_digest (struct poly1305_ctx *ctx, union nettle_block16 *s)
+define(`FUNC_ALIGN', `5')
+PROLOGUE(_nettle_poly1305_digest)
+ C Load current state
+ ld r5, P1305_H0 (r3)
+ ld r6, P1305_H1 (r3)
+ ld r7, P1305_H2 (r3)
+
+ C Fold high part of H2
+ li r10, 0
+ srdi r9, r7, 2 C r9 = H2 >> 2
+ sldi r8, r9, 2
+ add r8, r8, r9 C r8 = 5 * (H2 >> 2)
+ andi. r7, r7, 3 C Keep only the low 2 bits of H2
+ addc r5, r5, r8
+ adde r6, r6, r10 C r10 is zero, so this only propagates the carry
+ adde r7, r7, r10
+
+ C Add 5 to state, save result if it carries
+ li r8, 5
+ li r9, 0
+ li r10, -4
+ addc r8, r8, r5
+ adde r9, r9, r6
+ adde. r10, r10, r7 C CR0 eq is set when r7 plus carry equals 4
+ iseleq r5, r8, r5 C On eq take the words of state + 5, otherwise keep the state
+ iseleq r6, r9, r6
+
+ C Load the 16 byte value at s
+IF_LE(`
+ ld r7, 0(r4)
+ ld r8, 8(r4)
+')
+IF_BE(`
+ li r10, 8 C Index of the second word, reused by the store below
+ ldbrx r7, 0, r4
+ ldbrx r8, r10, r4
+')
+
+ C Add hash to digest
+ addc r5, r5, r7
+ adde r6, r6, r8 C Carry out of the upper word is dropped
+
+ C Store digest back to s
+IF_LE(`
+ std r5, 0(r4)
+ std r6, 8(r4)
+')
+IF_BE(`
+ stdbrx r5, 0, r4
+ stdbrx r6, r10, r4
+')
+ C Reset hash
+ li r9, 0
+ std r9, P1305_H0 (r3)
+ std r9, P1305_H1 (r3)
+ std r9, P1305_H2 (r3)
+
+ blr
+EPILOGUE(_nettle_poly1305_digest)
+
+.rodata
+.align 3
+.key_mask:
+.quad 0x0FFFFFFC0FFFFFFC C Mask applied to R_1 in set_key, which ORs in 3 to form the R_0 mask