From 20bb4ef038a97b8bb5c07d2a1125019a93f618b3 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 2 Jul 2019 21:41:19 +0200 Subject: crypto: arm/aes-ce - cosmetic/whitespace cleanup Rearrange the aes_algs[] array for legibility. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm/crypto/aes-ce-glue.c | 116 ++++++++++++++++++++---------------------- 1 file changed, 56 insertions(+), 60 deletions(-) (limited to 'arch') diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c index 620aacf0d128..e4139a0b0d75 100644 --- a/arch/arm/crypto/aes-ce-glue.c +++ b/arch/arm/crypto/aes-ce-glue.c @@ -334,69 +334,65 @@ static int xts_decrypt(struct skcipher_request *req) } static struct skcipher_alg aes_algs[] = { { - .base = { - .cra_name = "__ecb(aes)", - .cra_driver_name = "__ecb-aes-ce", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_INTERNAL, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypto_aes_ctx), - .cra_module = THIS_MODULE, - }, - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .setkey = ce_aes_setkey, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, + .base.cra_name = "__ecb(aes)", + .base.cra_driver_name = "__ecb-aes-ce", + .base.cra_priority = 300, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct crypto_aes_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = ce_aes_setkey, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, }, { - .base = { - .cra_name = "__cbc(aes)", - .cra_driver_name = "__cbc-aes-ce", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_INTERNAL, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypto_aes_ctx), - .cra_module = THIS_MODULE, - }, - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = ce_aes_setkey, - .encrypt = cbc_encrypt, - .decrypt = cbc_decrypt, + .base.cra_name = "__cbc(aes)", + .base.cra_driver_name = "__cbc-aes-ce", + .base.cra_priority = 300, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct crypto_aes_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ce_aes_setkey, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, }, { - .base = { - .cra_name = "__ctr(aes)", - .cra_driver_name = "__ctr-aes-ce", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_INTERNAL, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct crypto_aes_ctx), - .cra_module = THIS_MODULE, - }, - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .chunksize = AES_BLOCK_SIZE, - .setkey = ce_aes_setkey, - .encrypt = ctr_encrypt, - .decrypt = ctr_encrypt, + .base.cra_name = "__ctr(aes)", + .base.cra_driver_name = "__ctr-aes-ce", + .base.cra_priority = 300, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct crypto_aes_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .chunksize = AES_BLOCK_SIZE, + .setkey = ce_aes_setkey, + .encrypt = ctr_encrypt, + .decrypt = ctr_encrypt, }, { - .base = { - .cra_name = "__xts(aes)", - .cra_driver_name = "__xts-aes-ce", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_INTERNAL, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypto_aes_xts_ctx), - .cra_module = THIS_MODULE, - }, - .min_keysize = 2 * AES_MIN_KEY_SIZE, - .max_keysize = 2 * AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = xts_set_key, - .encrypt = xts_encrypt, - .decrypt = xts_decrypt, + .base.cra_name = "__xts(aes)", + .base.cra_driver_name = "__xts-aes-ce", + .base.cra_priority = 300, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct crypto_aes_xts_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = xts_set_key, + .encrypt = xts_encrypt, + .decrypt = xts_decrypt, } }; static struct simd_skcipher_alg *aes_simd_algs[ARRAY_SIZE(aes_algs)]; -- cgit v1.2.1 From 724ecd3c0eb7040d423b22332a60d097e2666820 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 2 Jul 2019 21:41:20 +0200 Subject: crypto: aes - rename local routines to prevent future clashes Rename some local AES encrypt/decrypt routines so they don't clash with the names we are about to introduce for the routines exposed by the generic AES library. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm/crypto/aes-cipher-glue.c | 8 ++++---- arch/arm64/crypto/aes-cipher-glue.c | 8 ++++---- arch/sparc/crypto/aes_glue.c | 8 ++++---- arch/x86/crypto/aesni-intel_glue.c | 8 ++++---- 4 files changed, 16 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/arm/crypto/aes-cipher-glue.c b/arch/arm/crypto/aes-cipher-glue.c index 128d0a1ac068..6efb3c04353f 100644 --- a/arch/arm/crypto/aes-cipher-glue.c +++ b/arch/arm/crypto/aes-cipher-glue.c @@ -16,7 +16,7 @@ EXPORT_SYMBOL(__aes_arm_encrypt); asmlinkage void __aes_arm_decrypt(u32 *rk, int rounds, const u8 *in, u8 *out); EXPORT_SYMBOL(__aes_arm_decrypt); -static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) +static void aes_arm_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); int rounds = 6 + ctx->key_length / 4; @@ -24,7 +24,7 @@ static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) __aes_arm_encrypt(ctx->key_enc, rounds, in, out); } -static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) +static void aes_arm_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); int rounds = 6 + ctx->key_length / 4; @@ -44,8 +44,8 @@ static struct crypto_alg aes_alg = { .cra_cipher.cia_min_keysize = AES_MIN_KEY_SIZE, .cra_cipher.cia_max_keysize = AES_MAX_KEY_SIZE, .cra_cipher.cia_setkey = crypto_aes_set_key, - .cra_cipher.cia_encrypt = aes_encrypt, - .cra_cipher.cia_decrypt = aes_decrypt, + .cra_cipher.cia_encrypt = aes_arm_encrypt, + .cra_cipher.cia_decrypt = aes_arm_decrypt, #ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS .cra_alignmask = 3, diff --git a/arch/arm64/crypto/aes-cipher-glue.c b/arch/arm64/crypto/aes-cipher-glue.c index 0913966aa6fa..cc7a6dad7c2e 100644 --- a/arch/arm64/crypto/aes-cipher-glue.c +++ b/arch/arm64/crypto/aes-cipher-glue.c @@ -15,7 +15,7 @@ EXPORT_SYMBOL(__aes_arm64_encrypt); asmlinkage void __aes_arm64_decrypt(u32 *rk, u8 *out, const u8 *in, int rounds); EXPORT_SYMBOL(__aes_arm64_decrypt); -static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) +static void aes_arm64_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); int rounds = 6 + ctx->key_length / 4; @@ -23,7 +23,7 @@ static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) __aes_arm64_encrypt(ctx->key_enc, out, in, rounds); } -static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) +static void aes_arm64_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); int rounds = 6 + ctx->key_length / 4; @@ -43,8 +43,8 @@ static struct crypto_alg aes_alg = { .cra_cipher.cia_min_keysize = AES_MIN_KEY_SIZE, .cra_cipher.cia_max_keysize = AES_MAX_KEY_SIZE, .cra_cipher.cia_setkey = crypto_aes_set_key, - .cra_cipher.cia_encrypt = aes_encrypt, - .cra_cipher.cia_decrypt = aes_decrypt + .cra_cipher.cia_encrypt = aes_arm64_encrypt, + .cra_cipher.cia_decrypt = aes_arm64_decrypt }; static int __init aes_init(void) diff --git a/arch/sparc/crypto/aes_glue.c b/arch/sparc/crypto/aes_glue.c index 57b474113168..7b946b3dee9d 100644 --- a/arch/sparc/crypto/aes_glue.c +++ b/arch/sparc/crypto/aes_glue.c @@ -197,14 +197,14 @@ static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, return 0; } -static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +static void crypto_aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { struct crypto_sparc64_aes_ctx *ctx = crypto_tfm_ctx(tfm); ctx->ops->encrypt(&ctx->key[0], (const u32 *) src, (u32 *) dst); } -static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +static void crypto_aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { struct crypto_sparc64_aes_ctx *ctx = crypto_tfm_ctx(tfm); @@ -396,8 +396,8 @@ static struct crypto_alg algs[] = { { .cia_min_keysize = AES_MIN_KEY_SIZE, .cia_max_keysize = AES_MAX_KEY_SIZE, .cia_setkey = aes_set_key, - .cia_encrypt = aes_encrypt, - .cia_decrypt = aes_decrypt + .cia_encrypt = crypto_aes_encrypt, + .cia_decrypt = crypto_aes_decrypt } } }, { diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 73c0ccb009a0..6c1be5b6134a 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -345,7 +345,7 @@ static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, return aes_set_key_common(tfm, crypto_tfm_ctx(tfm), in_key, key_len); } -static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +static void aesni_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); @@ -358,7 +358,7 @@ static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) } } -static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +static void aesni_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); @@ -919,8 +919,8 @@ static struct crypto_alg aesni_cipher_alg = { .cia_min_keysize = AES_MIN_KEY_SIZE, .cia_max_keysize = AES_MAX_KEY_SIZE, .cia_setkey = aes_set_key, - .cia_encrypt = aes_encrypt, - .cia_decrypt = aes_decrypt + .cia_encrypt = aesni_encrypt, + .cia_decrypt = aesni_decrypt } } }; -- cgit v1.2.1 From 2c53fd11f7624658222d175ec27e6c07b20b63d0 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 2 Jul 2019 21:41:23 +0200 Subject: crypto: x86/aes-ni - switch to generic for fallback and key routines The AES-NI code contains fallbacks for invocations that occur from a context where the SIMD unit is unavailable, which really only occurs when running in softirq context that was entered from a hard IRQ that was taken while running kernel code that was already using the FPU. That means performance is not really a consideration, and we can just use the new library code for this use case, which has a smaller footprint and is believed to be time invariant. This will allow us to drop the non-SIMD asm routines in a subsequent patch. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_glue.c | 15 +++++++-------- arch/x86/include/asm/crypto/aes.h | 12 ------------ 2 files changed, 7 insertions(+), 20 deletions(-) delete mode 100644 arch/x86/include/asm/crypto/aes.h (limited to 'arch') diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 6c1be5b6134a..ef165d8cf443 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include @@ -329,7 +328,7 @@ static int aes_set_key_common(struct crypto_tfm *tfm, void *raw_ctx, } if (!crypto_simd_usable()) - err = crypto_aes_expand_key(ctx, in_key, key_len); + err = aes_expandkey(ctx, in_key, key_len); else { kernel_fpu_begin(); err = aesni_set_key(ctx, in_key, key_len); @@ -349,9 +348,9 @@ static void aesni_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); - if (!crypto_simd_usable()) - crypto_aes_encrypt_x86(ctx, dst, src); - else { + if (!crypto_simd_usable()) { + aes_encrypt(ctx, dst, src); + } else { kernel_fpu_begin(); aesni_enc(ctx, dst, src); kernel_fpu_end(); @@ -362,9 +361,9 @@ static void aesni_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); - if (!crypto_simd_usable()) - crypto_aes_decrypt_x86(ctx, dst, src); - else { + if (!crypto_simd_usable()) { + aes_decrypt(ctx, dst, src); + } else { kernel_fpu_begin(); aesni_dec(ctx, dst, src); kernel_fpu_end(); diff --git a/arch/x86/include/asm/crypto/aes.h b/arch/x86/include/asm/crypto/aes.h deleted file mode 100644 index c508521dd190..000000000000 --- a/arch/x86/include/asm/crypto/aes.h +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef ASM_X86_AES_H -#define ASM_X86_AES_H - -#include -#include - -void crypto_aes_encrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst, - const u8 *src); -void crypto_aes_decrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst, - const u8 *src); -#endif -- cgit v1.2.1 From 1d2c3279311e4f03fcf164e1366f2fda9f4bfccf Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 2 Jul 2019 21:41:24 +0200 Subject: crypto: x86/aes - drop scalar assembler implementations The AES assembler code for x86 isn't actually faster than code generated by the compiler from aes_generic.c, and considering the disproportionate maintenance burden of assembler code on x86, it is better just to drop it entirely. Modern x86 systems will use AES-NI anyway, and given that the modules being removed have a dependency on aes_generic already, we can remove them without running the risk of regressions. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/x86/crypto/Makefile | 4 - arch/x86/crypto/aes-i586-asm_32.S | 362 ------------------------------------ arch/x86/crypto/aes-x86_64-asm_64.S | 185 ------------------ arch/x86/crypto/aes_glue.c | 70 ------- 4 files changed, 621 deletions(-) delete mode 100644 arch/x86/crypto/aes-i586-asm_32.S delete mode 100644 arch/x86/crypto/aes-x86_64-asm_64.S (limited to 'arch') diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 45734e1cf967..b96a14e67ab0 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -14,11 +14,9 @@ sha256_ni_supported :=$(call as-instr,sha256msg1 %xmm0$(comma)%xmm1,yes,no) obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o -obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o -obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o obj-$(CONFIG_CRYPTO_DES3_EDE_X86_64) += des3_ede-x86_64.o obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o @@ -68,11 +66,9 @@ ifeq ($(avx2_supported),yes) obj-$(CONFIG_CRYPTO_MORUS1280_AVX2) += morus1280-avx2.o endif -aes-i586-y := aes-i586-asm_32.o aes_glue.o twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o -aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o des3_ede-x86_64-y := des3_ede-asm_64.o des3_ede_glue.o camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o diff --git a/arch/x86/crypto/aes-i586-asm_32.S b/arch/x86/crypto/aes-i586-asm_32.S deleted file mode 100644 index 2849dbc59e11..000000000000 --- a/arch/x86/crypto/aes-i586-asm_32.S +++ /dev/null @@ -1,362 +0,0 @@ -// ------------------------------------------------------------------------- -// Copyright (c) 2001, Dr Brian Gladman < >, Worcester, UK. -// All rights reserved. -// -// LICENSE TERMS -// -// The free distribution and use of this software in both source and binary -// form is allowed (with or without changes) provided that: -// -// 1. distributions of this source code include the above copyright -// notice, this list of conditions and the following disclaimer// -// -// 2. distributions in binary form include the above copyright -// notice, this list of conditions and the following disclaimer -// in the documentation and/or other associated materials// -// -// 3. the copyright holder's name is not used to endorse products -// built using this software without specific written permission. -// -// -// ALTERNATIVELY, provided that this notice is retained in full, this product -// may be distributed under the terms of the GNU General Public License (GPL), -// in which case the provisions of the GPL apply INSTEAD OF those given above. -// -// Copyright (c) 2004 Linus Torvalds -// Copyright (c) 2004 Red Hat, Inc., James Morris - -// DISCLAIMER -// -// This software is provided 'as is' with no explicit or implied warranties -// in respect of its properties including, but not limited to, correctness -// and fitness for purpose. -// ------------------------------------------------------------------------- -// Issue Date: 29/07/2002 - -.file "aes-i586-asm.S" -.text - -#include -#include - -#define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words) - -/* offsets to parameters with one register pushed onto stack */ -#define ctx 8 -#define out_blk 12 -#define in_blk 16 - -/* offsets in crypto_aes_ctx structure */ -#define klen (480) -#define ekey (0) -#define dkey (240) - -// register mapping for encrypt and decrypt subroutines - -#define r0 eax -#define r1 ebx -#define r2 ecx -#define r3 edx -#define r4 esi -#define r5 edi - -#define eaxl al -#define eaxh ah -#define ebxl bl -#define ebxh bh -#define ecxl cl -#define ecxh ch -#define edxl dl -#define edxh dh - -#define _h(reg) reg##h -#define h(reg) _h(reg) - -#define _l(reg) reg##l -#define l(reg) _l(reg) - -// This macro takes a 32-bit word representing a column and uses -// each of its four bytes to index into four tables of 256 32-bit -// words to obtain values that are then xored into the appropriate -// output registers r0, r1, r4 or r5. - -// Parameters: -// table table base address -// %1 out_state[0] -// %2 out_state[1] -// %3 out_state[2] -// %4 out_state[3] -// idx input register for the round (destroyed) -// tmp scratch register for the round -// sched key schedule - -#define do_col(table, a1,a2,a3,a4, idx, tmp) \ - movzx %l(idx),%tmp; \ - xor table(,%tmp,4),%a1; \ - movzx %h(idx),%tmp; \ - shr $16,%idx; \ - xor table+tlen(,%tmp,4),%a2; \ - movzx %l(idx),%tmp; \ - movzx %h(idx),%idx; \ - xor table+2*tlen(,%tmp,4),%a3; \ - xor table+3*tlen(,%idx,4),%a4; - -// initialise output registers from the key schedule -// NB1: original value of a3 is in idx on exit -// NB2: original values of a1,a2,a4 aren't used -#define do_fcol(table, a1,a2,a3,a4, idx, tmp, sched) \ - mov 0 sched,%a1; \ - movzx %l(idx),%tmp; \ - mov 12 sched,%a2; \ - xor table(,%tmp,4),%a1; \ - mov 4 sched,%a4; \ - movzx %h(idx),%tmp; \ - shr $16,%idx; \ - xor table+tlen(,%tmp,4),%a2; \ - movzx %l(idx),%tmp; \ - movzx %h(idx),%idx; \ - xor table+3*tlen(,%idx,4),%a4; \ - mov %a3,%idx; \ - mov 8 sched,%a3; \ - xor table+2*tlen(,%tmp,4),%a3; - -// initialise output registers from the key schedule -// NB1: original value of a3 is in idx on exit -// NB2: original values of a1,a2,a4 aren't used -#define do_icol(table, a1,a2,a3,a4, idx, tmp, sched) \ - mov 0 sched,%a1; \ - movzx %l(idx),%tmp; \ - mov 4 sched,%a2; \ - xor table(,%tmp,4),%a1; \ - mov 12 sched,%a4; \ - movzx %h(idx),%tmp; \ - shr $16,%idx; \ - xor table+tlen(,%tmp,4),%a2; \ - movzx %l(idx),%tmp; \ - movzx %h(idx),%idx; \ - xor table+3*tlen(,%idx,4),%a4; \ - mov %a3,%idx; \ - mov 8 sched,%a3; \ - xor table+2*tlen(,%tmp,4),%a3; - - -// original Gladman had conditional saves to MMX regs. -#define save(a1, a2) \ - mov %a2,4*a1(%esp) - -#define restore(a1, a2) \ - mov 4*a2(%esp),%a1 - -// These macros perform a forward encryption cycle. They are entered with -// the first previous round column values in r0,r1,r4,r5 and -// exit with the final values in the same registers, using stack -// for temporary storage. - -// round column values -// on entry: r0,r1,r4,r5 -// on exit: r2,r1,r4,r5 -#define fwd_rnd1(arg, table) \ - save (0,r1); \ - save (1,r5); \ - \ - /* compute new column values */ \ - do_fcol(table, r2,r5,r4,r1, r0,r3, arg); /* idx=r0 */ \ - do_col (table, r4,r1,r2,r5, r0,r3); /* idx=r4 */ \ - restore(r0,0); \ - do_col (table, r1,r2,r5,r4, r0,r3); /* idx=r1 */ \ - restore(r0,1); \ - do_col (table, r5,r4,r1,r2, r0,r3); /* idx=r5 */ - -// round column values -// on entry: r2,r1,r4,r5 -// on exit: r0,r1,r4,r5 -#define fwd_rnd2(arg, table) \ - save (0,r1); \ - save (1,r5); \ - \ - /* compute new column values */ \ - do_fcol(table, r0,r5,r4,r1, r2,r3, arg); /* idx=r2 */ \ - do_col (table, r4,r1,r0,r5, r2,r3); /* idx=r4 */ \ - restore(r2,0); \ - do_col (table, r1,r0,r5,r4, r2,r3); /* idx=r1 */ \ - restore(r2,1); \ - do_col (table, r5,r4,r1,r0, r2,r3); /* idx=r5 */ - -// These macros performs an inverse encryption cycle. They are entered with -// the first previous round column values in r0,r1,r4,r5 and -// exit with the final values in the same registers, using stack -// for temporary storage - -// round column values -// on entry: r0,r1,r4,r5 -// on exit: r2,r1,r4,r5 -#define inv_rnd1(arg, table) \ - save (0,r1); \ - save (1,r5); \ - \ - /* compute new column values */ \ - do_icol(table, r2,r1,r4,r5, r0,r3, arg); /* idx=r0 */ \ - do_col (table, r4,r5,r2,r1, r0,r3); /* idx=r4 */ \ - restore(r0,0); \ - do_col (table, r1,r4,r5,r2, r0,r3); /* idx=r1 */ \ - restore(r0,1); \ - do_col (table, r5,r2,r1,r4, r0,r3); /* idx=r5 */ - -// round column values -// on entry: r2,r1,r4,r5 -// on exit: r0,r1,r4,r5 -#define inv_rnd2(arg, table) \ - save (0,r1); \ - save (1,r5); \ - \ - /* compute new column values */ \ - do_icol(table, r0,r1,r4,r5, r2,r3, arg); /* idx=r2 */ \ - do_col (table, r4,r5,r0,r1, r2,r3); /* idx=r4 */ \ - restore(r2,0); \ - do_col (table, r1,r4,r5,r0, r2,r3); /* idx=r1 */ \ - restore(r2,1); \ - do_col (table, r5,r0,r1,r4, r2,r3); /* idx=r5 */ - -// AES (Rijndael) Encryption Subroutine -/* void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */ - -.extern crypto_ft_tab -.extern crypto_fl_tab - -ENTRY(aes_enc_blk) - push %ebp - mov ctx(%esp),%ebp - -// CAUTION: the order and the values used in these assigns -// rely on the register mappings - -1: push %ebx - mov in_blk+4(%esp),%r2 - push %esi - mov klen(%ebp),%r3 // key size - push %edi -#if ekey != 0 - lea ekey(%ebp),%ebp // key pointer -#endif - -// input four columns and xor in first round key - - mov (%r2),%r0 - mov 4(%r2),%r1 - mov 8(%r2),%r4 - mov 12(%r2),%r5 - xor (%ebp),%r0 - xor 4(%ebp),%r1 - xor 8(%ebp),%r4 - xor 12(%ebp),%r5 - - sub $8,%esp // space for register saves on stack - add $16,%ebp // increment to next round key - cmp $24,%r3 - jb 4f // 10 rounds for 128-bit key - lea 32(%ebp),%ebp - je 3f // 12 rounds for 192-bit key - lea 32(%ebp),%ebp - -2: fwd_rnd1( -64(%ebp), crypto_ft_tab) // 14 rounds for 256-bit key - fwd_rnd2( -48(%ebp), crypto_ft_tab) -3: fwd_rnd1( -32(%ebp), crypto_ft_tab) // 12 rounds for 192-bit key - fwd_rnd2( -16(%ebp), crypto_ft_tab) -4: fwd_rnd1( (%ebp), crypto_ft_tab) // 10 rounds for 128-bit key - fwd_rnd2( +16(%ebp), crypto_ft_tab) - fwd_rnd1( +32(%ebp), crypto_ft_tab) - fwd_rnd2( +48(%ebp), crypto_ft_tab) - fwd_rnd1( +64(%ebp), crypto_ft_tab) - fwd_rnd2( +80(%ebp), crypto_ft_tab) - fwd_rnd1( +96(%ebp), crypto_ft_tab) - fwd_rnd2(+112(%ebp), crypto_ft_tab) - fwd_rnd1(+128(%ebp), crypto_ft_tab) - fwd_rnd2(+144(%ebp), crypto_fl_tab) // last round uses a different table - -// move final values to the output array. CAUTION: the -// order of these assigns rely on the register mappings - - add $8,%esp - mov out_blk+12(%esp),%ebp - mov %r5,12(%ebp) - pop %edi - mov %r4,8(%ebp) - pop %esi - mov %r1,4(%ebp) - pop %ebx - mov %r0,(%ebp) - pop %ebp - ret -ENDPROC(aes_enc_blk) - -// AES (Rijndael) Decryption Subroutine -/* void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */ - -.extern crypto_it_tab -.extern crypto_il_tab - -ENTRY(aes_dec_blk) - push %ebp - mov ctx(%esp),%ebp - -// CAUTION: the order and the values used in these assigns -// rely on the register mappings - -1: push %ebx - mov in_blk+4(%esp),%r2 - push %esi - mov klen(%ebp),%r3 // key size - push %edi -#if dkey != 0 - lea dkey(%ebp),%ebp // key pointer -#endif - -// input four columns and xor in first round key - - mov (%r2),%r0 - mov 4(%r2),%r1 - mov 8(%r2),%r4 - mov 12(%r2),%r5 - xor (%ebp),%r0 - xor 4(%ebp),%r1 - xor 8(%ebp),%r4 - xor 12(%ebp),%r5 - - sub $8,%esp // space for register saves on stack - add $16,%ebp // increment to next round key - cmp $24,%r3 - jb 4f // 10 rounds for 128-bit key - lea 32(%ebp),%ebp - je 3f // 12 rounds for 192-bit key - lea 32(%ebp),%ebp - -2: inv_rnd1( -64(%ebp), crypto_it_tab) // 14 rounds for 256-bit key - inv_rnd2( -48(%ebp), crypto_it_tab) -3: inv_rnd1( -32(%ebp), crypto_it_tab) // 12 rounds for 192-bit key - inv_rnd2( -16(%ebp), crypto_it_tab) -4: inv_rnd1( (%ebp), crypto_it_tab) // 10 rounds for 128-bit key - inv_rnd2( +16(%ebp), crypto_it_tab) - inv_rnd1( +32(%ebp), crypto_it_tab) - inv_rnd2( +48(%ebp), crypto_it_tab) - inv_rnd1( +64(%ebp), crypto_it_tab) - inv_rnd2( +80(%ebp), crypto_it_tab) - inv_rnd1( +96(%ebp), crypto_it_tab) - inv_rnd2(+112(%ebp), crypto_it_tab) - inv_rnd1(+128(%ebp), crypto_it_tab) - inv_rnd2(+144(%ebp), crypto_il_tab) // last round uses a different table - -// move final values to the output array. CAUTION: the -// order of these assigns rely on the register mappings - - add $8,%esp - mov out_blk+12(%esp),%ebp - mov %r5,12(%ebp) - pop %edi - mov %r4,8(%ebp) - pop %esi - mov %r1,4(%ebp) - pop %ebx - mov %r0,(%ebp) - pop %ebp - ret -ENDPROC(aes_dec_blk) diff --git a/arch/x86/crypto/aes-x86_64-asm_64.S b/arch/x86/crypto/aes-x86_64-asm_64.S deleted file mode 100644 index 8739cf7795de..000000000000 --- a/arch/x86/crypto/aes-x86_64-asm_64.S +++ /dev/null @@ -1,185 +0,0 @@ -/* AES (Rijndael) implementation (FIPS PUB 197) for x86_64 - * - * Copyright (C) 2005 Andreas Steinmetz, - * - * License: - * This code can be distributed under the terms of the GNU General Public - * License (GPL) Version 2 provided that the above header down to and - * including this sentence is retained in full. - */ - -.extern crypto_ft_tab -.extern crypto_it_tab -.extern crypto_fl_tab -.extern crypto_il_tab - -.text - -#include -#include - -#define R1 %rax -#define R1E %eax -#define R1X %ax -#define R1H %ah -#define R1L %al -#define R2 %rbx -#define R2E %ebx -#define R2X %bx -#define R2H %bh -#define R2L %bl -#define R3 %rcx -#define R3E %ecx -#define R3X %cx -#define R3H %ch -#define R3L %cl -#define R4 %rdx -#define R4E %edx -#define R4X %dx -#define R4H %dh -#define R4L %dl -#define R5 %rsi -#define R5E %esi -#define R6 %rdi -#define R6E %edi -#define R7 %r9 /* don't use %rbp; it breaks stack traces */ -#define R7E %r9d -#define R8 %r8 -#define R10 %r10 -#define R11 %r11 - -#define prologue(FUNC,KEY,B128,B192,r1,r2,r5,r6,r7,r8,r9,r10,r11) \ - ENTRY(FUNC); \ - movq r1,r2; \ - leaq KEY+48(r8),r9; \ - movq r10,r11; \ - movl (r7),r5 ## E; \ - movl 4(r7),r1 ## E; \ - movl 8(r7),r6 ## E; \ - movl 12(r7),r7 ## E; \ - movl 480(r8),r10 ## E; \ - xorl -48(r9),r5 ## E; \ - xorl -44(r9),r1 ## E; \ - xorl -40(r9),r6 ## E; \ - xorl -36(r9),r7 ## E; \ - cmpl $24,r10 ## E; \ - jb B128; \ - leaq 32(r9),r9; \ - je B192; \ - leaq 32(r9),r9; - -#define epilogue(FUNC,r1,r2,r5,r6,r7,r8,r9) \ - movq r1,r2; \ - movl r5 ## E,(r9); \ - movl r6 ## E,4(r9); \ - movl r7 ## E,8(r9); \ - movl r8 ## E,12(r9); \ - ret; \ - ENDPROC(FUNC); - -#define round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,ra,rb,rc,rd) \ - movzbl r2 ## H,r5 ## E; \ - movzbl r2 ## L,r6 ## E; \ - movl TAB+1024(,r5,4),r5 ## E;\ - movw r4 ## X,r2 ## X; \ - movl TAB(,r6,4),r6 ## E; \ - roll $16,r2 ## E; \ - shrl $16,r4 ## E; \ - movzbl r4 ## L,r7 ## E; \ - movzbl r4 ## H,r4 ## E; \ - xorl OFFSET(r8),ra ## E; \ - xorl OFFSET+4(r8),rb ## E; \ - xorl TAB+3072(,r4,4),r5 ## E;\ - xorl TAB+2048(,r7,4),r6 ## E;\ - movzbl r1 ## L,r7 ## E; \ - movzbl r1 ## H,r4 ## E; \ - movl TAB+1024(,r4,4),r4 ## E;\ - movw r3 ## X,r1 ## X; \ - roll $16,r1 ## E; \ - shrl $16,r3 ## E; \ - xorl TAB(,r7,4),r5 ## E; \ - movzbl r3 ## L,r7 ## E; \ - movzbl r3 ## H,r3 ## E; \ - xorl TAB+3072(,r3,4),r4 ## E;\ - xorl TAB+2048(,r7,4),r5 ## E;\ - movzbl r1 ## L,r7 ## E; \ - movzbl r1 ## H,r3 ## E; \ - shrl $16,r1 ## E; \ - xorl TAB+3072(,r3,4),r6 ## E;\ - movl TAB+2048(,r7,4),r3 ## E;\ - movzbl r1 ## L,r7 ## E; \ - movzbl r1 ## H,r1 ## E; \ - xorl TAB+1024(,r1,4),r6 ## E;\ - xorl TAB(,r7,4),r3 ## E; \ - movzbl r2 ## H,r1 ## E; \ - movzbl r2 ## L,r7 ## E; \ - shrl $16,r2 ## E; \ - xorl TAB+3072(,r1,4),r3 ## E;\ - xorl TAB+2048(,r7,4),r4 ## E;\ - movzbl r2 ## H,r1 ## E; \ - movzbl r2 ## L,r2 ## E; \ - xorl OFFSET+8(r8),rc ## E; \ - xorl OFFSET+12(r8),rd ## E; \ - xorl TAB+1024(,r1,4),r3 ## E;\ - xorl TAB(,r2,4),r4 ## E; - -#define move_regs(r1,r2,r3,r4) \ - movl r3 ## E,r1 ## E; \ - movl r4 ## E,r2 ## E; - -#define entry(FUNC,KEY,B128,B192) \ - prologue(FUNC,KEY,B128,B192,R2,R8,R1,R3,R4,R6,R10,R5,R11) - -#define return(FUNC) epilogue(FUNC,R8,R2,R5,R6,R3,R4,R11) - -#define encrypt_round(TAB,OFFSET) \ - round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) \ - move_regs(R1,R2,R5,R6) - -#define encrypt_final(TAB,OFFSET) \ - round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) - -#define decrypt_round(TAB,OFFSET) \ - round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4) \ - move_regs(R1,R2,R5,R6) - -#define decrypt_final(TAB,OFFSET) \ - round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4) - -/* void aes_enc_blk(stuct crypto_tfm *tfm, u8 *out, const u8 *in) */ - - entry(aes_enc_blk,0,.Le128,.Le192) - encrypt_round(crypto_ft_tab,-96) - encrypt_round(crypto_ft_tab,-80) -.Le192: encrypt_round(crypto_ft_tab,-64) - encrypt_round(crypto_ft_tab,-48) -.Le128: encrypt_round(crypto_ft_tab,-32) - encrypt_round(crypto_ft_tab,-16) - encrypt_round(crypto_ft_tab, 0) - encrypt_round(crypto_ft_tab, 16) - encrypt_round(crypto_ft_tab, 32) - encrypt_round(crypto_ft_tab, 48) - encrypt_round(crypto_ft_tab, 64) - encrypt_round(crypto_ft_tab, 80) - encrypt_round(crypto_ft_tab, 96) - encrypt_final(crypto_fl_tab,112) - return(aes_enc_blk) - -/* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in) */ - - entry(aes_dec_blk,240,.Ld128,.Ld192) - decrypt_round(crypto_it_tab,-96) - decrypt_round(crypto_it_tab,-80) -.Ld192: decrypt_round(crypto_it_tab,-64) - decrypt_round(crypto_it_tab,-48) -.Ld128: decrypt_round(crypto_it_tab,-32) - decrypt_round(crypto_it_tab,-16) - decrypt_round(crypto_it_tab, 0) - decrypt_round(crypto_it_tab, 16) - decrypt_round(crypto_it_tab, 32) - decrypt_round(crypto_it_tab, 48) - decrypt_round(crypto_it_tab, 64) - decrypt_round(crypto_it_tab, 80) - decrypt_round(crypto_it_tab, 96) - decrypt_final(crypto_il_tab,112) - return(aes_dec_blk) diff --git a/arch/x86/crypto/aes_glue.c b/arch/x86/crypto/aes_glue.c index 9e9d819e8bc3..7b7dc05fa1a4 100644 --- a/arch/x86/crypto/aes_glue.c +++ b/arch/x86/crypto/aes_glue.c @@ -1,71 +1 @@ // SPDX-License-Identifier: GPL-2.0-only -/* - * Glue Code for the asm optimized version of the AES Cipher Algorithm - * - */ - -#include -#include -#include - -asmlinkage void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in); -asmlinkage void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in); - -void crypto_aes_encrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) -{ - aes_enc_blk(ctx, dst, src); -} -EXPORT_SYMBOL_GPL(crypto_aes_encrypt_x86); - -void crypto_aes_decrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) -{ - aes_dec_blk(ctx, dst, src); -} -EXPORT_SYMBOL_GPL(crypto_aes_decrypt_x86); - -static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) -{ - aes_enc_blk(crypto_tfm_ctx(tfm), dst, src); -} - -static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) -{ - aes_dec_blk(crypto_tfm_ctx(tfm), dst, src); -} - -static struct crypto_alg aes_alg = { - .cra_name = "aes", - .cra_driver_name = "aes-asm", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_TYPE_CIPHER, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypto_aes_ctx), - .cra_module = THIS_MODULE, - .cra_u = { - .cipher = { - .cia_min_keysize = AES_MIN_KEY_SIZE, - .cia_max_keysize = AES_MAX_KEY_SIZE, - .cia_setkey = crypto_aes_set_key, - .cia_encrypt = aes_encrypt, - .cia_decrypt = aes_decrypt - } - } -}; - -static int __init aes_init(void) -{ - return crypto_register_alg(&aes_alg); -} - -static void __exit aes_fini(void) -{ - crypto_unregister_alg(&aes_alg); -} - -module_init(aes_init); -module_exit(aes_fini); - -MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, asm optimized"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS_CRYPTO("aes"); -MODULE_ALIAS_CRYPTO("aes-asm"); -- cgit v1.2.1 From fe3b99b64909e8994f2606120e4703c9a1e8c080 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 2 Jul 2019 21:41:28 +0200 Subject: crypto: arm64/ghash - switch to AES library The GHASH code uses the generic AES key expansion routines, and calls directly into the scalar table based AES cipher for arm64 from the fallback path, and since this implementation is known to be non-time invariant, doing so from a time invariant SIMD cipher is a bit nasty. So let's switch to the AES library - this makes the code more robust, and drops the dependency on the generic AES cipher, allowing us to omit it entirely in the future. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/Kconfig | 3 +-- arch/arm64/crypto/ghash-ce-glue.c | 30 ++++++++++-------------------- 2 files changed, 11 insertions(+), 22 deletions(-) (limited to 'arch') diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index d9a523ecdd83..1762055e7093 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -58,8 +58,7 @@ config CRYPTO_GHASH_ARM64_CE depends on KERNEL_MODE_NEON select CRYPTO_HASH select CRYPTO_GF128MUL - select CRYPTO_AES - select CRYPTO_AES_ARM64 + select CRYPTO_LIB_AES config CRYPTO_CRCT10DIF_ARM64_CE tristate "CRCT10DIF digest algorithm using PMULL instructions" diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c index 16c5da9be9fb..70b1469783f9 100644 --- a/arch/arm64/crypto/ghash-ce-glue.c +++ b/arch/arm64/crypto/ghash-ce-glue.c @@ -70,8 +70,6 @@ asmlinkage void pmull_gcm_decrypt(int blocks, u64 dg[], u8 dst[], asmlinkage void pmull_gcm_encrypt_block(u8 dst[], u8 const src[], u32 const rk[], int rounds); -asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds); - static int ghash_init(struct shash_desc *desc) { struct ghash_desc_ctx *ctx = shash_desc_ctx(desc); @@ -309,14 +307,13 @@ static int gcm_setkey(struct crypto_aead *tfm, const u8 *inkey, u8 key[GHASH_BLOCK_SIZE]; int ret; - ret = crypto_aes_expand_key(&ctx->aes_key, inkey, keylen); + ret = aes_expandkey(&ctx->aes_key, inkey, keylen); if (ret) { tfm->base.crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; return -EINVAL; } - __aes_arm64_encrypt(ctx->aes_key.key_enc, key, (u8[AES_BLOCK_SIZE]){}, - num_rounds(&ctx->aes_key)); + aes_encrypt(&ctx->aes_key, key, (u8[AES_BLOCK_SIZE]){}); return __ghash_setkey(&ctx->ghash_key, key, sizeof(be128)); } @@ -467,7 +464,7 @@ static int gcm_encrypt(struct aead_request *req) rk = ctx->aes_key.key_enc; } while (walk.nbytes >= 2 * AES_BLOCK_SIZE); } else { - __aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv, nrounds); + aes_encrypt(&ctx->aes_key, tag, iv); put_unaligned_be32(2, iv + GCM_IV_SIZE); while (walk.nbytes >= (2 * AES_BLOCK_SIZE)) { @@ -478,8 +475,7 @@ static int gcm_encrypt(struct aead_request *req) int remaining = blocks; do { - __aes_arm64_encrypt(ctx->aes_key.key_enc, - ks, iv, nrounds); + aes_encrypt(&ctx->aes_key, ks, iv); crypto_xor_cpy(dst, src, ks, AES_BLOCK_SIZE); crypto_inc(iv, AES_BLOCK_SIZE); @@ -495,13 +491,10 @@ static int gcm_encrypt(struct aead_request *req) walk.nbytes % (2 * AES_BLOCK_SIZE)); } if (walk.nbytes) { - __aes_arm64_encrypt(ctx->aes_key.key_enc, ks, iv, - nrounds); + aes_encrypt(&ctx->aes_key, ks, iv); if (walk.nbytes > AES_BLOCK_SIZE) { crypto_inc(iv, AES_BLOCK_SIZE); - __aes_arm64_encrypt(ctx->aes_key.key_enc, - ks + AES_BLOCK_SIZE, iv, - nrounds); + aes_encrypt(&ctx->aes_key, ks + AES_BLOCK_SIZE, iv); } } } @@ -605,7 +598,7 @@ static int gcm_decrypt(struct aead_request *req) rk = ctx->aes_key.key_enc; } while (walk.nbytes >= 2 * AES_BLOCK_SIZE); } else { - __aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv, nrounds); + aes_encrypt(&ctx->aes_key, tag, iv); put_unaligned_be32(2, iv + GCM_IV_SIZE); while (walk.nbytes >= (2 * AES_BLOCK_SIZE)) { @@ -618,8 +611,7 @@ static int gcm_decrypt(struct aead_request *req) pmull_ghash_update_p64); do { - __aes_arm64_encrypt(ctx->aes_key.key_enc, - buf, iv, nrounds); + aes_encrypt(&ctx->aes_key, buf, iv); crypto_xor_cpy(dst, src, buf, AES_BLOCK_SIZE); crypto_inc(iv, AES_BLOCK_SIZE); @@ -637,11 +629,9 @@ static int gcm_decrypt(struct aead_request *req) memcpy(iv2, iv, AES_BLOCK_SIZE); crypto_inc(iv2, AES_BLOCK_SIZE); - __aes_arm64_encrypt(ctx->aes_key.key_enc, iv2, - iv2, nrounds); + aes_encrypt(&ctx->aes_key, iv2, iv2); } - __aes_arm64_encrypt(ctx->aes_key.key_enc, iv, iv, - nrounds); + aes_encrypt(&ctx->aes_key, iv, iv); } } -- cgit v1.2.1 From aa6e2d2b355f89e22c30b41d7e6653c8189235b8 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 2 Jul 2019 21:41:29 +0200 Subject: crypto: arm/aes-neonbs - switch to library version of key expansion routine Switch to the new AES library that also provides an implementation of the AES key expansion routine. This removes the dependency on the generic AES cipher, allowing it to be omitted entirely in the future. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm/crypto/Kconfig | 2 +- arch/arm/crypto/aes-neonbs-glue.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index a95322b59799..b24df84a1d7a 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -82,8 +82,8 @@ config CRYPTO_AES_ARM_BS tristate "Bit sliced AES using NEON instructions" depends on KERNEL_MODE_NEON select CRYPTO_BLKCIPHER + select CRYPTO_LIB_AES select CRYPTO_SIMD - select CRYPTO_AES help Use a faster and more secure NEON based implementation of AES in CBC, CTR and XTS modes diff --git a/arch/arm/crypto/aes-neonbs-glue.c b/arch/arm/crypto/aes-neonbs-glue.c index bd0bee9c8f7b..8c22d10bd909 100644 --- a/arch/arm/crypto/aes-neonbs-glue.c +++ b/arch/arm/crypto/aes-neonbs-glue.c @@ -61,7 +61,7 @@ static int aesbs_setkey(struct crypto_skcipher *tfm, const u8 *in_key, struct crypto_aes_ctx rk; int err; - err = crypto_aes_expand_key(&rk, in_key, key_len); + err = aes_expandkey(&rk, in_key, key_len); if (err) return err; @@ -120,7 +120,7 @@ static int aesbs_cbc_setkey(struct crypto_skcipher *tfm, const u8 *in_key, struct crypto_aes_ctx rk; int err; - err = crypto_aes_expand_key(&rk, in_key, key_len); + err = aes_expandkey(&rk, in_key, key_len); if (err) return err; -- cgit v1.2.1 From c59a6dffa3cdf826cba92f1b967f8dba2fafec53 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 2 Jul 2019 21:41:30 +0200 Subject: crypto: arm64/aes-ccm - switch to AES library The CCM code calls directly into the scalar table based AES cipher for arm64 from the fallback path, and since this implementation is known to be non-time invariant, doing so from a time invariant SIMD cipher is a bit nasty. So let's switch to the AES library - this makes the code more robust, and drops the dependency on the generic AES cipher, allowing us to omit it entirely in the future. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/Kconfig | 2 +- arch/arm64/crypto/aes-ce-ccm-glue.c | 18 ++++++------------ 2 files changed, 7 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index 1762055e7093..c6032bfb44fb 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -80,8 +80,8 @@ config CRYPTO_AES_ARM64_CE_CCM depends on ARM64 && KERNEL_MODE_NEON select CRYPTO_ALGAPI select CRYPTO_AES_ARM64_CE - select CRYPTO_AES_ARM64 select CRYPTO_AEAD + select CRYPTO_LIB_AES config CRYPTO_AES_ARM64_CE_BLK tristate "AES in ECB/CBC/CTR/XTS modes using ARMv8 Crypto Extensions" diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c index 827e5473e5de..541cf9165748 100644 --- a/arch/arm64/crypto/aes-ce-ccm-glue.c +++ b/arch/arm64/crypto/aes-ce-ccm-glue.c @@ -43,8 +43,6 @@ asmlinkage void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes, asmlinkage void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u32 const rk[], u32 rounds); -asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds); - static int ccm_setkey(struct crypto_aead *tfm, const u8 *in_key, unsigned int key_len) { @@ -124,8 +122,7 @@ static void ccm_update_mac(struct crypto_aes_ctx *key, u8 mac[], u8 const in[], } while (abytes >= AES_BLOCK_SIZE) { - __aes_arm64_encrypt(key->key_enc, mac, mac, - num_rounds(key)); + aes_encrypt(key, mac, mac); crypto_xor(mac, in, AES_BLOCK_SIZE); in += AES_BLOCK_SIZE; @@ -133,8 +130,7 @@ static void ccm_update_mac(struct crypto_aes_ctx *key, u8 mac[], u8 const in[], } if (abytes > 0) { - __aes_arm64_encrypt(key->key_enc, mac, mac, - num_rounds(key)); + aes_encrypt(key, mac, mac); crypto_xor(mac, in, abytes); *macp = abytes; } @@ -206,10 +202,8 @@ static int ccm_crypt_fallback(struct skcipher_walk *walk, u8 mac[], u8 iv0[], bsize = nbytes; crypto_inc(walk->iv, AES_BLOCK_SIZE); - __aes_arm64_encrypt(ctx->key_enc, buf, walk->iv, - num_rounds(ctx)); - __aes_arm64_encrypt(ctx->key_enc, mac, mac, - num_rounds(ctx)); + aes_encrypt(ctx, buf, walk->iv); + aes_encrypt(ctx, mac, mac); if (enc) crypto_xor(mac, src, bsize); crypto_xor_cpy(dst, src, buf, bsize); @@ -224,8 +218,8 @@ static int ccm_crypt_fallback(struct skcipher_walk *walk, u8 mac[], u8 iv0[], } if (!err) { - __aes_arm64_encrypt(ctx->key_enc, buf, iv0, num_rounds(ctx)); - __aes_arm64_encrypt(ctx->key_enc, mac, mac, num_rounds(ctx)); + aes_encrypt(ctx, buf, iv0); + aes_encrypt(ctx, mac, mac); crypto_xor(mac, buf, AES_BLOCK_SIZE); } return err; -- cgit v1.2.1 From f68df54307aaae01df66977511ea719158201188 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 2 Jul 2019 21:41:31 +0200 Subject: crypto: arm64/aes-neonbs - switch to library version of key expansion routine Switch to the new AES library that also provides an implementation of the AES key expansion routine. This removes the dependency on the generic AES cipher, allowing it to be omitted entirely in the future. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/Kconfig | 1 + arch/arm64/crypto/aes-neonbs-glue.c | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index c6032bfb44fb..17bf5dc10aad 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -116,6 +116,7 @@ config CRYPTO_AES_ARM64_BS select CRYPTO_BLKCIPHER select CRYPTO_AES_ARM64_NEON_BLK select CRYPTO_AES_ARM64 + select CRYPTO_LIB_AES select CRYPTO_SIMD endif diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c index 281d23087697..25fe51eedc98 100644 --- a/arch/arm64/crypto/aes-neonbs-glue.c +++ b/arch/arm64/crypto/aes-neonbs-glue.c @@ -74,7 +74,7 @@ static int aesbs_setkey(struct crypto_skcipher *tfm, const u8 *in_key, struct crypto_aes_ctx rk; int err; - err = crypto_aes_expand_key(&rk, in_key, key_len); + err = aes_expandkey(&rk, in_key, key_len); if (err) return err; @@ -133,7 +133,7 @@ static int aesbs_cbc_setkey(struct crypto_skcipher *tfm, const u8 *in_key, struct crypto_aes_ctx rk; int err; - err = crypto_aes_expand_key(&rk, in_key, key_len); + err = aes_expandkey(&rk, in_key, key_len); if (err) return err; @@ -205,7 +205,7 @@ static int aesbs_ctr_setkey_sync(struct crypto_skcipher *tfm, const u8 *in_key, struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm); int err; - err = crypto_aes_expand_key(&ctx->fallback, in_key, key_len); + err = aes_expandkey(&ctx->fallback, in_key, key_len); if (err) return err; @@ -271,7 +271,7 @@ static int aesbs_xts_setkey(struct crypto_skcipher *tfm, const u8 *in_key, return err; key_len /= 2; - err = crypto_aes_expand_key(&rk, in_key + key_len, key_len); + err = aes_expandkey(&rk, in_key + key_len, key_len); if (err) return err; -- cgit v1.2.1 From c184472902d87189082e5a349051197e252ae9af Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 2 Jul 2019 21:41:32 +0200 Subject: crypto: arm64/aes-ce - switch to library version of key expansion routine Switch to the new AES library that also provides an implementation of the AES key expansion routine. This removes the dependency on the generic AES cipher, allowing it to be omitted entirely in the future. While at it, remove some references to the table based arm64 version of AES and replace them with AES library calls as well. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/Kconfig | 2 +- arch/arm64/crypto/aes-glue.c | 17 ++++++++++------- 2 files changed, 11 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index 17bf5dc10aad..66dea518221c 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -96,7 +96,7 @@ config CRYPTO_AES_ARM64_NEON_BLK depends on KERNEL_MODE_NEON select CRYPTO_BLKCIPHER select CRYPTO_AES_ARM64 - select CRYPTO_AES + select CRYPTO_LIB_AES select CRYPTO_SIMD config CRYPTO_CHACHA20_NEON diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index 8d6c8932c841..843fb27c4961 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c @@ -23,7 +23,6 @@ #ifdef USE_V8_CRYPTO_EXTENSIONS #define MODE "ce" #define PRIO 300 -#define aes_setkey ce_aes_setkey #define aes_expandkey ce_aes_expandkey #define aes_ecb_encrypt ce_aes_ecb_encrypt #define aes_ecb_decrypt ce_aes_ecb_decrypt @@ -39,8 +38,6 @@ MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions"); #else #define MODE "neon" #define PRIO 200 -#define aes_setkey crypto_aes_set_key -#define aes_expandkey crypto_aes_expand_key #define aes_ecb_encrypt neon_aes_ecb_encrypt #define aes_ecb_decrypt neon_aes_ecb_decrypt #define aes_cbc_encrypt neon_aes_cbc_encrypt @@ -118,7 +115,14 @@ struct mac_desc_ctx { static int skcipher_aes_setkey(struct crypto_skcipher *tfm, const u8 *in_key, unsigned int key_len) { - return aes_setkey(crypto_skcipher_tfm(tfm), in_key, key_len); + struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); + int ret; + + ret = aes_expandkey(ctx, in_key, key_len); + if (ret) + crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + + return ret; } static int xts_set_key(struct crypto_skcipher *tfm, const u8 *in_key, @@ -646,15 +650,14 @@ static void mac_do_update(struct crypto_aes_ctx *ctx, u8 const in[], int blocks, kernel_neon_end(); } else { if (enc_before) - __aes_arm64_encrypt(ctx->key_enc, dg, dg, rounds); + aes_encrypt(ctx, dg, dg); while (blocks--) { crypto_xor(dg, in, AES_BLOCK_SIZE); in += AES_BLOCK_SIZE; if (blocks || enc_after) - __aes_arm64_encrypt(ctx->key_enc, dg, dg, - rounds); + aes_encrypt(ctx, dg, dg); } } } -- cgit v1.2.1 From ff6f4115cb953c5be8d7a76b2ec1877df2f4c2c0 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 2 Jul 2019 21:41:35 +0200 Subject: crypto: aes - move sync ctr(aes) to AES library and generic helper In preparation of duplicating the sync ctr(aes) functionality to modules under arch/arm, move the helper function from a inline .h file to the AES library, which is already depended upon by the drivers that use this fallback. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/aes-ctr-fallback.h | 50 ------------------------------------ arch/arm64/crypto/aes-glue.c | 22 ++++++++++++---- arch/arm64/crypto/aes-neonbs-glue.c | 21 +++++++++++---- 3 files changed, 33 insertions(+), 60 deletions(-) delete mode 100644 arch/arm64/crypto/aes-ctr-fallback.h (limited to 'arch') diff --git a/arch/arm64/crypto/aes-ctr-fallback.h b/arch/arm64/crypto/aes-ctr-fallback.h deleted file mode 100644 index 3ac911990ec7..000000000000 --- a/arch/arm64/crypto/aes-ctr-fallback.h +++ /dev/null @@ -1,50 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Fallback for sync aes(ctr) in contexts where kernel mode NEON - * is not allowed - * - * Copyright (C) 2017 Linaro Ltd - */ - -#include -#include - -asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds); - -static inline int aes_ctr_encrypt_fallback(struct crypto_aes_ctx *ctx, - struct skcipher_request *req) -{ - struct skcipher_walk walk; - u8 buf[AES_BLOCK_SIZE]; - int err; - - err = skcipher_walk_virt(&walk, req, true); - - while (walk.nbytes > 0) { - u8 *dst = walk.dst.virt.addr; - u8 *src = walk.src.virt.addr; - int nbytes = walk.nbytes; - int tail = 0; - - if (nbytes < walk.total) { - nbytes = round_down(nbytes, AES_BLOCK_SIZE); - tail = walk.nbytes % AES_BLOCK_SIZE; - } - - do { - int bsize = min(nbytes, AES_BLOCK_SIZE); - - __aes_arm64_encrypt(ctx->key_enc, buf, walk.iv, - 6 + ctx->key_length / 4); - crypto_xor_cpy(dst, src, buf, bsize); - crypto_inc(walk.iv, AES_BLOCK_SIZE); - - dst += AES_BLOCK_SIZE; - src += AES_BLOCK_SIZE; - nbytes -= AES_BLOCK_SIZE; - } while (nbytes > 0); - - err = skcipher_walk_done(&walk, tail); - } - return err; -} diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index 843fb27c4961..55d6d4838708 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -18,7 +19,6 @@ #include #include "aes-ce-setkey.h" -#include "aes-ctr-fallback.h" #ifdef USE_V8_CRYPTO_EXTENSIONS #define MODE "ce" @@ -401,13 +401,25 @@ static int ctr_encrypt(struct skcipher_request *req) return err; } -static int ctr_encrypt_sync(struct skcipher_request *req) +static void ctr_encrypt_one(struct crypto_skcipher *tfm, const u8 *src, u8 *dst) { - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); + const struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); + unsigned long flags; + + /* + * Temporarily disable interrupts to avoid races where + * cachelines are evicted when the CPU is interrupted + * to do something else. + */ + local_irq_save(flags); + aes_encrypt(ctx, dst, src); + local_irq_restore(flags); +} +static int ctr_encrypt_sync(struct skcipher_request *req) +{ if (!crypto_simd_usable()) - return aes_ctr_encrypt_fallback(ctx, req); + return crypto_ctr_encrypt_walk(req, ctr_encrypt_one); return ctr_encrypt(req); } diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c index 25fe51eedc98..bafd2ebef8f1 100644 --- a/arch/arm64/crypto/aes-neonbs-glue.c +++ b/arch/arm64/crypto/aes-neonbs-glue.c @@ -8,13 +8,12 @@ #include #include #include +#include #include #include #include #include -#include "aes-ctr-fallback.h" - MODULE_AUTHOR("Ard Biesheuvel "); MODULE_LICENSE("GPL v2"); @@ -280,13 +279,25 @@ static int aesbs_xts_setkey(struct crypto_skcipher *tfm, const u8 *in_key, return aesbs_setkey(tfm, in_key, key_len); } -static int ctr_encrypt_sync(struct skcipher_request *req) +static void ctr_encrypt_one(struct crypto_skcipher *tfm, const u8 *src, u8 *dst) { - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm); + unsigned long flags; + + /* + * Temporarily disable interrupts to avoid races where + * cachelines are evicted when the CPU is interrupted + * to do something else. + */ + local_irq_save(flags); + aes_encrypt(&ctx->fallback, dst, src); + local_irq_restore(flags); +} +static int ctr_encrypt_sync(struct skcipher_request *req) +{ if (!crypto_simd_usable()) - return aes_ctr_encrypt_fallback(&ctx->fallback, req); + return crypto_ctr_encrypt_walk(req, ctr_encrypt_one); return ctr_encrypt(req); } -- cgit v1.2.1 From 4d3f9d89c745ea5494ea23e545385af1b6f81363 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 2 Jul 2019 21:41:36 +0200 Subject: crypto: arm64/aes-ce-cipher - use AES library as fallback Instead of calling into the table based scalar AES code in situations where the SIMD unit may not be used, use the generic AES code, which is more appropriate since it is less likely to be susceptible to timing attacks. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/Kconfig | 2 +- arch/arm64/crypto/aes-ce-glue.c | 7 ++----- arch/arm64/crypto/aes-cipher-glue.c | 3 --- 3 files changed, 3 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index 66dea518221c..4922c4451e7c 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -73,7 +73,7 @@ config CRYPTO_AES_ARM64_CE tristate "AES core cipher using ARMv8 Crypto Extensions" depends on ARM64 && KERNEL_MODE_NEON select CRYPTO_ALGAPI - select CRYPTO_AES_ARM64 + select CRYPTO_LIB_AES config CRYPTO_AES_ARM64_CE_CCM tristate "AES in CCM mode using ARMv8 Crypto Extensions" diff --git a/arch/arm64/crypto/aes-ce-glue.c b/arch/arm64/crypto/aes-ce-glue.c index d3bc97afde20..6d085dc56c51 100644 --- a/arch/arm64/crypto/aes-ce-glue.c +++ b/arch/arm64/crypto/aes-ce-glue.c @@ -20,9 +20,6 @@ MODULE_DESCRIPTION("Synchronous AES cipher using ARMv8 Crypto Extensions"); MODULE_AUTHOR("Ard Biesheuvel "); MODULE_LICENSE("GPL v2"); -asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds); -asmlinkage void __aes_arm64_decrypt(u32 *rk, u8 *out, const u8 *in, int rounds); - struct aes_block { u8 b[AES_BLOCK_SIZE]; }; @@ -51,7 +48,7 @@ static void aes_cipher_encrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[]) struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); if (!crypto_simd_usable()) { - __aes_arm64_encrypt(ctx->key_enc, dst, src, num_rounds(ctx)); + aes_encrypt(ctx, dst, src); return; } @@ -65,7 +62,7 @@ static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[]) struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); if (!crypto_simd_usable()) { - __aes_arm64_decrypt(ctx->key_dec, dst, src, num_rounds(ctx)); + aes_decrypt(ctx, dst, src); return; } diff --git a/arch/arm64/crypto/aes-cipher-glue.c b/arch/arm64/crypto/aes-cipher-glue.c index cc7a6dad7c2e..8caf6dfefce8 100644 --- a/arch/arm64/crypto/aes-cipher-glue.c +++ b/arch/arm64/crypto/aes-cipher-glue.c @@ -10,10 +10,7 @@ #include asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds); -EXPORT_SYMBOL(__aes_arm64_encrypt); - asmlinkage void __aes_arm64_decrypt(u32 *rk, u8 *out, const u8 *in, int rounds); -EXPORT_SYMBOL(__aes_arm64_decrypt); static void aes_arm64_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { -- cgit v1.2.1 From fafb1dca6fad0efeeaaaeae5067cf7e194a47027 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 2 Jul 2019 21:41:37 +0200 Subject: crypto: arm/aes - use native endiannes for key schedule Align ARM's hw instruction based AES implementation with other versions that keep the key schedule in native endianness. This will allow us to merge the various implementations going forward. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm/crypto/aes-ce-core.S | 20 ++++++++++---------- arch/arm/crypto/aes-ce-glue.c | 9 +++------ 2 files changed, 13 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/arm/crypto/aes-ce-core.S b/arch/arm/crypto/aes-ce-core.S index caac519d6249..425000232d49 100644 --- a/arch/arm/crypto/aes-ce-core.S +++ b/arch/arm/crypto/aes-ce-core.S @@ -88,19 +88,19 @@ .macro do_block, dround, fround cmp r3, #12 @ which key size? - vld1.8 {q10-q11}, [ip]! + vld1.32 {q10-q11}, [ip]! \dround q8, q9 - vld1.8 {q12-q13}, [ip]! + vld1.32 {q12-q13}, [ip]! \dround q10, q11 - vld1.8 {q10-q11}, [ip]! + vld1.32 {q10-q11}, [ip]! \dround q12, q13 - vld1.8 {q12-q13}, [ip]! + vld1.32 {q12-q13}, [ip]! \dround q10, q11 blo 0f @ AES-128: 10 rounds - vld1.8 {q10-q11}, [ip]! + vld1.32 {q10-q11}, [ip]! \dround q12, q13 beq 1f @ AES-192: 12 rounds - vld1.8 {q12-q13}, [ip] + vld1.32 {q12-q13}, [ip] \dround q10, q11 0: \fround q12, q13, q14 bx lr @@ -149,8 +149,8 @@ ENDPROC(aes_decrypt_3x) .macro prepare_key, rk, rounds add ip, \rk, \rounds, lsl #4 - vld1.8 {q8-q9}, [\rk] @ load first 2 round keys - vld1.8 {q14}, [ip] @ load last round key + vld1.32 {q8-q9}, [\rk] @ load first 2 round keys + vld1.32 {q14}, [ip] @ load last round key .endm /* @@ -505,8 +505,8 @@ ENDPROC(ce_aes_sub) * operation on round key *src */ ENTRY(ce_aes_invert) - vld1.8 {q0}, [r1] + vld1.32 {q0}, [r1] aesimc.8 q0, q0 - vst1.8 {q0}, [r0] + vst1.32 {q0}, [r0] bx lr ENDPROC(ce_aes_invert) diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c index e4139a0b0d75..36d1a5301284 100644 --- a/arch/arm/crypto/aes-ce-glue.c +++ b/arch/arm/crypto/aes-ce-glue.c @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -77,21 +78,17 @@ static int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key, key_len != AES_KEYSIZE_256) return -EINVAL; - memcpy(ctx->key_enc, in_key, key_len); ctx->key_length = key_len; + for (i = 0; i < kwords; i++) + ctx->key_enc[i] = get_unaligned_le32(in_key + i * sizeof(u32)); kernel_neon_begin(); for (i = 0; i < sizeof(rcon); i++) { u32 *rki = ctx->key_enc + (i * kwords); u32 *rko = rki + kwords; -#ifndef CONFIG_CPU_BIG_ENDIAN rko[0] = ror32(ce_aes_sub(rki[kwords - 1]), 8); rko[0] = rko[0] ^ rki[0] ^ rcon[i]; -#else - rko[0] = rol32(ce_aes_sub(rki[kwords - 1]), 8); - rko[0] = rko[0] ^ rki[0] ^ (rcon[i] << 24); -#endif rko[1] = rko[0] ^ rki[1]; rko[2] = rko[1] ^ rki[2]; rko[3] = rko[2] ^ rki[3]; -- cgit v1.2.1 From 5eedf315f27d3990f4d00c7cd3b9751e184a83b1 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 2 Jul 2019 21:41:38 +0200 Subject: crypto: arm/aes-ce - provide a synchronous version of ctr(aes) AES in CTR mode is used by modes such as GCM and CCM, which are often used in contexts where only synchronous ciphers are permitted. So provide a synchronous version of ctr(aes) based on the existing code. This requires a non-SIMD fallback to deal with invocations occurring from a context where SIMD instructions may not be used. We have a helper for this now in the AES library, so wire that up. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm/crypto/aes-ce-glue.c | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'arch') diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c index 36d1a5301284..a7265d0a7063 100644 --- a/arch/arm/crypto/aes-ce-glue.c +++ b/arch/arm/crypto/aes-ce-glue.c @@ -7,8 +7,10 @@ #include #include +#include #include #include +#include #include #include #include @@ -286,6 +288,29 @@ static int ctr_encrypt(struct skcipher_request *req) return err; } +static void ctr_encrypt_one(struct crypto_skcipher *tfm, const u8 *src, u8 *dst) +{ + struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); + unsigned long flags; + + /* + * Temporarily disable interrupts to avoid races where + * cachelines are evicted when the CPU is interrupted + * to do something else. + */ + local_irq_save(flags); + aes_encrypt(ctx, dst, src); + local_irq_restore(flags); +} + +static int ctr_encrypt_sync(struct skcipher_request *req) +{ + if (!crypto_simd_usable()) + return crypto_ctr_encrypt_walk(req, ctr_encrypt_one); + + return ctr_encrypt(req); +} + static int xts_encrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); @@ -375,6 +400,21 @@ static struct skcipher_alg aes_algs[] = { { .setkey = ce_aes_setkey, .encrypt = ctr_encrypt, .decrypt = ctr_encrypt, +}, { + .base.cra_name = "ctr(aes)", + .base.cra_driver_name = "ctr-aes-ce-sync", + .base.cra_priority = 300 - 1, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct crypto_aes_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .chunksize = AES_BLOCK_SIZE, + .setkey = ce_aes_setkey, + .encrypt = ctr_encrypt_sync, + .decrypt = ctr_encrypt_sync, }, { .base.cra_name = "__xts(aes)", .base.cra_driver_name = "__xts-aes-ce", @@ -418,6 +458,9 @@ static int __init aes_init(void) return err; for (i = 0; i < ARRAY_SIZE(aes_algs); i++) { + if (!(aes_algs[i].base.cra_flags & CRYPTO_ALG_INTERNAL)) + continue; + algname = aes_algs[i].base.cra_name + 2; drvname = aes_algs[i].base.cra_driver_name + 2; basename = aes_algs[i].base.cra_driver_name; -- cgit v1.2.1 From e5f050402f1870317826990e968493e4411a0d53 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 2 Jul 2019 21:41:39 +0200 Subject: crypto: arm/aes-neonbs - provide a synchronous version of ctr(aes) AES in CTR mode is used by modes such as GCM and CCM, which are often used in contexts where only synchronous ciphers are permitted. So provide a synchronous version of ctr(aes) based on the existing code. This requires a non-SIMD fallback to deal with invocations occurring from a context where SIMD instructions may not be used. We have a helper for this now in the AES library, so wire that up. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm/crypto/aes-neonbs-glue.c | 65 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) (limited to 'arch') diff --git a/arch/arm/crypto/aes-neonbs-glue.c b/arch/arm/crypto/aes-neonbs-glue.c index 8c22d10bd909..45cd9818791e 100644 --- a/arch/arm/crypto/aes-neonbs-glue.c +++ b/arch/arm/crypto/aes-neonbs-glue.c @@ -6,8 +6,10 @@ */ #include +#include #include #include +#include #include #include #include @@ -54,6 +56,11 @@ struct aesbs_xts_ctx { struct crypto_cipher *tweak_tfm; }; +struct aesbs_ctr_ctx { + struct aesbs_ctx key; /* must be first member */ + struct crypto_aes_ctx fallback; +}; + static int aesbs_setkey(struct crypto_skcipher *tfm, const u8 *in_key, unsigned int key_len) { @@ -189,6 +196,25 @@ static void cbc_exit(struct crypto_tfm *tfm) crypto_free_cipher(ctx->enc_tfm); } +static int aesbs_ctr_setkey_sync(struct crypto_skcipher *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm); + int err; + + err = aes_expandkey(&ctx->fallback, in_key, key_len); + if (err) + return err; + + ctx->key.rounds = 6 + key_len / 4; + + kernel_neon_begin(); + aesbs_convert_key(ctx->key.rk, ctx->fallback.key_enc, ctx->key.rounds); + kernel_neon_end(); + + return 0; +} + static int ctr_encrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); @@ -231,6 +257,29 @@ static int ctr_encrypt(struct skcipher_request *req) return err; } +static void ctr_encrypt_one(struct crypto_skcipher *tfm, const u8 *src, u8 *dst) +{ + struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm); + unsigned long flags; + + /* + * Temporarily disable interrupts to avoid races where + * cachelines are evicted when the CPU is interrupted + * to do something else. + */ + local_irq_save(flags); + aes_encrypt(&ctx->fallback, dst, src); + local_irq_restore(flags); +} + +static int ctr_encrypt_sync(struct skcipher_request *req) +{ + if (!crypto_simd_usable()) + return crypto_ctr_encrypt_walk(req, ctr_encrypt_one); + + return ctr_encrypt(req); +} + static int aesbs_xts_setkey(struct crypto_skcipher *tfm, const u8 *in_key, unsigned int key_len) { @@ -358,6 +407,22 @@ static struct skcipher_alg aes_algs[] = { { .setkey = aesbs_setkey, .encrypt = ctr_encrypt, .decrypt = ctr_encrypt, +}, { + .base.cra_name = "ctr(aes)", + .base.cra_driver_name = "ctr-aes-neonbs-sync", + .base.cra_priority = 250 - 1, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct aesbs_ctr_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .chunksize = AES_BLOCK_SIZE, + .walksize = 8 * AES_BLOCK_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = aesbs_ctr_setkey_sync, + .encrypt = ctr_encrypt_sync, + .decrypt = ctr_encrypt_sync, }, { .base.cra_name = "__xts(aes)", .base.cra_driver_name = "__xts-aes-neonbs", -- cgit v1.2.1 From 0a5dff9882e56f80ff021e039aee93e167f16b9e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 2 Jul 2019 21:41:40 +0200 Subject: crypto: arm/ghash - provide a synchronous version GHASH is used by the GCM mode, which is often used in contexts where only synchronous ciphers are permitted. So provide a synchronous version of GHASH based on the existing code. This requires a non-SIMD fallback to deal with invocations occurring from a context where SIMD instructions may not be used. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm/crypto/ghash-ce-glue.c | 78 +++++++++++++++++++++++++++-------------- 1 file changed, 52 insertions(+), 26 deletions(-) (limited to 'arch') diff --git a/arch/arm/crypto/ghash-ce-glue.c b/arch/arm/crypto/ghash-ce-glue.c index 52d472a050e6..bb906b5f1eb3 100644 --- a/arch/arm/crypto/ghash-ce-glue.c +++ b/arch/arm/crypto/ghash-ce-glue.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -30,6 +31,8 @@ struct ghash_key { u64 h2[2]; u64 h3[2]; u64 h4[2]; + + be128 k; }; struct ghash_desc_ctx { @@ -62,6 +65,36 @@ static int ghash_init(struct shash_desc *desc) return 0; } +static void ghash_do_update(int blocks, u64 dg[], const char *src, + struct ghash_key *key, const char *head) +{ + if (likely(crypto_simd_usable())) { + kernel_neon_begin(); + pmull_ghash_update(blocks, dg, src, key, head); + kernel_neon_end(); + } else { + be128 dst = { cpu_to_be64(dg[1]), cpu_to_be64(dg[0]) }; + + do { + const u8 *in = src; + + if (head) { + in = head; + blocks++; + head = NULL; + } else { + src += GHASH_BLOCK_SIZE; + } + + crypto_xor((u8 *)&dst, in, GHASH_BLOCK_SIZE); + gf128mul_lle(&dst, &key->k); + } while (--blocks); + + dg[0] = be64_to_cpu(dst.b); + dg[1] = be64_to_cpu(dst.a); + } +} + static int ghash_update(struct shash_desc *desc, const u8 *src, unsigned int len) { @@ -85,10 +118,8 @@ static int ghash_update(struct shash_desc *desc, const u8 *src, blocks = len / GHASH_BLOCK_SIZE; len %= GHASH_BLOCK_SIZE; - kernel_neon_begin(); - pmull_ghash_update(blocks, ctx->digest, src, key, - partial ? ctx->buf : NULL); - kernel_neon_end(); + ghash_do_update(blocks, ctx->digest, src, key, + partial ? ctx->buf : NULL); src += blocks * GHASH_BLOCK_SIZE; partial = 0; } @@ -106,9 +137,7 @@ static int ghash_final(struct shash_desc *desc, u8 *dst) struct ghash_key *key = crypto_shash_ctx(desc->tfm); memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial); - kernel_neon_begin(); - pmull_ghash_update(1, ctx->digest, ctx->buf, key, NULL); - kernel_neon_end(); + ghash_do_update(1, ctx->digest, ctx->buf, key, NULL); } put_unaligned_be64(ctx->digest[1], dst); put_unaligned_be64(ctx->digest[0], dst + 8); @@ -132,24 +161,25 @@ static int ghash_setkey(struct crypto_shash *tfm, const u8 *inkey, unsigned int keylen) { struct ghash_key *key = crypto_shash_ctx(tfm); - be128 h, k; + be128 h; if (keylen != GHASH_BLOCK_SIZE) { crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); return -EINVAL; } - memcpy(&k, inkey, GHASH_BLOCK_SIZE); - ghash_reflect(key->h, &k); + /* needed for the fallback */ + memcpy(&key->k, inkey, GHASH_BLOCK_SIZE); + ghash_reflect(key->h, &key->k); - h = k; - gf128mul_lle(&h, &k); + h = key->k; + gf128mul_lle(&h, &key->k); ghash_reflect(key->h2, &h); - gf128mul_lle(&h, &k); + gf128mul_lle(&h, &key->k); ghash_reflect(key->h3, &h); - gf128mul_lle(&h, &k); + gf128mul_lle(&h, &key->k); ghash_reflect(key->h4, &h); return 0; @@ -162,15 +192,13 @@ static struct shash_alg ghash_alg = { .final = ghash_final, .setkey = ghash_setkey, .descsize = sizeof(struct ghash_desc_ctx), - .base = { - .cra_name = "__ghash", - .cra_driver_name = "__driver-ghash-ce", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_INTERNAL, - .cra_blocksize = GHASH_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct ghash_key), - .cra_module = THIS_MODULE, - }, + + .base.cra_name = "ghash", + .base.cra_driver_name = "ghash-ce-sync", + .base.cra_priority = 300 - 1, + .base.cra_blocksize = GHASH_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct ghash_key), + .base.cra_module = THIS_MODULE, }; static int ghash_async_init(struct ahash_request *req) @@ -285,9 +313,7 @@ static int ghash_async_init_tfm(struct crypto_tfm *tfm) struct cryptd_ahash *cryptd_tfm; struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm); - cryptd_tfm = cryptd_alloc_ahash("__driver-ghash-ce", - CRYPTO_ALG_INTERNAL, - CRYPTO_ALG_INTERNAL); + cryptd_tfm = cryptd_alloc_ahash("ghash-ce-sync", 0, 0); if (IS_ERR(cryptd_tfm)) return PTR_ERR(cryptd_tfm); ctx->cryptd_tfm = cryptd_tfm; -- cgit v1.2.1 From 58144b8d03fca6209a5097e85bcd63a6a956e75f Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 2 Jul 2019 21:41:47 +0200 Subject: crypto: arm64/aes-neon - switch to shared AES Sboxes Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/aes-neon.S | 74 ++------------------------------------------ 1 file changed, 3 insertions(+), 71 deletions(-) (limited to 'arch') diff --git a/arch/arm64/crypto/aes-neon.S b/arch/arm64/crypto/aes-neon.S index 2bebccc73869..0cac5df6c901 100644 --- a/arch/arm64/crypto/aes-neon.S +++ b/arch/arm64/crypto/aes-neon.S @@ -49,7 +49,7 @@ /* do preload for encryption */ .macro enc_prepare, ignore0, ignore1, temp - prepare .LForward_Sbox, .LForward_ShiftRows, \temp + prepare crypto_aes_sbox, .LForward_ShiftRows, \temp .endm .macro enc_switch_key, ignore0, ignore1, temp @@ -58,7 +58,7 @@ /* do preload for decryption */ .macro dec_prepare, ignore0, ignore1, temp - prepare .LReverse_Sbox, .LReverse_ShiftRows, \temp + prepare crypto_aes_inv_sbox, .LReverse_ShiftRows, \temp .endm /* apply SubBytes transformation using the the preloaded Sbox */ @@ -234,75 +234,7 @@ #include "aes-modes.S" .section ".rodata", "a" - .align 6 -.LForward_Sbox: - .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 - .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 - .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0 - .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 - .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc - .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 - .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a - .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 - .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0 - .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 - .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b - .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf - .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85 - .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 - .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5 - .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 - .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17 - .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 - .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88 - .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb - .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c - .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 - .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9 - .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 - .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6 - .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a - .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e - .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e - .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94 - .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf - .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68 - .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 - -.LReverse_Sbox: - .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 - .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb - .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87 - .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb - .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d - .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e - .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2 - .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 - .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16 - .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 - .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda - .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 - .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a - .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 - .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02 - .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b - .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea - .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 - .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85 - .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e - .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89 - .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b - .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20 - .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 - .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31 - .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f - .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d - .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef - .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0 - .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 - .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 - .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d - + .align 4 .LForward_ShiftRows: .octa 0x0b06010c07020d08030e09040f0a0500 -- cgit v1.2.1 From 8de6dd3386003a3fe4509304494061b3554fe516 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 2 Jul 2019 21:41:48 +0200 Subject: crypto: arm/aes-cipher - switch to shared AES inverse Sbox Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm/crypto/aes-cipher-core.S | 40 +-------------------------------------- 1 file changed, 1 insertion(+), 39 deletions(-) (limited to 'arch') diff --git a/arch/arm/crypto/aes-cipher-core.S b/arch/arm/crypto/aes-cipher-core.S index 4460ed05d6ff..472e56d09eea 100644 --- a/arch/arm/crypto/aes-cipher-core.S +++ b/arch/arm/crypto/aes-cipher-core.S @@ -219,43 +219,5 @@ ENDPROC(__aes_arm_encrypt) .align 5 ENTRY(__aes_arm_decrypt) - do_crypt iround, crypto_it_tab, __aes_arm_inverse_sbox, 0 + do_crypt iround, crypto_it_tab, crypto_aes_inv_sbox, 0 ENDPROC(__aes_arm_decrypt) - - .section ".rodata", "a" - .align L1_CACHE_SHIFT - .type __aes_arm_inverse_sbox, %object -__aes_arm_inverse_sbox: - .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 - .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb - .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87 - .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb - .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d - .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e - .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2 - .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 - .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16 - .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 - .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda - .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 - .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a - .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 - .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02 - .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b - .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea - .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 - .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85 - .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e - .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89 - .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b - .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20 - .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 - .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31 - .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f - .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d - .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef - .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0 - .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 - .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 - .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d - .size __aes_arm_inverse_sbox, . - __aes_arm_inverse_sbox -- cgit v1.2.1 From 642a88fbe94441de7f2c807263599d5ef61b70fb Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 2 Jul 2019 21:41:49 +0200 Subject: crypto: arm64/aes-cipher - switch to shared AES inverse Sbox Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/aes-cipher-core.S | 40 +------------------------------------ 1 file changed, 1 insertion(+), 39 deletions(-) (limited to 'arch') diff --git a/arch/arm64/crypto/aes-cipher-core.S b/arch/arm64/crypto/aes-cipher-core.S index f06df0d2080c..423d0aebc570 100644 --- a/arch/arm64/crypto/aes-cipher-core.S +++ b/arch/arm64/crypto/aes-cipher-core.S @@ -128,43 +128,5 @@ ENDPROC(__aes_arm64_encrypt) .align 5 ENTRY(__aes_arm64_decrypt) - do_crypt iround, crypto_it_tab, __aes_arm64_inverse_sbox, 0 + do_crypt iround, crypto_it_tab, crypto_aes_inv_sbox, 0 ENDPROC(__aes_arm64_decrypt) - - .section ".rodata", "a" - .align L1_CACHE_SHIFT - .type __aes_arm64_inverse_sbox, %object -__aes_arm64_inverse_sbox: - .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 - .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb - .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87 - .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb - .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d - .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e - .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2 - .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 - .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16 - .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 - .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda - .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 - .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a - .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 - .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02 - .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b - .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea - .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 - .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85 - .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e - .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89 - .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b - .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20 - .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 - .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31 - .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f - .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d - .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef - .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0 - .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 - .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 - .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d - .size __aes_arm64_inverse_sbox, . - __aes_arm64_inverse_sbox -- cgit v1.2.1 From b46033fdd25abb30b253cd283b551323b723a2db Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 2 Jul 2019 21:41:50 +0200 Subject: crypto: arm/aes-scalar - unexport en/decryption routines The scalar table based AES routines are not used by other drivers, so let's keep it that way and unexport the symbols. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm/crypto/aes-cipher-glue.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch') diff --git a/arch/arm/crypto/aes-cipher-glue.c b/arch/arm/crypto/aes-cipher-glue.c index 6efb3c04353f..8cd00f56800e 100644 --- a/arch/arm/crypto/aes-cipher-glue.c +++ b/arch/arm/crypto/aes-cipher-glue.c @@ -11,10 +11,7 @@ #include asmlinkage void __aes_arm_encrypt(u32 *rk, int rounds, const u8 *in, u8 *out); -EXPORT_SYMBOL(__aes_arm_encrypt); - asmlinkage void __aes_arm_decrypt(u32 *rk, int rounds, const u8 *in, u8 *out); -EXPORT_SYMBOL(__aes_arm_decrypt); static void aes_arm_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { -- cgit v1.2.1 From 5cb97700beaa005ceb2a127b6f53536a4544c9d8 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 3 Jul 2019 10:55:06 +0200 Subject: crypto: morus - remove generic and x86 implementations MORUS was not selected as a winner in the CAESAR competition, which is not surprising since it is considered to be cryptographically broken [0]. (Note that this is not an implementation defect, but a flaw in the underlying algorithm). Since it is unlikely to be in use currently, let's remove it before we're stuck with it. [0] https://eprint.iacr.org/2019/172.pdf Reviewed-by: Ondrej Mosnacek Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/x86/crypto/Makefile | 13 - arch/x86/crypto/morus1280-avx2-asm.S | 619 ----------------------- arch/x86/crypto/morus1280-avx2-glue.c | 62 --- arch/x86/crypto/morus1280-sse2-asm.S | 893 ---------------------------------- arch/x86/crypto/morus1280-sse2-glue.c | 61 --- arch/x86/crypto/morus1280_glue.c | 205 -------- arch/x86/crypto/morus640-sse2-asm.S | 612 ----------------------- arch/x86/crypto/morus640-sse2-glue.c | 61 --- arch/x86/crypto/morus640_glue.c | 200 -------- 9 files changed, 2726 deletions(-) delete mode 100644 arch/x86/crypto/morus1280-avx2-asm.S delete mode 100644 arch/x86/crypto/morus1280-avx2-glue.c delete mode 100644 arch/x86/crypto/morus1280-sse2-asm.S delete mode 100644 arch/x86/crypto/morus1280-sse2-glue.c delete mode 100644 arch/x86/crypto/morus1280_glue.c delete mode 100644 arch/x86/crypto/morus640-sse2-asm.S delete mode 100644 arch/x86/crypto/morus640-sse2-glue.c delete mode 100644 arch/x86/crypto/morus640_glue.c (limited to 'arch') diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index b96a14e67ab0..6f1d825fbb09 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -39,12 +39,6 @@ obj-$(CONFIG_CRYPTO_AEGIS128_AESNI_SSE2) += aegis128-aesni.o obj-$(CONFIG_CRYPTO_AEGIS128L_AESNI_SSE2) += aegis128l-aesni.o obj-$(CONFIG_CRYPTO_AEGIS256_AESNI_SSE2) += aegis256-aesni.o -obj-$(CONFIG_CRYPTO_MORUS640_GLUE) += morus640_glue.o -obj-$(CONFIG_CRYPTO_MORUS1280_GLUE) += morus1280_glue.o - -obj-$(CONFIG_CRYPTO_MORUS640_SSE2) += morus640-sse2.o -obj-$(CONFIG_CRYPTO_MORUS1280_SSE2) += morus1280-sse2.o - obj-$(CONFIG_CRYPTO_NHPOLY1305_SSE2) += nhpoly1305-sse2.o obj-$(CONFIG_CRYPTO_NHPOLY1305_AVX2) += nhpoly1305-avx2.o @@ -62,8 +56,6 @@ endif ifeq ($(avx2_supported),yes) obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64) += camellia-aesni-avx2.o obj-$(CONFIG_CRYPTO_SERPENT_AVX2_X86_64) += serpent-avx2.o - - obj-$(CONFIG_CRYPTO_MORUS1280_AVX2) += morus1280-avx2.o endif twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o @@ -81,9 +73,6 @@ aegis128-aesni-y := aegis128-aesni-asm.o aegis128-aesni-glue.o aegis128l-aesni-y := aegis128l-aesni-asm.o aegis128l-aesni-glue.o aegis256-aesni-y := aegis256-aesni-asm.o aegis256-aesni-glue.o -morus640-sse2-y := morus640-sse2-asm.o morus640-sse2-glue.o -morus1280-sse2-y := morus1280-sse2-asm.o morus1280-sse2-glue.o - nhpoly1305-sse2-y := nh-sse2-x86_64.o nhpoly1305-sse2-glue.o ifeq ($(avx_supported),yes) @@ -102,8 +91,6 @@ ifeq ($(avx2_supported),yes) chacha-x86_64-y += chacha-avx2-x86_64.o serpent-avx2-y := serpent-avx2-asm_64.o serpent_avx2_glue.o - morus1280-avx2-y := morus1280-avx2-asm.o morus1280-avx2-glue.o - nhpoly1305-avx2-y := nh-avx2-x86_64.o nhpoly1305-avx2-glue.o endif diff --git a/arch/x86/crypto/morus1280-avx2-asm.S b/arch/x86/crypto/morus1280-avx2-asm.S deleted file mode 100644 index 5413fee33481..000000000000 --- a/arch/x86/crypto/morus1280-avx2-asm.S +++ /dev/null @@ -1,619 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * AVX2 implementation of MORUS-1280 - * - * Copyright (c) 2017-2018 Ondrej Mosnacek - * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. - */ - -#include -#include - -#define SHUFFLE_MASK(i0, i1, i2, i3) \ - (i0 | (i1 << 2) | (i2 << 4) | (i3 << 6)) - -#define MASK1 SHUFFLE_MASK(3, 0, 1, 2) -#define MASK2 SHUFFLE_MASK(2, 3, 0, 1) -#define MASK3 SHUFFLE_MASK(1, 2, 3, 0) - -#define STATE0 %ymm0 -#define STATE0_LOW %xmm0 -#define STATE1 %ymm1 -#define STATE2 %ymm2 -#define STATE3 %ymm3 -#define STATE4 %ymm4 -#define KEY %ymm5 -#define MSG %ymm5 -#define MSG_LOW %xmm5 -#define T0 %ymm6 -#define T0_LOW %xmm6 -#define T1 %ymm7 - -.section .rodata.cst32.morus1280_const, "aM", @progbits, 32 -.align 32 -.Lmorus1280_const: - .byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d - .byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62 - .byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1 - .byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd - -.section .rodata.cst32.morus1280_counter, "aM", @progbits, 32 -.align 32 -.Lmorus1280_counter: - .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 - .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f - .byte 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17 - .byte 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f - -.text - -.macro morus1280_round s0, s1, s2, s3, s4, b, w - vpand \s1, \s2, T0 - vpxor T0, \s0, \s0 - vpxor \s3, \s0, \s0 - vpsllq $\b, \s0, T0 - vpsrlq $(64 - \b), \s0, \s0 - vpxor T0, \s0, \s0 - vpermq $\w, \s3, \s3 -.endm - -/* - * __morus1280_update: internal ABI - * input: - * STATE[0-4] - input state - * MSG - message block - * output: - * STATE[0-4] - output state - * changed: - * T0 - */ -__morus1280_update: - morus1280_round STATE0, STATE1, STATE2, STATE3, STATE4, 13, MASK1 - vpxor MSG, STATE1, STATE1 - morus1280_round STATE1, STATE2, STATE3, STATE4, STATE0, 46, MASK2 - vpxor MSG, STATE2, STATE2 - morus1280_round STATE2, STATE3, STATE4, STATE0, STATE1, 38, MASK3 - vpxor MSG, STATE3, STATE3 - morus1280_round STATE3, STATE4, STATE0, STATE1, STATE2, 7, MASK2 - vpxor MSG, STATE4, STATE4 - morus1280_round STATE4, STATE0, STATE1, STATE2, STATE3, 4, MASK1 - ret -ENDPROC(__morus1280_update) - -/* - * __morus1280_update_zero: internal ABI - * input: - * STATE[0-4] - input state - * output: - * STATE[0-4] - output state - * changed: - * T0 - */ -__morus1280_update_zero: - morus1280_round STATE0, STATE1, STATE2, STATE3, STATE4, 13, MASK1 - morus1280_round STATE1, STATE2, STATE3, STATE4, STATE0, 46, MASK2 - morus1280_round STATE2, STATE3, STATE4, STATE0, STATE1, 38, MASK3 - morus1280_round STATE3, STATE4, STATE0, STATE1, STATE2, 7, MASK2 - morus1280_round STATE4, STATE0, STATE1, STATE2, STATE3, 4, MASK1 - ret -ENDPROC(__morus1280_update_zero) - -/* - * __load_partial: internal ABI - * input: - * %rsi - src - * %rcx - bytes - * output: - * MSG - message block - * changed: - * %r8 - * %r9 - */ -__load_partial: - xor %r9d, %r9d - vpxor MSG, MSG, MSG - - mov %rcx, %r8 - and $0x1, %r8 - jz .Lld_partial_1 - - mov %rcx, %r8 - and $0x1E, %r8 - add %rsi, %r8 - mov (%r8), %r9b - -.Lld_partial_1: - mov %rcx, %r8 - and $0x2, %r8 - jz .Lld_partial_2 - - mov %rcx, %r8 - and $0x1C, %r8 - add %rsi, %r8 - shl $16, %r9 - mov (%r8), %r9w - -.Lld_partial_2: - mov %rcx, %r8 - and $0x4, %r8 - jz .Lld_partial_4 - - mov %rcx, %r8 - and $0x18, %r8 - add %rsi, %r8 - shl $32, %r9 - mov (%r8), %r8d - xor %r8, %r9 - -.Lld_partial_4: - movq %r9, MSG_LOW - - mov %rcx, %r8 - and $0x8, %r8 - jz .Lld_partial_8 - - mov %rcx, %r8 - and $0x10, %r8 - add %rsi, %r8 - pshufd $MASK2, MSG_LOW, MSG_LOW - pinsrq $0, (%r8), MSG_LOW - -.Lld_partial_8: - mov %rcx, %r8 - and $0x10, %r8 - jz .Lld_partial_16 - - vpermq $MASK2, MSG, MSG - movdqu (%rsi), MSG_LOW - -.Lld_partial_16: - ret -ENDPROC(__load_partial) - -/* - * __store_partial: internal ABI - * input: - * %rdx - dst - * %rcx - bytes - * output: - * T0 - message block - * changed: - * %r8 - * %r9 - * %r10 - */ -__store_partial: - mov %rcx, %r8 - mov %rdx, %r9 - - cmp $16, %r8 - jl .Lst_partial_16 - - movdqu T0_LOW, (%r9) - vpermq $MASK2, T0, T0 - - sub $16, %r8 - add $16, %r9 - -.Lst_partial_16: - movq T0_LOW, %r10 - - cmp $8, %r8 - jl .Lst_partial_8 - - mov %r10, (%r9) - pextrq $1, T0_LOW, %r10 - - sub $8, %r8 - add $8, %r9 - -.Lst_partial_8: - cmp $4, %r8 - jl .Lst_partial_4 - - mov %r10d, (%r9) - shr $32, %r10 - - sub $4, %r8 - add $4, %r9 - -.Lst_partial_4: - cmp $2, %r8 - jl .Lst_partial_2 - - mov %r10w, (%r9) - shr $16, %r10 - - sub $2, %r8 - add $2, %r9 - -.Lst_partial_2: - cmp $1, %r8 - jl .Lst_partial_1 - - mov %r10b, (%r9) - -.Lst_partial_1: - ret -ENDPROC(__store_partial) - -/* - * void crypto_morus1280_avx2_init(void *state, const void *key, - * const void *iv); - */ -ENTRY(crypto_morus1280_avx2_init) - FRAME_BEGIN - - /* load IV: */ - vpxor STATE0, STATE0, STATE0 - movdqu (%rdx), STATE0_LOW - /* load key: */ - vmovdqu (%rsi), KEY - vmovdqa KEY, STATE1 - /* load all ones: */ - vpcmpeqd STATE2, STATE2, STATE2 - /* load all zeros: */ - vpxor STATE3, STATE3, STATE3 - /* load the constant: */ - vmovdqa .Lmorus1280_const, STATE4 - - /* update 16 times with zero: */ - call __morus1280_update_zero - call __morus1280_update_zero - call __morus1280_update_zero - call __morus1280_update_zero - call __morus1280_update_zero - call __morus1280_update_zero - call __morus1280_update_zero - call __morus1280_update_zero - call __morus1280_update_zero - call __morus1280_update_zero - call __morus1280_update_zero - call __morus1280_update_zero - call __morus1280_update_zero - call __morus1280_update_zero - call __morus1280_update_zero - call __morus1280_update_zero - - /* xor-in the key again after updates: */ - vpxor KEY, STATE1, STATE1 - - /* store the state: */ - vmovdqu STATE0, (0 * 32)(%rdi) - vmovdqu STATE1, (1 * 32)(%rdi) - vmovdqu STATE2, (2 * 32)(%rdi) - vmovdqu STATE3, (3 * 32)(%rdi) - vmovdqu STATE4, (4 * 32)(%rdi) - - FRAME_END - ret -ENDPROC(crypto_morus1280_avx2_init) - -/* - * void crypto_morus1280_avx2_ad(void *state, const void *data, - * unsigned int length); - */ -ENTRY(crypto_morus1280_avx2_ad) - FRAME_BEGIN - - cmp $32, %rdx - jb .Lad_out - - /* load the state: */ - vmovdqu (0 * 32)(%rdi), STATE0 - vmovdqu (1 * 32)(%rdi), STATE1 - vmovdqu (2 * 32)(%rdi), STATE2 - vmovdqu (3 * 32)(%rdi), STATE3 - vmovdqu (4 * 32)(%rdi), STATE4 - - mov %rsi, %r8 - and $0x1F, %r8 - jnz .Lad_u_loop - -.align 4 -.Lad_a_loop: - vmovdqa (%rsi), MSG - call __morus1280_update - sub $32, %rdx - add $32, %rsi - cmp $32, %rdx - jge .Lad_a_loop - - jmp .Lad_cont -.align 4 -.Lad_u_loop: - vmovdqu (%rsi), MSG - call __morus1280_update - sub $32, %rdx - add $32, %rsi - cmp $32, %rdx - jge .Lad_u_loop - -.Lad_cont: - /* store the state: */ - vmovdqu STATE0, (0 * 32)(%rdi) - vmovdqu STATE1, (1 * 32)(%rdi) - vmovdqu STATE2, (2 * 32)(%rdi) - vmovdqu STATE3, (3 * 32)(%rdi) - vmovdqu STATE4, (4 * 32)(%rdi) - -.Lad_out: - FRAME_END - ret -ENDPROC(crypto_morus1280_avx2_ad) - -/* - * void crypto_morus1280_avx2_enc(void *state, const void *src, void *dst, - * unsigned int length); - */ -ENTRY(crypto_morus1280_avx2_enc) - FRAME_BEGIN - - cmp $32, %rcx - jb .Lenc_out - - /* load the state: */ - vmovdqu (0 * 32)(%rdi), STATE0 - vmovdqu (1 * 32)(%rdi), STATE1 - vmovdqu (2 * 32)(%rdi), STATE2 - vmovdqu (3 * 32)(%rdi), STATE3 - vmovdqu (4 * 32)(%rdi), STATE4 - - mov %rsi, %r8 - or %rdx, %r8 - and $0x1F, %r8 - jnz .Lenc_u_loop - -.align 4 -.Lenc_a_loop: - vmovdqa (%rsi), MSG - vmovdqa MSG, T0 - vpxor STATE0, T0, T0 - vpermq $MASK3, STATE1, T1 - vpxor T1, T0, T0 - vpand STATE2, STATE3, T1 - vpxor T1, T0, T0 - vmovdqa T0, (%rdx) - - call __morus1280_update - sub $32, %rcx - add $32, %rsi - add $32, %rdx - cmp $32, %rcx - jge .Lenc_a_loop - - jmp .Lenc_cont -.align 4 -.Lenc_u_loop: - vmovdqu (%rsi), MSG - vmovdqa MSG, T0 - vpxor STATE0, T0, T0 - vpermq $MASK3, STATE1, T1 - vpxor T1, T0, T0 - vpand STATE2, STATE3, T1 - vpxor T1, T0, T0 - vmovdqu T0, (%rdx) - - call __morus1280_update - sub $32, %rcx - add $32, %rsi - add $32, %rdx - cmp $32, %rcx - jge .Lenc_u_loop - -.Lenc_cont: - /* store the state: */ - vmovdqu STATE0, (0 * 32)(%rdi) - vmovdqu STATE1, (1 * 32)(%rdi) - vmovdqu STATE2, (2 * 32)(%rdi) - vmovdqu STATE3, (3 * 32)(%rdi) - vmovdqu STATE4, (4 * 32)(%rdi) - -.Lenc_out: - FRAME_END - ret -ENDPROC(crypto_morus1280_avx2_enc) - -/* - * void crypto_morus1280_avx2_enc_tail(void *state, const void *src, void *dst, - * unsigned int length); - */ -ENTRY(crypto_morus1280_avx2_enc_tail) - FRAME_BEGIN - - /* load the state: */ - vmovdqu (0 * 32)(%rdi), STATE0 - vmovdqu (1 * 32)(%rdi), STATE1 - vmovdqu (2 * 32)(%rdi), STATE2 - vmovdqu (3 * 32)(%rdi), STATE3 - vmovdqu (4 * 32)(%rdi), STATE4 - - /* encrypt message: */ - call __load_partial - - vmovdqa MSG, T0 - vpxor STATE0, T0, T0 - vpermq $MASK3, STATE1, T1 - vpxor T1, T0, T0 - vpand STATE2, STATE3, T1 - vpxor T1, T0, T0 - - call __store_partial - - call __morus1280_update - - /* store the state: */ - vmovdqu STATE0, (0 * 32)(%rdi) - vmovdqu STATE1, (1 * 32)(%rdi) - vmovdqu STATE2, (2 * 32)(%rdi) - vmovdqu STATE3, (3 * 32)(%rdi) - vmovdqu STATE4, (4 * 32)(%rdi) - - FRAME_END - ret -ENDPROC(crypto_morus1280_avx2_enc_tail) - -/* - * void crypto_morus1280_avx2_dec(void *state, const void *src, void *dst, - * unsigned int length); - */ -ENTRY(crypto_morus1280_avx2_dec) - FRAME_BEGIN - - cmp $32, %rcx - jb .Ldec_out - - /* load the state: */ - vmovdqu (0 * 32)(%rdi), STATE0 - vmovdqu (1 * 32)(%rdi), STATE1 - vmovdqu (2 * 32)(%rdi), STATE2 - vmovdqu (3 * 32)(%rdi), STATE3 - vmovdqu (4 * 32)(%rdi), STATE4 - - mov %rsi, %r8 - or %rdx, %r8 - and $0x1F, %r8 - jnz .Ldec_u_loop - -.align 4 -.Ldec_a_loop: - vmovdqa (%rsi), MSG - vpxor STATE0, MSG, MSG - vpermq $MASK3, STATE1, T0 - vpxor T0, MSG, MSG - vpand STATE2, STATE3, T0 - vpxor T0, MSG, MSG - vmovdqa MSG, (%rdx) - - call __morus1280_update - sub $32, %rcx - add $32, %rsi - add $32, %rdx - cmp $32, %rcx - jge .Ldec_a_loop - - jmp .Ldec_cont -.align 4 -.Ldec_u_loop: - vmovdqu (%rsi), MSG - vpxor STATE0, MSG, MSG - vpermq $MASK3, STATE1, T0 - vpxor T0, MSG, MSG - vpand STATE2, STATE3, T0 - vpxor T0, MSG, MSG - vmovdqu MSG, (%rdx) - - call __morus1280_update - sub $32, %rcx - add $32, %rsi - add $32, %rdx - cmp $32, %rcx - jge .Ldec_u_loop - -.Ldec_cont: - /* store the state: */ - vmovdqu STATE0, (0 * 32)(%rdi) - vmovdqu STATE1, (1 * 32)(%rdi) - vmovdqu STATE2, (2 * 32)(%rdi) - vmovdqu STATE3, (3 * 32)(%rdi) - vmovdqu STATE4, (4 * 32)(%rdi) - -.Ldec_out: - FRAME_END - ret -ENDPROC(crypto_morus1280_avx2_dec) - -/* - * void crypto_morus1280_avx2_dec_tail(void *state, const void *src, void *dst, - * unsigned int length); - */ -ENTRY(crypto_morus1280_avx2_dec_tail) - FRAME_BEGIN - - /* load the state: */ - vmovdqu (0 * 32)(%rdi), STATE0 - vmovdqu (1 * 32)(%rdi), STATE1 - vmovdqu (2 * 32)(%rdi), STATE2 - vmovdqu (3 * 32)(%rdi), STATE3 - vmovdqu (4 * 32)(%rdi), STATE4 - - /* decrypt message: */ - call __load_partial - - vpxor STATE0, MSG, MSG - vpermq $MASK3, STATE1, T0 - vpxor T0, MSG, MSG - vpand STATE2, STATE3, T0 - vpxor T0, MSG, MSG - vmovdqa MSG, T0 - - call __store_partial - - /* mask with byte count: */ - movq %rcx, T0_LOW - vpbroadcastb T0_LOW, T0 - vmovdqa .Lmorus1280_counter, T1 - vpcmpgtb T1, T0, T0 - vpand T0, MSG, MSG - - call __morus1280_update - - /* store the state: */ - vmovdqu STATE0, (0 * 32)(%rdi) - vmovdqu STATE1, (1 * 32)(%rdi) - vmovdqu STATE2, (2 * 32)(%rdi) - vmovdqu STATE3, (3 * 32)(%rdi) - vmovdqu STATE4, (4 * 32)(%rdi) - - FRAME_END - ret -ENDPROC(crypto_morus1280_avx2_dec_tail) - -/* - * void crypto_morus1280_avx2_final(void *state, void *tag_xor, - * u64 assoclen, u64 cryptlen); - */ -ENTRY(crypto_morus1280_avx2_final) - FRAME_BEGIN - - /* load the state: */ - vmovdqu (0 * 32)(%rdi), STATE0 - vmovdqu (1 * 32)(%rdi), STATE1 - vmovdqu (2 * 32)(%rdi), STATE2 - vmovdqu (3 * 32)(%rdi), STATE3 - vmovdqu (4 * 32)(%rdi), STATE4 - - /* xor state[0] into state[4]: */ - vpxor STATE0, STATE4, STATE4 - - /* prepare length block: */ - vpxor MSG, MSG, MSG - vpinsrq $0, %rdx, MSG_LOW, MSG_LOW - vpinsrq $1, %rcx, MSG_LOW, MSG_LOW - vpsllq $3, MSG, MSG /* multiply by 8 (to get bit count) */ - - /* update state: */ - call __morus1280_update - call __morus1280_update - call __morus1280_update - call __morus1280_update - call __morus1280_update - call __morus1280_update - call __morus1280_update - call __morus1280_update - call __morus1280_update - call __morus1280_update - - /* xor tag: */ - vmovdqu (%rsi), MSG - - vpxor STATE0, MSG, MSG - vpermq $MASK3, STATE1, T0 - vpxor T0, MSG, MSG - vpand STATE2, STATE3, T0 - vpxor T0, MSG, MSG - vmovdqu MSG, (%rsi) - - FRAME_END - ret -ENDPROC(crypto_morus1280_avx2_final) diff --git a/arch/x86/crypto/morus1280-avx2-glue.c b/arch/x86/crypto/morus1280-avx2-glue.c deleted file mode 100644 index 2d000d66ba4c..000000000000 --- a/arch/x86/crypto/morus1280-avx2-glue.c +++ /dev/null @@ -1,62 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * The MORUS-1280 Authenticated-Encryption Algorithm - * Glue for AVX2 implementation - * - * Copyright (c) 2016-2018 Ondrej Mosnacek - * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. - */ - -#include -#include -#include -#include -#include -#include - -asmlinkage void crypto_morus1280_avx2_init(void *state, const void *key, - const void *iv); -asmlinkage void crypto_morus1280_avx2_ad(void *state, const void *data, - unsigned int length); - -asmlinkage void crypto_morus1280_avx2_enc(void *state, const void *src, - void *dst, unsigned int length); -asmlinkage void crypto_morus1280_avx2_dec(void *state, const void *src, - void *dst, unsigned int length); - -asmlinkage void crypto_morus1280_avx2_enc_tail(void *state, const void *src, - void *dst, unsigned int length); -asmlinkage void crypto_morus1280_avx2_dec_tail(void *state, const void *src, - void *dst, unsigned int length); - -asmlinkage void crypto_morus1280_avx2_final(void *state, void *tag_xor, - u64 assoclen, u64 cryptlen); - -MORUS1280_DECLARE_ALG(avx2, "morus1280-avx2", 400); - -static struct simd_aead_alg *simd_alg; - -static int __init crypto_morus1280_avx2_module_init(void) -{ - if (!boot_cpu_has(X86_FEATURE_AVX2) || - !boot_cpu_has(X86_FEATURE_OSXSAVE) || - !cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) - return -ENODEV; - - return simd_register_aeads_compat(&crypto_morus1280_avx2_alg, 1, - &simd_alg); -} - -static void __exit crypto_morus1280_avx2_module_exit(void) -{ - simd_unregister_aeads(&crypto_morus1280_avx2_alg, 1, &simd_alg); -} - -module_init(crypto_morus1280_avx2_module_init); -module_exit(crypto_morus1280_avx2_module_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Ondrej Mosnacek "); -MODULE_DESCRIPTION("MORUS-1280 AEAD algorithm -- AVX2 implementation"); -MODULE_ALIAS_CRYPTO("morus1280"); -MODULE_ALIAS_CRYPTO("morus1280-avx2"); diff --git a/arch/x86/crypto/morus1280-sse2-asm.S b/arch/x86/crypto/morus1280-sse2-asm.S deleted file mode 100644 index 0eece772866b..000000000000 --- a/arch/x86/crypto/morus1280-sse2-asm.S +++ /dev/null @@ -1,893 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * SSE2 implementation of MORUS-1280 - * - * Copyright (c) 2017-2018 Ondrej Mosnacek - * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. - */ - -#include -#include - -#define SHUFFLE_MASK(i0, i1, i2, i3) \ - (i0 | (i1 << 2) | (i2 << 4) | (i3 << 6)) - -#define MASK2 SHUFFLE_MASK(2, 3, 0, 1) - -#define STATE0_LO %xmm0 -#define STATE0_HI %xmm1 -#define STATE1_LO %xmm2 -#define STATE1_HI %xmm3 -#define STATE2_LO %xmm4 -#define STATE2_HI %xmm5 -#define STATE3_LO %xmm6 -#define STATE3_HI %xmm7 -#define STATE4_LO %xmm8 -#define STATE4_HI %xmm9 -#define KEY_LO %xmm10 -#define KEY_HI %xmm11 -#define MSG_LO %xmm10 -#define MSG_HI %xmm11 -#define T0_LO %xmm12 -#define T0_HI %xmm13 -#define T1_LO %xmm14 -#define T1_HI %xmm15 - -.section .rodata.cst16.morus640_const, "aM", @progbits, 16 -.align 16 -.Lmorus640_const_0: - .byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d - .byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62 -.Lmorus640_const_1: - .byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1 - .byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd - -.section .rodata.cst16.morus640_counter, "aM", @progbits, 16 -.align 16 -.Lmorus640_counter_0: - .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 - .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f -.Lmorus640_counter_1: - .byte 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17 - .byte 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f - -.text - -.macro rol1 hi, lo - /* - * HI_1 | HI_0 || LO_1 | LO_0 - * ==> - * HI_0 | HI_1 || LO_1 | LO_0 - * ==> - * HI_0 | LO_1 || LO_0 | HI_1 - */ - pshufd $MASK2, \hi, \hi - movdqa \hi, T0_LO - punpcklqdq \lo, T0_LO - punpckhqdq \hi, \lo - movdqa \lo, \hi - movdqa T0_LO, \lo -.endm - -.macro rol2 hi, lo - movdqa \lo, T0_LO - movdqa \hi, \lo - movdqa T0_LO, \hi -.endm - -.macro rol3 hi, lo - /* - * HI_1 | HI_0 || LO_1 | LO_0 - * ==> - * HI_0 | HI_1 || LO_1 | LO_0 - * ==> - * LO_0 | HI_1 || HI_0 | LO_1 - */ - pshufd $MASK2, \hi, \hi - movdqa \lo, T0_LO - punpckhqdq \hi, T0_LO - punpcklqdq \lo, \hi - movdqa T0_LO, \lo -.endm - -.macro morus1280_round s0_l, s0_h, s1_l, s1_h, s2_l, s2_h, s3_l, s3_h, s4_l, s4_h, b, w - movdqa \s1_l, T0_LO - pand \s2_l, T0_LO - pxor T0_LO, \s0_l - - movdqa \s1_h, T0_LO - pand \s2_h, T0_LO - pxor T0_LO, \s0_h - - pxor \s3_l, \s0_l - pxor \s3_h, \s0_h - - movdqa \s0_l, T0_LO - psllq $\b, T0_LO - psrlq $(64 - \b), \s0_l - pxor T0_LO, \s0_l - - movdqa \s0_h, T0_LO - psllq $\b, T0_LO - psrlq $(64 - \b), \s0_h - pxor T0_LO, \s0_h - - \w \s3_h, \s3_l -.endm - -/* - * __morus1280_update: internal ABI - * input: - * STATE[0-4] - input state - * MSG - message block - * output: - * STATE[0-4] - output state - * changed: - * T0 - */ -__morus1280_update: - morus1280_round \ - STATE0_LO, STATE0_HI, \ - STATE1_LO, STATE1_HI, \ - STATE2_LO, STATE2_HI, \ - STATE3_LO, STATE3_HI, \ - STATE4_LO, STATE4_HI, \ - 13, rol1 - pxor MSG_LO, STATE1_LO - pxor MSG_HI, STATE1_HI - morus1280_round \ - STATE1_LO, STATE1_HI, \ - STATE2_LO, STATE2_HI, \ - STATE3_LO, STATE3_HI, \ - STATE4_LO, STATE4_HI, \ - STATE0_LO, STATE0_HI, \ - 46, rol2 - pxor MSG_LO, STATE2_LO - pxor MSG_HI, STATE2_HI - morus1280_round \ - STATE2_LO, STATE2_HI, \ - STATE3_LO, STATE3_HI, \ - STATE4_LO, STATE4_HI, \ - STATE0_LO, STATE0_HI, \ - STATE1_LO, STATE1_HI, \ - 38, rol3 - pxor MSG_LO, STATE3_LO - pxor MSG_HI, STATE3_HI - morus1280_round \ - STATE3_LO, STATE3_HI, \ - STATE4_LO, STATE4_HI, \ - STATE0_LO, STATE0_HI, \ - STATE1_LO, STATE1_HI, \ - STATE2_LO, STATE2_HI, \ - 7, rol2 - pxor MSG_LO, STATE4_LO - pxor MSG_HI, STATE4_HI - morus1280_round \ - STATE4_LO, STATE4_HI, \ - STATE0_LO, STATE0_HI, \ - STATE1_LO, STATE1_HI, \ - STATE2_LO, STATE2_HI, \ - STATE3_LO, STATE3_HI, \ - 4, rol1 - ret -ENDPROC(__morus1280_update) - -/* - * __morus1280_update_zero: internal ABI - * input: - * STATE[0-4] - input state - * output: - * STATE[0-4] - output state - * changed: - * T0 - */ -__morus1280_update_zero: - morus1280_round \ - STATE0_LO, STATE0_HI, \ - STATE1_LO, STATE1_HI, \ - STATE2_LO, STATE2_HI, \ - STATE3_LO, STATE3_HI, \ - STATE4_LO, STATE4_HI, \ - 13, rol1 - morus1280_round \ - STATE1_LO, STATE1_HI, \ - STATE2_LO, STATE2_HI, \ - STATE3_LO, STATE3_HI, \ - STATE4_LO, STATE4_HI, \ - STATE0_LO, STATE0_HI, \ - 46, rol2 - morus1280_round \ - STATE2_LO, STATE2_HI, \ - STATE3_LO, STATE3_HI, \ - STATE4_LO, STATE4_HI, \ - STATE0_LO, STATE0_HI, \ - STATE1_LO, STATE1_HI, \ - 38, rol3 - morus1280_round \ - STATE3_LO, STATE3_HI, \ - STATE4_LO, STATE4_HI, \ - STATE0_LO, STATE0_HI, \ - STATE1_LO, STATE1_HI, \ - STATE2_LO, STATE2_HI, \ - 7, rol2 - morus1280_round \ - STATE4_LO, STATE4_HI, \ - STATE0_LO, STATE0_HI, \ - STATE1_LO, STATE1_HI, \ - STATE2_LO, STATE2_HI, \ - STATE3_LO, STATE3_HI, \ - 4, rol1 - ret -ENDPROC(__morus1280_update_zero) - -/* - * __load_partial: internal ABI - * input: - * %rsi - src - * %rcx - bytes - * output: - * MSG - message block - * changed: - * %r8 - * %r9 - */ -__load_partial: - xor %r9d, %r9d - pxor MSG_LO, MSG_LO - pxor MSG_HI, MSG_HI - - mov %rcx, %r8 - and $0x1, %r8 - jz .Lld_partial_1 - - mov %rcx, %r8 - and $0x1E, %r8 - add %rsi, %r8 - mov (%r8), %r9b - -.Lld_partial_1: - mov %rcx, %r8 - and $0x2, %r8 - jz .Lld_partial_2 - - mov %rcx, %r8 - and $0x1C, %r8 - add %rsi, %r8 - shl $16, %r9 - mov (%r8), %r9w - -.Lld_partial_2: - mov %rcx, %r8 - and $0x4, %r8 - jz .Lld_partial_4 - - mov %rcx, %r8 - and $0x18, %r8 - add %rsi, %r8 - shl $32, %r9 - mov (%r8), %r8d - xor %r8, %r9 - -.Lld_partial_4: - movq %r9, MSG_LO - - mov %rcx, %r8 - and $0x8, %r8 - jz .Lld_partial_8 - - mov %rcx, %r8 - and $0x10, %r8 - add %rsi, %r8 - pslldq $8, MSG_LO - movq (%r8), T0_LO - pxor T0_LO, MSG_LO - -.Lld_partial_8: - mov %rcx, %r8 - and $0x10, %r8 - jz .Lld_partial_16 - - movdqa MSG_LO, MSG_HI - movdqu (%rsi), MSG_LO - -.Lld_partial_16: - ret -ENDPROC(__load_partial) - -/* - * __store_partial: internal ABI - * input: - * %rdx - dst - * %rcx - bytes - * output: - * T0 - message block - * changed: - * %r8 - * %r9 - * %r10 - */ -__store_partial: - mov %rcx, %r8 - mov %rdx, %r9 - - cmp $16, %r8 - jl .Lst_partial_16 - - movdqu T0_LO, (%r9) - movdqa T0_HI, T0_LO - - sub $16, %r8 - add $16, %r9 - -.Lst_partial_16: - movq T0_LO, %r10 - - cmp $8, %r8 - jl .Lst_partial_8 - - mov %r10, (%r9) - psrldq $8, T0_LO - movq T0_LO, %r10 - - sub $8, %r8 - add $8, %r9 - -.Lst_partial_8: - cmp $4, %r8 - jl .Lst_partial_4 - - mov %r10d, (%r9) - shr $32, %r10 - - sub $4, %r8 - add $4, %r9 - -.Lst_partial_4: - cmp $2, %r8 - jl .Lst_partial_2 - - mov %r10w, (%r9) - shr $16, %r10 - - sub $2, %r8 - add $2, %r9 - -.Lst_partial_2: - cmp $1, %r8 - jl .Lst_partial_1 - - mov %r10b, (%r9) - -.Lst_partial_1: - ret -ENDPROC(__store_partial) - -/* - * void crypto_morus1280_sse2_init(void *state, const void *key, - * const void *iv); - */ -ENTRY(crypto_morus1280_sse2_init) - FRAME_BEGIN - - /* load IV: */ - pxor STATE0_HI, STATE0_HI - movdqu (%rdx), STATE0_LO - /* load key: */ - movdqu 0(%rsi), KEY_LO - movdqu 16(%rsi), KEY_HI - movdqa KEY_LO, STATE1_LO - movdqa KEY_HI, STATE1_HI - /* load all ones: */ - pcmpeqd STATE2_LO, STATE2_LO - pcmpeqd STATE2_HI, STATE2_HI - /* load all zeros: */ - pxor STATE3_LO, STATE3_LO - pxor STATE3_HI, STATE3_HI - /* load the constant: */ - movdqa .Lmorus640_const_0, STATE4_LO - movdqa .Lmorus640_const_1, STATE4_HI - - /* update 16 times with zero: */ - call __morus1280_update_zero - call __morus1280_update_zero - call __morus1280_update_zero - call __morus1280_update_zero - call __morus1280_update_zero - call __morus1280_update_zero - call __morus1280_update_zero - call __morus1280_update_zero - call __morus1280_update_zero - call __morus1280_update_zero - call __morus1280_update_zero - call __morus1280_update_zero - call __morus1280_update_zero - call __morus1280_update_zero - call __morus1280_update_zero - call __morus1280_update_zero - - /* xor-in the key again after updates: */ - pxor KEY_LO, STATE1_LO - pxor KEY_HI, STATE1_HI - - /* store the state: */ - movdqu STATE0_LO, (0 * 16)(%rdi) - movdqu STATE0_HI, (1 * 16)(%rdi) - movdqu STATE1_LO, (2 * 16)(%rdi) - movdqu STATE1_HI, (3 * 16)(%rdi) - movdqu STATE2_LO, (4 * 16)(%rdi) - movdqu STATE2_HI, (5 * 16)(%rdi) - movdqu STATE3_LO, (6 * 16)(%rdi) - movdqu STATE3_HI, (7 * 16)(%rdi) - movdqu STATE4_LO, (8 * 16)(%rdi) - movdqu STATE4_HI, (9 * 16)(%rdi) - - FRAME_END - ret -ENDPROC(crypto_morus1280_sse2_init) - -/* - * void crypto_morus1280_sse2_ad(void *state, const void *data, - * unsigned int length); - */ -ENTRY(crypto_morus1280_sse2_ad) - FRAME_BEGIN - - cmp $32, %rdx - jb .Lad_out - - /* load the state: */ - movdqu (0 * 16)(%rdi), STATE0_LO - movdqu (1 * 16)(%rdi), STATE0_HI - movdqu (2 * 16)(%rdi), STATE1_LO - movdqu (3 * 16)(%rdi), STATE1_HI - movdqu (4 * 16)(%rdi), STATE2_LO - movdqu (5 * 16)(%rdi), STATE2_HI - movdqu (6 * 16)(%rdi), STATE3_LO - movdqu (7 * 16)(%rdi), STATE3_HI - movdqu (8 * 16)(%rdi), STATE4_LO - movdqu (9 * 16)(%rdi), STATE4_HI - - mov %rsi, %r8 - and $0xF, %r8 - jnz .Lad_u_loop - -.align 4 -.Lad_a_loop: - movdqa 0(%rsi), MSG_LO - movdqa 16(%rsi), MSG_HI - call __morus1280_update - sub $32, %rdx - add $32, %rsi - cmp $32, %rdx - jge .Lad_a_loop - - jmp .Lad_cont -.align 4 -.Lad_u_loop: - movdqu 0(%rsi), MSG_LO - movdqu 16(%rsi), MSG_HI - call __morus1280_update - sub $32, %rdx - add $32, %rsi - cmp $32, %rdx - jge .Lad_u_loop - -.Lad_cont: - /* store the state: */ - movdqu STATE0_LO, (0 * 16)(%rdi) - movdqu STATE0_HI, (1 * 16)(%rdi) - movdqu STATE1_LO, (2 * 16)(%rdi) - movdqu STATE1_HI, (3 * 16)(%rdi) - movdqu STATE2_LO, (4 * 16)(%rdi) - movdqu STATE2_HI, (5 * 16)(%rdi) - movdqu STATE3_LO, (6 * 16)(%rdi) - movdqu STATE3_HI, (7 * 16)(%rdi) - movdqu STATE4_LO, (8 * 16)(%rdi) - movdqu STATE4_HI, (9 * 16)(%rdi) - -.Lad_out: - FRAME_END - ret -ENDPROC(crypto_morus1280_sse2_ad) - -/* - * void crypto_morus1280_sse2_enc(void *state, const void *src, void *dst, - * unsigned int length); - */ -ENTRY(crypto_morus1280_sse2_enc) - FRAME_BEGIN - - cmp $32, %rcx - jb .Lenc_out - - /* load the state: */ - movdqu (0 * 16)(%rdi), STATE0_LO - movdqu (1 * 16)(%rdi), STATE0_HI - movdqu (2 * 16)(%rdi), STATE1_LO - movdqu (3 * 16)(%rdi), STATE1_HI - movdqu (4 * 16)(%rdi), STATE2_LO - movdqu (5 * 16)(%rdi), STATE2_HI - movdqu (6 * 16)(%rdi), STATE3_LO - movdqu (7 * 16)(%rdi), STATE3_HI - movdqu (8 * 16)(%rdi), STATE4_LO - movdqu (9 * 16)(%rdi), STATE4_HI - - mov %rsi, %r8 - or %rdx, %r8 - and $0xF, %r8 - jnz .Lenc_u_loop - -.align 4 -.Lenc_a_loop: - movdqa 0(%rsi), MSG_LO - movdqa 16(%rsi), MSG_HI - movdqa STATE1_LO, T1_LO - movdqa STATE1_HI, T1_HI - rol3 T1_HI, T1_LO - movdqa MSG_LO, T0_LO - movdqa MSG_HI, T0_HI - pxor T1_LO, T0_LO - pxor T1_HI, T0_HI - pxor STATE0_LO, T0_LO - pxor STATE0_HI, T0_HI - movdqa STATE2_LO, T1_LO - movdqa STATE2_HI, T1_HI - pand STATE3_LO, T1_LO - pand STATE3_HI, T1_HI - pxor T1_LO, T0_LO - pxor T1_HI, T0_HI - movdqa T0_LO, 0(%rdx) - movdqa T0_HI, 16(%rdx) - - call __morus1280_update - sub $32, %rcx - add $32, %rsi - add $32, %rdx - cmp $32, %rcx - jge .Lenc_a_loop - - jmp .Lenc_cont -.align 4 -.Lenc_u_loop: - movdqu 0(%rsi), MSG_LO - movdqu 16(%rsi), MSG_HI - movdqa STATE1_LO, T1_LO - movdqa STATE1_HI, T1_HI - rol3 T1_HI, T1_LO - movdqa MSG_LO, T0_LO - movdqa MSG_HI, T0_HI - pxor T1_LO, T0_LO - pxor T1_HI, T0_HI - pxor STATE0_LO, T0_LO - pxor STATE0_HI, T0_HI - movdqa STATE2_LO, T1_LO - movdqa STATE2_HI, T1_HI - pand STATE3_LO, T1_LO - pand STATE3_HI, T1_HI - pxor T1_LO, T0_LO - pxor T1_HI, T0_HI - movdqu T0_LO, 0(%rdx) - movdqu T0_HI, 16(%rdx) - - call __morus1280_update - sub $32, %rcx - add $32, %rsi - add $32, %rdx - cmp $32, %rcx - jge .Lenc_u_loop - -.Lenc_cont: - /* store the state: */ - movdqu STATE0_LO, (0 * 16)(%rdi) - movdqu STATE0_HI, (1 * 16)(%rdi) - movdqu STATE1_LO, (2 * 16)(%rdi) - movdqu STATE1_HI, (3 * 16)(%rdi) - movdqu STATE2_LO, (4 * 16)(%rdi) - movdqu STATE2_HI, (5 * 16)(%rdi) - movdqu STATE3_LO, (6 * 16)(%rdi) - movdqu STATE3_HI, (7 * 16)(%rdi) - movdqu STATE4_LO, (8 * 16)(%rdi) - movdqu STATE4_HI, (9 * 16)(%rdi) - -.Lenc_out: - FRAME_END - ret -ENDPROC(crypto_morus1280_sse2_enc) - -/* - * void crypto_morus1280_sse2_enc_tail(void *state, const void *src, void *dst, - * unsigned int length); - */ -ENTRY(crypto_morus1280_sse2_enc_tail) - FRAME_BEGIN - - /* load the state: */ - movdqu (0 * 16)(%rdi), STATE0_LO - movdqu (1 * 16)(%rdi), STATE0_HI - movdqu (2 * 16)(%rdi), STATE1_LO - movdqu (3 * 16)(%rdi), STATE1_HI - movdqu (4 * 16)(%rdi), STATE2_LO - movdqu (5 * 16)(%rdi), STATE2_HI - movdqu (6 * 16)(%rdi), STATE3_LO - movdqu (7 * 16)(%rdi), STATE3_HI - movdqu (8 * 16)(%rdi), STATE4_LO - movdqu (9 * 16)(%rdi), STATE4_HI - - /* encrypt message: */ - call __load_partial - - movdqa STATE1_LO, T1_LO - movdqa STATE1_HI, T1_HI - rol3 T1_HI, T1_LO - movdqa MSG_LO, T0_LO - movdqa MSG_HI, T0_HI - pxor T1_LO, T0_LO - pxor T1_HI, T0_HI - pxor STATE0_LO, T0_LO - pxor STATE0_HI, T0_HI - movdqa STATE2_LO, T1_LO - movdqa STATE2_HI, T1_HI - pand STATE3_LO, T1_LO - pand STATE3_HI, T1_HI - pxor T1_LO, T0_LO - pxor T1_HI, T0_HI - - call __store_partial - - call __morus1280_update - - /* store the state: */ - movdqu STATE0_LO, (0 * 16)(%rdi) - movdqu STATE0_HI, (1 * 16)(%rdi) - movdqu STATE1_LO, (2 * 16)(%rdi) - movdqu STATE1_HI, (3 * 16)(%rdi) - movdqu STATE2_LO, (4 * 16)(%rdi) - movdqu STATE2_HI, (5 * 16)(%rdi) - movdqu STATE3_LO, (6 * 16)(%rdi) - movdqu STATE3_HI, (7 * 16)(%rdi) - movdqu STATE4_LO, (8 * 16)(%rdi) - movdqu STATE4_HI, (9 * 16)(%rdi) - - FRAME_END - ret -ENDPROC(crypto_morus1280_sse2_enc_tail) - -/* - * void crypto_morus1280_sse2_dec(void *state, const void *src, void *dst, - * unsigned int length); - */ -ENTRY(crypto_morus1280_sse2_dec) - FRAME_BEGIN - - cmp $32, %rcx - jb .Ldec_out - - /* load the state: */ - movdqu (0 * 16)(%rdi), STATE0_LO - movdqu (1 * 16)(%rdi), STATE0_HI - movdqu (2 * 16)(%rdi), STATE1_LO - movdqu (3 * 16)(%rdi), STATE1_HI - movdqu (4 * 16)(%rdi), STATE2_LO - movdqu (5 * 16)(%rdi), STATE2_HI - movdqu (6 * 16)(%rdi), STATE3_LO - movdqu (7 * 16)(%rdi), STATE3_HI - movdqu (8 * 16)(%rdi), STATE4_LO - movdqu (9 * 16)(%rdi), STATE4_HI - - mov %rsi, %r8 - or %rdx, %r8 - and $0xF, %r8 - jnz .Ldec_u_loop - -.align 4 -.Ldec_a_loop: - movdqa 0(%rsi), MSG_LO - movdqa 16(%rsi), MSG_HI - pxor STATE0_LO, MSG_LO - pxor STATE0_HI, MSG_HI - movdqa STATE1_LO, T1_LO - movdqa STATE1_HI, T1_HI - rol3 T1_HI, T1_LO - pxor T1_LO, MSG_LO - pxor T1_HI, MSG_HI - movdqa STATE2_LO, T1_LO - movdqa STATE2_HI, T1_HI - pand STATE3_LO, T1_LO - pand STATE3_HI, T1_HI - pxor T1_LO, MSG_LO - pxor T1_HI, MSG_HI - movdqa MSG_LO, 0(%rdx) - movdqa MSG_HI, 16(%rdx) - - call __morus1280_update - sub $32, %rcx - add $32, %rsi - add $32, %rdx - cmp $32, %rcx - jge .Ldec_a_loop - - jmp .Ldec_cont -.align 4 -.Ldec_u_loop: - movdqu 0(%rsi), MSG_LO - movdqu 16(%rsi), MSG_HI - pxor STATE0_LO, MSG_LO - pxor STATE0_HI, MSG_HI - movdqa STATE1_LO, T1_LO - movdqa STATE1_HI, T1_HI - rol3 T1_HI, T1_LO - pxor T1_LO, MSG_LO - pxor T1_HI, MSG_HI - movdqa STATE2_LO, T1_LO - movdqa STATE2_HI, T1_HI - pand STATE3_LO, T1_LO - pand STATE3_HI, T1_HI - pxor T1_LO, MSG_LO - pxor T1_HI, MSG_HI - movdqu MSG_LO, 0(%rdx) - movdqu MSG_HI, 16(%rdx) - - call __morus1280_update - sub $32, %rcx - add $32, %rsi - add $32, %rdx - cmp $32, %rcx - jge .Ldec_u_loop - -.Ldec_cont: - /* store the state: */ - movdqu STATE0_LO, (0 * 16)(%rdi) - movdqu STATE0_HI, (1 * 16)(%rdi) - movdqu STATE1_LO, (2 * 16)(%rdi) - movdqu STATE1_HI, (3 * 16)(%rdi) - movdqu STATE2_LO, (4 * 16)(%rdi) - movdqu STATE2_HI, (5 * 16)(%rdi) - movdqu STATE3_LO, (6 * 16)(%rdi) - movdqu STATE3_HI, (7 * 16)(%rdi) - movdqu STATE4_LO, (8 * 16)(%rdi) - movdqu STATE4_HI, (9 * 16)(%rdi) - -.Ldec_out: - FRAME_END - ret -ENDPROC(crypto_morus1280_sse2_dec) - -/* - * void crypto_morus1280_sse2_dec_tail(void *state, const void *src, void *dst, - * unsigned int length); - */ -ENTRY(crypto_morus1280_sse2_dec_tail) - FRAME_BEGIN - - /* load the state: */ - movdqu (0 * 16)(%rdi), STATE0_LO - movdqu (1 * 16)(%rdi), STATE0_HI - movdqu (2 * 16)(%rdi), STATE1_LO - movdqu (3 * 16)(%rdi), STATE1_HI - movdqu (4 * 16)(%rdi), STATE2_LO - movdqu (5 * 16)(%rdi), STATE2_HI - movdqu (6 * 16)(%rdi), STATE3_LO - movdqu (7 * 16)(%rdi), STATE3_HI - movdqu (8 * 16)(%rdi), STATE4_LO - movdqu (9 * 16)(%rdi), STATE4_HI - - /* decrypt message: */ - call __load_partial - - pxor STATE0_LO, MSG_LO - pxor STATE0_HI, MSG_HI - movdqa STATE1_LO, T1_LO - movdqa STATE1_HI, T1_HI - rol3 T1_HI, T1_LO - pxor T1_LO, MSG_LO - pxor T1_HI, MSG_HI - movdqa STATE2_LO, T1_LO - movdqa STATE2_HI, T1_HI - pand STATE3_LO, T1_LO - pand STATE3_HI, T1_HI - pxor T1_LO, MSG_LO - pxor T1_HI, MSG_HI - movdqa MSG_LO, T0_LO - movdqa MSG_HI, T0_HI - - call __store_partial - - /* mask with byte count: */ - movq %rcx, T0_LO - punpcklbw T0_LO, T0_LO - punpcklbw T0_LO, T0_LO - punpcklbw T0_LO, T0_LO - punpcklbw T0_LO, T0_LO - movdqa T0_LO, T0_HI - movdqa .Lmorus640_counter_0, T1_LO - movdqa .Lmorus640_counter_1, T1_HI - pcmpgtb T1_LO, T0_LO - pcmpgtb T1_HI, T0_HI - pand T0_LO, MSG_LO - pand T0_HI, MSG_HI - - call __morus1280_update - - /* store the state: */ - movdqu STATE0_LO, (0 * 16)(%rdi) - movdqu STATE0_HI, (1 * 16)(%rdi) - movdqu STATE1_LO, (2 * 16)(%rdi) - movdqu STATE1_HI, (3 * 16)(%rdi) - movdqu STATE2_LO, (4 * 16)(%rdi) - movdqu STATE2_HI, (5 * 16)(%rdi) - movdqu STATE3_LO, (6 * 16)(%rdi) - movdqu STATE3_HI, (7 * 16)(%rdi) - movdqu STATE4_LO, (8 * 16)(%rdi) - movdqu STATE4_HI, (9 * 16)(%rdi) - - FRAME_END - ret -ENDPROC(crypto_morus1280_sse2_dec_tail) - -/* - * void crypto_morus1280_sse2_final(void *state, void *tag_xor, - * u64 assoclen, u64 cryptlen); - */ -ENTRY(crypto_morus1280_sse2_final) - FRAME_BEGIN - - /* load the state: */ - movdqu (0 * 16)(%rdi), STATE0_LO - movdqu (1 * 16)(%rdi), STATE0_HI - movdqu (2 * 16)(%rdi), STATE1_LO - movdqu (3 * 16)(%rdi), STATE1_HI - movdqu (4 * 16)(%rdi), STATE2_LO - movdqu (5 * 16)(%rdi), STATE2_HI - movdqu (6 * 16)(%rdi), STATE3_LO - movdqu (7 * 16)(%rdi), STATE3_HI - movdqu (8 * 16)(%rdi), STATE4_LO - movdqu (9 * 16)(%rdi), STATE4_HI - - /* xor state[0] into state[4]: */ - pxor STATE0_LO, STATE4_LO - pxor STATE0_HI, STATE4_HI - - /* prepare length block: */ - movq %rdx, MSG_LO - movq %rcx, T0_LO - pslldq $8, T0_LO - pxor T0_LO, MSG_LO - psllq $3, MSG_LO /* multiply by 8 (to get bit count) */ - pxor MSG_HI, MSG_HI - - /* update state: */ - call __morus1280_update - call __morus1280_update - call __morus1280_update - call __morus1280_update - call __morus1280_update - call __morus1280_update - call __morus1280_update - call __morus1280_update - call __morus1280_update - call __morus1280_update - - /* xor tag: */ - movdqu 0(%rsi), MSG_LO - movdqu 16(%rsi), MSG_HI - - pxor STATE0_LO, MSG_LO - pxor STATE0_HI, MSG_HI - movdqa STATE1_LO, T0_LO - movdqa STATE1_HI, T0_HI - rol3 T0_HI, T0_LO - pxor T0_LO, MSG_LO - pxor T0_HI, MSG_HI - movdqa STATE2_LO, T0_LO - movdqa STATE2_HI, T0_HI - pand STATE3_LO, T0_LO - pand STATE3_HI, T0_HI - pxor T0_LO, MSG_LO - pxor T0_HI, MSG_HI - - movdqu MSG_LO, 0(%rsi) - movdqu MSG_HI, 16(%rsi) - - FRAME_END - ret -ENDPROC(crypto_morus1280_sse2_final) diff --git a/arch/x86/crypto/morus1280-sse2-glue.c b/arch/x86/crypto/morus1280-sse2-glue.c deleted file mode 100644 index aada9d774293..000000000000 --- a/arch/x86/crypto/morus1280-sse2-glue.c +++ /dev/null @@ -1,61 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * The MORUS-1280 Authenticated-Encryption Algorithm - * Glue for SSE2 implementation - * - * Copyright (c) 2016-2018 Ondrej Mosnacek - * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. - */ - -#include -#include -#include -#include -#include -#include - -asmlinkage void crypto_morus1280_sse2_init(void *state, const void *key, - const void *iv); -asmlinkage void crypto_morus1280_sse2_ad(void *state, const void *data, - unsigned int length); - -asmlinkage void crypto_morus1280_sse2_enc(void *state, const void *src, - void *dst, unsigned int length); -asmlinkage void crypto_morus1280_sse2_dec(void *state, const void *src, - void *dst, unsigned int length); - -asmlinkage void crypto_morus1280_sse2_enc_tail(void *state, const void *src, - void *dst, unsigned int length); -asmlinkage void crypto_morus1280_sse2_dec_tail(void *state, const void *src, - void *dst, unsigned int length); - -asmlinkage void crypto_morus1280_sse2_final(void *state, void *tag_xor, - u64 assoclen, u64 cryptlen); - -MORUS1280_DECLARE_ALG(sse2, "morus1280-sse2", 350); - -static struct simd_aead_alg *simd_alg; - -static int __init crypto_morus1280_sse2_module_init(void) -{ - if (!boot_cpu_has(X86_FEATURE_XMM2) || - !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL)) - return -ENODEV; - - return simd_register_aeads_compat(&crypto_morus1280_sse2_alg, 1, - &simd_alg); -} - -static void __exit crypto_morus1280_sse2_module_exit(void) -{ - simd_unregister_aeads(&crypto_morus1280_sse2_alg, 1, &simd_alg); -} - -module_init(crypto_morus1280_sse2_module_init); -module_exit(crypto_morus1280_sse2_module_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Ondrej Mosnacek "); -MODULE_DESCRIPTION("MORUS-1280 AEAD algorithm -- SSE2 implementation"); -MODULE_ALIAS_CRYPTO("morus1280"); -MODULE_ALIAS_CRYPTO("morus1280-sse2"); diff --git a/arch/x86/crypto/morus1280_glue.c b/arch/x86/crypto/morus1280_glue.c deleted file mode 100644 index ffbde8b22838..000000000000 --- a/arch/x86/crypto/morus1280_glue.c +++ /dev/null @@ -1,205 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * The MORUS-1280 Authenticated-Encryption Algorithm - * Common x86 SIMD glue skeleton - * - * Copyright (c) 2016-2018 Ondrej Mosnacek - * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -struct morus1280_state { - struct morus1280_block s[MORUS_STATE_BLOCKS]; -}; - -struct morus1280_ops { - int (*skcipher_walk_init)(struct skcipher_walk *walk, - struct aead_request *req, bool atomic); - - void (*crypt_blocks)(void *state, const void *src, void *dst, - unsigned int length); - void (*crypt_tail)(void *state, const void *src, void *dst, - unsigned int length); -}; - -static void crypto_morus1280_glue_process_ad( - struct morus1280_state *state, - const struct morus1280_glue_ops *ops, - struct scatterlist *sg_src, unsigned int assoclen) -{ - struct scatter_walk walk; - struct morus1280_block buf; - unsigned int pos = 0; - - scatterwalk_start(&walk, sg_src); - while (assoclen != 0) { - unsigned int size = scatterwalk_clamp(&walk, assoclen); - unsigned int left = size; - void *mapped = scatterwalk_map(&walk); - const u8 *src = (const u8 *)mapped; - - if (pos + size >= MORUS1280_BLOCK_SIZE) { - if (pos > 0) { - unsigned int fill = MORUS1280_BLOCK_SIZE - pos; - memcpy(buf.bytes + pos, src, fill); - ops->ad(state, buf.bytes, MORUS1280_BLOCK_SIZE); - pos = 0; - left -= fill; - src += fill; - } - - ops->ad(state, src, left); - src += left & ~(MORUS1280_BLOCK_SIZE - 1); - left &= MORUS1280_BLOCK_SIZE - 1; - } - - memcpy(buf.bytes + pos, src, left); - - pos += left; - assoclen -= size; - scatterwalk_unmap(mapped); - scatterwalk_advance(&walk, size); - scatterwalk_done(&walk, 0, assoclen); - } - - if (pos > 0) { - memset(buf.bytes + pos, 0, MORUS1280_BLOCK_SIZE - pos); - ops->ad(state, buf.bytes, MORUS1280_BLOCK_SIZE); - } -} - -static void crypto_morus1280_glue_process_crypt(struct morus1280_state *state, - struct morus1280_ops ops, - struct skcipher_walk *walk) -{ - while (walk->nbytes >= MORUS1280_BLOCK_SIZE) { - ops.crypt_blocks(state, walk->src.virt.addr, - walk->dst.virt.addr, - round_down(walk->nbytes, - MORUS1280_BLOCK_SIZE)); - skcipher_walk_done(walk, walk->nbytes % MORUS1280_BLOCK_SIZE); - } - - if (walk->nbytes) { - ops.crypt_tail(state, walk->src.virt.addr, walk->dst.virt.addr, - walk->nbytes); - skcipher_walk_done(walk, 0); - } -} - -int crypto_morus1280_glue_setkey(struct crypto_aead *aead, const u8 *key, - unsigned int keylen) -{ - struct morus1280_ctx *ctx = crypto_aead_ctx(aead); - - if (keylen == MORUS1280_BLOCK_SIZE) { - memcpy(ctx->key.bytes, key, MORUS1280_BLOCK_SIZE); - } else if (keylen == MORUS1280_BLOCK_SIZE / 2) { - memcpy(ctx->key.bytes, key, keylen); - memcpy(ctx->key.bytes + keylen, key, keylen); - } else { - crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN); - return -EINVAL; - } - - return 0; -} -EXPORT_SYMBOL_GPL(crypto_morus1280_glue_setkey); - -int crypto_morus1280_glue_setauthsize(struct crypto_aead *tfm, - unsigned int authsize) -{ - return (authsize <= MORUS_MAX_AUTH_SIZE) ? 0 : -EINVAL; -} -EXPORT_SYMBOL_GPL(crypto_morus1280_glue_setauthsize); - -static void crypto_morus1280_glue_crypt(struct aead_request *req, - struct morus1280_ops ops, - unsigned int cryptlen, - struct morus1280_block *tag_xor) -{ - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct morus1280_ctx *ctx = crypto_aead_ctx(tfm); - struct morus1280_state state; - struct skcipher_walk walk; - - ops.skcipher_walk_init(&walk, req, true); - - kernel_fpu_begin(); - - ctx->ops->init(&state, &ctx->key, req->iv); - crypto_morus1280_glue_process_ad(&state, ctx->ops, req->src, req->assoclen); - crypto_morus1280_glue_process_crypt(&state, ops, &walk); - ctx->ops->final(&state, tag_xor, req->assoclen, cryptlen); - - kernel_fpu_end(); -} - -int crypto_morus1280_glue_encrypt(struct aead_request *req) -{ - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct morus1280_ctx *ctx = crypto_aead_ctx(tfm); - struct morus1280_ops OPS = { - .skcipher_walk_init = skcipher_walk_aead_encrypt, - .crypt_blocks = ctx->ops->enc, - .crypt_tail = ctx->ops->enc_tail, - }; - - struct morus1280_block tag = {}; - unsigned int authsize = crypto_aead_authsize(tfm); - unsigned int cryptlen = req->cryptlen; - - crypto_morus1280_glue_crypt(req, OPS, cryptlen, &tag); - - scatterwalk_map_and_copy(tag.bytes, req->dst, - req->assoclen + cryptlen, authsize, 1); - return 0; -} -EXPORT_SYMBOL_GPL(crypto_morus1280_glue_encrypt); - -int crypto_morus1280_glue_decrypt(struct aead_request *req) -{ - static const u8 zeros[MORUS1280_BLOCK_SIZE] = {}; - - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct morus1280_ctx *ctx = crypto_aead_ctx(tfm); - struct morus1280_ops OPS = { - .skcipher_walk_init = skcipher_walk_aead_decrypt, - .crypt_blocks = ctx->ops->dec, - .crypt_tail = ctx->ops->dec_tail, - }; - - struct morus1280_block tag; - unsigned int authsize = crypto_aead_authsize(tfm); - unsigned int cryptlen = req->cryptlen - authsize; - - scatterwalk_map_and_copy(tag.bytes, req->src, - req->assoclen + cryptlen, authsize, 0); - - crypto_morus1280_glue_crypt(req, OPS, cryptlen, &tag); - - return crypto_memneq(tag.bytes, zeros, authsize) ? -EBADMSG : 0; -} -EXPORT_SYMBOL_GPL(crypto_morus1280_glue_decrypt); - -void crypto_morus1280_glue_init_ops(struct crypto_aead *aead, - const struct morus1280_glue_ops *ops) -{ - struct morus1280_ctx *ctx = crypto_aead_ctx(aead); - ctx->ops = ops; -} -EXPORT_SYMBOL_GPL(crypto_morus1280_glue_init_ops); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Ondrej Mosnacek "); -MODULE_DESCRIPTION("MORUS-1280 AEAD mode -- glue for x86 optimizations"); diff --git a/arch/x86/crypto/morus640-sse2-asm.S b/arch/x86/crypto/morus640-sse2-asm.S deleted file mode 100644 index a60891101bbd..000000000000 --- a/arch/x86/crypto/morus640-sse2-asm.S +++ /dev/null @@ -1,612 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * SSE2 implementation of MORUS-640 - * - * Copyright (c) 2017-2018 Ondrej Mosnacek - * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. - */ - -#include -#include - -#define SHUFFLE_MASK(i0, i1, i2, i3) \ - (i0 | (i1 << 2) | (i2 << 4) | (i3 << 6)) - -#define MASK1 SHUFFLE_MASK(3, 0, 1, 2) -#define MASK2 SHUFFLE_MASK(2, 3, 0, 1) -#define MASK3 SHUFFLE_MASK(1, 2, 3, 0) - -#define STATE0 %xmm0 -#define STATE1 %xmm1 -#define STATE2 %xmm2 -#define STATE3 %xmm3 -#define STATE4 %xmm4 -#define KEY %xmm5 -#define MSG %xmm5 -#define T0 %xmm6 -#define T1 %xmm7 - -.section .rodata.cst16.morus640_const, "aM", @progbits, 32 -.align 16 -.Lmorus640_const_0: - .byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d - .byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62 -.Lmorus640_const_1: - .byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1 - .byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd - -.section .rodata.cst16.morus640_counter, "aM", @progbits, 16 -.align 16 -.Lmorus640_counter: - .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 - .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f - -.text - -.macro morus640_round s0, s1, s2, s3, s4, b, w - movdqa \s1, T0 - pand \s2, T0 - pxor T0, \s0 - pxor \s3, \s0 - movdqa \s0, T0 - pslld $\b, T0 - psrld $(32 - \b), \s0 - pxor T0, \s0 - pshufd $\w, \s3, \s3 -.endm - -/* - * __morus640_update: internal ABI - * input: - * STATE[0-4] - input state - * MSG - message block - * output: - * STATE[0-4] - output state - * changed: - * T0 - */ -__morus640_update: - morus640_round STATE0, STATE1, STATE2, STATE3, STATE4, 5, MASK1 - pxor MSG, STATE1 - morus640_round STATE1, STATE2, STATE3, STATE4, STATE0, 31, MASK2 - pxor MSG, STATE2 - morus640_round STATE2, STATE3, STATE4, STATE0, STATE1, 7, MASK3 - pxor MSG, STATE3 - morus640_round STATE3, STATE4, STATE0, STATE1, STATE2, 22, MASK2 - pxor MSG, STATE4 - morus640_round STATE4, STATE0, STATE1, STATE2, STATE3, 13, MASK1 - ret -ENDPROC(__morus640_update) - - -/* - * __morus640_update_zero: internal ABI - * input: - * STATE[0-4] - input state - * output: - * STATE[0-4] - output state - * changed: - * T0 - */ -__morus640_update_zero: - morus640_round STATE0, STATE1, STATE2, STATE3, STATE4, 5, MASK1 - morus640_round STATE1, STATE2, STATE3, STATE4, STATE0, 31, MASK2 - morus640_round STATE2, STATE3, STATE4, STATE0, STATE1, 7, MASK3 - morus640_round STATE3, STATE4, STATE0, STATE1, STATE2, 22, MASK2 - morus640_round STATE4, STATE0, STATE1, STATE2, STATE3, 13, MASK1 - ret -ENDPROC(__morus640_update_zero) - -/* - * __load_partial: internal ABI - * input: - * %rsi - src - * %rcx - bytes - * output: - * MSG - message block - * changed: - * T0 - * %r8 - * %r9 - */ -__load_partial: - xor %r9d, %r9d - pxor MSG, MSG - - mov %rcx, %r8 - and $0x1, %r8 - jz .Lld_partial_1 - - mov %rcx, %r8 - and $0x1E, %r8 - add %rsi, %r8 - mov (%r8), %r9b - -.Lld_partial_1: - mov %rcx, %r8 - and $0x2, %r8 - jz .Lld_partial_2 - - mov %rcx, %r8 - and $0x1C, %r8 - add %rsi, %r8 - shl $16, %r9 - mov (%r8), %r9w - -.Lld_partial_2: - mov %rcx, %r8 - and $0x4, %r8 - jz .Lld_partial_4 - - mov %rcx, %r8 - and $0x18, %r8 - add %rsi, %r8 - shl $32, %r9 - mov (%r8), %r8d - xor %r8, %r9 - -.Lld_partial_4: - movq %r9, MSG - - mov %rcx, %r8 - and $0x8, %r8 - jz .Lld_partial_8 - - mov %rcx, %r8 - and $0x10, %r8 - add %rsi, %r8 - pslldq $8, MSG - movq (%r8), T0 - pxor T0, MSG - -.Lld_partial_8: - ret -ENDPROC(__load_partial) - -/* - * __store_partial: internal ABI - * input: - * %rdx - dst - * %rcx - bytes - * output: - * T0 - message block - * changed: - * %r8 - * %r9 - * %r10 - */ -__store_partial: - mov %rcx, %r8 - mov %rdx, %r9 - - movq T0, %r10 - - cmp $8, %r8 - jl .Lst_partial_8 - - mov %r10, (%r9) - psrldq $8, T0 - movq T0, %r10 - - sub $8, %r8 - add $8, %r9 - -.Lst_partial_8: - cmp $4, %r8 - jl .Lst_partial_4 - - mov %r10d, (%r9) - shr $32, %r10 - - sub $4, %r8 - add $4, %r9 - -.Lst_partial_4: - cmp $2, %r8 - jl .Lst_partial_2 - - mov %r10w, (%r9) - shr $16, %r10 - - sub $2, %r8 - add $2, %r9 - -.Lst_partial_2: - cmp $1, %r8 - jl .Lst_partial_1 - - mov %r10b, (%r9) - -.Lst_partial_1: - ret -ENDPROC(__store_partial) - -/* - * void crypto_morus640_sse2_init(void *state, const void *key, const void *iv); - */ -ENTRY(crypto_morus640_sse2_init) - FRAME_BEGIN - - /* load IV: */ - movdqu (%rdx), STATE0 - /* load key: */ - movdqu (%rsi), KEY - movdqa KEY, STATE1 - /* load all ones: */ - pcmpeqd STATE2, STATE2 - /* load the constants: */ - movdqa .Lmorus640_const_0, STATE3 - movdqa .Lmorus640_const_1, STATE4 - - /* update 16 times with zero: */ - call __morus640_update_zero - call __morus640_update_zero - call __morus640_update_zero - call __morus640_update_zero - call __morus640_update_zero - call __morus640_update_zero - call __morus640_update_zero - call __morus640_update_zero - call __morus640_update_zero - call __morus640_update_zero - call __morus640_update_zero - call __morus640_update_zero - call __morus640_update_zero - call __morus640_update_zero - call __morus640_update_zero - call __morus640_update_zero - - /* xor-in the key again after updates: */ - pxor KEY, STATE1 - - /* store the state: */ - movdqu STATE0, (0 * 16)(%rdi) - movdqu STATE1, (1 * 16)(%rdi) - movdqu STATE2, (2 * 16)(%rdi) - movdqu STATE3, (3 * 16)(%rdi) - movdqu STATE4, (4 * 16)(%rdi) - - FRAME_END - ret -ENDPROC(crypto_morus640_sse2_init) - -/* - * void crypto_morus640_sse2_ad(void *state, const void *data, - * unsigned int length); - */ -ENTRY(crypto_morus640_sse2_ad) - FRAME_BEGIN - - cmp $16, %rdx - jb .Lad_out - - /* load the state: */ - movdqu (0 * 16)(%rdi), STATE0 - movdqu (1 * 16)(%rdi), STATE1 - movdqu (2 * 16)(%rdi), STATE2 - movdqu (3 * 16)(%rdi), STATE3 - movdqu (4 * 16)(%rdi), STATE4 - - mov %rsi, %r8 - and $0xF, %r8 - jnz .Lad_u_loop - -.align 4 -.Lad_a_loop: - movdqa (%rsi), MSG - call __morus640_update - sub $16, %rdx - add $16, %rsi - cmp $16, %rdx - jge .Lad_a_loop - - jmp .Lad_cont -.align 4 -.Lad_u_loop: - movdqu (%rsi), MSG - call __morus640_update - sub $16, %rdx - add $16, %rsi - cmp $16, %rdx - jge .Lad_u_loop - -.Lad_cont: - /* store the state: */ - movdqu STATE0, (0 * 16)(%rdi) - movdqu STATE1, (1 * 16)(%rdi) - movdqu STATE2, (2 * 16)(%rdi) - movdqu STATE3, (3 * 16)(%rdi) - movdqu STATE4, (4 * 16)(%rdi) - -.Lad_out: - FRAME_END - ret -ENDPROC(crypto_morus640_sse2_ad) - -/* - * void crypto_morus640_sse2_enc(void *state, const void *src, void *dst, - * unsigned int length); - */ -ENTRY(crypto_morus640_sse2_enc) - FRAME_BEGIN - - cmp $16, %rcx - jb .Lenc_out - - /* load the state: */ - movdqu (0 * 16)(%rdi), STATE0 - movdqu (1 * 16)(%rdi), STATE1 - movdqu (2 * 16)(%rdi), STATE2 - movdqu (3 * 16)(%rdi), STATE3 - movdqu (4 * 16)(%rdi), STATE4 - - mov %rsi, %r8 - or %rdx, %r8 - and $0xF, %r8 - jnz .Lenc_u_loop - -.align 4 -.Lenc_a_loop: - movdqa (%rsi), MSG - movdqa MSG, T0 - pxor STATE0, T0 - pshufd $MASK3, STATE1, T1 - pxor T1, T0 - movdqa STATE2, T1 - pand STATE3, T1 - pxor T1, T0 - movdqa T0, (%rdx) - - call __morus640_update - sub $16, %rcx - add $16, %rsi - add $16, %rdx - cmp $16, %rcx - jge .Lenc_a_loop - - jmp .Lenc_cont -.align 4 -.Lenc_u_loop: - movdqu (%rsi), MSG - movdqa MSG, T0 - pxor STATE0, T0 - pshufd $MASK3, STATE1, T1 - pxor T1, T0 - movdqa STATE2, T1 - pand STATE3, T1 - pxor T1, T0 - movdqu T0, (%rdx) - - call __morus640_update - sub $16, %rcx - add $16, %rsi - add $16, %rdx - cmp $16, %rcx - jge .Lenc_u_loop - -.Lenc_cont: - /* store the state: */ - movdqu STATE0, (0 * 16)(%rdi) - movdqu STATE1, (1 * 16)(%rdi) - movdqu STATE2, (2 * 16)(%rdi) - movdqu STATE3, (3 * 16)(%rdi) - movdqu STATE4, (4 * 16)(%rdi) - -.Lenc_out: - FRAME_END - ret -ENDPROC(crypto_morus640_sse2_enc) - -/* - * void crypto_morus640_sse2_enc_tail(void *state, const void *src, void *dst, - * unsigned int length); - */ -ENTRY(crypto_morus640_sse2_enc_tail) - FRAME_BEGIN - - /* load the state: */ - movdqu (0 * 16)(%rdi), STATE0 - movdqu (1 * 16)(%rdi), STATE1 - movdqu (2 * 16)(%rdi), STATE2 - movdqu (3 * 16)(%rdi), STATE3 - movdqu (4 * 16)(%rdi), STATE4 - - /* encrypt message: */ - call __load_partial - - movdqa MSG, T0 - pxor STATE0, T0 - pshufd $MASK3, STATE1, T1 - pxor T1, T0 - movdqa STATE2, T1 - pand STATE3, T1 - pxor T1, T0 - - call __store_partial - - call __morus640_update - - /* store the state: */ - movdqu STATE0, (0 * 16)(%rdi) - movdqu STATE1, (1 * 16)(%rdi) - movdqu STATE2, (2 * 16)(%rdi) - movdqu STATE3, (3 * 16)(%rdi) - movdqu STATE4, (4 * 16)(%rdi) - - FRAME_END - ret -ENDPROC(crypto_morus640_sse2_enc_tail) - -/* - * void crypto_morus640_sse2_dec(void *state, const void *src, void *dst, - * unsigned int length); - */ -ENTRY(crypto_morus640_sse2_dec) - FRAME_BEGIN - - cmp $16, %rcx - jb .Ldec_out - - /* load the state: */ - movdqu (0 * 16)(%rdi), STATE0 - movdqu (1 * 16)(%rdi), STATE1 - movdqu (2 * 16)(%rdi), STATE2 - movdqu (3 * 16)(%rdi), STATE3 - movdqu (4 * 16)(%rdi), STATE4 - - mov %rsi, %r8 - or %rdx, %r8 - and $0xF, %r8 - jnz .Ldec_u_loop - -.align 4 -.Ldec_a_loop: - movdqa (%rsi), MSG - pxor STATE0, MSG - pshufd $MASK3, STATE1, T0 - pxor T0, MSG - movdqa STATE2, T0 - pand STATE3, T0 - pxor T0, MSG - movdqa MSG, (%rdx) - - call __morus640_update - sub $16, %rcx - add $16, %rsi - add $16, %rdx - cmp $16, %rcx - jge .Ldec_a_loop - - jmp .Ldec_cont -.align 4 -.Ldec_u_loop: - movdqu (%rsi), MSG - pxor STATE0, MSG - pshufd $MASK3, STATE1, T0 - pxor T0, MSG - movdqa STATE2, T0 - pand STATE3, T0 - pxor T0, MSG - movdqu MSG, (%rdx) - - call __morus640_update - sub $16, %rcx - add $16, %rsi - add $16, %rdx - cmp $16, %rcx - jge .Ldec_u_loop - -.Ldec_cont: - /* store the state: */ - movdqu STATE0, (0 * 16)(%rdi) - movdqu STATE1, (1 * 16)(%rdi) - movdqu STATE2, (2 * 16)(%rdi) - movdqu STATE3, (3 * 16)(%rdi) - movdqu STATE4, (4 * 16)(%rdi) - -.Ldec_out: - FRAME_END - ret -ENDPROC(crypto_morus640_sse2_dec) - -/* - * void crypto_morus640_sse2_dec_tail(void *state, const void *src, void *dst, - * unsigned int length); - */ -ENTRY(crypto_morus640_sse2_dec_tail) - FRAME_BEGIN - - /* load the state: */ - movdqu (0 * 16)(%rdi), STATE0 - movdqu (1 * 16)(%rdi), STATE1 - movdqu (2 * 16)(%rdi), STATE2 - movdqu (3 * 16)(%rdi), STATE3 - movdqu (4 * 16)(%rdi), STATE4 - - /* decrypt message: */ - call __load_partial - - pxor STATE0, MSG - pshufd $MASK3, STATE1, T0 - pxor T0, MSG - movdqa STATE2, T0 - pand STATE3, T0 - pxor T0, MSG - movdqa MSG, T0 - - call __store_partial - - /* mask with byte count: */ - movq %rcx, T0 - punpcklbw T0, T0 - punpcklbw T0, T0 - punpcklbw T0, T0 - punpcklbw T0, T0 - movdqa .Lmorus640_counter, T1 - pcmpgtb T1, T0 - pand T0, MSG - - call __morus640_update - - /* store the state: */ - movdqu STATE0, (0 * 16)(%rdi) - movdqu STATE1, (1 * 16)(%rdi) - movdqu STATE2, (2 * 16)(%rdi) - movdqu STATE3, (3 * 16)(%rdi) - movdqu STATE4, (4 * 16)(%rdi) - - FRAME_END - ret -ENDPROC(crypto_morus640_sse2_dec_tail) - -/* - * void crypto_morus640_sse2_final(void *state, void *tag_xor, - * u64 assoclen, u64 cryptlen); - */ -ENTRY(crypto_morus640_sse2_final) - FRAME_BEGIN - - /* load the state: */ - movdqu (0 * 16)(%rdi), STATE0 - movdqu (1 * 16)(%rdi), STATE1 - movdqu (2 * 16)(%rdi), STATE2 - movdqu (3 * 16)(%rdi), STATE3 - movdqu (4 * 16)(%rdi), STATE4 - - /* xor state[0] into state[4]: */ - pxor STATE0, STATE4 - - /* prepare length block: */ - movq %rdx, MSG - movq %rcx, T0 - pslldq $8, T0 - pxor T0, MSG - psllq $3, MSG /* multiply by 8 (to get bit count) */ - - /* update state: */ - call __morus640_update - call __morus640_update - call __morus640_update - call __morus640_update - call __morus640_update - call __morus640_update - call __morus640_update - call __morus640_update - call __morus640_update - call __morus640_update - - /* xor tag: */ - movdqu (%rsi), MSG - - pxor STATE0, MSG - pshufd $MASK3, STATE1, T0 - pxor T0, MSG - movdqa STATE2, T0 - pand STATE3, T0 - pxor T0, MSG - - movdqu MSG, (%rsi) - - FRAME_END - ret -ENDPROC(crypto_morus640_sse2_final) diff --git a/arch/x86/crypto/morus640-sse2-glue.c b/arch/x86/crypto/morus640-sse2-glue.c deleted file mode 100644 index 8ef68134aef4..000000000000 --- a/arch/x86/crypto/morus640-sse2-glue.c +++ /dev/null @@ -1,61 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * The MORUS-640 Authenticated-Encryption Algorithm - * Glue for SSE2 implementation - * - * Copyright (c) 2016-2018 Ondrej Mosnacek - * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. - */ - -#include -#include -#include -#include -#include -#include - -asmlinkage void crypto_morus640_sse2_init(void *state, const void *key, - const void *iv); -asmlinkage void crypto_morus640_sse2_ad(void *state, const void *data, - unsigned int length); - -asmlinkage void crypto_morus640_sse2_enc(void *state, const void *src, - void *dst, unsigned int length); -asmlinkage void crypto_morus640_sse2_dec(void *state, const void *src, - void *dst, unsigned int length); - -asmlinkage void crypto_morus640_sse2_enc_tail(void *state, const void *src, - void *dst, unsigned int length); -asmlinkage void crypto_morus640_sse2_dec_tail(void *state, const void *src, - void *dst, unsigned int length); - -asmlinkage void crypto_morus640_sse2_final(void *state, void *tag_xor, - u64 assoclen, u64 cryptlen); - -MORUS640_DECLARE_ALG(sse2, "morus640-sse2", 400); - -static struct simd_aead_alg *simd_alg; - -static int __init crypto_morus640_sse2_module_init(void) -{ - if (!boot_cpu_has(X86_FEATURE_XMM2) || - !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL)) - return -ENODEV; - - return simd_register_aeads_compat(&crypto_morus640_sse2_alg, 1, - &simd_alg); -} - -static void __exit crypto_morus640_sse2_module_exit(void) -{ - simd_unregister_aeads(&crypto_morus640_sse2_alg, 1, &simd_alg); -} - -module_init(crypto_morus640_sse2_module_init); -module_exit(crypto_morus640_sse2_module_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Ondrej Mosnacek "); -MODULE_DESCRIPTION("MORUS-640 AEAD algorithm -- SSE2 implementation"); -MODULE_ALIAS_CRYPTO("morus640"); -MODULE_ALIAS_CRYPTO("morus640-sse2"); diff --git a/arch/x86/crypto/morus640_glue.c b/arch/x86/crypto/morus640_glue.c deleted file mode 100644 index d8b5fd6cef29..000000000000 --- a/arch/x86/crypto/morus640_glue.c +++ /dev/null @@ -1,200 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * The MORUS-640 Authenticated-Encryption Algorithm - * Common x86 SIMD glue skeleton - * - * Copyright (c) 2016-2018 Ondrej Mosnacek - * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -struct morus640_state { - struct morus640_block s[MORUS_STATE_BLOCKS]; -}; - -struct morus640_ops { - int (*skcipher_walk_init)(struct skcipher_walk *walk, - struct aead_request *req, bool atomic); - - void (*crypt_blocks)(void *state, const void *src, void *dst, - unsigned int length); - void (*crypt_tail)(void *state, const void *src, void *dst, - unsigned int length); -}; - -static void crypto_morus640_glue_process_ad( - struct morus640_state *state, - const struct morus640_glue_ops *ops, - struct scatterlist *sg_src, unsigned int assoclen) -{ - struct scatter_walk walk; - struct morus640_block buf; - unsigned int pos = 0; - - scatterwalk_start(&walk, sg_src); - while (assoclen != 0) { - unsigned int size = scatterwalk_clamp(&walk, assoclen); - unsigned int left = size; - void *mapped = scatterwalk_map(&walk); - const u8 *src = (const u8 *)mapped; - - if (pos + size >= MORUS640_BLOCK_SIZE) { - if (pos > 0) { - unsigned int fill = MORUS640_BLOCK_SIZE - pos; - memcpy(buf.bytes + pos, src, fill); - ops->ad(state, buf.bytes, MORUS640_BLOCK_SIZE); - pos = 0; - left -= fill; - src += fill; - } - - ops->ad(state, src, left); - src += left & ~(MORUS640_BLOCK_SIZE - 1); - left &= MORUS640_BLOCK_SIZE - 1; - } - - memcpy(buf.bytes + pos, src, left); - - pos += left; - assoclen -= size; - scatterwalk_unmap(mapped); - scatterwalk_advance(&walk, size); - scatterwalk_done(&walk, 0, assoclen); - } - - if (pos > 0) { - memset(buf.bytes + pos, 0, MORUS640_BLOCK_SIZE - pos); - ops->ad(state, buf.bytes, MORUS640_BLOCK_SIZE); - } -} - -static void crypto_morus640_glue_process_crypt(struct morus640_state *state, - struct morus640_ops ops, - struct skcipher_walk *walk) -{ - while (walk->nbytes >= MORUS640_BLOCK_SIZE) { - ops.crypt_blocks(state, walk->src.virt.addr, - walk->dst.virt.addr, - round_down(walk->nbytes, MORUS640_BLOCK_SIZE)); - skcipher_walk_done(walk, walk->nbytes % MORUS640_BLOCK_SIZE); - } - - if (walk->nbytes) { - ops.crypt_tail(state, walk->src.virt.addr, walk->dst.virt.addr, - walk->nbytes); - skcipher_walk_done(walk, 0); - } -} - -int crypto_morus640_glue_setkey(struct crypto_aead *aead, const u8 *key, - unsigned int keylen) -{ - struct morus640_ctx *ctx = crypto_aead_ctx(aead); - - if (keylen != MORUS640_BLOCK_SIZE) { - crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN); - return -EINVAL; - } - - memcpy(ctx->key.bytes, key, MORUS640_BLOCK_SIZE); - return 0; -} -EXPORT_SYMBOL_GPL(crypto_morus640_glue_setkey); - -int crypto_morus640_glue_setauthsize(struct crypto_aead *tfm, - unsigned int authsize) -{ - return (authsize <= MORUS_MAX_AUTH_SIZE) ? 0 : -EINVAL; -} -EXPORT_SYMBOL_GPL(crypto_morus640_glue_setauthsize); - -static void crypto_morus640_glue_crypt(struct aead_request *req, - struct morus640_ops ops, - unsigned int cryptlen, - struct morus640_block *tag_xor) -{ - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct morus640_ctx *ctx = crypto_aead_ctx(tfm); - struct morus640_state state; - struct skcipher_walk walk; - - ops.skcipher_walk_init(&walk, req, true); - - kernel_fpu_begin(); - - ctx->ops->init(&state, &ctx->key, req->iv); - crypto_morus640_glue_process_ad(&state, ctx->ops, req->src, req->assoclen); - crypto_morus640_glue_process_crypt(&state, ops, &walk); - ctx->ops->final(&state, tag_xor, req->assoclen, cryptlen); - - kernel_fpu_end(); -} - -int crypto_morus640_glue_encrypt(struct aead_request *req) -{ - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct morus640_ctx *ctx = crypto_aead_ctx(tfm); - struct morus640_ops OPS = { - .skcipher_walk_init = skcipher_walk_aead_encrypt, - .crypt_blocks = ctx->ops->enc, - .crypt_tail = ctx->ops->enc_tail, - }; - - struct morus640_block tag = {}; - unsigned int authsize = crypto_aead_authsize(tfm); - unsigned int cryptlen = req->cryptlen; - - crypto_morus640_glue_crypt(req, OPS, cryptlen, &tag); - - scatterwalk_map_and_copy(tag.bytes, req->dst, - req->assoclen + cryptlen, authsize, 1); - return 0; -} -EXPORT_SYMBOL_GPL(crypto_morus640_glue_encrypt); - -int crypto_morus640_glue_decrypt(struct aead_request *req) -{ - static const u8 zeros[MORUS640_BLOCK_SIZE] = {}; - - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct morus640_ctx *ctx = crypto_aead_ctx(tfm); - struct morus640_ops OPS = { - .skcipher_walk_init = skcipher_walk_aead_decrypt, - .crypt_blocks = ctx->ops->dec, - .crypt_tail = ctx->ops->dec_tail, - }; - - struct morus640_block tag; - unsigned int authsize = crypto_aead_authsize(tfm); - unsigned int cryptlen = req->cryptlen - authsize; - - scatterwalk_map_and_copy(tag.bytes, req->src, - req->assoclen + cryptlen, authsize, 0); - - crypto_morus640_glue_crypt(req, OPS, cryptlen, &tag); - - return crypto_memneq(tag.bytes, zeros, authsize) ? -EBADMSG : 0; -} -EXPORT_SYMBOL_GPL(crypto_morus640_glue_decrypt); - -void crypto_morus640_glue_init_ops(struct crypto_aead *aead, - const struct morus640_glue_ops *ops) -{ - struct morus640_ctx *ctx = crypto_aead_ctx(aead); - ctx->ops = ops; -} -EXPORT_SYMBOL_GPL(crypto_morus640_glue_init_ops); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Ondrej Mosnacek "); -MODULE_DESCRIPTION("MORUS-640 AEAD mode -- glue for x86 optimizations"); -- cgit v1.2.1 From 520c1993bbe620e39fd93de1a01b9e0dc0b97aa6 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 3 Jul 2019 10:55:07 +0200 Subject: crypto: aegis128l/aegis256 - remove x86 and generic implementations Three variants of AEGIS were proposed for the CAESAR competition, and only one was selected for the final portfolio: AEGIS128. The other variants, AEGIS128L and AEGIS256, are not likely to ever turn up in networking protocols or other places where interoperability between Linux and other systems is a concern, nor are they likely to be subjected to further cryptanalysis. However, uninformed users may think that AEGIS128L (which is faster) is equally fit for use. So let's remove them now, before anyone starts using them and we are forced to support them forever. Note that there are no known flaws in the algorithms or in any of these implementations, but they have simply outlived their usefulness. Reviewed-by: Ondrej Mosnacek Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/x86/crypto/Makefile | 4 - arch/x86/crypto/aegis128l-aesni-asm.S | 823 --------------------------------- arch/x86/crypto/aegis128l-aesni-glue.c | 293 ------------ arch/x86/crypto/aegis256-aesni-asm.S | 700 ---------------------------- arch/x86/crypto/aegis256-aesni-glue.c | 293 ------------ 5 files changed, 2113 deletions(-) delete mode 100644 arch/x86/crypto/aegis128l-aesni-asm.S delete mode 100644 arch/x86/crypto/aegis128l-aesni-glue.c delete mode 100644 arch/x86/crypto/aegis256-aesni-asm.S delete mode 100644 arch/x86/crypto/aegis256-aesni-glue.c (limited to 'arch') diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 6f1d825fbb09..759b1a927826 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -36,8 +36,6 @@ obj-$(CONFIG_CRYPTO_CRCT10DIF_PCLMUL) += crct10dif-pclmul.o obj-$(CONFIG_CRYPTO_POLY1305_X86_64) += poly1305-x86_64.o obj-$(CONFIG_CRYPTO_AEGIS128_AESNI_SSE2) += aegis128-aesni.o -obj-$(CONFIG_CRYPTO_AEGIS128L_AESNI_SSE2) += aegis128l-aesni.o -obj-$(CONFIG_CRYPTO_AEGIS256_AESNI_SSE2) += aegis256-aesni.o obj-$(CONFIG_CRYPTO_NHPOLY1305_SSE2) += nhpoly1305-sse2.o obj-$(CONFIG_CRYPTO_NHPOLY1305_AVX2) += nhpoly1305-avx2.o @@ -70,8 +68,6 @@ chacha-x86_64-y := chacha-ssse3-x86_64.o chacha_glue.o serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o aegis128-aesni-y := aegis128-aesni-asm.o aegis128-aesni-glue.o -aegis128l-aesni-y := aegis128l-aesni-asm.o aegis128l-aesni-glue.o -aegis256-aesni-y := aegis256-aesni-asm.o aegis256-aesni-glue.o nhpoly1305-sse2-y := nh-sse2-x86_64.o nhpoly1305-sse2-glue.o diff --git a/arch/x86/crypto/aegis128l-aesni-asm.S b/arch/x86/crypto/aegis128l-aesni-asm.S deleted file mode 100644 index 1461ef00c0e8..000000000000 --- a/arch/x86/crypto/aegis128l-aesni-asm.S +++ /dev/null @@ -1,823 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * AES-NI + SSE2 implementation of AEGIS-128L - * - * Copyright (c) 2017-2018 Ondrej Mosnacek - * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. - */ - -#include -#include - -#define STATE0 %xmm0 -#define STATE1 %xmm1 -#define STATE2 %xmm2 -#define STATE3 %xmm3 -#define STATE4 %xmm4 -#define STATE5 %xmm5 -#define STATE6 %xmm6 -#define STATE7 %xmm7 -#define MSG0 %xmm8 -#define MSG1 %xmm9 -#define T0 %xmm10 -#define T1 %xmm11 -#define T2 %xmm12 -#define T3 %xmm13 - -#define STATEP %rdi -#define LEN %rsi -#define SRC %rdx -#define DST %rcx - -.section .rodata.cst16.aegis128l_const, "aM", @progbits, 32 -.align 16 -.Laegis128l_const_0: - .byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d - .byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62 -.Laegis128l_const_1: - .byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1 - .byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd - -.section .rodata.cst16.aegis128l_counter, "aM", @progbits, 16 -.align 16 -.Laegis128l_counter0: - .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 - .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f -.Laegis128l_counter1: - .byte 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17 - .byte 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f - -.text - -/* - * __load_partial: internal ABI - * input: - * LEN - bytes - * SRC - src - * output: - * MSG0 - first message block - * MSG1 - second message block - * changed: - * T0 - * %r8 - * %r9 - */ -__load_partial: - xor %r9d, %r9d - pxor MSG0, MSG0 - pxor MSG1, MSG1 - - mov LEN, %r8 - and $0x1, %r8 - jz .Lld_partial_1 - - mov LEN, %r8 - and $0x1E, %r8 - add SRC, %r8 - mov (%r8), %r9b - -.Lld_partial_1: - mov LEN, %r8 - and $0x2, %r8 - jz .Lld_partial_2 - - mov LEN, %r8 - and $0x1C, %r8 - add SRC, %r8 - shl $0x10, %r9 - mov (%r8), %r9w - -.Lld_partial_2: - mov LEN, %r8 - and $0x4, %r8 - jz .Lld_partial_4 - - mov LEN, %r8 - and $0x18, %r8 - add SRC, %r8 - shl $32, %r9 - mov (%r8), %r8d - xor %r8, %r9 - -.Lld_partial_4: - movq %r9, MSG0 - - mov LEN, %r8 - and $0x8, %r8 - jz .Lld_partial_8 - - mov LEN, %r8 - and $0x10, %r8 - add SRC, %r8 - pslldq $8, MSG0 - movq (%r8), T0 - pxor T0, MSG0 - -.Lld_partial_8: - mov LEN, %r8 - and $0x10, %r8 - jz .Lld_partial_16 - - movdqa MSG0, MSG1 - movdqu (SRC), MSG0 - -.Lld_partial_16: - ret -ENDPROC(__load_partial) - -/* - * __store_partial: internal ABI - * input: - * LEN - bytes - * DST - dst - * output: - * T0 - first message block - * T1 - second message block - * changed: - * %r8 - * %r9 - * %r10 - */ -__store_partial: - mov LEN, %r8 - mov DST, %r9 - - cmp $16, %r8 - jl .Lst_partial_16 - - movdqu T0, (%r9) - movdqa T1, T0 - - sub $16, %r8 - add $16, %r9 - -.Lst_partial_16: - movq T0, %r10 - - cmp $8, %r8 - jl .Lst_partial_8 - - mov %r10, (%r9) - psrldq $8, T0 - movq T0, %r10 - - sub $8, %r8 - add $8, %r9 - -.Lst_partial_8: - cmp $4, %r8 - jl .Lst_partial_4 - - mov %r10d, (%r9) - shr $32, %r10 - - sub $4, %r8 - add $4, %r9 - -.Lst_partial_4: - cmp $2, %r8 - jl .Lst_partial_2 - - mov %r10w, (%r9) - shr $0x10, %r10 - - sub $2, %r8 - add $2, %r9 - -.Lst_partial_2: - cmp $1, %r8 - jl .Lst_partial_1 - - mov %r10b, (%r9) - -.Lst_partial_1: - ret -ENDPROC(__store_partial) - -.macro update - movdqa STATE7, T0 - aesenc STATE0, STATE7 - aesenc STATE1, STATE0 - aesenc STATE2, STATE1 - aesenc STATE3, STATE2 - aesenc STATE4, STATE3 - aesenc STATE5, STATE4 - aesenc STATE6, STATE5 - aesenc T0, STATE6 -.endm - -.macro update0 - update - pxor MSG0, STATE7 - pxor MSG1, STATE3 -.endm - -.macro update1 - update - pxor MSG0, STATE6 - pxor MSG1, STATE2 -.endm - -.macro update2 - update - pxor MSG0, STATE5 - pxor MSG1, STATE1 -.endm - -.macro update3 - update - pxor MSG0, STATE4 - pxor MSG1, STATE0 -.endm - -.macro update4 - update - pxor MSG0, STATE3 - pxor MSG1, STATE7 -.endm - -.macro update5 - update - pxor MSG0, STATE2 - pxor MSG1, STATE6 -.endm - -.macro update6 - update - pxor MSG0, STATE1 - pxor MSG1, STATE5 -.endm - -.macro update7 - update - pxor MSG0, STATE0 - pxor MSG1, STATE4 -.endm - -.macro state_load - movdqu 0x00(STATEP), STATE0 - movdqu 0x10(STATEP), STATE1 - movdqu 0x20(STATEP), STATE2 - movdqu 0x30(STATEP), STATE3 - movdqu 0x40(STATEP), STATE4 - movdqu 0x50(STATEP), STATE5 - movdqu 0x60(STATEP), STATE6 - movdqu 0x70(STATEP), STATE7 -.endm - -.macro state_store s0 s1 s2 s3 s4 s5 s6 s7 - movdqu \s7, 0x00(STATEP) - movdqu \s0, 0x10(STATEP) - movdqu \s1, 0x20(STATEP) - movdqu \s2, 0x30(STATEP) - movdqu \s3, 0x40(STATEP) - movdqu \s4, 0x50(STATEP) - movdqu \s5, 0x60(STATEP) - movdqu \s6, 0x70(STATEP) -.endm - -.macro state_store0 - state_store STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 STATE6 STATE7 -.endm - -.macro state_store1 - state_store STATE7 STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 STATE6 -.endm - -.macro state_store2 - state_store STATE6 STATE7 STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 -.endm - -.macro state_store3 - state_store STATE5 STATE6 STATE7 STATE0 STATE1 STATE2 STATE3 STATE4 -.endm - -.macro state_store4 - state_store STATE4 STATE5 STATE6 STATE7 STATE0 STATE1 STATE2 STATE3 -.endm - -.macro state_store5 - state_store STATE3 STATE4 STATE5 STATE6 STATE7 STATE0 STATE1 STATE2 -.endm - -.macro state_store6 - state_store STATE2 STATE3 STATE4 STATE5 STATE6 STATE7 STATE0 STATE1 -.endm - -.macro state_store7 - state_store STATE1 STATE2 STATE3 STATE4 STATE5 STATE6 STATE7 STATE0 -.endm - -/* - * void crypto_aegis128l_aesni_init(void *state, const void *key, const void *iv); - */ -ENTRY(crypto_aegis128l_aesni_init) - FRAME_BEGIN - - /* load key: */ - movdqa (%rsi), MSG1 - movdqa MSG1, STATE0 - movdqa MSG1, STATE4 - movdqa MSG1, STATE5 - movdqa MSG1, STATE6 - movdqa MSG1, STATE7 - - /* load IV: */ - movdqu (%rdx), MSG0 - pxor MSG0, STATE0 - pxor MSG0, STATE4 - - /* load the constants: */ - movdqa .Laegis128l_const_0, STATE2 - movdqa .Laegis128l_const_1, STATE1 - movdqa STATE1, STATE3 - pxor STATE2, STATE5 - pxor STATE1, STATE6 - pxor STATE2, STATE7 - - /* update 10 times with IV and KEY: */ - update0 - update1 - update2 - update3 - update4 - update5 - update6 - update7 - update0 - update1 - - state_store1 - - FRAME_END - ret -ENDPROC(crypto_aegis128l_aesni_init) - -.macro ad_block a i - movdq\a (\i * 0x20 + 0x00)(SRC), MSG0 - movdq\a (\i * 0x20 + 0x10)(SRC), MSG1 - update\i - sub $0x20, LEN - cmp $0x20, LEN - jl .Lad_out_\i -.endm - -/* - * void crypto_aegis128l_aesni_ad(void *state, unsigned int length, - * const void *data); - */ -ENTRY(crypto_aegis128l_aesni_ad) - FRAME_BEGIN - - cmp $0x20, LEN - jb .Lad_out - - state_load - - mov SRC, %r8 - and $0xf, %r8 - jnz .Lad_u_loop - -.align 8 -.Lad_a_loop: - ad_block a 0 - ad_block a 1 - ad_block a 2 - ad_block a 3 - ad_block a 4 - ad_block a 5 - ad_block a 6 - ad_block a 7 - - add $0x100, SRC - jmp .Lad_a_loop - -.align 8 -.Lad_u_loop: - ad_block u 0 - ad_block u 1 - ad_block u 2 - ad_block u 3 - ad_block u 4 - ad_block u 5 - ad_block u 6 - ad_block u 7 - - add $0x100, SRC - jmp .Lad_u_loop - -.Lad_out_0: - state_store0 - FRAME_END - ret - -.Lad_out_1: - state_store1 - FRAME_END - ret - -.Lad_out_2: - state_store2 - FRAME_END - ret - -.Lad_out_3: - state_store3 - FRAME_END - ret - -.Lad_out_4: - state_store4 - FRAME_END - ret - -.Lad_out_5: - state_store5 - FRAME_END - ret - -.Lad_out_6: - state_store6 - FRAME_END - ret - -.Lad_out_7: - state_store7 - FRAME_END - ret - -.Lad_out: - FRAME_END - ret -ENDPROC(crypto_aegis128l_aesni_ad) - -.macro crypt m0 m1 s0 s1 s2 s3 s4 s5 s6 s7 - pxor \s1, \m0 - pxor \s6, \m0 - movdqa \s2, T3 - pand \s3, T3 - pxor T3, \m0 - - pxor \s2, \m1 - pxor \s5, \m1 - movdqa \s6, T3 - pand \s7, T3 - pxor T3, \m1 -.endm - -.macro crypt0 m0 m1 - crypt \m0 \m1 STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 STATE6 STATE7 -.endm - -.macro crypt1 m0 m1 - crypt \m0 \m1 STATE7 STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 STATE6 -.endm - -.macro crypt2 m0 m1 - crypt \m0 \m1 STATE6 STATE7 STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 -.endm - -.macro crypt3 m0 m1 - crypt \m0 \m1 STATE5 STATE6 STATE7 STATE0 STATE1 STATE2 STATE3 STATE4 -.endm - -.macro crypt4 m0 m1 - crypt \m0 \m1 STATE4 STATE5 STATE6 STATE7 STATE0 STATE1 STATE2 STATE3 -.endm - -.macro crypt5 m0 m1 - crypt \m0 \m1 STATE3 STATE4 STATE5 STATE6 STATE7 STATE0 STATE1 STATE2 -.endm - -.macro crypt6 m0 m1 - crypt \m0 \m1 STATE2 STATE3 STATE4 STATE5 STATE6 STATE7 STATE0 STATE1 -.endm - -.macro crypt7 m0 m1 - crypt \m0 \m1 STATE1 STATE2 STATE3 STATE4 STATE5 STATE6 STATE7 STATE0 -.endm - -.macro encrypt_block a i - movdq\a (\i * 0x20 + 0x00)(SRC), MSG0 - movdq\a (\i * 0x20 + 0x10)(SRC), MSG1 - movdqa MSG0, T0 - movdqa MSG1, T1 - crypt\i T0, T1 - movdq\a T0, (\i * 0x20 + 0x00)(DST) - movdq\a T1, (\i * 0x20 + 0x10)(DST) - - update\i - - sub $0x20, LEN - cmp $0x20, LEN - jl .Lenc_out_\i -.endm - -.macro decrypt_block a i - movdq\a (\i * 0x20 + 0x00)(SRC), MSG0 - movdq\a (\i * 0x20 + 0x10)(SRC), MSG1 - crypt\i MSG0, MSG1 - movdq\a MSG0, (\i * 0x20 + 0x00)(DST) - movdq\a MSG1, (\i * 0x20 + 0x10)(DST) - - update\i - - sub $0x20, LEN - cmp $0x20, LEN - jl .Ldec_out_\i -.endm - -/* - * void crypto_aegis128l_aesni_enc(void *state, unsigned int length, - * const void *src, void *dst); - */ -ENTRY(crypto_aegis128l_aesni_enc) - FRAME_BEGIN - - cmp $0x20, LEN - jb .Lenc_out - - state_load - - mov SRC, %r8 - or DST, %r8 - and $0xf, %r8 - jnz .Lenc_u_loop - -.align 8 -.Lenc_a_loop: - encrypt_block a 0 - encrypt_block a 1 - encrypt_block a 2 - encrypt_block a 3 - encrypt_block a 4 - encrypt_block a 5 - encrypt_block a 6 - encrypt_block a 7 - - add $0x100, SRC - add $0x100, DST - jmp .Lenc_a_loop - -.align 8 -.Lenc_u_loop: - encrypt_block u 0 - encrypt_block u 1 - encrypt_block u 2 - encrypt_block u 3 - encrypt_block u 4 - encrypt_block u 5 - encrypt_block u 6 - encrypt_block u 7 - - add $0x100, SRC - add $0x100, DST - jmp .Lenc_u_loop - -.Lenc_out_0: - state_store0 - FRAME_END - ret - -.Lenc_out_1: - state_store1 - FRAME_END - ret - -.Lenc_out_2: - state_store2 - FRAME_END - ret - -.Lenc_out_3: - state_store3 - FRAME_END - ret - -.Lenc_out_4: - state_store4 - FRAME_END - ret - -.Lenc_out_5: - state_store5 - FRAME_END - ret - -.Lenc_out_6: - state_store6 - FRAME_END - ret - -.Lenc_out_7: - state_store7 - FRAME_END - ret - -.Lenc_out: - FRAME_END - ret -ENDPROC(crypto_aegis128l_aesni_enc) - -/* - * void crypto_aegis128l_aesni_enc_tail(void *state, unsigned int length, - * const void *src, void *dst); - */ -ENTRY(crypto_aegis128l_aesni_enc_tail) - FRAME_BEGIN - - state_load - - /* encrypt message: */ - call __load_partial - - movdqa MSG0, T0 - movdqa MSG1, T1 - crypt0 T0, T1 - - call __store_partial - - update0 - - state_store0 - - FRAME_END - ret -ENDPROC(crypto_aegis128l_aesni_enc_tail) - -/* - * void crypto_aegis128l_aesni_dec(void *state, unsigned int length, - * const void *src, void *dst); - */ -ENTRY(crypto_aegis128l_aesni_dec) - FRAME_BEGIN - - cmp $0x20, LEN - jb .Ldec_out - - state_load - - mov SRC, %r8 - or DST, %r8 - and $0xF, %r8 - jnz .Ldec_u_loop - -.align 8 -.Ldec_a_loop: - decrypt_block a 0 - decrypt_block a 1 - decrypt_block a 2 - decrypt_block a 3 - decrypt_block a 4 - decrypt_block a 5 - decrypt_block a 6 - decrypt_block a 7 - - add $0x100, SRC - add $0x100, DST - jmp .Ldec_a_loop - -.align 8 -.Ldec_u_loop: - decrypt_block u 0 - decrypt_block u 1 - decrypt_block u 2 - decrypt_block u 3 - decrypt_block u 4 - decrypt_block u 5 - decrypt_block u 6 - decrypt_block u 7 - - add $0x100, SRC - add $0x100, DST - jmp .Ldec_u_loop - -.Ldec_out_0: - state_store0 - FRAME_END - ret - -.Ldec_out_1: - state_store1 - FRAME_END - ret - -.Ldec_out_2: - state_store2 - FRAME_END - ret - -.Ldec_out_3: - state_store3 - FRAME_END - ret - -.Ldec_out_4: - state_store4 - FRAME_END - ret - -.Ldec_out_5: - state_store5 - FRAME_END - ret - -.Ldec_out_6: - state_store6 - FRAME_END - ret - -.Ldec_out_7: - state_store7 - FRAME_END - ret - -.Ldec_out: - FRAME_END - ret -ENDPROC(crypto_aegis128l_aesni_dec) - -/* - * void crypto_aegis128l_aesni_dec_tail(void *state, unsigned int length, - * const void *src, void *dst); - */ -ENTRY(crypto_aegis128l_aesni_dec_tail) - FRAME_BEGIN - - state_load - - /* decrypt message: */ - call __load_partial - - crypt0 MSG0, MSG1 - - movdqa MSG0, T0 - movdqa MSG1, T1 - call __store_partial - - /* mask with byte count: */ - movq LEN, T0 - punpcklbw T0, T0 - punpcklbw T0, T0 - punpcklbw T0, T0 - punpcklbw T0, T0 - movdqa T0, T1 - movdqa .Laegis128l_counter0, T2 - movdqa .Laegis128l_counter1, T3 - pcmpgtb T2, T0 - pcmpgtb T3, T1 - pand T0, MSG0 - pand T1, MSG1 - - update0 - - state_store0 - - FRAME_END - ret -ENDPROC(crypto_aegis128l_aesni_dec_tail) - -/* - * void crypto_aegis128l_aesni_final(void *state, void *tag_xor, - * u64 assoclen, u64 cryptlen); - */ -ENTRY(crypto_aegis128l_aesni_final) - FRAME_BEGIN - - state_load - - /* prepare length block: */ - movq %rdx, MSG0 - movq %rcx, T0 - pslldq $8, T0 - pxor T0, MSG0 - psllq $3, MSG0 /* multiply by 8 (to get bit count) */ - - pxor STATE2, MSG0 - movdqa MSG0, MSG1 - - /* update state: */ - update0 - update1 - update2 - update3 - update4 - update5 - update6 - - /* xor tag: */ - movdqu (%rsi), T0 - - pxor STATE1, T0 - pxor STATE2, T0 - pxor STATE3, T0 - pxor STATE4, T0 - pxor STATE5, T0 - pxor STATE6, T0 - pxor STATE7, T0 - - movdqu T0, (%rsi) - - FRAME_END - ret -ENDPROC(crypto_aegis128l_aesni_final) diff --git a/arch/x86/crypto/aegis128l-aesni-glue.c b/arch/x86/crypto/aegis128l-aesni-glue.c deleted file mode 100644 index 19eb28b316f0..000000000000 --- a/arch/x86/crypto/aegis128l-aesni-glue.c +++ /dev/null @@ -1,293 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * The AEGIS-128L Authenticated-Encryption Algorithm - * Glue for AES-NI + SSE2 implementation - * - * Copyright (c) 2017-2018 Ondrej Mosnacek - * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. - */ - -#include -#include -#include -#include -#include -#include -#include - -#define AEGIS128L_BLOCK_ALIGN 16 -#define AEGIS128L_BLOCK_SIZE 32 -#define AEGIS128L_NONCE_SIZE 16 -#define AEGIS128L_STATE_BLOCKS 8 -#define AEGIS128L_KEY_SIZE 16 -#define AEGIS128L_MIN_AUTH_SIZE 8 -#define AEGIS128L_MAX_AUTH_SIZE 16 - -asmlinkage void crypto_aegis128l_aesni_init(void *state, void *key, void *iv); - -asmlinkage void crypto_aegis128l_aesni_ad( - void *state, unsigned int length, const void *data); - -asmlinkage void crypto_aegis128l_aesni_enc( - void *state, unsigned int length, const void *src, void *dst); - -asmlinkage void crypto_aegis128l_aesni_dec( - void *state, unsigned int length, const void *src, void *dst); - -asmlinkage void crypto_aegis128l_aesni_enc_tail( - void *state, unsigned int length, const void *src, void *dst); - -asmlinkage void crypto_aegis128l_aesni_dec_tail( - void *state, unsigned int length, const void *src, void *dst); - -asmlinkage void crypto_aegis128l_aesni_final( - void *state, void *tag_xor, unsigned int cryptlen, - unsigned int assoclen); - -struct aegis_block { - u8 bytes[AEGIS128L_BLOCK_SIZE] __aligned(AEGIS128L_BLOCK_ALIGN); -}; - -struct aegis_state { - struct aegis_block blocks[AEGIS128L_STATE_BLOCKS]; -}; - -struct aegis_ctx { - struct aegis_block key; -}; - -struct aegis_crypt_ops { - int (*skcipher_walk_init)(struct skcipher_walk *walk, - struct aead_request *req, bool atomic); - - void (*crypt_blocks)(void *state, unsigned int length, const void *src, - void *dst); - void (*crypt_tail)(void *state, unsigned int length, const void *src, - void *dst); -}; - -static void crypto_aegis128l_aesni_process_ad( - struct aegis_state *state, struct scatterlist *sg_src, - unsigned int assoclen) -{ - struct scatter_walk walk; - struct aegis_block buf; - unsigned int pos = 0; - - scatterwalk_start(&walk, sg_src); - while (assoclen != 0) { - unsigned int size = scatterwalk_clamp(&walk, assoclen); - unsigned int left = size; - void *mapped = scatterwalk_map(&walk); - const u8 *src = (const u8 *)mapped; - - if (pos + size >= AEGIS128L_BLOCK_SIZE) { - if (pos > 0) { - unsigned int fill = AEGIS128L_BLOCK_SIZE - pos; - memcpy(buf.bytes + pos, src, fill); - crypto_aegis128l_aesni_ad(state, - AEGIS128L_BLOCK_SIZE, - buf.bytes); - pos = 0; - left -= fill; - src += fill; - } - - crypto_aegis128l_aesni_ad(state, left, src); - - src += left & ~(AEGIS128L_BLOCK_SIZE - 1); - left &= AEGIS128L_BLOCK_SIZE - 1; - } - - memcpy(buf.bytes + pos, src, left); - pos += left; - assoclen -= size; - - scatterwalk_unmap(mapped); - scatterwalk_advance(&walk, size); - scatterwalk_done(&walk, 0, assoclen); - } - - if (pos > 0) { - memset(buf.bytes + pos, 0, AEGIS128L_BLOCK_SIZE - pos); - crypto_aegis128l_aesni_ad(state, AEGIS128L_BLOCK_SIZE, buf.bytes); - } -} - -static void crypto_aegis128l_aesni_process_crypt( - struct aegis_state *state, struct skcipher_walk *walk, - const struct aegis_crypt_ops *ops) -{ - while (walk->nbytes >= AEGIS128L_BLOCK_SIZE) { - ops->crypt_blocks(state, round_down(walk->nbytes, - AEGIS128L_BLOCK_SIZE), - walk->src.virt.addr, walk->dst.virt.addr); - skcipher_walk_done(walk, walk->nbytes % AEGIS128L_BLOCK_SIZE); - } - - if (walk->nbytes) { - ops->crypt_tail(state, walk->nbytes, walk->src.virt.addr, - walk->dst.virt.addr); - skcipher_walk_done(walk, 0); - } -} - -static struct aegis_ctx *crypto_aegis128l_aesni_ctx(struct crypto_aead *aead) -{ - u8 *ctx = crypto_aead_ctx(aead); - ctx = PTR_ALIGN(ctx, __alignof__(struct aegis_ctx)); - return (void *)ctx; -} - -static int crypto_aegis128l_aesni_setkey(struct crypto_aead *aead, - const u8 *key, unsigned int keylen) -{ - struct aegis_ctx *ctx = crypto_aegis128l_aesni_ctx(aead); - - if (keylen != AEGIS128L_KEY_SIZE) { - crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN); - return -EINVAL; - } - - memcpy(ctx->key.bytes, key, AEGIS128L_KEY_SIZE); - - return 0; -} - -static int crypto_aegis128l_aesni_setauthsize(struct crypto_aead *tfm, - unsigned int authsize) -{ - if (authsize > AEGIS128L_MAX_AUTH_SIZE) - return -EINVAL; - if (authsize < AEGIS128L_MIN_AUTH_SIZE) - return -EINVAL; - return 0; -} - -static void crypto_aegis128l_aesni_crypt(struct aead_request *req, - struct aegis_block *tag_xor, - unsigned int cryptlen, - const struct aegis_crypt_ops *ops) -{ - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct aegis_ctx *ctx = crypto_aegis128l_aesni_ctx(tfm); - struct skcipher_walk walk; - struct aegis_state state; - - ops->skcipher_walk_init(&walk, req, true); - - kernel_fpu_begin(); - - crypto_aegis128l_aesni_init(&state, ctx->key.bytes, req->iv); - crypto_aegis128l_aesni_process_ad(&state, req->src, req->assoclen); - crypto_aegis128l_aesni_process_crypt(&state, &walk, ops); - crypto_aegis128l_aesni_final(&state, tag_xor, req->assoclen, cryptlen); - - kernel_fpu_end(); -} - -static int crypto_aegis128l_aesni_encrypt(struct aead_request *req) -{ - static const struct aegis_crypt_ops OPS = { - .skcipher_walk_init = skcipher_walk_aead_encrypt, - .crypt_blocks = crypto_aegis128l_aesni_enc, - .crypt_tail = crypto_aegis128l_aesni_enc_tail, - }; - - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct aegis_block tag = {}; - unsigned int authsize = crypto_aead_authsize(tfm); - unsigned int cryptlen = req->cryptlen; - - crypto_aegis128l_aesni_crypt(req, &tag, cryptlen, &OPS); - - scatterwalk_map_and_copy(tag.bytes, req->dst, - req->assoclen + cryptlen, authsize, 1); - return 0; -} - -static int crypto_aegis128l_aesni_decrypt(struct aead_request *req) -{ - static const struct aegis_block zeros = {}; - - static const struct aegis_crypt_ops OPS = { - .skcipher_walk_init = skcipher_walk_aead_decrypt, - .crypt_blocks = crypto_aegis128l_aesni_dec, - .crypt_tail = crypto_aegis128l_aesni_dec_tail, - }; - - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct aegis_block tag; - unsigned int authsize = crypto_aead_authsize(tfm); - unsigned int cryptlen = req->cryptlen - authsize; - - scatterwalk_map_and_copy(tag.bytes, req->src, - req->assoclen + cryptlen, authsize, 0); - - crypto_aegis128l_aesni_crypt(req, &tag, cryptlen, &OPS); - - return crypto_memneq(tag.bytes, zeros.bytes, authsize) ? -EBADMSG : 0; -} - -static int crypto_aegis128l_aesni_init_tfm(struct crypto_aead *aead) -{ - return 0; -} - -static void crypto_aegis128l_aesni_exit_tfm(struct crypto_aead *aead) -{ -} - -static struct aead_alg crypto_aegis128l_aesni_alg = { - .setkey = crypto_aegis128l_aesni_setkey, - .setauthsize = crypto_aegis128l_aesni_setauthsize, - .encrypt = crypto_aegis128l_aesni_encrypt, - .decrypt = crypto_aegis128l_aesni_decrypt, - .init = crypto_aegis128l_aesni_init_tfm, - .exit = crypto_aegis128l_aesni_exit_tfm, - - .ivsize = AEGIS128L_NONCE_SIZE, - .maxauthsize = AEGIS128L_MAX_AUTH_SIZE, - .chunksize = AEGIS128L_BLOCK_SIZE, - - .base = { - .cra_flags = CRYPTO_ALG_INTERNAL, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct aegis_ctx) + - __alignof__(struct aegis_ctx), - .cra_alignmask = 0, - .cra_priority = 400, - - .cra_name = "__aegis128l", - .cra_driver_name = "__aegis128l-aesni", - - .cra_module = THIS_MODULE, - } -}; - -static struct simd_aead_alg *simd_alg; - -static int __init crypto_aegis128l_aesni_module_init(void) -{ - if (!boot_cpu_has(X86_FEATURE_XMM2) || - !boot_cpu_has(X86_FEATURE_AES) || - !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL)) - return -ENODEV; - - return simd_register_aeads_compat(&crypto_aegis128l_aesni_alg, 1, - &simd_alg); -} - -static void __exit crypto_aegis128l_aesni_module_exit(void) -{ - simd_unregister_aeads(&crypto_aegis128l_aesni_alg, 1, &simd_alg); -} - -module_init(crypto_aegis128l_aesni_module_init); -module_exit(crypto_aegis128l_aesni_module_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Ondrej Mosnacek "); -MODULE_DESCRIPTION("AEGIS-128L AEAD algorithm -- AESNI+SSE2 implementation"); -MODULE_ALIAS_CRYPTO("aegis128l"); -MODULE_ALIAS_CRYPTO("aegis128l-aesni"); diff --git a/arch/x86/crypto/aegis256-aesni-asm.S b/arch/x86/crypto/aegis256-aesni-asm.S deleted file mode 100644 index 37d9b13dfd85..000000000000 --- a/arch/x86/crypto/aegis256-aesni-asm.S +++ /dev/null @@ -1,700 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * AES-NI + SSE2 implementation of AEGIS-128L - * - * Copyright (c) 2017-2018 Ondrej Mosnacek - * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. - */ - -#include -#include - -#define STATE0 %xmm0 -#define STATE1 %xmm1 -#define STATE2 %xmm2 -#define STATE3 %xmm3 -#define STATE4 %xmm4 -#define STATE5 %xmm5 -#define MSG %xmm6 -#define T0 %xmm7 -#define T1 %xmm8 -#define T2 %xmm9 -#define T3 %xmm10 - -#define STATEP %rdi -#define LEN %rsi -#define SRC %rdx -#define DST %rcx - -.section .rodata.cst16.aegis256_const, "aM", @progbits, 32 -.align 16 -.Laegis256_const_0: - .byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d - .byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62 -.Laegis256_const_1: - .byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1 - .byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd - -.section .rodata.cst16.aegis256_counter, "aM", @progbits, 16 -.align 16 -.Laegis256_counter: - .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 - .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f - -.text - -/* - * __load_partial: internal ABI - * input: - * LEN - bytes - * SRC - src - * output: - * MSG - message block - * changed: - * T0 - * %r8 - * %r9 - */ -__load_partial: - xor %r9d, %r9d - pxor MSG, MSG - - mov LEN, %r8 - and $0x1, %r8 - jz .Lld_partial_1 - - mov LEN, %r8 - and $0x1E, %r8 - add SRC, %r8 - mov (%r8), %r9b - -.Lld_partial_1: - mov LEN, %r8 - and $0x2, %r8 - jz .Lld_partial_2 - - mov LEN, %r8 - and $0x1C, %r8 - add SRC, %r8 - shl $0x10, %r9 - mov (%r8), %r9w - -.Lld_partial_2: - mov LEN, %r8 - and $0x4, %r8 - jz .Lld_partial_4 - - mov LEN, %r8 - and $0x18, %r8 - add SRC, %r8 - shl $32, %r9 - mov (%r8), %r8d - xor %r8, %r9 - -.Lld_partial_4: - movq %r9, MSG - - mov LEN, %r8 - and $0x8, %r8 - jz .Lld_partial_8 - - mov LEN, %r8 - and $0x10, %r8 - add SRC, %r8 - pslldq $8, MSG - movq (%r8), T0 - pxor T0, MSG - -.Lld_partial_8: - ret -ENDPROC(__load_partial) - -/* - * __store_partial: internal ABI - * input: - * LEN - bytes - * DST - dst - * output: - * T0 - message block - * changed: - * %r8 - * %r9 - * %r10 - */ -__store_partial: - mov LEN, %r8 - mov DST, %r9 - - movq T0, %r10 - - cmp $8, %r8 - jl .Lst_partial_8 - - mov %r10, (%r9) - psrldq $8, T0 - movq T0, %r10 - - sub $8, %r8 - add $8, %r9 - -.Lst_partial_8: - cmp $4, %r8 - jl .Lst_partial_4 - - mov %r10d, (%r9) - shr $32, %r10 - - sub $4, %r8 - add $4, %r9 - -.Lst_partial_4: - cmp $2, %r8 - jl .Lst_partial_2 - - mov %r10w, (%r9) - shr $0x10, %r10 - - sub $2, %r8 - add $2, %r9 - -.Lst_partial_2: - cmp $1, %r8 - jl .Lst_partial_1 - - mov %r10b, (%r9) - -.Lst_partial_1: - ret -ENDPROC(__store_partial) - -.macro update - movdqa STATE5, T0 - aesenc STATE0, STATE5 - aesenc STATE1, STATE0 - aesenc STATE2, STATE1 - aesenc STATE3, STATE2 - aesenc STATE4, STATE3 - aesenc T0, STATE4 -.endm - -.macro update0 m - update - pxor \m, STATE5 -.endm - -.macro update1 m - update - pxor \m, STATE4 -.endm - -.macro update2 m - update - pxor \m, STATE3 -.endm - -.macro update3 m - update - pxor \m, STATE2 -.endm - -.macro update4 m - update - pxor \m, STATE1 -.endm - -.macro update5 m - update - pxor \m, STATE0 -.endm - -.macro state_load - movdqu 0x00(STATEP), STATE0 - movdqu 0x10(STATEP), STATE1 - movdqu 0x20(STATEP), STATE2 - movdqu 0x30(STATEP), STATE3 - movdqu 0x40(STATEP), STATE4 - movdqu 0x50(STATEP), STATE5 -.endm - -.macro state_store s0 s1 s2 s3 s4 s5 - movdqu \s5, 0x00(STATEP) - movdqu \s0, 0x10(STATEP) - movdqu \s1, 0x20(STATEP) - movdqu \s2, 0x30(STATEP) - movdqu \s3, 0x40(STATEP) - movdqu \s4, 0x50(STATEP) -.endm - -.macro state_store0 - state_store STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 -.endm - -.macro state_store1 - state_store STATE5 STATE0 STATE1 STATE2 STATE3 STATE4 -.endm - -.macro state_store2 - state_store STATE4 STATE5 STATE0 STATE1 STATE2 STATE3 -.endm - -.macro state_store3 - state_store STATE3 STATE4 STATE5 STATE0 STATE1 STATE2 -.endm - -.macro state_store4 - state_store STATE2 STATE3 STATE4 STATE5 STATE0 STATE1 -.endm - -.macro state_store5 - state_store STATE1 STATE2 STATE3 STATE4 STATE5 STATE0 -.endm - -/* - * void crypto_aegis256_aesni_init(void *state, const void *key, const void *iv); - */ -ENTRY(crypto_aegis256_aesni_init) - FRAME_BEGIN - - /* load key: */ - movdqa 0x00(%rsi), MSG - movdqa 0x10(%rsi), T1 - movdqa MSG, STATE4 - movdqa T1, STATE5 - - /* load IV: */ - movdqu 0x00(%rdx), T2 - movdqu 0x10(%rdx), T3 - pxor MSG, T2 - pxor T1, T3 - movdqa T2, STATE0 - movdqa T3, STATE1 - - /* load the constants: */ - movdqa .Laegis256_const_0, STATE3 - movdqa .Laegis256_const_1, STATE2 - pxor STATE3, STATE4 - pxor STATE2, STATE5 - - /* update 10 times with IV and KEY: */ - update0 MSG - update1 T1 - update2 T2 - update3 T3 - update4 MSG - update5 T1 - update0 T2 - update1 T3 - update2 MSG - update3 T1 - update4 T2 - update5 T3 - update0 MSG - update1 T1 - update2 T2 - update3 T3 - - state_store3 - - FRAME_END - ret -ENDPROC(crypto_aegis256_aesni_init) - -.macro ad_block a i - movdq\a (\i * 0x10)(SRC), MSG - update\i MSG - sub $0x10, LEN - cmp $0x10, LEN - jl .Lad_out_\i -.endm - -/* - * void crypto_aegis256_aesni_ad(void *state, unsigned int length, - * const void *data); - */ -ENTRY(crypto_aegis256_aesni_ad) - FRAME_BEGIN - - cmp $0x10, LEN - jb .Lad_out - - state_load - - mov SRC, %r8 - and $0xf, %r8 - jnz .Lad_u_loop - -.align 8 -.Lad_a_loop: - ad_block a 0 - ad_block a 1 - ad_block a 2 - ad_block a 3 - ad_block a 4 - ad_block a 5 - - add $0x60, SRC - jmp .Lad_a_loop - -.align 8 -.Lad_u_loop: - ad_block u 0 - ad_block u 1 - ad_block u 2 - ad_block u 3 - ad_block u 4 - ad_block u 5 - - add $0x60, SRC - jmp .Lad_u_loop - -.Lad_out_0: - state_store0 - FRAME_END - ret - -.Lad_out_1: - state_store1 - FRAME_END - ret - -.Lad_out_2: - state_store2 - FRAME_END - ret - -.Lad_out_3: - state_store3 - FRAME_END - ret - -.Lad_out_4: - state_store4 - FRAME_END - ret - -.Lad_out_5: - state_store5 - FRAME_END - ret - -.Lad_out: - FRAME_END - ret -ENDPROC(crypto_aegis256_aesni_ad) - -.macro crypt m s0 s1 s2 s3 s4 s5 - pxor \s1, \m - pxor \s4, \m - pxor \s5, \m - movdqa \s2, T3 - pand \s3, T3 - pxor T3, \m -.endm - -.macro crypt0 m - crypt \m STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 -.endm - -.macro crypt1 m - crypt \m STATE5 STATE0 STATE1 STATE2 STATE3 STATE4 -.endm - -.macro crypt2 m - crypt \m STATE4 STATE5 STATE0 STATE1 STATE2 STATE3 -.endm - -.macro crypt3 m - crypt \m STATE3 STATE4 STATE5 STATE0 STATE1 STATE2 -.endm - -.macro crypt4 m - crypt \m STATE2 STATE3 STATE4 STATE5 STATE0 STATE1 -.endm - -.macro crypt5 m - crypt \m STATE1 STATE2 STATE3 STATE4 STATE5 STATE0 -.endm - -.macro encrypt_block a i - movdq\a (\i * 0x10)(SRC), MSG - movdqa MSG, T0 - crypt\i T0 - movdq\a T0, (\i * 0x10)(DST) - - update\i MSG - - sub $0x10, LEN - cmp $0x10, LEN - jl .Lenc_out_\i -.endm - -.macro decrypt_block a i - movdq\a (\i * 0x10)(SRC), MSG - crypt\i MSG - movdq\a MSG, (\i * 0x10)(DST) - - update\i MSG - - sub $0x10, LEN - cmp $0x10, LEN - jl .Ldec_out_\i -.endm - -/* - * void crypto_aegis256_aesni_enc(void *state, unsigned int length, - * const void *src, void *dst); - */ -ENTRY(crypto_aegis256_aesni_enc) - FRAME_BEGIN - - cmp $0x10, LEN - jb .Lenc_out - - state_load - - mov SRC, %r8 - or DST, %r8 - and $0xf, %r8 - jnz .Lenc_u_loop - -.align 8 -.Lenc_a_loop: - encrypt_block a 0 - encrypt_block a 1 - encrypt_block a 2 - encrypt_block a 3 - encrypt_block a 4 - encrypt_block a 5 - - add $0x60, SRC - add $0x60, DST - jmp .Lenc_a_loop - -.align 8 -.Lenc_u_loop: - encrypt_block u 0 - encrypt_block u 1 - encrypt_block u 2 - encrypt_block u 3 - encrypt_block u 4 - encrypt_block u 5 - - add $0x60, SRC - add $0x60, DST - jmp .Lenc_u_loop - -.Lenc_out_0: - state_store0 - FRAME_END - ret - -.Lenc_out_1: - state_store1 - FRAME_END - ret - -.Lenc_out_2: - state_store2 - FRAME_END - ret - -.Lenc_out_3: - state_store3 - FRAME_END - ret - -.Lenc_out_4: - state_store4 - FRAME_END - ret - -.Lenc_out_5: - state_store5 - FRAME_END - ret - -.Lenc_out: - FRAME_END - ret -ENDPROC(crypto_aegis256_aesni_enc) - -/* - * void crypto_aegis256_aesni_enc_tail(void *state, unsigned int length, - * const void *src, void *dst); - */ -ENTRY(crypto_aegis256_aesni_enc_tail) - FRAME_BEGIN - - state_load - - /* encrypt message: */ - call __load_partial - - movdqa MSG, T0 - crypt0 T0 - - call __store_partial - - update0 MSG - - state_store0 - - FRAME_END - ret -ENDPROC(crypto_aegis256_aesni_enc_tail) - -/* - * void crypto_aegis256_aesni_dec(void *state, unsigned int length, - * const void *src, void *dst); - */ -ENTRY(crypto_aegis256_aesni_dec) - FRAME_BEGIN - - cmp $0x10, LEN - jb .Ldec_out - - state_load - - mov SRC, %r8 - or DST, %r8 - and $0xF, %r8 - jnz .Ldec_u_loop - -.align 8 -.Ldec_a_loop: - decrypt_block a 0 - decrypt_block a 1 - decrypt_block a 2 - decrypt_block a 3 - decrypt_block a 4 - decrypt_block a 5 - - add $0x60, SRC - add $0x60, DST - jmp .Ldec_a_loop - -.align 8 -.Ldec_u_loop: - decrypt_block u 0 - decrypt_block u 1 - decrypt_block u 2 - decrypt_block u 3 - decrypt_block u 4 - decrypt_block u 5 - - add $0x60, SRC - add $0x60, DST - jmp .Ldec_u_loop - -.Ldec_out_0: - state_store0 - FRAME_END - ret - -.Ldec_out_1: - state_store1 - FRAME_END - ret - -.Ldec_out_2: - state_store2 - FRAME_END - ret - -.Ldec_out_3: - state_store3 - FRAME_END - ret - -.Ldec_out_4: - state_store4 - FRAME_END - ret - -.Ldec_out_5: - state_store5 - FRAME_END - ret - -.Ldec_out: - FRAME_END - ret -ENDPROC(crypto_aegis256_aesni_dec) - -/* - * void crypto_aegis256_aesni_dec_tail(void *state, unsigned int length, - * const void *src, void *dst); - */ -ENTRY(crypto_aegis256_aesni_dec_tail) - FRAME_BEGIN - - state_load - - /* decrypt message: */ - call __load_partial - - crypt0 MSG - - movdqa MSG, T0 - call __store_partial - - /* mask with byte count: */ - movq LEN, T0 - punpcklbw T0, T0 - punpcklbw T0, T0 - punpcklbw T0, T0 - punpcklbw T0, T0 - movdqa .Laegis256_counter, T1 - pcmpgtb T1, T0 - pand T0, MSG - - update0 MSG - - state_store0 - - FRAME_END - ret -ENDPROC(crypto_aegis256_aesni_dec_tail) - -/* - * void crypto_aegis256_aesni_final(void *state, void *tag_xor, - * u64 assoclen, u64 cryptlen); - */ -ENTRY(crypto_aegis256_aesni_final) - FRAME_BEGIN - - state_load - - /* prepare length block: */ - movq %rdx, MSG - movq %rcx, T0 - pslldq $8, T0 - pxor T0, MSG - psllq $3, MSG /* multiply by 8 (to get bit count) */ - - pxor STATE3, MSG - - /* update state: */ - update0 MSG - update1 MSG - update2 MSG - update3 MSG - update4 MSG - update5 MSG - update0 MSG - - /* xor tag: */ - movdqu (%rsi), MSG - - pxor STATE0, MSG - pxor STATE1, MSG - pxor STATE2, MSG - pxor STATE3, MSG - pxor STATE4, MSG - pxor STATE5, MSG - - movdqu MSG, (%rsi) - - FRAME_END - ret -ENDPROC(crypto_aegis256_aesni_final) diff --git a/arch/x86/crypto/aegis256-aesni-glue.c b/arch/x86/crypto/aegis256-aesni-glue.c deleted file mode 100644 index f84da27171d3..000000000000 --- a/arch/x86/crypto/aegis256-aesni-glue.c +++ /dev/null @@ -1,293 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * The AEGIS-256 Authenticated-Encryption Algorithm - * Glue for AES-NI + SSE2 implementation - * - * Copyright (c) 2017-2018 Ondrej Mosnacek - * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. - */ - -#include -#include -#include -#include -#include -#include -#include - -#define AEGIS256_BLOCK_ALIGN 16 -#define AEGIS256_BLOCK_SIZE 16 -#define AEGIS256_NONCE_SIZE 32 -#define AEGIS256_STATE_BLOCKS 6 -#define AEGIS256_KEY_SIZE 32 -#define AEGIS256_MIN_AUTH_SIZE 8 -#define AEGIS256_MAX_AUTH_SIZE 16 - -asmlinkage void crypto_aegis256_aesni_init(void *state, void *key, void *iv); - -asmlinkage void crypto_aegis256_aesni_ad( - void *state, unsigned int length, const void *data); - -asmlinkage void crypto_aegis256_aesni_enc( - void *state, unsigned int length, const void *src, void *dst); - -asmlinkage void crypto_aegis256_aesni_dec( - void *state, unsigned int length, const void *src, void *dst); - -asmlinkage void crypto_aegis256_aesni_enc_tail( - void *state, unsigned int length, const void *src, void *dst); - -asmlinkage void crypto_aegis256_aesni_dec_tail( - void *state, unsigned int length, const void *src, void *dst); - -asmlinkage void crypto_aegis256_aesni_final( - void *state, void *tag_xor, unsigned int cryptlen, - unsigned int assoclen); - -struct aegis_block { - u8 bytes[AEGIS256_BLOCK_SIZE] __aligned(AEGIS256_BLOCK_ALIGN); -}; - -struct aegis_state { - struct aegis_block blocks[AEGIS256_STATE_BLOCKS]; -}; - -struct aegis_ctx { - struct aegis_block key[AEGIS256_KEY_SIZE / AEGIS256_BLOCK_SIZE]; -}; - -struct aegis_crypt_ops { - int (*skcipher_walk_init)(struct skcipher_walk *walk, - struct aead_request *req, bool atomic); - - void (*crypt_blocks)(void *state, unsigned int length, const void *src, - void *dst); - void (*crypt_tail)(void *state, unsigned int length, const void *src, - void *dst); -}; - -static void crypto_aegis256_aesni_process_ad( - struct aegis_state *state, struct scatterlist *sg_src, - unsigned int assoclen) -{ - struct scatter_walk walk; - struct aegis_block buf; - unsigned int pos = 0; - - scatterwalk_start(&walk, sg_src); - while (assoclen != 0) { - unsigned int size = scatterwalk_clamp(&walk, assoclen); - unsigned int left = size; - void *mapped = scatterwalk_map(&walk); - const u8 *src = (const u8 *)mapped; - - if (pos + size >= AEGIS256_BLOCK_SIZE) { - if (pos > 0) { - unsigned int fill = AEGIS256_BLOCK_SIZE - pos; - memcpy(buf.bytes + pos, src, fill); - crypto_aegis256_aesni_ad(state, - AEGIS256_BLOCK_SIZE, - buf.bytes); - pos = 0; - left -= fill; - src += fill; - } - - crypto_aegis256_aesni_ad(state, left, src); - - src += left & ~(AEGIS256_BLOCK_SIZE - 1); - left &= AEGIS256_BLOCK_SIZE - 1; - } - - memcpy(buf.bytes + pos, src, left); - pos += left; - assoclen -= size; - - scatterwalk_unmap(mapped); - scatterwalk_advance(&walk, size); - scatterwalk_done(&walk, 0, assoclen); - } - - if (pos > 0) { - memset(buf.bytes + pos, 0, AEGIS256_BLOCK_SIZE - pos); - crypto_aegis256_aesni_ad(state, AEGIS256_BLOCK_SIZE, buf.bytes); - } -} - -static void crypto_aegis256_aesni_process_crypt( - struct aegis_state *state, struct skcipher_walk *walk, - const struct aegis_crypt_ops *ops) -{ - while (walk->nbytes >= AEGIS256_BLOCK_SIZE) { - ops->crypt_blocks(state, - round_down(walk->nbytes, AEGIS256_BLOCK_SIZE), - walk->src.virt.addr, walk->dst.virt.addr); - skcipher_walk_done(walk, walk->nbytes % AEGIS256_BLOCK_SIZE); - } - - if (walk->nbytes) { - ops->crypt_tail(state, walk->nbytes, walk->src.virt.addr, - walk->dst.virt.addr); - skcipher_walk_done(walk, 0); - } -} - -static struct aegis_ctx *crypto_aegis256_aesni_ctx(struct crypto_aead *aead) -{ - u8 *ctx = crypto_aead_ctx(aead); - ctx = PTR_ALIGN(ctx, __alignof__(struct aegis_ctx)); - return (void *)ctx; -} - -static int crypto_aegis256_aesni_setkey(struct crypto_aead *aead, const u8 *key, - unsigned int keylen) -{ - struct aegis_ctx *ctx = crypto_aegis256_aesni_ctx(aead); - - if (keylen != AEGIS256_KEY_SIZE) { - crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN); - return -EINVAL; - } - - memcpy(ctx->key, key, AEGIS256_KEY_SIZE); - - return 0; -} - -static int crypto_aegis256_aesni_setauthsize(struct crypto_aead *tfm, - unsigned int authsize) -{ - if (authsize > AEGIS256_MAX_AUTH_SIZE) - return -EINVAL; - if (authsize < AEGIS256_MIN_AUTH_SIZE) - return -EINVAL; - return 0; -} - -static void crypto_aegis256_aesni_crypt(struct aead_request *req, - struct aegis_block *tag_xor, - unsigned int cryptlen, - const struct aegis_crypt_ops *ops) -{ - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct aegis_ctx *ctx = crypto_aegis256_aesni_ctx(tfm); - struct skcipher_walk walk; - struct aegis_state state; - - ops->skcipher_walk_init(&walk, req, true); - - kernel_fpu_begin(); - - crypto_aegis256_aesni_init(&state, ctx->key, req->iv); - crypto_aegis256_aesni_process_ad(&state, req->src, req->assoclen); - crypto_aegis256_aesni_process_crypt(&state, &walk, ops); - crypto_aegis256_aesni_final(&state, tag_xor, req->assoclen, cryptlen); - - kernel_fpu_end(); -} - -static int crypto_aegis256_aesni_encrypt(struct aead_request *req) -{ - static const struct aegis_crypt_ops OPS = { - .skcipher_walk_init = skcipher_walk_aead_encrypt, - .crypt_blocks = crypto_aegis256_aesni_enc, - .crypt_tail = crypto_aegis256_aesni_enc_tail, - }; - - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct aegis_block tag = {}; - unsigned int authsize = crypto_aead_authsize(tfm); - unsigned int cryptlen = req->cryptlen; - - crypto_aegis256_aesni_crypt(req, &tag, cryptlen, &OPS); - - scatterwalk_map_and_copy(tag.bytes, req->dst, - req->assoclen + cryptlen, authsize, 1); - return 0; -} - -static int crypto_aegis256_aesni_decrypt(struct aead_request *req) -{ - static const struct aegis_block zeros = {}; - - static const struct aegis_crypt_ops OPS = { - .skcipher_walk_init = skcipher_walk_aead_decrypt, - .crypt_blocks = crypto_aegis256_aesni_dec, - .crypt_tail = crypto_aegis256_aesni_dec_tail, - }; - - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct aegis_block tag; - unsigned int authsize = crypto_aead_authsize(tfm); - unsigned int cryptlen = req->cryptlen - authsize; - - scatterwalk_map_and_copy(tag.bytes, req->src, - req->assoclen + cryptlen, authsize, 0); - - crypto_aegis256_aesni_crypt(req, &tag, cryptlen, &OPS); - - return crypto_memneq(tag.bytes, zeros.bytes, authsize) ? -EBADMSG : 0; -} - -static int crypto_aegis256_aesni_init_tfm(struct crypto_aead *aead) -{ - return 0; -} - -static void crypto_aegis256_aesni_exit_tfm(struct crypto_aead *aead) -{ -} - -static struct aead_alg crypto_aegis256_aesni_alg = { - .setkey = crypto_aegis256_aesni_setkey, - .setauthsize = crypto_aegis256_aesni_setauthsize, - .encrypt = crypto_aegis256_aesni_encrypt, - .decrypt = crypto_aegis256_aesni_decrypt, - .init = crypto_aegis256_aesni_init_tfm, - .exit = crypto_aegis256_aesni_exit_tfm, - - .ivsize = AEGIS256_NONCE_SIZE, - .maxauthsize = AEGIS256_MAX_AUTH_SIZE, - .chunksize = AEGIS256_BLOCK_SIZE, - - .base = { - .cra_flags = CRYPTO_ALG_INTERNAL, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct aegis_ctx) + - __alignof__(struct aegis_ctx), - .cra_alignmask = 0, - .cra_priority = 400, - - .cra_name = "__aegis256", - .cra_driver_name = "__aegis256-aesni", - - .cra_module = THIS_MODULE, - } -}; - -static struct simd_aead_alg *simd_alg; - -static int __init crypto_aegis256_aesni_module_init(void) -{ - if (!boot_cpu_has(X86_FEATURE_XMM2) || - !boot_cpu_has(X86_FEATURE_AES) || - !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL)) - return -ENODEV; - - return simd_register_aeads_compat(&crypto_aegis256_aesni_alg, 1, - &simd_alg); -} - -static void __exit crypto_aegis256_aesni_module_exit(void) -{ - simd_unregister_aeads(&crypto_aegis256_aesni_alg, 1, &simd_alg); -} - -module_init(crypto_aegis256_aesni_module_init); -module_exit(crypto_aegis256_aesni_module_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Ondrej Mosnacek "); -MODULE_DESCRIPTION("AEGIS-256 AEAD algorithm -- AESNI+SSE2 implementation"); -MODULE_ALIAS_CRYPTO("aegis256"); -MODULE_ALIAS_CRYPTO("aegis256-aesni"); -- cgit v1.2.1 From 8dfa20fcfbeb245642dfe3a43f8a3735d9aed42a Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 19 Jul 2019 23:09:18 -0700 Subject: crypto: ghash - add comment and improve help text To help avoid confusion, add a comment to ghash-generic.c which explains the convention that the kernel's implementation of GHASH uses. Also update the Kconfig help text and module descriptions to call GHASH a "hash function" rather than a "message digest", since the latter normally means a real cryptographic hash function, which GHASH is not. Cc: Pascal Van Leeuwen Signed-off-by: Eric Biggers Reviewed-by: Ard Biesheuvel Acked-by: Pascal Van Leeuwen Signed-off-by: Herbert Xu --- arch/arm/crypto/ghash-ce-glue.c | 2 +- arch/s390/crypto/ghash_s390.c | 2 +- arch/x86/crypto/ghash-clmulni-intel_glue.c | 3 +-- 3 files changed, 3 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm/crypto/ghash-ce-glue.c b/arch/arm/crypto/ghash-ce-glue.c index bb906b5f1eb3..c691077679a6 100644 --- a/arch/arm/crypto/ghash-ce-glue.c +++ b/arch/arm/crypto/ghash-ce-glue.c @@ -18,7 +18,7 @@ #include #include -MODULE_DESCRIPTION("GHASH secure hash using ARMv8 Crypto Extensions"); +MODULE_DESCRIPTION("GHASH hash function using ARMv8 Crypto Extensions"); MODULE_AUTHOR("Ard Biesheuvel "); MODULE_LICENSE("GPL v2"); MODULE_ALIAS_CRYPTO("ghash"); diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c index eeeb6a7737a4..a3e7400e031c 100644 --- a/arch/s390/crypto/ghash_s390.c +++ b/arch/s390/crypto/ghash_s390.c @@ -153,4 +153,4 @@ module_exit(ghash_mod_exit); MODULE_ALIAS_CRYPTO("ghash"); MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("GHASH Message Digest Algorithm, s390 implementation"); +MODULE_DESCRIPTION("GHASH hash function, s390 implementation"); diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c index ac76fe88ac4f..04d72a5a8ce9 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_glue.c +++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c @@ -357,6 +357,5 @@ module_init(ghash_pclmulqdqni_mod_init); module_exit(ghash_pclmulqdqni_mod_exit); MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("GHASH Message Digest Algorithm, " - "accelerated by PCLMULQDQ-NI"); +MODULE_DESCRIPTION("GHASH hash function, accelerated by PCLMULQDQ-NI"); MODULE_ALIAS_CRYPTO("ghash"); -- cgit v1.2.1 From 931c940fa5643d3d8cd62ba7bfa0c61077eb7b59 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 26 Jul 2019 21:19:04 +0300 Subject: crypto: s390/aes - fix name clash after AES library refactor The newly introduced AES library exposes aes_encrypt/aes_decrypt routines so rename existing occurrences of those identifiers in the s390 driver. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/s390/crypto/aes_s390.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index d00f84add5f4..dc0f72dd6e03 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -108,7 +108,7 @@ static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, return 0; } -static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) +static void crypto_aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); @@ -119,7 +119,7 @@ static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) cpacf_km(sctx->fc, &sctx->key, out, in, AES_BLOCK_SIZE); } -static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) +static void crypto_aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); @@ -172,8 +172,8 @@ static struct crypto_alg aes_alg = { .cia_min_keysize = AES_MIN_KEY_SIZE, .cia_max_keysize = AES_MAX_KEY_SIZE, .cia_setkey = aes_set_key, - .cia_encrypt = aes_encrypt, - .cia_decrypt = aes_decrypt, + .cia_encrypt = crypto_aes_encrypt, + .cia_decrypt = crypto_aes_decrypt, } } }; -- cgit v1.2.1 From 155e4db324874d22aa8d3892da2a37fb39c89578 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 1 Aug 2019 12:11:33 +1000 Subject: asm-generic: Remove redundant arch-specific rules for simd.h Now that simd.h is in include/asm-generic/Kbuild we don't need the arch-specific Kbuild rules for them. Reported-by: Stephen Rothwell Fixes: 82cb54856874 ("asm-generic: make simd.h a mandatory...") Signed-off-by: Herbert Xu Acked-by: Ard Biesheuvel Acked-by: Arnd Bergmann --- arch/arm/include/asm/Kbuild | 1 - arch/powerpc/include/asm/Kbuild | 1 - 2 files changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild index 6b2dc15b6dff..68ca86f85eb7 100644 --- a/arch/arm/include/asm/Kbuild +++ b/arch/arm/include/asm/Kbuild @@ -17,7 +17,6 @@ generic-y += parport.h generic-y += preempt.h generic-y += seccomp.h generic-y += serial.h -generic-y += simd.h generic-y += trace_clock.h generated-y += mach-types.h diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index 9a1d2fc6ceb7..64870c7be4a3 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -11,4 +11,3 @@ generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += vtime.h generic-y += msi.h -generic-y += simd.h -- cgit v1.2.1 From 4e2c820897d93a4dbe76865d860087f809963308 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 15 Aug 2019 12:00:44 +0300 Subject: crypto: s390/des - switch to new verification routines Switch to the refactored DES key verification routines. While at it, rename the DES encrypt/decrypt routines so they will not conflict with the DES library later on. Reviewed-by: Harald Freudenberger Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/s390/crypto/des_s390.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c index 374b42fc7637..439b100c6f2e 100644 --- a/arch/s390/crypto/des_s390.c +++ b/arch/s390/crypto/des_s390.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #define DES3_KEY_SIZE (3 * DES_KEY_SIZE) @@ -35,27 +35,24 @@ static int des_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int key_len) { struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm); - u32 tmp[DES_EXPKEY_WORDS]; + int err; - /* check for weak keys */ - if (!des_ekey(tmp, key) && - (tfm->crt_flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) { - tfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY; - return -EINVAL; - } + err = crypto_des_verify_key(tfm, key); + if (err) + return err; memcpy(ctx->key, key, key_len); return 0; } -static void des_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) +static void s390_des_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm); cpacf_km(CPACF_KM_DEA, ctx->key, out, in, DES_BLOCK_SIZE); } -static void des_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) +static void s390_des_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm); @@ -76,8 +73,8 @@ static struct crypto_alg des_alg = { .cia_min_keysize = DES_KEY_SIZE, .cia_max_keysize = DES_KEY_SIZE, .cia_setkey = des_setkey, - .cia_encrypt = des_encrypt, - .cia_decrypt = des_decrypt, + .cia_encrypt = s390_des_encrypt, + .cia_decrypt = s390_des_decrypt, } } }; @@ -227,8 +224,8 @@ static int des3_setkey(struct crypto_tfm *tfm, const u8 *key, struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm); int err; - err = __des3_verify_key(&tfm->crt_flags, key); - if (unlikely(err)) + err = crypto_des3_ede_verify_key(tfm, key); + if (err) return err; memcpy(ctx->key, key, key_len); -- cgit v1.2.1 From 8f467cf29f192f9ebd89dcc4a18901e62c17e37a Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 15 Aug 2019 12:00:45 +0300 Subject: crypto: sparc/des - switch to new verification routines Switch to the refactored DES key verification routines. While at it, rename the DES encrypt/decrypt routines so they will not conflict with the DES library later on. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/sparc/crypto/des_glue.c | 37 ++++++++++++++++--------------------- 1 file changed, 16 insertions(+), 21 deletions(-) (limited to 'arch') diff --git a/arch/sparc/crypto/des_glue.c b/arch/sparc/crypto/des_glue.c index 281448f72c90..db6010b4e52e 100644 --- a/arch/sparc/crypto/des_glue.c +++ b/arch/sparc/crypto/des_glue.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include @@ -45,19 +45,15 @@ static int des_set_key(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen) { struct des_sparc64_ctx *dctx = crypto_tfm_ctx(tfm); - u32 *flags = &tfm->crt_flags; - u32 tmp[DES_EXPKEY_WORDS]; - int ret; + int err; /* Even though we have special instructions for key expansion, - * we call des_ekey() so that we don't have to write our own + * we call des_verify_key() so that we don't have to write our own * weak key detection code. */ - ret = des_ekey(tmp, key); - if (unlikely(ret == 0) && (*flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) { - *flags |= CRYPTO_TFM_RES_WEAK_KEY; - return -EINVAL; - } + err = crypto_des_verify_key(tfm, key); + if (err) + return err; des_sparc64_key_expand((const u32 *) key, &dctx->encrypt_expkey[0]); encrypt_to_decrypt(&dctx->decrypt_expkey[0], &dctx->encrypt_expkey[0]); @@ -68,7 +64,7 @@ static int des_set_key(struct crypto_tfm *tfm, const u8 *key, extern void des_sparc64_crypt(const u64 *key, const u64 *input, u64 *output); -static void des_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +static void sparc_des_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { struct des_sparc64_ctx *ctx = crypto_tfm_ctx(tfm); const u64 *K = ctx->encrypt_expkey; @@ -76,7 +72,7 @@ static void des_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) des_sparc64_crypt(K, (const u64 *) src, (u64 *) dst); } -static void des_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +static void sparc_des_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { struct des_sparc64_ctx *ctx = crypto_tfm_ctx(tfm); const u64 *K = ctx->decrypt_expkey; @@ -202,14 +198,13 @@ static int des3_ede_set_key(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen) { struct des3_ede_sparc64_ctx *dctx = crypto_tfm_ctx(tfm); - u32 *flags = &tfm->crt_flags; u64 k1[DES_EXPKEY_WORDS / 2]; u64 k2[DES_EXPKEY_WORDS / 2]; u64 k3[DES_EXPKEY_WORDS / 2]; int err; - err = __des3_verify_key(flags, key); - if (unlikely(err)) + err = crypto_des3_ede_verify_key(tfm, key); + if (err) return err; des_sparc64_key_expand((const u32 *)key, k1); @@ -235,7 +230,7 @@ static int des3_ede_set_key(struct crypto_tfm *tfm, const u8 *key, extern void des3_ede_sparc64_crypt(const u64 *key, const u64 *input, u64 *output); -static void des3_ede_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +static void sparc_des3_ede_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { struct des3_ede_sparc64_ctx *ctx = crypto_tfm_ctx(tfm); const u64 *K = ctx->encrypt_expkey; @@ -243,7 +238,7 @@ static void des3_ede_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) des3_ede_sparc64_crypt(K, (const u64 *) src, (u64 *) dst); } -static void des3_ede_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +static void sparc_des3_ede_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { struct des3_ede_sparc64_ctx *ctx = crypto_tfm_ctx(tfm); const u64 *K = ctx->decrypt_expkey; @@ -390,8 +385,8 @@ static struct crypto_alg algs[] = { { .cia_min_keysize = DES_KEY_SIZE, .cia_max_keysize = DES_KEY_SIZE, .cia_setkey = des_set_key, - .cia_encrypt = des_encrypt, - .cia_decrypt = des_decrypt + .cia_encrypt = sparc_des_encrypt, + .cia_decrypt = sparc_des_decrypt } } }, { @@ -447,8 +442,8 @@ static struct crypto_alg algs[] = { { .cia_min_keysize = DES3_EDE_KEY_SIZE, .cia_max_keysize = DES3_EDE_KEY_SIZE, .cia_setkey = des3_ede_set_key, - .cia_encrypt = des3_ede_encrypt, - .cia_decrypt = des3_ede_decrypt + .cia_encrypt = sparc_des3_ede_encrypt, + .cia_decrypt = sparc_des3_ede_decrypt } } }, { -- cgit v1.2.1 From 4fd4be0576166f0b360d22744e143cd7847b0c29 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 15 Aug 2019 12:01:07 +0300 Subject: crypto: 3des - move verification out of exported routine In preparation of moving the shared key expansion routine into the DES library, move the verification done by __des3_ede_setkey() into its callers. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/x86/crypto/des3_ede_glue.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/x86/crypto/des3_ede_glue.c b/arch/x86/crypto/des3_ede_glue.c index 968386c21ef4..ec608babc22b 100644 --- a/arch/x86/crypto/des3_ede_glue.c +++ b/arch/x86/crypto/des3_ede_glue.c @@ -348,6 +348,10 @@ static int des3_ede_x86_setkey(struct crypto_tfm *tfm, const u8 *key, u32 i, j, tmp; int err; + err = crypto_des3_ede_verify_key(tfm, key); + if (err) + return err; + /* Generate encryption context using generic implementation. */ err = __des3_ede_setkey(ctx->enc_expkey, &tfm->crt_flags, key, keylen); if (err < 0) -- cgit v1.2.1 From 04007b0e6cbbab5836ac891626e91edf10d46341 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 15 Aug 2019 12:01:09 +0300 Subject: crypto: des - split off DES library from generic DES cipher driver Another one for the cipher museum: split off DES core processing into a separate module so other drivers (mostly for crypto accelerators) can reuse the code without pulling in the generic DES cipher itself. This will also permit the cipher interface to be made private to the crypto API itself once we move the only user in the kernel (CIFS) to this library interface. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/x86/crypto/des3_ede_glue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/crypto/des3_ede_glue.c b/arch/x86/crypto/des3_ede_glue.c index ec608babc22b..f730a312ce35 100644 --- a/arch/x86/crypto/des3_ede_glue.c +++ b/arch/x86/crypto/des3_ede_glue.c @@ -11,7 +11,7 @@ */ #include -#include +#include #include #include #include -- cgit v1.2.1 From cc1d24b980ded2124f6f80b1321427031fd2beb3 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 15 Aug 2019 12:01:10 +0300 Subject: crypto: x86/des - switch to library interface Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/x86/crypto/des3_ede_glue.c | 42 ++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 19 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/des3_ede_glue.c b/arch/x86/crypto/des3_ede_glue.c index f730a312ce35..89830e531350 100644 --- a/arch/x86/crypto/des3_ede_glue.c +++ b/arch/x86/crypto/des3_ede_glue.c @@ -11,7 +11,7 @@ */ #include -#include +#include #include #include #include @@ -19,8 +19,8 @@ #include struct des3_ede_x86_ctx { - u32 enc_expkey[DES3_EDE_EXPKEY_WORDS]; - u32 dec_expkey[DES3_EDE_EXPKEY_WORDS]; + struct des3_ede_ctx enc; + struct des3_ede_ctx dec; }; /* regular block cipher functions */ @@ -34,7 +34,7 @@ asmlinkage void des3_ede_x86_64_crypt_blk_3way(const u32 *expkey, u8 *dst, static inline void des3_ede_enc_blk(struct des3_ede_x86_ctx *ctx, u8 *dst, const u8 *src) { - u32 *enc_ctx = ctx->enc_expkey; + u32 *enc_ctx = ctx->enc.expkey; des3_ede_x86_64_crypt_blk(enc_ctx, dst, src); } @@ -42,7 +42,7 @@ static inline void des3_ede_enc_blk(struct des3_ede_x86_ctx *ctx, u8 *dst, static inline void des3_ede_dec_blk(struct des3_ede_x86_ctx *ctx, u8 *dst, const u8 *src) { - u32 *dec_ctx = ctx->dec_expkey; + u32 *dec_ctx = ctx->dec.expkey; des3_ede_x86_64_crypt_blk(dec_ctx, dst, src); } @@ -50,7 +50,7 @@ static inline void des3_ede_dec_blk(struct des3_ede_x86_ctx *ctx, u8 *dst, static inline void des3_ede_enc_blk_3way(struct des3_ede_x86_ctx *ctx, u8 *dst, const u8 *src) { - u32 *enc_ctx = ctx->enc_expkey; + u32 *enc_ctx = ctx->enc.expkey; des3_ede_x86_64_crypt_blk_3way(enc_ctx, dst, src); } @@ -58,7 +58,7 @@ static inline void des3_ede_enc_blk_3way(struct des3_ede_x86_ctx *ctx, u8 *dst, static inline void des3_ede_dec_blk_3way(struct des3_ede_x86_ctx *ctx, u8 *dst, const u8 *src) { - u32 *dec_ctx = ctx->dec_expkey; + u32 *dec_ctx = ctx->dec.expkey; des3_ede_x86_64_crypt_blk_3way(dec_ctx, dst, src); } @@ -122,7 +122,7 @@ static int ecb_encrypt(struct skcipher_request *req) struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct des3_ede_x86_ctx *ctx = crypto_skcipher_ctx(tfm); - return ecb_crypt(req, ctx->enc_expkey); + return ecb_crypt(req, ctx->enc.expkey); } static int ecb_decrypt(struct skcipher_request *req) @@ -130,7 +130,7 @@ static int ecb_decrypt(struct skcipher_request *req) struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct des3_ede_x86_ctx *ctx = crypto_skcipher_ctx(tfm); - return ecb_crypt(req, ctx->dec_expkey); + return ecb_crypt(req, ctx->dec.expkey); } static unsigned int __cbc_encrypt(struct des3_ede_x86_ctx *ctx, @@ -348,24 +348,28 @@ static int des3_ede_x86_setkey(struct crypto_tfm *tfm, const u8 *key, u32 i, j, tmp; int err; - err = crypto_des3_ede_verify_key(tfm, key); - if (err) - return err; + err = des3_ede_expand_key(&ctx->enc, key, keylen); + if (err == -ENOKEY) { + if (crypto_tfm_get_flags(tfm) & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS) + err = -EINVAL; + else + err = 0; + } - /* Generate encryption context using generic implementation. */ - err = __des3_ede_setkey(ctx->enc_expkey, &tfm->crt_flags, key, keylen); - if (err < 0) + if (err) { + memset(ctx, 0, sizeof(*ctx)); return err; + } /* Fix encryption context for this implementation and form decryption * context. */ j = DES3_EDE_EXPKEY_WORDS - 2; for (i = 0; i < DES3_EDE_EXPKEY_WORDS; i += 2, j -= 2) { - tmp = ror32(ctx->enc_expkey[i + 1], 4); - ctx->enc_expkey[i + 1] = tmp; + tmp = ror32(ctx->enc.expkey[i + 1], 4); + ctx->enc.expkey[i + 1] = tmp; - ctx->dec_expkey[j + 0] = ctx->enc_expkey[i + 0]; - ctx->dec_expkey[j + 1] = tmp; + ctx->dec.expkey[j + 0] = ctx->enc.expkey[i + 0]; + ctx->dec.expkey[j + 1] = tmp; } return 0; -- cgit v1.2.1 From 8ce5fac2dc1bf64e1e6d2371e4ff9a9bfe8fd49f Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 16 Aug 2019 15:21:50 +0300 Subject: crypto: x86/xts - implement support for ciphertext stealing Align the x86 code with the generic XTS template, which now supports ciphertext stealing as described by the IEEE XTS-AES spec P1619. Tested-by: Stephan Mueller Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_glue.c | 6 ++- arch/x86/crypto/camellia_aesni_avx2_glue.c | 4 +- arch/x86/crypto/camellia_aesni_avx_glue.c | 4 +- arch/x86/crypto/cast6_avx_glue.c | 4 +- arch/x86/crypto/glue_helper.c | 67 ++++++++++++++++++++++++++++-- arch/x86/crypto/serpent_avx2_glue.c | 4 +- arch/x86/crypto/serpent_avx_glue.c | 4 +- arch/x86/crypto/twofish_avx_glue.c | 4 +- arch/x86/include/asm/crypto/glue_helper.h | 2 +- 9 files changed, 81 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index ef165d8cf443..bf12bb71cecc 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -609,7 +609,8 @@ static int xts_encrypt(struct skcipher_request *req) return glue_xts_req_128bit(&aesni_enc_xts, req, XTS_TWEAK_CAST(aesni_xts_tweak), aes_ctx(ctx->raw_tweak_ctx), - aes_ctx(ctx->raw_crypt_ctx)); + aes_ctx(ctx->raw_crypt_ctx), + false); } static int xts_decrypt(struct skcipher_request *req) @@ -620,7 +621,8 @@ static int xts_decrypt(struct skcipher_request *req) return glue_xts_req_128bit(&aesni_dec_xts, req, XTS_TWEAK_CAST(aesni_xts_tweak), aes_ctx(ctx->raw_tweak_ctx), - aes_ctx(ctx->raw_crypt_ctx)); + aes_ctx(ctx->raw_crypt_ctx), + true); } static int diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c index abf298c272dc..a4f00128ea55 100644 --- a/arch/x86/crypto/camellia_aesni_avx2_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c @@ -182,7 +182,7 @@ static int xts_encrypt(struct skcipher_request *req) return glue_xts_req_128bit(&camellia_enc_xts, req, XTS_TWEAK_CAST(camellia_enc_blk), - &ctx->tweak_ctx, &ctx->crypt_ctx); + &ctx->tweak_ctx, &ctx->crypt_ctx, false); } static int xts_decrypt(struct skcipher_request *req) @@ -192,7 +192,7 @@ static int xts_decrypt(struct skcipher_request *req) return glue_xts_req_128bit(&camellia_dec_xts, req, XTS_TWEAK_CAST(camellia_enc_blk), - &ctx->tweak_ctx, &ctx->crypt_ctx); + &ctx->tweak_ctx, &ctx->crypt_ctx, true); } static struct skcipher_alg camellia_algs[] = { diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c index 0c22d84750a3..f28d282779b8 100644 --- a/arch/x86/crypto/camellia_aesni_avx_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx_glue.c @@ -208,7 +208,7 @@ static int xts_encrypt(struct skcipher_request *req) return glue_xts_req_128bit(&camellia_enc_xts, req, XTS_TWEAK_CAST(camellia_enc_blk), - &ctx->tweak_ctx, &ctx->crypt_ctx); + &ctx->tweak_ctx, &ctx->crypt_ctx, false); } static int xts_decrypt(struct skcipher_request *req) @@ -218,7 +218,7 @@ static int xts_decrypt(struct skcipher_request *req) return glue_xts_req_128bit(&camellia_dec_xts, req, XTS_TWEAK_CAST(camellia_enc_blk), - &ctx->tweak_ctx, &ctx->crypt_ctx); + &ctx->tweak_ctx, &ctx->crypt_ctx, true); } static struct skcipher_alg camellia_algs[] = { diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c index 645f8f16815c..a8a38fffb4a9 100644 --- a/arch/x86/crypto/cast6_avx_glue.c +++ b/arch/x86/crypto/cast6_avx_glue.c @@ -201,7 +201,7 @@ static int xts_encrypt(struct skcipher_request *req) return glue_xts_req_128bit(&cast6_enc_xts, req, XTS_TWEAK_CAST(__cast6_encrypt), - &ctx->tweak_ctx, &ctx->crypt_ctx); + &ctx->tweak_ctx, &ctx->crypt_ctx, false); } static int xts_decrypt(struct skcipher_request *req) @@ -211,7 +211,7 @@ static int xts_decrypt(struct skcipher_request *req) return glue_xts_req_128bit(&cast6_dec_xts, req, XTS_TWEAK_CAST(__cast6_encrypt), - &ctx->tweak_ctx, &ctx->crypt_ctx); + &ctx->tweak_ctx, &ctx->crypt_ctx, true); } static struct skcipher_alg cast6_algs[] = { diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c index 901551445387..d15b99397480 100644 --- a/arch/x86/crypto/glue_helper.c +++ b/arch/x86/crypto/glue_helper.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -259,17 +260,36 @@ done: int glue_xts_req_128bit(const struct common_glue_ctx *gctx, struct skcipher_request *req, common_glue_func_t tweak_fn, void *tweak_ctx, - void *crypt_ctx) + void *crypt_ctx, bool decrypt) { + const bool cts = (req->cryptlen % XTS_BLOCK_SIZE); const unsigned int bsize = 128 / 8; + struct skcipher_request subreq; struct skcipher_walk walk; bool fpu_enabled = false; - unsigned int nbytes; + unsigned int nbytes, tail; int err; + if (req->cryptlen < XTS_BLOCK_SIZE) + return -EINVAL; + + if (unlikely(cts)) { + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + + tail = req->cryptlen % XTS_BLOCK_SIZE + XTS_BLOCK_SIZE; + + skcipher_request_set_tfm(&subreq, tfm); + skcipher_request_set_callback(&subreq, + crypto_skcipher_get_flags(tfm), + NULL, NULL); + skcipher_request_set_crypt(&subreq, req->src, req->dst, + req->cryptlen - tail, req->iv); + req = &subreq; + } + err = skcipher_walk_virt(&walk, req, false); nbytes = walk.nbytes; - if (!nbytes) + if (err) return err; /* set minimum length to bsize, for tweak_fn */ @@ -287,6 +307,47 @@ int glue_xts_req_128bit(const struct common_glue_ctx *gctx, nbytes = walk.nbytes; } + if (unlikely(cts)) { + u8 *next_tweak, *final_tweak = req->iv; + struct scatterlist *src, *dst; + struct scatterlist s[2], d[2]; + le128 b[2]; + + dst = src = scatterwalk_ffwd(s, req->src, req->cryptlen); + if (req->dst != req->src) + dst = scatterwalk_ffwd(d, req->dst, req->cryptlen); + + if (decrypt) { + next_tweak = memcpy(b, req->iv, XTS_BLOCK_SIZE); + gf128mul_x_ble(b, b); + } else { + next_tweak = req->iv; + } + + skcipher_request_set_crypt(&subreq, src, dst, XTS_BLOCK_SIZE, + next_tweak); + + err = skcipher_walk_virt(&walk, req, false) ?: + skcipher_walk_done(&walk, + __glue_xts_req_128bit(gctx, crypt_ctx, &walk)); + if (err) + goto out; + + scatterwalk_map_and_copy(b, dst, 0, XTS_BLOCK_SIZE, 0); + memcpy(b + 1, b, tail - XTS_BLOCK_SIZE); + scatterwalk_map_and_copy(b, src, XTS_BLOCK_SIZE, + tail - XTS_BLOCK_SIZE, 0); + scatterwalk_map_and_copy(b, dst, 0, tail, 1); + + skcipher_request_set_crypt(&subreq, dst, dst, XTS_BLOCK_SIZE, + final_tweak); + + err = skcipher_walk_virt(&walk, req, false) ?: + skcipher_walk_done(&walk, + __glue_xts_req_128bit(gctx, crypt_ctx, &walk)); + } + +out: glue_fpu_end(fpu_enabled); return err; diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c index b871728e0b2f..13fd8d3d2da0 100644 --- a/arch/x86/crypto/serpent_avx2_glue.c +++ b/arch/x86/crypto/serpent_avx2_glue.c @@ -167,7 +167,7 @@ static int xts_encrypt(struct skcipher_request *req) return glue_xts_req_128bit(&serpent_enc_xts, req, XTS_TWEAK_CAST(__serpent_encrypt), - &ctx->tweak_ctx, &ctx->crypt_ctx); + &ctx->tweak_ctx, &ctx->crypt_ctx, false); } static int xts_decrypt(struct skcipher_request *req) @@ -177,7 +177,7 @@ static int xts_decrypt(struct skcipher_request *req) return glue_xts_req_128bit(&serpent_dec_xts, req, XTS_TWEAK_CAST(__serpent_encrypt), - &ctx->tweak_ctx, &ctx->crypt_ctx); + &ctx->tweak_ctx, &ctx->crypt_ctx, true); } static struct skcipher_alg serpent_algs[] = { diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c index 4a9a9f2ee1d8..7d3dca38a5a2 100644 --- a/arch/x86/crypto/serpent_avx_glue.c +++ b/arch/x86/crypto/serpent_avx_glue.c @@ -207,7 +207,7 @@ static int xts_encrypt(struct skcipher_request *req) return glue_xts_req_128bit(&serpent_enc_xts, req, XTS_TWEAK_CAST(__serpent_encrypt), - &ctx->tweak_ctx, &ctx->crypt_ctx); + &ctx->tweak_ctx, &ctx->crypt_ctx, false); } static int xts_decrypt(struct skcipher_request *req) @@ -217,7 +217,7 @@ static int xts_decrypt(struct skcipher_request *req) return glue_xts_req_128bit(&serpent_dec_xts, req, XTS_TWEAK_CAST(__serpent_encrypt), - &ctx->tweak_ctx, &ctx->crypt_ctx); + &ctx->tweak_ctx, &ctx->crypt_ctx, true); } static struct skcipher_alg serpent_algs[] = { diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c index 0dbf8e8b09d7..d561c821788b 100644 --- a/arch/x86/crypto/twofish_avx_glue.c +++ b/arch/x86/crypto/twofish_avx_glue.c @@ -210,7 +210,7 @@ static int xts_encrypt(struct skcipher_request *req) return glue_xts_req_128bit(&twofish_enc_xts, req, XTS_TWEAK_CAST(twofish_enc_blk), - &ctx->tweak_ctx, &ctx->crypt_ctx); + &ctx->tweak_ctx, &ctx->crypt_ctx, false); } static int xts_decrypt(struct skcipher_request *req) @@ -220,7 +220,7 @@ static int xts_decrypt(struct skcipher_request *req) return glue_xts_req_128bit(&twofish_dec_xts, req, XTS_TWEAK_CAST(twofish_enc_blk), - &ctx->tweak_ctx, &ctx->crypt_ctx); + &ctx->tweak_ctx, &ctx->crypt_ctx, true); } static struct skcipher_alg twofish_algs[] = { diff --git a/arch/x86/include/asm/crypto/glue_helper.h b/arch/x86/include/asm/crypto/glue_helper.h index d1818634ae7e..8d4a8e1226ee 100644 --- a/arch/x86/include/asm/crypto/glue_helper.h +++ b/arch/x86/include/asm/crypto/glue_helper.h @@ -114,7 +114,7 @@ extern int glue_ctr_req_128bit(const struct common_glue_ctx *gctx, extern int glue_xts_req_128bit(const struct common_glue_ctx *gctx, struct skcipher_request *req, common_glue_func_t tweak_fn, void *tweak_ctx, - void *crypt_ctx); + void *crypt_ctx, bool decrypt); extern void glue_xts_crypt_128bit_one(void *ctx, u128 *dst, const u128 *src, le128 *iv, common_glue_func_t fn); -- cgit v1.2.1 From ce68acbcb6a5d5dbaa9e76df924e1c191e8c7516 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 16 Aug 2019 15:35:45 +0300 Subject: crypto: s390/xts-aes - invoke fallback for ciphertext stealing For correctness and compliance with the XTS-AES specification, we are adding support for ciphertext stealing to XTS implementations, even though no use cases are known that will be enabled by this. Since the s390 implementation already has a fallback skcipher standby for other purposes, let's use it for this purpose as well. If ciphertext stealing use cases ever become a bottleneck, we can always revisit this. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/s390/crypto/aes_s390.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index dc0f72dd6e03..a34faadc757e 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -512,7 +512,7 @@ static int xts_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, unsigned long fc; int err; - err = xts_check_key(tfm, in_key, key_len); + err = xts_fallback_setkey(tfm, in_key, key_len); if (err) return err; @@ -529,7 +529,7 @@ static int xts_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, /* Check if the function code is available */ xts_ctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? fc : 0; if (!xts_ctx->fc) - return xts_fallback_setkey(tfm, in_key, key_len); + return 0; /* Split the XTS key into the two subkeys */ key_len = key_len / 2; @@ -586,7 +586,7 @@ static int xts_aes_encrypt(struct blkcipher_desc *desc, struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm); struct blkcipher_walk walk; - if (unlikely(!xts_ctx->fc)) + if (unlikely(!xts_ctx->fc || (nbytes % XTS_BLOCKSIZE) != 0)) return xts_fallback_encrypt(desc, dst, src, nbytes); blkcipher_walk_init(&walk, dst, src, nbytes); @@ -600,7 +600,7 @@ static int xts_aes_decrypt(struct blkcipher_desc *desc, struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm); struct blkcipher_walk walk; - if (unlikely(!xts_ctx->fc)) + if (unlikely(!xts_ctx->fc || (nbytes % XTS_BLOCKSIZE) != 0)) return xts_fallback_decrypt(desc, dst, src, nbytes); blkcipher_walk_init(&walk, dst, src, nbytes); -- cgit v1.2.1 From ad767ee858b38af634c957a792cb001d54a7b981 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 17 Aug 2019 16:24:31 +0200 Subject: crypto: sha256 - Move lib/sha256.c to lib/crypto Generic crypto implementations belong under lib/crypto not directly in lib, likewise the header should be in include/crypto, not include/linux. Note that the code in lib/crypto/sha256.c is not yet available for generic use after this commit, it is still only used by the s390 and x86 purgatory code. Making it suitable for generic use is done in further patches in this series. Signed-off-by: Hans de Goede Signed-off-by: Herbert Xu --- arch/s390/purgatory/Makefile | 2 +- arch/s390/purgatory/purgatory.c | 2 +- arch/x86/purgatory/Makefile | 2 +- arch/x86/purgatory/purgatory.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile index dc1ae4ff79d7..85b05c9e40f5 100644 --- a/arch/s390/purgatory/Makefile +++ b/arch/s390/purgatory/Makefile @@ -7,7 +7,7 @@ purgatory-y := head.o purgatory.o string.o sha256.o mem.o targets += $(purgatory-y) purgatory.lds purgatory purgatory.ro PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y)) -$(obj)/sha256.o: $(srctree)/lib/sha256.c FORCE +$(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE $(call if_changed_rule,cc_o_c) $(obj)/mem.o: $(srctree)/arch/s390/lib/mem.S FORCE diff --git a/arch/s390/purgatory/purgatory.c b/arch/s390/purgatory/purgatory.c index 3528e6da4e87..a80c78da9985 100644 --- a/arch/s390/purgatory/purgatory.c +++ b/arch/s390/purgatory/purgatory.c @@ -8,8 +8,8 @@ */ #include -#include #include +#include #include int verify_sha256_digest(void) diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile index 3cf302b26332..026fa0006f0b 100644 --- a/arch/x86/purgatory/Makefile +++ b/arch/x86/purgatory/Makefile @@ -6,7 +6,7 @@ purgatory-y := purgatory.o stack.o setup-x86_$(BITS).o sha256.o entry64.o string targets += $(purgatory-y) PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y)) -$(obj)/sha256.o: $(srctree)/lib/sha256.c FORCE +$(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE $(call if_changed_rule,cc_o_c) LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined -nostdlib -z nodefaultlib diff --git a/arch/x86/purgatory/purgatory.c b/arch/x86/purgatory/purgatory.c index 6d8d5a34c377..7cd7a2618180 100644 --- a/arch/x86/purgatory/purgatory.c +++ b/arch/x86/purgatory/purgatory.c @@ -9,7 +9,7 @@ */ #include -#include +#include #include #include "../boot/string.h" -- cgit v1.2.1 From 01d3aee86625bd798a5e69afb92517d5530c7ed1 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 17 Aug 2019 16:24:33 +0200 Subject: crypto: sha256 - Make lib/crypto/sha256.c suitable for generic use Before this commit lib/crypto/sha256.c has only been used in the s390 and x86 purgatory code, make it suitable for generic use: * Export interesting symbols * Add -D__DISABLE_EXPORTS to CFLAGS_sha256.o for purgatory builds to avoid the exports for the purgatory builds * Add to lib/crypto/Makefile and crypto/Kconfig Signed-off-by: Hans de Goede Signed-off-by: Herbert Xu --- arch/s390/purgatory/Makefile | 2 ++ arch/x86/purgatory/Makefile | 2 ++ 2 files changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile index 85b05c9e40f5..bc0d7a0d0394 100644 --- a/arch/s390/purgatory/Makefile +++ b/arch/s390/purgatory/Makefile @@ -10,6 +10,8 @@ PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y)) $(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE $(call if_changed_rule,cc_o_c) +CFLAGS_sha256.o := -D__DISABLE_EXPORTS + $(obj)/mem.o: $(srctree)/arch/s390/lib/mem.S FORCE $(call if_changed_rule,as_o_S) diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile index 026fa0006f0b..ea86982aba27 100644 --- a/arch/x86/purgatory/Makefile +++ b/arch/x86/purgatory/Makefile @@ -9,6 +9,8 @@ PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y)) $(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE $(call if_changed_rule,cc_o_c) +CFLAGS_sha256.o := -D__DISABLE_EXPORTS + LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined -nostdlib -z nodefaultlib targets += purgatory.ro -- cgit v1.2.1 From 5a74362c9a8b122366dd8f2197fc6b784300d6a2 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 22 Aug 2019 13:24:54 +0300 Subject: crypto: s390/aes - fix typo in XTS_BLOCK_SIZE identifier Fix a typo XTS_BLOCKSIZE -> XTS_BLOCK_SIZE, causing the build to break. Fixes: ce68acbcb6a5 ("crypto: s390/xts-aes - invoke fallback for...") Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/s390/crypto/aes_s390.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index a34faadc757e..d4f6fd42a105 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -586,7 +586,7 @@ static int xts_aes_encrypt(struct blkcipher_desc *desc, struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm); struct blkcipher_walk walk; - if (unlikely(!xts_ctx->fc || (nbytes % XTS_BLOCKSIZE) != 0)) + if (unlikely(!xts_ctx->fc || (nbytes % XTS_BLOCK_SIZE) != 0)) return xts_fallback_encrypt(desc, dst, src, nbytes); blkcipher_walk_init(&walk, dst, src, nbytes); @@ -600,7 +600,7 @@ static int xts_aes_decrypt(struct blkcipher_desc *desc, struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm); struct blkcipher_walk walk; - if (unlikely(!xts_ctx->fc || (nbytes % XTS_BLOCKSIZE) != 0)) + if (unlikely(!xts_ctx->fc || (nbytes % XTS_BLOCK_SIZE) != 0)) return xts_fallback_decrypt(desc, dst, src, nbytes); blkcipher_walk_init(&walk, dst, src, nbytes); -- cgit v1.2.1 From 65d0042b529048b80dcb1b6b4a738a8f71828146 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 19 Aug 2019 17:17:35 +0300 Subject: crypto: arm64/aes-cts-cbc - factor out CBC en/decryption of a walk The plain CBC driver and the CTS one share some code that iterates over a scatterwalk and invokes the CBC asm code to do the processing. The upcoming ESSIV/CBC mode will clone that pattern for the third time, so let's factor it out first. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/aes-glue.c | 82 +++++++++++++++++++++----------------------- 1 file changed, 40 insertions(+), 42 deletions(-) (limited to 'arch') diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index 55d6d4838708..23abf335f1ee 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c @@ -186,46 +186,64 @@ static int ecb_decrypt(struct skcipher_request *req) return err; } -static int cbc_encrypt(struct skcipher_request *req) +static int cbc_encrypt_walk(struct skcipher_request *req, + struct skcipher_walk *walk) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); - int err, rounds = 6 + ctx->key_length / 4; - struct skcipher_walk walk; + int err = 0, rounds = 6 + ctx->key_length / 4; unsigned int blocks; - err = skcipher_walk_virt(&walk, req, false); - - while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { + while ((blocks = (walk->nbytes / AES_BLOCK_SIZE))) { kernel_neon_begin(); - aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key_enc, rounds, blocks, walk.iv); + aes_cbc_encrypt(walk->dst.virt.addr, walk->src.virt.addr, + ctx->key_enc, rounds, blocks, walk->iv); kernel_neon_end(); - err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); + err = skcipher_walk_done(walk, walk->nbytes % AES_BLOCK_SIZE); } return err; } -static int cbc_decrypt(struct skcipher_request *req) +static int cbc_encrypt(struct skcipher_request *req) { - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); - int err, rounds = 6 + ctx->key_length / 4; struct skcipher_walk walk; - unsigned int blocks; + int err; err = skcipher_walk_virt(&walk, req, false); + if (err) + return err; + return cbc_encrypt_walk(req, &walk); +} - while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { +static int cbc_decrypt_walk(struct skcipher_request *req, + struct skcipher_walk *walk) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); + int err = 0, rounds = 6 + ctx->key_length / 4; + unsigned int blocks; + + while ((blocks = (walk->nbytes / AES_BLOCK_SIZE))) { kernel_neon_begin(); - aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key_dec, rounds, blocks, walk.iv); + aes_cbc_decrypt(walk->dst.virt.addr, walk->src.virt.addr, + ctx->key_dec, rounds, blocks, walk->iv); kernel_neon_end(); - err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); + err = skcipher_walk_done(walk, walk->nbytes % AES_BLOCK_SIZE); } return err; } +static int cbc_decrypt(struct skcipher_request *req) +{ + struct skcipher_walk walk; + int err; + + err = skcipher_walk_virt(&walk, req, false); + if (err) + return err; + return cbc_decrypt_walk(req, &walk); +} + static int cts_cbc_init_tfm(struct crypto_skcipher *tfm) { crypto_skcipher_set_reqsize(tfm, sizeof(struct cts_cbc_req_ctx)); @@ -251,22 +269,12 @@ static int cts_cbc_encrypt(struct skcipher_request *req) } if (cbc_blocks > 0) { - unsigned int blocks; - skcipher_request_set_crypt(&rctx->subreq, req->src, req->dst, cbc_blocks * AES_BLOCK_SIZE, req->iv); - err = skcipher_walk_virt(&walk, &rctx->subreq, false); - - while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { - kernel_neon_begin(); - aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key_enc, rounds, blocks, walk.iv); - kernel_neon_end(); - err = skcipher_walk_done(&walk, - walk.nbytes % AES_BLOCK_SIZE); - } + err = skcipher_walk_virt(&walk, &rctx->subreq, false) ?: + cbc_encrypt_walk(&rctx->subreq, &walk); if (err) return err; @@ -316,22 +324,12 @@ static int cts_cbc_decrypt(struct skcipher_request *req) } if (cbc_blocks > 0) { - unsigned int blocks; - skcipher_request_set_crypt(&rctx->subreq, req->src, req->dst, cbc_blocks * AES_BLOCK_SIZE, req->iv); - err = skcipher_walk_virt(&walk, &rctx->subreq, false); - - while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { - kernel_neon_begin(); - aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key_dec, rounds, blocks, walk.iv); - kernel_neon_end(); - err = skcipher_walk_done(&walk, - walk.nbytes % AES_BLOCK_SIZE); - } + err = skcipher_walk_virt(&walk, &rctx->subreq, false) ?: + cbc_decrypt_walk(&rctx->subreq, &walk); if (err) return err; -- cgit v1.2.1 From 735177ca148af5049e9e98b38eaba0c769182655 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 19 Aug 2019 17:17:36 +0300 Subject: crypto: arm64/aes - implement accelerated ESSIV/CBC mode Add an accelerated version of the 'essiv(cbc(aes),sha256)' skcipher, which is used by fscrypt or dm-crypt on systems where CBC mode is signficantly more performant than XTS mode (e.g., when using a h/w accelerator which supports the former but not the latter) This avoids a separate call into the AES cipher for every invocation. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/aes-glue.c | 124 ++++++++++++++++++++++++++++++++++++++++++ arch/arm64/crypto/aes-modes.S | 28 ++++++++++ 2 files changed, 152 insertions(+) (limited to 'arch') diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index 23abf335f1ee..ca0c84d56cba 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -30,6 +31,8 @@ #define aes_cbc_decrypt ce_aes_cbc_decrypt #define aes_cbc_cts_encrypt ce_aes_cbc_cts_encrypt #define aes_cbc_cts_decrypt ce_aes_cbc_cts_decrypt +#define aes_essiv_cbc_encrypt ce_aes_essiv_cbc_encrypt +#define aes_essiv_cbc_decrypt ce_aes_essiv_cbc_decrypt #define aes_ctr_encrypt ce_aes_ctr_encrypt #define aes_xts_encrypt ce_aes_xts_encrypt #define aes_xts_decrypt ce_aes_xts_decrypt @@ -44,6 +47,8 @@ MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions"); #define aes_cbc_decrypt neon_aes_cbc_decrypt #define aes_cbc_cts_encrypt neon_aes_cbc_cts_encrypt #define aes_cbc_cts_decrypt neon_aes_cbc_cts_decrypt +#define aes_essiv_cbc_encrypt neon_aes_essiv_cbc_encrypt +#define aes_essiv_cbc_decrypt neon_aes_essiv_cbc_decrypt #define aes_ctr_encrypt neon_aes_ctr_encrypt #define aes_xts_encrypt neon_aes_xts_encrypt #define aes_xts_decrypt neon_aes_xts_decrypt @@ -51,6 +56,7 @@ MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions"); MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 NEON"); MODULE_ALIAS_CRYPTO("ecb(aes)"); MODULE_ALIAS_CRYPTO("cbc(aes)"); +MODULE_ALIAS_CRYPTO("essiv(cbc(aes),sha256)"); MODULE_ALIAS_CRYPTO("ctr(aes)"); MODULE_ALIAS_CRYPTO("xts(aes)"); MODULE_ALIAS_CRYPTO("cmac(aes)"); @@ -87,6 +93,13 @@ asmlinkage void aes_xts_decrypt(u8 out[], u8 const in[], u32 const rk1[], int rounds, int blocks, u32 const rk2[], u8 iv[], int first); +asmlinkage void aes_essiv_cbc_encrypt(u8 out[], u8 const in[], u32 const rk1[], + int rounds, int blocks, u8 iv[], + u32 const rk2[]); +asmlinkage void aes_essiv_cbc_decrypt(u8 out[], u8 const in[], u32 const rk1[], + int rounds, int blocks, u8 iv[], + u32 const rk2[]); + asmlinkage void aes_mac_update(u8 const in[], u32 const rk[], int rounds, int blocks, u8 dg[], int enc_before, int enc_after); @@ -102,6 +115,12 @@ struct crypto_aes_xts_ctx { struct crypto_aes_ctx __aligned(8) key2; }; +struct crypto_aes_essiv_cbc_ctx { + struct crypto_aes_ctx key1; + struct crypto_aes_ctx __aligned(8) key2; + struct crypto_shash *hash; +}; + struct mac_tfm_ctx { struct crypto_aes_ctx key; u8 __aligned(8) consts[]; @@ -146,6 +165,31 @@ static int xts_set_key(struct crypto_skcipher *tfm, const u8 *in_key, return -EINVAL; } +static int essiv_cbc_set_key(struct crypto_skcipher *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct crypto_aes_essiv_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); + SHASH_DESC_ON_STACK(desc, ctx->hash); + u8 digest[SHA256_DIGEST_SIZE]; + int ret; + + ret = aes_expandkey(&ctx->key1, in_key, key_len); + if (ret) + goto out; + + desc->tfm = ctx->hash; + crypto_shash_digest(desc, in_key, key_len, digest); + + ret = aes_expandkey(&ctx->key2, digest, sizeof(digest)); + if (ret) + goto out; + + return 0; +out: + crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; +} + static int ecb_encrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); @@ -360,6 +404,68 @@ static int cts_cbc_decrypt(struct skcipher_request *req) return skcipher_walk_done(&walk, 0); } +static int essiv_cbc_init_tfm(struct crypto_skcipher *tfm) +{ + struct crypto_aes_essiv_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); + + ctx->hash = crypto_alloc_shash("sha256", 0, 0); + if (IS_ERR(ctx->hash)) + return PTR_ERR(ctx->hash); + + return 0; +} + +static void essiv_cbc_exit_tfm(struct crypto_skcipher *tfm) +{ + struct crypto_aes_essiv_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); + + crypto_free_shash(ctx->hash); +} + +static int essiv_cbc_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_aes_essiv_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); + int err, rounds = 6 + ctx->key1.key_length / 4; + struct skcipher_walk walk; + unsigned int blocks; + + err = skcipher_walk_virt(&walk, req, false); + + blocks = walk.nbytes / AES_BLOCK_SIZE; + if (blocks) { + kernel_neon_begin(); + aes_essiv_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr, + ctx->key1.key_enc, rounds, blocks, + req->iv, ctx->key2.key_enc); + kernel_neon_end(); + err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); + } + return err ?: cbc_encrypt_walk(req, &walk); +} + +static int essiv_cbc_decrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_aes_essiv_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); + int err, rounds = 6 + ctx->key1.key_length / 4; + struct skcipher_walk walk; + unsigned int blocks; + + err = skcipher_walk_virt(&walk, req, false); + + blocks = walk.nbytes / AES_BLOCK_SIZE; + if (blocks) { + kernel_neon_begin(); + aes_essiv_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr, + ctx->key1.key_dec, rounds, blocks, + req->iv, ctx->key2.key_enc); + kernel_neon_end(); + err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); + } + return err ?: cbc_decrypt_walk(req, &walk); +} + static int ctr_encrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); @@ -515,6 +621,24 @@ static struct skcipher_alg aes_algs[] = { { .encrypt = cts_cbc_encrypt, .decrypt = cts_cbc_decrypt, .init = cts_cbc_init_tfm, +}, { + .base = { + .cra_name = "__essiv(cbc(aes),sha256)", + .cra_driver_name = "__essiv-cbc-aes-sha256-" MODE, + .cra_priority = PRIO + 1, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_essiv_cbc_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = essiv_cbc_set_key, + .encrypt = essiv_cbc_encrypt, + .decrypt = essiv_cbc_decrypt, + .init = essiv_cbc_init_tfm, + .exit = essiv_cbc_exit_tfm, }, { .base = { .cra_name = "__ctr(aes)", diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S index 324039b72094..2879f030a749 100644 --- a/arch/arm64/crypto/aes-modes.S +++ b/arch/arm64/crypto/aes-modes.S @@ -118,8 +118,23 @@ AES_ENDPROC(aes_ecb_decrypt) * int blocks, u8 iv[]) * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, * int blocks, u8 iv[]) + * aes_essiv_cbc_encrypt(u8 out[], u8 const in[], u32 const rk1[], + * int rounds, int blocks, u8 iv[], + * u32 const rk2[]); + * aes_essiv_cbc_decrypt(u8 out[], u8 const in[], u32 const rk1[], + * int rounds, int blocks, u8 iv[], + * u32 const rk2[]); */ +AES_ENTRY(aes_essiv_cbc_encrypt) + ld1 {v4.16b}, [x5] /* get iv */ + + mov w8, #14 /* AES-256: 14 rounds */ + enc_prepare w8, x6, x7 + encrypt_block v4, w8, x6, x7, w9 + enc_switch_key w3, x2, x6 + b .Lcbcencloop4x + AES_ENTRY(aes_cbc_encrypt) ld1 {v4.16b}, [x5] /* get iv */ enc_prepare w3, x2, x6 @@ -153,13 +168,25 @@ AES_ENTRY(aes_cbc_encrypt) st1 {v4.16b}, [x5] /* return iv */ ret AES_ENDPROC(aes_cbc_encrypt) +AES_ENDPROC(aes_essiv_cbc_encrypt) + +AES_ENTRY(aes_essiv_cbc_decrypt) + stp x29, x30, [sp, #-16]! + mov x29, sp + + ld1 {cbciv.16b}, [x5] /* get iv */ + mov w8, #14 /* AES-256: 14 rounds */ + enc_prepare w8, x6, x7 + encrypt_block cbciv, w8, x6, x7, w9 + b .Lessivcbcdecstart AES_ENTRY(aes_cbc_decrypt) stp x29, x30, [sp, #-16]! mov x29, sp ld1 {cbciv.16b}, [x5] /* get iv */ +.Lessivcbcdecstart: dec_prepare w3, x2, x6 .LcbcdecloopNx: @@ -212,6 +239,7 @@ ST5( st1 {v4.16b}, [x0], #16 ) ldp x29, x30, [sp], #16 ret AES_ENDPROC(aes_cbc_decrypt) +AES_ENDPROC(aes_essiv_cbc_decrypt) /* -- cgit v1.2.1 From e4dcc1be15268b6d34de3968f906577591521bd5 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 1 Sep 2019 22:35:24 +0200 Subject: crypto: arm - Rename functions to avoid conflict with crypto/sha256.h Rename static / file-local functions so that they do not conflict with the functions declared in crypto/sha256.h. This is a preparation patch for folding crypto/sha256.h into crypto/sha.h. Signed-off-by: Hans de Goede Signed-off-by: Herbert Xu --- arch/arm/crypto/sha256_glue.c | 8 ++++---- arch/arm/crypto/sha256_neon_glue.c | 24 ++++++++++++------------ 2 files changed, 16 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/arm/crypto/sha256_glue.c b/arch/arm/crypto/sha256_glue.c index 70efa9656bff..215497f011f2 100644 --- a/arch/arm/crypto/sha256_glue.c +++ b/arch/arm/crypto/sha256_glue.c @@ -39,7 +39,7 @@ int crypto_sha256_arm_update(struct shash_desc *desc, const u8 *data, } EXPORT_SYMBOL(crypto_sha256_arm_update); -static int sha256_final(struct shash_desc *desc, u8 *out) +static int crypto_sha256_arm_final(struct shash_desc *desc, u8 *out) { sha256_base_do_finalize(desc, (sha256_block_fn *)sha256_block_data_order); @@ -51,7 +51,7 @@ int crypto_sha256_arm_finup(struct shash_desc *desc, const u8 *data, { sha256_base_do_update(desc, data, len, (sha256_block_fn *)sha256_block_data_order); - return sha256_final(desc, out); + return crypto_sha256_arm_final(desc, out); } EXPORT_SYMBOL(crypto_sha256_arm_finup); @@ -59,7 +59,7 @@ static struct shash_alg algs[] = { { .digestsize = SHA256_DIGEST_SIZE, .init = sha256_base_init, .update = crypto_sha256_arm_update, - .final = sha256_final, + .final = crypto_sha256_arm_final, .finup = crypto_sha256_arm_finup, .descsize = sizeof(struct sha256_state), .base = { @@ -73,7 +73,7 @@ static struct shash_alg algs[] = { { .digestsize = SHA224_DIGEST_SIZE, .init = sha224_base_init, .update = crypto_sha256_arm_update, - .final = sha256_final, + .final = crypto_sha256_arm_final, .finup = crypto_sha256_arm_finup, .descsize = sizeof(struct sha256_state), .base = { diff --git a/arch/arm/crypto/sha256_neon_glue.c b/arch/arm/crypto/sha256_neon_glue.c index a7ce38a36006..38645e415196 100644 --- a/arch/arm/crypto/sha256_neon_glue.c +++ b/arch/arm/crypto/sha256_neon_glue.c @@ -25,8 +25,8 @@ asmlinkage void sha256_block_data_order_neon(u32 *digest, const void *data, unsigned int num_blks); -static int sha256_update(struct shash_desc *desc, const u8 *data, - unsigned int len) +static int crypto_sha256_neon_update(struct shash_desc *desc, const u8 *data, + unsigned int len) { struct sha256_state *sctx = shash_desc_ctx(desc); @@ -42,8 +42,8 @@ static int sha256_update(struct shash_desc *desc, const u8 *data, return 0; } -static int sha256_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) +static int crypto_sha256_neon_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) { if (!crypto_simd_usable()) return crypto_sha256_arm_finup(desc, data, len, out); @@ -59,17 +59,17 @@ static int sha256_finup(struct shash_desc *desc, const u8 *data, return sha256_base_finish(desc, out); } -static int sha256_final(struct shash_desc *desc, u8 *out) +static int crypto_sha256_neon_final(struct shash_desc *desc, u8 *out) { - return sha256_finup(desc, NULL, 0, out); + return crypto_sha256_neon_finup(desc, NULL, 0, out); } struct shash_alg sha256_neon_algs[] = { { .digestsize = SHA256_DIGEST_SIZE, .init = sha256_base_init, - .update = sha256_update, - .final = sha256_final, - .finup = sha256_finup, + .update = crypto_sha256_neon_update, + .final = crypto_sha256_neon_final, + .finup = crypto_sha256_neon_finup, .descsize = sizeof(struct sha256_state), .base = { .cra_name = "sha256", @@ -81,9 +81,9 @@ struct shash_alg sha256_neon_algs[] = { { }, { .digestsize = SHA224_DIGEST_SIZE, .init = sha224_base_init, - .update = sha256_update, - .final = sha256_final, - .finup = sha256_finup, + .update = crypto_sha256_neon_update, + .final = crypto_sha256_neon_final, + .finup = crypto_sha256_neon_finup, .descsize = sizeof(struct sha256_state), .base = { .cra_name = "sha224", -- cgit v1.2.1 From 8f373bf4935b8218e737d2e6a9cb34da53d216ba Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 1 Sep 2019 22:35:25 +0200 Subject: crypto: arm64 - Rename functions to avoid conflict with crypto/sha256.h Rename static / file-local functions so that they do not conflict with the functions declared in crypto/sha256.h. This is a preparation patch for folding crypto/sha256.h into crypto/sha.h. Signed-off-by: Hans de Goede Signed-off-by: Herbert Xu --- arch/arm64/crypto/sha256-glue.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/arm64/crypto/sha256-glue.c b/arch/arm64/crypto/sha256-glue.c index 04b9d17b0733..e273faca924f 100644 --- a/arch/arm64/crypto/sha256-glue.c +++ b/arch/arm64/crypto/sha256-glue.c @@ -30,15 +30,15 @@ EXPORT_SYMBOL(sha256_block_data_order); asmlinkage void sha256_block_neon(u32 *digest, const void *data, unsigned int num_blks); -static int sha256_update(struct shash_desc *desc, const u8 *data, - unsigned int len) +static int crypto_sha256_arm64_update(struct shash_desc *desc, const u8 *data, + unsigned int len) { return sha256_base_do_update(desc, data, len, (sha256_block_fn *)sha256_block_data_order); } -static int sha256_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) +static int crypto_sha256_arm64_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) { if (len) sha256_base_do_update(desc, data, len, @@ -49,17 +49,17 @@ static int sha256_finup(struct shash_desc *desc, const u8 *data, return sha256_base_finish(desc, out); } -static int sha256_final(struct shash_desc *desc, u8 *out) +static int crypto_sha256_arm64_final(struct shash_desc *desc, u8 *out) { - return sha256_finup(desc, NULL, 0, out); + return crypto_sha256_arm64_finup(desc, NULL, 0, out); } static struct shash_alg algs[] = { { .digestsize = SHA256_DIGEST_SIZE, .init = sha256_base_init, - .update = sha256_update, - .final = sha256_final, - .finup = sha256_finup, + .update = crypto_sha256_arm64_update, + .final = crypto_sha256_arm64_final, + .finup = crypto_sha256_arm64_finup, .descsize = sizeof(struct sha256_state), .base.cra_name = "sha256", .base.cra_driver_name = "sha256-arm64", @@ -69,9 +69,9 @@ static struct shash_alg algs[] = { { }, { .digestsize = SHA224_DIGEST_SIZE, .init = sha224_base_init, - .update = sha256_update, - .final = sha256_final, - .finup = sha256_finup, + .update = crypto_sha256_arm64_update, + .final = crypto_sha256_arm64_final, + .finup = crypto_sha256_arm64_finup, .descsize = sizeof(struct sha256_state), .base.cra_name = "sha224", .base.cra_driver_name = "sha224-arm64", -- cgit v1.2.1 From b86fc489ef53881625fd4be026ddb898f0baadb5 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 1 Sep 2019 22:35:26 +0200 Subject: crypto: s390 - Rename functions to avoid conflict with crypto/sha256.h Rename static / file-local functions so that they do not conflict with the functions declared in crypto/sha256.h. This is a preparation patch for folding crypto/sha256.h into crypto/sha.h. Signed-off-by: Hans de Goede Signed-off-by: Herbert Xu --- arch/s390/crypto/sha256_s390.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c index af7505148f80..b52c87e44939 100644 --- a/arch/s390/crypto/sha256_s390.c +++ b/arch/s390/crypto/sha256_s390.c @@ -17,7 +17,7 @@ #include "sha.h" -static int sha256_init(struct shash_desc *desc) +static int s390_sha256_init(struct shash_desc *desc) { struct s390_sha_ctx *sctx = shash_desc_ctx(desc); @@ -60,7 +60,7 @@ static int sha256_import(struct shash_desc *desc, const void *in) static struct shash_alg sha256_alg = { .digestsize = SHA256_DIGEST_SIZE, - .init = sha256_init, + .init = s390_sha256_init, .update = s390_sha_update, .final = s390_sha_final, .export = sha256_export, @@ -76,7 +76,7 @@ static struct shash_alg sha256_alg = { } }; -static int sha224_init(struct shash_desc *desc) +static int s390_sha224_init(struct shash_desc *desc) { struct s390_sha_ctx *sctx = shash_desc_ctx(desc); @@ -96,7 +96,7 @@ static int sha224_init(struct shash_desc *desc) static struct shash_alg sha224_alg = { .digestsize = SHA224_DIGEST_SIZE, - .init = sha224_init, + .init = s390_sha224_init, .update = s390_sha_update, .final = s390_sha_final, .export = sha256_export, -- cgit v1.2.1 From eb7d6ba882f1c508f5387416375d4c2f99debc7b Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 1 Sep 2019 22:35:27 +0200 Subject: crypto: x86 - Rename functions to avoid conflict with crypto/sha256.h Rename static / file-local functions so that they do not conflict with the functions declared in crypto/sha256.h. This is a preparation patch for folding crypto/sha256.h into crypto/sha.h. Signed-off-by: Hans de Goede Signed-off-by: Herbert Xu --- arch/x86/crypto/sha256_ssse3_glue.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c index 73867da3cbee..f9aff31fe59e 100644 --- a/arch/x86/crypto/sha256_ssse3_glue.c +++ b/arch/x86/crypto/sha256_ssse3_glue.c @@ -45,8 +45,8 @@ asmlinkage void sha256_transform_ssse3(u32 *digest, const char *data, u64 rounds); typedef void (sha256_transform_fn)(u32 *digest, const char *data, u64 rounds); -static int sha256_update(struct shash_desc *desc, const u8 *data, - unsigned int len, sha256_transform_fn *sha256_xform) +static int _sha256_update(struct shash_desc *desc, const u8 *data, + unsigned int len, sha256_transform_fn *sha256_xform) { struct sha256_state *sctx = shash_desc_ctx(desc); @@ -84,7 +84,7 @@ static int sha256_finup(struct shash_desc *desc, const u8 *data, static int sha256_ssse3_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - return sha256_update(desc, data, len, sha256_transform_ssse3); + return _sha256_update(desc, data, len, sha256_transform_ssse3); } static int sha256_ssse3_finup(struct shash_desc *desc, const u8 *data, @@ -151,7 +151,7 @@ asmlinkage void sha256_transform_avx(u32 *digest, const char *data, static int sha256_avx_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - return sha256_update(desc, data, len, sha256_transform_avx); + return _sha256_update(desc, data, len, sha256_transform_avx); } static int sha256_avx_finup(struct shash_desc *desc, const u8 *data, @@ -233,7 +233,7 @@ asmlinkage void sha256_transform_rorx(u32 *digest, const char *data, static int sha256_avx2_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - return sha256_update(desc, data, len, sha256_transform_rorx); + return _sha256_update(desc, data, len, sha256_transform_rorx); } static int sha256_avx2_finup(struct shash_desc *desc, const u8 *data, @@ -313,7 +313,7 @@ asmlinkage void sha256_ni_transform(u32 *digest, const char *data, static int sha256_ni_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - return sha256_update(desc, data, len, sha256_ni_transform); + return _sha256_update(desc, data, len, sha256_ni_transform); } static int sha256_ni_finup(struct shash_desc *desc, const u8 *data, -- cgit v1.2.1 From 34d6245fbc81e764806a65fceaeb3ab3274a1e63 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 1 Sep 2019 22:35:31 +0200 Subject: crypto: sha256 - Merge crypto/sha256.h into crypto/sha.h The generic sha256 implementation from lib/crypto/sha256.c uses data structs defined in crypto/sha.h, so lets move the function prototypes there too. Signed-off-by: Hans de Goede Signed-off-by: Herbert Xu --- arch/s390/purgatory/purgatory.c | 2 +- arch/x86/purgatory/purgatory.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/s390/purgatory/purgatory.c b/arch/s390/purgatory/purgatory.c index a80c78da9985..0a423bcf6746 100644 --- a/arch/s390/purgatory/purgatory.c +++ b/arch/s390/purgatory/purgatory.c @@ -9,7 +9,7 @@ #include #include -#include +#include #include int verify_sha256_digest(void) diff --git a/arch/x86/purgatory/purgatory.c b/arch/x86/purgatory/purgatory.c index 7cd7a2618180..c4295256cc0c 100644 --- a/arch/x86/purgatory/purgatory.c +++ b/arch/x86/purgatory/purgatory.c @@ -9,7 +9,7 @@ */ #include -#include +#include #include #include "../boot/string.h" -- cgit v1.2.1 From 7b865ec15ed3a1a4204537bc28bbc68202f4c52f Mon Sep 17 00:00:00 2001 From: zhong jiang Date: Tue, 3 Sep 2019 14:54:16 +0800 Subject: crypto: arm64/aes - Use PTR_ERR_OR_ZERO rather than its implementation. PTR_ERR_OR_ZERO contains if(IS_ERR(...)) + PTR_ERR. It is better to use it directly. hence just replace it. Signed-off-by: zhong jiang Acked-by: Will Deacon Signed-off-by: Herbert Xu --- arch/arm64/crypto/aes-glue.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index ca0c84d56cba..2a2e0a3fc4eb 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c @@ -409,10 +409,8 @@ static int essiv_cbc_init_tfm(struct crypto_skcipher *tfm) struct crypto_aes_essiv_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); ctx->hash = crypto_alloc_shash("sha256", 0, 0); - if (IS_ERR(ctx->hash)) - return PTR_ERR(ctx->hash); - return 0; + return PTR_ERR_OR_ZERO(ctx->hash); } static void essiv_cbc_exit_tfm(struct crypto_skcipher *tfm) -- cgit v1.2.1 From fcb0e30df0e881fd5290674536d99375d348a151 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 3 Sep 2019 09:43:23 -0700 Subject: crypto: arm/aes - fix round key prototypes The AES round keys are arrays of u32s in native endianness now, so update the function prototypes accordingly. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm/crypto/aes-ce-core.S | 18 +++++++++--------- arch/arm/crypto/aes-ce-glue.c | 40 ++++++++++++++++++++-------------------- 2 files changed, 29 insertions(+), 29 deletions(-) (limited to 'arch') diff --git a/arch/arm/crypto/aes-ce-core.S b/arch/arm/crypto/aes-ce-core.S index 425000232d49..1e0d45183590 100644 --- a/arch/arm/crypto/aes-ce-core.S +++ b/arch/arm/crypto/aes-ce-core.S @@ -154,9 +154,9 @@ ENDPROC(aes_decrypt_3x) .endm /* - * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * aes_ecb_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds, * int blocks) - * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * aes_ecb_decrypt(u8 out[], u8 const in[], u32 const rk[], int rounds, * int blocks) */ ENTRY(ce_aes_ecb_encrypt) @@ -212,9 +212,9 @@ ENTRY(ce_aes_ecb_decrypt) ENDPROC(ce_aes_ecb_decrypt) /* - * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * aes_cbc_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds, * int blocks, u8 iv[]) - * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * aes_cbc_decrypt(u8 out[], u8 const in[], u32 const rk[], int rounds, * int blocks, u8 iv[]) */ ENTRY(ce_aes_cbc_encrypt) @@ -272,7 +272,7 @@ ENTRY(ce_aes_cbc_decrypt) ENDPROC(ce_aes_cbc_decrypt) /* - * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * aes_ctr_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds, * int blocks, u8 ctr[]) */ ENTRY(ce_aes_ctr_encrypt) @@ -349,10 +349,10 @@ ENTRY(ce_aes_ctr_encrypt) ENDPROC(ce_aes_ctr_encrypt) /* - * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds, - * int blocks, u8 iv[], u8 const rk2[], int first) - * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds, - * int blocks, u8 iv[], u8 const rk2[], int first) + * aes_xts_encrypt(u8 out[], u8 const in[], u32 const rk1[], int rounds, + * int blocks, u8 iv[], u32 const rk2[], int first) + * aes_xts_decrypt(u8 out[], u8 const in[], u32 const rk1[], int rounds, + * int blocks, u8 iv[], u32 const rk2[], int first) */ .macro next_tweak, out, in, const, tmp diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c index a7265d0a7063..75d2ff03a63e 100644 --- a/arch/arm/crypto/aes-ce-glue.c +++ b/arch/arm/crypto/aes-ce-glue.c @@ -25,25 +25,25 @@ MODULE_LICENSE("GPL v2"); asmlinkage u32 ce_aes_sub(u32 input); asmlinkage void ce_aes_invert(void *dst, void *src); -asmlinkage void ce_aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], +asmlinkage void ce_aes_ecb_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds, int blocks); -asmlinkage void ce_aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], +asmlinkage void ce_aes_ecb_decrypt(u8 out[], u8 const in[], u32 const rk[], int rounds, int blocks); -asmlinkage void ce_aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], +asmlinkage void ce_aes_cbc_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds, int blocks, u8 iv[]); -asmlinkage void ce_aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], +asmlinkage void ce_aes_cbc_decrypt(u8 out[], u8 const in[], u32 const rk[], int rounds, int blocks, u8 iv[]); -asmlinkage void ce_aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], +asmlinkage void ce_aes_ctr_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds, int blocks, u8 ctr[]); -asmlinkage void ce_aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], +asmlinkage void ce_aes_xts_encrypt(u8 out[], u8 const in[], u32 const rk1[], int rounds, int blocks, u8 iv[], - u8 const rk2[], int first); -asmlinkage void ce_aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], + u32 const rk2[], int first); +asmlinkage void ce_aes_xts_decrypt(u8 out[], u8 const in[], u32 const rk1[], int rounds, int blocks, u8 iv[], - u8 const rk2[], int first); + u32 const rk2[], int first); struct aes_block { u8 b[AES_BLOCK_SIZE]; @@ -182,7 +182,7 @@ static int ecb_encrypt(struct skcipher_request *req) kernel_neon_begin(); while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { ce_aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr, - (u8 *)ctx->key_enc, num_rounds(ctx), blocks); + ctx->key_enc, num_rounds(ctx), blocks); err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); @@ -202,7 +202,7 @@ static int ecb_decrypt(struct skcipher_request *req) kernel_neon_begin(); while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { ce_aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr, - (u8 *)ctx->key_dec, num_rounds(ctx), blocks); + ctx->key_dec, num_rounds(ctx), blocks); err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); @@ -222,7 +222,7 @@ static int cbc_encrypt(struct skcipher_request *req) kernel_neon_begin(); while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { ce_aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr, - (u8 *)ctx->key_enc, num_rounds(ctx), blocks, + ctx->key_enc, num_rounds(ctx), blocks, walk.iv); err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } @@ -243,7 +243,7 @@ static int cbc_decrypt(struct skcipher_request *req) kernel_neon_begin(); while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { ce_aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr, - (u8 *)ctx->key_dec, num_rounds(ctx), blocks, + ctx->key_dec, num_rounds(ctx), blocks, walk.iv); err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } @@ -263,7 +263,7 @@ static int ctr_encrypt(struct skcipher_request *req) kernel_neon_begin(); while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { ce_aes_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr, - (u8 *)ctx->key_enc, num_rounds(ctx), blocks, + ctx->key_enc, num_rounds(ctx), blocks, walk.iv); err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } @@ -278,8 +278,8 @@ static int ctr_encrypt(struct skcipher_request *req) */ blocks = -1; - ce_aes_ctr_encrypt(tail, NULL, (u8 *)ctx->key_enc, - num_rounds(ctx), blocks, walk.iv); + ce_aes_ctr_encrypt(tail, NULL, ctx->key_enc, num_rounds(ctx), + blocks, walk.iv); crypto_xor_cpy(tdst, tsrc, tail, nbytes); err = skcipher_walk_done(&walk, 0); } @@ -324,8 +324,8 @@ static int xts_encrypt(struct skcipher_request *req) kernel_neon_begin(); for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { ce_aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr, - (u8 *)ctx->key1.key_enc, rounds, blocks, - walk.iv, (u8 *)ctx->key2.key_enc, first); + ctx->key1.key_enc, rounds, blocks, walk.iv, + ctx->key2.key_enc, first); err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); @@ -346,8 +346,8 @@ static int xts_decrypt(struct skcipher_request *req) kernel_neon_begin(); for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { ce_aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr, - (u8 *)ctx->key1.key_dec, rounds, blocks, - walk.iv, (u8 *)ctx->key2.key_enc, first); + ctx->key1.key_dec, rounds, blocks, walk.iv, + ctx->key2.key_enc, first); err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); -- cgit v1.2.1 From 46a22776bc97aa5ab9d5f9dc4829859219b86365 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 3 Sep 2019 09:43:24 -0700 Subject: crypto: arm/aes-ce - yield the SIMD unit between scatterwalk steps Reduce the scope of the kernel_neon_begin/end regions so that the SIMD unit is released (and thus preemption re-enabled) if the crypto operation cannot be completed in a single scatterwalk step. This avoids scheduling blackouts due to preemption being enabled for unbounded periods, resulting in a more responsive system. After this change, we can also permit the cipher_walk infrastructure to sleep, so set the 'atomic' parameter to skcipher_walk_virt() to false as well. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm/crypto/aes-ce-glue.c | 47 +++++++++++++++++++-------------------- arch/arm/crypto/aes-neonbs-glue.c | 22 +++++++++--------- 2 files changed, 34 insertions(+), 35 deletions(-) (limited to 'arch') diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c index 75d2ff03a63e..486e862ae34a 100644 --- a/arch/arm/crypto/aes-ce-glue.c +++ b/arch/arm/crypto/aes-ce-glue.c @@ -177,15 +177,15 @@ static int ecb_encrypt(struct skcipher_request *req) unsigned int blocks; int err; - err = skcipher_walk_virt(&walk, req, true); + err = skcipher_walk_virt(&walk, req, false); - kernel_neon_begin(); while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { + kernel_neon_begin(); ce_aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr, ctx->key_enc, num_rounds(ctx), blocks); + kernel_neon_end(); err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } - kernel_neon_end(); return err; } @@ -197,15 +197,15 @@ static int ecb_decrypt(struct skcipher_request *req) unsigned int blocks; int err; - err = skcipher_walk_virt(&walk, req, true); + err = skcipher_walk_virt(&walk, req, false); - kernel_neon_begin(); while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { + kernel_neon_begin(); ce_aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr, ctx->key_dec, num_rounds(ctx), blocks); + kernel_neon_end(); err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } - kernel_neon_end(); return err; } @@ -217,16 +217,16 @@ static int cbc_encrypt(struct skcipher_request *req) unsigned int blocks; int err; - err = skcipher_walk_virt(&walk, req, true); + err = skcipher_walk_virt(&walk, req, false); - kernel_neon_begin(); while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { + kernel_neon_begin(); ce_aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr, ctx->key_enc, num_rounds(ctx), blocks, walk.iv); + kernel_neon_end(); err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } - kernel_neon_end(); return err; } @@ -238,16 +238,16 @@ static int cbc_decrypt(struct skcipher_request *req) unsigned int blocks; int err; - err = skcipher_walk_virt(&walk, req, true); + err = skcipher_walk_virt(&walk, req, false); - kernel_neon_begin(); while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { + kernel_neon_begin(); ce_aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr, ctx->key_dec, num_rounds(ctx), blocks, walk.iv); + kernel_neon_end(); err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } - kernel_neon_end(); return err; } @@ -258,13 +258,14 @@ static int ctr_encrypt(struct skcipher_request *req) struct skcipher_walk walk; int err, blocks; - err = skcipher_walk_virt(&walk, req, true); + err = skcipher_walk_virt(&walk, req, false); - kernel_neon_begin(); while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { + kernel_neon_begin(); ce_aes_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr, ctx->key_enc, num_rounds(ctx), blocks, walk.iv); + kernel_neon_end(); err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } if (walk.nbytes) { @@ -278,13 +279,13 @@ static int ctr_encrypt(struct skcipher_request *req) */ blocks = -1; + kernel_neon_begin(); ce_aes_ctr_encrypt(tail, NULL, ctx->key_enc, num_rounds(ctx), blocks, walk.iv); + kernel_neon_end(); crypto_xor_cpy(tdst, tsrc, tail, nbytes); err = skcipher_walk_done(&walk, 0); } - kernel_neon_end(); - return err; } @@ -319,17 +320,16 @@ static int xts_encrypt(struct skcipher_request *req) struct skcipher_walk walk; unsigned int blocks; - err = skcipher_walk_virt(&walk, req, true); + err = skcipher_walk_virt(&walk, req, false); - kernel_neon_begin(); for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { + kernel_neon_begin(); ce_aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr, ctx->key1.key_enc, rounds, blocks, walk.iv, ctx->key2.key_enc, first); + kernel_neon_end(); err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } - kernel_neon_end(); - return err; } @@ -341,17 +341,16 @@ static int xts_decrypt(struct skcipher_request *req) struct skcipher_walk walk; unsigned int blocks; - err = skcipher_walk_virt(&walk, req, true); + err = skcipher_walk_virt(&walk, req, false); - kernel_neon_begin(); for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { + kernel_neon_begin(); ce_aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr, ctx->key1.key_dec, rounds, blocks, walk.iv, ctx->key2.key_enc, first); + kernel_neon_end(); err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } - kernel_neon_end(); - return err; } diff --git a/arch/arm/crypto/aes-neonbs-glue.c b/arch/arm/crypto/aes-neonbs-glue.c index 45cd9818791e..9000d0796d5e 100644 --- a/arch/arm/crypto/aes-neonbs-glue.c +++ b/arch/arm/crypto/aes-neonbs-glue.c @@ -90,9 +90,8 @@ static int __ecb_crypt(struct skcipher_request *req, struct skcipher_walk walk; int err; - err = skcipher_walk_virt(&walk, req, true); + err = skcipher_walk_virt(&walk, req, false); - kernel_neon_begin(); while (walk.nbytes >= AES_BLOCK_SIZE) { unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE; @@ -100,12 +99,13 @@ static int __ecb_crypt(struct skcipher_request *req, blocks = round_down(blocks, walk.stride / AES_BLOCK_SIZE); + kernel_neon_begin(); fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->rk, ctx->rounds, blocks); + kernel_neon_end(); err = skcipher_walk_done(&walk, walk.nbytes - blocks * AES_BLOCK_SIZE); } - kernel_neon_end(); return err; } @@ -159,9 +159,8 @@ static int cbc_decrypt(struct skcipher_request *req) struct skcipher_walk walk; int err; - err = skcipher_walk_virt(&walk, req, true); + err = skcipher_walk_virt(&walk, req, false); - kernel_neon_begin(); while (walk.nbytes >= AES_BLOCK_SIZE) { unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE; @@ -169,13 +168,14 @@ static int cbc_decrypt(struct skcipher_request *req) blocks = round_down(blocks, walk.stride / AES_BLOCK_SIZE); + kernel_neon_begin(); aesbs_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr, ctx->key.rk, ctx->key.rounds, blocks, walk.iv); + kernel_neon_end(); err = skcipher_walk_done(&walk, walk.nbytes - blocks * AES_BLOCK_SIZE); } - kernel_neon_end(); return err; } @@ -223,9 +223,8 @@ static int ctr_encrypt(struct skcipher_request *req) u8 buf[AES_BLOCK_SIZE]; int err; - err = skcipher_walk_virt(&walk, req, true); + err = skcipher_walk_virt(&walk, req, false); - kernel_neon_begin(); while (walk.nbytes > 0) { unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE; u8 *final = (walk.total % AES_BLOCK_SIZE) ? buf : NULL; @@ -236,8 +235,10 @@ static int ctr_encrypt(struct skcipher_request *req) final = NULL; } + kernel_neon_begin(); aesbs_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr, ctx->rk, ctx->rounds, blocks, walk.iv, final); + kernel_neon_end(); if (final) { u8 *dst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE; @@ -252,7 +253,6 @@ static int ctr_encrypt(struct skcipher_request *req) err = skcipher_walk_done(&walk, walk.nbytes - blocks * AES_BLOCK_SIZE); } - kernel_neon_end(); return err; } @@ -329,7 +329,6 @@ static int __xts_crypt(struct skcipher_request *req, crypto_cipher_encrypt_one(ctx->tweak_tfm, walk.iv, walk.iv); - kernel_neon_begin(); while (walk.nbytes >= AES_BLOCK_SIZE) { unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE; @@ -337,12 +336,13 @@ static int __xts_crypt(struct skcipher_request *req, blocks = round_down(blocks, walk.stride / AES_BLOCK_SIZE); + kernel_neon_begin(); fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->key.rk, ctx->key.rounds, blocks, walk.iv); + kernel_neon_end(); err = skcipher_walk_done(&walk, walk.nbytes - blocks * AES_BLOCK_SIZE); } - kernel_neon_end(); return err; } -- cgit v1.2.1 From 1dede02bdd64116b98bdcbc2e2be20e62afd43f5 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 3 Sep 2019 09:43:25 -0700 Subject: crypto: arm/aes-ce - switch to 4x interleave When the ARM AES instruction based crypto driver was introduced, there were no known implementations that could benefit from a 4-way interleave, and so a 3-way interleave was used instead. Since we have sufficient space in the SIMD register file, let's switch to a 4-way interleave to align with the 64-bit driver, and to ensure that we can reach optimum performance when running under emulation on high end 64-bit cores. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm/crypto/aes-ce-core.S | 263 +++++++++++++++++++++++------------------- 1 file changed, 144 insertions(+), 119 deletions(-) (limited to 'arch') diff --git a/arch/arm/crypto/aes-ce-core.S b/arch/arm/crypto/aes-ce-core.S index 1e0d45183590..a3ca4ac2d7bb 100644 --- a/arch/arm/crypto/aes-ce-core.S +++ b/arch/arm/crypto/aes-ce-core.S @@ -44,46 +44,56 @@ veor q0, q0, \key3 .endm - .macro enc_dround_3x, key1, key2 + .macro enc_dround_4x, key1, key2 enc_round q0, \key1 enc_round q1, \key1 enc_round q2, \key1 + enc_round q3, \key1 enc_round q0, \key2 enc_round q1, \key2 enc_round q2, \key2 + enc_round q3, \key2 .endm - .macro dec_dround_3x, key1, key2 + .macro dec_dround_4x, key1, key2 dec_round q0, \key1 dec_round q1, \key1 dec_round q2, \key1 + dec_round q3, \key1 dec_round q0, \key2 dec_round q1, \key2 dec_round q2, \key2 + dec_round q3, \key2 .endm - .macro enc_fround_3x, key1, key2, key3 + .macro enc_fround_4x, key1, key2, key3 enc_round q0, \key1 enc_round q1, \key1 enc_round q2, \key1 + enc_round q3, \key1 aese.8 q0, \key2 aese.8 q1, \key2 aese.8 q2, \key2 + aese.8 q3, \key2 veor q0, q0, \key3 veor q1, q1, \key3 veor q2, q2, \key3 + veor q3, q3, \key3 .endm - .macro dec_fround_3x, key1, key2, key3 + .macro dec_fround_4x, key1, key2, key3 dec_round q0, \key1 dec_round q1, \key1 dec_round q2, \key1 + dec_round q3, \key1 aesd.8 q0, \key2 aesd.8 q1, \key2 aesd.8 q2, \key2 + aesd.8 q3, \key2 veor q0, q0, \key3 veor q1, q1, \key3 veor q2, q2, \key3 + veor q3, q3, \key3 .endm .macro do_block, dround, fround @@ -114,8 +124,9 @@ * transforms. These should preserve all registers except q0 - q2 and ip * Arguments: * q0 : first in/output block - * q1 : second in/output block (_3x version only) - * q2 : third in/output block (_3x version only) + * q1 : second in/output block (_4x version only) + * q2 : third in/output block (_4x version only) + * q3 : fourth in/output block (_4x version only) * q8 : first round key * q9 : secound round key * q14 : final round key @@ -136,16 +147,16 @@ aes_decrypt: ENDPROC(aes_decrypt) .align 6 -aes_encrypt_3x: +aes_encrypt_4x: add ip, r2, #32 @ 3rd round key - do_block enc_dround_3x, enc_fround_3x -ENDPROC(aes_encrypt_3x) + do_block enc_dround_4x, enc_fround_4x +ENDPROC(aes_encrypt_4x) .align 6 -aes_decrypt_3x: +aes_decrypt_4x: add ip, r2, #32 @ 3rd round key - do_block dec_dround_3x, dec_fround_3x -ENDPROC(aes_decrypt_3x) + do_block dec_dround_4x, dec_fround_4x +ENDPROC(aes_decrypt_4x) .macro prepare_key, rk, rounds add ip, \rk, \rounds, lsl #4 @@ -163,17 +174,17 @@ ENTRY(ce_aes_ecb_encrypt) push {r4, lr} ldr r4, [sp, #8] prepare_key r2, r3 -.Lecbencloop3x: - subs r4, r4, #3 +.Lecbencloop4x: + subs r4, r4, #4 bmi .Lecbenc1x vld1.8 {q0-q1}, [r1]! - vld1.8 {q2}, [r1]! - bl aes_encrypt_3x + vld1.8 {q2-q3}, [r1]! + bl aes_encrypt_4x vst1.8 {q0-q1}, [r0]! - vst1.8 {q2}, [r0]! - b .Lecbencloop3x + vst1.8 {q2-q3}, [r0]! + b .Lecbencloop4x .Lecbenc1x: - adds r4, r4, #3 + adds r4, r4, #4 beq .Lecbencout .Lecbencloop: vld1.8 {q0}, [r1]! @@ -189,17 +200,17 @@ ENTRY(ce_aes_ecb_decrypt) push {r4, lr} ldr r4, [sp, #8] prepare_key r2, r3 -.Lecbdecloop3x: - subs r4, r4, #3 +.Lecbdecloop4x: + subs r4, r4, #4 bmi .Lecbdec1x vld1.8 {q0-q1}, [r1]! - vld1.8 {q2}, [r1]! - bl aes_decrypt_3x + vld1.8 {q2-q3}, [r1]! + bl aes_decrypt_4x vst1.8 {q0-q1}, [r0]! - vst1.8 {q2}, [r0]! - b .Lecbdecloop3x + vst1.8 {q2-q3}, [r0]! + b .Lecbdecloop4x .Lecbdec1x: - adds r4, r4, #3 + adds r4, r4, #4 beq .Lecbdecout .Lecbdecloop: vld1.8 {q0}, [r1]! @@ -236,38 +247,40 @@ ENDPROC(ce_aes_cbc_encrypt) ENTRY(ce_aes_cbc_decrypt) push {r4-r6, lr} ldrd r4, r5, [sp, #16] - vld1.8 {q6}, [r5] @ keep iv in q6 + vld1.8 {q15}, [r5] @ keep iv in q15 prepare_key r2, r3 -.Lcbcdecloop3x: - subs r4, r4, #3 +.Lcbcdecloop4x: + subs r4, r4, #4 bmi .Lcbcdec1x vld1.8 {q0-q1}, [r1]! - vld1.8 {q2}, [r1]! - vmov q3, q0 - vmov q4, q1 - vmov q5, q2 - bl aes_decrypt_3x - veor q0, q0, q6 - veor q1, q1, q3 - veor q2, q2, q4 - vmov q6, q5 + vld1.8 {q2-q3}, [r1]! + vmov q4, q0 + vmov q5, q1 + vmov q6, q2 + vmov q7, q3 + bl aes_decrypt_4x + veor q0, q0, q15 + veor q1, q1, q4 + veor q2, q2, q5 + veor q3, q3, q6 + vmov q15, q7 vst1.8 {q0-q1}, [r0]! - vst1.8 {q2}, [r0]! - b .Lcbcdecloop3x + vst1.8 {q2-q3}, [r0]! + b .Lcbcdecloop4x .Lcbcdec1x: - adds r4, r4, #3 + adds r4, r4, #4 beq .Lcbcdecout - vmov q15, q14 @ preserve last round key + vmov q6, q14 @ preserve last round key .Lcbcdecloop: vld1.8 {q0}, [r1]! @ get next ct block veor q14, q15, q6 @ combine prev ct with last key - vmov q6, q0 + vmov q15, q0 bl aes_decrypt vst1.8 {q0}, [r0]! subs r4, r4, #1 bne .Lcbcdecloop .Lcbcdecout: - vst1.8 {q6}, [r5] @ keep iv in q6 + vst1.8 {q15}, [r5] @ keep iv in q15 pop {r4-r6, pc} ENDPROC(ce_aes_cbc_decrypt) @@ -278,46 +291,52 @@ ENDPROC(ce_aes_cbc_decrypt) ENTRY(ce_aes_ctr_encrypt) push {r4-r6, lr} ldrd r4, r5, [sp, #16] - vld1.8 {q6}, [r5] @ load ctr + vld1.8 {q7}, [r5] @ load ctr prepare_key r2, r3 - vmov r6, s27 @ keep swabbed ctr in r6 + vmov r6, s31 @ keep swabbed ctr in r6 rev r6, r6 cmn r6, r4 @ 32 bit overflow? bcs .Lctrloop -.Lctrloop3x: - subs r4, r4, #3 +.Lctrloop4x: + subs r4, r4, #4 bmi .Lctr1x add r6, r6, #1 - vmov q0, q6 - vmov q1, q6 + vmov q0, q7 + vmov q1, q7 rev ip, r6 add r6, r6, #1 - vmov q2, q6 + vmov q2, q7 vmov s7, ip rev ip, r6 add r6, r6, #1 + vmov q3, q7 vmov s11, ip - vld1.8 {q3-q4}, [r1]! - vld1.8 {q5}, [r1]! - bl aes_encrypt_3x - veor q0, q0, q3 - veor q1, q1, q4 - veor q2, q2, q5 + rev ip, r6 + add r6, r6, #1 + vmov s15, ip + vld1.8 {q4-q5}, [r1]! + vld1.8 {q6}, [r1]! + vld1.8 {q15}, [r1]! + bl aes_encrypt_4x + veor q0, q0, q4 + veor q1, q1, q5 + veor q2, q2, q6 + veor q3, q3, q15 rev ip, r6 vst1.8 {q0-q1}, [r0]! - vst1.8 {q2}, [r0]! - vmov s27, ip - b .Lctrloop3x + vst1.8 {q2-q3}, [r0]! + vmov s31, ip + b .Lctrloop4x .Lctr1x: - adds r4, r4, #3 + adds r4, r4, #4 beq .Lctrout .Lctrloop: - vmov q0, q6 + vmov q0, q7 bl aes_encrypt adds r6, r6, #1 @ increment BE ctr rev ip, r6 - vmov s27, ip + vmov s31, ip bcs .Lctrcarry .Lctrcarrydone: @@ -329,7 +348,7 @@ ENTRY(ce_aes_ctr_encrypt) bne .Lctrloop .Lctrout: - vst1.8 {q6}, [r5] @ return next CTR value + vst1.8 {q7}, [r5] @ return next CTR value pop {r4-r6, pc} .Lctrtailblock: @@ -337,7 +356,7 @@ ENTRY(ce_aes_ctr_encrypt) b .Lctrout .Lctrcarry: - .irp sreg, s26, s25, s24 + .irp sreg, s30, s29, s28 vmov ip, \sreg @ load next word of ctr rev ip, ip @ ... to handle the carry adds ip, ip, #1 @@ -368,8 +387,8 @@ ENDPROC(ce_aes_ctr_encrypt) .quad 1, 0x87 ce_aes_xts_init: - vldr d14, .Lxts_mul_x - vldr d15, .Lxts_mul_x + 8 + vldr d30, .Lxts_mul_x + vldr d31, .Lxts_mul_x + 8 ldrd r4, r5, [sp, #16] @ load args ldr r6, [sp, #28] @@ -390,48 +409,51 @@ ENTRY(ce_aes_xts_encrypt) bl ce_aes_xts_init @ run shared prologue prepare_key r2, r3 - vmov q3, q0 + vmov q4, q0 teq r6, #0 @ start of a block? - bne .Lxtsenc3x + bne .Lxtsenc4x -.Lxtsencloop3x: - next_tweak q3, q3, q7, q6 -.Lxtsenc3x: - subs r4, r4, #3 +.Lxtsencloop4x: + next_tweak q4, q4, q15, q10 +.Lxtsenc4x: + subs r4, r4, #4 bmi .Lxtsenc1x - vld1.8 {q0-q1}, [r1]! @ get 3 pt blocks - vld1.8 {q2}, [r1]! - next_tweak q4, q3, q7, q6 - veor q0, q0, q3 - next_tweak q5, q4, q7, q6 - veor q1, q1, q4 - veor q2, q2, q5 - bl aes_encrypt_3x - veor q0, q0, q3 - veor q1, q1, q4 - veor q2, q2, q5 - vst1.8 {q0-q1}, [r0]! @ write 3 ct blocks - vst1.8 {q2}, [r0]! - vmov q3, q5 + vld1.8 {q0-q1}, [r1]! @ get 4 pt blocks + vld1.8 {q2-q3}, [r1]! + next_tweak q5, q4, q15, q10 + veor q0, q0, q4 + next_tweak q6, q5, q15, q10 + veor q1, q1, q5 + next_tweak q7, q6, q15, q10 + veor q2, q2, q6 + veor q3, q3, q7 + bl aes_encrypt_4x + veor q0, q0, q4 + veor q1, q1, q5 + veor q2, q2, q6 + veor q3, q3, q7 + vst1.8 {q0-q1}, [r0]! @ write 4 ct blocks + vst1.8 {q2-q3}, [r0]! + vmov q4, q7 teq r4, #0 beq .Lxtsencout - b .Lxtsencloop3x + b .Lxtsencloop4x .Lxtsenc1x: - adds r4, r4, #3 + adds r4, r4, #4 beq .Lxtsencout .Lxtsencloop: vld1.8 {q0}, [r1]! - veor q0, q0, q3 + veor q0, q0, q4 bl aes_encrypt - veor q0, q0, q3 + veor q0, q0, q4 vst1.8 {q0}, [r0]! subs r4, r4, #1 beq .Lxtsencout - next_tweak q3, q3, q7, q6 + next_tweak q4, q4, q15, q6 b .Lxtsencloop .Lxtsencout: - vst1.8 {q3}, [r5] + vst1.8 {q4}, [r5] pop {r4-r6, pc} ENDPROC(ce_aes_xts_encrypt) @@ -441,49 +463,52 @@ ENTRY(ce_aes_xts_decrypt) bl ce_aes_xts_init @ run shared prologue prepare_key r2, r3 - vmov q3, q0 + vmov q4, q0 teq r6, #0 @ start of a block? - bne .Lxtsdec3x + bne .Lxtsdec4x -.Lxtsdecloop3x: - next_tweak q3, q3, q7, q6 -.Lxtsdec3x: - subs r4, r4, #3 +.Lxtsdecloop4x: + next_tweak q4, q4, q15, q10 +.Lxtsdec4x: + subs r4, r4, #4 bmi .Lxtsdec1x - vld1.8 {q0-q1}, [r1]! @ get 3 ct blocks - vld1.8 {q2}, [r1]! - next_tweak q4, q3, q7, q6 - veor q0, q0, q3 - next_tweak q5, q4, q7, q6 - veor q1, q1, q4 - veor q2, q2, q5 - bl aes_decrypt_3x - veor q0, q0, q3 - veor q1, q1, q4 - veor q2, q2, q5 - vst1.8 {q0-q1}, [r0]! @ write 3 pt blocks - vst1.8 {q2}, [r0]! - vmov q3, q5 + vld1.8 {q0-q1}, [r1]! @ get 4 ct blocks + vld1.8 {q2-q3}, [r1]! + next_tweak q5, q4, q15, q10 + veor q0, q0, q4 + next_tweak q6, q5, q15, q10 + veor q1, q1, q5 + next_tweak q7, q6, q15, q10 + veor q2, q2, q6 + veor q3, q3, q7 + bl aes_decrypt_4x + veor q0, q0, q4 + veor q1, q1, q5 + veor q2, q2, q6 + veor q3, q3, q7 + vst1.8 {q0-q1}, [r0]! @ write 4 pt blocks + vst1.8 {q2-q3}, [r0]! + vmov q4, q7 teq r4, #0 beq .Lxtsdecout - b .Lxtsdecloop3x + b .Lxtsdecloop4x .Lxtsdec1x: - adds r4, r4, #3 + adds r4, r4, #4 beq .Lxtsdecout .Lxtsdecloop: vld1.8 {q0}, [r1]! - veor q0, q0, q3 + veor q0, q0, q4 add ip, r2, #32 @ 3rd round key bl aes_decrypt - veor q0, q0, q3 + veor q0, q0, q4 vst1.8 {q0}, [r0]! subs r4, r4, #1 beq .Lxtsdecout - next_tweak q3, q3, q7, q6 + next_tweak q4, q4, q15, q6 b .Lxtsdecloop .Lxtsdecout: - vst1.8 {q3}, [r5] + vst1.8 {q4}, [r5] pop {r4-r6, pc} ENDPROC(ce_aes_xts_decrypt) -- cgit v1.2.1 From e53b43d8d41f6c9bab8b4db73e64bc23c8063795 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 3 Sep 2019 09:43:26 -0700 Subject: crypto: arm/aes-ce - replace tweak mask literal with composition Replace the vector load from memory sequence with a simple instruction sequence to compose the tweak vector directly. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm/crypto/aes-ce-core.S | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm/crypto/aes-ce-core.S b/arch/arm/crypto/aes-ce-core.S index a3ca4ac2d7bb..bb6ec1844370 100644 --- a/arch/arm/crypto/aes-ce-core.S +++ b/arch/arm/crypto/aes-ce-core.S @@ -382,13 +382,10 @@ ENDPROC(ce_aes_ctr_encrypt) veor \out, \out, \tmp .endm - .align 3 -.Lxts_mul_x: - .quad 1, 0x87 - ce_aes_xts_init: - vldr d30, .Lxts_mul_x - vldr d31, .Lxts_mul_x + 8 + vmov.i32 d30, #0x87 @ compose tweak mask vector + vmovl.u32 q15, d30 + vshr.u64 d30, d31, #7 ldrd r4, r5, [sp, #16] @ load args ldr r6, [sp, #28] -- cgit v1.2.1 From 38e73b3d38c49afb0be460f8b9dbd7eb559b4dd8 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 3 Sep 2019 09:43:27 -0700 Subject: crypto: arm/aes-neonbs - replace tweak mask literal with composition Replace the vector load from memory sequence with a simple instruction sequence to compose the tweak vector directly. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm/crypto/aes-neonbs-core.S | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arm/crypto/aes-neonbs-core.S b/arch/arm/crypto/aes-neonbs-core.S index d3eab76b6e1b..bb75918e4984 100644 --- a/arch/arm/crypto/aes-neonbs-core.S +++ b/arch/arm/crypto/aes-neonbs-core.S @@ -887,10 +887,6 @@ ENDPROC(aesbs_ctr_encrypt) veor \out, \out, \tmp .endm - .align 4 -.Lxts_mul_x: - .quad 1, 0x87 - /* * aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, * int blocks, u8 iv[]) @@ -899,7 +895,9 @@ ENDPROC(aesbs_ctr_encrypt) */ __xts_prepare8: vld1.8 {q14}, [r7] // load iv - __ldr q15, .Lxts_mul_x // load tweak mask + vmov.i32 d30, #0x87 // compose tweak mask vector + vmovl.u32 q15, d30 + vshr.u64 d30, d31, #7 vmov q12, q14 __adr ip, 0f -- cgit v1.2.1 From 7a3b1c6ee761b7b2e49e01f0f2922c3ace3aa5b3 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 3 Sep 2019 09:43:28 -0700 Subject: crypto: arm64/aes-neonbs - replace tweak mask literal with composition Replace the vector load from memory sequence with a simple instruction sequence to compose the tweak vector directly. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/aes-neonbs-core.S | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm64/crypto/aes-neonbs-core.S b/arch/arm64/crypto/aes-neonbs-core.S index cf10ff8878a3..65982039fa36 100644 --- a/arch/arm64/crypto/aes-neonbs-core.S +++ b/arch/arm64/crypto/aes-neonbs-core.S @@ -730,11 +730,6 @@ ENDPROC(aesbs_cbc_decrypt) eor \out\().16b, \out\().16b, \tmp\().16b .endm - .align 4 -.Lxts_mul_x: -CPU_LE( .quad 1, 0x87 ) -CPU_BE( .quad 0x87, 1 ) - /* * aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, * int blocks, u8 iv[]) @@ -806,7 +801,9 @@ ENDPROC(__xts_crypt8) mov x23, x4 mov x24, x5 -0: ldr q30, .Lxts_mul_x +0: movi v30.2s, #0x1 + movi v25.2s, #0x87 + uzp1 v30.4s, v30.4s, v25.4s ld1 {v25.16b}, [x24] 99: adr x7, \do8 -- cgit v1.2.1 From 69b6f2e817e5bdb6d536241edaa11d7a67c64d00 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 3 Sep 2019 09:43:29 -0700 Subject: crypto: arm64/aes-neon - limit exposed routines if faster driver is enabled The pure NEON AES implementation predates the bit-slicing one, and is generally slower, unless the algorithm in question can only execute sequentially. So advertising the skciphers that the bit-slicing driver implements as well serves no real purpose, and we can just disable them. Note that the bit-slicing driver also has a link time dependency on the pure NEON driver, for CBC encryption and for XTS tweak calculation, so we still need both drivers on systems that do not implement the Crypto Extensions. At the same time, expose those modaliases for the AES instruction based driver. This is necessary since otherwise, we may end up loading the wrong driver when any of the skciphers are instantiated before the CPU capability based module loading has completed. Finally, add the missing modalias for cts(cbc(aes)) so requests for this algorithm will autoload the correct module. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/aes-glue.c | 112 +++++++++++++++++++++++-------------------- 1 file changed, 59 insertions(+), 53 deletions(-) (limited to 'arch') diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index 2a2e0a3fc4eb..a1c4e30d76c9 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c @@ -54,15 +54,18 @@ MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions"); #define aes_xts_decrypt neon_aes_xts_decrypt #define aes_mac_update neon_aes_mac_update MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 NEON"); +#endif +#if defined(USE_V8_CRYPTO_EXTENSIONS) || !defined(CONFIG_CRYPTO_AES_ARM64_BS) MODULE_ALIAS_CRYPTO("ecb(aes)"); MODULE_ALIAS_CRYPTO("cbc(aes)"); -MODULE_ALIAS_CRYPTO("essiv(cbc(aes),sha256)"); MODULE_ALIAS_CRYPTO("ctr(aes)"); MODULE_ALIAS_CRYPTO("xts(aes)"); +#endif +MODULE_ALIAS_CRYPTO("cts(cbc(aes))"); +MODULE_ALIAS_CRYPTO("essiv(cbc(aes),sha256)"); MODULE_ALIAS_CRYPTO("cmac(aes)"); MODULE_ALIAS_CRYPTO("xcbc(aes)"); MODULE_ALIAS_CRYPTO("cbcmac(aes)"); -#endif MODULE_AUTHOR("Ard Biesheuvel "); MODULE_LICENSE("GPL v2"); @@ -144,8 +147,8 @@ static int skcipher_aes_setkey(struct crypto_skcipher *tfm, const u8 *in_key, return ret; } -static int xts_set_key(struct crypto_skcipher *tfm, const u8 *in_key, - unsigned int key_len) +static int __maybe_unused xts_set_key(struct crypto_skcipher *tfm, + const u8 *in_key, unsigned int key_len) { struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm); int ret; @@ -165,8 +168,9 @@ static int xts_set_key(struct crypto_skcipher *tfm, const u8 *in_key, return -EINVAL; } -static int essiv_cbc_set_key(struct crypto_skcipher *tfm, const u8 *in_key, - unsigned int key_len) +static int __maybe_unused essiv_cbc_set_key(struct crypto_skcipher *tfm, + const u8 *in_key, + unsigned int key_len) { struct crypto_aes_essiv_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); SHASH_DESC_ON_STACK(desc, ctx->hash); @@ -190,7 +194,7 @@ out: return -EINVAL; } -static int ecb_encrypt(struct skcipher_request *req) +static int __maybe_unused ecb_encrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); @@ -210,7 +214,7 @@ static int ecb_encrypt(struct skcipher_request *req) return err; } -static int ecb_decrypt(struct skcipher_request *req) +static int __maybe_unused ecb_decrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); @@ -248,7 +252,7 @@ static int cbc_encrypt_walk(struct skcipher_request *req, return err; } -static int cbc_encrypt(struct skcipher_request *req) +static int __maybe_unused cbc_encrypt(struct skcipher_request *req) { struct skcipher_walk walk; int err; @@ -277,7 +281,7 @@ static int cbc_decrypt_walk(struct skcipher_request *req, return err; } -static int cbc_decrypt(struct skcipher_request *req) +static int __maybe_unused cbc_decrypt(struct skcipher_request *req) { struct skcipher_walk walk; int err; @@ -404,7 +408,7 @@ static int cts_cbc_decrypt(struct skcipher_request *req) return skcipher_walk_done(&walk, 0); } -static int essiv_cbc_init_tfm(struct crypto_skcipher *tfm) +static int __maybe_unused essiv_cbc_init_tfm(struct crypto_skcipher *tfm) { struct crypto_aes_essiv_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); @@ -413,14 +417,14 @@ static int essiv_cbc_init_tfm(struct crypto_skcipher *tfm) return PTR_ERR_OR_ZERO(ctx->hash); } -static void essiv_cbc_exit_tfm(struct crypto_skcipher *tfm) +static void __maybe_unused essiv_cbc_exit_tfm(struct crypto_skcipher *tfm) { struct crypto_aes_essiv_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); crypto_free_shash(ctx->hash); } -static int essiv_cbc_encrypt(struct skcipher_request *req) +static int __maybe_unused essiv_cbc_encrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct crypto_aes_essiv_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); @@ -442,7 +446,7 @@ static int essiv_cbc_encrypt(struct skcipher_request *req) return err ?: cbc_encrypt_walk(req, &walk); } -static int essiv_cbc_decrypt(struct skcipher_request *req) +static int __maybe_unused essiv_cbc_decrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct crypto_aes_essiv_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); @@ -518,7 +522,7 @@ static void ctr_encrypt_one(struct crypto_skcipher *tfm, const u8 *src, u8 *dst) local_irq_restore(flags); } -static int ctr_encrypt_sync(struct skcipher_request *req) +static int __maybe_unused ctr_encrypt_sync(struct skcipher_request *req) { if (!crypto_simd_usable()) return crypto_ctr_encrypt_walk(req, ctr_encrypt_one); @@ -526,7 +530,7 @@ static int ctr_encrypt_sync(struct skcipher_request *req) return ctr_encrypt(req); } -static int xts_encrypt(struct skcipher_request *req) +static int __maybe_unused xts_encrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm); @@ -548,7 +552,7 @@ static int xts_encrypt(struct skcipher_request *req) return err; } -static int xts_decrypt(struct skcipher_request *req) +static int __maybe_unused xts_decrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm); @@ -571,6 +575,7 @@ static int xts_decrypt(struct skcipher_request *req) } static struct skcipher_alg aes_algs[] = { { +#if defined(USE_V8_CRYPTO_EXTENSIONS) || !defined(CONFIG_CRYPTO_AES_ARM64_BS) .base = { .cra_name = "__ecb(aes)", .cra_driver_name = "__ecb-aes-" MODE, @@ -601,42 +606,6 @@ static struct skcipher_alg aes_algs[] = { { .setkey = skcipher_aes_setkey, .encrypt = cbc_encrypt, .decrypt = cbc_decrypt, -}, { - .base = { - .cra_name = "__cts(cbc(aes))", - .cra_driver_name = "__cts-cbc-aes-" MODE, - .cra_priority = PRIO, - .cra_flags = CRYPTO_ALG_INTERNAL, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypto_aes_ctx), - .cra_module = THIS_MODULE, - }, - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .walksize = 2 * AES_BLOCK_SIZE, - .setkey = skcipher_aes_setkey, - .encrypt = cts_cbc_encrypt, - .decrypt = cts_cbc_decrypt, - .init = cts_cbc_init_tfm, -}, { - .base = { - .cra_name = "__essiv(cbc(aes),sha256)", - .cra_driver_name = "__essiv-cbc-aes-sha256-" MODE, - .cra_priority = PRIO + 1, - .cra_flags = CRYPTO_ALG_INTERNAL, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypto_aes_essiv_cbc_ctx), - .cra_module = THIS_MODULE, - }, - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = essiv_cbc_set_key, - .encrypt = essiv_cbc_encrypt, - .decrypt = essiv_cbc_decrypt, - .init = essiv_cbc_init_tfm, - .exit = essiv_cbc_exit_tfm, }, { .base = { .cra_name = "__ctr(aes)", @@ -686,6 +655,43 @@ static struct skcipher_alg aes_algs[] = { { .setkey = xts_set_key, .encrypt = xts_encrypt, .decrypt = xts_decrypt, +}, { +#endif + .base = { + .cra_name = "__cts(cbc(aes))", + .cra_driver_name = "__cts-cbc-aes-" MODE, + .cra_priority = PRIO, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .walksize = 2 * AES_BLOCK_SIZE, + .setkey = skcipher_aes_setkey, + .encrypt = cts_cbc_encrypt, + .decrypt = cts_cbc_decrypt, + .init = cts_cbc_init_tfm, +}, { + .base = { + .cra_name = "__essiv(cbc(aes),sha256)", + .cra_driver_name = "__essiv-cbc-aes-sha256-" MODE, + .cra_priority = PRIO + 1, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_essiv_cbc_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = essiv_cbc_set_key, + .encrypt = essiv_cbc_encrypt, + .decrypt = essiv_cbc_decrypt, + .init = essiv_cbc_init_tfm, + .exit = essiv_cbc_exit_tfm, } }; static int cbcmac_setkey(struct crypto_shash *tfm, const u8 *in_key, -- cgit v1.2.1 From 0cfd507c83469b07f037100a6389b02ce99b546f Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 3 Sep 2019 09:43:31 -0700 Subject: crypto: arm64/aes-cts-cbc-ce - performance tweak Optimize away one of the tbl instructions in the decryption path, which turns out to be unnecessary. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/aes-modes.S | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S index 2879f030a749..38cd5a2091a8 100644 --- a/arch/arm64/crypto/aes-modes.S +++ b/arch/arm64/crypto/aes-modes.S @@ -293,12 +293,11 @@ AES_ENTRY(aes_cbc_cts_decrypt) ld1 {v5.16b}, [x5] /* get iv */ dec_prepare w3, x2, x6 - tbl v2.16b, {v1.16b}, v4.16b decrypt_block v0, w3, x2, x6, w7 - eor v2.16b, v2.16b, v0.16b + tbl v2.16b, {v0.16b}, v3.16b + eor v2.16b, v2.16b, v1.16b tbx v0.16b, {v1.16b}, v4.16b - tbl v2.16b, {v2.16b}, v3.16b decrypt_block v0, w3, x2, x6, w7 eor v0.16b, v0.16b, v5.16b /* xor with iv */ -- cgit v1.2.1 From 7c9d65c40a53290039c701cacfd4316a699061a5 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 3 Sep 2019 09:43:32 -0700 Subject: crypto: arm64/aes-cts-cbc - move request context data to the stack Since the CTS-CBC code completes synchronously, there is no point in keeping part of the scratch data it uses in the request context, so move it to the stack instead. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/aes-glue.c | 61 +++++++++++++++++++------------------------- 1 file changed, 26 insertions(+), 35 deletions(-) (limited to 'arch') diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index a1c4e30d76c9..e78f69b81dfc 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c @@ -107,12 +107,6 @@ asmlinkage void aes_mac_update(u8 const in[], u32 const rk[], int rounds, int blocks, u8 dg[], int enc_before, int enc_after); -struct cts_cbc_req_ctx { - struct scatterlist sg_src[2]; - struct scatterlist sg_dst[2]; - struct skcipher_request subreq; -}; - struct crypto_aes_xts_ctx { struct crypto_aes_ctx key1; struct crypto_aes_ctx __aligned(8) key2; @@ -292,23 +286,20 @@ static int __maybe_unused cbc_decrypt(struct skcipher_request *req) return cbc_decrypt_walk(req, &walk); } -static int cts_cbc_init_tfm(struct crypto_skcipher *tfm) -{ - crypto_skcipher_set_reqsize(tfm, sizeof(struct cts_cbc_req_ctx)); - return 0; -} - static int cts_cbc_encrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); - struct cts_cbc_req_ctx *rctx = skcipher_request_ctx(req); int err, rounds = 6 + ctx->key_length / 4; int cbc_blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2; struct scatterlist *src = req->src, *dst = req->dst; + struct scatterlist sg_src[2], sg_dst[2]; + struct skcipher_request subreq; struct skcipher_walk walk; - skcipher_request_set_tfm(&rctx->subreq, tfm); + skcipher_request_set_tfm(&subreq, tfm); + skcipher_request_set_callback(&subreq, skcipher_request_flags(req), + NULL, NULL); if (req->cryptlen <= AES_BLOCK_SIZE) { if (req->cryptlen < AES_BLOCK_SIZE) @@ -317,31 +308,30 @@ static int cts_cbc_encrypt(struct skcipher_request *req) } if (cbc_blocks > 0) { - skcipher_request_set_crypt(&rctx->subreq, req->src, req->dst, + skcipher_request_set_crypt(&subreq, req->src, req->dst, cbc_blocks * AES_BLOCK_SIZE, req->iv); - err = skcipher_walk_virt(&walk, &rctx->subreq, false) ?: - cbc_encrypt_walk(&rctx->subreq, &walk); + err = skcipher_walk_virt(&walk, &subreq, false) ?: + cbc_encrypt_walk(&subreq, &walk); if (err) return err; if (req->cryptlen == AES_BLOCK_SIZE) return 0; - dst = src = scatterwalk_ffwd(rctx->sg_src, req->src, - rctx->subreq.cryptlen); + dst = src = scatterwalk_ffwd(sg_src, req->src, subreq.cryptlen); if (req->dst != req->src) - dst = scatterwalk_ffwd(rctx->sg_dst, req->dst, - rctx->subreq.cryptlen); + dst = scatterwalk_ffwd(sg_dst, req->dst, + subreq.cryptlen); } /* handle ciphertext stealing */ - skcipher_request_set_crypt(&rctx->subreq, src, dst, + skcipher_request_set_crypt(&subreq, src, dst, req->cryptlen - cbc_blocks * AES_BLOCK_SIZE, req->iv); - err = skcipher_walk_virt(&walk, &rctx->subreq, false); + err = skcipher_walk_virt(&walk, &subreq, false); if (err) return err; @@ -357,13 +347,16 @@ static int cts_cbc_decrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); - struct cts_cbc_req_ctx *rctx = skcipher_request_ctx(req); int err, rounds = 6 + ctx->key_length / 4; int cbc_blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2; struct scatterlist *src = req->src, *dst = req->dst; + struct scatterlist sg_src[2], sg_dst[2]; + struct skcipher_request subreq; struct skcipher_walk walk; - skcipher_request_set_tfm(&rctx->subreq, tfm); + skcipher_request_set_tfm(&subreq, tfm); + skcipher_request_set_callback(&subreq, skcipher_request_flags(req), + NULL, NULL); if (req->cryptlen <= AES_BLOCK_SIZE) { if (req->cryptlen < AES_BLOCK_SIZE) @@ -372,31 +365,30 @@ static int cts_cbc_decrypt(struct skcipher_request *req) } if (cbc_blocks > 0) { - skcipher_request_set_crypt(&rctx->subreq, req->src, req->dst, + skcipher_request_set_crypt(&subreq, req->src, req->dst, cbc_blocks * AES_BLOCK_SIZE, req->iv); - err = skcipher_walk_virt(&walk, &rctx->subreq, false) ?: - cbc_decrypt_walk(&rctx->subreq, &walk); + err = skcipher_walk_virt(&walk, &subreq, false) ?: + cbc_decrypt_walk(&subreq, &walk); if (err) return err; if (req->cryptlen == AES_BLOCK_SIZE) return 0; - dst = src = scatterwalk_ffwd(rctx->sg_src, req->src, - rctx->subreq.cryptlen); + dst = src = scatterwalk_ffwd(sg_src, req->src, subreq.cryptlen); if (req->dst != req->src) - dst = scatterwalk_ffwd(rctx->sg_dst, req->dst, - rctx->subreq.cryptlen); + dst = scatterwalk_ffwd(sg_dst, req->dst, + subreq.cryptlen); } /* handle ciphertext stealing */ - skcipher_request_set_crypt(&rctx->subreq, src, dst, + skcipher_request_set_crypt(&subreq, src, dst, req->cryptlen - cbc_blocks * AES_BLOCK_SIZE, req->iv); - err = skcipher_walk_virt(&walk, &rctx->subreq, false); + err = skcipher_walk_virt(&walk, &subreq, false); if (err) return err; @@ -673,7 +665,6 @@ static struct skcipher_alg aes_algs[] = { { .setkey = skcipher_aes_setkey, .encrypt = cts_cbc_encrypt, .decrypt = cts_cbc_decrypt, - .init = cts_cbc_init_tfm, }, { .base = { .cra_name = "__essiv(cbc(aes),sha256)", -- cgit v1.2.1 From 7cceca8b25b39486c647ceaf6a29f833af16f7c0 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 3 Sep 2019 09:43:33 -0700 Subject: crypto: arm64/aes - implement support for XTS ciphertext stealing Add the missing support for ciphertext stealing in the implementation of AES-XTS, which is part of the XTS specification but was omitted up until now due to lack of a need for it. The asm helpers are updated so they can deal with any input size, as long as the last full block and the final partial block are presented at the same time. The glue code is updated so that the common case of operating on a sector or page is mostly as before. When CTS is needed, the walk is split up into two pieces, unless the entire input is covered by a single step. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/aes-glue.c | 126 ++++++++++++++++++++++++++++++++++++++---- arch/arm64/crypto/aes-modes.S | 99 +++++++++++++++++++++++++++------ 2 files changed, 195 insertions(+), 30 deletions(-) (limited to 'arch') diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index e78f69b81dfc..23ee7c85c0b7 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c @@ -90,10 +90,10 @@ asmlinkage void aes_ctr_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds, int blocks, u8 ctr[]); asmlinkage void aes_xts_encrypt(u8 out[], u8 const in[], u32 const rk1[], - int rounds, int blocks, u32 const rk2[], u8 iv[], + int rounds, int bytes, u32 const rk2[], u8 iv[], int first); asmlinkage void aes_xts_decrypt(u8 out[], u8 const in[], u32 const rk1[], - int rounds, int blocks, u32 const rk2[], u8 iv[], + int rounds, int bytes, u32 const rk2[], u8 iv[], int first); asmlinkage void aes_essiv_cbc_encrypt(u8 out[], u8 const in[], u32 const rk1[], @@ -527,21 +527,71 @@ static int __maybe_unused xts_encrypt(struct skcipher_request *req) struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm); int err, first, rounds = 6 + ctx->key1.key_length / 4; + int tail = req->cryptlen % AES_BLOCK_SIZE; + struct scatterlist sg_src[2], sg_dst[2]; + struct skcipher_request subreq; + struct scatterlist *src, *dst; struct skcipher_walk walk; - unsigned int blocks; + + if (req->cryptlen < AES_BLOCK_SIZE) + return -EINVAL; err = skcipher_walk_virt(&walk, req, false); - for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { + if (unlikely(tail > 0 && walk.nbytes < walk.total)) { + int xts_blocks = DIV_ROUND_UP(req->cryptlen, + AES_BLOCK_SIZE) - 2; + + skcipher_walk_abort(&walk); + + skcipher_request_set_tfm(&subreq, tfm); + skcipher_request_set_callback(&subreq, + skcipher_request_flags(req), + NULL, NULL); + skcipher_request_set_crypt(&subreq, req->src, req->dst, + xts_blocks * AES_BLOCK_SIZE, + req->iv); + req = &subreq; + err = skcipher_walk_virt(&walk, req, false); + } else { + tail = 0; + } + + for (first = 1; walk.nbytes >= AES_BLOCK_SIZE; first = 0) { + int nbytes = walk.nbytes; + + if (walk.nbytes < walk.total) + nbytes &= ~(AES_BLOCK_SIZE - 1); + kernel_neon_begin(); aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key1.key_enc, rounds, blocks, + ctx->key1.key_enc, rounds, nbytes, ctx->key2.key_enc, walk.iv, first); kernel_neon_end(); - err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); } - return err; + if (err || likely(!tail)) + return err; + + dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen); + if (req->dst != req->src) + dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen); + + skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail, + req->iv); + + err = skcipher_walk_virt(&walk, &subreq, false); + if (err) + return err; + + kernel_neon_begin(); + aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr, + ctx->key1.key_enc, rounds, walk.nbytes, + ctx->key2.key_enc, walk.iv, first); + kernel_neon_end(); + + return skcipher_walk_done(&walk, 0); } static int __maybe_unused xts_decrypt(struct skcipher_request *req) @@ -549,21 +599,72 @@ static int __maybe_unused xts_decrypt(struct skcipher_request *req) struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm); int err, first, rounds = 6 + ctx->key1.key_length / 4; + int tail = req->cryptlen % AES_BLOCK_SIZE; + struct scatterlist sg_src[2], sg_dst[2]; + struct skcipher_request subreq; + struct scatterlist *src, *dst; struct skcipher_walk walk; - unsigned int blocks; + + if (req->cryptlen < AES_BLOCK_SIZE) + return -EINVAL; err = skcipher_walk_virt(&walk, req, false); - for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { + if (unlikely(tail > 0 && walk.nbytes < walk.total)) { + int xts_blocks = DIV_ROUND_UP(req->cryptlen, + AES_BLOCK_SIZE) - 2; + + skcipher_walk_abort(&walk); + + skcipher_request_set_tfm(&subreq, tfm); + skcipher_request_set_callback(&subreq, + skcipher_request_flags(req), + NULL, NULL); + skcipher_request_set_crypt(&subreq, req->src, req->dst, + xts_blocks * AES_BLOCK_SIZE, + req->iv); + req = &subreq; + err = skcipher_walk_virt(&walk, req, false); + } else { + tail = 0; + } + + for (first = 1; walk.nbytes >= AES_BLOCK_SIZE; first = 0) { + int nbytes = walk.nbytes; + + if (walk.nbytes < walk.total) + nbytes &= ~(AES_BLOCK_SIZE - 1); + kernel_neon_begin(); aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key1.key_dec, rounds, blocks, + ctx->key1.key_dec, rounds, nbytes, ctx->key2.key_enc, walk.iv, first); kernel_neon_end(); - err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); } - return err; + if (err || likely(!tail)) + return err; + + dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen); + if (req->dst != req->src) + dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen); + + skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail, + req->iv); + + err = skcipher_walk_virt(&walk, &subreq, false); + if (err) + return err; + + + kernel_neon_begin(); + aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr, + ctx->key1.key_dec, rounds, walk.nbytes, + ctx->key2.key_enc, walk.iv, first); + kernel_neon_end(); + + return skcipher_walk_done(&walk, 0); } static struct skcipher_alg aes_algs[] = { { @@ -644,6 +745,7 @@ static struct skcipher_alg aes_algs[] = { { .min_keysize = 2 * AES_MIN_KEY_SIZE, .max_keysize = 2 * AES_MAX_KEY_SIZE, .ivsize = AES_BLOCK_SIZE, + .walksize = 2 * AES_BLOCK_SIZE, .setkey = xts_set_key, .encrypt = xts_encrypt, .decrypt = xts_decrypt, diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S index 38cd5a2091a8..f2c2ba739f36 100644 --- a/arch/arm64/crypto/aes-modes.S +++ b/arch/arm64/crypto/aes-modes.S @@ -413,10 +413,10 @@ AES_ENDPROC(aes_ctr_encrypt) /* + * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds, + * int bytes, u8 const rk2[], u8 iv[], int first) * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds, - * int blocks, u8 const rk2[], u8 iv[], int first) - * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds, - * int blocks, u8 const rk2[], u8 iv[], int first) + * int bytes, u8 const rk2[], u8 iv[], int first) */ .macro next_tweak, out, in, tmp @@ -451,7 +451,7 @@ AES_ENTRY(aes_xts_encrypt) .LxtsencloopNx: next_tweak v4, v4, v8 .LxtsencNx: - subs w4, w4, #4 + subs w4, w4, #64 bmi .Lxtsenc1x ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */ next_tweak v5, v4, v8 @@ -468,33 +468,66 @@ AES_ENTRY(aes_xts_encrypt) eor v2.16b, v2.16b, v6.16b st1 {v0.16b-v3.16b}, [x0], #64 mov v4.16b, v7.16b - cbz w4, .Lxtsencout + cbz w4, .Lxtsencret xts_reload_mask v8 b .LxtsencloopNx .Lxtsenc1x: - adds w4, w4, #4 + adds w4, w4, #64 beq .Lxtsencout + subs w4, w4, #16 + bmi .LxtsencctsNx .Lxtsencloop: - ld1 {v1.16b}, [x1], #16 - eor v0.16b, v1.16b, v4.16b + ld1 {v0.16b}, [x1], #16 +.Lxtsencctsout: + eor v0.16b, v0.16b, v4.16b encrypt_block v0, w3, x2, x8, w7 eor v0.16b, v0.16b, v4.16b - st1 {v0.16b}, [x0], #16 - subs w4, w4, #1 - beq .Lxtsencout + cbz w4, .Lxtsencout + subs w4, w4, #16 next_tweak v4, v4, v8 + bmi .Lxtsenccts + st1 {v0.16b}, [x0], #16 b .Lxtsencloop .Lxtsencout: + st1 {v0.16b}, [x0] +.Lxtsencret: st1 {v4.16b}, [x6] ldp x29, x30, [sp], #16 ret -AES_ENDPROC(aes_xts_encrypt) +.LxtsencctsNx: + mov v0.16b, v3.16b + sub x0, x0, #16 +.Lxtsenccts: + adr_l x8, .Lcts_permute_table + + add x1, x1, w4, sxtw /* rewind input pointer */ + add w4, w4, #16 /* # bytes in final block */ + add x9, x8, #32 + add x8, x8, x4 + sub x9, x9, x4 + add x4, x0, x4 /* output address of final block */ + + ld1 {v1.16b}, [x1] /* load final block */ + ld1 {v2.16b}, [x8] + ld1 {v3.16b}, [x9] + + tbl v2.16b, {v0.16b}, v2.16b + tbx v0.16b, {v1.16b}, v3.16b + st1 {v2.16b}, [x4] /* overlapping stores */ + mov w4, wzr + b .Lxtsencctsout +AES_ENDPROC(aes_xts_encrypt) AES_ENTRY(aes_xts_decrypt) stp x29, x30, [sp, #-16]! mov x29, sp + /* subtract 16 bytes if we are doing CTS */ + sub w8, w4, #0x10 + tst w4, #0xf + csel w4, w4, w8, eq + ld1 {v4.16b}, [x6] xts_load_mask v8 cbz w7, .Lxtsdecnotfirst @@ -509,7 +542,7 @@ AES_ENTRY(aes_xts_decrypt) .LxtsdecloopNx: next_tweak v4, v4, v8 .LxtsdecNx: - subs w4, w4, #4 + subs w4, w4, #64 bmi .Lxtsdec1x ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */ next_tweak v5, v4, v8 @@ -530,22 +563,52 @@ AES_ENTRY(aes_xts_decrypt) xts_reload_mask v8 b .LxtsdecloopNx .Lxtsdec1x: - adds w4, w4, #4 + adds w4, w4, #64 beq .Lxtsdecout + subs w4, w4, #16 .Lxtsdecloop: - ld1 {v1.16b}, [x1], #16 - eor v0.16b, v1.16b, v4.16b + ld1 {v0.16b}, [x1], #16 + bmi .Lxtsdeccts +.Lxtsdecctsout: + eor v0.16b, v0.16b, v4.16b decrypt_block v0, w3, x2, x8, w7 eor v0.16b, v0.16b, v4.16b st1 {v0.16b}, [x0], #16 - subs w4, w4, #1 - beq .Lxtsdecout + cbz w4, .Lxtsdecout + subs w4, w4, #16 next_tweak v4, v4, v8 b .Lxtsdecloop .Lxtsdecout: st1 {v4.16b}, [x6] ldp x29, x30, [sp], #16 ret + +.Lxtsdeccts: + adr_l x8, .Lcts_permute_table + + add x1, x1, w4, sxtw /* rewind input pointer */ + add w4, w4, #16 /* # bytes in final block */ + add x9, x8, #32 + add x8, x8, x4 + sub x9, x9, x4 + add x4, x0, x4 /* output address of final block */ + + next_tweak v5, v4, v8 + + ld1 {v1.16b}, [x1] /* load final block */ + ld1 {v2.16b}, [x8] + ld1 {v3.16b}, [x9] + + eor v0.16b, v0.16b, v5.16b + decrypt_block v0, w3, x2, x8, w7 + eor v0.16b, v0.16b, v5.16b + + tbl v2.16b, {v0.16b}, v2.16b + tbx v0.16b, {v1.16b}, v3.16b + + st1 {v2.16b}, [x4] /* overlapping stores */ + mov w4, wzr + b .Lxtsdecctsout AES_ENDPROC(aes_xts_decrypt) /* -- cgit v1.2.1 From 67cfa5d3b7214ce944747908f9a1a3cba8b989b9 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 3 Sep 2019 09:43:34 -0700 Subject: crypto: arm64/aes-neonbs - implement ciphertext stealing for XTS Update the AES-XTS implementation based on NEON instructions so that it can deal with inputs whose size is not a multiple of the cipher block size. This is part of the original XTS specification, but was never implemented before in the Linux kernel. Since the bit slicing driver is only faster if it can operate on at least 7 blocks of input at the same time, let's reuse the alternate path we are adding for CTS to process any data tail whose size is not a multiple of 128 bytes. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/aes-ce.S | 3 + arch/arm64/crypto/aes-glue.c | 2 + arch/arm64/crypto/aes-modes.S | 3 + arch/arm64/crypto/aes-neon.S | 5 ++ arch/arm64/crypto/aes-neonbs-glue.c | 111 +++++++++++++++++++++++++++++++----- 5 files changed, 110 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/arm64/crypto/aes-ce.S b/arch/arm64/crypto/aes-ce.S index 00bd2885feaa..c132c49c89a8 100644 --- a/arch/arm64/crypto/aes-ce.S +++ b/arch/arm64/crypto/aes-ce.S @@ -21,6 +21,9 @@ .macro xts_reload_mask, tmp .endm + .macro xts_cts_skip_tw, reg, lbl + .endm + /* preload all round keys */ .macro load_round_keys, rounds, rk cmp \rounds, #12 diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index 23ee7c85c0b7..aa57dc639f77 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c @@ -1071,5 +1071,7 @@ module_cpu_feature_match(AES, aes_init); module_init(aes_init); EXPORT_SYMBOL(neon_aes_ecb_encrypt); EXPORT_SYMBOL(neon_aes_cbc_encrypt); +EXPORT_SYMBOL(neon_aes_xts_encrypt); +EXPORT_SYMBOL(neon_aes_xts_decrypt); #endif module_exit(aes_exit); diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S index f2c2ba739f36..131618389f1f 100644 --- a/arch/arm64/crypto/aes-modes.S +++ b/arch/arm64/crypto/aes-modes.S @@ -442,6 +442,7 @@ AES_ENTRY(aes_xts_encrypt) cbz w7, .Lxtsencnotfirst enc_prepare w3, x5, x8 + xts_cts_skip_tw w7, .LxtsencNx encrypt_block v4, w3, x5, x8, w7 /* first tweak */ enc_switch_key w3, x2, x8 b .LxtsencNx @@ -530,10 +531,12 @@ AES_ENTRY(aes_xts_decrypt) ld1 {v4.16b}, [x6] xts_load_mask v8 + xts_cts_skip_tw w7, .Lxtsdecskiptw cbz w7, .Lxtsdecnotfirst enc_prepare w3, x5, x8 encrypt_block v4, w3, x5, x8, w7 /* first tweak */ +.Lxtsdecskiptw: dec_prepare w3, x2, x8 b .LxtsdecNx diff --git a/arch/arm64/crypto/aes-neon.S b/arch/arm64/crypto/aes-neon.S index 0cac5df6c901..22d9b110cf78 100644 --- a/arch/arm64/crypto/aes-neon.S +++ b/arch/arm64/crypto/aes-neon.S @@ -19,6 +19,11 @@ xts_load_mask \tmp .endm + /* special case for the neon-bs driver calling into this one for CTS */ + .macro xts_cts_skip_tw, reg, lbl + tbnz \reg, #1, \lbl + .endm + /* multiply by polynomial 'x' in GF(2^8) */ .macro mul_by_x, out, in, temp, const sshr \temp, \in, #7 diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c index bafd2ebef8f1..ea873b8904c4 100644 --- a/arch/arm64/crypto/aes-neonbs-glue.c +++ b/arch/arm64/crypto/aes-neonbs-glue.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -45,6 +46,12 @@ asmlinkage void neon_aes_ecb_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds, int blocks); asmlinkage void neon_aes_cbc_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds, int blocks, u8 iv[]); +asmlinkage void neon_aes_xts_encrypt(u8 out[], u8 const in[], + u32 const rk1[], int rounds, int bytes, + u32 const rk2[], u8 iv[], int first); +asmlinkage void neon_aes_xts_decrypt(u8 out[], u8 const in[], + u32 const rk1[], int rounds, int bytes, + u32 const rk2[], u8 iv[], int first); struct aesbs_ctx { u8 rk[13 * (8 * AES_BLOCK_SIZE) + 32]; @@ -64,6 +71,7 @@ struct aesbs_ctr_ctx { struct aesbs_xts_ctx { struct aesbs_ctx key; u32 twkey[AES_MAX_KEYLENGTH_U32]; + struct crypto_aes_ctx cts; }; static int aesbs_setkey(struct crypto_skcipher *tfm, const u8 *in_key, @@ -270,6 +278,10 @@ static int aesbs_xts_setkey(struct crypto_skcipher *tfm, const u8 *in_key, return err; key_len /= 2; + err = aes_expandkey(&ctx->cts, in_key, key_len); + if (err) + return err; + err = aes_expandkey(&rk, in_key + key_len, key_len); if (err) return err; @@ -302,48 +314,119 @@ static int ctr_encrypt_sync(struct skcipher_request *req) return ctr_encrypt(req); } -static int __xts_crypt(struct skcipher_request *req, +static int __xts_crypt(struct skcipher_request *req, bool encrypt, void (*fn)(u8 out[], u8 const in[], u8 const rk[], int rounds, int blocks, u8 iv[])) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm); + int tail = req->cryptlen % (8 * AES_BLOCK_SIZE); + struct scatterlist sg_src[2], sg_dst[2]; + struct skcipher_request subreq; + struct scatterlist *src, *dst; struct skcipher_walk walk; - int err; + int nbytes, err; + int first = 1; + u8 *out, *in; + + if (req->cryptlen < AES_BLOCK_SIZE) + return -EINVAL; + + /* ensure that the cts tail is covered by a single step */ + if (unlikely(tail > 0 && tail < AES_BLOCK_SIZE)) { + int xts_blocks = DIV_ROUND_UP(req->cryptlen, + AES_BLOCK_SIZE) - 2; + + skcipher_request_set_tfm(&subreq, tfm); + skcipher_request_set_callback(&subreq, + skcipher_request_flags(req), + NULL, NULL); + skcipher_request_set_crypt(&subreq, req->src, req->dst, + xts_blocks * AES_BLOCK_SIZE, + req->iv); + req = &subreq; + } else { + tail = 0; + } err = skcipher_walk_virt(&walk, req, false); if (err) return err; - kernel_neon_begin(); - neon_aes_ecb_encrypt(walk.iv, walk.iv, ctx->twkey, ctx->key.rounds, 1); - kernel_neon_end(); - while (walk.nbytes >= AES_BLOCK_SIZE) { unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE; - if (walk.nbytes < walk.total) + if (walk.nbytes < walk.total || walk.nbytes % AES_BLOCK_SIZE) blocks = round_down(blocks, walk.stride / AES_BLOCK_SIZE); + out = walk.dst.virt.addr; + in = walk.src.virt.addr; + nbytes = walk.nbytes; + kernel_neon_begin(); - fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->key.rk, - ctx->key.rounds, blocks, walk.iv); + if (likely(blocks > 6)) { /* plain NEON is faster otherwise */ + if (first) + neon_aes_ecb_encrypt(walk.iv, walk.iv, + ctx->twkey, + ctx->key.rounds, 1); + first = 0; + + fn(out, in, ctx->key.rk, ctx->key.rounds, blocks, + walk.iv); + + out += blocks * AES_BLOCK_SIZE; + in += blocks * AES_BLOCK_SIZE; + nbytes -= blocks * AES_BLOCK_SIZE; + } + + if (walk.nbytes == walk.total && nbytes > 0) + goto xts_tail; + kernel_neon_end(); - err = skcipher_walk_done(&walk, - walk.nbytes - blocks * AES_BLOCK_SIZE); + skcipher_walk_done(&walk, nbytes); } - return err; + + if (err || likely(!tail)) + return err; + + /* handle ciphertext stealing */ + dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen); + if (req->dst != req->src) + dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen); + + skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail, + req->iv); + + err = skcipher_walk_virt(&walk, req, false); + if (err) + return err; + + out = walk.dst.virt.addr; + in = walk.src.virt.addr; + nbytes = walk.nbytes; + + kernel_neon_begin(); +xts_tail: + if (encrypt) + neon_aes_xts_encrypt(out, in, ctx->cts.key_enc, ctx->key.rounds, + nbytes, ctx->twkey, walk.iv, first ?: 2); + else + neon_aes_xts_decrypt(out, in, ctx->cts.key_dec, ctx->key.rounds, + nbytes, ctx->twkey, walk.iv, first ?: 2); + kernel_neon_end(); + + return skcipher_walk_done(&walk, 0); } static int xts_encrypt(struct skcipher_request *req) { - return __xts_crypt(req, aesbs_xts_encrypt); + return __xts_crypt(req, true, aesbs_xts_encrypt); } static int xts_decrypt(struct skcipher_request *req) { - return __xts_crypt(req, aesbs_xts_decrypt); + return __xts_crypt(req, false, aesbs_xts_decrypt); } static struct skcipher_alg aes_algs[] = { { -- cgit v1.2.1 From c61b1607ed4fbbf2ba7c86f29768cff44a1a88f8 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 3 Sep 2019 09:43:35 -0700 Subject: crypto: arm/aes-ce - implement ciphertext stealing for XTS Update the AES-XTS implementation based on AES instructions so that it can deal with inputs whose size is not a multiple of the cipher block size. This is part of the original XTS specification, but was never implemented before in the Linux kernel. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm/crypto/aes-ce-core.S | 103 +++++++++++++++++++++++++++++---- arch/arm/crypto/aes-ce-glue.c | 128 ++++++++++++++++++++++++++++++++++++++---- 2 files changed, 208 insertions(+), 23 deletions(-) (limited to 'arch') diff --git a/arch/arm/crypto/aes-ce-core.S b/arch/arm/crypto/aes-ce-core.S index bb6ec1844370..763e51604ab6 100644 --- a/arch/arm/crypto/aes-ce-core.S +++ b/arch/arm/crypto/aes-ce-core.S @@ -369,9 +369,9 @@ ENDPROC(ce_aes_ctr_encrypt) /* * aes_xts_encrypt(u8 out[], u8 const in[], u32 const rk1[], int rounds, - * int blocks, u8 iv[], u32 const rk2[], int first) + * int bytes, u8 iv[], u32 const rk2[], int first) * aes_xts_decrypt(u8 out[], u8 const in[], u32 const rk1[], int rounds, - * int blocks, u8 iv[], u32 const rk2[], int first) + * int bytes, u8 iv[], u32 const rk2[], int first) */ .macro next_tweak, out, in, const, tmp @@ -414,7 +414,7 @@ ENTRY(ce_aes_xts_encrypt) .Lxtsencloop4x: next_tweak q4, q4, q15, q10 .Lxtsenc4x: - subs r4, r4, #4 + subs r4, r4, #64 bmi .Lxtsenc1x vld1.8 {q0-q1}, [r1]! @ get 4 pt blocks vld1.8 {q2-q3}, [r1]! @@ -434,24 +434,58 @@ ENTRY(ce_aes_xts_encrypt) vst1.8 {q2-q3}, [r0]! vmov q4, q7 teq r4, #0 - beq .Lxtsencout + beq .Lxtsencret b .Lxtsencloop4x .Lxtsenc1x: - adds r4, r4, #4 + adds r4, r4, #64 beq .Lxtsencout + subs r4, r4, #16 + bmi .LxtsencctsNx .Lxtsencloop: vld1.8 {q0}, [r1]! +.Lxtsencctsout: veor q0, q0, q4 bl aes_encrypt veor q0, q0, q4 - vst1.8 {q0}, [r0]! - subs r4, r4, #1 + teq r4, #0 beq .Lxtsencout + subs r4, r4, #16 next_tweak q4, q4, q15, q6 + bmi .Lxtsenccts + vst1.8 {q0}, [r0]! b .Lxtsencloop .Lxtsencout: + vst1.8 {q0}, [r0] +.Lxtsencret: vst1.8 {q4}, [r5] pop {r4-r6, pc} + +.LxtsencctsNx: + vmov q0, q3 + sub r0, r0, #16 +.Lxtsenccts: + movw ip, :lower16:.Lcts_permute_table + movt ip, :upper16:.Lcts_permute_table + + add r1, r1, r4 @ rewind input pointer + add r4, r4, #16 @ # bytes in final block + add lr, ip, #32 + add ip, ip, r4 + sub lr, lr, r4 + add r4, r0, r4 @ output address of final block + + vld1.8 {q1}, [r1] @ load final partial block + vld1.8 {q2}, [ip] + vld1.8 {q3}, [lr] + + vtbl.8 d4, {d0-d1}, d4 + vtbl.8 d5, {d0-d1}, d5 + vtbx.8 d0, {d2-d3}, d6 + vtbx.8 d1, {d2-d3}, d7 + + vst1.8 {q2}, [r4] @ overlapping stores + mov r4, #0 + b .Lxtsencctsout ENDPROC(ce_aes_xts_encrypt) @@ -462,13 +496,17 @@ ENTRY(ce_aes_xts_decrypt) prepare_key r2, r3 vmov q4, q0 + /* subtract 16 bytes if we are doing CTS */ + tst r4, #0xf + subne r4, r4, #0x10 + teq r6, #0 @ start of a block? bne .Lxtsdec4x .Lxtsdecloop4x: next_tweak q4, q4, q15, q10 .Lxtsdec4x: - subs r4, r4, #4 + subs r4, r4, #64 bmi .Lxtsdec1x vld1.8 {q0-q1}, [r1]! @ get 4 ct blocks vld1.8 {q2-q3}, [r1]! @@ -491,22 +529,55 @@ ENTRY(ce_aes_xts_decrypt) beq .Lxtsdecout b .Lxtsdecloop4x .Lxtsdec1x: - adds r4, r4, #4 + adds r4, r4, #64 beq .Lxtsdecout + subs r4, r4, #16 .Lxtsdecloop: vld1.8 {q0}, [r1]! + bmi .Lxtsdeccts +.Lxtsdecctsout: veor q0, q0, q4 - add ip, r2, #32 @ 3rd round key bl aes_decrypt veor q0, q0, q4 vst1.8 {q0}, [r0]! - subs r4, r4, #1 + teq r4, #0 beq .Lxtsdecout + subs r4, r4, #16 next_tweak q4, q4, q15, q6 b .Lxtsdecloop .Lxtsdecout: vst1.8 {q4}, [r5] pop {r4-r6, pc} + +.Lxtsdeccts: + movw ip, :lower16:.Lcts_permute_table + movt ip, :upper16:.Lcts_permute_table + + add r1, r1, r4 @ rewind input pointer + add r4, r4, #16 @ # bytes in final block + add lr, ip, #32 + add ip, ip, r4 + sub lr, lr, r4 + add r4, r0, r4 @ output address of final block + + next_tweak q5, q4, q15, q6 + + vld1.8 {q1}, [r1] @ load final partial block + vld1.8 {q2}, [ip] + vld1.8 {q3}, [lr] + + veor q0, q0, q5 + bl aes_decrypt + veor q0, q0, q5 + + vtbl.8 d4, {d0-d1}, d4 + vtbl.8 d5, {d0-d1}, d5 + vtbx.8 d0, {d2-d3}, d6 + vtbx.8 d1, {d2-d3}, d7 + + vst1.8 {q2}, [r4] @ overlapping stores + mov r4, #0 + b .Lxtsdecctsout ENDPROC(ce_aes_xts_decrypt) /* @@ -532,3 +603,13 @@ ENTRY(ce_aes_invert) vst1.32 {q0}, [r0] bx lr ENDPROC(ce_aes_invert) + + .section ".rodata", "a" + .align 6 +.Lcts_permute_table: + .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff + .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff + .byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 + .byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf + .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff + .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c index 486e862ae34a..c215792a2494 100644 --- a/arch/arm/crypto/aes-ce-glue.c +++ b/arch/arm/crypto/aes-ce-glue.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -39,10 +40,10 @@ asmlinkage void ce_aes_ctr_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds, int blocks, u8 ctr[]); asmlinkage void ce_aes_xts_encrypt(u8 out[], u8 const in[], u32 const rk1[], - int rounds, int blocks, u8 iv[], + int rounds, int bytes, u8 iv[], u32 const rk2[], int first); asmlinkage void ce_aes_xts_decrypt(u8 out[], u8 const in[], u32 const rk1[], - int rounds, int blocks, u8 iv[], + int rounds, int bytes, u8 iv[], u32 const rk2[], int first); struct aes_block { @@ -317,20 +318,71 @@ static int xts_encrypt(struct skcipher_request *req) struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm); int err, first, rounds = num_rounds(&ctx->key1); + int tail = req->cryptlen % AES_BLOCK_SIZE; + struct scatterlist sg_src[2], sg_dst[2]; + struct skcipher_request subreq; + struct scatterlist *src, *dst; struct skcipher_walk walk; - unsigned int blocks; + + if (req->cryptlen < AES_BLOCK_SIZE) + return -EINVAL; err = skcipher_walk_virt(&walk, req, false); - for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { + if (unlikely(tail > 0 && walk.nbytes < walk.total)) { + int xts_blocks = DIV_ROUND_UP(req->cryptlen, + AES_BLOCK_SIZE) - 2; + + skcipher_walk_abort(&walk); + + skcipher_request_set_tfm(&subreq, tfm); + skcipher_request_set_callback(&subreq, + skcipher_request_flags(req), + NULL, NULL); + skcipher_request_set_crypt(&subreq, req->src, req->dst, + xts_blocks * AES_BLOCK_SIZE, + req->iv); + req = &subreq; + err = skcipher_walk_virt(&walk, req, false); + } else { + tail = 0; + } + + for (first = 1; walk.nbytes >= AES_BLOCK_SIZE; first = 0) { + int nbytes = walk.nbytes; + + if (walk.nbytes < walk.total) + nbytes &= ~(AES_BLOCK_SIZE - 1); + kernel_neon_begin(); ce_aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key1.key_enc, rounds, blocks, walk.iv, + ctx->key1.key_enc, rounds, nbytes, walk.iv, ctx->key2.key_enc, first); kernel_neon_end(); - err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); } - return err; + + if (err || likely(!tail)) + return err; + + dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen); + if (req->dst != req->src) + dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen); + + skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail, + req->iv); + + err = skcipher_walk_virt(&walk, req, false); + if (err) + return err; + + kernel_neon_begin(); + ce_aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr, + ctx->key1.key_enc, rounds, walk.nbytes, walk.iv, + ctx->key2.key_enc, first); + kernel_neon_end(); + + return skcipher_walk_done(&walk, 0); } static int xts_decrypt(struct skcipher_request *req) @@ -338,20 +390,71 @@ static int xts_decrypt(struct skcipher_request *req) struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm); int err, first, rounds = num_rounds(&ctx->key1); + int tail = req->cryptlen % AES_BLOCK_SIZE; + struct scatterlist sg_src[2], sg_dst[2]; + struct skcipher_request subreq; + struct scatterlist *src, *dst; struct skcipher_walk walk; - unsigned int blocks; + + if (req->cryptlen < AES_BLOCK_SIZE) + return -EINVAL; err = skcipher_walk_virt(&walk, req, false); - for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { + if (unlikely(tail > 0 && walk.nbytes < walk.total)) { + int xts_blocks = DIV_ROUND_UP(req->cryptlen, + AES_BLOCK_SIZE) - 2; + + skcipher_walk_abort(&walk); + + skcipher_request_set_tfm(&subreq, tfm); + skcipher_request_set_callback(&subreq, + skcipher_request_flags(req), + NULL, NULL); + skcipher_request_set_crypt(&subreq, req->src, req->dst, + xts_blocks * AES_BLOCK_SIZE, + req->iv); + req = &subreq; + err = skcipher_walk_virt(&walk, req, false); + } else { + tail = 0; + } + + for (first = 1; walk.nbytes >= AES_BLOCK_SIZE; first = 0) { + int nbytes = walk.nbytes; + + if (walk.nbytes < walk.total) + nbytes &= ~(AES_BLOCK_SIZE - 1); + kernel_neon_begin(); ce_aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr, - ctx->key1.key_dec, rounds, blocks, walk.iv, + ctx->key1.key_dec, rounds, nbytes, walk.iv, ctx->key2.key_enc, first); kernel_neon_end(); - err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); } - return err; + + if (err || likely(!tail)) + return err; + + dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen); + if (req->dst != req->src) + dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen); + + skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail, + req->iv); + + err = skcipher_walk_virt(&walk, req, false); + if (err) + return err; + + kernel_neon_begin(); + ce_aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr, + ctx->key1.key_dec, rounds, walk.nbytes, walk.iv, + ctx->key2.key_enc, first); + kernel_neon_end(); + + return skcipher_walk_done(&walk, 0); } static struct skcipher_alg aes_algs[] = { { @@ -426,6 +529,7 @@ static struct skcipher_alg aes_algs[] = { { .min_keysize = 2 * AES_MIN_KEY_SIZE, .max_keysize = 2 * AES_MAX_KEY_SIZE, .ivsize = AES_BLOCK_SIZE, + .walksize = 2 * AES_BLOCK_SIZE, .setkey = xts_set_key, .encrypt = xts_encrypt, .decrypt = xts_decrypt, -- cgit v1.2.1 From 2ed8b79098cf76287c519d781a14c7983ab7e4f7 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 3 Sep 2019 09:43:36 -0700 Subject: crypto: arm/aes-neonbs - implement ciphertext stealing for XTS Update the AES-XTS implementation based on NEON instructions so that it can deal with inputs whose size is not a multiple of the cipher block size. This is part of the original XTS specification, but was never implemented before in the Linux kernel. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm/crypto/aes-neonbs-core.S | 16 ++++++--- arch/arm/crypto/aes-neonbs-glue.c | 69 ++++++++++++++++++++++++++++++++++----- 2 files changed, 72 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/arm/crypto/aes-neonbs-core.S b/arch/arm/crypto/aes-neonbs-core.S index bb75918e4984..cfaed4e67535 100644 --- a/arch/arm/crypto/aes-neonbs-core.S +++ b/arch/arm/crypto/aes-neonbs-core.S @@ -889,9 +889,9 @@ ENDPROC(aesbs_ctr_encrypt) /* * aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, - * int blocks, u8 iv[]) + * int blocks, u8 iv[], int reorder_last_tweak) * aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, - * int blocks, u8 iv[]) + * int blocks, u8 iv[], int reorder_last_tweak) */ __xts_prepare8: vld1.8 {q14}, [r7] // load iv @@ -944,17 +944,25 @@ __xts_prepare8: vld1.8 {q7}, [r1]! next_tweak q14, q12, q15, q13 - veor q7, q7, q12 +THUMB( itt le ) + W(cmple) r8, #0 + ble 1f +0: veor q7, q7, q12 vst1.8 {q12}, [r4, :128] -0: vst1.8 {q14}, [r7] // store next iv + vst1.8 {q14}, [r7] // store next iv bx lr + +1: vswp q12, q14 + b 0b ENDPROC(__xts_prepare8) .macro __xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7 push {r4-r8, lr} mov r5, sp // preserve sp ldrd r6, r7, [sp, #24] // get blocks and iv args + ldr r8, [sp, #32] // reorder final tweak? + rsb r8, r8, #1 sub ip, sp, #128 // make room for 8x tweak bic ip, ip, #0xf // align sp to 16 bytes mov sp, ip diff --git a/arch/arm/crypto/aes-neonbs-glue.c b/arch/arm/crypto/aes-neonbs-glue.c index 9000d0796d5e..e85839a8aaeb 100644 --- a/arch/arm/crypto/aes-neonbs-glue.c +++ b/arch/arm/crypto/aes-neonbs-glue.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -37,9 +38,9 @@ asmlinkage void aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, int blocks, u8 ctr[], u8 final[]); asmlinkage void aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], - int rounds, int blocks, u8 iv[]); + int rounds, int blocks, u8 iv[], int); asmlinkage void aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], - int rounds, int blocks, u8 iv[]); + int rounds, int blocks, u8 iv[], int); struct aesbs_ctx { int rounds; @@ -53,6 +54,7 @@ struct aesbs_cbc_ctx { struct aesbs_xts_ctx { struct aesbs_ctx key; + struct crypto_cipher *cts_tfm; struct crypto_cipher *tweak_tfm; }; @@ -291,6 +293,9 @@ static int aesbs_xts_setkey(struct crypto_skcipher *tfm, const u8 *in_key, return err; key_len /= 2; + err = crypto_cipher_setkey(ctx->cts_tfm, in_key, key_len); + if (err) + return err; err = crypto_cipher_setkey(ctx->tweak_tfm, in_key + key_len, key_len); if (err) return err; @@ -302,7 +307,13 @@ static int xts_init(struct crypto_tfm *tfm) { struct aesbs_xts_ctx *ctx = crypto_tfm_ctx(tfm); + ctx->cts_tfm = crypto_alloc_cipher("aes", 0, 0); + if (IS_ERR(ctx->cts_tfm)) + return PTR_ERR(ctx->cts_tfm); + ctx->tweak_tfm = crypto_alloc_cipher("aes", 0, 0); + if (IS_ERR(ctx->tweak_tfm)) + crypto_free_cipher(ctx->cts_tfm); return PTR_ERR_OR_ZERO(ctx->tweak_tfm); } @@ -312,17 +323,34 @@ static void xts_exit(struct crypto_tfm *tfm) struct aesbs_xts_ctx *ctx = crypto_tfm_ctx(tfm); crypto_free_cipher(ctx->tweak_tfm); + crypto_free_cipher(ctx->cts_tfm); } -static int __xts_crypt(struct skcipher_request *req, +static int __xts_crypt(struct skcipher_request *req, bool encrypt, void (*fn)(u8 out[], u8 const in[], u8 const rk[], - int rounds, int blocks, u8 iv[])) + int rounds, int blocks, u8 iv[], int)) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm); + int tail = req->cryptlen % AES_BLOCK_SIZE; + struct skcipher_request subreq; + u8 buf[2 * AES_BLOCK_SIZE]; struct skcipher_walk walk; int err; + if (req->cryptlen < AES_BLOCK_SIZE) + return -EINVAL; + + if (unlikely(tail)) { + skcipher_request_set_tfm(&subreq, tfm); + skcipher_request_set_callback(&subreq, + skcipher_request_flags(req), + NULL, NULL); + skcipher_request_set_crypt(&subreq, req->src, req->dst, + req->cryptlen - tail, req->iv); + req = &subreq; + } + err = skcipher_walk_virt(&walk, req, true); if (err) return err; @@ -331,30 +359,53 @@ static int __xts_crypt(struct skcipher_request *req, while (walk.nbytes >= AES_BLOCK_SIZE) { unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE; + int reorder_last_tweak = !encrypt && tail > 0; - if (walk.nbytes < walk.total) + if (walk.nbytes < walk.total) { blocks = round_down(blocks, walk.stride / AES_BLOCK_SIZE); + reorder_last_tweak = 0; + } kernel_neon_begin(); fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->key.rk, - ctx->key.rounds, blocks, walk.iv); + ctx->key.rounds, blocks, walk.iv, reorder_last_tweak); kernel_neon_end(); err = skcipher_walk_done(&walk, walk.nbytes - blocks * AES_BLOCK_SIZE); } - return err; + if (err || likely(!tail)) + return err; + + /* handle ciphertext stealing */ + scatterwalk_map_and_copy(buf, req->dst, req->cryptlen - AES_BLOCK_SIZE, + AES_BLOCK_SIZE, 0); + memcpy(buf + AES_BLOCK_SIZE, buf, tail); + scatterwalk_map_and_copy(buf, req->src, req->cryptlen, tail, 0); + + crypto_xor(buf, req->iv, AES_BLOCK_SIZE); + + if (encrypt) + crypto_cipher_encrypt_one(ctx->cts_tfm, buf, buf); + else + crypto_cipher_decrypt_one(ctx->cts_tfm, buf, buf); + + crypto_xor(buf, req->iv, AES_BLOCK_SIZE); + + scatterwalk_map_and_copy(buf, req->dst, req->cryptlen - AES_BLOCK_SIZE, + AES_BLOCK_SIZE + tail, 1); + return 0; } static int xts_encrypt(struct skcipher_request *req) { - return __xts_crypt(req, aesbs_xts_encrypt); + return __xts_crypt(req, true, aesbs_xts_encrypt); } static int xts_decrypt(struct skcipher_request *req) { - return __xts_crypt(req, aesbs_xts_decrypt); + return __xts_crypt(req, false, aesbs_xts_decrypt); } static struct skcipher_alg aes_algs[] = { { -- cgit v1.2.1 From 143d2647c892cb454489dca62a9fc65941c5c557 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 3 Sep 2019 09:43:37 -0700 Subject: crypto: arm/aes-ce - implement ciphertext stealing for CBC Instead of relying on the CTS template to wrap the accelerated CBC skcipher, implement the ciphertext stealing part directly. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm/crypto/aes-ce-core.S | 85 +++++++++++++++++++ arch/arm/crypto/aes-ce-glue.c | 188 ++++++++++++++++++++++++++++++++++++++---- 2 files changed, 256 insertions(+), 17 deletions(-) (limited to 'arch') diff --git a/arch/arm/crypto/aes-ce-core.S b/arch/arm/crypto/aes-ce-core.S index 763e51604ab6..b978cdf133af 100644 --- a/arch/arm/crypto/aes-ce-core.S +++ b/arch/arm/crypto/aes-ce-core.S @@ -284,6 +284,91 @@ ENTRY(ce_aes_cbc_decrypt) pop {r4-r6, pc} ENDPROC(ce_aes_cbc_decrypt) + + /* + * ce_aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[], + * int rounds, int bytes, u8 const iv[]) + * ce_aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[], + * int rounds, int bytes, u8 const iv[]) + */ + +ENTRY(ce_aes_cbc_cts_encrypt) + push {r4-r6, lr} + ldrd r4, r5, [sp, #16] + + movw ip, :lower16:.Lcts_permute_table + movt ip, :upper16:.Lcts_permute_table + sub r4, r4, #16 + add lr, ip, #32 + add ip, ip, r4 + sub lr, lr, r4 + vld1.8 {q5}, [ip] + vld1.8 {q6}, [lr] + + add ip, r1, r4 + vld1.8 {q0}, [r1] @ overlapping loads + vld1.8 {q3}, [ip] + + vld1.8 {q1}, [r5] @ get iv + prepare_key r2, r3 + + veor q0, q0, q1 @ xor with iv + bl aes_encrypt + + vtbl.8 d4, {d0-d1}, d10 + vtbl.8 d5, {d0-d1}, d11 + vtbl.8 d2, {d6-d7}, d12 + vtbl.8 d3, {d6-d7}, d13 + + veor q0, q0, q1 + bl aes_encrypt + + add r4, r0, r4 + vst1.8 {q2}, [r4] @ overlapping stores + vst1.8 {q0}, [r0] + + pop {r4-r6, pc} +ENDPROC(ce_aes_cbc_cts_encrypt) + +ENTRY(ce_aes_cbc_cts_decrypt) + push {r4-r6, lr} + ldrd r4, r5, [sp, #16] + + movw ip, :lower16:.Lcts_permute_table + movt ip, :upper16:.Lcts_permute_table + sub r4, r4, #16 + add lr, ip, #32 + add ip, ip, r4 + sub lr, lr, r4 + vld1.8 {q5}, [ip] + vld1.8 {q6}, [lr] + + add ip, r1, r4 + vld1.8 {q0}, [r1] @ overlapping loads + vld1.8 {q1}, [ip] + + vld1.8 {q3}, [r5] @ get iv + prepare_key r2, r3 + + bl aes_decrypt + + vtbl.8 d4, {d0-d1}, d10 + vtbl.8 d5, {d0-d1}, d11 + vtbx.8 d0, {d2-d3}, d12 + vtbx.8 d1, {d2-d3}, d13 + + veor q1, q1, q2 + bl aes_decrypt + veor q0, q0, q3 @ xor with iv + + add r4, r0, r4 + vst1.8 {q1}, [r4] @ overlapping stores + vst1.8 {q0}, [r0] + + pop {r4-r6, pc} +ENDPROC(ce_aes_cbc_cts_decrypt) + + /* * aes_ctr_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds, * int blocks, u8 ctr[]) diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c index c215792a2494..cdb1a07e7ad0 100644 --- a/arch/arm/crypto/aes-ce-glue.c +++ b/arch/arm/crypto/aes-ce-glue.c @@ -35,6 +35,10 @@ asmlinkage void ce_aes_cbc_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds, int blocks, u8 iv[]); asmlinkage void ce_aes_cbc_decrypt(u8 out[], u8 const in[], u32 const rk[], int rounds, int blocks, u8 iv[]); +asmlinkage void ce_aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[], + int rounds, int bytes, u8 const iv[]); +asmlinkage void ce_aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[], + int rounds, int bytes, u8 const iv[]); asmlinkage void ce_aes_ctr_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds, int blocks, u8 ctr[]); @@ -210,48 +214,182 @@ static int ecb_decrypt(struct skcipher_request *req) return err; } -static int cbc_encrypt(struct skcipher_request *req) +static int cbc_encrypt_walk(struct skcipher_request *req, + struct skcipher_walk *walk) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); - struct skcipher_walk walk; unsigned int blocks; - int err; + int err = 0; - err = skcipher_walk_virt(&walk, req, false); - - while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { + while ((blocks = (walk->nbytes / AES_BLOCK_SIZE))) { kernel_neon_begin(); - ce_aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr, + ce_aes_cbc_encrypt(walk->dst.virt.addr, walk->src.virt.addr, ctx->key_enc, num_rounds(ctx), blocks, - walk.iv); + walk->iv); kernel_neon_end(); - err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); + err = skcipher_walk_done(walk, walk->nbytes % AES_BLOCK_SIZE); } return err; } -static int cbc_decrypt(struct skcipher_request *req) +static int cbc_encrypt(struct skcipher_request *req) { - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); struct skcipher_walk walk; - unsigned int blocks; int err; err = skcipher_walk_virt(&walk, req, false); + if (err) + return err; + return cbc_encrypt_walk(req, &walk); +} - while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { +static int cbc_decrypt_walk(struct skcipher_request *req, + struct skcipher_walk *walk) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); + unsigned int blocks; + int err = 0; + + while ((blocks = (walk->nbytes / AES_BLOCK_SIZE))) { kernel_neon_begin(); - ce_aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr, + ce_aes_cbc_decrypt(walk->dst.virt.addr, walk->src.virt.addr, ctx->key_dec, num_rounds(ctx), blocks, - walk.iv); + walk->iv); kernel_neon_end(); - err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); + err = skcipher_walk_done(walk, walk->nbytes % AES_BLOCK_SIZE); } return err; } +static int cbc_decrypt(struct skcipher_request *req) +{ + struct skcipher_walk walk; + int err; + + err = skcipher_walk_virt(&walk, req, false); + if (err) + return err; + return cbc_decrypt_walk(req, &walk); +} + +static int cts_cbc_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); + int cbc_blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2; + struct scatterlist *src = req->src, *dst = req->dst; + struct scatterlist sg_src[2], sg_dst[2]; + struct skcipher_request subreq; + struct skcipher_walk walk; + int err; + + skcipher_request_set_tfm(&subreq, tfm); + skcipher_request_set_callback(&subreq, skcipher_request_flags(req), + NULL, NULL); + + if (req->cryptlen <= AES_BLOCK_SIZE) { + if (req->cryptlen < AES_BLOCK_SIZE) + return -EINVAL; + cbc_blocks = 1; + } + + if (cbc_blocks > 0) { + skcipher_request_set_crypt(&subreq, req->src, req->dst, + cbc_blocks * AES_BLOCK_SIZE, + req->iv); + + err = skcipher_walk_virt(&walk, &subreq, false) ?: + cbc_encrypt_walk(&subreq, &walk); + if (err) + return err; + + if (req->cryptlen == AES_BLOCK_SIZE) + return 0; + + dst = src = scatterwalk_ffwd(sg_src, req->src, subreq.cryptlen); + if (req->dst != req->src) + dst = scatterwalk_ffwd(sg_dst, req->dst, + subreq.cryptlen); + } + + /* handle ciphertext stealing */ + skcipher_request_set_crypt(&subreq, src, dst, + req->cryptlen - cbc_blocks * AES_BLOCK_SIZE, + req->iv); + + err = skcipher_walk_virt(&walk, &subreq, false); + if (err) + return err; + + kernel_neon_begin(); + ce_aes_cbc_cts_encrypt(walk.dst.virt.addr, walk.src.virt.addr, + ctx->key_enc, num_rounds(ctx), walk.nbytes, + walk.iv); + kernel_neon_end(); + + return skcipher_walk_done(&walk, 0); +} + +static int cts_cbc_decrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); + int cbc_blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2; + struct scatterlist *src = req->src, *dst = req->dst; + struct scatterlist sg_src[2], sg_dst[2]; + struct skcipher_request subreq; + struct skcipher_walk walk; + int err; + + skcipher_request_set_tfm(&subreq, tfm); + skcipher_request_set_callback(&subreq, skcipher_request_flags(req), + NULL, NULL); + + if (req->cryptlen <= AES_BLOCK_SIZE) { + if (req->cryptlen < AES_BLOCK_SIZE) + return -EINVAL; + cbc_blocks = 1; + } + + if (cbc_blocks > 0) { + skcipher_request_set_crypt(&subreq, req->src, req->dst, + cbc_blocks * AES_BLOCK_SIZE, + req->iv); + + err = skcipher_walk_virt(&walk, &subreq, false) ?: + cbc_decrypt_walk(&subreq, &walk); + if (err) + return err; + + if (req->cryptlen == AES_BLOCK_SIZE) + return 0; + + dst = src = scatterwalk_ffwd(sg_src, req->src, subreq.cryptlen); + if (req->dst != req->src) + dst = scatterwalk_ffwd(sg_dst, req->dst, + subreq.cryptlen); + } + + /* handle ciphertext stealing */ + skcipher_request_set_crypt(&subreq, src, dst, + req->cryptlen - cbc_blocks * AES_BLOCK_SIZE, + req->iv); + + err = skcipher_walk_virt(&walk, &subreq, false); + if (err) + return err; + + kernel_neon_begin(); + ce_aes_cbc_cts_decrypt(walk.dst.virt.addr, walk.src.virt.addr, + ctx->key_dec, num_rounds(ctx), walk.nbytes, + walk.iv); + kernel_neon_end(); + + return skcipher_walk_done(&walk, 0); +} + static int ctr_encrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); @@ -486,6 +624,22 @@ static struct skcipher_alg aes_algs[] = { { .setkey = ce_aes_setkey, .encrypt = cbc_encrypt, .decrypt = cbc_decrypt, +}, { + .base.cra_name = "__cts(cbc(aes))", + .base.cra_driver_name = "__cts-cbc-aes-ce", + .base.cra_priority = 300, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct crypto_aes_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .walksize = 2 * AES_BLOCK_SIZE, + .setkey = ce_aes_setkey, + .encrypt = cts_cbc_encrypt, + .decrypt = cts_cbc_decrypt, }, { .base.cra_name = "__ctr(aes)", .base.cra_driver_name = "__ctr-aes-ce", -- cgit v1.2.1 From f6680cbdb258709fe650d1e52ee1f4b9eee1ca96 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 4 Sep 2019 10:56:32 -0700 Subject: crypto: x86/aes-ni - use AES library instead of single-use AES cipher The RFC4106 key derivation code instantiates an AES cipher transform to encrypt only a single block before it is freed again. Switch to the new AES library which is more suitable for such use cases. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_glue.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index bf12bb71cecc..3e707e81afdb 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -628,26 +628,21 @@ static int xts_decrypt(struct skcipher_request *req) static int rfc4106_set_hash_subkey(u8 *hash_subkey, const u8 *key, unsigned int key_len) { - struct crypto_cipher *tfm; + struct crypto_aes_ctx ctx; int ret; - tfm = crypto_alloc_cipher("aes", 0, 0); - if (IS_ERR(tfm)) - return PTR_ERR(tfm); - - ret = crypto_cipher_setkey(tfm, key, key_len); + ret = aes_expandkey(&ctx, key, key_len); if (ret) - goto out_free_cipher; + return ret; /* Clear the data in the hash sub key container to zero.*/ /* We want to cipher all zeros to create the hash sub key. */ memset(hash_subkey, 0, RFC4106_HASH_SUBKEY_SIZE); - crypto_cipher_encrypt_one(tfm, hash_subkey, hash_subkey); + aes_encrypt(&ctx, hash_subkey, hash_subkey); -out_free_cipher: - crypto_free_cipher(tfm); - return ret; + memzero_explicit(&ctx, sizeof(ctx)); + return 0; } static int common_rfc4106_set_key(struct crypto_aead *aead, const u8 *key, -- cgit v1.2.1 From 007b3cf0af8cb7dc18828fbaf31c86d7d878f2c6 Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Fri, 30 Aug 2019 14:01:39 -0700 Subject: arm64: dts: imx8mq: Add CAAM node MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add node for CAAM - Cryptographic Acceleration and Assurance Module. Signed-off-by: Horia Geantă Signed-off-by: Andrey Smirnov Cc: Cory Tusar Cc: Chris Healy Cc: Lucas Stach Cc: Herbert Xu Cc: Shawn Guo Cc: Iuliana Prodan Cc: linux-crypto@vger.kernel.org Cc: linux-kernel@vger.kernel.org Acked-by: Li Yang Signed-off-by: Herbert Xu --- arch/arm64/boot/dts/freescale/imx8mq.dtsi | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'arch') diff --git a/arch/arm64/boot/dts/freescale/imx8mq.dtsi b/arch/arm64/boot/dts/freescale/imx8mq.dtsi index d09b808eff87..752d5a61878c 100644 --- a/arch/arm64/boot/dts/freescale/imx8mq.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mq.dtsi @@ -728,6 +728,36 @@ status = "disabled"; }; + crypto: crypto@30900000 { + compatible = "fsl,sec-v4.0"; + #address-cells = <1>; + #size-cells = <1>; + reg = <0x30900000 0x40000>; + ranges = <0 0x30900000 0x40000>; + interrupts = ; + clocks = <&clk IMX8MQ_CLK_AHB>, + <&clk IMX8MQ_CLK_IPG_ROOT>; + clock-names = "aclk", "ipg"; + + sec_jr0: jr@1000 { + compatible = "fsl,sec-v4.0-job-ring"; + reg = <0x1000 0x1000>; + interrupts = ; + }; + + sec_jr1: jr@2000 { + compatible = "fsl,sec-v4.0-job-ring"; + reg = <0x2000 0x1000>; + interrupts = ; + }; + + sec_jr2: jr@3000 { + compatible = "fsl,sec-v4.0-job-ring"; + reg = <0x3000 0x1000>; + interrupts = ; + }; + }; + i2c1: i2c@30a20000 { compatible = "fsl,imx8mq-i2c", "fsl,imx21-i2c"; reg = <0x30a20000 0x10000>; -- cgit v1.2.1