From e666a4458206c16139d64e4937eaf95f0d47a175 Mon Sep 17 00:00:00 2001 From: Yidi Lin Date: Thu, 12 Jan 2023 14:49:43 +0800 Subject: vboot: add arm64 SHA256 extension support Copy sha256_armv8a_ce_a64.S from [1] and make some small changes to make it fit in vboot_reference build environment. With this CL, the boot time gets 126ms improvement on Tentacruel. The `vboot kernel verification` is reduced to 12ms. [1] https://github.com/OP-TEE/optee_os/blob/master/core/arch/arm/crypto/sha256_armv8a_ce_a64.S BRANCH=corsola BUG=b:263514393 TEST=make install_dut_test; ./build/tests/vb2_sha256_x86_tests TEST=check `cbmem -t` on Tentacruel Change-Id: Ic9abeae9687b2162d7ddadd46111ec20f34e771c Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/vboot_reference/+/4170144 Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/vboot_reference/+/4380968 Auto-Submit: Phoebe Wang Reviewed-by: Cheng Yueh Tested-by: Phoebe Wang Commit-Queue: Cheng Yueh --- Makefile | 21 ++++- firmware/2lib/2hwcrypto.c | 118 ++++++++++++++++++++++++ firmware/2lib/2sha256_arm.c | 27 +++++ firmware/2lib/2sha256_x86.c | 100 ++---------------------- firmware/2lib/2stub_hwcrypto.c | 2 +- firmware/2lib/include/2sha_private.h | 5 ++ firmware/2lib/sha256_armv8a_ce_a64.S | 144 +++++++++++++++++++++++++++++++++++ 7 files changed, 321 insertions(+), 96 deletions(-) create mode 100644 firmware/2lib/2hwcrypto.c create mode 100644 firmware/2lib/2sha256_arm.c create mode 100644 firmware/2lib/sha256_armv8a_ce_a64.S diff --git a/Makefile b/Makefile index 0f58909f..6a21f1a9 100644 --- a/Makefile +++ b/Makefile @@ -395,8 +395,19 @@ endif ifneq ($(filter-out 0,${X86_SHA_EXT}),) CFLAGS += -DX86_SHA_EXT FWLIB_SRCS += \ + firmware/2lib/2hwcrypto.c \ firmware/2lib/2sha256_x86.c endif + +ifneq ($(filter-out 0,${ARMV8_CRYPTO_EXT}),) +CFLAGS += -DARMV8_CRYPTO_EXT +FWLIB_SRCS += \ + firmware/2lib/2hwcrypto.c \ + firmware/2lib/2sha256_arm.c +FWLIB_ASMS += \ + 
firmware/2lib/sha256_armv8a_ce_a64.S +endif + # Even if X86_SHA_EXT is 0 we need cflags since this will be compiled for tests ${BUILD}/firmware/2lib/2sha256_x86.o: CFLAGS += -mssse3 -mno-avx -msha @@ -410,7 +421,7 @@ FWLIB_SRCS += \ firmware/2lib/2stub.c endif -FWLIB_OBJS = ${FWLIB_SRCS:%.c=${BUILD}/%.o} +FWLIB_OBJS = ${FWLIB_SRCS:%.c=${BUILD}/%.o} ${FWLIB_ASMS:%.S=${BUILD}/%.o} TLCL_OBJS = ${TLCL_SRCS:%.c=${BUILD}/%.o} ALL_OBJS += ${FWLIB_OBJS} ${TLCL_OBJS} @@ -1109,9 +1120,9 @@ DUT_TEST_BINS = $(addprefix ${BUILD}/,${DUT_TEST_NAMES}) # Special build for sha256_x86 test ${BUILD}/tests/vb2_sha256_x86_tests: \ - ${BUILD}/firmware/2lib/2sha256_x86.o + ${BUILD}/firmware/2lib/2sha256_x86.o ${BUILD}/firmware/2lib/2hwcrypto.o ${BUILD}/tests/vb2_sha256_x86_tests: \ - LIBS += ${BUILD}/firmware/2lib/2sha256_x86.o + LIBS += ${BUILD}/firmware/2lib/2sha256_x86.o ${BUILD}/firmware/2lib/2hwcrypto.o .PHONY: install_dut_test install_dut_test: ${DUT_TEST_BINS} @@ -1147,6 +1158,10 @@ ${BUILD}/%.o: ${BUILD}/%.c @${PRINTF} " CC $(subst ${BUILD}/,,$@)\n" ${Q}${CC} ${CFLAGS} ${INCLUDES} -c -o $@ $< +${BUILD}/%.o: %.S + @${PRINTF} " CC $(subst ${BUILD}/,,$@)\n" + ${Q}${CC} ${CFLAGS} ${INCLUDES} -c -o $@ $< + # ---------------------------------------------------------------------------- # Here are the special tweaks to the generic rules. diff --git a/firmware/2lib/2hwcrypto.c b/firmware/2lib/2hwcrypto.c new file mode 100644 index 00000000..f6cc5241 --- /dev/null +++ b/firmware/2lib/2hwcrypto.c @@ -0,0 +1,118 @@ +/* Copyright 2023 The ChromiumOS Authors + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + * + * SHA256 implementation using the hardware crypto accelerator. 
+ */ + +#include "2common.h" +#include "2sha.h" +#include "2sha_private.h" +#include "2api.h" + +/* Digest state shared with the per-architecture transform backend. */ +struct vb2_sha256_context vb2_sha_ctx; + +/* + * Start a new SHA-256 digest in vb2_sha_ctx. Any other hash_alg returns + * VB2_ERROR_EX_HWCRYPTO_UNSUPPORTED. The initial hash words are stored at + * the positions given by the backend's vb2_hash_seq[]; data_size is unused. + */ +vb2_error_t vb2ex_hwcrypto_digest_init(enum vb2_hash_algorithm hash_alg, + uint32_t data_size) +{ + int i; + + if (hash_alg != VB2_HASH_SHA256) + return VB2_ERROR_EX_HWCRYPTO_UNSUPPORTED; + + for (i = 0; i < ARRAY_SIZE(vb2_hash_seq); i++) { + VB2_ASSERT(vb2_hash_seq[i] < ARRAY_SIZE(vb2_sha_ctx.h)); + vb2_sha_ctx.h[vb2_hash_seq[i]] = vb2_sha256_h0[i]; + } + + vb2_sha_ctx.size = 0; + vb2_sha_ctx.total_size = 0; + memset(vb2_sha_ctx.block, 0, sizeof(vb2_sha_ctx.block)); + + return VB2_SUCCESS; +} + +/* + * Add size bytes from buf to the digest. Input is buffered until a whole + * block is available; the buffered block and any further whole blocks in buf + * are then hashed, and the leftover tail bytes are buffered for later. + */ +vb2_error_t vb2ex_hwcrypto_digest_extend(const uint8_t *buf, uint32_t size) +{ + unsigned int remaining_blocks; + unsigned int new_size, rem_size, tmp_size; + const uint8_t *shifted_data; + + tmp_size = VB2_SHA256_BLOCK_SIZE - vb2_sha_ctx.size; + rem_size = size < tmp_size ? size : tmp_size; + + memcpy(&vb2_sha_ctx.block[vb2_sha_ctx.size], buf, rem_size); + + if (vb2_sha_ctx.size + size < VB2_SHA256_BLOCK_SIZE) { + vb2_sha_ctx.size += size; + return VB2_SUCCESS; + } + + new_size = size - rem_size; + remaining_blocks = new_size / VB2_SHA256_BLOCK_SIZE; + + shifted_data = buf + rem_size; + + vb2_sha256_transform_hwcrypto(vb2_sha_ctx.block, 1); + /* remaining_blocks is 0 if less than one whole block is left in buf. */ + vb2_sha256_transform_hwcrypto(shifted_data, remaining_blocks); + + rem_size = new_size % VB2_SHA256_BLOCK_SIZE; + + memcpy(vb2_sha_ctx.block, + &shifted_data[remaining_blocks * VB2_SHA256_BLOCK_SIZE], + rem_size); + + vb2_sha_ctx.size = rem_size; + vb2_sha_ctx.total_size += (remaining_blocks + 1) * VB2_SHA256_BLOCK_SIZE; + return VB2_SUCCESS; +} + +/* + * Pad the buffered data, append the message length in bits, hash the final + * one or two blocks and write the 32-byte digest. digest_size must equal + * VB2_SHA256_DIGEST_SIZE or VB2_ERROR_SHA_FINALIZE_DIGEST_SIZE is returned. + */ +vb2_error_t vb2ex_hwcrypto_digest_finalize(uint8_t *digest, + uint32_t digest_size) +{ + unsigned int block_nb; + unsigned int pm_size; + unsigned int size_b; + int i; + + if (digest_size != VB2_SHA256_DIGEST_SIZE) { + VB2_DEBUG("ERROR: Digest size does not match expected length.\n"); + return VB2_ERROR_SHA_FINALIZE_DIGEST_SIZE; + } + + block_nb = (1 + 
((VB2_SHA256_BLOCK_SIZE - SHA256_MIN_PAD_LEN) + < (vb2_sha_ctx.size % VB2_SHA256_BLOCK_SIZE))); + + size_b = (vb2_sha_ctx.total_size + vb2_sha_ctx.size) * 8; + pm_size = block_nb * VB2_SHA256_BLOCK_SIZE; + + memset(vb2_sha_ctx.block + vb2_sha_ctx.size, 0, + pm_size - vb2_sha_ctx.size); + vb2_sha_ctx.block[vb2_sha_ctx.size] = SHA256_PAD_BEGIN; + UNPACK32(size_b, vb2_sha_ctx.block + pm_size - 4); + + vb2_sha256_transform_hwcrypto(vb2_sha_ctx.block, block_nb); + + for (i = 0; i < ARRAY_SIZE(vb2_hash_seq); i++) { + VB2_ASSERT(vb2_hash_seq[i] < ARRAY_SIZE(vb2_sha_ctx.h)); + UNPACK32(vb2_sha_ctx.h[vb2_hash_seq[i]], &digest[i * 4]); + } + return VB2_SUCCESS; +} diff --git a/firmware/2lib/2sha256_arm.c b/firmware/2lib/2sha256_arm.c new file mode 100644 index 00000000..556cd5c1 --- /dev/null +++ b/firmware/2lib/2sha256_arm.c @@ -0,0 +1,27 @@ +/* Copyright 2023 The ChromiumOS Authors + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + * + * SHA256 implementation using ARMv8 Cryptography Extension. 
+ */ + +#include "2common.h" +#include "2sha.h" +#include "2sha_private.h" +#include "2api.h" + +const uint32_t vb2_hash_seq[8] = {0, 1, 2, 3, 4, 5, 6, 7}; + +int sha256_ce_transform(uint32_t *state, const unsigned char *buf, int blocks); + +/* + * sha256_ce_transform() tests its block counter only after a block has been + * processed, so a count of 0 would run past the end of the input. + * vb2ex_hwcrypto_digest_extend() can pass block_nb == 0; skip the call then. + */ +void vb2_sha256_transform_hwcrypto(const uint8_t *message, + unsigned int block_nb) +{ + if (block_nb) + sha256_ce_transform(vb2_sha_ctx.h, message, block_nb); +} diff --git a/firmware/2lib/2sha256_x86.c b/firmware/2lib/2sha256_x86.c index 9e745585..f0b37363 100644 --- a/firmware/2lib/2sha256_x86.c +++ b/firmware/2lib/2sha256_x86.c @@ -13,7 +13,7 @@ #include "2sha_private.h" #include "2api.h" -static struct vb2_sha256_context sha_ctx; +const uint32_t vb2_hash_seq[8] = {3, 2, 7, 6, 1, 0, 5, 4}; typedef int vb2_m128i __attribute__ ((vector_size(16))); @@ -114,8 +114,8 @@ static void vb2_sha256_transform_x86ext(const uint8_t *message, int i; const vb2_m128i shuf_mask = {0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f}; - state0 = vb2_loadu_si128((vb2_m128i *)&sha_ctx.h[0]); - state1 = vb2_loadu_si128((vb2_m128i *)&sha_ctx.h[4]); + state0 = vb2_loadu_si128((vb2_m128i *)&vb2_sha_ctx.h[0]); + state1 = vb2_loadu_si128((vb2_m128i *)&vb2_sha_ctx.h[4]); for (i = 0; i < (int) block_nb; i++) { abef_save = state0; cdgh_save = state1; @@ -160,96 +160,12 @@ static void vb2_sha256_transform_x86ext(const uint8_t *message, } - vb2_storeu_si128((vb2_m128i *)&sha_ctx.h[0], state0); - vb2_storeu_si128((vb2_m128i *)&sha_ctx.h[4], state1); + vb2_storeu_si128((vb2_m128i *)&vb2_sha_ctx.h[0], state0); + vb2_storeu_si128((vb2_m128i *)&vb2_sha_ctx.h[4], state1); } -vb2_error_t vb2ex_hwcrypto_digest_init(enum vb2_hash_algorithm hash_alg, - uint32_t data_size) +void vb2_sha256_transform_hwcrypto(const uint8_t *message, + unsigned int block_nb) { - if (hash_alg != VB2_HASH_SHA256) - return VB2_ERROR_EX_HWCRYPTO_UNSUPPORTED; - - sha_ctx.h[0] = vb2_sha256_h0[5]; - sha_ctx.h[1] = vb2_sha256_h0[4]; - sha_ctx.h[2] = vb2_sha256_h0[1]; - sha_ctx.h[3] = vb2_sha256_h0[0]; - 
sha_ctx.h[4] = vb2_sha256_h0[7]; - sha_ctx.h[5] = vb2_sha256_h0[6]; - sha_ctx.h[6] = vb2_sha256_h0[3]; - sha_ctx.h[7] = vb2_sha256_h0[2]; - sha_ctx.size = 0; - sha_ctx.total_size = 0; - memset(sha_ctx.block, 0, sizeof(sha_ctx.block)); - - return VB2_SUCCESS; -} - -vb2_error_t vb2ex_hwcrypto_digest_extend(const uint8_t *buf, uint32_t size) -{ - unsigned int remaining_blocks; - unsigned int new_size, rem_size, tmp_size; - const uint8_t *shifted_data; - - tmp_size = VB2_SHA256_BLOCK_SIZE - sha_ctx.size; - rem_size = size < tmp_size ? size : tmp_size; - - memcpy(&sha_ctx.block[sha_ctx.size], buf, rem_size); - - if (sha_ctx.size + size < VB2_SHA256_BLOCK_SIZE) { - sha_ctx.size += size; - return VB2_SUCCESS; - } - - new_size = size - rem_size; - remaining_blocks = new_size / VB2_SHA256_BLOCK_SIZE; - - shifted_data = buf + rem_size; - - vb2_sha256_transform_x86ext(sha_ctx.block, 1); - vb2_sha256_transform_x86ext(shifted_data, remaining_blocks); - - rem_size = new_size % VB2_SHA256_BLOCK_SIZE; - - memcpy(sha_ctx.block, &shifted_data[remaining_blocks * VB2_SHA256_BLOCK_SIZE], - rem_size); - - sha_ctx.size = rem_size; - sha_ctx.total_size += (remaining_blocks + 1) * VB2_SHA256_BLOCK_SIZE; - return VB2_SUCCESS; -} - -vb2_error_t vb2ex_hwcrypto_digest_finalize(uint8_t *digest, - uint32_t digest_size) -{ - unsigned int block_nb; - unsigned int pm_size; - unsigned int size_b; - unsigned int block_rem_size = sha_ctx.size % VB2_SHA256_BLOCK_SIZE; - if (digest_size != VB2_SHA256_DIGEST_SIZE) { - VB2_DEBUG("ERROR: Digest size does not match expected length.\n"); - return VB2_ERROR_SHA_FINALIZE_DIGEST_SIZE; - } - - block_nb = (1 + ((VB2_SHA256_BLOCK_SIZE - SHA256_MIN_PAD_LEN) - < block_rem_size)); - - size_b = (sha_ctx.total_size + sha_ctx.size) * 8; - pm_size = block_nb * VB2_SHA256_BLOCK_SIZE; - - memset(sha_ctx.block + sha_ctx.size, 0, pm_size - sha_ctx.size); - sha_ctx.block[sha_ctx.size] = SHA256_PAD_BEGIN; - UNPACK32(size_b, sha_ctx.block + pm_size - 4); - - 
vb2_sha256_transform_x86ext(sha_ctx.block, block_nb); - - UNPACK32(sha_ctx.h[3], &digest[ 0]); - UNPACK32(sha_ctx.h[2], &digest[ 4]); - UNPACK32(sha_ctx.h[7], &digest[ 8]); - UNPACK32(sha_ctx.h[6], &digest[12]); - UNPACK32(sha_ctx.h[1], &digest[16]); - UNPACK32(sha_ctx.h[0], &digest[20]); - UNPACK32(sha_ctx.h[5], &digest[24]); - UNPACK32(sha_ctx.h[4], &digest[28]); - return VB2_SUCCESS; + vb2_sha256_transform_x86ext(message, block_nb); } diff --git a/firmware/2lib/2stub_hwcrypto.c b/firmware/2lib/2stub_hwcrypto.c index 392c64fe..d5aa3429 100644 --- a/firmware/2lib/2stub_hwcrypto.c +++ b/firmware/2lib/2stub_hwcrypto.c @@ -7,7 +7,7 @@ #include "2api.h" -#ifndef X86_SHA_EXT +#if !defined(X86_SHA_EXT) && !defined(ARMV8_CRYPTO_EXT) __attribute__((weak)) vb2_error_t vb2ex_hwcrypto_digest_init(enum vb2_hash_algorithm hash_alg, uint32_t data_size) diff --git a/firmware/2lib/include/2sha_private.h b/firmware/2lib/include/2sha_private.h index 337f97ee..d6d4bbae 100644 --- a/firmware/2lib/include/2sha_private.h +++ b/firmware/2lib/include/2sha_private.h @@ -19,6 +19,8 @@ extern const uint32_t vb2_sha256_h0[8]; extern const uint32_t vb2_sha256_k[64]; +extern const uint32_t vb2_hash_seq[8]; +extern struct vb2_sha256_context vb2_sha_ctx; #define UNPACK32(x, str) \ { \ @@ -35,4 +37,7 @@ extern const uint32_t vb2_sha256_k[64]; | ((uint32_t) *((str) + 1) << 16) \ | ((uint32_t) *((str) + 0) << 24); \ } + +void vb2_sha256_transform_hwcrypto(const uint8_t *message, + unsigned int block_nb); #endif /* VBOOT_REFERENCE_2SHA_PRIVATE_H_ */ diff --git a/firmware/2lib/sha256_armv8a_ce_a64.S b/firmware/2lib/sha256_armv8a_ce_a64.S new file mode 100644 index 00000000..0ef6ae13 --- /dev/null +++ b/firmware/2lib/sha256_armv8a_ce_a64.S @@ -0,0 +1,144 @@ +/* SPDX-License-Identifier: BSD-2-Clause */ +/* + * Copyright 2023 The ChromiumOS Authors + * Copyright (c) 2015-2020, Linaro Limited + * Copyright (C) 2014 Linaro Ltd + */ + +/* Core SHA-224/SHA-256 transform using v8 Crypto Extensions */ + + 
.arch armv8-a+crypto + + dga .req q20 + dgav .req v20 + dgb .req q21 + dgbv .req v21 + + t0 .req v22 + t1 .req v23 + + dg0q .req q24 + dg0v .req v24 + dg1q .req q25 + dg1v .req v25 + dg2q .req q26 + dg2v .req v26 + + .macro add_only, ev, rc, s0 + mov dg2v.16b, dg0v.16b + .ifeq \ev + add t1.4s, v\s0\().4s, \rc\().4s + sha256h dg0q, dg1q, t0.4s + sha256h2 dg1q, dg2q, t0.4s + .else + .ifnb \s0 + add t0.4s, v\s0\().4s, \rc\().4s + .endif + sha256h dg0q, dg1q, t1.4s + sha256h2 dg1q, dg2q, t1.4s + .endif + .endm + + .macro add_update, ev, rc, s0, s1, s2, s3 + sha256su0 v\s0\().4s, v\s1\().4s + add_only \ev, \rc, \s1 + sha256su1 v\s0\().4s, v\s2\().4s, v\s3\().4s + .endm + + .macro FUNC name colon + .section .text.\name , "ax" , %progbits + .global \name + .type \name , %function + .balign 4 + \name \colon + .endm + + .macro END_FUNC name + .size \name , .-\name + .endm + + /* + * sha256_ce_transform(x0: uint32_t state[8], x1: const u8 *src, + * w2: int blocks); blocks must be non-zero + */ +FUNC sha256_ce_transform , : + /* load round constants into v0-v15 (v8-v15 are not restored) */ + adr x8, .Lsha2_rcon + ld1 { v0.4s- v3.4s}, [x8], #64 + ld1 { v4.4s- v7.4s}, [x8], #64 + ld1 { v8.4s-v11.4s}, [x8], #64 + ld1 {v12.4s-v15.4s}, [x8] + + /* load the eight state words into dgav/dgbv */ + mov x9, x0 + ld1 {dgav.4s}, [x9], #16 + ld1 {dgbv.4s}, [x9] + + /* load one 64-byte block and count it as consumed */ +0: ld1 {v16.16b-v19.16b}, [x1], #64 + sub w2, w2, #1 + + rev32 v16.16b, v16.16b + rev32 v17.16b, v17.16b + rev32 v18.16b, v18.16b + rev32 v19.16b, v19.16b + +1: add t0.4s, v16.4s, v0.4s + mov dg0v.16b, dgav.16b + mov dg1v.16b, dgbv.16b + + add_update 0, v1, 16, 17, 18, 19 + add_update 1, v2, 17, 18, 19, 16 + add_update 0, v3, 18, 19, 16, 17 + add_update 1, v4, 19, 16, 17, 18 + + add_update 0, v5, 16, 17, 18, 19 + add_update 1, v6, 17, 18, 19, 16 + add_update 0, v7, 18, 19, 16, 17 + add_update 1, v8, 19, 16, 17, 18 + + add_update 0, v9, 16, 17, 18, 19 + add_update 1, v10, 17, 18, 19, 16 + add_update 0, v11, 18, 19, 16, 17 + add_update 1, v12, 19, 16, 17, 18 + + add_only 0, v13, 17 + add_only 1, v14, 18 
+ add_only 0, v15, 19 + add_only 1 + + /* add this block's result into the running state */ + add dgav.4s, dgav.4s, dg0v.4s + add dgbv.4s, dgbv.4s, dg1v.4s + + /* repeat while blocks remain; the count is only tested here */ + cbnz w2, 0b + + /* store the updated state back through x0 */ +3: mov x9, x0 + st1 {dgav.16b}, [x9], #16 + st1 {dgbv.16b}, [x9] + ret + + /* + * The 64 SHA-256 round constants (16 rows of four words) + */ + .align 4 +.Lsha2_rcon: + .word 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5 + .word 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5 + .word 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3 + .word 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174 + .word 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc + .word 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da + .word 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7 + .word 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967 + .word 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13 + .word 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85 + .word 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3 + .word 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070 + .word 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5 + .word 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3 + .word 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208 + .word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 +END_FUNC sha256_ce_transform -- cgit v1.2.1