-rw-r--r-- | crypto/modes/asm/ghash-riscv64.pl | 153
-rw-r--r-- | crypto/modes/gcm128.c             |  25
2 files changed, 170 insertions, 8 deletions
diff --git a/crypto/modes/asm/ghash-riscv64.pl b/crypto/modes/asm/ghash-riscv64.pl
index bdeca8cd9e..691231ffa1 100644
--- a/crypto/modes/asm/ghash-riscv64.pl
+++ b/crypto/modes/asm/ghash-riscv64.pl
@@ -229,6 +229,159 @@ gcm_gmult_rv64i_zbc__zbkb:
 ___
 }
 
+################################################################################
+# void gcm_ghash_rv64i_zbc(u64 Xi[2], const u128 Htable[16],
+#                          const u8 *inp, size_t len);
+# void gcm_ghash_rv64i_zbc__zbkb(u64 Xi[2], const u128 Htable[16],
+#                                const u8 *inp, size_t len);
+#
+# input:  Xi: current hash value
+#         Htable: copy of H
+#         inp: pointer to input data
+#         len: length of input data in bytes (multiple of block size)
+# output: Xi: Xi+1 (next hash value Xi)
+{
+my ($Xi,$Htable,$inp,$len,$x0,$x1,$y0,$y1) = ("a0","a1","a2","a3","a4","a5","a6","a7");
+my ($z0,$z1,$z2,$z3,$t0,$t1,$polymod) = ("t0","t1","t2","t3","t4","t5","t6");
+
+$code .= <<___;
+.p2align 3
+.globl gcm_ghash_rv64i_zbc
+.type gcm_ghash_rv64i_zbc,\@function
+gcm_ghash_rv64i_zbc:
+    # Load Xi and bit-reverse it
+    ld        $x0, 0($Xi)
+    ld        $x1, 8($Xi)
+    @{[brev8_rv64i $x0, $z0, $z1, $z2]}
+    @{[brev8_rv64i $x1, $z0, $z1, $z2]}
+
+    # Load the key (already bit-reversed)
+    ld        $y0, 0($Htable)
+    ld        $y1, 8($Htable)
+
+    # Load the reduction constant
+    la        $polymod, Lpolymod
+    lbu       $polymod, 0($polymod)
+
+Lstep:
+    # Load the input data, bit-reverse them, and XOR them with Xi
+    ld        $t0, 0($inp)
+    ld        $t1, 8($inp)
+    add       $inp, $inp, 16
+    add       $len, $len, -16
+    @{[brev8_rv64i $t0, $z0, $z1, $z2]}
+    @{[brev8_rv64i $t1, $z0, $z1, $z2]}
+    xor       $x0, $x0, $t0
+    xor       $x1, $x1, $t1
+
+    # Multiplication (without Karatsuba)
+    @{[clmulh $z3, $x1, $y1]}
+    @{[clmul  $z2, $x1, $y1]}
+    @{[clmulh $t1, $x0, $y1]}
+    @{[clmul  $z1, $x0, $y1]}
+    xor       $z2, $z2, $t1
+    @{[clmulh $t1, $x1, $y0]}
+    @{[clmul  $t0, $x1, $y0]}
+    xor       $z2, $z2, $t1
+    xor       $z1, $z1, $t0
+    @{[clmulh $t1, $x0, $y0]}
+    @{[clmul  $z0, $x0, $y0]}
+    xor       $z1, $z1, $t1
+
+    # Reduction with clmul
+    @{[clmulh $t1, $z3, $polymod]}
+    @{[clmul  $t0, $z3, $polymod]}
+    xor       $z2, $z2, $t1
+    xor       $z1, $z1, $t0
+    @{[clmulh $t1, $z2, $polymod]}
+    @{[clmul  $t0, $z2, $polymod]}
+    xor       $x1, $z1, $t1
+    xor       $x0, $z0, $t0
+
+    # Iterate over all blocks
+    bnez      $len, Lstep
+
+    # Bit-reverse final Xi back and store it
+    @{[brev8_rv64i $x0, $z0, $z1, $z2]}
+    @{[brev8_rv64i $x1, $z0, $z1, $z2]}
+    sd        $x0, 0($Xi)
+    sd        $x1, 8($Xi)
+    ret
+.size gcm_ghash_rv64i_zbc,.-gcm_ghash_rv64i_zbc
+___
+}
+
+{
+my ($Xi,$Htable,$inp,$len,$x0,$x1,$y0,$y1) = ("a0","a1","a2","a3","a4","a5","a6","a7");
+my ($z0,$z1,$z2,$z3,$t0,$t1,$polymod) = ("t0","t1","t2","t3","t4","t5","t6");
+
+$code .= <<___;
+.p2align 3
+.globl gcm_ghash_rv64i_zbc__zbkb
+.type gcm_ghash_rv64i_zbc__zbkb,\@function
+gcm_ghash_rv64i_zbc__zbkb:
+    # Load Xi and bit-reverse it
+    ld        $x0, 0($Xi)
+    ld        $x1, 8($Xi)
+    @{[brev8  $x0, $x0]}
+    @{[brev8  $x1, $x1]}
+
+    # Load the key (already bit-reversed)
+    ld        $y0, 0($Htable)
+    ld        $y1, 8($Htable)
+
+    # Load the reduction constant
+    la        $polymod, Lpolymod
+    lbu       $polymod, 0($polymod)
+
+Lstep_zkbk:
+    # Load the input data, bit-reverse them, and XOR them with Xi
+    ld        $t0, 0($inp)
+    ld        $t1, 8($inp)
+    add       $inp, $inp, 16
+    add       $len, $len, -16
+    @{[brev8  $t0, $t0]}
+    @{[brev8  $t1, $t1]}
+    xor       $x0, $x0, $t0
+    xor       $x1, $x1, $t1
+
+    # Multiplication (without Karatsuba)
+    @{[clmulh $z3, $x1, $y1]}
+    @{[clmul  $z2, $x1, $y1]}
+    @{[clmulh $t1, $x0, $y1]}
+    @{[clmul  $z1, $x0, $y1]}
+    xor       $z2, $z2, $t1
+    @{[clmulh $t1, $x1, $y0]}
+    @{[clmul  $t0, $x1, $y0]}
+    xor       $z2, $z2, $t1
+    xor       $z1, $z1, $t0
+    @{[clmulh $t1, $x0, $y0]}
+    @{[clmul  $z0, $x0, $y0]}
+    xor       $z1, $z1, $t1
+
+    # Reduction with clmul
+    @{[clmulh $t1, $z3, $polymod]}
+    @{[clmul  $t0, $z3, $polymod]}
+    xor       $z2, $z2, $t1
+    xor       $z1, $z1, $t0
+    @{[clmulh $t1, $z2, $polymod]}
+    @{[clmul  $t0, $z2, $polymod]}
+    xor       $x1, $z1, $t1
+    xor       $x0, $z0, $t0
+
+    # Iterate over all blocks
+    bnez      $len, Lstep_zkbk
+
+    # Bit-reverse final Xi back and store it
+    @{[brev8  $x0, $x0]}
+    @{[brev8  $x1, $x1]}
+    sd        $x0, 0($Xi)
+    sd        $x1, 8($Xi)
+    ret
+.size gcm_ghash_rv64i_zbc__zbkb,.-gcm_ghash_rv64i_zbc__zbkb
+___
+}
+
 $code .= <<___;
 .p2align 3
 Lbrev8_const:
diff --git a/crypto/modes/gcm128.c b/crypto/modes/gcm128.c
index 15197ea962..3a29e35c11 100644
--- a/crypto/modes/gcm128.c
+++ b/crypto/modes/gcm128.c
@@ -27,9 +27,10 @@ typedef size_t size_t_aX;
 # define PUTU32(p,v) *(u32 *)(p) = BSWAP4(v)
 #endif
 
-/* RISC-V uses C implementation of gmult as a fallback. */
+/* RISC-V uses C implementation as a fallback. */
 #if defined(__riscv)
 # define INCLUDE_C_GMULT_4BIT
+# define INCLUDE_C_GHASH_4BIT
 #endif
 
 #define PACK(s) ((size_t)(s)<<(sizeof(size_t)*8-16))
@@ -232,7 +233,7 @@ static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
 
 # endif
 
-# if !defined(GHASH_ASM)
+# if !defined(GHASH_ASM) || defined(INCLUDE_C_GHASH_4BIT)
 #  if !defined(OPENSSL_SMALL_FOOTPRINT)
 /*
  * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
@@ -401,14 +402,17 @@ void gcm_ghash_p8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                   size_t len);
 # elif defined(OPENSSL_CPUID_OBJ) && defined(__riscv) && __riscv_xlen == 64
 #  include "crypto/riscv_arch.h"
-#  define GHASH_ASM_RISCV
-#  undef GHASH
+#  define GHASH_ASM_RV64I
 /* Zbc/Zbkc (scalar crypto with clmul) based routines. */
 void gcm_init_rv64i_zbc(u128 Htable[16], const u64 Xi[2]);
 void gcm_init_rv64i_zbc__zbb(u128 Htable[16], const u64 Xi[2]);
 void gcm_init_rv64i_zbc__zbkb(u128 Htable[16], const u64 Xi[2]);
 void gcm_gmult_rv64i_zbc(u64 Xi[2], const u128 Htable[16]);
 void gcm_gmult_rv64i_zbc__zbkb(u64 Xi[2], const u128 Htable[16]);
+void gcm_ghash_rv64i_zbc(u64 Xi[2], const u128 Htable[16],
+                         const u8 *inp, size_t len);
+void gcm_ghash_rv64i_zbc__zbkb(u64 Xi[2], const u128 Htable[16],
+                               const u8 *inp, size_t len);
 # endif
 #endif
 
@@ -416,7 +420,7 @@ static void gcm_get_funcs(struct gcm_funcs_st *ctx)
 {
     /* set defaults -- overridden below as needed */
     ctx->ginit = gcm_init_4bit;
-#if !defined(GHASH_ASM) || defined(INCLUDE_C_GMULT_4BIT)
+#if !defined(GHASH_ASM)
     ctx->gmult = gcm_gmult_4bit;
 #else
     ctx->gmult = NULL;
@@ -503,19 +507,24 @@ static void gcm_get_funcs(struct gcm_funcs_st *ctx)
         ctx->ghash = gcm_ghash_p8;
     }
     return;
-#elif defined(GHASH_ASM_RISCV) && __riscv_xlen == 64
-    /* RISCV defaults; gmult already set above */
-    ctx->ghash = NULL;
+#elif defined(GHASH_ASM_RV64I)
+    /* RISCV defaults */
+    ctx->gmult = gcm_gmult_4bit;
+    ctx->ghash = gcm_ghash_4bit;
+
     if (RISCV_HAS_ZBC()) {
         if (RISCV_HAS_ZBKB()) {
             ctx->ginit = gcm_init_rv64i_zbc__zbkb;
             ctx->gmult = gcm_gmult_rv64i_zbc__zbkb;
+            ctx->ghash = gcm_ghash_rv64i_zbc__zbkb;
         } else if (RISCV_HAS_ZBB()) {
            ctx->ginit = gcm_init_rv64i_zbc__zbb;
            ctx->gmult = gcm_gmult_rv64i_zbc;
+           ctx->ghash = gcm_ghash_rv64i_zbc;
         } else {
            ctx->ginit = gcm_init_rv64i_zbc;
            ctx->gmult = gcm_gmult_rv64i_zbc;
+           ctx->ghash = gcm_ghash_rv64i_zbc;
         }
     }
     return;
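
For readers who want to follow the Lstep loop above without a RISC-V Zbc reference handy, the C sketch below models one block iteration: clmul64/clmulh64 emulate the Zbc clmul/clmulh instructions (low and high halves of a 64x64 carry-less product), the z3:z2:z1:z0 schoolbook multiply and the two-stage clmul reduction mirror the instruction sequence in the assembly. This is an illustrative model, not OpenSSL code; the function names are hypothetical, and the reduction byte that the assembly loads from Lpolymod is passed in as a parameter rather than hard-coded.

#include <stdint.h>

/* clmul: low 64 bits of the 128-bit carry-less product a (x) b */
static uint64_t clmul64(uint64_t a, uint64_t b)
{
    uint64_t r = 0;
    for (int i = 0; i < 64; i++)
        if ((b >> i) & 1)
            r ^= a << i;
    return r;
}

/* clmulh: high 64 bits of the 128-bit carry-less product a (x) b */
static uint64_t clmulh64(uint64_t a, uint64_t b)
{
    uint64_t r = 0;
    for (int i = 1; i < 64; i++)
        if ((b >> i) & 1)
            r ^= a >> (64 - i);
    return r;
}

/*
 * One block step: x holds the bit-reversed running hash already XORed with
 * the bit-reversed input block, y is the bit-reversed key H.  The 256-bit
 * product z3:z2:z1:z0 is computed without Karatsuba (four 64x64 carry-less
 * multiplies), then folded back to 128 bits with two clmul pairs against
 * the reduction byte.
 */
static void ghash_block_model(uint64_t x[2], const uint64_t y[2],
                              uint64_t polymod)
{
    uint64_t x0 = x[0], x1 = x[1], y0 = y[0], y1 = y[1];

    /* Multiplication (without Karatsuba) */
    uint64_t z3 = clmulh64(x1, y1);
    uint64_t z2 = clmul64(x1, y1) ^ clmulh64(x0, y1) ^ clmulh64(x1, y0);
    uint64_t z1 = clmul64(x0, y1) ^ clmul64(x1, y0) ^ clmulh64(x0, y0);
    uint64_t z0 = clmul64(x0, y0);

    /* Reduction with clmul: fold z3, then z2, into the low 128 bits */
    z2 ^= clmulh64(z3, polymod);
    z1 ^= clmul64(z3, polymod);
    x[1] = z1 ^ clmulh64(z2, polymod);
    x[0] = z0 ^ clmul64(z2, polymod);
}

The gcm_ghash_rv64i_zbc routine simply repeats this step per 16-byte block, which is what lets it replace a loop of separate GHASH_BLOCK/gmult calls in CRYPTO_gcm128_encrypt and CRYPTO_gcm128_decrypt.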