From c831d6ffeeb80ea805502e62ad795afb8ef6fdff Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Mon, 28 Oct 2019 01:12:58 -0400 Subject: Pre-splat SIMON and SPECK keys when appropriate for Altivec (PR #910) SIMON and SPECK keys can be pre-splatted in the forward direction when Altivec instructions will be used. Pre-splatting does not work for the reverse transformation. It breaks modes like CBC, so the speed-up is only applied to the forward transformation. --- simon128_simd.cpp | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) (limited to 'simon128_simd.cpp') diff --git a/simon128_simd.cpp b/simon128_simd.cpp index a91da0f5..b780cf7e 100644 --- a/simon128_simd.cpp +++ b/simon128_simd.cpp @@ -66,7 +66,7 @@ using CryptoPP::vec_swap; // SunCC #if defined(_MSC_VER) && !defined(_M_ARM64) inline uint64x2_t vld1q_dup_u64(const uint64_t* ptr) { - return vmovq_n_u64(*ptr); + return vmovq_n_u64(*ptr); } #endif @@ -536,6 +536,7 @@ using CryptoPP::uint64x2_p; using CryptoPP::VecAnd; using CryptoPP::VecXor; +using CryptoPP::VecLoad; using CryptoPP::VecPermute; // Rotate left by bit count @@ -576,16 +577,18 @@ inline void SIMON128_Enc_Block(uint32x4_p &block, const word64 *subkeys, unsigne for (int i = 0; i < static_cast<int>(rounds & ~1)-1; i += 2) { - const uint64x2_p rk1 = vec_splats((unsigned long long)subkeys[i]); - y1 = VecXor(VecXor(y1, SIMON128_f(x1)), rk1); + // Round keys are pre-splatted in forward direction + const uint64x2_p rk1 = VecLoad(subkeys+i*2); + const uint64x2_p rk2 = VecLoad(subkeys+i*2+2); - const uint64x2_p rk2 = vec_splats((unsigned long long)subkeys[i+1]); + y1 = VecXor(VecXor(y1, SIMON128_f(x1)), rk1); x1 = VecXor(VecXor(x1, SIMON128_f(y1)), rk2); } if (rounds & 1) { - const uint64x2_p rk = vec_splats((unsigned long long)subkeys[rounds-1]); + // Round keys are pre-splatted in forward direction + const uint64x2_p rk = VecLoad(subkeys+rounds*2-2); y1 = VecXor(VecXor(y1, SIMON128_f(x1)), rk); std::swap(x1, y1); } @@ -667,12 
+670,14 @@ inline void SIMON128_Enc_6_Blocks(uint32x4_p &block0, uint32x4_p &block1, for (int i = 0; i < static_cast<int>(rounds & ~1)-1; i += 2) { - const uint64x2_p rk1 = vec_splats((unsigned long long)subkeys[i]); + // Round keys are pre-splatted in forward direction + const uint64x2_p rk1 = VecLoad(subkeys+i*2); + const uint64x2_p rk2 = VecLoad(subkeys+i*2+2); + y1 = VecXor(VecXor(y1, SIMON128_f(x1)), rk1); y2 = VecXor(VecXor(y2, SIMON128_f(x2)), rk1); y3 = VecXor(VecXor(y3, SIMON128_f(x3)), rk1); - const uint64x2_p rk2 = vec_splats((unsigned long long)subkeys[i+1]); x1 = VecXor(VecXor(x1, SIMON128_f(y1)), rk2); x2 = VecXor(VecXor(x2, SIMON128_f(y2)), rk2); x3 = VecXor(VecXor(x3, SIMON128_f(y3)), rk2); @@ -680,7 +685,9 @@ inline void SIMON128_Enc_6_Blocks(uint32x4_p &block0, uint32x4_p &block1, if (rounds & 1) { - const uint64x2_p rk = vec_splats((unsigned long long)subkeys[rounds-1]); + // Round keys are pre-splatted in forward direction + const uint64x2_p rk = VecLoad(subkeys+rounds*2-2); + y1 = VecXor(VecXor(y1, SIMON128_f(x1)), rk); y2 = VecXor(VecXor(y2, SIMON128_f(x2)), rk); y3 = VecXor(VecXor(y3, SIMON128_f(x3)), rk); -- cgit v1.2.1