diff options
author | Jeffrey Walton <noloader@gmail.com> | 2019-10-28 01:12:58 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-10-28 01:12:58 -0400 |
commit | c831d6ffeeb80ea805502e62ad795afb8ef6fdff (patch) | |
tree | 01361e7f350be558699c1cec28d1780c693a9836 /simon128_simd.cpp | |
parent | 6da6b7f5ace3d342942a2f5aa39fe8542da798c7 (diff) | |
download | cryptopp-git-c831d6ffeeb80ea805502e62ad795afb8ef6fdff.tar.gz |
Pre-splat SIMON and SPECK keys when appropriate for Altivec (PR #910)
SIMON and SPECK keys can be pre-splatted in the forward direction when Altivec instructions will be used. Pre-splatting does not work for the reverse transformation. It breaks modes like CBC, so the speed-up is only applied to the forward transformation.
Diffstat (limited to 'simon128_simd.cpp')
-rw-r--r-- | simon128_simd.cpp | 23 |
1 file changed, 15 insertions, 8 deletions
diff --git a/simon128_simd.cpp b/simon128_simd.cpp index a91da0f5..b780cf7e 100644 --- a/simon128_simd.cpp +++ b/simon128_simd.cpp @@ -66,7 +66,7 @@ using CryptoPP::vec_swap; // SunCC #if defined(_MSC_VER) && !defined(_M_ARM64)
inline uint64x2_t vld1q_dup_u64(const uint64_t* ptr)
{
- return vmovq_n_u64(*ptr);
+ return vmovq_n_u64(*ptr);
}
#endif
@@ -536,6 +536,7 @@ using CryptoPP::uint64x2_p; using CryptoPP::VecAnd;
using CryptoPP::VecXor;
+using CryptoPP::VecLoad;
using CryptoPP::VecPermute;
// Rotate left by bit count
@@ -576,16 +577,18 @@ inline void SIMON128_Enc_Block(uint32x4_p &block, const word64 *subkeys, unsigne for (int i = 0; i < static_cast<int>(rounds & ~1)-1; i += 2)
{
- const uint64x2_p rk1 = vec_splats((unsigned long long)subkeys[i]);
- y1 = VecXor(VecXor(y1, SIMON128_f(x1)), rk1);
+ // Round keys are pre-splatted in forward direction
+ const uint64x2_p rk1 = VecLoad(subkeys+i*2);
+ const uint64x2_p rk2 = VecLoad(subkeys+i*2+2);
- const uint64x2_p rk2 = vec_splats((unsigned long long)subkeys[i+1]);
+ y1 = VecXor(VecXor(y1, SIMON128_f(x1)), rk1);
x1 = VecXor(VecXor(x1, SIMON128_f(y1)), rk2);
}
if (rounds & 1)
{
- const uint64x2_p rk = vec_splats((unsigned long long)subkeys[rounds-1]);
+ // Round keys are pre-splatted in forward direction
+ const uint64x2_p rk = VecLoad(subkeys+rounds*2-2);
y1 = VecXor(VecXor(y1, SIMON128_f(x1)), rk);
std::swap(x1, y1);
}
@@ -667,12 +670,14 @@ inline void SIMON128_Enc_6_Blocks(uint32x4_p &block0, uint32x4_p &block1, for (int i = 0; i < static_cast<int>(rounds & ~1)-1; i += 2)
{
- const uint64x2_p rk1 = vec_splats((unsigned long long)subkeys[i]);
+ // Round keys are pre-splatted in forward direction
+ const uint64x2_p rk1 = VecLoad(subkeys+i*2);
+ const uint64x2_p rk2 = VecLoad(subkeys+i*2+2);
+
y1 = VecXor(VecXor(y1, SIMON128_f(x1)), rk1);
y2 = VecXor(VecXor(y2, SIMON128_f(x2)), rk1);
y3 = VecXor(VecXor(y3, SIMON128_f(x3)), rk1);
- const uint64x2_p rk2 = vec_splats((unsigned long long)subkeys[i+1]);
x1 = VecXor(VecXor(x1, SIMON128_f(y1)), rk2);
x2 = VecXor(VecXor(x2, SIMON128_f(y2)), rk2);
x3 = VecXor(VecXor(x3, SIMON128_f(y3)), rk2);
@@ -680,7 +685,9 @@ inline void SIMON128_Enc_6_Blocks(uint32x4_p &block0, uint32x4_p &block1, if (rounds & 1)
{
- const uint64x2_p rk = vec_splats((unsigned long long)subkeys[rounds-1]);
+ // Round keys are pre-splatted in forward direction
+ const uint64x2_p rk = VecLoad(subkeys+rounds*2-2);
+
y1 = VecXor(VecXor(y1, SIMON128_f(x1)), rk);
y2 = VecXor(VecXor(y2, SIMON128_f(x2)), rk);
y3 = VecXor(VecXor(y3, SIMON128_f(x3)), rk);
|