summaryrefslogtreecommitdiff
path: root/simon128_simd.cpp
diff options
context:
space:
mode:
authorJeffrey Walton <noloader@gmail.com>2019-10-28 01:12:58 -0400
committerGitHub <noreply@github.com>2019-10-28 01:12:58 -0400
commitc831d6ffeeb80ea805502e62ad795afb8ef6fdff (patch)
tree01361e7f350be558699c1cec28d1780c693a9836 /simon128_simd.cpp
parent6da6b7f5ace3d342942a2f5aa39fe8542da798c7 (diff)
downloadcryptopp-git-c831d6ffeeb80ea805502e62ad795afb8ef6fdff.tar.gz
Pre-splat SIMON and SPECK keys when appropriate for Altivec (PR #910)
SIMON and SPECK keys can be pre-splatted in the forward direction when Altivec instructions will be used. Pre-splatting does not work for the reverse transformation. It breaks modes like CBC, so the speed-up is only applied to the forward transformation.
Diffstat (limited to 'simon128_simd.cpp')
-rw-r--r--simon128_simd.cpp23
1 file changed, 15 insertions, 8 deletions
diff --git a/simon128_simd.cpp b/simon128_simd.cpp
index a91da0f5..b780cf7e 100644
--- a/simon128_simd.cpp
+++ b/simon128_simd.cpp
@@ -66,7 +66,7 @@ using CryptoPP::vec_swap; // SunCC
#if defined(_MSC_VER) && !defined(_M_ARM64)
inline uint64x2_t vld1q_dup_u64(const uint64_t* ptr)
{
- return vmovq_n_u64(*ptr);
+ return vmovq_n_u64(*ptr);
}
#endif
@@ -536,6 +536,7 @@ using CryptoPP::uint64x2_p;
using CryptoPP::VecAnd;
using CryptoPP::VecXor;
+using CryptoPP::VecLoad;
using CryptoPP::VecPermute;
// Rotate left by bit count
@@ -576,16 +577,18 @@ inline void SIMON128_Enc_Block(uint32x4_p &block, const word64 *subkeys, unsigne
for (int i = 0; i < static_cast<int>(rounds & ~1)-1; i += 2)
{
- const uint64x2_p rk1 = vec_splats((unsigned long long)subkeys[i]);
- y1 = VecXor(VecXor(y1, SIMON128_f(x1)), rk1);
+ // Round keys are pre-splatted in forward direction
+ const uint64x2_p rk1 = VecLoad(subkeys+i*2);
+ const uint64x2_p rk2 = VecLoad(subkeys+i*2+2);
- const uint64x2_p rk2 = vec_splats((unsigned long long)subkeys[i+1]);
+ y1 = VecXor(VecXor(y1, SIMON128_f(x1)), rk1);
x1 = VecXor(VecXor(x1, SIMON128_f(y1)), rk2);
}
if (rounds & 1)
{
- const uint64x2_p rk = vec_splats((unsigned long long)subkeys[rounds-1]);
+ // Round keys are pre-splatted in forward direction
+ const uint64x2_p rk = VecLoad(subkeys+rounds*2-2);
y1 = VecXor(VecXor(y1, SIMON128_f(x1)), rk);
std::swap(x1, y1);
}
@@ -667,12 +670,14 @@ inline void SIMON128_Enc_6_Blocks(uint32x4_p &block0, uint32x4_p &block1,
for (int i = 0; i < static_cast<int>(rounds & ~1)-1; i += 2)
{
- const uint64x2_p rk1 = vec_splats((unsigned long long)subkeys[i]);
+ // Round keys are pre-splatted in forward direction
+ const uint64x2_p rk1 = VecLoad(subkeys+i*2);
+ const uint64x2_p rk2 = VecLoad(subkeys+i*2+2);
+
y1 = VecXor(VecXor(y1, SIMON128_f(x1)), rk1);
y2 = VecXor(VecXor(y2, SIMON128_f(x2)), rk1);
y3 = VecXor(VecXor(y3, SIMON128_f(x3)), rk1);
- const uint64x2_p rk2 = vec_splats((unsigned long long)subkeys[i+1]);
x1 = VecXor(VecXor(x1, SIMON128_f(y1)), rk2);
x2 = VecXor(VecXor(x2, SIMON128_f(y2)), rk2);
x3 = VecXor(VecXor(x3, SIMON128_f(y3)), rk2);
@@ -680,7 +685,9 @@ inline void SIMON128_Enc_6_Blocks(uint32x4_p &block0, uint32x4_p &block1,
if (rounds & 1)
{
- const uint64x2_p rk = vec_splats((unsigned long long)subkeys[rounds-1]);
+ // Round keys are pre-splatted in forward direction
+ const uint64x2_p rk = VecLoad(subkeys+rounds*2-2);
+
y1 = VecXor(VecXor(y1, SIMON128_f(x1)), rk);
y2 = VecXor(VecXor(y2, SIMON128_f(x2)), rk);
y3 = VecXor(VecXor(y3, SIMON128_f(x3)), rk);