summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jeffrey Walton <noloader@gmail.com> 2018-06-23 13:47:59 -0400
committer: Jeffrey Walton <noloader@gmail.com> 2018-06-23 13:47:59 -0400
commit: 2d0d87b57ad227324ac59fee0ac58808050cc239 (patch)
tree: cac5891aa740b53cd53d515a1b4ad96ac0dcb1d7
parent: 09c8ae2835352e739f9fa4ec51eb3a8100499ed0 (diff)
download: cryptopp-git-2d0d87b57ad227324ac59fee0ac58808050cc239.tar.gz
Use pre-splatted key table for LEA NEON and Aarch64
-rw-r--r-- lea-simd.cpp 12
-rw-r--r-- lea.cpp 13
2 files changed, 20 insertions, 5 deletions
diff --git a/lea-simd.cpp b/lea-simd.cpp
index a43b719a..8dacdf56 100644
--- a/lea-simd.cpp
+++ b/lea-simd.cpp
@@ -140,7 +140,7 @@ uint32x4_t UnpackHigh64(uint32x4_t a, uint32x4_t b)
template <unsigned int IDX>
inline uint32x4_t LoadKey(const word32 rkey[])
{
- return vdupq_n_u32(rkey[IDX]);
+ return vld1q_u32(&rkey[IDX*4]);
}
template <unsigned int IDX>
@@ -1012,6 +1012,16 @@ size_t LEA_Dec_AdvancedProcessBlocks_SSSE3(const word32* subKeys, size_t rounds,
#endif // CRYPTOPP_SSSE3_AVAILABLE
#if defined(CRYPTOPP_ARM_NEON_AVAILABLE)
+void LEA_SplatKeys_NEON(SecBlock<word32>& rkeys)
+{
+ SecBlock<word32> temp(rkeys.size() * 4);
+ for (size_t i=0, j=0; i<rkeys.size(); i++, j+=4)
+ {
+ vst1q_u32(&temp[j], vdupq_n_u32(rkeys[i]));
+ }
+ std::swap(rkeys, temp);
+}
+
size_t LEA_Enc_AdvancedProcessBlocks_NEON(const word32* subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
diff --git a/lea.cpp b/lea.cpp
index 35149157..122313da 100644
--- a/lea.cpp
+++ b/lea.cpp
@@ -567,6 +567,8 @@ extern size_t LEA_Dec_AdvancedProcessBlocks_SSSE3(const word32* subKeys, size_t
# endif
# if (CRYPTOPP_ARM_NEON_AVAILABLE)
+extern void LEA_SplatKeys_NEON(SecBlock<word32>& rkeys);
+
extern size_t LEA_Enc_AdvancedProcessBlocks_NEON(const word32* subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
@@ -606,13 +608,16 @@ void LEA::Base::UncheckedSetKey(const byte *userKey, unsigned int keyLength, con
CRYPTOPP_ASSERT(0);;
}
+ // If we pre-splat the round keys at setup then we avoid a shuffle
+ // at runtime for each subkey used during encryption and decryption.
+ // Pre-splatting saves about 0.7 to 1.0 cpb at the cost of 4x storage.
#if (CRYPTOPP_LEA_ADVANCED_PROCESS_BLOCKS) && (CRYPTOPP_SSSE3_AVAILABLE)
if (HasSSSE3())
- {
- // If we pre-splat the round keys at setup then we avoid a shuffle
- // at runtime for each subkey used during encryption and decryption.
LEA_SplatKeys_SSSE3(m_rkey);
- }
+#endif
+#if (CRYPTOPP_LEA_ADVANCED_PROCESS_BLOCKS) && (CRYPTOPP_ARM_NEON_AVAILABLE)
+ if (HasNEON())
+ LEA_SplatKeys_NEON(m_rkey);
#endif
}