diff options
author | Jeffrey Walton <noloader@gmail.com> | 2018-06-23 13:47:59 -0400 |
---|---|---|
committer | Jeffrey Walton <noloader@gmail.com> | 2018-06-23 13:47:59 -0400 |
commit | 2d0d87b57ad227324ac59fee0ac58808050cc239 (patch) | |
tree | cac5891aa740b53cd53d515a1b4ad96ac0dcb1d7 | |
parent | 09c8ae2835352e739f9fa4ec51eb3a8100499ed0 (diff) | |
download | cryptopp-git-2d0d87b57ad227324ac59fee0ac58808050cc239.tar.gz |
Use pre-splatted key table for LEA NEON and Aarch64
-rw-r--r-- | lea-simd.cpp | 12 | ||||
-rw-r--r-- | lea.cpp | 13 |
2 files changed, 20 insertions, 5 deletions
diff --git a/lea-simd.cpp b/lea-simd.cpp
index a43b719a..8dacdf56 100644
--- a/lea-simd.cpp
+++ b/lea-simd.cpp
@@ -140,7 +140,7 @@ uint32x4_t UnpackHigh64(uint32x4_t a, uint32x4_t b)
template <unsigned int IDX>
inline uint32x4_t LoadKey(const word32 rkey[])
{
- return vdupq_n_u32(rkey[IDX]);
+ return vld1q_u32(&rkey[IDX*4]);
}
template <unsigned int IDX>
@@ -1012,6 +1012,16 @@ size_t LEA_Dec_AdvancedProcessBlocks_SSSE3(const word32* subKeys, size_t rounds,
#endif // CRYPTOPP_SSSE3_AVAILABLE
#if defined(CRYPTOPP_ARM_NEON_AVAILABLE)
+void LEA_SplatKeys_NEON(SecBlock<word32>& rkeys)
+{
+ SecBlock<word32> temp(rkeys.size() * 4);
+ for (size_t i=0, j=0; i<rkeys.size(); i++, j+=4)
+ {
+ vst1q_u32(&temp[j], vdupq_n_u32(rkeys[i]));
+ }
+ std::swap(rkeys, temp);
+}
+
size_t LEA_Enc_AdvancedProcessBlocks_NEON(const word32* subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
@@ -567,6 +567,8 @@ extern size_t LEA_Dec_AdvancedProcessBlocks_SSSE3(const word32* subKeys, size_t
# endif
# if (CRYPTOPP_ARM_NEON_AVAILABLE)
+extern void LEA_SplatKeys_NEON(SecBlock<word32>& rkeys);
+
extern size_t LEA_Enc_AdvancedProcessBlocks_NEON(const word32* subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
@@ -606,13 +608,16 @@ void LEA::Base::UncheckedSetKey(const byte *userKey, unsigned int keyLength, con
CRYPTOPP_ASSERT(0);;
}
+ // If we pre-splat the round keys at setup then we avoid a shuffle
+ // at runtime for each subkey used during encryption and decryption.
+ // Pre-splatting saves about 0.7 to 1.0 cpb at the cost of 4x storage.
#if (CRYPTOPP_LEA_ADVANCED_PROCESS_BLOCKS) && (CRYPTOPP_SSSE3_AVAILABLE)
if (HasSSSE3())
- {
- // If we pre-splat the round keys at setup then we avoid a shuffle
- // at runtime for each subkey used during encryption and decryption.
LEA_SplatKeys_SSSE3(m_rkey);
- }
+#endif
+#if (CRYPTOPP_LEA_ADVANCED_PROCESS_BLOCKS) && (CRYPTOPP_ARM_NEON_AVAILABLE)
+ if (HasNEON())
+ LEA_SplatKeys_NEON(m_rkey);
#endif
}
|