From 2d0d87b57ad227324ac59fee0ac58808050cc239 Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Sat, 23 Jun 2018 13:47:59 -0400 Subject: Use pre-splatted key table for LEA NEON and Aarch64 --- lea-simd.cpp | 12 +++++++++++- lea.cpp | 13 +++++++++---- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/lea-simd.cpp b/lea-simd.cpp index a43b719a..8dacdf56 100644 --- a/lea-simd.cpp +++ b/lea-simd.cpp @@ -140,7 +140,7 @@ uint32x4_t UnpackHigh64(uint32x4_t a, uint32x4_t b) template inline uint32x4_t LoadKey(const word32 rkey[]) { - return vdupq_n_u32(rkey[IDX]); + return vld1q_u32(&rkey[IDX*4]); } template @@ -1012,6 +1012,16 @@ size_t LEA_Dec_AdvancedProcessBlocks_SSSE3(const word32* subKeys, size_t rounds, #endif // CRYPTOPP_SSSE3_AVAILABLE #if defined(CRYPTOPP_ARM_NEON_AVAILABLE) +void LEA_SplatKeys_NEON(SecBlock& rkeys) +{ + SecBlock temp(rkeys.size() * 4); + for (size_t i=0, j=0; i& rkeys); + extern size_t LEA_Enc_AdvancedProcessBlocks_NEON(const word32* subKeys, size_t rounds, const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags); @@ -606,13 +608,16 @@ void LEA::Base::UncheckedSetKey(const byte *userKey, unsigned int keyLength, con CRYPTOPP_ASSERT(0);; } + // If we pre-splat the round keys at setup then we avoid a shuffle + // at runtime for each subkey used during encryption and decryption. + // Pre-splatting saves about 0.7 to 1.0 cpb at the cost of 4x storage. #if (CRYPTOPP_LEA_ADVANCED_PROCESS_BLOCKS) && (CRYPTOPP_SSSE3_AVAILABLE) if (HasSSSE3()) - { - // If we pre-splat the round keys at setup then we avoid a shuffle - // at runtime for each subkey used during encryption and decryption. LEA_SplatKeys_SSSE3(m_rkey); - } +#endif +#if (CRYPTOPP_LEA_ADVANCED_PROCESS_BLOCKS) && (CRYPTOPP_ARM_NEON_AVAILABLE) + if (HasNEON()) + LEA_SplatKeys_NEON(m_rkey); #endif } -- cgit v1.2.1