summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lea-simd.cpp30
-rw-r--r--lea.cpp20
-rw-r--r--lea.h10
3 files changed, 3 insertions, 57 deletions
diff --git a/lea-simd.cpp b/lea-simd.cpp
index 703930c9..75e710b7 100644
--- a/lea-simd.cpp
+++ b/lea-simd.cpp
@@ -140,11 +140,7 @@ uint32x4_t UnpackHigh64(uint32x4_t a, uint32x4_t b)
template <unsigned int IDX>
inline uint32x4_t LoadKey(const word32 rkey[])
{
-#if (CRYPTOPP_LEA_ARM_SPLAT_ROUNDKEYS)
- return vld1q_u32(&rkey[IDX*4]);
-#else
return vdupq_n_u32(rkey[IDX]);
-#endif
}
template <unsigned int IDX>
@@ -599,7 +595,8 @@ inline __m128i RotateRight<8>(const __m128i& val)
template <unsigned int IDX>
inline __m128i LoadKey(const word32 rkey[])
{
- return _mm_loadu_si128((const __m128i*) &rkey[IDX*4]);
+ float rk; std::memcpy(&rk, rkey+IDX, sizeof(rk));
+ return _mm_castps_si128(_mm_load_ps1(&rk));
}
template <unsigned int IDX>
@@ -989,17 +986,6 @@ ANONYMOUS_NAMESPACE_END
NAMESPACE_BEGIN(CryptoPP)
#if defined(CRYPTOPP_SSSE3_AVAILABLE)
-void LEA_SplatKeys_SSSE3(SecBlock<word32>& rkeys)
-{
- SecBlock<word32> temp(rkeys.size() * 4);
- for (size_t i=0, j=0; i<rkeys.size(); i++, j+=4)
- {
- _mm_storeu_si128((__m128i*) &temp[j],
- _mm_castps_si128(_mm_load_ps1((const float*) &rkeys[i])));
- }
- std::swap(rkeys, temp);
-}
-
size_t LEA_Enc_AdvancedProcessBlocks_SSSE3(const word32* subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
@@ -1016,18 +1002,6 @@ size_t LEA_Dec_AdvancedProcessBlocks_SSSE3(const word32* subKeys, size_t rounds,
#endif // CRYPTOPP_SSSE3_AVAILABLE
#if defined(CRYPTOPP_ARM_NEON_AVAILABLE)
-# if (CRYPTOPP_LEA_ARM_SPLAT_ROUNDKEYS)
-void LEA_SplatKeys_NEON(SecBlock<word32>& rkeys)
-{
- SecBlock<word32> temp(rkeys.size() * 4);
- for (size_t i=0, j=0; i<rkeys.size(); i++, j+=4)
- {
- vst1q_u32(&temp[j], vdupq_n_u32(rkeys[i]));
- }
- std::swap(rkeys, temp);
-}
-# endif // CRYPTOPP_LEA_ARM_SPLAT_ROUNDKEYS
-
size_t LEA_Enc_AdvancedProcessBlocks_NEON(const word32* subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
diff --git a/lea.cpp b/lea.cpp
index 75de3556..1efbf2f5 100644
--- a/lea.cpp
+++ b/lea.cpp
@@ -557,8 +557,6 @@ NAMESPACE_BEGIN(CryptoPP)
#if CRYPTOPP_LEA_ADVANCED_PROCESS_BLOCKS
# if defined(CRYPTOPP_SSSE3_AVAILABLE)
-extern void LEA_SplatKeys_SSSE3(SecBlock<word32>& rkeys);
-
extern size_t LEA_Enc_AdvancedProcessBlocks_SSSE3(const word32* subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
@@ -567,10 +565,6 @@ extern size_t LEA_Dec_AdvancedProcessBlocks_SSSE3(const word32* subKeys, size_t
# endif
# if (CRYPTOPP_ARM_NEON_AVAILABLE)
-# if (CRYPTOPP_LEA_ARM_SPLAT_ROUNDKEYS)
-extern void LEA_SplatKeys_NEON(SecBlock<word32>& rkeys);
-# endif // CRYPTOPP_LEA_ARM_SPLAT_ROUNDKEYS
-
extern size_t LEA_Enc_AdvancedProcessBlocks_NEON(const word32* subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
@@ -609,20 +603,6 @@ void LEA::Base::UncheckedSetKey(const byte *userKey, unsigned int keyLength, con
default:
CRYPTOPP_ASSERT(0);;
}
-
- // If we pre-splat the round keys at setup then we avoid a shuffle
- // at runtime for each subkey used during encryption and decryption.
- // Pre-splatting saves about 0.7 to 1.0 cpb at the cost of 4x storage.
-#if (CRYPTOPP_LEA_ADVANCED_PROCESS_BLOCKS) && (CRYPTOPP_SSSE3_AVAILABLE)
- if (HasSSSE3())
- LEA_SplatKeys_SSSE3(m_rkey);
-#endif
-#if (CRYPTOPP_LEA_ADVANCED_PROCESS_BLOCKS) && (CRYPTOPP_ARM_NEON_AVAILABLE)
-# if (CRYPTOPP_LEA_ARM_SPLAT_ROUNDKEYS)
- if (HasNEON())
- LEA_SplatKeys_NEON(m_rkey);
-# endif // CRYPTOPP_LEA_ARM_SPLAT_ROUNDKEYS
-#endif
}
void LEA::Enc::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
diff --git a/lea.h b/lea.h
index 5b9f74b0..d8d95700 100644
--- a/lea.h
+++ b/lea.h
@@ -19,19 +19,11 @@
# define CRYPTOPP_LEA_ADVANCED_PROCESS_BLOCKS 1
#endif
-// Define this if you want to pre-splat the round key table
-// for NEON and Aarch64. Pre-splatting the round key increases
-// performance by about 0.7 cpb on ARM server boards like an
-// AMD Opteron A1100. However, it crushes performance on ARM
-// dev-boards like LeMaker HiKey and Pine64. HiKey and Pine64
-// run about 8 cpb slower when pre-splatting the round keys.
-// # define CRYPTOPP_LEA_ARM_SPLAT_ROUNDKEYS 1
-
NAMESPACE_BEGIN(CryptoPP)
/// \brief LEA block cipher information
/// \since Crypto++ 7.1
-struct LEA_Info : public FixedBlockSize<16>, VariableKeyLength<16,16,32,8>
+struct LEA_Info : public FixedBlockSize<16>, public VariableKeyLength<16,16,32,8>
{
static const std::string StaticAlgorithmName()
{