diff options
author | Jeffrey Walton <noloader@gmail.com> | 2017-12-03 02:28:40 -0500 |
---|---|---|
committer | Jeffrey Walton <noloader@gmail.com> | 2017-12-03 02:28:40 -0500 |
commit | 6bb1f1d9c41060e9bc2ebef1d2736b2bdb48cd49 (patch) | |
tree | cb8acf0cc2ab462fe783b96da18499ebef03b370 /speck.cpp | |
parent | 77ff7aa528cc9302cefb5dfb5f7f61247df218ef (diff) | |
download | cryptopp-git-6bb1f1d9c41060e9bc2ebef1d2736b2bdb48cd49.tar.gz |
Add SPECK-64 SSE intrinsics
Performance improved from about 11.9 cycles per byte (cpb) with the C++ implementation to about 4.5 cpb with SSE.
Diffstat (limited to 'speck.cpp')
-rw-r--r-- | speck.cpp | 36 |
1 file changed, 35 insertions, 1 deletion
@@ -7,9 +7,10 @@ #include "misc.h"
#include "cpu.h"
-// Uncomment for benchmarking C++ against SSE2 or NEON.
+// Uncomment for benchmarking C++ against SSE or NEON.
// Do so in both speck.cpp and speck-simd.cpp.
// #undef CRYPTOPP_SSSE3_AVAILABLE
+// #undef CRYPTOPP_SSE41_AVAILABLE
// #undef CRYPTOPP_ARM_NEON_AVAILABLE
// Disable NEON/ASIMD for Cortex-A53 and A57. The shifts are too slow and C/C++ is about
@@ -176,6 +177,9 @@ ANONYMOUS_NAMESPACE_END NAMESPACE_BEGIN(CryptoPP)
#if defined(CRYPTOPP_ARM_NEON_AVAILABLE)
+extern size_t SPECK64_Enc_AdvancedProcessBlocks_NEON(const word32* subKeys, size_t rounds,
+ const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
+
extern size_t SPECK128_Enc_AdvancedProcessBlocks_NEON(const word64* subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
@@ -183,6 +187,14 @@ extern size_t SPECK128_Dec_AdvancedProcessBlocks_NEON(const word64* subKeys, siz const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
#endif
+#if defined(CRYPTOPP_SSE41_AVAILABLE) // SPECK-64 SSE4.1 routines are declared only when this macro is defined
+extern size_t SPECK64_Enc_AdvancedProcessBlocks_SSE41(const word32* subKeys, size_t rounds, // encryption routine; definition is not in this file (presumably speck-simd.cpp -- confirm)
+ const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
+
+extern size_t SPECK64_Dec_AdvancedProcessBlocks_SSE41(const word32* subKeys, size_t rounds, // decryption counterpart with the same parameter list
+ const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
+#endif // CRYPTOPP_SSE41_AVAILABLE
+
#if defined(CRYPTOPP_SSSE3_AVAILABLE)
extern size_t SPECK128_Enc_AdvancedProcessBlocks_SSSE3(const word64* subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
@@ -353,6 +365,28 @@ void SPECK128::Dec::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock }
#if defined(CRYPTOPP_SPECK_ADVANCED_PROCESS_BLOCKS)
+size_t SPECK64::Enc::AdvancedProcessBlocks(const byte *inBlocks, const byte *xorBlocks,
+ byte *outBlocks, size_t length, word32 flags) const
+{ // SPECK-64 encryption: forward to the SSE4.1 routine when it is usable, otherwise to the base-class implementation
+#if defined(CRYPTOPP_SSE41_AVAILABLE) // the SSE4.1 call is compiled in only when this macro is defined
+ if (HasSSE41()) // taken only when HasSSE41() returns true (presumably a CPU feature query -- confirm)
+ return SPECK64_Enc_AdvancedProcessBlocks_SSE41(m_rkeys, (size_t)m_rounds, // m_rkeys is passed as subKeys, m_rounds (cast to size_t) as rounds
+ inBlocks, xorBlocks, outBlocks, length, flags); // the caller's arguments are forwarded unchanged
+#endif // CRYPTOPP_SSE41_AVAILABLE
+ return BlockTransformation::AdvancedProcessBlocks(inBlocks, xorBlocks, outBlocks, length, flags); // fallback: BlockTransformation's implementation
+}
+
+size_t SPECK64::Dec::AdvancedProcessBlocks(const byte *inBlocks, const byte *xorBlocks,
+ byte *outBlocks, size_t length, word32 flags) const
+{ // SPECK-64 decryption: forward to the SSE4.1 routine when it is usable, otherwise to the base-class implementation
+#if defined(CRYPTOPP_SSE41_AVAILABLE) // the SSE4.1 call is compiled in only when this macro is defined
+ if (HasSSE41()) // taken only when HasSSE41() returns true (presumably a CPU feature query -- confirm)
+ return SPECK64_Dec_AdvancedProcessBlocks_SSE41(m_rkeys, (size_t)m_rounds, // m_rkeys is passed as subKeys, m_rounds (cast to size_t) as rounds
+ inBlocks, xorBlocks, outBlocks, length, flags); // the caller's arguments are forwarded unchanged
+#endif // CRYPTOPP_SSE41_AVAILABLE
+ return BlockTransformation::AdvancedProcessBlocks(inBlocks, xorBlocks, outBlocks, length, flags); // fallback: BlockTransformation's implementation
+}
+
size_t SPECK128::Enc::AdvancedProcessBlocks(const byte *inBlocks, const byte *xorBlocks,
byte *outBlocks, size_t length, word32 flags) const
{
|