summaryrefslogtreecommitdiff
path: root/speck.cpp
diff options
context:
space:
mode:
authorJeffrey Walton <noloader@gmail.com>2017-12-03 02:28:40 -0500
committerJeffrey Walton <noloader@gmail.com>2017-12-03 02:28:40 -0500
commit6bb1f1d9c41060e9bc2ebef1d2736b2bdb48cd49 (patch)
treecb8acf0cc2ab462fe783b96da18499ebef03b370 /speck.cpp
parent77ff7aa528cc9302cefb5dfb5f7f61247df218ef (diff)
downloadcryptopp-git-6bb1f1d9c41060e9bc2ebef1d2736b2bdb48cd49.tar.gz
Add SPECK-64 SSE intrinsics
Performance went from about 11.9 cpb (C++) to about 4.5 cpb (SSE)
Diffstat (limited to 'speck.cpp')
-rw-r--r--speck.cpp36
1 files changed, 35 insertions, 1 deletions
diff --git a/speck.cpp b/speck.cpp
index 00e73a0c..f6762094 100644
--- a/speck.cpp
+++ b/speck.cpp
@@ -7,9 +7,10 @@
#include "misc.h"
#include "cpu.h"
-// Uncomment for benchmarking C++ against SSE2 or NEON.
+// Uncomment for benchmarking C++ against SSE or NEON.
// Do so in both speck.cpp and speck-simd.cpp.
// #undef CRYPTOPP_SSSE3_AVAILABLE
+// #undef CRYPTOPP_SSE41_AVAILABLE
// #undef CRYPTOPP_ARM_NEON_AVAILABLE
// Disable NEON/ASIMD for Cortex-A53 and A57. The shifts are too slow and C/C++ is about
@@ -176,6 +177,9 @@ ANONYMOUS_NAMESPACE_END
NAMESPACE_BEGIN(CryptoPP)
#if defined(CRYPTOPP_ARM_NEON_AVAILABLE)
+extern size_t SPECK64_Enc_AdvancedProcessBlocks_NEON(const word32* subKeys, size_t rounds,
+ const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
+
extern size_t SPECK128_Enc_AdvancedProcessBlocks_NEON(const word64* subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
@@ -183,6 +187,14 @@ extern size_t SPECK128_Dec_AdvancedProcessBlocks_NEON(const word64* subKeys, siz
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
#endif
+#if defined(CRYPTOPP_SSE41_AVAILABLE)
+extern size_t SPECK64_Enc_AdvancedProcessBlocks_SSE41(const word32* subKeys, size_t rounds,
+ const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
+
+extern size_t SPECK64_Dec_AdvancedProcessBlocks_SSE41(const word32* subKeys, size_t rounds,
+ const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
+#endif
+
#if defined(CRYPTOPP_SSSE3_AVAILABLE)
extern size_t SPECK128_Enc_AdvancedProcessBlocks_SSSE3(const word64* subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
@@ -353,6 +365,28 @@ void SPECK128::Dec::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock
}
#if defined(CRYPTOPP_SPECK_ADVANCED_PROCESS_BLOCKS)
+size_t SPECK64::Enc::AdvancedProcessBlocks(const byte *inBlocks, const byte *xorBlocks,
+ byte *outBlocks, size_t length, word32 flags) const
+{
+#if defined(CRYPTOPP_SSE41_AVAILABLE)
+ if (HasSSE41())
+ return SPECK64_Enc_AdvancedProcessBlocks_SSE41(m_rkeys, (size_t)m_rounds,
+ inBlocks, xorBlocks, outBlocks, length, flags);
+#endif
+ return BlockTransformation::AdvancedProcessBlocks(inBlocks, xorBlocks, outBlocks, length, flags);
+}
+
+size_t SPECK64::Dec::AdvancedProcessBlocks(const byte *inBlocks, const byte *xorBlocks,
+ byte *outBlocks, size_t length, word32 flags) const
+{
+#if defined(CRYPTOPP_SSE41_AVAILABLE)
+ if (HasSSE41())
+ return SPECK64_Dec_AdvancedProcessBlocks_SSE41(m_rkeys, (size_t)m_rounds,
+ inBlocks, xorBlocks, outBlocks, length, flags);
+#endif
+ return BlockTransformation::AdvancedProcessBlocks(inBlocks, xorBlocks, outBlocks, length, flags);
+}
+
size_t SPECK128::Enc::AdvancedProcessBlocks(const byte *inBlocks, const byte *xorBlocks,
byte *outBlocks, size_t length, word32 flags) const
{