summaryrefslogtreecommitdiff
path: root/simon.cpp
diff options
context:
space:
mode:
author Jeffrey Walton <noloader@gmail.com> 2017-12-03 04:10:55 -0500
committer Jeffrey Walton <noloader@gmail.com> 2017-12-03 04:10:55 -0500
commit 081afde0fdf6202073dc9dfc46b501a0f06388a6 (patch)
tree 29dfe4ae28dead2e6d7f04ac6aeafc605642383e /simon.cpp
parent 6bb1f1d9c41060e9bc2ebef1d2736b2bdb48cd49 (diff)
download cryptopp-git-081afde0fdf6202073dc9dfc46b501a0f06388a6.tar.gz
Add SIMON-64 SSE intrinsics
Performance went from about 29 cpb (C++) to about 11.1 cpb (SSE)
Diffstat (limited to 'simon.cpp')
-rw-r--r-- simon.cpp 45
1 files changed, 43 insertions, 2 deletions
diff --git a/simon.cpp b/simon.cpp
index 565a5205..a966a4ca 100644
--- a/simon.cpp
+++ b/simon.cpp
@@ -7,9 +7,10 @@
#include "misc.h"
#include "cpu.h"
-// Uncomment for benchmarking C++ against SSE2 or NEON.
-// Do so in both speck.cpp and speck-simd.cpp.
+// Uncomment for benchmarking C++ against SSE or NEON.
+// Do so in both simon.cpp and simon-simd.cpp.
// #undef CRYPTOPP_SSSE3_AVAILABLE
+// #undef CRYPTOPP_SSE41_AVAILABLE
// #undef CRYPTOPP_ARM_NEON_AVAILABLE
// Disable NEON/ASIMD for Cortex-A53 and A57. The shifts are too slow and C/C++ is about
@@ -206,6 +207,14 @@ extern size_t SIMON128_Dec_AdvancedProcessBlocks_NEON(const word64* subKeys, siz
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
#endif
+#if defined(CRYPTOPP_SSE41_AVAILABLE)  // SIMON-64 SSE4.1 bulk routines; declared only, definitions are outside this file (presumably simon-simd.cpp -- confirm)
+extern size_t SIMON64_Enc_AdvancedProcessBlocks_SSE41(const word32* subKeys, size_t rounds,
+ const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
+
+extern size_t SIMON64_Dec_AdvancedProcessBlocks_SSE41(const word32* subKeys, size_t rounds,
+ const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
+#endif  // CRYPTOPP_SSE41_AVAILABLE
+
#if defined(CRYPTOPP_SSSE3_AVAILABLE)
extern size_t SIMON128_Enc_AdvancedProcessBlocks_SSSE3(const word64* subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
@@ -376,6 +385,38 @@ void SIMON128::Dec::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock
}
#if defined(CRYPTOPP_SIMON_ADVANCED_PROCESS_BLOCKS)
+size_t SIMON64::Enc::AdvancedProcessBlocks(const byte *inBlocks, const byte *xorBlocks,
+ byte *outBlocks, size_t length, word32 flags) const
+{  // Encryption bulk entry point: forwards to an SSE4.1 or NEON routine when one is compiled in and its Has*() check passes, else to the base class.
+#if defined(CRYPTOPP_SSE41_AVAILABLE)
+ if (HasSSE41())  // evaluated on every call, in addition to the compile-time guard above
+ return SIMON64_Enc_AdvancedProcessBlocks_SSE41(m_rkeys, (size_t)m_rounds,  // round keys and round count come from this object
+ inBlocks, xorBlocks, outBlocks, length, flags);  // caller's buffers, length and flags are passed through unchanged
+#endif  // CRYPTOPP_SSE41_AVAILABLE
+#if defined(CRYPTOPP_ARM_NEON_AVAILABLE)
+ if (HasNEON())  // evaluated on every call, in addition to the compile-time guard above
+ return SIMON64_Enc_AdvancedProcessBlocks_NEON(m_rkeys, (size_t)m_rounds,  // NOTE(review): declaration of this NEON routine is not visible in this hunk -- confirm it exists
+ inBlocks, xorBlocks, outBlocks, length, flags);
+#endif  // CRYPTOPP_ARM_NEON_AVAILABLE
+ return BlockTransformation::AdvancedProcessBlocks(inBlocks, xorBlocks, outBlocks, length, flags);  // no SIMD branch returned: use the base-class implementation
+}
+
+size_t SIMON64::Dec::AdvancedProcessBlocks(const byte *inBlocks, const byte *xorBlocks,
+ byte *outBlocks, size_t length, word32 flags) const
+{  // Decryption bulk entry point: forwards to an SSE4.1 or NEON routine when one is compiled in and its Has*() check passes, else to the base class.
+#if defined(CRYPTOPP_SSE41_AVAILABLE)
+ if (HasSSE41())  // evaluated on every call, in addition to the compile-time guard above
+ return SIMON64_Dec_AdvancedProcessBlocks_SSE41(m_rkeys, (size_t)m_rounds,  // round keys and round count come from this object
+ inBlocks, xorBlocks, outBlocks, length, flags);  // caller's buffers, length and flags are passed through unchanged
+#endif  // CRYPTOPP_SSE41_AVAILABLE
+#if defined(CRYPTOPP_ARM_NEON_AVAILABLE)
+ if (HasNEON())  // evaluated on every call, in addition to the compile-time guard above
+ return SIMON64_Dec_AdvancedProcessBlocks_NEON(m_rkeys, (size_t)m_rounds,  // NOTE(review): declaration of this NEON routine is not visible in this hunk -- confirm it exists
+ inBlocks, xorBlocks, outBlocks, length, flags);
+#endif  // CRYPTOPP_ARM_NEON_AVAILABLE
+ return BlockTransformation::AdvancedProcessBlocks(inBlocks, xorBlocks, outBlocks, length, flags);  // no SIMD branch returned: use the base-class implementation
+}
+
size_t SIMON128::Enc::AdvancedProcessBlocks(const byte *inBlocks, const byte *xorBlocks,
byte *outBlocks, size_t length, word32 flags) const
{