summaryrefslogtreecommitdiff
path: root/blake2.cpp
diff options
context:
space:
mode:
authorJeffrey Walton <noloader@gmail.com>2017-11-23 02:22:27 -0500
committerJeffrey Walton <noloader@gmail.com>2017-11-23 02:22:27 -0500
commitb08596da4466a7380f65803bd51d2d2213818d19 (patch)
tree33a71877ff8e8812892595ef34f36840341583d6 /blake2.cpp
parent78ec2aa5f49f2d0977a705225333aae68afefd5a (diff)
downloadcryptopp-git-b08596da4466a7380f65803bd51d2d2213818d19.tar.gz
Add CRYPTOPP_SLOW_ARMV8_SHIFT for Aarch32 and Aarch64
Both BLAKE2 and SPECK slow down when using NEON/ASIMD. When only BLAKE2 experienced the issue, it was a one-off problem. It's now wider than a one-off, so add the formal define.
Diffstat (limited to 'blake2.cpp')
-rw-r--r--blake2.cpp13
1 files changed, 9 insertions, 4 deletions
diff --git a/blake2.cpp b/blake2.cpp
index 83e1bdaf..a54902c9 100644
--- a/blake2.cpp
+++ b/blake2.cpp
@@ -17,6 +17,12 @@ NAMESPACE_BEGIN(CryptoPP)
// #undef CRYPTOPP_SSE41_AVAILABLE
// #undef CRYPTOPP_ARM_NEON_AVAILABLE
+// Disable NEON/ASIMD for Cortex-A53 and A57. The shifts are too slow and C/C++ is about
+// 3 cpb faster than NEON/ASIMD. Also see http://github.com/weidai11/cryptopp/issues/367.
+#if (defined(__aarch32__) || defined(__aarch64__)) && defined(CRYPTOPP_SLOW_ARMV8_SHIFT)
+# undef CRYPTOPP_ARM_NEON_AVAILABLE
+#endif
+
void BLAKE2_Compress32_CXX(const byte* input, BLAKE2_State<word32, false>& state);
void BLAKE2_Compress64_CXX(const byte* input, BLAKE2_State<word64, true>& state);
@@ -25,8 +31,7 @@ extern void BLAKE2_Compress32_SSE4(const byte* input, BLAKE2_State<word32, false
extern void BLAKE2_Compress64_SSE4(const byte* input, BLAKE2_State<word64, true>& state);
#endif
-// Disable NEON for Cortex-A53 and A57. Also see http://github.com/weidai11/cryptopp/issues/367
-#if CRYPTOPP_BOOL_ARM32 && CRYPTOPP_ARM_NEON_AVAILABLE
+#if CRYPTOPP_ARM_NEON_AVAILABLE
extern void BLAKE2_Compress32_NEON(const byte* input, BLAKE2_State<word32, false>& state);
extern void BLAKE2_Compress64_NEON(const byte* input, BLAKE2_State<word64, true>& state);
#endif
@@ -93,7 +98,7 @@ pfnCompress64 InitializeCompress64Fn()
#if CRYPTOPP_SSE41_AVAILABLE
HasSSE41() ? &BLAKE2_Compress64_SSE4 :
#endif
-#if CRYPTOPP_BOOL_ARM32 && CRYPTOPP_ARM_NEON_AVAILABLE
+#if CRYPTOPP_ARM_NEON_AVAILABLE
HasNEON() ? &BLAKE2_Compress64_NEON :
#endif
&BLAKE2_Compress64_CXX;
@@ -105,7 +110,7 @@ pfnCompress32 InitializeCompress32Fn()
#if CRYPTOPP_SSE41_AVAILABLE
HasSSE41() ? &BLAKE2_Compress32_SSE4 :
#endif
-#if CRYPTOPP_BOOL_ARM32 && CRYPTOPP_ARM_NEON_AVAILABLE
+#if CRYPTOPP_ARM_NEON_AVAILABLE
HasNEON() ? &BLAKE2_Compress32_NEON :
#endif
&BLAKE2_Compress32_CXX;