summaryrefslogtreecommitdiff
path: root/adv_simd.h
diff options
context:
space:
mode:
authorJeffrey Walton <noloader@gmail.com>2018-11-12 20:56:15 -0500
committerJeffrey Walton <noloader@gmail.com>2018-11-12 20:56:15 -0500
commita0608a6b8083a330b43747a31dfd31d27482a725 (patch)
tree905dfe8c9bb5ee329fbd2ddfeb4e54a1243cb014 /adv_simd.h
parentb372f7d3878c5dac52ebc61783593aee5212f911 (diff)
downloadcryptopp-git-a0608a6b8083a330b43747a31dfd31d27482a725.tar.gz
Port SPECK64 to Altivec
SPECK64 runs about 4x faster than C++ for POWER4 and friends. If POWER7 is available it goes back to full speed due to efficient unaligned loads
Diffstat (limited to 'adv_simd.h')
-rw-r--r--adv_simd.h32
1 files changed, 20 insertions, 12 deletions
diff --git a/adv_simd.h b/adv_simd.h
index 76954e99..c79a9f25 100644
--- a/adv_simd.h
+++ b/adv_simd.h
@@ -74,6 +74,14 @@
# include "ppc_simd.h"
#endif
+#ifndef CRYPTOPP_INLINE
+# if defined(CRYPTOPP_DEBUG)
+# define CRYPTOPP_INLINE static
+# else
+# define CRYPTOPP_INLINE inline
+# endif
+#endif
+
// ************************ All block ciphers *********************** //
ANONYMOUS_NAMESPACE_BEGIN
@@ -103,7 +111,7 @@ NAMESPACE_BEGIN(CryptoPP)
/// \details The subkey type is usually word32 or word64. F2 and F6 must use the
/// same word type.
template <typename F2, typename F6, typename W>
-inline size_t AdvancedProcessBlocks64_6x2_NEON(F2 func2, F6 func6,
+CRYPTOPP_INLINE size_t AdvancedProcessBlocks64_6x2_NEON(F2 func2, F6 func6,
const W *subKeys, size_t rounds, const byte *inBlocks,
const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
@@ -348,7 +356,7 @@ inline size_t AdvancedProcessBlocks64_6x2_NEON(F2 func2, F6 func6,
/// \details The subkey type is usually word32 or word64. F1 and F6 must use the
/// same word type.
template <typename F1, typename F6, typename W>
-inline size_t AdvancedProcessBlocks128_6x1_NEON(F1 func1, F6 func6,
+CRYPTOPP_INLINE size_t AdvancedProcessBlocks128_6x1_NEON(F1 func1, F6 func6,
const W *subKeys, size_t rounds, const byte *inBlocks,
const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
@@ -510,7 +518,7 @@ inline size_t AdvancedProcessBlocks128_6x1_NEON(F1 func1, F6 func6,
/// vector type. The V parameter is used to avoid template argument
/// deduction/substitution failures.
template <typename F1, typename F4, typename W, typename V>
-inline size_t AdvancedProcessBlocks128_4x1_NEON(F1 func1, F4 func4,
+CRYPTOPP_INLINE size_t AdvancedProcessBlocks128_4x1_NEON(F1 func1, F4 func4,
const V& unused, const W *subKeys, size_t rounds, const byte *inBlocks,
const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
@@ -651,7 +659,7 @@ inline size_t AdvancedProcessBlocks128_4x1_NEON(F1 func1, F4 func4,
/// \details The subkey type is usually word32 or word64. F2 and F6 must use the
/// same word type.
template <typename F2, typename F6, typename W>
-inline size_t AdvancedProcessBlocks128_6x2_NEON(F2 func2, F6 func6,
+CRYPTOPP_INLINE size_t AdvancedProcessBlocks128_6x2_NEON(F2 func2, F6 func6,
const W *subKeys, size_t rounds, const byte *inBlocks,
const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
@@ -882,7 +890,7 @@ NAMESPACE_BEGIN(CryptoPP)
/// \details The subkey type is usually word32 or word64. F1 and F2 must use the
/// same word type.
template <typename F1, typename F2, typename W>
-inline size_t AdvancedProcessBlocks64_2x1_SSE(F1 func1, F2 func2,
+CRYPTOPP_INLINE size_t AdvancedProcessBlocks64_2x1_SSE(F1 func1, F2 func2,
MAYBE_CONST W *subKeys, size_t rounds, const byte *inBlocks,
const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
@@ -1037,7 +1045,7 @@ inline size_t AdvancedProcessBlocks64_2x1_SSE(F1 func1, F2 func2,
/// \details The subkey type is usually word32 or word64. F2 and F6 must use the
/// same word type.
template <typename F2, typename F6, typename W>
-inline size_t AdvancedProcessBlocks64_6x2_SSE(F2 func2, F6 func6,
+CRYPTOPP_INLINE size_t AdvancedProcessBlocks64_6x2_SSE(F2 func2, F6 func6,
MAYBE_CONST W *subKeys, size_t rounds, const byte *inBlocks,
const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
@@ -1288,7 +1296,7 @@ inline size_t AdvancedProcessBlocks64_6x2_SSE(F2 func2, F6 func6,
/// \details The subkey type is usually word32 or word64. F2 and F6 must use the
/// same word type.
template <typename F2, typename F6, typename W>
-inline size_t AdvancedProcessBlocks128_6x2_SSE(F2 func2, F6 func6,
+CRYPTOPP_INLINE size_t AdvancedProcessBlocks128_6x2_SSE(F2 func2, F6 func6,
MAYBE_CONST W *subKeys, size_t rounds, const byte *inBlocks,
const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
@@ -1483,7 +1491,7 @@ inline size_t AdvancedProcessBlocks128_6x2_SSE(F2 func2, F6 func6,
/// \details The subkey type is usually word32 or word64. F1 and F4 must use the
/// same word type.
template <typename F1, typename F4, typename W>
-inline size_t AdvancedProcessBlocks128_4x1_SSE(F1 func1, F4 func4,
+CRYPTOPP_INLINE size_t AdvancedProcessBlocks128_4x1_SSE(F1 func1, F4 func4,
MAYBE_CONST W *subKeys, size_t rounds, const byte *inBlocks,
const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
@@ -1614,7 +1622,7 @@ inline size_t AdvancedProcessBlocks128_4x1_SSE(F1 func1, F4 func4,
/// \details The subkey type is usually word32 or word64. F1 and F4 must use the
/// same word type.
template <typename F1, typename F4, typename W>
-inline size_t AdvancedProcessBlocks64_4x1_SSE(F1 func1, F4 func4,
+CRYPTOPP_INLINE size_t AdvancedProcessBlocks64_4x1_SSE(F1 func1, F4 func4,
MAYBE_CONST W *subKeys, size_t rounds, const byte *inBlocks,
const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
@@ -1797,7 +1805,7 @@ NAMESPACE_BEGIN(CryptoPP)
/// \details The subkey type is usually word32 or word64. F2 and F6 must use the
/// same word type.
template <typename F2, typename F6, typename W>
-inline size_t AdvancedProcessBlocks64_6x2_ALTIVEC(F2 func2, F6 func6,
+CRYPTOPP_INLINE size_t AdvancedProcessBlocks64_6x2_ALTIVEC(F2 func2, F6 func6,
const W *subKeys, size_t rounds, const byte *inBlocks,
const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
@@ -2068,7 +2076,7 @@ inline size_t AdvancedProcessBlocks64_6x2_ALTIVEC(F2 func2, F6 func6,
/// \details The subkey type is usually word32 or word64. F1 and F4 must use the
/// same word type.
template <typename F1, typename F4, typename W>
-inline size_t AdvancedProcessBlocks128_4x1_ALTIVEC(F1 func1, F4 func4,
+CRYPTOPP_INLINE size_t AdvancedProcessBlocks128_4x1_ALTIVEC(F1 func1, F4 func4,
const W *subKeys, size_t rounds, const byte *inBlocks,
const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
@@ -2213,7 +2221,7 @@ inline size_t AdvancedProcessBlocks128_4x1_ALTIVEC(F1 func1, F4 func4,
/// \details The subkey type is usually word32 or word64. F1 and F6 must use the
/// same word type.
template <typename F1, typename F6, typename W>
-inline size_t AdvancedProcessBlocks128_6x1_ALTIVEC(F1 func1, F6 func6,
+CRYPTOPP_INLINE size_t AdvancedProcessBlocks128_6x1_ALTIVEC(F1 func1, F6 func6,
const W *subKeys, size_t rounds, const byte *inBlocks,
const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{