-rw-r--r--  adv-simd.h         174
-rw-r--r--  ppc-simd.h           9
-rw-r--r--  rijndael-simd.cpp  135
-rw-r--r--  rijndael.cpp         8
-rw-r--r--  simon-simd.cpp      16
-rw-r--r--  speck-simd.cpp      16
6 files changed, 192 insertions(+), 166 deletions(-)
diff --git a/adv-simd.h b/adv-simd.h
index 3ee46fb6..b3d8a847 100644
--- a/adv-simd.h
+++ b/adv-simd.h
@@ -8,16 +8,18 @@
//
// There are 8 templates provided in this file. The number following the
// function name is the block size of the cipher. The name following that
-// is the acceleration and arrangement. For example SSE1x4 means Intel SSE
-// using two encrypt (or decrypt) functions: one that operates on 1 block,
-// and one that operates on 4 blocks.
+// is the arrangement and acceleration. For example 4x1_SSE means Intel SSE
+// using two encrypt (or decrypt) functions: one that operates on 4 blocks,
+// and one that operates on 1 block.
//
-// * AdvancedProcessBlocks64_SSE1x4
-// * AdvancedProcessBlocks128_SSE1x4
-// * AdvancedProcessBlocks64_SSE2x6
-// * AdvancedProcessBlocks128_SSE2x6
-// * AdvancedProcessBlocks64_NEON2x6
-// * AdvancedProcessBlocks128_NEON2x6
+// * AdvancedProcessBlocks64_4x1_SSE
+// * AdvancedProcessBlocks128_4x1_SSE
+// * AdvancedProcessBlocks64_6x2_SSE
+// * AdvancedProcessBlocks128_6x2_SSE
+// * AdvancedProcessBlocks64_6x2_NEON
+// * AdvancedProcessBlocks128_6x2_NEON
+// * AdvancedProcessBlocks64_6x2_ALTIVEC
+// * AdvancedProcessBlocks128_6x2_ALTIVEC
//
#ifndef CRYPTOPP_ADVANCED_SIMD_TEMPLATES
@@ -36,6 +38,10 @@
# include <tmmintrin.h>
#endif
+#if defined(CRYPTOPP_ALTIVEC_AVAILABLE)
+# include "ppc-simd.h"
+#endif
+
// https://www.spinics.net/lists/gcchelp/msg47735.html and
// https://www.spinics.net/lists/gcchelp/msg47749.html
#if (CRYPTOPP_GCC_VERSION >= 40900)
@@ -88,7 +94,7 @@ ANONYMOUS_NAMESPACE_END
NAMESPACE_BEGIN(CryptoPP)
template <typename F2, typename F6>
-inline size_t AdvancedProcessBlocks64_NEON2x6(F2 func2, F6 func6,
+inline size_t AdvancedProcessBlocks64_6x2_NEON(F2 func2, F6 func6,
const word32 *subKeys, size_t rounds, const byte *inBlocks,
const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
@@ -455,7 +461,7 @@ size_t AdvancedProcessBlocks128_NEON1x6(F1 func1, F6 func6,
}
template <typename F2, typename F6>
-size_t AdvancedProcessBlocks128_NEON2x6(F2 func2, F6 func6,
+size_t AdvancedProcessBlocks128_6x2_NEON(F2 func2, F6 func6,
const word64 *subKeys, size_t rounds, const byte *inBlocks,
const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
@@ -690,7 +696,7 @@ ANONYMOUS_NAMESPACE_END
NAMESPACE_BEGIN(CryptoPP)
template <typename F2, typename F6>
-inline size_t GCC_NO_UBSAN AdvancedProcessBlocks64_SSE2x6(F2 func2, F6 func6,
+inline size_t GCC_NO_UBSAN AdvancedProcessBlocks64_6x2_SSE(F2 func2, F6 func6,
const word32 *subKeys, size_t rounds, const byte *inBlocks,
const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
@@ -924,7 +930,7 @@ inline size_t GCC_NO_UBSAN AdvancedProcessBlocks64_SSE2x6(F2 func2, F6 func6,
}
template <typename F2, typename F6>
-inline size_t AdvancedProcessBlocks128_SSE2x6(F2 func2, F6 func6,
+inline size_t AdvancedProcessBlocks128_6x2_SSE(F2 func2, F6 func6,
const word64 *subKeys, size_t rounds, const byte *inBlocks,
const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
@@ -1109,7 +1115,7 @@ inline size_t AdvancedProcessBlocks128_SSE2x6(F2 func2, F6 func6,
}
template <typename F1, typename F4>
-inline size_t AdvancedProcessBlocks128_SSE1x4(F1 func1, F4 func4,
+inline size_t AdvancedProcessBlocks128_4x1_SSE(F1 func1, F4 func4,
MAYBE_CONST word32 *subKeys, size_t rounds, const byte *inBlocks,
const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
@@ -1232,4 +1238,144 @@ NAMESPACE_END // CryptoPP
#endif // CRYPTOPP_SSSE3_AVAILABLE
+// *********************** Altivec/Power 4 ********************** //
+
+#if defined(CRYPTOPP_ALTIVEC_AVAILABLE)
+
+ANONYMOUS_NAMESPACE_BEGIN
+
+using CryptoPP::uint32x4_p;
+
+#if defined(CRYPTOPP_LITTLE_ENDIAN)
+const uint32x4_p s_one = {1,0,0,0};
+#else
+const uint32x4_p s_one = {0,0,0,1};
+#endif
+
+ANONYMOUS_NAMESPACE_END
+
+NAMESPACE_BEGIN(CryptoPP)
+
+template <typename F1, typename F6>
+size_t AdvancedProcessBlocks128_6x1_ALTIVEC(F1 func1, F6 func6, const word32 *subKeys, size_t rounds,
+ const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
+{
+ CRYPTOPP_ASSERT(subKeys);
+ CRYPTOPP_ASSERT(inBlocks);
+ CRYPTOPP_ASSERT(outBlocks);
+ CRYPTOPP_ASSERT(length >= 16);
+
+ const ptrdiff_t blockSize = 16;
+
+ ptrdiff_t inIncrement = (flags & (BT_InBlockIsCounter|BT_DontIncrementInOutPointers)) ? 0 : blockSize;
+ ptrdiff_t xorIncrement = xorBlocks ? blockSize : 0;
+ ptrdiff_t outIncrement = (flags & BT_DontIncrementInOutPointers) ? 0 : blockSize;
+
+ if (flags & BT_ReverseDirection)
+ {
+ inBlocks += length - blockSize;
+ xorBlocks += length - blockSize;
+ outBlocks += length - blockSize;
+ inIncrement = 0-inIncrement;
+ xorIncrement = 0-xorIncrement;
+ outIncrement = 0-outIncrement;
+ }
+
+ if (flags & BT_AllowParallel)
+ {
+ while (length >= 6*blockSize)
+ {
+ VectorType block0, block1, block2, block3, block4, block5, temp;
+ block0 = VectorLoad(inBlocks);
+
+ if (flags & BT_InBlockIsCounter)
+ {
+ block1 = VectorAdd(block0, s_one);
+ block2 = VectorAdd(block1, s_one);
+ block3 = VectorAdd(block2, s_one);
+ block4 = VectorAdd(block3, s_one);
+ block5 = VectorAdd(block4, s_one);
+ temp = VectorAdd(block5, s_one);
+ VectorStore(temp, const_cast<byte*>(inBlocks));
+ }
+ else
+ {
+ const int inc = static_cast<int>(inIncrement);
+ block1 = VectorLoad(1*inc, inBlocks);
+ block2 = VectorLoad(2*inc, inBlocks);
+ block3 = VectorLoad(3*inc, inBlocks);
+ block4 = VectorLoad(4*inc, inBlocks);
+ block5 = VectorLoad(5*inc, inBlocks);
+ inBlocks += 6*inc;
+ }
+
+ if (flags & BT_XorInput)
+ {
+ const int inc = static_cast<int>(xorIncrement);
+ block0 = VectorXor(block0, VectorLoad(0*inc, xorBlocks));
+ block1 = VectorXor(block1, VectorLoad(1*inc, xorBlocks));
+ block2 = VectorXor(block2, VectorLoad(2*inc, xorBlocks));
+ block3 = VectorXor(block3, VectorLoad(3*inc, xorBlocks));
+ block4 = VectorXor(block4, VectorLoad(4*inc, xorBlocks));
+ block5 = VectorXor(block5, VectorLoad(5*inc, xorBlocks));
+ xorBlocks += 6*inc;
+ }
+
+ func6(block0, block1, block2, block3, block4, block5, subKeys, rounds);
+
+ if (xorBlocks && !(flags & BT_XorInput))
+ {
+ const int inc = static_cast<int>(xorIncrement);
+ block0 = VectorXor(block0, VectorLoad(0*inc, xorBlocks));
+ block1 = VectorXor(block1, VectorLoad(1*inc, xorBlocks));
+ block2 = VectorXor(block2, VectorLoad(2*inc, xorBlocks));
+ block3 = VectorXor(block3, VectorLoad(3*inc, xorBlocks));
+ block4 = VectorXor(block4, VectorLoad(4*inc, xorBlocks));
+ block5 = VectorXor(block5, VectorLoad(5*inc, xorBlocks));
+ xorBlocks += 6*inc;
+ }
+
+ const int inc = static_cast<int>(outIncrement);
+ VectorStore(block0, outBlocks+0*inc);
+ VectorStore(block1, outBlocks+1*inc);
+ VectorStore(block2, outBlocks+2*inc);
+ VectorStore(block3, outBlocks+3*inc);
+ VectorStore(block4, outBlocks+4*inc);
+ VectorStore(block5, outBlocks+5*inc);
+
+ outBlocks += 6*inc;
+ length -= 6*blockSize;
+ }
+ }
+
+ while (length >= blockSize)
+ {
+ VectorType block = VectorLoad(inBlocks);
+
+ if (flags & BT_XorInput)
+ block = VectorXor(block, VectorLoad(xorBlocks));
+
+ if (flags & BT_InBlockIsCounter)
+ const_cast<byte *>(inBlocks)[15]++;
+
+ func1(block, subKeys, rounds);
+
+ if (xorBlocks && !(flags & BT_XorInput))
+ block = VectorXor(block, VectorLoad(xorBlocks));
+
+ VectorStore(block, outBlocks);
+
+ inBlocks += inIncrement;
+ outBlocks += outIncrement;
+ xorBlocks += xorIncrement;
+ length -= blockSize;
+ }
+
+ return length;
+}
+
+NAMESPACE_END // CryptoPP
+
+#endif // CRYPTOPP_ALTIVEC_AVAILABLE
+
#endif // CRYPTOPP_ADVANCED_SIMD_TEMPLATES
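
The new AdvancedProcessBlocks128_6x1_ALTIVEC template above only orchestrates loads, stores, XOR masking and counter handling; the cipher supplies the two transform functors (F1 and F6) that it calls. Below is a minimal sketch of what such a pair could look like. The names MyCipher_* are hypothetical, and the block type is shown as uint32x4_p to match the "using CryptoPP::uint32x4_p" pulled into the template's anonymous namespace; the real callers in rijndael-simd.cpp use their own local vector typedef.

static inline void MyCipher_Enc_Block(uint32x4_p &block,
    const word32 *subKeys, size_t rounds)
{
    // apply the cipher's rounds to one block, in place
}

static inline void MyCipher_Enc_6_Blocks(uint32x4_p &block0, uint32x4_p &block1,
    uint32x4_p &block2, uint32x4_p &block3, uint32x4_p &block4, uint32x4_p &block5,
    const word32 *subKeys, size_t rounds)
{
    // apply the cipher's rounds to six blocks, interleaved to keep the
    // vector pipelines busy
}

size_t MyCipher_Enc_AdvancedProcessBlocks_ALTIVEC(const word32 *subKeys, size_t rounds,
    const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
    // the template takes the 6-block path while length >= 96 bytes and
    // BT_AllowParallel is set, then falls back to the 1-block path
    return AdvancedProcessBlocks128_6x1_ALTIVEC(MyCipher_Enc_Block, MyCipher_Enc_6_Blocks,
        subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}
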
diff --git a/ppc-simd.h b/ppc-simd.h
index 880d15d4..d1f04607 100644
--- a/ppc-simd.h
+++ b/ppc-simd.h
@@ -29,9 +29,12 @@ NAMESPACE_BEGIN(CryptoPP)
#if defined(CRYPTOPP_ALTIVEC_AVAILABLE) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
-typedef __vector unsigned char uint8x16_p;
-typedef __vector unsigned int uint32x4_p;
-#if defined(CRYPTOPP_POWER5_AVAILABLE)
+typedef __vector signed char int8x16_p;
+typedef __vector unsigned char uint8x16_p;
+typedef __vector unsigned short uint16x8_p;
+typedef __vector unsigned int uint32x4_p;
+
+#if defined(CRYPTOPP_POWER8_AVAILABLE)
typedef __vector unsigned long long uint64x2_p;
#endif
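
For readers unfamiliar with the Power vector extensions, the typedefs above are plain aliases for the compiler's AltiVec built-in vector types, which are operated on with the <altivec.h> builtins. A standalone sketch, independent of Crypto++ (compile with -maltivec; element subscripting is a GCC extension):

#include <altivec.h>

typedef __vector unsigned char uint8x16_p;   // 16 lanes of 8 bits
typedef __vector unsigned int  uint32x4_p;   //  4 lanes of 32 bits

int main()
{
    const uint32x4_p a = {1, 2, 3, 4};
    const uint32x4_p b = {4, 3, 2, 1};

    const uint32x4_p sum = vec_add(a, b);   // element-wise add -> {5,5,5,5}
    const uint32x4_p mix = vec_xor(a, b);   // element-wise xor -> {5,1,1,5}

    return (sum[0] == 5 && mix[0] == 5) ? 0 : 1;
}
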
diff --git a/rijndael-simd.cpp b/rijndael-simd.cpp
index f6445b98..0c5c7c52 100644
--- a/rijndael-simd.cpp
+++ b/rijndael-simd.cpp
@@ -517,7 +517,7 @@ size_t Rijndael_Enc_AdvancedProcessBlocks_AESNI(const word32 *subKeys, size_t ro
MAYBE_CONST byte* ib = MAYBE_UNCONST_CAST(byte*, inBlocks);
MAYBE_CONST byte* xb = MAYBE_UNCONST_CAST(byte*, xorBlocks);
- return AdvancedProcessBlocks128_SSE1x4(AESNI_Enc_Block, AESNI_Enc_4_Blocks,
+ return AdvancedProcessBlocks128_4x1_SSE(AESNI_Enc_Block, AESNI_Enc_4_Blocks,
sk, rounds, ib, xb, outBlocks, length, flags);
}
@@ -528,7 +528,7 @@ size_t Rijndael_Dec_AdvancedProcessBlocks_AESNI(const word32 *subKeys, size_t ro
MAYBE_CONST byte* ib = MAYBE_UNCONST_CAST(byte*, inBlocks);
MAYBE_CONST byte* xb = MAYBE_UNCONST_CAST(byte*, xorBlocks);
- return AdvancedProcessBlocks128_SSE1x4(AESNI_Dec_Block, AESNI_Dec_4_Blocks,
+ return AdvancedProcessBlocks128_4x1_SSE(AESNI_Dec_Block, AESNI_Dec_4_Blocks,
sk, rounds, ib, xb, outBlocks, length, flags);
}
@@ -702,129 +702,6 @@ static inline void POWER8_Dec_6_Blocks(VectorType &block0, VectorType &block1,
block5 = VectorDecryptLast(block5, k);
}
-template <typename F1, typename F6>
-size_t Rijndael_AdvancedProcessBlocks_POWER8(F1 func1, F6 func6, const word32 *subKeys, size_t rounds,
- const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
-{
- CRYPTOPP_ASSERT(subKeys);
- CRYPTOPP_ASSERT(inBlocks);
- CRYPTOPP_ASSERT(outBlocks);
- CRYPTOPP_ASSERT(length >= 16);
-
- const size_t blockSize = 16;
- size_t inIncrement = (flags & (BlockTransformation::BT_InBlockIsCounter|BlockTransformation::BT_DontIncrementInOutPointers)) ? 0 : blockSize;
- size_t xorIncrement = xorBlocks ? blockSize : 0;
- size_t outIncrement = (flags & BlockTransformation::BT_DontIncrementInOutPointers) ? 0 : blockSize;
-
- if (flags & BlockTransformation::BT_ReverseDirection)
- {
- inBlocks += length - blockSize;
- xorBlocks += length - blockSize;
- outBlocks += length - blockSize;
- inIncrement = 0-inIncrement;
- xorIncrement = 0-xorIncrement;
- outIncrement = 0-outIncrement;
- }
-
- if (flags & BlockTransformation::BT_AllowParallel)
- {
- while (length >= 6*blockSize)
- {
-#if defined(CRYPTOPP_LITTLE_ENDIAN)
- const VectorType one = (VectorType)((uint64x2_p){1,0});
-#else
- const VectorType one = (VectorType)((uint64x2_p){0,1});
-#endif
-
- VectorType block0, block1, block2, block3, block4, block5, temp;
- block0 = VectorLoad(inBlocks);
-
- if (flags & BlockTransformation::BT_InBlockIsCounter)
- {
- block1 = VectorAdd(block0, one);
- block2 = VectorAdd(block1, one);
- block3 = VectorAdd(block2, one);
- block4 = VectorAdd(block3, one);
- block5 = VectorAdd(block4, one);
- temp = VectorAdd(block5, one);
- VectorStore(temp, const_cast<byte*>(inBlocks));
- }
- else
- {
- const int inc = static_cast<int>(inIncrement);
- block1 = VectorLoad(1*inc, inBlocks);
- block2 = VectorLoad(2*inc, inBlocks);
- block3 = VectorLoad(3*inc, inBlocks);
- block4 = VectorLoad(4*inc, inBlocks);
- block5 = VectorLoad(5*inc, inBlocks);
- inBlocks += 6*inc;
- }
-
- if (flags & BlockTransformation::BT_XorInput)
- {
- const int inc = static_cast<int>(xorIncrement);
- block0 = VectorXor(block0, VectorLoad(0*inc, xorBlocks));
- block1 = VectorXor(block1, VectorLoad(1*inc, xorBlocks));
- block2 = VectorXor(block2, VectorLoad(2*inc, xorBlocks));
- block3 = VectorXor(block3, VectorLoad(3*inc, xorBlocks));
- block4 = VectorXor(block4, VectorLoad(4*inc, xorBlocks));
- block5 = VectorXor(block5, VectorLoad(5*inc, xorBlocks));
- xorBlocks += 6*inc;
- }
-
- func6(block0, block1, block2, block3, block4, block5, subKeys, rounds);
-
- if (xorBlocks && !(flags & BlockTransformation::BT_XorInput))
- {
- const int inc = static_cast<int>(xorIncrement);
- block0 = VectorXor(block0, VectorLoad(0*inc, xorBlocks));
- block1 = VectorXor(block1, VectorLoad(1*inc, xorBlocks));
- block2 = VectorXor(block2, VectorLoad(2*inc, xorBlocks));
- block3 = VectorXor(block3, VectorLoad(3*inc, xorBlocks));
- block4 = VectorXor(block4, VectorLoad(4*inc, xorBlocks));
- block5 = VectorXor(block5, VectorLoad(5*inc, xorBlocks));
- xorBlocks += 6*inc;
- }
-
- const int inc = static_cast<int>(outIncrement);
- VectorStore(block0, outBlocks+0*inc);
- VectorStore(block1, outBlocks+1*inc);
- VectorStore(block2, outBlocks+2*inc);
- VectorStore(block3, outBlocks+3*inc);
- VectorStore(block4, outBlocks+4*inc);
- VectorStore(block5, outBlocks+5*inc);
-
- outBlocks += 6*inc;
- length -= 6*blockSize;
- }
- }
-
- while (length >= blockSize)
- {
- VectorType block = VectorLoad(inBlocks);
-
- if (flags & BlockTransformation::BT_XorInput)
- block = VectorXor(block, VectorLoad(xorBlocks));
-
- if (flags & BlockTransformation::BT_InBlockIsCounter)
- const_cast<byte *>(inBlocks)[15]++;
-
- func1(block, subKeys, rounds);
-
- if (xorBlocks && !(flags & BlockTransformation::BT_XorInput))
- block = VectorXor(block, VectorLoad(xorBlocks));
-
- VectorStore(block, outBlocks);
-
- inBlocks += inIncrement;
- outBlocks += outIncrement;
- xorBlocks += xorIncrement;
- length -= blockSize;
- }
-
- return length;
-}
-
ANONYMOUS_NAMESPACE_END
// We still need rcon and Se to fallback to C/C++ for AES-192 and AES-256.
@@ -925,17 +802,17 @@ void Rijndael_UncheckedSetKey_POWER8(const byte* userKey, size_t keyLen, word32*
}
}
-size_t Rijndael_Enc_AdvancedProcessBlocks_POWER8(const word32 *subKeys, size_t rounds,
+size_t Rijndael_Enc_AdvancedProcessBlocks128_6x1_ALTIVEC(const word32 *subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
- return Rijndael_AdvancedProcessBlocks_POWER8(POWER8_Enc_Block, POWER8_Enc_6_Blocks,
+ return AdvancedProcessBlocks128_6x1_ALTIVEC(POWER8_Enc_Block, POWER8_Enc_6_Blocks,
subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}
-size_t Rijndael_Dec_AdvancedProcessBlocks_POWER8(const word32 *subKeys, size_t rounds,
+size_t Rijndael_Dec_AdvancedProcessBlocks128_6x1_ALTIVEC(const word32 *subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
- return Rijndael_AdvancedProcessBlocks_POWER8(POWER8_Dec_Block, POWER8_Dec_6_Blocks,
+ return AdvancedProcessBlocks128_6x1_ALTIVEC(POWER8_Dec_Block, POWER8_Dec_6_Blocks,
subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}
diff --git a/rijndael.cpp b/rijndael.cpp
index 9074cec9..4ffe2a4b 100644
--- a/rijndael.cpp
+++ b/rijndael.cpp
@@ -304,9 +304,9 @@ extern size_t Rijndael_Dec_AdvancedProcessBlocks_ARMV8(const word32 *subkeys, si
extern void Rijndael_UncheckedSetKey_POWER8(const byte* userKey, size_t keyLen,
word32* rk, const word32* rc, const byte* Se);
-extern size_t Rijndael_Enc_AdvancedProcessBlocks_POWER8(const word32 *subkeys, size_t rounds,
+extern size_t Rijndael_Enc_AdvancedProcessBlocks128_6x1_ALTIVEC(const word32 *subkeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
-extern size_t Rijndael_Dec_AdvancedProcessBlocks_POWER8(const word32 *subkeys, size_t rounds,
+extern size_t Rijndael_Dec_AdvancedProcessBlocks128_6x1_ALTIVEC(const word32 *subkeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
#endif
@@ -1139,7 +1139,7 @@ size_t Rijndael::Enc::AdvancedProcessBlocks(const byte *inBlocks, const byte *xo
#endif
#if CRYPTOPP_POWER8_AES_AVAILABLE
if (HasAES())
- return Rijndael_Enc_AdvancedProcessBlocks_POWER8(m_key, m_rounds, inBlocks, xorBlocks, outBlocks, length, flags);
+ return Rijndael_Enc_AdvancedProcessBlocks128_6x1_ALTIVEC(m_key, m_rounds, inBlocks, xorBlocks, outBlocks, length, flags);
#endif
#if (CRYPTOPP_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)) && !defined(CRYPTOPP_DISABLE_RIJNDAEL_ASM)
@@ -1207,7 +1207,7 @@ size_t Rijndael::Dec::AdvancedProcessBlocks(const byte *inBlocks, const byte *xo
#endif
#if CRYPTOPP_POWER8_AES_AVAILABLE
if (HasAES())
- return Rijndael_Dec_AdvancedProcessBlocks_POWER8(m_key, m_rounds, inBlocks, xorBlocks, outBlocks, length, flags);
+ return Rijndael_Dec_AdvancedProcessBlocks128_6x1_ALTIVEC(m_key, m_rounds, inBlocks, xorBlocks, outBlocks, length, flags);
#endif
return BlockTransformation::AdvancedProcessBlocks(inBlocks, xorBlocks, outBlocks, length, flags);
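
The flags threaded through these dispatchers come from BlockTransformation (cryptlib.h). A usage sketch of how a caller reaches the wide code paths, assuming a key and counter are filled in elsewhere: BT_InBlockIsCounter marks the single input block as a counter that is incremented per output block (and written back), and BT_AllowParallel lets the kernel use its 4- or 6-block loop.

#include "aes.h"

using namespace CryptoPP;

// Produce six blocks of AES-CTR keystream in one call.
void KeystreamSketch(const byte key[16], byte counter[16], byte out[6*16])
{
    AES::Encryption enc(key, 16);
    enc.AdvancedProcessBlocks(counter, NULLPTR, out, 6*16,
        BlockTransformation::BT_InBlockIsCounter | BlockTransformation::BT_AllowParallel);
}
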
diff --git a/simon-simd.cpp b/simon-simd.cpp
index e3a39b01..26b41c73 100644
--- a/simon-simd.cpp
+++ b/simon-simd.cpp
@@ -1155,14 +1155,14 @@ NAMESPACE_BEGIN(CryptoPP)
size_t SIMON64_Enc_AdvancedProcessBlocks_NEON(const word32* subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
- return AdvancedProcessBlocks64_NEON2x6(SIMON64_Enc_Block, SIMON64_Enc_6_Blocks,
+ return AdvancedProcessBlocks64_6x2_NEON(SIMON64_Enc_Block, SIMON64_Enc_6_Blocks,
subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}
size_t SIMON64_Dec_AdvancedProcessBlocks_NEON(const word32* subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
- return AdvancedProcessBlocks64_NEON2x6(SIMON64_Dec_Block, SIMON64_Dec_6_Blocks,
+ return AdvancedProcessBlocks64_6x2_NEON(SIMON64_Dec_Block, SIMON64_Dec_6_Blocks,
subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}
#endif // CRYPTOPP_ARM_NEON_AVAILABLE
@@ -1171,14 +1171,14 @@ size_t SIMON64_Dec_AdvancedProcessBlocks_NEON(const word32* subKeys, size_t roun
size_t SIMON128_Enc_AdvancedProcessBlocks_NEON(const word64* subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
- return AdvancedProcessBlocks128_NEON2x6(SIMON128_Enc_Block, SIMON128_Enc_6_Blocks,
+ return AdvancedProcessBlocks128_6x2_NEON(SIMON128_Enc_Block, SIMON128_Enc_6_Blocks,
subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}
size_t SIMON128_Dec_AdvancedProcessBlocks_NEON(const word64* subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
- return AdvancedProcessBlocks128_NEON2x6(SIMON128_Dec_Block, SIMON128_Dec_6_Blocks,
+ return AdvancedProcessBlocks128_6x2_NEON(SIMON128_Dec_Block, SIMON128_Dec_6_Blocks,
subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}
#endif // CRYPTOPP_ARM_NEON_AVAILABLE
@@ -1189,14 +1189,14 @@ size_t SIMON128_Dec_AdvancedProcessBlocks_NEON(const word64* subKeys, size_t rou
size_t SIMON64_Enc_AdvancedProcessBlocks_SSE41(const word32* subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
- return AdvancedProcessBlocks64_SSE2x6(SIMON64_Enc_Block, SIMON64_Enc_6_Blocks,
+ return AdvancedProcessBlocks64_6x2_SSE(SIMON64_Enc_Block, SIMON64_Enc_6_Blocks,
subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}
size_t SIMON64_Dec_AdvancedProcessBlocks_SSE41(const word32* subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
- return AdvancedProcessBlocks64_SSE2x6(SIMON64_Dec_Block, SIMON64_Dec_6_Blocks,
+ return AdvancedProcessBlocks64_6x2_SSE(SIMON64_Dec_Block, SIMON64_Dec_6_Blocks,
subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}
#endif
@@ -1205,14 +1205,14 @@ size_t SIMON64_Dec_AdvancedProcessBlocks_SSE41(const word32* subKeys, size_t rou
size_t SIMON128_Enc_AdvancedProcessBlocks_SSSE3(const word64* subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
- return AdvancedProcessBlocks128_SSE2x6(SIMON128_Enc_Block, SIMON128_Enc_6_Blocks,
+ return AdvancedProcessBlocks128_6x2_SSE(SIMON128_Enc_Block, SIMON128_Enc_6_Blocks,
subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}
size_t SIMON128_Dec_AdvancedProcessBlocks_SSSE3(const word64* subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
- return AdvancedProcessBlocks128_SSE2x6(SIMON128_Dec_Block, SIMON128_Dec_6_Blocks,
+ return AdvancedProcessBlocks128_6x2_SSE(SIMON128_Dec_Block, SIMON128_Dec_6_Blocks,
subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}
#endif // CRYPTOPP_SSSE3_AVAILABLE
diff --git a/speck-simd.cpp b/speck-simd.cpp
index ae340c38..b946460a 100644
--- a/speck-simd.cpp
+++ b/speck-simd.cpp
@@ -1056,14 +1056,14 @@ NAMESPACE_BEGIN(CryptoPP)
size_t SPECK64_Enc_AdvancedProcessBlocks_NEON(const word32* subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
- return AdvancedProcessBlocks64_NEON2x6(SPECK64_Enc_Block, SPECK64_Enc_6_Blocks,
+ return AdvancedProcessBlocks64_6x2_NEON(SPECK64_Enc_Block, SPECK64_Enc_6_Blocks,
subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}
size_t SPECK64_Dec_AdvancedProcessBlocks_NEON(const word32* subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
- return AdvancedProcessBlocks64_NEON2x6(SPECK64_Dec_Block, SPECK64_Dec_6_Blocks,
+ return AdvancedProcessBlocks64_6x2_NEON(SPECK64_Dec_Block, SPECK64_Dec_6_Blocks,
subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}
#endif
@@ -1072,14 +1072,14 @@ size_t SPECK64_Dec_AdvancedProcessBlocks_NEON(const word32* subKeys, size_t roun
size_t SPECK128_Enc_AdvancedProcessBlocks_NEON(const word64* subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
- return AdvancedProcessBlocks128_NEON2x6(SPECK128_Enc_Block, SPECK128_Enc_6_Blocks,
+ return AdvancedProcessBlocks128_6x2_NEON(SPECK128_Enc_Block, SPECK128_Enc_6_Blocks,
subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}
size_t SPECK128_Dec_AdvancedProcessBlocks_NEON(const word64* subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
- return AdvancedProcessBlocks128_NEON2x6(SPECK128_Dec_Block, SPECK128_Dec_6_Blocks,
+ return AdvancedProcessBlocks128_6x2_NEON(SPECK128_Dec_Block, SPECK128_Dec_6_Blocks,
subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}
#endif // CRYPTOPP_ARM_NEON_AVAILABLE
@@ -1090,14 +1090,14 @@ size_t SPECK128_Dec_AdvancedProcessBlocks_NEON(const word64* subKeys, size_t rou
size_t SPECK64_Enc_AdvancedProcessBlocks_SSE41(const word32* subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
- return AdvancedProcessBlocks64_SSE2x6(SPECK64_Enc_Block, SPECK64_Enc_6_Blocks,
+ return AdvancedProcessBlocks64_6x2_SSE(SPECK64_Enc_Block, SPECK64_Enc_6_Blocks,
subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}
size_t SPECK64_Dec_AdvancedProcessBlocks_SSE41(const word32* subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
- return AdvancedProcessBlocks64_SSE2x6(SPECK64_Dec_Block, SPECK64_Dec_6_Blocks,
+ return AdvancedProcessBlocks64_6x2_SSE(SPECK64_Dec_Block, SPECK64_Dec_6_Blocks,
subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}
#endif
@@ -1106,14 +1106,14 @@ size_t SPECK64_Dec_AdvancedProcessBlocks_SSE41(const word32* subKeys, size_t rou
size_t SPECK128_Enc_AdvancedProcessBlocks_SSSE3(const word64* subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
- return AdvancedProcessBlocks128_SSE2x6(SPECK128_Enc_Block, SPECK128_Enc_6_Blocks,
+ return AdvancedProcessBlocks128_6x2_SSE(SPECK128_Enc_Block, SPECK128_Enc_6_Blocks,
subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}
size_t SPECK128_Dec_AdvancedProcessBlocks_SSSE3(const word64* subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
- return AdvancedProcessBlocks128_SSE2x6(SPECK128_Dec_Block, SPECK128_Dec_6_Blocks,
+ return AdvancedProcessBlocks128_6x2_SSE(SPECK128_Dec_Block, SPECK128_Dec_6_Blocks,
subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}
#endif // CRYPTOPP_SSSE3_AVAILABLE