summaryrefslogtreecommitdiff
path: root/speck.cpp
diff options
context:
space:
mode:
authorJeffrey Walton <noloader@gmail.com>2018-02-18 23:23:50 -0500
committerJeffrey Walton <noloader@gmail.com>2018-02-18 23:23:50 -0500
commite416b243d37d0904c6dfdf1ecce491f458fcecbb (patch)
treec02738cd3768de42aa558a2a19763bd05dd91b25 /speck.cpp
parente5b9fa648569b161151c94f21506d171e71d7a1b (diff)
downloadcryptopp-git-e416b243d37d0904c6dfdf1ecce491f458fcecbb.tar.gz
Re-add Simon and Speck, enable SSE (GH #585)
This commit re-adds Simon and Speck. The commit includes C++, SSSE3 and SSE4. NEON, Aarch32 and Aarch64 are disabled at the moment.
Diffstat (limited to 'speck.cpp')
-rw-r--r--speck.cpp438
1 files changed, 438 insertions, 0 deletions
diff --git a/speck.cpp b/speck.cpp
new file mode 100644
index 00000000..9aaff67f
--- /dev/null
+++ b/speck.cpp
@@ -0,0 +1,438 @@
+// speck.cpp - written and placed in the public domain by Jeffrey Walton
+
+#include "pch.h"
+#include "config.h"
+
+#include "speck.h"
+#include "misc.h"
+#include "cpu.h"
+
+// Uncomment for benchmarking C++ against SSE or NEON.
+// Do so in both speck.cpp and speck-simd.cpp.
+// #undef CRYPTOPP_SSSE3_AVAILABLE
+// #undef CRYPTOPP_SSE41_AVAILABLE
+// #undef CRYPTOPP_ARM_NEON_AVAILABLE
+
+ANONYMOUS_NAMESPACE_BEGIN
+
+using CryptoPP::word32;
+using CryptoPP::word64;
+using CryptoPP::rotlConstant;
+using CryptoPP::rotrConstant;
+
+/// \brief Forward round transformation
+/// \tparam W word type
+/// \details TF83() is the forward round transformation using a=8 and b=3 rotations.
+/// The initial test implementation provided template parameters, but they were
+/// removed because SPECK32 using a=7 and b=2 was not on the road map. The
+/// additional template parameters also made calling SPECK_Encrypt and SPECK_Decrypt
+/// kind of messy.
+template <class W>
+inline void TF83(W& x, W& y, const W k)
+{
+ x = rotrConstant<8>(x);
+ x += y; x ^= k;
+ y = rotlConstant<3>(y);
+ y ^= x;
+}
+
+/// \brief Reverse round transformation
+/// \tparam W word type
+/// \details TR83() is the reverse round transformation using a=8 and b=3 rotations.
+/// The initial test implementation provided template parameters, but they were
+/// removed because SPECK32 using a=7 and b=2 was not on the road map. The
+/// additional template parameters also made calling SPECK_Encrypt and SPECK_Decrypt
+/// kind of messy.
+template <class W>
+inline void TR83(W& x, W& y, const W k)
+{
+ y ^= x;
+ y = rotrConstant<3>(y);
+ x ^= k; x -= y;
+ x = rotlConstant<8>(x);
+}
+
+/// \brief Forward transformation
+/// \tparam W word type
+/// \tparam R number of rounds
+/// \param c output array
+/// \param p input array
+/// \param k subkey array
+template <class W, unsigned int R>
+inline void SPECK_Encrypt(W c[2], const W p[2], const W k[R])
+{
+ c[0]=p[0]; c[1]=p[1];
+
+ // Don't unroll this loop. Things slow down.
+ for (int i = 0; i < static_cast<int>(R); ++i)
+ TF83(c[0], c[1], k[i]);
+}
+
+/// \brief Reverse transformation
+/// \tparam W word type
+/// \tparam R number of rounds
+/// \param p output array
+/// \param c input array
+/// \param k subkey array
+template <class W, unsigned int R>
+inline void SPECK_Decrypt(W p[2], const W c[2], const W k[R])
+{
+ p[0]=c[0]; p[1]=c[1];
+
+ // Don't unroll this loop. Things slow down.
+ for (int i = static_cast<int>(R-1); i >= 0; --i)
+ TR83(p[0], p[1], k[i]);
+}
+
+/// \brief Subkey generation function
+/// \details Used when the user key consists of 2 words
+/// \tparam W word type
+/// \tparam R number of rounds
+/// \param key empty subkey array
+/// \param k user key array
+template <class W, unsigned int R>
+inline void SPECK_ExpandKey_2W(W key[R], const W k[2])
+{
+ CRYPTOPP_ASSERT(R==32);
+ W i=0, B=k[0], A=k[1];
+
+ while (i<R-1)
+ {
+ key[i]=A; TF83(B, A, i);
+ i++;
+ }
+ key[R-1]=A;
+}
+
+/// \brief Subkey generation function
+/// \details Used when the user key consists of 3 words
+/// \tparam W word type
+/// \tparam R number of rounds
+/// \param key empty subkey array
+/// \param k user key array
+template <class W, unsigned int R>
+inline void SPECK_ExpandKey_3W(W key[R], const W k[3])
+{
+ CRYPTOPP_ASSERT(R==33 || R==26);
+ W i=0, C=k[0], B=k[1], A=k[2];
+
+ unsigned int blocks = R/2;
+ while (blocks--)
+ {
+ key[i+0]=A; TF83(B, A, i+0);
+ key[i+1]=A; TF83(C, A, i+1);
+ i+=2;
+ }
+
+ // The constexpr residue should allow the optimizer to remove unneeded statements
+ if(R%2 == 1)
+ {
+ key[R-1]=A;
+ }
+}
+
+/// \brief Subkey generation function
+/// \details Used when the user key consists of 4 words
+/// \tparam W word type
+/// \tparam R number of rounds
+/// \param key empty subkey array
+/// \param k user key array
+template <class W, unsigned int R>
+inline void SPECK_ExpandKey_4W(W key[R], const W k[4])
+{
+ CRYPTOPP_ASSERT(R==34 || R==27);
+ W i=0, D=k[0], C=k[1], B=k[2], A=k[3];
+
+ unsigned int blocks = R/3;
+ while (blocks--)
+ {
+ key[i+0]=A; TF83(B, A, i+0);
+ key[i+1]=A; TF83(C, A, i+1);
+ key[i+2]=A; TF83(D, A, i+2);
+ i+=3;
+ }
+
+ // The constexpr residue should allow the optimizer to remove unneeded statements
+ if(R%3 == 1)
+ {
+ key[R-1]=A;
+ }
+ else if(R%3 == 2)
+ {
+ key[R-2]=A; TF83(B, A, W(R-2));
+ key[R-1]=A;
+ }
+}
+
+ANONYMOUS_NAMESPACE_END
+
+///////////////////////////////////////////////////////////
+
+NAMESPACE_BEGIN(CryptoPP)
+
+#if defined(CRYPTOPP_ARM_NEON_AVAILABLE)
+extern size_t SPECK64_Enc_AdvancedProcessBlocks_NEON(const word32* subKeys, size_t rounds,
+ const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
+
+extern size_t SPECK64_Dec_AdvancedProcessBlocks_NEON(const word32* subKeys, size_t rounds,
+ const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
+
+extern size_t SPECK128_Enc_AdvancedProcessBlocks_NEON(const word64* subKeys, size_t rounds,
+ const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
+
+extern size_t SPECK128_Dec_AdvancedProcessBlocks_NEON(const word64* subKeys, size_t rounds,
+ const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
+#endif
+
+#if defined(CRYPTOPP_SSE41_AVAILABLE)
+extern size_t SPECK64_Enc_AdvancedProcessBlocks_SSE41(const word32* subKeys, size_t rounds,
+ const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
+
+extern size_t SPECK64_Dec_AdvancedProcessBlocks_SSE41(const word32* subKeys, size_t rounds,
+ const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
+#endif
+
+#if defined(CRYPTOPP_SSSE3_AVAILABLE)
+extern size_t SPECK128_Enc_AdvancedProcessBlocks_SSSE3(const word64* subKeys, size_t rounds,
+ const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
+
+extern size_t SPECK128_Dec_AdvancedProcessBlocks_SSSE3(const word64* subKeys, size_t rounds,
+ const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
+#endif
+
+void SPECK64::Base::UncheckedSetKey(const byte *userKey, unsigned int keyLength, const NameValuePairs &params)
+{
+ CRYPTOPP_ASSERT(keyLength == 12 || keyLength == 16);
+ CRYPTOPP_UNUSED(params);
+
+ // Building the key schedule table requires {3,4} words workspace.
+ // Encrypting and decrypting requires 4 words workspace.
+ m_kwords = keyLength/sizeof(word32);
+ m_wspace.New(4U);
+
+ // Do the endian gyrations from the paper and align pointers
+ typedef GetBlock<word32, LittleEndian, false> KeyBlock;
+ KeyBlock kblk(userKey);
+
+ switch (m_kwords)
+ {
+ case 3:
+ m_rkeys.New((m_rounds = 26));
+ kblk(m_wspace[2])(m_wspace[1])(m_wspace[0]);
+ SPECK_ExpandKey_3W<word32, 26>(m_rkeys, m_wspace);
+ break;
+ case 4:
+ m_rkeys.New((m_rounds = 27));
+ kblk(m_wspace[3])(m_wspace[2])(m_wspace[1])(m_wspace[0]);
+ SPECK_ExpandKey_4W<word32, 27>(m_rkeys, m_wspace);
+ break;
+ default:
+ CRYPTOPP_ASSERT(0);;
+ }
+}
+
+void SPECK64::Enc::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
+{
+ // Do the endian gyrations from the paper and align pointers
+ typedef GetBlock<word32, LittleEndian, false> InBlock;
+ InBlock iblk(inBlock); iblk(m_wspace[1])(m_wspace[0]);
+
+ switch (m_rounds)
+ {
+ case 26:
+ SPECK_Encrypt<word32, 26>(m_wspace+2, m_wspace+0, m_rkeys);
+ break;
+ case 27:
+ SPECK_Encrypt<word32, 27>(m_wspace+2, m_wspace+0, m_rkeys);
+ break;
+ default:
+ CRYPTOPP_ASSERT(0);;
+ }
+
+ // Do the endian gyrations from the paper and align pointers
+ typedef PutBlock<word32, LittleEndian, false> OutBlock;
+ OutBlock oblk(xorBlock, outBlock); oblk(m_wspace[3])(m_wspace[2]);
+}
+
+void SPECK64::Dec::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
+{
+ // Do the endian gyrations from the paper and align pointers
+ typedef GetBlock<word32, LittleEndian, false> InBlock;
+ InBlock iblk(inBlock); iblk(m_wspace[1])(m_wspace[0]);
+
+ switch (m_rounds)
+ {
+ case 26:
+ SPECK_Decrypt<word32, 26>(m_wspace+2, m_wspace+0, m_rkeys);
+ break;
+ case 27:
+ SPECK_Decrypt<word32, 27>(m_wspace+2, m_wspace+0, m_rkeys);
+ break;
+ default:
+ CRYPTOPP_ASSERT(0);;
+ }
+
+ // Do the endian gyrations from the paper and align pointers
+ typedef PutBlock<word32, LittleEndian, false> OutBlock;
+ OutBlock oblk(xorBlock, outBlock); oblk(m_wspace[3])(m_wspace[2]);
+}
+
+///////////////////////////////////////////////////////////
+
+void SPECK128::Base::UncheckedSetKey(const byte *userKey, unsigned int keyLength, const NameValuePairs &params)
+{
+ CRYPTOPP_ASSERT(keyLength == 16 || keyLength == 24 || keyLength == 32);
+ CRYPTOPP_UNUSED(params);
+
+ // Building the key schedule table requires {2,3,4} words workspace.
+ // Encrypting and decrypting requires 4 words workspace.
+ m_kwords = keyLength/sizeof(word64);
+ m_wspace.New(4U);
+
+ // Do the endian gyrations from the paper and align pointers
+ typedef GetBlock<word64, LittleEndian, false> KeyBlock;
+ KeyBlock kblk(userKey);
+
+ switch (m_kwords)
+ {
+ case 2:
+ m_rkeys.New((m_rounds = 32));
+ kblk(m_wspace[1])(m_wspace[0]);
+ SPECK_ExpandKey_2W<word64, 32>(m_rkeys, m_wspace);
+ break;
+ case 3:
+ m_rkeys.New((m_rounds = 33));
+ kblk(m_wspace[2])(m_wspace[1])(m_wspace[0]);
+ SPECK_ExpandKey_3W<word64, 33>(m_rkeys, m_wspace);
+ break;
+ case 4:
+ m_rkeys.New((m_rounds = 34));
+ kblk(m_wspace[3])(m_wspace[2])(m_wspace[1])(m_wspace[0]);
+ SPECK_ExpandKey_4W<word64, 34>(m_rkeys, m_wspace);
+ break;
+ default:
+ CRYPTOPP_ASSERT(0);;
+ }
+}
+
+void SPECK128::Enc::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
+{
+ // Do the endian gyrations from the paper and align pointers
+ typedef GetBlock<word64, LittleEndian, false> InBlock;
+ InBlock iblk(inBlock); iblk(m_wspace[1])(m_wspace[0]);
+
+ switch (m_rounds)
+ {
+ case 32:
+ SPECK_Encrypt<word64, 32>(m_wspace+2, m_wspace+0, m_rkeys);
+ break;
+ case 33:
+ SPECK_Encrypt<word64, 33>(m_wspace+2, m_wspace+0, m_rkeys);
+ break;
+ case 34:
+ SPECK_Encrypt<word64, 34>(m_wspace+2, m_wspace+0, m_rkeys);
+ break;
+ default:
+ CRYPTOPP_ASSERT(0);;
+ }
+
+ // Do the endian gyrations from the paper and align pointers
+ typedef PutBlock<word64, LittleEndian, false> OutBlock;
+ OutBlock oblk(xorBlock, outBlock); oblk(m_wspace[3])(m_wspace[2]);
+}
+
+void SPECK128::Dec::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
+{
+ // Do the endian gyrations from the paper and align pointers
+ typedef GetBlock<word64, LittleEndian, false> InBlock;
+ InBlock iblk(inBlock); iblk(m_wspace[1])(m_wspace[0]);
+
+ switch (m_rounds)
+ {
+ case 32:
+ SPECK_Decrypt<word64, 32>(m_wspace+2, m_wspace+0, m_rkeys);
+ break;
+ case 33:
+ SPECK_Decrypt<word64, 33>(m_wspace+2, m_wspace+0, m_rkeys);
+ break;
+ case 34:
+ SPECK_Decrypt<word64, 34>(m_wspace+2, m_wspace+0, m_rkeys);
+ break;
+ default:
+ CRYPTOPP_ASSERT(0);;
+ }
+
+ // Do the endian gyrations from the paper and align pointers
+ typedef PutBlock<word64, LittleEndian, false> OutBlock;
+ OutBlock oblk(xorBlock, outBlock); oblk(m_wspace[3])(m_wspace[2]);
+}
+
+#if defined(CRYPTOPP_SPECK64_ADVANCED_PROCESS_BLOCKS)
+size_t SPECK64::Enc::AdvancedProcessBlocks(const byte *inBlocks, const byte *xorBlocks,
+ byte *outBlocks, size_t length, word32 flags) const
+{
+#if defined(CRYPTOPP_SSE41_AVAILABLE)
+ if (HasSSE41())
+ return SPECK64_Enc_AdvancedProcessBlocks_SSE41(m_rkeys, (size_t)m_rounds,
+ inBlocks, xorBlocks, outBlocks, length, flags);
+#endif
+#if defined(CRYPTOPP_ARM_NEON_AVAILABLE)
+ if (HasNEON())
+ return SPECK64_Enc_AdvancedProcessBlocks_NEON(m_rkeys, (size_t)m_rounds,
+ inBlocks, xorBlocks, outBlocks, length, flags);
+#endif
+ return BlockTransformation::AdvancedProcessBlocks(inBlocks, xorBlocks, outBlocks, length, flags);
+}
+
+size_t SPECK64::Dec::AdvancedProcessBlocks(const byte *inBlocks, const byte *xorBlocks,
+ byte *outBlocks, size_t length, word32 flags) const
+{
+#if defined(CRYPTOPP_SSE41_AVAILABLE)
+ if (HasSSE41())
+ return SPECK64_Dec_AdvancedProcessBlocks_SSE41(m_rkeys, (size_t)m_rounds,
+ inBlocks, xorBlocks, outBlocks, length, flags);
+#endif
+#if defined(CRYPTOPP_ARM_NEON_AVAILABLE)
+ if (HasNEON())
+ return SPECK64_Dec_AdvancedProcessBlocks_NEON(m_rkeys, (size_t)m_rounds,
+ inBlocks, xorBlocks, outBlocks, length, flags);
+#endif
+ return BlockTransformation::AdvancedProcessBlocks(inBlocks, xorBlocks, outBlocks, length, flags);
+}
+#endif // CRYPTOPP_SPECK64_ADVANCED_PROCESS_BLOCKS
+
+#if defined(CRYPTOPP_SPECK128_ADVANCED_PROCESS_BLOCKS)
+size_t SPECK128::Enc::AdvancedProcessBlocks(const byte *inBlocks, const byte *xorBlocks,
+ byte *outBlocks, size_t length, word32 flags) const
+{
+#if defined(CRYPTOPP_SSSE3_AVAILABLE)
+ if (HasSSSE3())
+ return SPECK128_Enc_AdvancedProcessBlocks_SSSE3(m_rkeys, (size_t)m_rounds,
+ inBlocks, xorBlocks, outBlocks, length, flags);
+#endif
+#if defined(CRYPTOPP_ARM_NEON_AVAILABLE)
+ if (HasNEON())
+ return SPECK128_Enc_AdvancedProcessBlocks_NEON(m_rkeys, (size_t)m_rounds,
+ inBlocks, xorBlocks, outBlocks, length, flags);
+#endif
+ return BlockTransformation::AdvancedProcessBlocks(inBlocks, xorBlocks, outBlocks, length, flags);
+}
+
+size_t SPECK128::Dec::AdvancedProcessBlocks(const byte *inBlocks, const byte *xorBlocks,
+ byte *outBlocks, size_t length, word32 flags) const
+{
+#if defined(CRYPTOPP_SSSE3_AVAILABLE)
+ if (HasSSSE3())
+ return SPECK128_Dec_AdvancedProcessBlocks_SSSE3(m_rkeys, (size_t)m_rounds,
+ inBlocks, xorBlocks, outBlocks, length, flags);
+#endif
+#if defined(CRYPTOPP_ARM_NEON_AVAILABLE)
+ if (HasNEON())
+ return SPECK128_Dec_AdvancedProcessBlocks_NEON(m_rkeys, (size_t)m_rounds,
+ inBlocks, xorBlocks, outBlocks, length, flags);
+#endif
+ return BlockTransformation::AdvancedProcessBlocks(inBlocks, xorBlocks, outBlocks, length, flags);
+}
+#endif // CRYPTOPP_SPECK128_ADVANCED_PROCESS_BLOCKS
+
+NAMESPACE_END