// speck.cpp - written and placed in the public domain by Jeffrey Walton #include "pch.h" #include "config.h" #include "speck.h" #include "misc.h" #include "cpu.h" // Uncomment for benchmarking C++ against SSE2 or NEON. // Do so in both speck.cpp and speck-simd.cpp. // #undef CRYPTOPP_SSSE3_AVAILABLE // #undef CRYPTOPP_ARM_NEON_AVAILABLE // Disable NEON/ASIMD for Cortex-A53 and A57. The shifts are too slow and C/C++ is about // 3 cpb faster than NEON/ASIMD. Also see http://github.com/weidai11/cryptopp/issues/367. #if (defined(__aarch32__) || defined(__aarch64__)) && defined(CRYPTOPP_SLOW_ARMV8_SHIFT) # undef CRYPTOPP_ARM_NEON_AVAILABLE #endif ANONYMOUS_NAMESPACE_BEGIN using CryptoPP::word32; using CryptoPP::word64; using CryptoPP::rotlConstant; using CryptoPP::rotrConstant; //! \brief Forward round transformation //! \tparam W word type //! \details TF83() is the forward round transformation using a=8 and b=3 rotations. //! The initial test implementation provided template parameters, but they were //! removed because SPECK32 using a=7 and b=2 was not on the road map. The //! additional template parameters also made calling SPECK_Encrypt and SPECK_Decrypt //! kind of messy. template inline void TF83(W& x, W& y, const W k) { x = rotrConstant<8>(x); x += y; x ^= k; y = rotlConstant<3>(y); y ^= x; } //! \brief Reverse round transformation //! \tparam W word type //! \details TR83() is the reverse round transformation using a=8 and b=3 rotations. //! The initial test implementation provided template parameters, but they were //! removed because SPECK32 using a=7 and b=2 was not on the road map. The //! additional template parameters also made calling SPECK_Encrypt and SPECK_Decrypt //! kind of messy. template inline void TR83(W& x, W& y, const W k) { y ^= x; y = rotrConstant<3>(y); x ^= k; x -= y; x = rotlConstant<8>(x); } //! \brief Forward transformation //! \tparam W word type //! \tparam R number of rounds //! \param c output array //! \param p input array //! \param k subkey array template inline void SPECK_Encrypt(W c[2], const W p[2], const W k[R]) { c[0]=p[0]; c[1]=p[1]; // Don't unroll this loop. Things slow down. for (size_t i=0; static_cast(i) inline void SPECK_Decrypt(W p[2], const W c[2], const W k[R]) { p[0]=c[0]; p[1]=c[1]; // Don't unroll this loop. Things slow down. for (size_t i=R-1; static_cast(i)>=0; --i) TR83(p[0], p[1], k[i]); } //! \brief Subkey generation function //! \details Used when the user key consists of 2 words //! \tparam W word type //! \tparam R number of rounds //! \param key empty subkey array //! \param k user key array template inline void SPECK_ExpandKey_2W(W key[R], const W k[2]) { CRYPTOPP_ASSERT(R==32); W i=0, B=k[0], A=k[1]; while (i inline void SPECK_ExpandKey_3W(W key[R], const W k[3]) { CRYPTOPP_ASSERT(R==33 || R==26); W i=0, C=k[0], B=k[1], A=k[2]; unsigned int blocks = R/2; while (blocks--) { key[i+0]=A; TF83(B, A, i+0); key[i+1]=A; TF83(C, A, i+1); i+=2; } // The constexpr residue should allow the optimizer to remove unneeded statements if(R%2 == 1) { key[R-1]=A; } } //! \brief Subkey generation function //! \details Used when the user key consists of 4 words //! \tparam W word type //! \tparam R number of rounds //! \param key empty subkey array //! \param k user key array template inline void SPECK_ExpandKey_4W(W key[R], const W k[4]) { CRYPTOPP_ASSERT(R==34 || R==27); W i=0, D=k[0], C=k[1], B=k[2], A=k[3]; unsigned int blocks = R/3; while (blocks--) { key[i+0]=A; TF83(B, A, i+0); key[i+1]=A; TF83(C, A, i+1); key[i+2]=A; TF83(D, A, i+2); i+=3; } // The constexpr residue should allow the optimizer to remove unneeded statements if(R%3 == 1) { key[R-1]=A; } else if(R%3 == 2) { key[R-2]=A; TF83(B, A, W(R-2)); key[R-1]=A; } } ANONYMOUS_NAMESPACE_END /////////////////////////////////////////////////////////// NAMESPACE_BEGIN(CryptoPP) #if defined(CRYPTOPP_ARM_NEON_AVAILABLE) extern size_t SPECK128_Enc_AdvancedProcessBlocks_NEON(const word64* subKeys, size_t rounds, const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags); extern size_t SPECK128_Dec_AdvancedProcessBlocks_NEON(const word64* subKeys, size_t rounds, const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags); #endif #if defined(CRYPTOPP_SSSE3_AVAILABLE) extern size_t SPECK128_Enc_AdvancedProcessBlocks_SSSE3(const word64* subKeys, size_t rounds, const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags); extern size_t SPECK128_Dec_AdvancedProcessBlocks_SSSE3(const word64* subKeys, size_t rounds, const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags); #endif void SPECK64::Base::UncheckedSetKey(const byte *userKey, unsigned int keyLength, const NameValuePairs ¶ms) { CRYPTOPP_ASSERT(keyLength == 12 || keyLength == 16); CRYPTOPP_UNUSED(params); // Building the key schedule table requires {3,4} words workspace. // Encrypting and decrypting requires 4 words workspace. m_kwords = keyLength/sizeof(word32); m_wspace.New(STDMAX(m_kwords,4U)); GetUserKey(BIG_ENDIAN_ORDER, m_wspace.begin(), m_kwords, userKey, keyLength); switch (m_kwords) { case 3: m_rkeys.New(26); m_rounds = 26; SPECK_ExpandKey_3W(m_rkeys, m_wspace); break; case 4: m_rkeys.New(27); m_rounds = 27; SPECK_ExpandKey_4W(m_rkeys, m_wspace); break; default: CRYPTOPP_ASSERT(0);; } } void SPECK64::Enc::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const { // Reverse bytes on LittleEndian; align pointer on BigEndian typedef GetBlock InBlock; InBlock iblk(inBlock); iblk(m_wspace[0])(m_wspace[1]); switch (m_rounds) { case 26: SPECK_Encrypt(m_wspace+2, m_wspace+0, m_rkeys); break; case 27: SPECK_Encrypt(m_wspace+2, m_wspace+0, m_rkeys); break; default: CRYPTOPP_ASSERT(0);; } // Reverse bytes on LittleEndian; align pointer on BigEndian typedef PutBlock OutBlock; OutBlock oblk(xorBlock, outBlock); oblk(m_wspace[2])(m_wspace[3]); } void SPECK64::Dec::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const { // Reverse bytes on LittleEndian; align pointer on BigEndian typedef GetBlock InBlock; InBlock iblk(inBlock); iblk(m_wspace[0])(m_wspace[1]); switch (m_rounds) { case 26: SPECK_Decrypt(m_wspace+2, m_wspace+0, m_rkeys); break; case 27: SPECK_Decrypt(m_wspace+2, m_wspace+0, m_rkeys); break; default: CRYPTOPP_ASSERT(0);; } // Reverse bytes on LittleEndian; align pointer on BigEndian typedef PutBlock OutBlock; OutBlock oblk(xorBlock, outBlock); oblk(m_wspace[2])(m_wspace[3]); } /////////////////////////////////////////////////////////// void SPECK128::Base::UncheckedSetKey(const byte *userKey, unsigned int keyLength, const NameValuePairs ¶ms) { CRYPTOPP_ASSERT(keyLength == 16 || keyLength == 24 || keyLength == 32); CRYPTOPP_UNUSED(params); // Building the key schedule table requires {2,3,4} words workspace. // Encrypting and decrypting requires 4 words workspace. m_kwords = keyLength/sizeof(word64); m_wspace.New(STDMAX(m_kwords,4U)); GetUserKey(BIG_ENDIAN_ORDER, m_wspace.begin(), m_kwords, userKey, keyLength); switch (m_kwords) { case 2: m_rkeys.New(32); m_rounds = 32; SPECK_ExpandKey_2W(m_rkeys, m_wspace); break; case 3: m_rkeys.New(33); m_rounds = 33; SPECK_ExpandKey_3W(m_rkeys, m_wspace); break; case 4: m_rkeys.New(34); m_rounds = 34; SPECK_ExpandKey_4W(m_rkeys, m_wspace); break; default: CRYPTOPP_ASSERT(0);; } } void SPECK128::Enc::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const { // Reverse bytes on LittleEndian; align pointer on BigEndian typedef GetBlock InBlock; InBlock iblk(inBlock); iblk(m_wspace[0])(m_wspace[1]); switch (m_rounds) { case 32: SPECK_Encrypt(m_wspace+2, m_wspace+0, m_rkeys); break; case 33: SPECK_Encrypt(m_wspace+2, m_wspace+0, m_rkeys); break; case 34: SPECK_Encrypt(m_wspace+2, m_wspace+0, m_rkeys); break; default: CRYPTOPP_ASSERT(0);; } // Reverse bytes on LittleEndian; align pointer on BigEndian typedef PutBlock OutBlock; OutBlock oblk(xorBlock, outBlock); oblk(m_wspace[2])(m_wspace[3]); } void SPECK128::Dec::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const { // Reverse bytes on LittleEndian; align pointer on BigEndian typedef GetBlock InBlock; InBlock iblk(inBlock); iblk(m_wspace[0])(m_wspace[1]); switch (m_rounds) { case 32: SPECK_Decrypt(m_wspace+2, m_wspace+0, m_rkeys); break; case 33: SPECK_Decrypt(m_wspace+2, m_wspace+0, m_rkeys); break; case 34: SPECK_Decrypt(m_wspace+2, m_wspace+0, m_rkeys); break; default: CRYPTOPP_ASSERT(0);; } // Reverse bytes on LittleEndian; align pointer on BigEndian typedef PutBlock OutBlock; OutBlock oblk(xorBlock, outBlock); oblk(m_wspace[2])(m_wspace[3]); } #if defined(CRYPTOPP_SPECK_ADVANCED_PROCESS_BLOCKS) size_t SPECK128::Enc::AdvancedProcessBlocks(const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags) const { #if defined(CRYPTOPP_SSSE3_AVAILABLE) if (HasSSSE3()) return SPECK128_Enc_AdvancedProcessBlocks_SSSE3(m_rkeys, (size_t)m_rounds, inBlocks, xorBlocks, outBlocks, length, flags); #endif #if defined(CRYPTOPP_ARM_NEON_AVAILABLE) if (HasNEON()) return SPECK128_Enc_AdvancedProcessBlocks_NEON(m_rkeys, (size_t)m_rounds, inBlocks, xorBlocks, outBlocks, length, flags); #endif return BlockTransformation::AdvancedProcessBlocks(inBlocks, xorBlocks, outBlocks, length, flags); } size_t SPECK128::Dec::AdvancedProcessBlocks(const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags) const { #if defined(CRYPTOPP_SSSE3_AVAILABLE) if (HasSSSE3()) return SPECK128_Dec_AdvancedProcessBlocks_SSSE3(m_rkeys, (size_t)m_rounds, inBlocks, xorBlocks, outBlocks, length, flags); #endif #if defined(CRYPTOPP_ARM_NEON_AVAILABLE) if (HasNEON()) return SPECK128_Dec_AdvancedProcessBlocks_NEON(m_rkeys, (size_t)m_rounds, inBlocks, xorBlocks, outBlocks, length, flags); #endif return BlockTransformation::AdvancedProcessBlocks(inBlocks, xorBlocks, outBlocks, length, flags); } #endif NAMESPACE_END