// chacha.cpp - written and placed in the public domain by Jeffrey Walton. // Based on Wei Dai's Salsa20, Botan's SSE2 implementation, // and Bernstein's reference ChaCha family implementation at // http://cr.yp.to/chacha.html. #include "pch.h" #include "config.h" #include "chacha.h" #include "argnames.h" #include "misc.h" #include "cpu.h" // Internal compiler error in GCC 3.3 and below #if defined(__GNUC__) && (__GNUC__ < 4) # undef CRYPTOPP_SSE2_INTRIN_AVAILABLE #endif NAMESPACE_BEGIN(CryptoPP) #if (CRYPTOPP_ARM_NEON_AVAILABLE) extern void ChaCha_OperateKeystream_NEON(const word32 *state, const byte* input, byte *output, unsigned int rounds); #endif #if (CRYPTOPP_AVX2_AVAILABLE) extern void ChaCha_OperateKeystream_AVX2(const word32 *state, const byte* input, byte *output, unsigned int rounds); #endif #if (CRYPTOPP_SSE2_INTRIN_AVAILABLE) extern void ChaCha_OperateKeystream_SSE2(const word32 *state, const byte* input, byte *output, unsigned int rounds); #endif #if (CRYPTOPP_ALTIVEC_AVAILABLE) extern void ChaCha_OperateKeystream_ALTIVEC(const word32 *state, const byte* input, byte *output, unsigned int rounds); #endif #if defined(CRYPTOPP_DEBUG) && !defined(CRYPTOPP_DOXYGEN_PROCESSING) void ChaCha_TestInstantiations() { ChaCha::Encryption x; ChaChaTLS::Encryption y; XChaCha20::Encryption z; } #endif NAMESPACE_END // CryptoPP ////////////////////////////// ChaCha Core ////////////////////////////// #define CHACHA_QUARTER_ROUND(a,b,c,d) \ a += b; d ^= a; d = rotlConstant<16,word32>(d); \ c += d; b ^= c; b = rotlConstant<12,word32>(b); \ a += b; d ^= a; d = rotlConstant<8,word32>(d); \ c += d; b ^= c; b = rotlConstant<7,word32>(b); #define CHACHA_OUTPUT(x){\ CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 0, x0 + state[0]);\ CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 1, x1 + state[1]);\ CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 2, x2 + state[2]);\ CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 3, x3 + state[3]);\ CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 4, x4 + state[4]);\ CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 5, x5 + state[5]);\ CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 6, x6 + state[6]);\ CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 7, x7 + state[7]);\ CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 8, x8 + state[8]);\ CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 9, x9 + state[9]);\ CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 10, x10 + state[10]);\ CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 11, x11 + state[11]);\ CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 12, x12 + state[12]);\ CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 13, x13 + state[13]);\ CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 14, x14 + state[14]);\ CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 15, x15 + state[15]);} ANONYMOUS_NAMESPACE_BEGIN // Hacks... Bring in all symbols, and supply // the stuff the templates normally provide. using namespace CryptoPP; typedef word32 WordType; enum {BYTES_PER_ITERATION=64}; // MultiBlockSafe detects a condition that can arise in the SIMD // implementations where we overflow one of the 32-bit state words during // addition in an intermediate result. Preconditions for the issue include // a user seeks to around 2^32 blocks (256 GB of data) for ChaCha; or a // user specifies an arbitrarily large initial counter block for ChaChaTLS. // Also see https://github.com/weidai11/cryptopp/issues/732. inline bool MultiBlockSafe(unsigned int ctrLow, unsigned int blocks) { return 0xffffffff - ctrLow > blocks; } // OperateKeystream always produces a key stream. The key stream is written // to output. Optionally a message may be supplied to xor with the key stream. // The message is input, and output = output ^ input. void ChaCha_OperateKeystream(KeystreamOperation operation, word32 state[16], word32& ctrLow, word32& ctrHigh, word32 rounds, byte *output, const byte *input, size_t iterationCount) { do { #if (CRYPTOPP_AVX2_AVAILABLE) if (HasAVX2()) { while (iterationCount >= 8 && MultiBlockSafe(state[12], 8)) { const bool xorInput = (operation & EnumToInt(INPUT_NULL)) != EnumToInt(INPUT_NULL); ChaCha_OperateKeystream_AVX2(state, xorInput ? input : NULLPTR, output, rounds); // MultiBlockSafe avoids overflow on the counter words state[12] += 8; input += (!!xorInput) * 8 * BYTES_PER_ITERATION; output += 8 * BYTES_PER_ITERATION; iterationCount -= 8; } } #endif #if (CRYPTOPP_SSE2_INTRIN_AVAILABLE) if (HasSSE2()) { while (iterationCount >= 4 && MultiBlockSafe(state[12], 4)) { const bool xorInput = (operation & EnumToInt(INPUT_NULL)) != EnumToInt(INPUT_NULL); ChaCha_OperateKeystream_SSE2(state, xorInput ? input : NULLPTR, output, rounds); // MultiBlockSafe avoids overflow on the counter words state[12] += 4; input += (!!xorInput)*4*BYTES_PER_ITERATION; output += 4*BYTES_PER_ITERATION; iterationCount -= 4; } } #endif #if (CRYPTOPP_ARM_NEON_AVAILABLE) if (HasNEON()) { while (iterationCount >= 4 && MultiBlockSafe(state[12], 4)) { const bool xorInput = (operation & EnumToInt(INPUT_NULL)) != EnumToInt(INPUT_NULL); ChaCha_OperateKeystream_NEON(state, xorInput ? input : NULLPTR, output, rounds); // MultiBlockSafe avoids overflow on the counter words state[12] += 4; input += (!!xorInput)*4*BYTES_PER_ITERATION; output += 4*BYTES_PER_ITERATION; iterationCount -= 4; } } #endif #if (CRYPTOPP_ALTIVEC_AVAILABLE) if (HasAltivec()) { while (iterationCount >= 4 && MultiBlockSafe(state[12], 4)) { const bool xorInput = (operation & EnumToInt(INPUT_NULL)) != EnumToInt(INPUT_NULL); ChaCha_OperateKeystream_ALTIVEC(state, xorInput ? input : NULLPTR, output, rounds); // MultiBlockSafe avoids overflow on the counter words state[12] += 4; input += (!!xorInput)*4*BYTES_PER_ITERATION; output += 4*BYTES_PER_ITERATION; iterationCount -= 4; } } #endif if (iterationCount) { word32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15; x0 = state[0]; x1 = state[1]; x2 = state[2]; x3 = state[3]; x4 = state[4]; x5 = state[5]; x6 = state[6]; x7 = state[7]; x8 = state[8]; x9 = state[9]; x10 = state[10]; x11 = state[11]; x12 = state[12]; x13 = state[13]; x14 = state[14]; x15 = state[15]; for (int i = static_cast(rounds); i > 0; i -= 2) { CHACHA_QUARTER_ROUND(x0, x4, x8, x12); CHACHA_QUARTER_ROUND(x1, x5, x9, x13); CHACHA_QUARTER_ROUND(x2, x6, x10, x14); CHACHA_QUARTER_ROUND(x3, x7, x11, x15); CHACHA_QUARTER_ROUND(x0, x5, x10, x15); CHACHA_QUARTER_ROUND(x1, x6, x11, x12); CHACHA_QUARTER_ROUND(x2, x7, x8, x13); CHACHA_QUARTER_ROUND(x3, x4, x9, x14); } CRYPTOPP_KEYSTREAM_OUTPUT_SWITCH(CHACHA_OUTPUT, BYTES_PER_ITERATION); // This is state[12] and state[13] from ChaCha. In the case of // ChaChaTLS ctrHigh is a reference to a discard value. if (++ctrLow == 0) ctrHigh++; } // We may re-enter a SIMD keystream operation from here. } while (iterationCount--); } // XChaCha key derivation void HChaCha_OperateKeystream(const word32 state[16], word32 output[8]) { word32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15; x0 = state[0]; x1 = state[1]; x2 = state[2]; x3 = state[3]; x4 = state[4]; x5 = state[5]; x6 = state[6]; x7 = state[7]; x8 = state[8]; x9 = state[9]; x10 = state[10]; x11 = state[11]; x12 = state[12]; x13 = state[13]; x14 = state[14]; x15 = state[15]; for (int i = 20; i > 0; i -= 2) { CHACHA_QUARTER_ROUND(x0, x4, x8, x12); CHACHA_QUARTER_ROUND(x1, x5, x9, x13); CHACHA_QUARTER_ROUND(x2, x6, x10, x14); CHACHA_QUARTER_ROUND(x3, x7, x11, x15); CHACHA_QUARTER_ROUND(x0, x5, x10, x15); CHACHA_QUARTER_ROUND(x1, x6, x11, x12); CHACHA_QUARTER_ROUND(x2, x7, x8, x13); CHACHA_QUARTER_ROUND(x3, x4, x9, x14); } output[0] = x0; output[1] = x1; output[2] = x2; output[3] = x3; output[4] = x12; output[5] = x13; output[6] = x14; output[7] = x15; } std::string ChaCha_AlgorithmProvider() { #if (CRYPTOPP_AVX2_AVAILABLE) if (HasAVX2()) return "AVX2"; else #endif #if (CRYPTOPP_SSE2_INTRIN_AVAILABLE) if (HasSSE2()) return "SSE2"; else #endif #if (CRYPTOPP_ARM_NEON_AVAILABLE) if (HasNEON()) return "NEON"; else #endif #if (CRYPTOPP_ALTIVEC_AVAILABLE) if (HasAltivec()) return "Altivec"; else #endif return "C++"; } unsigned int ChaCha_GetAlignment() { #if (CRYPTOPP_AVX2_AVAILABLE) if (HasAVX2()) return 16; else #endif #if (CRYPTOPP_SSE2_INTRIN_AVAILABLE) if (HasSSE2()) return 16; else #endif #if (CRYPTOPP_ALTIVEC_AVAILABLE) if (HasAltivec()) return 16; else #endif return GetAlignmentOf(); } unsigned int ChaCha_GetOptimalBlockSize() { #if (CRYPTOPP_AVX2_AVAILABLE) if (HasAVX2()) return 8 * BYTES_PER_ITERATION; else #endif #if (CRYPTOPP_SSE2_INTRIN_AVAILABLE) if (HasSSE2()) return 4*BYTES_PER_ITERATION; else #endif #if (CRYPTOPP_ARM_NEON_AVAILABLE) if (HasNEON()) return 4*BYTES_PER_ITERATION; else #endif #if (CRYPTOPP_ALTIVEC_AVAILABLE) if (HasAltivec()) return 4*BYTES_PER_ITERATION; else #endif return BYTES_PER_ITERATION; } ANONYMOUS_NAMESPACE_END NAMESPACE_BEGIN(CryptoPP) ////////////////////////////// Bernstein ChaCha ////////////////////////////// std::string ChaCha_Policy::AlgorithmName() const { return std::string("ChaCha")+IntToString(m_rounds); } std::string ChaCha_Policy::AlgorithmProvider() const { return ChaCha_AlgorithmProvider(); } void ChaCha_Policy::CipherSetKey(const NameValuePairs ¶ms, const byte *key, size_t length) { CRYPTOPP_ASSERT(key); CRYPTOPP_ASSERT(length == 16 || length == 32); CRYPTOPP_UNUSED(key); CRYPTOPP_UNUSED(length); // Use previous rounds as the default value int rounds = params.GetIntValueWithDefault(Name::Rounds(), m_rounds); if (rounds != 20 && rounds != 12 && rounds != 8) throw InvalidRounds(ChaCha::StaticAlgorithmName(), rounds); // Latch a good value m_rounds = rounds; // "expand 16-byte k" or "expand 32-byte k" m_state[0] = 0x61707865; m_state[1] = (length == 16) ? 0x3120646e : 0x3320646e; m_state[2] = (length == 16) ? 0x79622d36 : 0x79622d32; m_state[3] = 0x6b206574; GetBlock get1(key); get1(m_state[4])(m_state[5])(m_state[6])(m_state[7]); GetBlock get2(key + ((length == 32) ? 16 : 0)); get2(m_state[8])(m_state[9])(m_state[10])(m_state[11]); } void ChaCha_Policy::CipherResynchronize(byte *keystreamBuffer, const byte *IV, size_t length) { CRYPTOPP_UNUSED(keystreamBuffer), CRYPTOPP_UNUSED(length); CRYPTOPP_ASSERT(length==8); CRYPTOPP_UNUSED(length); GetBlock get(IV); m_state[12] = m_state[13] = 0; get(m_state[14])(m_state[15]); } void ChaCha_Policy::SeekToIteration(lword iterationCount) { m_state[12] = (word32)iterationCount; // low word m_state[13] = (word32)SafeRightShift<32>(iterationCount); } unsigned int ChaCha_Policy::GetAlignment() const { return ChaCha_GetAlignment(); } unsigned int ChaCha_Policy::GetOptimalBlockSize() const { return ChaCha_GetOptimalBlockSize(); } void ChaCha_Policy::OperateKeystream(KeystreamOperation operation, byte *output, const byte *input, size_t iterationCount) { ChaCha_OperateKeystream(operation, m_state, m_state[12], m_state[13], m_rounds, output, input, iterationCount); } ////////////////////////////// IETF ChaChaTLS ////////////////////////////// std::string ChaChaTLS_Policy::AlgorithmName() const { return std::string("ChaChaTLS"); } std::string ChaChaTLS_Policy::AlgorithmProvider() const { return ChaCha_AlgorithmProvider(); } void ChaChaTLS_Policy::CipherSetKey(const NameValuePairs ¶ms, const byte *key, size_t length) { CRYPTOPP_ASSERT(key); CRYPTOPP_ASSERT(length == 32); CRYPTOPP_UNUSED(length); // ChaChaTLS is always 20 rounds. Fetch Rounds() to avoid a spurious failure. int rounds = params.GetIntValueWithDefault(Name::Rounds(), ROUNDS); if (rounds != 20) throw InvalidRounds(ChaChaTLS::StaticAlgorithmName(), rounds); // RFC 8439 test vectors use an initial block counter. However, the counter // can be an arbitrary value per RFC 8439 Section 2.4. We stash the counter // away in state[16] and use it for a Resynchronize() operation. I think // the initial counter is used more like a Tweak when non-0, and it should // be provided in Resynchronize() (light-weight re-keying). However, // Resynchronize() does not have an overload that allows us to pass it into // the function, so we have to use the heavier-weight SetKey to change it. word64 block; if (params.GetValue("InitialBlock", block)) m_counter = static_cast(block); else m_counter = 0; // State words are defined in RFC 8439, Section 2.3. Key is 32-bytes. GetBlock get(key); get(m_state[KEY+0])(m_state[KEY+1])(m_state[KEY+2])(m_state[KEY+3]) (m_state[KEY+4])(m_state[KEY+5])(m_state[KEY+6])(m_state[KEY+7]); } void ChaChaTLS_Policy::CipherResynchronize(byte *keystreamBuffer, const byte *IV, size_t length) { CRYPTOPP_UNUSED(keystreamBuffer), CRYPTOPP_UNUSED(length); CRYPTOPP_ASSERT(length==12); // State words are defined in RFC 8439, Section 2.3. m_state[0] = 0x61707865; m_state[1] = 0x3320646e; m_state[2] = 0x79622d32; m_state[3] = 0x6b206574; // Copy saved key into state std::memcpy(m_state+4, m_state+KEY, 8*sizeof(word32)); // State words are defined in RFC 8439, Section 2.3 GetBlock get(IV); m_state[12] = m_counter; get(m_state[13])(m_state[14])(m_state[15]); } void ChaChaTLS_Policy::SeekToIteration(lword iterationCount) { // Should we throw here??? If the initial block counter is // large then we can wrap and process more data as long as // data processed in the security context does not exceed // 2^32 blocks or approximately 256 GB of data. CRYPTOPP_ASSERT(iterationCount <= std::numeric_limits::max()); m_state[12] = (word32)iterationCount; // low word } unsigned int ChaChaTLS_Policy::GetAlignment() const { return ChaCha_GetAlignment(); } unsigned int ChaChaTLS_Policy::GetOptimalBlockSize() const { return ChaCha_GetOptimalBlockSize(); } void ChaChaTLS_Policy::OperateKeystream(KeystreamOperation operation, byte *output, const byte *input, size_t iterationCount) { word32 discard=0; ChaCha_OperateKeystream(operation, m_state, m_state[12], discard, ROUNDS, output, input, iterationCount); // If this fires it means ChaCha_OperateKeystream generated a counter // block carry that was discarded. The problem is, the RFC does not // specify what should happen when the counter block wraps. All we can // do is inform the user that something bad may happen because we don't // know what we should do. // Also see https://github.com/weidai11/cryptopp/issues/790 and // https://mailarchive.ietf.org/arch/msg/cfrg/gsOnTJzcbgG6OqD8Sc0GO5aR_tU // CRYPTOPP_ASSERT(discard==0); } ////////////////////////////// IETF XChaCha20 ////////////////////////////// std::string XChaCha20_Policy::AlgorithmName() const { return std::string("XChaCha20"); } std::string XChaCha20_Policy::AlgorithmProvider() const { return ChaCha_AlgorithmProvider(); } void XChaCha20_Policy::CipherSetKey(const NameValuePairs ¶ms, const byte *key, size_t length) { CRYPTOPP_ASSERT(key); CRYPTOPP_ASSERT(length == 32); CRYPTOPP_UNUSED(length); // Use previous rounds as the default value int rounds = params.GetIntValueWithDefault(Name::Rounds(), m_rounds); if (rounds != 20 && rounds != 12) throw InvalidRounds(ChaCha::StaticAlgorithmName(), rounds); // Latch a good value m_rounds = rounds; word64 block; if (params.GetValue("InitialBlock", block)) m_counter = static_cast(block); else m_counter = 1; // Stash key away for use in CipherResynchronize GetBlock get(key); get(m_state[KEY+0])(m_state[KEY+1])(m_state[KEY+2])(m_state[KEY+3]) (m_state[KEY+4])(m_state[KEY+5])(m_state[KEY+6])(m_state[KEY+7]); } void XChaCha20_Policy::CipherResynchronize(byte *keystreamBuffer, const byte *iv, size_t length) { CRYPTOPP_UNUSED(keystreamBuffer), CRYPTOPP_UNUSED(length); CRYPTOPP_ASSERT(length==24); // HChaCha derivation m_state[0] = 0x61707865; m_state[1] = 0x3320646e; m_state[2] = 0x79622d32; m_state[3] = 0x6b206574; // Copy saved key into state std::memcpy(m_state+4, m_state+KEY, 8*sizeof(word32)); GetBlock get(iv); get(m_state[12])(m_state[13])(m_state[14])(m_state[15]); // Operate the keystream without adding state back in. // This function also gathers the key words into a // contiguous 8-word block. HChaCha_OperateKeystream(m_state, m_state+4); // XChaCha state m_state[0] = 0x61707865; m_state[1] = 0x3320646e; m_state[2] = 0x79622d32; m_state[3] = 0x6b206574; // Setup new IV m_state[12] = m_counter; m_state[13] = 0; m_state[14] = GetWord(false, LITTLE_ENDIAN_ORDER, iv+16); m_state[15] = GetWord(false, LITTLE_ENDIAN_ORDER, iv+20); } void XChaCha20_Policy::SeekToIteration(lword iterationCount) { // Should we throw here??? XChaCha does not have a block // counter, so I'm not sure how to seek on it. CRYPTOPP_ASSERT(0); CRYPTOPP_UNUSED(iterationCount); } unsigned int XChaCha20_Policy::GetAlignment() const { return ChaCha_GetAlignment(); } unsigned int XChaCha20_Policy::GetOptimalBlockSize() const { return ChaCha_GetOptimalBlockSize(); } void XChaCha20_Policy::OperateKeystream(KeystreamOperation operation, byte *output, const byte *input, size_t iterationCount) { ChaCha_OperateKeystream(operation, m_state, m_state[12], m_state[13], m_rounds, output, input, iterationCount); } NAMESPACE_END