// scrypt.cpp - written and placed in public domain by Jeffrey Walton. // Based on reference source code by Colin Percival for // Scrypt and Daniel Bernstein for Salsa20 core. #include "pch.h" #include "scrypt.h" #include "algparam.h" #include "argnames.h" #include "pwdbased.h" #include "stdcpp.h" #include "salsa.h" #include "misc.h" #include "sha.h" #include #include #ifdef _OPENMP # include #endif // https://github.com/weidai11/cryptopp/issues/777 #if CRYPTOPP_GCC_DIAGNOSTIC_AVAILABLE # if defined(__clang__) # pragma GCC diagnostic ignored "-Wtautological-compare" # elif defined(__GNUC__) # pragma GCC diagnostic ignored "-Wtype-limits" # endif #endif ANONYMOUS_NAMESPACE_BEGIN using CryptoPP::byte; using CryptoPP::word32; using CryptoPP::word64; using CryptoPP::GetWord; using CryptoPP::PutWord; using CryptoPP::Salsa20_Core; using CryptoPP::rotlConstant; using CryptoPP::LITTLE_ENDIAN_ORDER; using CryptoPP::AlignedSecByteBlock; inline void LE32ENC(byte* out, word32 in) { PutWord(false, LITTLE_ENDIAN_ORDER, out, in); } inline word32 LE32DEC(const byte* in) { return GetWord(false, LITTLE_ENDIAN_ORDER, in); } inline word64 LE64DEC(const byte* in) { return GetWord(false, LITTLE_ENDIAN_ORDER, in); } inline void BlockCopy(byte* dest, byte* src, size_t len) { // OpenMP 4.0 released July 2013. #if _OPENMP >= 201307 #pragma omp simd for (size_t i = 0; i < len; ++i) dest[i] = src[i]; #else for (size_t i = 0; i < len; ++i) dest[i] = src[i]; #endif } inline void BlockXOR(byte* dest, byte* src, size_t len) { // OpenMP 4.0 released July 2013. #if _OPENMP >= 201307 #pragma omp simd for (size_t i = 0; i < len; ++i) dest[i] ^= src[i]; #else for (size_t i = 0; i < len; ++i) dest[i] ^= src[i]; #endif } inline void PBKDF2_SHA256(byte* buf, size_t dkLen, const byte* passwd, size_t passwdlen, const byte* salt, size_t saltlen, byte count) { using CryptoPP::SHA256; using CryptoPP::PKCS5_PBKDF2_HMAC; PKCS5_PBKDF2_HMAC pbkdf; pbkdf.DeriveKey(buf, dkLen, 0, passwd, passwdlen, salt, saltlen, count, 0.0f); } inline void Salsa20_8(byte B[64]) { word32 B32[16]; for (size_t i = 0; i < 16; ++i) B32[i] = LE32DEC(&B[i * 4]); Salsa20_Core(B32, 8); for (size_t i = 0; i < 16; ++i) LE32ENC(&B[4 * i], B32[i]); } inline void BlockMix(byte* B, byte* Y, size_t r) { byte X[64]; // 1: X <-- B_{2r - 1} BlockCopy(X, &B[(2 * r - 1) * 64], 64); // 2: for i = 0 to 2r - 1 do for (size_t i = 0; i < 2 * r; ++i) { // 3: X <-- H(X \xor B_i) BlockXOR(X, &B[i * 64], 64); Salsa20_8(X); // 4: Y_i <-- X BlockCopy(&Y[i * 64], X, 64); } // 6: B' <-- (Y_0, Y_2 ... Y_{2r-2}, Y_1, Y_3 ... Y_{2r-1}) for (size_t i = 0; i < r; ++i) BlockCopy(&B[i * 64], &Y[(i * 2) * 64], 64); for (size_t i = 0; i < r; ++i) BlockCopy(&B[(i + r) * 64], &Y[(i * 2 + 1) * 64], 64); } inline word64 Integerify(byte* B, size_t r) { byte* X = &B[(2 * r - 1) * 64]; return LE64DEC(X); } inline void Smix(byte* B, size_t r, word64 N, byte* V, byte* XY) { byte* X = XY; byte* Y = XY+128*r; // 1: X <-- B BlockCopy(X, B, 128 * r); // 2: for i = 0 to N - 1 do for (word64 i = 0; i < N; ++i) { // 3: V_i <-- X BlockCopy(&V[i * (128 * r)], X, 128 * r); // 4: X <-- H(X) BlockMix(X, Y, r); } // 6: for i = 0 to N - 1 do for (word64 i = 0; i < N; ++i) { // 7: j <-- Integerify(X) mod N word64 j = Integerify(X, r) & (N - 1); // 8: X <-- H(X \xor V_j) BlockXOR(X, &V[j * (128 * r)], 128 * r); BlockMix(X, Y, r); } // 10: B' <-- X BlockCopy(B, X, 128 * r); } ANONYMOUS_NAMESPACE_END NAMESPACE_BEGIN(CryptoPP) size_t Scrypt::GetValidDerivedLength(size_t keylength) const { if (keylength > MaxDerivedKeyLength()) return MaxDerivedKeyLength(); return keylength; } void Scrypt::ValidateParameters(size_t derivedLen, word64 cost, word64 blockSize, word64 parallelization) const { // https://github.com/weidai11/cryptopp/issues/842 CRYPTOPP_ASSERT(derivedLen != 0); CRYPTOPP_ASSERT(cost != 0); CRYPTOPP_ASSERT(blockSize != 0); CRYPTOPP_ASSERT(parallelization != 0); if (cost == 0) throw InvalidArgument("Scrypt: cost cannot be 0"); if (blockSize == 0) throw InvalidArgument("Scrypt: block size cannot be 0"); if (parallelization == 0) throw InvalidArgument("Scrypt: parallelization cannot be 0"); // Optimizer should remove this on 32-bit platforms if (std::numeric_limits::max() > std::numeric_limits::max()) { const word64 maxLen = ((static_cast(1) << 32) - 1) * 32; if (derivedLen > maxLen) { std::ostringstream oss; oss << "derivedLen " << derivedLen << " is larger than " << maxLen; throw InvalidArgument("Scrypt: " + oss.str()); } } // https://github.com/weidai11/cryptopp/issues/787 CRYPTOPP_ASSERT(parallelization <= static_cast(std::numeric_limits::max())); if (parallelization > static_cast(std::numeric_limits::max())) { std::ostringstream oss; oss << " parallelization " << parallelization << " is larger than "; oss << std::numeric_limits::max(); throw InvalidArgument("Scrypt: " + oss.str()); } CRYPTOPP_ASSERT(IsPowerOf2(cost)); if (IsPowerOf2(cost) == false) throw InvalidArgument("Scrypt: cost must be a power of 2"); const word64 prod = static_cast(blockSize) * parallelization; CRYPTOPP_ASSERT(prod < (1U << 30)); if (prod >= (1U << 30)) { std::ostringstream oss; oss << "r*p " << prod << " is larger than " << (1U << 30); throw InvalidArgument("Scrypt: " + oss.str()); } // Scrypt has several tests that effectively verify allocations like // '128 * r * N' and '128 * r * p' do not overflow. They are the tests // that set errno to ENOMEM. We can make the logic a little more clear // using word128. At first blush the word128 may seem like overkill. // However, this algorithm is dominated by slow moving parts, so a // one-time check is insignificant in the bigger picture. #if defined(CRYPTOPP_WORD128_AVAILABLE) const word128 maxElems = static_cast(SIZE_MAX); bool bLimit = (maxElems >= static_cast(cost) * blockSize * 128U); bool xyLimit = (maxElems >= static_cast(parallelization) * blockSize * 128U); bool vLimit = (maxElems >= static_cast(blockSize) * 256U + 64U); #else const word64 maxElems = static_cast(SIZE_MAX); bool bLimit = (blockSize < maxElems / 128U / cost); bool xyLimit = (blockSize < maxElems / 128U / parallelization); bool vLimit = (blockSize < (maxElems - 64U) / 256U); #endif CRYPTOPP_ASSERT(bLimit); CRYPTOPP_ASSERT(xyLimit); CRYPTOPP_ASSERT(vLimit); if (!bLimit || !xyLimit || !vLimit) throw std::bad_alloc(); } size_t Scrypt::DeriveKey(byte*derived, size_t derivedLen, const byte*secret, size_t secretLen, const NameValuePairs& params) const { CRYPTOPP_ASSERT(secret /*&& secretLen*/); CRYPTOPP_ASSERT(derived && derivedLen); CRYPTOPP_ASSERT(derivedLen <= MaxDerivedKeyLength()); word64 cost=0, blockSize=0, parallelization=0; if(params.GetValue("Cost", cost) == false) cost = defaultCost; if(params.GetValue("BlockSize", blockSize) == false) blockSize = defaultBlockSize; if(params.GetValue("Parallelization", parallelization) == false) parallelization = defaultParallelization; ConstByteArrayParameter salt; (void)params.GetValue("Salt", salt); return DeriveKey(derived, derivedLen, secret, secretLen, salt.begin(), salt.size(), cost, blockSize, parallelization); } size_t Scrypt::DeriveKey(byte*derived, size_t derivedLen, const byte*secret, size_t secretLen, const byte*salt, size_t saltLen, word64 cost, word64 blockSize, word64 parallel) const { CRYPTOPP_ASSERT(secret /*&& secretLen*/); CRYPTOPP_ASSERT(derived && derivedLen); CRYPTOPP_ASSERT(derivedLen <= MaxDerivedKeyLength()); ThrowIfInvalidDerivedKeyLength(derivedLen); ValidateParameters(derivedLen, cost, blockSize, parallel); AlignedSecByteBlock B(static_cast(blockSize * parallel * 128U)); // 1: (B_0 ... B_{p-1}) <-- PBKDF2(P, S, 1, p * MFLen) PBKDF2_SHA256(B, B.size(), secret, secretLen, salt, saltLen, 1); // Visual Studio and OpenMP 2.0 fixup. We must use int, not size_t. int maxParallel=0; if (!SafeConvert(parallel, maxParallel)) maxParallel = std::numeric_limits::max(); #ifdef _OPENMP int threads = STDMIN(omp_get_max_threads(), maxParallel); #endif // http://stackoverflow.com/q/49604260/608639 #pragma omp parallel num_threads(threads) { // Each thread gets its own copy AlignedSecByteBlock XY(static_cast(blockSize * 256U)); AlignedSecByteBlock V(static_cast(blockSize * cost * 128U)); // 2: for i = 0 to p - 1 do #pragma omp for for (int i = 0; i < maxParallel; ++i) { // 3: B_i <-- MF(B_i, N) const ptrdiff_t offset = static_cast(blockSize*i*128); Smix(B+offset, static_cast(blockSize), cost, V, XY); } } // 5: DK <-- PBKDF2(P, B, 1, dkLen) PBKDF2_SHA256(derived, derivedLen, secret, secretLen, B, B.size(), 1); return 1; } NAMESPACE_END