// scrypt.cpp - written and placed in public domain by Jeffrey Walton. // Based on reference source code by Colin Percival for // Scrypt and Daniel Bernstein for Salsa20 core. #include "pch.h" #include "scrypt.h" #include "algparam.h" #include "argnames.h" #include "pwdbased.h" #include "stdcpp.h" #include "salsa.h" #include "misc.h" #include "sha.h" #include #ifdef _OPENMP # include #endif ANONYMOUS_NAMESPACE_BEGIN using CryptoPP::byte; using CryptoPP::word32; using CryptoPP::word64; using CryptoPP::GetWord; using CryptoPP::PutWord; using CryptoPP::Salsa20_Core; using CryptoPP::rotlConstant; using CryptoPP::LITTLE_ENDIAN_ORDER; using CryptoPP::AlignedSecByteBlock; static inline void LE32ENC(byte* out, word32 in) { PutWord(false, LITTLE_ENDIAN_ORDER, out, in); } static inline word32 LE32DEC(const byte* in) { return GetWord(false, LITTLE_ENDIAN_ORDER, in); } static inline word64 LE64DEC(const byte* in) { return GetWord(false, LITTLE_ENDIAN_ORDER, in); } static inline void BlockCopy(byte* dest, byte* src, size_t len) { for (size_t i = 0; i < len; ++i) dest[i] = src[i]; } static inline void BlockXOR(byte* dest, byte* src, size_t len) { #pragma omp simd for (size_t i = 0; i < len; ++i) dest[i] ^= src[i]; } static inline void PBKDF2_SHA256(byte* buf, size_t dkLen, const byte* passwd, size_t passwdlen, const byte* salt, size_t saltlen, byte count) { using CryptoPP::SHA256; using CryptoPP::PKCS5_PBKDF2_HMAC; PKCS5_PBKDF2_HMAC pbkdf; pbkdf.DeriveKey(buf, dkLen, 0, passwd, passwdlen, salt, saltlen, count, 0.0f); } static inline void Salsa20_8(byte B[64]) { word32 B32[16]; for (size_t i = 0; i < 16; ++i) B32[i] = LE32DEC(&B[i * 4]); Salsa20_Core(B32, 8); for (size_t i = 0; i < 16; ++i) LE32ENC(&B[4 * i], B32[i]); } static inline void BlockMix(byte* B, byte* Y, size_t r) { byte X[64]; // 1: X <-- B_{2r - 1} BlockCopy(X, &B[(2 * r - 1) * 64], 64); // 2: for i = 0 to 2r - 1 do for (size_t i = 0; i < 2 * r; ++i) { // 3: X <-- H(X \xor B_i) BlockXOR(X, &B[i * 64], 64); Salsa20_8(X); // 4: Y_i <-- X BlockCopy(&Y[i * 64], X, 64); } // 6: B' <-- (Y_0, Y_2 ... Y_{2r-2}, Y_1, Y_3 ... Y_{2r-1}) for (size_t i = 0; i < r; ++i) BlockCopy(&B[i * 64], &Y[(i * 2) * 64], 64); for (size_t i = 0; i < r; ++i) BlockCopy(&B[(i + r) * 64], &Y[(i * 2 + 1) * 64], 64); } static inline word64 Integerify(byte* B, size_t r) { byte* X = &B[(2 * r - 1) * 64]; return LE64DEC(X); } static inline void Smix(byte* B, size_t r, word64 N, byte* V, byte* XY) { byte* X = XY; byte* Y = XY+128*r; // 1: X <-- B BlockCopy(X, B, 128 * r); // 2: for i = 0 to N - 1 do for (word64 i = 0; i < N; ++i) { // 3: V_i <-- X BlockCopy(&V[i * (128 * r)], X, 128 * r); // 4: X <-- H(X) BlockMix(X, Y, r); } // 6: for i = 0 to N - 1 do for (word64 i = 0; i < N; ++i) { // 7: j <-- Integerify(X) mod N word64 j = Integerify(X, r) & (N - 1); // 8: X <-- H(X \xor V_j) BlockXOR(X, &V[j * (128 * r)], 128 * r); BlockMix(X, Y, r); } // 10: B' <-- X BlockCopy(B, X, 128 * r); } ANONYMOUS_NAMESPACE_END NAMESPACE_BEGIN(CryptoPP) size_t Scrypt::GetValidDerivedLength(size_t keylength) const { if (keylength > MaxDerivedLength()) return MaxDerivedLength(); return keylength; } void Scrypt::ValidateParameters(size_t derivedLen, word64 cost, word64 blockSize, word64 parallelization) const { // Optimizer should remove this on 32-bit platforms if (std::numeric_limits::max() > std::numeric_limits::max()) { const word64 maxLen = ((static_cast(1) << 32) - 1) * 32; if (derivedLen > maxLen) { std::ostringstream oss; oss << "derivedLen " << derivedLen << " is larger than " << maxLen; throw InvalidArgument("Scrypt: " + oss.str()); } } CRYPTOPP_ASSERT(IsPowerOf2(cost)); if (IsPowerOf2(cost) == false) throw InvalidArgument("Scrypt: cost must be a power of 2"); const word64 prod = static_cast(blockSize) * parallelization; CRYPTOPP_ASSERT(prod < (1U << 30)); if (prod >= (1U << 30)) { std::ostringstream oss; oss << "r*p " << prod << " is larger than " << (1U << 30); throw InvalidArgument("Scrypt: " + oss.str()); } // Scrypt has several tests that effectively verify allocations like // '128 * r * N' and '128 * r * p' do not overflow. They are the tests // that set errno to ENOMEM. We can make the logic a little more clear // using word128. At first blush the word128 may seem like overkill. // However, this alogirthm is dominated by slow moving parts, so a // one-time check is insignificant in the bigger picture. #if defined(CRYPTOPP_WORD128_AVAILABLE) const word128 maxElems = static_cast(SIZE_MAX); bool bLimit = (maxElems >= static_cast(cost) * blockSize * 128U); bool xyLimit = (maxElems >= static_cast(parallelization) * blockSize * 128U); bool vLimit = (maxElems >= static_cast(blockSize) * 256U + 64U); #else const word64 maxElems = static_cast(SIZE_MAX); bool bLimit = (blockSize < maxElems / 128U / cost); bool xyLimit = (blockSize < maxElems / 128U / parallelization); bool vLimit = (blockSize < (maxElems - 64U) / 256U); #endif CRYPTOPP_ASSERT(bLimit); CRYPTOPP_ASSERT(xyLimit); CRYPTOPP_ASSERT(vLimit); if (!bLimit || !xyLimit || !vLimit) throw std::bad_alloc(); } size_t Scrypt::DeriveKey(byte*derived, size_t derivedLen, const byte*secret, size_t secretLen, const NameValuePairs& params) const { CRYPTOPP_ASSERT(secret /*&& secretLen*/); CRYPTOPP_ASSERT(derived && derivedLen); CRYPTOPP_ASSERT(derivedLen <= MaxDerivedLength()); word64 cost=0, blockSize=0, parallelization=0; if(params.GetValue("Cost", cost) == false) cost = defaultCost; if(params.GetValue("BlockSize", blockSize) == false) blockSize = defaultBlockSize; if(params.GetValue("Parallelization", parallelization) == false) parallelization = defaultParallelization; ConstByteArrayParameter salt; (void)params.GetValue("Salt", salt); return DeriveKey(derived, derivedLen, secret, secretLen, salt.begin(), salt.size(), cost, blockSize, parallelization); } size_t Scrypt::DeriveKey(byte*derived, size_t derivedLen, const byte*secret, size_t secretLen, const byte*salt, size_t saltLen, word64 cost, word64 blockSize, word64 parallel) const { CRYPTOPP_ASSERT(secret /*&& secretLen*/); CRYPTOPP_ASSERT(derived && derivedLen); CRYPTOPP_ASSERT(derivedLen <= MaxDerivedLength()); ThrowIfInvalidDerivedLength(derivedLen); ValidateParameters(derivedLen, cost, blockSize, parallel); AlignedSecByteBlock B(static_cast(blockSize * parallel * 128U)); // 1: (B_0 ... B_{p-1}) <-- PBKDF2(P, S, 1, p * MFLen) PBKDF2_SHA256(B, B.size(), secret, secretLen, salt, saltLen, 1); #ifdef _OPENMP int threads = STDMIN(omp_get_max_threads(), static_cast(STDMIN(static_cast(parallel), static_cast(std::numeric_limits::max())))); #endif // http://stackoverflow.com/q/49604260/608639 #pragma omp parallel num_threads(threads) { // Each thread gets its own copy AlignedSecByteBlock XY(static_cast(blockSize * 256U)); AlignedSecByteBlock V(static_cast(blockSize * cost * 128U)); // 2: for i = 0 to p - 1 do #pragma omp for for (size_t i = 0; i < static_cast(parallel); ++i) { // 3: B_i <-- MF(B_i, N) const ptrdiff_t offset = static_cast(blockSize*i*128); Smix(B+offset, static_cast(blockSize), cost, V, XY); } } // 5: DK <-- PBKDF2(P, B, 1, dkLen) PBKDF2_SHA256(derived, derivedLen, secret, secretLen, B, B.size(), 1); return 1; } NAMESPACE_END