diff options
-rw-r--r-- | Readme.txt | 9 | ||||
-rw-r--r-- | TestVectors/all.txt | 50 | ||||
-rw-r--r-- | asn.h | 4 | ||||
-rw-r--r-- | config.h | 2 | ||||
-rw-r--r-- | cryptlib.cpp | 13 | ||||
-rw-r--r-- | cryptlib.h | 30 | ||||
-rwxr-xr-x | cryptlib.vcproj | 8 | ||||
-rw-r--r-- | datatest.cpp | 12 | ||||
-rw-r--r-- | files.h | 4 | ||||
-rw-r--r-- | filters.cpp | 20 | ||||
-rw-r--r-- | filters.h | 44 | ||||
-rw-r--r-- | fltrimpl.h | 2 | ||||
-rw-r--r-- | iterhash.cpp | 16 | ||||
-rw-r--r-- | iterhash.h | 4 | ||||
-rw-r--r-- | mqueue.h | 6 | ||||
-rw-r--r-- | queue.cpp | 8 | ||||
-rw-r--r-- | queue.h | 8 | ||||
-rw-r--r-- | rijndael.cpp | 18 | ||||
-rw-r--r-- | secblock.h | 2 | ||||
-rw-r--r-- | sha.cpp | 385 | ||||
-rw-r--r-- | sha.h | 14 | ||||
-rw-r--r-- | simple.h | 18 | ||||
-rw-r--r-- | test.cpp | 4 | ||||
-rwxr-xr-x | vmac.cpp | 9 | ||||
-rw-r--r-- | x64dll.asm | 2620 |
25 files changed, 2474 insertions, 836 deletions
@@ -78,10 +78,9 @@ The following compilers are supported for this release. Please visit http://www.cryptopp.com the most up to date build instructions and porting notes. * MSVC 6.0 - 2008 - * GCC 3.3 - 4.2 - * Borland C++Builder 2006 - 2007 - * Intel C++ Compiler 9.1 - 10.0 - * Sun Studio 11 - 12 (CC 5.8 - 5.9) + * GCC 3.3 - 4.3 + * Intel C++ Compiler 9 - 11 + * Sun Studio 12 (CC 5.9) *** Important Usage Notes *** @@ -415,7 +414,7 @@ the mailing list. 5.6 - added AuthenticatedSymmetricCipher interface class and Filter wrappers - added CCM, GCM (with SSE2 assembly), CMAC, and SEED - added support for variable length IVs - - improved AES speed on x86 and x64 + - improved AES and SHA-256 speed on x86 and x64 - fixed run-time validation error on x86-64 with GCC 4.3.2 -O2 - fixed HashFilter bug when putMessage=true - fixed warnings with GCC 4.3 diff --git a/TestVectors/all.txt b/TestVectors/all.txt index f26df0d..45aa4fb 100644 --- a/TestVectors/all.txt +++ b/TestVectors/all.txt @@ -1,27 +1,27 @@ AlgorithmType: FileList Name: all.txt collection -Test: tea.txt -Test: camellia.txt -Test: shacal2.txt -Test: ttmac.txt -Test: whrlpool.txt -Test: dlies.txt -Test: dsa.txt -Test: dsa_1363.txt -Test: esign.txt -Test: hmac.txt -Test: nr.txt -Test: rsa_oaep.txt -Test: rsa_pkcs1_1_5.txt -Test: rsa_pss.txt -Test: rw.txt -Test: seal.txt -Test: sha.txt -Test: panama.txt -Test: aes.txt -Test: salsa.txt -Test: vmac.txt -Test: sosemanuk.txt -Test: ccm.txt -Test: gcm.txt -Test: cmac.txt +Test: TestVectors/tea.txt +Test: TestVectors/camellia.txt +Test: TestVectors/shacal2.txt +Test: TestVectors/ttmac.txt +Test: TestVectors/whrlpool.txt +Test: TestVectors/dlies.txt +Test: TestVectors/dsa.txt +Test: TestVectors/dsa_1363.txt +Test: TestVectors/esign.txt +Test: TestVectors/hmac.txt +Test: TestVectors/nr.txt +Test: TestVectors/rsa_oaep.txt +Test: TestVectors/rsa_pkcs1_1_5.txt +Test: TestVectors/rsa_pss.txt +Test: TestVectors/rw.txt +Test: TestVectors/seal.txt +Test: TestVectors/sha.txt +Test: 
TestVectors/panama.txt +Test: TestVectors/aes.txt +Test: TestVectors/salsa.txt +Test: TestVectors/vmac.txt +Test: TestVectors/sosemanuk.txt +Test: TestVectors/ccm.txt +Test: TestVectors/gcm.txt +Test: TestVectors/cmac.txt @@ -138,8 +138,8 @@ public: byte PeekByte() const; void CheckByte(byte b); - size_t TransferTo2(BufferedTransformation &target, lword &transferBytes, const std::string &channel=NULL_CHANNEL, bool blocking=true); - size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=NULL_CHANNEL, bool blocking=true) const; + size_t TransferTo2(BufferedTransformation &target, lword &transferBytes, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true); + size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true) const; // call this to denote end of sequence void MessageEnd(); @@ -311,7 +311,7 @@ NAMESPACE_END #endif // how to declare class constants -#if defined(_MSC_VER) && _MSC_VER <= 1300 +#if (defined(_MSC_VER) && _MSC_VER <= 1300) || defined(__INTEL_COMPILER) # define CRYPTOPP_CONSTANT(x) enum {x}; #else # define CRYPTOPP_CONSTANT(x) static const int x; diff --git a/cryptlib.cpp b/cryptlib.cpp index b84a085..0e5bd24 100644 --- a/cryptlib.cpp +++ b/cryptlib.cpp @@ -26,7 +26,10 @@ CRYPTOPP_COMPILE_ASSERT(sizeof(word64) == 8); CRYPTOPP_COMPILE_ASSERT(sizeof(dword) == 2*sizeof(word)); #endif -const std::string BufferedTransformation::NULL_CHANNEL; +const std::string DEFAULT_CHANNEL; +const std::string AAD_CHANNEL = "AAD"; +const std::string &BufferedTransformation::NULL_CHANNEL = DEFAULT_CHANNEL; + const NullNameValuePairs g_nullNameValuePairs; BufferedTransformation & TheBitBucket() @@ -254,12 +257,12 @@ word32 RandomNumberGenerator::GenerateWord32(word32 min, word32 max) void RandomNumberGenerator::GenerateBlock(byte *output, size_t size) { ArraySink s(output, size); - 
GenerateIntoBufferedTransformation(s, BufferedTransformation::NULL_CHANNEL, size); + GenerateIntoBufferedTransformation(s, DEFAULT_CHANNEL, size); } void RandomNumberGenerator::DiscardBytes(size_t n) { - GenerateIntoBufferedTransformation(TheBitBucket(), BufferedTransformation::NULL_CHANNEL, n); + GenerateIntoBufferedTransformation(TheBitBucket(), DEFAULT_CHANNEL, n); } void RandomNumberGenerator::GenerateIntoBufferedTransformation(BufferedTransformation &target, const std::string &channel, lword length) @@ -593,12 +596,12 @@ size_t BufferedTransformation::ChannelPutWord32(const std::string &channel, word size_t BufferedTransformation::PutWord16(word16 value, ByteOrder order, bool blocking) { - return ChannelPutWord16(NULL_CHANNEL, value, order, blocking); + return ChannelPutWord16(DEFAULT_CHANNEL, value, order, blocking); } size_t BufferedTransformation::PutWord32(word32 value, ByteOrder order, bool blocking) { - return ChannelPutWord32(NULL_CHANNEL, value, order, blocking); + return ChannelPutWord32(DEFAULT_CHANNEL, value, order, blocking); } size_t BufferedTransformation::PeekWord16(word16 &value, ByteOrder order) const @@ -746,6 +746,12 @@ public: bool Wait(unsigned long milliseconds, CallStack const& callStack); }; +//! the default channel for BufferedTransformation, equal to the empty string +extern const std::string DEFAULT_CHANNEL; + +//! channel for additional authenticated data, equal to "AAD" +extern const std::string AAD_CHANNEL; + //! interface for buffered transformations /*! 
BufferedTransformation is a generalization of BlockTransformation, @@ -776,7 +782,7 @@ class CRYPTOPP_DLL CRYPTOPP_NO_VTABLE BufferedTransformation : public Algorithm, { public: // placed up here for CW8 - static const std::string NULL_CHANNEL; // the empty string "" + static const std::string &NULL_CHANNEL; // same as DEFAULT_CHANNEL, for backwards compatibility BufferedTransformation() : Algorithm(false) {} @@ -903,18 +909,18 @@ public: size_t PeekWord32(word32 &value, ByteOrder order=BIG_ENDIAN_ORDER) const; //! move transferMax bytes of the buffered output to target as input - lword TransferTo(BufferedTransformation &target, lword transferMax=LWORD_MAX, const std::string &channel=NULL_CHANNEL) + lword TransferTo(BufferedTransformation &target, lword transferMax=LWORD_MAX, const std::string &channel=DEFAULT_CHANNEL) {TransferTo2(target, transferMax, channel); return transferMax;} //! discard skipMax bytes from the output buffer virtual lword Skip(lword skipMax=LWORD_MAX); //! copy copyMax bytes of the buffered output to target as input - lword CopyTo(BufferedTransformation &target, lword copyMax=LWORD_MAX, const std::string &channel=NULL_CHANNEL) const + lword CopyTo(BufferedTransformation &target, lword copyMax=LWORD_MAX, const std::string &channel=DEFAULT_CHANNEL) const {return CopyRangeTo(target, 0, copyMax, channel);} //! copy copyMax bytes of the buffered output, starting at position (relative to current position), to target as input - lword CopyRangeTo(BufferedTransformation &target, lword position, lword copyMax=LWORD_MAX, const std::string &channel=NULL_CHANNEL) const + lword CopyRangeTo(BufferedTransformation &target, lword position, lword copyMax=LWORD_MAX, const std::string &channel=DEFAULT_CHANNEL) const {lword i = position; CopyRangeTo2(target, i, i+copyMax, channel); return i-position;} #ifdef CRYPTOPP_MAINTAIN_BACKWARDS_COMPATIBILITY @@ -939,18 +945,18 @@ public: //! 
skip count number of messages virtual unsigned int SkipMessages(unsigned int count=UINT_MAX); //! - unsigned int TransferMessagesTo(BufferedTransformation &target, unsigned int count=UINT_MAX, const std::string &channel=NULL_CHANNEL) + unsigned int TransferMessagesTo(BufferedTransformation &target, unsigned int count=UINT_MAX, const std::string &channel=DEFAULT_CHANNEL) {TransferMessagesTo2(target, count, channel); return count;} //! - unsigned int CopyMessagesTo(BufferedTransformation &target, unsigned int count=UINT_MAX, const std::string &channel=NULL_CHANNEL) const; + unsigned int CopyMessagesTo(BufferedTransformation &target, unsigned int count=UINT_MAX, const std::string &channel=DEFAULT_CHANNEL) const; //! virtual void SkipAll(); //! - void TransferAllTo(BufferedTransformation &target, const std::string &channel=NULL_CHANNEL) + void TransferAllTo(BufferedTransformation &target, const std::string &channel=DEFAULT_CHANNEL) {TransferAllTo2(target, channel);} //! - void CopyAllTo(BufferedTransformation &target, const std::string &channel=NULL_CHANNEL) const; + void CopyAllTo(BufferedTransformation &target, const std::string &channel=DEFAULT_CHANNEL) const; virtual bool GetNextMessageSeries() {return false;} virtual unsigned int NumberOfMessagesInThisSeries() const {return NumberOfMessages();} @@ -960,13 +966,13 @@ public: //! \name NON-BLOCKING TRANSFER OF OUTPUT //@{ //! upon return, byteCount contains number of bytes that have finished being transfered, and returns the number of bytes left in the current transfer block - virtual size_t TransferTo2(BufferedTransformation &target, lword &byteCount, const std::string &channel=NULL_CHANNEL, bool blocking=true) =0; + virtual size_t TransferTo2(BufferedTransformation &target, lword &byteCount, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true) =0; //! 
upon return, begin contains the start position of data yet to be finished copying, and returns the number of bytes left in the current transfer block - virtual size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=NULL_CHANNEL, bool blocking=true) const =0; + virtual size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true) const =0; //! upon return, messageCount contains number of messages that have finished being transfered, and returns the number of bytes left in the current transfer block - size_t TransferMessagesTo2(BufferedTransformation &target, unsigned int &messageCount, const std::string &channel=NULL_CHANNEL, bool blocking=true); + size_t TransferMessagesTo2(BufferedTransformation &target, unsigned int &messageCount, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true); //! returns the number of bytes left in the current transfer block - size_t TransferAllTo2(BufferedTransformation &target, const std::string &channel=NULL_CHANNEL, bool blocking=true); + size_t TransferAllTo2(BufferedTransformation &target, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true); //@} //! \name CHANNELS diff --git a/cryptlib.vcproj b/cryptlib.vcproj index a0458ff..cb34a4c 100755 --- a/cryptlib.vcproj +++ b/cryptlib.vcproj @@ -8528,7 +8528,7 @@ >
<Tool
Name="VCCustomBuildTool"
- CommandLine="ml64.exe /c /nologo /Fo"$(IntDir)\x64dll.obj" /Zi "$(InputPath)""
+ CommandLine="ml64.exe /c /nologo /Fo"$(IntDir)\x64dll.obj" /Zi "$(InputPath)"
"
Outputs="$(IntDir)\x64dll.obj"
/>
</FileConfiguration>
@@ -8546,7 +8546,7 @@ >
<Tool
Name="VCCustomBuildTool"
- CommandLine="ml64.exe /c /nologo /Fo"$(IntDir)\x64dll.obj" /Zi "$(InputPath)""
+ CommandLine="ml64.exe /c /nologo /Fo"$(IntDir)\x64dll.obj" /Zi "$(InputPath)"
"
Outputs="$(IntDir)\x64dll.obj"
/>
</FileConfiguration>
@@ -8563,7 +8563,7 @@ >
<Tool
Name="VCCustomBuildTool"
- CommandLine="ml64.exe /c /nologo /Fo"$(IntDir)\x64dll.obj" /Zi "$(InputPath)""
+ CommandLine="ml64.exe /c /nologo /Fo"$(IntDir)\x64dll.obj" /Zi "$(InputPath)"
"
Outputs="$(IntDir)\x64dll.obj"
/>
</FileConfiguration>
@@ -8581,7 +8581,7 @@ >
<Tool
Name="VCCustomBuildTool"
- CommandLine="ml64.exe /c /nologo /Fo&quot;$(IntDir)\x64dll.obj&quot; /Zi &quot;$(InputPath)&quot;"
+ CommandLine="ml64.exe /c /nologo /Fo&quot;$(IntDir)\x64dll.obj&quot; /Zi &quot;$(InputPath)&quot;&#x0D;&#x0A;"
Outputs="$(IntDir)\x64dll.obj"
/>
</FileConfiguration>
diff --git a/datatest.cpp b/datatest.cpp index a028244..3e97a3e 100644 --- a/datatest.cpp +++ b/datatest.cpp @@ -410,19 +410,19 @@ void TestAuthenticatedSymmetricCipher(TestData &v, const NameValuePairs &overrid if (macAtBegin) sm.TransferTo(df); - sh.CopyTo(df, LWORD_MAX, "AAD"); + sh.CopyTo(df, LWORD_MAX, AAD_CHANNEL); sc.TransferTo(df); - sf.CopyTo(df, LWORD_MAX, "AAD"); + sf.CopyTo(df, LWORD_MAX, AAD_CHANNEL); if (!macAtBegin) sm.TransferTo(df); df.MessageEnd(); - sh.TransferTo(ef, sh.MaxRetrievable()/2+1, "AAD"); - sh.TransferTo(ef, LWORD_MAX, "AAD"); + sh.TransferTo(ef, sh.MaxRetrievable()/2+1, AAD_CHANNEL); + sh.TransferTo(ef, LWORD_MAX, AAD_CHANNEL); sp.TransferTo(ef, sp.MaxRetrievable()/2+1); sp.TransferTo(ef); - sf.TransferTo(ef, sf.MaxRetrievable()/2+1, "AAD"); - sf.TransferTo(ef, LWORD_MAX, "AAD"); + sf.TransferTo(ef, sf.MaxRetrievable()/2+1, AAD_CHANNEL); + sf.TransferTo(ef, LWORD_MAX, AAD_CHANNEL); ef.MessageEnd(); if (test == "Encrypt" && encrypted != ciphertext+mac) @@ -31,8 +31,8 @@ public: std::istream* GetStream() {return m_stream;} lword MaxRetrievable() const; - size_t TransferTo2(BufferedTransformation &target, lword &transferBytes, const std::string &channel=NULL_CHANNEL, bool blocking=true); - size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=NULL_CHANNEL, bool blocking=true) const; + size_t TransferTo2(BufferedTransformation &target, lword &transferBytes, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true); + size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true) const; lword Skip(lword skipMax=ULONG_MAX); private: diff --git a/filters.cpp b/filters.cpp index 9a749f4..083dfd3 100644 --- a/filters.cpp +++ b/filters.cpp @@ -596,7 +596,7 @@ void StreamTransformationFilter::NextPutMultiple(const byte *inString, size_t le do { size_t len = m_optimalBufferSize; - byte *space = 
HelpCreatePutSpace(*AttachedTransformation(), NULL_CHANNEL, s, length, len); + byte *space = HelpCreatePutSpace(*AttachedTransformation(), DEFAULT_CHANNEL, s, length, len); if (len < length) { if (len == m_optimalBufferSize) @@ -636,7 +636,7 @@ void StreamTransformationFilter::LastPut(const byte *inString, size_t length) { // do padding size_t blockSize = STDMAX(minLastBlockSize, (size_t)m_cipher.MandatoryBlockSize()); - space = HelpCreatePutSpace(*AttachedTransformation(), NULL_CHANNEL, blockSize); + space = HelpCreatePutSpace(*AttachedTransformation(), DEFAULT_CHANNEL, blockSize); memcpy(space, inString, length); memset(space + length, 0, blockSize - length); m_cipher.ProcessLastBlock(space, space, blockSize); @@ -652,7 +652,7 @@ void StreamTransformationFilter::LastPut(const byte *inString, size_t length) throw InvalidCiphertext("StreamTransformationFilter: ciphertext length is not a multiple of block size"); } - space = HelpCreatePutSpace(*AttachedTransformation(), NULL_CHANNEL, length, m_optimalBufferSize); + space = HelpCreatePutSpace(*AttachedTransformation(), DEFAULT_CHANNEL, length, m_optimalBufferSize); m_cipher.ProcessLastBlock(space, inString, length); AttachedTransformation()->Put(space, length); } @@ -664,7 +664,7 @@ void StreamTransformationFilter::LastPut(const byte *inString, size_t length) unsigned int s; s = m_cipher.MandatoryBlockSize(); assert(s > 1); - space = HelpCreatePutSpace(*AttachedTransformation(), NULL_CHANNEL, s, m_optimalBufferSize); + space = HelpCreatePutSpace(*AttachedTransformation(), DEFAULT_CHANNEL, s, m_optimalBufferSize); if (m_cipher.IsForwardTransformation()) { assert(length < s); @@ -807,9 +807,9 @@ void HashVerificationFilter::LastPut(const byte *inString, size_t length) // ************************************************************* AuthenticatedEncryptionFilter::AuthenticatedEncryptionFilter(AuthenticatedSymmetricCipher &c, BufferedTransformation *attachment, - bool putMessage, int truncatedDigestSize, const 
std::string &macChannel, BlockPaddingScheme padding) + bool putAAD, int truncatedDigestSize, const std::string &macChannel, BlockPaddingScheme padding) : StreamTransformationFilter(c, attachment, padding, true) - , m_hf(c, new OutputProxy(*this, false), putMessage, truncatedDigestSize, "AAD", macChannel) + , m_hf(c, new OutputProxy(*this, false), putAAD, truncatedDigestSize, AAD_CHANNEL, macChannel) { assert(c.IsForwardTransformation()); } @@ -825,7 +825,7 @@ byte * AuthenticatedEncryptionFilter::ChannelCreatePutSpace(const std::string &c if (channel.empty()) return StreamTransformationFilter::CreatePutSpace(size); - if (channel == "AAD") + if (channel == AAD_CHANNEL) return m_hf.CreatePutSpace(size); throw InvalidChannelName("AuthenticatedEncryptionFilter", channel); @@ -836,7 +836,7 @@ size_t AuthenticatedEncryptionFilter::ChannelPut2(const std::string &channel, co if (channel.empty()) return StreamTransformationFilter::Put2(begin, length, messageEnd, blocking); - if (channel == "AAD") + if (channel == AAD_CHANNEL) return m_hf.Put2(begin, length, 0, blocking); throw InvalidChannelName("AuthenticatedEncryptionFilter", channel); @@ -876,7 +876,7 @@ byte * AuthenticatedDecryptionFilter::ChannelCreatePutSpace(const std::string &c if (channel.empty()) return m_streamFilter.CreatePutSpace(size); - if (channel == "AAD") + if (channel == AAD_CHANNEL) return m_hashVerifier.CreatePutSpace(size); throw InvalidChannelName("AuthenticatedDecryptionFilter", channel); @@ -891,7 +891,7 @@ size_t AuthenticatedDecryptionFilter::ChannelPut2(const std::string &channel, co return FilterWithBufferedInput::Put2(begin, length, messageEnd, blocking); } - if (channel == "AAD") + if (channel == AAD_CHANNEL) return m_hashVerifier.Put2(begin, length, 0, blocking); throw InvalidChannelName("AuthenticatedDecryptionFilter", channel); @@ -22,8 +22,8 @@ public: const BufferedTransformation *AttachedTransformation() const; void Detach(BufferedTransformation *newAttachment = NULL); - size_t 
TransferTo2(BufferedTransformation &target, lword &transferBytes, const std::string &channel=NULL_CHANNEL, bool blocking=true); - size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=NULL_CHANNEL, bool blocking=true) const; + size_t TransferTo2(BufferedTransformation &target, lword &transferBytes, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true); + size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true) const; void Initialize(const NameValuePairs &parameters=g_nullNameValuePairs, int propagation=-1); bool Flush(bool hardFlush, int propagation=-1, bool blocking=true); @@ -38,11 +38,11 @@ protected: void PropagateInitialize(const NameValuePairs &parameters, int propagation); - size_t Output(int outputSite, const byte *inString, size_t length, int messageEnd, bool blocking, const std::string &channel=NULL_CHANNEL); - size_t OutputModifiable(int outputSite, byte *inString, size_t length, int messageEnd, bool blocking, const std::string &channel=NULL_CHANNEL); - bool OutputMessageEnd(int outputSite, int propagation, bool blocking, const std::string &channel=NULL_CHANNEL); - bool OutputFlush(int outputSite, bool hardFlush, int propagation, bool blocking, const std::string &channel=NULL_CHANNEL); - bool OutputMessageSeriesEnd(int outputSite, int propagation, bool blocking, const std::string &channel=NULL_CHANNEL); + size_t Output(int outputSite, const byte *inString, size_t length, int messageEnd, bool blocking, const std::string &channel=DEFAULT_CHANNEL); + size_t OutputModifiable(int outputSite, byte *inString, size_t length, int messageEnd, bool blocking, const std::string &channel=DEFAULT_CHANNEL); + bool OutputMessageEnd(int outputSite, int propagation, bool blocking, const std::string &channel=DEFAULT_CHANNEL); + bool OutputFlush(int outputSite, bool hardFlush, int propagation, bool blocking, const 
std::string &channel=DEFAULT_CHANNEL); + bool OutputMessageSeriesEnd(int outputSite, int propagation, bool blocking, const std::string &channel=DEFAULT_CHANNEL); private: member_ptr<BufferedTransformation> m_attachment; @@ -289,7 +289,7 @@ typedef StreamTransformationFilter StreamCipherFilter; class CRYPTOPP_DLL HashFilter : public Bufferless<Filter>, private FilterPutSpaceHelper { public: - HashFilter(HashTransformation &hm, BufferedTransformation *attachment = NULL, bool putMessage=false, int truncatedDigestSize=-1, const std::string &messagePutChannel=NULL_CHANNEL, const std::string &hashPutChannel=NULL_CHANNEL); + HashFilter(HashTransformation &hm, BufferedTransformation *attachment = NULL, bool putMessage=false, int truncatedDigestSize=-1, const std::string &messagePutChannel=DEFAULT_CHANNEL, const std::string &hashPutChannel=DEFAULT_CHANNEL); std::string AlgorithmName() const {return m_hashModule.AlgorithmName();} void IsolatedInitialize(const NameValuePairs &parameters); @@ -315,7 +315,7 @@ public: : Exception(DATA_INTEGRITY_CHECK_FAILED, "HashVerificationFilter: message hash or MAC not valid") {} }; - enum Flags {HASH_AT_BEGIN=1, PUT_MESSAGE=2, PUT_HASH=4, PUT_RESULT=8, THROW_EXCEPTION=16, DEFAULT_FLAGS = HASH_AT_BEGIN | PUT_RESULT}; + enum Flags {HASH_AT_END=0, HASH_AT_BEGIN=1, PUT_MESSAGE=2, PUT_HASH=4, PUT_RESULT=8, THROW_EXCEPTION=16, DEFAULT_FLAGS = HASH_AT_BEGIN | PUT_RESULT}; HashVerificationFilter(HashTransformation &hm, BufferedTransformation *attachment = NULL, word32 flags = DEFAULT_FLAGS, int truncatedDigestSize=-1); std::string AlgorithmName() const {return m_hashModule.AlgorithmName();} @@ -345,7 +345,7 @@ class CRYPTOPP_DLL AuthenticatedEncryptionFilter : public StreamTransformationFi { public: /*! 
See StreamTransformationFilter for documentation on BlockPaddingScheme */ - AuthenticatedEncryptionFilter(AuthenticatedSymmetricCipher &c, BufferedTransformation *attachment = NULL, bool putMessage=false, int truncatedDigestSize=-1, const std::string &macChannel=NULL_CHANNEL, BlockPaddingScheme padding = DEFAULT_PADDING); + AuthenticatedEncryptionFilter(AuthenticatedSymmetricCipher &c, BufferedTransformation *attachment = NULL, bool putAAD=false, int truncatedDigestSize=-1, const std::string &macChannel=DEFAULT_CHANNEL, BlockPaddingScheme padding = DEFAULT_PADDING); void IsolatedInitialize(const NameValuePairs &parameters); byte * ChannelCreatePutSpace(const std::string &channel, size_t &size); @@ -361,7 +361,7 @@ protected: class CRYPTOPP_DLL AuthenticatedDecryptionFilter : public FilterWithBufferedInput, public BlockPaddingSchemeDef { public: - enum Flags {MAC_AT_BEGIN=1, THROW_EXCEPTION=16, DEFAULT_FLAGS = THROW_EXCEPTION}; + enum Flags {MAC_AT_END=0, MAC_AT_BEGIN=1, THROW_EXCEPTION=16, DEFAULT_FLAGS = THROW_EXCEPTION}; /*! 
See StreamTransformationFilter for documentation on BlockPaddingScheme */ AuthenticatedDecryptionFilter(AuthenticatedSymmetricCipher &c, BufferedTransformation *attachment = NULL, word32 flags = DEFAULT_FLAGS, int truncatedDigestSize=-1, BlockPaddingScheme padding = DEFAULT_PADDING); @@ -412,7 +412,7 @@ public: : Exception(DATA_INTEGRITY_CHECK_FAILED, "VerifierFilter: digital signature not valid") {} }; - enum Flags {SIGNATURE_AT_BEGIN=1, PUT_MESSAGE=2, PUT_SIGNATURE=4, PUT_RESULT=8, THROW_EXCEPTION=16, DEFAULT_FLAGS = SIGNATURE_AT_BEGIN | PUT_RESULT}; + enum Flags {SIGNATURE_AT_END=0, SIGNATURE_AT_BEGIN=1, PUT_MESSAGE=2, PUT_SIGNATURE=4, PUT_RESULT=8, THROW_EXCEPTION=16, DEFAULT_FLAGS = SIGNATURE_AT_BEGIN | PUT_RESULT}; SignatureVerificationFilter(const PK_Verifier &verifier, BufferedTransformation *attachment = NULL, word32 flags = DEFAULT_FLAGS); std::string AlgorithmName() const {return m_verifier.AlgorithmName();} @@ -517,6 +517,8 @@ public: bool MessageSeriesEnd(int propagation=-1, bool blocking=true) {return m_passSignal ? m_owner.AttachedTransformation()->MessageSeriesEnd(propagation, blocking) : false;} + byte * ChannelCreatePutSpace(const std::string &channel, size_t &size) + {return m_owner.AttachedTransformation()->ChannelCreatePutSpace(channel, size);} size_t ChannelPut2(const std::string &channel, const byte *begin, size_t length, int messageEnd, bool blocking) {return m_owner.AttachedTransformation()->ChannelPut2(channel, begin, length, m_passSignal ? 
messageEnd : 0, blocking);} size_t ChannelPutModifiable2(const std::string &channel, byte *begin, size_t length, int messageEnd, bool blocking) @@ -669,8 +671,8 @@ public: template <class T> StringStore(const T &string) {StoreInitialize(MakeParameters("InputBuffer", ConstByteArrayParameter(string)));} - CRYPTOPP_DLL size_t TransferTo2(BufferedTransformation &target, lword &transferBytes, const std::string &channel=NULL_CHANNEL, bool blocking=true); - CRYPTOPP_DLL size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=NULL_CHANNEL, bool blocking=true) const; + CRYPTOPP_DLL size_t TransferTo2(BufferedTransformation &target, lword &transferBytes, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true); + CRYPTOPP_DLL size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true) const; private: CRYPTOPP_DLL void StoreInitialize(const NameValuePairs &parameters); @@ -692,8 +694,8 @@ public: bool AnyRetrievable() const {return MaxRetrievable() != 0;} lword MaxRetrievable() const {return m_length-m_count;} - size_t TransferTo2(BufferedTransformation &target, lword &transferBytes, const std::string &channel=NULL_CHANNEL, bool blocking=true); - size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=NULL_CHANNEL, bool blocking=true) const + size_t TransferTo2(BufferedTransformation &target, lword &transferBytes, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true); + size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true) const { throw NotImplemented("RandomNumberStore: CopyRangeTo2() is not supported by this store"); } @@ -712,8 +714,8 @@ public: NullStore(lword size = ULONG_MAX) : m_size(size) {} void StoreInitialize(const NameValuePairs &parameters) {} 
lword 
MaxRetrievable() const {return m_size;} - size_t TransferTo2(BufferedTransformation &target, lword &transferBytes, const std::string &channel=NULL_CHANNEL, bool blocking=true); - size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=NULL_CHANNEL, bool blocking=true) const; + size_t TransferTo2(BufferedTransformation &target, lword &transferBytes, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true); + size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true) const; private: lword m_size; @@ -756,11 +758,11 @@ public: void IsolatedInitialize(const NameValuePairs &parameters) {m_store.IsolatedInitialize(parameters);} size_t Pump2(lword &byteCount, bool blocking=true) - {return m_store.TransferTo2(*AttachedTransformation(), byteCount, NULL_CHANNEL, blocking);} + {return m_store.TransferTo2(*AttachedTransformation(), byteCount, DEFAULT_CHANNEL, blocking);} size_t PumpMessages2(unsigned int &messageCount, bool blocking=true) - {return m_store.TransferMessagesTo2(*AttachedTransformation(), messageCount, NULL_CHANNEL, blocking);} + {return m_store.TransferMessagesTo2(*AttachedTransformation(), messageCount, DEFAULT_CHANNEL, blocking);} size_t PumpAll2(bool blocking=true) - {return m_store.TransferAllTo2(*AttachedTransformation(), NULL_CHANNEL, blocking);} + {return m_store.TransferAllTo2(*AttachedTransformation(), DEFAULT_CHANNEL, blocking);} bool SourceExhausted() const {return !m_store.AnyRetrievable() && !m_store.AnyMessages();} void SetAutoSignalPropagation(int propagation) @@ -34,7 +34,7 @@ } #define FILTER_OUTPUT2(site, statement, output, length, messageEnd) \ - FILTER_OUTPUT3(site, statement, output, length, messageEnd, NULL_CHANNEL) + FILTER_OUTPUT3(site, statement, output, length, messageEnd, DEFAULT_CHANNEL) #define FILTER_OUTPUT(site, output, length, messageEnd) \ FILTER_OUTPUT2(site, 0, output, length, 
messageEnd) diff --git a/iterhash.cpp b/iterhash.cpp index 642a7ca..478950c 100644 --- a/iterhash.cpp +++ b/iterhash.cpp @@ -132,14 +132,18 @@ template <class T, class BASE> void IteratedHashBase<T, BASE>::TruncatedFinal(by ByteOrder order = this->GetByteOrder(); PadLastBlock(blockSize - 2*sizeof(HashWordType)); - ConditionalByteReverse<HashWordType>(order, dataBuf, dataBuf, blockSize - 2*sizeof(HashWordType)); + dataBuf[blockSize/sizeof(T)-2+order] = ConditionalByteReverse(order, this->GetBitCountLo()); + dataBuf[blockSize/sizeof(T)-1-order] = ConditionalByteReverse(order, this->GetBitCountHi()); - dataBuf[blockSize/sizeof(T)-2] = order ? this->GetBitCountHi() : this->GetBitCountLo(); - dataBuf[blockSize/sizeof(T)-1] = order ? this->GetBitCountLo() : this->GetBitCountHi(); + HashBlock(dataBuf); - HashEndianCorrectedBlock(dataBuf); - ConditionalByteReverse<HashWordType>(order, stateBuf, stateBuf, this->DigestSize()); - memcpy(digest, stateBuf, size); + if (IsAligned<HashWordType>(digest) && size%sizeof(HashWordType)==0) + ConditionalByteReverse<HashWordType>(order, (HashWordType *)digest, stateBuf, size); + else + { + ConditionalByteReverse<HashWordType>(order, stateBuf, stateBuf, this->DigestSize()); + memcpy(digest, stateBuf, size); + } this->Restart(); // reinit for next use } @@ -76,7 +76,7 @@ protected: }; //! 
_ -template <class T_HashWordType, class T_Endianness, unsigned int T_BlockSize, unsigned int T_StateSize, class T_Transform, unsigned int T_DigestSize = 0> +template <class T_HashWordType, class T_Endianness, unsigned int T_BlockSize, unsigned int T_StateSize, class T_Transform, unsigned int T_DigestSize = 0, bool T_StateAligned = false> class CRYPTOPP_NO_VTABLE IteratedHashWithStaticTransform : public ClonableImpl<T_Transform, AlgorithmImpl<IteratedHash<T_HashWordType, T_Endianness, T_BlockSize>, T_Transform> > { @@ -90,7 +90,7 @@ protected: void Init() {T_Transform::InitState(this->m_state);} T_HashWordType* StateBuf() {return this->m_state;} - FixedSizeSecBlock<T_HashWordType, T_BlockSize/sizeof(T_HashWordType)> m_state; + FixedSizeAlignedSecBlock<T_HashWordType, T_BlockSize/sizeof(T_HashWordType), T_StateAligned> m_state; }; #ifndef __GNUC__ @@ -35,8 +35,8 @@ public: bool AnyRetrievable() const {return m_lengths.front() > 0;} - size_t TransferTo2(BufferedTransformation &target, lword &transferBytes, const std::string &channel=NULL_CHANNEL, bool blocking=true); - size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=NULL_CHANNEL, bool blocking=true) const; + size_t TransferTo2(BufferedTransformation &target, lword &transferBytes, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true); + size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true) const; lword TotalBytesRetrievable() const {return m_queue.MaxRetrievable();} @@ -49,7 +49,7 @@ public: unsigned int NumberOfMessageSeries() const {return (unsigned int)m_messageCounts.size()-1;} - unsigned int CopyMessagesTo(BufferedTransformation &target, unsigned int count=UINT_MAX, const std::string &channel=NULL_CHANNEL) const; + unsigned int CopyMessagesTo(BufferedTransformation &target, unsigned int count=UINT_MAX, const std::string 
&channel=DEFAULT_CHANNEL) const; const byte * Spy(size_t &contiguousSize) const; @@ -64,14 +64,14 @@ public: return len; } - inline size_t CopyTo(BufferedTransformation &target, const std::string &channel=BufferedTransformation::NULL_CHANNEL) const + inline size_t CopyTo(BufferedTransformation &target, const std::string &channel=DEFAULT_CHANNEL) const { size_t len = m_tail-m_head; target.ChannelPut(channel, buf+m_head, len); return len; } - inline size_t CopyTo(BufferedTransformation &target, size_t copyMax, const std::string &channel=BufferedTransformation::NULL_CHANNEL) const + inline size_t CopyTo(BufferedTransformation &target, size_t copyMax, const std::string &channel=DEFAULT_CHANNEL) const { size_t len = STDMIN(copyMax, m_tail-m_head); target.ChannelPut(channel, buf+m_head, len); @@ -92,7 +92,7 @@ public: return len; } - inline size_t TransferTo(BufferedTransformation &target, const std::string &channel=BufferedTransformation::NULL_CHANNEL) + inline size_t TransferTo(BufferedTransformation &target, const std::string &channel=DEFAULT_CHANNEL) { size_t len = m_tail-m_head; target.ChannelPutModifiable(channel, buf+m_head, len); @@ -100,7 +100,7 @@ public: return len; } - inline size_t TransferTo(BufferedTransformation &target, lword transferMax, const std::string &channel=BufferedTransformation::NULL_CHANNEL) + inline size_t TransferTo(BufferedTransformation &target, lword transferMax, const std::string &channel=DEFAULT_CHANNEL) { size_t len = UnsignedMin(m_tail-m_head, transferMax); target.ChannelPutModifiable(channel, buf+m_head, len); @@ -35,8 +35,8 @@ public: size_t Peek(byte &outByte) const; size_t Peek(byte *outString, size_t peekMax) const; - size_t TransferTo2(BufferedTransformation &target, lword &transferBytes, const std::string &channel=NULL_CHANNEL, bool blocking=true); - size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=NULL_CHANNEL, bool blocking=true) const; + size_t 
TransferTo2(BufferedTransformation &target, lword &transferBytes, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true); + size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true) const; // these member functions are not inherited void SetNodeSize(size_t nodeSize); @@ -80,8 +80,8 @@ public: size_t Peek(byte &outByte) const; size_t Peek(byte *outString, size_t peekMax) const; - size_t TransferTo2(BufferedTransformation &target, lword &transferBytes, const std::string &channel=NULL_CHANNEL, bool blocking=true); - size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=NULL_CHANNEL, bool blocking=true) const; + size_t TransferTo2(BufferedTransformation &target, lword &transferBytes, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true); + size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true) const; private: const ByteQueue &m_queue; diff --git a/rijndael.cpp b/rijndael.cpp index 589733e..04e1f21 100644 --- a/rijndael.cpp +++ b/rijndael.cpp @@ -534,8 +534,10 @@ CRYPTOPP_NAKED void CRYPTOPP_FASTCALL Rijndael_Enc_AdvancedProcessBlocks(void *l #endif #if CRYPTOPP_BOOL_X86 - AS_PUSH_IF86( bx) - AS_PUSH_IF86( bp) +#if !defined(_MSC_VER) || (_MSC_VER < 1300) + AS_PUSH_IF86(bx) +#endif + AS_PUSH_IF86(bp) AS2( mov [ecx+16*12+16*4], esp) AS2( lea esp, [ecx-512]) #endif @@ -583,7 +585,7 @@ CRYPTOPP_NAKED void CRYPTOPP_FASTCALL Rijndael_Enc_AdvancedProcessBlocks(void *l // counter mode one-time setup AS2( mov WORD_REG(bp), [L_INBLOCKS]) - AS2( movdqa xmm2, [WORD_REG(bp)]) // counter + AS2( movdqu xmm2, [WORD_REG(bp)]) // counter AS2( pxor xmm2, xmm1) AS2( psrldq xmm1, 14) AS2( movd eax, xmm1) @@ -843,11 +845,13 @@ CRYPTOPP_NAKED void CRYPTOPP_FASTCALL Rijndael_Enc_AdvancedProcessBlocks(void *l #else AS2( mov rbp, 
[L_BP]) #endif - AS_POP_IF86( bp) - AS_POP_IF86( bx) + AS_POP_IF86(bp) +#if !defined(_MSC_VER) || (_MSC_VER < 1300) + AS_POP_IF86(bx) +#endif #ifndef __GNUC__ - AS_POP_IF86( di) - AS_POP_IF86( si) + AS_POP_IF86(di) + AS_POP_IF86(si) #endif #ifdef CRYPTOPP_GENERATE_X64_MASM pop r12 @@ -459,7 +459,7 @@ public: }; template <class T, unsigned int S, bool T_Align16 = CRYPTOPP_BOOL_X86 | CRYPTOPP_BOOL_X64> -class FixedSizeAlignedSecBlock : public FixedSizeSecBlock<T, S, FixedSizeAllocatorWithCleanup<T, S, NullAllocator<word32>, T_Align16> > +class FixedSizeAlignedSecBlock : public FixedSizeSecBlock<T, S, FixedSizeAllocatorWithCleanup<T, S, NullAllocator<T>, T_Align16> > { }; @@ -3,14 +3,21 @@ // Steve Reid implemented SHA-1. Wei Dai implemented SHA-2. // Both are in the public domain. +// use "cl /EP /P /DCRYPTOPP_GENERATE_X64_MASM sha.cpp" to generate MASM code + #include "pch.h" #ifndef CRYPTOPP_IMPORTS +#ifndef CRYPTOPP_GENERATE_X64_MASM #include "sha.h" #include "misc.h" #include "cpu.h" +#if CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE +#include <emmintrin.h> +#endif + NAMESPACE_BEGIN(CryptoPP) // start of Steve Reid's code @@ -93,7 +100,7 @@ void SHA256::InitState(HashWordType *state) memcpy(state, s, sizeof(s)); } -static const word32 SHA256_K[64] = { +extern const word32 SHA256_K[64] = { 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, @@ -112,10 +119,333 @@ static const word32 SHA256_K[64] = { 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 }; +#endif // #ifndef CRYPTOPP_GENERATE_X64_MASM + +#if defined(CRYPTOPP_X86_ASM_AVAILABLE) || defined(CRYPTOPP_GENERATE_X64_MASM) + +#pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code + +static void CRYPTOPP_FASTCALL X86_SHA256_HashBlocks(word32 *state, const word32 *data, size_t len) +{ + #define LOCALS_SIZE 8*4 + 16*4 + 4*WORD_SZ + #define H(i) [BASE+ASM_MOD(1024+7-(i),8)*4] + #define G(i) 
H(i+1) + #define F(i) H(i+2) + #define E(i) H(i+3) + #define D(i) H(i+4) + #define C(i) H(i+5) + #define B(i) H(i+6) + #define A(i) H(i+7) + #define Wt(i) BASE+8*4+ASM_MOD(1024+15-(i),16)*4 + #define Wt_2(i) Wt((i)-2) + #define Wt_15(i) Wt((i)-15) + #define Wt_7(i) Wt((i)-7) + #define K_END [BASE+8*4+16*4+0*WORD_SZ] + #define STATE_SAVE [BASE+8*4+16*4+1*WORD_SZ] + #define DATA_SAVE [BASE+8*4+16*4+2*WORD_SZ] + #define DATA_END [BASE+8*4+16*4+3*WORD_SZ] + #define Kt(i) WORD_REG(si)+(i)*4 +#if CRYPTOPP_BOOL_X86 + #define BASE esp+4 +#elif defined(__GNUC__) + #define BASE r8 +#else + #define BASE rsp +#endif + +#define RA0(i, edx, edi) \ + AS2( add edx, [Kt(i)] )\ + AS2( add edx, [Wt(i)] )\ + AS2( add edx, H(i) )\ + +#define RA1(i, edx, edi) + +#define RB0(i, edx, edi) + +#define RB1(i, edx, edi) \ + AS2( mov AS_REG_7d, [Wt_2(i)] )\ + AS2( mov edi, [Wt_15(i)])\ + AS2( mov ebx, AS_REG_7d )\ + AS2( shr AS_REG_7d, 10 )\ + AS2( ror ebx, 17 )\ + AS2( xor AS_REG_7d, ebx )\ + AS2( ror ebx, 2 )\ + AS2( xor ebx, AS_REG_7d )/* s1(W_t-2) */\ + AS2( add ebx, [Wt_7(i)])\ + AS2( mov AS_REG_7d, edi )\ + AS2( shr AS_REG_7d, 3 )\ + AS2( ror edi, 7 )\ + AS2( add ebx, [Wt(i)])/* s1(W_t-2) + W_t-7 + W_t-16 */\ + AS2( xor AS_REG_7d, edi )\ + AS2( add edx, [Kt(i)])\ + AS2( ror edi, 11 )\ + AS2( add edx, H(i) )\ + AS2( xor AS_REG_7d, edi )/* s0(W_t-15) */\ + AS2( add AS_REG_7d, ebx )/* W_t = s1(W_t-2) + W_t-7 + s0(W_t-15) W_t-16*/\ + AS2( mov [Wt(i)], AS_REG_7d)\ + AS2( add edx, AS_REG_7d )\ + +#define ROUND(i, r, eax, ecx, edi, edx)\ + /* in: edi = E */\ + /* unused: eax, ecx, temp: ebx, AS_REG_7d, out: edx = T1 */\ + AS2( mov edx, F(i) )\ + AS2( xor edx, G(i) )\ + AS2( and edx, edi )\ + AS2( xor edx, G(i) )/* Ch(E,F,G) = (G^(E&(F^G))) */\ + AS2( mov AS_REG_7d, edi )\ + AS2( ror edi, 6 )\ + AS2( ror AS_REG_7d, 25 )\ + RA##r(i, edx, edi )/* H + Wt + Kt + Ch(E,F,G) */\ + AS2( xor AS_REG_7d, edi )\ + AS2( ror edi, 5 )\ + AS2( xor AS_REG_7d, edi )/* S1(E) */\ + AS2( add edx, AS_REG_7d )/* T1 = 
S1(E) + Ch(E,F,G) + H + Wt + Kt */\ + RB##r(i, edx, edi )/* H + Wt + Kt + Ch(E,F,G) */\ + /* in: ecx = A, eax = B^C, edx = T1 */\ + /* unused: edx, temp: ebx, AS_REG_7d, out: eax = A, ecx = B^C, edx = E */\ + AS2( mov ebx, ecx )\ + AS2( xor ecx, B(i) )/* A^B */\ + AS2( and eax, ecx )\ + AS2( xor eax, B(i) )/* Maj(A,B,C) = B^((A^B)&(B^C) */\ + AS2( mov AS_REG_7d, ebx )\ + AS2( ror ebx, 2 )\ + AS2( add eax, edx )/* T1 + Maj(A,B,C) */\ + AS2( add edx, D(i) )\ + AS2( mov D(i), edx )\ + AS2( ror AS_REG_7d, 22 )\ + AS2( xor AS_REG_7d, ebx )\ + AS2( ror ebx, 11 )\ + AS2( xor AS_REG_7d, ebx )\ + AS2( add eax, AS_REG_7d )/* T1 + S0(A) + Maj(A,B,C) */\ + AS2( mov H(i), eax )\ + +#define SWAP_COPY(i) \ + AS2( mov WORD_REG(bx), [WORD_REG(dx)+i*WORD_SZ])\ + AS1( bswap WORD_REG(bx))\ + AS2( mov [Wt(i*(1+CRYPTOPP_BOOL_X64)+CRYPTOPP_BOOL_X64)], WORD_REG(bx)) + +#if defined(__GNUC__) + #if CRYPTOPP_BOOL_X64 + __m128i workspace[(LOCALS_SIZE+15)/16]; + #endif + __asm__ __volatile__ + ( + #if CRYPTOPP_BOOL_X64 + "movq %4, %%r8;" + #endif + ".intel_syntax noprefix;" +#elif defined(CRYPTOPP_GENERATE_X64_MASM) + ALIGN 8 + X86_SHA256_HashBlocks PROC FRAME + rex_push_reg rsi + push_reg rdi + push_reg rbx + push_reg rbp + alloc_stack(LOCALS_SIZE+8) + .endprolog + mov rdi, r8 + lea rsi, [?SHA256_K@CryptoPP@@3QBIB + 48*4] +#endif + +#if CRYPTOPP_BOOL_X86 + #ifndef __GNUC__ + AS2( mov edi, [len]) + AS2( lea WORD_REG(si), [SHA256_K+48*4]) + #endif + #if !defined(_MSC_VER) || (_MSC_VER < 1300) + AS_PUSH_IF86(bx) + #endif + + AS_PUSH_IF86(bp) + AS2( mov ebx, esp) + AS2( and esp, -16) + AS2( sub WORD_REG(sp), LOCALS_SIZE) + AS_PUSH_IF86(bx) +#endif + AS2( mov STATE_SAVE, WORD_REG(cx)) + AS2( mov DATA_SAVE, WORD_REG(dx)) + AS2( add WORD_REG(di), WORD_REG(dx)) + AS2( mov DATA_END, WORD_REG(di)) + AS2( mov K_END, WORD_REG(si)) + +#if CRYPTOPP_BOOL_X86 + AS2( test edi, 1) + ASJ( jnz, 2, f) +#endif + + AS2( movdqa xmm0, XMMWORD_PTR [WORD_REG(cx)+0*16]) + AS2( movdqa xmm1, XMMWORD_PTR 
[WORD_REG(cx)+1*16]) + +#if CRYPTOPP_BOOL_X86 + ASJ( jmp, 0, f) + ASL(2) // non-SSE2 + AS2( mov esi, ecx) + AS2( lea edi, A(0)) + AS2( mov ecx, 8) + AS1( rep movsd) + AS2( mov esi, K_END) + ASJ( jmp, 3, f) +#endif + + ASL(0) + AS2( movdqa E(0), xmm1) + AS2( movdqa A(0), xmm0) +#if CRYPTOPP_BOOL_X86 + ASL(3) +#endif + AS2( sub WORD_REG(si), 48*4) + SWAP_COPY(0) SWAP_COPY(1) SWAP_COPY(2) SWAP_COPY(3) + SWAP_COPY(4) SWAP_COPY(5) SWAP_COPY(6) SWAP_COPY(7) +#if CRYPTOPP_BOOL_X86 + SWAP_COPY(8) SWAP_COPY(9) SWAP_COPY(10) SWAP_COPY(11) + SWAP_COPY(12) SWAP_COPY(13) SWAP_COPY(14) SWAP_COPY(15) +#endif + AS2( mov edi, E(0)) // E + AS2( mov eax, B(0)) // B + AS2( xor eax, C(0)) // B^C + AS2( mov ecx, A(0)) // A + + ROUND(0, 0, eax, ecx, edi, edx) + ROUND(1, 0, ecx, eax, edx, edi) + ROUND(2, 0, eax, ecx, edi, edx) + ROUND(3, 0, ecx, eax, edx, edi) + ROUND(4, 0, eax, ecx, edi, edx) + ROUND(5, 0, ecx, eax, edx, edi) + ROUND(6, 0, eax, ecx, edi, edx) + ROUND(7, 0, ecx, eax, edx, edi) + ROUND(8, 0, eax, ecx, edi, edx) + ROUND(9, 0, ecx, eax, edx, edi) + ROUND(10, 0, eax, ecx, edi, edx) + ROUND(11, 0, ecx, eax, edx, edi) + ROUND(12, 0, eax, ecx, edi, edx) + ROUND(13, 0, ecx, eax, edx, edi) + ROUND(14, 0, eax, ecx, edi, edx) + ROUND(15, 0, ecx, eax, edx, edi) + + ASL(1) + AS2(add WORD_REG(si), 4*16) + ROUND(0, 1, eax, ecx, edi, edx) + ROUND(1, 1, ecx, eax, edx, edi) + ROUND(2, 1, eax, ecx, edi, edx) + ROUND(3, 1, ecx, eax, edx, edi) + ROUND(4, 1, eax, ecx, edi, edx) + ROUND(5, 1, ecx, eax, edx, edi) + ROUND(6, 1, eax, ecx, edi, edx) + ROUND(7, 1, ecx, eax, edx, edi) + ROUND(8, 1, eax, ecx, edi, edx) + ROUND(9, 1, ecx, eax, edx, edi) + ROUND(10, 1, eax, ecx, edi, edx) + ROUND(11, 1, ecx, eax, edx, edi) + ROUND(12, 1, eax, ecx, edi, edx) + ROUND(13, 1, ecx, eax, edx, edi) + ROUND(14, 1, eax, ecx, edi, edx) + ROUND(15, 1, ecx, eax, edx, edi) + AS2( cmp WORD_REG(si), K_END) + ASJ( jne, 1, b) + + AS2( mov WORD_REG(dx), DATA_SAVE) + AS2( add WORD_REG(dx), 64) + AS2( mov AS_REG_7, 
STATE_SAVE) + AS2( mov DATA_SAVE, WORD_REG(dx)) + +#if CRYPTOPP_BOOL_X86 + AS2( test DWORD PTR DATA_END, 1) + ASJ( jnz, 4, f) +#endif + + AS2( movdqa xmm1, XMMWORD_PTR [AS_REG_7+1*16]) + AS2( movdqa xmm0, XMMWORD_PTR [AS_REG_7+0*16]) + AS2( paddd xmm1, E(0)) + AS2( paddd xmm0, A(0)) + AS2( movdqa [AS_REG_7+1*16], xmm1) + AS2( movdqa [AS_REG_7+0*16], xmm0) + AS2( cmp WORD_REG(dx), DATA_END) + ASJ( jl, 0, b) + +#if CRYPTOPP_BOOL_X86 + ASJ( jmp, 5, f) + ASL(4) // non-SSE2 + AS2( add [AS_REG_7+0*4], ecx) // A + AS2( add [AS_REG_7+4*4], edi) // E + AS2( mov eax, B(0)) + AS2( mov ebx, C(0)) + AS2( mov ecx, D(0)) + AS2( add [AS_REG_7+1*4], eax) + AS2( add [AS_REG_7+2*4], ebx) + AS2( add [AS_REG_7+3*4], ecx) + AS2( mov eax, F(0)) + AS2( mov ebx, G(0)) + AS2( mov ecx, H(0)) + AS2( add [AS_REG_7+5*4], eax) + AS2( add [AS_REG_7+6*4], ebx) + AS2( add [AS_REG_7+7*4], ecx) + AS2( mov ecx, AS_REG_7d) + AS2( cmp WORD_REG(dx), DATA_END) + ASJ( jl, 2, b) + ASL(5) +#endif + + AS_POP_IF86(sp) + AS_POP_IF86(bp) + #if !defined(_MSC_VER) || (_MSC_VER < 1300) + AS_POP_IF86(bx) + #endif + +#ifdef CRYPTOPP_GENERATE_X64_MASM + add rsp, LOCALS_SIZE+8 + pop rbp + pop rbx + pop rdi + pop rsi + ret + X86_SHA256_HashBlocks ENDP +#endif + +#ifdef __GNUC__ + ".att_syntax prefix;" + : + : "c" (state), "d" (data), "S" (SHA256_K+48), "D" (len) + #if CRYPTOPP_BOOL_X64 + , "r" (workspace) + #endif + : "memory", "cc", "%eax" + #if CRYPTOPP_BOOL_X64 + , "%rbx", "%r8" + #endif + ); +#endif +} + +#endif // #if defined(CRYPTOPP_X86_ASM_AVAILABLE) || defined(CRYPTOPP_GENERATE_X64_MASM) + +#ifndef CRYPTOPP_GENERATE_X64_MASM + +#ifdef CRYPTOPP_X64_MASM_AVAILABLE +extern "C" { +void CRYPTOPP_FASTCALL X86_SHA256_HashBlocks(word32 *state, const word32 *data, size_t len); +} +#endif + +#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE) + +size_t SHA256::HashMultipleBlocks(const word32 *input, size_t length) +{ + X86_SHA256_HashBlocks(m_state, input, (length&(size_t(0)-BLOCKSIZE)) - 
!HasSSE2()); + return length % BLOCKSIZE; +} + +size_t SHA224::HashMultipleBlocks(const word32 *input, size_t length) +{ + X86_SHA256_HashBlocks(m_state, input, (length&(size_t(0)-BLOCKSIZE)) - !HasSSE2()); + return length % BLOCKSIZE; +} + +#endif + #define blk2(i) (W[i&15]+=s1(W[(i-2)&15])+W[(i-7)&15]+s0(W[(i-15)&15])) #define Ch(x,y,z) (z^(x&(y^z))) -#define Maj(x,y,z) ((x&y)|(z&(x|y))) +#define Maj(x,y,z) (y^((x^y)&(y^z))) #define a(i) T[(0-i)&7] #define b(i) T[(1-i)&7] @@ -138,6 +468,11 @@ static const word32 SHA256_K[64] = { void SHA256::Transform(word32 *state, const word32 *data) { word32 W[16]; +#if defined(CRYPTOPP_X86_ASM_AVAILABLE) || defined(CRYPTOPP_X64_MASM_AVAILABLE) + // this byte reverse is a waste of time, but this function is only called by MDC + ByteReverse(W, data, BLOCKSIZE); + X86_SHA256_HashBlocks(state, W, BLOCKSIZE - !HasSSE2()); +#else word32 T[8]; /* Copy context->state[] to working vars */ memcpy(T, state, sizeof(T)); @@ -158,11 +493,12 @@ void SHA256::Transform(word32 *state, const word32 *data) state[5] += f(0); state[6] += g(0); state[7] += h(0); +#endif } /* // smaller but slower -void SHA256_Transform(word32 *state, const word32 *data) +void SHA256::Transform(word32 *state, const word32 *data) { word32 T[20]; word32 W[32]; @@ -176,7 +512,7 @@ void SHA256_Transform(word32 *state, const word32 *data) { word32 w = data[j]; W[j] = w; - w += K[j]; + w += SHA256_K[j]; w += t[7]; w += S1(e); w += Ch(e, t[5], t[6]); @@ -196,7 +532,7 @@ void SHA256_Transform(word32 *state, const word32 *data) i = j&0xf; word32 w = s1(W[i+16-2]) + s0(W[i+16-15]) + W[i] + W[i+16-7]; W[i+16] = W[i] = w; - w += K[j]; + w += SHA256_K[j]; w += t[7]; w += S1(e); w += Ch(e, t[5], t[6]); @@ -208,7 +544,7 @@ void SHA256_Transform(word32 *state, const word32 *data) w = s1(W[(i+1)+16-2]) + s0(W[(i+1)+16-15]) + W[(i+1)] + W[(i+1)+16-7]; W[(i+1)+16] = W[(i+1)] = w; - w += K[j+1]; + w += SHA256_K[j+1]; w += (t-1)[7]; w += S1(e); w += Ch(e, (t-1)[5], (t-1)[6]); @@ -335,22 
+671,16 @@ CRYPTOPP_NAKED static void CRYPTOPP_FASTCALL SHA512_SSE2_Transform(word64 *state AS2( lea edi, [esp+4+8*8]) // start at middle of state buffer. will decrement pointer each round to avoid copying AS2( lea esi, [esp+4+20*8+8]) // 16-byte alignment, then add 8 - AS2( movq mm4, [ecx+0*8]) - AS2( movq [edi+0*8], mm4) - AS2( movq mm0, [ecx+1*8]) - AS2( movq [edi+1*8], mm0) - AS2( movq mm0, [ecx+2*8]) - AS2( movq [edi+2*8], mm0) - AS2( movq mm0, [ecx+3*8]) - AS2( movq [edi+3*8], mm0) - AS2( movq mm5, [ecx+4*8]) - AS2( movq [edi+4*8], mm5) - AS2( movq mm0, [ecx+5*8]) - AS2( movq [edi+5*8], mm0) - AS2( movq mm0, [ecx+6*8]) - AS2( movq [edi+6*8], mm0) - AS2( movq mm0, [ecx+7*8]) - AS2( movq [edi+7*8], mm0) + AS2( movdqa xmm0, [ecx+0*16]) + AS2( movdq2q mm4, xmm0) + AS2( movdqa [edi+0*16], xmm0) + AS2( movdqa xmm0, [ecx+1*16]) + AS2( movdqa [edi+1*16], xmm0) + AS2( movdqa xmm0, [ecx+2*16]) + AS2( movdq2q mm5, xmm0) + AS2( movdqa [edi+2*16], xmm0) + AS2( movdqa xmm0, [ecx+3*16]) + AS2( movdqa [edi+3*16], xmm0) ASJ( jmp, 0, f) #define SSE2_S0_S1(r, a, b, c) \ @@ -475,18 +805,14 @@ CRYPTOPP_NAKED static void CRYPTOPP_FASTCALL SHA512_SSE2_Transform(word64 *state ASJ( jne, 1, b) #define SSE2_CombineState(i) \ - AS2( movq mm0, [edi+i*8])\ - AS2( paddq mm0, [ecx+i*8])\ - AS2( movq [ecx+i*8], mm0) + AS2( movdqa xmm0, [edi+i*16])\ + AS2( paddq xmm0, [ecx+i*16])\ + AS2( movdqa [ecx+i*16], xmm0) SSE2_CombineState(0) SSE2_CombineState(1) SSE2_CombineState(2) SSE2_CombineState(3) - SSE2_CombineState(4) - SSE2_CombineState(5) - SSE2_CombineState(6) - SSE2_CombineState(7) AS1( pop esp) AS1( emms) @@ -550,4 +876,5 @@ void SHA512::Transform(word64 *state, const word64 *data) NAMESPACE_END +#endif // #ifndef CRYPTOPP_GENERATE_X64_MASM #endif // #ifndef CRYPTOPP_IMPORTS @@ -17,25 +17,31 @@ public: typedef SHA1 SHA; // for backwards compatibility //! 
implements the SHA-256 standard -class CRYPTOPP_DLL SHA256 : public IteratedHashWithStaticTransform<word32, BigEndian, 64, 32, SHA256> +class CRYPTOPP_DLL SHA256 : public IteratedHashWithStaticTransform<word32, BigEndian, 64, 32, SHA256, 32, CRYPTOPP_BOOL_X86||CRYPTOPP_BOOL_X64> { public: +#if defined(CRYPTOPP_X86_ASM_AVAILABLE) || defined(CRYPTOPP_X64_MASM_AVAILABLE) + size_t HashMultipleBlocks(const word32 *input, size_t length); +#endif static void CRYPTOPP_API InitState(HashWordType *state); static void CRYPTOPP_API Transform(word32 *digest, const word32 *data); static const char * CRYPTOPP_API StaticAlgorithmName() {return "SHA-256";} }; //! implements the SHA-224 standard -class CRYPTOPP_DLL SHA224 : public IteratedHashWithStaticTransform<word32, BigEndian, 64, 32, SHA224, 28> +class CRYPTOPP_DLL SHA224 : public IteratedHashWithStaticTransform<word32, BigEndian, 64, 32, SHA224, 28, CRYPTOPP_BOOL_X86||CRYPTOPP_BOOL_X64> { public: +#if defined(CRYPTOPP_X86_ASM_AVAILABLE) || defined(CRYPTOPP_X64_MASM_AVAILABLE) + size_t HashMultipleBlocks(const word32 *input, size_t length); +#endif static void CRYPTOPP_API InitState(HashWordType *state); static void CRYPTOPP_API Transform(word32 *digest, const word32 *data) {SHA256::Transform(digest, data);} static const char * CRYPTOPP_API StaticAlgorithmName() {return "SHA-224";} }; //! implements the SHA-512 standard -class CRYPTOPP_DLL SHA512 : public IteratedHashWithStaticTransform<word64, BigEndian, 128, 64, SHA512> +class CRYPTOPP_DLL SHA512 : public IteratedHashWithStaticTransform<word64, BigEndian, 128, 64, SHA512, 64, CRYPTOPP_BOOL_X86> { public: static void CRYPTOPP_API InitState(HashWordType *state); @@ -44,7 +50,7 @@ public: }; //! 
implements the SHA-384 standard -class CRYPTOPP_DLL SHA384 : public IteratedHashWithStaticTransform<word64, BigEndian, 128, 64, SHA384, 48> +class CRYPTOPP_DLL SHA384 : public IteratedHashWithStaticTransform<word64, BigEndian, 128, 64, SHA384, 48, CRYPTOPP_BOOL_X86> { public: static void CRYPTOPP_API InitState(HashWordType *state); @@ -58,7 +58,7 @@ class CRYPTOPP_NO_VTABLE Unflushable : public T { public: bool Flush(bool completeFlush, int propagation=-1, bool blocking=true) - {return ChannelFlush(this->NULL_CHANNEL, completeFlush, propagation, blocking);} + {return ChannelFlush(DEFAULT_CHANNEL, completeFlush, propagation, blocking);} bool IsolatedFlush(bool hardFlush, bool blocking) {assert(false); return false;} bool ChannelFlush(const std::string &channel, bool hardFlush, int propagation=-1, bool blocking=true) @@ -123,15 +123,15 @@ class CRYPTOPP_NO_VTABLE Multichannel : public CustomFlushPropagation<T> { public: bool Flush(bool hardFlush, int propagation=-1, bool blocking=true) - {return ChannelFlush(this->NULL_CHANNEL, hardFlush, propagation, blocking);} + {return this->ChannelFlush(DEFAULT_CHANNEL, hardFlush, propagation, blocking);} bool MessageSeriesEnd(int propagation=-1, bool blocking=true) - {return ChannelMessageSeriesEnd(this->NULL_CHANNEL, propagation, blocking);} + {return this->ChannelMessageSeriesEnd(DEFAULT_CHANNEL, propagation, blocking);} byte * CreatePutSpace(size_t &size) - {return ChannelCreatePutSpace(this->NULL_CHANNEL, size);} + {return this->ChannelCreatePutSpace(DEFAULT_CHANNEL, size);} size_t Put2(const byte *begin, size_t length, int messageEnd, bool blocking) - {return ChannelPut2(this->NULL_CHANNEL, begin, length, messageEnd, blocking);} + {return this->ChannelPut2(DEFAULT_CHANNEL, begin, length, messageEnd, blocking);} size_t PutModifiable2(byte *inString, size_t length, int messageEnd, bool blocking) - {return ChannelPutModifiable2(this->NULL_CHANNEL, inString, length, messageEnd, blocking);} + {return 
this->ChannelPutModifiable2(DEFAULT_CHANNEL, inString, length, messageEnd, blocking);} // void ChannelMessageSeriesEnd(const std::string &channel, int propagation=-1) // {PropagateMessageSeriesEnd(propagation, channel);} @@ -177,7 +177,7 @@ public: unsigned int NumberOfMessages() const {return m_messageEnd ? 0 : 1;} bool GetNextMessage(); - unsigned int CopyMessagesTo(BufferedTransformation &target, unsigned int count=UINT_MAX, const std::string &channel=NULL_CHANNEL) const; + unsigned int CopyMessagesTo(BufferedTransformation &target, unsigned int count=UINT_MAX, const std::string &channel=DEFAULT_CHANNEL) const; protected: virtual void StoreInitialize(const NameValuePairs &parameters) =0; @@ -189,9 +189,9 @@ protected: class CRYPTOPP_DLL CRYPTOPP_NO_VTABLE Sink : public BufferedTransformation { public: - size_t TransferTo2(BufferedTransformation &target, lword &transferBytes, const std::string &channel=NULL_CHANNEL, bool blocking=true) + size_t TransferTo2(BufferedTransformation &target, lword &transferBytes, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true) {transferBytes = 0; return 0;} - size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=NULL_CHANNEL, bool blocking=true) const + size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true) const {return 0;} }; @@ -559,7 +559,7 @@ void SecretShareFile(int threshold, int nShares, const char *filename, const cha channel = WordToString<word32>(i); fileSinks[i]->Put((byte *)channel.data(), 4); - channelSwitch->AddRoute(channel, *fileSinks[i], BufferedTransformation::NULL_CHANNEL); + channelSwitch->AddRoute(channel, *fileSinks[i], DEFAULT_CHANNEL); } source.PumpAll(); @@ -609,7 +609,7 @@ void InformationDisperseFile(int threshold, int nShares, const char *filename) channel = WordToString<word32>(i); fileSinks[i]->Put((byte *)channel.data(), 4); -
channelSwitch->AddRoute(channel, *fileSinks[i], BufferedTransformation::NULL_CHANNEL); + channelSwitch->AddRoute(channel, *fileSinks[i], DEFAULT_CHANNEL); } source.PumpAll(); @@ -57,12 +57,8 @@ void VMAC_Base::UncheckedSetKey(const byte *userKey, unsigned int keylength, con /* Fill nh key */ in[0] = 0x80; - for (i = 0; i < m_nhKeySize()*sizeof(word64); i += blockSize) - { - cipher.ProcessBlock(in, out.BytePtr()); - ConditionalByteReverse(BIG_ENDIAN_ORDER, m_nhKey()+i/sizeof(word64), out.begin(), blockSize); - in[15]++; - } + cipher.AdvancedProcessBlocks(in, NULL, (byte *)m_nhKey(), m_nhKeySize()*sizeof(word64), cipher.BT_InBlockIsCounter); + ConditionalByteReverse<word64>(BIG_ENDIAN_ORDER, m_nhKey(), m_nhKey(), m_nhKeySize()*sizeof(word64)); /* Fill poly key */ in[0] = 0xC0; @@ -137,6 +133,7 @@ void VMAC_Base::Resynchronize(const byte *nonce, int len) void VMAC_Base::HashEndianCorrectedBlock(const word64 *data) { assert(false); + throw 0; } #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86 @@ -1,665 +1,1955 @@ -include ksamd64.inc
-EXTERNDEF ?Te@rdtable@CryptoPP@@3PA_KA:FAR
-EXTERNDEF ?g_cacheLineSize@CryptoPP@@3IA:FAR
-.CODE
-
- ALIGN 8
-Baseline_Add PROC
- lea rdx, [rdx+8*rcx]
- lea r8, [r8+8*rcx]
- lea r9, [r9+8*rcx]
- neg rcx ; rcx is negative index
- jz $1@Baseline_Add
- mov rax,[r8+8*rcx]
- add rax,[r9+8*rcx]
- mov [rdx+8*rcx],rax
-$0@Baseline_Add:
- mov rax,[r8+8*rcx+8]
- adc rax,[r9+8*rcx+8]
- mov [rdx+8*rcx+8],rax
- lea rcx,[rcx+2] ; advance index, avoid inc which causes slowdown on Intel Core 2
- jrcxz $1@Baseline_Add ; loop until rcx overflows and becomes zero
- mov rax,[r8+8*rcx]
- adc rax,[r9+8*rcx]
- mov [rdx+8*rcx],rax
- jmp $0@Baseline_Add
-$1@Baseline_Add:
- mov rax, 0
- adc rax, rax ; store carry into rax (return result register)
- ret
-Baseline_Add ENDP
-
- ALIGN 8
-Baseline_Sub PROC
- lea rdx, [rdx+8*rcx]
- lea r8, [r8+8*rcx]
- lea r9, [r9+8*rcx]
- neg rcx ; rcx is negative index
- jz $1@Baseline_Sub
- mov rax,[r8+8*rcx]
- sub rax,[r9+8*rcx]
- mov [rdx+8*rcx],rax
-$0@Baseline_Sub:
- mov rax,[r8+8*rcx+8]
- sbb rax,[r9+8*rcx+8]
- mov [rdx+8*rcx+8],rax
- lea rcx,[rcx+2] ; advance index, avoid inc which causes slowdown on Intel Core 2
- jrcxz $1@Baseline_Sub ; loop until rcx overflows and becomes zero
- mov rax,[r8+8*rcx]
- sbb rax,[r9+8*rcx]
- mov [rdx+8*rcx],rax
- jmp $0@Baseline_Sub
-$1@Baseline_Sub:
- mov rax, 0
- adc rax, rax ; store carry into rax (return result register)
-
- ret
-Baseline_Sub ENDP
-
-ALIGN 8
-Rijndael_Enc_AdvancedProcessBlocks PROC FRAME
-rex_push_reg rsi
-push_reg rdi
-push_reg rbx
-push_reg rbp
-push_reg r12
-.endprolog
-mov r8, rcx
-mov rsi, ?Te@rdtable@CryptoPP@@3PA_KA
-mov rdi, QWORD PTR [?g_cacheLineSize@CryptoPP@@3IA]
-mov rbp, [(r8+16*19)]
-mov rax, 16
-and rax, rbp
-movdqa xmm3, XMMWORD PTR [rdx+16+rax]
-movdqa [(r8+16*12)], xmm3
-lea rax, [rdx+rax+2*16]
-sub rax, rbp
-label0:
-movdqa xmm0, [rax+rbp]
-movdqa XMMWORD PTR [(r8+0)+rbp], xmm0
-add rbp, 16
-cmp rbp, 16*12
-jl label0
-movdqa xmm4, [rax+rbp]
-movdqa xmm1, [rdx]
-mov r11d, [rdx+4*4]
-mov ebx, [rdx+5*4]
-mov ecx, [rdx+6*4]
-mov edx, [rdx+7*4]
-xor rax, rax
-label9:
-mov ebp, [rsi+rax]
-add rax, rdi
-mov ebp, [rsi+rax]
-add rax, rdi
-mov ebp, [rsi+rax]
-add rax, rdi
-mov ebp, [rsi+rax]
-add rax, rdi
-cmp rax, 2048
-jl label9
-lfence
-test DWORD PTR [(r8+16*18+8)], 1
-jz label8
-mov rbp, [(r8+16*14)]
-movdqa xmm2, [rbp]
-pxor xmm2, xmm1
-psrldq xmm1, 14
-movd eax, xmm1
-mov al, BYTE PTR [rbp+15]
-mov r12d, eax
-movd eax, xmm2
-psrldq xmm2, 4
-movd edi, xmm2
-psrldq xmm2, 4
-movzx ebp, al
-xor r11d, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
-movzx ebp, ah
-xor edx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
-shr eax, 16
-movzx ebp, al
-xor ecx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
-movzx ebp, ah
-xor ebx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
-mov eax, edi
-movd edi, xmm2
-psrldq xmm2, 4
-movzx ebp, al
-xor ebx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
-movzx ebp, ah
-xor r11d, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
-shr eax, 16
-movzx ebp, al
-xor edx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
-movzx ebp, ah
-xor ecx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
-mov eax, edi
-movd edi, xmm2
-movzx ebp, al
-xor ecx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
-movzx ebp, ah
-xor ebx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
-shr eax, 16
-movzx ebp, al
-xor r11d, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
-movzx ebp, ah
-xor edx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
-mov eax, edi
-movzx ebp, al
-xor edx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
-movzx ebp, ah
-xor ecx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
-shr eax, 16
-movzx ebp, al
-xor ebx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
-psrldq xmm2, 3
-mov eax, [(r8+16*12)+0*4]
-mov edi, [(r8+16*12)+2*4]
-mov r10d, [(r8+16*12)+3*4]
-movzx ebp, cl
-xor r10d, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
-movzx ebp, bl
-xor edi, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
-movzx ebp, bh
-xor r10d, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
-shr ebx, 16
-movzx ebp, bl
-xor eax, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
-movzx ebp, bh
-mov ebx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
-xor ebx, [(r8+16*12)+1*4]
-movzx ebp, ch
-xor eax, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
-shr ecx, 16
-movzx ebp, dl
-xor eax, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
-movzx ebp, dh
-xor ebx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
-shr edx, 16
-movzx ebp, ch
-xor edi, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
-movzx ebp, cl
-xor ebx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
-movzx ebp, dl
-xor edi, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
-movzx ebp, dh
-xor r10d, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
-movd ecx, xmm2
-mov edx, r11d
-mov [(r8+0)+3*4], r10d
-mov [(r8+0)+0*4], eax
-mov [(r8+0)+1*4], ebx
-mov [(r8+0)+2*4], edi
-jmp label5
-label3:
-mov r11d, [(r8+16*12)+0*4]
-mov ebx, [(r8+16*12)+1*4]
-mov ecx, [(r8+16*12)+2*4]
-mov edx, [(r8+16*12)+3*4]
-label8:
-mov rax, [(r8+16*14)]
-movdqu xmm2, [rax]
-mov rbp, [(r8+16*14)+8]
-movdqu xmm5, [rbp]
-pxor xmm2, xmm1
-pxor xmm2, xmm5
-movd eax, xmm2
-psrldq xmm2, 4
-movd edi, xmm2
-psrldq xmm2, 4
-movzx ebp, al
-xor r11d, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
-movzx ebp, ah
-xor edx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
-shr eax, 16
-movzx ebp, al
-xor ecx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
-movzx ebp, ah
-xor ebx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
-mov eax, edi
-movd edi, xmm2
-psrldq xmm2, 4
-movzx ebp, al
-xor ebx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
-movzx ebp, ah
-xor r11d, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
-shr eax, 16
-movzx ebp, al
-xor edx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
-movzx ebp, ah
-xor ecx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
-mov eax, edi
-movd edi, xmm2
-movzx ebp, al
-xor ecx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
-movzx ebp, ah
-xor ebx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
-shr eax, 16
-movzx ebp, al
-xor r11d, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
-movzx ebp, ah
-xor edx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
-mov eax, edi
-movzx ebp, al
-xor edx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
-movzx ebp, ah
-xor ecx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
-shr eax, 16
-movzx ebp, al
-xor ebx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
-movzx ebp, ah
-xor r11d, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
-mov eax, r11d
-add r8, [(r8+16*19)]
-add r8, 4*16
-jmp label2
-label1:
-mov ecx, r12d
-mov edx, r11d
-mov eax, [(r8+0)+0*4]
-mov ebx, [(r8+0)+1*4]
-xor cl, ch
-and rcx, 255
-label5:
-add r12d, 1
-xor edx, DWORD PTR [rsi+rcx*8+3]
-movzx ebp, dl
-xor ebx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
-movzx ebp, dh
-mov ecx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
-shr edx, 16
-xor ecx, [(r8+0)+2*4]
-movzx ebp, dh
-xor eax, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
-movzx ebp, dl
-mov edx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
-xor edx, [(r8+0)+3*4]
-add r8, [(r8+16*19)]
-add r8, 3*16
-jmp label4
-label2:
-mov r10d, [(r8+0)-4*16+3*4]
-mov edi, [(r8+0)-4*16+2*4]
-movzx ebp, cl
-xor r10d, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
-mov cl, al
-movzx ebp, ah
-xor edi, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
-shr eax, 16
-movzx ebp, bl
-xor edi, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
-movzx ebp, bh
-xor r10d, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
-shr ebx, 16
-movzx ebp, al
-xor r10d, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
-movzx ebp, ah
-mov eax, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
-movzx ebp, bl
-xor eax, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
-movzx ebp, bh
-mov ebx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
-movzx ebp, ch
-xor eax, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
-movzx ebp, cl
-xor ebx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
-shr ecx, 16
-movzx ebp, dl
-xor eax, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
-movzx ebp, dh
-xor ebx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
-shr edx, 16
-movzx ebp, ch
-xor edi, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
-movzx ebp, cl
-xor ebx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
-movzx ebp, dl
-xor edi, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
-movzx ebp, dh
-xor r10d, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
-mov ecx, edi
-xor eax, [(r8+0)-4*16+0*4]
-xor ebx, [(r8+0)-4*16+1*4]
-mov edx, r10d
-label4:
-mov r10d, [(r8+0)-4*16+7*4]
-mov edi, [(r8+0)-4*16+6*4]
-movzx ebp, cl
-xor r10d, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
-mov cl, al
-movzx ebp, ah
-xor edi, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
-shr eax, 16
-movzx ebp, bl
-xor edi, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
-movzx ebp, bh
-xor r10d, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
-shr ebx, 16
-movzx ebp, al
-xor r10d, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
-movzx ebp, ah
-mov eax, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
-movzx ebp, bl
-xor eax, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
-movzx ebp, bh
-mov ebx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
-movzx ebp, ch
-xor eax, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
-movzx ebp, cl
-xor ebx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
-shr ecx, 16
-movzx ebp, dl
-xor eax, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
-movzx ebp, dh
-xor ebx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
-shr edx, 16
-movzx ebp, ch
-xor edi, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
-movzx ebp, cl
-xor ebx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
-movzx ebp, dl
-xor edi, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
-movzx ebp, dh
-xor r10d, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
-mov ecx, edi
-xor eax, [(r8+0)-4*16+4*4]
-xor ebx, [(r8+0)-4*16+5*4]
-mov edx, r10d
-add r8, 32
-test r8, 255
-jnz label2
-sub r8, 16*16
-movzx ebp, ch
-movzx edi, BYTE PTR [rsi+rbp*8+1]
-movzx ebp, dl
-xor edi, DWORD PTR [rsi+rbp*8+0]
-mov WORD PTR [(r8+16*13)+2], di
-movzx ebp, dh
-movzx edi, BYTE PTR [rsi+rbp*8+1]
-movzx ebp, al
-xor edi, DWORD PTR [rsi+rbp*8+0]
-mov WORD PTR [(r8+16*13)+6], di
-shr edx, 16
-movzx ebp, ah
-movzx edi, BYTE PTR [rsi+rbp*8+1]
-movzx ebp, bl
-xor edi, DWORD PTR [rsi+rbp*8+0]
-mov WORD PTR [(r8+16*13)+10], di
-shr eax, 16
-movzx ebp, bh
-movzx edi, BYTE PTR [rsi+rbp*8+1]
-movzx ebp, cl
-xor edi, DWORD PTR [rsi+rbp*8+0]
-mov WORD PTR [(r8+16*13)+14], di
-shr ebx, 16
-movzx ebp, dh
-movzx edi, BYTE PTR [rsi+rbp*8+1]
-movzx ebp, al
-xor edi, DWORD PTR [rsi+rbp*8+0]
-mov WORD PTR [(r8+16*13)+12], di
-shr ecx, 16
-movzx ebp, ah
-movzx edi, BYTE PTR [rsi+rbp*8+1]
-movzx ebp, bl
-xor edi, DWORD PTR [rsi+rbp*8+0]
-mov WORD PTR [(r8+16*13)+0], di
-movzx ebp, bh
-movzx edi, BYTE PTR [rsi+rbp*8+1]
-movzx ebp, cl
-xor edi, DWORD PTR [rsi+rbp*8+0]
-mov WORD PTR [(r8+16*13)+4], di
-movzx ebp, ch
-movzx edi, BYTE PTR [rsi+rbp*8+1]
-movzx ebp, dl
-xor edi, DWORD PTR [rsi+rbp*8+0]
-mov WORD PTR [(r8+16*13)+8], di
-mov rax, [(r8+16*14)+16]
-mov rbx, [(r8+16*14)+24]
-mov rcx, [(r8+16*18+8)]
-sub rcx, 16
-movdqu xmm2, [rax]
-pxor xmm2, xmm4
-movdqa xmm0, [(r8+16*16)+16]
-paddq xmm0, [(r8+16*14)+16]
-movdqa [(r8+16*14)+16], xmm0
-pxor xmm2, [(r8+16*13)]
-movdqu [rbx], xmm2
-jle label7
-mov [(r8+16*18+8)], rcx
-test rcx, 1
-jnz label1
-movdqa xmm0, [(r8+16*16)]
-paddd xmm0, [(r8+16*14)]
-movdqa [(r8+16*14)], xmm0
-jmp label3
-label7:
-mov rbp, [(r8+16*18)]
-pop r12
-pop rbp
-pop rbx
-pop rdi
-pop rsi
-ret
-Rijndael_Enc_AdvancedProcessBlocks ENDP
-
-ALIGN 8
-GCM_AuthenticateBlocks_2K PROC FRAME
-rex_push_reg rsi
-push_reg rdi
-push_reg rbx
-.endprolog
-mov rsi, r8
-mov r11, r9
-movdqa xmm0, [rsi]
-label0:
-movdqu xmm4, [rcx]
-pxor xmm0, xmm4
-movd ebx, xmm0
-mov eax, 0f0f0f0f0h
-and eax, ebx
-shl ebx, 4
-and ebx, 0f0f0f0f0h
-movzx edi, ah
-movdqa xmm5, XMMWORD PTR [rsi + 32 + 1024 + rdi]
-movzx edi, al
-movdqa xmm4, XMMWORD PTR [rsi + 32 + 1024 + rdi]
-shr eax, 16
-movzx edi, ah
-movdqa xmm3, XMMWORD PTR [rsi + 32 + 1024 + rdi]
-movzx edi, al
-movdqa xmm2, XMMWORD PTR [rsi + 32 + 1024 + rdi]
-psrldq xmm0, 4
-movd eax, xmm0
-and eax, 0f0f0f0f0h
-movzx edi, bh
-pxor xmm5, XMMWORD PTR [rsi + 32 + (1-1)*256 + rdi]
-movzx edi, bl
-pxor xmm4, XMMWORD PTR [rsi + 32 + (1-1)*256 + rdi]
-shr ebx, 16
-movzx edi, bh
-pxor xmm3, XMMWORD PTR [rsi + 32 + (1-1)*256 + rdi]
-movzx edi, bl
-pxor xmm2, XMMWORD PTR [rsi + 32 + (1-1)*256 + rdi]
-movd ebx, xmm0
-shl ebx, 4
-and ebx, 0f0f0f0f0h
-movzx edi, ah
-pxor xmm5, XMMWORD PTR [rsi + 32 + 1024 + 1*256 + rdi]
-movzx edi, al
-pxor xmm4, XMMWORD PTR [rsi + 32 + 1024 + 1*256 + rdi]
-shr eax, 16
-movzx edi, ah
-pxor xmm3, XMMWORD PTR [rsi + 32 + 1024 + 1*256 + rdi]
-movzx edi, al
-pxor xmm2, XMMWORD PTR [rsi + 32 + 1024 + 1*256 + rdi]
-psrldq xmm0, 4
-movd eax, xmm0
-and eax, 0f0f0f0f0h
-movzx edi, bh
-pxor xmm5, XMMWORD PTR [rsi + 32 + (2-1)*256 + rdi]
-movzx edi, bl
-pxor xmm4, XMMWORD PTR [rsi + 32 + (2-1)*256 + rdi]
-shr ebx, 16
-movzx edi, bh
-pxor xmm3, XMMWORD PTR [rsi + 32 + (2-1)*256 + rdi]
-movzx edi, bl
-pxor xmm2, XMMWORD PTR [rsi + 32 + (2-1)*256 + rdi]
-movd ebx, xmm0
-shl ebx, 4
-and ebx, 0f0f0f0f0h
-movzx edi, ah
-pxor xmm5, XMMWORD PTR [rsi + 32 + 1024 + 2*256 + rdi]
-movzx edi, al
-pxor xmm4, XMMWORD PTR [rsi + 32 + 1024 + 2*256 + rdi]
-shr eax, 16
-movzx edi, ah
-pxor xmm3, XMMWORD PTR [rsi + 32 + 1024 + 2*256 + rdi]
-movzx edi, al
-pxor xmm2, XMMWORD PTR [rsi + 32 + 1024 + 2*256 + rdi]
-psrldq xmm0, 4
-movd eax, xmm0
-and eax, 0f0f0f0f0h
-movzx edi, bh
-pxor xmm5, XMMWORD PTR [rsi + 32 + (3-1)*256 + rdi]
-movzx edi, bl
-pxor xmm4, XMMWORD PTR [rsi + 32 + (3-1)*256 + rdi]
-shr ebx, 16
-movzx edi, bh
-pxor xmm3, XMMWORD PTR [rsi + 32 + (3-1)*256 + rdi]
-movzx edi, bl
-pxor xmm2, XMMWORD PTR [rsi + 32 + (3-1)*256 + rdi]
-movd ebx, xmm0
-shl ebx, 4
-and ebx, 0f0f0f0f0h
-movzx edi, ah
-pxor xmm5, XMMWORD PTR [rsi + 32 + 1024 + 3*256 + rdi]
-movzx edi, al
-pxor xmm4, XMMWORD PTR [rsi + 32 + 1024 + 3*256 + rdi]
-shr eax, 16
-movzx edi, ah
-pxor xmm3, XMMWORD PTR [rsi + 32 + 1024 + 3*256 + rdi]
-movzx edi, al
-pxor xmm2, XMMWORD PTR [rsi + 32 + 1024 + 3*256 + rdi]
-movzx edi, bh
-pxor xmm5, XMMWORD PTR [rsi + 32 + 3*256 + rdi]
-movzx edi, bl
-pxor xmm4, XMMWORD PTR [rsi + 32 + 3*256 + rdi]
-shr ebx, 16
-movzx edi, bh
-pxor xmm3, XMMWORD PTR [rsi + 32 + 3*256 + rdi]
-movzx edi, bl
-pxor xmm2, XMMWORD PTR [rsi + 32 + 3*256 + rdi]
-movdqa xmm0, xmm3
-pslldq xmm3, 1
-pxor xmm2, xmm3
-movdqa xmm1, xmm2
-pslldq xmm2, 1
-pxor xmm5, xmm2
-psrldq xmm0, 15
-movd rdi, xmm0
-movzx eax, WORD PTR [r11 + rdi*2]
-shl eax, 8
-movdqa xmm0, xmm5
-pslldq xmm5, 1
-pxor xmm4, xmm5
-psrldq xmm1, 15
-movd rdi, xmm1
-xor ax, WORD PTR [r11 + rdi*2]
-shl eax, 8
-psrldq xmm0, 15
-movd rdi, xmm0
-xor ax, WORD PTR [r11 + rdi*2]
-movd xmm0, eax
-pxor xmm0, xmm4
-add rcx, 16
-sub rdx, 1
-jnz label0
-movdqa [rsi], xmm0
-pop rbx
-pop rdi
-pop rsi
-ret
-GCM_AuthenticateBlocks_2K ENDP
-
-ALIGN 8
-GCM_AuthenticateBlocks_64K PROC FRAME
-rex_push_reg rsi
-push_reg rdi
-.endprolog
-mov rsi, r8
-movdqa xmm0, [rsi]
-label1:
-movdqu xmm1, [rcx]
-pxor xmm1, xmm0
-pxor xmm0, xmm0
-movd eax, xmm1
-psrldq xmm1, 4
-movzx edi, al
-add rdi, rdi
-pxor xmm0, [rsi + 32 + (0*4+0)*256*16 + rdi*8]
-movzx edi, ah
-add rdi, rdi
-pxor xmm0, [rsi + 32 + (0*4+1)*256*16 + rdi*8]
-shr eax, 16
-movzx edi, al
-add rdi, rdi
-pxor xmm0, [rsi + 32 + (0*4+2)*256*16 + rdi*8]
-movzx edi, ah
-add rdi, rdi
-pxor xmm0, [rsi + 32 + (0*4+3)*256*16 + rdi*8]
-movd eax, xmm1
-psrldq xmm1, 4
-movzx edi, al
-add rdi, rdi
-pxor xmm0, [rsi + 32 + (1*4+0)*256*16 + rdi*8]
-movzx edi, ah
-add rdi, rdi
-pxor xmm0, [rsi + 32 + (1*4+1)*256*16 + rdi*8]
-shr eax, 16
-movzx edi, al
-add rdi, rdi
-pxor xmm0, [rsi + 32 + (1*4+2)*256*16 + rdi*8]
-movzx edi, ah
-add rdi, rdi
-pxor xmm0, [rsi + 32 + (1*4+3)*256*16 + rdi*8]
-movd eax, xmm1
-psrldq xmm1, 4
-movzx edi, al
-add rdi, rdi
-pxor xmm0, [rsi + 32 + (2*4+0)*256*16 + rdi*8]
-movzx edi, ah
-add rdi, rdi
-pxor xmm0, [rsi + 32 + (2*4+1)*256*16 + rdi*8]
-shr eax, 16
-movzx edi, al
-add rdi, rdi
-pxor xmm0, [rsi + 32 + (2*4+2)*256*16 + rdi*8]
-movzx edi, ah
-add rdi, rdi
-pxor xmm0, [rsi + 32 + (2*4+3)*256*16 + rdi*8]
-movd eax, xmm1
-psrldq xmm1, 4
-movzx edi, al
-add rdi, rdi
-pxor xmm0, [rsi + 32 + (3*4+0)*256*16 + rdi*8]
-movzx edi, ah
-add rdi, rdi
-pxor xmm0, [rsi + 32 + (3*4+1)*256*16 + rdi*8]
-shr eax, 16
-movzx edi, al
-add rdi, rdi
-pxor xmm0, [rsi + 32 + (3*4+2)*256*16 + rdi*8]
-movzx edi, ah
-add rdi, rdi
-pxor xmm0, [rsi + 32 + (3*4+3)*256*16 + rdi*8]
-add rcx, 16
-sub rdx, 1
-jnz label1
-movdqa [rsi], xmm0
-pop rdi
-pop rsi
-ret
-GCM_AuthenticateBlocks_64K ENDP
-
-_TEXT ENDS
-END
+include ksamd64.inc +EXTERNDEF ?Te@rdtable@CryptoPP@@3PA_KA:FAR +EXTERNDEF ?g_cacheLineSize@CryptoPP@@3IA:FAR +EXTERNDEF ?SHA256_K@CryptoPP@@3QBIB:FAR +.CODE + + ALIGN 8 +Baseline_Add PROC + lea rdx, [rdx+8*rcx] + lea r8, [r8+8*rcx] + lea r9, [r9+8*rcx] + neg rcx ; rcx is negative index + jz $1@Baseline_Add + mov rax,[r8+8*rcx] + add rax,[r9+8*rcx] + mov [rdx+8*rcx],rax +$0@Baseline_Add: + mov rax,[r8+8*rcx+8] + adc rax,[r9+8*rcx+8] + mov [rdx+8*rcx+8],rax + lea rcx,[rcx+2] ; advance index, avoid inc which causes slowdown on Intel Core 2 + jrcxz $1@Baseline_Add ; loop until rcx overflows and becomes zero + mov rax,[r8+8*rcx] + adc rax,[r9+8*rcx] + mov [rdx+8*rcx],rax + jmp $0@Baseline_Add +$1@Baseline_Add: + mov rax, 0 + adc rax, rax ; store carry into rax (return result register) + ret +Baseline_Add ENDP + + ALIGN 8 +Baseline_Sub PROC + lea rdx, [rdx+8*rcx] + lea r8, [r8+8*rcx] + lea r9, [r9+8*rcx] + neg rcx ; rcx is negative index + jz $1@Baseline_Sub + mov rax,[r8+8*rcx] + sub rax,[r9+8*rcx] + mov [rdx+8*rcx],rax +$0@Baseline_Sub: + mov rax,[r8+8*rcx+8] + sbb rax,[r9+8*rcx+8] + mov [rdx+8*rcx+8],rax + lea rcx,[rcx+2] ; advance index, avoid inc which causes slowdown on Intel Core 2 + jrcxz $1@Baseline_Sub ; loop until rcx overflows and becomes zero + mov rax,[r8+8*rcx] + sbb rax,[r9+8*rcx] + mov [rdx+8*rcx],rax + jmp $0@Baseline_Sub +$1@Baseline_Sub: + mov rax, 0 + adc rax, rax ; store carry into rax (return result register) + + ret +Baseline_Sub ENDP + +ALIGN 8 +Rijndael_Enc_AdvancedProcessBlocks PROC FRAME +rex_push_reg rsi +push_reg rdi +push_reg rbx +push_reg rbp +push_reg r12 +.endprolog +mov r8, rcx +mov rsi, ?Te@rdtable@CryptoPP@@3PA_KA +mov rdi, QWORD PTR [?g_cacheLineSize@CryptoPP@@3IA] +mov rbp, [(r8+16*19)] +mov rax, 16 +and rax, rbp +movdqa xmm3, XMMWORD PTR [rdx+16+rax] +movdqa [(r8+16*12)], xmm3 +lea rax, [rdx+rax+2*16] +sub rax, rbp +label0: +movdqa xmm0, [rax+rbp] +movdqa XMMWORD PTR [(r8+0)+rbp], xmm0 +add rbp, 16 +cmp rbp, 16*12 +jl label0 
+movdqa xmm4, [rax+rbp] +movdqa xmm1, [rdx] +mov r11d, [rdx+4*4] +mov ebx, [rdx+5*4] +mov ecx, [rdx+6*4] +mov edx, [rdx+7*4] +xor rax, rax +label9: +mov ebp, [rsi+rax] +add rax, rdi +mov ebp, [rsi+rax] +add rax, rdi +mov ebp, [rsi+rax] +add rax, rdi +mov ebp, [rsi+rax] +add rax, rdi +cmp rax, 2048 +jl label9 +lfence +test DWORD PTR [(r8+16*18+8)], 1 +jz label8 +mov rbp, [(r8+16*14)] +movdqa xmm2, [rbp] +pxor xmm2, xmm1 +psrldq xmm1, 14 +movd eax, xmm1 +mov al, BYTE PTR [rbp+15] +mov r12d, eax +movd eax, xmm2 +psrldq xmm2, 4 +movd edi, xmm2 +psrldq xmm2, 4 +movzx ebp, al +xor r11d, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] +movzx ebp, ah +xor edx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] +shr eax, 16 +movzx ebp, al +xor ecx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] +movzx ebp, ah +xor ebx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] +mov eax, edi +movd edi, xmm2 +psrldq xmm2, 4 +movzx ebp, al +xor ebx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] +movzx ebp, ah +xor r11d, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] +shr eax, 16 +movzx ebp, al +xor edx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] +movzx ebp, ah +xor ecx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] +mov eax, edi +movd edi, xmm2 +movzx ebp, al +xor ecx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] +movzx ebp, ah +xor ebx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] +shr eax, 16 +movzx ebp, al +xor r11d, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] +movzx ebp, ah +xor edx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] +mov eax, edi +movzx ebp, al +xor edx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] +movzx ebp, ah +xor ecx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] +shr eax, 16 +movzx ebp, al +xor ebx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] +psrldq xmm2, 3 +mov eax, [(r8+16*12)+0*4] +mov edi, [(r8+16*12)+2*4] +mov r10d, [(r8+16*12)+3*4] +movzx ebp, cl +xor r10d, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] +movzx ebp, bl +xor edi, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] +movzx ebp, bh +xor r10d, DWORD PTR [rsi+8*rbp+(((2+3) 
MOD (4))+1)] +shr ebx, 16 +movzx ebp, bl +xor eax, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] +movzx ebp, bh +mov ebx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] +xor ebx, [(r8+16*12)+1*4] +movzx ebp, ch +xor eax, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] +shr ecx, 16 +movzx ebp, dl +xor eax, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] +movzx ebp, dh +xor ebx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] +shr edx, 16 +movzx ebp, ch +xor edi, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] +movzx ebp, cl +xor ebx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] +movzx ebp, dl +xor edi, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] +movzx ebp, dh +xor r10d, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] +movd ecx, xmm2 +mov edx, r11d +mov [(r8+0)+3*4], r10d +mov [(r8+0)+0*4], eax +mov [(r8+0)+1*4], ebx +mov [(r8+0)+2*4], edi +jmp label5 +label3: +mov r11d, [(r8+16*12)+0*4] +mov ebx, [(r8+16*12)+1*4] +mov ecx, [(r8+16*12)+2*4] +mov edx, [(r8+16*12)+3*4] +label8: +mov rax, [(r8+16*14)] +movdqu xmm2, [rax] +mov rbp, [(r8+16*14)+8] +movdqu xmm5, [rbp] +pxor xmm2, xmm1 +pxor xmm2, xmm5 +movd eax, xmm2 +psrldq xmm2, 4 +movd edi, xmm2 +psrldq xmm2, 4 +movzx ebp, al +xor r11d, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] +movzx ebp, ah +xor edx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] +shr eax, 16 +movzx ebp, al +xor ecx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] +movzx ebp, ah +xor ebx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] +mov eax, edi +movd edi, xmm2 +psrldq xmm2, 4 +movzx ebp, al +xor ebx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] +movzx ebp, ah +xor r11d, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] +shr eax, 16 +movzx ebp, al +xor edx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] +movzx ebp, ah +xor ecx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] +mov eax, edi +movd edi, xmm2 +movzx ebp, al +xor ecx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] +movzx ebp, ah +xor ebx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] +shr eax, 16 +movzx ebp, al +xor r11d, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] +movzx ebp, 
ah +xor edx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] +mov eax, edi +movzx ebp, al +xor edx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] +movzx ebp, ah +xor ecx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] +shr eax, 16 +movzx ebp, al +xor ebx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] +movzx ebp, ah +xor r11d, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] +mov eax, r11d +add r8, [(r8+16*19)] +add r8, 4*16 +jmp label2 +label1: +mov ecx, r12d +mov edx, r11d +mov eax, [(r8+0)+0*4] +mov ebx, [(r8+0)+1*4] +xor cl, ch +and rcx, 255 +label5: +add r12d, 1 +xor edx, DWORD PTR [rsi+rcx*8+3] +movzx ebp, dl +xor ebx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] +movzx ebp, dh +mov ecx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] +shr edx, 16 +xor ecx, [(r8+0)+2*4] +movzx ebp, dh +xor eax, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] +movzx ebp, dl +mov edx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] +xor edx, [(r8+0)+3*4] +add r8, [(r8+16*19)] +add r8, 3*16 +jmp label4 +label2: +mov r10d, [(r8+0)-4*16+3*4] +mov edi, [(r8+0)-4*16+2*4] +movzx ebp, cl +xor r10d, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] +mov cl, al +movzx ebp, ah +xor edi, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] +shr eax, 16 +movzx ebp, bl +xor edi, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] +movzx ebp, bh +xor r10d, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] +shr ebx, 16 +movzx ebp, al +xor r10d, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] +movzx ebp, ah +mov eax, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] +movzx ebp, bl +xor eax, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] +movzx ebp, bh +mov ebx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] +movzx ebp, ch +xor eax, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] +movzx ebp, cl +xor ebx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] +shr ecx, 16 +movzx ebp, dl +xor eax, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] +movzx ebp, dh +xor ebx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] +shr edx, 16 +movzx ebp, ch +xor edi, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] +movzx ebp, cl +xor ebx, DWORD PTR 
[rsi+8*rbp+(((1+3) MOD (4))+1)] +movzx ebp, dl +xor edi, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] +movzx ebp, dh +xor r10d, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] +mov ecx, edi +xor eax, [(r8+0)-4*16+0*4] +xor ebx, [(r8+0)-4*16+1*4] +mov edx, r10d +label4: +mov r10d, [(r8+0)-4*16+7*4] +mov edi, [(r8+0)-4*16+6*4] +movzx ebp, cl +xor r10d, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] +mov cl, al +movzx ebp, ah +xor edi, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] +shr eax, 16 +movzx ebp, bl +xor edi, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] +movzx ebp, bh +xor r10d, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] +shr ebx, 16 +movzx ebp, al +xor r10d, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] +movzx ebp, ah +mov eax, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] +movzx ebp, bl +xor eax, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] +movzx ebp, bh +mov ebx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] +movzx ebp, ch +xor eax, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] +movzx ebp, cl +xor ebx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] +shr ecx, 16 +movzx ebp, dl +xor eax, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)] +movzx ebp, dh +xor ebx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)] +shr edx, 16 +movzx ebp, ch +xor edi, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] +movzx ebp, cl +xor ebx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] +movzx ebp, dl +xor edi, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)] +movzx ebp, dh +xor r10d, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)] +mov ecx, edi +xor eax, [(r8+0)-4*16+4*4] +xor ebx, [(r8+0)-4*16+5*4] +mov edx, r10d +add r8, 32 +test r8, 255 +jnz label2 +sub r8, 16*16 +movzx ebp, ch +movzx edi, BYTE PTR [rsi+rbp*8+1] +movzx ebp, dl +xor edi, DWORD PTR [rsi+rbp*8+0] +mov WORD PTR [(r8+16*13)+2], di +movzx ebp, dh +movzx edi, BYTE PTR [rsi+rbp*8+1] +movzx ebp, al +xor edi, DWORD PTR [rsi+rbp*8+0] +mov WORD PTR [(r8+16*13)+6], di +shr edx, 16 +movzx ebp, ah +movzx edi, BYTE PTR [rsi+rbp*8+1] +movzx ebp, bl +xor edi, DWORD PTR [rsi+rbp*8+0] +mov WORD PTR 
[(r8+16*13)+10], di +shr eax, 16 +movzx ebp, bh +movzx edi, BYTE PTR [rsi+rbp*8+1] +movzx ebp, cl +xor edi, DWORD PTR [rsi+rbp*8+0] +mov WORD PTR [(r8+16*13)+14], di +shr ebx, 16 +movzx ebp, dh +movzx edi, BYTE PTR [rsi+rbp*8+1] +movzx ebp, al +xor edi, DWORD PTR [rsi+rbp*8+0] +mov WORD PTR [(r8+16*13)+12], di +shr ecx, 16 +movzx ebp, ah +movzx edi, BYTE PTR [rsi+rbp*8+1] +movzx ebp, bl +xor edi, DWORD PTR [rsi+rbp*8+0] +mov WORD PTR [(r8+16*13)+0], di +movzx ebp, bh +movzx edi, BYTE PTR [rsi+rbp*8+1] +movzx ebp, cl +xor edi, DWORD PTR [rsi+rbp*8+0] +mov WORD PTR [(r8+16*13)+4], di +movzx ebp, ch +movzx edi, BYTE PTR [rsi+rbp*8+1] +movzx ebp, dl +xor edi, DWORD PTR [rsi+rbp*8+0] +mov WORD PTR [(r8+16*13)+8], di +mov rax, [(r8+16*14)+16] +mov rbx, [(r8+16*14)+24] +mov rcx, [(r8+16*18+8)] +sub rcx, 16 +movdqu xmm2, [rax] +pxor xmm2, xmm4 +movdqa xmm0, [(r8+16*16)+16] +paddq xmm0, [(r8+16*14)+16] +movdqa [(r8+16*14)+16], xmm0 +pxor xmm2, [(r8+16*13)] +movdqu [rbx], xmm2 +jle label7 +mov [(r8+16*18+8)], rcx +test rcx, 1 +jnz label1 +movdqa xmm0, [(r8+16*16)] +paddd xmm0, [(r8+16*14)] +movdqa [(r8+16*14)], xmm0 +jmp label3 +label7: +mov rbp, [(r8+16*18)] +pop r12 +pop rbp +pop rbx +pop rdi +pop rsi +ret +Rijndael_Enc_AdvancedProcessBlocks ENDP
+ ; GCM_AuthenticateBlocks_2K: for each 16-byte block at rcx, XOR it into the 16-byte accumulator at [r8], then rebuild the accumulator from nibble-indexed 16-byte table entries (presumably the GHASH multiply of GCM, judging by the name - confirm against the C++ caller).
+ALIGN 8 ; inputs as read by the code: rcx = input pointer, rdx = block count, r8 = accumulator followed by tables, r9 = table of 16-bit words (presumably the four Win64 integer arguments - confirm)
+GCM_AuthenticateBlocks_2K PROC FRAME ; "2K": eight 256-byte tables (16 entries of 16 bytes each) occupying r8+32 .. r8+32+2047
+rex_push_reg rsi ; prolog: rsi, rdi and rbx are saved here and popped in reverse order before ret
+push_reg rdi
+push_reg rbx
+.endprolog
+mov rsi, r8 ; rsi = base of accumulator and tables
+mov r11, r9 ; r11 = base of the 16-bit word table used by the final fold
+movdqa xmm0, [rsi] ; load the running accumulator (movdqa needs [rsi] 16-byte aligned)
+label0: ; ---- one iteration per 16-byte input block ----
+movdqu xmm4, [rcx] ; input block, unaligned load
+pxor xmm0, xmm4 ; accumulator XOR input; the dwords of xmm0 are consumed one at a time below
+movd ebx, xmm0 ; dword 0
+mov eax, 0f0f0f0f0h
+and eax, ebx ; eax: every byte = (high nibble)*16, i.e. the byte offset of a 16-byte table entry
+shl ebx, 4
+and ebx, 0f0f0f0f0h ; ebx: every byte = (low nibble)*16
+movzx edi, ah ; dword 0, high nibbles -> table at +32+1024; these four loads initialise the partial sums
+movdqa xmm5, XMMWORD PTR [rsi + 32 + 1024 + rdi] ; xmm5 collects entries selected by byte 1 of each dword
+movzx edi, al
+movdqa xmm4, XMMWORD PTR [rsi + 32 + 1024 + rdi] ; xmm4 collects entries selected by byte 0
+shr eax, 16
+movzx edi, ah
+movdqa xmm3, XMMWORD PTR [rsi + 32 + 1024 + rdi] ; xmm3 collects entries selected by byte 3
+movzx edi, al
+movdqa xmm2, XMMWORD PTR [rsi + 32 + 1024 + rdi] ; xmm2 collects entries selected by byte 2
+psrldq xmm0, 4 ; bring dword 1 to the bottom of xmm0
+movd eax, xmm0
+and eax, 0f0f0f0f0h ; high nibbles of dword 1 (fetched early; used after the low nibbles of dword 0)
+movzx edi, bh ; dword 0, low nibbles -> table at +32+0*256
+pxor xmm5, XMMWORD PTR [rsi + 32 + (1-1)*256 + rdi]
+movzx edi, bl
+pxor xmm4, XMMWORD PTR [rsi + 32 + (1-1)*256 + rdi]
+shr ebx, 16
+movzx edi, bh
+pxor xmm3, XMMWORD PTR [rsi + 32 + (1-1)*256 + rdi]
+movzx edi, bl
+pxor xmm2, XMMWORD PTR [rsi + 32 + (1-1)*256 + rdi]
+movd ebx, xmm0 ; dword 1 again, this time for its low nibbles
+shl ebx, 4
+and ebx, 0f0f0f0f0h
+movzx edi, ah ; dword 1, high nibbles -> table at +32+1024+1*256
+pxor xmm5, XMMWORD PTR [rsi + 32 + 1024 + 1*256 + rdi]
+movzx edi, al
+pxor xmm4, XMMWORD PTR [rsi + 32 + 1024 + 1*256 + rdi]
+shr eax, 16
+movzx edi, ah
+pxor xmm3, XMMWORD PTR [rsi + 32 + 1024 + 1*256 + rdi]
+movzx edi, al
+pxor xmm2, XMMWORD PTR [rsi + 32 + 1024 + 1*256 + rdi]
+psrldq xmm0, 4 ; bring dword 2 to the bottom of xmm0
+movd eax, xmm0
+and eax, 0f0f0f0f0h ; high nibbles of dword 2
+movzx edi, bh ; dword 1, low nibbles -> table at +32+1*256
+pxor xmm5, XMMWORD PTR [rsi + 32 + (2-1)*256 + rdi]
+movzx edi, bl
+pxor xmm4, XMMWORD PTR [rsi + 32 + (2-1)*256 + rdi]
+shr ebx, 16
+movzx edi, bh
+pxor xmm3, XMMWORD PTR [rsi + 32 + (2-1)*256 + rdi]
+movzx edi, bl
+pxor xmm2, XMMWORD PTR [rsi + 32 + (2-1)*256 + rdi]
+movd ebx, xmm0 ; dword 2 again, for its low nibbles
+shl ebx, 4
+and ebx, 0f0f0f0f0h
+movzx edi, ah ; dword 2, high nibbles -> table at +32+1024+2*256
+pxor xmm5, XMMWORD PTR [rsi + 32 + 1024 + 2*256 + rdi]
+movzx edi, al
+pxor xmm4, XMMWORD PTR [rsi + 32 + 1024 + 2*256 + rdi]
+shr eax, 16
+movzx edi, ah
+pxor xmm3, XMMWORD PTR [rsi + 32 + 1024 + 2*256 + rdi]
+movzx edi, al
+pxor xmm2, XMMWORD PTR [rsi + 32 + 1024 + 2*256 + rdi]
+psrldq xmm0, 4 ; bring dword 3 to the bottom of xmm0
+movd eax, xmm0
+and eax, 0f0f0f0f0h ; high nibbles of dword 3
+movzx edi, bh ; dword 2, low nibbles -> table at +32+2*256
+pxor xmm5, XMMWORD PTR [rsi + 32 + (3-1)*256 + rdi]
+movzx edi, bl
+pxor xmm4, XMMWORD PTR [rsi + 32 + (3-1)*256 + rdi]
+shr ebx, 16
+movzx edi, bh
+pxor xmm3, XMMWORD PTR [rsi + 32 + (3-1)*256 + rdi]
+movzx edi, bl
+pxor xmm2, XMMWORD PTR [rsi + 32 + (3-1)*256 + rdi]
+movd ebx, xmm0 ; dword 3 again, for its low nibbles
+shl ebx, 4
+and ebx, 0f0f0f0f0h
+movzx edi, ah ; dword 3, high nibbles -> table at +32+1024+3*256
+pxor xmm5, XMMWORD PTR [rsi + 32 + 1024 + 3*256 + rdi]
+movzx edi, al
+pxor xmm4, XMMWORD PTR [rsi + 32 + 1024 + 3*256 + rdi]
+shr eax, 16
+movzx edi, ah
+pxor xmm3, XMMWORD PTR [rsi + 32 + 1024 + 3*256 + rdi]
+movzx edi, al
+pxor xmm2, XMMWORD PTR [rsi + 32 + 1024 + 3*256 + rdi]
+movzx edi, bh ; dword 3, low nibbles -> table at +32+3*256
+pxor xmm5, XMMWORD PTR [rsi + 32 + 3*256 + rdi]
+movzx edi, bl
+pxor xmm4, XMMWORD PTR [rsi + 32 + 3*256 + rdi]
+shr ebx, 16
+movzx edi, bh
+pxor xmm3, XMMWORD PTR [rsi + 32 + 3*256 + rdi]
+movzx edi, bl
+pxor xmm2, XMMWORD PTR [rsi + 32 + 3*256 + rdi]
+movdqa xmm0, xmm3 ; ---- fold the four partial sums into one; keep a copy of xmm3 to recover the byte shifted out of its top ----
+pslldq xmm3, 1 ; move every byte of xmm3 one position up (top byte is dropped)
+pxor xmm2, xmm3
+movdqa xmm1, xmm2 ; copy of the combined value before its own shift
+pslldq xmm2, 1
+pxor xmm5, xmm2
+psrldq xmm0, 15 ; isolate the top byte of the original xmm3
+movd rdi, xmm0
+movzx eax, WORD PTR [r11 + rdi*2] ; 16-bit entry for that dropped byte (presumably the reduction table - confirm)
+shl eax, 8 ; make room so the next 16-bit entry lands one byte lower
+movdqa xmm0, xmm5 ; copy before the last shift
+pslldq xmm5, 1
+pxor xmm4, xmm5 ; xmm4 now holds the folded 128-bit value without the dropped bytes
+psrldq xmm1, 15 ; top byte dropped by the second shift
+movd rdi, xmm1
+xor ax, WORD PTR [r11 + rdi*2]
+shl eax, 8
+psrldq xmm0, 15 ; top byte dropped by the third shift
+movd rdi, xmm0
+xor ax, WORD PTR [r11 + rdi*2]
+movd xmm0, eax ; 32-bit correction built from the three table entries, zero-extended to 128 bits
+pxor xmm0, xmm4 ; new accumulator
+add rcx, 16 ; advance to the next input block
+sub rdx, 1
+jnz label0 ; the count is tested only after a block has been processed, so the caller must pass rdx of at least 1
+movdqa [rsi], xmm0 ; write the accumulator back to [r8]
+pop rbx
+pop rdi
+pop rsi
+ret
+GCM_AuthenticateBlocks_2K ENDP
+ ; GCM_AuthenticateBlocks_64K: same per-block loop as the 2K variant, but each of the 16 byte positions has its own table of 256 16-byte entries, so no nibble split and no final fold are needed.
+ALIGN 8 ; inputs as read by the code: rcx = input pointer, rdx = block count, r8 = accumulator followed by tables (r9 is not used here)
+GCM_AuthenticateBlocks_64K PROC FRAME ; "64K": sixteen tables of 256*16 bytes each, starting at r8+32
+rex_push_reg rsi ; prolog: rsi and rdi are saved here and popped in reverse order before ret
+push_reg rdi
+.endprolog
+mov rsi, r8 ; rsi = base of accumulator and tables
+movdqa xmm0, [rsi] ; load the running accumulator (aligned load)
+label1: ; ---- one iteration per 16-byte input block ----
+movdqu xmm1, [rcx] ; input block, unaligned load
+pxor xmm1, xmm0 ; xmm1 = input XOR accumulator; its bytes index the tables
+pxor xmm0, xmm0 ; clear xmm0; the new accumulator is the XOR of 16 table entries
+movd eax, xmm1 ; dword 0
+psrldq xmm1, 4 ; queue dword 1
+movzx edi, al
+add rdi, rdi ; rdi = byte*2; with the *8 scale below this is byte*16, the offset of a 16-byte entry
+pxor xmm0, [rsi + 32 + (0*4+0)*256*16 + rdi*8] ; table index = dword*4 + byte position within the dword
+movzx edi, ah
+add rdi, rdi
+pxor xmm0, [rsi + 32 + (0*4+1)*256*16 + rdi*8]
+shr eax, 16
+movzx edi, al
+add rdi, rdi
+pxor xmm0, [rsi + 32 + (0*4+2)*256*16 + rdi*8]
+movzx edi, ah
+add rdi, rdi
+pxor xmm0, [rsi + 32 + (0*4+3)*256*16 + rdi*8]
+movd eax, xmm1 ; dword 1
+psrldq xmm1, 4
+movzx edi, al
+add rdi, rdi
+pxor xmm0, [rsi + 32 + (1*4+0)*256*16 + rdi*8]
+movzx edi, ah
+add rdi, rdi
+pxor xmm0, [rsi + 32 + (1*4+1)*256*16 + rdi*8]
+shr eax, 16
+movzx edi, al
+add rdi, rdi
+pxor xmm0, [rsi + 32 + (1*4+2)*256*16 + rdi*8]
+movzx edi, ah
+add rdi, rdi
+pxor xmm0, [rsi + 32 + (1*4+3)*256*16 + rdi*8]
+movd eax, xmm1 ; dword 2
+psrldq xmm1, 4
+movzx edi, al
+add rdi, rdi
+pxor xmm0, [rsi + 32 + (2*4+0)*256*16 + rdi*8]
+movzx edi, ah
+add rdi, rdi
+pxor xmm0, [rsi + 32 + (2*4+1)*256*16 + rdi*8]
+shr eax, 16
+movzx edi, al
+add rdi, rdi
+pxor xmm0, [rsi + 32 + (2*4+2)*256*16 + rdi*8]
+movzx edi, ah
+add rdi, rdi
+pxor xmm0, [rsi + 32 + (2*4+3)*256*16 + rdi*8]
+movd eax, xmm1 ; dword 3
+psrldq xmm1, 4
+movzx edi, al
+add rdi, rdi
+pxor xmm0, [rsi + 32 + (3*4+0)*256*16 + rdi*8]
+movzx edi, ah
+add rdi, rdi
+pxor xmm0, [rsi + 32 + (3*4+1)*256*16 + rdi*8]
+shr eax, 16
+movzx edi, al
+add rdi, rdi
+pxor xmm0, [rsi + 32 + (3*4+2)*256*16 + rdi*8]
+movzx edi, ah
+add rdi, rdi
+pxor xmm0, [rsi + 32 + (3*4+3)*256*16 + rdi*8]
+add rcx, 16 ; advance to the next input block
+sub rdx, 1
+jnz label1 ; the count is tested only after a block has been processed, so the caller must pass rdx of at least 1
+movdqa [rsi], xmm0 ; write the accumulator back to [r8]
+pop rdi
+pop rsi
+ret
+GCM_AuthenticateBlocks_64K ENDP
+ +ALIGN 8 +X86_SHA256_HashBlocks PROC FRAME +rex_push_reg rsi +push_reg rdi +push_reg rbx +push_reg rbp +alloc_stack(8*4 + 16*4 + 4*8 + 8) +.endprolog +mov rdi, r8 +lea rsi, [?SHA256_K@CryptoPP@@3QBIB + 48*4] +mov [rsp+8*4+16*4+1*8], rcx +mov [rsp+8*4+16*4+2*8], rdx +add rdi, rdx +mov [rsp+8*4+16*4+3*8], rdi +movdqa xmm0, XMMWORD PTR [rcx+0*16] +movdqa xmm1, XMMWORD PTR [rcx+1*16] +mov [rsp+8*4+16*4+0*8], rsi +label0: +sub rsi, 48*4 +movdqa [rsp+((1024+7-(0+3)) MOD (8))*4], xmm1 +movdqa [rsp+((1024+7-(0+7)) MOD (8))*4], xmm0 +mov rbx, [rdx+0*8] +bswap rbx +mov [rsp+8*4+((1024+15-(0*(1+1)+1)) MOD (16))*4], rbx +mov rbx, [rdx+1*8] +bswap rbx +mov [rsp+8*4+((1024+15-(1*(1+1)+1)) MOD (16))*4], rbx +mov rbx, [rdx+2*8] +bswap rbx +mov [rsp+8*4+((1024+15-(2*(1+1)+1)) MOD (16))*4], rbx +mov rbx, [rdx+3*8] +bswap rbx +mov [rsp+8*4+((1024+15-(3*(1+1)+1)) MOD (16))*4], rbx +mov rbx, [rdx+4*8] +bswap rbx +mov [rsp+8*4+((1024+15-(4*(1+1)+1)) MOD (16))*4], rbx +mov rbx, [rdx+5*8] +bswap rbx +mov [rsp+8*4+((1024+15-(5*(1+1)+1)) MOD (16))*4], rbx +mov rbx, [rdx+6*8] +bswap rbx +mov [rsp+8*4+((1024+15-(6*(1+1)+1)) MOD (16))*4], rbx +mov rbx, [rdx+7*8] +bswap rbx +mov [rsp+8*4+((1024+15-(7*(1+1)+1)) MOD (16))*4], rbx +mov edi, [rsp+((1024+7-(0+3)) MOD (8))*4] +mov eax, [rsp+((1024+7-(0+6)) MOD (8))*4] +xor eax, [rsp+((1024+7-(0+5)) MOD (8))*4] +mov ecx, [rsp+((1024+7-(0+7)) MOD (8))*4] +mov edx, [rsp+((1024+7-(0+2)) MOD (8))*4] +xor edx, [rsp+((1024+7-(0+1)) MOD (8))*4] +and edx, edi +xor edx, [rsp+((1024+7-(0+1)) MOD (8))*4] +mov ebp, edi +ror edi, 6 +ror ebp, 25 +add edx, [rsi+(0)*4] +add edx, 
[rsp+8*4+((1024+15-(0)) MOD (16))*4] +add edx, [rsp+((1024+7-(0)) MOD (8))*4] +xor ebp, edi +ror edi, 5 +xor ebp, edi +add edx, ebp +mov ebx, ecx +xor ecx, [rsp+((1024+7-(0+6)) MOD (8))*4] +and eax, ecx +xor eax, [rsp+((1024+7-(0+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add eax, edx +add edx, [rsp+((1024+7-(0+4)) MOD (8))*4] +mov [rsp+((1024+7-(0+4)) MOD (8))*4], edx +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add eax, ebp +mov [rsp+((1024+7-(0)) MOD (8))*4], eax +mov edi, [rsp+((1024+7-(1+2)) MOD (8))*4] +xor edi, [rsp+((1024+7-(1+1)) MOD (8))*4] +and edi, edx +xor edi, [rsp+((1024+7-(1+1)) MOD (8))*4] +mov ebp, edx +ror edx, 6 +ror ebp, 25 +add edi, [rsi+(1)*4] +add edi, [rsp+8*4+((1024+15-(1)) MOD (16))*4] +add edi, [rsp+((1024+7-(1)) MOD (8))*4] +xor ebp, edx +ror edx, 5 +xor ebp, edx +add edi, ebp +mov ebx, eax +xor eax, [rsp+((1024+7-(1+6)) MOD (8))*4] +and ecx, eax +xor ecx, [rsp+((1024+7-(1+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add ecx, edi +add edi, [rsp+((1024+7-(1+4)) MOD (8))*4] +mov [rsp+((1024+7-(1+4)) MOD (8))*4], edi +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add ecx, ebp +mov [rsp+((1024+7-(1)) MOD (8))*4], ecx +mov edx, [rsp+((1024+7-(2+2)) MOD (8))*4] +xor edx, [rsp+((1024+7-(2+1)) MOD (8))*4] +and edx, edi +xor edx, [rsp+((1024+7-(2+1)) MOD (8))*4] +mov ebp, edi +ror edi, 6 +ror ebp, 25 +add edx, [rsi+(2)*4] +add edx, [rsp+8*4+((1024+15-(2)) MOD (16))*4] +add edx, [rsp+((1024+7-(2)) MOD (8))*4] +xor ebp, edi +ror edi, 5 +xor ebp, edi +add edx, ebp +mov ebx, ecx +xor ecx, [rsp+((1024+7-(2+6)) MOD (8))*4] +and eax, ecx +xor eax, [rsp+((1024+7-(2+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add eax, edx +add edx, [rsp+((1024+7-(2+4)) MOD (8))*4] +mov [rsp+((1024+7-(2+4)) MOD (8))*4], edx +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add eax, ebp +mov [rsp+((1024+7-(2)) MOD (8))*4], eax +mov edi, [rsp+((1024+7-(3+2)) MOD (8))*4] +xor edi, [rsp+((1024+7-(3+1)) MOD (8))*4] +and edi, edx +xor edi, 
[rsp+((1024+7-(3+1)) MOD (8))*4] +mov ebp, edx +ror edx, 6 +ror ebp, 25 +add edi, [rsi+(3)*4] +add edi, [rsp+8*4+((1024+15-(3)) MOD (16))*4] +add edi, [rsp+((1024+7-(3)) MOD (8))*4] +xor ebp, edx +ror edx, 5 +xor ebp, edx +add edi, ebp +mov ebx, eax +xor eax, [rsp+((1024+7-(3+6)) MOD (8))*4] +and ecx, eax +xor ecx, [rsp+((1024+7-(3+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add ecx, edi +add edi, [rsp+((1024+7-(3+4)) MOD (8))*4] +mov [rsp+((1024+7-(3+4)) MOD (8))*4], edi +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add ecx, ebp +mov [rsp+((1024+7-(3)) MOD (8))*4], ecx +mov edx, [rsp+((1024+7-(4+2)) MOD (8))*4] +xor edx, [rsp+((1024+7-(4+1)) MOD (8))*4] +and edx, edi +xor edx, [rsp+((1024+7-(4+1)) MOD (8))*4] +mov ebp, edi +ror edi, 6 +ror ebp, 25 +add edx, [rsi+(4)*4] +add edx, [rsp+8*4+((1024+15-(4)) MOD (16))*4] +add edx, [rsp+((1024+7-(4)) MOD (8))*4] +xor ebp, edi +ror edi, 5 +xor ebp, edi +add edx, ebp +mov ebx, ecx +xor ecx, [rsp+((1024+7-(4+6)) MOD (8))*4] +and eax, ecx +xor eax, [rsp+((1024+7-(4+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add eax, edx +add edx, [rsp+((1024+7-(4+4)) MOD (8))*4] +mov [rsp+((1024+7-(4+4)) MOD (8))*4], edx +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add eax, ebp +mov [rsp+((1024+7-(4)) MOD (8))*4], eax +mov edi, [rsp+((1024+7-(5+2)) MOD (8))*4] +xor edi, [rsp+((1024+7-(5+1)) MOD (8))*4] +and edi, edx +xor edi, [rsp+((1024+7-(5+1)) MOD (8))*4] +mov ebp, edx +ror edx, 6 +ror ebp, 25 +add edi, [rsi+(5)*4] +add edi, [rsp+8*4+((1024+15-(5)) MOD (16))*4] +add edi, [rsp+((1024+7-(5)) MOD (8))*4] +xor ebp, edx +ror edx, 5 +xor ebp, edx +add edi, ebp +mov ebx, eax +xor eax, [rsp+((1024+7-(5+6)) MOD (8))*4] +and ecx, eax +xor ecx, [rsp+((1024+7-(5+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add ecx, edi +add edi, [rsp+((1024+7-(5+4)) MOD (8))*4] +mov [rsp+((1024+7-(5+4)) MOD (8))*4], edi +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add ecx, ebp +mov [rsp+((1024+7-(5)) MOD (8))*4], ecx +mov edx, 
[rsp+((1024+7-(6+2)) MOD (8))*4] +xor edx, [rsp+((1024+7-(6+1)) MOD (8))*4] +and edx, edi +xor edx, [rsp+((1024+7-(6+1)) MOD (8))*4] +mov ebp, edi +ror edi, 6 +ror ebp, 25 +add edx, [rsi+(6)*4] +add edx, [rsp+8*4+((1024+15-(6)) MOD (16))*4] +add edx, [rsp+((1024+7-(6)) MOD (8))*4] +xor ebp, edi +ror edi, 5 +xor ebp, edi +add edx, ebp +mov ebx, ecx +xor ecx, [rsp+((1024+7-(6+6)) MOD (8))*4] +and eax, ecx +xor eax, [rsp+((1024+7-(6+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add eax, edx +add edx, [rsp+((1024+7-(6+4)) MOD (8))*4] +mov [rsp+((1024+7-(6+4)) MOD (8))*4], edx +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add eax, ebp +mov [rsp+((1024+7-(6)) MOD (8))*4], eax +mov edi, [rsp+((1024+7-(7+2)) MOD (8))*4] +xor edi, [rsp+((1024+7-(7+1)) MOD (8))*4] +and edi, edx +xor edi, [rsp+((1024+7-(7+1)) MOD (8))*4] +mov ebp, edx +ror edx, 6 +ror ebp, 25 +add edi, [rsi+(7)*4] +add edi, [rsp+8*4+((1024+15-(7)) MOD (16))*4] +add edi, [rsp+((1024+7-(7)) MOD (8))*4] +xor ebp, edx +ror edx, 5 +xor ebp, edx +add edi, ebp +mov ebx, eax +xor eax, [rsp+((1024+7-(7+6)) MOD (8))*4] +and ecx, eax +xor ecx, [rsp+((1024+7-(7+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add ecx, edi +add edi, [rsp+((1024+7-(7+4)) MOD (8))*4] +mov [rsp+((1024+7-(7+4)) MOD (8))*4], edi +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add ecx, ebp +mov [rsp+((1024+7-(7)) MOD (8))*4], ecx +mov edx, [rsp+((1024+7-(8+2)) MOD (8))*4] +xor edx, [rsp+((1024+7-(8+1)) MOD (8))*4] +and edx, edi +xor edx, [rsp+((1024+7-(8+1)) MOD (8))*4] +mov ebp, edi +ror edi, 6 +ror ebp, 25 +add edx, [rsi+(8)*4] +add edx, [rsp+8*4+((1024+15-(8)) MOD (16))*4] +add edx, [rsp+((1024+7-(8)) MOD (8))*4] +xor ebp, edi +ror edi, 5 +xor ebp, edi +add edx, ebp +mov ebx, ecx +xor ecx, [rsp+((1024+7-(8+6)) MOD (8))*4] +and eax, ecx +xor eax, [rsp+((1024+7-(8+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add eax, edx +add edx, [rsp+((1024+7-(8+4)) MOD (8))*4] +mov [rsp+((1024+7-(8+4)) MOD (8))*4], edx +ror ebp, 22 +xor ebp, ebx +ror 
ebx, 11 +xor ebp, ebx +add eax, ebp +mov [rsp+((1024+7-(8)) MOD (8))*4], eax +mov edi, [rsp+((1024+7-(9+2)) MOD (8))*4] +xor edi, [rsp+((1024+7-(9+1)) MOD (8))*4] +and edi, edx +xor edi, [rsp+((1024+7-(9+1)) MOD (8))*4] +mov ebp, edx +ror edx, 6 +ror ebp, 25 +add edi, [rsi+(9)*4] +add edi, [rsp+8*4+((1024+15-(9)) MOD (16))*4] +add edi, [rsp+((1024+7-(9)) MOD (8))*4] +xor ebp, edx +ror edx, 5 +xor ebp, edx +add edi, ebp +mov ebx, eax +xor eax, [rsp+((1024+7-(9+6)) MOD (8))*4] +and ecx, eax +xor ecx, [rsp+((1024+7-(9+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add ecx, edi +add edi, [rsp+((1024+7-(9+4)) MOD (8))*4] +mov [rsp+((1024+7-(9+4)) MOD (8))*4], edi +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add ecx, ebp +mov [rsp+((1024+7-(9)) MOD (8))*4], ecx +mov edx, [rsp+((1024+7-(10+2)) MOD (8))*4] +xor edx, [rsp+((1024+7-(10+1)) MOD (8))*4] +and edx, edi +xor edx, [rsp+((1024+7-(10+1)) MOD (8))*4] +mov ebp, edi +ror edi, 6 +ror ebp, 25 +add edx, [rsi+(10)*4] +add edx, [rsp+8*4+((1024+15-(10)) MOD (16))*4] +add edx, [rsp+((1024+7-(10)) MOD (8))*4] +xor ebp, edi +ror edi, 5 +xor ebp, edi +add edx, ebp +mov ebx, ecx +xor ecx, [rsp+((1024+7-(10+6)) MOD (8))*4] +and eax, ecx +xor eax, [rsp+((1024+7-(10+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add eax, edx +add edx, [rsp+((1024+7-(10+4)) MOD (8))*4] +mov [rsp+((1024+7-(10+4)) MOD (8))*4], edx +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add eax, ebp +mov [rsp+((1024+7-(10)) MOD (8))*4], eax +mov edi, [rsp+((1024+7-(11+2)) MOD (8))*4] +xor edi, [rsp+((1024+7-(11+1)) MOD (8))*4] +and edi, edx +xor edi, [rsp+((1024+7-(11+1)) MOD (8))*4] +mov ebp, edx +ror edx, 6 +ror ebp, 25 +add edi, [rsi+(11)*4] +add edi, [rsp+8*4+((1024+15-(11)) MOD (16))*4] +add edi, [rsp+((1024+7-(11)) MOD (8))*4] +xor ebp, edx +ror edx, 5 +xor ebp, edx +add edi, ebp +mov ebx, eax +xor eax, [rsp+((1024+7-(11+6)) MOD (8))*4] +and ecx, eax +xor ecx, [rsp+((1024+7-(11+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add ecx, edi +add edi, 
[rsp+((1024+7-(11+4)) MOD (8))*4] +mov [rsp+((1024+7-(11+4)) MOD (8))*4], edi +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add ecx, ebp +mov [rsp+((1024+7-(11)) MOD (8))*4], ecx +mov edx, [rsp+((1024+7-(12+2)) MOD (8))*4] +xor edx, [rsp+((1024+7-(12+1)) MOD (8))*4] +and edx, edi +xor edx, [rsp+((1024+7-(12+1)) MOD (8))*4] +mov ebp, edi +ror edi, 6 +ror ebp, 25 +add edx, [rsi+(12)*4] +add edx, [rsp+8*4+((1024+15-(12)) MOD (16))*4] +add edx, [rsp+((1024+7-(12)) MOD (8))*4] +xor ebp, edi +ror edi, 5 +xor ebp, edi +add edx, ebp +mov ebx, ecx +xor ecx, [rsp+((1024+7-(12+6)) MOD (8))*4] +and eax, ecx +xor eax, [rsp+((1024+7-(12+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add eax, edx +add edx, [rsp+((1024+7-(12+4)) MOD (8))*4] +mov [rsp+((1024+7-(12+4)) MOD (8))*4], edx +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add eax, ebp +mov [rsp+((1024+7-(12)) MOD (8))*4], eax +mov edi, [rsp+((1024+7-(13+2)) MOD (8))*4] +xor edi, [rsp+((1024+7-(13+1)) MOD (8))*4] +and edi, edx +xor edi, [rsp+((1024+7-(13+1)) MOD (8))*4] +mov ebp, edx +ror edx, 6 +ror ebp, 25 +add edi, [rsi+(13)*4] +add edi, [rsp+8*4+((1024+15-(13)) MOD (16))*4] +add edi, [rsp+((1024+7-(13)) MOD (8))*4] +xor ebp, edx +ror edx, 5 +xor ebp, edx +add edi, ebp +mov ebx, eax +xor eax, [rsp+((1024+7-(13+6)) MOD (8))*4] +and ecx, eax +xor ecx, [rsp+((1024+7-(13+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add ecx, edi +add edi, [rsp+((1024+7-(13+4)) MOD (8))*4] +mov [rsp+((1024+7-(13+4)) MOD (8))*4], edi +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add ecx, ebp +mov [rsp+((1024+7-(13)) MOD (8))*4], ecx +mov edx, [rsp+((1024+7-(14+2)) MOD (8))*4] +xor edx, [rsp+((1024+7-(14+1)) MOD (8))*4] +and edx, edi +xor edx, [rsp+((1024+7-(14+1)) MOD (8))*4] +mov ebp, edi +ror edi, 6 +ror ebp, 25 +add edx, [rsi+(14)*4] +add edx, [rsp+8*4+((1024+15-(14)) MOD (16))*4] +add edx, [rsp+((1024+7-(14)) MOD (8))*4] +xor ebp, edi +ror edi, 5 +xor ebp, edi +add edx, ebp +mov ebx, ecx +xor ecx, [rsp+((1024+7-(14+6)) 
MOD (8))*4] +and eax, ecx +xor eax, [rsp+((1024+7-(14+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add eax, edx +add edx, [rsp+((1024+7-(14+4)) MOD (8))*4] +mov [rsp+((1024+7-(14+4)) MOD (8))*4], edx +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add eax, ebp +mov [rsp+((1024+7-(14)) MOD (8))*4], eax +mov edi, [rsp+((1024+7-(15+2)) MOD (8))*4] +xor edi, [rsp+((1024+7-(15+1)) MOD (8))*4] +and edi, edx +xor edi, [rsp+((1024+7-(15+1)) MOD (8))*4] +mov ebp, edx +ror edx, 6 +ror ebp, 25 +add edi, [rsi+(15)*4] +add edi, [rsp+8*4+((1024+15-(15)) MOD (16))*4] +add edi, [rsp+((1024+7-(15)) MOD (8))*4] +xor ebp, edx +ror edx, 5 +xor ebp, edx +add edi, ebp +mov ebx, eax +xor eax, [rsp+((1024+7-(15+6)) MOD (8))*4] +and ecx, eax +xor ecx, [rsp+((1024+7-(15+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add ecx, edi +add edi, [rsp+((1024+7-(15+4)) MOD (8))*4] +mov [rsp+((1024+7-(15+4)) MOD (8))*4], edi +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add ecx, ebp +mov [rsp+((1024+7-(15)) MOD (8))*4], ecx +label1: +add rsi, 4*16 +mov edx, [rsp+((1024+7-(0+2)) MOD (8))*4] +xor edx, [rsp+((1024+7-(0+1)) MOD (8))*4] +and edx, edi +xor edx, [rsp+((1024+7-(0+1)) MOD (8))*4] +mov ebp, edi +ror edi, 6 +ror ebp, 25 +xor ebp, edi +ror edi, 5 +xor ebp, edi +add edx, ebp +mov ebp, [rsp+8*4+((1024+15-((0)-2)) MOD (16))*4] +mov edi, [rsp+8*4+((1024+15-((0)-15)) MOD (16))*4] +mov ebx, ebp +shr ebp, 10 +ror ebx, 17 +xor ebp, ebx +ror ebx, 2 +xor ebx, ebp +add ebx, [rsp+8*4+((1024+15-((0)-7)) MOD (16))*4] +mov ebp, edi +shr ebp, 3 +ror edi, 7 +add ebx, [rsp+8*4+((1024+15-(0)) MOD (16))*4] +xor ebp, edi +add edx, [rsi+(0)*4] +ror edi, 11 +add edx, [rsp+((1024+7-(0)) MOD (8))*4] +xor ebp, edi +add ebp, ebx +mov [rsp+8*4+((1024+15-(0)) MOD (16))*4], ebp +add edx, ebp +mov ebx, ecx +xor ecx, [rsp+((1024+7-(0+6)) MOD (8))*4] +and eax, ecx +xor eax, [rsp+((1024+7-(0+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add eax, edx +add edx, [rsp+((1024+7-(0+4)) MOD (8))*4] +mov [rsp+((1024+7-(0+4)) MOD 
(8))*4], edx +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add eax, ebp +mov [rsp+((1024+7-(0)) MOD (8))*4], eax +mov edi, [rsp+((1024+7-(1+2)) MOD (8))*4] +xor edi, [rsp+((1024+7-(1+1)) MOD (8))*4] +and edi, edx +xor edi, [rsp+((1024+7-(1+1)) MOD (8))*4] +mov ebp, edx +ror edx, 6 +ror ebp, 25 +xor ebp, edx +ror edx, 5 +xor ebp, edx +add edi, ebp +mov ebp, [rsp+8*4+((1024+15-((1)-2)) MOD (16))*4] +mov edx, [rsp+8*4+((1024+15-((1)-15)) MOD (16))*4] +mov ebx, ebp +shr ebp, 10 +ror ebx, 17 +xor ebp, ebx +ror ebx, 2 +xor ebx, ebp +add ebx, [rsp+8*4+((1024+15-((1)-7)) MOD (16))*4] +mov ebp, edx +shr ebp, 3 +ror edx, 7 +add ebx, [rsp+8*4+((1024+15-(1)) MOD (16))*4] +xor ebp, edx +add edi, [rsi+(1)*4] +ror edx, 11 +add edi, [rsp+((1024+7-(1)) MOD (8))*4] +xor ebp, edx +add ebp, ebx +mov [rsp+8*4+((1024+15-(1)) MOD (16))*4], ebp +add edi, ebp +mov ebx, eax +xor eax, [rsp+((1024+7-(1+6)) MOD (8))*4] +and ecx, eax +xor ecx, [rsp+((1024+7-(1+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add ecx, edi +add edi, [rsp+((1024+7-(1+4)) MOD (8))*4] +mov [rsp+((1024+7-(1+4)) MOD (8))*4], edi +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add ecx, ebp +mov [rsp+((1024+7-(1)) MOD (8))*4], ecx +mov edx, [rsp+((1024+7-(2+2)) MOD (8))*4] +xor edx, [rsp+((1024+7-(2+1)) MOD (8))*4] +and edx, edi +xor edx, [rsp+((1024+7-(2+1)) MOD (8))*4] +mov ebp, edi +ror edi, 6 +ror ebp, 25 +xor ebp, edi +ror edi, 5 +xor ebp, edi +add edx, ebp +mov ebp, [rsp+8*4+((1024+15-((2)-2)) MOD (16))*4] +mov edi, [rsp+8*4+((1024+15-((2)-15)) MOD (16))*4] +mov ebx, ebp +shr ebp, 10 +ror ebx, 17 +xor ebp, ebx +ror ebx, 2 +xor ebx, ebp +add ebx, [rsp+8*4+((1024+15-((2)-7)) MOD (16))*4] +mov ebp, edi +shr ebp, 3 +ror edi, 7 +add ebx, [rsp+8*4+((1024+15-(2)) MOD (16))*4] +xor ebp, edi +add edx, [rsi+(2)*4] +ror edi, 11 +add edx, [rsp+((1024+7-(2)) MOD (8))*4] +xor ebp, edi +add ebp, ebx +mov [rsp+8*4+((1024+15-(2)) MOD (16))*4], ebp +add edx, ebp +mov ebx, ecx +xor ecx, [rsp+((1024+7-(2+6)) MOD (8))*4] 
+and eax, ecx +xor eax, [rsp+((1024+7-(2+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add eax, edx +add edx, [rsp+((1024+7-(2+4)) MOD (8))*4] +mov [rsp+((1024+7-(2+4)) MOD (8))*4], edx +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add eax, ebp +mov [rsp+((1024+7-(2)) MOD (8))*4], eax +mov edi, [rsp+((1024+7-(3+2)) MOD (8))*4] +xor edi, [rsp+((1024+7-(3+1)) MOD (8))*4] +and edi, edx +xor edi, [rsp+((1024+7-(3+1)) MOD (8))*4] +mov ebp, edx +ror edx, 6 +ror ebp, 25 +xor ebp, edx +ror edx, 5 +xor ebp, edx +add edi, ebp +mov ebp, [rsp+8*4+((1024+15-((3)-2)) MOD (16))*4] +mov edx, [rsp+8*4+((1024+15-((3)-15)) MOD (16))*4] +mov ebx, ebp +shr ebp, 10 +ror ebx, 17 +xor ebp, ebx +ror ebx, 2 +xor ebx, ebp +add ebx, [rsp+8*4+((1024+15-((3)-7)) MOD (16))*4] +mov ebp, edx +shr ebp, 3 +ror edx, 7 +add ebx, [rsp+8*4+((1024+15-(3)) MOD (16))*4] +xor ebp, edx +add edi, [rsi+(3)*4] +ror edx, 11 +add edi, [rsp+((1024+7-(3)) MOD (8))*4] +xor ebp, edx +add ebp, ebx +mov [rsp+8*4+((1024+15-(3)) MOD (16))*4], ebp +add edi, ebp +mov ebx, eax +xor eax, [rsp+((1024+7-(3+6)) MOD (8))*4] +and ecx, eax +xor ecx, [rsp+((1024+7-(3+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add ecx, edi +add edi, [rsp+((1024+7-(3+4)) MOD (8))*4] +mov [rsp+((1024+7-(3+4)) MOD (8))*4], edi +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add ecx, ebp +mov [rsp+((1024+7-(3)) MOD (8))*4], ecx +mov edx, [rsp+((1024+7-(4+2)) MOD (8))*4] +xor edx, [rsp+((1024+7-(4+1)) MOD (8))*4] +and edx, edi +xor edx, [rsp+((1024+7-(4+1)) MOD (8))*4] +mov ebp, edi +ror edi, 6 +ror ebp, 25 +xor ebp, edi +ror edi, 5 +xor ebp, edi +add edx, ebp +mov ebp, [rsp+8*4+((1024+15-((4)-2)) MOD (16))*4] +mov edi, [rsp+8*4+((1024+15-((4)-15)) MOD (16))*4] +mov ebx, ebp +shr ebp, 10 +ror ebx, 17 +xor ebp, ebx +ror ebx, 2 +xor ebx, ebp +add ebx, [rsp+8*4+((1024+15-((4)-7)) MOD (16))*4] +mov ebp, edi +shr ebp, 3 +ror edi, 7 +add ebx, [rsp+8*4+((1024+15-(4)) MOD (16))*4] +xor ebp, edi +add edx, [rsi+(4)*4] +ror edi, 11 +add edx, 
[rsp+((1024+7-(4)) MOD (8))*4] +xor ebp, edi +add ebp, ebx +mov [rsp+8*4+((1024+15-(4)) MOD (16))*4], ebp +add edx, ebp +mov ebx, ecx +xor ecx, [rsp+((1024+7-(4+6)) MOD (8))*4] +and eax, ecx +xor eax, [rsp+((1024+7-(4+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add eax, edx +add edx, [rsp+((1024+7-(4+4)) MOD (8))*4] +mov [rsp+((1024+7-(4+4)) MOD (8))*4], edx +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add eax, ebp +mov [rsp+((1024+7-(4)) MOD (8))*4], eax +mov edi, [rsp+((1024+7-(5+2)) MOD (8))*4] +xor edi, [rsp+((1024+7-(5+1)) MOD (8))*4] +and edi, edx +xor edi, [rsp+((1024+7-(5+1)) MOD (8))*4] +mov ebp, edx +ror edx, 6 +ror ebp, 25 +xor ebp, edx +ror edx, 5 +xor ebp, edx +add edi, ebp +mov ebp, [rsp+8*4+((1024+15-((5)-2)) MOD (16))*4] +mov edx, [rsp+8*4+((1024+15-((5)-15)) MOD (16))*4] +mov ebx, ebp +shr ebp, 10 +ror ebx, 17 +xor ebp, ebx +ror ebx, 2 +xor ebx, ebp +add ebx, [rsp+8*4+((1024+15-((5)-7)) MOD (16))*4] +mov ebp, edx +shr ebp, 3 +ror edx, 7 +add ebx, [rsp+8*4+((1024+15-(5)) MOD (16))*4] +xor ebp, edx +add edi, [rsi+(5)*4] +ror edx, 11 +add edi, [rsp+((1024+7-(5)) MOD (8))*4] +xor ebp, edx +add ebp, ebx +mov [rsp+8*4+((1024+15-(5)) MOD (16))*4], ebp +add edi, ebp +mov ebx, eax +xor eax, [rsp+((1024+7-(5+6)) MOD (8))*4] +and ecx, eax +xor ecx, [rsp+((1024+7-(5+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add ecx, edi +add edi, [rsp+((1024+7-(5+4)) MOD (8))*4] +mov [rsp+((1024+7-(5+4)) MOD (8))*4], edi +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add ecx, ebp +mov [rsp+((1024+7-(5)) MOD (8))*4], ecx +mov edx, [rsp+((1024+7-(6+2)) MOD (8))*4] +xor edx, [rsp+((1024+7-(6+1)) MOD (8))*4] +and edx, edi +xor edx, [rsp+((1024+7-(6+1)) MOD (8))*4] +mov ebp, edi +ror edi, 6 +ror ebp, 25 +xor ebp, edi +ror edi, 5 +xor ebp, edi +add edx, ebp +mov ebp, [rsp+8*4+((1024+15-((6)-2)) MOD (16))*4] +mov edi, [rsp+8*4+((1024+15-((6)-15)) MOD (16))*4] +mov ebx, ebp +shr ebp, 10 +ror ebx, 17 +xor ebp, ebx +ror ebx, 2 +xor ebx, ebp +add ebx, 
[rsp+8*4+((1024+15-((6)-7)) MOD (16))*4] +mov ebp, edi +shr ebp, 3 +ror edi, 7 +add ebx, [rsp+8*4+((1024+15-(6)) MOD (16))*4] +xor ebp, edi +add edx, [rsi+(6)*4] +ror edi, 11 +add edx, [rsp+((1024+7-(6)) MOD (8))*4] +xor ebp, edi +add ebp, ebx +mov [rsp+8*4+((1024+15-(6)) MOD (16))*4], ebp +add edx, ebp +mov ebx, ecx +xor ecx, [rsp+((1024+7-(6+6)) MOD (8))*4] +and eax, ecx +xor eax, [rsp+((1024+7-(6+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add eax, edx +add edx, [rsp+((1024+7-(6+4)) MOD (8))*4] +mov [rsp+((1024+7-(6+4)) MOD (8))*4], edx +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add eax, ebp +mov [rsp+((1024+7-(6)) MOD (8))*4], eax +mov edi, [rsp+((1024+7-(7+2)) MOD (8))*4] +xor edi, [rsp+((1024+7-(7+1)) MOD (8))*4] +and edi, edx +xor edi, [rsp+((1024+7-(7+1)) MOD (8))*4] +mov ebp, edx +ror edx, 6 +ror ebp, 25 +xor ebp, edx +ror edx, 5 +xor ebp, edx +add edi, ebp +mov ebp, [rsp+8*4+((1024+15-((7)-2)) MOD (16))*4] +mov edx, [rsp+8*4+((1024+15-((7)-15)) MOD (16))*4] +mov ebx, ebp +shr ebp, 10 +ror ebx, 17 +xor ebp, ebx +ror ebx, 2 +xor ebx, ebp +add ebx, [rsp+8*4+((1024+15-((7)-7)) MOD (16))*4] +mov ebp, edx +shr ebp, 3 +ror edx, 7 +add ebx, [rsp+8*4+((1024+15-(7)) MOD (16))*4] +xor ebp, edx +add edi, [rsi+(7)*4] +ror edx, 11 +add edi, [rsp+((1024+7-(7)) MOD (8))*4] +xor ebp, edx +add ebp, ebx +mov [rsp+8*4+((1024+15-(7)) MOD (16))*4], ebp +add edi, ebp +mov ebx, eax +xor eax, [rsp+((1024+7-(7+6)) MOD (8))*4] +and ecx, eax +xor ecx, [rsp+((1024+7-(7+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add ecx, edi +add edi, [rsp+((1024+7-(7+4)) MOD (8))*4] +mov [rsp+((1024+7-(7+4)) MOD (8))*4], edi +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add ecx, ebp +mov [rsp+((1024+7-(7)) MOD (8))*4], ecx +mov edx, [rsp+((1024+7-(8+2)) MOD (8))*4] +xor edx, [rsp+((1024+7-(8+1)) MOD (8))*4] +and edx, edi +xor edx, [rsp+((1024+7-(8+1)) MOD (8))*4] +mov ebp, edi +ror edi, 6 +ror ebp, 25 +xor ebp, edi +ror edi, 5 +xor ebp, edi +add edx, ebp +mov ebp, 
[rsp+8*4+((1024+15-((8)-2)) MOD (16))*4] +mov edi, [rsp+8*4+((1024+15-((8)-15)) MOD (16))*4] +mov ebx, ebp +shr ebp, 10 +ror ebx, 17 +xor ebp, ebx +ror ebx, 2 +xor ebx, ebp +add ebx, [rsp+8*4+((1024+15-((8)-7)) MOD (16))*4] +mov ebp, edi +shr ebp, 3 +ror edi, 7 +add ebx, [rsp+8*4+((1024+15-(8)) MOD (16))*4] +xor ebp, edi +add edx, [rsi+(8)*4] +ror edi, 11 +add edx, [rsp+((1024+7-(8)) MOD (8))*4] +xor ebp, edi +add ebp, ebx +mov [rsp+8*4+((1024+15-(8)) MOD (16))*4], ebp +add edx, ebp +mov ebx, ecx +xor ecx, [rsp+((1024+7-(8+6)) MOD (8))*4] +and eax, ecx +xor eax, [rsp+((1024+7-(8+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add eax, edx +add edx, [rsp+((1024+7-(8+4)) MOD (8))*4] +mov [rsp+((1024+7-(8+4)) MOD (8))*4], edx +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add eax, ebp +mov [rsp+((1024+7-(8)) MOD (8))*4], eax +mov edi, [rsp+((1024+7-(9+2)) MOD (8))*4] +xor edi, [rsp+((1024+7-(9+1)) MOD (8))*4] +and edi, edx +xor edi, [rsp+((1024+7-(9+1)) MOD (8))*4] +mov ebp, edx +ror edx, 6 +ror ebp, 25 +xor ebp, edx +ror edx, 5 +xor ebp, edx +add edi, ebp +mov ebp, [rsp+8*4+((1024+15-((9)-2)) MOD (16))*4] +mov edx, [rsp+8*4+((1024+15-((9)-15)) MOD (16))*4] +mov ebx, ebp +shr ebp, 10 +ror ebx, 17 +xor ebp, ebx +ror ebx, 2 +xor ebx, ebp +add ebx, [rsp+8*4+((1024+15-((9)-7)) MOD (16))*4] +mov ebp, edx +shr ebp, 3 +ror edx, 7 +add ebx, [rsp+8*4+((1024+15-(9)) MOD (16))*4] +xor ebp, edx +add edi, [rsi+(9)*4] +ror edx, 11 +add edi, [rsp+((1024+7-(9)) MOD (8))*4] +xor ebp, edx +add ebp, ebx +mov [rsp+8*4+((1024+15-(9)) MOD (16))*4], ebp +add edi, ebp +mov ebx, eax +xor eax, [rsp+((1024+7-(9+6)) MOD (8))*4] +and ecx, eax +xor ecx, [rsp+((1024+7-(9+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add ecx, edi +add edi, [rsp+((1024+7-(9+4)) MOD (8))*4] +mov [rsp+((1024+7-(9+4)) MOD (8))*4], edi +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add ecx, ebp +mov [rsp+((1024+7-(9)) MOD (8))*4], ecx +mov edx, [rsp+((1024+7-(10+2)) MOD (8))*4] +xor edx, [rsp+((1024+7-(10+1)) MOD 
(8))*4] +and edx, edi +xor edx, [rsp+((1024+7-(10+1)) MOD (8))*4] +mov ebp, edi +ror edi, 6 +ror ebp, 25 +xor ebp, edi +ror edi, 5 +xor ebp, edi +add edx, ebp +mov ebp, [rsp+8*4+((1024+15-((10)-2)) MOD (16))*4] +mov edi, [rsp+8*4+((1024+15-((10)-15)) MOD (16))*4] +mov ebx, ebp +shr ebp, 10 +ror ebx, 17 +xor ebp, ebx +ror ebx, 2 +xor ebx, ebp +add ebx, [rsp+8*4+((1024+15-((10)-7)) MOD (16))*4] +mov ebp, edi +shr ebp, 3 +ror edi, 7 +add ebx, [rsp+8*4+((1024+15-(10)) MOD (16))*4] +xor ebp, edi +add edx, [rsi+(10)*4] +ror edi, 11 +add edx, [rsp+((1024+7-(10)) MOD (8))*4] +xor ebp, edi +add ebp, ebx +mov [rsp+8*4+((1024+15-(10)) MOD (16))*4], ebp +add edx, ebp +mov ebx, ecx +xor ecx, [rsp+((1024+7-(10+6)) MOD (8))*4] +and eax, ecx +xor eax, [rsp+((1024+7-(10+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add eax, edx +add edx, [rsp+((1024+7-(10+4)) MOD (8))*4] +mov [rsp+((1024+7-(10+4)) MOD (8))*4], edx +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add eax, ebp +mov [rsp+((1024+7-(10)) MOD (8))*4], eax +mov edi, [rsp+((1024+7-(11+2)) MOD (8))*4] +xor edi, [rsp+((1024+7-(11+1)) MOD (8))*4] +and edi, edx +xor edi, [rsp+((1024+7-(11+1)) MOD (8))*4] +mov ebp, edx +ror edx, 6 +ror ebp, 25 +xor ebp, edx +ror edx, 5 +xor ebp, edx +add edi, ebp +mov ebp, [rsp+8*4+((1024+15-((11)-2)) MOD (16))*4] +mov edx, [rsp+8*4+((1024+15-((11)-15)) MOD (16))*4] +mov ebx, ebp +shr ebp, 10 +ror ebx, 17 +xor ebp, ebx +ror ebx, 2 +xor ebx, ebp +add ebx, [rsp+8*4+((1024+15-((11)-7)) MOD (16))*4] +mov ebp, edx +shr ebp, 3 +ror edx, 7 +add ebx, [rsp+8*4+((1024+15-(11)) MOD (16))*4] +xor ebp, edx +add edi, [rsi+(11)*4] +ror edx, 11 +add edi, [rsp+((1024+7-(11)) MOD (8))*4] +xor ebp, edx +add ebp, ebx +mov [rsp+8*4+((1024+15-(11)) MOD (16))*4], ebp +add edi, ebp +mov ebx, eax +xor eax, [rsp+((1024+7-(11+6)) MOD (8))*4] +and ecx, eax +xor ecx, [rsp+((1024+7-(11+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add ecx, edi +add edi, [rsp+((1024+7-(11+4)) MOD (8))*4] +mov [rsp+((1024+7-(11+4)) MOD 
(8))*4], edi +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add ecx, ebp +mov [rsp+((1024+7-(11)) MOD (8))*4], ecx +mov edx, [rsp+((1024+7-(12+2)) MOD (8))*4] +xor edx, [rsp+((1024+7-(12+1)) MOD (8))*4] +and edx, edi +xor edx, [rsp+((1024+7-(12+1)) MOD (8))*4] +mov ebp, edi +ror edi, 6 +ror ebp, 25 +xor ebp, edi +ror edi, 5 +xor ebp, edi +add edx, ebp +mov ebp, [rsp+8*4+((1024+15-((12)-2)) MOD (16))*4] +mov edi, [rsp+8*4+((1024+15-((12)-15)) MOD (16))*4] +mov ebx, ebp +shr ebp, 10 +ror ebx, 17 +xor ebp, ebx +ror ebx, 2 +xor ebx, ebp +add ebx, [rsp+8*4+((1024+15-((12)-7)) MOD (16))*4] +mov ebp, edi +shr ebp, 3 +ror edi, 7 +add ebx, [rsp+8*4+((1024+15-(12)) MOD (16))*4] +xor ebp, edi +add edx, [rsi+(12)*4] +ror edi, 11 +add edx, [rsp+((1024+7-(12)) MOD (8))*4] +xor ebp, edi +add ebp, ebx +mov [rsp+8*4+((1024+15-(12)) MOD (16))*4], ebp +add edx, ebp +mov ebx, ecx +xor ecx, [rsp+((1024+7-(12+6)) MOD (8))*4] +and eax, ecx +xor eax, [rsp+((1024+7-(12+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add eax, edx +add edx, [rsp+((1024+7-(12+4)) MOD (8))*4] +mov [rsp+((1024+7-(12+4)) MOD (8))*4], edx +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add eax, ebp +mov [rsp+((1024+7-(12)) MOD (8))*4], eax +mov edi, [rsp+((1024+7-(13+2)) MOD (8))*4] +xor edi, [rsp+((1024+7-(13+1)) MOD (8))*4] +and edi, edx +xor edi, [rsp+((1024+7-(13+1)) MOD (8))*4] +mov ebp, edx +ror edx, 6 +ror ebp, 25 +xor ebp, edx +ror edx, 5 +xor ebp, edx +add edi, ebp +mov ebp, [rsp+8*4+((1024+15-((13)-2)) MOD (16))*4] +mov edx, [rsp+8*4+((1024+15-((13)-15)) MOD (16))*4] +mov ebx, ebp +shr ebp, 10 +ror ebx, 17 +xor ebp, ebx +ror ebx, 2 +xor ebx, ebp +add ebx, [rsp+8*4+((1024+15-((13)-7)) MOD (16))*4] +mov ebp, edx +shr ebp, 3 +ror edx, 7 +add ebx, [rsp+8*4+((1024+15-(13)) MOD (16))*4] +xor ebp, edx +add edi, [rsi+(13)*4] +ror edx, 11 +add edi, [rsp+((1024+7-(13)) MOD (8))*4] +xor ebp, edx +add ebp, ebx +mov [rsp+8*4+((1024+15-(13)) MOD (16))*4], ebp +add edi, ebp +mov ebx, eax +xor eax, 
[rsp+((1024+7-(13+6)) MOD (8))*4] +and ecx, eax +xor ecx, [rsp+((1024+7-(13+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add ecx, edi +add edi, [rsp+((1024+7-(13+4)) MOD (8))*4] +mov [rsp+((1024+7-(13+4)) MOD (8))*4], edi +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add ecx, ebp +mov [rsp+((1024+7-(13)) MOD (8))*4], ecx +mov edx, [rsp+((1024+7-(14+2)) MOD (8))*4] +xor edx, [rsp+((1024+7-(14+1)) MOD (8))*4] +and edx, edi +xor edx, [rsp+((1024+7-(14+1)) MOD (8))*4] +mov ebp, edi +ror edi, 6 +ror ebp, 25 +xor ebp, edi +ror edi, 5 +xor ebp, edi +add edx, ebp +mov ebp, [rsp+8*4+((1024+15-((14)-2)) MOD (16))*4] +mov edi, [rsp+8*4+((1024+15-((14)-15)) MOD (16))*4] +mov ebx, ebp +shr ebp, 10 +ror ebx, 17 +xor ebp, ebx +ror ebx, 2 +xor ebx, ebp +add ebx, [rsp+8*4+((1024+15-((14)-7)) MOD (16))*4] +mov ebp, edi +shr ebp, 3 +ror edi, 7 +add ebx, [rsp+8*4+((1024+15-(14)) MOD (16))*4] +xor ebp, edi +add edx, [rsi+(14)*4] +ror edi, 11 +add edx, [rsp+((1024+7-(14)) MOD (8))*4] +xor ebp, edi +add ebp, ebx +mov [rsp+8*4+((1024+15-(14)) MOD (16))*4], ebp +add edx, ebp +mov ebx, ecx +xor ecx, [rsp+((1024+7-(14+6)) MOD (8))*4] +and eax, ecx +xor eax, [rsp+((1024+7-(14+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add eax, edx +add edx, [rsp+((1024+7-(14+4)) MOD (8))*4] +mov [rsp+((1024+7-(14+4)) MOD (8))*4], edx +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add eax, ebp +mov [rsp+((1024+7-(14)) MOD (8))*4], eax +mov edi, [rsp+((1024+7-(15+2)) MOD (8))*4] +xor edi, [rsp+((1024+7-(15+1)) MOD (8))*4] +and edi, edx +xor edi, [rsp+((1024+7-(15+1)) MOD (8))*4] +mov ebp, edx +ror edx, 6 +ror ebp, 25 +xor ebp, edx +ror edx, 5 +xor ebp, edx +add edi, ebp +mov ebp, [rsp+8*4+((1024+15-((15)-2)) MOD (16))*4] +mov edx, [rsp+8*4+((1024+15-((15)-15)) MOD (16))*4] +mov ebx, ebp +shr ebp, 10 +ror ebx, 17 +xor ebp, ebx +ror ebx, 2 +xor ebx, ebp +add ebx, [rsp+8*4+((1024+15-((15)-7)) MOD (16))*4] +mov ebp, edx +shr ebp, 3 +ror edx, 7 +add ebx, [rsp+8*4+((1024+15-(15)) MOD (16))*4] +xor ebp, 
edx +add edi, [rsi+(15)*4] +ror edx, 11 +add edi, [rsp+((1024+7-(15)) MOD (8))*4] +xor ebp, edx +add ebp, ebx +mov [rsp+8*4+((1024+15-(15)) MOD (16))*4], ebp +add edi, ebp +mov ebx, eax +xor eax, [rsp+((1024+7-(15+6)) MOD (8))*4] +and ecx, eax +xor ecx, [rsp+((1024+7-(15+6)) MOD (8))*4] +mov ebp, ebx +ror ebx, 2 +add ecx, edi +add edi, [rsp+((1024+7-(15+4)) MOD (8))*4] +mov [rsp+((1024+7-(15+4)) MOD (8))*4], edi +ror ebp, 22 +xor ebp, ebx +ror ebx, 11 +xor ebp, ebx +add ecx, ebp +mov [rsp+((1024+7-(15)) MOD (8))*4], ecx +cmp rsi, [rsp+8*4+16*4+0*8] +jne label1 +mov rcx, [rsp+8*4+16*4+1*8] +movdqa xmm1, XMMWORD PTR [rcx+1*16] +movdqa xmm0, XMMWORD PTR [rcx+0*16] +paddd xmm1, [rsp+((1024+7-(0+3)) MOD (8))*4] +paddd xmm0, [rsp+((1024+7-(0+7)) MOD (8))*4] +movdqa [rcx+1*16], xmm1 +movdqa [rcx+0*16], xmm0 +mov rdx, [rsp+8*4+16*4+2*8] +add rdx, 64 +mov [rsp+8*4+16*4+2*8], rdx +cmp rdx, [rsp+8*4+16*4+3*8] +jne label0 +add rsp, 8*4 + 16*4 + 4*8 + 8 +pop rbp +pop rbx +pop rdi +pop rsi +ret +X86_SHA256_HashBlocks ENDP + +_TEXT ENDS +END |