summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Readme.txt9
-rw-r--r--TestVectors/all.txt50
-rw-r--r--asn.h4
-rw-r--r--config.h2
-rw-r--r--cryptlib.cpp13
-rw-r--r--cryptlib.h30
-rwxr-xr-xcryptlib.vcproj8
-rw-r--r--datatest.cpp12
-rw-r--r--files.h4
-rw-r--r--filters.cpp20
-rw-r--r--filters.h44
-rw-r--r--fltrimpl.h2
-rw-r--r--iterhash.cpp16
-rw-r--r--iterhash.h4
-rw-r--r--mqueue.h6
-rw-r--r--queue.cpp8
-rw-r--r--queue.h8
-rw-r--r--rijndael.cpp18
-rw-r--r--secblock.h2
-rw-r--r--sha.cpp385
-rw-r--r--sha.h14
-rw-r--r--simple.h18
-rw-r--r--test.cpp4
-rwxr-xr-xvmac.cpp9
-rw-r--r--x64dll.asm2620
25 files changed, 2474 insertions, 836 deletions
diff --git a/Readme.txt b/Readme.txt
index b4e329a..4af41c4 100644
--- a/Readme.txt
+++ b/Readme.txt
@@ -78,10 +78,9 @@ The following compilers are supported for this release. Please visit
http://www.cryptopp.com the most up to date build instructions and porting notes.
* MSVC 6.0 - 2008
- * GCC 3.3 - 4.2
- * Borland C++Builder 2006 - 2007
- * Intel C++ Compiler 9.1 - 10.0
- * Sun Studio 11 - 12 (CC 5.8 - 5.9)
+ * GCC 3.3 - 4.3
+ * Intel C++ Compiler 9 - 11
+ * Sun Studio 12 (CC 5.9)
*** Important Usage Notes ***
@@ -415,7 +414,7 @@ the mailing list.
5.6 - added AuthenticatedSymmetricCipher interface class and Filter wrappers
- added CCM, GCM (with SSE2 assembly), CMAC, and SEED
- added support for variable length IVs
- - improved AES speed on x86 and x64
+ - improved AES and SHA-256 speed on x86 and x64
- fixed run-time validation error on x86-64 with GCC 4.3.2 -O2
- fixed HashFilter bug when putMessage=true
- fixed warnings with GCC 4.3
diff --git a/TestVectors/all.txt b/TestVectors/all.txt
index f26df0d..45aa4fb 100644
--- a/TestVectors/all.txt
+++ b/TestVectors/all.txt
@@ -1,27 +1,27 @@
AlgorithmType: FileList
Name: all.txt collection
-Test: tea.txt
-Test: camellia.txt
-Test: shacal2.txt
-Test: ttmac.txt
-Test: whrlpool.txt
-Test: dlies.txt
-Test: dsa.txt
-Test: dsa_1363.txt
-Test: esign.txt
-Test: hmac.txt
-Test: nr.txt
-Test: rsa_oaep.txt
-Test: rsa_pkcs1_1_5.txt
-Test: rsa_pss.txt
-Test: rw.txt
-Test: seal.txt
-Test: sha.txt
-Test: panama.txt
-Test: aes.txt
-Test: salsa.txt
-Test: vmac.txt
-Test: sosemanuk.txt
-Test: ccm.txt
-Test: gcm.txt
-Test: cmac.txt
+Test: TestVectors/tea.txt
+Test: TestVectors/camellia.txt
+Test: TestVectors/shacal2.txt
+Test: TestVectors/ttmac.txt
+Test: TestVectors/whrlpool.txt
+Test: TestVectors/dlies.txt
+Test: TestVectors/dsa.txt
+Test: TestVectors/dsa_1363.txt
+Test: TestVectors/esign.txt
+Test: TestVectors/hmac.txt
+Test: TestVectors/nr.txt
+Test: TestVectors/rsa_oaep.txt
+Test: TestVectors/rsa_pkcs1_1_5.txt
+Test: TestVectors/rsa_pss.txt
+Test: TestVectors/rw.txt
+Test: TestVectors/seal.txt
+Test: TestVectors/sha.txt
+Test: TestVectors/panama.txt
+Test: TestVectors/aes.txt
+Test: TestVectors/salsa.txt
+Test: TestVectors/vmac.txt
+Test: TestVectors/sosemanuk.txt
+Test: TestVectors/ccm.txt
+Test: TestVectors/gcm.txt
+Test: TestVectors/cmac.txt
diff --git a/asn.h b/asn.h
index ab92991..c35126b 100644
--- a/asn.h
+++ b/asn.h
@@ -138,8 +138,8 @@ public:
byte PeekByte() const;
void CheckByte(byte b);
- size_t TransferTo2(BufferedTransformation &target, lword &transferBytes, const std::string &channel=NULL_CHANNEL, bool blocking=true);
- size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=NULL_CHANNEL, bool blocking=true) const;
+ size_t TransferTo2(BufferedTransformation &target, lword &transferBytes, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true);
+ size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true) const;
// call this to denote end of sequence
void MessageEnd();
diff --git a/config.h b/config.h
index 2805953..838d5a9 100644
--- a/config.h
+++ b/config.h
@@ -311,7 +311,7 @@ NAMESPACE_END
#endif
// how to declare class constants
-#if defined(_MSC_VER) && _MSC_VER <= 1300
+#if (defined(_MSC_VER) && _MSC_VER <= 1300) || defined(__INTEL_COMPILER)
# define CRYPTOPP_CONSTANT(x) enum {x};
#else
# define CRYPTOPP_CONSTANT(x) static const int x;
diff --git a/cryptlib.cpp b/cryptlib.cpp
index b84a085..0e5bd24 100644
--- a/cryptlib.cpp
+++ b/cryptlib.cpp
@@ -26,7 +26,10 @@ CRYPTOPP_COMPILE_ASSERT(sizeof(word64) == 8);
CRYPTOPP_COMPILE_ASSERT(sizeof(dword) == 2*sizeof(word));
#endif
-const std::string BufferedTransformation::NULL_CHANNEL;
+const std::string DEFAULT_CHANNEL;
+const std::string AAD_CHANNEL = "AAD";
+const std::string &BufferedTransformation::NULL_CHANNEL = DEFAULT_CHANNEL;
+
const NullNameValuePairs g_nullNameValuePairs;
BufferedTransformation & TheBitBucket()
@@ -254,12 +257,12 @@ word32 RandomNumberGenerator::GenerateWord32(word32 min, word32 max)
void RandomNumberGenerator::GenerateBlock(byte *output, size_t size)
{
ArraySink s(output, size);
- GenerateIntoBufferedTransformation(s, BufferedTransformation::NULL_CHANNEL, size);
+ GenerateIntoBufferedTransformation(s, DEFAULT_CHANNEL, size);
}
void RandomNumberGenerator::DiscardBytes(size_t n)
{
- GenerateIntoBufferedTransformation(TheBitBucket(), BufferedTransformation::NULL_CHANNEL, n);
+ GenerateIntoBufferedTransformation(TheBitBucket(), DEFAULT_CHANNEL, n);
}
void RandomNumberGenerator::GenerateIntoBufferedTransformation(BufferedTransformation &target, const std::string &channel, lword length)
@@ -593,12 +596,12 @@ size_t BufferedTransformation::ChannelPutWord32(const std::string &channel, word
size_t BufferedTransformation::PutWord16(word16 value, ByteOrder order, bool blocking)
{
- return ChannelPutWord16(NULL_CHANNEL, value, order, blocking);
+ return ChannelPutWord16(DEFAULT_CHANNEL, value, order, blocking);
}
size_t BufferedTransformation::PutWord32(word32 value, ByteOrder order, bool blocking)
{
- return ChannelPutWord32(NULL_CHANNEL, value, order, blocking);
+ return ChannelPutWord32(DEFAULT_CHANNEL, value, order, blocking);
}
size_t BufferedTransformation::PeekWord16(word16 &value, ByteOrder order) const
diff --git a/cryptlib.h b/cryptlib.h
index a6b4aaa..330ce2b 100644
--- a/cryptlib.h
+++ b/cryptlib.h
@@ -746,6 +746,12 @@ public:
bool Wait(unsigned long milliseconds, CallStack const& callStack);
};
+//! the default channel for BufferedTransformation, equal to the empty string
+extern const std::string DEFAULT_CHANNEL;
+
+//! channel for additional authenticated data, equal to "AAD"
+extern const std::string AAD_CHANNEL;
+
//! interface for buffered transformations
/*! BufferedTransformation is a generalization of BlockTransformation,
@@ -776,7 +782,7 @@ class CRYPTOPP_DLL CRYPTOPP_NO_VTABLE BufferedTransformation : public Algorithm,
{
public:
// placed up here for CW8
- static const std::string NULL_CHANNEL; // the empty string ""
+ static const std::string &NULL_CHANNEL; // same as DEFAULT_CHANNEL, for backwards compatibility
BufferedTransformation() : Algorithm(false) {}
@@ -903,18 +909,18 @@ public:
size_t PeekWord32(word32 &value, ByteOrder order=BIG_ENDIAN_ORDER) const;
//! move transferMax bytes of the buffered output to target as input
- lword TransferTo(BufferedTransformation &target, lword transferMax=LWORD_MAX, const std::string &channel=NULL_CHANNEL)
+ lword TransferTo(BufferedTransformation &target, lword transferMax=LWORD_MAX, const std::string &channel=DEFAULT_CHANNEL)
{TransferTo2(target, transferMax, channel); return transferMax;}
//! discard skipMax bytes from the output buffer
virtual lword Skip(lword skipMax=LWORD_MAX);
//! copy copyMax bytes of the buffered output to target as input
- lword CopyTo(BufferedTransformation &target, lword copyMax=LWORD_MAX, const std::string &channel=NULL_CHANNEL) const
+ lword CopyTo(BufferedTransformation &target, lword copyMax=LWORD_MAX, const std::string &channel=DEFAULT_CHANNEL) const
{return CopyRangeTo(target, 0, copyMax, channel);}
//! copy copyMax bytes of the buffered output, starting at position (relative to current position), to target as input
- lword CopyRangeTo(BufferedTransformation &target, lword position, lword copyMax=LWORD_MAX, const std::string &channel=NULL_CHANNEL) const
+ lword CopyRangeTo(BufferedTransformation &target, lword position, lword copyMax=LWORD_MAX, const std::string &channel=DEFAULT_CHANNEL) const
{lword i = position; CopyRangeTo2(target, i, i+copyMax, channel); return i-position;}
#ifdef CRYPTOPP_MAINTAIN_BACKWARDS_COMPATIBILITY
@@ -939,18 +945,18 @@ public:
//! skip count number of messages
virtual unsigned int SkipMessages(unsigned int count=UINT_MAX);
//!
- unsigned int TransferMessagesTo(BufferedTransformation &target, unsigned int count=UINT_MAX, const std::string &channel=NULL_CHANNEL)
+ unsigned int TransferMessagesTo(BufferedTransformation &target, unsigned int count=UINT_MAX, const std::string &channel=DEFAULT_CHANNEL)
{TransferMessagesTo2(target, count, channel); return count;}
//!
- unsigned int CopyMessagesTo(BufferedTransformation &target, unsigned int count=UINT_MAX, const std::string &channel=NULL_CHANNEL) const;
+ unsigned int CopyMessagesTo(BufferedTransformation &target, unsigned int count=UINT_MAX, const std::string &channel=DEFAULT_CHANNEL) const;
//!
virtual void SkipAll();
//!
- void TransferAllTo(BufferedTransformation &target, const std::string &channel=NULL_CHANNEL)
+ void TransferAllTo(BufferedTransformation &target, const std::string &channel=DEFAULT_CHANNEL)
{TransferAllTo2(target, channel);}
//!
- void CopyAllTo(BufferedTransformation &target, const std::string &channel=NULL_CHANNEL) const;
+ void CopyAllTo(BufferedTransformation &target, const std::string &channel=DEFAULT_CHANNEL) const;
virtual bool GetNextMessageSeries() {return false;}
virtual unsigned int NumberOfMessagesInThisSeries() const {return NumberOfMessages();}
@@ -960,13 +966,13 @@ public:
//! \name NON-BLOCKING TRANSFER OF OUTPUT
//@{
//! upon return, byteCount contains number of bytes that have finished being transfered, and returns the number of bytes left in the current transfer block
- virtual size_t TransferTo2(BufferedTransformation &target, lword &byteCount, const std::string &channel=NULL_CHANNEL, bool blocking=true) =0;
+ virtual size_t TransferTo2(BufferedTransformation &target, lword &byteCount, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true) =0;
//! upon return, begin contains the start position of data yet to be finished copying, and returns the number of bytes left in the current transfer block
- virtual size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=NULL_CHANNEL, bool blocking=true) const =0;
+ virtual size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true) const =0;
//! upon return, messageCount contains number of messages that have finished being transfered, and returns the number of bytes left in the current transfer block
- size_t TransferMessagesTo2(BufferedTransformation &target, unsigned int &messageCount, const std::string &channel=NULL_CHANNEL, bool blocking=true);
+ size_t TransferMessagesTo2(BufferedTransformation &target, unsigned int &messageCount, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true);
//! returns the number of bytes left in the current transfer block
- size_t TransferAllTo2(BufferedTransformation &target, const std::string &channel=NULL_CHANNEL, bool blocking=true);
+ size_t TransferAllTo2(BufferedTransformation &target, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true);
//@}
//! \name CHANNELS
diff --git a/cryptlib.vcproj b/cryptlib.vcproj
index a0458ff..cb34a4c 100755
--- a/cryptlib.vcproj
+++ b/cryptlib.vcproj
@@ -8528,7 +8528,7 @@
>
<Tool
Name="VCCustomBuildTool"
- CommandLine="ml64.exe /c /nologo /Fo&quot;$(IntDir)\x64dll.obj&quot; /Zi &quot;$(InputPath)&quot;"
+ CommandLine="ml64.exe /c /nologo /Fo&quot;$(IntDir)\x64dll.obj&quot; /Zi &quot;$(InputPath)&quot;&#x0D;&#x0A;"
Outputs="$(IntDir)\x64dll.obj"
/>
</FileConfiguration>
@@ -8546,7 +8546,7 @@
>
<Tool
Name="VCCustomBuildTool"
- CommandLine="ml64.exe /c /nologo /Fo&quot;$(IntDir)\x64dll.obj&quot; /Zi &quot;$(InputPath)&quot;"
+ CommandLine="ml64.exe /c /nologo /Fo&quot;$(IntDir)\x64dll.obj&quot; /Zi &quot;$(InputPath)&quot;&#x0D;&#x0A;"
Outputs="$(IntDir)\x64dll.obj"
/>
</FileConfiguration>
@@ -8563,7 +8563,7 @@
>
<Tool
Name="VCCustomBuildTool"
- CommandLine="ml64.exe /c /nologo /Fo&quot;$(IntDir)\x64dll.obj&quot; /Zi &quot;$(InputPath)&quot;"
+ CommandLine="ml64.exe /c /nologo /Fo&quot;$(IntDir)\x64dll.obj&quot; /Zi &quot;$(InputPath)&quot;&#x0D;&#x0A;"
Outputs="$(IntDir)\x64dll.obj"
/>
</FileConfiguration>
@@ -8581,7 +8581,7 @@
>
<Tool
Name="VCCustomBuildTool"
- CommandLine="ml64.exe /c /nologo /Fo&quot;$(IntDir)\x64dll.obj&quot; /Zi &quot;$(InputPath)&quot;"
+ CommandLine="ml64.exe /c /nologo /Fo&quot;$(IntDir)\x64dll.obj&quot; /Zi &quot;$(InputPath)&quot;&#x0D;&#x0A;"
Outputs="$(IntDir)\x64dll.obj"
/>
</FileConfiguration>
diff --git a/datatest.cpp b/datatest.cpp
index a028244..3e97a3e 100644
--- a/datatest.cpp
+++ b/datatest.cpp
@@ -410,19 +410,19 @@ void TestAuthenticatedSymmetricCipher(TestData &v, const NameValuePairs &overrid
if (macAtBegin)
sm.TransferTo(df);
- sh.CopyTo(df, LWORD_MAX, "AAD");
+ sh.CopyTo(df, LWORD_MAX, AAD_CHANNEL);
sc.TransferTo(df);
- sf.CopyTo(df, LWORD_MAX, "AAD");
+ sf.CopyTo(df, LWORD_MAX, AAD_CHANNEL);
if (!macAtBegin)
sm.TransferTo(df);
df.MessageEnd();
- sh.TransferTo(ef, sh.MaxRetrievable()/2+1, "AAD");
- sh.TransferTo(ef, LWORD_MAX, "AAD");
+ sh.TransferTo(ef, sh.MaxRetrievable()/2+1, AAD_CHANNEL);
+ sh.TransferTo(ef, LWORD_MAX, AAD_CHANNEL);
sp.TransferTo(ef, sp.MaxRetrievable()/2+1);
sp.TransferTo(ef);
- sf.TransferTo(ef, sf.MaxRetrievable()/2+1, "AAD");
- sf.TransferTo(ef, LWORD_MAX, "AAD");
+ sf.TransferTo(ef, sf.MaxRetrievable()/2+1, AAD_CHANNEL);
+ sf.TransferTo(ef, LWORD_MAX, AAD_CHANNEL);
ef.MessageEnd();
if (test == "Encrypt" && encrypted != ciphertext+mac)
diff --git a/files.h b/files.h
index d98d4b6..2c4e2b8 100644
--- a/files.h
+++ b/files.h
@@ -31,8 +31,8 @@ public:
std::istream* GetStream() {return m_stream;}
lword MaxRetrievable() const;
- size_t TransferTo2(BufferedTransformation &target, lword &transferBytes, const std::string &channel=NULL_CHANNEL, bool blocking=true);
- size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=NULL_CHANNEL, bool blocking=true) const;
+ size_t TransferTo2(BufferedTransformation &target, lword &transferBytes, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true);
+ size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true) const;
lword Skip(lword skipMax=ULONG_MAX);
private:
diff --git a/filters.cpp b/filters.cpp
index 9a749f4..083dfd3 100644
--- a/filters.cpp
+++ b/filters.cpp
@@ -596,7 +596,7 @@ void StreamTransformationFilter::NextPutMultiple(const byte *inString, size_t le
do
{
size_t len = m_optimalBufferSize;
- byte *space = HelpCreatePutSpace(*AttachedTransformation(), NULL_CHANNEL, s, length, len);
+ byte *space = HelpCreatePutSpace(*AttachedTransformation(), DEFAULT_CHANNEL, s, length, len);
if (len < length)
{
if (len == m_optimalBufferSize)
@@ -636,7 +636,7 @@ void StreamTransformationFilter::LastPut(const byte *inString, size_t length)
{
// do padding
size_t blockSize = STDMAX(minLastBlockSize, (size_t)m_cipher.MandatoryBlockSize());
- space = HelpCreatePutSpace(*AttachedTransformation(), NULL_CHANNEL, blockSize);
+ space = HelpCreatePutSpace(*AttachedTransformation(), DEFAULT_CHANNEL, blockSize);
memcpy(space, inString, length);
memset(space + length, 0, blockSize - length);
m_cipher.ProcessLastBlock(space, space, blockSize);
@@ -652,7 +652,7 @@ void StreamTransformationFilter::LastPut(const byte *inString, size_t length)
throw InvalidCiphertext("StreamTransformationFilter: ciphertext length is not a multiple of block size");
}
- space = HelpCreatePutSpace(*AttachedTransformation(), NULL_CHANNEL, length, m_optimalBufferSize);
+ space = HelpCreatePutSpace(*AttachedTransformation(), DEFAULT_CHANNEL, length, m_optimalBufferSize);
m_cipher.ProcessLastBlock(space, inString, length);
AttachedTransformation()->Put(space, length);
}
@@ -664,7 +664,7 @@ void StreamTransformationFilter::LastPut(const byte *inString, size_t length)
unsigned int s;
s = m_cipher.MandatoryBlockSize();
assert(s > 1);
- space = HelpCreatePutSpace(*AttachedTransformation(), NULL_CHANNEL, s, m_optimalBufferSize);
+ space = HelpCreatePutSpace(*AttachedTransformation(), DEFAULT_CHANNEL, s, m_optimalBufferSize);
if (m_cipher.IsForwardTransformation())
{
assert(length < s);
@@ -807,9 +807,9 @@ void HashVerificationFilter::LastPut(const byte *inString, size_t length)
// *************************************************************
AuthenticatedEncryptionFilter::AuthenticatedEncryptionFilter(AuthenticatedSymmetricCipher &c, BufferedTransformation *attachment,
- bool putMessage, int truncatedDigestSize, const std::string &macChannel, BlockPaddingScheme padding)
+ bool putAAD, int truncatedDigestSize, const std::string &macChannel, BlockPaddingScheme padding)
: StreamTransformationFilter(c, attachment, padding, true)
- , m_hf(c, new OutputProxy(*this, false), putMessage, truncatedDigestSize, "AAD", macChannel)
+ , m_hf(c, new OutputProxy(*this, false), putAAD, truncatedDigestSize, AAD_CHANNEL, macChannel)
{
assert(c.IsForwardTransformation());
}
@@ -825,7 +825,7 @@ byte * AuthenticatedEncryptionFilter::ChannelCreatePutSpace(const std::string &c
if (channel.empty())
return StreamTransformationFilter::CreatePutSpace(size);
- if (channel == "AAD")
+ if (channel == AAD_CHANNEL)
return m_hf.CreatePutSpace(size);
throw InvalidChannelName("AuthenticatedEncryptionFilter", channel);
@@ -836,7 +836,7 @@ size_t AuthenticatedEncryptionFilter::ChannelPut2(const std::string &channel, co
if (channel.empty())
return StreamTransformationFilter::Put2(begin, length, messageEnd, blocking);
- if (channel == "AAD")
+ if (channel == AAD_CHANNEL)
return m_hf.Put2(begin, length, 0, blocking);
throw InvalidChannelName("AuthenticatedEncryptionFilter", channel);
@@ -876,7 +876,7 @@ byte * AuthenticatedDecryptionFilter::ChannelCreatePutSpace(const std::string &c
if (channel.empty())
return m_streamFilter.CreatePutSpace(size);
- if (channel == "AAD")
+ if (channel == AAD_CHANNEL)
return m_hashVerifier.CreatePutSpace(size);
throw InvalidChannelName("AuthenticatedDecryptionFilter", channel);
@@ -891,7 +891,7 @@ size_t AuthenticatedDecryptionFilter::ChannelPut2(const std::string &channel, co
return FilterWithBufferedInput::Put2(begin, length, messageEnd, blocking);
}
- if (channel == "AAD")
+ if (channel == AAD_CHANNEL)
return m_hashVerifier.Put2(begin, length, 0, blocking);
throw InvalidChannelName("AuthenticatedDecryptionFilter", channel);
diff --git a/filters.h b/filters.h
index 0562ad5..7355646 100644
--- a/filters.h
+++ b/filters.h
@@ -22,8 +22,8 @@ public:
const BufferedTransformation *AttachedTransformation() const;
void Detach(BufferedTransformation *newAttachment = NULL);
- size_t TransferTo2(BufferedTransformation &target, lword &transferBytes, const std::string &channel=NULL_CHANNEL, bool blocking=true);
- size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=NULL_CHANNEL, bool blocking=true) const;
+ size_t TransferTo2(BufferedTransformation &target, lword &transferBytes, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true);
+ size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true) const;
void Initialize(const NameValuePairs &parameters=g_nullNameValuePairs, int propagation=-1);
bool Flush(bool hardFlush, int propagation=-1, bool blocking=true);
@@ -38,11 +38,11 @@ protected:
void PropagateInitialize(const NameValuePairs &parameters, int propagation);
- size_t Output(int outputSite, const byte *inString, size_t length, int messageEnd, bool blocking, const std::string &channel=NULL_CHANNEL);
- size_t OutputModifiable(int outputSite, byte *inString, size_t length, int messageEnd, bool blocking, const std::string &channel=NULL_CHANNEL);
- bool OutputMessageEnd(int outputSite, int propagation, bool blocking, const std::string &channel=NULL_CHANNEL);
- bool OutputFlush(int outputSite, bool hardFlush, int propagation, bool blocking, const std::string &channel=NULL_CHANNEL);
- bool OutputMessageSeriesEnd(int outputSite, int propagation, bool blocking, const std::string &channel=NULL_CHANNEL);
+ size_t Output(int outputSite, const byte *inString, size_t length, int messageEnd, bool blocking, const std::string &channel=DEFAULT_CHANNEL);
+ size_t OutputModifiable(int outputSite, byte *inString, size_t length, int messageEnd, bool blocking, const std::string &channel=DEFAULT_CHANNEL);
+ bool OutputMessageEnd(int outputSite, int propagation, bool blocking, const std::string &channel=DEFAULT_CHANNEL);
+ bool OutputFlush(int outputSite, bool hardFlush, int propagation, bool blocking, const std::string &channel=DEFAULT_CHANNEL);
+ bool OutputMessageSeriesEnd(int outputSite, int propagation, bool blocking, const std::string &channel=DEFAULT_CHANNEL);
private:
member_ptr<BufferedTransformation> m_attachment;
@@ -289,7 +289,7 @@ typedef StreamTransformationFilter StreamCipherFilter;
class CRYPTOPP_DLL HashFilter : public Bufferless<Filter>, private FilterPutSpaceHelper
{
public:
- HashFilter(HashTransformation &hm, BufferedTransformation *attachment = NULL, bool putMessage=false, int truncatedDigestSize=-1, const std::string &messagePutChannel=NULL_CHANNEL, const std::string &hashPutChannel=NULL_CHANNEL);
+ HashFilter(HashTransformation &hm, BufferedTransformation *attachment = NULL, bool putMessage=false, int truncatedDigestSize=-1, const std::string &messagePutChannel=DEFAULT_CHANNEL, const std::string &hashPutChannel=DEFAULT_CHANNEL);
std::string AlgorithmName() const {return m_hashModule.AlgorithmName();}
void IsolatedInitialize(const NameValuePairs &parameters);
@@ -315,7 +315,7 @@ public:
: Exception(DATA_INTEGRITY_CHECK_FAILED, "HashVerificationFilter: message hash or MAC not valid") {}
};
- enum Flags {HASH_AT_BEGIN=1, PUT_MESSAGE=2, PUT_HASH=4, PUT_RESULT=8, THROW_EXCEPTION=16, DEFAULT_FLAGS = HASH_AT_BEGIN | PUT_RESULT};
+ enum Flags {HASH_AT_END=0, HASH_AT_BEGIN=1, PUT_MESSAGE=2, PUT_HASH=4, PUT_RESULT=8, THROW_EXCEPTION=16, DEFAULT_FLAGS = HASH_AT_BEGIN | PUT_RESULT};
HashVerificationFilter(HashTransformation &hm, BufferedTransformation *attachment = NULL, word32 flags = DEFAULT_FLAGS, int truncatedDigestSize=-1);
std::string AlgorithmName() const {return m_hashModule.AlgorithmName();}
@@ -345,7 +345,7 @@ class CRYPTOPP_DLL AuthenticatedEncryptionFilter : public StreamTransformationFi
{
public:
/*! See StreamTransformationFilter for documentation on BlockPaddingScheme */
- AuthenticatedEncryptionFilter(AuthenticatedSymmetricCipher &c, BufferedTransformation *attachment = NULL, bool putMessage=false, int truncatedDigestSize=-1, const std::string &macChannel=NULL_CHANNEL, BlockPaddingScheme padding = DEFAULT_PADDING);
+ AuthenticatedEncryptionFilter(AuthenticatedSymmetricCipher &c, BufferedTransformation *attachment = NULL, bool putAAD=false, int truncatedDigestSize=-1, const std::string &macChannel=DEFAULT_CHANNEL, BlockPaddingScheme padding = DEFAULT_PADDING);
void IsolatedInitialize(const NameValuePairs &parameters);
byte * ChannelCreatePutSpace(const std::string &channel, size_t &size);
@@ -361,7 +361,7 @@ protected:
class CRYPTOPP_DLL AuthenticatedDecryptionFilter : public FilterWithBufferedInput, public BlockPaddingSchemeDef
{
public:
- enum Flags {MAC_AT_BEGIN=1, THROW_EXCEPTION=16, DEFAULT_FLAGS = THROW_EXCEPTION};
+ enum Flags {MAC_AT_END=0, MAC_AT_BEGIN=1, THROW_EXCEPTION=16, DEFAULT_FLAGS = THROW_EXCEPTION};
/*! See StreamTransformationFilter for documentation on BlockPaddingScheme */
AuthenticatedDecryptionFilter(AuthenticatedSymmetricCipher &c, BufferedTransformation *attachment = NULL, word32 flags = DEFAULT_FLAGS, int truncatedDigestSize=-1, BlockPaddingScheme padding = DEFAULT_PADDING);
@@ -412,7 +412,7 @@ public:
: Exception(DATA_INTEGRITY_CHECK_FAILED, "VerifierFilter: digital signature not valid") {}
};
- enum Flags {SIGNATURE_AT_BEGIN=1, PUT_MESSAGE=2, PUT_SIGNATURE=4, PUT_RESULT=8, THROW_EXCEPTION=16, DEFAULT_FLAGS = SIGNATURE_AT_BEGIN | PUT_RESULT};
+ enum Flags {SIGNATURE_AT_END=0, SIGNATURE_AT_BEGIN=1, PUT_MESSAGE=2, PUT_SIGNATURE=4, PUT_RESULT=8, THROW_EXCEPTION=16, DEFAULT_FLAGS = SIGNATURE_AT_BEGIN | PUT_RESULT};
SignatureVerificationFilter(const PK_Verifier &verifier, BufferedTransformation *attachment = NULL, word32 flags = DEFAULT_FLAGS);
std::string AlgorithmName() const {return m_verifier.AlgorithmName();}
@@ -517,6 +517,8 @@ public:
bool MessageSeriesEnd(int propagation=-1, bool blocking=true)
{return m_passSignal ? m_owner.AttachedTransformation()->MessageSeriesEnd(propagation, blocking) : false;}
+ byte * ChannelCreatePutSpace(const std::string &channel, size_t &size)
+ {return m_owner.AttachedTransformation()->ChannelCreatePutSpace(channel, size);}
size_t ChannelPut2(const std::string &channel, const byte *begin, size_t length, int messageEnd, bool blocking)
{return m_owner.AttachedTransformation()->ChannelPut2(channel, begin, length, m_passSignal ? messageEnd : 0, blocking);}
size_t ChannelPutModifiable2(const std::string &channel, byte *begin, size_t length, int messageEnd, bool blocking)
@@ -669,8 +671,8 @@ public:
template <class T> StringStore(const T &string)
{StoreInitialize(MakeParameters("InputBuffer", ConstByteArrayParameter(string)));}
- CRYPTOPP_DLL size_t TransferTo2(BufferedTransformation &target, lword &transferBytes, const std::string &channel=NULL_CHANNEL, bool blocking=true);
- CRYPTOPP_DLL size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=NULL_CHANNEL, bool blocking=true) const;
+ CRYPTOPP_DLL size_t TransferTo2(BufferedTransformation &target, lword &transferBytes, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true);
+ CRYPTOPP_DLL size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true) const;
private:
CRYPTOPP_DLL void StoreInitialize(const NameValuePairs &parameters);
@@ -692,8 +694,8 @@ public:
bool AnyRetrievable() const {return MaxRetrievable() != 0;}
lword MaxRetrievable() const {return m_length-m_count;}
- size_t TransferTo2(BufferedTransformation &target, lword &transferBytes, const std::string &channel=NULL_CHANNEL, bool blocking=true);
- size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=NULL_CHANNEL, bool blocking=true) const
+ size_t TransferTo2(BufferedTransformation &target, lword &transferBytes, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true);
+ size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true) const
{
throw NotImplemented("RandomNumberStore: CopyRangeTo2() is not supported by this store");
}
@@ -712,8 +714,8 @@ public:
NullStore(lword size = ULONG_MAX) : m_size(size) {}
void StoreInitialize(const NameValuePairs &parameters) {}
lword MaxRetrievable() const {return m_size;}
- size_t TransferTo2(BufferedTransformation &target, lword &transferBytes, const std::string &channel=NULL_CHANNEL, bool blocking=true);
- size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=NULL_CHANNEL, bool blocking=true) const;
+ size_t TransferTo2(BufferedTransformation &target, lword &transferBytes, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true);
+ size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true) const;
private:
lword m_size;
@@ -756,11 +758,11 @@ public:
void IsolatedInitialize(const NameValuePairs &parameters)
{m_store.IsolatedInitialize(parameters);}
size_t Pump2(lword &byteCount, bool blocking=true)
- {return m_store.TransferTo2(*AttachedTransformation(), byteCount, NULL_CHANNEL, blocking);}
+ {return m_store.TransferTo2(*AttachedTransformation(), byteCount, DEFAULT_CHANNEL, blocking);}
size_t PumpMessages2(unsigned int &messageCount, bool blocking=true)
- {return m_store.TransferMessagesTo2(*AttachedTransformation(), messageCount, NULL_CHANNEL, blocking);}
+ {return m_store.TransferMessagesTo2(*AttachedTransformation(), messageCount, DEFAULT_CHANNEL, blocking);}
size_t PumpAll2(bool blocking=true)
- {return m_store.TransferAllTo2(*AttachedTransformation(), NULL_CHANNEL, blocking);}
+ {return m_store.TransferAllTo2(*AttachedTransformation(), DEFAULT_CHANNEL, blocking);}
bool SourceExhausted() const
{return !m_store.AnyRetrievable() && !m_store.AnyMessages();}
void SetAutoSignalPropagation(int propagation)
diff --git a/fltrimpl.h b/fltrimpl.h
index 40d70ff..4087d7d 100644
--- a/fltrimpl.h
+++ b/fltrimpl.h
@@ -34,7 +34,7 @@
}
#define FILTER_OUTPUT2(site, statement, output, length, messageEnd) \
- FILTER_OUTPUT3(site, statement, output, length, messageEnd, NULL_CHANNEL)
+ FILTER_OUTPUT3(site, statement, output, length, messageEnd, DEFAULT_CHANNEL)
#define FILTER_OUTPUT(site, output, length, messageEnd) \
FILTER_OUTPUT2(site, 0, output, length, messageEnd)
diff --git a/iterhash.cpp b/iterhash.cpp
index 642a7ca..478950c 100644
--- a/iterhash.cpp
+++ b/iterhash.cpp
@@ -132,14 +132,18 @@ template <class T, class BASE> void IteratedHashBase<T, BASE>::TruncatedFinal(by
ByteOrder order = this->GetByteOrder();
PadLastBlock(blockSize - 2*sizeof(HashWordType));
- ConditionalByteReverse<HashWordType>(order, dataBuf, dataBuf, blockSize - 2*sizeof(HashWordType));
+ dataBuf[blockSize/sizeof(T)-2+order] = ConditionalByteReverse(order, this->GetBitCountLo());
+ dataBuf[blockSize/sizeof(T)-1-order] = ConditionalByteReverse(order, this->GetBitCountHi());
- dataBuf[blockSize/sizeof(T)-2] = order ? this->GetBitCountHi() : this->GetBitCountLo();
- dataBuf[blockSize/sizeof(T)-1] = order ? this->GetBitCountLo() : this->GetBitCountHi();
+ HashBlock(dataBuf);
- HashEndianCorrectedBlock(dataBuf);
- ConditionalByteReverse<HashWordType>(order, stateBuf, stateBuf, this->DigestSize());
- memcpy(digest, stateBuf, size);
+ if (IsAligned<HashWordType>(digest) && size%sizeof(HashWordType)==0)
+ ConditionalByteReverse<HashWordType>(order, (HashWordType *)digest, stateBuf, size);
+ else
+ {
+ ConditionalByteReverse<HashWordType>(order, stateBuf, stateBuf, this->DigestSize());
+ memcpy(digest, stateBuf, size);
+ }
this->Restart(); // reinit for next use
}
diff --git a/iterhash.h b/iterhash.h
index 8af3177..cce9e82 100644
--- a/iterhash.h
+++ b/iterhash.h
@@ -76,7 +76,7 @@ protected:
};
//! _
-template <class T_HashWordType, class T_Endianness, unsigned int T_BlockSize, unsigned int T_StateSize, class T_Transform, unsigned int T_DigestSize = 0>
+template <class T_HashWordType, class T_Endianness, unsigned int T_BlockSize, unsigned int T_StateSize, class T_Transform, unsigned int T_DigestSize = 0, bool T_StateAligned = false>
class CRYPTOPP_NO_VTABLE IteratedHashWithStaticTransform
: public ClonableImpl<T_Transform, AlgorithmImpl<IteratedHash<T_HashWordType, T_Endianness, T_BlockSize>, T_Transform> >
{
@@ -90,7 +90,7 @@ protected:
void Init() {T_Transform::InitState(this->m_state);}
T_HashWordType* StateBuf() {return this->m_state;}
- FixedSizeSecBlock<T_HashWordType, T_BlockSize/sizeof(T_HashWordType)> m_state;
+ FixedSizeAlignedSecBlock<T_HashWordType, T_BlockSize/sizeof(T_HashWordType), T_StateAligned> m_state;
};
#ifndef __GNUC__
diff --git a/mqueue.h b/mqueue.h
index a4ee117..b46f67d 100644
--- a/mqueue.h
+++ b/mqueue.h
@@ -35,8 +35,8 @@ public:
bool AnyRetrievable() const
{return m_lengths.front() > 0;}
- size_t TransferTo2(BufferedTransformation &target, lword &transferBytes, const std::string &channel=NULL_CHANNEL, bool blocking=true);
- size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=NULL_CHANNEL, bool blocking=true) const;
+ size_t TransferTo2(BufferedTransformation &target, lword &transferBytes, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true);
+ size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true) const;
lword TotalBytesRetrievable() const
{return m_queue.MaxRetrievable();}
@@ -49,7 +49,7 @@ public:
unsigned int NumberOfMessageSeries() const
{return (unsigned int)m_messageCounts.size()-1;}
- unsigned int CopyMessagesTo(BufferedTransformation &target, unsigned int count=UINT_MAX, const std::string &channel=NULL_CHANNEL) const;
+ unsigned int CopyMessagesTo(BufferedTransformation &target, unsigned int count=UINT_MAX, const std::string &channel=DEFAULT_CHANNEL) const;
const byte * Spy(size_t &contiguousSize) const;
diff --git a/queue.cpp b/queue.cpp
index f123201..ae05a78 100644
--- a/queue.cpp
+++ b/queue.cpp
@@ -64,14 +64,14 @@ public:
return len;
}
- inline size_t CopyTo(BufferedTransformation &target, const std::string &channel=BufferedTransformation::NULL_CHANNEL) const
+ inline size_t CopyTo(BufferedTransformation &target, const std::string &channel=DEFAULT_CHANNEL) const
{
size_t len = m_tail-m_head;
target.ChannelPut(channel, buf+m_head, len);
return len;
}
- inline size_t CopyTo(BufferedTransformation &target, size_t copyMax, const std::string &channel=BufferedTransformation::NULL_CHANNEL) const
+ inline size_t CopyTo(BufferedTransformation &target, size_t copyMax, const std::string &channel=DEFAULT_CHANNEL) const
{
size_t len = STDMIN(copyMax, m_tail-m_head);
target.ChannelPut(channel, buf+m_head, len);
@@ -92,7 +92,7 @@ public:
return len;
}
- inline size_t TransferTo(BufferedTransformation &target, const std::string &channel=BufferedTransformation::NULL_CHANNEL)
+ inline size_t TransferTo(BufferedTransformation &target, const std::string &channel=DEFAULT_CHANNEL)
{
size_t len = m_tail-m_head;
target.ChannelPutModifiable(channel, buf+m_head, len);
@@ -100,7 +100,7 @@ public:
return len;
}
- inline size_t TransferTo(BufferedTransformation &target, lword transferMax, const std::string &channel=BufferedTransformation::NULL_CHANNEL)
+ inline size_t TransferTo(BufferedTransformation &target, lword transferMax, const std::string &channel=DEFAULT_CHANNEL)
{
size_t len = UnsignedMin(m_tail-m_head, transferMax);
target.ChannelPutModifiable(channel, buf+m_head, len);
diff --git a/queue.h b/queue.h
index e9e195c..7e17200 100644
--- a/queue.h
+++ b/queue.h
@@ -35,8 +35,8 @@ public:
size_t Peek(byte &outByte) const;
size_t Peek(byte *outString, size_t peekMax) const;
- size_t TransferTo2(BufferedTransformation &target, lword &transferBytes, const std::string &channel=NULL_CHANNEL, bool blocking=true);
- size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=NULL_CHANNEL, bool blocking=true) const;
+ size_t TransferTo2(BufferedTransformation &target, lword &transferBytes, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true);
+ size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true) const;
// these member functions are not inherited
void SetNodeSize(size_t nodeSize);
@@ -80,8 +80,8 @@ public:
size_t Peek(byte &outByte) const;
size_t Peek(byte *outString, size_t peekMax) const;
- size_t TransferTo2(BufferedTransformation &target, lword &transferBytes, const std::string &channel=NULL_CHANNEL, bool blocking=true);
- size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=NULL_CHANNEL, bool blocking=true) const;
+ size_t TransferTo2(BufferedTransformation &target, lword &transferBytes, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true);
+ size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true) const;
private:
const ByteQueue &m_queue;
diff --git a/rijndael.cpp b/rijndael.cpp
index 589733e..04e1f21 100644
--- a/rijndael.cpp
+++ b/rijndael.cpp
@@ -534,8 +534,10 @@ CRYPTOPP_NAKED void CRYPTOPP_FASTCALL Rijndael_Enc_AdvancedProcessBlocks(void *l
#endif
#if CRYPTOPP_BOOL_X86
- AS_PUSH_IF86( bx)
- AS_PUSH_IF86( bp)
+#if !defined(_MSC_VER) || (_MSC_VER < 1300)
+ AS_PUSH_IF86(bx)
+#endif
+ AS_PUSH_IF86(bp)
AS2( mov [ecx+16*12+16*4], esp)
AS2( lea esp, [ecx-512])
#endif
@@ -583,7 +585,7 @@ CRYPTOPP_NAKED void CRYPTOPP_FASTCALL Rijndael_Enc_AdvancedProcessBlocks(void *l
// counter mode one-time setup
AS2( mov WORD_REG(bp), [L_INBLOCKS])
- AS2( movdqa xmm2, [WORD_REG(bp)]) // counter
+ AS2( movdqu xmm2, [WORD_REG(bp)]) // counter
AS2( pxor xmm2, xmm1)
AS2( psrldq xmm1, 14)
AS2( movd eax, xmm1)
@@ -843,11 +845,13 @@ CRYPTOPP_NAKED void CRYPTOPP_FASTCALL Rijndael_Enc_AdvancedProcessBlocks(void *l
#else
AS2( mov rbp, [L_BP])
#endif
- AS_POP_IF86( bp)
- AS_POP_IF86( bx)
+ AS_POP_IF86(bp)
+#if !defined(_MSC_VER) || (_MSC_VER < 1300)
+ AS_POP_IF86(bx)
+#endif
#ifndef __GNUC__
- AS_POP_IF86( di)
- AS_POP_IF86( si)
+ AS_POP_IF86(di)
+ AS_POP_IF86(si)
#endif
#ifdef CRYPTOPP_GENERATE_X64_MASM
pop r12
diff --git a/secblock.h b/secblock.h
index c2e9c00..481533c 100644
--- a/secblock.h
+++ b/secblock.h
@@ -459,7 +459,7 @@ public:
};
template <class T, unsigned int S, bool T_Align16 = CRYPTOPP_BOOL_X86 | CRYPTOPP_BOOL_X64>
-class FixedSizeAlignedSecBlock : public FixedSizeSecBlock<T, S, FixedSizeAllocatorWithCleanup<T, S, NullAllocator<word32>, T_Align16> >
+class FixedSizeAlignedSecBlock : public FixedSizeSecBlock<T, S, FixedSizeAllocatorWithCleanup<T, S, NullAllocator<T>, T_Align16> >
{
};
diff --git a/sha.cpp b/sha.cpp
index 7322543..905d12d 100644
--- a/sha.cpp
+++ b/sha.cpp
@@ -3,14 +3,21 @@
// Steve Reid implemented SHA-1. Wei Dai implemented SHA-2.
// Both are in the public domain.
+// use "cl /EP /P /DCRYPTOPP_GENERATE_X64_MASM sha.cpp" to generate MASM code
+
#include "pch.h"
#ifndef CRYPTOPP_IMPORTS
+#ifndef CRYPTOPP_GENERATE_X64_MASM
#include "sha.h"
#include "misc.h"
#include "cpu.h"
+#if CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE
+#include <emmintrin.h>
+#endif
+
NAMESPACE_BEGIN(CryptoPP)
// start of Steve Reid's code
@@ -93,7 +100,7 @@ void SHA256::InitState(HashWordType *state)
memcpy(state, s, sizeof(s));
}
-static const word32 SHA256_K[64] = {
+extern const word32 SHA256_K[64] = {
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
@@ -112,10 +119,333 @@ static const word32 SHA256_K[64] = {
0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};
+#endif // #ifndef CRYPTOPP_GENERATE_X64_MASM
+
+#if defined(CRYPTOPP_X86_ASM_AVAILABLE) || defined(CRYPTOPP_GENERATE_X64_MASM)
+
+#pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code
+
+// Compresses one or more 64-byte blocks starting at `data` into the eight-word SHA-256 `state`.
+// `len` is a byte count; data+len is stored as the end pointer and the block loop runs until the
+// data pointer reaches it, always processing at least one block. Input words are byte-swapped
+// (bswap) while being copied into the local message schedule.
+// On 32-bit x86, bit 0 of data+len selects the scalar path labelled "non-SSE2" below; callers in
+// this file pass `len - !HasSSE2()` to set it. The SSE2 path loads and stores `state` with movdqa,
+// so `state` must be 16-byte aligned on that path.
+static void CRYPTOPP_FASTCALL X86_SHA256_HashBlocks(word32 *state, const word32 *data, size_t len)
+{
+ #define LOCALS_SIZE 8*4 + 16*4 + 4*WORD_SZ
+ #define H(i) [BASE+ASM_MOD(1024+7-(i),8)*4]
+ #define G(i) H(i+1)
+ #define F(i) H(i+2)
+ #define E(i) H(i+3)
+ #define D(i) H(i+4)
+ #define C(i) H(i+5)
+ #define B(i) H(i+6)
+ #define A(i) H(i+7)
+ #define Wt(i) BASE+8*4+ASM_MOD(1024+15-(i),16)*4
+ #define Wt_2(i) Wt((i)-2)
+ #define Wt_15(i) Wt((i)-15)
+ #define Wt_7(i) Wt((i)-7)
+ #define K_END [BASE+8*4+16*4+0*WORD_SZ]
+ #define STATE_SAVE [BASE+8*4+16*4+1*WORD_SZ]
+ #define DATA_SAVE [BASE+8*4+16*4+2*WORD_SZ]
+ #define DATA_END [BASE+8*4+16*4+3*WORD_SZ]
+ #define Kt(i) WORD_REG(si)+(i)*4
+#if CRYPTOPP_BOOL_X86
+ #define BASE esp+4
+#elif defined(__GNUC__)
+ #define BASE r8
+#else
+ #define BASE rsp
+#endif
+
+#define RA0(i, edx, edi) \
+ AS2( add edx, [Kt(i)] )\
+ AS2( add edx, [Wt(i)] )\
+ AS2( add edx, H(i) )\
+
+#define RA1(i, edx, edi)
+
+#define RB0(i, edx, edi)
+
+#define RB1(i, edx, edi) \
+ AS2( mov AS_REG_7d, [Wt_2(i)] )\
+ AS2( mov edi, [Wt_15(i)])\
+ AS2( mov ebx, AS_REG_7d )\
+ AS2( shr AS_REG_7d, 10 )\
+ AS2( ror ebx, 17 )\
+ AS2( xor AS_REG_7d, ebx )\
+ AS2( ror ebx, 2 )\
+ AS2( xor ebx, AS_REG_7d )/* s1(W_t-2) */\
+ AS2( add ebx, [Wt_7(i)])\
+ AS2( mov AS_REG_7d, edi )\
+ AS2( shr AS_REG_7d, 3 )\
+ AS2( ror edi, 7 )\
+ AS2( add ebx, [Wt(i)])/* s1(W_t-2) + W_t-7 + W_t-16 */\
+ AS2( xor AS_REG_7d, edi )\
+ AS2( add edx, [Kt(i)])\
+ AS2( ror edi, 11 )\
+ AS2( add edx, H(i) )\
+ AS2( xor AS_REG_7d, edi )/* s0(W_t-15) */\
+ AS2( add AS_REG_7d, ebx )/* W_t = s1(W_t-2) + W_t-7 + s0(W_t-15) W_t-16*/\
+ AS2( mov [Wt(i)], AS_REG_7d)\
+ AS2( add edx, AS_REG_7d )\
+
+#define ROUND(i, r, eax, ecx, edi, edx)\
+ /* in: edi = E */\
+ /* unused: eax, ecx, temp: ebx, AS_REG_7d, out: edx = T1 */\
+ AS2( mov edx, F(i) )\
+ AS2( xor edx, G(i) )\
+ AS2( and edx, edi )\
+ AS2( xor edx, G(i) )/* Ch(E,F,G) = (G^(E&(F^G))) */\
+ AS2( mov AS_REG_7d, edi )\
+ AS2( ror edi, 6 )\
+ AS2( ror AS_REG_7d, 25 )\
+ RA##r(i, edx, edi )/* H + Wt + Kt + Ch(E,F,G) */\
+ AS2( xor AS_REG_7d, edi )\
+ AS2( ror edi, 5 )\
+ AS2( xor AS_REG_7d, edi )/* S1(E) */\
+ AS2( add edx, AS_REG_7d )/* T1 = S1(E) + Ch(E,F,G) + H + Wt + Kt */\
+ RB##r(i, edx, edi )/* H + Wt + Kt + Ch(E,F,G) */\
+ /* in: ecx = A, eax = B^C, edx = T1 */\
+ /* unused: edx, temp: ebx, AS_REG_7d, out: eax = A, ecx = B^C, edx = E */\
+ AS2( mov ebx, ecx )\
+ AS2( xor ecx, B(i) )/* A^B */\
+ AS2( and eax, ecx )\
+ AS2( xor eax, B(i) )/* Maj(A,B,C) = B^((A^B)&(B^C) */\
+ AS2( mov AS_REG_7d, ebx )\
+ AS2( ror ebx, 2 )\
+ AS2( add eax, edx )/* T1 + Maj(A,B,C) */\
+ AS2( add edx, D(i) )\
+ AS2( mov D(i), edx )\
+ AS2( ror AS_REG_7d, 22 )\
+ AS2( xor AS_REG_7d, ebx )\
+ AS2( ror ebx, 11 )\
+ AS2( xor AS_REG_7d, ebx )\
+ AS2( add eax, AS_REG_7d )/* T1 + S0(A) + Maj(A,B,C) */\
+ AS2( mov H(i), eax )\
+
+#define SWAP_COPY(i) \
+ AS2( mov WORD_REG(bx), [WORD_REG(dx)+i*WORD_SZ])\
+ AS1( bswap WORD_REG(bx))\
+ AS2( mov [Wt(i*(1+CRYPTOPP_BOOL_X64)+CRYPTOPP_BOOL_X64)], WORD_REG(bx))
+
+#if defined(__GNUC__)
+ #if CRYPTOPP_BOOL_X64
+ __m128i workspace[(LOCALS_SIZE+15)/16];
+ #endif
+ __asm__ __volatile__
+ (
+ #if CRYPTOPP_BOOL_X64
+ "movq %4, %%r8;"
+ #endif
+ ".intel_syntax noprefix;"
+#elif defined(CRYPTOPP_GENERATE_X64_MASM)
+ ALIGN 8
+ X86_SHA256_HashBlocks PROC FRAME
+ rex_push_reg rsi
+ push_reg rdi
+ push_reg rbx
+ push_reg rbp
+ alloc_stack(LOCALS_SIZE+8)
+ .endprolog
+ mov rdi, r8
+ lea rsi, [?SHA256_K@CryptoPP@@3QBIB + 48*4]
+#endif
+
+#if CRYPTOPP_BOOL_X86
+ #ifndef __GNUC__
+ AS2( mov edi, [len])
+ AS2( lea WORD_REG(si), [SHA256_K+48*4])
+ #endif
+ #if !defined(_MSC_VER) || (_MSC_VER < 1300)
+ AS_PUSH_IF86(bx)
+ #endif
+
+ AS_PUSH_IF86(bp)
+ AS2( mov ebx, esp)
+ AS2( and esp, -16)
+ AS2( sub WORD_REG(sp), LOCALS_SIZE)
+ AS_PUSH_IF86(bx)
+#endif
+ AS2( mov STATE_SAVE, WORD_REG(cx))
+ AS2( mov DATA_SAVE, WORD_REG(dx))
+ AS2( add WORD_REG(di), WORD_REG(dx))
+ AS2( mov DATA_END, WORD_REG(di))
+ AS2( mov K_END, WORD_REG(si))
+
+#if CRYPTOPP_BOOL_X86
+ AS2( test edi, 1)
+ ASJ( jnz, 2, f)
+#endif
+
+ AS2( movdqa xmm0, XMMWORD_PTR [WORD_REG(cx)+0*16])
+ AS2( movdqa xmm1, XMMWORD_PTR [WORD_REG(cx)+1*16])
+
+#if CRYPTOPP_BOOL_X86
+ ASJ( jmp, 0, f)
+ ASL(2) // non-SSE2
+ AS2( mov esi, ecx)
+ AS2( lea edi, A(0))
+ AS2( mov ecx, 8)
+ AS1( rep movsd)
+ AS2( mov esi, K_END)
+ ASJ( jmp, 3, f)
+#endif
+
+ ASL(0)
+ AS2( movdqa E(0), xmm1)
+ AS2( movdqa A(0), xmm0)
+#if CRYPTOPP_BOOL_X86
+ ASL(3)
+#endif
+ AS2( sub WORD_REG(si), 48*4)
+ SWAP_COPY(0) SWAP_COPY(1) SWAP_COPY(2) SWAP_COPY(3)
+ SWAP_COPY(4) SWAP_COPY(5) SWAP_COPY(6) SWAP_COPY(7)
+#if CRYPTOPP_BOOL_X86
+ SWAP_COPY(8) SWAP_COPY(9) SWAP_COPY(10) SWAP_COPY(11)
+ SWAP_COPY(12) SWAP_COPY(13) SWAP_COPY(14) SWAP_COPY(15)
+#endif
+ AS2( mov edi, E(0)) // E
+ AS2( mov eax, B(0)) // B
+ AS2( xor eax, C(0)) // B^C
+ AS2( mov ecx, A(0)) // A
+
+ ROUND(0, 0, eax, ecx, edi, edx)
+ ROUND(1, 0, ecx, eax, edx, edi)
+ ROUND(2, 0, eax, ecx, edi, edx)
+ ROUND(3, 0, ecx, eax, edx, edi)
+ ROUND(4, 0, eax, ecx, edi, edx)
+ ROUND(5, 0, ecx, eax, edx, edi)
+ ROUND(6, 0, eax, ecx, edi, edx)
+ ROUND(7, 0, ecx, eax, edx, edi)
+ ROUND(8, 0, eax, ecx, edi, edx)
+ ROUND(9, 0, ecx, eax, edx, edi)
+ ROUND(10, 0, eax, ecx, edi, edx)
+ ROUND(11, 0, ecx, eax, edx, edi)
+ ROUND(12, 0, eax, ecx, edi, edx)
+ ROUND(13, 0, ecx, eax, edx, edi)
+ ROUND(14, 0, eax, ecx, edi, edx)
+ ROUND(15, 0, ecx, eax, edx, edi)
+
+ ASL(1)
+ AS2(add WORD_REG(si), 4*16)
+ ROUND(0, 1, eax, ecx, edi, edx)
+ ROUND(1, 1, ecx, eax, edx, edi)
+ ROUND(2, 1, eax, ecx, edi, edx)
+ ROUND(3, 1, ecx, eax, edx, edi)
+ ROUND(4, 1, eax, ecx, edi, edx)
+ ROUND(5, 1, ecx, eax, edx, edi)
+ ROUND(6, 1, eax, ecx, edi, edx)
+ ROUND(7, 1, ecx, eax, edx, edi)
+ ROUND(8, 1, eax, ecx, edi, edx)
+ ROUND(9, 1, ecx, eax, edx, edi)
+ ROUND(10, 1, eax, ecx, edi, edx)
+ ROUND(11, 1, ecx, eax, edx, edi)
+ ROUND(12, 1, eax, ecx, edi, edx)
+ ROUND(13, 1, ecx, eax, edx, edi)
+ ROUND(14, 1, eax, ecx, edi, edx)
+ ROUND(15, 1, ecx, eax, edx, edi)
+ AS2( cmp WORD_REG(si), K_END)
+ ASJ( jne, 1, b)
+
+ AS2( mov WORD_REG(dx), DATA_SAVE)
+ AS2( add WORD_REG(dx), 64)
+ AS2( mov AS_REG_7, STATE_SAVE)
+ AS2( mov DATA_SAVE, WORD_REG(dx))
+
+#if CRYPTOPP_BOOL_X86
+ AS2( test DWORD PTR DATA_END, 1)
+ ASJ( jnz, 4, f)
+#endif
+
+ AS2( movdqa xmm1, XMMWORD_PTR [AS_REG_7+1*16])
+ AS2( movdqa xmm0, XMMWORD_PTR [AS_REG_7+0*16])
+ AS2( paddd xmm1, E(0))
+ AS2( paddd xmm0, A(0))
+ AS2( movdqa [AS_REG_7+1*16], xmm1)
+ AS2( movdqa [AS_REG_7+0*16], xmm0)
+ AS2( cmp WORD_REG(dx), DATA_END)
+ ASJ( jb, 0, b) // unsigned compare: both operands are addresses, which may lie above 0x80000000
+
+#if CRYPTOPP_BOOL_X86
+ ASJ( jmp, 5, f)
+ ASL(4) // non-SSE2
+ AS2( add [AS_REG_7+0*4], ecx) // A
+ AS2( add [AS_REG_7+4*4], edi) // E
+ AS2( mov eax, B(0))
+ AS2( mov ebx, C(0))
+ AS2( mov ecx, D(0))
+ AS2( add [AS_REG_7+1*4], eax)
+ AS2( add [AS_REG_7+2*4], ebx)
+ AS2( add [AS_REG_7+3*4], ecx)
+ AS2( mov eax, F(0))
+ AS2( mov ebx, G(0))
+ AS2( mov ecx, H(0))
+ AS2( add [AS_REG_7+5*4], eax)
+ AS2( add [AS_REG_7+6*4], ebx)
+ AS2( add [AS_REG_7+7*4], ecx)
+ AS2( mov ecx, AS_REG_7d)
+ AS2( cmp WORD_REG(dx), DATA_END)
+ ASJ( jb, 2, b) // unsigned compare, same reason as the SSE2 loop above
+ ASL(5)
+#endif
+
+ AS_POP_IF86(sp)
+ AS_POP_IF86(bp)
+ #if !defined(_MSC_VER) || (_MSC_VER < 1300)
+ AS_POP_IF86(bx)
+ #endif
+
+#ifdef CRYPTOPP_GENERATE_X64_MASM
+ add rsp, LOCALS_SIZE+8
+ pop rbp
+ pop rbx
+ pop rdi
+ pop rsi
+ ret
+ X86_SHA256_HashBlocks ENDP
+#endif
+
+#ifdef __GNUC__
+ ".att_syntax prefix;"
+ :
+ : "c" (state), "d" (data), "S" (SHA256_K+48), "D" (len)
+ #if CRYPTOPP_BOOL_X64
+ , "r" (workspace)
+ #endif
+ : "memory", "cc", "%eax"
+ #if CRYPTOPP_BOOL_X64
+ , "%rbx", "%r8"
+ #endif
+ );
+#endif
+}
+
+#endif // #if defined(CRYPTOPP_X86_ASM_AVAILABLE) || defined(CRYPTOPP_GENERATE_X64_MASM)
+
+#ifndef CRYPTOPP_GENERATE_X64_MASM
+
+#ifdef CRYPTOPP_X64_MASM_AVAILABLE
+extern "C" {
+void CRYPTOPP_FASTCALL X86_SHA256_HashBlocks(word32 *state, const word32 *data, size_t len);
+}
+#endif
+
+// Guard uses the same condition as the HashMultipleBlocks declarations in sha.h and the
+// X86_SHA256_HashBlocks call in SHA256::Transform, so the methods are defined whenever declared.
+#if defined(CRYPTOPP_X86_ASM_AVAILABLE) || defined(CRYPTOPP_X64_MASM_AVAILABLE)
+
+// Hashes the whole blocks at the start of `input` with the assembly routine and returns the
+// number of trailing bytes (length % BLOCKSIZE) left unprocessed.
+// The byte count is masked down to a multiple of BLOCKSIZE (the mask assumes BLOCKSIZE is a power
+// of two) and reduced by one when HasSSE2() is false; X86_SHA256_HashBlocks tests that low bit on
+// 32-bit x86 to pick its non-SSE2 path.
+// NOTE(review): the assembly loop always processes at least one block, so this presumably relies
+// on callers passing length >= BLOCKSIZE - confirm against the caller in iterhash.cpp.
+size_t SHA256::HashMultipleBlocks(const word32 *input, size_t length)
+{
+ X86_SHA256_HashBlocks(m_state, input, (length&(size_t(0)-BLOCKSIZE)) - !HasSSE2());
+ return length % BLOCKSIZE;
+}
+
+// SHA-224 counterpart: same call and return value as SHA256::HashMultipleBlocks, applied to this
+// object's m_state.
+size_t SHA224::HashMultipleBlocks(const word32 *input, size_t length)
+{
+ X86_SHA256_HashBlocks(m_state, input, (length&(size_t(0)-BLOCKSIZE)) - !HasSSE2());
+ return length % BLOCKSIZE;
+}
+
+#endif
+
#define blk2(i) (W[i&15]+=s1(W[(i-2)&15])+W[(i-7)&15]+s0(W[(i-15)&15]))
#define Ch(x,y,z) (z^(x&(y^z)))
-#define Maj(x,y,z) ((x&y)|(z&(x|y)))
+#define Maj(x,y,z) (y^((x^y)&(y^z)))
#define a(i) T[(0-i)&7]
#define b(i) T[(1-i)&7]
@@ -138,6 +468,11 @@ static const word32 SHA256_K[64] = {
void SHA256::Transform(word32 *state, const word32 *data)
{
word32 W[16];
+#if defined(CRYPTOPP_X86_ASM_AVAILABLE) || defined(CRYPTOPP_X64_MASM_AVAILABLE)
+ // this byte reverse is a waste of time, but this function is only called by MDC
+ ByteReverse(W, data, BLOCKSIZE);
+ X86_SHA256_HashBlocks(state, W, BLOCKSIZE - !HasSSE2());
+#else
word32 T[8];
/* Copy context->state[] to working vars */
memcpy(T, state, sizeof(T));
@@ -158,11 +493,12 @@ void SHA256::Transform(word32 *state, const word32 *data)
state[5] += f(0);
state[6] += g(0);
state[7] += h(0);
+#endif
}
/*
// smaller but slower
-void SHA256_Transform(word32 *state, const word32 *data)
+void SHA256::Transform(word32 *state, const word32 *data)
{
word32 T[20];
word32 W[32];
@@ -176,7 +512,7 @@ void SHA256_Transform(word32 *state, const word32 *data)
{
word32 w = data[j];
W[j] = w;
- w += K[j];
+ w += SHA256_K[j];
w += t[7];
w += S1(e);
w += Ch(e, t[5], t[6]);
@@ -196,7 +532,7 @@ void SHA256_Transform(word32 *state, const word32 *data)
i = j&0xf;
word32 w = s1(W[i+16-2]) + s0(W[i+16-15]) + W[i] + W[i+16-7];
W[i+16] = W[i] = w;
- w += K[j];
+ w += SHA256_K[j];
w += t[7];
w += S1(e);
w += Ch(e, t[5], t[6]);
@@ -208,7 +544,7 @@ void SHA256_Transform(word32 *state, const word32 *data)
w = s1(W[(i+1)+16-2]) + s0(W[(i+1)+16-15]) + W[(i+1)] + W[(i+1)+16-7];
W[(i+1)+16] = W[(i+1)] = w;
- w += K[j+1];
+ w += SHA256_K[j+1];
w += (t-1)[7];
w += S1(e);
w += Ch(e, (t-1)[5], (t-1)[6]);
@@ -335,22 +671,16 @@ CRYPTOPP_NAKED static void CRYPTOPP_FASTCALL SHA512_SSE2_Transform(word64 *state
AS2( lea edi, [esp+4+8*8]) // start at middle of state buffer. will decrement pointer each round to avoid copying
AS2( lea esi, [esp+4+20*8+8]) // 16-byte alignment, then add 8
- AS2( movq mm4, [ecx+0*8])
- AS2( movq [edi+0*8], mm4)
- AS2( movq mm0, [ecx+1*8])
- AS2( movq [edi+1*8], mm0)
- AS2( movq mm0, [ecx+2*8])
- AS2( movq [edi+2*8], mm0)
- AS2( movq mm0, [ecx+3*8])
- AS2( movq [edi+3*8], mm0)
- AS2( movq mm5, [ecx+4*8])
- AS2( movq [edi+4*8], mm5)
- AS2( movq mm0, [ecx+5*8])
- AS2( movq [edi+5*8], mm0)
- AS2( movq mm0, [ecx+6*8])
- AS2( movq [edi+6*8], mm0)
- AS2( movq mm0, [ecx+7*8])
- AS2( movq [edi+7*8], mm0)
+ AS2( movdqa xmm0, [ecx+0*16])
+ AS2( movdq2q mm4, xmm0)
+ AS2( movdqa [edi+0*16], xmm0)
+ AS2( movdqa xmm0, [ecx+1*16])
+ AS2( movdqa [edi+1*16], xmm0)
+ AS2( movdqa xmm0, [ecx+2*16])
+ AS2( movdq2q mm5, xmm0)
+ AS2( movdqa [edi+2*16], xmm0)
+ AS2( movdqa xmm0, [ecx+3*16])
+ AS2( movdqa [edi+3*16], xmm0)
ASJ( jmp, 0, f)
#define SSE2_S0_S1(r, a, b, c) \
@@ -475,18 +805,14 @@ CRYPTOPP_NAKED static void CRYPTOPP_FASTCALL SHA512_SSE2_Transform(word64 *state
ASJ( jne, 1, b)
#define SSE2_CombineState(i) \
- AS2( movq mm0, [edi+i*8])\
- AS2( paddq mm0, [ecx+i*8])\
- AS2( movq [ecx+i*8], mm0)
+ AS2( movdqa xmm0, [edi+i*16])\
+ AS2( paddq xmm0, [ecx+i*16])\
+ AS2( movdqa [ecx+i*16], xmm0)
SSE2_CombineState(0)
SSE2_CombineState(1)
SSE2_CombineState(2)
SSE2_CombineState(3)
- SSE2_CombineState(4)
- SSE2_CombineState(5)
- SSE2_CombineState(6)
- SSE2_CombineState(7)
AS1( pop esp)
AS1( emms)
@@ -550,4 +876,5 @@ void SHA512::Transform(word64 *state, const word64 *data)
NAMESPACE_END
+#endif // #ifndef CRYPTOPP_GENERATE_X64_MASM
#endif // #ifndef CRYPTOPP_IMPORTS
diff --git a/sha.h b/sha.h
index 09ef24a..ff580f6 100644
--- a/sha.h
+++ b/sha.h
@@ -17,25 +17,31 @@ public:
typedef SHA1 SHA; // for backwards compatibility
//! implements the SHA-256 standard
-class CRYPTOPP_DLL SHA256 : public IteratedHashWithStaticTransform<word32, BigEndian, 64, 32, SHA256>
+class CRYPTOPP_DLL SHA256 : public IteratedHashWithStaticTransform<word32, BigEndian, 64, 32, SHA256, 32, CRYPTOPP_BOOL_X86||CRYPTOPP_BOOL_X64>
{
public:
+#if defined(CRYPTOPP_X86_ASM_AVAILABLE) || defined(CRYPTOPP_X64_MASM_AVAILABLE)
+ size_t HashMultipleBlocks(const word32 *input, size_t length);
+#endif
static void CRYPTOPP_API InitState(HashWordType *state);
static void CRYPTOPP_API Transform(word32 *digest, const word32 *data);
static const char * CRYPTOPP_API StaticAlgorithmName() {return "SHA-256";}
};
//! implements the SHA-224 standard
-class CRYPTOPP_DLL SHA224 : public IteratedHashWithStaticTransform<word32, BigEndian, 64, 32, SHA224, 28>
+class CRYPTOPP_DLL SHA224 : public IteratedHashWithStaticTransform<word32, BigEndian, 64, 32, SHA224, 28, CRYPTOPP_BOOL_X86||CRYPTOPP_BOOL_X64>
{
public:
+#if defined(CRYPTOPP_X86_ASM_AVAILABLE) || defined(CRYPTOPP_X64_MASM_AVAILABLE)
+ size_t HashMultipleBlocks(const word32 *input, size_t length);
+#endif
static void CRYPTOPP_API InitState(HashWordType *state);
static void CRYPTOPP_API Transform(word32 *digest, const word32 *data) {SHA256::Transform(digest, data);}
static const char * CRYPTOPP_API StaticAlgorithmName() {return "SHA-224";}
};
//! implements the SHA-512 standard
-class CRYPTOPP_DLL SHA512 : public IteratedHashWithStaticTransform<word64, BigEndian, 128, 64, SHA512>
+class CRYPTOPP_DLL SHA512 : public IteratedHashWithStaticTransform<word64, BigEndian, 128, 64, SHA512, 64, CRYPTOPP_BOOL_X86>
{
public:
static void CRYPTOPP_API InitState(HashWordType *state);
@@ -44,7 +50,7 @@ public:
};
//! implements the SHA-384 standard
-class CRYPTOPP_DLL SHA384 : public IteratedHashWithStaticTransform<word64, BigEndian, 128, 64, SHA384, 48>
+class CRYPTOPP_DLL SHA384 : public IteratedHashWithStaticTransform<word64, BigEndian, 128, 64, SHA384, 48, CRYPTOPP_BOOL_X86>
{
public:
static void CRYPTOPP_API InitState(HashWordType *state);
diff --git a/simple.h b/simple.h
index d450876..35fd65a 100644
--- a/simple.h
+++ b/simple.h
@@ -58,7 +58,7 @@ class CRYPTOPP_NO_VTABLE Unflushable : public T
{
public:
bool Flush(bool completeFlush, int propagation=-1, bool blocking=true)
- {return ChannelFlush(this->NULL_CHANNEL, completeFlush, propagation, blocking);}
+ {return ChannelFlush(DEFAULT_CHANNEL, completeFlush, propagation, blocking);}
bool IsolatedFlush(bool hardFlush, bool blocking)
{assert(false); return false;}
bool ChannelFlush(const std::string &channel, bool hardFlush, int propagation=-1, bool blocking=true)
@@ -123,15 +123,15 @@ class CRYPTOPP_NO_VTABLE Multichannel : public CustomFlushPropagation<T>
{
public:
bool Flush(bool hardFlush, int propagation=-1, bool blocking=true)
- {return ChannelFlush(this->NULL_CHANNEL, hardFlush, propagation, blocking);}
+ {return this->ChannelFlush(DEFAULT_CHANNEL, hardFlush, propagation, blocking);}
bool MessageSeriesEnd(int propagation=-1, bool blocking=true)
- {return ChannelMessageSeriesEnd(this->NULL_CHANNEL, propagation, blocking);}
+ {return this->ChannelMessageSeriesEnd(DEFAULT_CHANNEL, propagation, blocking);}
byte * CreatePutSpace(size_t &size)
- {return ChannelCreatePutSpace(this->NULL_CHANNEL, size);}
+ {return this->ChannelCreatePutSpace(DEFAULT_CHANNEL, size);}
size_t Put2(const byte *begin, size_t length, int messageEnd, bool blocking)
- {return ChannelPut2(this->NULL_CHANNEL, begin, length, messageEnd, blocking);}
+ {return this->ChannelPut2(DEFAULT_CHANNEL, begin, length, messageEnd, blocking);}
size_t PutModifiable2(byte *inString, size_t length, int messageEnd, bool blocking)
- {return ChannelPutModifiable2(this->NULL_CHANNEL, inString, length, messageEnd, blocking);}
+ {return this->ChannelPutModifiable2(DEFAULT_CHANNEL, inString, length, messageEnd, blocking);}
// void ChannelMessageSeriesEnd(const std::string &channel, int propagation=-1)
// {PropagateMessageSeriesEnd(propagation, channel);}
@@ -177,7 +177,7 @@ public:
unsigned int NumberOfMessages() const {return m_messageEnd ? 0 : 1;}
bool GetNextMessage();
- unsigned int CopyMessagesTo(BufferedTransformation &target, unsigned int count=UINT_MAX, const std::string &channel=NULL_CHANNEL) const;
+ unsigned int CopyMessagesTo(BufferedTransformation &target, unsigned int count=UINT_MAX, const std::string &channel=DEFAULT_CHANNEL) const;
protected:
virtual void StoreInitialize(const NameValuePairs &parameters) =0;
@@ -189,9 +189,9 @@ protected:
class CRYPTOPP_DLL CRYPTOPP_NO_VTABLE Sink : public BufferedTransformation
{
public:
- size_t TransferTo2(BufferedTransformation &target, lword &transferBytes, const std::string &channel=NULL_CHANNEL, bool blocking=true)
+ size_t TransferTo2(BufferedTransformation &target, lword &transferBytes, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true)
{transferBytes = 0; return 0;}
- size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=NULL_CHANNEL, bool blocking=true) const
+ size_t CopyRangeTo2(BufferedTransformation &target, lword &begin, lword end=LWORD_MAX, const std::string &channel=DEFAULT_CHANNEL, bool blocking=true) const
{return 0;}
};
diff --git a/test.cpp b/test.cpp
index de995fd..683cfb5 100644
--- a/test.cpp
+++ b/test.cpp
@@ -559,7 +559,7 @@ void SecretShareFile(int threshold, int nShares, const char *filename, const cha
channel = WordToString<word32>(i);
fileSinks[i]->Put((byte *)channel.data(), 4);
- channelSwitch->AddRoute(channel, *fileSinks[i], BufferedTransformation::NULL_CHANNEL);
+ channelSwitch->AddRoute(channel, *fileSinks[i], DEFAULT_CHANNEL);
}
source.PumpAll();
@@ -609,7 +609,7 @@ void InformationDisperseFile(int threshold, int nShares, const char *filename)
channel = WordToString<word32>(i);
fileSinks[i]->Put((byte *)channel.data(), 4);
- channelSwitch->AddRoute(channel, *fileSinks[i], BufferedTransformation::NULL_CHANNEL);
+ channelSwitch->AddRoute(channel, *fileSinks[i], DEFAULT_CHANNEL);
}
source.PumpAll();
diff --git a/vmac.cpp b/vmac.cpp
index f71bafb..6b490f9 100755
--- a/vmac.cpp
+++ b/vmac.cpp
@@ -57,12 +57,8 @@ void VMAC_Base::UncheckedSetKey(const byte *userKey, unsigned int keylength, con
/* Fill nh key */
in[0] = 0x80;
- for (i = 0; i < m_nhKeySize()*sizeof(word64); i += blockSize)
- {
- cipher.ProcessBlock(in, out.BytePtr());
- ConditionalByteReverse(BIG_ENDIAN_ORDER, m_nhKey()+i/sizeof(word64), out.begin(), blockSize);
- in[15]++;
- }
+ cipher.AdvancedProcessBlocks(in, NULL, (byte *)m_nhKey(), m_nhKeySize()*sizeof(word64), cipher.BT_InBlockIsCounter);
+ ConditionalByteReverse<word64>(BIG_ENDIAN_ORDER, m_nhKey(), m_nhKey(), m_nhKeySize()*sizeof(word64));
/* Fill poly key */
in[0] = 0xC0;
@@ -137,6 +133,7 @@ void VMAC_Base::Resynchronize(const byte *nonce, int len)
void VMAC_Base::HashEndianCorrectedBlock(const word64 *data) // not meant to be called: asserts, then always throws; `data` is unused
{
assert(false); // flags any call as a programming error when assertions are enabled
+ throw 0; // thrown unconditionally (an int), so the function never returns normally even if assert is compiled out
}
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86
diff --git a/x64dll.asm b/x64dll.asm
index 643dbe4..6b94e1e 100644
--- a/x64dll.asm
+++ b/x64dll.asm
@@ -1,665 +1,1955 @@
-include ksamd64.inc
-EXTERNDEF ?Te@rdtable@CryptoPP@@3PA_KA:FAR
-EXTERNDEF ?g_cacheLineSize@CryptoPP@@3IA:FAR
-.CODE
-
- ALIGN 8
-Baseline_Add PROC
- lea rdx, [rdx+8*rcx]
- lea r8, [r8+8*rcx]
- lea r9, [r9+8*rcx]
- neg rcx ; rcx is negative index
- jz $1@Baseline_Add
- mov rax,[r8+8*rcx]
- add rax,[r9+8*rcx]
- mov [rdx+8*rcx],rax
-$0@Baseline_Add:
- mov rax,[r8+8*rcx+8]
- adc rax,[r9+8*rcx+8]
- mov [rdx+8*rcx+8],rax
- lea rcx,[rcx+2] ; advance index, avoid inc which causes slowdown on Intel Core 2
- jrcxz $1@Baseline_Add ; loop until rcx overflows and becomes zero
- mov rax,[r8+8*rcx]
- adc rax,[r9+8*rcx]
- mov [rdx+8*rcx],rax
- jmp $0@Baseline_Add
-$1@Baseline_Add:
- mov rax, 0
- adc rax, rax ; store carry into rax (return result register)
- ret
-Baseline_Add ENDP
-
- ALIGN 8
-Baseline_Sub PROC
- lea rdx, [rdx+8*rcx]
- lea r8, [r8+8*rcx]
- lea r9, [r9+8*rcx]
- neg rcx ; rcx is negative index
- jz $1@Baseline_Sub
- mov rax,[r8+8*rcx]
- sub rax,[r9+8*rcx]
- mov [rdx+8*rcx],rax
-$0@Baseline_Sub:
- mov rax,[r8+8*rcx+8]
- sbb rax,[r9+8*rcx+8]
- mov [rdx+8*rcx+8],rax
- lea rcx,[rcx+2] ; advance index, avoid inc which causes slowdown on Intel Core 2
- jrcxz $1@Baseline_Sub ; loop until rcx overflows and becomes zero
- mov rax,[r8+8*rcx]
- sbb rax,[r9+8*rcx]
- mov [rdx+8*rcx],rax
- jmp $0@Baseline_Sub
-$1@Baseline_Sub:
- mov rax, 0
- adc rax, rax ; store carry into rax (return result register)
-
- ret
-Baseline_Sub ENDP
-
-ALIGN 8
-Rijndael_Enc_AdvancedProcessBlocks PROC FRAME
-rex_push_reg rsi
-push_reg rdi
-push_reg rbx
-push_reg rbp
-push_reg r12
-.endprolog
-mov r8, rcx
-mov rsi, ?Te@rdtable@CryptoPP@@3PA_KA
-mov rdi, QWORD PTR [?g_cacheLineSize@CryptoPP@@3IA]
-mov rbp, [(r8+16*19)]
-mov rax, 16
-and rax, rbp
-movdqa xmm3, XMMWORD PTR [rdx+16+rax]
-movdqa [(r8+16*12)], xmm3
-lea rax, [rdx+rax+2*16]
-sub rax, rbp
-label0:
-movdqa xmm0, [rax+rbp]
-movdqa XMMWORD PTR [(r8+0)+rbp], xmm0
-add rbp, 16
-cmp rbp, 16*12
-jl label0
-movdqa xmm4, [rax+rbp]
-movdqa xmm1, [rdx]
-mov r11d, [rdx+4*4]
-mov ebx, [rdx+5*4]
-mov ecx, [rdx+6*4]
-mov edx, [rdx+7*4]
-xor rax, rax
-label9:
-mov ebp, [rsi+rax]
-add rax, rdi
-mov ebp, [rsi+rax]
-add rax, rdi
-mov ebp, [rsi+rax]
-add rax, rdi
-mov ebp, [rsi+rax]
-add rax, rdi
-cmp rax, 2048
-jl label9
-lfence
-test DWORD PTR [(r8+16*18+8)], 1
-jz label8
-mov rbp, [(r8+16*14)]
-movdqa xmm2, [rbp]
-pxor xmm2, xmm1
-psrldq xmm1, 14
-movd eax, xmm1
-mov al, BYTE PTR [rbp+15]
-mov r12d, eax
-movd eax, xmm2
-psrldq xmm2, 4
-movd edi, xmm2
-psrldq xmm2, 4
-movzx ebp, al
-xor r11d, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
-movzx ebp, ah
-xor edx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
-shr eax, 16
-movzx ebp, al
-xor ecx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
-movzx ebp, ah
-xor ebx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
-mov eax, edi
-movd edi, xmm2
-psrldq xmm2, 4
-movzx ebp, al
-xor ebx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
-movzx ebp, ah
-xor r11d, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
-shr eax, 16
-movzx ebp, al
-xor edx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
-movzx ebp, ah
-xor ecx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
-mov eax, edi
-movd edi, xmm2
-movzx ebp, al
-xor ecx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
-movzx ebp, ah
-xor ebx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
-shr eax, 16
-movzx ebp, al
-xor r11d, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
-movzx ebp, ah
-xor edx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
-mov eax, edi
-movzx ebp, al
-xor edx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
-movzx ebp, ah
-xor ecx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
-shr eax, 16
-movzx ebp, al
-xor ebx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
-psrldq xmm2, 3
-mov eax, [(r8+16*12)+0*4]
-mov edi, [(r8+16*12)+2*4]
-mov r10d, [(r8+16*12)+3*4]
-movzx ebp, cl
-xor r10d, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
-movzx ebp, bl
-xor edi, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
-movzx ebp, bh
-xor r10d, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
-shr ebx, 16
-movzx ebp, bl
-xor eax, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
-movzx ebp, bh
-mov ebx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
-xor ebx, [(r8+16*12)+1*4]
-movzx ebp, ch
-xor eax, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
-shr ecx, 16
-movzx ebp, dl
-xor eax, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
-movzx ebp, dh
-xor ebx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
-shr edx, 16
-movzx ebp, ch
-xor edi, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
-movzx ebp, cl
-xor ebx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
-movzx ebp, dl
-xor edi, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
-movzx ebp, dh
-xor r10d, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
-movd ecx, xmm2
-mov edx, r11d
-mov [(r8+0)+3*4], r10d
-mov [(r8+0)+0*4], eax
-mov [(r8+0)+1*4], ebx
-mov [(r8+0)+2*4], edi
-jmp label5
-label3:
-mov r11d, [(r8+16*12)+0*4]
-mov ebx, [(r8+16*12)+1*4]
-mov ecx, [(r8+16*12)+2*4]
-mov edx, [(r8+16*12)+3*4]
-label8:
-mov rax, [(r8+16*14)]
-movdqu xmm2, [rax]
-mov rbp, [(r8+16*14)+8]
-movdqu xmm5, [rbp]
-pxor xmm2, xmm1
-pxor xmm2, xmm5
-movd eax, xmm2
-psrldq xmm2, 4
-movd edi, xmm2
-psrldq xmm2, 4
-movzx ebp, al
-xor r11d, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
-movzx ebp, ah
-xor edx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
-shr eax, 16
-movzx ebp, al
-xor ecx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
-movzx ebp, ah
-xor ebx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
-mov eax, edi
-movd edi, xmm2
-psrldq xmm2, 4
-movzx ebp, al
-xor ebx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
-movzx ebp, ah
-xor r11d, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
-shr eax, 16
-movzx ebp, al
-xor edx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
-movzx ebp, ah
-xor ecx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
-mov eax, edi
-movd edi, xmm2
-movzx ebp, al
-xor ecx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
-movzx ebp, ah
-xor ebx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
-shr eax, 16
-movzx ebp, al
-xor r11d, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
-movzx ebp, ah
-xor edx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
-mov eax, edi
-movzx ebp, al
-xor edx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
-movzx ebp, ah
-xor ecx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
-shr eax, 16
-movzx ebp, al
-xor ebx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
-movzx ebp, ah
-xor r11d, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
-mov eax, r11d
-add r8, [(r8+16*19)]
-add r8, 4*16
-jmp label2
-label1:
-mov ecx, r12d
-mov edx, r11d
-mov eax, [(r8+0)+0*4]
-mov ebx, [(r8+0)+1*4]
-xor cl, ch
-and rcx, 255
-label5:
-add r12d, 1
-xor edx, DWORD PTR [rsi+rcx*8+3]
-movzx ebp, dl
-xor ebx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
-movzx ebp, dh
-mov ecx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
-shr edx, 16
-xor ecx, [(r8+0)+2*4]
-movzx ebp, dh
-xor eax, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
-movzx ebp, dl
-mov edx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
-xor edx, [(r8+0)+3*4]
-add r8, [(r8+16*19)]
-add r8, 3*16
-jmp label4
-label2:
-mov r10d, [(r8+0)-4*16+3*4]
-mov edi, [(r8+0)-4*16+2*4]
-movzx ebp, cl
-xor r10d, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
-mov cl, al
-movzx ebp, ah
-xor edi, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
-shr eax, 16
-movzx ebp, bl
-xor edi, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
-movzx ebp, bh
-xor r10d, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
-shr ebx, 16
-movzx ebp, al
-xor r10d, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
-movzx ebp, ah
-mov eax, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
-movzx ebp, bl
-xor eax, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
-movzx ebp, bh
-mov ebx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
-movzx ebp, ch
-xor eax, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
-movzx ebp, cl
-xor ebx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
-shr ecx, 16
-movzx ebp, dl
-xor eax, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
-movzx ebp, dh
-xor ebx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
-shr edx, 16
-movzx ebp, ch
-xor edi, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
-movzx ebp, cl
-xor ebx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
-movzx ebp, dl
-xor edi, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
-movzx ebp, dh
-xor r10d, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
-mov ecx, edi
-xor eax, [(r8+0)-4*16+0*4]
-xor ebx, [(r8+0)-4*16+1*4]
-mov edx, r10d
-label4:
-mov r10d, [(r8+0)-4*16+7*4]
-mov edi, [(r8+0)-4*16+6*4]
-movzx ebp, cl
-xor r10d, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
-mov cl, al
-movzx ebp, ah
-xor edi, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
-shr eax, 16
-movzx ebp, bl
-xor edi, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
-movzx ebp, bh
-xor r10d, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
-shr ebx, 16
-movzx ebp, al
-xor r10d, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
-movzx ebp, ah
-mov eax, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
-movzx ebp, bl
-xor eax, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
-movzx ebp, bh
-mov ebx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
-movzx ebp, ch
-xor eax, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
-movzx ebp, cl
-xor ebx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
-shr ecx, 16
-movzx ebp, dl
-xor eax, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
-movzx ebp, dh
-xor ebx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
-shr edx, 16
-movzx ebp, ch
-xor edi, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
-movzx ebp, cl
-xor ebx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
-movzx ebp, dl
-xor edi, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
-movzx ebp, dh
-xor r10d, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
-mov ecx, edi
-xor eax, [(r8+0)-4*16+4*4]
-xor ebx, [(r8+0)-4*16+5*4]
-mov edx, r10d
-add r8, 32
-test r8, 255
-jnz label2
-sub r8, 16*16
-movzx ebp, ch
-movzx edi, BYTE PTR [rsi+rbp*8+1]
-movzx ebp, dl
-xor edi, DWORD PTR [rsi+rbp*8+0]
-mov WORD PTR [(r8+16*13)+2], di
-movzx ebp, dh
-movzx edi, BYTE PTR [rsi+rbp*8+1]
-movzx ebp, al
-xor edi, DWORD PTR [rsi+rbp*8+0]
-mov WORD PTR [(r8+16*13)+6], di
-shr edx, 16
-movzx ebp, ah
-movzx edi, BYTE PTR [rsi+rbp*8+1]
-movzx ebp, bl
-xor edi, DWORD PTR [rsi+rbp*8+0]
-mov WORD PTR [(r8+16*13)+10], di
-shr eax, 16
-movzx ebp, bh
-movzx edi, BYTE PTR [rsi+rbp*8+1]
-movzx ebp, cl
-xor edi, DWORD PTR [rsi+rbp*8+0]
-mov WORD PTR [(r8+16*13)+14], di
-shr ebx, 16
-movzx ebp, dh
-movzx edi, BYTE PTR [rsi+rbp*8+1]
-movzx ebp, al
-xor edi, DWORD PTR [rsi+rbp*8+0]
-mov WORD PTR [(r8+16*13)+12], di
-shr ecx, 16
-movzx ebp, ah
-movzx edi, BYTE PTR [rsi+rbp*8+1]
-movzx ebp, bl
-xor edi, DWORD PTR [rsi+rbp*8+0]
-mov WORD PTR [(r8+16*13)+0], di
-movzx ebp, bh
-movzx edi, BYTE PTR [rsi+rbp*8+1]
-movzx ebp, cl
-xor edi, DWORD PTR [rsi+rbp*8+0]
-mov WORD PTR [(r8+16*13)+4], di
-movzx ebp, ch
-movzx edi, BYTE PTR [rsi+rbp*8+1]
-movzx ebp, dl
-xor edi, DWORD PTR [rsi+rbp*8+0]
-mov WORD PTR [(r8+16*13)+8], di
-mov rax, [(r8+16*14)+16]
-mov rbx, [(r8+16*14)+24]
-mov rcx, [(r8+16*18+8)]
-sub rcx, 16
-movdqu xmm2, [rax]
-pxor xmm2, xmm4
-movdqa xmm0, [(r8+16*16)+16]
-paddq xmm0, [(r8+16*14)+16]
-movdqa [(r8+16*14)+16], xmm0
-pxor xmm2, [(r8+16*13)]
-movdqu [rbx], xmm2
-jle label7
-mov [(r8+16*18+8)], rcx
-test rcx, 1
-jnz label1
-movdqa xmm0, [(r8+16*16)]
-paddd xmm0, [(r8+16*14)]
-movdqa [(r8+16*14)], xmm0
-jmp label3
-label7:
-mov rbp, [(r8+16*18)]
-pop r12
-pop rbp
-pop rbx
-pop rdi
-pop rsi
-ret
-Rijndael_Enc_AdvancedProcessBlocks ENDP
-
-ALIGN 8
-GCM_AuthenticateBlocks_2K PROC FRAME
-rex_push_reg rsi
-push_reg rdi
-push_reg rbx
-.endprolog
-mov rsi, r8
-mov r11, r9
-movdqa xmm0, [rsi]
-label0:
-movdqu xmm4, [rcx]
-pxor xmm0, xmm4
-movd ebx, xmm0
-mov eax, 0f0f0f0f0h
-and eax, ebx
-shl ebx, 4
-and ebx, 0f0f0f0f0h
-movzx edi, ah
-movdqa xmm5, XMMWORD PTR [rsi + 32 + 1024 + rdi]
-movzx edi, al
-movdqa xmm4, XMMWORD PTR [rsi + 32 + 1024 + rdi]
-shr eax, 16
-movzx edi, ah
-movdqa xmm3, XMMWORD PTR [rsi + 32 + 1024 + rdi]
-movzx edi, al
-movdqa xmm2, XMMWORD PTR [rsi + 32 + 1024 + rdi]
-psrldq xmm0, 4
-movd eax, xmm0
-and eax, 0f0f0f0f0h
-movzx edi, bh
-pxor xmm5, XMMWORD PTR [rsi + 32 + (1-1)*256 + rdi]
-movzx edi, bl
-pxor xmm4, XMMWORD PTR [rsi + 32 + (1-1)*256 + rdi]
-shr ebx, 16
-movzx edi, bh
-pxor xmm3, XMMWORD PTR [rsi + 32 + (1-1)*256 + rdi]
-movzx edi, bl
-pxor xmm2, XMMWORD PTR [rsi + 32 + (1-1)*256 + rdi]
-movd ebx, xmm0
-shl ebx, 4
-and ebx, 0f0f0f0f0h
-movzx edi, ah
-pxor xmm5, XMMWORD PTR [rsi + 32 + 1024 + 1*256 + rdi]
-movzx edi, al
-pxor xmm4, XMMWORD PTR [rsi + 32 + 1024 + 1*256 + rdi]
-shr eax, 16
-movzx edi, ah
-pxor xmm3, XMMWORD PTR [rsi + 32 + 1024 + 1*256 + rdi]
-movzx edi, al
-pxor xmm2, XMMWORD PTR [rsi + 32 + 1024 + 1*256 + rdi]
-psrldq xmm0, 4
-movd eax, xmm0
-and eax, 0f0f0f0f0h
-movzx edi, bh
-pxor xmm5, XMMWORD PTR [rsi + 32 + (2-1)*256 + rdi]
-movzx edi, bl
-pxor xmm4, XMMWORD PTR [rsi + 32 + (2-1)*256 + rdi]
-shr ebx, 16
-movzx edi, bh
-pxor xmm3, XMMWORD PTR [rsi + 32 + (2-1)*256 + rdi]
-movzx edi, bl
-pxor xmm2, XMMWORD PTR [rsi + 32 + (2-1)*256 + rdi]
-movd ebx, xmm0
-shl ebx, 4
-and ebx, 0f0f0f0f0h
-movzx edi, ah
-pxor xmm5, XMMWORD PTR [rsi + 32 + 1024 + 2*256 + rdi]
-movzx edi, al
-pxor xmm4, XMMWORD PTR [rsi + 32 + 1024 + 2*256 + rdi]
-shr eax, 16
-movzx edi, ah
-pxor xmm3, XMMWORD PTR [rsi + 32 + 1024 + 2*256 + rdi]
-movzx edi, al
-pxor xmm2, XMMWORD PTR [rsi + 32 + 1024 + 2*256 + rdi]
-psrldq xmm0, 4
-movd eax, xmm0
-and eax, 0f0f0f0f0h
-movzx edi, bh
-pxor xmm5, XMMWORD PTR [rsi + 32 + (3-1)*256 + rdi]
-movzx edi, bl
-pxor xmm4, XMMWORD PTR [rsi + 32 + (3-1)*256 + rdi]
-shr ebx, 16
-movzx edi, bh
-pxor xmm3, XMMWORD PTR [rsi + 32 + (3-1)*256 + rdi]
-movzx edi, bl
-pxor xmm2, XMMWORD PTR [rsi + 32 + (3-1)*256 + rdi]
-movd ebx, xmm0
-shl ebx, 4
-and ebx, 0f0f0f0f0h
-movzx edi, ah
-pxor xmm5, XMMWORD PTR [rsi + 32 + 1024 + 3*256 + rdi]
-movzx edi, al
-pxor xmm4, XMMWORD PTR [rsi + 32 + 1024 + 3*256 + rdi]
-shr eax, 16
-movzx edi, ah
-pxor xmm3, XMMWORD PTR [rsi + 32 + 1024 + 3*256 + rdi]
-movzx edi, al
-pxor xmm2, XMMWORD PTR [rsi + 32 + 1024 + 3*256 + rdi]
-movzx edi, bh
-pxor xmm5, XMMWORD PTR [rsi + 32 + 3*256 + rdi]
-movzx edi, bl
-pxor xmm4, XMMWORD PTR [rsi + 32 + 3*256 + rdi]
-shr ebx, 16
-movzx edi, bh
-pxor xmm3, XMMWORD PTR [rsi + 32 + 3*256 + rdi]
-movzx edi, bl
-pxor xmm2, XMMWORD PTR [rsi + 32 + 3*256 + rdi]
-movdqa xmm0, xmm3
-pslldq xmm3, 1
-pxor xmm2, xmm3
-movdqa xmm1, xmm2
-pslldq xmm2, 1
-pxor xmm5, xmm2
-psrldq xmm0, 15
-movd rdi, xmm0
-movzx eax, WORD PTR [r11 + rdi*2]
-shl eax, 8
-movdqa xmm0, xmm5
-pslldq xmm5, 1
-pxor xmm4, xmm5
-psrldq xmm1, 15
-movd rdi, xmm1
-xor ax, WORD PTR [r11 + rdi*2]
-shl eax, 8
-psrldq xmm0, 15
-movd rdi, xmm0
-xor ax, WORD PTR [r11 + rdi*2]
-movd xmm0, eax
-pxor xmm0, xmm4
-add rcx, 16
-sub rdx, 1
-jnz label0
-movdqa [rsi], xmm0
-pop rbx
-pop rdi
-pop rsi
-ret
-GCM_AuthenticateBlocks_2K ENDP
-
-ALIGN 8
-GCM_AuthenticateBlocks_64K PROC FRAME
-rex_push_reg rsi
-push_reg rdi
-.endprolog
-mov rsi, r8
-movdqa xmm0, [rsi]
-label1:
-movdqu xmm1, [rcx]
-pxor xmm1, xmm0
-pxor xmm0, xmm0
-movd eax, xmm1
-psrldq xmm1, 4
-movzx edi, al
-add rdi, rdi
-pxor xmm0, [rsi + 32 + (0*4+0)*256*16 + rdi*8]
-movzx edi, ah
-add rdi, rdi
-pxor xmm0, [rsi + 32 + (0*4+1)*256*16 + rdi*8]
-shr eax, 16
-movzx edi, al
-add rdi, rdi
-pxor xmm0, [rsi + 32 + (0*4+2)*256*16 + rdi*8]
-movzx edi, ah
-add rdi, rdi
-pxor xmm0, [rsi + 32 + (0*4+3)*256*16 + rdi*8]
-movd eax, xmm1
-psrldq xmm1, 4
-movzx edi, al
-add rdi, rdi
-pxor xmm0, [rsi + 32 + (1*4+0)*256*16 + rdi*8]
-movzx edi, ah
-add rdi, rdi
-pxor xmm0, [rsi + 32 + (1*4+1)*256*16 + rdi*8]
-shr eax, 16
-movzx edi, al
-add rdi, rdi
-pxor xmm0, [rsi + 32 + (1*4+2)*256*16 + rdi*8]
-movzx edi, ah
-add rdi, rdi
-pxor xmm0, [rsi + 32 + (1*4+3)*256*16 + rdi*8]
-movd eax, xmm1
-psrldq xmm1, 4
-movzx edi, al
-add rdi, rdi
-pxor xmm0, [rsi + 32 + (2*4+0)*256*16 + rdi*8]
-movzx edi, ah
-add rdi, rdi
-pxor xmm0, [rsi + 32 + (2*4+1)*256*16 + rdi*8]
-shr eax, 16
-movzx edi, al
-add rdi, rdi
-pxor xmm0, [rsi + 32 + (2*4+2)*256*16 + rdi*8]
-movzx edi, ah
-add rdi, rdi
-pxor xmm0, [rsi + 32 + (2*4+3)*256*16 + rdi*8]
-movd eax, xmm1
-psrldq xmm1, 4
-movzx edi, al
-add rdi, rdi
-pxor xmm0, [rsi + 32 + (3*4+0)*256*16 + rdi*8]
-movzx edi, ah
-add rdi, rdi
-pxor xmm0, [rsi + 32 + (3*4+1)*256*16 + rdi*8]
-shr eax, 16
-movzx edi, al
-add rdi, rdi
-pxor xmm0, [rsi + 32 + (3*4+2)*256*16 + rdi*8]
-movzx edi, ah
-add rdi, rdi
-pxor xmm0, [rsi + 32 + (3*4+3)*256*16 + rdi*8]
-add rcx, 16
-sub rdx, 1
-jnz label1
-movdqa [rsi], xmm0
-pop rdi
-pop rsi
-ret
-GCM_AuthenticateBlocks_64K ENDP
-
-_TEXT ENDS
-END
+include ksamd64.inc
+EXTERNDEF ?Te@rdtable@CryptoPP@@3PA_KA:FAR
+EXTERNDEF ?g_cacheLineSize@CryptoPP@@3IA:FAR
+EXTERNDEF ?SHA256_K@CryptoPP@@3QBIB:FAR
+.CODE
+
+ ALIGN 8
+Baseline_Add PROC ; multi-word add: rdx[i] = r8[i] + r9[i] for rcx 64-bit words with the carry chained; final carry returned in rax
+ lea rdx, [rdx+8*rcx] ; move all three pointers just past their last word so a negative index can count up to zero
+ lea r8, [r8+8*rcx]
+ lea r9, [r9+8*rcx]
+ neg rcx ; rcx is negative index
+ jz $1@Baseline_Add ; zero words: skip the loop (neg of 0 leaves CF clear, so 0 is returned)
+ mov rax,[r8+8*rcx] ; first word uses plain add because there is no incoming carry
+ add rax,[r9+8*rcx]
+ mov [rdx+8*rcx],rax
+$0@Baseline_Add: ; two words per pass; mov/lea/jrcxz/jmp leave CF untouched, so the carry survives between the adc instructions
+ mov rax,[r8+8*rcx+8]
+ adc rax,[r9+8*rcx+8]
+ mov [rdx+8*rcx+8],rax
+ lea rcx,[rcx+2] ; advance index, avoid inc which causes slowdown on Intel Core 2
+ jrcxz $1@Baseline_Add ; exit when the index reaches exactly zero; it steps by 2, so the word count must be even
+ mov rax,[r8+8*rcx]
+ adc rax,[r9+8*rcx]
+ mov [rdx+8*rcx],rax
+ jmp $0@Baseline_Add
+$1@Baseline_Add:
+ mov rax, 0 ; mov (unlike xor) does not modify flags, so CF is still the carry out of the last word
+ adc rax, rax ; store carry into rax (return result register)
+ ret
+Baseline_Add ENDP
+
+ ALIGN 8
+Baseline_Sub PROC ; multi-word subtract: rdx[i] = r8[i] - r9[i] for rcx 64-bit words with the borrow chained; final borrow returned in rax
+ lea rdx, [rdx+8*rcx] ; move all three pointers just past their last word so a negative index can count up to zero
+ lea r8, [r8+8*rcx]
+ lea r9, [r9+8*rcx]
+ neg rcx ; rcx is negative index
+ jz $1@Baseline_Sub ; zero words: skip the loop (neg of 0 leaves CF clear, so 0 is returned)
+ mov rax,[r8+8*rcx] ; first word uses plain sub because there is no incoming borrow
+ sub rax,[r9+8*rcx]
+ mov [rdx+8*rcx],rax
+$0@Baseline_Sub: ; two words per pass; mov/lea/jrcxz/jmp leave CF untouched, so the borrow survives between the sbb instructions
+ mov rax,[r8+8*rcx+8]
+ sbb rax,[r9+8*rcx+8]
+ mov [rdx+8*rcx+8],rax
+ lea rcx,[rcx+2] ; advance index, avoid inc which causes slowdown on Intel Core 2
+ jrcxz $1@Baseline_Sub ; exit when the index reaches exactly zero; it steps by 2, so the word count must be even
+ mov rax,[r8+8*rcx]
+ sbb rax,[r9+8*rcx]
+ mov [rdx+8*rcx],rax
+ jmp $0@Baseline_Sub
+$1@Baseline_Sub:
+ mov rax, 0 ; mov (unlike xor) does not modify flags, so CF is still the borrow out of the last word
+ adc rax, rax ; rax = 0 + 0 + CF: store the final borrow into rax (return result register)
+
+ ret
+Baseline_Sub ENDP
+
+ALIGN 8
+Rijndael_Enc_AdvancedProcessBlocks PROC FRAME ; table-driven Rijndael encryption loop; rcx = work area, rdx = second argument (presumably the key schedule - confirm against the C++ caller)
+rex_push_reg rsi
+push_reg rdi
+push_reg rbx
+push_reg rbp
+push_reg r12
+.endprolog ; rsi, rdi, rbx, rbp and r12 are saved above and restored at label7
+mov r8, rcx ; r8 = work-area pointer used for every [(r8+...)] access below
+mov rsi, ?Te@rdtable@CryptoPP@@3PA_KA ; rsi = base of the lookup table used by every [rsi+8*idx+k] access below
+mov edi, DWORD PTR [?g_cacheLineSize@CryptoPP@@3IA] ; the variable is a 32-bit unsigned int (decoration @@3IA): load 4 bytes only; writing edi zero-extends into rdi, the stride added at label9
+mov rbp, [(r8+16*19)] ; rbp = byte offset stored in the work area; the copy loop below starts from it
+mov rax, 16
+and rax, rbp ; rax = 0 or 16 depending on bit 4 of that offset
+movdqa xmm3, XMMWORD PTR [rdx+16+rax]
+movdqa [(r8+16*12)], xmm3 ; stash that 16-byte block in the work area at +16*12
+lea rax, [rdx+rax+2*16]
+sub rax, rbp ; bias the source so [rax+rbp] and [(r8+0)+rbp] can share one index
+label0: ; copy 16-byte blocks from [rax+rbp] into the work area until rbp reaches 16*12
+movdqa xmm0, [rax+rbp]
+movdqa XMMWORD PTR [(r8+0)+rbp], xmm0
+add rbp, 16
+cmp rbp, 16*12
+jl label0
+movdqa xmm4, [rax+rbp] ; xmm4 = the block after the copied range; XORed into every output block before it is stored
+movdqa xmm1, [rdx] ; xmm1 = first 16 bytes at rdx; XORed into every input block
+mov r11d, [rdx+4*4] ; r11d/ebx/ecx/edx = the four dwords at rdx+16 .. rdx+31
+mov ebx, [rdx+5*4]
+mov ecx, [rdx+6*4]
+mov edx, [rdx+7*4]
+xor rax, rax
+label9: ; read one dword every rdi bytes across the first 2048 bytes at rsi; the values are discarded - presumably this warms the table into cache
+mov ebp, [rsi+rax]
+add rax, rdi
+mov ebp, [rsi+rax]
+add rax, rdi
+mov ebp, [rsi+rax]
+add rax, rdi
+mov ebp, [rsi+rax]
+add rax, rdi
+cmp rax, 2048
+jl label9
+lfence
+test DWORD PTR [(r8+16*18+8)], 1 ; bit 0 of the counter word at +16*18+8 selects the path: clear -> label8
+jz label8
+mov rbp, [(r8+16*14)] ; rbp = input pointer stored at +16*14
+movdqa xmm2, [rbp]
+pxor xmm2, xmm1
+psrldq xmm1, 14
+movd eax, xmm1
+mov al, BYTE PTR [rbp+15]
+mov r12d, eax ; r12d is carried between blocks and incremented at label5 - presumably a counter byte; confirm against the caller
+movd eax, xmm2
+psrldq xmm2, 4
+movd edi, xmm2
+psrldq xmm2, 4
+movzx ebp, al ; each byte of the state indexes an 8-byte table entry; the +1..+4 offset picks which dword view of the entry is read
+xor r11d, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
+movzx ebp, ah
+xor edx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
+shr eax, 16
+movzx ebp, al
+xor ecx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
+movzx ebp, ah
+xor ebx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
+mov eax, edi
+movd edi, xmm2
+psrldq xmm2, 4
+movzx ebp, al
+xor ebx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
+movzx ebp, ah
+xor r11d, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
+shr eax, 16
+movzx ebp, al
+xor edx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
+movzx ebp, ah
+xor ecx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
+mov eax, edi
+movd edi, xmm2
+movzx ebp, al
+xor ecx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
+movzx ebp, ah
+xor ebx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
+shr eax, 16
+movzx ebp, al
+xor r11d, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
+movzx ebp, ah
+xor edx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
+mov eax, edi
+movzx ebp, al
+xor edx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
+movzx ebp, ah
+xor ecx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
+shr eax, 16
+movzx ebp, al
+xor ebx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
+psrldq xmm2, 3
+mov eax, [(r8+16*12)+0*4] ; start the next round from the block stashed at +16*12
+mov edi, [(r8+16*12)+2*4]
+mov r10d, [(r8+16*12)+3*4]
+movzx ebp, cl
+xor r10d, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
+movzx ebp, bl
+xor edi, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
+movzx ebp, bh
+xor r10d, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
+shr ebx, 16
+movzx ebp, bl
+xor eax, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
+movzx ebp, bh
+mov ebx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
+xor ebx, [(r8+16*12)+1*4]
+movzx ebp, ch
+xor eax, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
+shr ecx, 16
+movzx ebp, dl
+xor eax, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
+movzx ebp, dh
+xor ebx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
+shr edx, 16
+movzx ebp, ch
+xor edi, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
+movzx ebp, cl
+xor ebx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
+movzx ebp, dl
+xor edi, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
+movzx ebp, dh
+xor r10d, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
+movd ecx, xmm2
+mov edx, r11d
+mov [(r8+0)+3*4], r10d ; save the four partial words at the start of the work area; label1 and label5 read them back
+mov [(r8+0)+0*4], eax
+mov [(r8+0)+1*4], ebx
+mov [(r8+0)+2*4], edi
+jmp label5
+label3: ; reload the four starting words from the block stashed at +16*12, then fall into label8
+mov r11d, [(r8+16*12)+0*4]
+mov ebx, [(r8+16*12)+1*4]
+mov ecx, [(r8+16*12)+2*4]
+mov edx, [(r8+16*12)+3*4]
+label8: ; general path: input block XOR xmm1 XOR the 16 bytes behind the second pointer, then the first round of lookups
+mov rax, [(r8+16*14)] ; pointer to the input block
+movdqu xmm2, [rax]
+mov rbp, [(r8+16*14)+8] ; second pointer; its 16 bytes are XORed into the input as well
+movdqu xmm5, [rbp]
+pxor xmm2, xmm1
+pxor xmm2, xmm5
+movd eax, xmm2
+psrldq xmm2, 4
+movd edi, xmm2
+psrldq xmm2, 4
+movzx ebp, al
+xor r11d, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
+movzx ebp, ah
+xor edx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
+shr eax, 16
+movzx ebp, al
+xor ecx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
+movzx ebp, ah
+xor ebx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
+mov eax, edi
+movd edi, xmm2
+psrldq xmm2, 4
+movzx ebp, al
+xor ebx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
+movzx ebp, ah
+xor r11d, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
+shr eax, 16
+movzx ebp, al
+xor edx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
+movzx ebp, ah
+xor ecx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
+mov eax, edi
+movd edi, xmm2
+movzx ebp, al
+xor ecx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
+movzx ebp, ah
+xor ebx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
+shr eax, 16
+movzx ebp, al
+xor r11d, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
+movzx ebp, ah
+xor edx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
+mov eax, edi
+movzx ebp, al
+xor edx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
+movzx ebp, ah
+xor ecx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
+shr eax, 16
+movzx ebp, al
+xor ebx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
+movzx ebp, ah
+xor r11d, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
+mov eax, r11d
+add r8, [(r8+16*19)] ; advance r8 by the stored offset so the loop's -4*16 relative loads land on the right slot
+add r8, 4*16
+jmp label2
+label1: ; path taken when bit 0 of the counter word is set: resume from the words saved at the start of the work area
+mov ecx, r12d
+mov edx, r11d
+mov eax, [(r8+0)+0*4]
+mov ebx, [(r8+0)+1*4]
+xor cl, ch
+and rcx, 255
+label5:
+add r12d, 1
+xor edx, DWORD PTR [rsi+rcx*8+3]
+movzx ebp, dl
+xor ebx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
+movzx ebp, dh
+mov ecx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
+shr edx, 16
+xor ecx, [(r8+0)+2*4]
+movzx ebp, dh
+xor eax, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
+movzx ebp, dl
+mov edx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
+xor edx, [(r8+0)+3*4]
+add r8, [(r8+16*19)]
+add r8, 3*16 ; one slot less than the label8 path because this path enters the loop at label4
+jmp label4
+label2: ; main loop: each pass runs two rounds of table lookups (this half, then the label4 half)
+mov r10d, [(r8+0)-4*16+3*4]
+mov edi, [(r8+0)-4*16+2*4]
+movzx ebp, cl
+xor r10d, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
+mov cl, al
+movzx ebp, ah
+xor edi, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
+shr eax, 16
+movzx ebp, bl
+xor edi, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
+movzx ebp, bh
+xor r10d, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
+shr ebx, 16
+movzx ebp, al
+xor r10d, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
+movzx ebp, ah
+mov eax, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
+movzx ebp, bl
+xor eax, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
+movzx ebp, bh
+mov ebx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
+movzx ebp, ch
+xor eax, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
+movzx ebp, cl
+xor ebx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
+shr ecx, 16
+movzx ebp, dl
+xor eax, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
+movzx ebp, dh
+xor ebx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
+shr edx, 16
+movzx ebp, ch
+xor edi, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
+movzx ebp, cl
+xor ebx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
+movzx ebp, dl
+xor edi, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
+movzx ebp, dh
+xor r10d, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
+mov ecx, edi
+xor eax, [(r8+0)-4*16+0*4]
+xor ebx, [(r8+0)-4*16+1*4]
+mov edx, r10d
+label4: ; second half of the pass; same lookup pattern against the next 16-byte slot (dwords 4..7)
+mov r10d, [(r8+0)-4*16+7*4]
+mov edi, [(r8+0)-4*16+6*4]
+movzx ebp, cl
+xor r10d, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
+mov cl, al
+movzx ebp, ah
+xor edi, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
+shr eax, 16
+movzx ebp, bl
+xor edi, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
+movzx ebp, bh
+xor r10d, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
+shr ebx, 16
+movzx ebp, al
+xor r10d, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
+movzx ebp, ah
+mov eax, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
+movzx ebp, bl
+xor eax, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
+movzx ebp, bh
+mov ebx, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
+movzx ebp, ch
+xor eax, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
+movzx ebp, cl
+xor ebx, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
+shr ecx, 16
+movzx ebp, dl
+xor eax, DWORD PTR [rsi+8*rbp+(((3+3) MOD (4))+1)]
+movzx ebp, dh
+xor ebx, DWORD PTR [rsi+8*rbp+(((2+3) MOD (4))+1)]
+shr edx, 16
+movzx ebp, ch
+xor edi, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
+movzx ebp, cl
+xor ebx, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
+movzx ebp, dl
+xor edi, DWORD PTR [rsi+8*rbp+(((1+3) MOD (4))+1)]
+movzx ebp, dh
+xor r10d, DWORD PTR [rsi+8*rbp+(((0+3) MOD (4))+1)]
+mov ecx, edi
+xor eax, [(r8+0)-4*16+4*4]
+xor ebx, [(r8+0)-4*16+5*4]
+mov edx, r10d
+add r8, 32 ; two 16-byte slots consumed per pass
+test r8, 255 ; loop ends when the low byte of r8 wraps to zero - assumes the work area is 256-byte aligned; confirm in the caller
+jnz label2
+sub r8, 16*16 ; step r8 back by 256 bytes
+movzx ebp, ch ; last round: build the 16 output bytes at +16*13 two at a time from byte/dword reads of the same table
+movzx edi, BYTE PTR [rsi+rbp*8+1]
+movzx ebp, dl
+xor edi, DWORD PTR [rsi+rbp*8+0]
+mov WORD PTR [(r8+16*13)+2], di
+movzx ebp, dh
+movzx edi, BYTE PTR [rsi+rbp*8+1]
+movzx ebp, al
+xor edi, DWORD PTR [rsi+rbp*8+0]
+mov WORD PTR [(r8+16*13)+6], di
+shr edx, 16
+movzx ebp, ah
+movzx edi, BYTE PTR [rsi+rbp*8+1]
+movzx ebp, bl
+xor edi, DWORD PTR [rsi+rbp*8+0]
+mov WORD PTR [(r8+16*13)+10], di
+shr eax, 16
+movzx ebp, bh
+movzx edi, BYTE PTR [rsi+rbp*8+1]
+movzx ebp, cl
+xor edi, DWORD PTR [rsi+rbp*8+0]
+mov WORD PTR [(r8+16*13)+14], di
+shr ebx, 16
+movzx ebp, dh
+movzx edi, BYTE PTR [rsi+rbp*8+1]
+movzx ebp, al
+xor edi, DWORD PTR [rsi+rbp*8+0]
+mov WORD PTR [(r8+16*13)+12], di
+shr ecx, 16
+movzx ebp, ah
+movzx edi, BYTE PTR [rsi+rbp*8+1]
+movzx ebp, bl
+xor edi, DWORD PTR [rsi+rbp*8+0]
+mov WORD PTR [(r8+16*13)+0], di
+movzx ebp, bh
+movzx edi, BYTE PTR [rsi+rbp*8+1]
+movzx ebp, cl
+xor edi, DWORD PTR [rsi+rbp*8+0]
+mov WORD PTR [(r8+16*13)+4], di
+movzx ebp, ch
+movzx edi, BYTE PTR [rsi+rbp*8+1]
+movzx ebp, dl
+xor edi, DWORD PTR [rsi+rbp*8+0]
+mov WORD PTR [(r8+16*13)+8], di
+mov rax, [(r8+16*14)+16] ; rax = pointer whose 16 bytes are XORed into the result
+mov rbx, [(r8+16*14)+24] ; rbx = destination pointer for the 16-byte result
+mov rcx, [(r8+16*18+8)] ; rcx = remaining-length counter
+sub rcx, 16 ; one block done; the flags from this sub drive the jle below (the mov/SSE2 instructions in between do not modify flags)
+movdqu xmm2, [rax]
+pxor xmm2, xmm4
+movdqa xmm0, [(r8+16*16)+16] ; add the 64-bit increments at +16*16+16 to the two pointers at +16*14+16
+paddq xmm0, [(r8+16*14)+16]
+movdqa [(r8+16*14)+16], xmm0
+pxor xmm2, [(r8+16*13)]
+movdqu [rbx], xmm2 ; write the finished block
+jle label7 ; counter <= 0 after the subtraction: all blocks processed
+mov [(r8+16*18+8)], rcx
+test rcx, 1 ; bit 0 set -> take the label1 path for the next block
+jnz label1
+movdqa xmm0, [(r8+16*16)] ; otherwise add the increments at +16*16 to the values at +16*14 and restart at label3
+paddd xmm0, [(r8+16*14)]
+movdqa [(r8+16*14)], xmm0
+jmp label3
+label7: ; epilogue: restore the saved registers in reverse push order
+mov rbp, [(r8+16*18)] ; NOTE(review): this value is overwritten by the "pop rbp" two instructions below
+pop r12
+pop rbp
+pop rbx
+pop rdi
+pop rsi
+ret
+Rijndael_Enc_AdvancedProcessBlocks ENDP
+
+ALIGN 8
+GCM_AuthenticateBlocks_2K PROC FRAME ; rcx = input data, rdx = number of 16-byte blocks, r8 = 16-byte running state with lookup tables starting at +32, r9 = table of 16-bit words used in the final fold
+rex_push_reg rsi
+push_reg rdi
+push_reg rbx
+.endprolog ; rsi, rdi and rbx are saved above and restored before ret
+mov rsi, r8 ; rsi = state/table base
+mov r11, r9 ; r11 = base of the 16-bit word table indexed near the end of each pass
+movdqa xmm0, [rsi] ; xmm0 = running state
+label0: ; one pass per 16-byte input block
+movdqu xmm4, [rcx]
+pxor xmm0, xmm4 ; fold the input block into the state
+movd ebx, xmm0
+mov eax, 0f0f0f0f0h
+and eax, ebx ; eax = high nibble of each byte, already in bits 4-7 (nibble*16 = byte offset of a 16-byte table entry)
+shl ebx, 4
+and ebx, 0f0f0f0f0h ; ebx = low nibble of each byte moved up to bits 4-7, same scaling
+movzx edi, ah ; xmm5/xmm4/xmm3/xmm2 each accumulate the table entries for one byte lane (ah, al, and the two bytes above)
+movdqa xmm5, XMMWORD PTR [rsi + 32 + 1024 + rdi]
+movzx edi, al
+movdqa xmm4, XMMWORD PTR [rsi + 32 + 1024 + rdi]
+shr eax, 16
+movzx edi, ah
+movdqa xmm3, XMMWORD PTR [rsi + 32 + 1024 + rdi]
+movzx edi, al
+movdqa xmm2, XMMWORD PTR [rsi + 32 + 1024 + rdi]
+psrldq xmm0, 4 ; move on to the next dword of the state
+movd eax, xmm0
+and eax, 0f0f0f0f0h
+movzx edi, bh
+pxor xmm5, XMMWORD PTR [rsi + 32 + (1-1)*256 + rdi]
+movzx edi, bl
+pxor xmm4, XMMWORD PTR [rsi + 32 + (1-1)*256 + rdi]
+shr ebx, 16
+movzx edi, bh
+pxor xmm3, XMMWORD PTR [rsi + 32 + (1-1)*256 + rdi]
+movzx edi, bl
+pxor xmm2, XMMWORD PTR [rsi + 32 + (1-1)*256 + rdi]
+movd ebx, xmm0
+shl ebx, 4
+and ebx, 0f0f0f0f0h
+movzx edi, ah
+pxor xmm5, XMMWORD PTR [rsi + 32 + 1024 + 1*256 + rdi]
+movzx edi, al
+pxor xmm4, XMMWORD PTR [rsi + 32 + 1024 + 1*256 + rdi]
+shr eax, 16
+movzx edi, ah
+pxor xmm3, XMMWORD PTR [rsi + 32 + 1024 + 1*256 + rdi]
+movzx edi, al
+pxor xmm2, XMMWORD PTR [rsi + 32 + 1024 + 1*256 + rdi]
+psrldq xmm0, 4
+movd eax, xmm0
+and eax, 0f0f0f0f0h
+movzx edi, bh
+pxor xmm5, XMMWORD PTR [rsi + 32 + (2-1)*256 + rdi]
+movzx edi, bl
+pxor xmm4, XMMWORD PTR [rsi + 32 + (2-1)*256 + rdi]
+shr ebx, 16
+movzx edi, bh
+pxor xmm3, XMMWORD PTR [rsi + 32 + (2-1)*256 + rdi]
+movzx edi, bl
+pxor xmm2, XMMWORD PTR [rsi + 32 + (2-1)*256 + rdi]
+movd ebx, xmm0
+shl ebx, 4
+and ebx, 0f0f0f0f0h
+movzx edi, ah
+pxor xmm5, XMMWORD PTR [rsi + 32 + 1024 + 2*256 + rdi]
+movzx edi, al
+pxor xmm4, XMMWORD PTR [rsi + 32 + 1024 + 2*256 + rdi]
+shr eax, 16
+movzx edi, ah
+pxor xmm3, XMMWORD PTR [rsi + 32 + 1024 + 2*256 + rdi]
+movzx edi, al
+pxor xmm2, XMMWORD PTR [rsi + 32 + 1024 + 2*256 + rdi]
+psrldq xmm0, 4
+movd eax, xmm0
+and eax, 0f0f0f0f0h
+movzx edi, bh
+pxor xmm5, XMMWORD PTR [rsi + 32 + (3-1)*256 + rdi]
+movzx edi, bl
+pxor xmm4, XMMWORD PTR [rsi + 32 + (3-1)*256 + rdi]
+shr ebx, 16
+movzx edi, bh
+pxor xmm3, XMMWORD PTR [rsi + 32 + (3-1)*256 + rdi]
+movzx edi, bl
+pxor xmm2, XMMWORD PTR [rsi + 32 + (3-1)*256 + rdi]
+movd ebx, xmm0
+shl ebx, 4
+and ebx, 0f0f0f0f0h
+movzx edi, ah
+pxor xmm5, XMMWORD PTR [rsi + 32 + 1024 + 3*256 + rdi]
+movzx edi, al
+pxor xmm4, XMMWORD PTR [rsi + 32 + 1024 + 3*256 + rdi]
+shr eax, 16
+movzx edi, ah
+pxor xmm3, XMMWORD PTR [rsi + 32 + 1024 + 3*256 + rdi]
+movzx edi, al
+pxor xmm2, XMMWORD PTR [rsi + 32 + 1024 + 3*256 + rdi]
+movzx edi, bh
+pxor xmm5, XMMWORD PTR [rsi + 32 + 3*256 + rdi]
+movzx edi, bl
+pxor xmm4, XMMWORD PTR [rsi + 32 + 3*256 + rdi]
+shr ebx, 16
+movzx edi, bh
+pxor xmm3, XMMWORD PTR [rsi + 32 + 3*256 + rdi]
+movzx edi, bl
+pxor xmm2, XMMWORD PTR [rsi + 32 + 3*256 + rdi]
+movdqa xmm0, xmm3 ; combine the four lanes: each is shifted left one byte into the next; copies are kept so the byte shifted out can be recovered
+pslldq xmm3, 1
+pxor xmm2, xmm3
+movdqa xmm1, xmm2
+pslldq xmm2, 1
+pxor xmm5, xmm2
+psrldq xmm0, 15 ; isolate the top byte that the shift above pushed out
+movd rdi, xmm0
+movzx eax, WORD PTR [r11 + rdi*2] ; look that byte up in the 16-bit word table at r11
+shl eax, 8
+movdqa xmm0, xmm5
+pslldq xmm5, 1
+pxor xmm4, xmm5
+psrldq xmm1, 15
+movd rdi, xmm1
+xor ax, WORD PTR [r11 + rdi*2]
+shl eax, 8
+psrldq xmm0, 15
+movd rdi, xmm0
+xor ax, WORD PTR [r11 + rdi*2]
+movd xmm0, eax ; fold the accumulated table words into the low dword of the new state
+pxor xmm0, xmm4
+add rcx, 16 ; next input block
+sub rdx, 1
+jnz label0 ; the count is tested only after a block has been processed, so rdx must be at least 1 on entry
+movdqa [rsi], xmm0 ; write the updated state back
+pop rbx
+pop rdi
+pop rsi
+ret
+GCM_AuthenticateBlocks_2K ENDP
+
+ALIGN 8
+GCM_AuthenticateBlocks_64K PROC FRAME ; rcx = input data, rdx = number of 16-byte blocks, r8 = 16-byte running state followed at +32 by 16 tables of 256 16-byte entries (one table per state byte)
+rex_push_reg rsi
+push_reg rdi
+.endprolog ; rsi and rdi are saved above and restored before ret
+mov rsi, r8 ; rsi = state/table base
+movdqa xmm0, [rsi] ; xmm0 = running state
+label1: ; one pass per 16-byte input block
+movdqu xmm1, [rcx]
+pxor xmm1, xmm0 ; xmm1 = input block XOR state
+pxor xmm0, xmm0 ; clear the accumulator; the new state is the XOR of 16 table entries
+movd eax, xmm1
+psrldq xmm1, 4
+movzx edi, al
+add rdi, rdi ; rdi = byte*2, so the rdi*8 scale below addresses 16-byte entries
+pxor xmm0, [rsi + 32 + (0*4+0)*256*16 + rdi*8]
+movzx edi, ah
+add rdi, rdi
+pxor xmm0, [rsi + 32 + (0*4+1)*256*16 + rdi*8]
+shr eax, 16
+movzx edi, al
+add rdi, rdi
+pxor xmm0, [rsi + 32 + (0*4+2)*256*16 + rdi*8]
+movzx edi, ah
+add rdi, rdi
+pxor xmm0, [rsi + 32 + (0*4+3)*256*16 + rdi*8]
+movd eax, xmm1 ; second dword of the block: tables 4..7
+psrldq xmm1, 4
+movzx edi, al
+add rdi, rdi
+pxor xmm0, [rsi + 32 + (1*4+0)*256*16 + rdi*8]
+movzx edi, ah
+add rdi, rdi
+pxor xmm0, [rsi + 32 + (1*4+1)*256*16 + rdi*8]
+shr eax, 16
+movzx edi, al
+add rdi, rdi
+pxor xmm0, [rsi + 32 + (1*4+2)*256*16 + rdi*8]
+movzx edi, ah
+add rdi, rdi
+pxor xmm0, [rsi + 32 + (1*4+3)*256*16 + rdi*8]
+movd eax, xmm1 ; third dword: tables 8..11
+psrldq xmm1, 4
+movzx edi, al
+add rdi, rdi
+pxor xmm0, [rsi + 32 + (2*4+0)*256*16 + rdi*8]
+movzx edi, ah
+add rdi, rdi
+pxor xmm0, [rsi + 32 + (2*4+1)*256*16 + rdi*8]
+shr eax, 16
+movzx edi, al
+add rdi, rdi
+pxor xmm0, [rsi + 32 + (2*4+2)*256*16 + rdi*8]
+movzx edi, ah
+add rdi, rdi
+pxor xmm0, [rsi + 32 + (2*4+3)*256*16 + rdi*8]
+movd eax, xmm1 ; fourth dword: tables 12..15
+psrldq xmm1, 4
+movzx edi, al
+add rdi, rdi
+pxor xmm0, [rsi + 32 + (3*4+0)*256*16 + rdi*8]
+movzx edi, ah
+add rdi, rdi
+pxor xmm0, [rsi + 32 + (3*4+1)*256*16 + rdi*8]
+shr eax, 16
+movzx edi, al
+add rdi, rdi
+pxor xmm0, [rsi + 32 + (3*4+2)*256*16 + rdi*8]
+movzx edi, ah
+add rdi, rdi
+pxor xmm0, [rsi + 32 + (3*4+3)*256*16 + rdi*8]
+add rcx, 16 ; next input block
+sub rdx, 1
+jnz label1 ; the count is tested only after a block has been processed, so rdx must be at least 1 on entry
+movdqa [rsi], xmm0 ; write the updated state back
+pop rdi
+pop rsi
+ret
+GCM_AuthenticateBlocks_64K ENDP
+
+ALIGN 8
+X86_SHA256_HashBlocks PROC FRAME
+rex_push_reg rsi
+push_reg rdi
+push_reg rbx
+push_reg rbp
+alloc_stack(8*4 + 16*4 + 4*8 + 8)
+.endprolog
+mov rdi, r8
+lea rsi, [?SHA256_K@CryptoPP@@3QBIB + 48*4]
+mov [rsp+8*4+16*4+1*8], rcx
+mov [rsp+8*4+16*4+2*8], rdx
+add rdi, rdx
+mov [rsp+8*4+16*4+3*8], rdi
+movdqa xmm0, XMMWORD PTR [rcx+0*16]
+movdqa xmm1, XMMWORD PTR [rcx+1*16]
+mov [rsp+8*4+16*4+0*8], rsi
+label0:
+sub rsi, 48*4
+movdqa [rsp+((1024+7-(0+3)) MOD (8))*4], xmm1
+movdqa [rsp+((1024+7-(0+7)) MOD (8))*4], xmm0
+mov rbx, [rdx+0*8]
+bswap rbx
+mov [rsp+8*4+((1024+15-(0*(1+1)+1)) MOD (16))*4], rbx
+mov rbx, [rdx+1*8]
+bswap rbx
+mov [rsp+8*4+((1024+15-(1*(1+1)+1)) MOD (16))*4], rbx
+mov rbx, [rdx+2*8]
+bswap rbx
+mov [rsp+8*4+((1024+15-(2*(1+1)+1)) MOD (16))*4], rbx
+mov rbx, [rdx+3*8]
+bswap rbx
+mov [rsp+8*4+((1024+15-(3*(1+1)+1)) MOD (16))*4], rbx
+mov rbx, [rdx+4*8]
+bswap rbx
+mov [rsp+8*4+((1024+15-(4*(1+1)+1)) MOD (16))*4], rbx
+mov rbx, [rdx+5*8]
+bswap rbx
+mov [rsp+8*4+((1024+15-(5*(1+1)+1)) MOD (16))*4], rbx
+mov rbx, [rdx+6*8]
+bswap rbx
+mov [rsp+8*4+((1024+15-(6*(1+1)+1)) MOD (16))*4], rbx
+mov rbx, [rdx+7*8]
+bswap rbx
+mov [rsp+8*4+((1024+15-(7*(1+1)+1)) MOD (16))*4], rbx
+mov edi, [rsp+((1024+7-(0+3)) MOD (8))*4]
+mov eax, [rsp+((1024+7-(0+6)) MOD (8))*4]
+xor eax, [rsp+((1024+7-(0+5)) MOD (8))*4]
+mov ecx, [rsp+((1024+7-(0+7)) MOD (8))*4]
+mov edx, [rsp+((1024+7-(0+2)) MOD (8))*4]
+xor edx, [rsp+((1024+7-(0+1)) MOD (8))*4]
+and edx, edi
+xor edx, [rsp+((1024+7-(0+1)) MOD (8))*4]
+mov ebp, edi
+ror edi, 6
+ror ebp, 25
+add edx, [rsi+(0)*4]
+add edx, [rsp+8*4+((1024+15-(0)) MOD (16))*4]
+add edx, [rsp+((1024+7-(0)) MOD (8))*4]
+xor ebp, edi
+ror edi, 5
+xor ebp, edi
+add edx, ebp
+mov ebx, ecx
+xor ecx, [rsp+((1024+7-(0+6)) MOD (8))*4]
+and eax, ecx
+xor eax, [rsp+((1024+7-(0+6)) MOD (8))*4]
+mov ebp, ebx
+ror ebx, 2
+add eax, edx
+add edx, [rsp+((1024+7-(0+4)) MOD (8))*4]
+mov [rsp+((1024+7-(0+4)) MOD (8))*4], edx
+ror ebp, 22
+xor ebp, ebx
+ror ebx, 11
+xor ebp, ebx
+add eax, ebp
+mov [rsp+((1024+7-(0)) MOD (8))*4], eax
+mov edi, [rsp+((1024+7-(1+2)) MOD (8))*4]
+xor edi, [rsp+((1024+7-(1+1)) MOD (8))*4]
+and edi, edx
+xor edi, [rsp+((1024+7-(1+1)) MOD (8))*4]
+mov ebp, edx
+ror edx, 6
+ror ebp, 25
+add edi, [rsi+(1)*4]
+add edi, [rsp+8*4+((1024+15-(1)) MOD (16))*4]
+add edi, [rsp+((1024+7-(1)) MOD (8))*4]
+xor ebp, edx
+ror edx, 5
+xor ebp, edx
+add edi, ebp
+mov ebx, eax
+xor eax, [rsp+((1024+7-(1+6)) MOD (8))*4]
+and ecx, eax
+xor ecx, [rsp+((1024+7-(1+6)) MOD (8))*4]
+mov ebp, ebx
+ror ebx, 2
+add ecx, edi
+add edi, [rsp+((1024+7-(1+4)) MOD (8))*4]
+mov [rsp+((1024+7-(1+4)) MOD (8))*4], edi
+ror ebp, 22
+xor ebp, ebx
+ror ebx, 11
+xor ebp, ebx
+add ecx, ebp
+mov [rsp+((1024+7-(1)) MOD (8))*4], ecx
+mov edx, [rsp+((1024+7-(2+2)) MOD (8))*4]
+xor edx, [rsp+((1024+7-(2+1)) MOD (8))*4]
+and edx, edi
+xor edx, [rsp+((1024+7-(2+1)) MOD (8))*4]
+mov ebp, edi
+ror edi, 6
+ror ebp, 25
+add edx, [rsi+(2)*4]
+add edx, [rsp+8*4+((1024+15-(2)) MOD (16))*4]
+add edx, [rsp+((1024+7-(2)) MOD (8))*4]
+xor ebp, edi
+ror edi, 5
+xor ebp, edi
+add edx, ebp
+mov ebx, ecx
+xor ecx, [rsp+((1024+7-(2+6)) MOD (8))*4]
+and eax, ecx
+xor eax, [rsp+((1024+7-(2+6)) MOD (8))*4]
+mov ebp, ebx
+ror ebx, 2
+add eax, edx
+add edx, [rsp+((1024+7-(2+4)) MOD (8))*4]
+mov [rsp+((1024+7-(2+4)) MOD (8))*4], edx
+ror ebp, 22
+xor ebp, ebx
+ror ebx, 11
+xor ebp, ebx
+add eax, ebp
+mov [rsp+((1024+7-(2)) MOD (8))*4], eax
+mov edi, [rsp+((1024+7-(3+2)) MOD (8))*4]
+xor edi, [rsp+((1024+7-(3+1)) MOD (8))*4]
+and edi, edx
+xor edi, [rsp+((1024+7-(3+1)) MOD (8))*4]
+mov ebp, edx
+ror edx, 6
+ror ebp, 25
+add edi, [rsi+(3)*4]
+add edi, [rsp+8*4+((1024+15-(3)) MOD (16))*4]
+add edi, [rsp+((1024+7-(3)) MOD (8))*4]
+xor ebp, edx
+ror edx, 5
+xor ebp, edx
+add edi, ebp
+mov ebx, eax
+xor eax, [rsp+((1024+7-(3+6)) MOD (8))*4]
+and ecx, eax
+xor ecx, [rsp+((1024+7-(3+6)) MOD (8))*4]
+mov ebp, ebx
+ror ebx, 2
+add ecx, edi
+add edi, [rsp+((1024+7-(3+4)) MOD (8))*4]
+mov [rsp+((1024+7-(3+4)) MOD (8))*4], edi
+ror ebp, 22
+xor ebp, ebx
+ror ebx, 11
+xor ebp, ebx
+add ecx, ebp
+mov [rsp+((1024+7-(3)) MOD (8))*4], ecx
+mov edx, [rsp+((1024+7-(4+2)) MOD (8))*4]
+xor edx, [rsp+((1024+7-(4+1)) MOD (8))*4]
+and edx, edi
+xor edx, [rsp+((1024+7-(4+1)) MOD (8))*4]
+mov ebp, edi
+ror edi, 6
+ror ebp, 25
+add edx, [rsi+(4)*4]
+add edx, [rsp+8*4+((1024+15-(4)) MOD (16))*4]
+add edx, [rsp+((1024+7-(4)) MOD (8))*4]
+xor ebp, edi
+ror edi, 5
+xor ebp, edi
+add edx, ebp
+mov ebx, ecx
+xor ecx, [rsp+((1024+7-(4+6)) MOD (8))*4]
+and eax, ecx
+xor eax, [rsp+((1024+7-(4+6)) MOD (8))*4]
+mov ebp, ebx
+ror ebx, 2
+add eax, edx
+add edx, [rsp+((1024+7-(4+4)) MOD (8))*4]
+mov [rsp+((1024+7-(4+4)) MOD (8))*4], edx
+ror ebp, 22
+xor ebp, ebx
+ror ebx, 11
+xor ebp, ebx
+add eax, ebp
+mov [rsp+((1024+7-(4)) MOD (8))*4], eax
+mov edi, [rsp+((1024+7-(5+2)) MOD (8))*4]
+xor edi, [rsp+((1024+7-(5+1)) MOD (8))*4]
+and edi, edx
+xor edi, [rsp+((1024+7-(5+1)) MOD (8))*4]
+mov ebp, edx
+ror edx, 6
+ror ebp, 25
+add edi, [rsi+(5)*4]
+add edi, [rsp+8*4+((1024+15-(5)) MOD (16))*4]
+add edi, [rsp+((1024+7-(5)) MOD (8))*4]
+xor ebp, edx
+ror edx, 5
+xor ebp, edx
+add edi, ebp
+mov ebx, eax
+xor eax, [rsp+((1024+7-(5+6)) MOD (8))*4]
+and ecx, eax
+xor ecx, [rsp+((1024+7-(5+6)) MOD (8))*4]
+mov ebp, ebx
+ror ebx, 2
+add ecx, edi
+add edi, [rsp+((1024+7-(5+4)) MOD (8))*4]
+mov [rsp+((1024+7-(5+4)) MOD (8))*4], edi
+ror ebp, 22
+xor ebp, ebx
+ror ebx, 11
+xor ebp, ebx
+add ecx, ebp
+mov [rsp+((1024+7-(5)) MOD (8))*4], ecx
+mov edx, [rsp+((1024+7-(6+2)) MOD (8))*4]
+xor edx, [rsp+((1024+7-(6+1)) MOD (8))*4]
+and edx, edi
+xor edx, [rsp+((1024+7-(6+1)) MOD (8))*4]
+mov ebp, edi
+ror edi, 6
+ror ebp, 25
+add edx, [rsi+(6)*4]
+add edx, [rsp+8*4+((1024+15-(6)) MOD (16))*4]
+add edx, [rsp+((1024+7-(6)) MOD (8))*4]
+xor ebp, edi
+ror edi, 5
+xor ebp, edi
+add edx, ebp
+mov ebx, ecx
+xor ecx, [rsp+((1024+7-(6+6)) MOD (8))*4]
+and eax, ecx
+xor eax, [rsp+((1024+7-(6+6)) MOD (8))*4]
+mov ebp, ebx
+ror ebx, 2
+add eax, edx
+add edx, [rsp+((1024+7-(6+4)) MOD (8))*4]
+mov [rsp+((1024+7-(6+4)) MOD (8))*4], edx
+ror ebp, 22
+xor ebp, ebx
+ror ebx, 11
+xor ebp, ebx
+add eax, ebp
+mov [rsp+((1024+7-(6)) MOD (8))*4], eax
+mov edi, [rsp+((1024+7-(7+2)) MOD (8))*4]
+xor edi, [rsp+((1024+7-(7+1)) MOD (8))*4]
+and edi, edx
+xor edi, [rsp+((1024+7-(7+1)) MOD (8))*4]
+mov ebp, edx
+ror edx, 6
+ror ebp, 25
+add edi, [rsi+(7)*4]
+add edi, [rsp+8*4+((1024+15-(7)) MOD (16))*4]
+add edi, [rsp+((1024+7-(7)) MOD (8))*4]
+xor ebp, edx
+ror edx, 5
+xor ebp, edx
+add edi, ebp
+mov ebx, eax
+xor eax, [rsp+((1024+7-(7+6)) MOD (8))*4]
+and ecx, eax
+xor ecx, [rsp+((1024+7-(7+6)) MOD (8))*4]
+mov ebp, ebx
+ror ebx, 2
+add ecx, edi
+add edi, [rsp+((1024+7-(7+4)) MOD (8))*4]
+mov [rsp+((1024+7-(7+4)) MOD (8))*4], edi
+ror ebp, 22
+xor ebp, ebx
+ror ebx, 11
+xor ebp, ebx
+add ecx, ebp
+mov [rsp+((1024+7-(7)) MOD (8))*4], ecx
+mov edx, [rsp+((1024+7-(8+2)) MOD (8))*4]
+xor edx, [rsp+((1024+7-(8+1)) MOD (8))*4]
+and edx, edi
+xor edx, [rsp+((1024+7-(8+1)) MOD (8))*4]
+mov ebp, edi
+ror edi, 6
+ror ebp, 25
+add edx, [rsi+(8)*4]
+add edx, [rsp+8*4+((1024+15-(8)) MOD (16))*4]
+add edx, [rsp+((1024+7-(8)) MOD (8))*4]
+xor ebp, edi
+ror edi, 5
+xor ebp, edi
+add edx, ebp
+mov ebx, ecx
+xor ecx, [rsp+((1024+7-(8+6)) MOD (8))*4]
+and eax, ecx
+xor eax, [rsp+((1024+7-(8+6)) MOD (8))*4]
+mov ebp, ebx
+ror ebx, 2
+add eax, edx
+add edx, [rsp+((1024+7-(8+4)) MOD (8))*4]
+mov [rsp+((1024+7-(8+4)) MOD (8))*4], edx
+ror ebp, 22
+xor ebp, ebx
+ror ebx, 11
+xor ebp, ebx
+add eax, ebp
+mov [rsp+((1024+7-(8)) MOD (8))*4], eax
+mov edi, [rsp+((1024+7-(9+2)) MOD (8))*4]
+xor edi, [rsp+((1024+7-(9+1)) MOD (8))*4]
+and edi, edx
+xor edi, [rsp+((1024+7-(9+1)) MOD (8))*4]
+mov ebp, edx
+ror edx, 6
+ror ebp, 25
+add edi, [rsi+(9)*4]
+add edi, [rsp+8*4+((1024+15-(9)) MOD (16))*4]
+add edi, [rsp+((1024+7-(9)) MOD (8))*4]
+xor ebp, edx
+ror edx, 5
+xor ebp, edx
+add edi, ebp
+mov ebx, eax
+xor eax, [rsp+((1024+7-(9+6)) MOD (8))*4]
+and ecx, eax
+xor ecx, [rsp+((1024+7-(9+6)) MOD (8))*4]
+mov ebp, ebx
+ror ebx, 2
+add ecx, edi
+add edi, [rsp+((1024+7-(9+4)) MOD (8))*4]
+mov [rsp+((1024+7-(9+4)) MOD (8))*4], edi
+ror ebp, 22
+xor ebp, ebx
+ror ebx, 11
+xor ebp, ebx
+add ecx, ebp
+mov [rsp+((1024+7-(9)) MOD (8))*4], ecx
+mov edx, [rsp+((1024+7-(10+2)) MOD (8))*4]
+xor edx, [rsp+((1024+7-(10+1)) MOD (8))*4]
+and edx, edi
+xor edx, [rsp+((1024+7-(10+1)) MOD (8))*4]
+mov ebp, edi
+ror edi, 6
+ror ebp, 25
+add edx, [rsi+(10)*4]
+add edx, [rsp+8*4+((1024+15-(10)) MOD (16))*4]
+add edx, [rsp+((1024+7-(10)) MOD (8))*4]
+xor ebp, edi
+ror edi, 5
+xor ebp, edi
+add edx, ebp
+mov ebx, ecx
+xor ecx, [rsp+((1024+7-(10+6)) MOD (8))*4]
+and eax, ecx
+xor eax, [rsp+((1024+7-(10+6)) MOD (8))*4]
+mov ebp, ebx
+ror ebx, 2
+add eax, edx
+add edx, [rsp+((1024+7-(10+4)) MOD (8))*4]
+mov [rsp+((1024+7-(10+4)) MOD (8))*4], edx
+ror ebp, 22
+xor ebp, ebx
+ror ebx, 11
+xor ebp, ebx
+add eax, ebp
+mov [rsp+((1024+7-(10)) MOD (8))*4], eax
+mov edi, [rsp+((1024+7-(11+2)) MOD (8))*4]
+xor edi, [rsp+((1024+7-(11+1)) MOD (8))*4]
+and edi, edx
+xor edi, [rsp+((1024+7-(11+1)) MOD (8))*4]
+mov ebp, edx
+ror edx, 6
+ror ebp, 25
+add edi, [rsi+(11)*4]
+add edi, [rsp+8*4+((1024+15-(11)) MOD (16))*4]
+add edi, [rsp+((1024+7-(11)) MOD (8))*4]
+xor ebp, edx
+ror edx, 5
+xor ebp, edx
+add edi, ebp
+mov ebx, eax
+xor eax, [rsp+((1024+7-(11+6)) MOD (8))*4]
+and ecx, eax
+xor ecx, [rsp+((1024+7-(11+6)) MOD (8))*4]
+mov ebp, ebx
+ror ebx, 2
+add ecx, edi
+add edi, [rsp+((1024+7-(11+4)) MOD (8))*4]
+mov [rsp+((1024+7-(11+4)) MOD (8))*4], edi
+ror ebp, 22
+xor ebp, ebx
+ror ebx, 11
+xor ebp, ebx
+add ecx, ebp
+mov [rsp+((1024+7-(11)) MOD (8))*4], ecx
+mov edx, [rsp+((1024+7-(12+2)) MOD (8))*4]
+xor edx, [rsp+((1024+7-(12+1)) MOD (8))*4]
+and edx, edi
+xor edx, [rsp+((1024+7-(12+1)) MOD (8))*4]
+mov ebp, edi
+ror edi, 6
+ror ebp, 25
+add edx, [rsi+(12)*4]
+add edx, [rsp+8*4+((1024+15-(12)) MOD (16))*4]
+add edx, [rsp+((1024+7-(12)) MOD (8))*4]
+xor ebp, edi
+ror edi, 5
+xor ebp, edi
+add edx, ebp
+mov ebx, ecx
+xor ecx, [rsp+((1024+7-(12+6)) MOD (8))*4]
+and eax, ecx
+xor eax, [rsp+((1024+7-(12+6)) MOD (8))*4]
+mov ebp, ebx
+ror ebx, 2
+add eax, edx
+add edx, [rsp+((1024+7-(12+4)) MOD (8))*4]
+mov [rsp+((1024+7-(12+4)) MOD (8))*4], edx
+ror ebp, 22
+xor ebp, ebx
+ror ebx, 11
+xor ebp, ebx
+add eax, ebp
+mov [rsp+((1024+7-(12)) MOD (8))*4], eax
+mov edi, [rsp+((1024+7-(13+2)) MOD (8))*4]
+xor edi, [rsp+((1024+7-(13+1)) MOD (8))*4]
+and edi, edx
+xor edi, [rsp+((1024+7-(13+1)) MOD (8))*4]
+mov ebp, edx
+ror edx, 6
+ror ebp, 25
+add edi, [rsi+(13)*4]
+add edi, [rsp+8*4+((1024+15-(13)) MOD (16))*4]
+add edi, [rsp+((1024+7-(13)) MOD (8))*4]
+xor ebp, edx
+ror edx, 5
+xor ebp, edx
+add edi, ebp
+mov ebx, eax
+xor eax, [rsp+((1024+7-(13+6)) MOD (8))*4]
+and ecx, eax
+xor ecx, [rsp+((1024+7-(13+6)) MOD (8))*4]
+mov ebp, ebx
+ror ebx, 2
+add ecx, edi
+add edi, [rsp+((1024+7-(13+4)) MOD (8))*4]
+mov [rsp+((1024+7-(13+4)) MOD (8))*4], edi
+ror ebp, 22
+xor ebp, ebx
+ror ebx, 11
+xor ebp, ebx
+add ecx, ebp
+mov [rsp+((1024+7-(13)) MOD (8))*4], ecx
+mov edx, [rsp+((1024+7-(14+2)) MOD (8))*4]
+xor edx, [rsp+((1024+7-(14+1)) MOD (8))*4]
+and edx, edi
+xor edx, [rsp+((1024+7-(14+1)) MOD (8))*4]
+mov ebp, edi
+ror edi, 6
+ror ebp, 25
+add edx, [rsi+(14)*4]
+add edx, [rsp+8*4+((1024+15-(14)) MOD (16))*4]
+add edx, [rsp+((1024+7-(14)) MOD (8))*4]
+xor ebp, edi
+ror edi, 5
+xor ebp, edi
+add edx, ebp
+mov ebx, ecx
+xor ecx, [rsp+((1024+7-(14+6)) MOD (8))*4]
+and eax, ecx
+xor eax, [rsp+((1024+7-(14+6)) MOD (8))*4]
+mov ebp, ebx
+ror ebx, 2
+add eax, edx
+add edx, [rsp+((1024+7-(14+4)) MOD (8))*4]
+mov [rsp+((1024+7-(14+4)) MOD (8))*4], edx
+ror ebp, 22
+xor ebp, ebx
+ror ebx, 11
+xor ebp, ebx
+add eax, ebp
+mov [rsp+((1024+7-(14)) MOD (8))*4], eax
+mov edi, [rsp+((1024+7-(15+2)) MOD (8))*4]
+xor edi, [rsp+((1024+7-(15+1)) MOD (8))*4]
+and edi, edx
+xor edi, [rsp+((1024+7-(15+1)) MOD (8))*4]
+mov ebp, edx
+ror edx, 6
+ror ebp, 25
+add edi, [rsi+(15)*4]
+add edi, [rsp+8*4+((1024+15-(15)) MOD (16))*4]
+add edi, [rsp+((1024+7-(15)) MOD (8))*4]
+xor ebp, edx
+ror edx, 5
+xor ebp, edx
+add edi, ebp
+mov ebx, eax
+xor eax, [rsp+((1024+7-(15+6)) MOD (8))*4]
+and ecx, eax
+xor ecx, [rsp+((1024+7-(15+6)) MOD (8))*4]
+mov ebp, ebx
+ror ebx, 2
+add ecx, edi
+add edi, [rsp+((1024+7-(15+4)) MOD (8))*4]
+mov [rsp+((1024+7-(15+4)) MOD (8))*4], edi
+ror ebp, 22
+xor ebp, ebx
+ror ebx, 11
+xor ebp, ebx
+add ecx, ebp
+mov [rsp+((1024+7-(15)) MOD (8))*4], ecx
+label1:
+add rsi, 4*16
+mov edx, [rsp+((1024+7-(0+2)) MOD (8))*4]
+xor edx, [rsp+((1024+7-(0+1)) MOD (8))*4]
+and edx, edi
+xor edx, [rsp+((1024+7-(0+1)) MOD (8))*4]
+mov ebp, edi
+ror edi, 6
+ror ebp, 25
+xor ebp, edi
+ror edi, 5
+xor ebp, edi
+add edx, ebp
+mov ebp, [rsp+8*4+((1024+15-((0)-2)) MOD (16))*4]
+mov edi, [rsp+8*4+((1024+15-((0)-15)) MOD (16))*4]
+mov ebx, ebp
+shr ebp, 10
+ror ebx, 17
+xor ebp, ebx
+ror ebx, 2
+xor ebx, ebp
+add ebx, [rsp+8*4+((1024+15-((0)-7)) MOD (16))*4]
+mov ebp, edi
+shr ebp, 3
+ror edi, 7
+add ebx, [rsp+8*4+((1024+15-(0)) MOD (16))*4]
+xor ebp, edi
+add edx, [rsi+(0)*4]
+ror edi, 11
+add edx, [rsp+((1024+7-(0)) MOD (8))*4]
+xor ebp, edi
+add ebp, ebx
+mov [rsp+8*4+((1024+15-(0)) MOD (16))*4], ebp
+add edx, ebp
+mov ebx, ecx
+xor ecx, [rsp+((1024+7-(0+6)) MOD (8))*4]
+and eax, ecx
+xor eax, [rsp+((1024+7-(0+6)) MOD (8))*4]
+mov ebp, ebx
+ror ebx, 2
+add eax, edx
+add edx, [rsp+((1024+7-(0+4)) MOD (8))*4]
+mov [rsp+((1024+7-(0+4)) MOD (8))*4], edx
+ror ebp, 22
+xor ebp, ebx
+ror ebx, 11
+xor ebp, ebx
+add eax, ebp
+mov [rsp+((1024+7-(0)) MOD (8))*4], eax
+mov edi, [rsp+((1024+7-(1+2)) MOD (8))*4]
+xor edi, [rsp+((1024+7-(1+1)) MOD (8))*4]
+and edi, edx
+xor edi, [rsp+((1024+7-(1+1)) MOD (8))*4]
+mov ebp, edx
+ror edx, 6
+ror ebp, 25
+xor ebp, edx
+ror edx, 5
+xor ebp, edx
+add edi, ebp
+mov ebp, [rsp+8*4+((1024+15-((1)-2)) MOD (16))*4]
+mov edx, [rsp+8*4+((1024+15-((1)-15)) MOD (16))*4]
+mov ebx, ebp
+shr ebp, 10
+ror ebx, 17
+xor ebp, ebx
+ror ebx, 2
+xor ebx, ebp
+add ebx, [rsp+8*4+((1024+15-((1)-7)) MOD (16))*4]
+mov ebp, edx
+shr ebp, 3
+ror edx, 7
+add ebx, [rsp+8*4+((1024+15-(1)) MOD (16))*4]
+xor ebp, edx
+add edi, [rsi+(1)*4]
+ror edx, 11
+add edi, [rsp+((1024+7-(1)) MOD (8))*4]
+xor ebp, edx
+add ebp, ebx
+mov [rsp+8*4+((1024+15-(1)) MOD (16))*4], ebp
+add edi, ebp
+mov ebx, eax
+xor eax, [rsp+((1024+7-(1+6)) MOD (8))*4]
+and ecx, eax
+xor ecx, [rsp+((1024+7-(1+6)) MOD (8))*4]
+mov ebp, ebx
+ror ebx, 2
+add ecx, edi
+add edi, [rsp+((1024+7-(1+4)) MOD (8))*4]
+mov [rsp+((1024+7-(1+4)) MOD (8))*4], edi
+ror ebp, 22
+xor ebp, ebx
+ror ebx, 11
+xor ebp, ebx
+add ecx, ebp
+mov [rsp+((1024+7-(1)) MOD (8))*4], ecx
+mov edx, [rsp+((1024+7-(2+2)) MOD (8))*4]
+xor edx, [rsp+((1024+7-(2+1)) MOD (8))*4]
+and edx, edi
+xor edx, [rsp+((1024+7-(2+1)) MOD (8))*4]
+mov ebp, edi
+ror edi, 6
+ror ebp, 25
+xor ebp, edi
+ror edi, 5
+xor ebp, edi
+add edx, ebp
+mov ebp, [rsp+8*4+((1024+15-((2)-2)) MOD (16))*4]
+mov edi, [rsp+8*4+((1024+15-((2)-15)) MOD (16))*4]
+mov ebx, ebp
+shr ebp, 10
+ror ebx, 17
+xor ebp, ebx
+ror ebx, 2
+xor ebx, ebp
+add ebx, [rsp+8*4+((1024+15-((2)-7)) MOD (16))*4]
+mov ebp, edi
+shr ebp, 3
+ror edi, 7
+add ebx, [rsp+8*4+((1024+15-(2)) MOD (16))*4]
+xor ebp, edi
+add edx, [rsi+(2)*4]
+ror edi, 11
+add edx, [rsp+((1024+7-(2)) MOD (8))*4]
+xor ebp, edi
+add ebp, ebx
+mov [rsp+8*4+((1024+15-(2)) MOD (16))*4], ebp
+add edx, ebp
+mov ebx, ecx
+xor ecx, [rsp+((1024+7-(2+6)) MOD (8))*4]
+and eax, ecx
+xor eax, [rsp+((1024+7-(2+6)) MOD (8))*4]
+mov ebp, ebx
+ror ebx, 2
+add eax, edx
+add edx, [rsp+((1024+7-(2+4)) MOD (8))*4]
+mov [rsp+((1024+7-(2+4)) MOD (8))*4], edx
+ror ebp, 22
+xor ebp, ebx
+ror ebx, 11
+xor ebp, ebx
+add eax, ebp
+mov [rsp+((1024+7-(2)) MOD (8))*4], eax
+mov edi, [rsp+((1024+7-(3+2)) MOD (8))*4]
+xor edi, [rsp+((1024+7-(3+1)) MOD (8))*4]
+and edi, edx
+xor edi, [rsp+((1024+7-(3+1)) MOD (8))*4]
+mov ebp, edx
+ror edx, 6
+ror ebp, 25
+xor ebp, edx
+ror edx, 5
+xor ebp, edx
+add edi, ebp
+mov ebp, [rsp+8*4+((1024+15-((3)-2)) MOD (16))*4]
+mov edx, [rsp+8*4+((1024+15-((3)-15)) MOD (16))*4]
+mov ebx, ebp
+shr ebp, 10
+ror ebx, 17
+xor ebp, ebx
+ror ebx, 2
+xor ebx, ebp
+add ebx, [rsp+8*4+((1024+15-((3)-7)) MOD (16))*4]
+mov ebp, edx
+shr ebp, 3
+ror edx, 7
+add ebx, [rsp+8*4+((1024+15-(3)) MOD (16))*4]
+xor ebp, edx
+add edi, [rsi+(3)*4]
+ror edx, 11
+add edi, [rsp+((1024+7-(3)) MOD (8))*4]
+xor ebp, edx
+add ebp, ebx
+mov [rsp+8*4+((1024+15-(3)) MOD (16))*4], ebp
+add edi, ebp
+mov ebx, eax
+xor eax, [rsp+((1024+7-(3+6)) MOD (8))*4]
+and ecx, eax
+xor ecx, [rsp+((1024+7-(3+6)) MOD (8))*4]
+mov ebp, ebx
+ror ebx, 2
+add ecx, edi
+add edi, [rsp+((1024+7-(3+4)) MOD (8))*4]
+mov [rsp+((1024+7-(3+4)) MOD (8))*4], edi
+ror ebp, 22
+xor ebp, ebx
+ror ebx, 11
+xor ebp, ebx
+add ecx, ebp
+mov [rsp+((1024+7-(3)) MOD (8))*4], ecx
+mov edx, [rsp+((1024+7-(4+2)) MOD (8))*4]
+xor edx, [rsp+((1024+7-(4+1)) MOD (8))*4]
+and edx, edi
+xor edx, [rsp+((1024+7-(4+1)) MOD (8))*4]
+mov ebp, edi
+ror edi, 6
+ror ebp, 25
+xor ebp, edi
+ror edi, 5
+xor ebp, edi
+add edx, ebp
+mov ebp, [rsp+8*4+((1024+15-((4)-2)) MOD (16))*4]
+mov edi, [rsp+8*4+((1024+15-((4)-15)) MOD (16))*4]
+mov ebx, ebp
+shr ebp, 10
+ror ebx, 17
+xor ebp, ebx
+ror ebx, 2
+xor ebx, ebp
+add ebx, [rsp+8*4+((1024+15-((4)-7)) MOD (16))*4]
+mov ebp, edi
+shr ebp, 3
+ror edi, 7
+add ebx, [rsp+8*4+((1024+15-(4)) MOD (16))*4]
+xor ebp, edi
+add edx, [rsi+(4)*4]
+ror edi, 11
+add edx, [rsp+((1024+7-(4)) MOD (8))*4]
+xor ebp, edi
+add ebp, ebx
+mov [rsp+8*4+((1024+15-(4)) MOD (16))*4], ebp
+add edx, ebp
+mov ebx, ecx
+xor ecx, [rsp+((1024+7-(4+6)) MOD (8))*4]
+and eax, ecx
+xor eax, [rsp+((1024+7-(4+6)) MOD (8))*4]
+mov ebp, ebx
+ror ebx, 2
+add eax, edx
+add edx, [rsp+((1024+7-(4+4)) MOD (8))*4]
+mov [rsp+((1024+7-(4+4)) MOD (8))*4], edx
+ror ebp, 22
+xor ebp, ebx
+ror ebx, 11
+xor ebp, ebx
+add eax, ebp
+mov [rsp+((1024+7-(4)) MOD (8))*4], eax
+mov edi, [rsp+((1024+7-(5+2)) MOD (8))*4]
+xor edi, [rsp+((1024+7-(5+1)) MOD (8))*4]
+and edi, edx
+xor edi, [rsp+((1024+7-(5+1)) MOD (8))*4]
+mov ebp, edx
+ror edx, 6
+ror ebp, 25
+xor ebp, edx
+ror edx, 5
+xor ebp, edx
+add edi, ebp
+mov ebp, [rsp+8*4+((1024+15-((5)-2)) MOD (16))*4]
+mov edx, [rsp+8*4+((1024+15-((5)-15)) MOD (16))*4]
+mov ebx, ebp
+shr ebp, 10
+ror ebx, 17
+xor ebp, ebx
+ror ebx, 2
+xor ebx, ebp
+add ebx, [rsp+8*4+((1024+15-((5)-7)) MOD (16))*4]
+mov ebp, edx
+shr ebp, 3
+ror edx, 7
+add ebx, [rsp+8*4+((1024+15-(5)) MOD (16))*4]
+xor ebp, edx
+add edi, [rsi+(5)*4]
+ror edx, 11
+add edi, [rsp+((1024+7-(5)) MOD (8))*4]
+xor ebp, edx
+add ebp, ebx
+mov [rsp+8*4+((1024+15-(5)) MOD (16))*4], ebp
+add edi, ebp
+mov ebx, eax
+xor eax, [rsp+((1024+7-(5+6)) MOD (8))*4]
+and ecx, eax
+xor ecx, [rsp+((1024+7-(5+6)) MOD (8))*4]
+mov ebp, ebx
+ror ebx, 2
+add ecx, edi
+add edi, [rsp+((1024+7-(5+4)) MOD (8))*4]
+mov [rsp+((1024+7-(5+4)) MOD (8))*4], edi
+ror ebp, 22
+xor ebp, ebx
+ror ebx, 11
+xor ebp, ebx
+add ecx, ebp
+mov [rsp+((1024+7-(5)) MOD (8))*4], ecx
+mov edx, [rsp+((1024+7-(6+2)) MOD (8))*4]
+xor edx, [rsp+((1024+7-(6+1)) MOD (8))*4]
+and edx, edi
+xor edx, [rsp+((1024+7-(6+1)) MOD (8))*4]
+mov ebp, edi
+ror edi, 6
+ror ebp, 25
+xor ebp, edi
+ror edi, 5
+xor ebp, edi
+add edx, ebp
+mov ebp, [rsp+8*4+((1024+15-((6)-2)) MOD (16))*4]
+mov edi, [rsp+8*4+((1024+15-((6)-15)) MOD (16))*4]
+mov ebx, ebp
+shr ebp, 10
+ror ebx, 17
+xor ebp, ebx
+ror ebx, 2
+xor ebx, ebp
+add ebx, [rsp+8*4+((1024+15-((6)-7)) MOD (16))*4]
+mov ebp, edi
+shr ebp, 3
+ror edi, 7
+add ebx, [rsp+8*4+((1024+15-(6)) MOD (16))*4]
+xor ebp, edi
+add edx, [rsi+(6)*4]
+ror edi, 11
+add edx, [rsp+((1024+7-(6)) MOD (8))*4]
+xor ebp, edi
+add ebp, ebx
+mov [rsp+8*4+((1024+15-(6)) MOD (16))*4], ebp
+add edx, ebp
+mov ebx, ecx
+xor ecx, [rsp+((1024+7-(6+6)) MOD (8))*4]
+and eax, ecx
+xor eax, [rsp+((1024+7-(6+6)) MOD (8))*4]
+mov ebp, ebx
+ror ebx, 2
+add eax, edx
+add edx, [rsp+((1024+7-(6+4)) MOD (8))*4]
+mov [rsp+((1024+7-(6+4)) MOD (8))*4], edx
+ror ebp, 22
+xor ebp, ebx
+ror ebx, 11
+xor ebp, ebx
+add eax, ebp
+mov [rsp+((1024+7-(6)) MOD (8))*4], eax
+mov edi, [rsp+((1024+7-(7+2)) MOD (8))*4]
+xor edi, [rsp+((1024+7-(7+1)) MOD (8))*4]
+and edi, edx
+xor edi, [rsp+((1024+7-(7+1)) MOD (8))*4]
+mov ebp, edx
+ror edx, 6
+ror ebp, 25
+xor ebp, edx
+ror edx, 5
+xor ebp, edx
+add edi, ebp
+mov ebp, [rsp+8*4+((1024+15-((7)-2)) MOD (16))*4]
+mov edx, [rsp+8*4+((1024+15-((7)-15)) MOD (16))*4]
+mov ebx, ebp
+shr ebp, 10
+ror ebx, 17
+xor ebp, ebx
+ror ebx, 2
+xor ebx, ebp
+add ebx, [rsp+8*4+((1024+15-((7)-7)) MOD (16))*4]
+mov ebp, edx
+shr ebp, 3
+ror edx, 7
+add ebx, [rsp+8*4+((1024+15-(7)) MOD (16))*4]
+xor ebp, edx
+add edi, [rsi+(7)*4]
+ror edx, 11
+add edi, [rsp+((1024+7-(7)) MOD (8))*4]
+xor ebp, edx
+add ebp, ebx
+mov [rsp+8*4+((1024+15-(7)) MOD (16))*4], ebp
+add edi, ebp
+mov ebx, eax
+xor eax, [rsp+((1024+7-(7+6)) MOD (8))*4]
+and ecx, eax
+xor ecx, [rsp+((1024+7-(7+6)) MOD (8))*4]
+mov ebp, ebx
+ror ebx, 2
+add ecx, edi
+add edi, [rsp+((1024+7-(7+4)) MOD (8))*4]
+mov [rsp+((1024+7-(7+4)) MOD (8))*4], edi
+ror ebp, 22
+xor ebp, ebx
+ror ebx, 11
+xor ebp, ebx
+add ecx, ebp
+mov [rsp+((1024+7-(7)) MOD (8))*4], ecx
+mov edx, [rsp+((1024+7-(8+2)) MOD (8))*4]
+xor edx, [rsp+((1024+7-(8+1)) MOD (8))*4]
+and edx, edi
+xor edx, [rsp+((1024+7-(8+1)) MOD (8))*4]
+mov ebp, edi
+ror edi, 6
+ror ebp, 25
+xor ebp, edi
+ror edi, 5
+xor ebp, edi
+add edx, ebp
+mov ebp, [rsp+8*4+((1024+15-((8)-2)) MOD (16))*4]
+mov edi, [rsp+8*4+((1024+15-((8)-15)) MOD (16))*4]
+mov ebx, ebp
+shr ebp, 10
+ror ebx, 17
+xor ebp, ebx
+ror ebx, 2
+xor ebx, ebp
+add ebx, [rsp+8*4+((1024+15-((8)-7)) MOD (16))*4]
+mov ebp, edi
+shr ebp, 3
+ror edi, 7
+add ebx, [rsp+8*4+((1024+15-(8)) MOD (16))*4]
+xor ebp, edi
+add edx, [rsi+(8)*4]
+ror edi, 11
+add edx, [rsp+((1024+7-(8)) MOD (8))*4]
+xor ebp, edi
+add ebp, ebx
+mov [rsp+8*4+((1024+15-(8)) MOD (16))*4], ebp
+add edx, ebp
+mov ebx, ecx
+xor ecx, [rsp+((1024+7-(8+6)) MOD (8))*4]
+and eax, ecx
+xor eax, [rsp+((1024+7-(8+6)) MOD (8))*4]
+mov ebp, ebx
+ror ebx, 2
+add eax, edx
+add edx, [rsp+((1024+7-(8+4)) MOD (8))*4]
+mov [rsp+((1024+7-(8+4)) MOD (8))*4], edx
+ror ebp, 22
+xor ebp, ebx
+ror ebx, 11
+xor ebp, ebx
+add eax, ebp
+mov [rsp+((1024+7-(8)) MOD (8))*4], eax
+mov edi, [rsp+((1024+7-(9+2)) MOD (8))*4]
+xor edi, [rsp+((1024+7-(9+1)) MOD (8))*4]
+and edi, edx
+xor edi, [rsp+((1024+7-(9+1)) MOD (8))*4]
+mov ebp, edx
+ror edx, 6
+ror ebp, 25
+xor ebp, edx
+ror edx, 5
+xor ebp, edx
+add edi, ebp
+mov ebp, [rsp+8*4+((1024+15-((9)-2)) MOD (16))*4]
+mov edx, [rsp+8*4+((1024+15-((9)-15)) MOD (16))*4]
+mov ebx, ebp
+shr ebp, 10
+ror ebx, 17
+xor ebp, ebx
+ror ebx, 2
+xor ebx, ebp
+add ebx, [rsp+8*4+((1024+15-((9)-7)) MOD (16))*4]
+mov ebp, edx
+shr ebp, 3
+ror edx, 7
+add ebx, [rsp+8*4+((1024+15-(9)) MOD (16))*4]
+xor ebp, edx
+add edi, [rsi+(9)*4]
+ror edx, 11
+add edi, [rsp+((1024+7-(9)) MOD (8))*4]
+xor ebp, edx
+add ebp, ebx
+mov [rsp+8*4+((1024+15-(9)) MOD (16))*4], ebp
+add edi, ebp
+mov ebx, eax
+xor eax, [rsp+((1024+7-(9+6)) MOD (8))*4]
+and ecx, eax
+xor ecx, [rsp+((1024+7-(9+6)) MOD (8))*4]
+mov ebp, ebx
+ror ebx, 2
+add ecx, edi
+add edi, [rsp+((1024+7-(9+4)) MOD (8))*4]
+mov [rsp+((1024+7-(9+4)) MOD (8))*4], edi
+ror ebp, 22
+xor ebp, ebx
+ror ebx, 11
+xor ebp, ebx
+add ecx, ebp
+mov [rsp+((1024+7-(9)) MOD (8))*4], ecx
+mov edx, [rsp+((1024+7-(10+2)) MOD (8))*4]
+xor edx, [rsp+((1024+7-(10+1)) MOD (8))*4]
+and edx, edi
+xor edx, [rsp+((1024+7-(10+1)) MOD (8))*4]
+mov ebp, edi
+ror edi, 6
+ror ebp, 25
+xor ebp, edi
+ror edi, 5
+xor ebp, edi
+add edx, ebp
+mov ebp, [rsp+8*4+((1024+15-((10)-2)) MOD (16))*4]
+mov edi, [rsp+8*4+((1024+15-((10)-15)) MOD (16))*4]
+mov ebx, ebp
+shr ebp, 10
+ror ebx, 17
+xor ebp, ebx
+ror ebx, 2
+xor ebx, ebp
+add ebx, [rsp+8*4+((1024+15-((10)-7)) MOD (16))*4]
+mov ebp, edi
+shr ebp, 3
+ror edi, 7
+add ebx, [rsp+8*4+((1024+15-(10)) MOD (16))*4]
+xor ebp, edi
+add edx, [rsi+(10)*4]
+ror edi, 11
+add edx, [rsp+((1024+7-(10)) MOD (8))*4]
+xor ebp, edi
+add ebp, ebx
+mov [rsp+8*4+((1024+15-(10)) MOD (16))*4], ebp
+add edx, ebp
+mov ebx, ecx
+xor ecx, [rsp+((1024+7-(10+6)) MOD (8))*4]
+and eax, ecx
+xor eax, [rsp+((1024+7-(10+6)) MOD (8))*4]
+mov ebp, ebx
+ror ebx, 2
+add eax, edx
+add edx, [rsp+((1024+7-(10+4)) MOD (8))*4]
+mov [rsp+((1024+7-(10+4)) MOD (8))*4], edx
+ror ebp, 22
+xor ebp, ebx
+ror ebx, 11
+xor ebp, ebx
+add eax, ebp
+mov [rsp+((1024+7-(10)) MOD (8))*4], eax
+mov edi, [rsp+((1024+7-(11+2)) MOD (8))*4]
+xor edi, [rsp+((1024+7-(11+1)) MOD (8))*4]
+and edi, edx
+xor edi, [rsp+((1024+7-(11+1)) MOD (8))*4]
+mov ebp, edx
+ror edx, 6
+ror ebp, 25
+xor ebp, edx
+ror edx, 5
+xor ebp, edx
+add edi, ebp
+mov ebp, [rsp+8*4+((1024+15-((11)-2)) MOD (16))*4]
+mov edx, [rsp+8*4+((1024+15-((11)-15)) MOD (16))*4]
+mov ebx, ebp
+shr ebp, 10
+ror ebx, 17
+xor ebp, ebx
+ror ebx, 2
+xor ebx, ebp
+add ebx, [rsp+8*4+((1024+15-((11)-7)) MOD (16))*4]
+mov ebp, edx
+shr ebp, 3
+ror edx, 7
+add ebx, [rsp+8*4+((1024+15-(11)) MOD (16))*4]
+xor ebp, edx
+add edi, [rsi+(11)*4]
+ror edx, 11
+add edi, [rsp+((1024+7-(11)) MOD (8))*4]
+xor ebp, edx
+add ebp, ebx
+mov [rsp+8*4+((1024+15-(11)) MOD (16))*4], ebp
+add edi, ebp
+mov ebx, eax
+xor eax, [rsp+((1024+7-(11+6)) MOD (8))*4]
+and ecx, eax
+xor ecx, [rsp+((1024+7-(11+6)) MOD (8))*4]
+mov ebp, ebx
+ror ebx, 2
+add ecx, edi
+add edi, [rsp+((1024+7-(11+4)) MOD (8))*4]
+mov [rsp+((1024+7-(11+4)) MOD (8))*4], edi
+ror ebp, 22
+xor ebp, ebx
+ror ebx, 11
+xor ebp, ebx
+add ecx, ebp
+mov [rsp+((1024+7-(11)) MOD (8))*4], ecx
+mov edx, [rsp+((1024+7-(12+2)) MOD (8))*4]
+xor edx, [rsp+((1024+7-(12+1)) MOD (8))*4]
+and edx, edi
+xor edx, [rsp+((1024+7-(12+1)) MOD (8))*4]
+mov ebp, edi
+ror edi, 6
+ror ebp, 25
+xor ebp, edi
+ror edi, 5
+xor ebp, edi
+add edx, ebp
+mov ebp, [rsp+8*4+((1024+15-((12)-2)) MOD (16))*4]
+mov edi, [rsp+8*4+((1024+15-((12)-15)) MOD (16))*4]
+mov ebx, ebp
+shr ebp, 10
+ror ebx, 17
+xor ebp, ebx
+ror ebx, 2
+xor ebx, ebp
+add ebx, [rsp+8*4+((1024+15-((12)-7)) MOD (16))*4]
+mov ebp, edi
+shr ebp, 3
+ror edi, 7
+add ebx, [rsp+8*4+((1024+15-(12)) MOD (16))*4]
+xor ebp, edi
+add edx, [rsi+(12)*4]
+ror edi, 11
+add edx, [rsp+((1024+7-(12)) MOD (8))*4]
+xor ebp, edi
+add ebp, ebx
+mov [rsp+8*4+((1024+15-(12)) MOD (16))*4], ebp
+add edx, ebp
+mov ebx, ecx
+xor ecx, [rsp+((1024+7-(12+6)) MOD (8))*4]
+and eax, ecx
+xor eax, [rsp+((1024+7-(12+6)) MOD (8))*4]
+mov ebp, ebx
+ror ebx, 2
+add eax, edx
+add edx, [rsp+((1024+7-(12+4)) MOD (8))*4]
+mov [rsp+((1024+7-(12+4)) MOD (8))*4], edx
+ror ebp, 22
+xor ebp, ebx
+ror ebx, 11
+xor ebp, ebx
+add eax, ebp
+mov [rsp+((1024+7-(12)) MOD (8))*4], eax
+mov edi, [rsp+((1024+7-(13+2)) MOD (8))*4]
+xor edi, [rsp+((1024+7-(13+1)) MOD (8))*4]
+and edi, edx
+xor edi, [rsp+((1024+7-(13+1)) MOD (8))*4]
+mov ebp, edx
+ror edx, 6
+ror ebp, 25
+xor ebp, edx
+ror edx, 5
+xor ebp, edx
+add edi, ebp
+mov ebp, [rsp+8*4+((1024+15-((13)-2)) MOD (16))*4]
+mov edx, [rsp+8*4+((1024+15-((13)-15)) MOD (16))*4]
+mov ebx, ebp
+shr ebp, 10
+ror ebx, 17
+xor ebp, ebx
+ror ebx, 2
+xor ebx, ebp
+add ebx, [rsp+8*4+((1024+15-((13)-7)) MOD (16))*4]
+mov ebp, edx
+shr ebp, 3
+ror edx, 7
+add ebx, [rsp+8*4+((1024+15-(13)) MOD (16))*4]
+xor ebp, edx
+add edi, [rsi+(13)*4]
+ror edx, 11
+add edi, [rsp+((1024+7-(13)) MOD (8))*4]
+xor ebp, edx
+add ebp, ebx
+mov [rsp+8*4+((1024+15-(13)) MOD (16))*4], ebp
+add edi, ebp
+mov ebx, eax
+xor eax, [rsp+((1024+7-(13+6)) MOD (8))*4]
+and ecx, eax
+xor ecx, [rsp+((1024+7-(13+6)) MOD (8))*4]
+mov ebp, ebx
+ror ebx, 2
+add ecx, edi
+add edi, [rsp+((1024+7-(13+4)) MOD (8))*4]
+mov [rsp+((1024+7-(13+4)) MOD (8))*4], edi
+ror ebp, 22
+xor ebp, ebx
+ror ebx, 11
+xor ebp, ebx
+add ecx, ebp
+mov [rsp+((1024+7-(13)) MOD (8))*4], ecx
+mov edx, [rsp+((1024+7-(14+2)) MOD (8))*4]
+xor edx, [rsp+((1024+7-(14+1)) MOD (8))*4]
+and edx, edi
+xor edx, [rsp+((1024+7-(14+1)) MOD (8))*4]
+mov ebp, edi
+ror edi, 6
+ror ebp, 25
+xor ebp, edi
+ror edi, 5
+xor ebp, edi
+add edx, ebp
+mov ebp, [rsp+8*4+((1024+15-((14)-2)) MOD (16))*4]
+mov edi, [rsp+8*4+((1024+15-((14)-15)) MOD (16))*4]
+mov ebx, ebp
+shr ebp, 10
+ror ebx, 17
+xor ebp, ebx
+ror ebx, 2
+xor ebx, ebp
+add ebx, [rsp+8*4+((1024+15-((14)-7)) MOD (16))*4]
+mov ebp, edi
+shr ebp, 3
+ror edi, 7
+add ebx, [rsp+8*4+((1024+15-(14)) MOD (16))*4]
+xor ebp, edi
+add edx, [rsi+(14)*4]
+ror edi, 11
+add edx, [rsp+((1024+7-(14)) MOD (8))*4]
+xor ebp, edi
+add ebp, ebx
+mov [rsp+8*4+((1024+15-(14)) MOD (16))*4], ebp
+add edx, ebp
+mov ebx, ecx
+xor ecx, [rsp+((1024+7-(14+6)) MOD (8))*4]
+and eax, ecx
+xor eax, [rsp+((1024+7-(14+6)) MOD (8))*4]
+mov ebp, ebx
+ror ebx, 2
+add eax, edx
+add edx, [rsp+((1024+7-(14+4)) MOD (8))*4]
+mov [rsp+((1024+7-(14+4)) MOD (8))*4], edx
+ror ebp, 22
+xor ebp, ebx
+ror ebx, 11
+xor ebp, ebx
+add eax, ebp
+mov [rsp+((1024+7-(14)) MOD (8))*4], eax
+mov edi, [rsp+((1024+7-(15+2)) MOD (8))*4]
+xor edi, [rsp+((1024+7-(15+1)) MOD (8))*4]
+and edi, edx
+xor edi, [rsp+((1024+7-(15+1)) MOD (8))*4]
+mov ebp, edx
+ror edx, 6
+ror ebp, 25
+xor ebp, edx
+ror edx, 5
+xor ebp, edx
+add edi, ebp
+mov ebp, [rsp+8*4+((1024+15-((15)-2)) MOD (16))*4]
+mov edx, [rsp+8*4+((1024+15-((15)-15)) MOD (16))*4]
+mov ebx, ebp
+shr ebp, 10
+ror ebx, 17
+xor ebp, ebx
+ror ebx, 2
+xor ebx, ebp
+add ebx, [rsp+8*4+((1024+15-((15)-7)) MOD (16))*4]
+mov ebp, edx
+shr ebp, 3
+ror edx, 7
+add ebx, [rsp+8*4+((1024+15-(15)) MOD (16))*4]
+xor ebp, edx
+add edi, [rsi+(15)*4]
+ror edx, 11
+add edi, [rsp+((1024+7-(15)) MOD (8))*4]
+xor ebp, edx
+add ebp, ebx
+mov [rsp+8*4+((1024+15-(15)) MOD (16))*4], ebp
+add edi, ebp
+mov ebx, eax
+xor eax, [rsp+((1024+7-(15+6)) MOD (8))*4]
+and ecx, eax
+xor ecx, [rsp+((1024+7-(15+6)) MOD (8))*4]
+mov ebp, ebx
+ror ebx, 2
+add ecx, edi
+add edi, [rsp+((1024+7-(15+4)) MOD (8))*4]
+mov [rsp+((1024+7-(15+4)) MOD (8))*4], edi
+ror ebp, 22
+xor ebp, ebx
+ror ebx, 11
+xor ebp, ebx
+add ecx, ebp
+mov [rsp+((1024+7-(15)) MOD (8))*4], ecx
+cmp rsi, [rsp+8*4+16*4+0*8]
+jne label1
+mov rcx, [rsp+8*4+16*4+1*8]
+movdqa xmm1, XMMWORD PTR [rcx+1*16]
+movdqa xmm0, XMMWORD PTR [rcx+0*16]
+paddd xmm1, [rsp+((1024+7-(0+3)) MOD (8))*4]
+paddd xmm0, [rsp+((1024+7-(0+7)) MOD (8))*4]
+movdqa [rcx+1*16], xmm1
+movdqa [rcx+0*16], xmm0
+mov rdx, [rsp+8*4+16*4+2*8]
+add rdx, 64
+mov [rsp+8*4+16*4+2*8], rdx
+cmp rdx, [rsp+8*4+16*4+3*8]
+jne label0
+add rsp, 8*4 + 16*4 + 4*8 + 8
+pop rbp
+pop rbx
+pop rdi
+pop rsi
+ret
+X86_SHA256_HashBlocks ENDP
+
+_TEXT ENDS
+END