summary | refs | log | tree | commit | diff
path: root/sha.cpp
diff options
context:
space:
mode:
author Jeffrey Walton <noloader@gmail.com> 2017-05-26 01:51:44 -0400
committer Jeffrey Walton <noloader@gmail.com> 2017-05-26 01:51:44 -0400
commit de1270656c275074dfc6c013484c157e9d62277b (patch)
tree b1aec7235e9cf59b223f3240fa0187f2feb64dfa /sha.cpp
parent bd7aa155a6c78e6e4381b883ccfca7c6ad1ad983 (diff)
download cryptopp-git-de1270656c275074dfc6c013484c157e9d62277b.tar.gz
Avoid extra ByteReverse when using Intel SHA extensions
This gains about 0.6 cycles per byte (cpb). SHA-1 is down to 1.7 to 1.9 cpb. SHA-256 is not affected.
Diffstat (limited to 'sha.cpp')
-rw-r--r-- sha.cpp 28
1 files changed, 26 insertions, 2 deletions
diff --git a/sha.cpp b/sha.cpp
index f64ff33d..9c691eb6 100644
--- a/sha.cpp
+++ b/sha.cpp
@@ -112,7 +112,7 @@ static void SHA1_SSE_SHA_Transform(word32 *state, const word32 *data)
ABCD = _mm_loadu_si128((__m128i*) state);
E0 = _mm_set_epi32(state[4], 0, 0, 0);
ABCD = _mm_shuffle_epi32(ABCD, 0x1B);
- MASK = _mm_set_epi8(3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12);
+ MASK = _mm_set_epi8(0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15);
// Save current hash
ABCD_SAVE = ABCD;
@@ -498,6 +498,30 @@ void SHA1::Transform(word32 *state, const word32 *data)
s_pfn(state, data);
}
+#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
+size_t SHA1::HashMultipleBlocks(const word32 *input, size_t length) // Hashes consecutive blocks read from input; length is a byte count. Returns the bytes left over after the last block processed.
+{
+ static const bool noReverse = HasSHA() || NativeByteOrderIs(this->GetByteOrder()); // function-local static: evaluated on the first call only and reused afterwards; true when HasSHA() is true or the hash's byte order is already native
+ const unsigned int blockSize = this->BlockSize(); // block size in bytes (it is subtracted from the byte count 'length' below)
+ word32* dataBuf = this->DataBuf(); // scratch buffer that receives the byte-reversed copy of a block
+ do // do/while: one block is hashed before 'length' is ever compared - presumably callers guarantee length >= blockSize on entry (TODO confirm)
+ {
+ if (noReverse)
+ this->HashEndianCorrectedBlock(input); // input is passed straight through, skipping the ByteReverse copy
+ else
+ {
+ ByteReverse(dataBuf, input, this->BlockSize()); // writes one block from input into dataBuf via ByteReverse (helper not shown here; presumably a per-word byte swap)
+ this->HashEndianCorrectedBlock(dataBuf);
+ }
+
+ input += blockSize/sizeof(word32); // advance one block; input is a word32 pointer, so the step is counted in words
+ length -= blockSize; // one block's worth of bytes consumed
+ }
+ while (length >= blockSize); // keep going while at least one full block remains
+ return length; // leftover byte count, smaller than blockSize
+}
+#endif
+
// *************************************************************
void SHA224::InitState(HashWordType *state)
@@ -1641,7 +1665,7 @@ CRYPTOPP_NAKED static void CRYPTOPP_FASTCALL SHA512_SSE2_Transform(word64 *state
// first 16 rounds
ASL(0)
- AS2( movq mm0, [edx+eax*8])
+ AS2( movq mm0, [edx+eax*8])
AS2( movq [esi+eax*8], mm0)
AS2( movq [esi+eax*8+16*8], mm0)
AS2( paddq mm0, [ebx+eax*8])