author    Jeffrey Walton <noloader@gmail.com>  2017-08-17 12:33:43 -0400
committer GitHub <noreply@github.com>          2017-08-17 12:33:43 -0400
commit    e2c377effd9a4eca0291c217e9c3256b033511dc (patch)
tree      a33fa7d52e14b923a034bafb9a05172a983a0e7c /sha.cpp
parent    74d21068a16c2b3e879336ae7eed180d4466e8b5 (diff)
download  cryptopp-git-e2c377effd9a4eca0291c217e9c3256b033511dc.tar.gz
Split source files to support Base Implementation + SIMD implementation (GH #461)
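The change follows a base-plus-SIMD split: sha.cpp keeps the portable C++ block functions and the runtime dispatch, while the SHA-NI and ARMv8 bodies move to a separately compiled SIMD source file and are only declared here. The sketch below condenses the SHA-1 side of the hunks that follow into the shape sha.cpp takes after the patch (the SHA-256 path is analogous); it is a summary, not a literal excerpt of the tree.

// Forward declarations of the relocated SIMD routines.
#if CRYPTOPP_SHANI_AVAILABLE
extern void SHA1_HashMultipleBlocks_SHANI(word32 *state, const word32 *data,
                                          size_t length, ByteOrder order);
#endif
#if CRYPTOPP_ARM_SHA_AVAILABLE
extern void SHA1_HashMultipleBlocks_ARMV8(word32 *state, const word32 *data,
                                          size_t length, ByteOrder order);
#endif

void SHA1::Transform(word32 *state, const word32 *data)
{
#if CRYPTOPP_SHANI_AVAILABLE
    if (HasSHA())      // runtime probe for Intel SHA extensions
    {
        SHA1_HashMultipleBlocks_SHANI(state, data, SHA1::BLOCKSIZE, LITTLE_ENDIAN_ORDER);
        return;
    }
#endif
#if CRYPTOPP_ARM_SHA_AVAILABLE
    if (HasSHA1())     // runtime probe for ARMv8 SHA-1 instructions
    {
        SHA1_HashMultipleBlocks_ARMV8(state, data, SHA1::BLOCKSIZE, LITTLE_ENDIAN_ORDER);
        return;
    }
#endif
    SHA1_HashBlock_CXX(state, data);   // portable fallback, renamed from SHA1_CXX_HashBlock
}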
Diffstat (limited to 'sha.cpp')
-rw-r--r--  sha.cpp  933
1 file changed, 59 insertions, 874 deletions
diff --git a/sha.cpp b/sha.cpp
index cd0cdd5d..8f3fcb11 100644
--- a/sha.cpp
+++ b/sha.cpp
@@ -1,6 +1,6 @@
// sha.cpp - modified by Wei Dai from Steve Reid's public domain sha1.c
-// Steve Reid implemented SHA-1. Wei Dai implemented SHA-2. Jeffrey Walton
+// Steve Reid implemented SHA-1. Wei Dai implemented SHA-2. Jeffrey Walton
// implemented Intel SHA extensions based on Intel articles and code by
// Sean Gulley. Jeffrey Walton implemented ARM SHA based on ARM code and
// code from Johannes Schneiders, Skip Hovsmith and Barry O'Rourke.
@@ -48,15 +48,21 @@
# undef CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
#endif
-// Clang __m128i casts
-#define M128_CAST(x) ((__m128i *)(void *)(x))
-#define CONST_M128_CAST(x) ((const __m128i *)(const void *)(x))
-
// C++ makes const internal linkage
#define EXPORT_TABLE extern
NAMESPACE_BEGIN(CryptoPP)
+#if CRYPTOPP_SHANI_AVAILABLE
+extern void SHA1_HashMultipleBlocks_SHANI(word32 *state, const word32 *data, size_t length, ByteOrder order);
+extern void SHA256_HashMultipleBlocks_SHANI(word32 *state, const word32 *data, size_t length, ByteOrder order);
+#endif
+
+#if CRYPTOPP_ARM_SHA_AVAILABLE
+extern void SHA1_HashMultipleBlocks_ARMV8(word32 *state, const word32 *data, size_t length, ByteOrder order);
+extern void SHA256_HashMultipleBlocks_ARMV8(word32 *state, const word32 *data, size_t length, ByteOrder order);
+#endif
+
////////////////////////////////
// start of Steve Reid's code //
////////////////////////////////
@@ -78,7 +84,7 @@ ANONYMOUS_NAMESPACE_BEGIN
#define R3(v,w,x,y,z,i) z+=f3(w,x,y)+blk1(i)+0x8F1BBCDC+rotlFixed(v,5);w=rotlFixed(w,30);
#define R4(v,w,x,y,z,i) z+=f4(w,x,y)+blk1(i)+0xCA62C1D6+rotlFixed(v,5);w=rotlFixed(w,30);
-void SHA1_CXX_HashBlock(word32 *state, const word32 *data)
+void SHA1_HashBlock_CXX(word32 *state, const word32 *data)
{
CRYPTOPP_ASSERT(state);
CRYPTOPP_ASSERT(data);
@@ -125,430 +131,13 @@ ANONYMOUS_NAMESPACE_END
// end of Steve Reid's code //
//////////////////////////////
-///////////////////////////////////
-// start of Walton/Gulley's code //
-///////////////////////////////////
-
-#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
-
-ANONYMOUS_NAMESPACE_BEGIN
-
-// Based on http://software.intel.com/en-us/articles/intel-sha-extensions and code by Sean Gulley.
-void SHA1_SHANI_HashMultipleBlocks(word32 *state, const word32 *data, size_t length, ByteOrder order)
-{
- CRYPTOPP_ASSERT(state);
- CRYPTOPP_ASSERT(data);
- CRYPTOPP_ASSERT(length >= SHA1::BLOCKSIZE);
-
- __m128i ABCD, ABCD_SAVE, E0, E0_SAVE, E1;
- __m128i MASK, MSG0, MSG1, MSG2, MSG3;
-
- // Load initial values
- ABCD = _mm_loadu_si128(CONST_M128_CAST(state));
- E0 = _mm_set_epi32(state[4], 0, 0, 0);
- ABCD = _mm_shuffle_epi32(ABCD, 0x1B);
-
- // IA-32 SHA is little endian, SHA::Transform is big endian,
- // and SHA::HashMultipleBlocks can be either. ByteOrder
- // allows us to avoid extra endian reversals. It saves 1.0 cpb.
- MASK = order == BIG_ENDIAN_ORDER ? // Data arrangement
- _mm_set_epi8(0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15) :
- _mm_set_epi8(3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12) ;
-
- while (length >= SHA1::BLOCKSIZE)
- {
- // Save current hash
- ABCD_SAVE = ABCD;
- E0_SAVE = E0;
-
- // Rounds 0-3
- MSG0 = _mm_loadu_si128(CONST_M128_CAST(data+0));
- MSG0 = _mm_shuffle_epi8(MSG0, MASK);
- E0 = _mm_add_epi32(E0, MSG0);
- E1 = ABCD;
- ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 0);
-
- // Rounds 4-7
- MSG1 = _mm_loadu_si128(CONST_M128_CAST(data+4));
- MSG1 = _mm_shuffle_epi8(MSG1, MASK);
- E1 = _mm_sha1nexte_epu32(E1, MSG1);
- E0 = ABCD;
- ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 0);
- MSG0 = _mm_sha1msg1_epu32(MSG0, MSG1);
-
- // Rounds 8-11
- MSG2 = _mm_loadu_si128(CONST_M128_CAST(data+8));
- MSG2 = _mm_shuffle_epi8(MSG2, MASK);
- E0 = _mm_sha1nexte_epu32(E0, MSG2);
- E1 = ABCD;
- ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 0);
- MSG1 = _mm_sha1msg1_epu32(MSG1, MSG2);
- MSG0 = _mm_xor_si128(MSG0, MSG2);
-
- // Rounds 12-15
- MSG3 = _mm_loadu_si128(CONST_M128_CAST(data+12));
- MSG3 = _mm_shuffle_epi8(MSG3, MASK);
- E1 = _mm_sha1nexte_epu32(E1, MSG3);
- E0 = ABCD;
- MSG0 = _mm_sha1msg2_epu32(MSG0, MSG3);
- ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 0);
- MSG2 = _mm_sha1msg1_epu32(MSG2, MSG3);
- MSG1 = _mm_xor_si128(MSG1, MSG3);
-
- // Rounds 16-19
- E0 = _mm_sha1nexte_epu32(E0, MSG0);
- E1 = ABCD;
- MSG1 = _mm_sha1msg2_epu32(MSG1, MSG0);
- ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 0);
- MSG3 = _mm_sha1msg1_epu32(MSG3, MSG0);
- MSG2 = _mm_xor_si128(MSG2, MSG0);
-
- // Rounds 20-23
- E1 = _mm_sha1nexte_epu32(E1, MSG1);
- E0 = ABCD;
- MSG2 = _mm_sha1msg2_epu32(MSG2, MSG1);
- ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 1);
- MSG0 = _mm_sha1msg1_epu32(MSG0, MSG1);
- MSG3 = _mm_xor_si128(MSG3, MSG1);
-
- // Rounds 24-27
- E0 = _mm_sha1nexte_epu32(E0, MSG2);
- E1 = ABCD;
- MSG3 = _mm_sha1msg2_epu32(MSG3, MSG2);
- ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 1);
- MSG1 = _mm_sha1msg1_epu32(MSG1, MSG2);
- MSG0 = _mm_xor_si128(MSG0, MSG2);
-
- // Rounds 28-31
- E1 = _mm_sha1nexte_epu32(E1, MSG3);
- E0 = ABCD;
- MSG0 = _mm_sha1msg2_epu32(MSG0, MSG3);
- ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 1);
- MSG2 = _mm_sha1msg1_epu32(MSG2, MSG3);
- MSG1 = _mm_xor_si128(MSG1, MSG3);
-
- // Rounds 32-35
- E0 = _mm_sha1nexte_epu32(E0, MSG0);
- E1 = ABCD;
- MSG1 = _mm_sha1msg2_epu32(MSG1, MSG0);
- ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 1);
- MSG3 = _mm_sha1msg1_epu32(MSG3, MSG0);
- MSG2 = _mm_xor_si128(MSG2, MSG0);
-
- // Rounds 36-39
- E1 = _mm_sha1nexte_epu32(E1, MSG1);
- E0 = ABCD;
- MSG2 = _mm_sha1msg2_epu32(MSG2, MSG1);
- ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 1);
- MSG0 = _mm_sha1msg1_epu32(MSG0, MSG1);
- MSG3 = _mm_xor_si128(MSG3, MSG1);
-
- // Rounds 40-43
- E0 = _mm_sha1nexte_epu32(E0, MSG2);
- E1 = ABCD;
- MSG3 = _mm_sha1msg2_epu32(MSG3, MSG2);
- ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 2);
- MSG1 = _mm_sha1msg1_epu32(MSG1, MSG2);
- MSG0 = _mm_xor_si128(MSG0, MSG2);
-
- // Rounds 44-47
- E1 = _mm_sha1nexte_epu32(E1, MSG3);
- E0 = ABCD;
- MSG0 = _mm_sha1msg2_epu32(MSG0, MSG3);
- ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 2);
- MSG2 = _mm_sha1msg1_epu32(MSG2, MSG3);
- MSG1 = _mm_xor_si128(MSG1, MSG3);
-
- // Rounds 48-51
- E0 = _mm_sha1nexte_epu32(E0, MSG0);
- E1 = ABCD;
- MSG1 = _mm_sha1msg2_epu32(MSG1, MSG0);
- ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 2);
- MSG3 = _mm_sha1msg1_epu32(MSG3, MSG0);
- MSG2 = _mm_xor_si128(MSG2, MSG0);
-
- // Rounds 52-55
- E1 = _mm_sha1nexte_epu32(E1, MSG1);
- E0 = ABCD;
- MSG2 = _mm_sha1msg2_epu32(MSG2, MSG1);
- ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 2);
- MSG0 = _mm_sha1msg1_epu32(MSG0, MSG1);
- MSG3 = _mm_xor_si128(MSG3, MSG1);
-
- // Rounds 56-59
- E0 = _mm_sha1nexte_epu32(E0, MSG2);
- E1 = ABCD;
- MSG3 = _mm_sha1msg2_epu32(MSG3, MSG2);
- ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 2);
- MSG1 = _mm_sha1msg1_epu32(MSG1, MSG2);
- MSG0 = _mm_xor_si128(MSG0, MSG2);
-
- // Rounds 60-63
- E1 = _mm_sha1nexte_epu32(E1, MSG3);
- E0 = ABCD;
- MSG0 = _mm_sha1msg2_epu32(MSG0, MSG3);
- ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 3);
- MSG2 = _mm_sha1msg1_epu32(MSG2, MSG3);
- MSG1 = _mm_xor_si128(MSG1, MSG3);
-
- // Rounds 64-67
- E0 = _mm_sha1nexte_epu32(E0, MSG0);
- E1 = ABCD;
- MSG1 = _mm_sha1msg2_epu32(MSG1, MSG0);
- ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 3);
- MSG3 = _mm_sha1msg1_epu32(MSG3, MSG0);
- MSG2 = _mm_xor_si128(MSG2, MSG0);
-
- // Rounds 68-71
- E1 = _mm_sha1nexte_epu32(E1, MSG1);
- E0 = ABCD;
- MSG2 = _mm_sha1msg2_epu32(MSG2, MSG1);
- ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 3);
- MSG3 = _mm_xor_si128(MSG3, MSG1);
-
- // Rounds 72-75
- E0 = _mm_sha1nexte_epu32(E0, MSG2);
- E1 = ABCD;
- MSG3 = _mm_sha1msg2_epu32(MSG3, MSG2);
- ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 3);
-
- // Rounds 76-79
- E1 = _mm_sha1nexte_epu32(E1, MSG3);
- E0 = ABCD;
- ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 3);
-
- // Add values back to state
- E0 = _mm_sha1nexte_epu32(E0, E0_SAVE);
- ABCD = _mm_add_epi32(ABCD, ABCD_SAVE);
-
- data += SHA1::BLOCKSIZE/sizeof(word32);
- length -= SHA1::BLOCKSIZE;
- }
-
- // Save state
- ABCD = _mm_shuffle_epi32(ABCD, 0x1B);
- _mm_storeu_si128(M128_CAST(state), ABCD);
- state[4] = _mm_extract_epi32(E0, 3);
-}
-
-ANONYMOUS_NAMESPACE_END
-
-#endif // CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
-
-/////////////////////////////////
-// end of Walton/Gulley's code //
-/////////////////////////////////
-
-//////////////////////////////////////////////////////////////
-// start of Walton/Schneiders/O'Rourke/Skip Hovsmith's code //
-//////////////////////////////////////////////////////////////
-
-#if CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
-
-ANONYMOUS_NAMESPACE_BEGIN
-
-void SHA1_ARM_SHA_HashMultipleBlocks(word32 *state, const word32 *data, size_t length, ByteOrder order)
-{
- CRYPTOPP_ASSERT(state);
- CRYPTOPP_ASSERT(data);
- CRYPTOPP_ASSERT(length >= SHA1::BLOCKSIZE);
-
- uint32x4_t C0, C1, C2, C3;
- uint32x4_t ABCD, ABCD_SAVED;
- uint32x4_t MSG0, MSG1, MSG2, MSG3;
- uint32x4_t TMP0, TMP1;
- uint32_t E0, E0_SAVED, E1;
-
- // Load initial values
- C0 = vdupq_n_u32(0x5A827999);
- C1 = vdupq_n_u32(0x6ED9EBA1);
- C2 = vdupq_n_u32(0x8F1BBCDC);
- C3 = vdupq_n_u32(0xCA62C1D6);
-
- ABCD = vld1q_u32(&state[0]);
- E0 = state[4];
-
- while (length >= SHA1::BLOCKSIZE)
- {
- // Save current hash
- ABCD_SAVED = ABCD;
- E0_SAVED = E0;
-
- MSG0 = vld1q_u32(data + 0);
- MSG1 = vld1q_u32(data + 4);
- MSG2 = vld1q_u32(data + 8);
- MSG3 = vld1q_u32(data + 12);
-
- if (order == BIG_ENDIAN_ORDER) // Data arrangement
- {
- MSG0 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(MSG0)));
- MSG1 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(MSG1)));
- MSG2 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(MSG2)));
- MSG3 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(MSG3)));
- }
-
- TMP0 = vaddq_u32(MSG0, C0);
- TMP1 = vaddq_u32(MSG1, C0);
-
- // Rounds 0-3
- E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
- ABCD = vsha1cq_u32(ABCD, E0, TMP0);
- TMP0 = vaddq_u32(MSG2, C0);
- MSG0 = vsha1su0q_u32(MSG0, MSG1, MSG2);
-
- // Rounds 4-7
- E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
- ABCD = vsha1cq_u32(ABCD, E1, TMP1);
- TMP1 = vaddq_u32(MSG3, C0);
- MSG0 = vsha1su1q_u32(MSG0, MSG3);
- MSG1 = vsha1su0q_u32(MSG1, MSG2, MSG3);
-
- // Rounds 8-11
- E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
- ABCD = vsha1cq_u32(ABCD, E0, TMP0);
- TMP0 = vaddq_u32(MSG0, C0);
- MSG1 = vsha1su1q_u32(MSG1, MSG0);
- MSG2 = vsha1su0q_u32(MSG2, MSG3, MSG0);
-
- // Rounds 12-15
- E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
- ABCD = vsha1cq_u32(ABCD, E1, TMP1);
- TMP1 = vaddq_u32(MSG1, C1);
- MSG2 = vsha1su1q_u32(MSG2, MSG1);
- MSG3 = vsha1su0q_u32(MSG3, MSG0, MSG1);
-
- // Rounds 16-19
- E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
- ABCD = vsha1cq_u32(ABCD, E0, TMP0);
- TMP0 = vaddq_u32(MSG2, C1);
- MSG3 = vsha1su1q_u32(MSG3, MSG2);
- MSG0 = vsha1su0q_u32(MSG0, MSG1, MSG2);
-
- // Rounds 20-23
- E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
- ABCD = vsha1pq_u32(ABCD, E1, TMP1);
- TMP1 = vaddq_u32(MSG3, C1);
- MSG0 = vsha1su1q_u32(MSG0, MSG3);
- MSG1 = vsha1su0q_u32(MSG1, MSG2, MSG3);
-
- // Rounds 24-27
- E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
- ABCD = vsha1pq_u32(ABCD, E0, TMP0);
- TMP0 = vaddq_u32(MSG0, C1);
- MSG1 = vsha1su1q_u32(MSG1, MSG0);
- MSG2 = vsha1su0q_u32(MSG2, MSG3, MSG0);
-
- // Rounds 28-31
- E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
- ABCD = vsha1pq_u32(ABCD, E1, TMP1);
- TMP1 = vaddq_u32(MSG1, C1);
- MSG2 = vsha1su1q_u32(MSG2, MSG1);
- MSG3 = vsha1su0q_u32(MSG3, MSG0, MSG1);
-
- // Rounds 32-35
- E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
- ABCD = vsha1pq_u32(ABCD, E0, TMP0);
- TMP0 = vaddq_u32(MSG2, C2);
- MSG3 = vsha1su1q_u32(MSG3, MSG2);
- MSG0 = vsha1su0q_u32(MSG0, MSG1, MSG2);
-
- // Rounds 36-39
- E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
- ABCD = vsha1pq_u32(ABCD, E1, TMP1);
- TMP1 = vaddq_u32(MSG3, C2);
- MSG0 = vsha1su1q_u32(MSG0, MSG3);
- MSG1 = vsha1su0q_u32(MSG1, MSG2, MSG3);
-
- // Rounds 40-43
- E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
- ABCD = vsha1mq_u32(ABCD, E0, TMP0);
- TMP0 = vaddq_u32(MSG0, C2);
- MSG1 = vsha1su1q_u32(MSG1, MSG0);
- MSG2 = vsha1su0q_u32(MSG2, MSG3, MSG0);
-
- // Rounds 44-47
- E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
- ABCD = vsha1mq_u32(ABCD, E1, TMP1);
- TMP1 = vaddq_u32(MSG1, C2);
- MSG2 = vsha1su1q_u32(MSG2, MSG1);
- MSG3 = vsha1su0q_u32(MSG3, MSG0, MSG1);
-
- // Rounds 48-51
- E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
- ABCD = vsha1mq_u32(ABCD, E0, TMP0);
- TMP0 = vaddq_u32(MSG2, C2);
- MSG3 = vsha1su1q_u32(MSG3, MSG2);
- MSG0 = vsha1su0q_u32(MSG0, MSG1, MSG2);
-
- // Rounds 52-55
- E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
- ABCD = vsha1mq_u32(ABCD, E1, TMP1);
- TMP1 = vaddq_u32(MSG3, C3);
- MSG0 = vsha1su1q_u32(MSG0, MSG3);
- MSG1 = vsha1su0q_u32(MSG1, MSG2, MSG3);
-
- // Rounds 56-59
- E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
- ABCD = vsha1mq_u32(ABCD, E0, TMP0);
- TMP0 = vaddq_u32(MSG0, C3);
- MSG1 = vsha1su1q_u32(MSG1, MSG0);
- MSG2 = vsha1su0q_u32(MSG2, MSG3, MSG0);
-
- // Rounds 60-63
- E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
- ABCD = vsha1pq_u32(ABCD, E1, TMP1);
- TMP1 = vaddq_u32(MSG1, C3);
- MSG2 = vsha1su1q_u32(MSG2, MSG1);
- MSG3 = vsha1su0q_u32(MSG3, MSG0, MSG1);
-
- // Rounds 64-67
- E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
- ABCD = vsha1pq_u32(ABCD, E0, TMP0);
- TMP0 = vaddq_u32(MSG2, C3);
- MSG3 = vsha1su1q_u32(MSG3, MSG2);
- MSG0 = vsha1su0q_u32(MSG0, MSG1, MSG2);
-
- // Rounds 68-71
- E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
- ABCD = vsha1pq_u32(ABCD, E1, TMP1);
- TMP1 = vaddq_u32(MSG3, C3);
- MSG0 = vsha1su1q_u32(MSG0, MSG3);
-
- // Rounds 72-75
- E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
- ABCD = vsha1pq_u32(ABCD, E0, TMP0);
-
- // Rounds 76-79
- E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0));
- ABCD = vsha1pq_u32(ABCD, E1, TMP1);
-
- E0 += E0_SAVED;
- ABCD = vaddq_u32(ABCD_SAVED, ABCD);
-
- data += SHA1::BLOCKSIZE/sizeof(word32);
- length -= SHA1::BLOCKSIZE;
- }
-
- // Save state
- vst1q_u32(&state[0], ABCD);
- state[4] = E0;
-}
-
-ANONYMOUS_NAMESPACE_END
-
-#endif // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
-
-///////////////////////////////////////////////////////
-// end of Walton/Schneiders/O'Rourke/Hovsmith's code //
-///////////////////////////////////////////////////////
-
void SHA1::InitState(HashWordType *state)
{
- state[0] = 0x67452301L;
- state[1] = 0xEFCDAB89L;
- state[2] = 0x98BADCFEL;
- state[3] = 0x10325476L;
- state[4] = 0xC3D2E1F0L;
+ state[0] = 0x67452301;
+ state[1] = 0xEFCDAB89;
+ state[2] = 0x98BADCFE;
+ state[3] = 0x10325476;
+ state[4] = 0xC3D2E1F0;
}
void SHA1::Transform(word32 *state, const word32 *data)
@@ -556,22 +145,22 @@ void SHA1::Transform(word32 *state, const word32 *data)
CRYPTOPP_ASSERT(state);
CRYPTOPP_ASSERT(data);
-#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
+#if CRYPTOPP_SHANI_AVAILABLE
if (HasSHA())
{
- SHA1_SHANI_HashMultipleBlocks(state, data, SHA1::BLOCKSIZE, LITTLE_ENDIAN_ORDER);
+ SHA1_HashMultipleBlocks_SHANI(state, data, SHA1::BLOCKSIZE, LITTLE_ENDIAN_ORDER);
return;
}
#endif
-#if CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
+#if CRYPTOPP_ARM_SHA_AVAILABLE
if (HasSHA1())
{
- SHA1_ARM_SHA_HashMultipleBlocks(state, data, SHA1::BLOCKSIZE, LITTLE_ENDIAN_ORDER);
+ SHA1_HashMultipleBlocks_ARMV8(state, data, SHA1::BLOCKSIZE, LITTLE_ENDIAN_ORDER);
return;
}
#endif
- SHA1_CXX_HashBlock(state, data);
+ SHA1_HashBlock_CXX(state, data);
}
size_t SHA1::HashMultipleBlocks(const word32 *input, size_t length)
@@ -579,17 +168,17 @@ size_t SHA1::HashMultipleBlocks(const word32 *input, size_t length)
CRYPTOPP_ASSERT(input);
CRYPTOPP_ASSERT(length >= SHA1::BLOCKSIZE);
-#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
+#if CRYPTOPP_SHANI_AVAILABLE
if (HasSHA())
{
- SHA1_SHANI_HashMultipleBlocks(m_state, input, length, BIG_ENDIAN_ORDER);
+ SHA1_HashMultipleBlocks_SHANI(m_state, input, length, BIG_ENDIAN_ORDER);
return length & (SHA1::BLOCKSIZE - 1);
}
#endif
-#if CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
+#if CRYPTOPP_ARM_SHA_AVAILABLE
if (HasSHA1())
{
- SHA1_ARM_SHA_HashMultipleBlocks(m_state, input, length, BIG_ENDIAN_ORDER);
+ SHA1_HashMultipleBlocks_ARMV8(m_state, input, length, BIG_ENDIAN_ORDER);
return length & (SHA1::BLOCKSIZE - 1);
}
#endif
@@ -600,12 +189,12 @@ size_t SHA1::HashMultipleBlocks(const word32 *input, size_t length)
{
if (noReverse)
{
- SHA1_CXX_HashBlock(m_state, input);
+ SHA1_HashBlock_CXX(m_state, input);
}
else
{
ByteReverse(dataBuf, input, SHA1::BLOCKSIZE);
- SHA1_CXX_HashBlock(m_state, dataBuf);
+ SHA1_HashBlock_CXX(m_state, dataBuf);
}
input += SHA1::BLOCKSIZE/sizeof(word32);
@@ -663,7 +252,7 @@ ANONYMOUS_NAMESPACE_BEGIN
#define s0(x) (rotrFixed(x,7)^rotrFixed(x,18)^(x>>3))
#define s1(x) (rotrFixed(x,17)^rotrFixed(x,19)^(x>>10))
-void SHA256_CXX_HashBlock(word32 *state, const word32 *data)
+void SHA256_HashBlock_CXX(word32 *state, const word32 *data)
{
word32 W[16], T[8];
/* Copy context->state[] to working vars */
@@ -712,7 +301,7 @@ void SHA256::InitState(HashWordType *state)
ANONYMOUS_NAMESPACE_BEGIN
-void CRYPTOPP_FASTCALL SHA256_SSE_HashMultipleBlocks(word32 *state, const word32 *data, size_t len)
+void CRYPTOPP_FASTCALL SHA256_HashMultipleBlocks_SSE2(word32 *state, const word32 *data, size_t len)
{
#define LOCALS_SIZE 8*4 + 16*4 + 4*WORD_SZ
#define H(i) [BASE+ASM_MOD(1024+7-(i),8)*4]
@@ -834,7 +423,7 @@ void CRYPTOPP_FASTCALL SHA256_SSE_HashMultipleBlocks(word32 *state, const word32
INTEL_NOPREFIX
#elif defined(CRYPTOPP_GENERATE_X64_MASM)
ALIGN 8
- SHA256_SSE_HashMultipleBlocks PROC FRAME
+ SHA256_HashMultipleBlocks_SSE2 PROC FRAME
rex_push_reg rsi
push_reg rdi
push_reg rbx
@@ -1013,7 +602,7 @@ INTEL_NOPREFIX
pop rdi
pop rsi
ret
- SHA256_SSE_HashMultipleBlocks ENDP
+ SHA256_HashMultipleBlocks_SSE2 ENDP
#endif
#ifdef __GNUC__
@@ -1039,435 +628,31 @@ ANONYMOUS_NAMESPACE_END
#ifdef CRYPTOPP_X64_MASM_AVAILABLE
EXPORT_TABLE "C" {
-void CRYPTOPP_FASTCALL SHA256_SSE_HashMultipleBlocks(word32 *state, const word32 *data, size_t len);
+void CRYPTOPP_FASTCALL SHA256_HashMultipleBlocks_SSE2(word32 *state, const word32 *data, size_t len);
}
#endif
-///////////////////////////////////
-// start of Walton/Gulley's code //
-///////////////////////////////////
-
-#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
-
-ANONYMOUS_NAMESPACE_BEGIN
-
-// Based on http://software.intel.com/en-us/articles/intel-sha-extensions and code by Sean Gulley.
-void SHA256_SHANI_HashMultipleBlocks(word32 *state, const word32 *data, size_t length, ByteOrder order)
-{
- CRYPTOPP_ASSERT(state);
- CRYPTOPP_ASSERT(data);
- CRYPTOPP_ASSERT(length >= SHA256::BLOCKSIZE);
-
- __m128i STATE0, STATE1;
- __m128i MSG, TMP, MASK;
- __m128i TMSG0, TMSG1, TMSG2, TMSG3;
- __m128i ABEF_SAVE, CDGH_SAVE;
-
- // Load initial values
- TMP = _mm_loadu_si128(M128_CAST(&state[0]));
- STATE1 = _mm_loadu_si128(M128_CAST(&state[4]));
-
- // IA-32 SHA is little endian, SHA::Transform is big endian,
- // and SHA::HashMultipleBlocks can be either. ByteOrder
- // allows us to avoid extra endian reversals. It saves 1.0 cpb.
- MASK = order == BIG_ENDIAN_ORDER ? // Data arrangement
- _mm_set_epi8(12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3) :
- _mm_set_epi8(15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0) ;
-
- TMP = _mm_shuffle_epi32(TMP, 0xB1); // CDAB
- STATE1 = _mm_shuffle_epi32(STATE1, 0x1B); // EFGH
- STATE0 = _mm_alignr_epi8(TMP, STATE1, 8); // ABEF
- STATE1 = _mm_blend_epi16(STATE1, TMP, 0xF0); // CDGH
-
- while (length >= SHA256::BLOCKSIZE)
- {
- // Save current hash
- ABEF_SAVE = STATE0;
- CDGH_SAVE = STATE1;
-
- // Rounds 0-3
- MSG = _mm_loadu_si128(CONST_M128_CAST(data+0));
- TMSG0 = _mm_shuffle_epi8(MSG, MASK);
- MSG = _mm_add_epi32(TMSG0, _mm_set_epi64x(W64LIT(0xE9B5DBA5B5C0FBCF), W64LIT(0x71374491428A2F98)));
- STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
- MSG = _mm_shuffle_epi32(MSG, 0x0E);
- STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
-
- // Rounds 4-7
- TMSG1 = _mm_loadu_si128(CONST_M128_CAST(data+4));
- TMSG1 = _mm_shuffle_epi8(TMSG1, MASK);
- MSG = _mm_add_epi32(TMSG1, _mm_set_epi64x(W64LIT(0xAB1C5ED5923F82A4), W64LIT(0x59F111F13956C25B)));
- STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
- MSG = _mm_shuffle_epi32(MSG, 0x0E);
- STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
- TMSG0 = _mm_sha256msg1_epu32(TMSG0, TMSG1);
-
- // Rounds 8-11
- TMSG2 = _mm_loadu_si128(CONST_M128_CAST(data+8));
- TMSG2 = _mm_shuffle_epi8(TMSG2, MASK);
- MSG = _mm_add_epi32(TMSG2, _mm_set_epi64x(W64LIT(0x550C7DC3243185BE), W64LIT(0x12835B01D807AA98)));
- STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
- MSG = _mm_shuffle_epi32(MSG, 0x0E);
- STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
- TMSG1 = _mm_sha256msg1_epu32(TMSG1, TMSG2);
-
- // Rounds 12-15
- TMSG3 = _mm_loadu_si128(CONST_M128_CAST(data+12));
- TMSG3 = _mm_shuffle_epi8(TMSG3, MASK);
- MSG = _mm_add_epi32(TMSG3, _mm_set_epi64x(W64LIT(0xC19BF1749BDC06A7), W64LIT(0x80DEB1FE72BE5D74)));
- STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
- TMP = _mm_alignr_epi8(TMSG3, TMSG2, 4);
- TMSG0 = _mm_add_epi32(TMSG0, TMP);
- TMSG0 = _mm_sha256msg2_epu32(TMSG0, TMSG3);
- MSG = _mm_shuffle_epi32(MSG, 0x0E);
- STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
- TMSG2 = _mm_sha256msg1_epu32(TMSG2, TMSG3);
-
- // Rounds 16-19
- MSG = _mm_add_epi32(TMSG0, _mm_set_epi64x(W64LIT(0x240CA1CC0FC19DC6), W64LIT(0xEFBE4786E49B69C1)));
- STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
- TMP = _mm_alignr_epi8(TMSG0, TMSG3, 4);
- TMSG1 = _mm_add_epi32(TMSG1, TMP);
- TMSG1 = _mm_sha256msg2_epu32(TMSG1, TMSG0);
- MSG = _mm_shuffle_epi32(MSG, 0x0E);
- STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
- TMSG3 = _mm_sha256msg1_epu32(TMSG3, TMSG0);
-
- // Rounds 20-23
- MSG = _mm_add_epi32(TMSG1, _mm_set_epi64x(W64LIT(0x76F988DA5CB0A9DC), W64LIT(0x4A7484AA2DE92C6F)));
- STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
- TMP = _mm_alignr_epi8(TMSG1, TMSG0, 4);
- TMSG2 = _mm_add_epi32(TMSG2, TMP);
- TMSG2 = _mm_sha256msg2_epu32(TMSG2, TMSG1);
- MSG = _mm_shuffle_epi32(MSG, 0x0E);
- STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
- TMSG0 = _mm_sha256msg1_epu32(TMSG0, TMSG1);
-
- // Rounds 24-27
- MSG = _mm_add_epi32(TMSG2, _mm_set_epi64x(W64LIT(0xBF597FC7B00327C8), W64LIT(0xA831C66D983E5152)));
- STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
- TMP = _mm_alignr_epi8(TMSG2, TMSG1, 4);
- TMSG3 = _mm_add_epi32(TMSG3, TMP);
- TMSG3 = _mm_sha256msg2_epu32(TMSG3, TMSG2);
- MSG = _mm_shuffle_epi32(MSG, 0x0E);
- STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
- TMSG1 = _mm_sha256msg1_epu32(TMSG1, TMSG2);
-
- // Rounds 28-31
- MSG = _mm_add_epi32(TMSG3, _mm_set_epi64x(W64LIT(0x1429296706CA6351), W64LIT(0xD5A79147C6E00BF3)));
- STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
- TMP = _mm_alignr_epi8(TMSG3, TMSG2, 4);
- TMSG0 = _mm_add_epi32(TMSG0, TMP);
- TMSG0 = _mm_sha256msg2_epu32(TMSG0, TMSG3);
- MSG = _mm_shuffle_epi32(MSG, 0x0E);
- STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
- TMSG2 = _mm_sha256msg1_epu32(TMSG2, TMSG3);
-
- // Rounds 32-35
- MSG = _mm_add_epi32(TMSG0, _mm_set_epi64x(W64LIT(0x53380D134D2C6DFC), W64LIT(0x2E1B213827B70A85)));
- STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
- TMP = _mm_alignr_epi8(TMSG0, TMSG3, 4);
- TMSG1 = _mm_add_epi32(TMSG1, TMP);
- TMSG1 = _mm_sha256msg2_epu32(TMSG1, TMSG0);
- MSG = _mm_shuffle_epi32(MSG, 0x0E);
- STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
- TMSG3 = _mm_sha256msg1_epu32(TMSG3, TMSG0);
-
- // Rounds 36-39
- MSG = _mm_add_epi32(TMSG1, _mm_set_epi64x(W64LIT(0x92722C8581C2C92E), W64LIT(0x766A0ABB650A7354)));
- STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
- TMP = _mm_alignr_epi8(TMSG1, TMSG0, 4);
- TMSG2 = _mm_add_epi32(TMSG2, TMP);
- TMSG2 = _mm_sha256msg2_epu32(TMSG2, TMSG1);
- MSG = _mm_shuffle_epi32(MSG, 0x0E);
- STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
- TMSG0 = _mm_sha256msg1_epu32(TMSG0, TMSG1);
-
- // Rounds 40-43
- MSG = _mm_add_epi32(TMSG2, _mm_set_epi64x(W64LIT(0xC76C51A3C24B8B70), W64LIT(0xA81A664BA2BFE8A1)));
- STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
- TMP = _mm_alignr_epi8(TMSG2, TMSG1, 4);
- TMSG3 = _mm_add_epi32(TMSG3, TMP);
- TMSG3 = _mm_sha256msg2_epu32(TMSG3, TMSG2);
- MSG = _mm_shuffle_epi32(MSG, 0x0E);
- STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
- TMSG1 = _mm_sha256msg1_epu32(TMSG1, TMSG2);
-
- // Rounds 44-47
- MSG = _mm_add_epi32(TMSG3, _mm_set_epi64x(W64LIT(0x106AA070F40E3585), W64LIT(0xD6990624D192E819)));
- STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
- TMP = _mm_alignr_epi8(TMSG3, TMSG2, 4);
- TMSG0 = _mm_add_epi32(TMSG0, TMP);
- TMSG0 = _mm_sha256msg2_epu32(TMSG0, TMSG3);
- MSG = _mm_shuffle_epi32(MSG, 0x0E);
- STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
- TMSG2 = _mm_sha256msg1_epu32(TMSG2, TMSG3);
-
- // Rounds 48-51
- MSG = _mm_add_epi32(TMSG0, _mm_set_epi64x(W64LIT(0x34B0BCB52748774C), W64LIT(0x1E376C0819A4C116)));
- STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
- TMP = _mm_alignr_epi8(TMSG0, TMSG3, 4);
- TMSG1 = _mm_add_epi32(TMSG1, TMP);
- TMSG1 = _mm_sha256msg2_epu32(TMSG1, TMSG0);
- MSG = _mm_shuffle_epi32(MSG, 0x0E);
- STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
- TMSG3 = _mm_sha256msg1_epu32(TMSG3, TMSG0);
-
- // Rounds 52-55
- MSG = _mm_add_epi32(TMSG1, _mm_set_epi64x(W64LIT(0x682E6FF35B9CCA4F), W64LIT(0x4ED8AA4A391C0CB3)));
- STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
- TMP = _mm_alignr_epi8(TMSG1, TMSG0, 4);
- TMSG2 = _mm_add_epi32(TMSG2, TMP);
- TMSG2 = _mm_sha256msg2_epu32(TMSG2, TMSG1);
- MSG = _mm_shuffle_epi32(MSG, 0x0E);
- STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
-
- // Rounds 56-59
- MSG = _mm_add_epi32(TMSG2, _mm_set_epi64x(W64LIT(0x8CC7020884C87814), W64LIT(0x78A5636F748F82EE)));
- STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
- TMP = _mm_alignr_epi8(TMSG2, TMSG1, 4);
- TMSG3 = _mm_add_epi32(TMSG3, TMP);
- TMSG3 = _mm_sha256msg2_epu32(TMSG3, TMSG2);
- MSG = _mm_shuffle_epi32(MSG, 0x0E);
- STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
-
- // Rounds 60-63
- MSG = _mm_add_epi32(TMSG3, _mm_set_epi64x(W64LIT(0xC67178F2BEF9A3F7), W64LIT(0xA4506CEB90BEFFFA)));
- STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
- MSG = _mm_shuffle_epi32(MSG, 0x0E);
- STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
-
- // Add values back to state
- STATE0 = _mm_add_epi32(STATE0, ABEF_SAVE);
- STATE1 = _mm_add_epi32(STATE1, CDGH_SAVE);
-
- data += SHA256::BLOCKSIZE/sizeof(word32);
- length -= SHA256::BLOCKSIZE;
- }
-
- TMP = _mm_shuffle_epi32(STATE0, 0x1B); // FEBA
- STATE1 = _mm_shuffle_epi32(STATE1, 0xB1); // DCHG
- STATE0 = _mm_blend_epi16(TMP, STATE1, 0xF0); // DCBA
- STATE1 = _mm_alignr_epi8(STATE1, TMP, 8); // ABEF
-
- // Save state
- _mm_storeu_si128(M128_CAST(&state[0]), STATE0);
- _mm_storeu_si128(M128_CAST(&state[4]), STATE1);
-}
-
-ANONYMOUS_NAMESPACE_END
-
-#endif // CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
-
-/////////////////////////////////
-// end of Walton/Gulley's code //
-/////////////////////////////////
-
-/////////////////////////////////////////////////////////
-// start of Walton/Schneiders/O'Rourke/Hovsmith's code //
-/////////////////////////////////////////////////////////
-
-#if CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
-
-ANONYMOUS_NAMESPACE_BEGIN
-
-void SHA256_ARM_SHA_HashMultipleBlocks(word32 *state, const word32 *data, size_t length, ByteOrder order)
-{
- CRYPTOPP_ASSERT(state);
- CRYPTOPP_ASSERT(data);
- CRYPTOPP_ASSERT(length >= SHA256::BLOCKSIZE);
-
- uint32x4_t STATE0, STATE1, ABEF_SAVE, CDGH_SAVE;
- uint32x4_t MSG0, MSG1, MSG2, MSG3;
- uint32x4_t TMP0, TMP1, TMP2;
-
- // Load initial values
- STATE0 = vld1q_u32(&state[0]);
- STATE1 = vld1q_u32(&state[4]);
-
- while (length >= SHA256::BLOCKSIZE)
- {
- // Save current hash
- ABEF_SAVE = STATE0;
- CDGH_SAVE = STATE1;
-
- // Load message
- MSG0 = vld1q_u32(data + 0);
- MSG1 = vld1q_u32(data + 4);
- MSG2 = vld1q_u32(data + 8);
- MSG3 = vld1q_u32(data + 12);
-
- if (order == BIG_ENDIAN_ORDER) // Data arrangement
- {
- MSG0 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(MSG0)));
- MSG1 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(MSG1)));
- MSG2 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(MSG2)));
- MSG3 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(MSG3)));
- }
-
- TMP0 = vaddq_u32(MSG0, vld1q_u32(&SHA256_K[0x00]));
-
- // Rounds 0-3
- MSG0 = vsha256su0q_u32(MSG0, MSG1);
- TMP2 = STATE0;
- TMP1 = vaddq_u32(MSG1, vld1q_u32(&SHA256_K[0x04]));
- STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0);
- STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0);
- MSG0 = vsha256su1q_u32(MSG0, MSG2, MSG3);;
-
- // Rounds 4-7
- MSG1 = vsha256su0q_u32(MSG1, MSG2);
- TMP2 = STATE0;
- TMP0 = vaddq_u32(MSG2, vld1q_u32(&SHA256_K[0x08]));
- STATE0 = vsha256hq_u32(STATE0, STATE1, TMP1);
- STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP1);
- MSG1 = vsha256su1q_u32(MSG1, MSG3, MSG0);;
-
- // Rounds 8-11
- MSG2 = vsha256su0q_u32(MSG2, MSG3);
- TMP2 = STATE0;
- TMP1 = vaddq_u32(MSG3, vld1q_u32(&SHA256_K[0x0c]));
- STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0);
- STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0);
- MSG2 = vsha256su1q_u32(MSG2, MSG0, MSG1);;
-
- // Rounds 12-15
- MSG3 = vsha256su0q_u32(MSG3, MSG0);
- TMP2 = STATE0;
- TMP0 = vaddq_u32(MSG0, vld1q_u32(&SHA256_K[0x10]));
- STATE0 = vsha256hq_u32(STATE0, STATE1, TMP1);
- STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP1);
- MSG3 = vsha256su1q_u32(MSG3, MSG1, MSG2);;
-
- // Rounds 16-19
- MSG0 = vsha256su0q_u32(MSG0, MSG1);
- TMP2 = STATE0;
- TMP1 = vaddq_u32(MSG1, vld1q_u32(&SHA256_K[0x14]));
- STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0);
- STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0);
- MSG0 = vsha256su1q_u32(MSG0, MSG2, MSG3);;
-
- // Rounds 20-23
- MSG1 = vsha256su0q_u32(MSG1, MSG2);
- TMP2 = STATE0;
- TMP0 = vaddq_u32(MSG2, vld1q_u32(&SHA256_K[0x18]));
- STATE0 = vsha256hq_u32(STATE0, STATE1, TMP1);
- STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP1);
- MSG1 = vsha256su1q_u32(MSG1, MSG3, MSG0);;
-
- // Rounds 24-27
- MSG2 = vsha256su0q_u32(MSG2, MSG3);
- TMP2 = STATE0;
- TMP1 = vaddq_u32(MSG3, vld1q_u32(&SHA256_K[0x1c]));
- STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0);
- STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0);
- MSG2 = vsha256su1q_u32(MSG2, MSG0, MSG1);;
-
- // Rounds 28-31
- MSG3 = vsha256su0q_u32(MSG3, MSG0);
- TMP2 = STATE0;
- TMP0 = vaddq_u32(MSG0, vld1q_u32(&SHA256_K[0x20]));
- STATE0 = vsha256hq_u32(STATE0, STATE1, TMP1);
- STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP1);
- MSG3 = vsha256su1q_u32(MSG3, MSG1, MSG2);;
-
- // Rounds 32-35
- MSG0 = vsha256su0q_u32(MSG0, MSG1);
- TMP2 = STATE0;
- TMP1 = vaddq_u32(MSG1, vld1q_u32(&SHA256_K[0x24]));
- STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0);
- STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0);
- MSG0 = vsha256su1q_u32(MSG0, MSG2, MSG3);;
-
- // Rounds 36-39
- MSG1 = vsha256su0q_u32(MSG1, MSG2);
- TMP2 = STATE0;
- TMP0 = vaddq_u32(MSG2, vld1q_u32(&SHA256_K[0x28]));
- STATE0 = vsha256hq_u32(STATE0, STATE1, TMP1);
- STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP1);
- MSG1 = vsha256su1q_u32(MSG1, MSG3, MSG0);;
-
- // Rounds 40-43
- MSG2 = vsha256su0q_u32(MSG2, MSG3);
- TMP2 = STATE0;
- TMP1 = vaddq_u32(MSG3, vld1q_u32(&SHA256_K[0x2c]));
- STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0);
- STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0);
- MSG2 = vsha256su1q_u32(MSG2, MSG0, MSG1);;
-
- // Rounds 44-47
- MSG3 = vsha256su0q_u32(MSG3, MSG0);
- TMP2 = STATE0;
- TMP0 = vaddq_u32(MSG0, vld1q_u32(&SHA256_K[0x30]));
- STATE0 = vsha256hq_u32(STATE0, STATE1, TMP1);
- STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP1);
- MSG3 = vsha256su1q_u32(MSG3, MSG1, MSG2);;
-
- // Rounds 48-51
- TMP2 = STATE0;
- TMP1 = vaddq_u32(MSG1, vld1q_u32(&SHA256_K[0x34]));
- STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0);
- STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0);;
-
- // Rounds 52-55
- TMP2 = STATE0;
- TMP0 = vaddq_u32(MSG2, vld1q_u32(&SHA256_K[0x38]));
- STATE0 = vsha256hq_u32(STATE0, STATE1, TMP1);
- STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP1);;
-
- // Rounds 56-59
- TMP2 = STATE0;
- TMP1 = vaddq_u32(MSG3, vld1q_u32(&SHA256_K[0x3c]));
- STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0);
- STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0);;
-
- // Rounds 60-63
- TMP2 = STATE0;
- STATE0 = vsha256hq_u32(STATE0, STATE1, TMP1);
- STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP1);;
-
- // Add back to state
- STATE0 = vaddq_u32(STATE0, ABEF_SAVE);
- STATE1 = vaddq_u32(STATE1, CDGH_SAVE);
-
- data += SHA256::BLOCKSIZE/sizeof(word32);
- length -= SHA256::BLOCKSIZE;
- }
-
- // Save state
- vst1q_u32(&state[0], STATE0);
- vst1q_u32(&state[4], STATE1);
-}
-
-ANONYMOUS_NAMESPACE_END
-
-#endif // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
-
-///////////////////////////////////////////////////////
-// end of Walton/Schneiders/O'Rourke/Hovsmith's code //
-///////////////////////////////////////////////////////
-
void SHA256::Transform(word32 *state, const word32 *data)
{
CRYPTOPP_ASSERT(state);
CRYPTOPP_ASSERT(data);
-#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
+#if CRYPTOPP_SHANI_AVAILABLE
if (HasSHA())
{
- SHA256_SHANI_HashMultipleBlocks(state, data, SHA256::BLOCKSIZE, LITTLE_ENDIAN_ORDER);
+ SHA256_HashMultipleBlocks_SHANI(state, data, SHA256::BLOCKSIZE, LITTLE_ENDIAN_ORDER);
return;
}
#endif
-#if CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
+#if CRYPTOPP_ARM_SHA_AVAILABLE
if (HasSHA2())
{
- SHA256_ARM_SHA_HashMultipleBlocks(state, data, SHA256::BLOCKSIZE, LITTLE_ENDIAN_ORDER);
+ SHA256_HashMultipleBlocks_ARMV8(state, data, SHA256::BLOCKSIZE, LITTLE_ENDIAN_ORDER);
return;
}
#endif
- SHA256_CXX_HashBlock(state, data);
+ SHA256_HashBlock_CXX(state, data);
}
size_t SHA256::HashMultipleBlocks(const word32 *input, size_t length)
@@ -1475,10 +660,10 @@ size_t SHA256::HashMultipleBlocks(const word32 *input, size_t length)
CRYPTOPP_ASSERT(input);
CRYPTOPP_ASSERT(length >= SHA256::BLOCKSIZE);
-#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
+#if CRYPTOPP_SHANI_AVAILABLE
if (HasSHA())
{
- SHA256_SHANI_HashMultipleBlocks(m_state, input, length, BIG_ENDIAN_ORDER);
+ SHA256_HashMultipleBlocks_SHANI(m_state, input, length, BIG_ENDIAN_ORDER);
return length & (SHA256::BLOCKSIZE - 1);
}
#endif
@@ -1486,14 +671,14 @@ size_t SHA256::HashMultipleBlocks(const word32 *input, size_t length)
if (HasSSE2())
{
const size_t res = length & (SHA256::BLOCKSIZE - 1);
- SHA256_SSE_HashMultipleBlocks(m_state, input, length-res);
+ SHA256_HashMultipleBlocks_SSE2(m_state, input, length-res);
return res;
}
#endif
-#if CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
+#if CRYPTOPP_ARM_SHA_AVAILABLE
if (HasSHA2())
{
- SHA256_ARM_SHA_HashMultipleBlocks(m_state, input, length, BIG_ENDIAN_ORDER);
+ SHA256_HashMultipleBlocks_ARMV8(m_state, input, length, BIG_ENDIAN_ORDER);
return length & (SHA256::BLOCKSIZE - 1);
}
#endif
@@ -1504,12 +689,12 @@ size_t SHA256::HashMultipleBlocks(const word32 *input, size_t length)
{
if (noReverse)
{
- SHA256_CXX_HashBlock(m_state, input);
+ SHA256_HashBlock_CXX(m_state, input);
}
else
{
ByteReverse(dataBuf, input, SHA256::BLOCKSIZE);
- SHA256_CXX_HashBlock(m_state, dataBuf);
+ SHA256_HashBlock_CXX(m_state, dataBuf);
}
input += SHA256::BLOCKSIZE/sizeof(word32);
@@ -1521,13 +706,13 @@ size_t SHA256::HashMultipleBlocks(const word32 *input, size_t length)
size_t SHA224::HashMultipleBlocks(const word32 *input, size_t length)
{
- CRYPTOPP_ASSERT(input);
+ CRYPTOPP_ASSERT(input);
CRYPTOPP_ASSERT(length >= SHA256::BLOCKSIZE);
-#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
+#if CRYPTOPP_SHANI_AVAILABLE
if (HasSHA())
{
- SHA256_SHANI_HashMultipleBlocks(m_state, input, length, BIG_ENDIAN_ORDER);
+ SHA256_HashMultipleBlocks_SHANI(m_state, input, length, BIG_ENDIAN_ORDER);
return length & (SHA256::BLOCKSIZE - 1);
}
#endif
@@ -1535,14 +720,14 @@ size_t SHA224::HashMultipleBlocks(const word32 *input, size_t length)
if (HasSSE2())
{
const size_t res = length & (SHA256::BLOCKSIZE - 1);
- SHA256_SSE_HashMultipleBlocks(m_state, input, length-res);
+ SHA256_HashMultipleBlocks_SSE2(m_state, input, length-res);
return res;
}
#endif
-#if CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
+#if CRYPTOPP_ARM_SHA_AVAILABLE
if (HasSHA2())
{
- SHA256_ARM_SHA_HashMultipleBlocks(m_state, input, length, BIG_ENDIAN_ORDER);
+ SHA256_HashMultipleBlocks_ARMV8(m_state, input, length, BIG_ENDIAN_ORDER);
return length & (SHA256::BLOCKSIZE - 1);
}
#endif
@@ -1553,12 +738,12 @@ size_t SHA224::HashMultipleBlocks(const word32 *input, size_t length)
{
if (noReverse)
{
- SHA256_CXX_HashBlock(m_state, input);
+ SHA256_HashBlock_CXX(m_state, input);
}
else
{
ByteReverse(dataBuf, input, SHA256::BLOCKSIZE);
- SHA256_CXX_HashBlock(m_state, dataBuf);
+ SHA256_HashBlock_CXX(m_state, dataBuf);
}
input += SHA256::BLOCKSIZE/sizeof(word32);
@@ -1591,7 +776,7 @@ void SHA512::InitState(HashWordType *state)
}
CRYPTOPP_ALIGN_DATA(16)
-static const word64 SHA512_K[80] CRYPTOPP_SECTION_ALIGN16 = {
+const word64 SHA512_K[80] CRYPTOPP_SECTION_ALIGN16 = {
W64LIT(0x428a2f98d728ae22), W64LIT(0x7137449123ef65cd),
W64LIT(0xb5c0fbcfec4d3b2f), W64LIT(0xe9b5dba58189dbbc),
W64LIT(0x3956c25bf348b538), W64LIT(0x59f111f1b605d019),
@@ -1638,7 +823,7 @@ static const word64 SHA512_K[80] CRYPTOPP_SECTION_ALIGN16 = {
ANONYMOUS_NAMESPACE_BEGIN
-CRYPTOPP_NAKED void CRYPTOPP_FASTCALL SHA512_SSE2_Transform(word64 *state, const word64 *data)
+CRYPTOPP_NAKED void CRYPTOPP_FASTCALL SHA512_HashBlock_SSE2(word64 *state, const word64 *data)
{
#ifdef __GNUC__
__asm__ __volatile__
@@ -1844,9 +1029,9 @@ ANONYMOUS_NAMESPACE_BEGIN
#define s1(x) (rotrFixed(x,19)^rotrFixed(x,61)^(x>>6))
#define R(i) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+SHA512_K[i+j]+\
- (j?blk2(i):blk0(i));d(i)+=h(i);h(i)+=S0(a(i))+Maj(a(i),b(i),c(i))
+ (j?blk2(i):blk0(i));d(i)+=h(i);h(i)+=S0(a(i))+Maj(a(i),b(i),c(i))
-void SHA512_CXX_HashBlock(word64 *state, const word64 *data)
+void SHA512_HashBlock_CXX(word64 *state, const word64 *data)
{
CRYPTOPP_ASSERT(state);
CRYPTOPP_ASSERT(data);
@@ -1884,12 +1069,12 @@ void SHA512::Transform(word64 *state, const word64 *data)
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32)
if (HasSSE2())
{
- SHA512_SSE2_Transform(state, data);
+ SHA512_HashBlock_SSE2(state, data);
return;
}
#endif
- SHA512_CXX_HashBlock(state, data);
+ SHA512_HashBlock_CXX(state, data);
}
NAMESPACE_END
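One detail worth keeping in mind from the relocated SHANI code: the ByteOrder argument lets the callers above skip a separate ByteReverse() pass. Transform() receives words that are already byte-reversed and passes LITTLE_ENDIAN_ORDER, while HashMultipleBlocks() feeds the raw big-endian message stream and passes BIG_ENDIAN_ORDER; the routine folds the difference into one PSHUFB mask chosen outside the block loop, which the removed comment credits with about 1.0 cpb. Condensed from the removed SHA1_HashMultipleBlocks_SHANI body (a sketch, using the CONST_M128_CAST helper that moves out with it):

// One mask, chosen per call, arranges loaded message bytes for the SHA-NI rounds.
const __m128i MASK = (order == BIG_ENDIAN_ORDER)
    ? _mm_set_epi8(0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15)   // raw big-endian message input
    : _mm_set_epi8(3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12);  // input already byte-reversed

__m128i MSG0 = _mm_loadu_si128(CONST_M128_CAST(data));
MSG0 = _mm_shuffle_epi8(MSG0, MASK);   // no separate ByteReverse() pass needed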