summary | refs | log | tree | commit | diff
path: root/crc_simd.cpp
diff options
context:
space:
mode:
author: Jeffrey Walton <noloader@gmail.com> 2023-04-14 09:15:15 -0400
committer: Jeffrey Walton <noloader@gmail.com> 2023-04-14 09:15:15 -0400
commit: ee247f86a289138791e6369e28a07cb6e6779feb (patch)
tree: 91b7fc64660fa529d9e5010f8ac1d202c6d41557 /crc_simd.cpp
parent: a21bab3255dd3c7708d8af4cc065e3e2464db48a (diff)
download: cryptopp-git-ee247f86a289138791e6369e28a07cb6e6779feb.tar.gz
Use _mm_crc32_u64 in CRC32 when available (GH #1202)
Thanks to Paweł Sikora for suggesting the changes to CRC32 on x86_64
Diffstat (limited to 'crc_simd.cpp')
-rw-r--r-- crc_simd.cpp 33
1 files changed, 27 insertions, 6 deletions
diff --git a/crc_simd.cpp b/crc_simd.cpp
index c1a0725f..a2f87513 100644
--- a/crc_simd.cpp
+++ b/crc_simd.cpp
@@ -33,6 +33,7 @@
#endif
#define CONST_WORD32_CAST(x) ((const word32 *)(void*)(x))
+#define CONST_WORD64_CAST(x) ((const word64 *)(void*)(x))
// Squash MS LNK4221 and libtool warnings
extern const char CRC_SIMD_FNAME[] = __FILE__;
@@ -151,21 +152,41 @@ void CRC32C_Update_ARMV8(const byte *s, size_t n, word32& c)
#if (CRYPTOPP_SSE42_AVAILABLE)
void CRC32C_Update_SSE42(const byte *s, size_t n, word32& c)
{
+ // Temporary due to https://github.com/weidai11/cryptopp/issues/1202
+ word32 v = c;
+
+ // 64-bit code path due to https://github.com/weidai11/cryptopp/issues/1202
+#if CRYPTOPP_BOOL_X64
+ for(; !IsAligned<word64>(s) && n > 0; s++, n--)
+ v = _mm_crc32_u8(v, *s);
+#else
for(; !IsAligned<word32>(s) && n > 0; s++, n--)
- c = _mm_crc32_u8(c, *s);
+ v = _mm_crc32_u8(v, *s);
+#endif
+
+#if CRYPTOPP_BOOL_X64
+ for(; n >= 32; s+=32, n-=32)
+ {
+ v = _mm_crc32_u64(_mm_crc32_u64(_mm_crc32_u64(_mm_crc32_u64(v,
+ *CONST_WORD64_CAST(s+ 0)), *CONST_WORD64_CAST(s+ 8)),
+ *CONST_WORD64_CAST(s+16)), *CONST_WORD64_CAST(s+24));
+ }
+#endif
for(; n >= 16; s+=16, n-=16)
- {
- c = _mm_crc32_u32(_mm_crc32_u32(_mm_crc32_u32(_mm_crc32_u32(c,
+ {
+ v = _mm_crc32_u32(_mm_crc32_u32(_mm_crc32_u32(_mm_crc32_u32(v,
*CONST_WORD32_CAST(s+ 0)), *CONST_WORD32_CAST(s+ 4)),
*CONST_WORD32_CAST(s+ 8)), *CONST_WORD32_CAST(s+12));
- }
+ }
for(; n >= 4; s+=4, n-=4)
- c = _mm_crc32_u32(c, *CONST_WORD32_CAST(s));
+ v = _mm_crc32_u32(v, *CONST_WORD32_CAST(s));
for(; n > 0; s++, n--)
- c = _mm_crc32_u8(c, *s);
+ v = _mm_crc32_u8(v, *s);
+
+ c = v;
}
#endif