From ee247f86a289138791e6369e28a07cb6e6779feb Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Fri, 14 Apr 2023 09:15:15 -0400 Subject: =?UTF-8?q?Use=20=5Fmm=5Fcrc32=5Fu64=20in=20CRC32=20when=20availab?= =?UTF-8?q?le=20(GH=20#1202)=20Thanks=20to=20Pawe=C5=82=20Sikora=20for=20s?= =?UTF-8?q?uggesting=20the=20changes=20to=20CRC32=20on=20x86=5F64?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crc_simd.cpp | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) (limited to 'crc_simd.cpp') diff --git a/crc_simd.cpp b/crc_simd.cpp index c1a0725f..a2f87513 100644 --- a/crc_simd.cpp +++ b/crc_simd.cpp @@ -33,6 +33,7 @@ #endif #define CONST_WORD32_CAST(x) ((const word32 *)(void*)(x)) +#define CONST_WORD64_CAST(x) ((const word64 *)(void*)(x)) // Squash MS LNK4221 and libtool warnings extern const char CRC_SIMD_FNAME[] = __FILE__; @@ -151,21 +152,41 @@ void CRC32C_Update_ARMV8(const byte *s, size_t n, word32& c) #if (CRYPTOPP_SSE42_AVAILABLE) void CRC32C_Update_SSE42(const byte *s, size_t n, word32& c) { + // Temporary due to https://github.com/weidai11/cryptopp/issues/1202 + word32 v = c; + + // 64-bit code path due to https://github.com/weidai11/cryptopp/issues/1202 +#if CRYPTOPP_BOOL_X64 + for(; !IsAligned<word64>(s) && n > 0; s++, n--) + v = _mm_crc32_u8(v, *s); +#else for(; !IsAligned<word32>(s) && n > 0; s++, n--) - c = _mm_crc32_u8(c, *s); + v = _mm_crc32_u8(v, *s); +#endif + +#if CRYPTOPP_BOOL_X64 + for(; n >= 32; s+=32, n-=32) + { + v = _mm_crc32_u64(_mm_crc32_u64(_mm_crc32_u64(_mm_crc32_u64(v, + *CONST_WORD64_CAST(s+ 0)), *CONST_WORD64_CAST(s+ 8)), + *CONST_WORD64_CAST(s+16)), *CONST_WORD64_CAST(s+24)); + } +#endif for(; n >= 16; s+=16, n-=16) - { - c = _mm_crc32_u32(_mm_crc32_u32(_mm_crc32_u32(_mm_crc32_u32(c, + { + v = _mm_crc32_u32(_mm_crc32_u32(_mm_crc32_u32(_mm_crc32_u32(v, *CONST_WORD32_CAST(s+ 0)), *CONST_WORD32_CAST(s+ 4)), *CONST_WORD32_CAST(s+ 8)), *CONST_WORD32_CAST(s+12)); - } + } for(; n 
>= 4; s+=4, n-=4) - c = _mm_crc32_u32(c, *CONST_WORD32_CAST(s)); + v = _mm_crc32_u32(v, *CONST_WORD32_CAST(s)); for(; n > 0; s++, n--) - c = _mm_crc32_u8(c, *s); + v = _mm_crc32_u8(v, *s); + + c = v; } #endif -- cgit v1.2.1