From 863bf9133c81933f4417fa9de49850a7c181158c Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Sun, 13 Aug 2017 06:32:09 -0400 Subject: Cleanup casts due to Clang --- blake2.cpp | 98 ++++++++++++++++++++++++++++++++------------------------------ 1 file changed, 51 insertions(+), 47 deletions(-) (limited to 'blake2.cpp') diff --git a/blake2.cpp b/blake2.cpp index 09a3794d..6313fec1 100644 --- a/blake2.cpp +++ b/blake2.cpp @@ -35,6 +35,10 @@ inline __m128i MM_SET_EPI64X(const word64 a, const word64 b) # define MM_SET_EPI64X(a, b) _mm_set_epi64x(a, b) #endif +// Clang casts +#define M128I_CAST(x) ((__m128i *)(void *)(x)) +#define CONST_M128I_CAST(x) ((const __m128i *)(const void *)(x)) + // C/C++ implementation static void BLAKE2_CXX_Compress32(const byte* input, BLAKE2_State& state); static void BLAKE2_CXX_Compress64(const byte* input, BLAKE2_State& state); @@ -626,10 +630,10 @@ static void BLAKE2_SSE2_Compress32(const byte* input, BLAKE2_State __m128i row3l, row3h, row4l, row4h; __m128i b0, b1, t0, t1; - row1l = _mm_loadu_si128((const __m128i*)(const void*)(&state.h[0])); - row1h = _mm_loadu_si128((const __m128i*)(const void*)(&state.h[2])); - row2l = _mm_loadu_si128((const __m128i*)(const void*)(&state.h[4])); - row2h = _mm_loadu_si128((const __m128i*)(const void*)(&state.h[6])); - row3l = _mm_loadu_si128((const __m128i*)(const void*)(&BLAKE2B_IV(0))); - row3h = _mm_loadu_si128((const __m128i*)(const void*)(&BLAKE2B_IV(2))); - row4l = _mm_xor_si128(_mm_loadu_si128((const __m128i*)(const void*)(&BLAKE2B_IV(4))), _mm_loadu_si128((const __m128i*)(const void*)(&state.t[0]))); - row4h = _mm_xor_si128(_mm_loadu_si128((const __m128i*)(const void*)(&BLAKE2B_IV(6))), _mm_loadu_si128((const __m128i*)(const void*)(&state.f[0]))); + row1l = _mm_loadu_si128(CONST_M128I_CAST(&state.h[0])); + row1h = _mm_loadu_si128(CONST_M128I_CAST(&state.h[2])); + row2l = _mm_loadu_si128(CONST_M128I_CAST(&state.h[4])); + row2h = _mm_loadu_si128(CONST_M128I_CAST(&state.h[6])); + row3l = _mm_loadu_si128(CONST_M128I_CAST(&BLAKE2B_IV(0))); + row3h = _mm_loadu_si128(CONST_M128I_CAST(&BLAKE2B_IV(2))); + row4l = _mm_xor_si128(_mm_loadu_si128(CONST_M128I_CAST(&BLAKE2B_IV(4))), _mm_loadu_si128(CONST_M128I_CAST(&state.t[0]))); + row4h = _mm_xor_si128(_mm_loadu_si128(CONST_M128I_CAST(&BLAKE2B_IV(6))), _mm_loadu_si128(CONST_M128I_CAST(&state.f[0]))); b0 = MM_SET_EPI64X(m2, m0); b1 = MM_SET_EPI64X(m6, m4); @@ -1918,13 +1922,13 @@ static void BLAKE2_SSE2_Compress64(const byte* input, BLAKE2_State row1l = _mm_xor_si128(row3l, row1l); row1h = _mm_xor_si128(row3h, row1h); - _mm_storeu_si128((__m128i *)(void*)(&state.h[0]), _mm_xor_si128(_mm_loadu_si128((const __m128i*)(const void*)(&state.h[0])), row1l)); - _mm_storeu_si128((__m128i *)(void*)(&state.h[2]), _mm_xor_si128(_mm_loadu_si128((const __m128i*)(const void*)(&state.h[2])), row1h)); + _mm_storeu_si128(M128I_CAST(&state.h[0]), _mm_xor_si128(_mm_loadu_si128(CONST_M128I_CAST(&state.h[0])), row1l)); + _mm_storeu_si128(M128I_CAST(&state.h[2]), _mm_xor_si128(_mm_loadu_si128(CONST_M128I_CAST(&state.h[2])), row1h)); row2l = _mm_xor_si128(row4l, row2l); row2h = _mm_xor_si128(row4h, row2h); - _mm_storeu_si128((__m128i *)(void*)(&state.h[4]), _mm_xor_si128(_mm_loadu_si128((const __m128i*)(const void*)(&state.h[4])), row2l)); - _mm_storeu_si128((__m128i *)(void*)(&state.h[6]), _mm_xor_si128(_mm_loadu_si128((const __m128i*)(const void*)(&state.h[6])), row2h)); + _mm_storeu_si128(M128I_CAST(&state.h[4]), _mm_xor_si128(_mm_loadu_si128(CONST_M128I_CAST(&state.h[4])), row2l)); + _mm_storeu_si128(M128I_CAST(&state.h[6]), _mm_xor_si128(_mm_loadu_si128(CONST_M128I_CAST(&state.h[6])), row2h)); } # endif // (__SUNPRO_CC != 0x5120) #endif // CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE @@ -1941,15 +1945,15 @@ static void BLAKE2_SSE4_Compress32(const byte* input, BLAKE2_State& state) @@ -2496,23 +2500,23 @@ static void BLAKE2_SSE4_Compress64(const byte* input, BLAKE2_State const __m128i r16 = _mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9); const __m128i r24 = _mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10); - const __m128i m0 = _mm_loadu_si128((const __m128i*)(const void*)(input + 00)); - const __m128i m1 = _mm_loadu_si128((const __m128i*)(const void*)(input + 16)); - const __m128i m2 = _mm_loadu_si128((const __m128i*)(const void*)(input + 32)); - const __m128i m3 = _mm_loadu_si128((const __m128i*)(const void*)(input + 48)); - const __m128i m4 = _mm_loadu_si128((const __m128i*)(const void*)(input + 64)); - const __m128i m5 = _mm_loadu_si128((const __m128i*)(const void*)(input + 80)); - const __m128i m6 = _mm_loadu_si128((const __m128i*)(const void*)(input + 96)); - const __m128i m7 = _mm_loadu_si128((const __m128i*)(const void*)(input + 112)); - - row1l = _mm_loadu_si128((const __m128i*)(const void*)(&state.h[0])); - row1h = _mm_loadu_si128((const __m128i*)(const void*)(&state.h[2])); - row2l = _mm_loadu_si128((const __m128i*)(const void*)(&state.h[4])); - row2h = _mm_loadu_si128((const __m128i*)(const void*)(&state.h[6])); - row3l = _mm_loadu_si128((const __m128i*)(const void*)(&BLAKE2B_IV(0))); - row3h = _mm_loadu_si128((const __m128i*)(const void*)(&BLAKE2B_IV(2))); - row4l = _mm_xor_si128(_mm_loadu_si128((const __m128i*)(const void*)(&BLAKE2B_IV(4))), _mm_loadu_si128((const __m128i*)(const void*)(&state.t[0]))); - row4h = _mm_xor_si128(_mm_loadu_si128((const __m128i*)(const void*)(&BLAKE2B_IV(6))), _mm_loadu_si128((const __m128i*)(const void*)(&state.f[0]))); + const __m128i m0 = _mm_loadu_si128(CONST_M128I_CAST(input + 00)); + const __m128i m1 = _mm_loadu_si128(CONST_M128I_CAST(input + 16)); + const __m128i m2 = _mm_loadu_si128(CONST_M128I_CAST(input + 32)); + const __m128i m3 = _mm_loadu_si128(CONST_M128I_CAST(input + 48)); + const __m128i m4 = _mm_loadu_si128(CONST_M128I_CAST(input + 64)); + const __m128i m5 = _mm_loadu_si128(CONST_M128I_CAST(input + 80)); + const __m128i m6 = _mm_loadu_si128(CONST_M128I_CAST(input + 96)); + const __m128i m7 = _mm_loadu_si128(CONST_M128I_CAST(input + 112)); + + row1l = _mm_loadu_si128(CONST_M128I_CAST(&state.h[0])); + row1h = _mm_loadu_si128(CONST_M128I_CAST(&state.h[2])); + row2l = _mm_loadu_si128(CONST_M128I_CAST(&state.h[4])); + row2h = _mm_loadu_si128(CONST_M128I_CAST(&state.h[6])); + row3l = _mm_loadu_si128(CONST_M128I_CAST(&BLAKE2B_IV(0))); + row3h = _mm_loadu_si128(CONST_M128I_CAST(&BLAKE2B_IV(2))); + row4l = _mm_xor_si128(_mm_loadu_si128(CONST_M128I_CAST(&BLAKE2B_IV(4))), _mm_loadu_si128(CONST_M128I_CAST(&state.t[0]))); + row4h = _mm_xor_si128(_mm_loadu_si128(CONST_M128I_CAST(&BLAKE2B_IV(6))), _mm_loadu_si128(CONST_M128I_CAST(&state.f[0]))); b0 = _mm_unpacklo_epi64(m0, m1); b1 = _mm_unpacklo_epi64(m2, m3); @@ -3451,13 +3455,13 @@ static void BLAKE2_SSE4_Compress64(const byte* input, BLAKE2_State row1l = _mm_xor_si128(row3l, row1l); row1h = _mm_xor_si128(row3h, row1h); - _mm_storeu_si128((__m128i *)(void*)(&state.h[0]), _mm_xor_si128(_mm_loadu_si128((const __m128i*)(const void*)(&state.h[0])), row1l)); - _mm_storeu_si128((__m128i *)(void*)(&state.h[2]), _mm_xor_si128(_mm_loadu_si128((const __m128i*)(const void*)(&state.h[2])), row1h)); + _mm_storeu_si128(M128I_CAST(&state.h[0]), _mm_xor_si128(_mm_loadu_si128(CONST_M128I_CAST(&state.h[0])), row1l)); + _mm_storeu_si128(M128I_CAST(&state.h[2]), _mm_xor_si128(_mm_loadu_si128(CONST_M128I_CAST(&state.h[2])), row1h)); row2l = _mm_xor_si128(row4l, row2l); row2h = _mm_xor_si128(row4h, row2h); - _mm_storeu_si128((__m128i *)(void*)(&state.h[4]), _mm_xor_si128(_mm_loadu_si128((const __m128i*)(const void*)(&state.h[4])), row2l)); - _mm_storeu_si128((__m128i *)(void*)(&state.h[6]), _mm_xor_si128(_mm_loadu_si128((const __m128i*)(const void*)(&state.h[6])), row2h)); + _mm_storeu_si128(M128I_CAST(&state.h[4]), _mm_xor_si128(_mm_loadu_si128(CONST_M128I_CAST(&state.h[4])), row2l)); + _mm_storeu_si128(M128I_CAST(&state.h[6]), _mm_xor_si128(_mm_loadu_si128(CONST_M128I_CAST(&state.h[6])), row2h)); } #endif // CRYPTOPP_BOOL_SSE4_INTRINSICS_AVAILABLE -- cgit v1.2.1