summaryrefslogtreecommitdiff
path: root/blake2.cpp
diff options
context:
space:
mode:
authorJeffrey Walton <noloader@gmail.com>2017-08-13 06:32:09 -0400
committerJeffrey Walton <noloader@gmail.com>2017-08-13 06:32:09 -0400
commit863bf9133c81933f4417fa9de49850a7c181158c (patch)
treeb4b892aa169c967edd6968faf183c861f651ba9e /blake2.cpp
parentf02bf91ee50877a41aa2f201f4840aee9a8dcc55 (diff)
downloadcryptopp-git-863bf9133c81933f4417fa9de49850a7c181158c.tar.gz
Cleanup casts due to Clang
Diffstat (limited to 'blake2.cpp')
-rw-r--r--blake2.cpp98
1 file changed, 51 insertions, 47 deletions
diff --git a/blake2.cpp b/blake2.cpp
index 09a3794d..6313fec1 100644
--- a/blake2.cpp
+++ b/blake2.cpp
@@ -35,6 +35,10 @@ inline __m128i MM_SET_EPI64X(const word64 a, const word64 b)
# define MM_SET_EPI64X(a, b) _mm_set_epi64x(a, b)
#endif
+// Clang casts
+#define M128I_CAST(x) ((__m128i *)(void *)(x))
+#define CONST_M128I_CAST(x) ((const __m128i *)(const void *)(x))
+
// C/C++ implementation
static void BLAKE2_CXX_Compress32(const byte* input, BLAKE2_State<word32, false>& state);
static void BLAKE2_CXX_Compress64(const byte* input, BLAKE2_State<word64, true>& state);
@@ -626,10 +630,10 @@ static void BLAKE2_SSE2_Compress32(const byte* input, BLAKE2_State<word32, false
__m128i buf1,buf2,buf3,buf4;
__m128i ff0,ff1;
- row1 = ff0 = _mm_loadu_si128((const __m128i*)(const void*)(&state.h[0]));
- row2 = ff1 = _mm_loadu_si128((const __m128i*)(const void*)(&state.h[4]));
+ row1 = ff0 = _mm_loadu_si128(CONST_M128I_CAST(&state.h[0]));
+ row2 = ff1 = _mm_loadu_si128(CONST_M128I_CAST(&state.h[4]));
row3 = _mm_setr_epi32(BLAKE2S_IV(0),BLAKE2S_IV(1),BLAKE2S_IV(2),BLAKE2S_IV(3));
- row4 = _mm_xor_si128(_mm_setr_epi32(BLAKE2S_IV(4),BLAKE2S_IV(5),BLAKE2S_IV(6),BLAKE2S_IV(7)),_mm_loadu_si128((const __m128i*)(const void*)(&state.t[0])));
+ row4 = _mm_xor_si128(_mm_setr_epi32(BLAKE2S_IV(4),BLAKE2S_IV(5),BLAKE2S_IV(6),BLAKE2S_IV(7)),_mm_loadu_si128(CONST_M128I_CAST(&state.t[0])));
buf1 = _mm_set_epi32(m6,m4,m2,m0);
row1 = _mm_add_epi32(_mm_add_epi32(row1,buf1),row2);
row4 = _mm_xor_si128(row4,row1);
@@ -1030,8 +1034,8 @@ static void BLAKE2_SSE2_Compress32(const byte* input, BLAKE2_State<word32, false
row3 = _mm_shuffle_epi32(row3,_MM_SHUFFLE(1,0,3,2));
row2 = _mm_shuffle_epi32(row2,_MM_SHUFFLE(2,1,0,3));
- _mm_storeu_si128((__m128i *)(void*)(&state.h[0]),_mm_xor_si128(ff0,_mm_xor_si128(row1,row3)));
- _mm_storeu_si128((__m128i *)(void*)(&state.h[4]),_mm_xor_si128(ff1,_mm_xor_si128(row2,row4)));
+ _mm_storeu_si128(M128I_CAST(&state.h[0]),_mm_xor_si128(ff0,_mm_xor_si128(row1,row3)));
+ _mm_storeu_si128(M128I_CAST(&state.h[4]),_mm_xor_si128(ff1,_mm_xor_si128(row2,row4)));
}
# if (__SUNPRO_CC != 0x5120)
@@ -1045,14 +1049,14 @@ static void BLAKE2_SSE2_Compress64(const byte* input, BLAKE2_State<word64, true>
__m128i row3l, row3h, row4l, row4h;
__m128i b0, b1, t0, t1;
- row1l = _mm_loadu_si128((const __m128i*)(const void*)(&state.h[0]));
- row1h = _mm_loadu_si128((const __m128i*)(const void*)(&state.h[2]));
- row2l = _mm_loadu_si128((const __m128i*)(const void*)(&state.h[4]));
- row2h = _mm_loadu_si128((const __m128i*)(const void*)(&state.h[6]));
- row3l = _mm_loadu_si128((const __m128i*)(const void*)(&BLAKE2B_IV(0)));
- row3h = _mm_loadu_si128((const __m128i*)(const void*)(&BLAKE2B_IV(2)));
- row4l = _mm_xor_si128(_mm_loadu_si128((const __m128i*)(const void*)(&BLAKE2B_IV(4))), _mm_loadu_si128((const __m128i*)(const void*)(&state.t[0])));
- row4h = _mm_xor_si128(_mm_loadu_si128((const __m128i*)(const void*)(&BLAKE2B_IV(6))), _mm_loadu_si128((const __m128i*)(const void*)(&state.f[0])));
+ row1l = _mm_loadu_si128(CONST_M128I_CAST(&state.h[0]));
+ row1h = _mm_loadu_si128(CONST_M128I_CAST(&state.h[2]));
+ row2l = _mm_loadu_si128(CONST_M128I_CAST(&state.h[4]));
+ row2h = _mm_loadu_si128(CONST_M128I_CAST(&state.h[6]));
+ row3l = _mm_loadu_si128(CONST_M128I_CAST(&BLAKE2B_IV(0)));
+ row3h = _mm_loadu_si128(CONST_M128I_CAST(&BLAKE2B_IV(2)));
+ row4l = _mm_xor_si128(_mm_loadu_si128(CONST_M128I_CAST(&BLAKE2B_IV(4))), _mm_loadu_si128(CONST_M128I_CAST(&state.t[0])));
+ row4h = _mm_xor_si128(_mm_loadu_si128(CONST_M128I_CAST(&BLAKE2B_IV(6))), _mm_loadu_si128(CONST_M128I_CAST(&state.f[0])));
b0 = MM_SET_EPI64X(m2, m0);
b1 = MM_SET_EPI64X(m6, m4);
@@ -1918,13 +1922,13 @@ static void BLAKE2_SSE2_Compress64(const byte* input, BLAKE2_State<word64, true>
row1l = _mm_xor_si128(row3l, row1l);
row1h = _mm_xor_si128(row3h, row1h);
- _mm_storeu_si128((__m128i *)(void*)(&state.h[0]), _mm_xor_si128(_mm_loadu_si128((const __m128i*)(const void*)(&state.h[0])), row1l));
- _mm_storeu_si128((__m128i *)(void*)(&state.h[2]), _mm_xor_si128(_mm_loadu_si128((const __m128i*)(const void*)(&state.h[2])), row1h));
+ _mm_storeu_si128(M128I_CAST(&state.h[0]), _mm_xor_si128(_mm_loadu_si128(CONST_M128I_CAST(&state.h[0])), row1l));
+ _mm_storeu_si128(M128I_CAST(&state.h[2]), _mm_xor_si128(_mm_loadu_si128(CONST_M128I_CAST(&state.h[2])), row1h));
row2l = _mm_xor_si128(row4l, row2l);
row2h = _mm_xor_si128(row4h, row2h);
- _mm_storeu_si128((__m128i *)(void*)(&state.h[4]), _mm_xor_si128(_mm_loadu_si128((const __m128i*)(const void*)(&state.h[4])), row2l));
- _mm_storeu_si128((__m128i *)(void*)(&state.h[6]), _mm_xor_si128(_mm_loadu_si128((const __m128i*)(const void*)(&state.h[6])), row2h));
+ _mm_storeu_si128(M128I_CAST(&state.h[4]), _mm_xor_si128(_mm_loadu_si128(CONST_M128I_CAST(&state.h[4])), row2l));
+ _mm_storeu_si128(M128I_CAST(&state.h[6]), _mm_xor_si128(_mm_loadu_si128(CONST_M128I_CAST(&state.h[6])), row2h));
}
# endif // (__SUNPRO_CC != 0x5120)
#endif // CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE
@@ -1941,15 +1945,15 @@ static void BLAKE2_SSE4_Compress32(const byte* input, BLAKE2_State<word32, false
const __m128i r8 = _mm_set_epi8(12, 15, 14, 13, 8, 11, 10, 9, 4, 7, 6, 5, 0, 3, 2, 1);
const __m128i r16 = _mm_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2);
- const __m128i m0 = _mm_loadu_si128((const __m128i*)(const void*)(input + 00));
- const __m128i m1 = _mm_loadu_si128((const __m128i*)(const void*)(input + 16));
- const __m128i m2 = _mm_loadu_si128((const __m128i*)(const void*)(input + 32));
- const __m128i m3 = _mm_loadu_si128((const __m128i*)(const void*)(input + 48));
+ const __m128i m0 = _mm_loadu_si128(CONST_M128I_CAST(input + 00));
+ const __m128i m1 = _mm_loadu_si128(CONST_M128I_CAST(input + 16));
+ const __m128i m2 = _mm_loadu_si128(CONST_M128I_CAST(input + 32));
+ const __m128i m3 = _mm_loadu_si128(CONST_M128I_CAST(input + 48));
- row1 = ff0 = _mm_loadu_si128((const __m128i*)(const void*)(&state.h[0]));
- row2 = ff1 = _mm_loadu_si128((const __m128i*)(const void*)(&state.h[4]));
+ row1 = ff0 = _mm_loadu_si128(CONST_M128I_CAST(&state.h[0]));
+ row2 = ff1 = _mm_loadu_si128(CONST_M128I_CAST(&state.h[4]));
row3 = _mm_setr_epi32(BLAKE2S_IV(0), BLAKE2S_IV(1), BLAKE2S_IV(2), BLAKE2S_IV(3));
- row4 = _mm_xor_si128(_mm_setr_epi32(BLAKE2S_IV(4), BLAKE2S_IV(5), BLAKE2S_IV(6), BLAKE2S_IV(7)), _mm_loadu_si128((const __m128i*)(const void*)(&state.t[0])));
+ row4 = _mm_xor_si128(_mm_setr_epi32(BLAKE2S_IV(4), BLAKE2S_IV(5), BLAKE2S_IV(6), BLAKE2S_IV(7)), _mm_loadu_si128(CONST_M128I_CAST(&state.t[0])));
buf1 = _mm_castps_si128((_mm_shuffle_ps(_mm_castsi128_ps((m0)), _mm_castsi128_ps((m1)), _MM_SHUFFLE(2,0,2,0))));
row1 = _mm_add_epi32(_mm_add_epi32(row1, buf1), row2);
@@ -2481,8 +2485,8 @@ static void BLAKE2_SSE4_Compress32(const byte* input, BLAKE2_State<word32, false
row3 = _mm_shuffle_epi32(row3, _MM_SHUFFLE(1,0,3,2));
row2 = _mm_shuffle_epi32(row2, _MM_SHUFFLE(2,1,0,3));
- _mm_storeu_si128((__m128i *)(void*)(&state.h[0]), _mm_xor_si128(ff0, _mm_xor_si128(row1, row3)));
- _mm_storeu_si128((__m128i *)(void*)(&state.h[4]), _mm_xor_si128(ff1, _mm_xor_si128(row2, row4)));
+ _mm_storeu_si128(M128I_CAST(&state.h[0]), _mm_xor_si128(ff0, _mm_xor_si128(row1, row3)));
+ _mm_storeu_si128(M128I_CAST(&state.h[4]), _mm_xor_si128(ff1, _mm_xor_si128(row2, row4)));
}
static void BLAKE2_SSE4_Compress64(const byte* input, BLAKE2_State<word64, true>& state)
@@ -2496,23 +2500,23 @@ static void BLAKE2_SSE4_Compress64(const byte* input, BLAKE2_State<word64, true>
const __m128i r16 = _mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9);
const __m128i r24 = _mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10);
- const __m128i m0 = _mm_loadu_si128((const __m128i*)(const void*)(input + 00));
- const __m128i m1 = _mm_loadu_si128((const __m128i*)(const void*)(input + 16));
- const __m128i m2 = _mm_loadu_si128((const __m128i*)(const void*)(input + 32));
- const __m128i m3 = _mm_loadu_si128((const __m128i*)(const void*)(input + 48));
- const __m128i m4 = _mm_loadu_si128((const __m128i*)(const void*)(input + 64));
- const __m128i m5 = _mm_loadu_si128((const __m128i*)(const void*)(input + 80));
- const __m128i m6 = _mm_loadu_si128((const __m128i*)(const void*)(input + 96));
- const __m128i m7 = _mm_loadu_si128((const __m128i*)(const void*)(input + 112));
-
- row1l = _mm_loadu_si128((const __m128i*)(const void*)(&state.h[0]));
- row1h = _mm_loadu_si128((const __m128i*)(const void*)(&state.h[2]));
- row2l = _mm_loadu_si128((const __m128i*)(const void*)(&state.h[4]));
- row2h = _mm_loadu_si128((const __m128i*)(const void*)(&state.h[6]));
- row3l = _mm_loadu_si128((const __m128i*)(const void*)(&BLAKE2B_IV(0)));
- row3h = _mm_loadu_si128((const __m128i*)(const void*)(&BLAKE2B_IV(2)));
- row4l = _mm_xor_si128(_mm_loadu_si128((const __m128i*)(const void*)(&BLAKE2B_IV(4))), _mm_loadu_si128((const __m128i*)(const void*)(&state.t[0])));
- row4h = _mm_xor_si128(_mm_loadu_si128((const __m128i*)(const void*)(&BLAKE2B_IV(6))), _mm_loadu_si128((const __m128i*)(const void*)(&state.f[0])));
+ const __m128i m0 = _mm_loadu_si128(CONST_M128I_CAST(input + 00));
+ const __m128i m1 = _mm_loadu_si128(CONST_M128I_CAST(input + 16));
+ const __m128i m2 = _mm_loadu_si128(CONST_M128I_CAST(input + 32));
+ const __m128i m3 = _mm_loadu_si128(CONST_M128I_CAST(input + 48));
+ const __m128i m4 = _mm_loadu_si128(CONST_M128I_CAST(input + 64));
+ const __m128i m5 = _mm_loadu_si128(CONST_M128I_CAST(input + 80));
+ const __m128i m6 = _mm_loadu_si128(CONST_M128I_CAST(input + 96));
+ const __m128i m7 = _mm_loadu_si128(CONST_M128I_CAST(input + 112));
+
+ row1l = _mm_loadu_si128(CONST_M128I_CAST(&state.h[0]));
+ row1h = _mm_loadu_si128(CONST_M128I_CAST(&state.h[2]));
+ row2l = _mm_loadu_si128(CONST_M128I_CAST(&state.h[4]));
+ row2h = _mm_loadu_si128(CONST_M128I_CAST(&state.h[6]));
+ row3l = _mm_loadu_si128(CONST_M128I_CAST(&BLAKE2B_IV(0)));
+ row3h = _mm_loadu_si128(CONST_M128I_CAST(&BLAKE2B_IV(2)));
+ row4l = _mm_xor_si128(_mm_loadu_si128(CONST_M128I_CAST(&BLAKE2B_IV(4))), _mm_loadu_si128(CONST_M128I_CAST(&state.t[0])));
+ row4h = _mm_xor_si128(_mm_loadu_si128(CONST_M128I_CAST(&BLAKE2B_IV(6))), _mm_loadu_si128(CONST_M128I_CAST(&state.f[0])));
b0 = _mm_unpacklo_epi64(m0, m1);
b1 = _mm_unpacklo_epi64(m2, m3);
@@ -3451,13 +3455,13 @@ static void BLAKE2_SSE4_Compress64(const byte* input, BLAKE2_State<word64, true>
row1l = _mm_xor_si128(row3l, row1l);
row1h = _mm_xor_si128(row3h, row1h);
- _mm_storeu_si128((__m128i *)(void*)(&state.h[0]), _mm_xor_si128(_mm_loadu_si128((const __m128i*)(const void*)(&state.h[0])), row1l));
- _mm_storeu_si128((__m128i *)(void*)(&state.h[2]), _mm_xor_si128(_mm_loadu_si128((const __m128i*)(const void*)(&state.h[2])), row1h));
+ _mm_storeu_si128(M128I_CAST(&state.h[0]), _mm_xor_si128(_mm_loadu_si128(CONST_M128I_CAST(&state.h[0])), row1l));
+ _mm_storeu_si128(M128I_CAST(&state.h[2]), _mm_xor_si128(_mm_loadu_si128(CONST_M128I_CAST(&state.h[2])), row1h));
row2l = _mm_xor_si128(row4l, row2l);
row2h = _mm_xor_si128(row4h, row2h);
- _mm_storeu_si128((__m128i *)(void*)(&state.h[4]), _mm_xor_si128(_mm_loadu_si128((const __m128i*)(const void*)(&state.h[4])), row2l));
- _mm_storeu_si128((__m128i *)(void*)(&state.h[6]), _mm_xor_si128(_mm_loadu_si128((const __m128i*)(const void*)(&state.h[6])), row2h));
+ _mm_storeu_si128(M128I_CAST(&state.h[4]), _mm_xor_si128(_mm_loadu_si128(CONST_M128I_CAST(&state.h[4])), row2l));
+ _mm_storeu_si128(M128I_CAST(&state.h[6]), _mm_xor_si128(_mm_loadu_si128(CONST_M128I_CAST(&state.h[6])), row2h));
}
#endif // CRYPTOPP_BOOL_SSE4_INTRINSICS_AVAILABLE