path: root/lsh512.cpp
author	Jeffrey Walton <noloader@gmail.com>	2021-04-21 03:24:15 -0400
committer	Jeffrey Walton <noloader@gmail.com>	2021-04-21 03:24:15 -0400
commit	42097e279837ad6f084b7910cea5306c503be988 (patch)
tree	2e5db52fcb992a67644986f77d5d95a796ede1de /lsh512.cpp
parent	75e0b4979b62108eb0e1729357477a1aba9bc274 (diff)
download	cryptopp-git-42097e279837ad6f084b7910cea5306c503be988.tar.gz
Align LSH IV's for AVX
Diffstat (limited to 'lsh512.cpp')
-rw-r--r--	lsh512.cpp	23
1 file changed, 13 insertions, 10 deletions
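
The change below moves the IV tables from 16-byte to 32-byte alignment and switches the AVX path from unaligned to aligned loads. As a rough illustration of the pattern (this is not Crypto++ code: s_iv and copy_iv are made-up names standing in for the g_IV* tables and load_iv, and alignas(32) stands in for whatever CRYPTOPP_ALIGN_DATA(32) expands to on a given compiler), an aligned table can be read with _mm256_load_si256, while the destination buffer, whose alignment is not guaranteed, is still written with _mm256_storeu_si256:

// Sketch only; compile with AVX enabled (e.g., -mavx).
#include <immintrin.h>
#include <cstdint>

// 32-byte aligned constant table, analogous to the g_IV* arrays.
alignas(32) static const uint64_t s_iv[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };

void copy_iv(uint64_t* cv)  // cv points to 64 bytes, alignment not guaranteed
{
    // Aligned load from the table, unaligned store to the caller's buffer.
    _mm256_storeu_si256(reinterpret_cast<__m256i*>(cv + 0),
        _mm256_load_si256(reinterpret_cast<const __m256i*>(s_iv + 0)));
    _mm256_storeu_si256(reinterpret_cast<__m256i*>(cv + 4),
        _mm256_load_si256(reinterpret_cast<const __m256i*>(s_iv + 4)));
}

_mm256_load_si256 requires its source address to be 32-byte aligned, which is why the tables gain CRYPTOPP_ALIGN_DATA(32) in the hunks below.
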
diff --git a/lsh512.cpp b/lsh512.cpp
index f6340314..b26c5718 100644
--- a/lsh512.cpp
+++ b/lsh512.cpp
@@ -122,7 +122,9 @@ struct LSH512_Internal
};
#if defined(CRYPTOPP_LSH512_AVX_AVAILABLE)
-// Clear upper bits on entry and exit
+// Zero the upper 128 bits of all YMM registers
+// on entry and exit. It avoids AVX state
+// transition penalties when saving state.
struct AVX_Cleanup
{
AVX_Cleanup() {
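
For readers unfamiliar with the guard pattern in the hunk above: the struct is meant to issue VZEROUPPER when it is constructed and again when it goes out of scope, so surrounding SSE code does not pay AVX state transition penalties. A minimal sketch follows; the destructor and the usage shown here are assumptions based on the constructor in the hunk, not a copy of the library's code.

#include <immintrin.h>

// RAII guard: zero the upper 128 bits of the YMM registers on entry and exit.
struct AVX_Cleanup_Sketch
{
    AVX_Cleanup_Sketch()  { _mm256_zeroupper(); }  // on entry
    ~AVX_Cleanup_Sketch() { _mm256_zeroupper(); }  // on exit
};

void avx_routine()
{
    AVX_Cleanup_Sketch cleanup;  // upper YMM halves zeroed here and at return
    // ... AVX work using __m256i values ...
}
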
@@ -204,7 +206,7 @@ lsh_u64 ROTL64(lsh_u64 x, lsh_u32 r) {
# define MAYBE_CONSTEXPR const
#endif
-CRYPTOPP_ALIGN_DATA(16)
+CRYPTOPP_ALIGN_DATA(32)
MAYBE_CONSTEXPR lsh_u64 g_IV224[CV_WORD_LEN] = {
W64LIT(0x0C401E9FE8813A55), W64LIT(0x4A5F446268FD3D35), W64LIT(0xFF13E452334F612A), W64LIT(0xF8227661037E354A),
W64LIT(0xA5F223723C9CA29D), W64LIT(0x95D965A11AED3979), W64LIT(0x01E23835B9AB02CC), W64LIT(0x52D49CBAD5B30616),
@@ -212,7 +214,7 @@ MAYBE_CONSTEXPR lsh_u64 g_IV224[CV_WORD_LEN] = {
W64LIT(0x31E2B67D25BE3813), W64LIT(0xD522C4DEED8E4D83), W64LIT(0xA79F5509B43FBAFE), W64LIT(0xE00D2CD88B4B6C6A),
};
-CRYPTOPP_ALIGN_DATA(16)
+CRYPTOPP_ALIGN_DATA(32)
MAYBE_CONSTEXPR lsh_u64 g_IV256[CV_WORD_LEN] = {
W64LIT(0x6DC57C33DF989423), W64LIT(0xD8EA7F6E8342C199), W64LIT(0x76DF8356F8603AC4), W64LIT(0x40F1B44DE838223A),
W64LIT(0x39FFE7CFC31484CD), W64LIT(0x39C4326CC5281548), W64LIT(0x8A2FF85A346045D8), W64LIT(0xFF202AA46DBDD61E),
@@ -220,7 +222,7 @@ MAYBE_CONSTEXPR lsh_u64 g_IV256[CV_WORD_LEN] = {
W64LIT(0xB596875BF8FF6DBA), W64LIT(0xFCCA39B089EF4615), W64LIT(0xECFF4017D020B4B6), W64LIT(0x7E77384C772ED802),
};
-CRYPTOPP_ALIGN_DATA(16)
+CRYPTOPP_ALIGN_DATA(32)
MAYBE_CONSTEXPR lsh_u64 g_IV384[CV_WORD_LEN] = {
W64LIT(0x53156A66292808F6), W64LIT(0xB2C4F362B204C2BC), W64LIT(0xB84B7213BFA05C4E), W64LIT(0x976CEB7C1B299F73),
W64LIT(0xDF0CC63C0570AE97), W64LIT(0xDA4441BAA486CE3F), W64LIT(0x6559F5D9B5F2ACC2), W64LIT(0x22DACF19B4B52A16),
@@ -228,7 +230,7 @@ MAYBE_CONSTEXPR lsh_u64 g_IV384[CV_WORD_LEN] = {
W64LIT(0xBB08043FB34E3E30), W64LIT(0xA0DEC48D54618EAD), W64LIT(0x150317267464BC57), W64LIT(0x32D1501FDE63DC93)
};
-CRYPTOPP_ALIGN_DATA(16)
+CRYPTOPP_ALIGN_DATA(32)
MAYBE_CONSTEXPR lsh_u64 g_IV512[CV_WORD_LEN] = {
W64LIT(0xadd50f3c7f07094e), W64LIT(0xe3f3cee8f9418a4f), W64LIT(0xb527ecde5b3d0ae9), W64LIT(0x2ef6dec68076f501),
W64LIT(0x8cb994cae5aca216), W64LIT(0xfbb9eae4bba48cc7), W64LIT(0x650a526174725fea), W64LIT(0x1f9a61a73f8d8085),
@@ -1077,18 +1079,19 @@ inline void compress(LSH512_Context* ctx, const lsh_u8 pdMsgBlk[LSH512_MSG_BLK_B
inline void load_iv(word64* cv_l, word64* cv_r, const word64* iv)
{
+ // The IV's are 32-byte aligned so we can use aligned loads.
+
#if defined(CRYPTOPP_LSH512_AVX_AVAILABLE)
_mm256_storeu_si256(M256_CAST(cv_l+0),
- _mm256_loadu_si256(CONST_M256_CAST(iv+0)));
+ _mm256_load_si256(CONST_M256_CAST(iv+0)));
_mm256_storeu_si256(M256_CAST(cv_l+4),
- _mm256_loadu_si256(CONST_M256_CAST(iv+4)));
+ _mm256_load_si256(CONST_M256_CAST(iv+4)));
_mm256_storeu_si256(M256_CAST(cv_r+0),
- _mm256_loadu_si256(CONST_M256_CAST(iv+8)));
+ _mm256_load_si256(CONST_M256_CAST(iv+8)));
_mm256_storeu_si256(M256_CAST(cv_r+4),
- _mm256_loadu_si256(CONST_M256_CAST(iv+12)));
+ _mm256_load_si256(CONST_M256_CAST(iv+12)));
#elif defined(CRYPTOPP_LSH512_SSE2_AVAILABLE)
- // The IV's are 16-byte aligned so we can use _mm_load_si128.
_mm_storeu_si128(M128_CAST(cv_l+0),
_mm_load_si128(CONST_M128_CAST(iv+0)));
_mm_storeu_si128(M128_CAST(cv_l+2),