summaryrefslogtreecommitdiff
path: root/gf2n_simd.cpp
diff options
context:
space:
mode:
authorJeffrey Walton <noloader@gmail.com>2019-01-19 04:52:36 -0500
committerJeffrey Walton <noloader@gmail.com>2019-01-19 04:52:36 -0500
commit76b7390840d361d89cb76a4008d1f5c6dd536b9f (patch)
tree2c7accc98aa0f7217ad71c680a6d5b9b820037cf /gf2n_simd.cpp
parente3bc76832cc4ebd9ca2e8d2ef1438a580b29bda7 (diff)
downloadcryptopp-git-76b7390840d361d89cb76a4008d1f5c6dd536b9f.tar.gz
Switch to vec_mergeh and vec_mergel for PowerPC in binary curves
This saves about 4 instructions in VMULL
Diffstat (limited to 'gf2n_simd.cpp')
-rw-r--r--gf2n_simd.cpp16
1 files changed, 8 insertions, 8 deletions
diff --git a/gf2n_simd.cpp b/gf2n_simd.cpp
index 903d38c7..9fa9aec3 100644
--- a/gf2n_simd.cpp
+++ b/gf2n_simd.cpp
@@ -317,8 +317,6 @@ using CryptoPP::VecOr;
using CryptoPP::VecXor;
using CryptoPP::VecAnd;
-using CryptoPP::VecGetLow;
-using CryptoPP::VecGetHigh;
using CryptoPP::VecPermute;
using CryptoPP::VecShiftLeft;
using CryptoPP::VecShiftRight;
@@ -336,24 +334,26 @@ inline uint64x2_p VMULL2LE(const uint64x2_p& val)
// _mm_clmulepi64_si128(a, b, 0x00)
inline uint64x2_p VMULL_00LE(const uint64x2_p& a, const uint64x2_p& b)
{
+ const uint64x2_p z={0};
#if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__))
- return VMULL2LE(__vpmsumd (VecGetHigh(a), VecGetHigh(b)));
+ return VMULL2LE(__vpmsumd (vec_mergeh(z, a), vec_mergeh(z, b)));
#elif defined(__clang__)
- return VMULL2LE(__builtin_altivec_crypto_vpmsumd (VecGetHigh(a), VecGetHigh(b)));
+ return VMULL2LE(__builtin_altivec_crypto_vpmsumd (vec_mergeh(z, a), vec_mergeh(z, b)));
#else
- return VMULL2LE(__builtin_crypto_vpmsumd (VecGetHigh(a), VecGetHigh(b)));
+ return VMULL2LE(__builtin_crypto_vpmsumd (vec_mergeh(z, a), vec_mergeh(z, b)));
#endif
}
// _mm_clmulepi64_si128(a, b, 0x11)
inline uint64x2_p VMULL_11LE(const uint64x2_p& a, const uint64x2_p& b)
{
+ const uint64x2_p z={0};
#if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__))
- return VMULL2LE(__vpmsumd (VecGetLow(a), b));
+ return VMULL2LE(__vpmsumd (vec_mergel(z, a), b));
#elif defined(__clang__)
- return VMULL2LE(__builtin_altivec_crypto_vpmsumd (VecGetLow(a), b));
+ return VMULL2LE(__builtin_altivec_crypto_vpmsumd (vec_mergel(z, a), b));
#else
- return VMULL2LE(__builtin_crypto_vpmsumd (VecGetLow(a), b));
+ return VMULL2LE(__builtin_crypto_vpmsumd (vec_mergel(z, a), b));
#endif
}