diff options
author | Jeffrey Walton <noloader@gmail.com> | 2019-01-19 04:52:36 -0500 |
---|---|---|
committer | Jeffrey Walton <noloader@gmail.com> | 2019-01-19 04:52:36 -0500 |
commit | 76b7390840d361d89cb76a4008d1f5c6dd536b9f (patch) | |
tree | 2c7accc98aa0f7217ad71c680a6d5b9b820037cf /gf2n_simd.cpp | |
parent | e3bc76832cc4ebd9ca2e8d2ef1438a580b29bda7 (diff) | |
download | cryptopp-git-76b7390840d361d89cb76a4008d1f5c6dd536b9f.tar.gz |
Switch to vec_mergeh and vec_mergel for PowerPC in binary curves
This saves about 4 instructions in VMULL
Diffstat (limited to 'gf2n_simd.cpp')
-rw-r--r-- | gf2n_simd.cpp | 16 |
1 files changed, 8 insertions, 8 deletions
diff --git a/gf2n_simd.cpp b/gf2n_simd.cpp
index 903d38c7..9fa9aec3 100644
--- a/gf2n_simd.cpp
+++ b/gf2n_simd.cpp
@@ -317,8 +317,6 @@ using CryptoPP::VecOr;
using CryptoPP::VecXor;
using CryptoPP::VecAnd;
-using CryptoPP::VecGetLow;
-using CryptoPP::VecGetHigh;
using CryptoPP::VecPermute;
using CryptoPP::VecShiftLeft;
using CryptoPP::VecShiftRight;
@@ -336,24 +334,26 @@ inline uint64x2_p VMULL2LE(const uint64x2_p& val)
// _mm_clmulepi64_si128(a, b, 0x00)
inline uint64x2_p VMULL_00LE(const uint64x2_p& a, const uint64x2_p& b)
{
+ const uint64x2_p z={0};
#if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__))
- return VMULL2LE(__vpmsumd (VecGetHigh(a), VecGetHigh(b)));
+ return VMULL2LE(__vpmsumd (vec_mergeh(z, a), vec_mergeh(z, b)));
#elif defined(__clang__)
- return VMULL2LE(__builtin_altivec_crypto_vpmsumd (VecGetHigh(a), VecGetHigh(b)));
+ return VMULL2LE(__builtin_altivec_crypto_vpmsumd (vec_mergeh(z, a), vec_mergeh(z, b)));
#else
- return VMULL2LE(__builtin_crypto_vpmsumd (VecGetHigh(a), VecGetHigh(b)));
+ return VMULL2LE(__builtin_crypto_vpmsumd (vec_mergeh(z, a), vec_mergeh(z, b)));
#endif
}
// _mm_clmulepi64_si128(a, b, 0x11)
inline uint64x2_p VMULL_11LE(const uint64x2_p& a, const uint64x2_p& b)
{
+ const uint64x2_p z={0};
#if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__))
- return VMULL2LE(__vpmsumd (VecGetLow(a), b));
+ return VMULL2LE(__vpmsumd (vec_mergel(z, a), b));
#elif defined(__clang__)
- return VMULL2LE(__builtin_altivec_crypto_vpmsumd (VecGetLow(a), b));
+ return VMULL2LE(__builtin_altivec_crypto_vpmsumd (vec_mergel(z, a), b));
#else
- return VMULL2LE(__builtin_crypto_vpmsumd (VecGetLow(a), b));
+ return VMULL2LE(__builtin_crypto_vpmsumd (vec_mergel(z, a), b));
#endif
}
|