summaryrefslogtreecommitdiff
path: root/gf2n_simd.cpp
diff options
context:
space:
mode:
authorJeffrey Walton <noloader@gmail.com>2019-01-19 04:52:36 -0500
committerJeffrey Walton <noloader@gmail.com>2019-01-19 04:52:36 -0500
commit76b7390840d361d89cb76a4008d1f5c6dd536b9f (patch)
tree2c7accc98aa0f7217ad71c680a6d5b9b820037cf /gf2n_simd.cpp
parente3bc76832cc4ebd9ca2e8d2ef1438a580b29bda7 (diff)
downloadcryptopp-git-76b7390840d361d89cb76a4008d1f5c6dd536b9f.tar.gz
Switch to vec_mergeh and vec_mergel for PowerPC in binary curves
This saves about 4 instructions in VMULL
Diffstat (limited to 'gf2n_simd.cpp')
-rw-r--r--gf2n_simd.cpp16
1 files changed, 8 insertions, 8 deletions
diff --git a/gf2n_simd.cpp b/gf2n_simd.cpp
index 903d38c7..9fa9aec3 100644
--- a/gf2n_simd.cpp
+++ b/gf2n_simd.cpp
@@ -317,8 +317,6 @@ using CryptoPP::VecOr;
using CryptoPP::VecXor;
using CryptoPP::VecAnd;
-using CryptoPP::VecGetLow;
-using CryptoPP::VecGetHigh;
using CryptoPP::VecPermute;
using CryptoPP::VecShiftLeft;
using CryptoPP::VecShiftRight;
@@ -336,24 +334,26 @@ inline uint64x2_p VMULL2LE(const uint64x2_p& val)
// _mm_clmulepi64_si128(a, b, 0x00)
inline uint64x2_p VMULL_00LE(const uint64x2_p& a, const uint64x2_p& b)
{
+ const uint64x2_p z={0};
#if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__))
- return VMULL2LE(__vpmsumd (VecGetHigh(a), VecGetHigh(b)));
+ return VMULL2LE(__vpmsumd (vec_mergeh(z, a), vec_mergeh(z, b)));
#elif defined(__clang__)
- return VMULL2LE(__builtin_altivec_crypto_vpmsumd (VecGetHigh(a), VecGetHigh(b)));
+ return VMULL2LE(__builtin_altivec_crypto_vpmsumd (vec_mergeh(z, a), vec_mergeh(z, b)));
#else
- return VMULL2LE(__builtin_crypto_vpmsumd (VecGetHigh(a), VecGetHigh(b)));
+ return VMULL2LE(__builtin_crypto_vpmsumd (vec_mergeh(z, a), vec_mergeh(z, b)));
#endif
}
// _mm_clmulepi64_si128(a, b, 0x11)
inline uint64x2_p VMULL_11LE(const uint64x2_p& a, const uint64x2_p& b)
{
+ const uint64x2_p z={0};
#if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__))
- return VMULL2LE(__vpmsumd (VecGetLow(a), b));
+ return VMULL2LE(__vpmsumd (vec_mergel(z, a), b));
#elif defined(__clang__)
- return VMULL2LE(__builtin_altivec_crypto_vpmsumd (VecGetLow(a), b));
+ return VMULL2LE(__builtin_altivec_crypto_vpmsumd (vec_mergel(z, a), b));
#else
- return VMULL2LE(__builtin_crypto_vpmsumd (VecGetLow(a), b));
+ return VMULL2LE(__builtin_crypto_vpmsumd (vec_mergel(z, a), b));
#endif
}