path: root/gcm.cpp
author     Jeffrey Walton <noloader@gmail.com>  2017-01-20 18:48:11 -0500
committer  Jeffrey Walton <noloader@gmail.com>  2017-01-20 18:48:11 -0500
commit     74328f93a82b31567f2c7842855b7e21d2b89952 (patch)
tree       49b7a20289dd5d24cf1a1b3201e644c99385407c /gcm.cpp
parent     08c0e260200b3441c43bb529b5dbe7cdff6e37f7 (diff)
download   cryptopp-git-74328f93a82b31567f2c7842855b7e21d2b89952.tar.gz
Fix "error: impossible constraint in asm" under Aarch64 (Issue 366)
Diffstat (limited to 'gcm.cpp')
-rw-r--r--  gcm.cpp | 23
1 file changed, 20 insertions(+), 3 deletions(-)
diff --git a/gcm.cpp b/gcm.cpp
index 4e3e2845..8602fbc8 100644
--- a/gcm.cpp
+++ b/gcm.cpp
@@ -90,6 +90,16 @@ inline uint64x2_t VEXT_8(uint64x2_t a, uint64x2_t b, unsigned int c)
:"=w" (r) : "w" (a), "w" (b), "I" (c) );
return r;
}
+
+// https://github.com/weidai11/cryptopp/issues/366
+template <unsigned int C>
+inline uint64x2_t VEXT_8(uint64x2_t a, uint64x2_t b)
+{
+ uint64x2_t r;
+ __asm __volatile("ext %0.16b, %1.16b, %2.16b, %3 \n\t"
+ :"=w" (r) : "w" (a), "w" (b), "I" (C) );
+ return r;
+}
#endif // GCC and compatibles
#if defined(_MSC_VER)
@@ -121,6 +131,13 @@ inline uint64x2_t VEXT_8(uint64x2_t a, uint64x2_t b, unsigned int c)
{
return (uint64x2_t)vextq_u8(vreinterpretq_u8_u64(a), vreinterpretq_u8_u64(b), c);
}
+
+// https://github.com/weidai11/cryptopp/issues/366
+template <unsigned int C>
+inline uint64x2_t VEXT_8(uint64x2_t a, uint64x2_t b)
+{
+ return (uint64x2_t)vextq_u8(vreinterpretq_u8_u64(a), vreinterpretq_u8_u64(b), C);
+}
#endif // Microsoft and compatibles
#endif // CRYPTOPP_BOOL_ARM_PMULL_AVAILABLE
@@ -276,13 +293,13 @@ static const unsigned int s_clmulTableSizeInBlocks = 8;
inline uint64x2_t PMULL_Reduce(uint64x2_t c0, uint64x2_t c1, uint64x2_t c2, const uint64x2_t &r)
{
// See comments for CLMUL_Reduce
- c1 = veorq_u64(c1, VEXT_8(vdupq_n_u64(0), c0, 8));
+ c1 = veorq_u64(c1, VEXT_8<8>(vdupq_n_u64(0), c0));
c1 = veorq_u64(c1, PMULL_01(c0, r));
- c0 = VEXT_8(c0, vdupq_n_u64(0), 8);
+ c0 = VEXT_8<8>(c0, vdupq_n_u64(0));
c0 = vshlq_n_u64(veorq_u64(c0, c1), 1);
c0 = PMULL_00(c0, r);
c2 = veorq_u64(c2, c0);
- c2 = veorq_u64(c2, VEXT_8(c1, vdupq_n_u64(0), 8));
+ c2 = veorq_u64(c2, VEXT_8<8>(c1, vdupq_n_u64(0)));
c1 = vshrq_n_u64(vcombine_u64(vget_low_u64(c1), vget_low_u64(c2)), 63);
c2 = vshlq_n_u64(c2, 1);