summaryrefslogtreecommitdiff
path: root/integer.cpp
diff options
context:
space:
mode:
authorJeffrey Walton <noloader@gmail.com>2019-05-31 09:42:39 -0400
committerJeffrey Walton <noloader@gmail.com>2019-05-31 09:42:39 -0400
commit4952fa489d43ca87dc50517917525caa92cc0cc9 (patch)
treeb73b130c3140e3f12ff03eb1086a53e65124cc4c /integer.cpp
parentfb0bef1eb6e5c4ba890656aef0795a202975fd25 (diff)
downloadcryptopp-git-4952fa489d43ca87dc50517917525caa92cc0cc9.tar.gz
Use BMI2 when available for MultiplyWordsLoHi, MulAcc and friends
Using BMI2 saves about 0.03 ms on a Core i5 6400 @ 2.7 GHz. It is small but measurable. It also gives GCC more freedom in selecting memory or register operands
Diffstat (limited to 'integer.cpp')
-rw-r--r--integer.cpp11
1 files changed, 6 insertions, 5 deletions
diff --git a/integer.cpp b/integer.cpp
index 6834ef8a..c03c543e 100644
--- a/integer.cpp
+++ b/integer.cpp
@@ -77,7 +77,8 @@
#include <c_asm.h>
#endif
-// "Error: The operand ___LKDB cannot be assigned to", http://github.com/weidai11/cryptopp/issues/188
+// "Error: The operand ___LKDB cannot be assigned to",
+// http://github.com/weidai11/cryptopp/issues/188
#if (__SUNPRO_CC >= 0x5130)
# define MAYBE_CONST
# define MAYBE_UNCONST_CAST(x) const_cast<word*>(x)
@@ -87,7 +88,7 @@
#endif
// "Inline assembly operands don't work with .intel_syntax",
-// http://llvm.org/bugs/show_bug.cgi?id=24232
+// http://llvm.org/bugs/show_bug.cgi?id=24232
#if CRYPTOPP_BOOL_X32 || defined(CRYPTOPP_DISABLE_MIXED_ASM)
# undef CRYPTOPP_X86_ASM_AVAILABLE
# undef CRYPTOPP_X32_ASM_AVAILABLE
@@ -213,11 +214,11 @@ static word AtomicInverseModPower2(word A)
#define MultiplyWordsLoHi(p0, p1, a, b) asm ("mulq %3" : "=a"(p0), "=d"(p1) : "a"(a), "r"(b) : "cc");
#elif defined(__BMI2__)
#define MultiplyWordsLoHi(p0, p1, a, b) asm ("mulxq %3, %0, %1" : "=r"(p0), "=r"(p1) : "d"(a), "r"(b));
- #define MulAcc(c, d, a, b) asm ("mulxq %6, %3, %4; addq %3, %0; adcq %4, %1; adcq $0, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1), "=r"(p0), "=r"(p1) : "d"(a), "r"(b) : "cc");
+ #define MulAcc(c, d, a, b) asm ("mulxq %6, %3, %4; addq %3, %0; adcxq %4, %1; adcxq %7, %2;" : "+&r"(c), "+&r"(d##0), "+&r"(d##1), "=&r"(p0), "=&r"(p1) : "d"(a), "r"(b), "r"(W64LIT(0)) : "cc");
#define Double3Words(c, d) asm ("addq %0, %0; adcxq %1, %1; adcxq %2, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1) : : "cc");
- #define Acc2WordsBy1(a, b) asm ("addq %2, %0; adcxq %3, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b), "r"(W64LIT(0)) : "cc");
+ #define Acc2WordsBy1(a, b) asm ("addq %2, %0; adcxq %3, %1;" : "+&r"(a##0), "+r"(a##1) : "r"(b), "r"(W64LIT(0)) : "cc");
#define Acc2WordsBy2(a, b) asm ("addq %2, %0; adcxq %3, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b##0), "r"(b##1) : "cc");
- #define Acc3WordsBy2(c, d, e) asm ("addq %5, %0; adcxq %6, %1; adcxq %7, %2;" : "+r"(c), "=r"(e##0), "=r"(e##1) : "1"(d##0), "2"(d##1), "r"(e##0), "r"(e##1), "r"(W64LIT(0)) : "cc");
+ #define Acc3WordsBy2(c, d, e) asm ("addq %5, %0; adcxq %6, %1; adcxq %7, %2;" : "+r"(c), "=&r"(e##0), "=&r"(e##1) : "1"(d##0), "2"(d##1), "r"(e##0), "r"(e##1), "r"(W64LIT(0)) : "cc");
#else
#define MultiplyWordsLoHi(p0, p1, a, b) asm ("mulq %3" : "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc");
#define MulAcc(c, d, a, b) asm ("mulq %6; addq %3, %0; adcq %4, %1; adcq $0, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1), "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc");