summary | refs | log | tree | commit | diff
path: root/longlong.h
diff options
context:
space:
mode:
author tege <tege@gmplib.org> 2002-11-30 03:50:27 +0100
committer tege <tege@gmplib.org> 2002-11-30 03:50:27 +0100
commit 9dd0d1aa6519fd39c589232a3766b37884a494ba (patch)
tree b739ba3bd32f36f70c4c4401697324d8dda7b199 /longlong.h
parent 67d3131b69530bcff3e3fa50cb8a05a16922be0f (diff)
download gmp-9dd0d1aa6519fd39c589232a3766b37884a494ba.tar.gz
(umul_ppmm) [ia64]: Form both product parts in asm.
Diffstat (limited to 'longlong.h')
-rw-r--r-- longlong.h 13
1 files changed, 6 insertions, 7 deletions
diff --git a/longlong.h b/longlong.h
index 3a1523e17..9910e96f9 100644
--- a/longlong.h
+++ b/longlong.h
@@ -197,14 +197,13 @@ long __MPN(count_leading_zeros) _PROTO ((UDItype));
#if defined (__ia64) && W_TYPE_SIZE == 64
#if defined (__GNUC__)
+/* Do both product parts in assembly, since that gives better code with
+ all gcc versions. Some callers will just use the upper part, and in
+ that situation we waste an instruction, but not any cycles. */
#define umul_ppmm(ph, pl, m0, m1) \
- do { \
- UDItype __m0 = (m0), __m1 = (m1); \
- __asm__ ("xma.hu %0 = %1, %2, f0" \
- : "=f" (ph) \
- : "f" (m0), "f" (m1)); \
- (pl) = __m0 * __m1; \
- } while (0)
+ __asm__ ("xma.hu %0 = %2, %3, f0\n\txma.l %1 = %2, %3, f0" \
+ : "=f" (ph), "=f" (pl) \
+ : "f" (m0), "f" (m1))
#define UMUL_TIME 14
#define count_leading_zeros(count, x) \
do { \