diff options
author | tege <tege@gmplib.org> | 2002-11-30 03:50:27 +0100 |
---|---|---|
committer | tege <tege@gmplib.org> | 2002-11-30 03:50:27 +0100 |
commit | 9dd0d1aa6519fd39c589232a3766b37884a494ba (patch) | |
tree | b739ba3bd32f36f70c4c4401697324d8dda7b199 /longlong.h | |
parent | 67d3131b69530bcff3e3fa50cb8a05a16922be0f (diff) | |
download | gmp-9dd0d1aa6519fd39c589232a3766b37884a494ba.tar.gz |
(umul_ppmm) [ia64]: Form both product parts in asm.
Diffstat (limited to 'longlong.h')
-rw-r--r-- | longlong.h | 13 |
1 files changed, 6 insertions, 7 deletions
diff --git a/longlong.h b/longlong.h
index 3a1523e17..9910e96f9 100644
--- a/longlong.h
+++ b/longlong.h
@@ -197,14 +197,13 @@ long __MPN(count_leading_zeros) _PROTO ((UDItype));
 
 #if defined (__ia64) && W_TYPE_SIZE == 64
 #if defined (__GNUC__)
+/* Do both product parts in assembly, since that gives better code with
+   all gcc versions. Some callers will just use the upper part, and in
+   that situation we waste an instruction, but not any cycles. */
 #define umul_ppmm(ph, pl, m0, m1) \
-  do { \
-    UDItype __m0 = (m0), __m1 = (m1); \
-    __asm__ ("xma.hu %0 = %1, %2, f0" \
-             : "=f" (ph) \
-             : "f" (m0), "f" (m1)); \
-    (pl) = __m0 * __m1; \
-  } while (0)
+    __asm__ ("xma.hu %0 = %2, %3, f0\n\txma.l %1 = %2, %3, f0" \
+             : "=f" (ph), "=f" (pl) \
+             : "f" (m0), "f" (m1))
 #define UMUL_TIME 14
 #define count_leading_zeros(count, x) \
   do { \