summaryrefslogtreecommitdiff
path: root/longlong.h
diff options
context:
space:
mode:
author Kevin Ryde <user42@zip.com.au> 2001-06-12 00:10:16 +0200
committer Kevin Ryde <user42@zip.com.au> 2001-06-12 00:10:16 +0200
commit 9db31ead5fdcae4dfd83b85aa28e32945797848c (patch)
tree c5b5b5fa446729e4b6bb9087700fdf2c2406a81b /longlong.h
parent 9bd299a2dbd9ab9a9f3fffa1d755a5fdbab8d972 (diff)
download gmp-9db31ead5fdcae4dfd83b85aa28e32945797848c.tar.gz
* longlong.h (count_leading_zeros) [pentiumpro]: Work around a partial
register stall on gcc < 3.
Diffstat (limited to 'longlong.h')
-rw-r--r-- longlong.h 18
1 files changed, 18 insertions, 0 deletions
diff --git a/longlong.h b/longlong.h
index 0b33bd4a0..fb49f7014 100644
--- a/longlong.h
+++ b/longlong.h
@@ -615,6 +615,22 @@ extern UWtype __MPN(udiv_qrnnd) _PROTO ((UWtype *, UWtype, UWtype, UWtype));
depending where the least significant 1 bit is. */
#else
+/* gcc on p6 prior to 3.0 generates a partial register stall for __cbtmp^31,
+ due to using "xorb $31" instead of "xorl $31", the former being 1 code
+ byte smaller. "31-__cbtmp" is a workaround, probably at the cost of one
+ extra instruction. Do this for "i386" too, since that means generic
+ x86. */
+#if __GNUC__ < 3 \
+ && (HAVE_HOST_CPU_i386 || HAVE_HOST_CPU_pentiumpro \
+ || HAVE_HOST_CPU_pentium2 || HAVE_HOST_CPU_pentium3)
+#define count_leading_zeros(count, x) \
+ do { \
+ USItype __cbtmp; /* receives the bit index produced by bsrl */ \
+ ASSERT ((x) != 0); /* bsr leaves its destination undefined for a zero source */ \
+ __asm__ ("bsrl %1,%0" : "=r" (__cbtmp) : "rm" ((USItype)(x))); \
+ (count) = 31 - __cbtmp; /* equals __cbtmp^31 for 0..31; subtraction keeps old gcc from emitting "xorb $31" */ \
+ } while (0)
+#else
#define count_leading_zeros(count, x) \
do { \
USItype __cbtmp; \
@@ -622,6 +638,8 @@ extern UWtype __MPN(udiv_qrnnd) _PROTO ((UWtype *, UWtype, UWtype, UWtype));
__asm__ ("bsrl %1,%0" : "=r" (__cbtmp) : "rm" ((USItype)(x))); \
(count) = __cbtmp ^ 31; \
} while (0)
+#endif /* __GNUC__ < 3 on i386/pentiumpro/pentium2/pentium3 */
+
#define count_trailing_zeros(count, x) \
do { \
ASSERT ((x) != 0); \