summaryrefslogtreecommitdiff
path: root/stdlib/longlong.h
diff options
context:
space:
mode:
authorRoland McGrath <roland@gnu.org>1995-10-16 01:37:51 +0000
committerRoland McGrath <roland@gnu.org>1995-10-16 01:37:51 +0000
commit8f5ca04bc7fd53741d80117df992995ace8f6d2d (patch)
treee39c13fc198b22ec55647259a8080051988e8c69 /stdlib/longlong.h
parent5d82cf5c55f56ae10d3b0a205d1fcc7de1cf56a0 (diff)
downloadglibc-8f5ca04bc7fd53741d80117df992995ace8f6d2d.tar.gz
Sat Oct 14 02:52:36 1995 Ulrich Drepper <drepper@ipd.info.uni-karlsruhe.de>
* malloc/malloc.c (_malloc_internal): Performance fix. Move if statement out of loop. * stdio/_itoa.c, stdio/_itoa.h: Complete rewrite. Much faster implementation using GMP functions. Contributed by Torbjorn Granlund and Ulrich Drepper. * stdio/test_rdwr.c: Include <errno.h>. * sysdeps/i386/i586/Implies: New file. New highly optimized string functions for i[345]86. * sysdeps/i386/memchr.S, sysdeps/i386/memcmp.S: New files. * sysdeps/i386/stpcpy.S, sysdeps/i386/stpncpy.S: New files. * sysdeps/i386/strchr.S, sysdeps/i386/strcspn.S: New files. * sysdeps/i386/strpbrk.S, sysdeps/i386/strrchr.S: New files. * sysdeps/i386/strspn.S, sysdeps/i386/i486/strcat.S: New files. * sysdeps/i386/i486/strlen.S, sysdeps/i386/i586/strchr.S: New files. * sysdeps/i386/i586/strlen.S: New file. * sysdeps/i386/memchr.c: Removed. There is now an assembler version. * sysdeps/i386/i586/memcopy.h (WORD_COPY_BWD): Parameters did not correspond to used values. * sysdeps/unix/sysv/linux/nfs/nfs.h: New file. Simply a wrapper around a kernel header file. * sysdeps/unix/sysv/linux/Dist: Add it. * sysdeps/unix/sysv/linux/Makefile [$(subdir)=sunrpc] (headers): Likewise. * sysdeps/unix/sysv/linux/local_lim.h: Rewrite. Instead of defining ourself we use a kernel header file. * sysdeps/unix/sysv/linux/i386/sysdep.h (DO_CALL): Optimize system call handler for i586. * sysdeps/unix/sysv/linux/sys/param.h: Add copyright and clean up. Sat Oct 14 02:52:36 1995 Ulrich Drepper <drepper@ipd.info.uni-karlsruhe.de> * malloc/malloc.c (_malloc_internal): Performance fix. Move if statement out of loop. * stdio/_itoa.c, stdio/_itoa.h: Complete rewrite. Much faster implementation using GMP functions. Contributed by Torbjorn Granlund and Ulrich Drepper. * stdio/test_rdwr.c: Include <errno.h>. * sysdeps/i386/i586/Implies: New file. New highly optimized string functions for i[345]86. * sysdeps/i386/memchr.S, sysdeps/i386/memcmp.S: New files. * sysdeps/i386/stpcpy.S, sysdeps/i386/stpncpy.S: New files. * sysdeps/i386/strchr.S, sysdeps/i386/strcspn.S: New files. * sysdeps/i386/strpbrk.S, sysdeps/i386/strrchr.S: New files. * sysdeps/i386/strspn.S, sysdeps/i386/i486/strcat.S: New files. * sysdeps/i386/i486/strlen.S, sysdeps/i386/i586/strchr.S: New files. * sysdeps/i386/i586/strlen.S: New file. * sysdeps/i386/memchr.c: Removed. There is now an assembler version. * sysdeps/i386/i586/memcopy.h (WORD_COPY_BWD): Parameters did not correspond to used values. * sysdeps/unix/sysv/linux/nfs/nfs.h: New file. Simply a wrapper around a kernel header file. * sysdeps/unix/sysv/linux/Dist: Add it. * sysdeps/unix/sysv/linux/Makefile [$(subdir)=sunrpc] (headers): Likewise. * sysdeps/unix/sysv/linux/local_lim.h: Rewrite. Instead of defining ourself we use a kernel header file. * sysdeps/unix/sysv/linux/i386/sysdep.h (DO_CALL): Optimize system call handler for i586. * sysdeps/unix/sysv/linux/sys/param.h: Add copyright and clean up.
Diffstat (limited to 'stdlib/longlong.h')
-rw-r--r--stdlib/longlong.h229
1 files changed, 142 insertions, 87 deletions
diff --git a/stdlib/longlong.h b/stdlib/longlong.h
index 97c469d8c0..bbb92e3af8 100644
--- a/stdlib/longlong.h
+++ b/stdlib/longlong.h
@@ -97,7 +97,7 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
#define __AND_CLOBBER_CC , "cc"
#endif /* __GNUC__ < 2 */
-#if (defined (__a29k__) || defined (___AM29K__)) && W_TYPE_SIZE == 32
+#if (defined (__a29k__) || defined (_AM29K)) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
__asm__ ("add %1,%4,%5
addc %0,%2,%3" \
@@ -152,6 +152,7 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
(pl) = __m0 * __m1; \
} while (0)
#define UMUL_TIME 46
+#ifndef LONGLONG_STANDALONE
#define udiv_qrnnd(q, r, n1, n0, d) \
do { UDItype __r; \
(q) = __udiv_qrnnd (&__r, (n1), (n0), (d)); \
@@ -159,12 +160,13 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
} while (0)
extern UDItype __udiv_qrnnd ();
#define UDIV_TIME 220
-#endif
+#endif /* LONGLONG_STANDALONE */
+#endif /* __alpha__ */
#if defined (__arm__) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
- __asm__ ("adds %1,%4,%5
- adc %0,%2,%3" \
+ __asm__ ("adds %1, %4, %5
+ adc %0, %2, %3" \
: "=r" ((USItype)(sh)), \
"=&r" ((USItype)(sl)) \
: "%r" ((USItype)(ah)), \
@@ -172,8 +174,8 @@ extern UDItype __udiv_qrnnd ();
"%r" ((USItype)(al)), \
"rI" ((USItype)(bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
- __asm__ ("subs %1,%4,%5
- sbc %0,%2,%3" \
+ __asm__ ("subs %1, %4, %5
+ sbc %0, %2, %3" \
: "=r" ((USItype)(sh)), \
"=&r" ((USItype)(sl)) \
: "r" ((USItype)(ah)), \
@@ -181,19 +183,19 @@ extern UDItype __udiv_qrnnd ();
"r" ((USItype)(al)), \
"rI" ((USItype)(bl)))
#define umul_ppmm(xh, xl, a, b) \
- __asm__ ("; Inlined umul_ppmm
- mov r0,%2 lsr 16
- mov r2,%3 lsr 16
- bic r1,%2,r0 lsl 16
- bic r2,%3,r2 lsl 16
- mul %1,r1,r2
- mul r2,r0,r2
- mul r1,%0,r1
- mul %0,r0,%0
- adds r1,r2,r1
- addcs %0,%0,0x10000
- adds %1,%1,r1 lsl 16
- adc %0,%0,r1 lsr 16" \
+ __asm__ ("%@ Inlined umul_ppmm
+ mov %|r0, %2, lsr #16
+ mov %|r2, %3, lsr #16
+ bic %|r1, %2, %|r0, lsl #16
+ bic %|r2, %3, %|r2, lsl #16
+ mul %1, %|r1, %|r2
+ mul %|r2, %|r0, %|r2
+ mul %|r1, %0, %|r1
+ mul %0, %|r0, %0
+ adds %|r1, %|r2, %|r1
+ addcs %0, %0, #65536
+ adds %1, %1, %|r1, lsl #16
+ adc %0, %0, %|r1, lsr #16" \
: "=&r" ((USItype)(xh)), \
"=r" ((USItype)(xl)) \
: "r" ((USItype)(a)), \
@@ -296,9 +298,9 @@ extern UDItype __udiv_qrnnd ();
struct {USItype __h, __l;} __i; \
} __xx; \
__asm__ ("xmpyu %1,%2,%0" \
- : "=x" (__xx.__ll) \
- : "x" ((USItype)(u)), \
- "x" ((USItype)(v))); \
+ : "=fx" (__xx.__ll) \
+ : "fx" ((USItype)(u)), \
+ "fx" ((USItype)(v))); \
(wh) = __xx.__i.__h; \
(wl) = __xx.__i.__l; \
} while (0)
@@ -308,12 +310,14 @@ extern UDItype __udiv_qrnnd ();
#define UMUL_TIME 40
#define UDIV_TIME 80
#endif
+#ifndef LONGLONG_STANDALONE
#define udiv_qrnnd(q, r, n1, n0, d) \
do { USItype __r; \
(q) = __udiv_qrnnd (&__r, (n1), (n0), (d)); \
(r) = __r; \
} while (0)
extern USItype __udiv_qrnnd ();
+#endif /* LONGLONG_STANDALONE */
#define count_leading_zeros(count, x) \
do { \
USItype __tmp; \
@@ -419,8 +423,12 @@ extern USItype __udiv_qrnnd ();
} while (0)
#define count_trailing_zeros(count, x) \
__asm__ ("bsfl %1,%0" : "=r" (count) : "rm" ((USItype)(x)))
+#ifndef UMUL_TIME
#define UMUL_TIME 40
+#endif
+#ifndef UDIV_TIME
#define UDIV_TIME 40
+#endif
#endif /* 80x86 */
#if defined (__i960__) && W_TYPE_SIZE == 32
@@ -442,7 +450,7 @@ extern USItype __udiv_qrnnd ();
__w; })
#endif /* __i960__ */
-#if defined (__mc68000__) && W_TYPE_SIZE == 32
+#if (defined (__mc68000__) || defined (__mc68020__) || defined (__NeXT__) || defined(mc68020)) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
__asm__ ("add%.l %5,%1
addx%.l %3,%0" \
@@ -489,38 +497,34 @@ extern USItype __udiv_qrnnd ();
: "=d" ((USItype)(count)) \
: "od" ((USItype)(x)), "n" (0))
#else /* not mc68020 */
-#define umul_ppmm(xh, xl, a, b) \
- __asm__ ("| Inlined umul_ppmm
- move%.l %2,%/d0
- move%.l %3,%/d1
- move%.l %/d0,%/d2
- swap %/d0
- move%.l %/d1,%/d3
- swap %/d1
- move%.w %/d2,%/d4
- mulu %/d3,%/d4
- mulu %/d1,%/d2
- mulu %/d0,%/d3
- mulu %/d0,%/d1
- move%.l %/d4,%/d0
- eor%.w %/d0,%/d0
- swap %/d0
- add%.l %/d0,%/d2
- add%.l %/d3,%/d2
+#define umul_ppmmxx(xh, xl, a, b) \
+ do { USItype __umul_tmp1, __umul_tmp2; \
+ __asm__ ("| Inlined umul_ppmm
+ move%.l %5,%3
+ move%.l %2,%0
+ move%.w %3,%1
+ swap %3
+ swap %0
+ mulu %2,%1
+ mulu %3,%0
+ mulu %2,%3
+ swap %2
+ mulu %5,%2
+ add%.l %3,%2
jcc 1f
- add%.l #65536,%/d1
-1: swap %/d2
- moveq #0,%/d0
- move%.w %/d2,%/d0
- move%.w %/d4,%/d2
- move%.l %/d2,%1
- add%.l %/d1,%/d0
- move%.l %/d0,%0" \
- : "=g" ((USItype)(xh)), \
- "=g" ((USItype)(xl)) \
- : "g" ((USItype)(a)), \
- "g" ((USItype)(b)) \
- : "d0", "d1", "d2", "d3", "d4")
+ add%.l %#0x10000,%0
+1: move%.l %2,%3
+ clr%.w %2
+ swap %2
+ swap %3
+ clr%.w %3
+ add%.l %3,%1
+ addx%.l %2,%0
+ | End inlined umul_ppmm" \
+ : "=&d" ((USItype)(xh)), "=&d" ((USItype)(xl)), \
+ "=d" (__umul_tmp1), "=&d" (__umul_tmp2) \
+ : "%2" ((USItype)(a)), "d" ((USItype)(b))); \
+ } while (0)
#define UMUL_TIME 100
#define UDIV_TIME 400
#endif /* not mc68020 */
@@ -553,7 +557,7 @@ extern USItype __udiv_qrnnd ();
: "r" ((USItype)(x))); \
(count) = __cbtmp ^ 31; \
} while (0)
-#if defined (__mc88110__)
+#if defined (__m88110__)
#define umul_ppmm(wh, wl, u, v) \
do { \
union {UDItype __ll; \
@@ -582,10 +586,18 @@ extern USItype __udiv_qrnnd ();
#else
#define UMUL_TIME 17
#define UDIV_TIME 150
-#endif /* __mc88110__ */
+#endif /* __m88110__ */
#endif /* __m88000__ */
#if defined (__mips__) && W_TYPE_SIZE == 32
+#if __GNUC__ > 2 || __GNUC_MINOR__ >= 7
+#define umul_ppmm(w1, w0, u, v) \
+ __asm__ ("multu %2,%3" \
+ : "=l" ((USItype)(w0)), \
+ "=h" ((USItype)(w1)) \
+ : "d" ((USItype)(u)), \
+ "d" ((USItype)(v)))
+#else
#define umul_ppmm(w1, w0, u, v) \
__asm__ ("multu %2,%3
mflo %0
@@ -594,11 +606,20 @@ extern USItype __udiv_qrnnd ();
"=d" ((USItype)(w1)) \
: "d" ((USItype)(u)), \
"d" ((USItype)(v)))
+#endif
#define UMUL_TIME 10
#define UDIV_TIME 100
#endif /* __mips__ */
#if (defined (__mips) && __mips >= 3) && W_TYPE_SIZE == 64
+#if __GNUC__ > 2 || __GNUC_MINOR__ >= 7
+#define umul_ppmm(w1, w0, u, v) \
+ __asm__ ("dmultu %2,%3" \
+ : "=l" ((UDItype)(w0)), \
+ "=h" ((UDItype)(w1)) \
+ : "d" ((UDItype)(u)), \
+ "d" ((UDItype)(v)))
+#else
#define umul_ppmm(w1, w0, u, v) \
__asm__ ("dmultu %2,%3
mflo %0
@@ -607,8 +628,9 @@ extern USItype __udiv_qrnnd ();
"=d" ((UDItype)(w1)) \
: "d" ((UDItype)(u)), \
"d" ((UDItype)(v)))
-#define UMUL_TIME 10
-#define UDIV_TIME 100
+#endif
+#define UMUL_TIME 20
+#define UDIV_TIME 140
#endif /* __mips__ */
#if defined (__ns32000__) && W_TYPE_SIZE == 32
@@ -647,7 +669,7 @@ extern USItype __udiv_qrnnd ();
} while (0)
#endif /* __ns32000__ */
-#if (defined (__powerpc__) || defined (___IBMR2__)) && W_TYPE_SIZE == 32
+#if (defined (_ARCH_PPC) || defined (_IBMR2)) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
do { \
if (__builtin_constant_p (bh) && (bh) == 0) \
@@ -676,14 +698,14 @@ extern USItype __udiv_qrnnd ();
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
do { \
if (__builtin_constant_p (ah) && (ah) == 0) \
- __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \
+ __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \
: "=r" ((USItype)(sh)), \
"=&r" ((USItype)(sl)) \
: "r" ((USItype)(bh)), \
"rI" ((USItype)(al)), \
"r" ((USItype)(bl))); \
else if (__builtin_constant_p (ah) && (ah) ==~(USItype) 0) \
- __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \
+ __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \
: "=r" ((USItype)(sh)), \
"=&r" ((USItype)(sl)) \
: "r" ((USItype)(bh)), \
@@ -716,7 +738,7 @@ extern USItype __udiv_qrnnd ();
__asm__ ("{cntlz|cntlzw} %0,%1" \
: "=r" ((USItype)(count)) \
: "r" ((USItype)(x)))
-#if defined (__powerpc__)
+#if defined (_ARCH_PPC)
#define umul_ppmm(ph, pl, m0, m1) \
do { \
USItype __m0 = (m0), __m1 = (m1); \
@@ -785,16 +807,15 @@ extern USItype __udiv_qrnnd ();
"g" ((USItype)(bh)), \
"1" ((USItype)(al)), \
"g" ((USItype)(bl)))
-/* This insn doesn't work on ancient pyramids. */
+/* This insn works on Pyramids with AP, XP, or MI CPUs, but not with SP. */
#define umul_ppmm(w1, w0, u, v) \
({union {UDItype __ll; \
struct {USItype __h, __l;} __i; \
} __xx; \
- __xx.__i.__l = u; \
- __asm__ ("uemul %3,%0" \
- : "=r" (__xx.__i.__h), \
- "=r" (__xx.__i.__l) \
- : "1" (__xx.__i.__l), \
+ __asm__ ("movw %1,%R0
+ uemul %2,%0" \
+ : "=&r" (__xx.__ll) \
+ : "g" ((USItype) (u)), \
"g" ((USItype)(v))); \
(w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
#endif /* __pyr__ */
@@ -868,6 +889,20 @@ extern USItype __udiv_qrnnd ();
} while (0)
#endif
+#if defined (__sh2__) && W_TYPE_SIZE == 32
+#define umul_ppmm(w1, w0, u, v) \
+ __asm__ ( \
+ "dmulu.l %2,%3
+ sts macl,%1
+ sts mach,%0" \
+ : "=r" ((USItype)(w1)), \
+ "=r" ((USItype)(w0)) \
+ : "r" ((USItype)(u)), \
+ "r" ((USItype)(v)) \
+ : "macl", "mach")
+#define UMUL_TIME 5
+#endif
+
#if defined (__sparc__) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
__asm__ ("addcc %r4,%5,%1
@@ -901,17 +936,21 @@ extern USItype __udiv_qrnnd ();
: "r" ((USItype)(u)), \
"r" ((USItype)(v)))
#define UMUL_TIME 5
-/* We might want to leave this undefined for `SuperSPARC (tm)' since
- its implementation is crippled and often traps. */
+#ifndef SUPERSPARC /* SuperSPARC's udiv only handles 53 bit dividends */
#define udiv_qrnnd(q, r, n1, n0, d) \
- __asm__ ("mov %2,%%y;nop;nop;nop;udiv %3,%4,%0;umul %0,%4,%1;sub %3,%1,%1"\
- : "=&r" ((USItype)(q)), \
- "=&r" ((USItype)(r)) \
- : "r" ((USItype)(n1)), \
- "r" ((USItype)(n0)), \
- "r" ((USItype)(d)))
+ do { \
+ USItype __q; \
+ __asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \
+ : "=r" ((USItype)(__q)) \
+ : "r" ((USItype)(n1)), \
+ "r" ((USItype)(n0)), \
+ "r" ((USItype)(d))); \
+ (r) = (n0) - __q * (d); \
+ (q) = __q; \
+ } while (0)
#define UDIV_TIME 25
-#else
+#endif /* SUPERSPARC */
+#else /* ! __sparc_v8__ */
#if defined (__sparclite__)
/* This has hardware multiply but not divide. It also has two additional
instructions scan (ffs from high bit) and divscc. */
@@ -973,9 +1012,10 @@ extern USItype __udiv_qrnnd ();
__asm__ ("scan %1,0,%0" \
: "=r" ((USItype)(x)) \
: "r" ((USItype)(count)))
-#else
-/* SPARC without integer multiplication and divide instructions.
- (i.e. at least Sun4/20,40,60,65,75,110,260,280,330,360,380,470,490) */
+#endif /* __sparclite__ */
+#endif /* __sparc_v8__ */
+/* Default to sparc v7 versions of umul_ppmm and udiv_qrnnd. */
+#ifndef umul_ppmm
#define umul_ppmm(w1, w0, u, v) \
__asm__ ("! Inlined umul_ppmm
wr %%g0,%2,%%y ! SPARC has 0-3 delay insn after a wr
@@ -1023,6 +1063,9 @@ extern USItype __udiv_qrnnd ();
"r" ((USItype)(v)) \
: "%g1", "%g2" __AND_CLOBBER_CC)
#define UMUL_TIME 39 /* 39 instructions */
+#endif
+#ifndef udiv_qrnnd
+#ifndef LONGLONG_STANDALONE
#define udiv_qrnnd(q, r, n1, n0, d) \
do { USItype __r; \
(q) = __udiv_qrnnd (&__r, (n1), (n0), (d)); \
@@ -1030,8 +1073,8 @@ extern USItype __udiv_qrnnd ();
} while (0)
extern USItype __udiv_qrnnd ();
#define UDIV_TIME 140
-#endif /* __sparclite__ */
-#endif /* __sparc_v8__ */
+#endif /* LONGLONG_STANDALONE */
+#endif /* udiv_qrnnd */
#endif /* __sparc__ */
#if defined (__vax__) && W_TYPE_SIZE == 32
@@ -1075,7 +1118,7 @@ extern USItype __udiv_qrnnd ();
__xx.__i.__h = n1; __xx.__i.__l = n0; \
__asm__ ("ediv %3,%2,%0,%1" \
: "=g" (q), "=g" (r) \
- : "g" (__n1n0.ll), "g" (d)); \
+ : "g" (__xx.ll), "g" (d)); \
} while (0)
#endif /* __vax__ */
@@ -1173,11 +1216,12 @@ extern USItype __udiv_qrnnd ();
do { \
UWtype __x0, __x1, __x2, __x3; \
UHWtype __ul, __vl, __uh, __vh; \
+ UWtype __u = (u), __v = (v); \
\
- __ul = __ll_lowpart (u); \
- __uh = __ll_highpart (u); \
- __vl = __ll_lowpart (v); \
- __vh = __ll_highpart (v); \
+ __ul = __ll_lowpart (__u); \
+ __uh = __ll_highpart (__u); \
+ __vl = __ll_lowpart (__v); \
+ __vh = __ll_highpart (__v); \
\
__x0 = (UWtype) __ul * __vl; \
__x1 = (UWtype) __ul * __vh; \
@@ -1194,6 +1238,17 @@ extern USItype __udiv_qrnnd ();
} while (0)
#endif
+#if !defined (umul_ppmm)
+#define smul_ppmm(w1, w0, u, v) \
+ do { \
+ UWtype __w1; \
+ UWtype __m0 = (u), __m1 = (v); \
+ umul_ppmm (__w1, w0, __m0, __m1); \
+ (w1) = __w1 - (-(__m0 >> (W_TYPE_SIZE - 1)) & __m1) \
+ - (-(__m1 >> (W_TYPE_SIZE - 1)) & __m0); \
+ } while (0)
+#endif
+
/* Define this unconditionally, so it can be used for debugging. */
#define __udiv_qrnnd_c(q, r, n1, n0, d) \
do { \