summaryrefslogtreecommitdiff
path: root/stdlib
diff options
context:
space:
mode:
authorRoland McGrath <roland@gnu.org>1995-10-16 01:37:51 +0000
committerRoland McGrath <roland@gnu.org>1995-10-16 01:37:51 +0000
commit8f5ca04bc7fd53741d80117df992995ace8f6d2d (patch)
treee39c13fc198b22ec55647259a8080051988e8c69 /stdlib
parent5d82cf5c55f56ae10d3b0a205d1fcc7de1cf56a0 (diff)
downloadglibc-8f5ca04bc7fd53741d80117df992995ace8f6d2d.tar.gz
Sat Oct 14 02:52:36 1995 Ulrich Drepper <drepper@ipd.info.uni-karlsruhe.de>
* malloc/malloc.c (_malloc_internal): Performance fix. Move if statement out of loop. * stdio/_itoa.c, stdio/_itoa.h: Complete rewrite. Much faster implementation using GMP functions. Contributed by Torbjorn Granlund and Ulrich Drepper. * stdio/test_rdwr.c: Include <errno.h>. * sysdeps/i386/i586/Implies: New file. New highly optimized string functions for i[345]86. * sysdeps/i386/memchr.S, sysdeps/i386/memcmp.S: New files. * sysdeps/i386/stpcpy.S, sysdeps/i386/stpncpy.S: New files. * sysdeps/i386/strchr.S, sysdeps/i386/strcspn.S: New files. * sysdeps/i386/strpbrk.S, sysdeps/i386/strrchr.S: New files. * sysdeps/i386/strspn.S, sysdeps/i386/i486/strcat.S: New files. * sysdeps/i386/i486/strlen.S, sysdeps/i386/i586/strchr.S: New files. * sysdeps/i386/i586/strlen.S: New file. * sysdeps/i386/memchr.c: Removed. There is now an assembler version. * sysdeps/i386/i586/memcopy.h (WORD_COPY_BWD): Parameters did not correspond to used values. * sysdeps/unix/sysv/linux/nfs/nfs.h: New file. Simply a wrapper around a kernel header file. * sysdeps/unix/sysv/linux/Dist: Add it. * sysdeps/unix/sysv/linux/Makefile [$(subdir)=sunrpc] (headers): Likewise. * sysdeps/unix/sysv/linux/local_lim.h: Rewrite. Instead of defining ourself we use a kernel header file. * sysdeps/unix/sysv/linux/i386/sysdep.h (DO_CALL): Optimize system call handler for i586. * sysdeps/unix/sysv/linux/sys/param.h: Add copyright and clean up. Sat Oct 14 02:52:36 1995 Ulrich Drepper <drepper@ipd.info.uni-karlsruhe.de> * malloc/malloc.c (_malloc_internal): Performance fix. Move if statement out of loop. * stdio/_itoa.c, stdio/_itoa.h: Complete rewrite. Much faster implementation using GMP functions. Contributed by Torbjorn Granlund and Ulrich Drepper. * stdio/test_rdwr.c: Include <errno.h>. * sysdeps/i386/i586/Implies: New file. New highly optimized string functions for i[345]86. * sysdeps/i386/memchr.S, sysdeps/i386/memcmp.S: New files. * sysdeps/i386/stpcpy.S, sysdeps/i386/stpncpy.S: New files. * sysdeps/i386/strchr.S, sysdeps/i386/strcspn.S: New files. * sysdeps/i386/strpbrk.S, sysdeps/i386/strrchr.S: New files. * sysdeps/i386/strspn.S, sysdeps/i386/i486/strcat.S: New files. * sysdeps/i386/i486/strlen.S, sysdeps/i386/i586/strchr.S: New files. * sysdeps/i386/i586/strlen.S: New file. * sysdeps/i386/memchr.c: Removed. There is now an assembler version. * sysdeps/i386/i586/memcopy.h (WORD_COPY_BWD): Parameters did not correspond to used values. * sysdeps/unix/sysv/linux/nfs/nfs.h: New file. Simply a wrapper around a kernel header file. * sysdeps/unix/sysv/linux/Dist: Add it. * sysdeps/unix/sysv/linux/Makefile [$(subdir)=sunrpc] (headers): Likewise. * sysdeps/unix/sysv/linux/local_lim.h: Rewrite. Instead of defining ourself we use a kernel header file. * sysdeps/unix/sysv/linux/i386/sysdep.h (DO_CALL): Optimize system call handler for i586. * sysdeps/unix/sysv/linux/sys/param.h: Add copyright and clean up.
Diffstat (limited to 'stdlib')
-rw-r--r--stdlib/gmp-impl.h66
-rw-r--r--stdlib/gmp.h40
-rw-r--r--stdlib/longlong.h229
3 files changed, 220 insertions, 115 deletions
diff --git a/stdlib/gmp-impl.h b/stdlib/gmp-impl.h
index ccffe7bb1e..48d3af9761 100644
--- a/stdlib/gmp-impl.h
+++ b/stdlib/gmp-impl.h
@@ -19,11 +19,17 @@ along with the GNU MP Library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
#if ! defined (alloca)
-#if defined (__GNUC__) || defined (__sparc__) || defined (sparc)
+#if defined (__GNUC__)
#define alloca __builtin_alloca
#endif
#endif
+#if ! defined (alloca)
+#if defined (__sparc__) || defined (sparc) || defined (__sgi)
+#include <alloca.h>
+#endif
+#endif
+
#ifndef NULL
#define NULL 0L
#endif
@@ -168,6 +174,7 @@ void _mp_default_free ();
else \
____mpn_sqr_n (prodp, up, size, tspace); \
} while (0);
+#define assert(trueval) do {if (!(trueval)) abort ();} while (0)
/* Structure for conversion between internal binary format and
strings in base 2..36. */
@@ -197,9 +204,11 @@ struct bases
extern const struct bases __mp_bases[];
extern mp_size_t __gmp_default_fp_limb_precision;
-/* Divide the two-limb number in (NH,,NL) by D, with DI being a 32 bit
- approximation to (2**(2*BITS_PER_MP_LIMB))/D - (2**BITS_PER_MP_LIMB).
- Put the quotient in Q and the remainder in R. */
+/* Divide the two-limb number in (NH,,NL) by D, with DI being the largest
+ limb not larger than (2**(2*BITS_PER_MP_LIMB))/D - (2**BITS_PER_MP_LIMB).
+ If this would yield overflow, DI should be the largest possible number
+ (i.e., only ones). For correct operation, the most significant bit of D
+ has to be set. Put the quotient in Q and the remainder in R. */
#define udiv_qrnnd_preinv(q, r, nh, nl, d, di) \
do { \
mp_limb _q, _ql, _r; \
@@ -226,6 +235,8 @@ extern mp_size_t __gmp_default_fp_limb_precision;
(r) = _r; \
(q) = _q; \
} while (0)
+/* Like udiv_qrnnd_preinv, but for for any value D. DNORM is D shifted left
+ so that its most significant bit is set. LGUP is ceil(log2(D)). */
#define udiv_qrnnd_preinv2gen(q, r, nh, nl, d, di, dnorm, lgup) \
do { \
mp_limb n2, n10, n1, nadj, q1; \
@@ -243,6 +254,8 @@ extern mp_size_t __gmp_default_fp_limb_precision;
(r) = _xl + ((d) & _xh); \
(q) = _xh - q1; \
} while (0)
+/* Exactly like udiv_qrnnd_preinv, but branch-free. It is not clear which
+ version to use. */
#define udiv_qrnnd_preinv2norm(q, r, nh, nl, d, di) \
do { \
mp_limb n2, n10, n1, nadj, q1; \
@@ -262,22 +275,49 @@ extern mp_size_t __gmp_default_fp_limb_precision;
} while (0)
#if defined (__GNUC__)
-/* Define stuff for longlong.h asm macros. */
-#if __GNUC_NEW_ATTR_MODE_SYNTAX
-typedef unsigned int UQItype __attribute__ ((mode ("QI")));
-typedef int SItype __attribute__ ((mode ("SI")));
-typedef unsigned int USItype __attribute__ ((mode ("SI")));
-typedef int DItype __attribute__ ((mode ("DI")));
-typedef unsigned int UDItype __attribute__ ((mode ("DI")));
-#else
+/* Define stuff for longlong.h. */
typedef unsigned int UQItype __attribute__ ((mode (QI)));
typedef int SItype __attribute__ ((mode (SI)));
typedef unsigned int USItype __attribute__ ((mode (SI)));
typedef int DItype __attribute__ ((mode (DI)));
typedef unsigned int UDItype __attribute__ ((mode (DI)));
-#endif
+#else
+typedef unsigned char UQItype;
+typedef long SItype;
+typedef unsigned long USItype;
#endif
typedef mp_limb UWtype;
typedef unsigned int UHWtype;
#define W_TYPE_SIZE BITS_PER_MP_LIMB
+
+
+#ifndef IEEE_DOUBLE_BIG_ENDIAN
+#define IEEE_DOUBLE_BIG_ENDIAN 1
+#endif
+
+#if IEEE_DOUBLE_BIG_ENDIAN
+union ieee_double_extract
+{
+ struct
+ {
+ unsigned long sig:1;
+ unsigned long exp:11;
+ unsigned long manh:20;
+ unsigned long manl:32;
+ } s;
+ double d;
+};
+#else
+union ieee_double_extract
+{
+ struct
+ {
+ unsigned long manl:32;
+ unsigned long manh:20;
+ unsigned long exp:11;
+ unsigned long sig:1;
+ } s;
+ double d;
+};
+#endif
diff --git a/stdlib/gmp.h b/stdlib/gmp.h
index 95c2f1beba..0b2cb29014 100644
--- a/stdlib/gmp.h
+++ b/stdlib/gmp.h
@@ -24,13 +24,13 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
#define __need_size_t
#include <stddef.h>
-#ifdef __STDC__
+#if defined (__STDC__)
#define __gmp_const const
#else
#define __gmp_const
#endif
-#ifdef __GNUC__
+#if defined (__GNUC__)
#define __gmp_inline inline
#else
#define __gmp_inline
@@ -40,9 +40,14 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
typedef unsigned int mp_limb;
typedef int mp_limb_signed;
#else
+#if _LONG_LONG_LIMB
+typedef unsigned long long int mp_limb;
+typedef long long int mp_limb_signed;
+#else
typedef unsigned long int mp_limb;
typedef long int mp_limb_signed;
#endif
+#endif
typedef mp_limb * mp_ptr;
typedef __gmp_const mp_limb * mp_srcptr;
@@ -52,9 +57,9 @@ typedef long int mp_exp_t;
#ifndef __MP_SMALL__
typedef struct
{
- long int alloc; /* Number of *limbs* allocated and pointed
+ mp_size_t alloc; /* Number of *limbs* allocated and pointed
to by the D field. */
- long int size; /* abs(SIZE) is the number of limbs
+ mp_size_t size; /* abs(SIZE) is the number of limbs
the last field points to. If SIZE
is negative this is a negative
number. */
@@ -130,12 +135,16 @@ typedef __mpf_struct *mpf_ptr;
typedef __gmp_const __mpq_struct *mpq_srcptr;
typedef __mpq_struct *mpq_ptr;
-#ifdef __STDC__
+#if defined (__STDC__)
#define _PROTO(x) x
#else
#define _PROTO(x) ()
#endif
+#if defined (FILE) || defined (_STDIO_H_) || defined (__STDIO_H__) || defined (H_STDIO)
+#define _GMP_H_HAVE_FILE 1
+#endif
+
void mp_set_memory_functions _PROTO((void *(*) (size_t),
void *(*) (void *, size_t, size_t),
void (*) (void *, size_t)));
@@ -165,7 +174,7 @@ unsigned long int mpz_get_ui _PROTO ((mpz_srcptr));
mp_limb mpz_getlimbn _PROTO ((mpz_srcptr, mp_size_t));
mp_size_t mpz_hamdist _PROTO ((mpz_srcptr, mpz_srcptr));
void mpz_init _PROTO ((mpz_ptr));
-#ifdef FILE
+#ifdef _GMP_H_HAVE_FILE
void mpz_inp_raw _PROTO ((mpz_ptr, FILE *));
int mpz_inp_str _PROTO ((mpz_ptr, FILE *, int));
#endif
@@ -180,7 +189,7 @@ void mpz_mul _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
void mpz_mul_2exp _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
void mpz_mul_ui _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
void mpz_neg _PROTO ((mpz_ptr, mpz_srcptr));
-#ifdef FILE
+#ifdef _GMP_H_HAVE_FILE
void mpz_out_raw _PROTO ((FILE *, mpz_srcptr));
void mpz_out_str _PROTO ((FILE *, int, mpz_srcptr));
#endif
@@ -218,6 +227,8 @@ void mpz_tdiv_qr_ui _PROTO((mpz_ptr, mpz_ptr, mpz_srcptr, unsigned long int));
void mpz_tdiv_r _PROTO((mpz_ptr, mpz_srcptr, mpz_srcptr));
void mpz_tdiv_r_ui _PROTO((mpz_ptr, mpz_srcptr, unsigned long int));
+void mpz_array_init (mpz_ptr, size_t, mp_size_t);
+
/**************** Rational (i.e. Q) routines. ****************/
void mpq_init _PROTO ((mpq_ptr));
@@ -253,7 +264,7 @@ void mpf_dump _PROTO ((mpf_srcptr));
char *mpf_get_str _PROTO ((char *, mp_exp_t *, int, size_t, mpf_srcptr));
void mpf_init _PROTO ((mpf_ptr));
void mpf_init2 _PROTO ((mpf_ptr, mp_size_t));
-#ifdef FILE
+#ifdef _GMP_H_HAVE_FILE
void mpf_inp_str _PROTO ((mpf_ptr, FILE *, int));
#endif
void mpf_init_set _PROTO ((mpf_ptr, mpf_srcptr));
@@ -265,7 +276,7 @@ void mpf_mul _PROTO ((mpf_ptr, mpf_srcptr, mpf_srcptr));
void mpf_mul_2exp _PROTO ((mpf_ptr, mpf_srcptr, unsigned long int));
void mpf_mul_ui _PROTO ((mpf_ptr, mpf_srcptr, unsigned long int));
void mpf_neg _PROTO ((mpf_ptr, mpf_srcptr));
-#ifdef FILE
+#ifdef _GMP_H_HAVE_FILE
void mpf_out_str _PROTO ((mpf_ptr, int, size_t, FILE *));
#endif
void mpf_set _PROTO ((mpf_ptr, mpf_srcptr));
@@ -335,7 +346,7 @@ mp_limb __mpn_gcd_1 _PROTO ((mp_srcptr, mp_size_t, mp_limb));
static __gmp_inline mp_limb
-#if __STDC__
+#if defined (__STDC__)
__mpn_add_1 (register mp_ptr res_ptr,
register mp_srcptr s1_ptr,
register mp_size_t s1_size,
@@ -377,7 +388,7 @@ __mpn_add_1 (res_ptr, s1_ptr, s1_size, s2_limb)
}
static __gmp_inline mp_limb
-#if __STDC__
+#if defined (__STDC__)
__mpn_add (register mp_ptr res_ptr,
register mp_srcptr s1_ptr,
register mp_size_t s1_size,
@@ -406,7 +417,7 @@ __mpn_add (res_ptr, s1_ptr, s1_size, s2_ptr, s2_size)
}
static __gmp_inline mp_limb
-#if __STDC__
+#if defined (__STDC__)
__mpn_sub_1 (register mp_ptr res_ptr,
register mp_srcptr s1_ptr,
register mp_size_t s1_size,
@@ -448,7 +459,7 @@ __mpn_sub_1 (res_ptr, s1_ptr, s1_size, s2_limb)
}
static __gmp_inline mp_limb
-#if __STDC__
+#if defined (__STDC__)
__mpn_sub (register mp_ptr res_ptr,
register mp_srcptr s1_ptr,
register mp_size_t s1_size,
@@ -477,7 +488,7 @@ __mpn_sub (res_ptr, s1_ptr, s1_size, s2_ptr, s2_size)
}
static __gmp_inline mp_size_t
-#if __STDC__
+#if defined (__STDC__)
__mpn_normal_size (mp_srcptr ptr, mp_size_t size)
#else
__mpn_normal_size (ptr, size)
@@ -512,7 +523,6 @@ __mpn_normal_size (ptr, size)
/* Useful synonyms, but not quite compatible with GMP 1. */
#define mpz_div mpz_fdiv_q
#define mpz_divmod mpz_fdiv_qr
-#define mpz_mod mpz_fdiv_r
#define mpz_div_ui mpz_fdiv_q_ui
#define mpz_divmod_ui mpz_fdiv_qr_ui
#define mpz_mod_ui mpz_fdiv_r_ui
diff --git a/stdlib/longlong.h b/stdlib/longlong.h
index 97c469d8c0..bbb92e3af8 100644
--- a/stdlib/longlong.h
+++ b/stdlib/longlong.h
@@ -97,7 +97,7 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
#define __AND_CLOBBER_CC , "cc"
#endif /* __GNUC__ < 2 */
-#if (defined (__a29k__) || defined (___AM29K__)) && W_TYPE_SIZE == 32
+#if (defined (__a29k__) || defined (_AM29K)) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
__asm__ ("add %1,%4,%5
addc %0,%2,%3" \
@@ -152,6 +152,7 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
(pl) = __m0 * __m1; \
} while (0)
#define UMUL_TIME 46
+#ifndef LONGLONG_STANDALONE
#define udiv_qrnnd(q, r, n1, n0, d) \
do { UDItype __r; \
(q) = __udiv_qrnnd (&__r, (n1), (n0), (d)); \
@@ -159,12 +160,13 @@ the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
} while (0)
extern UDItype __udiv_qrnnd ();
#define UDIV_TIME 220
-#endif
+#endif /* LONGLONG_STANDALONE */
+#endif /* __alpha__ */
#if defined (__arm__) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
- __asm__ ("adds %1,%4,%5
- adc %0,%2,%3" \
+ __asm__ ("adds %1, %4, %5
+ adc %0, %2, %3" \
: "=r" ((USItype)(sh)), \
"=&r" ((USItype)(sl)) \
: "%r" ((USItype)(ah)), \
@@ -172,8 +174,8 @@ extern UDItype __udiv_qrnnd ();
"%r" ((USItype)(al)), \
"rI" ((USItype)(bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
- __asm__ ("subs %1,%4,%5
- sbc %0,%2,%3" \
+ __asm__ ("subs %1, %4, %5
+ sbc %0, %2, %3" \
: "=r" ((USItype)(sh)), \
"=&r" ((USItype)(sl)) \
: "r" ((USItype)(ah)), \
@@ -181,19 +183,19 @@ extern UDItype __udiv_qrnnd ();
"r" ((USItype)(al)), \
"rI" ((USItype)(bl)))
#define umul_ppmm(xh, xl, a, b) \
- __asm__ ("; Inlined umul_ppmm
- mov r0,%2 lsr 16
- mov r2,%3 lsr 16
- bic r1,%2,r0 lsl 16
- bic r2,%3,r2 lsl 16
- mul %1,r1,r2
- mul r2,r0,r2
- mul r1,%0,r1
- mul %0,r0,%0
- adds r1,r2,r1
- addcs %0,%0,0x10000
- adds %1,%1,r1 lsl 16
- adc %0,%0,r1 lsr 16" \
+ __asm__ ("%@ Inlined umul_ppmm
+ mov %|r0, %2, lsr #16
+ mov %|r2, %3, lsr #16
+ bic %|r1, %2, %|r0, lsl #16
+ bic %|r2, %3, %|r2, lsl #16
+ mul %1, %|r1, %|r2
+ mul %|r2, %|r0, %|r2
+ mul %|r1, %0, %|r1
+ mul %0, %|r0, %0
+ adds %|r1, %|r2, %|r1
+ addcs %0, %0, #65536
+ adds %1, %1, %|r1, lsl #16
+ adc %0, %0, %|r1, lsr #16" \
: "=&r" ((USItype)(xh)), \
"=r" ((USItype)(xl)) \
: "r" ((USItype)(a)), \
@@ -296,9 +298,9 @@ extern UDItype __udiv_qrnnd ();
struct {USItype __h, __l;} __i; \
} __xx; \
__asm__ ("xmpyu %1,%2,%0" \
- : "=x" (__xx.__ll) \
- : "x" ((USItype)(u)), \
- "x" ((USItype)(v))); \
+ : "=fx" (__xx.__ll) \
+ : "fx" ((USItype)(u)), \
+ "fx" ((USItype)(v))); \
(wh) = __xx.__i.__h; \
(wl) = __xx.__i.__l; \
} while (0)
@@ -308,12 +310,14 @@ extern UDItype __udiv_qrnnd ();
#define UMUL_TIME 40
#define UDIV_TIME 80
#endif
+#ifndef LONGLONG_STANDALONE
#define udiv_qrnnd(q, r, n1, n0, d) \
do { USItype __r; \
(q) = __udiv_qrnnd (&__r, (n1), (n0), (d)); \
(r) = __r; \
} while (0)
extern USItype __udiv_qrnnd ();
+#endif /* LONGLONG_STANDALONE */
#define count_leading_zeros(count, x) \
do { \
USItype __tmp; \
@@ -419,8 +423,12 @@ extern USItype __udiv_qrnnd ();
} while (0)
#define count_trailing_zeros(count, x) \
__asm__ ("bsfl %1,%0" : "=r" (count) : "rm" ((USItype)(x)))
+#ifndef UMUL_TIME
#define UMUL_TIME 40
+#endif
+#ifndef UDIV_TIME
#define UDIV_TIME 40
+#endif
#endif /* 80x86 */
#if defined (__i960__) && W_TYPE_SIZE == 32
@@ -442,7 +450,7 @@ extern USItype __udiv_qrnnd ();
__w; })
#endif /* __i960__ */
-#if defined (__mc68000__) && W_TYPE_SIZE == 32
+#if (defined (__mc68000__) || defined (__mc68020__) || defined (__NeXT__) || defined(mc68020)) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
__asm__ ("add%.l %5,%1
addx%.l %3,%0" \
@@ -489,38 +497,34 @@ extern USItype __udiv_qrnnd ();
: "=d" ((USItype)(count)) \
: "od" ((USItype)(x)), "n" (0))
#else /* not mc68020 */
-#define umul_ppmm(xh, xl, a, b) \
- __asm__ ("| Inlined umul_ppmm
- move%.l %2,%/d0
- move%.l %3,%/d1
- move%.l %/d0,%/d2
- swap %/d0
- move%.l %/d1,%/d3
- swap %/d1
- move%.w %/d2,%/d4
- mulu %/d3,%/d4
- mulu %/d1,%/d2
- mulu %/d0,%/d3
- mulu %/d0,%/d1
- move%.l %/d4,%/d0
- eor%.w %/d0,%/d0
- swap %/d0
- add%.l %/d0,%/d2
- add%.l %/d3,%/d2
+#define umul_ppmmxx(xh, xl, a, b) \
+ do { USItype __umul_tmp1, __umul_tmp2; \
+ __asm__ ("| Inlined umul_ppmm
+ move%.l %5,%3
+ move%.l %2,%0
+ move%.w %3,%1
+ swap %3
+ swap %0
+ mulu %2,%1
+ mulu %3,%0
+ mulu %2,%3
+ swap %2
+ mulu %5,%2
+ add%.l %3,%2
jcc 1f
- add%.l #65536,%/d1
-1: swap %/d2
- moveq #0,%/d0
- move%.w %/d2,%/d0
- move%.w %/d4,%/d2
- move%.l %/d2,%1
- add%.l %/d1,%/d0
- move%.l %/d0,%0" \
- : "=g" ((USItype)(xh)), \
- "=g" ((USItype)(xl)) \
- : "g" ((USItype)(a)), \
- "g" ((USItype)(b)) \
- : "d0", "d1", "d2", "d3", "d4")
+ add%.l %#0x10000,%0
+1: move%.l %2,%3
+ clr%.w %2
+ swap %2
+ swap %3
+ clr%.w %3
+ add%.l %3,%1
+ addx%.l %2,%0
+ | End inlined umul_ppmm" \
+ : "=&d" ((USItype)(xh)), "=&d" ((USItype)(xl)), \
+ "=d" (__umul_tmp1), "=&d" (__umul_tmp2) \
+ : "%2" ((USItype)(a)), "d" ((USItype)(b))); \
+ } while (0)
#define UMUL_TIME 100
#define UDIV_TIME 400
#endif /* not mc68020 */
@@ -553,7 +557,7 @@ extern USItype __udiv_qrnnd ();
: "r" ((USItype)(x))); \
(count) = __cbtmp ^ 31; \
} while (0)
-#if defined (__mc88110__)
+#if defined (__m88110__)
#define umul_ppmm(wh, wl, u, v) \
do { \
union {UDItype __ll; \
@@ -582,10 +586,18 @@ extern USItype __udiv_qrnnd ();
#else
#define UMUL_TIME 17
#define UDIV_TIME 150
-#endif /* __mc88110__ */
+#endif /* __m88110__ */
#endif /* __m88000__ */
#if defined (__mips__) && W_TYPE_SIZE == 32
+#if __GNUC__ > 2 || __GNUC_MINOR__ >= 7
+#define umul_ppmm(w1, w0, u, v) \
+ __asm__ ("multu %2,%3" \
+ : "=l" ((USItype)(w0)), \
+ "=h" ((USItype)(w1)) \
+ : "d" ((USItype)(u)), \
+ "d" ((USItype)(v)))
+#else
#define umul_ppmm(w1, w0, u, v) \
__asm__ ("multu %2,%3
mflo %0
@@ -594,11 +606,20 @@ extern USItype __udiv_qrnnd ();
"=d" ((USItype)(w1)) \
: "d" ((USItype)(u)), \
"d" ((USItype)(v)))
+#endif
#define UMUL_TIME 10
#define UDIV_TIME 100
#endif /* __mips__ */
#if (defined (__mips) && __mips >= 3) && W_TYPE_SIZE == 64
+#if __GNUC__ > 2 || __GNUC_MINOR__ >= 7
+#define umul_ppmm(w1, w0, u, v) \
+ __asm__ ("dmultu %2,%3" \
+ : "=l" ((UDItype)(w0)), \
+ "=h" ((UDItype)(w1)) \
+ : "d" ((UDItype)(u)), \
+ "d" ((UDItype)(v)))
+#else
#define umul_ppmm(w1, w0, u, v) \
__asm__ ("dmultu %2,%3
mflo %0
@@ -607,8 +628,9 @@ extern USItype __udiv_qrnnd ();
"=d" ((UDItype)(w1)) \
: "d" ((UDItype)(u)), \
"d" ((UDItype)(v)))
-#define UMUL_TIME 10
-#define UDIV_TIME 100
+#endif
+#define UMUL_TIME 20
+#define UDIV_TIME 140
#endif /* __mips__ */
#if defined (__ns32000__) && W_TYPE_SIZE == 32
@@ -647,7 +669,7 @@ extern USItype __udiv_qrnnd ();
} while (0)
#endif /* __ns32000__ */
-#if (defined (__powerpc__) || defined (___IBMR2__)) && W_TYPE_SIZE == 32
+#if (defined (_ARCH_PPC) || defined (_IBMR2)) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
do { \
if (__builtin_constant_p (bh) && (bh) == 0) \
@@ -676,14 +698,14 @@ extern USItype __udiv_qrnnd ();
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
do { \
if (__builtin_constant_p (ah) && (ah) == 0) \
- __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \
+ __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \
: "=r" ((USItype)(sh)), \
"=&r" ((USItype)(sl)) \
: "r" ((USItype)(bh)), \
"rI" ((USItype)(al)), \
"r" ((USItype)(bl))); \
else if (__builtin_constant_p (ah) && (ah) ==~(USItype) 0) \
- __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \
+ __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \
: "=r" ((USItype)(sh)), \
"=&r" ((USItype)(sl)) \
: "r" ((USItype)(bh)), \
@@ -716,7 +738,7 @@ extern USItype __udiv_qrnnd ();
__asm__ ("{cntlz|cntlzw} %0,%1" \
: "=r" ((USItype)(count)) \
: "r" ((USItype)(x)))
-#if defined (__powerpc__)
+#if defined (_ARCH_PPC)
#define umul_ppmm(ph, pl, m0, m1) \
do { \
USItype __m0 = (m0), __m1 = (m1); \
@@ -785,16 +807,15 @@ extern USItype __udiv_qrnnd ();
"g" ((USItype)(bh)), \
"1" ((USItype)(al)), \
"g" ((USItype)(bl)))
-/* This insn doesn't work on ancient pyramids. */
+/* This insn works on Pyramids with AP, XP, or MI CPUs, but not with SP. */
#define umul_ppmm(w1, w0, u, v) \
({union {UDItype __ll; \
struct {USItype __h, __l;} __i; \
} __xx; \
- __xx.__i.__l = u; \
- __asm__ ("uemul %3,%0" \
- : "=r" (__xx.__i.__h), \
- "=r" (__xx.__i.__l) \
- : "1" (__xx.__i.__l), \
+ __asm__ ("movw %1,%R0
+ uemul %2,%0" \
+ : "=&r" (__xx.__ll) \
+ : "g" ((USItype) (u)), \
"g" ((USItype)(v))); \
(w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
#endif /* __pyr__ */
@@ -868,6 +889,20 @@ extern USItype __udiv_qrnnd ();
} while (0)
#endif
+#if defined (__sh2__) && W_TYPE_SIZE == 32
+#define umul_ppmm(w1, w0, u, v) \
+ __asm__ ( \
+ "dmulu.l %2,%3
+ sts macl,%1
+ sts mach,%0" \
+ : "=r" ((USItype)(w1)), \
+ "=r" ((USItype)(w0)) \
+ : "r" ((USItype)(u)), \
+ "r" ((USItype)(v)) \
+ : "macl", "mach")
+#define UMUL_TIME 5
+#endif
+
#if defined (__sparc__) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
__asm__ ("addcc %r4,%5,%1
@@ -901,17 +936,21 @@ extern USItype __udiv_qrnnd ();
: "r" ((USItype)(u)), \
"r" ((USItype)(v)))
#define UMUL_TIME 5
-/* We might want to leave this undefined for `SuperSPARC (tm)' since
- its implementation is crippled and often traps. */
+#ifndef SUPERSPARC /* SuperSPARC's udiv only handles 53 bit dividends */
#define udiv_qrnnd(q, r, n1, n0, d) \
- __asm__ ("mov %2,%%y;nop;nop;nop;udiv %3,%4,%0;umul %0,%4,%1;sub %3,%1,%1"\
- : "=&r" ((USItype)(q)), \
- "=&r" ((USItype)(r)) \
- : "r" ((USItype)(n1)), \
- "r" ((USItype)(n0)), \
- "r" ((USItype)(d)))
+ do { \
+ USItype __q; \
+ __asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \
+ : "=r" ((USItype)(__q)) \
+ : "r" ((USItype)(n1)), \
+ "r" ((USItype)(n0)), \
+ "r" ((USItype)(d))); \
+ (r) = (n0) - __q * (d); \
+ (q) = __q; \
+ } while (0)
#define UDIV_TIME 25
-#else
+#endif /* SUPERSPARC */
+#else /* ! __sparc_v8__ */
#if defined (__sparclite__)
/* This has hardware multiply but not divide. It also has two additional
instructions scan (ffs from high bit) and divscc. */
@@ -973,9 +1012,10 @@ extern USItype __udiv_qrnnd ();
__asm__ ("scan %1,0,%0" \
: "=r" ((USItype)(x)) \
: "r" ((USItype)(count)))
-#else
-/* SPARC without integer multiplication and divide instructions.
- (i.e. at least Sun4/20,40,60,65,75,110,260,280,330,360,380,470,490) */
+#endif /* __sparclite__ */
+#endif /* __sparc_v8__ */
+/* Default to sparc v7 versions of umul_ppmm and udiv_qrnnd. */
+#ifndef umul_ppmm
#define umul_ppmm(w1, w0, u, v) \
__asm__ ("! Inlined umul_ppmm
wr %%g0,%2,%%y ! SPARC has 0-3 delay insn after a wr
@@ -1023,6 +1063,9 @@ extern USItype __udiv_qrnnd ();
"r" ((USItype)(v)) \
: "%g1", "%g2" __AND_CLOBBER_CC)
#define UMUL_TIME 39 /* 39 instructions */
+#endif
+#ifndef udiv_qrnnd
+#ifndef LONGLONG_STANDALONE
#define udiv_qrnnd(q, r, n1, n0, d) \
do { USItype __r; \
(q) = __udiv_qrnnd (&__r, (n1), (n0), (d)); \
@@ -1030,8 +1073,8 @@ extern USItype __udiv_qrnnd ();
} while (0)
extern USItype __udiv_qrnnd ();
#define UDIV_TIME 140
-#endif /* __sparclite__ */
-#endif /* __sparc_v8__ */
+#endif /* LONGLONG_STANDALONE */
+#endif /* udiv_qrnnd */
#endif /* __sparc__ */
#if defined (__vax__) && W_TYPE_SIZE == 32
@@ -1075,7 +1118,7 @@ extern USItype __udiv_qrnnd ();
__xx.__i.__h = n1; __xx.__i.__l = n0; \
__asm__ ("ediv %3,%2,%0,%1" \
: "=g" (q), "=g" (r) \
- : "g" (__n1n0.ll), "g" (d)); \
+ : "g" (__xx.ll), "g" (d)); \
} while (0)
#endif /* __vax__ */
@@ -1173,11 +1216,12 @@ extern USItype __udiv_qrnnd ();
do { \
UWtype __x0, __x1, __x2, __x3; \
UHWtype __ul, __vl, __uh, __vh; \
+ UWtype __u = (u), __v = (v); \
\
- __ul = __ll_lowpart (u); \
- __uh = __ll_highpart (u); \
- __vl = __ll_lowpart (v); \
- __vh = __ll_highpart (v); \
+ __ul = __ll_lowpart (__u); \
+ __uh = __ll_highpart (__u); \
+ __vl = __ll_lowpart (__v); \
+ __vh = __ll_highpart (__v); \
\
__x0 = (UWtype) __ul * __vl; \
__x1 = (UWtype) __ul * __vh; \
@@ -1194,6 +1238,17 @@ extern USItype __udiv_qrnnd ();
} while (0)
#endif
+#if !defined (umul_ppmm)
+#define smul_ppmm(w1, w0, u, v) \
+ do { \
+ UWtype __w1; \
+ UWtype __m0 = (u), __m1 = (v); \
+ umul_ppmm (__w1, w0, __m0, __m1); \
+ (w1) = __w1 - (-(__m0 >> (W_TYPE_SIZE - 1)) & __m1) \
+ - (-(__m1 >> (W_TYPE_SIZE - 1)) & __m0); \
+ } while (0)
+#endif
+
/* Define this unconditionally, so it can be used for debugging. */
#define __udiv_qrnnd_c(q, r, n1, n0, d) \
do { \