summaryrefslogtreecommitdiff
path: root/gmp-impl.h
diff options
context:
space:
mode:
author tege <tege@gmplib.org> 1997-07-25 17:23:52 +0200
committer tege <tege@gmplib.org> 1997-07-25 17:23:52 +0200
commit 1f135c20c4a77cdd21a73ee0b6acbc7467f6e2a1 (patch)
tree 95d50218bc5801eeed642a387e4e709ede099f3b /gmp-impl.h
parent d084d5eff9bfd8f379b1f21fa5b82f0d3a893cb6 (diff)
download gmp-1f135c20c4a77cdd21a73ee0b6acbc7467f6e2a1.tar.gz
(KARATSUBA_MUL_THRESHOLD): New #define.
(KARATSUBA_SQR_THRESHOLD): Likewise. (MPN_SQR_N_RECURSE): Use KARATSUBA_SQR_THRESHOLD. (MPN_MUL_N_RECURSE): Use KARATSUBA_MUL_THRESHOLD. (__gmp_0): Make it `const'. (MPN_MUL_N_RECURSE): Update calls to match new names and parameter conventions. (MPN_SQR_RECURSE): New name for MPN_SQR_N_RECURSE. Update calls to match new names and parameter conventions. (MPN_COPY): Define as annotated inline function for Crays and Fujitsu VPPs. (CNST_LIMB): New macro. Protect definitions using `__attribute__ ((mode (...)))' with test also for __GNUC_MINOR__. (struct bases): Change type of chars_per_bit_exactly from float to double. In code for determining endianness, test also __BIG_ENDIAN__ and __hppa__. Remove test of __NeXT__. (Cray/uxp MPN_COPY): Really declare as inline. (MPN_COPY_INCR, MPN_COPY_DECR): Define as inline asm for x86, but leave disabled for now. Move definitions of PP and PP_INVERTED from mpn/generic/perfsqr.c. Move defaulting of UMUL_TIME and UDIV_TIME from here to longlong.h. (MPZ_CHECK_FORMAT): New #define. (MPZ_PROVOKE_REALLOC): New #define.
Diffstat (limited to 'gmp-impl.h')
-rw-r--r-- gmp-impl.h 147
1 files changed, 125 insertions, 22 deletions
diff --git a/gmp-impl.h b/gmp-impl.h
index 8dad1f585..f70397e29 100644
--- a/gmp-impl.h
+++ b/gmp-impl.h
@@ -111,29 +111,75 @@ void *_mp_default_reallocate ();
void _mp_default_free ();
#endif
+#if defined (__GNUC__) && defined (__i386__)
+#if 0 /* check that these actually improve things */
+#define MPN_COPY_INCR(DST, SRC, N) \
+ __asm__ ("cld\n\trep\n\tmovsl" : : \
+ "D" (DST), "S" (SRC), "c" (N) : \
+ "cx", "di", "si", "memory")
+#define MPN_COPY_DECR(DST, SRC, N) \
+ __asm__ ("std\n\trep\n\tmovsl" : : \
+ "D" ((DST) + (N) - 1), "S" ((SRC) + (N) - 1), "c" (N) : \
+ "cx", "di", "si", "memory")
+#define MPN_NORMALIZE_NOT_ZERO(P, N) \
+ do { \
+ __asm__ ("std\n\trepe\n\tscasl" : "=c" (N) : \
+ "a" (0), "D" ((P) + (N) - 1), "0" (N) : \
+ "cx", "di"); \
+ (N)++; \
+ } while (0)
+#endif
+#endif
+
/* Copy NLIMBS *limbs* from SRC to DST. */
+#ifndef MPN_COPY_INCR
#define MPN_COPY_INCR(DST, SRC, NLIMBS) \
do { \
mp_size_t __i; \
for (__i = 0; __i < (NLIMBS); __i++) \
(DST)[__i] = (SRC)[__i]; \
} while (0)
+#endif
+
+#ifndef MPN_COPY_DECR
#define MPN_COPY_DECR(DST, SRC, NLIMBS) \
do { \
mp_size_t __i; \
for (__i = (NLIMBS) - 1; __i >= 0; __i--) \
(DST)[__i] = (SRC)[__i]; \
} while (0)
+#endif
+
+/* Define MPN_COPY for vector computers. Since #pragma cannot be in a macro,
+ rely on function inlining. */
+#if defined (_CRAY) || defined (__uxp__)
+static inline void
+_MPN_COPY (d, s, n) mp_ptr d; mp_srcptr s; mp_size_t n;
+{
+ int i; /* Faster for Cray with plain int */
+#pragma _CRI ivdep /* Cray PVP systems */
+#pragma loop noalias d,s /* Fujitsu VPP systems */
+ for (i = 0; i < n; i++)
+ d[i] = s[i];
+}
+#define MPN_COPY _MPN_COPY
+#endif
+
+#ifndef MPN_COPY
#define MPN_COPY MPN_COPY_INCR
+#endif
/* Zero NLIMBS *limbs* AT DST. */
+#ifndef MPN_ZERO
#define MPN_ZERO(DST, NLIMBS) \
do { \
mp_size_t __i; \
for (__i = 0; __i < (NLIMBS); __i++) \
(DST)[__i] = 0; \
} while (0)
+#endif
+#ifndef MPN_NORMALIZE
#define MPN_NORMALIZE(DST, NLIMBS) \
do { \
while (NLIMBS > 0) \
@@ -143,6 +189,8 @@ void _mp_default_free ();
NLIMBS--; \
} \
} while (0)
+#endif
+#ifndef MPN_NORMALIZE_NOT_ZERO
#define MPN_NORMALIZE_NOT_ZERO(DST, NLIMBS) \
do { \
while (1) \
@@ -152,6 +200,7 @@ void _mp_default_free ();
NLIMBS--; \
} \
} while (0)
+#endif
/* Initialize X of type mpz_t with space for NLIMBS limbs. X should be a
temporary variable; it will be automatically cleared out at function
@@ -166,19 +215,35 @@ void _mp_default_free ();
#define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace) \
do { \
- if ((size) < KARATSUBA_THRESHOLD) \
- impn_mul_n_basecase (prodp, up, vp, size); \
+ if ((size) < KARATSUBA_MUL_THRESHOLD) \
+ __gmpn_mul_basecase (prodp, up, size, vp, size); \
else \
- impn_mul_n (prodp, up, vp, size, tspace); \
+ __gmpn_mul_n (prodp, up, vp, size, tspace); \
} while (0);
-#define MPN_SQR_N_RECURSE(prodp, up, size, tspace) \
+#define MPN_SQR_RECURSE(prodp, up, size, tspace) \
do { \
- if ((size) < KARATSUBA_THRESHOLD) \
- impn_sqr_n_basecase (prodp, up, size); \
+ if ((size) < KARATSUBA_SQR_THRESHOLD) \
+ __gmpn_sqr_basecase (prodp, up, size); \
else \
- impn_sqr_n (prodp, up, size, tspace); \
+ __gmpn_sqr (prodp, up, size, tspace); \
} while (0);
+/* If KARATSUBA_MUL_THRESHOLD is not already defined, define it to a
+ value which is good on most machines. */
+#ifndef KARATSUBA_MUL_THRESHOLD
+#define KARATSUBA_MUL_THRESHOLD 12
+#endif
+
+/* We can't handle KARATSUBA_MUL_THRESHOLD smaller than 2. */
+#if KARATSUBA_MUL_THRESHOLD < 2
+#undef KARATSUBA_MUL_THRESHOLD
+#define KARATSUBA_MUL_THRESHOLD 2
+#endif
+
+#ifndef KARATSUBA_SQR_THRESHOLD
+#define KARATSUBA_SQR_THRESHOLD (2*KARATSUBA_MUL_THRESHOLD)
+#endif
+
/* Structure for conversion between internal binary format and
strings in base 2..36. */
struct bases
@@ -189,7 +254,7 @@ struct bases
int chars_per_limb;
/* log(2)/log(conversion_base) */
- float chars_per_bit_exactly;
+ double chars_per_bit_exactly;
/* base**chars_per_limb, i.e. the biggest number that fits a word, built by
factors of base. Exception: For 2, 4, 8, etc, big_base is log2(base),
@@ -275,16 +340,19 @@ extern mp_size_t __gmp_default_fp_limb_precision;
(q) = _xh - q1; \
} while (0)
-#if defined (__GNUC__)
+/* The `mode' attribute was introduced in GCC 2.2, but we can only distinguish
+ between GCC 2 releases from 2.5, since __GNUC_MINOR__ wasn't introduced
+ until then. */
+#if __GNUC__ - 0 > 2 || defined (__GNUC_MINOR__)
/* Define stuff for longlong.h. */
typedef unsigned int UQItype __attribute__ ((mode (QI)));
-typedef int SItype __attribute__ ((mode (SI)));
+typedef int SItype __attribute__ ((mode (SI)));
typedef unsigned int USItype __attribute__ ((mode (SI)));
typedef int DItype __attribute__ ((mode (DI)));
typedef unsigned int UDItype __attribute__ ((mode (DI)));
#else
typedef unsigned char UQItype;
-typedef long SItype;
+typedef long SItype;
typedef unsigned long USItype;
#endif
@@ -292,12 +360,6 @@ typedef mp_limb_t UWtype;
typedef unsigned int UHWtype;
#define W_TYPE_SIZE BITS_PER_MP_LIMB
-/* Internal mpn calls */
-#define impn_mul_n_basecase __MPN(impn_mul_n_basecase)
-#define impn_mul_n __MPN(impn_mul_n)
-#define impn_sqr_n_basecase __MPN(impn_sqr_n_basecase)
-#define impn_sqr_n __MPN(impn_sqr_n)
-
/* Define ieee_double_extract and _GMP_IEEE_FLOATS. */
#if defined (_LITTLE_ENDIAN) || defined (__LITTLE_ENDIAN__) \
@@ -324,16 +386,16 @@ union ieee_double_extract
double d;
};
#else /* Need this as an #else since the tests aren't made exclusive. */
-#if defined (_BIG_ENDIAN) \
+#if defined (_BIG_ENDIAN) || defined (__BIG_ENDIAN__) \
|| defined (__a29k__) || defined (_AM29K) \
|| defined (__arm__) \
|| (defined (__convex__) && defined (_IEEE_FLOAT_)) \
|| defined (__i370__) || defined (__mvs__) \
- || defined (__mc68000__) || defined (__mc68020__) || defined (__NeXT__)\
+ || defined (__mc68000__) || defined (__mc68020__) || defined (__m68k__)\
|| defined(mc68020) \
|| defined (__m88000__) \
|| defined (MIPSEB) || defined (_MIPSEB) \
- || defined (__hppa) \
+ || defined (__hppa) || defined (__hppa__) \
|| defined (__pyr__) \
|| defined (__ibm032__) \
|| defined (_IBMR2) || defined (_ARCH_PPC) \
@@ -363,8 +425,49 @@ union ieee_double_extract
#endif
double __gmp_scale2 _PROTO ((double, int));
-int __gmp_extract_double _PROTO((mp_ptr, double));
+int __gmp_extract_double _PROTO ((mp_ptr, double));
+
+void __gmpn_mul_basecase _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));
+void __gmpn_sqr_basecase _PROTO ((mp_ptr, mp_srcptr, mp_size_t));
+void __gmpn_mul_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr));
+void __gmpn_sqr _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
extern int __gmp_junk;
-extern int __gmp_0;
+extern const int __gmp_0;
#define DIVIDE_BY_ZERO (__gmp_junk = 10/__gmp_0)
+
+#if defined _LONG_LONG_LIMB
+#if defined (__STDC__)
+#define CNST_LIMB(C) C##LL
+#else
+#define CNST_LIMB(C) C/**/LL
+#endif
+#else /* not _LONG_LONG_LIMB */
+#if defined (__STDC__)
+#define CNST_LIMB(C) C##L
+#else
+#define CNST_LIMB(C) C/**/L
+#endif
+#endif /* _LONG_LONG_LIMB */
+
+/*** Stuff used by mpn/generic/perfsqr.c and mpn/generic/next_prime.c ***/
+#if BITS_PER_MP_LIMB == 32
+#define PP 0xC0CFD797L /* 3 x 5 x 7 x 11 x 13 x ... x 29 */
+#define PP_INVERTED 0x53E5645CL
+#define PP_MAXPRIME 29
+#define PP_MASK 0x208A28A8L
+#endif
+
+#if BITS_PER_MP_LIMB == 64
+#define PP CNST_LIMB(0xE221F97C30E94E1D) /* 3 x 5 x 7 x 11 x 13 x ... x 53 */
+#define PP_INVERTED CNST_LIMB(0x21CFE6CFC938B36B)
+#define PP_MAXPRIME 53
+#define PP_MASK CNST_LIMB(0x208A20A08A28A8)
+#endif
+
+/* For testing and debugging. */
+#define MPZ_CHECK_FORMAT(z) \
+ do { if (SIZ(z) != 0 && PTR(z)[ABSIZ(z) - 1] == 0) abort (); \
+ if (ALLOC(z) < ABSIZ(z)) abort (); } while (0)
+#define MPZ_PROVOKE_REALLOC(z) \
+ do { ALLOC(z) = ABSIZ(z); } while (0)