Diffstat (limited to 'ghc/includes/PrimOps.h')
-rw-r--r--  ghc/includes/PrimOps.h  |  284
1 file changed, 171 insertions(+), 113 deletions(-)
diff --git a/ghc/includes/PrimOps.h b/ghc/includes/PrimOps.h
index b121e4ac0b..53072c3c95 100644
--- a/ghc/includes/PrimOps.h
+++ b/ghc/includes/PrimOps.h
@@ -1,5 +1,5 @@
/* -----------------------------------------------------------------------------
- * $Id: PrimOps.h,v 1.18 1999/02/11 17:15:20 simonm Exp $
+ * $Id: PrimOps.h,v 1.19 1999/02/17 15:57:30 simonm Exp $
*
* (c) The GHC Team, 1998-1999
*
@@ -82,15 +82,64 @@ I_ stg_div (I_ a, I_ b);
#define remIntzh(r,a,b) r=(a)%(b)
#define negateIntzh(r,a) r=-(a)
-/* The following operations are the standard add,subtract and multiply
- * except that they return a carry if the operation overflows.
+/* -----------------------------------------------------------------------------
+ * Int operations with carry.
+ * -------------------------------------------------------------------------- */
+
+/* With some bit-twiddling, we can define int{Add,Sub}Czh portably in
+ * C, and without needing any comparisons. This may not be the
+ * fastest way to do it - if you have better code, please send it! --SDM
+ *
+ * Return : r = a + b, c = 0 if no overflow, 1 on overflow.
+ *
+ * We currently don't make use of the r value if c is != 0 (i.e.
+ * overflow), we just convert to big integers and try again. This
+ * could be improved by making r and c the correct values for
+ * plugging into a new J#.
+ */
+#define addIntCzh(r,c,a,b) \
+{ r = a + b; \
+ c = ((StgWord)(~(a^b) & (a^r))) \
+ >> (BITS_PER_BYTE * sizeof(I_) - 1); \
+}
+
+
+#define subIntCzh(r,c,a,b) \
+{ r = a - b; \
+ c = ((StgWord)((a^b) & (a^r))) \
+ >> (BITS_PER_BYTE * sizeof(I_) - 1); \
+}
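
Aside (not part of the patch): the sign-bit trick above can be sanity-checked with a small standalone C program. Types are simplified to plain int/unsigned here, and the wrap-around addition is done in unsigned arithmetic to keep the sketch well-defined.

#include <stdio.h>
#include <limits.h>

#define BITS_PER_BYTE 8

/* Same trick as addIntCzh: overflow happened iff a and b have the
 * same sign but the wrapped result r has the opposite sign.        */
static void add_with_carry(int a, int b, int *r, int *c)
{
    *r = (int)((unsigned)a + (unsigned)b);          /* two's-complement wrap */
    *c = (int)(((unsigned)(~(a ^ b) & (a ^ *r)))
               >> (BITS_PER_BYTE * sizeof(int) - 1));
}

int main(void)
{
    int r, c;
    add_with_carry(INT_MAX,  1, &r, &c);  printf("%d\n", c);  /* 1: overflow    */
    add_with_carry(INT_MIN, -1, &r, &c);  printf("%d\n", c);  /* 1: overflow    */
    add_with_carry(1, 2, &r, &c);         printf("%d\n", c);  /* 0: no overflow */
    return 0;
}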
+
+/* Multiply with overflow checking.
+ *
+ * This is slightly more tricky - the usual sign rules for add/subtract
+ * don't apply.
*
- * They are all defined in terms of 32-bit integers and use the GCC
- * 'long long' extension to get a 64-bit result. We'd like to use
- * 64-bit integers on 64-bit architectures, but it seems that gcc's
- * 'long long' type is set at 64-bits even on a 64-bit machine.
+ * On x86 hardware we use a hand-crafted assembly fragment to do the job.
+ *
+ * On other 32-bit machines we use gcc's 'long long' types, finding
+ * overflow with some careful bit-twiddling.
+ *
+ * On 64-bit machines where gcc's 'long long' type is also 64-bits,
+ * we use a crude approximation, testing whether either operand is
+ * larger than 32-bits; if neither is, then we go ahead with the
+ * multiplication.
*/
+#if i386_TARGET_ARCH
+
+#define mulIntCzh(r,c,a,b) \
+{ \
+ __asm__("xor %1,%1\n\t \
+ imull %2,%3\n\t \
+ jno 1f\n\t \
+ movl $1,%1\n\t \
+ 1:" \
+ : "=r" (r), "=r" (c) : "r" (a), "0" (b)); \
+}
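
For readers not fluent in GCC inline assembly: %0 is r, %1 is c, %2 is a, and %3 is b (constrained with "0" to share r's register). The fragment clears c, multiplies with imull, which sets the overflow flag when the signed 32-bit product does not fit, and only in that case falls past the jno to set c to 1. A standalone wrapper around the same sequence (x86/GCC only, not part of PrimOps.h, with a hypothetical mul_c helper) behaves like this:

#include <stdio.h>

/* Wraps the imull/jno sequence used by mulIntCzh on x86. */
static void mul_c(int a, int b, int *rp, int *cp)
{
    int r = b, c;
    __asm__("xorl  %1,%1\n\t"       /* c = 0                          */
            "imull %2,%0\n\t"       /* r = r * a, sets OF on overflow */
            "jno   1f\n\t"          /* no overflow: leave c = 0       */
            "movl  $1,%1\n\t"       /* overflow:    c = 1             */
            "1:"
            : "+r" (r), "=&r" (c)
            : "r" (a)
            : "cc");
    *rp = r; *cp = c;
}

int main(void)
{
    int r, c;
    mul_c(100000, 100000, &r, &c);  printf("c=%d\n", c);         /* 10^10: c = 1  */
    mul_c(3, 4, &r, &c);            printf("r=%d c=%d\n", r, c); /* r = 12, c = 0 */
    return 0;
}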
+
+#elif SIZEOF_VOID_P == 4
+
#ifdef WORDS_BIGENDIAN
#define C 0
#define R 1
@@ -104,27 +153,37 @@ typedef union {
StgInt32 i[2];
} long_long_u ;
-#define addWithCarryzh(r,c,a,b) \
-{ long_long_u z; \
- z.l = a + b; \
- r = z.i[R]; \
- c = z.i[C]; \
+#define mulIntCzh(r,c,a,b) \
+{ long_long_u z; \
+ z.l = (StgInt64)a * (StgInt64)b; \
+ r = z.i[R]; \
+ c = z.i[C]; \
+ if (c == 0 || c == -1) { \
+ c = ((StgWord)((a^b) ^ r)) \
+ >> (BITS_PER_BYTE * sizeof(I_) - 1); \
+ } \
}
+/* Careful: the carry calculation above is extremely delicate. Make sure
+ * you test it thoroughly after changing it.
+ */
+#else
-#define subWithCarryzh(r,c,a,b) \
-{ long_long_u z; \
- z.l = a + b; \
- r = z.i[R]; \
- c = z.i[C]; \
-}
+#define HALF_INT (1 << (BITS_PER_BYTE * sizeof(I_) / 2))
+
+#define stg_abs(a) ((a) < 0 ? -(a) : (a))
-#define mulWithCarryzh(r,c,a,b) \
-{ long_long_u z; \
- z.l = a * b; \
- r = z.i[R]; \
- c = z.i[C]; \
+#define mulIntCzh(r,c,a,b) \
+{ \
+  if (stg_abs(a) >= HALF_INT || \
+      stg_abs(b) >= HALF_INT) { \
+ c = 1; \
+ } else { \
+ r = a * b; \
+ c = 0; \
+ } \
}
+#endif
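
Aside (not part of the patch): the delicate carry calculation in the 'long long' branch can be exercised with a standalone test. <stdint.h> types stand in for StgInt32/StgInt64, and two's-complement narrowing is assumed.

#include <stdio.h>
#include <stdint.h>

/* Same logic as the 32-bit branch of mulIntCzh: the product fits in
 * 32 bits iff the high word is a pure sign extension of the low word,
 * i.e. the high word is 0 or -1 AND the low word already carries the
 * sign the product should have (sign(a) xor sign(b)).               */
static void mul_c(int32_t a, int32_t b, int32_t *rp, int32_t *cp)
{
    int64_t z = (int64_t)a * (int64_t)b;
    int32_t r = (int32_t)z;            /* low word  */
    int32_t c = (int32_t)(z >> 32);    /* high word */
    if (c == 0 || c == -1) {
        c = (int32_t)(((uint32_t)((a ^ b) ^ r)) >> 31);
    }
    *rp = r; *cp = c;
}

int main(void)
{
    int32_t r, c;
    mul_c(65536, 65536, &r, &c);   printf("c=%d\n", (int)c);  /* 2^32: c = 1 */
    mul_c(INT32_MIN, -1, &r, &c);  printf("c=%d\n", (int)c);  /* 2^31: c = 1 */
    mul_c(-46340, 46340, &r, &c);  printf("c=%d\n", (int)c);  /* fits: c = 0 */
    return 0;
}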
/* -----------------------------------------------------------------------------
Word PrimOps.
@@ -248,50 +307,48 @@ typedef union {
* to allocate any memory.
*/
-#define integer2Intzh(r, aa,sa,da) \
-{ MP_INT arg; \
- \
- arg._mp_alloc = (aa); \
- arg._mp_size = (sa); \
- arg._mp_d = (unsigned long int *) (BYTE_ARR_CTS(da)); \
- \
- (r) = RET_PRIM_STGCALL1(I_,mpz_get_si,&arg); \
+#define integer2Intzh(r, sa,da) \
+{ MP_INT arg; \
+ \
+ arg._mp_size = (sa); \
+ arg._mp_alloc = ((StgArrWords *)da)->words; \
+ arg._mp_d = (unsigned long int *) (BYTE_ARR_CTS(da)); \
+ \
+ (r) = RET_PRIM_STGCALL1(I_,mpz_get_si,&arg); \
}
-#define integer2Wordzh(r, aa,sa,da) \
-{ MP_INT arg; \
- \
- arg._mp_alloc = (aa); \
- arg._mp_size = (sa); \
- arg._mp_d = (unsigned long int *) (BYTE_ARR_CTS(da)); \
- \
- (r) = RET_PRIM_STGCALL1(I_,mpz_get_ui,&arg); \
+#define integer2Wordzh(r, sa,da) \
+{ MP_INT arg; \
+ \
+ arg._mp_size = (sa); \
+ arg._mp_alloc = ((StgArrWords *)da)->words; \
+ arg._mp_d = (unsigned long int *) (BYTE_ARR_CTS(da)); \
+ \
+ (r) = RET_PRIM_STGCALL1(I_,mpz_get_ui,&arg); \
}
-#define cmpIntegerzh(r, a1,s1,d1, a2,s2,d2) \
-{ MP_INT arg1; \
- MP_INT arg2; \
- \
- arg1._mp_alloc= (a1); \
- arg1._mp_size = (s1); \
- arg1._mp_d = (unsigned long int *) (BYTE_ARR_CTS(d1)); \
- arg2._mp_alloc= (a2); \
- arg2._mp_size = (s2); \
- arg2._mp_d = (unsigned long int *) (BYTE_ARR_CTS(d2)); \
- \
- (r) = RET_PRIM_STGCALL2(I_,mpz_cmp,&arg1,&arg2); \
+#define cmpIntegerzh(r, s1,d1, s2,d2) \
+{ MP_INT arg1; \
+ MP_INT arg2; \
+ \
+ arg1._mp_size = (s1); \
+ arg1._mp_alloc= ((StgArrWords *)d1)->words; \
+ arg1._mp_d = (unsigned long int *) (BYTE_ARR_CTS(d1)); \
+ arg2._mp_size = (s2); \
+ arg2._mp_alloc= ((StgArrWords *)d2)->words; \
+ arg2._mp_d = (unsigned long int *) (BYTE_ARR_CTS(d2)); \
+ \
+ (r) = RET_PRIM_STGCALL2(I_,mpz_cmp,&arg1,&arg2); \
}
-/* A glorious hack: calling mpz_neg would entail allocation and
- * copying, but by looking at what mpz_neg actually does, we can
- * derive a better version:
- */
-
-#define negateIntegerzh(ra, rs, rd, a, s, d) \
-{ \
- (ra) = (a); \
- (rs) = -(s); \
- (rd) = d; \
+#define cmpIntegerIntzh(r, s,d, i) \
+{ MP_INT arg; \
+ \
+ arg._mp_size = (s); \
+ arg._mp_alloc = ((StgArrWords *)d)->words; \
+ arg._mp_d = (unsigned long int *) (BYTE_ARR_CTS(d)); \
+ \
+ (r) = RET_PRIM_STGCALL2(I_,mpz_cmp_si,&arg,i); \
}
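
What these macros do by hand, filling in _mp_size, _mp_alloc and _mp_d so that an existing StgArrWords can be handed to GMP without copying, corresponds in the modern public GMP API (version 6 and later) to building a read-only mpz view over an existing limb array. A rough standalone illustration, not the RTS code:

#include <stdio.h>
#include <gmp.h>

int main(void)
{
    /* Two limbs, least significant first, standing in for the payload
     * of BYTE_ARR_CTS(da); on a 64-bit-limb build this is 2^64 + 5.  */
    mp_limb_t limbs[2] = { 5, 1 };
    mpz_t z;

    /* Read-only view over existing storage: no allocation, just like
     * the macros above.  A positive size of 2 plays the role of sa.  */
    mpz_srcptr v = mpz_roinit_n(z, limbs, 2);

    gmp_printf("value    = %Zd\n", v);
    printf("get_si   = %ld\n", mpz_get_si(v));      /* cf. integer2Intzh   */
    printf("cmp_si 0 = %d\n",  mpz_cmp_si(v, 0));   /* cf. cmpIntegerIntzh */
    return 0;
}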
/* The rest are all out-of-line: -------- */
@@ -309,11 +366,8 @@ EF_(int2Integerzh_fast);
EF_(word2Integerzh_fast);
EF_(addr2Integerzh_fast);
-/* Floating-point encodings/decodings */
-EF_(encodeFloatzh_fast);
+/* Floating-point decodings */
EF_(decodeFloatzh_fast);
-
-EF_(encodeDoublezh_fast);
EF_(decodeDoublezh_fast);
/* -----------------------------------------------------------------------------
@@ -322,37 +376,41 @@ EF_(decodeDoublezh_fast);
#ifdef SUPPORT_LONG_LONGS
-#define integerToWord64zh(r, aa,sa,da) \
-{ unsigned long int* d; \
- StgNat64 res; \
- \
- d = (unsigned long int *) (BYTE_ARR_CTS(da)); \
- if ( (aa) == 0 ) { \
- res = (LW_)0; \
- } else if ( (aa) == 1) { \
- res = (LW_)d[0]; \
- } else { \
- res = (LW_)d[0] + (LW_)d[1] * 0x100000000ULL; \
- } \
- (r) = res; \
+#define integerToWord64zh(r, sa,da) \
+{ unsigned long int* d; \
+ I_ aa; \
+ StgNat64 res; \
+ \
+ d = (unsigned long int *) (BYTE_ARR_CTS(da)); \
+ aa = ((StgArrWords *)da)->words; \
+ if ( (aa) == 0 ) { \
+ res = (LW_)0; \
+ } else if ( (aa) == 1) { \
+ res = (LW_)d[0]; \
+ } else { \
+ res = (LW_)d[0] + (LW_)d[1] * 0x100000000ULL; \
+ } \
+ (r) = res; \
}
-#define integerToInt64zh(r, aa,sa,da) \
-{ unsigned long int* d; \
- StgInt64 res; \
- \
- d = (unsigned long int *) (BYTE_ARR_CTS(da)); \
- if ( (aa) == 0 ) { \
- res = (LI_)0; \
- } else if ( (aa) == 1) { \
- res = (LI_)d[0]; \
- } else { \
- res = (LI_)d[0] + (LI_)d[1] * 0x100000000LL; \
- if ( sa < 0 ) { \
- res = (LI_)-res; \
- } \
- } \
- (r) = res; \
+#define integerToInt64zh(r, sa,da) \
+{ unsigned long int* d; \
+ I_ aa; \
+ StgInt64 res; \
+ \
+ d = (unsigned long int *) (BYTE_ARR_CTS(da)); \
+ aa = ((StgArrWords *)da)->words; \
+ if ( (aa) == 0 ) { \
+ res = (LI_)0; \
+ } else if ( (aa) == 1) { \
+ res = (LI_)d[0]; \
+ } else { \
+ res = (LI_)d[0] + (LI_)d[1] * 0x100000000LL; \
+ if ( sa < 0 ) { \
+ res = (LI_)-res; \
+ } \
+ } \
+ (r) = res; \
}
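
Aside: a quick worked check of the limb arithmetic above (illustrative only; 32-bit limbs as the macro assumes, with hypothetical stand-ins for sa and da's payload):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    /* An Integer with two 32-bit limbs, least significant first, and a
     * negative size field: stand-ins for da's payload and sa.        */
    uint32_t d[2] = { 5, 2 };
    int      sa   = -2;

    /* Same reconstruction as integerToInt64zh:
     * d[0] + d[1] * 2^32, negated if the size is negative.           */
    int64_t res = (int64_t)d[0] + (int64_t)d[1] * 0x100000000LL;
    if (sa < 0) res = -res;

    printf("%lld\n", (long long)res);   /* -8589934597 == -(2 * 2^32 + 5) */
    return 0;
}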
/* Conversions */
@@ -539,29 +597,29 @@ EF_(newArrayzh_fast);
#include "ieee-flpt.h"
#if FLOATS_AS_DOUBLES /* i.e. 64-bit machines */
-#define encodeFloatzh(r, aa,sa,da, expon) encodeDoublezh(r, aa,sa,da, expon)
+#define encodeFloatzh(r, sa,da, expon) encodeDoublezh(r, sa,da, expon)
#else
-#define encodeFloatzh(r, aa,sa,da, expon) \
-{ MP_INT arg; \
- /* Does not allocate memory */ \
- \
- arg._mp_alloc = aa; \
- arg._mp_size = sa; \
- arg._mp_d = (unsigned long int *) (BYTE_ARR_CTS(da)); \
- \
- r = RET_PRIM_STGCALL2(StgFloat, __encodeFloat,&arg,(expon));\
+#define encodeFloatzh(r, sa,da, expon) \
+{ MP_INT arg; \
+ /* Does not allocate memory */ \
+ \
+ arg._mp_size = sa; \
+ arg._mp_alloc = ((StgArrWords *)da)->words; \
+ arg._mp_d = (unsigned long int *) (BYTE_ARR_CTS(da)); \
+ \
+ r = RET_PRIM_STGCALL2(StgFloat, __encodeFloat,&arg,(expon)); \
}
#endif /* FLOATS_AS_DOUBLES */
-#define encodeDoublezh(r, aa,sa,da, expon) \
-{ MP_INT arg; \
- /* Does not allocate memory */ \
- \
- arg._mp_alloc = aa; \
- arg._mp_size = sa; \
- arg._mp_d = (unsigned long int *) (BYTE_ARR_CTS(da)); \
- \
- r = RET_PRIM_STGCALL2(StgDouble, __encodeDouble,&arg,(expon));\
+#define encodeDoublezh(r, sa,da, expon) \
+{ MP_INT arg; \
+ /* Does not allocate memory */ \
+ \
+ arg._mp_size = sa; \
+ arg._mp_alloc = ((StgArrWords *)da)->words; \
+ arg._mp_d = (unsigned long int *) (BYTE_ARR_CTS(da)); \
+ \
+ r = RET_PRIM_STGCALL2(StgDouble, __encodeDouble,&arg,(expon)); \
}
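
Conceptually, __encodeFloat and __encodeDouble build mantissa * 2^expon from the Integer's limbs, the inverse of the decode operations below. A one-limb illustration with the C standard library (not the RTS routine):

#include <stdio.h>
#include <math.h>

int main(void)
{
    /* encodeDouble# with a single-limb mantissa: value = m * 2^e.
     * The real __encodeDouble walks every limb of the Integer; ldexp
     * is enough to show the idea for one limb.                       */
    double m = 6755399441055744.0;    /* 1.5 * 2^52, exactly representable */
    int    e = -52;

    printf("%g\n", ldexp(m, e));      /* prints 1.5 */
    return 0;
}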
/* The decode operations are out-of-line because they need to allocate