diff options
Diffstat (limited to 'ghc/includes/PrimOps.h')
-rw-r--r-- | ghc/includes/PrimOps.h | 284 |
1 files changed, 171 insertions, 113 deletions
diff --git a/ghc/includes/PrimOps.h b/ghc/includes/PrimOps.h index b121e4ac0b..53072c3c95 100644 --- a/ghc/includes/PrimOps.h +++ b/ghc/includes/PrimOps.h @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------------- - * $Id: PrimOps.h,v 1.18 1999/02/11 17:15:20 simonm Exp $ + * $Id: PrimOps.h,v 1.19 1999/02/17 15:57:30 simonm Exp $ * * (c) The GHC Team, 1998-1999 * @@ -82,15 +82,64 @@ I_ stg_div (I_ a, I_ b); #define remIntzh(r,a,b) r=(a)%(b) #define negateIntzh(r,a) r=-(a) -/* The following operations are the standard add,subtract and multiply - * except that they return a carry if the operation overflows. +/* ----------------------------------------------------------------------------- + * Int operations with carry. + * -------------------------------------------------------------------------- */ + +/* With some bit-twiddling, we can define int{Add,Sub}Czh portably in + * C, and without needing any comparisons. This may not be the + * fastest way to do it - if you have better code, please send it! --SDM + * + * Return : r = a + b, c = 0 if no overflow, 1 on overflow. + * + * We currently don't make use of the r value if c is != 0 (i.e. + * overflow), we just convert to big integers and try again. This + * could be improved by making r and c the correct values for + * plugging into a new J#. + */ +#define addIntCzh(r,c,a,b) \ +{ r = a + b; \ + c = ((StgWord)(~(a^b) & (a^r))) \ + >> (BITS_PER_BYTE * sizeof(I_) - 1); \ +} + + +#define subIntCzh(r,c,a,b) \ +{ r = a - b; \ + c = ((StgWord)((a^b) & (a^r))) \ + >> (BITS_PER_BYTE * sizeof(I_) - 1); \ +} + +/* Multiply with overflow checking. + * + * This is slightly more tricky - the usual sign rules for add/subtract + * don't apply. * - * They are all defined in terms of 32-bit integers and use the GCC - * 'long long' extension to get a 64-bit result. We'd like to use - * 64-bit integers on 64-bit architectures, but it seems that gcc's - * 'long long' type is set at 64-bits even on a 64-bit machine. + * On x86 hardware we use a hand-crafted assembly fragment to do the job. + * + * On other 32-bit machines we use gcc's 'long long' types, finding + * overflow with some careful bit-twiddling. + * + * On 64-bit machines where gcc's 'long long' type is also 64-bits, + * we use a crude approximation, testing whether either operand is + * larger than 32-bits; if neither is, then we go ahead with the + * multiplication. */ +#if i386_TARGET_ARCH + +#define mulIntCzh(r,c,a,b) \ +{ \ + __asm__("xor %1,%1\n\t \ + imull %2,%3\n\t \ + jno 1f\n\t \ + movl $1,%1\n\t \ + 1:" \ + : "=r" (r), "=r" (c) : "r" (a), "0" (b)); \ +} + +#elif SIZEOF_VOID_P == 4 + #ifdef WORDS_BIGENDIAN #define C 0 #define R 1 @@ -104,27 +153,37 @@ typedef union { StgInt32 i[2]; } long_long_u ; -#define addWithCarryzh(r,c,a,b) \ -{ long_long_u z; \ - z.l = a + b; \ - r = z.i[R]; \ - c = z.i[C]; \ +#define mulIntCzh(r,c,a,b) \ + long_long_u z; \ + z.l = (StgInt64)a * (StgInt64)b; \ + r = z.i[R]; \ + c = z.i[C]; \ + if (c == 0 || c == -1) { \ + c = ((StgWord)((a^b) ^ r)) \ + >> (BITS_PER_BYTE * sizeof(I_) - 1); \ + } \ } +/* Careful: the carry calculation above is extremely delicate. Make sure + * you test it thoroughly after changing it. + */ +#else -#define subWithCarryzh(r,c,a,b) \ -{ long_long_u z; \ - z.l = a + b; \ - r = z.i[R]; \ - c = z.i[C]; \ -} +#define HALF_INT (1 << (BITS_PER_BYTE * sizeof(I_) / 2)) + +#define stg_abs(a) ((a) < 0 ? -(a) : (a)) -#define mulWithCarryzh(r,c,a,b) \ -{ long_long_u z; \ - z.l = a * b; \ - r = z.i[R]; \ - c = z.i[C]; \ +#define mulIntCzh(r,c,a,b) \ +{ \ + if (stg_abs(a) >= HALF_INT \ + stg_abs(b) >= HALF_INT) { \ + c = 1; \ + } else { \ + r = a * b; \ + c = 0; \ + } \ } +#endif /* ----------------------------------------------------------------------------- Word PrimOps. @@ -248,50 +307,48 @@ typedef union { * to allocate any memory. */ -#define integer2Intzh(r, aa,sa,da) \ -{ MP_INT arg; \ - \ - arg._mp_alloc = (aa); \ - arg._mp_size = (sa); \ - arg._mp_d = (unsigned long int *) (BYTE_ARR_CTS(da)); \ - \ - (r) = RET_PRIM_STGCALL1(I_,mpz_get_si,&arg); \ +#define integer2Intzh(r, sa,da) \ +{ MP_INT arg; \ + \ + arg._mp_size = (sa); \ + arg._mp_alloc = ((StgArrWords *)da)->words; \ + arg._mp_d = (unsigned long int *) (BYTE_ARR_CTS(da)); \ + \ + (r) = RET_PRIM_STGCALL1(I_,mpz_get_si,&arg); \ } -#define integer2Wordzh(r, aa,sa,da) \ -{ MP_INT arg; \ - \ - arg._mp_alloc = (aa); \ - arg._mp_size = (sa); \ - arg._mp_d = (unsigned long int *) (BYTE_ARR_CTS(da)); \ - \ - (r) = RET_PRIM_STGCALL1(I_,mpz_get_ui,&arg); \ +#define integer2Wordzh(r, sa,da) \ +{ MP_INT arg; \ + \ + arg._mp_size = (sa); \ + arg._mp_alloc = ((StgArrWords *)da)->words; \ + arg._mp_d = (unsigned long int *) (BYTE_ARR_CTS(da)); \ + \ + (r) = RET_PRIM_STGCALL1(I_,mpz_get_ui,&arg); \ } -#define cmpIntegerzh(r, a1,s1,d1, a2,s2,d2) \ -{ MP_INT arg1; \ - MP_INT arg2; \ - \ - arg1._mp_alloc= (a1); \ - arg1._mp_size = (s1); \ - arg1._mp_d = (unsigned long int *) (BYTE_ARR_CTS(d1)); \ - arg2._mp_alloc= (a2); \ - arg2._mp_size = (s2); \ - arg2._mp_d = (unsigned long int *) (BYTE_ARR_CTS(d2)); \ - \ - (r) = RET_PRIM_STGCALL2(I_,mpz_cmp,&arg1,&arg2); \ +#define cmpIntegerzh(r, s1,d1, s2,d2) \ +{ MP_INT arg1; \ + MP_INT arg2; \ + \ + arg1._mp_size = (s1); \ + arg1._mp_alloc= ((StgArrWords *)d1)->words; \ + arg1._mp_d = (unsigned long int *) (BYTE_ARR_CTS(d1)); \ + arg2._mp_size = (s2); \ + arg2._mp_alloc= ((StgArrWords *)d2)->words; \ + arg2._mp_d = (unsigned long int *) (BYTE_ARR_CTS(d2)); \ + \ + (r) = RET_PRIM_STGCALL2(I_,mpz_cmp,&arg1,&arg2); \ } -/* A glorious hack: calling mpz_neg would entail allocation and - * copying, but by looking at what mpz_neg actually does, we can - * derive a better version: - */ - -#define negateIntegerzh(ra, rs, rd, a, s, d) \ -{ \ - (ra) = (a); \ - (rs) = -(s); \ - (rd) = d; \ +#define cmpIntegerIntzh(r, s,d, i) \ +{ MP_INT arg; \ + \ + arg._mp_size = (s); \ + arg._mp_alloc = ((StgArrWords *)d)->words; \ + arg._mp_d = (unsigned long int *) (BYTE_ARR_CTS(d)); \ + \ + (r) = RET_PRIM_STGCALL2(I_,mpz_cmp_si,&arg,i); \ } /* The rest are all out-of-line: -------- */ @@ -309,11 +366,8 @@ EF_(int2Integerzh_fast); EF_(word2Integerzh_fast); EF_(addr2Integerzh_fast); -/* Floating-point encodings/decodings */ -EF_(encodeFloatzh_fast); +/* Floating-point decodings */ EF_(decodeFloatzh_fast); - -EF_(encodeDoublezh_fast); EF_(decodeDoublezh_fast); /* ----------------------------------------------------------------------------- @@ -322,37 +376,41 @@ EF_(decodeDoublezh_fast); #ifdef SUPPORT_LONG_LONGS -#define integerToWord64zh(r, aa,sa,da) \ -{ unsigned long int* d; \ - StgNat64 res; \ - \ - d = (unsigned long int *) (BYTE_ARR_CTS(da)); \ - if ( (aa) == 0 ) { \ - res = (LW_)0; \ - } else if ( (aa) == 1) { \ - res = (LW_)d[0]; \ - } else { \ - res = (LW_)d[0] + (LW_)d[1] * 0x100000000ULL; \ - } \ - (r) = res; \ +#define integerToWord64zh(r, sa,da) \ +{ unsigned long int* d; \ + I_ aa; \ + StgNat64 res; \ + \ + d = (unsigned long int *) (BYTE_ARR_CTS(da)); \ + aa = ((StgArrWords *)da)->words; \ + if ( (aa) == 0 ) { \ + res = (LW_)0; \ + } else if ( (aa) == 1) { \ + res = (LW_)d[0]; \ + } else { \ + res = (LW_)d[0] + (LW_)d[1] * 0x100000000ULL; \ + } \ + (r) = res; \ } -#define integerToInt64zh(r, aa,sa,da) \ -{ unsigned long int* d; \ - StgInt64 res; \ - \ - d = (unsigned long int *) (BYTE_ARR_CTS(da)); \ - if ( (aa) == 0 ) { \ - res = (LI_)0; \ - } else if ( (aa) == 1) { \ - res = (LI_)d[0]; \ - } else { \ - res = (LI_)d[0] + (LI_)d[1] * 0x100000000LL; \ - if ( sa < 0 ) { \ - res = (LI_)-res; \ - } \ - } \ - (r) = res; \ +#define integerToInt64zh(r, sa,da) \ +{ unsigned long int* d; \ + I_ aa; \ + StgInt64 res; \ + \ + d = (unsigned long int *) (BYTE_ARR_CTS(da)); \ + aa = ((StgArrWords *)da)->words; \ + if ( (aa) == 0 ) { \ + res = (LI_)0; \ + } else if ( (aa) == 1) { \ + res = (LI_)d[0]; \ + } else { \ + res = (LI_)d[0] + (LI_)d[1] * 0x100000000LL; \ + if ( sa < 0 ) { \ + res = (LI_)-res; \ + } \ + } \ + (r) = res; \ } /* Conversions */ @@ -539,29 +597,29 @@ EF_(newArrayzh_fast); #include "ieee-flpt.h" #if FLOATS_AS_DOUBLES /* i.e. 64-bit machines */ -#define encodeFloatzh(r, aa,sa,da, expon) encodeDoublezh(r, aa,sa,da, expon) +#define encodeFloatzh(r, sa,da, expon) encodeDoublezh(r, sa,da, expon) #else -#define encodeFloatzh(r, aa,sa,da, expon) \ -{ MP_INT arg; \ - /* Does not allocate memory */ \ - \ - arg._mp_alloc = aa; \ - arg._mp_size = sa; \ - arg._mp_d = (unsigned long int *) (BYTE_ARR_CTS(da)); \ - \ - r = RET_PRIM_STGCALL2(StgFloat, __encodeFloat,&arg,(expon));\ +#define encodeFloatzh(r, sa,da, expon) \ +{ MP_INT arg; \ + /* Does not allocate memory */ \ + \ + arg._mp_size = sa; \ + arg._mp_alloc = ((StgArrWords *)da)->words; \ + arg._mp_d = (unsigned long int *) (BYTE_ARR_CTS(da)); \ + \ + r = RET_PRIM_STGCALL2(StgFloat, __encodeFloat,&arg,(expon)); \ } #endif /* FLOATS_AS_DOUBLES */ -#define encodeDoublezh(r, aa,sa,da, expon) \ -{ MP_INT arg; \ - /* Does not allocate memory */ \ - \ - arg._mp_alloc = aa; \ - arg._mp_size = sa; \ - arg._mp_d = (unsigned long int *) (BYTE_ARR_CTS(da)); \ - \ - r = RET_PRIM_STGCALL2(StgDouble, __encodeDouble,&arg,(expon));\ +#define encodeDoublezh(r, sa,da, expon) \ +{ MP_INT arg; \ + /* Does not allocate memory */ \ + \ + arg._mp_size = sa; \ + arg._mp_alloc = ((StgArrWords *)da)->words; \ + arg._mp_d = (unsigned long int *) (BYTE_ARR_CTS(da)); \ + \ + r = RET_PRIM_STGCALL2(StgDouble, __encodeDouble,&arg,(expon)); \ } /* The decode operations are out-of-line because they need to allocate |