/* mpfr_sub1sp -- internal function to perform a "real" subtraction All the op must have the same precision Copyright 2003-2021 Free Software Foundation, Inc. Contributed by the AriC and Caramba projects, INRIA. This file is part of the GNU MPFR Library. The GNU MPFR Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. The GNU MPFR Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ #define MPFR_NEED_LONGLONG_H #include "mpfr-impl.h" /* define MPFR_FULLSUB to use alternate code in mpfr_sub1sp2 and mpfr_sub1sp2n (see comments in mpfr_sub1sp2) */ /* #define MPFR_FULLSUB */ #if MPFR_WANT_ASSERT >= 2 /* Check the result of mpfr_sub1sp with mpfr_sub1. Note: mpfr_sub1sp itself has two algorithms: one always valid and one faster for small precisions (up to 3 limbs). The latter one is disabled if MPFR_GENERIC_ABI is defined. When MPFR_WANT_ASSERT >= 2, it could be interesting to compare the results of these different algorithms. For the time being, this is currently done by running the same code on the same data with and without MPFR_GENERIC_ABI defined, where we have the following comparisons in small precisions: mpfr_sub1sp slow <-> mpfr_sub1 when MPFR_GENERIC_ABI is defined; mpfr_sub1sp fast <-> mpfr_sub1 when MPFR_GENERIC_ABI is not defined. By transitivity, the absence of failures implies that the 3 results are the same. 
*/

int mpfr_sub1sp_ref (mpfr_ptr, mpfr_srcptr, mpfr_srcptr, mpfr_rnd_t);

/* Debug wrapper (built only when MPFR_WANT_ASSERT >= 2): compute b - c with
   both the generic reference code (mpfr_sub1) and the optimized code
   (mpfr_sub1sp_ref), then abort if the results, the ternary values, or the
   flags differ.  The external contract is that of mpfr_sub1sp: a <- b - c
   rounded according to rnd_mode, returning the ternary value. */
int
mpfr_sub1sp (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode)
{
  mpfr_t tmpa, tmpb, tmpc;
  mpfr_flags_t old_flags, flags, flags2;
  int inexb, inexc, inexact, inexact2;

  /* MPFR_RNDF does not specify a unique result, so the two implementations
     may legitimately differ: skip the cross-check. */
  if (rnd_mode == MPFR_RNDF)
    return mpfr_sub1sp_ref (a, b, c, rnd_mode);

  old_flags = __gmpfr_flags;

  mpfr_init2 (tmpa, MPFR_PREC (a));
  mpfr_init2 (tmpb, MPFR_PREC (b));
  mpfr_init2 (tmpc, MPFR_PREC (c));

  /* The copies have the same precision as the operands, so they must be
     exact and must leave the flags unchanged. */
  inexb = mpfr_set (tmpb, b, MPFR_RNDN);
  MPFR_ASSERTN (inexb == 0);

  inexc = mpfr_set (tmpc, c, MPFR_RNDN);
  MPFR_ASSERTN (inexc == 0);

  MPFR_ASSERTN (__gmpfr_flags == old_flags);

  /* Reference result and flags. */
  inexact2 = mpfr_sub1 (tmpa, tmpb, tmpc, rnd_mode);
  flags2 = __gmpfr_flags;

  /* Restart from the same flag state for the optimized code. */
  __gmpfr_flags = old_flags;
  inexact = mpfr_sub1sp_ref (a, b, c, rnd_mode);
  flags = __gmpfr_flags;

  /* Convert the ternary values to (-1,0,1). */
  inexact2 = VSIGN (inexact2);
  inexact = VSIGN (inexact);

  if (! mpfr_equal_p (tmpa, a) || inexact != inexact2 || flags != flags2)
    {
      fprintf (stderr, "sub1 & sub1sp return different values for %s\n"
               "Prec_a = %lu, Prec_b = %lu, Prec_c = %lu\nB = ",
               mpfr_print_rnd_mode (rnd_mode),
               (unsigned long) MPFR_PREC (a),
               (unsigned long) MPFR_PREC (b),
               (unsigned long) MPFR_PREC (c));
      mpfr_fdump (stderr, tmpb);
      fprintf (stderr, "C = ");
      mpfr_fdump (stderr, tmpc);
      fprintf (stderr, "sub1 : ");
      mpfr_fdump (stderr, tmpa);
      fprintf (stderr, "sub1sp: ");
      mpfr_fdump (stderr, a);
      fprintf (stderr, "sub1 : ternary = %2d, flags =", inexact2);
      flags_fout (stderr, flags2);
      fprintf (stderr, "sub1sp: ternary = %2d, flags =", inexact);
      flags_fout (stderr, flags);
      MPFR_ASSERTN (0);
    }

  mpfr_clears (tmpa, tmpb, tmpc, (mpfr_ptr) 0);
  return inexact;
}
# define mpfr_sub1sp mpfr_sub1sp_ref
#endif  /* MPFR_WANT_ASSERT >= 2 */

#if !defined(MPFR_GENERIC_ABI)

/* the sub1sp1_extracted.c is not ready yet */
#if 0 && defined(MPFR_WANT_PROVEN_CODE) && GMP_NUMB_BITS == 64 && \
    UINT_MAX == 0xffffffff && MPFR_PREC_BITS == 64 && \
    _MPFR_PREC_FORMAT == 3 && _MPFR_EXP_FORMAT == \
_MPFR_PREC_FORMAT
/* The code assumes that mp_limb_t has 64 bits exactly, unsigned int
   has 32 bits exactly, mpfr_prec_t and mpfr_exp_t are of type long,
   which has 64 bits exactly. */
#include "sub1sp1_extracted.c"
#else
/* special code for p < GMP_NUMB_BITS */
/* mpfr_sub1sp1: a <- b - c for operands that all have the same precision
   p, with p strictly less than one limb.  Returns the ternary value (with
   the MPFR_RET convention); may call mpfr_underflow.  The sign of a and
   the exponent field of a are set here. */
static int
mpfr_sub1sp1 (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode,
              mpfr_prec_t p)
{
  mpfr_exp_t bx = MPFR_GET_EXP (b);
  mpfr_exp_t cx = MPFR_GET_EXP (c);
  mp_limb_t *ap = MPFR_MANT(a);
  mp_limb_t *bp = MPFR_MANT(b);
  mp_limb_t *cp = MPFR_MANT(c);
  mpfr_prec_t cnt, INITIALIZED(sh);
  mp_limb_t rb; /* round bit */
  mp_limb_t sb; /* sticky bit */
  mp_limb_t a0;
  mp_limb_t mask;
  mpfr_uexp_t d;

  MPFR_ASSERTD(p < GMP_NUMB_BITS);

  if (bx == cx)
    {
      /* Same exponent: the subtraction of the significands is exact. */
      if (MPFR_UNLIKELY(bp[0] == cp[0])) /* result is zero */
        {
          /* The sign of an exact zero result depends on the rounding mode
             (IEEE 754 rule: -0 for RNDD, +0 otherwise). */
          if (rnd_mode == MPFR_RNDD)
            MPFR_SET_NEG(a);
          else
            MPFR_SET_POS(a);
          MPFR_SET_ZERO(a);
          MPFR_RET (0);
        }
      else if (cp[0] > bp[0]) /* borrow: |c| > |b| */
        {
          a0 = cp[0] - bp[0];
          MPFR_SET_OPPOSITE_SIGN (a, b);
        }
      else /* bp[0] > cp[0] */
        {
          a0 = bp[0] - cp[0];
          MPFR_SET_SAME_SIGN (a, b);
        }
      /* now a0 != 0 */
      MPFR_ASSERTD(a0 != 0);
      /* Normalize: shift out the leading zeros and decrease the exponent
         accordingly. */
      count_leading_zeros (cnt, a0);
      ap[0] = a0 << cnt;
      bx -= cnt;
      rb = sb = 0;
      /* Note: sh is not initialized, but will not be used in this case. */
    }
  else
    {
      if (bx < cx) /* swap b and c */
        {
          mpfr_exp_t tx;
          mp_limb_t *tp;
          tx = bx; bx = cx; cx = tx;
          tp = bp; bp = cp; cp = tp;
          MPFR_SET_OPPOSITE_SIGN (a, b);
        }
      else
        {
          MPFR_SET_SAME_SIGN (a, b);
        }
      MPFR_ASSERTD (bx > cx);
      d = (mpfr_uexp_t) bx - cx; /* exponent difference, >= 1 */
      sh = GMP_NUMB_BITS - p;    /* number of unused low bits in the limb */
      mask = MPFR_LIMB_MASK(sh);
      if (d < GMP_NUMB_BITS)
        {
          sb = - (cp[0] << (GMP_NUMB_BITS - d)); /* neglected part of -c */
          /* Note that "a0 = bp[0] - (cp[0] >> d) - (sb != 0);" is faster
             on some other machines and has no immediate dependencies for
             the first subtraction.  In the future, make sure that the code
             is recognized as a *single* subtraction with borrow and/or use
             a builtin when available (currently provided by Clang, but not
             by GCC); create a new macro for that.  See the TODO later.
             Note also that with Clang, the constant 0 for the first
             subtraction instead of a variable breaks the optimization:
             https://llvm.org/bugs/show_bug.cgi?id=31754 */
          a0 = bp[0] - (sb != 0) - (cp[0] >> d);
          /* a0 cannot be zero here since:
             a) if d >= 2, then a0 >= 2^(w-1) - (2^(w-2)-1) with
                w = GMP_NUMB_BITS, thus a0 - 1 >= 2^(w-2),
             b) if d = 1, then since p < GMP_NUMB_BITS we have sb=0. */
          MPFR_ASSERTD(a0 > 0);
          count_leading_zeros (cnt, a0);
          if (cnt)
            a0 = (a0 << cnt) | (sb >> (GMP_NUMB_BITS - cnt));
          sb <<= cnt;
          bx -= cnt;
          /* sh > 0 since p < GMP_NUMB_BITS */
          MPFR_ASSERTD(sh > 0);
          /* Extract round bit and fold the remaining low bits into sb. */
          rb = a0 & (MPFR_LIMB_ONE << (sh - 1));
          sb |= (a0 & mask) ^ rb;
          ap[0] = a0 & ~mask;
        }
      else /* d >= GMP_NUMB_BITS */
        {
          /* c is entirely below the last bit of b. */
          if (bp[0] > MPFR_LIMB_HIGHBIT)
            {
              /* We compute b - ulp(b), and the remainder ulp(b) - c
                 satisfies: 1/2 ulp(b) < ulp(b) - c < ulp(b),
                 thus rb = sb = 1. */
              ap[0] = bp[0] - (MPFR_LIMB_ONE << sh);
              rb = 1;
            }
          else
            {
              /* Warning: since we have an exponent decrease, when
                 p = GMP_NUMB_BITS - 1 and d = GMP_NUMB_BITS, the round bit
                 corresponds to the upper bit of -c.  In that case rb = 0
                 and sb = 1, except when c0 = MPFR_LIMB_HIGHBIT where
                 rb = 1 and sb = 0. */
              rb = sh > 1 || d > GMP_NUMB_BITS
                || cp[0] == MPFR_LIMB_HIGHBIT;
              /* sb=1 below is incorrect when p = GMP_NUMB_BITS - 1,
                 d = GMP_NUMB_BITS and c0 = MPFR_LIMB_HIGHBIT, but in
                 that case the even rule would round up too. */
              ap[0] = ~mask;
              bx --;
              /* Warning: if d = GMP_NUMB_BITS and c0 = 1000...000, then
                 b0 - c0 = |0111...111|1000...000|, which after the shift
                 becomes |111...111|000...000| thus if p = GMP_NUMB_BITS-1
                 we have rb = 1 but sb = 0.  However in this case the round
                 even rule will round up, which is what we get with sb = 1:
                 the final result will be correct, while sb is incorrect. */
            }
          sb = 1;
        }
    }

  /* now perform rounding */

  /* Warning: MPFR considers underflow *after* rounding with an unbounded
     exponent range.  However since b and c have same precision p, they are
     multiples of 2^(emin-p), likewise for b-c.  Thus if bx < emin, the
     subtraction (with an unbounded exponent range) is exact, so that bx is
     also the exponent after rounding with an unbounded exponent range. */
  if (MPFR_UNLIKELY(bx < __gmpfr_emin))
    {
      /* For RNDN, mpfr_underflow always rounds away, thus for
         |a| <= 2^(emin-2) we have to change to RNDZ.  This corresponds to:
         (a) either bx < emin - 1
         (b) or bx = emin - 1 and ap[0] = 1000....000 (in this case
             necessarily rb = sb = 0 since b-c is multiple of 2^(emin-p) */
      if (rnd_mode == MPFR_RNDN &&
          (bx < __gmpfr_emin - 1 || ap[0] == MPFR_LIMB_HIGHBIT))
        {
          MPFR_ASSERTD(rb == 0 && sb == 0);
          rnd_mode = MPFR_RNDZ;
        }
      return mpfr_underflow (a, rnd_mode, MPFR_SIGN(a));
    }

  MPFR_SET_EXP (a, bx);
  if ((rb == 0 && sb == 0) || rnd_mode == MPFR_RNDF)
    MPFR_RET (0);
  else if (rnd_mode == MPFR_RNDN)
    {
      /* Round to nearest, ties to even: truncate when below the half-way
         point, or exactly half-way with an even low bit. */
      if (rb == 0 || (sb == 0 && (ap[0] & (MPFR_LIMB_ONE << sh)) == 0))
        goto truncate;
      else
        goto add_one_ulp;
    }
  else if (MPFR_IS_LIKE_RNDZ(rnd_mode, MPFR_IS_NEG(a)))
    {
    truncate:
      MPFR_RET(-MPFR_SIGN(a));
    }
  else /* round away from zero */
    {
    add_one_ulp:
      ap[0] += MPFR_LIMB_ONE << sh;
      if (MPFR_UNLIKELY(ap[0] == 0))
        {
          /* Carry out of the significand: renormalize. */
          ap[0] = MPFR_LIMB_HIGHBIT;
          /* Note: bx+1 cannot exceed __gmpfr_emax, since |a| <= |b|, thus
             bx+1 is at most equal to the original exponent of b. */
          MPFR_ASSERTD(bx + 1 <= __gmpfr_emax);
          MPFR_SET_EXP (a, bx + 1);
        }
      MPFR_RET(MPFR_SIGN(a));
    }
}
#endif /* MPFR_WANT_PROVEN_CODE */

/* special code for p = GMP_NUMB_BITS */
/* mpfr_sub1sp1n: same contract as mpfr_sub1sp1, but specialized for the
   precision being exactly one full limb (no free low bits, so the round
   bit lives in the neglected limb). */
static int
mpfr_sub1sp1n (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode)
{
  mpfr_exp_t bx = MPFR_GET_EXP (b);
  mpfr_exp_t cx = MPFR_GET_EXP (c);
  mp_limb_t *ap = MPFR_MANT(a);
  mp_limb_t *bp = MPFR_MANT(b);
  mp_limb_t *cp = MPFR_MANT(c);
  mpfr_prec_t cnt;
  mp_limb_t rb; /* round bit */
  mp_limb_t sb; /* sticky bit */
  mp_limb_t a0;
  mpfr_uexp_t d;

  MPFR_ASSERTD(MPFR_PREC(a) == GMP_NUMB_BITS);
  MPFR_ASSERTD(MPFR_PREC(b) == GMP_NUMB_BITS);
  MPFR_ASSERTD(MPFR_PREC(c) == GMP_NUMB_BITS);

  if (bx == cx)
    {
      /* Same exponent: exact subtraction of the significands. */
      a0 = bp[0] - cp[0];
      if (a0 == 0) /* result is zero */
        {
          if (rnd_mode == MPFR_RNDD)
            MPFR_SET_NEG(a);
          else
            MPFR_SET_POS(a);
          MPFR_SET_ZERO(a);
          MPFR_RET (0);
        }
      else if (a0 > bp[0]) /* borrow: |c| > |b| */
        {
          MPFR_SET_OPPOSITE_SIGN (a, b);
          a0 = -a0;
        }
      else /* bp[0] > cp[0] */
        MPFR_SET_SAME_SIGN (a, b);
      /* now a0 != 0 */
      MPFR_ASSERTD(a0 != 0);
      count_leading_zeros (cnt, a0);
      ap[0] = a0 << cnt;
      bx -= cnt;
      rb = sb = 0;
    }
  else
    {
      if (bx < cx) /* swap b and c */
        {
          mpfr_exp_t tx;
          mp_limb_t *tp;
          tx = bx; bx = cx; cx = tx;
          tp = bp; bp = cp; cp = tp;
          MPFR_SET_OPPOSITE_SIGN (a, b);
        }
      else
        {
          MPFR_SET_SAME_SIGN (a, b);
        }
      MPFR_ASSERTD (bx > cx);
      d = (mpfr_uexp_t) bx - cx;
      if (d < GMP_NUMB_BITS)
        {
          sb = - (cp[0] << (GMP_NUMB_BITS - d)); /* neglected part of -c */
          a0 = bp[0] - (sb != 0) - (cp[0] >> d);
          /* a0 can only be zero when d=1, b0 = B/2, and c0 = B-1, where
             B = 2^GMP_NUMB_BITS, thus b0 - c0/2 = 1/2 */
          if (a0 == MPFR_LIMB_ZERO)
            {
              bx -= GMP_NUMB_BITS;
              ap[0] = MPFR_LIMB_HIGHBIT;
              rb = sb = 0;
            }
          else
            {
              count_leading_zeros (cnt, a0);
              if (cnt)
                a0 = (a0 << cnt) | (sb >> (GMP_NUMB_BITS - cnt));
              sb <<= cnt;
              bx -= cnt;
              /* The round bit is the top bit of the neglected part. */
              rb = sb & MPFR_LIMB_HIGHBIT;
              sb &= ~MPFR_LIMB_HIGHBIT;
              ap[0] = a0;
            }
        }
      else /* d >= GMP_NUMB_BITS */
        {
          /* We compute b - ulp(b) */
          if (bp[0] > MPFR_LIMB_HIGHBIT)
            {
              /* If d = GMP_NUMB_BITS, rb = 0 and sb = 1,
                 unless c0 = MPFR_LIMB_HIGHBIT in which case
                 rb = 1 and sb = 0.
                 If d > GMP_NUMB_BITS, rb = sb = 1. */
              rb = d > GMP_NUMB_BITS || cp[0] == MPFR_LIMB_HIGHBIT;
              sb = d > GMP_NUMB_BITS || cp[0] != MPFR_LIMB_HIGHBIT;
              ap[0] = bp[0] - MPFR_LIMB_ONE;
            }
          else
            {
              /* Warning: in this case a0 is shifted by one!
                 If d=GMP_NUMB_BITS:
                 (a) if c0 = MPFR_LIMB_HIGHBIT, a0 = 111...111, rb = sb = 0
                 (b) otherwise, a0 = 111...110, rb = -c0 >= 01000...000,
                     sb = (-c0) << 2
                 If d=GMP_NUMB_BITS+1: a0 = 111...111
                 (c) if c0 = MPFR_LIMB_HIGHBIT, rb = 1 and sb = 0
                 (d) otherwise rb = 0 and sb = 1
                 If d > GMP_NUMB_BITS+1:
                 (e) a0 = 111...111, rb = sb = 1 */
              bx --;
              if (d == GMP_NUMB_BITS && cp[0] > MPFR_LIMB_HIGHBIT)
                { /* case (b) */
                  rb = MPFR_LIMB(-cp[0]) >= (MPFR_LIMB_HIGHBIT >> 1);
                  sb = MPFR_LIMB(-cp[0]) << 2;
                  ap[0] = -(MPFR_LIMB_ONE << 1);
                }
              else /* cases (a), (c), (d) and (e) */
                {
                  /* rb=1 in case (e) and case (c) */
                  rb = d > GMP_NUMB_BITS + 1
                    || (d == GMP_NUMB_BITS + 1
                        && cp[0] == MPFR_LIMB_HIGHBIT);
                  /* sb = 1 in case (d) and (e) */
                  sb = d > GMP_NUMB_BITS + 1
                    || (d == GMP_NUMB_BITS + 1
                        && cp[0] > MPFR_LIMB_HIGHBIT);
                  /* Warning: only set ap[0] last, otherwise in case ap=cp,
                     the above comparisons involving cp[0] would be wrong */
                  ap[0] = -MPFR_LIMB_ONE;
                }
            }
        }
    }

  /* now perform rounding */

  /* Warning: MPFR considers underflow *after* rounding with an unbounded
     exponent range.  However since b and c have same precision p, they are
     multiples of 2^(emin-p), likewise for b-c.  Thus if bx < emin, the
     subtraction (with an unbounded exponent range) is exact, so that bx is
     also the exponent after rounding with an unbounded exponent range. */
  if (MPFR_UNLIKELY(bx < __gmpfr_emin))
    {
      /* For RNDN, mpfr_underflow always rounds away, thus for
         |a| <= 2^(emin-2) we have to change to RNDZ.  This corresponds to:
         (a) either bx < emin - 1
         (b) or bx = emin - 1 and ap[0] = 1000....000 (in this case
             necessarily rb = sb = 0 since b-c is multiple of 2^(emin-p) */
      if (rnd_mode == MPFR_RNDN &&
          (bx < __gmpfr_emin - 1 || ap[0] == MPFR_LIMB_HIGHBIT))
        {
          MPFR_ASSERTD(rb == 0 && sb == 0);
          rnd_mode = MPFR_RNDZ;
        }
      return mpfr_underflow (a, rnd_mode, MPFR_SIGN(a));
    }

  MPFR_SET_EXP (a, bx);
  if ((rb == 0 && sb == 0) || rnd_mode == MPFR_RNDF)
    MPFR_RET (0);
  else if (rnd_mode == MPFR_RNDN)
    {
      /* Ties to even: with a full-limb precision the "even" bit is the
         least significant bit of the stored limb. */
      if (rb == 0 || (sb == 0 && (ap[0] & MPFR_LIMB_ONE) == 0))
        goto truncate;
      else
        goto add_one_ulp;
    }
  else if (MPFR_IS_LIKE_RNDZ(rnd_mode, MPFR_IS_NEG(a)))
    {
    truncate:
      MPFR_RET(-MPFR_SIGN(a));
    }
  else /* round away from zero */
    {
    add_one_ulp:
      ap[0] += MPFR_LIMB_ONE;
      if (MPFR_UNLIKELY(ap[0] == 0))
        {
          /* Carry out of the significand: renormalize. */
          ap[0] = MPFR_LIMB_HIGHBIT;
          /* Note: bx+1 cannot exceed __gmpfr_emax, since |a| <= |b|, thus
             bx+1 is at most equal to the original exponent of b. */
          MPFR_ASSERTD(bx + 1 <= __gmpfr_emax);
          MPFR_SET_EXP (a, bx + 1);
        }
      MPFR_RET(MPFR_SIGN(a));
    }
}

/* special code for GMP_NUMB_BITS < p < 2*GMP_NUMB_BITS */
/* mpfr_sub1sp2: same contract as mpfr_sub1sp1, for two-limb significands
   with a precision strictly between one and two limbs. */
static int
mpfr_sub1sp2 (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode,
              mpfr_prec_t p)
{
  mpfr_exp_t bx = MPFR_GET_EXP (b);
  mpfr_exp_t cx = MPFR_GET_EXP (c);
  mp_limb_t *ap = MPFR_MANT(a);
  mp_limb_t *bp = MPFR_MANT(b);
  mp_limb_t *cp = MPFR_MANT(c);
  mpfr_prec_t cnt, INITIALIZED(sh);
  mp_limb_t rb; /* round bit */
  mp_limb_t sb; /* sticky bit */
  mp_limb_t mask, a0, a1;
  mpfr_uexp_t d;

  MPFR_ASSERTD(GMP_NUMB_BITS < p && p < 2 * GMP_NUMB_BITS);

  if (bx == cx) /* subtraction is exact in this case */
    {
      /* first compute a0: if the compiler is smart enough, it will use the
         generated borrow to get for free the term (bp[0] < cp[0]) */
      a0 = bp[0] - cp[0];
      a1 = bp[1] - cp[1] - (bp[0] < cp[0]);
      if (a1 == 0 && a0 == 0) /* result is zero */
        {
          if (rnd_mode == MPFR_RNDD)
            MPFR_SET_NEG(a);
          else
            MPFR_SET_POS(a);
          MPFR_SET_ZERO(a);
          MPFR_RET (0);
        }
      else if (a1 >= bp[1]) /* borrow: |c| > |b| */
        {
          MPFR_SET_OPPOSITE_SIGN
(a, b); /* a = b-c mod 2^(2*GMP_NUMB_BITS) */ a0 = -a0; a1 = -a1 - (a0 != 0); } else /* bp[0] > cp[0] */ MPFR_SET_SAME_SIGN (a, b); if (a1 == 0) { a1 = a0; a0 = 0; bx -= GMP_NUMB_BITS; } /* now a1 != 0 */ MPFR_ASSERTD(a1 != 0); count_leading_zeros (cnt, a1); if (cnt) { ap[1] = (a1 << cnt) | (a0 >> (GMP_NUMB_BITS - cnt)); ap[0] = a0 << cnt; bx -= cnt; } else { ap[1] = a1; ap[0] = a0; } rb = sb = 0; /* Note: sh is not initialized, but will not be used in this case. */ } else { mp_limb_t t; if (bx < cx) /* swap b and c */ { mpfr_exp_t tx; mp_limb_t *tp; tx = bx; bx = cx; cx = tx; tp = bp; bp = cp; cp = tp; MPFR_SET_OPPOSITE_SIGN (a, b); } else { MPFR_SET_SAME_SIGN (a, b); } MPFR_ASSERTD (bx > cx); d = (mpfr_uexp_t) bx - cx; sh = 2 * GMP_NUMB_BITS - p; mask = MPFR_LIMB_MASK(sh); if (d < GMP_NUMB_BITS) { t = (cp[1] << (GMP_NUMB_BITS - d)) | (cp[0] >> d); /* TODO: Change the code to generate a full subtraction with borrow, avoiding the test on sb and the corresponding correction. Note that Clang has builtins: https://clang.llvm.org/docs/LanguageExtensions.html#multiprecision-arithmetic-builtins but the generated code may not be good: https://llvm.org/bugs/show_bug.cgi?id=20748 With the current source code, Clang generates on x86_64: 1. sub %rsi,%rbx for the first subtraction in a1; 2. sub %rdi,%rax for the subtraction in a0; 3. sbb $0x0,%rbx for the second subtraction in a1, i.e. just subtracting the borrow out from (2). So, Clang recognizes the borrow, but doesn't merge (1) and (3). Bug: https://llvm.org/bugs/show_bug.cgi?id=25858 For GCC: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79173 */ #ifndef MPFR_FULLSUB a0 = bp[0] - t; a1 = bp[1] - (cp[1] >> d) - (bp[0] < t); sb = cp[0] << (GMP_NUMB_BITS - d); /* neglected part of c */ if (sb) { a1 -= (a0 == 0); a0 --; /* a = a1,a0 cannot become zero here, since: a) if d >= 2, then a1 >= 2^(w-1) - (2^(w-2)-1) with w = GMP_NUMB_BITS, thus a1 - 1 >= 2^(w-2), b) if d = 1, then since p < 2*GMP_NUMB_BITS we have sb=0. 
*/ MPFR_ASSERTD(a1 > 0 || a0 > 0); sb = -sb; /* 2^GMP_NUMB_BITS - sb */ } #else sb = - (cp[0] << (GMP_NUMB_BITS - d)); a0 = bp[0] - t - (sb != 0); a1 = bp[1] - (cp[1] >> d) - (bp[0] < t || (bp[0] == t && sb != 0)); #endif if (a1 == 0) { /* this implies d=1, which in turn implies sb=0 */ MPFR_ASSERTD(sb == 0); a1 = a0; a0 = 0; /* should be a0 = sb */ /* since sb=0 already, no need to set it to 0 */ bx -= GMP_NUMB_BITS; } /* now a1 != 0 */ MPFR_ASSERTD(a1 != 0); count_leading_zeros (cnt, a1); if (cnt) { ap[1] = (a1 << cnt) | (a0 >> (GMP_NUMB_BITS - cnt)); a0 = (a0 << cnt) | (sb >> (GMP_NUMB_BITS - cnt)); sb <<= cnt; bx -= cnt; } else ap[1] = a1; /* sh > 0 since p < 2*GMP_NUMB_BITS */ MPFR_ASSERTD(sh > 0); rb = a0 & (MPFR_LIMB_ONE << (sh - 1)); sb |= (a0 & mask) ^ rb; ap[0] = a0 & ~mask; } else if (d < 2 * GMP_NUMB_BITS) { /* GMP_NUMB_BITS <= d < 2*GMP_NUMB_BITS */ /* warning: the most significant bit of sb might become the least significant bit of a0 below */ sb = (d == GMP_NUMB_BITS) ? cp[0] : (cp[1] << (2*GMP_NUMB_BITS - d)) | (cp[0] != 0); t = (cp[1] >> (d - GMP_NUMB_BITS)) + (sb != 0); /* warning: t might overflow to 0 if d=GMP_NUMB_BITS and sb <> 0 */ a0 = bp[0] - t; a1 = bp[1] - (bp[0] < t) - (t == 0 && sb != 0); sb = -sb; /* since bp[1] has its most significant bit set, we can have an exponent decrease of at most one */ if (a1 < MPFR_LIMB_HIGHBIT) { ap[1] = (a1 << 1) | (a0 >> (GMP_NUMB_BITS - 1)); a0 = (a0 << 1) | (sb >> (GMP_NUMB_BITS - 1)); sb <<= 1; bx --; } else ap[1] = a1; rb = a0 & (MPFR_LIMB_ONE << (sh - 1)); sb |= (a0 & mask) ^ rb; ap[0] = a0 & ~mask; } else /* d >= 2*GMP_NUMB_BITS */ { /* We compute b - ulp(b), and the remainder ulp(b) - c satisfies: 1/2 ulp(b) < ulp(b) - c < ulp(b), thus rb = sb = 1, unless we had an exponent decrease. 
*/ t = MPFR_LIMB_ONE << sh; a0 = bp[0] - t; a1 = bp[1] - (bp[0] < t); if (a1 < MPFR_LIMB_HIGHBIT) { /* necessarily we had b = 1000...000 */ /* Warning: since we have an exponent decrease, when p = 2*GMP_NUMB_BITS - 1 and d = 2*GMP_NUMB_BITS, the round bit corresponds to the upper bit of -c. In that case rb = 0 and sb = 1, except when c = 1000...000 where rb = 1 and sb = 0. */ rb = sh > 1 || d > 2 * GMP_NUMB_BITS || (cp[1] == MPFR_LIMB_HIGHBIT && cp[0] == MPFR_LIMB_ZERO); /* sb=1 below is incorrect when p = 2*GMP_NUMB_BITS - 1, d = 2*GMP_NUMB_BITS and c = 1000...000, but in that case the even rule wound round up too. */ ap[0] = ~mask; ap[1] = MPFR_LIMB_MAX; bx --; } else { ap[0] = a0; ap[1] = a1; rb = 1; } sb = 1; } } /* now perform rounding */ /* Warning: MPFR considers underflow *after* rounding with an unbounded exponent range. However since b and c have same precision p, they are multiples of 2^(emin-p), likewise for b-c. Thus if bx < emin, the subtraction (with an unbounded exponent range) is exact, so that bx is also the exponent after rounding with an unbounded exponent range. 
*/ if (MPFR_UNLIKELY(bx < __gmpfr_emin)) { /* for RNDN, mpfr_underflow always rounds away, thus for |a|<=2^(emin-2) we have to change to RNDZ */ if (rnd_mode == MPFR_RNDN && (bx < __gmpfr_emin - 1 || (ap[1] == MPFR_LIMB_HIGHBIT && ap[0] == 0))) rnd_mode = MPFR_RNDZ; return mpfr_underflow (a, rnd_mode, MPFR_SIGN(a)); } MPFR_SET_EXP (a, bx); if ((rb == 0 && sb == 0) || rnd_mode == MPFR_RNDF) MPFR_RET (0); else if (rnd_mode == MPFR_RNDN) { if (rb == 0 || (sb == 0 && (ap[0] & (MPFR_LIMB_ONE << sh)) == 0)) goto truncate; else goto add_one_ulp; } else if (MPFR_IS_LIKE_RNDZ(rnd_mode, MPFR_IS_NEG(a))) { truncate: MPFR_RET(-MPFR_SIGN(a)); } else /* round away from zero */ { add_one_ulp: ap[0] += MPFR_LIMB_ONE << sh; ap[1] += (ap[0] == 0); if (MPFR_UNLIKELY(ap[1] == 0)) { ap[1] = MPFR_LIMB_HIGHBIT; /* Note: bx+1 cannot exceed __gmpfr_emax, since |a| <= |b|, thus bx+1 is at most equal to the original exponent of b. */ MPFR_ASSERTD(bx + 1 <= __gmpfr_emax); MPFR_SET_EXP (a, bx + 1); } MPFR_RET(MPFR_SIGN(a)); } } /* special code for p = 2*GMP_NUMB_BITS */ static int mpfr_sub1sp2n (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode) { mpfr_exp_t bx = MPFR_GET_EXP (b); mpfr_exp_t cx = MPFR_GET_EXP (c); mp_limb_t *ap = MPFR_MANT(a); mp_limb_t *bp = MPFR_MANT(b); mp_limb_t *cp = MPFR_MANT(c); mpfr_prec_t cnt; mp_limb_t rb; /* round bit */ mp_limb_t sb; /* sticky bit */ mp_limb_t a0, a1; mpfr_uexp_t d; /* this function is inspired by mpfr_sub1sp2 (for the operations of the 2-limb arrays) and by mpfr_sub1sp1n (for the different cases to handle) */ if (bx == cx) /* subtraction is exact in this case */ { a0 = bp[0] - cp[0]; a1 = bp[1] - cp[1] - (bp[0] < cp[0]); if (a1 == 0 && a0 == 0) /* result is zero */ { if (rnd_mode == MPFR_RNDD) MPFR_SET_NEG(a); else MPFR_SET_POS(a); MPFR_SET_ZERO(a); MPFR_RET (0); } /* since B/2 <= bp[1], cp[1] < B with B=2^GMP_NUMB_BITS, if no borrow we have 0 <= bp[1] - cp[1] - x < B/2 where x = (bp[0] < cp[0]) is 0 or 1, thus a1 < B/2 <= bp[1] */ 
else if (a1 >= bp[1]) /* borrow: |c| > |b| */ { MPFR_SET_OPPOSITE_SIGN (a, b); /* negate [a1,a0] */ a0 = -a0; a1 = -a1 - (a0 != 0); } else /* no borrow */ MPFR_SET_SAME_SIGN (a, b); /* now [a1,a0] is the absolute value of b - c, maybe not normalized */ if (a1 == 0) { a1 = a0; a0 = 0; bx -= GMP_NUMB_BITS; } /* now a1 != 0 */ MPFR_ASSERTD(a1 != 0); count_leading_zeros (cnt, a1); if (cnt) { /* shift [a1,a0] left by cnt bit and store in result */ ap[1] = (a1 << cnt) | (a0 >> (GMP_NUMB_BITS - cnt)); ap[0] = a0 << cnt; bx -= cnt; } else { ap[1] = a1; ap[0] = a0; } rb = sb = 0; /* the subtraction is exact */ } else { mp_limb_t t; if (bx < cx) /* swap b and c */ { mpfr_exp_t tx; mp_limb_t *tp; tx = bx; bx = cx; cx = tx; tp = bp; bp = cp; cp = tp; MPFR_SET_OPPOSITE_SIGN (a, b); } else { MPFR_SET_SAME_SIGN (a, b); } MPFR_ASSERTD (bx > cx); d = (mpfr_uexp_t) bx - cx; if (d < GMP_NUMB_BITS) { t = (cp[1] << (GMP_NUMB_BITS - d)) | (cp[0] >> d); /* t is the part that should be subtracted to bp[0]: | a1 | a0 | | bp[1] | bp[0] | | cp[1]>>d | t | sb | */ #ifndef MPFR_FULLSUB a0 = bp[0] - t; a1 = bp[1] - (cp[1] >> d) - (bp[0] < t); sb = cp[0] << (GMP_NUMB_BITS - d); /* neglected part of c */ /* now negate sb and subtract borrow to a0 if sb <> 0 */ if (sb) { a1 -= (a0 == 0); a0 --; /* a = a1,a0 can only be zero when d=1, b = 0.1000...000*2^bx, and c = 0.111...111*2^(bx-1). In that case (where we have sb = MPFR_LIMB_HIGHBIT below), the subtraction is exact, the result is b/2^(2*GMP_NUMB_BITS). This case is dealt below. 
*/ sb = -sb; } #else sb = - (cp[0] << (GMP_NUMB_BITS - d)); a0 = bp[0] - t - (sb != 0); a1 = bp[1] - (cp[1] >> d) - (bp[0] < t || (bp[0] == t && sb != 0)); #endif /* now the result is formed of [a1,a0,sb], which might not be normalized */ if (a1 == MPFR_LIMB_ZERO) { /* this implies d=1 */ MPFR_ASSERTD(d == 1); a1 = a0; a0 = sb; sb = MPFR_LIMB_ZERO; bx -= GMP_NUMB_BITS; } if (a1 == MPFR_LIMB_ZERO) /* case a = a1,a0 = 0 mentioned above */ { MPFR_ASSERTD(a0 == MPFR_LIMB_HIGHBIT); /* was sb above */ a1 = a0; a0 = sb; bx -= GMP_NUMB_BITS; sb = MPFR_LIMB_ZERO; } else { count_leading_zeros (cnt, a1); if (cnt) { /* shift [a1,a0,sb] left by cnt bits and adjust exponent */ a1 = (a1 << cnt) | (a0 >> (GMP_NUMB_BITS - cnt)); a0 = (a0 << cnt) | (sb >> (GMP_NUMB_BITS - cnt)); sb <<= cnt; bx -= cnt; } } rb = sb & MPFR_LIMB_HIGHBIT; sb = sb & ~MPFR_LIMB_HIGHBIT; ap[1] = a1; ap[0] = a0; } else if (d < 2 * GMP_NUMB_BITS) { /* GMP_NUMB_BITS <= d < 2*GMP_NUMB_BITS: compute t, the part to be subtracted to bp[0], and sb, the neglected part of c: | a1 | a0 | | bp[1] | bp[0] | | t | sb | */ /* warning: we should not ignore the low bits from cp[0] in case d > GMP_NUMB_BITS */ sb = (d == GMP_NUMB_BITS) ? cp[0] : (cp[1] << (2*GMP_NUMB_BITS - d)) | (cp[0] >> (d - GMP_NUMB_BITS)) | ((cp[0] << (2*GMP_NUMB_BITS - d)) != 0); t = (cp[1] >> (d - GMP_NUMB_BITS)) + (sb != 0); /* Warning: t might overflow to 0 if d=GMP_NUMB_BITS, sb <> 0, and cp[1] = 111...111 */ a0 = bp[0] - t; a1 = bp[1] - (bp[0] < t) - (t == 0 && sb != 0); sb = -sb; /* now the result is [a1,a0,sb]. 
Since bp[1] has its most significant bit set, we can have an exponent decrease of at most one */ if (a1 < MPFR_LIMB_HIGHBIT) { /* shift [a1,a0] left by 1 bit */ a1 = (a1 << 1) | (a0 >> (GMP_NUMB_BITS - 1)); MPFR_ASSERTD(a1 >= MPFR_LIMB_HIGHBIT); a0 = (a0 << 1) | (sb >> (GMP_NUMB_BITS - 1)); sb <<= 1; bx --; } ap[1] = a1; ap[0] = a0; rb = sb & MPFR_LIMB_HIGHBIT; sb = sb & ~MPFR_LIMB_HIGHBIT; } else { /* d >= 2*GMP_NUMB_BITS: | a1 | a0 | | bp[1] | bp[0] | | cp[1] | cp[0] | */ /* we mimic the case d >= GMP_NUMB_BITS of mpfr_sub1sp1n */ int tst = cp[1] == MPFR_LIMB_HIGHBIT && cp[0] == MPFR_LIMB_ZERO; /* if d = 2 * GMP_NUMB_BITS and tst=1, c = 1/2*ulp(b) */ if (bp[1] > MPFR_LIMB_HIGHBIT || bp[0] > MPFR_LIMB_ZERO) { /* no borrow in b - ulp(b) */ rb = d > 2 * GMP_NUMB_BITS || tst; sb = d > 2 * GMP_NUMB_BITS || !tst; ap[1] = bp[1] - (bp[0] == MPFR_LIMB_ZERO); ap[0] = bp[0] - MPFR_LIMB_ONE; } else { /* b = 1000...000, thus subtracting c yields an exponent shift */ bx --; if (d == 2 * GMP_NUMB_BITS && !tst) /* c > 1/2*ulp(b) */ { t = -cp[1] - (cp[0] > MPFR_LIMB_ZERO); /* the rounding bit is the 2nd most significant bit of t (where the most significant bit of t is necessarily 0), and the sticky bit is formed by the remaining bits of t, and those from -cp[0] */ rb = t >= (MPFR_LIMB_HIGHBIT >> 1); sb = (t << 2) | cp[0]; ap[1] = MPFR_LIMB_MAX; ap[0] = -(MPFR_LIMB_ONE << 1); } else /* c <= 1/2*ulp(b) */ { rb = d > 2 * GMP_NUMB_BITS + 1 || (d == 2 * GMP_NUMB_BITS + 1 && tst); sb = d > 2 * GMP_NUMB_BITS + 1 || (d == 2 * GMP_NUMB_BITS + 1 && !tst); ap[1] = -MPFR_LIMB_ONE; ap[0] = -MPFR_LIMB_ONE; } } } } /* now perform rounding */ /* Warning: MPFR considers underflow *after* rounding with an unbounded exponent range. However since b and c have same precision p, they are multiples of 2^(emin-p), likewise for b-c. Thus if bx < emin, the subtraction (with an unbounded exponent range) is exact, so that bx is also the exponent after rounding with an unbounded exponent range. 
*/ if (MPFR_UNLIKELY(bx < __gmpfr_emin)) { /* for RNDN, mpfr_underflow always rounds away, thus for |a|<=2^(emin-2) we have to change to RNDZ */ if (rnd_mode == MPFR_RNDN && (bx < __gmpfr_emin - 1 || (ap[1] == MPFR_LIMB_HIGHBIT && ap[0] == 0))) rnd_mode = MPFR_RNDZ; return mpfr_underflow (a, rnd_mode, MPFR_SIGN(a)); } MPFR_SET_EXP (a, bx); if ((rb == 0 && sb == 0) || rnd_mode == MPFR_RNDF) MPFR_RET (0); else if (rnd_mode == MPFR_RNDN) { if (rb == 0 || (sb == 0 && (ap[0] & MPFR_LIMB_ONE) == 0)) goto truncate; else goto add_one_ulp; } else if (MPFR_IS_LIKE_RNDZ(rnd_mode, MPFR_IS_NEG(a))) { truncate: MPFR_RET(-MPFR_SIGN(a)); } else /* round away from zero */ { add_one_ulp: ap[0] += MPFR_LIMB_ONE; ap[1] += (ap[0] == 0); if (MPFR_UNLIKELY(ap[1] == 0)) { ap[1] = MPFR_LIMB_HIGHBIT; /* Note: bx+1 cannot exceed __gmpfr_emax, since |a| <= |b|, thus bx+1 is at most equal to the original exponent of b. */ MPFR_ASSERTD(bx + 1 <= __gmpfr_emax); MPFR_SET_EXP (a, bx + 1); } MPFR_RET(MPFR_SIGN(a)); } } /* special code for 2*GMP_NUMB_BITS < p < 3*GMP_NUMB_BITS */ static int mpfr_sub1sp3 (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode, mpfr_prec_t p) { mpfr_exp_t bx = MPFR_GET_EXP (b); mpfr_exp_t cx = MPFR_GET_EXP (c); mp_limb_t *ap = MPFR_MANT(a); mp_limb_t *bp = MPFR_MANT(b); mp_limb_t *cp = MPFR_MANT(c); mpfr_prec_t cnt, INITIALIZED(sh); mp_limb_t rb; /* round bit */ mp_limb_t sb; /* sticky bit */ mp_limb_t mask, a0, a1, a2; mpfr_uexp_t d; MPFR_ASSERTD(2 * GMP_NUMB_BITS < p && p < 3 * GMP_NUMB_BITS); if (bx == cx) /* subtraction is exact in this case */ { a0 = bp[0] - cp[0]; a1 = bp[1] - cp[1] - (bp[0] < cp[0]); /* a borrow is generated for a when either bp[1] < cp[1], or bp[1] = cp[1] and bp[0] < cp[0] */ a2 = bp[2] - cp[2] - (bp[1] < cp[1] || (bp[1] == cp[1] && bp[0] < cp[0])); if (a2 == 0 && a1 == 0 && a0 == 0) /* result is zero */ { if (rnd_mode == MPFR_RNDD) MPFR_SET_NEG(a); else MPFR_SET_POS(a); MPFR_SET_ZERO(a); MPFR_RET (0); } else if (a2 >= bp[2]) /* 
borrow: |c| > |b| */ { MPFR_SET_OPPOSITE_SIGN (a, b); /* a = b-c mod 2^(3*GMP_NUMB_BITS) */ a0 = -a0; a1 = -a1 - (a0 != 0); a2 = -a2 - (a0 != 0 || a1 != 0); } else /* bp[0] > cp[0] */ MPFR_SET_SAME_SIGN (a, b); if (a2 == 0) { a2 = a1; a1 = a0; a0 = 0; bx -= GMP_NUMB_BITS; if (a2 == 0) { a2 = a1; a1 = 0; bx -= GMP_NUMB_BITS; } } /* now a2 != 0 */ MPFR_ASSERTD(a2 != 0); count_leading_zeros (cnt, a2); if (cnt) { ap[2] = (a2 << cnt) | (a1 >> (GMP_NUMB_BITS - cnt)); ap[1] = (a1 << cnt) | (a0 >> (GMP_NUMB_BITS - cnt)); ap[0] = a0 << cnt; bx -= cnt; } else { ap[2] = a2; ap[1] = a1; ap[0] = a0; } rb = sb = 0; /* Note: sh is not initialized, but will not be used in this case. */ } else { if (bx < cx) /* swap b and c */ { mpfr_exp_t tx; mp_limb_t *tp; tx = bx; bx = cx; cx = tx; tp = bp; bp = cp; cp = tp; MPFR_SET_OPPOSITE_SIGN (a, b); } else { MPFR_SET_SAME_SIGN (a, b); } MPFR_ASSERTD (bx > cx); d = (mpfr_uexp_t) bx - cx; sh = 3 * GMP_NUMB_BITS - p; mask = MPFR_LIMB_MASK(sh); if (d < GMP_NUMB_BITS) { mp_limb_t cy; /* warning: we must have the most significant bit of sb correct since it might become the round bit below */ sb = cp[0] << (GMP_NUMB_BITS - d); /* neglected part of c */ a0 = bp[0] - ((cp[1] << (GMP_NUMB_BITS - d)) | (cp[0] >> d)); a1 = bp[1] - ((cp[2] << (GMP_NUMB_BITS - d)) | (cp[1] >> d)) - (a0 > bp[0]); cy = a1 > bp[1] || (a1 == bp[1] && a0 > bp[0]); /* borrow in a1 */ a2 = bp[2] - (cp[2] >> d) - cy; /* if sb is non-zero, subtract 1 from a2, a1, a0 since we want a non-negative neglected part */ if (sb) { a2 -= (a1 == 0 && a0 == 0); a1 -= (a0 == 0); a0 --; /* a = a2,a1,a0 cannot become zero here, since: a) if d >= 2, then a2 >= 2^(w-1) - (2^(w-2)-1) with w = GMP_NUMB_BITS, thus a2 - 1 >= 2^(w-2), b) if d = 1, then since p < 3*GMP_NUMB_BITS we have sb=0. 
*/ MPFR_ASSERTD(a2 > 0 || a1 > 0 || a0 > 0); sb = -sb; /* 2^GMP_NUMB_BITS - sb */ } if (a2 == 0) { /* this implies d=1, which in turn implies sb=0 */ MPFR_ASSERTD(sb == 0); a2 = a1; a1 = a0; a0 = 0; /* should be a0 = sb */ /* since sb=0 already, no need to set it to 0 */ bx -= GMP_NUMB_BITS; if (a2 == 0) { a2 = a1; a1 = 0; /* should be a1 = a0 */ bx -= GMP_NUMB_BITS; } } /* now a1 != 0 */ MPFR_ASSERTD(a2 != 0); count_leading_zeros (cnt, a2); if (cnt) { ap[2] = (a2 << cnt) | (a1 >> (GMP_NUMB_BITS - cnt)); ap[1] = (a1 << cnt) | (a0 >> (GMP_NUMB_BITS - cnt)); a0 = (a0 << cnt) | (sb >> (GMP_NUMB_BITS - cnt)); sb <<= cnt; bx -= cnt; } else { ap[2] = a2; ap[1] = a1; } /* sh > 0 since p < 2*GMP_NUMB_BITS */ MPFR_ASSERTD(sh > 0); rb = a0 & (MPFR_LIMB_ONE << (sh - 1)); sb |= (a0 & mask) ^ rb; ap[0] = a0 & ~mask; } else if (d < 2 * GMP_NUMB_BITS) { mp_limb_t c0shifted; /* warning: we must have the most significant bit of sb correct since it might become the round bit below */ sb = (d == GMP_NUMB_BITS) ? cp[0] : (cp[1] << (2*GMP_NUMB_BITS - d)) | (cp[0] != 0); c0shifted = (d == GMP_NUMB_BITS) ? cp[1] : (cp[2] << (2*GMP_NUMB_BITS-d)) | (cp[1] >> (d - GMP_NUMB_BITS)); a0 = bp[0] - c0shifted; /* TODO: add a non-regression test for cp[2] == MPFR_LIMB_MAX, d == GMP_NUMB_BITS and a0 > bp[0]. */ a1 = bp[1] - (cp[2] >> (d - GMP_NUMB_BITS)) - (a0 > bp[0]); a2 = bp[2] - (a1 > bp[1] || (a1 == bp[1] && a0 > bp[0])); /* if sb is non-zero, subtract 1 from a2, a1, a0 since we want a non-negative neglected part */ if (sb) { a2 -= (a1 == 0 && a0 == 0); a1 -= (a0 == 0); a0 --; /* a = a2,a1,a0 cannot become zero here, since: a) if d >= 2, then a2 >= 2^(w-1) - (2^(w-2)-1) with w = GMP_NUMB_BITS, thus a2 - 1 >= 2^(w-2), b) if d = 1, then since p < 3*GMP_NUMB_BITS we have sb=0. 
*/ MPFR_ASSERTD(a2 > 0 || a1 > 0 || a0 > 0); sb = -sb; /* 2^GMP_NUMB_BITS - sb */ } /* since bp[2] has its most significant bit set, we can have an exponent decrease of at most one */ if (a2 < MPFR_LIMB_HIGHBIT) { ap[2] = (a2 << 1) | (a1 >> (GMP_NUMB_BITS - 1)); ap[1] = (a1 << 1) | (a0 >> (GMP_NUMB_BITS - 1)); a0 = (a0 << 1) | (sb >> (GMP_NUMB_BITS - 1)); sb <<= 1; bx --; } else { ap[2] = a2; ap[1] = a1; } rb = a0 & (MPFR_LIMB_ONE << (sh - 1)); sb |= (a0 & mask) ^ rb; ap[0] = a0 & ~mask; } else if (d < 3 * GMP_NUMB_BITS) /* 2*GMP_NUMB_BITS<=d<3*GMP_NUMB_BITS */ { MPFR_ASSERTD (2*GMP_NUMB_BITS <= d && d < 3*GMP_NUMB_BITS); /* warning: we must have the most significant bit of sb correct since it might become the round bit below */ if (d == 2 * GMP_NUMB_BITS) sb = cp[1] | (cp[0] != 0); else sb = cp[2] << (3*GMP_NUMB_BITS - d) | (cp[1] != 0) | (cp[0] != 0); sb = -sb; /* TODO: add a non-regression test for cp[2] == MPFR_LIMB_MAX, d == 2*GMP_NUMB_BITS and sb != 0. */ a0 = bp[0] - (cp[2] >> (d - 2*GMP_NUMB_BITS)) - (sb != 0); a1 = bp[1] - (a0 > bp[0] || (a0 == bp[0] && sb != 0)); a2 = bp[2] - (a1 > bp[1]); if (a2 < MPFR_LIMB_HIGHBIT) { ap[2] = (a2 << 1) | (a1 >> (GMP_NUMB_BITS - 1)); ap[1] = (a1 << 1) | (a0 >> (GMP_NUMB_BITS - 1)); a0 = (a0 << 1) | (sb >> (GMP_NUMB_BITS - 1)); sb <<= 1; bx --; } else { ap[2] = a2; ap[1] = a1; } rb = a0 & (MPFR_LIMB_ONE << (sh - 1)); sb |= (a0 & mask) ^ rb; ap[0] = a0 & ~mask; } else /* d >= 3*GMP_NUMB_BITS */ { /* We compute b - ulp(b), and the remainder ulp(b) - c satisfies: 1/2 ulp(b) < ulp(b) - c < ulp(b), thus rb = sb = 1. */ mp_limb_t t = MPFR_LIMB_ONE << sh; a0 = bp[0] - t; a1 = bp[1] - (bp[0] < t); a2 = bp[2] - (a1 > bp[1]); if (a2 < MPFR_LIMB_HIGHBIT) { /* necessarily we had b = 1000...000 */ /* Warning: since we have an exponent decrease, when p = 3*GMP_NUMB_BITS - 1 and d = 3*GMP_NUMB_BITS, the round bit corresponds to the upper bit of -c. In that case rb = 0 and sb = 1, except when c = 1000...000 where rb = 1 and sb = 0. 
*/ rb = sh > 1 || d > 3 * GMP_NUMB_BITS || (cp[2] == MPFR_LIMB_HIGHBIT && cp[1] == MPFR_LIMB_ZERO && cp[0] == MPFR_LIMB_ZERO); /* sb=1 below is incorrect when p = 2*GMP_NUMB_BITS - 1, d = 2*GMP_NUMB_BITS and c = 1000...000, but in that case the even rule wound round up too. */ ap[0] = ~mask; ap[1] = MPFR_LIMB_MAX; ap[2] = MPFR_LIMB_MAX; bx --; } else { ap[0] = a0; ap[1] = a1; ap[2] = a2; rb = 1; } sb = 1; } } /* now perform rounding */ /* Warning: MPFR considers underflow *after* rounding with an unbounded exponent range. However since b and c have same precision p, they are multiples of 2^(emin-p), likewise for b-c. Thus if bx < emin, the subtraction (with an unbounded exponent range) is exact, so that bx is also the exponent after rounding with an unbounded exponent range. */ if (MPFR_UNLIKELY(bx < __gmpfr_emin)) { /* for RNDN, mpfr_underflow always rounds away, thus for |a|<=2^(emin-2) we have to change to RNDZ */ if (rnd_mode == MPFR_RNDN && (bx < __gmpfr_emin - 1 || (ap[2] == MPFR_LIMB_HIGHBIT && ap[1] == 0 && ap[0] == 0))) rnd_mode = MPFR_RNDZ; return mpfr_underflow (a, rnd_mode, MPFR_SIGN(a)); } MPFR_SET_EXP (a, bx); if ((rb == 0 && sb == 0) || rnd_mode == MPFR_RNDF) MPFR_RET (0); else if (rnd_mode == MPFR_RNDN) { if (rb == 0 || (sb == 0 && (ap[0] & (MPFR_LIMB_ONE << sh)) == 0)) goto truncate; else goto add_one_ulp; } else if (MPFR_IS_LIKE_RNDZ(rnd_mode, MPFR_IS_NEG(a))) { truncate: MPFR_RET(-MPFR_SIGN(a)); } else /* round away from zero */ { add_one_ulp: ap[0] += MPFR_LIMB_ONE << sh; ap[1] += (ap[0] == 0); ap[2] += (ap[1] == 0 && ap[0] == 0); if (MPFR_UNLIKELY(ap[2] == 0)) { ap[2] = MPFR_LIMB_HIGHBIT; /* Note: bx+1 cannot exceed __gmpfr_emax, since |a| <= |b|, thus bx+1 is at most equal to the original exponent of b. 
*/
          MPFR_ASSERTD(bx + 1 <= __gmpfr_emax);
          MPFR_SET_EXP (a, bx + 1);
        }
      MPFR_RET(MPFR_SIGN(a));
    }
}

#endif /* !defined(MPFR_GENERIC_ABI) */

/* Rounding Sub */

/* compute sgn(b)*(|b| - |c|) if |b|>|c| else -sgn(b)*(|c| -|b|)
   Returns 0 iff result is exact,
   a negative value when the result is less than the exact value,
   a positive value otherwise. */

/* A0...Ap-1
 *          Cp Cp+1 ....
 *             <- C'p+1 ->
 * Cp = -1 if calculated from c mantissa
 * Cp = 0 if 0 from a or c
 * Cp = 1 if calculated from a.
 * C'p+1 = First bit not null or 0 if there isn't one
 *
 * Can't have Cp=-1 and C'p+1=1 */

/* RND = MPFR_RNDZ:
 *  + if Cp=0 and C'p+1=0,1,  Truncate.
 *  + if Cp=0 and C'p+1=-1,   SubOneUlp
 *  + if Cp=-1,               SubOneUlp
 *  + if Cp=1,                AddOneUlp
 * RND = MPFR_RNDA (Away)
 *  + if Cp=0 and C'p+1=0,-1, Truncate
 *  + if Cp=0 and C'p+1=1,    AddOneUlp
 *  + if Cp=1,                AddOneUlp
 *  + if Cp=-1,               Truncate
 * RND = MPFR_RNDN
 *  + if Cp=0,                Truncate
 *  + if Cp=1 and C'p+1=1,    AddOneUlp
 *  + if Cp=1 and C'p+1=-1,   Truncate
 *  + if Cp=1 and C'p+1=0,    Truncate if Ap-1=0, AddOneUlp else
 *  + if Cp=-1 and C'p+1=-1,  SubOneUlp
 *  + if Cp=-1 and C'p+1=0,   Truncate if Ap-1=0, SubOneUlp else
 *
 * If AddOneUlp:
 *   If carry, then it is 11111111111 + 1 = 10000000000000
 *   ap[n-1]=MPFR_HIGHT_BIT
 * If SubOneUlp:
 *   If we lose one bit, it is 1000000000 - 1 = 0111111111111
 *   Then shift, and put as last bit x which is calculated
 *   according Cp, Cp-1 and rnd_mode.
 * If Truncate,
 *   If it is a power of 2,
 *   we may have to suboneulp in some special cases.
 *
 * To simplify, we don't use Cp = 1.
 */

/* Generic same-precision subtraction: a <- sgn * (|b| - |c|) rounded in
   rnd_mode, where sgn makes the result consistent with b - c.
   Preconditions (checked by the assertions below): a, b, c all have the
   same precision, and b, c are pure (nonzero, finite) numbers.
   Returns the usual MPFR ternary value (0 iff exact). */
MPFR_HOT_FUNCTION_ATTR int
mpfr_sub1sp (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode)
{
  mpfr_exp_t bx, cx;
  mpfr_uexp_t d;               /* exponent difference |EXP(b) - EXP(c)| */
  mpfr_prec_t p, sh, cnt;      /* sh = number of unused low bits in the last limb */
  mp_size_t n, k;
  mp_limb_t *ap = MPFR_MANT(a);
  mp_limb_t *bp = MPFR_MANT(b);
  mp_limb_t *cp = MPFR_MANT(c);
  mp_limb_t limb;
  int inexact;
  mp_limb_t rb, sb; /* round and sticky bits. They are interpreted as
                       negative, i.e., if rb <> 0, then we should subtract 1
                       at the round bit position, and if sb <> 0, we should
                       subtract something below the round bit position. */
  mp_limb_t rbb = MPFR_LIMB_MAX, sbb = MPFR_LIMB_MAX;
                    /* rbb is the next bit after the round bit, and sbb the
                       corresponding sticky bit. gcc claims that they might be
                       used uninitialized. We fill them with invalid values,
                       which should produce a failure if so. See README.dev
                       file. */
  int pow2;
  MPFR_TMP_DECL(marker);

  MPFR_ASSERTD(MPFR_PREC(a) == MPFR_PREC(b) && MPFR_PREC(b) == MPFR_PREC(c));
  MPFR_ASSERTD(MPFR_IS_PURE_FP(b));
  MPFR_ASSERTD(MPFR_IS_PURE_FP(c));

  /* Read prec and num of limbs */
  p = MPFR_GET_PREC (b);

#if !defined(MPFR_GENERIC_ABI)
  /* Dispatch to the specialized small-precision routines (1 to 3 limbs). */
  /* special case for p < GMP_NUMB_BITS */
  if (p < GMP_NUMB_BITS)
    return mpfr_sub1sp1 (a, b, c, rnd_mode, p);

  /* special case for GMP_NUMB_BITS < p < 2*GMP_NUMB_BITS */
  if (GMP_NUMB_BITS < p && p < 2 * GMP_NUMB_BITS)
    return mpfr_sub1sp2 (a, b, c, rnd_mode, p);

  /* special case for p = GMP_NUMB_BITS: we put it *after* mpfr_sub1sp2,
     in order not to slow down mpfr_sub1sp2, which should be more frequent */
  if (p == GMP_NUMB_BITS)
    return mpfr_sub1sp1n (a, b, c, rnd_mode);

  /* special case for 2*GMP_NUMB_BITS < p < 3*GMP_NUMB_BITS */
  if (2 * GMP_NUMB_BITS < p && p < 3 * GMP_NUMB_BITS)
    return mpfr_sub1sp3 (a, b, c, rnd_mode, p);

  if (p == 2 * GMP_NUMB_BITS)
    return mpfr_sub1sp2n (a, b, c, rnd_mode);
#endif

  n = MPFR_PREC2LIMBS (p);
  /* Fast cmp of |b| and |c| */
  bx = MPFR_GET_EXP (b);
  cx = MPFR_GET_EXP (c);

  MPFR_TMP_MARK(marker);

  k = n - 1;
  if (bx == cx)
    {
      /* Check mantissa since exponents are equal */
      while (k >= 0 && MPFR_UNLIKELY(bp[k] == cp[k]))
        k--;
      /* now k = -1 if b == c, otherwise k is the largest integer < n such
         that bp[k] <> cp[k] */
      if (k < 0) /* b == c ! */
        {
          /* Return exact number 0 */
          if (rnd_mode == MPFR_RNDD)
            MPFR_SET_NEG(a);
          else
            MPFR_SET_POS(a);
          MPFR_SET_ZERO(a);
          MPFR_RET(0);
        }
      else if (bp[k] > cp[k])
        goto BGreater;
      else
        {
          MPFR_ASSERTD(bp[k] < cp[k]);
          goto CGreater;
        }
    }
  else if (bx < cx)
    {
      /* Swap b and c and set sign */
      mpfr_srcptr t;
      mpfr_exp_t tx;
      mp_limb_t *tp;
      tx = bx; bx = cx; cx = tx;
      /* Note: when entered via the goto below (equal exponents), the
         exponent swap above is correctly skipped. */
    CGreater:
      MPFR_SET_OPPOSITE_SIGN(a,b);
      t  = b;  b  = c;  c  = t;
      tp = bp; bp = cp; cp = tp;
    }
  else
    {
      /* |b| > |c| */
    BGreater:
      MPFR_SET_SAME_SIGN(a,b);
    }

  /* Now |b| > |c| */
  MPFR_ASSERTD(bx >= cx);
  d = (mpfr_uexp_t) bx - cx;
  /* printf ("New with diff=%lu\n", (unsigned long) d); */

  /* FIXME: The goto's below are too complex (long backward) and make the
     code unreadable. */

  if (d == 0)
    {
      /* <-- b -->
         <-- c --> : exact sub */
      mpn_sub_n (ap, bp, cp, n);
      /* Normalize */
    ExactNormalize:
      limb = ap[n-1];
      if (MPFR_LIKELY (limb != 0))
        {
          /* First limb is not zero. */
          count_leading_zeros (cnt, limb);
          /* Warning: cnt can be 0 when we come from the case SubD1Lose
             with goto ExactNormalize */
          if (MPFR_LIKELY(cnt))
            {
              mpn_lshift (ap, ap, n, cnt); /* Normalize number */
              bx -= cnt; /* Update final expo */
            }
          /* Last limb should be OK */
          MPFR_ASSERTD(!(ap[0] & MPFR_LIMB_MASK((unsigned int) (-p)
                                                % GMP_NUMB_BITS)));
        }
      else
        {
          /* First limb is zero: this can only occur for n >= 2 */
          mp_size_t len;
          /* Find the first limb not equal to zero. It necessarily exists
             since |b| > |c|. We know that bp[k] > cp[k] and all upper
             limbs are equal. */
          while (ap[k] == 0)
            k--;
          limb = ap[k];
          /* ap[k] is the non-zero limb of largest index, thus we have to
             consider the k+1 least significant limbs */
          MPFR_ASSERTD(limb != 0);
          count_leading_zeros(cnt, limb);
          k++;
          len = n - k; /* Number of most significant zero limbs */
          MPFR_ASSERTD(k > 0);
          if (cnt)
            mpn_lshift (ap + len, ap, k, cnt); /* Normalize the High Limb*/
          else
            /* Must use copyd since src and dst may overlap & dst>=src */
            mpn_copyd (ap + len, ap, k);
          MPN_ZERO(ap, len); /* Zeroing the last limbs */
          bx -= cnt + len * GMP_NUMB_BITS; /* update exponent */
          /* ap[len] should have its low bits zero: it is bp[0]-cp[0] */
          MPFR_ASSERTD(!(ap[len] & MPFR_LIMB_MASK((unsigned int) (-p)
                                                  % GMP_NUMB_BITS)));
        }
      /* Check exponent underflow (no overflow can happen) */
      if (MPFR_UNLIKELY(bx < __gmpfr_emin))
        {
          MPFR_TMP_FREE(marker);
          /* since b and c have same sign, exponent and precision, the
             subtraction is exact */
          /* printf("(D==0 Underflow)\n"); */
          /* for MPFR_RNDN, mpfr_underflow always rounds away from zero,
             thus for |a| <= 2^(emin-2) we change to RNDZ. */
          if (rnd_mode == MPFR_RNDN &&
              (bx < __gmpfr_emin - 1 || mpfr_powerof2_raw (a)))
            rnd_mode = MPFR_RNDZ;
          return mpfr_underflow (a, rnd_mode, MPFR_SIGN(a));
        }
      MPFR_SET_EXP (a, bx);
      /* No rounding is necessary since the result is exact */
      MPFR_ASSERTD(ap[n-1] & MPFR_LIMB_HIGHBIT);
      MPFR_TMP_FREE(marker);
      return 0;
    }
  else if (d == 1)
    {
      /* | <-- b -->
         |  <-- c --> */
      mp_limb_t c0, mask;
      MPFR_UNSIGNED_MINUS_MODULO(sh, p);

      /* If we lose at least one bit, compute 2*b-c (Exact)
       * else compute b-c/2 */
      limb = bp[k] - cp[k]/2;

      /* Let W = 2^GMP_NUMB_BITS:
         we have |b|-|c| >= limb*W^k - (2*W^k-1)/2 >= limb*W^k - W^k + 1/2
         thus if limb > W/2, |b|-|c| >= 1/2*W^n.
         Moreover if trunc(|c|) represents the first p-1 bits of |c|,
         minus the last significant bit called c0 below (in fact c0 is that
         bit shifted by sh bits), then we have
         |b|-trunc(|c|) >= 1/2*W^n+1, thus the two mpn_sub_n calls
         below necessarily yield a > 1/2*W^n. */
      if (limb > MPFR_LIMB_HIGHBIT)
        {
          /* case limb > W/2 */
          mp_limb_t *tp;
          /* The exponent cannot decrease: compute b-c/2 */
          /* Shift c in the allocated temporary block */
        SubD1NoLose:
          c0 = cp[0] & (MPFR_LIMB_ONE << sh);
          mask = ~MPFR_LIMB_MASK(sh);
          tp = MPFR_TMP_LIMBS_ALLOC (n);
          /* FIXME: it might be faster to have one function shifting c by 1
             to the right and adding with b to a, which would read c once
             only, and avoid a temporary allocation. */
          mpn_rshift (tp, cp, n, 1);
          tp[0] &= mask; /* Zero last bit of c if set */
          mpn_sub_n (ap, bp, tp, n);
          MPFR_SET_EXP(a, bx); /* No exponent overflow! */
          MPFR_ASSERTD(ap[n-1] & MPFR_LIMB_HIGHBIT);
          if (MPFR_LIKELY(c0 == 0))
            {
              /* Result is exact: no need of rounding! */
              MPFR_TMP_FREE(marker);
              return 0;
            }
          /* c0 is non-zero, thus we have to subtract 1/2*ulp(a), however
             we know (see analysis above) that this cannot make the
             exponent decrease */
          MPFR_ASSERTD( !(ap[0] & ~mask) );    /* Check last bits */
          /* No normalize is needed */
          /* Rounding is necessary since c0 is non-zero */
          /* we have to subtract 1 at the round bit position, and 0 for the
             lower bits */
          rb = 1;
          rbb = sbb = 0;
        }
      else if (MPFR_LIKELY(limb < MPFR_LIMB_HIGHBIT))
        {
          mp_limb_t *tp;
          /* |b| - |c| <= (W/2-1)*W^k + W^k-1 = 1/2*W^n - 1 */
          /* The exponent decreases by one. */
        SubD1Lose:
          /* Compute 2*b-c (Exact) */
#if defined(WANT_GMP_INTERNALS) && defined(HAVE___GMPN_RSBLSH1_N)
          /* {ap, n} = 2*{bp, n} - {cp, n} */
          /* mpn_rsblsh1_n -- rp[] = (vp[] << 1) - up[] */
          __gmpn_rsblsh1_n (ap, cp, bp, n);
#else
          tp = MPFR_TMP_LIMBS_ALLOC (n);
          /* Shift b in the allocated temporary block */
          mpn_lshift (tp, bp, n, 1);
          mpn_sub_n (ap, tp, cp, n);
#endif
          bx--;
          MPFR_ASSERTD(k == n-1);
          goto ExactNormalize;
        }
      else
        {
          /* limb = MPFR_LIMB_HIGHBIT */
          /* Case: limb = 100000000000 */
          /* Check while b[l] == c'[l] (C' is C shifted by 1) */
          /* If b[l] < c'[l] => We lose at least one bit */
          /* If b[l] > c'[l] => We don't lose any bit */
          /* If l == -1 => We don't lose any bit AND the result is
             100000000000 0000000000 00000000000 */
          mp_size_t l = n - 1;
          mp_limb_t cl_shifted;
          do
            {
              /* the first loop will compare b[n-2] and c'[n-2] */
              cl_shifted = cp[l] << (GMP_NUMB_BITS - 1);
              if (--l < 0)
                break;
              cl_shifted += cp[l] >> 1;
            }
          while (bp[l] == cl_shifted);
          if (MPFR_UNLIKELY(l < 0))
            {
              if (MPFR_UNLIKELY(cl_shifted))
                {
                  /* Since cl_shifted is what should be subtracted from
                     ap[-1], if non-zero then necessarily the precision is
                     a multiple of GMP_NUMB_BITS, and we lose one bit, thus
                     the (exact) result is a power of 2 minus 1. */
                  memset (ap, -1, n * MPFR_BYTES_PER_MP_LIMB);
                  MPFR_SET_EXP (a, bx - 1);
                  /* No underflow is possible since cx = bx-1 is a valid
                     exponent. */
                }
              else
                {
                  /* cl_shifted=0: result is a power of 2. */
                  MPN_ZERO (ap, n - 1);
                  ap[n-1] = MPFR_LIMB_HIGHBIT;
                  MPFR_SET_EXP (a, bx); /* No exponent overflow! */
                }
              /* No Normalize is needed */
              /* No Rounding is needed */
              MPFR_TMP_FREE (marker);
              return 0;
            }
          /* cl_shifted is the shifted value c'[l] */
          else if (bp[l] > cl_shifted)
            goto SubD1NoLose; /* |b|-|c| >= 1/2*W^n */
          else
            {
              /* we cannot have bp[l] = cl_shifted since the only way we
                 can exit the while loop above is when bp[l] <> cl_shifted
                 or l < 0, and the case l < 0 was already treated above. */
              MPFR_ASSERTD(bp[l] < cl_shifted);
              goto SubD1Lose; /* |b|-|c| <= 1/2*W^n-1 and is exact */
            }
        }
    }
  else if (MPFR_UNLIKELY(d >= p))
    {
      /* the difference of exponents is larger than the precision of all
         operands, thus the result is either b or b - 1 ulp, with a
         possible exact result when d = p, b = 2^e and c = 1/2 ulp(b) */
      MPFR_UNSIGNED_MINUS_MODULO(sh, p);
      /* We can't set A before since we use cp for rounding... */
      /* Perform rounding: check if a=b or a=b-ulp(b) */
      if (MPFR_UNLIKELY(d == p))
        {
          /* since c is normalized, we need to subtract 1/2 ulp(b) */
          rb = 1;
          /* rbb is the bit of weight 1/4 ulp(b) in c. We assume a limb has
             at least 2 bits. If the precision is 1, we read in the unused
             bits, which should be zero, and this is what we want. */
          rbb = cp[n-1] & (MPFR_LIMB_HIGHBIT >> 1);
          /* We need also sbb */
          sbb = cp[n-1] & MPFR_LIMB_MASK(GMP_NUMB_BITS - 2);
          for (k = n-1; sbb == 0 && k > 0;)
            sbb = cp[--k];
        }
      else
        {
          rb = 0;
          if (d == p + 1)
            {
              /* c's most significant bit has weight 1/4 ulp(b) */
              rbb = 1;
              sbb = cp[n-1] & MPFR_LIMB_MASK(GMP_NUMB_BITS - 1);
              for (k = n-1; sbb == 0 && k > 0;)
                sbb = cp[--k];
            }
          else
            {
              rbb = 0;
              sbb = 1; /* since C is non-zero */
            }
        }
      /* Copy mantissa B in A */
      MPN_COPY(ap, bp, n);
    }
  else /* case 2 <= d < p */
    {
      mpfr_uexp_t dm;
      mp_size_t m;
      mp_limb_t mask, *tp;

      MPFR_UNSIGNED_MINUS_MODULO(sh, p);

      tp = MPFR_TMP_LIMBS_ALLOC (n);

      /* Shift c in temporary allocated place */
      dm = d % GMP_NUMB_BITS;
      m = d / GMP_NUMB_BITS;
      if (MPFR_UNLIKELY(dm == 0))
        {
          /* dm = 0 and m > 0: Just copy */
          MPFR_ASSERTD(m != 0);
          MPN_COPY(tp, cp + m, n - m);
          MPN_ZERO(tp + n - m, m);
        }
      else if (MPFR_LIKELY(m == 0))
        {
          /* dm >=2 and m == 0: just shift */
          MPFR_ASSERTD(dm >= 2);
          mpn_rshift (tp, cp, n, dm);
        }
      else
        {
          /* dm > 0 and m > 0: shift and zero */
          mpn_rshift (tp, cp + m, n - m, dm);
          MPN_ZERO (tp + n - m, m);
        }

      /* FIXME: Instead of doing "cp = tp;", keep using tp to avoid
         confusion? Thus in the block below, we don't need
         "mp_limb_t *cp = MPFR_MANT(c);". In short, cp should always be
         MPFR_MANT(c) defined earlier, possibly after the swap. */
      cp = tp;

      /* mpfr_print_mant_binary("Before", MPFR_MANT(c), p); */
      /* mpfr_print_mant_binary("B=    ", MPFR_MANT(b), p); */
      /* mpfr_print_mant_binary("After ", cp, p); */

      /* Compute rb=Cp and sb=C'p+1 */
      {
        /* Compute rb and rbb from C */
        mp_limb_t *cp = MPFR_MANT(c);
        /* The round bit is bit p-d in C, assuming the most significant bit
           of C is bit 0 */
        mpfr_prec_t x = p - d;
        mp_size_t kx = n - 1 - (x / GMP_NUMB_BITS);
        mpfr_prec_t sx = GMP_NUMB_BITS - 1 - (x % GMP_NUMB_BITS);
        /* the round bit is in cp[kx], at position sx */
        MPFR_ASSERTD(p >= d);
        rb = cp[kx] & (MPFR_LIMB_ONE << sx);

        /* Now compute rbb: since d >= 2 it always exists in C */
        if (sx == 0)
          {
            /* rbb is in the next limb */
            kx --;
            sx = GMP_NUMB_BITS - 1;
          }
        else
          sx --; /* rb and rbb are in the same limb */
        rbb = cp[kx] & (MPFR_LIMB_ONE << sx);

        /* Now look at the remaining low bits of C to determine sbb */
        sbb = cp[kx] & MPFR_LIMB_MASK(sx);
        while (sbb == 0 && kx > 0)
          sbb = cp[--kx];
      }

      /* printf("sh=%lu Cp=%d C'p+1=%d\n", sh, rb!=0, sb!=0); */

      /* Clean shifted C' */
      mask = ~MPFR_LIMB_MASK (sh);
      cp[0] &= mask;

      /* Subtract the mantissa c from b in a */
      mpn_sub_n (ap, bp, cp, n);
      /* mpfr_print_mant_binary("Sub=  ", ap, p); */

      /* Normalize: we lose at most one bit */
      if (MPFR_UNLIKELY(MPFR_LIMB_MSB(ap[n-1]) == 0))
        {
          /* High bit is not set and we have to fix it! */
          /* Ap >= 010000xxx001 */
          mpn_lshift (ap, ap, n, 1);
          /* Ap >= 100000xxx010 */
          if (MPFR_UNLIKELY(rb != 0)) /* Check if Cp = -1 */
            /* Since Cp == -1, we have to subtract one more */
            {
              mpn_sub_1 (ap, ap, n, MPFR_LIMB_ONE << sh);
              MPFR_ASSERTD(MPFR_LIMB_MSB(ap[n-1]) != 0);
            }
          /* Ap >= 10000xxx001 */
          /* Final exponent -1 since we have shifted the mantissa */
          bx--;
          /* Update rb and sb */
          rb = rbb;
          rbb = sbb;
          /* We don't have anymore a valid Cp+1! But since
             Ap >= 100000xxx001, the final sub can't unnormalize! */
        }
      MPFR_ASSERTD( !(ap[0] & ~mask) );
    }

 rounding:
  /* at this point 'a' contains b - high(c), normalized, and we have to
     subtract rb * 1/2 ulp(a), rbb * 1/4 ulp(a), and sbb * 1/8 ulp(a),
     interpreting rb/rbb/sbb as 1 if non-zero. */
  sb = rbb | sbb;
  if (rb == 0 && sb == 0)
    {
      inexact = 0;
      goto end_of_sub;
    }
  pow2 = mpfr_powerof2_raw (a);
  if (pow2 && rb != 0) /* subtract 1 ulp */
    {
      mpn_sub_1 (ap, ap, n, MPFR_LIMB_ONE << sh);
      ap[n-1] |= MPFR_LIMB_HIGHBIT;
      bx--;
      rb = rbb;
      rbb = sbb;
      sbb = 0;
      /* Note: for p=1 this case can only happen with d=1, but then we will
         have rb=sb=0 at the next round. */
      goto rounding;
    }

  /* now if a is a power of two, necessarily rb = 0, which means the exact
     result is always in (pred(a), a), and the bounds cannot be attained */
  if (rnd_mode == MPFR_RNDF)
    inexact = 0;
  else if (rnd_mode == MPFR_RNDN)
    {
      if (pow2)
        {
          MPFR_ASSERTD(rb == 0);
          /* since we are at the end of the binade, we have in fact
             rb = rbb and sb = sbb */
          rb = rbb;
          sb = sbb;
        }
      /* Warning: for p=1, the significand is always odd: the "even" rule
         rounds to the value with largest magnitude, thus we have to check
         that case separately */
      if (rb == 0 ||
          (rb != 0 && sb == 0 &&
           ((ap[0] & (MPFR_LIMB_ONE << sh)) == 0 || p == 1)))
        inexact = 1; /* round to a and return 1 */
      else
        /* round to pred(a) and return -1 */
        {
        subtract:
          mpn_sub_1 (ap, ap, n, MPFR_LIMB_ONE << sh);
          if (pow2) /* deal with cancellation */
            {
              ap[n-1] |= MPFR_LIMB_HIGHBIT;
              bx--;
            }
          inexact = -1;
        }
    }
  else /* directed rounding */
    {
      MPFR_UPDATE_RND_MODE(rnd_mode, MPFR_IS_NEG(a));
      if (rnd_mode == MPFR_RNDZ)
        goto subtract;
      else
        inexact = 1;
    }

 end_of_sub:
  /* Update Exponent */
  /* bx >= emin. Proof:
     If d==0      : Exact case. This is never called.
     if 1 < d < p : bx=MPFR_EXP(b) or MPFR_EXP(b)-1 > MPFR_EXP(c) > emin
     if d == 1    : bx=MPFR_EXP(b). If we could lose any bits, the exact
                    normalization is called.
     if d >= p    : bx=MPFR_EXP(b) >= MPFR_EXP(c) + p > emin
     After SubOneUlp, we could have one bit less.
     if 1 < d < p : bx >= MPFR_EXP(b)-2 >= MPFR_EXP(c) > emin
     if d == 1    : bx >= MPFR_EXP(b)-1 = MPFR_EXP(c) > emin.
     if d >= p    : bx >= MPFR_EXP(b)-1 > emin since p>=2. */
  MPFR_ASSERTD( bx >= __gmpfr_emin);
  MPFR_SET_EXP (a, bx);
  MPFR_TMP_FREE(marker);
  MPFR_RET (inexact * MPFR_INT_SIGN (a));
}