diff options
Diffstat (limited to 'mpc/src/sqr.c')
-rw-r--r-- | mpc/src/sqr.c | 324 |
1 files changed, 324 insertions, 0 deletions
diff --git a/mpc/src/sqr.c b/mpc/src/sqr.c new file mode 100644 index 0000000000..f1ce1bac8a --- /dev/null +++ b/mpc/src/sqr.c @@ -0,0 +1,324 @@ +/* mpc_sqr -- Square a complex number. + +Copyright (C) 2002, 2005, 2008, 2009, 2010, 2011, 2012 INRIA + +This file is part of GNU MPC. + +GNU MPC is free software; you can redistribute it and/or modify it under +the terms of the GNU Lesser General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +GNU MPC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for +more details. + +You should have received a copy of the GNU Lesser General Public License +along with this program. If not, see http://www.gnu.org/licenses/ . +*/ + +#include <stdio.h> /* for MPC_ASSERT */ +#include "mpc-impl.h" + + +static int +mpfr_fsss (mpfr_ptr z, mpfr_srcptr a, mpfr_srcptr c, mpfr_rnd_t rnd) +{ + /* Computes z = a^2 - c^2. + Assumes that a and c are finite and non-zero; so a squaring yielding + an infinity is an overflow, and a squaring yielding 0 is an underflow. + Assumes further that z is distinct from a and c. */ + + int inex; + mpfr_t u, v; + + /* u=a^2, v=c^2 exactly */ + mpfr_init2 (u, 2*mpfr_get_prec (a)); + mpfr_init2 (v, 2*mpfr_get_prec (c)); + mpfr_sqr (u, a, GMP_RNDN); + mpfr_sqr (v, c, GMP_RNDN); + + /* tentatively compute z as u-v; here we need z to be distinct + from a and c to not lose the latter */ + inex = mpfr_sub (z, u, v, rnd); + + if (mpfr_inf_p (z)) { + /* replace by "correctly rounded overflow" */ + mpfr_set_si (z, (mpfr_signbit (z) ? -1 : 1), GMP_RNDN); + inex = mpfr_mul_2ui (z, z, mpfr_get_emax (), rnd); + } + else if (mpfr_zero_p (u) && !mpfr_zero_p (v)) { + /* exactly u underflowed, determine inexact flag */ + inex = (mpfr_signbit (u) ? 1 : -1); + } + else if (mpfr_zero_p (v) && !mpfr_zero_p (u)) { + /* exactly v underflowed, determine inexact flag */ + inex = (mpfr_signbit (v) ? -1 : 1); + } + else if (mpfr_nan_p (z) || (mpfr_zero_p (u) && mpfr_zero_p (v))) { + /* In the first case, u and v are +inf. + In the second case, u and v are zeroes; their difference may be 0 + or the least representable number, with a sign to be determined. + Redo the computations with mpz_t exponents */ + mpfr_exp_t ea, ec; + mpz_t eu, ev; + /* cheat to work around the const qualifiers */ + + /* Normalise the input by shifting and keep track of the shifts in + the exponents of u and v */ + ea = mpfr_get_exp (a); + ec = mpfr_get_exp (c); + + mpfr_set_exp ((mpfr_ptr) a, (mpfr_prec_t) 0); + mpfr_set_exp ((mpfr_ptr) c, (mpfr_prec_t) 0); + + mpz_init (eu); + mpz_init (ev); + mpz_set_si (eu, (long int) ea); + mpz_mul_2exp (eu, eu, 1); + mpz_set_si (ev, (long int) ec); + mpz_mul_2exp (ev, ev, 1); + + /* recompute u and v and move exponents to eu and ev */ + mpfr_sqr (u, a, GMP_RNDN); + /* exponent of u is non-positive */ + mpz_sub_ui (eu, eu, (unsigned long int) (-mpfr_get_exp (u))); + mpfr_set_exp (u, (mpfr_prec_t) 0); + mpfr_sqr (v, c, GMP_RNDN); + mpz_sub_ui (ev, ev, (unsigned long int) (-mpfr_get_exp (v))); + mpfr_set_exp (v, (mpfr_prec_t) 0); + if (mpfr_nan_p (z)) { + mpfr_exp_t emax = mpfr_get_emax (); + int overflow; + /* We have a = ma * 2^ea with 1/2 <= |ma| < 1 and ea <= emax. + So eu <= 2*emax, and eu > emax since we have + an overflow. The same holds for ev. Shift u and v by as much as + possible so that one of them has exponent emax and the + remaining exponents in eu and ev are the same. Then carry out + the addition. Shifting u and v prevents an underflow. */ + if (mpz_cmp (eu, ev) >= 0) { + mpfr_set_exp (u, emax); + mpz_sub_ui (eu, eu, (long int) emax); + mpz_sub (ev, ev, eu); + mpfr_set_exp (v, (mpfr_exp_t) mpz_get_ui (ev)); + /* remaining common exponent is now in eu */ + } + else { + mpfr_set_exp (v, emax); + mpz_sub_ui (ev, ev, (long int) emax); + mpz_sub (eu, eu, ev); + mpfr_set_exp (u, (mpfr_exp_t) mpz_get_ui (eu)); + mpz_set (eu, ev); + /* remaining common exponent is now also in eu */ + } + inex = mpfr_sub (z, u, v, rnd); + /* Result is finite since u and v have the same sign. */ + overflow = mpfr_mul_2ui (z, z, mpz_get_ui (eu), rnd); + if (overflow) + inex = overflow; + } + else { + int underflow; + /* Subtraction of two zeroes. We have a = ma * 2^ea + with 1/2 <= |ma| < 1 and ea >= emin and similarly for b. + So 2*emin < 2*emin+1 <= eu < emin < 0, and analogously for v. */ + mpfr_exp_t emin = mpfr_get_emin (); + if (mpz_cmp (eu, ev) <= 0) { + mpfr_set_exp (u, emin); + mpz_add_ui (eu, eu, (unsigned long int) (-emin)); + mpz_sub (ev, ev, eu); + mpfr_set_exp (v, (mpfr_exp_t) mpz_get_si (ev)); + } + else { + mpfr_set_exp (v, emin); + mpz_add_ui (ev, ev, (unsigned long int) (-emin)); + mpz_sub (eu, eu, ev); + mpfr_set_exp (u, (mpfr_exp_t) mpz_get_si (eu)); + mpz_set (eu, ev); + } + inex = mpfr_sub (z, u, v, rnd); + mpz_neg (eu, eu); + underflow = mpfr_div_2ui (z, z, mpz_get_ui (eu), rnd); + if (underflow) + inex = underflow; + } + + mpz_clear (eu); + mpz_clear (ev); + + mpfr_set_exp ((mpfr_ptr) a, ea); + mpfr_set_exp ((mpfr_ptr) c, ec); + /* works also when a == c */ + } + + mpfr_clear (u); + mpfr_clear (v); + + return inex; +} + + +int +mpc_sqr (mpc_ptr rop, mpc_srcptr op, mpc_rnd_t rnd) +{ + int ok; + mpfr_t u, v; + mpfr_t x; + /* temporary variable to hold the real part of op, + needed in the case rop==op */ + mpfr_prec_t prec; + int inex_re, inex_im, inexact; + mpfr_exp_t emin; + int saved_underflow; + + /* special values: NaN and infinities */ + if (!mpc_fin_p (op)) { + if (mpfr_nan_p (mpc_realref (op)) || mpfr_nan_p (mpc_imagref (op))) { + mpfr_set_nan (mpc_realref (rop)); + mpfr_set_nan (mpc_imagref (rop)); + } + else if (mpfr_inf_p (mpc_realref (op))) { + if (mpfr_inf_p (mpc_imagref (op))) { + mpfr_set_inf (mpc_imagref (rop), + MPFR_SIGN (mpc_realref (op)) * MPFR_SIGN (mpc_imagref (op))); + mpfr_set_nan (mpc_realref (rop)); + } + else { + if (mpfr_zero_p (mpc_imagref (op))) + mpfr_set_nan (mpc_imagref (rop)); + else + mpfr_set_inf (mpc_imagref (rop), + MPFR_SIGN (mpc_realref (op)) * MPFR_SIGN (mpc_imagref (op))); + mpfr_set_inf (mpc_realref (rop), +1); + } + } + else /* IM(op) is infinity, RE(op) is not */ { + if (mpfr_zero_p (mpc_realref (op))) + mpfr_set_nan (mpc_imagref (rop)); + else + mpfr_set_inf (mpc_imagref (rop), + MPFR_SIGN (mpc_realref (op)) * MPFR_SIGN (mpc_imagref (op))); + mpfr_set_inf (mpc_realref (rop), -1); + } + return MPC_INEX (0, 0); /* exact */ + } + + prec = MPC_MAX_PREC(rop); + + /* Check for real resp. purely imaginary number */ + if (mpfr_zero_p (mpc_imagref(op))) { + int same_sign = mpfr_signbit (mpc_realref (op)) == mpfr_signbit (mpc_imagref (op)); + inex_re = mpfr_sqr (mpc_realref(rop), mpc_realref(op), MPC_RND_RE(rnd)); + inex_im = mpfr_set_ui (mpc_imagref(rop), 0ul, GMP_RNDN); + if (!same_sign) + mpc_conj (rop, rop, MPC_RNDNN); + return MPC_INEX(inex_re, inex_im); + } + if (mpfr_zero_p (mpc_realref(op))) { + int same_sign = mpfr_signbit (mpc_realref (op)) == mpfr_signbit (mpc_imagref (op)); + inex_re = -mpfr_sqr (mpc_realref(rop), mpc_imagref(op), INV_RND (MPC_RND_RE(rnd))); + mpfr_neg (mpc_realref(rop), mpc_realref(rop), GMP_RNDN); + inex_im = mpfr_set_ui (mpc_imagref(rop), 0ul, GMP_RNDN); + if (!same_sign) + mpc_conj (rop, rop, MPC_RNDNN); + return MPC_INEX(inex_re, inex_im); + } + + if (rop == op) + { + mpfr_init2 (x, MPC_PREC_RE (op)); + mpfr_set (x, op->re, GMP_RNDN); + } + else + x [0] = op->re [0]; + /* From here on, use x instead of op->re and safely overwrite rop->re. */ + + /* Compute real part of result. */ + if (SAFE_ABS (mpfr_exp_t, + mpfr_get_exp (mpc_realref (op)) - mpfr_get_exp (mpc_imagref (op))) + > (mpfr_exp_t) MPC_MAX_PREC (op) / 2) { + /* If the real and imaginary parts of the argument have very different + exponents, it is not reasonable to use Karatsuba squaring; compute + exactly with the standard formulae instead, even if this means an + additional multiplication. Using the approach copied from mul, over- + and underflows are also handled correctly. */ + + inex_re = mpfr_fsss (rop->re, x, op->im, MPC_RND_RE (rnd)); + } + else { + /* Karatsuba squaring: we compute the real part as (x+y)*(x-y) and the + imaginary part as 2*x*y, with a total of 2M instead of 2S+1M for the + naive algorithm, which computes x^2-y^2 and 2*y*y */ + mpfr_init (u); + mpfr_init (v); + + emin = mpfr_get_emin (); + + do + { + prec += mpc_ceil_log2 (prec) + 5; + + mpfr_set_prec (u, prec); + mpfr_set_prec (v, prec); + + /* Let op = x + iy. We need u = x+y and v = x-y, rounded away. */ + /* The error is bounded above by 1 ulp. */ + /* We first let inexact be 1 if the real part is not computed */ + /* exactly and determine the sign later. */ + inexact = ROUND_AWAY (mpfr_add (u, x, mpc_imagref (op), MPFR_RNDA), u) + | ROUND_AWAY (mpfr_sub (v, x, mpc_imagref (op), MPFR_RNDA), v); + + /* compute the real part as u*v, rounded away */ + /* determine also the sign of inex_re */ + + if (mpfr_sgn (u) == 0 || mpfr_sgn (v) == 0) { + /* as we have rounded away, the result is exact */ + mpfr_set_ui (mpc_realref (rop), 0, GMP_RNDN); + inex_re = 0; + ok = 1; + } + else { + mpfr_rnd_t rnd_away; + /* FIXME: can be replaced by MPFR_RNDA in mpfr >= 3 */ + rnd_away = (mpfr_sgn (u) * mpfr_sgn (v) > 0 ? GMP_RNDU : GMP_RNDD); + inexact |= ROUND_AWAY (mpfr_mul (u, u, v, MPFR_RNDA), u); /* error 5 */ + if (mpfr_get_exp (u) == emin || mpfr_inf_p (u)) { + /* under- or overflow */ + inex_re = mpfr_fsss (rop->re, x, op->im, MPC_RND_RE (rnd)); + ok = 1; + } + else { + ok = (!inexact) | mpfr_can_round (u, prec - 3, + rnd_away, GMP_RNDZ, + MPC_PREC_RE (rop) + (MPC_RND_RE (rnd) == GMP_RNDN)); + if (ok) { + inex_re = mpfr_set (mpc_realref (rop), u, MPC_RND_RE (rnd)); + if (inex_re == 0) + /* remember that u was already rounded */ + inex_re = inexact; + } + } + } + } + while (!ok); + + mpfr_clear (u); + mpfr_clear (v); + } + + saved_underflow = mpfr_underflow_p (); + mpfr_clear_underflow (); + inex_im = mpfr_mul (rop->im, x, op->im, MPC_RND_IM (rnd)); + if (!mpfr_underflow_p ()) + inex_im |= mpfr_mul_2ui (rop->im, rop->im, 1, MPC_RND_IM (rnd)); + /* We must not multiply by 2 if rop->im has been set to the smallest + representable number. */ + if (saved_underflow) + mpfr_set_underflow (); + + if (rop == op) + mpfr_clear (x); + + return MPC_INEX (inex_re, inex_im); +} |