1 files changed, 324 insertions, 0 deletions
diff --git a/mpc/src/sqr.c b/mpc/src/sqr.c
new file mode 100644
index 0000000000..f1ce1bac8a
--- /dev/null
+++ b/mpc/src/sqr.c
@@ -0,0 +1,324 @@
+/* mpc_sqr -- Square a complex number.
+
+Copyright (C) 2002, 2005, 2008, 2009, 2010, 2011, 2012 INRIA
+
+This file is part of GNU MPC.
+
+GNU MPC is free software; you can redistribute it and/or modify it under
+the terms of the GNU Lesser General Public License as published by the
+Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+GNU MPC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
+more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this program. If not, see http://www.gnu.org/licenses/ .
+*/
+
+#include <stdio.h>    /* for MPC_ASSERT */
+#include "mpc-impl.h"
+
+
+static int
+mpfr_fsss (mpfr_ptr z, mpfr_srcptr a, mpfr_srcptr c, mpfr_rnd_t rnd)
+{
+   /* Computes z = a^2 - c^2.
+      Assumes that a and c are finite and non-zero; so a squaring yielding
+      an infinity is an overflow, and a squaring yielding 0 is an underflow.
+      Assumes further that z is distinct from a and c. */
+
+   int inex;
+   mpfr_t u, v;
+
+   /* u=a^2, v=c^2 exactly */
+   mpfr_init2 (u, 2*mpfr_get_prec (a));
+   mpfr_init2 (v, 2*mpfr_get_prec (c));
+   mpfr_sqr (u, a, GMP_RNDN);
+   mpfr_sqr (v, c, GMP_RNDN);
+
+   /* tentatively compute z as u-v; here we need z to be distinct
+      from a and c to not lose the latter */
+   inex = mpfr_sub (z, u, v, rnd);
+
+   if (mpfr_inf_p (z)) {
+      /* replace by "correctly rounded overflow" */
+      mpfr_set_si (z, (mpfr_signbit (z) ? -1 : 1), GMP_RNDN);
+      inex = mpfr_mul_2ui (z, z, mpfr_get_emax (), rnd);
+   }
+   else if (mpfr_zero_p (u) && !mpfr_zero_p (v)) {
+      /* exactly u underflowed, determine inexact flag */
+      inex = (mpfr_signbit (u) ? 1 : -1);
+   }
+   else if (mpfr_zero_p (v) && !mpfr_zero_p (u)) {
+      /* exactly v underflowed, determine inexact flag */
+      inex = (mpfr_signbit (v) ? -1 : 1);
+   }
+   else if (mpfr_nan_p (z) || (mpfr_zero_p (u) && mpfr_zero_p (v))) {
+      /* In the first case, u and v are +inf.
+         In the second case, u and v are zeroes; their difference may be 0
+         or the least representable number, with a sign to be determined.
+         Redo the computations with mpz_t exponents */
+      mpfr_exp_t ea, ec;
+      mpz_t eu, ev;
+         /* cheat to work around the const qualifiers */
+
+      /* Normalise the input by shifting and keep track of the shifts in
+         the exponents of u and v */
+      ea = mpfr_get_exp (a);
+      ec = mpfr_get_exp (c);
+
+      mpfr_set_exp ((mpfr_ptr) a, (mpfr_prec_t) 0);
+      mpfr_set_exp ((mpfr_ptr) c, (mpfr_prec_t) 0);
+
+      mpz_init (eu);
+      mpz_init (ev);
+      mpz_set_si (eu, (long int) ea);
+      mpz_mul_2exp (eu, eu, 1);
+      mpz_set_si (ev, (long int) ec);
+      mpz_mul_2exp (ev, ev, 1);
+
+      /* recompute u and v and move exponents to eu and ev */
+      mpfr_sqr (u, a, GMP_RNDN);
+      /* exponent of u is non-positive */
+      mpz_sub_ui (eu, eu, (unsigned long int) (-mpfr_get_exp (u)));
+      mpfr_set_exp (u, (mpfr_prec_t) 0);
+      mpfr_sqr (v, c, GMP_RNDN);
+      mpz_sub_ui (ev, ev, (unsigned long int) (-mpfr_get_exp (v)));
+      mpfr_set_exp (v, (mpfr_prec_t) 0);
+      if (mpfr_nan_p (z)) {
+         mpfr_exp_t emax = mpfr_get_emax ();
+         int overflow;
+         /* We have a = ma * 2^ea with 1/2 <= |ma| < 1 and ea <= emax.
+            So eu <= 2*emax, and eu > emax since we have
+            an overflow. The same holds for ev. Shift u and v by as much as
+            possible so that one of them has exponent emax and the
+            remaining exponents in eu and ev are the same. Then carry out
+            the addition. Shifting u and v prevents an underflow. */
+         if (mpz_cmp (eu, ev) >= 0) {
+            mpfr_set_exp (u, emax);
+            mpz_sub_ui (eu, eu, (long int) emax);
+            mpz_sub (ev, ev, eu);
+            mpfr_set_exp (v, (mpfr_exp_t) mpz_get_ui (ev));
+               /* remaining common exponent is now in eu */
+         }
+         else {
+            mpfr_set_exp (v, emax);
+            mpz_sub_ui (ev, ev, (long int) emax);
+            mpz_sub (eu, eu, ev);
+            mpfr_set_exp (u, (mpfr_exp_t) mpz_get_ui (eu));
+            mpz_set (eu, ev);
+               /* remaining common exponent is now also in eu */
+         }
+         inex = mpfr_sub (z, u, v, rnd);
+            /* Result is finite since u and v have the same sign. */
+         overflow = mpfr_mul_2ui (z, z, mpz_get_ui (eu), rnd);
+         if (overflow)
+            inex = overflow;
+      }
+      else {
+         int underflow;
+         /* Subtraction of two zeroes. We have a = ma * 2^ea
+            with 1/2 <= |ma| < 1 and ea >= emin and similarly for b.
+            So 2*emin < 2*emin+1 <= eu < emin < 0, and analogously for v. */
+         mpfr_exp_t emin = mpfr_get_emin ();
+         if (mpz_cmp (eu, ev) <= 0) {
+            mpfr_set_exp (u, emin);
+            mpz_add_ui (eu, eu, (unsigned long int) (-emin));
+            mpz_sub (ev, ev, eu);
+            mpfr_set_exp (v, (mpfr_exp_t) mpz_get_si (ev));
+         }
+         else {
+            mpfr_set_exp (v, emin);
+            mpz_add_ui (ev, ev, (unsigned long int) (-emin));
+            mpz_sub (eu, eu, ev);
+            mpfr_set_exp (u, (mpfr_exp_t) mpz_get_si (eu));
+            mpz_set (eu, ev);
+         }
+         inex = mpfr_sub (z, u, v, rnd);
+         mpz_neg (eu, eu);
+         underflow = mpfr_div_2ui (z, z, mpz_get_ui (eu), rnd);
+         if (underflow)
+            inex = underflow;
+      }
+
+      mpz_clear (eu);
+      mpz_clear (ev);
+
+      mpfr_set_exp ((mpfr_ptr) a, ea);
+      mpfr_set_exp ((mpfr_ptr) c, ec);
+         /* works also when a == c */
+   }
+
+   mpfr_clear (u);
+   mpfr_clear (v);
+
+   return inex;
+}
+
+
+int
+mpc_sqr (mpc_ptr rop, mpc_srcptr op, mpc_rnd_t rnd)
+{
+   int ok;
+   mpfr_t u, v;
+   mpfr_t x;
+      /* temporary variable to hold the real part of op,
+         needed in the case rop==op */
+   mpfr_prec_t prec;
+   int inex_re, inex_im, inexact;
+   mpfr_exp_t emin;
+   int saved_underflow;
+
+   /* special values: NaN and infinities */
+   if (!mpc_fin_p (op)) {
+      if (mpfr_nan_p (mpc_realref (op)) || mpfr_nan_p (mpc_imagref (op))) {
+         mpfr_set_nan (mpc_realref (rop));
+         mpfr_set_nan (mpc_imagref (rop));
+      }
+      else if (mpfr_inf_p (mpc_realref (op))) {
+         if (mpfr_inf_p (mpc_imagref (op))) {
+            mpfr_set_inf (mpc_imagref (rop),
+                          MPFR_SIGN (mpc_realref (op)) * MPFR_SIGN (mpc_imagref (op)));
+            mpfr_set_nan (mpc_realref (rop));
+         }
+         else {
+            if (mpfr_zero_p (mpc_imagref (op)))
+               mpfr_set_nan (mpc_imagref (rop));
+            else
+               mpfr_set_inf (mpc_imagref (rop),
+                             MPFR_SIGN (mpc_realref (op)) * MPFR_SIGN (mpc_imagref (op)));
+            mpfr_set_inf (mpc_realref (rop), +1);
+         }
+      }
+      else /* IM(op) is infinity, RE(op) is not */ {
+         if (mpfr_zero_p (mpc_realref (op)))
+            mpfr_set_nan (mpc_imagref (rop));
+         else
+            mpfr_set_inf (mpc_imagref (rop),
+                          MPFR_SIGN (mpc_realref (op)) * MPFR_SIGN (mpc_imagref (op)));
+         mpfr_set_inf (mpc_realref (rop), -1);
+      }
+      return MPC_INEX (0, 0); /* exact */
+   }
+
+   prec = MPC_MAX_PREC(rop);
+
+   /* Check for real resp. purely imaginary number */
+   if (mpfr_zero_p (mpc_imagref(op))) {
+      int same_sign = mpfr_signbit (mpc_realref (op)) == mpfr_signbit (mpc_imagref (op));
+      inex_re = mpfr_sqr (mpc_realref(rop), mpc_realref(op), MPC_RND_RE(rnd));
+      inex_im = mpfr_set_ui (mpc_imagref(rop), 0ul, GMP_RNDN);
+      if (!same_sign)
+        mpc_conj (rop, rop, MPC_RNDNN);
+      return MPC_INEX(inex_re, inex_im);
+   }
+   if (mpfr_zero_p (mpc_realref(op))) {
+      int same_sign = mpfr_signbit (mpc_realref (op)) == mpfr_signbit (mpc_imagref (op));
+      inex_re = -mpfr_sqr (mpc_realref(rop), mpc_imagref(op), INV_RND (MPC_RND_RE(rnd)));
+      mpfr_neg (mpc_realref(rop), mpc_realref(rop), GMP_RNDN);
+      inex_im = mpfr_set_ui (mpc_imagref(rop), 0ul, GMP_RNDN);
+      if (!same_sign)
+        mpc_conj (rop, rop, MPC_RNDNN);
+      return MPC_INEX(inex_re, inex_im);
+   }
+
+   if (rop == op)
+   {
+      mpfr_init2 (x, MPC_PREC_RE (op));
+      mpfr_set (x, op->re, GMP_RNDN);
+   }
+   else
+      x [0] = op->re [0];
+   /* From here on, use x instead of op->re and safely overwrite rop->re. */
+
+   /* Compute real part of result. */
+   if (SAFE_ABS (mpfr_exp_t,
+                 mpfr_get_exp (mpc_realref (op)) - mpfr_get_exp (mpc_imagref (op)))
+       > (mpfr_exp_t) MPC_MAX_PREC (op) / 2) {
+      /* If the real and imaginary parts of the argument have very different
+         exponents, it is not reasonable to use Karatsuba squaring; compute
+         exactly with the standard formulae instead, even if this means an
+         additional multiplication. Using the approach copied from mul, over-
+         and underflows are also handled correctly. */
+
+      inex_re = mpfr_fsss (rop->re, x, op->im, MPC_RND_RE (rnd));
+   }
+   else {
+      /* Karatsuba squaring: we compute the real part as (x+y)*(x-y) and the
+         imaginary part as 2*x*y, with a total of 2M instead of 2S+1M for the
+         naive algorithm, which computes x^2-y^2 and 2*y*y */
+      mpfr_init (u);
+      mpfr_init (v);
+
+      emin = mpfr_get_emin ();
+
+      do
+      {
+         prec += mpc_ceil_log2 (prec) + 5;
+
+         mpfr_set_prec (u, prec);
+         mpfr_set_prec (v, prec);
+
+         /* Let op = x + iy. We need u = x+y and v = x-y, rounded away.      */
+         /* The error is bounded above by 1 ulp.                             */
+         /* We first let inexact be 1 if the real part is not computed       */
+         /* exactly and determine the sign later.                            */
+         inexact =  ROUND_AWAY (mpfr_add (u, x, mpc_imagref (op), MPFR_RNDA), u)
+                  | ROUND_AWAY (mpfr_sub (v, x, mpc_imagref (op), MPFR_RNDA), v);
+
+         /* compute the real part as u*v, rounded away                    */
+         /* determine also the sign of inex_re                            */
+
+         if (mpfr_sgn (u) == 0 || mpfr_sgn (v) == 0) {
+            /* as we have rounded away, the result is exact */
+            mpfr_set_ui (mpc_realref (rop), 0, GMP_RNDN);
+            inex_re = 0;
+            ok = 1;
+         }
+         else {
+            mpfr_rnd_t rnd_away;
+               /* FIXME: can be replaced by MPFR_RNDA in mpfr >= 3 */
+            rnd_away = (mpfr_sgn (u) * mpfr_sgn (v) > 0 ? GMP_RNDU : GMP_RNDD);
+            inexact |= ROUND_AWAY (mpfr_mul (u, u, v, MPFR_RNDA), u); /* error 5 */
+            if (mpfr_get_exp (u) == emin || mpfr_inf_p (u)) {
+               /* under- or overflow */
+               inex_re = mpfr_fsss (rop->re, x, op->im, MPC_RND_RE (rnd));
+               ok = 1;
+            }
+            else {
+               ok = (!inexact) | mpfr_can_round (u, prec - 3,
+                     rnd_away, GMP_RNDZ,
+                     MPC_PREC_RE (rop) + (MPC_RND_RE (rnd) == GMP_RNDN));
+               if (ok) {
+                  inex_re = mpfr_set (mpc_realref (rop), u, MPC_RND_RE (rnd));
+                  if (inex_re == 0)
+                     /* remember that u was already rounded */
+                     inex_re = inexact;
+               }
+            }
+         }
+      }
+      while (!ok);
+
+      mpfr_clear (u);
+      mpfr_clear (v);
+   }
+
+   saved_underflow = mpfr_underflow_p ();
+   mpfr_clear_underflow ();
+   inex_im = mpfr_mul (rop->im, x, op->im, MPC_RND_IM (rnd));
+   if (!mpfr_underflow_p ())
+      inex_im |= mpfr_mul_2ui (rop->im, rop->im, 1, MPC_RND_IM (rnd));
+      /* We must not multiply by 2 if rop->im has been set to the smallest
+         representable number. */
+   if (saved_underflow)
+      mpfr_set_underflow ();
+
+   if (rop == op)
+      mpfr_clear (x);
+
+   return MPC_INEX (inex_re, inex_im);
+}