mpc/src/norm.c


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182

/* mpc_norm -- Square of the norm of a complex number.

Copyright (C) 2002, 2005, 2008, 2009, 2010, 2011 INRIA

This file is part of GNU MPC.

GNU MPC is free software; you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License as published by the
Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

GNU MPC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
more details.

You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see http://www.gnu.org/licenses/ .
*/

#include <stdio.h>    /* for MPC_ASSERT */
#include "mpc-impl.h"

/* a <- norm(b) = b * conj(b)
   (the rounding mode is mpfr_rnd_t here since we return an mpfr number) */
int
mpc_norm (mpfr_ptr a, mpc_srcptr b, mpfr_rnd_t rnd)
{
   int inexact;
   int saved_underflow, saved_overflow;

   /* handling of special values; consistent with abs in that
      norm = abs^2; so norm (+-inf, xxx) = norm (xxx, +-inf) = +inf */
   if (!mpc_fin_p (b))
         return mpc_abs (a, b, rnd);
   else if (mpfr_zero_p (mpc_realref (b))) {
      if (mpfr_zero_p (mpc_imagref (b)))
         return mpfr_set_ui (a, 0, rnd); /* +0 */
      else
         return mpfr_sqr (a, mpc_imagref (b), rnd);
   }
   else if (mpfr_zero_p (mpc_imagref (b)))
     return mpfr_sqr (a, mpc_realref (b), rnd); /* Re(b) <> 0 */

   else /* everything finite and non-zero */ {
      mpfr_t u, v, res;
      mpfr_prec_t prec, prec_u, prec_v;
      int loops;
      const int max_loops = 2;
         /* switch to exact squarings when loops==max_loops */

      prec = mpfr_get_prec (a);

      mpfr_init (u);
      mpfr_init (v);
      mpfr_init (res);

      /* save the underflow or overflow flags from MPFR */
      saved_underflow = mpfr_underflow_p ();
      saved_overflow = mpfr_overflow_p ();

      loops = 0;
      mpfr_clear_underflow ();
      mpfr_clear_overflow ();
      do {
         loops++;
         prec += mpc_ceil_log2 (prec) + 3;
         if (loops >= max_loops) {
            prec_u = 2 * MPC_PREC_RE (b);
            prec_v = 2 * MPC_PREC_IM (b);
         }
         else {
            prec_u = MPC_MIN (prec, 2 * MPC_PREC_RE (b));
            prec_v = MPC_MIN (prec, 2 * MPC_PREC_IM (b));
         }

         mpfr_set_prec (u, prec_u);
         mpfr_set_prec (v, prec_v);

         inexact  = mpfr_sqr (u, mpc_realref(b), GMP_RNDD); /* err <= 1 ulp in prec */
         inexact |= mpfr_sqr (v, mpc_imagref(b), GMP_RNDD); /* err <= 1 ulp in prec */

         /* If loops = max_loops, inexact should be 0 here, except in case
               of underflow or overflow.
            If loops < max_loops and inexact is zero, we can exit the
            while-loop since it only remains to add u and v into a. */
         if (inexact) {
             mpfr_set_prec (res, prec);
             mpfr_add (res, u, v, GMP_RNDD); /* err <= 3 ulp in prec */
         }

      } while (loops < max_loops && inexact != 0
               && !mpfr_can_round (res, prec - 2, GMP_RNDD, GMP_RNDU,
                                   mpfr_get_prec (a) + (rnd == GMP_RNDN)));

      if (!inexact)
         /* squarings were exact, neither underflow nor overflow */
         inexact = mpfr_add (a, u, v, rnd);
      /* if there was an overflow in Re(b)^2 or Im(b)^2 or their sum,
         since the norm is larger, there is an overflow for the norm */
      else if (mpfr_overflow_p ()) {
         /* replace by "correctly rounded overflow" */
         mpfr_set_ui (a, 1ul, GMP_RNDN);
         inexact = mpfr_mul_2ui (a, a, mpfr_get_emax (), rnd);
      }
      else if (mpfr_underflow_p ()) {
         /* necessarily one of the squarings did underflow (otherwise their
            sum could not underflow), thus one of u, v is zero. */
         mpfr_exp_t emin = mpfr_get_emin ();

         /* Now either both u and v are zero, or u is zero and v exact,
            or v is zero and u exact.
            In the latter case, Im(b)^2 < 2^(emin-1).
            If ulp(u) >= 2^(emin+1) and norm(b) is not exactly
            representable at the target precision, then rounding u+Im(b)^2
            is equivalent to rounding u+2^(emin-1).
            For instance, if exp(u)>0 and the target precision is smaller
            than about |emin|, the norm is not representable. To make the
            scaling in the "else" case work without underflow, we test
            whether exp(u) is larger than a small negative number instead.
            The second case is handled analogously.                        */
         if (!mpfr_zero_p (u)
             && mpfr_get_exp (u) - 2 * (mpfr_exp_t) prec_u > emin
             && mpfr_get_exp (u) > -10) {
               mpfr_set_prec (v, MPFR_PREC_MIN);
               mpfr_set_ui_2exp (v, 1, emin - 1, GMP_RNDZ);
               inexact = mpfr_add (a, u, v, rnd);
         }
         else if (!mpfr_zero_p (v)
             && mpfr_get_exp (v) - 2 * (mpfr_exp_t) prec_v > emin
             && mpfr_get_exp (v) > -10) {
               mpfr_set_prec (u, MPFR_PREC_MIN);
               mpfr_set_ui_2exp (u, 1, emin - 1, GMP_RNDZ);
               inexact = mpfr_add (a, u, v, rnd);
         }
         else {
            unsigned long int scale, exp_re, exp_im;
            int inex_underflow;

            /* scale the input to an average exponent close to 0 */
            exp_re = (unsigned long int) (-mpfr_get_exp (mpc_realref (b)));
            exp_im = (unsigned long int) (-mpfr_get_exp (mpc_imagref (b)));
            scale = exp_re / 2 + exp_im / 2 + (exp_re % 2 + exp_im % 2) / 2;
               /* (exp_re + exp_im) / 2, computed in a way avoiding
                  integer overflow                                  */
            if (mpfr_zero_p (u)) {
               /* recompute the scaled value exactly */
               mpfr_mul_2ui (u, mpc_realref (b), scale, GMP_RNDN);
               mpfr_sqr (u, u, GMP_RNDN);
            }
            else /* just scale */
               mpfr_mul_2ui (u, u, 2*scale, GMP_RNDN);
            if (mpfr_zero_p (v)) {
               mpfr_mul_2ui (v, mpc_imagref (b), scale, GMP_RNDN);
               mpfr_sqr (v, v, GMP_RNDN);
            }
            else
               mpfr_mul_2ui (v, v, 2*scale, GMP_RNDN);

            inexact = mpfr_add (a, u, v, rnd);
            mpfr_clear_underflow ();
            inex_underflow = mpfr_div_2ui (a, a, 2*scale, rnd);
            if (mpfr_underflow_p ())
               inexact = inex_underflow;
         }
      }
      else /* no problems, ternary value due to mpfr_can_round trick */
         inexact = mpfr_set (a, res, rnd);

      /* restore underflow and overflow flags from MPFR */
      if (saved_underflow)
        mpfr_set_underflow ();
      if (saved_overflow)
        mpfr_set_overflow ();

      mpfr_clear (u);
      mpfr_clear (v);
      mpfr_clear (res);
   }

   return inexact;
}