summaryrefslogtreecommitdiff
path: root/tests/tnrandom_chisq.c
blob: 4a41a78eb7fb92a4b086bc4df611937b4e068a38 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
/* Chi-squared test for mpfr_nrandom

Copyright 2011-2021 Free Software Foundation, Inc.
Contributed by Charles Karney <charles@karney.com>, SRI International.

This file is part of the GNU MPFR Library.

The GNU MPFR Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MPFR Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */

#include "mpfr-test.h"

/* Return Phi(x) = erf(x / sqrt(2)) / 2, the cumulative probability function
 * for the normal distribution.  We only take differences of this function so
 * the offset doesn't matter; here Phi(0) = 0. */
static void
normal_cumulative (mpfr_ptr z, mpfr_ptr x, mpfr_rnd_t rnd)
{
  mpfr_sqrt_ui (z, 2, rnd);
  mpfr_div (z, x, z, rnd);
  mpfr_erf (z, z, rnd);
  mpfr_div_ui (z, z, 2, rnd);
}

/* Given nu and chisqp, compute probability that chisq > chisqp.  This uses,
 * A&S 26.4.16,
 *
 * Q(nu,chisqp) =
 *     erfc( (3/2)*sqrt(nu) * ( cbrt(chisqp/nu) - 1 + 2/(9*nu) ) ) / 2
 *
 * which is valid for nu > 30.  This is the basis for the formula in Knuth,
 * TAOCP, Vol 2, 3.3.1, Table 1.  It more accurate than the similar formula,
 * DLMF 8.11.10. */
static void
chisq_prob (mpfr_ptr q, long nu, mpfr_ptr chisqp)
{
  mpfr_t t;
  mpfr_rnd_t rnd;

  rnd = MPFR_RNDN;  /* This uses an approx formula.  Might as well use RNDN. */
  mpfr_init2 (t, mpfr_get_prec (q));

  mpfr_div_si (q, chisqp, nu, rnd); /* chisqp/nu */
  mpfr_cbrt (q, q, rnd);            /* (chisqp/nu)^(1/3) */
  mpfr_sub_ui (q, q, 1, rnd);       /* (chisqp/nu)^(1/3) - 1 */
  mpfr_set_ui (t, 2, rnd);
  mpfr_div_si (t, t, 9*nu, rnd); /* 2/(9*nu) */
  mpfr_add (q, q, t, rnd);       /* (chisqp/nu)^(1/3) - 1 + 2/(9*nu) */
  mpfr_sqrt_ui (t, nu, rnd);     /* sqrt(nu) */
  mpfr_mul_d (t, t, 1.5, rnd);   /* (3/2)*sqrt(nu) */
  mpfr_mul (q, q, t, rnd);       /* arg to erfc */
  mpfr_erfc (q, q, rnd);         /* erfc(...) */
  mpfr_div_ui (q, q, 2, rnd);    /* erfc(...)/2 */

  mpfr_clear (t);
}

/* The continuous chi-squared test on with a set of bins of equal width.
 *
 * A single precision is picked for sampling and the chi-squared calculation.
 * This should picked high enough so that binning in test doesn't need to be
 * accurately aligned with possible values of the deviates.  Also we need the
 * precision big enough that chi-squared calculation itself is reliable.
 *
 * There's no particular benefit is testing with at very higher precisions;
 * because of the way tnrandom samples, this just adds additional barely
 * significant random bits to the deviates.  So this chi-squared test with
 * continuous equal width bins isn't a good tool for finding problems here.
 *
 * The testing of low precision normal deviates is done by
 * test_nrandom_chisq_disc. */
static double
test_nrandom_chisq_cont (long num, mpfr_prec_t prec, int nu,
                         double xmin, double xmax, int verbose)
{
  mpfr_t x, a, b, dx, z, pa, pb, ps, t;
  long *counts;
  int i, inexact;
  long k;
  mpfr_rnd_t rnd, rndd;
  double Q, chisq;

  rnd = MPFR_RNDN;              /* For chi-squared calculation */
  rndd = MPFR_RNDD;             /* For sampling and figuring the bins */
  mpfr_inits2 (prec, x, a, b, dx, z, pa, pb, ps, t, (mpfr_ptr) 0);

  counts = (long *) tests_allocate ((nu + 1) * sizeof (long));
  for (i = 0; i <= nu; i++)
    counts[i] = 0;

  /* a and b are bounds of nu equally spaced bins.  Set dx = (b-a)/nu */
  mpfr_set_d (a, xmin, rnd);
  mpfr_set_d (b, xmax, rnd);

  mpfr_sub (dx, b, a, rnd);
  mpfr_div_si (dx, dx, nu, rnd);

  for (k = 0; k < num; ++k)
    {
      inexact = mpfr_nrandom (x, RANDS, rndd);
      if (inexact == 0)
        {
          /* one call in the loop pretended to return an exact number! */
          printf ("Error: mpfr_nrandom() returns a zero ternary value.\n");
          exit (1);
        }
      mpfr_sub (x, x, a, rndd);
      mpfr_div (x, x, dx, rndd);
      i = mpfr_get_si (x, rndd);
      ++counts[i >= 0 && i < nu ? i : nu];
    }

  mpfr_set (x, a, rnd);
  normal_cumulative (pa, x, rnd);
  mpfr_add_ui (ps, pa, 1, rnd);
  mpfr_set_zero (t, 1);
  for (i = 0; i <= nu; ++i)
    {
      if (i < nu)
        {
          mpfr_add (x, x, dx, rnd);
          normal_cumulative (pb, x, rnd);
          mpfr_sub (pa, pb, pa, rnd); /* prob for this bin */
        }
      else
        mpfr_sub (pa, ps, pa, rnd); /* prob for last bin, i = nu */

      /* Compute z = counts[i] - num * p; t += z * z / (num * p) */
      mpfr_mul_ui (pa, pa, num, rnd);
      mpfr_ui_sub (z, counts[i], pa, rnd);
      mpfr_sqr (z, z, rnd);
      mpfr_div (z, z, pa, rnd);
      mpfr_add (t, t, z, rnd);
      mpfr_swap (pa, pb);       /* i.e., pa = pb */
    }

  chisq = mpfr_get_d (t, rnd);
  chisq_prob (t, nu, t);
  Q = mpfr_get_d (t, rnd);
  if (verbose)
    {
      printf ("num = %ld, equal bins in [%.2f, %.2f], nu = %d: chisq = %.2f\n",
              num, xmin, xmax, nu, chisq);
      if (Q < 0.05)
        printf ("    WARNING: probability (less than 5%%) = %.2e\n", Q);
    }

  tests_free (counts, (nu + 1) * sizeof (long));
  mpfr_clears (x, a, b, dx, z, pa, pb, ps, t, (mpfr_ptr) 0);
  return Q;
}

/* Return a sequential number for a positive low-precision x.  x is altered by
 * this function.  low precision means prec = 2, 3, or 4.  High values of
 * precision will result in integer overflow. */
static long
sequential (mpfr_ptr x)
{
  long expt, prec;

  prec = mpfr_get_prec (x);
  expt =  mpfr_get_exp (x);
  mpfr_mul_2si (x, x, prec - expt, MPFR_RNDN);

  return expt * (1 << (prec - 1)) + mpfr_get_si (x, MPFR_RNDN);
}

/* The chi-squared test on low precision normal deviates.  wprec is the working
 * precision for the chi-squared calculation.  prec is the precision for the
 * sampling; choose this in [2,5].  The bins consist of all the possible
 * deviate values in the range [xmin, xmax] coupled with the value of inexact.
 * Thus with prec = 2, the bins are
 *   ...
 *   (7/16, 1/2)  x = 1/2, inexact = +1
 *   (1/2 , 5/8)  x = 1/2, inexact = -1
 *   (5/8 , 3/4)  x = 3/4, inexact = +1
 *   (3/4 , 7/8)  x = 3/4, inexact = -1
 *   (7/8 , 1  )  x = 1  , inexact = +1
 *   (1   , 5/4)  x = 1  , inexact = -1
 *   (5/4 , 3/2)  x = 3/2, inexact = +1
 *   (3/2 , 7/4)  x = 3/2, inexact = -1
 *   ...
 * In addition, two bins are allocated for [0,xmin) and (xmax,inf).
 *
 * This test is applied to the absolute values of the deviates.  The sign is
 * tested by test_nrandom_chisq_cont.  In any case, the way the sign is
 * assigned in mpfr_nrandom is trivial.  In addition, the sampling is with
 * MPFR_RNDN.  This is the rounding mode which elicits the most information.
 * trandom_deviate includes checks on the consistency of the results extracted
 * from a random_deviate with other rounding modes.  */
static double
test_nrandom_chisq_disc (long num, mpfr_prec_t wprec, int prec,
                         double xmin, double xmax, int verbose)
{
  mpfr_t x, v, pa, pb, z, t;
  mpfr_rnd_t rnd;
  int i, inexact, nu;
  long *counts;
  long k, seqmin, seqmax, seq;
  double Q, chisq;

  rnd = MPFR_RNDN;
  mpfr_init2 (x, prec);
  mpfr_init2 (v, prec+1);
  mpfr_inits2 (wprec, pa, pb, z, t, (mpfr_ptr) 0);

  mpfr_set_d (x, xmin, rnd);
  xmin = mpfr_get_d (x, rnd);
  mpfr_set (v, x, rnd);
  seqmin = sequential (x);
  mpfr_set_d (x, xmax, rnd);
  xmax = mpfr_get_d (x, rnd);
  seqmax = sequential (x);

  /* Two bins for each sequential number (for inexact = +/- 1), plus 1 for u <
   * umin and 1 for u > umax, minus 1 for degrees of freedom */
  nu = 2 * (seqmax - seqmin + 1) + 2 - 1;
  counts = (long *) tests_allocate ((nu + 1) * sizeof (long));
  for (i = 0; i <= nu; i++)
    counts[i] = 0;

  for (k = 0; k < num; ++k)
    {
      inexact = mpfr_nrandom (x, RANDS, rnd);
      if (mpfr_signbit (x))
        {
          inexact = -inexact;
          mpfr_setsign (x, x, 0, rnd);
        }
      /* Don't call sequential with small args to avoid undefined behavior with
       * zero and possibility of overflow. */
      seq = mpfr_greaterequal_p (x, v) ? sequential (x) : seqmin - 1;
      ++counts[seq < seqmin ? 0 :
               seq <= seqmax ? 2 * (seq - seqmin) + 1 + (inexact > 0 ? 0 : 1) :
               nu];
    }

  mpfr_set_zero (v, 1);
  normal_cumulative (pa, v, rnd);
  /* Cycle through all the bin boundaries using mpfr_nextabove at precision
   * prec + 1 starting at mpfr_nextbelow (xmin) */
  mpfr_set_d (x, xmin, rnd);
  mpfr_set (v, x, rnd);
  mpfr_nextbelow (v);
  mpfr_nextbelow (v);
  mpfr_set_zero (t, 1);
  for (i = 0; i <= nu; ++i)
    {
      if (i < nu)
        mpfr_nextabove (v);
      else
        mpfr_set_inf (v, 1);
      normal_cumulative (pb, v, rnd);
      mpfr_sub (pa, pb, pa, rnd);

      /* Compute z = counts[i] - num * p; t += z * z / (num * p).  2*num to
       * account for the fact the p needs to be doubled since we are
       * considering only the absolute value of the deviates. */
      mpfr_mul_ui (pa, pa, 2*num, rnd);
      mpfr_ui_sub (z, counts[i], pa, rnd);
      mpfr_sqr (z, z, rnd);
      mpfr_div (z, z, pa, rnd);
      mpfr_add (t, t, z, rnd);
      mpfr_swap (pa, pb);       /* i.e., pa = pb */
    }

  chisq = mpfr_get_d (t, rnd);
  chisq_prob (t, nu, t);
  Q = mpfr_get_d (t, rnd);
  if (verbose)
    {
      printf ("num = %ld, discrete (prec = %d) bins in [%.6f, %.2f], "
              "nu = %d: chisq = %.2f\n", num, prec, xmin, xmax, nu, chisq);
      if (Q < 0.05)
        printf ("    WARNING: probability (less than 5%%) = %.2e\n", Q);
    }

  tests_free (counts, (nu + 1) * sizeof (long));
  mpfr_clears (x, v, pa, pb, z, t, (mpfr_ptr) 0);
  return Q;
}

static void
run_chisq (double (*f)(long, mpfr_prec_t, int, double, double, int),
           long num, mpfr_prec_t prec, int bin,
           double xmin, double xmax, int verbose)
{
  double Q, Qcum, Qbad, Qthresh;
  int i;

  Qcum = 1;
  Qbad = 1.e-9;
  Qthresh = 0.01;
  for (i = 0; i < 3; ++i)
    {
      Q = (*f)(num, prec, bin, xmin, xmax, verbose);
      Qcum *= Q;
      if (Q > Qthresh)
        return;
      else if (Q < Qbad)
        {
          printf ("Error: mpfr_nrandom chi-squared failure "
                  "(prob = %.2e)\n", Q);
          exit (1);
        }
      num *= 10;
      Qthresh /= 10;
    }
  if (Qcum < Qbad)              /* Presumably this is true */
    {
      printf ("Error: mpfr_nrandom combined chi-squared failure "
              "(prob = %.2e)\n", Qcum);
      exit (1);
    }
}

int
main (int argc, char *argv[])
{
  long nbtests;
  int verbose;

  tests_start_mpfr ();

  verbose = 0;
  nbtests = 100000;
  if (argc > 1)
    {
      long a = atol (argv[1]);
      verbose = 1;
      if (a != 0)
        nbtests = a;
    }

  run_chisq (test_nrandom_chisq_cont, nbtests, 64, 60, -4, 4, verbose);
  run_chisq (test_nrandom_chisq_disc, nbtests, 64, 2, 0.0005, 3, verbose);
  run_chisq (test_nrandom_chisq_disc, nbtests, 64, 3, 0.002, 4, verbose);
  run_chisq (test_nrandom_chisq_disc, nbtests, 64, 4, 0.004, 4, verbose);

  tests_end_mpfr ();
  return 0;
}