mpn/generic/hgcd_jacobi.c


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235

/* hgcd_jacobi.c.

   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 2003, 2004, 2005, 2008, 2011 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */

#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"

/* This file is almost a copy of hgcd.c, with some added calls to
   mpn_jacobi_update */

struct hgcd_jacobi_ctx
{
  struct hgcd_matrix *M;
  unsigned *bitsp;
};

static void
hgcd_jacobi_hook (void *p, mp_srcptr gp, mp_size_t gn,
		  mp_srcptr qp, mp_size_t qn, int d)
{
  ASSERT (!gp);
  ASSERT (d >= 0);

  MPN_NORMALIZE (qp, qn);
  if (qn > 0)
    {
      struct hgcd_jacobi_ctx *ctx = (struct hgcd_jacobi_ctx *) p;
      /* NOTES: This is a bit ugly. A tp area is passed to
	 gcd_subdiv_step, which stores q at the start of that area. We
	 now use the rest. */
      mp_ptr tp = (mp_ptr) qp + qn;

      mpn_hgcd_matrix_update_q (ctx->M, qp, qn, d, tp);
      *ctx->bitsp = mpn_jacobi_update (*ctx->bitsp, d, qp[0] & 3);
    }
}

/* Perform a few steps, using some of mpn_hgcd2, subtraction and
   division. Reduces the size by almost one limb or more, but never
   below the given size s. Return new size for a and b, or 0 if no
   more steps are possible.

   If hgcd2 succeds, needs temporary space for hgcd_matrix_mul_1, M->n
   limbs, and hgcd_mul_matrix1_inverse_vector, n limbs. If hgcd2
   fails, needs space for the quotient, qn <= n - s + 1 limbs, for and
   hgcd_matrix_update_q, qn + (size of the appropriate column of M) <=
   resulting size of M.

   If N is the input size to the calling hgcd, then s = floor(N/2) +
   1, M->n < N, qn + matrix size <= n - s + 1 + n - s = 2 (n - s) + 1
   < N, so N is sufficient.
*/

static mp_size_t
hgcd_jacobi_step (mp_size_t n, mp_ptr ap, mp_ptr bp, mp_size_t s,
		  struct hgcd_matrix *M, unsigned *bitsp, mp_ptr tp)
{
  struct hgcd_matrix1 M1;
  mp_limb_t mask;
  mp_limb_t ah, al, bh, bl;
  mp_size_t an, bn, qn;
  int col;

  ASSERT (n > s);

  mask = ap[n-1] | bp[n-1];
  ASSERT (mask > 0);

  if (n == s + 1)
    {
      if (mask < 4)
	goto subtract;

      ah = ap[n-1]; al = ap[n-2];
      bh = bp[n-1]; bl = bp[n-2];
    }
  else if (mask & GMP_NUMB_HIGHBIT)
    {
      ah = ap[n-1]; al = ap[n-2];
      bh = bp[n-1]; bl = bp[n-2];
    }
  else
    {
      int shift;

      count_leading_zeros (shift, mask);
      ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
      al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
      bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
      bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
    }

  /* Try an mpn_hgcd2 step */
  if (mpn_hgcd2_jacobi (ah, al, bh, bl, &M1, bitsp))
    {
      /* Multiply M <- M * M1 */
      mpn_hgcd_matrix_mul_1 (M, &M1, tp);

      /* Can't swap inputs, so we need to copy. */
      MPN_COPY (tp, ap, n);
      /* Multiply M1^{-1} (a;b) */
      return mpn_matrix22_mul1_inverse_vector (&M1, ap, tp, bp, n);
    }

 subtract:
  {
    struct hgcd_jacobi_ctx ctx;
    ctx.M = M;
    ctx.bitsp = bitsp;

    return mpn_gcd_subdiv_step (ap, bp, n, s, hgcd_jacobi_hook, &ctx, tp);
  }
}

/* Reduces a,b until |a-b| fits in n/2 + 1 limbs. Constructs matrix M
   with elements of size at most (n+1)/2 - 1. Returns new size of a,
   b, or zero if no reduction is possible. */

/* Same scratch requirements as for mpn_hgcd. */
mp_size_t
mpn_hgcd_jacobi (mp_ptr ap, mp_ptr bp, mp_size_t n,
		 struct hgcd_matrix *M, unsigned *bitsp, mp_ptr tp)
{
  mp_size_t s = n/2 + 1;

  mp_size_t nn;
  int success = 0;

  if (n <= s)
    /* Happens when n <= 2, a fairly uninteresting case but exercised
       by the random inputs of the testsuite. */
    return 0;

  ASSERT ((ap[n-1] | bp[n-1]) > 0);

  ASSERT ((n+1)/2 - 1 < M->alloc);

  if (ABOVE_THRESHOLD (n, HGCD_THRESHOLD))
    {
      mp_size_t n2 = (3*n)/4 + 1;
      mp_size_t p = n/2;

      nn = mpn_hgcd_jacobi (ap + p, bp + p, n - p, M, bitsp, tp);
      if (nn > 0)
	{
	  /* Needs 2*(p + M->n) <= 2*(floor(n/2) + ceil(n/2) - 1)
	     = 2 (n - 1) */
	  n = mpn_hgcd_matrix_adjust (M, p + nn, ap, bp, p, tp);
	  success = 1;
	}
      while (n > n2)
	{
	  /* Needs n + 1 storage */
	  nn = hgcd_jacobi_step (n, ap, bp, s, M, bitsp, tp);
	  if (!nn)
	    return success ? n : 0;
	  n = nn;
	  success = 1;
	}

      if (n > s + 2)
	{
	  struct hgcd_matrix M1;
	  mp_size_t scratch;

	  p = 2*s - n + 1;
	  scratch = MPN_HGCD_MATRIX_INIT_ITCH (n-p);

	  mpn_hgcd_matrix_init(&M1, n - p, tp);
	  nn = mpn_hgcd_jacobi (ap + p, bp + p, n - p, &M1, bitsp, tp + scratch);
	  if (nn > 0)
	    {
	      /* We always have max(M) > 2^{-(GMP_NUMB_BITS + 1)} max(M1) */
	      ASSERT (M->n + 2 >= M1.n);

	      /* Furthermore, assume M ends with a quotient (1, q; 0, 1),
		 then either q or q + 1 is a correct quotient, and M1 will
		 start with either (1, 0; 1, 1) or (2, 1; 1, 1). This
		 rules out the case that the size of M * M1 is much
		 smaller than the expected M->n + M1->n. */

	      ASSERT (M->n + M1.n < M->alloc);

	      /* Needs 2 (p + M->n) <= 2 (2*s - n2 + 1 + n2 - s - 1)
		 = 2*s <= 2*(floor(n/2) + 1) <= n + 2. */
	      n = mpn_hgcd_matrix_adjust (&M1, p + nn, ap, bp, p, tp + scratch);

	      /* We need a bound for of M->n + M1.n. Let n be the original
		 input size. Then

		 ceil(n/2) - 1 >= size of product >= M.n + M1.n - 2

		 and it follows that

		 M.n + M1.n <= ceil(n/2) + 1

		 Then 3*(M.n + M1.n) + 5 <= 3 * ceil(n/2) + 8 is the
		 amount of needed scratch space. */
	      mpn_hgcd_matrix_mul (M, &M1, tp + scratch);
	      success = 1;
	    }
	}
    }

  for (;;)
    {
      /* Needs s+3 < n */
      nn = hgcd_jacobi_step (n, ap, bp, s, M, bitsp, tp);
      if (!nn)
	return success ? n : 0;

      n = nn;
      success = 1;
    }
}