mpn/generic/mu_div_q.c


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155

/* mpn_mu_div_q, mpn_preinv_mu_div_q.

   Contributed to the GNU project by Torbjörn Granlund.

   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH A MUTABLE INTERFACE.  IT IS
   ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS
   ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP
   RELEASE.

Copyright 2005, 2006, 2007 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option) any
later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */

/*
  Things to work on:

  1. This is a rudimentary implementation of mpn_mu_div_q.  The algorithm is
     probably close to optimal, except when mpn_mu_divappr_q fails.

     An alternative which could be considered for much simpler code for the
     complex qn>=dn arm would be to allocate a temporary nn+1 limb buffer, then
     simply call mpn_mu_divappr_q.  Such a temporary allocation is
     unfortunately very large.

  2. Instead of falling back to mpn_mu_div_qr when we detect a possible
     mpn_mu_divappr_q rounding problem, we could multiply and compare.
     Unfortunately, since mpn_mu_divappr_q does not return the partial
     remainder, this also doesn't become optimal.  A mpn_mu_divappr_qr
     could solve that.

  3. The allocations done here should be made from the scratch area.
*/

#include "gmp.h"
#include "gmp-impl.h"


mp_limb_t
mpn_mu_div_q (mp_ptr qp,
	      mp_ptr np, mp_size_t nn,
	      mp_srcptr dp, mp_size_t dn,
	      mp_ptr scratch)
{
  mp_ptr tp, rp, ip, this_ip;
  mp_size_t qn, in, this_in;
  mp_limb_t cy;
  TMP_DECL;

  TMP_MARK;

  qn = nn - dn;

  tp = TMP_BALLOC_LIMBS (qn + 1);

  if (qn >= dn)			/* nn >= 2*dn + 1 */
    {
      /* Find max inverse size needed by the two preinv calls.  */
      if (dn != qn)
	{
	  mp_size_t in1, in2;

	  in1 = mpn_mu_div_qr_choose_in (qn - dn, dn, 0);
	  in2 = mpn_mu_divappr_q_choose_in (dn + 1, dn, 0);
	  in = MAX (in1, in2);
	}
      else
	{
	  in = mpn_mu_divappr_q_choose_in (dn + 1, dn, 0);
	}

      ip = TMP_BALLOC_LIMBS (in + 1);

      if (dn == in)
	{
	  MPN_COPY (scratch + 1, dp, in);
	  scratch[0] = 1;
	  mpn_invert (ip, scratch, in + 1);
	  MPN_COPY_INCR (ip, ip + 1, in);
	}
      else
	{
	  cy = mpn_add_1 (scratch, dp + dn - (in + 1), in + 1, 1);
	  if (UNLIKELY (cy != 0))
	    MPN_ZERO (ip, in);
	  else
	    {
	      mpn_invert (ip, scratch, in + 1);
	      MPN_COPY_INCR (ip, ip + 1, in);
	    }
	}

       /* |_______________________|   dividend
			 |________|   divisor  */
      rp = TMP_BALLOC_LIMBS (2 * dn + 1);
      if (dn != qn)		/* FIXME: perhaps mpn_mu_div_qr should DTRT */
	{
	  this_in = mpn_mu_div_qr_choose_in (qn - dn, dn, 0);
	  this_ip = ip + in - this_in;
	  mpn_preinv_mu_div_qr (tp + dn + 1, rp + dn + 1, np + dn, qn, dp, dn,
				this_ip, this_in, scratch);
	}
      else
	MPN_COPY (rp + dn + 1, np + dn, dn);

      MPN_COPY (rp + 1, np, dn);
      rp[0] = 0;
      this_in = mpn_mu_divappr_q_choose_in (dn + 1, dn, 0);
      this_ip = ip + in - this_in;
      mpn_preinv_mu_divappr_q (tp, rp, 2*dn + 1, dp, dn, this_ip, this_in, scratch);

      /* The max error of mpn_mu_divappr_q is +4.  If the low quotient limb is
	 greater than the max error, we cannot trust the quotient.  */
      if (tp[0] > 4)
	{
	  MPN_COPY (qp, tp + 1, qn);
	}
      else
	{
	  /* Fall back to plain mpn_mu_div_qr.  */
	  mpn_mu_div_qr (qp, rp, np, nn, dp, dn, scratch);
	}
    }
  else
    {
       /* |_______________________|   dividend
		 |________________|   divisor  */
      mpn_mu_divappr_q (tp, np + nn - (2*qn + 2), 2*qn + 2, dp + dn - (qn + 1), qn + 1, scratch);

      if (tp[0] > 4)
	{
	  MPN_COPY (qp, tp + 1, qn);
	}
      else
	{
	  rp = TMP_BALLOC_LIMBS (dn);
	  mpn_mu_div_qr (qp, rp, np, nn, dp, dn, scratch);
	}
    }

  TMP_FREE;
  return 0;
}