/* mpn_sqrtrem_new -- integer square root with remainder (should be directly integrated in a future release of GNU MP) Copyright (C) 2000 PolKA project, Inria Lorraine and Loria This file is part of the MPFR Library. The MPFR Library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. The MPFR Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with the MPFR Library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ #include /* for NULL */ #include "gmp.h" #include "gmp-impl.h" #include "longlong.h" /* table generated by isqrt(2^8*i) $ i=64..255 in MuPAD */ static const unsigned char approx_tab[192] = { 128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155, 156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168, 169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180, 181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191, 192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201, 202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211, 212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221, 221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230, 230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238, 239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247, 247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255}; /* same as mpn_sqrtrem, but for size=1 and {np, 1} normalized */ mp_size_t mpn_sqrtrem1 (mp_ptr sp, mp_ptr rp, mp_srcptr np) { mp_limb_t np0, s, r, q, u; int prec; /* first compute a 8-bit approximation from the high 8-bits of np[0] */ np0 = np[0]; q = np0 >> (BITS_PER_MP_LIMB - 8); /* 2^6 = 64 <= q < 256 = 2^8 */ s = approx_tab[q - 64]; /* 128 <= s < 255 */ r = (np0 >> (BITS_PER_MP_LIMB - 16)) - s * s; /* r <= 2*s */ if (r > 2 * s) { r -= 2 * s + 1; s++; } #ifdef DEBUG if (r > 2*s) { fprintf(stderr, "Error: r > 2*s, np[0]=%lu r=%lu s=%lu\n", np[0], r, s); exit(1); } #endif prec = 8; np0 <<= 2 * prec; while (2*prec < BITS_PER_MP_LIMB) { /* invariant: s has prec bits, and r <= 2*s */ r = (r << prec) + (np0 >> (BITS_PER_MP_LIMB - prec)); np0 <<= prec; u = 2 * s; q = r / u; u = r - q * u; s = (s << prec) + q; u = (u << prec) + (np0 >> (BITS_PER_MP_LIMB - prec)); q = q * q; r = u - q; if (u < q) { r += 2 * s - 1; s --; } np0 <<= prec; prec = 2 * prec; } sp[0] = s; if (rp != NULL) rp[0] = r; return (r) ? 1 : 0; } #define Prec (BITS_PER_MP_LIMB >> 1) /* same as mpn_sqrtrem, but for size=2 and {np, 2} normalized return cc such that {np, 2} = sp[0]^2 + cc*2^BITS+PER_MP_LIMB + rp[0] */ mp_limb_t mpn_sqrtrem2 (mp_ptr sp, mp_ptr rp, mp_srcptr np) { mp_limb_t qhl, q, u, np0; int cc; np0 = np[0]; mpn_sqrtrem1(sp, rp, np+1); qhl = 0; while (rp[0] >= sp[0]) { qhl ++; rp[0] -= sp[0]; } /* now rp[0] < sp[0] < 2^Prec */ rp[0] = (rp[0] << Prec) + (np0 >> Prec); u = 2 * sp[0]; q = rp[0] / u; u = rp[0] - q * u; q += (qhl & 1) << (Prec-1); qhl >>= 1; /* if qhl=1, necessary q=0 as qhl*2^Prec + q <= 2^Prec */ /* now we have (initial rp[0])<>Prec = (qhl<> Prec; rp[0] = (u << Prec) + (np0 & (((mp_limb_t)1 << Prec) - 1)); /* subtract q * q or qhl*2^(2*Prec) from rp */ cc -= mpn_sub_1(rp, rp, 1, q * q) + qhl; /* now subtract 2*q*2^Prec + 2^(2*Prec) if qhl is set */ if (cc < 0) { cc += (sp[0]) ? mpn_add_1(rp, rp, 1, sp[0]) : 1; cc += mpn_add_1(rp, rp, 1, --sp[0]); } return cc; } /* writes in {sp, n} the square root (rounded towards zero) of {np, 2n}, and in {np, n} the low n limbs of the remainder, returns the high limb of the remainder (which is 0 or 1). Assumes {np, 2n} is normalized, i.e. np[2n-1] >= B/4 where B=2^BITS_PER_MP_LIMB. */ mp_limb_t mpn_dq_sqrtrem (mp_ptr sp, mp_ptr np, mp_size_t n) { mp_limb_t q; /* carry out of {sp, n} */ int c, b; /* carry out of remainder */ mp_size_t l, h; if (n == 1) return mpn_sqrtrem2 (sp, np, np); l = n / 2; h = n - l; q = mpn_dq_sqrtrem (sp + l, np + 2 * l, h); if (q) mpn_sub_n (np + 2 * l, np + 2 * l, sp + l, h); q += mpn_divrem (sp, 0, np + l, n, sp + l, h); c = sp[0] & 1; mpn_rshift (sp, sp, l, 1); sp[l-1] |= q << (BITS_PER_MP_LIMB - 1); q >>= 1; if (c) c = mpn_add_n (np + l, np + l, sp + l, h); mpn_mul_n (np + n, sp, sp, l); b = q + mpn_sub_n (np, np, np + n, 2 * l); c -= (l == h) ? b : mpn_sub_1 (np + 2 * l, np + 2 * l, 1, b); q = mpn_add_1 (sp + l, sp + l, h, q); if (c < 0) { c += mpn_addmul_1 (np, sp, n, 2) + 2 * q; c -= mpn_sub_1 (np, np, n, 1); q -= mpn_sub_1 (sp, sp, n, 1); } return c; } /* main function */ mp_size_t mpn_sqrtrem_new (mp_ptr sp, mp_ptr rp, mp_srcptr np, mp_size_t nn) { mp_limb_t *tp, s0[1], cc; int c, tn; mp_size_t rn; TMP_DECL (marker); #ifdef DEBUG if (np[nn-1] == 0) { fprintf (stderr, "Error in mpn_sqrtrem: most significant limb is zero\n"); exit(1); } #endif count_leading_zeros(c, np[nn-1]); if (nn == 1 && c == 0) return mpn_sqrtrem1 (sp, rp, np); c = c / 2; /* we have to shift left by 2c bits to normalize {np, nn} */ tn = (nn+1) / 2; /* 2*tn is the smallest even integer >= nn */ TMP_MARK (marker); if ((nn % 2) || (c > 0)) { tp = (mp_limb_t*) TMP_ALLOC (2 * tn * BYTES_PER_MP_LIMB); tp[0] = 0; /* needed only when 2*tn > nn, but saves a test */ if (c) mpn_lshift(tp + 2*tn - nn, np, nn, 2 * c); else MPN_COPY (tp + 2*tn - nn, np, nn); rn = mpn_dq_sqrtrem (sp, tp, tn); /* we have 2^(2k)*N = S^2 + R where k = c + (2tn-nn)*BITS_PER_MP_LIMB/2, thus 2^(2k)*N = (S-s0)^2 + 2*S*s0 - s0^2 + R where s0=S mod 2^k */ c += (nn % 2) * BITS_PER_MP_LIMB / 2; /* c now represents k */ s0[0] = sp[0] & (((mp_limb_t) 1 << c) - 1); /* S mod 2^k */ rn += mpn_addmul_1 (tp, sp, tn, 2 * s0[0]); /* R = R + 2*s0*S */ cc = mpn_submul_1 (tp, s0, 1, s0[0]); rn -= (tn > 1) ? mpn_sub_1(tp + 1, tp + 1, tn - 1, cc) : cc; mpn_rshift (sp, sp, tn, c); tp[tn] = rn; if (rp == NULL) rp = tp; c = c << 1; if (c < BITS_PER_MP_LIMB) tn++; else { tp++; c -= BITS_PER_MP_LIMB; } if (c) mpn_rshift (rp, tp, tn, c); else MPN_COPY(rp, tp, tn); rn = tn; } else { if (rp == NULL) rp = (mp_limb_t*) TMP_ALLOC (nn * BYTES_PER_MP_LIMB); if (rp != np) MPN_COPY (rp, np, nn); rn = tn + (rp[tn] = mpn_dq_sqrtrem (sp, rp, tn)); } while (rp[rn-1]==0 && rn) rn--; TMP_FREE (marker); return rn; }