diff options
author | Kevin Ryde <user42@zip.com.au> | 2004-03-14 00:17:59 +0100 |
---|---|---|
committer | Kevin Ryde <user42@zip.com.au> | 2004-03-14 00:17:59 +0100 |
commit | a51ace3cb879457cfc321c8a650ae399a71d8a30 (patch) | |
tree | 28613ef9ff1bbc321e1ba76f158a2626cccec889 /mpf | |
parent | d9b8945f7875c4029dab8e4047e168174317463d (diff) | |
download | gmp-a51ace3cb879457cfc321c8a650ae399a71d8a30.tar.gz |
* mpf/mul_ui.c: Incorporate carry from low limbs, for exactness.
Diffstat (limited to 'mpf')
-rw-r--r-- | mpf/mul_ui.c | 117 |
1 files changed, 104 insertions, 13 deletions
diff --git a/mpf/mul_ui.c b/mpf/mul_ui.c index b2ba8c9f7..018f0b580 100644 --- a/mpf/mul_ui.c +++ b/mpf/mul_ui.c @@ -21,6 +21,62 @@ MA 02111-1307, USA. */ #include "gmp.h" #include "gmp-impl.h" +#include "longlong.h" + + +/* The core operation is a multiply of PREC(r) limbs from u by v, producing + either PREC(r) or PREC(r)+1 result limbs. If u is shorter than PREC(r), + then we take only as much as it has. If u is longer we incorporate a + carry from the lower limbs. + + If u has just 1 extra limb, then the carry to add is high(up[0]*v). That + is of course what mpn_mul_1 would do if it was called with PREC(r)+1 + limbs of input. + + If u has more than 1 extra limb, then there can be a further carry bit + out of lower uncalculated limbs (the way the low of one product adds to + the high of the product below it). This is of course what an mpn_mul_1 + would do if it was called with the full u operand. But we instead work + downwards explicitly, until a carry occurs or until a value other than + GMP_NUMB_MAX occurs (that being the only value a carry bit can propagate + across). + + The carry determination normally requires two umul_ppmm's, only rarely + will GMP_NUMB_MAX occur and require further products. + + The carry limb is conveniently added into the mul_1 using mpn_mul_1c when + that function exists, otherwise a subsequent mpn_add_1 is needed. + + Clearly when mpn_mul_1c is used the carry must be calculated first. But + this is also the case when add_1 is used, since if r==u and ABSIZ(r) > + PREC(r) then the mpn_mul_1 overwrites the low part of the input. + + A reuse r==u with size > prec can occur from a size PREC(r)+1 in the + usual way, or it can occur from an mpf_set_prec_raw leaving a bigger + sized value. In both cases we can end up calling mpn_mul_1 with + overlapping src and dst regions, but this will be with dst < src and such + an overlap is permitted.
+ + Not done: + + No attempt is made to determine in advance whether the result will be + PREC(r) or PREC(r)+1 limbs. If it's going to be PREC(r)+1 then we could + take one less limb from u and generate just PREC(r), that of course + satisfies the application's requested precision. But any test counting bits + or forming the high product would almost certainly take longer than the + incremental cost of an extra limb in mpn_mul_1. + + Enhancements: + + Repeated mpf_mul_ui's with an even v will accumulate low zero bits on the + result, leaving low zero limbs after a while, which it might be nice to + strip to save work in subsequent operations. Calculating the low limb + explicitly would let us direct mpn_mul_1 to put the balance at rp when + the low is zero (instead of normally rp+1). But it's not clear whether + this would be worthwhile. Explicit code for the low limb will probably + be slower than having it done in mpn_mul_1, so we need to consider how + often a zero will be stripped and how much that's likely to save + later.
*/ void mpf_mul_ui (mpf_ptr r, mpf_srcptr u, unsigned long int v) @@ -28,12 +84,12 @@ mpf_mul_ui (mpf_ptr r, mpf_srcptr u, unsigned long int v) mp_srcptr up; mp_size_t usize; mp_size_t size; - mp_size_t prec; - mp_limb_t cy_limb; + mp_size_t prec, excess; + mp_limb_t cy_limb, vl, cbit, cin; mp_ptr rp; usize = u->_mp_size; - if (usize == 0 || v == 0) + if (UNLIKELY (v == 0) || UNLIKELY (usize == 0)) { r->_mp_size = 0; r->_mp_exp = 0; @@ -59,21 +115,56 @@ mpf_mul_ui (mpf_ptr r, mpf_srcptr u, unsigned long int v) size = ABS (usize); prec = r->_mp_prec; up = u->_mp_d; - if (size > prec) + vl = v; + excess = size - prec; + cin = 0; + + if (excess > 0) { - up += size - prec; + /* up is bigger than desired rp, shorten it to prec limbs and + determine a carry-in */ + + mp_limb_t vl_shifted = vl << GMP_NAIL_BITS; + mp_limb_t hi, lo, next_lo, sum; + mp_size_t i; + + /* high limb of top product */ + i = excess - 1; + umul_ppmm (cin, lo, up[i], vl_shifted); + + /* and carry bit out of products below that, if any */ + for (;;) + { + i--; + if (i < 0) + break; + + umul_ppmm (hi, next_lo, up[i], vl_shifted); + lo >>= GMP_NAIL_BITS; + ADDC_LIMB (cbit, sum, hi, lo); + cin += cbit; + lo = next_lo; + + /* Continue only if the sum is GMP_NUMB_MAX. GMP_NUMB_MAX is the + only value a carry from below can propagate across. If we've + just seen the carry out (ie. cbit!=0) then sum!=GMP_NUMB_MAX, + so this test stops us for that case too. */ + if (LIKELY (sum != GMP_NUMB_MAX)) + break; + } + + up += excess; size = prec; } -#if 0 - /* Since we can do it at almost no cost, remove zero limbs at low end of - result.
*/ - if (up[0] == 0) - up++, size--; -#endif - rp = r->_mp_d; - cy_limb = mpn_mul_1 (rp, up, size, (mp_limb_t) v); +#if HAVE_NATIVE_mpn_mul_1c + cy_limb = mpn_mul_1c (rp, up, size, vl, cin); +#else + cy_limb = mpn_mul_1 (rp, up, size, vl); + __GMPN_ADD_1 (cbit, rp, rp, size, cin); + cy_limb += cbit; +#endif rp[size] = cy_limb; cy_limb = cy_limb != 0; r->_mp_exp = u->_mp_exp + cy_limb; |