author    Kevin Ryde <user42@zip.com.au>  2004-03-14 00:17:59 +0100
committer Kevin Ryde <user42@zip.com.au>  2004-03-14 00:17:59 +0100
commit    a51ace3cb879457cfc321c8a650ae399a71d8a30 (patch)
tree      28613ef9ff1bbc321e1ba76f158a2626cccec889 /mpf
parent    d9b8945f7875c4029dab8e4047e168174317463d (diff)
download  gmp-a51ace3cb879457cfc321c8a650ae399a71d8a30.tar.gz
* mpf/mul_ui.c: Incorporate carry from low limbs, for exactness.
Diffstat (limited to 'mpf')
-rw-r--r--  mpf/mul_ui.c | 117
1 file changed, 104 insertions(+), 13 deletions(-)
diff --git a/mpf/mul_ui.c b/mpf/mul_ui.c
index b2ba8c9f7..018f0b580 100644
--- a/mpf/mul_ui.c
+++ b/mpf/mul_ui.c
@@ -21,6 +21,62 @@ MA 02111-1307, USA. */
#include "gmp.h"
#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* The core operation is a multiply of PREC(r) limbs from u by v, producing
+ either PREC(r) or PREC(r)+1 result limbs. If u is shorter than PREC(r),
+ then we take only as much as it has. If u is longer, we incorporate a
+ carry from the lower limbs.
+
+ If u has just 1 extra limb, then the carry to add is high(up[0]*v). That
+ is of course what mpn_mul_1 would do if it were called with PREC(r)+1
+ limbs of input.
+
+ If u has more than 1 extra limb, then there can be a further carry bit
+ out of lower uncalculated limbs (the way the low of one product adds to
+ the high of the product below it). This is of course what an mpn_mul_1
+ would do if it were called with the full u operand. But we instead work
+ downwards explicitly, until a carry occurs or until a value other than
+ GMP_NUMB_MAX occurs (that being the only value a carry bit can propagate
+ across).
+
+ The carry determination normally requires two umul_ppmm's; only rarely
+ will GMP_NUMB_MAX occur and require further products.
+
+ The carry limb is conveniently added into the mul_1 using mpn_mul_1c when
+ that function exists; otherwise a subsequent mpn_add_1 is needed.
+
+ Clearly when mpn_mul_1c is used the carry must be calculated first. But
+ this is also the case when add_1 is used, since if r==u and ABSIZ(r) >
+ PREC(r) then the mpn_mul_1 overwrites the low part of the input.
+
+ A reuse r==u with size > prec can occur from a size PREC(r)+1 in the
+ usual way, or it can occur from an mpf_set_prec_raw leaving a bigger
+ sized value. In both cases we can end up calling mpn_mul_1 with
+ overlapping src and dst regions, but this will be with dst < src and such
+ an overlap is permitted.
+
+ Not done:
+
+ No attempt is made to determine in advance whether the result will be
+ PREC(r) or PREC(r)+1 limbs. If it's going to be PREC(r)+1 then we could
+ take one less limb from u and generate just PREC(r), which of course
+ still satisfies the application's requested precision. But any test
+ counting bits or forming the high product would almost certainly take
+ longer than the incremental cost of an extra limb in mpn_mul_1.
+
+ Enhancements:
+
+ Repeated mpf_mul_ui's with an even v will accumulate low zero bits on the
+ result, leaving low zero limbs after a while, which it might be nice to
+ strip to save work in subsequent operations. Calculating the low limb
+ explicitly would let us direct mpn_mul_1 to put the balance at rp when
+ the low is zero (instead of the usual rp+1). But it's not clear whether
+ this would be worthwhile. Explicit code for the low limb will probably
+ be slower than having it done in mpn_mul_1, so we need to consider how
+ often a zero will be stripped and how much that's likely to save
+ later. */
void
mpf_mul_ui (mpf_ptr r, mpf_srcptr u, unsigned long int v)
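
The downward carry scan described in the new comment can be tried outside
GMP. Below is a minimal standalone sketch, not GMP code: it assumes 64-bit
limbs with no nail bits, substitutes unsigned __int128 (gcc/clang) for
umul_ppmm, and uses hypothetical helper names umul64, mul_1 and carry_in.
It computes the same carry that a full mpn_mul_1 over all of up would have
propagated out of the low "excess" limbs, stopping the scan at the first
sum that is not all-ones.

    #include <stdio.h>
    #include <stdint.h>

    /* Stand-in for umul_ppmm, 64-bit limbs, no nails. */
    static void
    umul64 (uint64_t *hi, uint64_t *lo, uint64_t a, uint64_t b)
    {
      unsigned __int128 p = (unsigned __int128) a * b;
      *hi = (uint64_t) (p >> 64);
      *lo = (uint64_t) p;
    }

    /* Plain mul_1: rp[] = up[] * v, returning the high limb out. */
    static uint64_t
    mul_1 (uint64_t *rp, const uint64_t *up, long n, uint64_t v)
    {
      uint64_t c = 0, hi, lo;
      for (long i = 0; i < n; i++)
        {
          umul64 (&hi, &lo, up[i], v);
          rp[i] = lo + c;
          c = hi + (rp[i] < lo);
        }
      return c;
    }

    /* Carry propagated out of the low "excess" limbs of up[]*v, found
       without forming those low product limbs in full.  Scans downward
       from the top excess limb, continuing only while the sum is
       all-ones, the only value a carry bit from below can cross. */
    static uint64_t
    carry_in (const uint64_t *up, long excess, uint64_t v)
    {
      uint64_t cin, hi, lo, next_lo, sum;
      long i = excess - 1;
      umul64 (&cin, &lo, up[i], v);   /* high limb of the top product */
      while (--i >= 0)
        {
          umul64 (&hi, &next_lo, up[i], v);
          sum = hi + lo;              /* hi here + lo of the product above */
          cin += (sum < hi);          /* carry bit out of that addition */
          if (sum != UINT64_MAX)      /* only ~0 lets a lower carry through */
            break;
          lo = next_lo;
        }
      return cin;
    }

    int
    main (void)
    {
      /* 3 source limbs, keep only the top one: excess = 2. */
      uint64_t up[3] = { 0x8000000000000001, 0xffffffffffffffff, 123 };
      uint64_t v = 0xfedcba9876543210;
      uint64_t full[4], part[2], cin;

      full[3] = mul_1 (full, up, 3, v);      /* full product */
      part[1] = mul_1 (part, up + 2, 1, v);  /* truncated product */
      cin = carry_in (up, 2, v);             /* carry from the low limbs */
      part[0] += cin;
      part[1] += (part[0] < cin);

      printf ("full %016llx %016llx  trunc+cin %016llx %016llx\n",
              (unsigned long long) full[3], (unsigned long long) full[2],
              (unsigned long long) part[1], (unsigned long long) part[0]);
      return 0;
    }

The main checks the key invariant: the truncated product plus the computed
carry-in equals the top limbs of the full product.
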
@@ -28,12 +84,12 @@ mpf_mul_ui (mpf_ptr r, mpf_srcptr u, unsigned long int v)
mp_srcptr up;
mp_size_t usize;
mp_size_t size;
- mp_size_t prec;
- mp_limb_t cy_limb;
+ mp_size_t prec, excess;
+ mp_limb_t cy_limb, vl, cbit, cin;
mp_ptr rp;
usize = u->_mp_size;
- if (usize == 0 || v == 0)
+ if (UNLIKELY (v == 0) || UNLIKELY (usize == 0))
{
r->_mp_size = 0;
r->_mp_exp = 0;
@@ -59,21 +115,56 @@ mpf_mul_ui (mpf_ptr r, mpf_srcptr u, unsigned long int v)
size = ABS (usize);
prec = r->_mp_prec;
up = u->_mp_d;
- if (size > prec)
+ vl = v;
+ excess = size - prec;
+ cin = 0;
+
+ if (excess > 0)
{
- up += size - prec;
+ /* up is bigger than the desired rp; shorten it to prec limbs and
+ determine a carry-in */
+
+ mp_limb_t vl_shifted = vl << GMP_NAIL_BITS;
+ mp_limb_t hi, lo, next_lo, sum;
+ mp_size_t i;
+
+ /* high limb of top product */
+ i = excess - 1;
+ umul_ppmm (cin, lo, up[i], vl_shifted);
+
+ /* and carry bit out of products below that, if any */
+ for (;;)
+ {
+ i--;
+ if (i < 0)
+ break;
+
+ umul_ppmm (hi, next_lo, up[i], vl_shifted);
+ lo >>= GMP_NAIL_BITS;
+ ADDC_LIMB (cbit, sum, hi, lo);
+ cin += cbit;
+ lo = next_lo;
+
+ /* Continue only if the sum is GMP_NUMB_MAX. GMP_NUMB_MAX is the
+ only value a carry from below can propagate across. If we've
+ just seen the carry out (ie. cbit!=0) then sum!=GMP_NUMB_MAX,
+ so this test stops us for that case too. */
+ if (LIKELY (sum != GMP_NUMB_MAX))
+ break;
+ }
+
+ up += excess;
size = prec;
}
-#if 0
- /* Since we can do it at almost no cost, remove zero limbs at low end of
- result. */
- if (up[0] == 0)
- up++, size--;
-#endif
-
rp = r->_mp_d;
- cy_limb = mpn_mul_1 (rp, up, size, (mp_limb_t) v);
+#if HAVE_NATIVE_mpn_mul_1c
+ cy_limb = mpn_mul_1c (rp, up, size, vl, cin);
+#else
+ cy_limb = mpn_mul_1 (rp, up, size, vl);
+ __GMPN_ADD_1 (cbit, rp, rp, size, cin);
+ cy_limb += cbit;
+#endif
rp[size] = cy_limb;
cy_limb = cy_limb != 0;
r->_mp_exp = u->_mp_exp + cy_limb;
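
When there is no native mpn_mul_1c, the #else branch above composes
mpn_mul_1 with __GMPN_ADD_1 to fold in the carry. A rough standalone
equivalent of that add step, under the same assumptions as the earlier
sketch (64-bit limbs, no nails; add_1 is a hypothetical stand-in for
__GMPN_ADD_1, not the GMP macro itself):

    #include <stdio.h>
    #include <stdint.h>

    /* Add the single limb cin into the n-limb number rp[], returning
       the carry out the top.  The loop stops as soon as the carry dies,
       which is the common case after the first limb. */
    static uint64_t
    add_1 (uint64_t *rp, long n, uint64_t cin)
    {
      for (long i = 0; i < n && cin != 0; i++)
        {
          uint64_t t = rp[i] + cin;
          cin = (t < cin);            /* 1 if the addition wrapped */
          rp[i] = t;
        }
      return cin;
    }

    int
    main (void)
    {
      uint64_t r[3] = { 0xffffffffffffffff, 0xffffffffffffffff, 5 };
      uint64_t cy = add_1 (r, 3, 1);  /* ripples through two ~0 limbs */
      printf ("%llx %016llx %016llx %016llx\n", (unsigned long long) cy,
              (unsigned long long) r[2], (unsigned long long) r[1],
              (unsigned long long) r[0]);
      return 0;
    }

Used as "cy_limb = mul_1 (rp, up, size, vl); cy_limb += add_1 (rp, size,
cin);", this mirrors what the fallback branch computes, and what a native
mpn_mul_1c does in a single pass.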