From 79a796b7d3db5d100eedfc774954a6b44944363a Mon Sep 17 00:00:00 2001 From: ian Date: Wed, 21 Nov 2012 07:03:38 +0000 Subject: libgo: Update to current version of master library. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@193688 138bc75d-0d04-0410-961f-82ee72b054a4 --- libgo/go/math/big/int.go | 13 ++-- libgo/go/math/big/int_test.go | 25 +++++-- libgo/go/math/big/nat.go | 164 +++++++++++++++++++++++++++++------------- libgo/go/math/big/nat_test.go | 44 ++++++++++-- 4 files changed, 181 insertions(+), 65 deletions(-) (limited to 'libgo/go/math') diff --git a/libgo/go/math/big/int.go b/libgo/go/math/big/int.go index 4bd7958ae50..95c0d58ee97 100644 --- a/libgo/go/math/big/int.go +++ b/libgo/go/math/big/int.go @@ -412,7 +412,7 @@ func (x *Int) Format(s fmt.State, ch rune) { if precisionSet { switch { case len(digits) < precision: - zeroes = precision - len(digits) // count of zero padding + zeroes = precision - len(digits) // count of zero padding case digits == "0" && precision == 0: return // print nothing if zero value (x == 0) and zero precision ("." or ".0") } @@ -561,19 +561,18 @@ func (x *Int) BitLen() int { return x.abs.bitLen() } -// Exp sets z = x**y mod m and returns z. If m is nil, z = x**y. +// Exp sets z = x**y mod |m| (i.e. the sign of m is ignored), and returns z. +// If y <= 0, the result is 1; if m == nil or m == 0, z = x**y. // See Knuth, volume 2, section 4.6.3. func (z *Int) Exp(x, y, m *Int) *Int { if y.neg || len(y.abs) == 0 { - neg := x.neg - z.SetInt64(1) - z.neg = neg - return z + return z.SetInt64(1) } + // y > 0 var mWords nat if m != nil { - mWords = m.abs + mWords = m.abs // m.abs may be nil for m == 0 } z.abs = z.abs.expNN(x.abs, y.abs, mWords) diff --git a/libgo/go/math/big/int_test.go b/libgo/go/math/big/int_test.go index 27834cec6af..d3c5a0e8bfe 100644 --- a/libgo/go/math/big/int_test.go +++ b/libgo/go/math/big/int_test.go @@ -767,8 +767,10 @@ var expTests = []struct { x, y, m string out string }{ + {"5", "-7", "", "1"}, + {"-5", "-7", "", "1"}, {"5", "0", "", "1"}, - {"-5", "0", "", "-1"}, + {"-5", "0", "", "1"}, {"5", "1", "", "5"}, {"-5", "1", "", "-5"}, {"-2", "3", "2", "0"}, @@ -779,6 +781,7 @@ var expTests = []struct { {"0x8000000000000000", "3", "6719", "5447"}, {"0x8000000000000000", "1000", "6719", "1603"}, {"0x8000000000000000", "1000000", "6719", "3199"}, + {"0x8000000000000000", "-1000000", "6719", "1"}, { "2938462938472983472983659726349017249287491026512746239764525612965293865296239471239874193284792387498274256129746192347", "298472983472983471903246121093472394872319615612417471234712061", @@ -807,12 +810,22 @@ func TestExp(t *testing.T) { continue } - z := y.Exp(x, y, m) - if !isNormalized(z) { - t.Errorf("#%d: %v is not normalized", i, *z) + z1 := new(Int).Exp(x, y, m) + if !isNormalized(z1) { + t.Errorf("#%d: %v is not normalized", i, *z1) + } + if z1.Cmp(out) != 0 { + t.Errorf("#%d: got %s want %s", i, z1, out) } - if z.Cmp(out) != 0 { - t.Errorf("#%d: got %s want %s", i, z, out) + + if m == nil { + // the result should be the same as for m == 0; + // specifically, there should be no div-zero panic + m = &Int{abs: nat{}} // m != nil && len(m.abs) == 0 + z2 := new(Int).Exp(x, y, m) + if z2.Cmp(z1) != 0 { + t.Errorf("#%d: got %s want %s", i, z1, z2) + } } } } diff --git a/libgo/go/math/big/nat.go b/libgo/go/math/big/nat.go index 2d5a5c9587d..13a623a7032 100644 --- a/libgo/go/math/big/nat.go +++ b/libgo/go/math/big/nat.go @@ -421,17 +421,17 @@ func (z nat) mul(x, y nat) nat { z[2*k:].clear() // upper portion of z is garbage (and 2*k <= m+n since k <= n <= m) // If xh != 0 or yh != 0, add the missing terms to z. For - // - // xh = xi*b^i + ... + x2*b^2 + x1*b (0 <= xi < b) - // yh = y1*b (0 <= y1 < b) - // - // the missing terms are - // - // x0*y1*b and xi*y0*b^i, xi*y1*b^(i+1) for i > 0 - // - // since all the yi for i > 1 are 0 by choice of k: If any of them - // were > 0, then yh >= b^2 and thus y >= b^2. Then k' = k*2 would - // be a larger valid threshold contradicting the assumption about k. + // + // xh = xi*b^i + ... + x2*b^2 + x1*b (0 <= xi < b) + // yh = y1*b (0 <= y1 < b) + // + // the missing terms are + // + // x0*y1*b and xi*y0*b^i, xi*y1*b^(i+1) for i > 0 + // + // since all the yi for i > 1 are 0 by choice of k: If any of them + // were > 0, then yh >= b^2 and thus y >= b^2. Then k' = k*2 would + // be a larger valid threshold contradicting the assumption about k. // if k < n || m != n { var t nat @@ -828,16 +828,16 @@ func (x nat) string(charset string) string { // by nat/nat division using tabulated divisors. Otherwise, it is converted iteratively using // repeated nat/Word divison. // -// The iterative method processes n Words by n divW() calls, each of which visits every Word in the -// incrementally shortened q for a total of n + (n-1) + (n-2) ... + 2 + 1, or n(n+1)/2 divW()'s. -// Recursive conversion divides q by its approximate square root, yielding two parts, each half +// The iterative method processes n Words by n divW() calls, each of which visits every Word in the +// incrementally shortened q for a total of n + (n-1) + (n-2) ... + 2 + 1, or n(n+1)/2 divW()'s. +// Recursive conversion divides q by its approximate square root, yielding two parts, each half // the size of q. Using the iterative method on both halves means 2 * (n/2)(n/2 + 1)/2 divW()'s // plus the expensive long div(). Asymptotically, the ratio is favorable at 1/2 the divW()'s, and -// is made better by splitting the subblocks recursively. Best is to split blocks until one more -// split would take longer (because of the nat/nat div()) than the twice as many divW()'s of the -// iterative approach. This threshold is represented by leafSize. Benchmarking of leafSize in the -// range 2..64 shows that values of 8 and 16 work well, with a 4x speedup at medium lengths and -// ~30x for 20000 digits. Use nat_test.go's BenchmarkLeafSize tests to optimize leafSize for +// is made better by splitting the subblocks recursively. Best is to split blocks until one more +// split would take longer (because of the nat/nat div()) than the twice as many divW()'s of the +// iterative approach. This threshold is represented by leafSize. Benchmarking of leafSize in the +// range 2..64 shows that values of 8 and 16 work well, with a 4x speedup at medium lengths and +// ~30x for 20000 digits. Use nat_test.go's BenchmarkLeafSize tests to optimize leafSize for // specific hardware. // func (q nat) convertWords(s []byte, charset string, b Word, ndigits int, bb Word, table []divisor) { @@ -920,8 +920,10 @@ type divisor struct { ndigits int // digit length of divisor in terms of output base digits } -var cacheBase10 [64]divisor // cached divisors for base 10 -var cacheLock sync.Mutex // protects cacheBase10 +var cacheBase10 struct { + sync.Mutex + table [64]divisor // cached divisors for base 10 +} // expWW computes x**y func (z nat) expWW(x, y Word) nat { @@ -937,34 +939,28 @@ func divisors(m int, b Word, ndigits int, bb Word) []divisor { // determine k where (bb**leafSize)**(2**k) >= sqrt(x) k := 1 - for words := leafSize; words < m>>1 && k < len(cacheBase10); words <<= 1 { + for words := leafSize; words < m>>1 && k < len(cacheBase10.table); words <<= 1 { k++ } - // create new table of divisors or extend and reuse existing table as appropriate - var table []divisor - var cached bool - switch b { - case 10: - table = cacheBase10[0:k] // reuse old table for this conversion - cached = true - default: - table = make([]divisor, k) // new table for this conversion + // reuse and extend existing table of divisors or create new table as appropriate + var table []divisor // for b == 10, table overlaps with cacheBase10.table + if b == 10 { + cacheBase10.Lock() + table = cacheBase10.table[0:k] // reuse old table for this conversion + } else { + table = make([]divisor, k) // create new table for this conversion } // extend table if table[k-1].ndigits == 0 { - if cached { - cacheLock.Lock() // begin critical section - } - // add new entries as needed var larger nat for i := 0; i < k; i++ { if table[i].ndigits == 0 { if i == 0 { - table[i].bbb = nat(nil).expWW(bb, Word(leafSize)) - table[i].ndigits = ndigits * leafSize + table[0].bbb = nat(nil).expWW(bb, Word(leafSize)) + table[0].ndigits = ndigits * leafSize } else { table[i].bbb = nat(nil).mul(table[i-1].bbb, table[i-1].bbb) table[i].ndigits = 2 * table[i-1].ndigits @@ -980,10 +976,10 @@ func divisors(m int, b Word, ndigits int, bb Word) []divisor { table[i].nbits = table[i].bbb.bitLen() } } + } - if cached { - cacheLock.Unlock() // end critical section - } + if b == 10 { + cacheBase10.Unlock() } return table @@ -1231,8 +1227,8 @@ func (z nat) random(rand *rand.Rand, limit nat, n int) nat { return z.norm() } -// If m != nil, expNN calculates x**y mod m. Otherwise it calculates x**y. It -// reuses the storage of z if possible. +// If m != 0 (i.e., len(m) != 0), expNN sets z to x**y mod m; +// otherwise it sets z to x**y. The result is the value of z. func (z nat) expNN(x, y, m nat) nat { if alias(z, x) || alias(z, y) { // We cannot allow in-place modification of x or y. @@ -1244,15 +1240,24 @@ func (z nat) expNN(x, y, m nat) nat { z[0] = 1 return z } + // y > 0 - if m != nil { + if len(m) != 0 { // We likely end up being as long as the modulus. z = z.make(len(m)) } z = z.set(x) - v := y[len(y)-1] - // It's invalid for the most significant word to be zero, therefore we - // will find a one bit. + + // If the base is non-trivial and the exponent is large, we use + // 4-bit, windowed exponentiation. This involves precomputing 14 values + // (x^2...x^15) but then reduces the number of multiply-reduces by a + // third. Even for a 32-bit exponent, this reduces the number of + // operations. + if len(x) > 1 && len(y) > 1 && len(m) > 0 { + return z.expNNWindowed(x, y, m) + } + + v := y[len(y)-1] // v > 0 because y is normalized and y > 0 shift := leadingZeros(v) + 1 v <<= shift var q nat @@ -1276,7 +1281,7 @@ func (z nat) expNN(x, y, m nat) nat { zz, z = z, zz } - if m != nil { + if len(m) != 0 { zz, r = zz.div(r, z, m) zz, r, q, z = q, z, zz, r } @@ -1296,7 +1301,7 @@ func (z nat) expNN(x, y, m nat) nat { zz, z = z, zz } - if m != nil { + if len(m) != 0 { zz, r = zz.div(r, z, m) zz, r, q, z = q, z, zz, r } @@ -1308,6 +1313,69 @@ func (z nat) expNN(x, y, m nat) nat { return z.norm() } +// expNNWindowed calculates x**y mod m using a fixed, 4-bit window. +func (z nat) expNNWindowed(x, y, m nat) nat { + // zz and r are used to avoid allocating in mul and div as otherwise + // the arguments would alias. + var zz, r nat + + const n = 4 + // powers[i] contains x^i. + var powers [1 << n]nat + powers[0] = natOne + powers[1] = x + for i := 2; i < 1<= 0; i-- { + yi := y[i] + for j := 0; j < _W; j += n { + if i != len(y)-1 || j != 0 { + // Unrolled loop for significant performance + // gain. Use go test -bench=".*" in crypto/rsa + // to check performance before making changes. + zz = zz.mul(z, z) + zz, z = z, zz + zz, r = zz.div(r, z, m) + z, r = r, z + + zz = zz.mul(z, z) + zz, z = z, zz + zz, r = zz.div(r, z, m) + z, r = r, z + + zz = zz.mul(z, z) + zz, z = z, zz + zz, r = zz.div(r, z, m) + z, r = r, z + + zz = zz.mul(z, z) + zz, z = z, zz + zz, r = zz.div(r, z, m) + z, r = r, z + } + + zz = zz.mul(z, powers[yi>>(_W-n)]) + zz, z = z, zz + zz, r = zz.div(r, z, m) + z, r = r, z + + yi <<= n + } + } + + return z.norm() +} + // probablyPrime performs reps Miller-Rabin tests to check whether n is prime. // If it returns true, n is prime with probability 1 - 1/4^reps. // If it returns false, n is not prime. diff --git a/libgo/go/math/big/nat_test.go b/libgo/go/math/big/nat_test.go index 68dd1a96d3c..6244eeefc91 100644 --- a/libgo/go/math/big/nat_test.go +++ b/libgo/go/math/big/nat_test.go @@ -166,7 +166,7 @@ func TestMulRangeN(t *testing.T) { } } -// allocBytes returns the number of bytes allocated by invoking f. +// allocBytes returns the number of bytes allocated by invoking f. func allocBytes(f func()) uint64 { var stats runtime.MemStats runtime.ReadMemStats(&stats) @@ -409,6 +409,20 @@ func TestScanPi(t *testing.T) { } } +func TestScanPiParallel(t *testing.T) { + const n = 2 + c := make(chan int) + for i := 0; i < n; i++ { + go func() { + TestScanPi(t) + c <- 0 + }() + } + for i := 0; i < n; i++ { + <-c + } +} + func BenchmarkScanPi(b *testing.B) { for i := 0; i < b.N; i++ { var x nat @@ -416,6 +430,28 @@ func BenchmarkScanPi(b *testing.B) { } } +func BenchmarkStringPiParallel(b *testing.B) { + var x nat + x, _, _ = x.scan(strings.NewReader(pi), 0) + if x.decimalString() != pi { + panic("benchmark incorrect: conversion failed") + } + n := runtime.GOMAXPROCS(0) + m := b.N / n // n*m <= b.N due to flooring, but the error is neglibible (n is not very large) + c := make(chan int, n) + for i := 0; i < n; i++ { + go func() { + for j := 0; j < m; j++ { + x.decimalString() + } + c <- 0 + }() + } + for i := 0; i < n; i++ { + <-c + } +} + func BenchmarkScan10Base2(b *testing.B) { ScanHelper(b, 2, 10, 10) } func BenchmarkScan100Base2(b *testing.B) { ScanHelper(b, 2, 10, 100) } func BenchmarkScan1000Base2(b *testing.B) { ScanHelper(b, 2, 10, 1000) } @@ -510,13 +546,13 @@ func BenchmarkLeafSize13(b *testing.B) { LeafSizeHelper(b, 10, 13) } func BenchmarkLeafSize14(b *testing.B) { LeafSizeHelper(b, 10, 14) } func BenchmarkLeafSize15(b *testing.B) { LeafSizeHelper(b, 10, 15) } func BenchmarkLeafSize16(b *testing.B) { LeafSizeHelper(b, 10, 16) } -func BenchmarkLeafSize32(b *testing.B) { LeafSizeHelper(b, 10, 32) } // try some large lengths +func BenchmarkLeafSize32(b *testing.B) { LeafSizeHelper(b, 10, 32) } // try some large lengths func BenchmarkLeafSize64(b *testing.B) { LeafSizeHelper(b, 10, 64) } func LeafSizeHelper(b *testing.B, base Word, size int) { b.StopTimer() originalLeafSize := leafSize - resetTable(cacheBase10[:]) + resetTable(cacheBase10.table[:]) leafSize = size b.StartTimer() @@ -533,7 +569,7 @@ func LeafSizeHelper(b *testing.B, base Word, size int) { } b.StopTimer() - resetTable(cacheBase10[:]) + resetTable(cacheBase10.table[:]) leafSize = originalLeafSize b.StartTimer() } -- cgit v1.2.1