| author | Ruslan Andreev <ruslan.andreev@huawei.com> | 2021-03-10 19:31:59 +0800 |
|---|---|---|
| committer | Emmanuel Odeke <emmanuel@orijtech.com> | 2021-04-26 17:13:36 +0000 |
| commit | d5d24dbe419c429b43046049d57b97b0abd42a87 (patch) | |
| tree | e114398b9a8297b9c6ea576335fdbe9c7bf0231d /src | |
| parent | 1f7ddf57d2908319c0ca7dc621a206935d8726f2 (diff) | |
| download | go-git-d5d24dbe419c429b43046049d57b97b0abd42a87.tar.gz | |
sync: improve sync.Pool object stealing
This CL provides the ability to randomly select a P to steal objects from
its shared queue. To provide this, the randomOrder structure was copied
from runtime/proc.go.
This should reduce contention on the first Ps and better balance object
stealing across all Ps. The patch also adds a new benchmark,
PoolStarvation, which forces Ps to steal objects.
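To illustrate the mechanism: a stride that is coprime with the P count visits every index exactly once, so each steal attempt probes all Ps in a per-call pseudo-random order instead of always starting from the same neighbor. The following standalone sketch mirrors the randomOrder/randomEnum code this CL copies (see the diff below); the main function, the math/rand seed, and the fixed count of 8 are illustrative scaffolding, not part of the change:

```go
package main

import (
	"fmt"
	"math/rand"
)

// gcd, randomOrder, and randomEnum mirror the shapes copied from
// runtime/proc.go in this CL.
func gcd(a, b uint32) uint32 {
	for b != 0 {
		a, b = b, a%b
	}
	return a
}

type randomOrder struct {
	count    uint32
	coprimes []uint32 // all strides in [1, count] coprime with count
}

type randomEnum struct {
	i, count, pos, inc uint32
}

func (ord *randomOrder) reset(count uint32) {
	ord.count = count
	ord.coprimes = ord.coprimes[:0]
	for i := uint32(1); i <= count; i++ {
		if gcd(i, count) == 1 {
			ord.coprimes = append(ord.coprimes, i)
		}
	}
}

func (ord *randomOrder) start(i uint32) randomEnum {
	return randomEnum{
		count: ord.count,
		pos:   i % ord.count,                            // random starting index
		inc:   ord.coprimes[i%uint32(len(ord.coprimes))], // random coprime stride
	}
}

func (enum *randomEnum) done() bool       { return enum.i == enum.count }
func (enum *randomEnum) position() uint32 { return enum.pos }

func (enum *randomEnum) next() {
	enum.i++
	enum.pos = (enum.pos + enum.inc) % enum.count
}

func main() {
	var ord randomOrder
	ord.reset(8) // pretend GOMAXPROCS(0) == 8
	// Each seed yields a full traversal of 0..7 in a different order,
	// so no single P's shared queue is always probed first.
	for trial := 0; trial < 2; trial++ {
		for e := ord.start(rand.Uint32()); !e.done(); e.next() {
			fmt.Print(e.position(), " ")
		}
		fmt.Println()
	}
}
```

Because the stride is coprime with the count, each pass is a permutation of all P indexes, which is what spreads steal attempts evenly.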
Benchmarks:
name                 old time/op    new time/op    delta
Pool-8               2.16ns ±14%    2.14ns ±16%      ~     (p=0.425 n=10+10)
PoolOverflow-8        489ns ± 0%     489ns ± 0%      ~     (p=0.719 n=9+10)
PoolStarvation-8     7.00µs ± 4%    6.59µs ± 2%    -5.86%  (p=0.000 n=10+10)
PoolSTW-8            15.1µs ± 1%    15.2µs ± 1%    +0.99%  (p=0.001 n=10+10)
PoolExpensiveNew-8   1.25ms ±10%    1.31ms ± 9%      ~     (p=0.143 n=10+10)
[Geo mean]           2.68µs         2.68µs         -0.28%

name                 old p50-ns/STW  new p50-ns/STW  delta
PoolSTW-8            15.0k ± 1%      15.1k ± 1%      +0.92%  (p=0.000 n=10+10)

name                 old p95-ns/STW  new p95-ns/STW  delta
PoolSTW-8            16.2k ± 3%      16.4k ± 2%        ~     (p=0.143 n=10+10)

name                 old GCs/op     new GCs/op     delta
PoolExpensiveNew-8   0.29 ± 2%      0.30 ± 1%      +2.84%  (p=0.000 n=8+10)

name                 old New/op     new New/op     delta
PoolExpensiveNew-8   8.07 ±11%      8.49 ±10%        ~     (p=0.123 n=10+10)
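The tables above are standard benchstat output. For anyone wanting to reproduce a comparison like this, the usual workflow (assuming one toolchain built without this CL, one built with it, and benchstat from golang.org/x/perf/cmd/benchstat) looks roughly like:

```
$ go test sync -run=NONE -bench=Pool -count=10 > old.txt   # toolchain without this CL
$ go test sync -run=NONE -bench=Pool -count=10 > new.txt   # toolchain with this CL
$ benchstat old.txt new.txt
```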
Change-Id: I3ca1d0bf1f358b1148c58e64740fb2d5bfc0bc02
Reviewed-on: https://go-review.googlesource.com/c/go/+/303949
Reviewed-by: David Chase <drchase@google.com>
Trust: Emmanuel Odeke <emmanuel@orijtech.com>
Diffstat (limited to 'src')
-rw-r--r-- | src/sync/pool.go | 103 |
-rw-r--r-- | src/sync/pool_test.go | 18 |
2 files changed, 111 insertions, 10 deletions
diff --git a/src/sync/pool.go b/src/sync/pool.go
index 1ae70127ac..3fb4dc07de 100644
--- a/src/sync/pool.go
+++ b/src/sync/pool.go
@@ -70,6 +70,57 @@ type poolLocal struct {
 	pad [128 - unsafe.Sizeof(poolLocalInternal{})%128]byte
 }
 
+// The randomOrder and randomEnum are copied from runtime/proc.go
+type randomOrder struct {
+	count    uint32
+	coprimes []uint32
+}
+
+type randomEnum struct {
+	i     uint32
+	count uint32
+	pos   uint32
+	inc   uint32
+}
+
+func (ord *randomOrder) reset(count uint32) {
+	ord.count = count
+	ord.coprimes = ord.coprimes[:0]
+	for i := uint32(1); i <= count; i++ {
+		if gcd(i, count) == 1 {
+			ord.coprimes = append(ord.coprimes, i)
+		}
+	}
+}
+
+func (ord *randomOrder) start(i uint32) randomEnum {
+	return randomEnum{
+		count: ord.count,
+		pos:   i % ord.count,
+		inc:   ord.coprimes[i%uint32(len(ord.coprimes))],
+	}
+}
+
+func (enum *randomEnum) done() bool {
+	return enum.i == enum.count
+}
+
+func (enum *randomEnum) next() {
+	enum.i++
+	enum.pos = (enum.pos + enum.inc) % enum.count
+}
+
+func (enum *randomEnum) position() uint32 {
+	return enum.pos
+}
+
+func gcd(a, b uint32) uint32 {
+	for b != 0 {
+		a, b = b, a%b
+	}
+	return a
+}
+
 // from runtime
 func fastrand() uint32
 
@@ -153,12 +204,27 @@ func (p *Pool) Get() interface{} {
 func (p *Pool) getSlow(pid int) interface{} {
 	// See the comment in pin regarding ordering of the loads.
 	size := runtime_LoadAcquintptr(&p.localSize) // load-acquire
-	locals := p.local                            // load-consume
-	// Try to steal one element from other procs.
-	for i := 0; i < int(size); i++ {
-		l := indexLocal(locals, (pid+i+1)%int(size))
-		if x, _ := l.shared.popTail(); x != nil {
-			return x
+	// Load pOrder atomically to prevent possible races
+	order := (*randomOrder)(atomic.LoadPointer((*unsafe.Pointer)(unsafe.Pointer(&pOrder)))) // load-consume
+
+	// The pin function always returns a non-zero localSize, and it will remain so until
+	// runtime_procUnpin is called. This invariant is maintained by pin, which ensures that
+	// locals is always big enough to account for the current P, and by the fact that
+	// poolCleanup can never execute concurrently with a pinned P, since preemption is disabled.
+	// So we could remove this condition, which protects against division by zero in the loop
+	// body, but we leave it here just to be sure there is no possibility of error.
+	if size != 0 {
+		locals := p.local // load-consume
+		// Try to steal one element from other procs.
+		for rndp := order.start(fastrand()); !rndp.done(); rndp.next() {
+			i := int(rndp.position())
+			// While pOrder is limited to returning indexes within the range of Ps,
+			// locals may be smaller, either because it was reset or because of a race
+			// with pinSlow. Hence, we must still mod the local index by size.
+			l := indexLocal(locals, (pid+i+1)%int(size))
+			if x, _ := l.shared.popTail(); x != nil {
+				return x
+			}
 		}
 	}
 
@@ -166,16 +232,25 @@
 	// from all primary caches because we want objects in the
 	// victim cache to age out if at all possible.
 	size = atomic.LoadUintptr(&p.victimSize)
+
+	// We also have to ensure that the victim cache is big enough to account for the
+	// current P, and that size is not zero (protects against division by zero),
+	// similar to what pin does.
 	if uintptr(pid) >= size {
 		return nil
 	}
-	locals = p.victim
+	locals := p.victim
 	l := indexLocal(locals, pid)
+
+	// Check private cache
	if x := l.private; x != nil {
 		l.private = nil
 		return x
 	}
-	for i := 0; i < int(size); i++ {
+
+	// Try to fetch from the tail of other P queues
+	for rndp := order.start(fastrand()); !rndp.done(); rndp.next() {
+		i := int(rndp.position())
 		l := indexLocal(locals, (pid+i)%int(size))
 		if x, _ := l.shared.popTail(); x != nil {
 			return x
@@ -224,9 +299,13 @@ func (p *Pool) pinSlow() (*poolLocal, int) {
 	}
 	// If GOMAXPROCS changes between GCs, we re-allocate the array and lose the old one.
 	size := runtime.GOMAXPROCS(0)
+	// Set the count of Ps for random ordering
+	order := &randomOrder{}
+	order.reset(uint32(size))
 	local := make([]poolLocal, size)
-	atomic.StorePointer(&p.local, unsafe.Pointer(&local[0])) // store-release
-	runtime_StoreReluintptr(&p.localSize, uintptr(size))     // store-release
+	atomic.StorePointer(&p.local, unsafe.Pointer(&local[0]))                               // store-release
+	atomic.StorePointer((*unsafe.Pointer)(unsafe.Pointer(&pOrder)), unsafe.Pointer(order)) // store-release
+	runtime_StoreReluintptr(&p.localSize, uintptr(size))                                   // store-release
 	return &local[pid], pid
 }
 
@@ -267,6 +346,10 @@ var (
 	// oldPools is the set of pools that may have non-empty victim
 	// caches. Protected by STW.
 	oldPools []*Pool
+
+	// pOrder is a random order of Ps used for stealing. Writes
+	// are protected by allPoolsMu. Reads are atomic.
+	pOrder *randomOrder
 )
 
 func init() {
diff --git a/src/sync/pool_test.go b/src/sync/pool_test.go
index 65666daab4..6cccd8a533 100644
--- a/src/sync/pool_test.go
+++ b/src/sync/pool_test.go
@@ -271,6 +271,24 @@ func BenchmarkPoolOverflow(b *testing.B) {
 	})
 }
 
+// Simulate object starvation in order to force Ps to steal objects
+// from other Ps.
+func BenchmarkPoolStarvation(b *testing.B) {
+	var p Pool
+	count := 100
+	countStarved := count - (count / runtime.GOMAXPROCS(0))
+	b.RunParallel(func(pb *testing.PB) {
+		for pb.Next() {
+			for b := 0; b < countStarved; b++ {
+				p.Put(1)
+			}
+			for b := 0; b < count; b++ {
+				p.Get()
+			}
+		}
+	})
+}
+
 var globalSink interface{}
 
 func BenchmarkPoolSTW(b *testing.B) {
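One pattern in the diff above worth calling out is how pOrder is published: pinSlow builds the randomOrder completely and only then makes it visible with a single atomic pointer store, while getSlow reads it with a single atomic load, so a reader observes either nil or a fully initialized value without taking allPoolsMu. A minimal standalone sketch of that publish/consume pattern follows; the order type and the publish/consume names are hypothetical stand-ins, not the CL's identifiers:

```go
package main

import (
	"fmt"
	"sync/atomic"
	"unsafe"
)

// order stands in for the CL's randomOrder.
type order struct{ count uint32 }

// published stands in for the package-level pOrder pointer.
var published *order

// publish mirrors pinSlow: initialize the value completely first, then
// expose it with one atomic pointer store (store-release).
func publish(n uint32) {
	o := &order{count: n}
	atomic.StorePointer((*unsafe.Pointer)(unsafe.Pointer(&published)), unsafe.Pointer(o))
}

// consume mirrors getSlow: one atomic load observes either nil or a
// fully built value, so no lock is needed on the read side.
func consume() *order {
	return (*order)(atomic.LoadPointer((*unsafe.Pointer)(unsafe.Pointer(&published))))
}

func main() {
	publish(8)
	if o := consume(); o != nil {
		fmt.Println("count:", o.count)
	}
}
```

On Go 1.19 and later the same idea is usually expressed with atomic.Pointer[T], but the unsafe.Pointer form above matches what this 2021-era diff does.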