diff options
author | Russ Cox <rsc@golang.org> | 2014-11-16 16:44:45 -0500 |
---|---|---|
committer | Russ Cox <rsc@golang.org> | 2014-11-16 16:44:45 -0500 |
commit | ca1e131f589cad741fda16c17155e9225ec9ec04 (patch) | |
tree | 90077f6987427eea0815aa1e4c52144780f5385e | |
parent | 5005e63e270d757af01625b857450168f02585f9 (diff) | |
download | go-ca1e131f589cad741fda16c17155e9225ec9ec04.tar.gz |
runtime: fix sudog leak
The SudoG used to sit on the stack, so it was cheap to allocated
and didn't need to be cleaned up when finished.
For the conversion to Go, we had to move sudog off the stack
for a few reasons, so we added a cache of recently used sudogs
to keep allocation cheap. But we didn't add any of the necessary
cleanup before adding a SudoG to the new cache, and so the cached
SudoGs had stale pointers inside them that have caused all sorts
of awful, hard to debug problems.
CL 155760043 made sure SudoG.elem is cleaned up.
CL 150520043 made sure SudoG.selectdone is cleaned up.
This CL makes sure SudoG.next, SudoG.prev, and SudoG.waitlink
are cleaned up. I should have done this when I did the other two
fields; instead I wasted a week tracking down a leak they caused.
A dangling SudoG.waitlink can point into a sudogcache list that
has been "forgotten" in order to let the GC collect it, but that
dangling .waitlink keeps the list from being collected.
And then the list holding the SudoG with the dangling waitlink
can find itself in the same situation, and so on. We end up
with lists of lists of unusable SudoGs that are still linked into
the object graph and never collected (given the right mix of
non-trivial selects and non-channel synchronization).
More details in golang.org/issue/9110.
Fixes issue 9110.
LGTM=r
R=r
CC=dvyukov, golang-codereviews, iant, khr
https://codereview.appspot.com/177870043
-rw-r--r-- | src/runtime/chan.go | 1 | ||||
-rw-r--r-- | src/runtime/mgc0.go | 16 | ||||
-rw-r--r-- | src/runtime/proc.go | 10 | ||||
-rw-r--r-- | src/runtime/select.go | 2 | ||||
-rw-r--r-- | src/runtime/sema.go | 2 | ||||
-rw-r--r-- | test/fixedbugs/issue9110.go | 90 |
6 files changed, 121 insertions, 0 deletions
diff --git a/src/runtime/chan.go b/src/runtime/chan.go index 004970182..0eb87df74 100644 --- a/src/runtime/chan.go +++ b/src/runtime/chan.go @@ -630,6 +630,7 @@ func (q *waitq) dequeue() *sudog { return nil } q.first = sgp.next + sgp.next = nil if q.last == sgp { q.last = nil } diff --git a/src/runtime/mgc0.go b/src/runtime/mgc0.go index 3a7204b54..cbf5e9cfd 100644 --- a/src/runtime/mgc0.go +++ b/src/runtime/mgc0.go @@ -51,10 +51,26 @@ func clearpools() { if c := p.mcache; c != nil { c.tiny = nil c.tinysize = 0 + + // disconnect cached list before dropping it on the floor, + // so that a dangling ref to one entry does not pin all of them. + var sg, sgnext *sudog + for sg = c.sudogcache; sg != nil; sg = sgnext { + sgnext = sg.next + sg.next = nil + } c.sudogcache = nil } + // clear defer pools for i := range p.deferpool { + // disconnect cached list before dropping it on the floor, + // so that a dangling ref to one entry does not pin all of them. + var d, dlink *_defer + for d = p.deferpool[i]; d != nil; d = dlink { + dlink = d.link + d.link = nil + } p.deferpool[i] = nil } } diff --git a/src/runtime/proc.go b/src/runtime/proc.go index 5b8c7d8ae..517ca03df 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -152,6 +152,7 @@ func acquireSudog() *sudog { gothrow("acquireSudog: found s.elem != nil in cache") } c.sudogcache = s.next + s.next = nil return s } @@ -177,6 +178,15 @@ func releaseSudog(s *sudog) { if s.selectdone != nil { gothrow("runtime: sudog with non-nil selectdone") } + if s.next != nil { + gothrow("runtime: sudog with non-nil next") + } + if s.prev != nil { + gothrow("runtime: sudog with non-nil prev") + } + if s.waitlink != nil { + gothrow("runtime: sudog with non-nil waitlink") + } gp := getg() if gp.param != nil { gothrow("runtime: releaseSudog with non-nil gp.param") diff --git a/src/runtime/select.go b/src/runtime/select.go index efe68c1f5..f735a71e2 100644 --- a/src/runtime/select.go +++ b/src/runtime/select.go @@ -404,6 +404,7 @@ loop: } } sgnext = sglist.waitlink + sglist.waitlink = nil releaseSudog(sglist) sglist = sgnext } @@ -641,6 +642,7 @@ func (q *waitq) dequeueSudoG(s *sudog) { if q.last == sgp { q.last = prevsgp } + s.next = nil return } l = &sgp.next diff --git a/src/runtime/sema.go b/src/runtime/sema.go index d2a028c01..26dbd30ea 100644 --- a/src/runtime/sema.go +++ b/src/runtime/sema.go @@ -201,6 +201,7 @@ func syncsemacquire(s *syncSema) { } unlock(&s.lock) if wake != nil { + wake.next = nil goready(wake.g) } } else { @@ -242,6 +243,7 @@ func syncsemrelease(s *syncSema, n uint32) { if wake.releasetime != 0 { wake.releasetime = cputicks() } + wake.next = nil goready(wake.g) n-- } diff --git a/test/fixedbugs/issue9110.go b/test/fixedbugs/issue9110.go new file mode 100644 index 000000000..729463305 --- /dev/null +++ b/test/fixedbugs/issue9110.go @@ -0,0 +1,90 @@ +// run + +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Scenario that used to leak arbitrarily many SudoG structs. +// See golang.org/issue/9110. + +package main + +import ( + "runtime" + "runtime/debug" + "sync" + "time" +) + +func main() { + debug.SetGCPercent(1000000) // only GC when we ask for GC + + var stats, stats1, stats2 runtime.MemStats + + release := func() {} + for i := 0; i < 20; i++ { + if i == 10 { + // Should be warmed up by now. + runtime.ReadMemStats(&stats1) + } + + c := make(chan int) + for i := 0; i < 10; i++ { + go func() { + select { + case <-c: + case <-c: + case <-c: + } + }() + } + time.Sleep(1 * time.Millisecond) + release() + + close(c) // let select put its sudog's into the cache + time.Sleep(1 * time.Millisecond) + + // pick up top sudog + var cond1 sync.Cond + var mu1 sync.Mutex + cond1.L = &mu1 + go func() { + mu1.Lock() + cond1.Wait() + mu1.Unlock() + }() + time.Sleep(1 * time.Millisecond) + + // pick up next sudog + var cond2 sync.Cond + var mu2 sync.Mutex + cond2.L = &mu2 + go func() { + mu2.Lock() + cond2.Wait() + mu2.Unlock() + }() + time.Sleep(1 * time.Millisecond) + + // put top sudog back + cond1.Broadcast() + time.Sleep(1 * time.Millisecond) + + // drop cache on floor + runtime.GC() + + // release cond2 after select has gotten to run + release = func() { + cond2.Broadcast() + time.Sleep(1 * time.Millisecond) + } + } + + runtime.GC() + + runtime.ReadMemStats(&stats2) + + if int(stats2.HeapObjects)-int(stats1.HeapObjects) > 20 { // normally at most 1 or 2; was 300 with leak + print("BUG: object leak: ", stats.HeapObjects, " -> ", stats1.HeapObjects, " -> ", stats2.HeapObjects, "\n") + } +} |