Diffstat (limited to 'libgo/runtime/mgc0.c')
 libgo/runtime/mgc0.c | 634 +++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 519 insertions(+), 115 deletions(-)
diff --git a/libgo/runtime/mgc0.c b/libgo/runtime/mgc0.c
index 900ebde687c..cb585251b53 100644
--- a/libgo/runtime/mgc0.c
+++ b/libgo/runtime/mgc0.c
@@ -5,13 +5,14 @@
// Garbage collector.
#include "runtime.h"
+#include "arch.h"
#include "malloc.h"
enum {
Debug = 0,
- UseCas = 1,
PtrSize = sizeof(void*),
-
+ DebugMark = 0, // run second pass to check mark
+
// Four bits per word (see #defines below).
wordsPerBitmapWord = sizeof(void*)*8/4,
bitShift = sizeof(void*)*8/4,
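
The constants above drive the heap bitmap addressing used throughout this file: four bits describe each heap word, so one bitmap word covers wordsPerBitmapWord heap words, and the bitmap grows downward from arena_start. A minimal standalone sketch of the lookup that scanblock and sweep both inline (uintptr_t stands in for the runtime's uintptr; the arena base is a caller-supplied stand-in):

/* Find the bitmap word and in-word shift for heap address v. */
#include <stdint.h>

enum { wordsPerBitmap = sizeof(void*)*8/4 };

static uintptr_t*
bitmap_word(uintptr_t *arena_start, void *v, uintptr_t *shift)
{
	uintptr_t off;

	off = (uintptr_t*)v - arena_start;	/* word offset into the arena */
	*shift = off % wordsPerBitmap;		/* bit position for this word */
	return arena_start - off/wordsPerBitmap - 1;	/* bitmap grows down */
}
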
@@ -50,28 +51,72 @@ enum {
#define bitMask (bitBlockBoundary | bitAllocated | bitMarked | bitSpecial)
+// TODO: Make these per-M.
static uint64 nlookup;
static uint64 nsizelookup;
static uint64 naddrlookup;
+static uint64 nhandoff;
+
static int32 gctrace;
typedef struct Workbuf Workbuf;
struct Workbuf
{
Workbuf *next;
- uintptr nw;
- byte *w[2048-2];
+ uintptr nobj;
+ byte *obj[512-2];
+};
+
+typedef struct Finalizer Finalizer;
+struct Finalizer
+{
+ void (*fn)(void*);
+ void *arg;
+ const struct __go_func_type *ft;
+};
+
+typedef struct FinBlock FinBlock;
+struct FinBlock
+{
+ FinBlock *alllink;
+ FinBlock *next;
+ int32 cnt;
+ int32 cap;
+ Finalizer fin[1];
};
static bool finstarted;
static pthread_mutex_t finqlock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t finqcond = PTHREAD_COND_INITIALIZER;
-static Finalizer *finq;
+static FinBlock *finq; // list of finalizers that are to be executed
+static FinBlock *finc; // cache of free blocks
+static FinBlock *allfin; // list of all blocks
+static Lock finlock;
static int32 fingwait;
static void runfinq(void*);
static Workbuf* getempty(Workbuf*);
static Workbuf* getfull(Workbuf*);
+static void putempty(Workbuf*);
+static Workbuf* handoff(Workbuf*);
+
+static struct {
+ Lock fmu;
+ Workbuf *full;
+ Lock emu;
+ Workbuf *empty;
+ uint32 nproc;
+ volatile uint32 nwait;
+ volatile uint32 ndone;
+ Note alldone;
+ Lock markgate;
+ Lock sweepgate;
+ MSpan *spans;
+
+ Lock;
+ byte *chunk;
+ uintptr nchunk;
+} work;
// scanblock scans a block of n bytes starting at pointer b for references
// to other objects, scanning any it finds recursively until there are no
@@ -82,13 +127,14 @@ static Workbuf* getfull(Workbuf*);
static void
scanblock(byte *b, int64 n)
{
- byte *obj, *arena_start, *p;
+ byte *obj, *arena_start, *arena_used, *p;
void **vp;
- uintptr size, *bitp, bits, shift, i, j, x, xbits, off;
+ uintptr size, *bitp, bits, shift, i, j, x, xbits, off, nobj, nproc;
MSpan *s;
PageID k;
- void **bw, **w, **ew;
+ void **wp;
Workbuf *wbuf;
+ bool keepworking;
if((int64)(uintptr)n != n || n < 0) {
// runtime_printf("scanblock %p %lld\n", b, (long long)n);
@@ -97,11 +143,19 @@ scanblock(byte *b, int64 n)
// Memory arena parameters.
arena_start = runtime_mheap.arena_start;
-
+ arena_used = runtime_mheap.arena_used;
+ nproc = work.nproc;
+
wbuf = nil; // current work buffer
- ew = nil; // end of work buffer
- bw = nil; // beginning of work buffer
- w = nil; // current pointer into work buffer
+ wp = nil; // storage for next queued pointer (write pointer)
+ nobj = 0; // number of queued objects
+
+ // Scanblock helpers pass b==nil.
+ // The main proc needs to return to make more
+ // calls to scanblock. But if work.nproc==1 then
+ // might as well process blocks as soon as we
+ // have them.
+ keepworking = b == nil || work.nproc == 1;
// Align b to a word boundary.
off = (uintptr)b & (PtrSize-1);
@@ -117,17 +171,17 @@ scanblock(byte *b, int64 n)
runtime_printf("scanblock %p %lld\n", b, (long long) n);
vp = (void**)b;
- n /= PtrSize;
+ n >>= (2+PtrSize/8); /* n /= PtrSize (4 or 8) */
for(i=0; i<(uintptr)n; i++) {
obj = (byte*)vp[i];
-
+
// Words outside the arena cannot be pointers.
- if((byte*)obj < arena_start || (byte*)obj >= runtime_mheap.arena_used)
+ if((byte*)obj < arena_start || (byte*)obj >= arena_used)
continue;
-
+
// obj may be a pointer to a live object.
// Try to find the beginning of the object.
-
+
// Round down to word boundary.
obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));
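
The shift replacing n /= PtrSize above is exact for the only two word sizes the runtime supports: 2+PtrSize/8 evaluates to 2 on 32-bit targets (divide by 4) and 3 on 64-bit targets (divide by 8). A quick self-contained check:

/* Verify n >> (2 + PtrSize/8) == n / PtrSize for PtrSize in {4, 8}. */
#include <assert.h>
#include <stdint.h>

int
main(void)
{
	uintptr_t ptrSize = sizeof(void*);
	uintptr_t n;

	assert(ptrSize == 4 || ptrSize == 8);
	for(n = 0; n < (1<<20); n++)
		assert((n >> (2 + ptrSize/8)) == n/ptrSize);
	return 0;
}
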
@@ -185,47 +239,72 @@ scanblock(byte *b, int64 n)
found:
// Now we have bits, bitp, and shift correct for
// obj pointing at the base of the object.
- // If not allocated or already marked, done.
- if((bits & bitAllocated) == 0 || (bits & bitMarked) != 0)
+ // Only care about allocated and not marked.
+ if((bits & (bitAllocated|bitMarked)) != bitAllocated)
continue;
- *bitp |= bitMarked<<shift;
+ if(nproc == 1)
+ *bitp |= bitMarked<<shift;
+ else {
+ for(;;) {
+ x = *bitp;
+ if(x & (bitMarked<<shift))
+ goto continue_obj;
+ if(runtime_casp((void**)bitp, (void*)x, (void*)(x|(bitMarked<<shift))))
+ break;
+ }
+ }
// If object has no pointers, don't need to scan further.
if((bits & bitNoPointers) != 0)
continue;
+ // If another proc wants a pointer, give it some.
+ if(nobj > 4 && work.nwait > 0 && work.full == nil) {
+ wbuf->nobj = nobj;
+ wbuf = handoff(wbuf);
+ nobj = wbuf->nobj;
+ wp = (void**)(wbuf->obj + nobj);
+ }
+
// If buffer is full, get a new one.
- if(w >= ew) {
+ if(wbuf == nil || nobj >= nelem(wbuf->obj)) {
+ if(wbuf != nil)
+ wbuf->nobj = nobj;
wbuf = getempty(wbuf);
- bw = (void**)wbuf->w;
- w = bw;
- ew = bw + nelem(wbuf->w);
+ wp = (void**)(wbuf->obj);
+ nobj = 0;
}
- *w++ = obj;
+ *wp++ = obj;
+ nobj++;
+ continue_obj:;
}
-
+
// Done scanning [b, b+n). Prepare for the next iteration of
// the loop by setting b and n to the parameters for the next block.
- // Fetch b from the work buffers.
- if(w <= bw) {
+ // Fetch b from the work buffer.
+ if(nobj == 0) {
+ if(!keepworking) {
+ putempty(wbuf);
+ return;
+ }
// Emptied our buffer: refill.
wbuf = getfull(wbuf);
if(wbuf == nil)
- break;
- bw = (void**)wbuf->w;
- ew = (void**)(wbuf->w + nelem(wbuf->w));
- w = bw+wbuf->nw;
+ return;
+ nobj = wbuf->nobj;
+ wp = (void**)(wbuf->obj + wbuf->nobj);
}
- b = *--w;
-
+ b = *--wp;
+ nobj--;
+
// Figure out n = size of b. Start by loading bits for b.
off = (uintptr*)b - (uintptr*)arena_start;
bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
shift = off % wordsPerBitmapWord;
xbits = *bitp;
bits = xbits >> shift;
-
+
// Might be small; look for nearby block boundary.
// A block boundary is marked by either bitBlockBoundary
// or bitAllocated being set (see notes near their definition).
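
In the nproc > 1 branch above, the mark bit is set with a compare-and-swap retry loop so concurrent markers never lose a bitmap update and each object is claimed by exactly one scanner. A standalone sketch of the pattern, with GCC's __sync builtin standing in for the runtime-internal runtime_casp:

/* Try to set bitMarked for one object; true means this caller won
 * the race and is responsible for scanning the object. */
#include <stdbool.h>
#include <stdint.h>

static bool
try_mark(uintptr_t *bitp, uintptr_t shift, uintptr_t bitMarked)
{
	uintptr_t x;

	for(;;) {
		x = *bitp;
		if(x & (bitMarked<<shift))
			return false;	/* another proc marked it first */
		if(__sync_bool_compare_and_swap(bitp, x, x | (bitMarked<<shift)))
			return true;
	}
}
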
@@ -244,12 +323,12 @@ scanblock(byte *b, int64 n)
// apply a mask to keep only the bits corresponding
// to shift+j < bitShift aka j < bitShift-shift.
bits &= (boundary<<(bitShift-shift)) - boundary;
-
+
// A block boundary j words before b is indicated by
// xbits>>(shift-j) & boundary
// (assuming shift >= j). There is no cleverness here
// to avoid the test, because when j gets too large the shift
- // turns negative, which is undefined in C.
+ // turns negative, which is undefined in C.
for(j=1; j<bitShift; j++) {
if(((bits>>j)&boundary) != 0 || (shift>=j && ((xbits>>(shift-j))&boundary) != 0)) {
@@ -257,7 +336,7 @@ scanblock(byte *b, int64 n)
goto scan;
}
}
-
+
// Fall back to asking span about size class.
// (Manually inlined copy of MHeap_Lookup.)
nlookup++;
@@ -274,29 +353,123 @@ scanblock(byte *b, int64 n)
}
}
-static struct {
- Workbuf *full;
- Workbuf *empty;
- byte *chunk;
- uintptr nchunk;
-} work;
+// debug_scanblock is the debug copy of scanblock.
+// it is simpler, slower, single-threaded, recursive,
+// and uses bitSpecial as the mark bit.
+static void
+debug_scanblock(byte *b, int64 n)
+{
+ byte *obj, *p;
+ void **vp;
+ uintptr size, *bitp, bits, shift, i, xbits, off;
+ MSpan *s;
+
+ if(!DebugMark)
+ runtime_throw("debug_scanblock without DebugMark");
+
+ if((int64)(uintptr)n != n || n < 0) {
+ //runtime_printf("debug_scanblock %p %D\n", b, n);
+ runtime_throw("debug_scanblock");
+ }
+
+ // Align b to a word boundary.
+ off = (uintptr)b & (PtrSize-1);
+ if(off != 0) {
+ b += PtrSize - off;
+ n -= PtrSize - off;
+ }
+
+ vp = (void**)b;
+ n /= PtrSize;
+ for(i=0; i<(uintptr)n; i++) {
+ obj = (byte*)vp[i];
+
+ // Words outside the arena cannot be pointers.
+ if((byte*)obj < runtime_mheap.arena_start || (byte*)obj >= runtime_mheap.arena_used)
+ continue;
+
+ // Round down to word boundary.
+ obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));
+
+ // Consult span table to find beginning.
+ s = runtime_MHeap_LookupMaybe(&runtime_mheap, obj);
+ if(s == nil)
+ continue;
+
+
+ p = (byte*)((uintptr)s->start<<PageShift);
+ if(s->sizeclass == 0) {
+ obj = p;
+ size = (uintptr)s->npages<<PageShift;
+ } else {
+ if((byte*)obj >= (byte*)s->limit)
+ continue;
+ size = runtime_class_to_size[s->sizeclass];
+ int32 i = ((byte*)obj - p)/size;
+ obj = p+i*size;
+ }
+
+ // Now that we know the object header, reload bits.
+ off = (uintptr*)obj - (uintptr*)runtime_mheap.arena_start;
+ bitp = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
+ shift = off % wordsPerBitmapWord;
+ xbits = *bitp;
+ bits = xbits >> shift;
+
+ // Now we have bits, bitp, and shift correct for
+ // obj pointing at the base of the object.
+ // If not allocated or already marked, done.
+ if((bits & bitAllocated) == 0 || (bits & bitSpecial) != 0) // NOTE: bitSpecial not bitMarked
+ continue;
+ *bitp |= bitSpecial<<shift;
+ if(!(bits & bitMarked))
+ runtime_printf("found unmarked block %p in %p\n", obj, vp+i);
+
+ // If object has no pointers, don't need to scan further.
+ if((bits & bitNoPointers) != 0)
+ continue;
+
+ debug_scanblock(obj, size);
+ }
+}
// Get an empty work buffer off the work.empty list,
// allocating new buffers as needed.
static Workbuf*
getempty(Workbuf *b)
{
- if(b != nil) {
- b->nw = nelem(b->w);
- b->next = work.full;
- work.full = b;
- }
- b = work.empty;
- if(b != nil) {
- work.empty = b->next;
- return b;
+ if(work.nproc == 1) {
+ // Put b on full list.
+ if(b != nil) {
+ b->next = work.full;
+ work.full = b;
+ }
+ // Grab from empty list if possible.
+ b = work.empty;
+ if(b != nil) {
+ work.empty = b->next;
+ goto haveb;
+ }
+ } else {
+ // Put b on full list.
+ if(b != nil) {
+ runtime_lock(&work.fmu);
+ b->next = work.full;
+ work.full = b;
+ runtime_unlock(&work.fmu);
+ }
+ // Grab from empty list if possible.
+ runtime_lock(&work.emu);
+ b = work.empty;
+ if(b != nil)
+ work.empty = b->next;
+ runtime_unlock(&work.emu);
+ if(b != nil)
+ goto haveb;
}
-
+
+ // Need to allocate.
+ runtime_lock(&work);
if(work.nchunk < sizeof *b) {
work.nchunk = 1<<20;
work.chunk = runtime_SysAlloc(work.nchunk);
@@ -304,25 +477,121 @@ getempty(Workbuf *b)
b = (Workbuf*)work.chunk;
work.chunk += sizeof *b;
work.nchunk -= sizeof *b;
+ runtime_unlock(&work);
+
+haveb:
+ b->nobj = 0;
return b;
}
+static void
+putempty(Workbuf *b)
+{
+ if(b == nil)
+ return;
+
+ if(work.nproc == 1) {
+ b->next = work.empty;
+ work.empty = b;
+ return;
+ }
+
+ runtime_lock(&work.emu);
+ b->next = work.empty;
+ work.empty = b;
+ runtime_unlock(&work.emu);
+}
+
// Get a full work buffer off the work.full list, or return nil.
static Workbuf*
getfull(Workbuf *b)
{
- if(b != nil) {
- b->nw = 0;
- b->next = work.empty;
- work.empty = b;
+ int32 i;
+ Workbuf *b1;
+
+ if(work.nproc == 1) {
+ // Put b on empty list.
+ if(b != nil) {
+ b->next = work.empty;
+ work.empty = b;
+ }
+ // Grab from full list if possible.
+ // Since work.nproc==1, no one else is
+ // going to give us work.
+ b = work.full;
+ if(b != nil)
+ work.full = b->next;
+ return b;
+ }
+
+ putempty(b);
+
+ // Grab buffer from full list if possible.
+ for(;;) {
+ b1 = work.full;
+ if(b1 == nil)
+ break;
+ runtime_lock(&work.fmu);
+ if(work.full != nil) {
+ b1 = work.full;
+ work.full = b1->next;
+ runtime_unlock(&work.fmu);
+ return b1;
+ }
+ runtime_unlock(&work.fmu);
+ }
+
+ runtime_xadd(&work.nwait, +1);
+ for(i=0;; i++) {
+ b1 = work.full;
+ if(b1 != nil) {
+ runtime_lock(&work.fmu);
+ if(work.full != nil) {
+ runtime_xadd(&work.nwait, -1);
+ b1 = work.full;
+ work.full = b1->next;
+ runtime_unlock(&work.fmu);
+ return b1;
+ }
+ runtime_unlock(&work.fmu);
+ continue;
+ }
+ if(work.nwait == work.nproc)
+ return nil;
+ if(i < 10)
+ runtime_procyield(20);
+ else if(i < 20)
+ runtime_osyield();
+ else
+ runtime_usleep(100);
}
- b = work.full;
- if(b != nil)
- work.full = b->next;
- return b;
}
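
getfull's wait loop escalates from processor-level spinning to yielding the time slice to outright sleeping, and uses work.nwait == work.nproc to detect that every proc is idle and marking is done. A portable sketch of that backoff ladder (sched_yield and usleep stand in for runtime_osyield and runtime_usleep; the empty asm is a stand-in for runtime_procyield's PAUSE loop):

/* Poll have_work() with progressively more polite waiting. */
#include <sched.h>
#include <stdbool.h>
#include <unistd.h>

static void
backoff_wait(bool (*have_work)(void))
{
	int i, j;

	for(i = 0; !have_work(); i++) {
		if(i < 10) {
			for(j = 0; j < 20; j++)
				__asm__ __volatile__("" ::: "memory");	/* cheap spin */
		} else if(i < 20)
			sched_yield();	/* give up the time slice */
		else
			usleep(100);	/* sleep; work is scarce */
	}
}
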
-// Scanstack calls scanblock on each of gp's stack segments.
+static Workbuf*
+handoff(Workbuf *b)
+{
+ int32 n;
+ Workbuf *b1;
+
+ // Make new buffer with half of b's pointers.
+ b1 = getempty(nil);
+ n = b->nobj/2;
+ b->nobj -= n;
+ b1->nobj = n;
+ runtime_memmove(b1->obj, b->obj+b->nobj, n*sizeof b1->obj[0]);
+ nhandoff += n;
+
+ // Put b on full list - let first half of b get stolen.
+ runtime_lock(&work.fmu);
+ b->next = work.full;
+ work.full = b;
+ runtime_unlock(&work.fmu);
+
+ return b1;
+}
+
+// Markfin calls scanblock on the blocks that have finalizers:
+// the things pointed at cannot be freed until the finalizers have run.
static void
markfin(void *v)
{
@@ -355,11 +624,22 @@ __go_register_gc_roots (struct root_list* r)
roots = r;
}
-// Mark
static void
-mark(void)
+debug_markfin(void *v)
+{
+ uintptr size;
+
+ if(!runtime_mlookup(v, (byte**)&v, &size, nil))
+ runtime_throw("debug_mark - finalizer inconsistency");
+ debug_scanblock(v, size);
+}
+
+// Mark
+static void
+mark(void (*scan)(byte*, int64))
{
struct root_list *pl;
+ FinBlock *fb;
for(pl = roots; pl != nil; pl = pl->next) {
struct root* pr = &pl->roots[0];
@@ -372,18 +652,63 @@ mark(void)
}
}
- scanblock((byte*)&m0, sizeof m0);
- scanblock((byte*)&finq, sizeof finq);
- runtime_MProf_Mark(scanblock);
+ scan((byte*)&m0, sizeof m0);
+ scan((byte*)&finq, sizeof finq);
+ runtime_MProf_Mark(scan);
// mark stacks
- __go_scanstacks(scanblock);
+ __go_scanstacks(scan);
// mark things pointed at by objects with finalizers
- runtime_walkfintab(markfin, scanblock);
+ if(scan == debug_scanblock)
+ runtime_walkfintab(debug_markfin, scan);
+ else
+ runtime_walkfintab(markfin, scan);
+
+ for(fb=allfin; fb; fb=fb->alllink)
+ scanblock((byte*)fb->fin, fb->cnt*sizeof(fb->fin[0]));
+
+ // in multiproc mode, join in the queued work.
+ scan(nil, 0);
}
-// Sweep frees or calls finalizers for blocks not marked in the mark phase.
+static bool
+handlespecial(byte *p, uintptr size)
+{
+ void (*fn)(void*);
+ const struct __go_func_type *ft;
+ FinBlock *block;
+ Finalizer *f;
+
+ if(!runtime_getfinalizer(p, true, &fn, &ft)) {
+ runtime_setblockspecial(p, false);
+ runtime_MProf_Free(p, size);
+ return false;
+ }
+
+ runtime_lock(&finlock);
+ if(finq == nil || finq->cnt == finq->cap) {
+ if(finc == nil) {
+ finc = runtime_SysAlloc(PageSize);
+ finc->cap = (PageSize - sizeof(FinBlock)) / sizeof(Finalizer) + 1;
+ finc->alllink = allfin;
+ allfin = finc;
+ }
+ block = finc;
+ finc = block->next;
+ block->next = finq;
+ finq = block;
+ }
+ f = &finq->fin[finq->cnt];
+ finq->cnt++;
+ f->fn = fn;
+ f->ft = ft;
+ f->arg = p;
+ runtime_unlock(&finlock);
+ return true;
+}
+
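
The cap computed in handlespecial packs as many Finalizer records as fit in one page behind the FinBlock header; the +1 compensates for the fin[1] member already counted inside sizeof(FinBlock). A worked check of the arithmetic (the 4096-byte PageSize and the resulting count are illustrative, assuming a typical 64-bit layout):

/* Reproduce the FinBlock sizing from handlespecial. */
#include <stdio.h>

typedef struct Finalizer {
	void (*fn)(void*);
	void *arg;
	const void *ft;
} Finalizer;

typedef struct FinBlock {
	struct FinBlock *alllink;
	struct FinBlock *next;
	int cnt;
	int cap;
	Finalizer fin[1];
} FinBlock;

int
main(void)
{
	unsigned long pageSize = 4096;	/* assumption; matches common targets */
	unsigned long cap = (pageSize - sizeof(FinBlock))/sizeof(Finalizer) + 1;

	/* Typical 64-bit result: (4096 - 48)/24 + 1 = 169. */
	printf("finalizers per page-sized block: %lu\n", cap);
	return 0;
}
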
+// Sweep frees or collects finalizers for blocks not marked in the mark phase.
// It clears the mark bits in preparation for the next GC round.
static void
sweep(void)
@@ -393,9 +718,17 @@ sweep(void)
uintptr size;
byte *p;
MCache *c;
- Finalizer *f;
+ byte *arena_start;
+
+ arena_start = runtime_mheap.arena_start;
+
- for(s = runtime_mheap.allspans; s != nil; s = s->allnext) {
+ for(;;) {
+ s = work.spans;
+ if(s == nil)
+ break;
+ if(!runtime_casp(&work.spans, s, s->allnext))
+ continue;
if(s->state != MSpanInUse)
continue;
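
The for(;;) loop above hands spans out to however many procs are sweeping by atomically popping the head of the work.spans list, so each span is claimed exactly once and owned by one sweeper without taking a lock. A generic sketch of that pop (Span and next are stand-ins for MSpan and allnext; the __sync builtin stands in for runtime_casp):

/* Claim the next unswept item, or NULL when the list is drained. */
#include <stddef.h>

typedef struct Span {
	struct Span *next;
} Span;

static Span*
claim_span(Span **list)
{
	Span *s;

	for(;;) {
		s = *list;
		if(s == NULL)
			return NULL;
		if(__sync_bool_compare_and_swap(list, s, s->next))
			return s;	/* this proc owns s now */
	}
}
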
@@ -410,13 +743,15 @@ sweep(void)
npages = runtime_class_to_allocnpages[cl];
n = (npages << PageShift) / size;
}
-
- // sweep through n objects of given size starting at p.
+
+ // Sweep through n objects of given size starting at p.
+ // This thread owns the span now, so it can manipulate
+ // the block bitmap without atomic operations.
for(; n > 0; n--, p += size) {
uintptr off, *bitp, shift, bits;
- off = (uintptr*)p - (uintptr*)runtime_mheap.arena_start;
- bitp = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
+ off = (uintptr*)p - (uintptr*)arena_start;
+ bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
shift = off % wordsPerBitmapWord;
bits = *bitp>>shift;
@@ -424,20 +759,21 @@ sweep(void)
continue;
if((bits & bitMarked) != 0) {
+ if(DebugMark) {
+ if(!(bits & bitSpecial))
+ runtime_printf("found spurious mark on %p\n", p);
+ *bitp &= ~(bitSpecial<<shift);
+ }
*bitp &= ~(bitMarked<<shift);
continue;
}
- if((bits & bitSpecial) != 0) {
- // Special means it has a finalizer or is being profiled.
- f = runtime_getfinalizer(p, 1);
- if(f != nil) {
- f->arg = p;
- f->next = finq;
- finq = f;
+ // Special means it has a finalizer or is being profiled.
+ // In DebugMark mode, the bit has been coopted so
+ // we have to assume all blocks are special.
+ if(DebugMark || (bits & bitSpecial) != 0) {
+ if(handlespecial(p, size))
continue;
- }
- runtime_MProf_Free(p, size);
}
// Mark freed; restore block boundary bit.
@@ -464,6 +800,23 @@ sweep(void)
static pthread_mutex_t gcsema = PTHREAD_MUTEX_INITIALIZER;
+void
+runtime_gchelper(void)
+{
+ // Wait until main proc is ready for mark help.
+ runtime_lock(&work.markgate);
+ runtime_unlock(&work.markgate);
+ scanblock(nil, 0);
+
+ // Wait until main proc is ready for sweep help.
+ runtime_lock(&work.sweepgate);
+ runtime_unlock(&work.sweepgate);
+ sweep();
+
+ if(runtime_xadd(&work.ndone, +1) == work.nproc-1)
+ runtime_notewakeup(&work.alldone);
+}
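
runtime_gchelper synchronizes with the coordinating proc through markgate and sweepgate: the coordinator holds each gate lock while it prepares a phase and unlocks it to let helpers start, so a helper's back-to-back lock/unlock blocks until the gate opens. A minimal sketch of the same gate, with a pthread mutex standing in for the runtime's Lock:

/* Gate: helpers block in gate_wait until the coordinator opens it. */
#include <pthread.h>

static pthread_mutex_t gate = PTHREAD_MUTEX_INITIALIZER;

static void
gate_close(void)	/* coordinator, before preparing the phase */
{
	pthread_mutex_lock(&gate);
}

static void
gate_open(void)		/* coordinator, when the phase may begin */
{
	pthread_mutex_unlock(&gate);
}

static void
gate_wait(void)		/* helper */
{
	pthread_mutex_lock(&gate);
	pthread_mutex_unlock(&gate);
}
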
+
// Initialized from $GOGC. GOGC=off means no gc.
//
// Next gc is after we've allocated an extra amount of
@@ -481,7 +834,7 @@ runtime_gc(int32 force __attribute__ ((unused)))
int64 t0, t1, t2, t3;
uint64 heap0, heap1, obj0, obj1;
char *p;
- Finalizer *fp;
+ bool extra;
// The gc is turned off (via enablegc) until
// the bootstrap has completed.
@@ -502,10 +855,16 @@ runtime_gc(int32 force __attribute__ ((unused)))
gcpercent = -1;
else
gcpercent = runtime_atoi(p);
-
+
p = runtime_getenv("GOGCTRACE");
if(p != nil)
gctrace = runtime_atoi(p);
+
+ runtime_initlock(&work.fmu);
+ runtime_initlock(&work.emu);
+ runtime_initlock(&work.markgate);
+ runtime_initlock(&work.sweepgate);
+ runtime_initlock(&work.Lock);
}
if(gcpercent < 0)
return;
@@ -522,20 +881,42 @@ runtime_gc(int32 force __attribute__ ((unused)))
nlookup = 0;
nsizelookup = 0;
naddrlookup = 0;
+ nhandoff = 0;
m->gcing = 1;
runtime_stoptheworld();
- if(runtime_mheap.Lock.key != 0)
- runtime_throw("runtime_mheap locked during gc");
__go_cachestats();
heap0 = mstats.heap_alloc;
obj0 = mstats.nmalloc - mstats.nfree;
- mark();
+ runtime_lock(&work.markgate);
+ runtime_lock(&work.sweepgate);
+
+ extra = false;
+ work.nproc = 1;
+#if 0
+ if(runtime_gomaxprocs > 1 && runtime_ncpu > 1) {
+ runtime_noteclear(&work.alldone);
+ work.nproc += runtime_helpgc(&extra);
+ }
+#endif
+ work.nwait = 0;
+ work.ndone = 0;
+
+ runtime_unlock(&work.markgate); // let the helpers in
+ mark(scanblock);
+ if(DebugMark)
+ mark(debug_scanblock);
t1 = runtime_nanotime();
+
+ work.spans = runtime_mheap.allspans;
+ runtime_unlock(&work.sweepgate); // let the helpers in
sweep();
+ if(work.nproc > 1)
+ runtime_notesleep(&work.alldone);
t2 = runtime_nanotime();
+
__go_stealcache();
__go_cachestats();
@@ -553,21 +934,28 @@ runtime_gc(int32 force __attribute__ ((unused)))
mstats.numgc++;
if(mstats.debuggc)
runtime_printf("pause %llu\n", (unsigned long long)t3-t0);
-
+
if(gctrace) {
- runtime_printf("gc%d: %llu+%llu+%llu ms %llu -> %llu MB %llu -> %llu (%llu-%llu) objects %llu pointer lookups (%llu size, %llu addr)\n",
+ runtime_printf("gc%d: %llu+%llu+%llu ms %llu -> %llu MB %llu -> %llu (%llu-%llu) objects %llu pointer lookups (%llu size, %llu addr) %llu handoff\n",
mstats.numgc, (unsigned long long)(t1-t0)/1000000, (unsigned long long)(t2-t1)/1000000, (unsigned long long)(t3-t2)/1000000,
(unsigned long long)heap0>>20, (unsigned long long)heap1>>20, (unsigned long long)obj0, (unsigned long long)obj1,
(unsigned long long)mstats.nmalloc, (unsigned long long)mstats.nfree,
- (unsigned long long)nlookup, (unsigned long long)nsizelookup, (unsigned long long)naddrlookup);
+ (unsigned long long)nlookup, (unsigned long long)nsizelookup, (unsigned long long)naddrlookup, (unsigned long long) nhandoff);
}
pthread_mutex_unlock(&gcsema);
- runtime_starttheworld();
+
+ // If we could have used another helper proc, start one now,
+ // in the hope that it will be available next time.
+ // It would have been even better to start it before the collection,
+ // but doing so requires allocating memory, so it's tricky to
+ // coordinate. This lazy approach works out in practice:
+ // we don't mind if the first couple gc rounds don't have quite
+ // the maximum number of procs.
+ runtime_starttheworld(extra);
// finqlock is still held.
- fp = finq;
- if(fp != nil) {
+ if(finq != nil) {
// kick off or wake up goroutine to run queued finalizers
if(!finstarted) {
__go_go(runfinq, nil);
@@ -601,37 +989,44 @@ runtime_UpdateMemStats(void)
__go_cachestats();
m->gcing = 0;
pthread_mutex_unlock(&gcsema);
- runtime_starttheworld();
+ runtime_starttheworld(false);
}
static void
runfinq(void* dummy)
{
- Finalizer *f, *next;
+ Finalizer *f;
+ FinBlock *fb, *next;
+ uint32 i;
USED(dummy);
for(;;) {
pthread_mutex_lock(&finqlock);
- f = finq;
+ fb = finq;
finq = nil;
- if(f == nil) {
+ if(fb == nil) {
fingwait = 1;
pthread_cond_wait(&finqcond, &finqlock);
pthread_mutex_unlock(&finqlock);
continue;
}
pthread_mutex_unlock(&finqlock);
- for(; f; f=next) {
- void *params[1];
-
- next = f->next;
- params[0] = &f->arg;
- reflect_call(f->ft, (void*)f->fn, 0, 0, params, nil);
- f->fn = nil;
- f->arg = nil;
- f->next = nil;
- runtime_free(f);
+ for(; fb; fb=next) {
+ next = fb->next;
+ for(i=0; i<(uint32)fb->cnt; i++) {
+ void *params[1];
+
+ f = &fb->fin[i];
+ params[0] = &f->arg;
+ runtime_setblockspecial(f->arg, false);
+ reflect_call(f->ft, (void*)f->fn, 0, 0, params, nil);
+ f->fn = nil;
+ f->arg = nil;
+ }
+ fb->cnt = 0;
+ fb->next = finc;
+ finc = fb;
}
runtime_gc(1); // trigger another gc to clean up the finalized objects, if possible
}
@@ -783,6 +1178,9 @@ runtime_blockspecial(void *v)
{
uintptr *b, off, shift;
+ if(DebugMark)
+ return true;
+
off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;
b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
shift = off % wordsPerBitmapWord;
@@ -791,17 +1189,23 @@ runtime_blockspecial(void *v)
}
void
-runtime_setblockspecial(void *v)
+runtime_setblockspecial(void *v, bool s)
{
uintptr *b, off, shift, bits, obits;
+ if(DebugMark)
+ return;
+
off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;
b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
shift = off % wordsPerBitmapWord;
for(;;) {
obits = *b;
- bits = obits | (bitSpecial<<shift);
+ if(s)
+ bits = obits | (bitSpecial<<shift);
+ else
+ bits = obits & ~(bitSpecial<<shift);
if(runtime_singleproc) {
*b = bits;
break;
@@ -812,7 +1216,7 @@ runtime_setblockspecial(void *v)
}
}
}
-
+
void
runtime_MHeap_MapBits(MHeap *h)
{
@@ -823,7 +1227,7 @@ runtime_MHeap_MapBits(MHeap *h)
bitmapChunk = 8192
};
uintptr n;
-
+
n = (h->arena_used - h->arena_start) / wordsPerBitmapWord;
n = (n+bitmapChunk-1) & ~(bitmapChunk-1);
if(h->bitmap_mapped >= n)
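
The n computation above sizes the bitmap for the used portion of the arena and then rounds it up to a bitmapChunk multiple with (n+bitmapChunk-1) & ~(bitmapChunk-1), the standard power-of-two round-up. A quick check of the idiom:

/* Round n up to a multiple of k, where k is a power of two. */
#include <assert.h>
#include <stdint.h>

static uintptr_t
round_up(uintptr_t n, uintptr_t k)
{
	return (n + k - 1) & ~(k - 1);
}

int
main(void)
{
	assert(round_up(0, 8192) == 0);
	assert(round_up(1, 8192) == 8192);
	assert(round_up(8192, 8192) == 8192);
	assert(round_up(8193, 8192) == 2*8192);
	return 0;
}
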