Diffstat (limited to 'libgo/runtime/mgc0.c')
-rw-r--r-- | libgo/runtime/mgc0.c | 634
1 files changed, 519 insertions, 115 deletions
diff --git a/libgo/runtime/mgc0.c b/libgo/runtime/mgc0.c
index 900ebde687c..cb585251b53 100644
--- a/libgo/runtime/mgc0.c
+++ b/libgo/runtime/mgc0.c
@@ -5,13 +5,14 @@
 // Garbage collector.
 
 #include "runtime.h"
+#include "arch.h"
 #include "malloc.h"
 
 enum {
 	Debug = 0,
-	UseCas = 1,
 	PtrSize = sizeof(void*),
-	
+	DebugMark = 0,	// run second pass to check mark
+
 	// Four bits per word (see #defines below).
 	wordsPerBitmapWord = sizeof(void*)*8/4,
 	bitShift = sizeof(void*)*8/4,
@@ -50,28 +51,72 @@ enum {
 
 #define bitMask (bitBlockBoundary | bitAllocated | bitMarked | bitSpecial)
 
+// TODO: Make these per-M.
 static uint64 nlookup;
 static uint64 nsizelookup;
 static uint64 naddrlookup;
+static uint64 nhandoff;
+
 static int32 gctrace;
 
 typedef struct Workbuf Workbuf;
 struct Workbuf
 {
 	Workbuf *next;
-	uintptr nw;
-	byte *w[2048-2];
+	uintptr nobj;
+	byte *obj[512-2];
 };
 
+typedef struct Finalizer Finalizer;
+struct Finalizer
+{
+	void (*fn)(void*);
+	void *arg;
+	const struct __go_func_type *ft;
+};
+
+typedef struct FinBlock FinBlock;
+struct FinBlock
+{
+	FinBlock *alllink;
+	FinBlock *next;
+	int32 cnt;
+	int32 cap;
+	Finalizer fin[1];
+};
+
 static bool finstarted;
 static pthread_mutex_t finqlock = PTHREAD_MUTEX_INITIALIZER;
 static pthread_cond_t finqcond = PTHREAD_COND_INITIALIZER;
-static Finalizer *finq;
+static FinBlock *finq;	// list of finalizers that are to be executed
+static FinBlock *finc;	// cache of free blocks
+static FinBlock *allfin;	// list of all blocks
+static Lock finlock;
 static int32 fingwait;
 
 static void runfinq(void*);
 static Workbuf* getempty(Workbuf*);
 static Workbuf* getfull(Workbuf*);
+static void putempty(Workbuf*);
+static Workbuf* handoff(Workbuf*);
+
+static struct {
+	Lock fmu;
+	Workbuf	*full;
+	Lock emu;
+	Workbuf	*empty;
+	uint32	nproc;
+	volatile uint32	nwait;
+	volatile uint32	ndone;
+	Note	alldone;
+	Lock	markgate;
+	Lock	sweepgate;
+	MSpan	*spans;
+
+	Lock;
+	byte	*chunk;
+	uintptr	nchunk;
+} work;
 
 // scanblock scans a block of n bytes starting at pointer b for references
 // to other objects, scanning any it finds recursively until there are no
@@ -82,13 +127,14 @@ static Workbuf* getfull(Workbuf*);
 static void
 scanblock(byte *b, int64 n)
 {
-	byte *obj, *arena_start, *p;
+	byte *obj, *arena_start, *arena_used, *p;
 	void **vp;
-	uintptr size, *bitp, bits, shift, i, j, x, xbits, off;
+	uintptr size, *bitp, bits, shift, i, j, x, xbits, off, nobj, nproc;
 	MSpan *s;
 	PageID k;
-	void **bw, **w, **ew;
+	void **wp;
 	Workbuf *wbuf;
+	bool keepworking;
 
 	if((int64)(uintptr)n != n || n < 0) {
 		// runtime_printf("scanblock %p %lld\n", b, (long long)n);
@@ -97,11 +143,19 @@ scanblock(byte *b, int64 n)
 
 	// Memory arena parameters.
 	arena_start = runtime_mheap.arena_start;
-	
+	arena_used = runtime_mheap.arena_used;
+	nproc = work.nproc;
+
 	wbuf = nil;	// current work buffer
-	ew = nil;	// end of work buffer
-	bw = nil;	// beginning of work buffer
-	w = nil;	// current pointer into work buffer
+	wp = nil;	// storage for next queued pointer (write pointer)
+	nobj = 0;	// number of queued objects
+
+	// Scanblock helpers pass b==nil.
+	// The main proc needs to return to make more
+	// calls to scanblock.  But if work.nproc==1 then
+	// might as well process blocks as soon as we
+	// have them.
+	keepworking = b == nil || work.nproc == 1;
 
 	// Align b to a word boundary.
 	off = (uintptr)b & (PtrSize-1);
@@ -117,17 +171,17 @@ scanblock(byte *b, int64 n)
 		runtime_printf("scanblock %p %lld\n", b, (long long) n);
 
 	vp = (void**)b;
-	n /= PtrSize;
+	n >>= (2+PtrSize/8);	/* n /= PtrSize (4 or 8) */
 	for(i=0; i<(uintptr)n; i++) {
 		obj = (byte*)vp[i];
-		
+
 		// Words outside the arena cannot be pointers.
-		if((byte*)obj < arena_start || (byte*)obj >= runtime_mheap.arena_used)
+		if((byte*)obj < arena_start || (byte*)obj >= arena_used)
 			continue;
-		
+
 		// obj may be a pointer to a live object.
 		// Try to find the beginning of the object.
-		
+
 		// Round down to word boundary.
 		obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));
@@ -185,47 +239,72 @@ scanblock(byte *b, int64 n)
 	found:
 		// Now we have bits, bitp, and shift correct for
 		// obj pointing at the base of the object.
-		// If not allocated or already marked, done.
-		if((bits & bitAllocated) == 0 || (bits & bitMarked) != 0)
+		// Only care about allocated and not marked.
+		if((bits & (bitAllocated|bitMarked)) != bitAllocated)
 			continue;
-		*bitp |= bitMarked<<shift;
+		if(nproc == 1)
+			*bitp |= bitMarked<<shift;
+		else {
+			for(;;) {
+				x = *bitp;
+				if(x & (bitMarked<<shift))
+					goto continue_obj;
+				if(runtime_casp((void**)bitp, (void*)x, (void*)(x|(bitMarked<<shift))))
+					break;
+			}
+		}
 
 		// If object has no pointers, don't need to scan further.
 		if((bits & bitNoPointers) != 0)
 			continue;
 
+		// If another proc wants a pointer, give it some.
+		if(nobj > 4 && work.nwait > 0 && work.full == nil) {
+			wbuf->nobj = nobj;
+			wbuf = handoff(wbuf);
+			nobj = wbuf->nobj;
+			wp = (void**)(wbuf->obj + nobj);
+		}
+
 		// If buffer is full, get a new one.
-		if(w >= ew) {
+		if(wbuf == nil || nobj >= nelem(wbuf->obj)) {
+			if(wbuf != nil)
+				wbuf->nobj = nobj;
 			wbuf = getempty(wbuf);
-			bw = (void**)wbuf->w;
-			w = bw;
-			ew = bw + nelem(wbuf->w);
+			wp = (void**)(wbuf->obj);
+			nobj = 0;
 		}
-		*w++ = obj;
+		*wp++ = obj;
+		nobj++;
+	continue_obj:;
 	}
-	
+
 	// Done scanning [b, b+n).  Prepare for the next iteration of
 	// the loop by setting b and n to the parameters for the next block.
 
-	// Fetch b from the work buffers.
-	if(w <= bw) {
+	// Fetch b from the work buffer.
+	if(nobj == 0) {
+		if(!keepworking) {
+			putempty(wbuf);
+			return;
+		}
 		// Emptied our buffer: refill.
 		wbuf = getfull(wbuf);
 		if(wbuf == nil)
-			break;
-		bw = (void**)wbuf->w;
-		ew = (void**)(wbuf->w + nelem(wbuf->w));
-		w = bw+wbuf->nw;
+			return;
+		nobj = wbuf->nobj;
+		wp = (void**)(wbuf->obj + wbuf->nobj);
 	}
-	b = *--w;
-	
+	b = *--wp;
+	nobj--;
+
 	// Figure out n = size of b.  Start by loading bits for b.
 	off = (uintptr*)b - (uintptr*)arena_start;
 	bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
 	shift = off % wordsPerBitmapWord;
 	xbits = *bitp;
 	bits = xbits >> shift;
-	
+
 	// Might be small; look for nearby block boundary.
 	// A block boundary is marked by either bitBlockBoundary
 	// or bitAllocated being set (see notes near their definition).
@@ -244,12 +323,12 @@ scanblock(byte *b, int64 n)
 	// apply a mask to keep only the bits corresponding
 	// to shift+j < bitShift aka j < bitShift-shift.
 	bits &= (boundary<<(bitShift-shift)) - boundary;
-	
+
 	// A block boundary j words before b is indicated by
 	//	xbits>>(shift-j) & boundary
 	// (assuming shift >= j).  There is no cleverness here
 	// avoid the test, because when j gets too large the shift
-	// turns negative, which is undefined in C.	
+	// turns negative, which is undefined in C.
 
 	for(j=1; j<bitShift; j++) {
 		if(((bits>>j)&boundary) != 0 || (shift>=j && ((xbits>>(shift-j))&boundary) != 0)) {
 			n = j*PtrSize;
@@ -257,7 +336,7 @@ scanblock(byte *b, int64 n)
 			goto scan;
 		}
 	}
-	
+
 	// Fall back to asking span about size class.
 	// (Manually inlined copy of MHeap_Lookup.)
 	nlookup++;
@@ -274,29 +353,123 @@ scanblock(byte *b, int64 n)
 	}
 }
 
-static struct {
-	Workbuf *full;
-	Workbuf *empty;
-	byte *chunk;
-	uintptr nchunk;
-} work;
+// debug_scanblock is the debug copy of scanblock.
+// it is simpler, slower, single-threaded, recursive,
+// and uses bitSpecial as the mark bit.
+static void
+debug_scanblock(byte *b, int64 n)
+{
+	byte *obj, *p;
+	void **vp;
+	uintptr size, *bitp, bits, shift, i, xbits, off;
+	MSpan *s;
+
+	if(!DebugMark)
+		runtime_throw("debug_scanblock without DebugMark");
+
+	if((int64)(uintptr)n != n || n < 0) {
+		//runtime_printf("debug_scanblock %p %D\n", b, n);
+		runtime_throw("debug_scanblock");
+	}
+
+	// Align b to a word boundary.
+	off = (uintptr)b & (PtrSize-1);
+	if(off != 0) {
+		b += PtrSize - off;
+		n -= PtrSize - off;
+	}
+
+	vp = (void**)b;
+	n /= PtrSize;
+	for(i=0; i<(uintptr)n; i++) {
+		obj = (byte*)vp[i];
+
+		// Words outside the arena cannot be pointers.
+		if((byte*)obj < runtime_mheap.arena_start || (byte*)obj >= runtime_mheap.arena_used)
+			continue;
+
+		// Round down to word boundary.
+		obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));
+
+		// Consult span table to find beginning.
+		s = runtime_MHeap_LookupMaybe(&runtime_mheap, obj);
+		if(s == nil)
+			continue;
+
+		p = (byte*)((uintptr)s->start<<PageShift);
+		if(s->sizeclass == 0) {
+			obj = p;
+			size = (uintptr)s->npages<<PageShift;
+		} else {
+			if((byte*)obj >= (byte*)s->limit)
+				continue;
+			size = runtime_class_to_size[s->sizeclass];
+			int32 i = ((byte*)obj - p)/size;
+			obj = p+i*size;
+		}
+
+		// Now that we know the object header, reload bits.
+		off = (uintptr*)obj - (uintptr*)runtime_mheap.arena_start;
+		bitp = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
+		shift = off % wordsPerBitmapWord;
+		xbits = *bitp;
+		bits = xbits >> shift;
+
+		// Now we have bits, bitp, and shift correct for
+		// obj pointing at the base of the object.
+		// If not allocated or already marked, done.
+		if((bits & bitAllocated) == 0 || (bits & bitSpecial) != 0)	// NOTE: bitSpecial not bitMarked
+			continue;
+		*bitp |= bitSpecial<<shift;
+		if(!(bits & bitMarked))
+			runtime_printf("found unmarked block %p in %p\n", obj, vp+i);
+
+		// If object has no pointers, don't need to scan further.
+		if((bits & bitNoPointers) != 0)
+			continue;
+
+		debug_scanblock(obj, size);
+	}
+}
 
 // Get an empty work buffer off the work.empty list,
 // allocating new buffers as needed.
 static Workbuf*
 getempty(Workbuf *b)
 {
-	if(b != nil) {
-		b->nw = nelem(b->w);
-		b->next = work.full;
-		work.full = b;
-	}
-	b = work.empty;
-	if(b != nil) {
-		work.empty = b->next;
-		return b;
+	if(work.nproc == 1) {
+		// Put b on full list.
+		if(b != nil) {
+			b->next = work.full;
+			work.full = b;
+		}
+		// Grab from empty list if possible.
+		b = work.empty;
+		if(b != nil) {
+			work.empty = b->next;
+			goto haveb;
+		}
+	} else {
+		// Put b on full list.
+		if(b != nil) {
+			runtime_lock(&work.fmu);
+			b->next = work.full;
+			work.full = b;
+			runtime_unlock(&work.fmu);
+		}
+		// Grab from empty list if possible.
+		runtime_lock(&work.emu);
+		b = work.empty;
+		if(b != nil)
+			work.empty = b->next;
+		runtime_unlock(&work.emu);
+		if(b != nil)
+			goto haveb;
 	}
-	
+
+	// Need to allocate.
+	runtime_lock(&work);
 	if(work.nchunk < sizeof *b) {
 		work.nchunk = 1<<20;
 		work.chunk = runtime_SysAlloc(work.nchunk);
@@ -304,25 +477,121 @@ getempty(Workbuf *b)
 	b = (Workbuf*)work.chunk;
 	work.chunk += sizeof *b;
 	work.nchunk -= sizeof *b;
+	runtime_unlock(&work);
+
+haveb:
+	b->nobj = 0;
 	return b;
 }
 
+static void
+putempty(Workbuf *b)
+{
+	if(b == nil)
+		return;
+
+	if(work.nproc == 1) {
+		b->next = work.empty;
+		work.empty = b;
+		return;
+	}
+
+	runtime_lock(&work.emu);
+	b->next = work.empty;
+	work.empty = b;
+	runtime_unlock(&work.emu);
+}
+
 // Get a full work buffer off the work.full list, or return nil.
 static Workbuf*
 getfull(Workbuf *b)
 {
-	if(b != nil) {
-		b->nw = 0;
-		b->next = work.empty;
-		work.empty = b;
+	int32 i;
+	Workbuf *b1;
+
+	if(work.nproc == 1) {
+		// Put b on empty list.
+		if(b != nil) {
+			b->next = work.empty;
+			work.empty = b;
+		}
+		// Grab from full list if possible.
+		// Since work.nproc==1, no one else is
+		// going to give us work.
+		b = work.full;
+		if(b != nil)
+			work.full = b->next;
+		return b;
+	}
+
+	putempty(b);
+
+	// Grab buffer from full list if possible.
+	for(;;) {
+		b1 = work.full;
+		if(b1 == nil)
+			break;
+		runtime_lock(&work.fmu);
+		if(work.full != nil) {
+			b1 = work.full;
+			work.full = b1->next;
+			runtime_unlock(&work.fmu);
+			return b1;
+		}
+		runtime_unlock(&work.fmu);
+	}
+
+	runtime_xadd(&work.nwait, +1);
+	for(i=0;; i++) {
+		b1 = work.full;
+		if(b1 != nil) {
+			runtime_lock(&work.fmu);
+			if(work.full != nil) {
+				runtime_xadd(&work.nwait, -1);
+				b1 = work.full;
+				work.full = b1->next;
+				runtime_unlock(&work.fmu);
+				return b1;
+			}
+			runtime_unlock(&work.fmu);
+			continue;
+		}
+		if(work.nwait == work.nproc)
+			return nil;
+		if(i < 10)
+			runtime_procyield(20);
+		else if(i < 20)
+			runtime_osyield();
+		else
+			runtime_usleep(100);
 	}
-	b = work.full;
-	if(b != nil)
-		work.full = b->next;
-	return b;
 }
 
-// Scanstack calls scanblock on each of gp's stack segments.
+static Workbuf*
+handoff(Workbuf *b)
+{
+	int32 n;
+	Workbuf *b1;
+
+	// Make new buffer with half of b's pointers.
+	b1 = getempty(nil);
+	n = b->nobj/2;
+	b->nobj -= n;
+	b1->nobj = n;
+	runtime_memmove(b1->obj, b->obj+b->nobj, n*sizeof b1->obj[0]);
+	nhandoff += n;
+
+	// Put b on full list - let first half of b get stolen.
+	runtime_lock(&work.fmu);
+	b->next = work.full;
+	work.full = b;
+	runtime_unlock(&work.fmu);
+
+	return b1;
+}
+
+// Markfin calls scanblock on the blocks that have finalizers:
+// the things pointed at cannot be freed until the finalizers have run.
 static void
 markfin(void *v)
 {
@@ -355,11 +624,22 @@ __go_register_gc_roots (struct root_list* r)
 	roots = r;
 }
 
-// Mark
 static void
-mark(void)
+debug_markfin(void *v)
+{
+	uintptr size;
+
+	if(!runtime_mlookup(v, (byte**)&v, &size, nil))
+		runtime_throw("debug_mark - finalizer inconsistency");
+	debug_scanblock(v, size);
+}
+
+// Mark
+static void
+mark(void (*scan)(byte*, int64))
 {
 	struct root_list *pl;
+	FinBlock *fb;
 
 	for(pl = roots; pl != nil; pl = pl->next) {
 		struct root* pr = &pl->roots[0];
@@ -372,18 +652,63 @@ mark(void)
 		}
 	}
 
-	scanblock((byte*)&m0, sizeof m0);
-	scanblock((byte*)&finq, sizeof finq);
-	runtime_MProf_Mark(scanblock);
+	scan((byte*)&m0, sizeof m0);
+	scan((byte*)&finq, sizeof finq);
+	runtime_MProf_Mark(scan);
 
 	// mark stacks
-	__go_scanstacks(scanblock);
+	__go_scanstacks(scan);
 
 	// mark things pointed at by objects with finalizers
-	runtime_walkfintab(markfin, scanblock);
+	if(scan == debug_scanblock)
+		runtime_walkfintab(debug_markfin, scan);
+	else
+		runtime_walkfintab(markfin, scan);
+
+	for(fb=allfin; fb; fb=fb->alllink)
+		scanblock((byte*)fb->fin, fb->cnt*sizeof(fb->fin[0]));
+
+	// in multiproc mode, join in the queued work.
+	scan(nil, 0);
 }
 
-// Sweep frees or calls finalizers for blocks not marked in the mark phase.
+static bool
+handlespecial(byte *p, uintptr size)
+{
+	void (*fn)(void*);
+	const struct __go_func_type *ft;
+	FinBlock *block;
+	Finalizer *f;
+
+	if(!runtime_getfinalizer(p, true, &fn, &ft)) {
+		runtime_setblockspecial(p, false);
+		runtime_MProf_Free(p, size);
+		return false;
+	}
+
+	runtime_lock(&finlock);
+	if(finq == nil || finq->cnt == finq->cap) {
+		if(finc == nil) {
+			finc = runtime_SysAlloc(PageSize);
+			finc->cap = (PageSize - sizeof(FinBlock)) / sizeof(Finalizer) + 1;
+			finc->alllink = allfin;
+			allfin = finc;
+		}
+		block = finc;
+		finc = block->next;
+		block->next = finq;
+		finq = block;
+	}
+	f = &finq->fin[finq->cnt];
+	finq->cnt++;
+	f->fn = fn;
+	f->ft = ft;
+	f->arg = p;
+	runtime_unlock(&finlock);
+	return true;
+}
+
+// Sweep frees or collects finalizers for blocks not marked in the mark phase.
 // It clears the mark bits in preparation for the next GC round.
 static void
 sweep(void)
@@ -393,9 +718,17 @@ sweep(void)
 	uintptr size;
 	byte *p;
 	MCache *c;
-	Finalizer *f;
+	byte *arena_start;
+
+	arena_start = runtime_mheap.arena_start;
+
+	for(;;) {
+		s = work.spans;
+		if(s == nil)
+			break;
+		if(!runtime_casp(&work.spans, s, s->allnext))
+			continue;
 
-	for(s = runtime_mheap.allspans; s != nil; s = s->allnext) {
 		if(s->state != MSpanInUse)
 			continue;
@@ -410,13 +743,15 @@ sweep(void)
 			npages = runtime_class_to_allocnpages[cl];
 			n = (npages << PageShift) / size;
 		}
-		
-		// sweep through n objects of given size starting at p.
+
+		// Sweep through n objects of given size starting at p.
+		// This thread owns the span now, so it can manipulate
+		// the block bitmap without atomic operations.
 		for(; n > 0; n--, p += size) {
 			uintptr off, *bitp, shift, bits;
 
-			off = (uintptr*)p - (uintptr*)runtime_mheap.arena_start;
-			bitp = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
+			off = (uintptr*)p - (uintptr*)arena_start;
+			bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
 			shift = off % wordsPerBitmapWord;
 			bits = *bitp>>shift;
@@ -424,20 +759,21 @@ sweep(void)
 				continue;
 
 			if((bits & bitMarked) != 0) {
+				if(DebugMark) {
+					if(!(bits & bitSpecial))
+						runtime_printf("found spurious mark on %p\n", p);
+					*bitp &= ~(bitSpecial<<shift);
+				}
 				*bitp &= ~(bitMarked<<shift);
 				continue;
 			}
 
-			if((bits & bitSpecial) != 0) {
-				// Special means it has a finalizer or is being profiled.
-				f = runtime_getfinalizer(p, 1);
-				if(f != nil) {
-					f->arg = p;
-					f->next = finq;
-					finq = f;
+			// Special means it has a finalizer or is being profiled.
+			// In DebugMark mode, the bit has been coopted so
+			// we have to assume all blocks are special.
+			if(DebugMark || (bits & bitSpecial) != 0) {
+				if(handlespecial(p, size))
 					continue;
-				}
-				runtime_MProf_Free(p, size);
 			}
 
 			// Mark freed; restore block boundary bit.
@@ -464,6 +800,23 @@ sweep(void)
 
 static pthread_mutex_t gcsema = PTHREAD_MUTEX_INITIALIZER;
 
+void
+runtime_gchelper(void)
+{
+	// Wait until main proc is ready for mark help.
+	runtime_lock(&work.markgate);
+	runtime_unlock(&work.markgate);
+	scanblock(nil, 0);
+
+	// Wait until main proc is ready for sweep help.
+	runtime_lock(&work.sweepgate);
+	runtime_unlock(&work.sweepgate);
+	sweep();
+
+	if(runtime_xadd(&work.ndone, +1) == work.nproc-1)
+		runtime_notewakeup(&work.alldone);
+}
+
 // Initialized from $GOGC.  GOGC=off means no gc.
 //
 // Next gc is after we've allocated an extra amount of
@@ -481,7 +834,7 @@ runtime_gc(int32 force __attribute__ ((unused)))
 	int64 t0, t1, t2, t3;
 	uint64 heap0, heap1, obj0, obj1;
 	char *p;
-	Finalizer *fp;
+	bool extra;
 
 	// The gc is turned off (via enablegc) until
 	// the bootstrap has completed.
@@ -502,10 +855,16 @@ runtime_gc(int32 force __attribute__ ((unused)))
 			gcpercent = -1;
 		else
 			gcpercent = runtime_atoi(p);
-		
+
 		p = runtime_getenv("GOGCTRACE");
 		if(p != nil)
 			gctrace = runtime_atoi(p);
+
+		runtime_initlock(&work.fmu);
+		runtime_initlock(&work.emu);
+		runtime_initlock(&work.markgate);
+		runtime_initlock(&work.sweepgate);
+		runtime_initlock(&work.Lock);
 	}
 	if(gcpercent < 0)
 		return;
@@ -522,20 +881,42 @@ runtime_gc(int32 force __attribute__ ((unused)))
 	nlookup = 0;
 	nsizelookup = 0;
 	naddrlookup = 0;
+	nhandoff = 0;
 
 	m->gcing = 1;
 	runtime_stoptheworld();
-	if(runtime_mheap.Lock.key != 0)
-		runtime_throw("runtime_mheap locked during gc");
 
 	__go_cachestats();
 	heap0 = mstats.heap_alloc;
 	obj0 = mstats.nmalloc - mstats.nfree;
 
-	mark();
+	runtime_lock(&work.markgate);
+	runtime_lock(&work.sweepgate);
+
+	extra = false;
+	work.nproc = 1;
+#if 0
+	if(runtime_gomaxprocs > 1 && runtime_ncpu > 1) {
+		runtime_noteclear(&work.alldone);
+		work.nproc += runtime_helpgc(&extra);
+	}
+#endif
+	work.nwait = 0;
+	work.ndone = 0;
+
+	runtime_unlock(&work.markgate);	// let the helpers in
+	mark(scanblock);
+	if(DebugMark)
+		mark(debug_scanblock);
 	t1 = runtime_nanotime();
+
+	work.spans = runtime_mheap.allspans;
+	runtime_unlock(&work.sweepgate);	// let the helpers in
 	sweep();
+	if(work.nproc > 1)
+		runtime_notesleep(&work.alldone);
 	t2 = runtime_nanotime();
+
 	__go_stealcache();
 	__go_cachestats();
@@ -553,21 +934,28 @@ runtime_gc(int32 force __attribute__ ((unused)))
 	mstats.numgc++;
 	if(mstats.debuggc)
 		runtime_printf("pause %llu\n", (unsigned long long)t3-t0);
-	
+
 	if(gctrace) {
-		runtime_printf("gc%d: %llu+%llu+%llu ms %llu -> %llu MB %llu -> %llu (%llu-%llu) objects %llu pointer lookups (%llu size, %llu addr)\n",
+		runtime_printf("gc%d: %llu+%llu+%llu ms %llu -> %llu MB %llu -> %llu (%llu-%llu) objects %llu pointer lookups (%llu size, %llu addr) %llu handoff\n",
 			mstats.numgc, (unsigned long long)(t1-t0)/1000000, (unsigned long long)(t2-t1)/1000000, (unsigned long long)(t3-t2)/1000000,
 			(unsigned long long)heap0>>20, (unsigned long long)heap1>>20,
 			(unsigned long long)obj0, (unsigned long long)obj1,
 			(unsigned long long)mstats.nmalloc, (unsigned long long)mstats.nfree,
-			(unsigned long long)nlookup, (unsigned long long)nsizelookup, (unsigned long long)naddrlookup);
+			(unsigned long long)nlookup, (unsigned long long)nsizelookup, (unsigned long long)naddrlookup, (unsigned long long) nhandoff);
 	}
 
 	pthread_mutex_unlock(&gcsema);
-	runtime_starttheworld();
+
+	// If we could have used another helper proc, start one now,
+	// in the hope that it will be available next time.
+	// It would have been even better to start it before the collection,
+	// but doing so requires allocating memory, so it's tricky to
+	// coordinate.  This lazy approach works out in practice:
+	// we don't mind if the first couple gc rounds don't have quite
+	// the maximum number of procs.
+	runtime_starttheworld(extra);
 
 	// finqlock is still held.
-	fp = finq;
-	if(fp != nil) {
+	if(finq != nil) {
 		// kick off or wake up goroutine to run queued finalizers
 		if(!finstarted) {
 			__go_go(runfinq, nil);
@@ -601,37 +989,44 @@ runtime_UpdateMemStats(void)
 	__go_cachestats();
 	m->gcing = 0;
 	pthread_mutex_unlock(&gcsema);
-	runtime_starttheworld();
+	runtime_starttheworld(false);
 }
 
 static void
 runfinq(void* dummy)
 {
-	Finalizer *f, *next;
+	Finalizer *f;
+	FinBlock *fb, *next;
+	uint32 i;
 
 	USED(dummy);
 
 	for(;;) {
 		pthread_mutex_lock(&finqlock);
-		f = finq;
+		fb = finq;
 		finq = nil;
-		if(f == nil) {
+		if(fb == nil) {
 			fingwait = 1;
 			pthread_cond_wait(&finqcond, &finqlock);
 			pthread_mutex_unlock(&finqlock);
 			continue;
 		}
 		pthread_mutex_unlock(&finqlock);
-		for(; f; f=next) {
-			void *params[1];
-
-			next = f->next;
-			params[0] = &f->arg;
-			reflect_call(f->ft, (void*)f->fn, 0, 0, params, nil);
-			f->fn = nil;
-			f->arg = nil;
-			f->next = nil;
-			runtime_free(f);
+		for(; fb; fb=next) {
+			next = fb->next;
+			for(i=0; i<(uint32)fb->cnt; i++) {
+				void *params[1];
+
+				f = &fb->fin[i];
+				params[0] = &f->arg;
+				runtime_setblockspecial(f->arg, false);
+				reflect_call(f->ft, (void*)f->fn, 0, 0, params, nil);
+				f->fn = nil;
+				f->arg = nil;
+			}
+			fb->cnt = 0;
+			fb->next = finc;
+			finc = fb;
 		}
 		runtime_gc(1);	// trigger another gc to clean up the finalized objects, if possible
 	}
 }
@@ -783,6 +1178,9 @@ runtime_blockspecial(void *v)
 {
 	uintptr *b, off, shift;
 
+	if(DebugMark)
+		return true;
+
 	off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;
 	b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
 	shift = off % wordsPerBitmapWord;
@@ -791,17 +1189,23 @@ runtime_blockspecial(void *v)
 }
 
 void
-runtime_setblockspecial(void *v)
+runtime_setblockspecial(void *v, bool s)
 {
 	uintptr *b, off, shift, bits, obits;
 
+	if(DebugMark)
+		return;
+
 	off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;
 	b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
 	shift = off % wordsPerBitmapWord;
 
 	for(;;) {
 		obits = *b;
-		bits = obits | (bitSpecial<<shift);
+		if(s)
+			bits = obits | (bitSpecial<<shift);
+		else
+			bits = obits & ~(bitSpecial<<shift);
 		if(runtime_singleproc) {
 			*b = bits;
 			break;
@@ -812,7 +1216,7 @@ runtime_setblockspecial(void *v)
 		}
 	}
 }
-	
+
 void
 runtime_MHeap_MapBits(MHeap *h)
 {
@@ -823,7 +1227,7 @@ runtime_MHeap_MapBits(MHeap *h)
 		bitmapChunk = 8192
 	};
 	uintptr n;
-	
+
 	n = (h->arena_used - h->arena_start) / wordsPerBitmapWord;
 	n = (n+bitmapChunk-1) & ~(bitmapChunk-1);
 	if(h->bitmap_mapped >= n)