author      ian <ian@138bc75d-0d04-0410-961f-82ee72b054a4>    2013-11-06 19:49:01 +0000
committer   ian <ian@138bc75d-0d04-0410-961f-82ee72b054a4>    2013-11-06 19:49:01 +0000
commit      0ce10ea1348e9afd5d0eec6bca986bfe58bac5ac (patch)
tree        39530b071991b2326f881b2a30a2d82d6c133fd6 /libgo/runtime
parent      57a8bf1b0c6057ccbacb0cf79eb84d1985c2c1fe (diff)
download    gcc-0ce10ea1348e9afd5d0eec6bca986bfe58bac5ac.tar.gz
libgo: Update to October 24 version of master library.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@204466 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'libgo/runtime')
46 files changed, 2062 insertions, 1318 deletions
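The chan.c hunks that follow add a `pad` byte to Hchan and round the header size up to the element alignment (`n = ROUND(n, elem->__align)`) before allocating, so that the element buffer placed immediately after the header in the same allocation stays properly aligned. Below is a minimal standalone sketch of that layout technique in plain C, with illustrative names only — it is not libgo code, and `ROUND_UP` is merely an assumed equivalent of the runtime's `ROUND` macro.

```c
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>

/* Round x up to a multiple of align (align must be a power of two),
   mirroring the ROUND() idiom used in the makechan hunk below. */
#define ROUND_UP(x, align) (((x) + (align) - 1) & ~((uintptr_t)(align) - 1))

/* Illustrative header followed by an inline element buffer, in the spirit
   of Hchan followed by its circular queue. Field names are made up. */
typedef struct {
    size_t   qcount;    /* elements currently queued */
    size_t   dataqsiz;  /* capacity of the inline buffer, in elements */
    uint16_t elemsize;
    uint8_t  elemalign;
    uint8_t  pad;       /* keeps the buffer that follows the header aligned */
} hdr_t;

static void *make_queue(size_t cap, size_t elemsize, size_t elemalign, char **buf)
{
    /* Round only the header size up to the element alignment, then allocate
       header and buffer in a single call. */
    size_t n = ROUND_UP(sizeof(hdr_t), elemalign);
    hdr_t *h = calloc(1, n + cap * elemsize);
    if (h == NULL)
        return NULL;
    h->dataqsiz = cap;
    h->elemsize = (uint16_t)elemsize;
    h->elemalign = (uint8_t)elemalign;
    *buf = (char *)h + n;           /* first element slot, properly aligned */
    return h;
}

int main(void)
{
    char *buf;
    hdr_t *h = make_queue(8, sizeof(double), _Alignof(double), &buf);
    printf("buffer offset %zu, aligned: %d\n",
           (size_t)(buf - (char *)h),
           ((uintptr_t)buf % _Alignof(double)) == 0);
    free(h);
    return 0;
}
```

Rounding only the header, rather than padding every element, keeps the allocation size at header-plus-`hint*elemsize`, which matches the `runtime_mallocgc(n + hint*elem->__size, ...)` call in the hunk below.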
diff --git a/libgo/runtime/chan.c b/libgo/runtime/chan.c index 6f52a1d5e31..1d9e6681d35 100644 --- a/libgo/runtime/chan.c +++ b/libgo/runtime/chan.c @@ -10,8 +10,6 @@ #define NOSELGEN 1 -static int32 debug = 0; - typedef struct WaitQ WaitQ; typedef struct SudoG SudoG; typedef struct Select Select; @@ -42,8 +40,9 @@ struct Hchan uintgo qcount; // total data in the q uintgo dataqsiz; // size of the circular q uint16 elemsize; - bool closed; uint8 elemalign; + uint8 pad; // ensures proper alignment of the buffer that follows Hchan in memory + bool closed; uintgo sendx; // send index uintgo recvx; // receive index WaitQ recvq; // list of recv waiters @@ -59,6 +58,8 @@ uint32 runtime_Hchansize = sizeof(Hchan); enum { + debug = 0, + // Scase.kind CaseRecv, CaseSend, @@ -105,17 +106,17 @@ runtime_makechan_c(ChanType *t, int64 hint) runtime_panicstring("makechan: size out of range"); n = sizeof(*c); + n = ROUND(n, elem->__align); // allocate memory in one call - c = (Hchan*)runtime_mal(n + hint*elem->__size); + c = (Hchan*)runtime_mallocgc(n + hint*elem->__size, (uintptr)t | TypeInfo_Chan, 0); c->elemsize = elem->__size; c->elemalign = elem->__align; c->dataqsiz = hint; - runtime_settype(c, (uintptr)t | TypeInfo_Chan); if(debug) - runtime_printf("makechan: chan=%p; elemsize=%D; elemalign=%d; dataqsiz=%D\n", - c, (int64)elem->__size, elem->__align, (int64)c->dataqsiz); + runtime_printf("makechan: chan=%p; elemsize=%D; dataqsiz=%D\n", + c, (int64)elem->__size, (int64)c->dataqsiz); return c; } @@ -185,7 +186,7 @@ runtime_chansend(ChanType *t, Hchan *c, byte *ep, bool *pres, void *pc) return; // not reached } - if(runtime_gcwaiting) + if(runtime_gcwaiting()) runtime_gosched(); if(debug) { @@ -200,7 +201,6 @@ runtime_chansend(ChanType *t, Hchan *c, byte *ep, bool *pres, void *pc) } runtime_lock(c); - // TODO(dvyukov): add similar instrumentation to select. if(raceenabled) runtime_racereadpc(c, pc, runtime_chansend); if(c->closed) @@ -311,7 +311,7 @@ runtime_chanrecv(ChanType *t, Hchan* c, byte *ep, bool *selected, bool *received int64 t0; G *g; - if(runtime_gcwaiting) + if(runtime_gcwaiting()) runtime_gosched(); if(debug) @@ -927,6 +927,7 @@ selectgo(Select **selp) { Select *sel; uint32 o, i, j, k; + int64 t0; Scase *cas, *dfl; Hchan *c; SudoG *sg; @@ -935,7 +936,7 @@ selectgo(Select **selp) G *g; sel = *selp; - if(runtime_gcwaiting) + if(runtime_gcwaiting()) runtime_gosched(); if(debug) @@ -943,6 +944,13 @@ selectgo(Select **selp) g = runtime_g(); + t0 = 0; + if(runtime_blockprofilerate > 0) { + t0 = runtime_cputicks(); + for(i=0; i<sel->ncase; i++) + sel->scase[i].sg.releasetime = -1; + } + // The compiler rewrites selects that statically have // only 0 or 1 cases plus default into simpler constructs. 
// The only way we can end up with such small sel->ncase @@ -1023,6 +1031,8 @@ loop: break; case CaseSend: + if(raceenabled) + runtime_racereadpc(c, runtime_selectgo, runtime_chansend); if(c->closed) goto sclose; if(c->dataqsiz > 0) { @@ -1124,6 +1134,8 @@ asyncrecv: if(sg != nil) { gp = sg->g; selunlock(sel); + if(sg->releasetime) + sg->releasetime = runtime_cputicks(); runtime_ready(gp); } else { selunlock(sel); @@ -1142,6 +1154,8 @@ asyncsend: if(sg != nil) { gp = sg->g; selunlock(sel); + if(sg->releasetime) + sg->releasetime = runtime_cputicks(); runtime_ready(gp); } else { selunlock(sel); @@ -1161,6 +1175,8 @@ syncrecv: runtime_memmove(cas->sg.elem, sg->elem, c->elemsize); gp = sg->g; gp->param = sg; + if(sg->releasetime) + sg->releasetime = runtime_cputicks(); runtime_ready(gp); goto retc; @@ -1186,11 +1202,15 @@ syncsend: runtime_memmove(sg->elem, cas->sg.elem, c->elemsize); gp = sg->g; gp->param = sg; + if(sg->releasetime) + sg->releasetime = runtime_cputicks(); runtime_ready(gp); retc: // return index corresponding to chosen case index = cas->index; + if(cas->sg.releasetime > 0) + runtime_blockevent(cas->sg.releasetime - t0, 2); runtime_free(sel); return index; @@ -1297,17 +1317,36 @@ reflect_rselect(Slice cases) return ret; } +static void closechan(Hchan *c, void *pc); + // closechan(sel *byte); void runtime_closechan(Hchan *c) { + closechan(c, runtime_getcallerpc(&c)); +} + +// For reflect +// func chanclose(c chan) + +void reflect_chanclose(uintptr) __asm__ (GOSYM_PREFIX "reflect.chanclose"); + +void +reflect_chanclose(uintptr c) +{ + closechan((Hchan*)c, runtime_getcallerpc(&c)); +} + +static void +closechan(Hchan *c, void *pc) +{ SudoG *sg; G* gp; if(c == nil) runtime_panicstring("close of nil channel"); - if(runtime_gcwaiting) + if(runtime_gcwaiting()) runtime_gosched(); runtime_lock(c); @@ -1317,7 +1356,7 @@ runtime_closechan(Hchan *c) } if(raceenabled) { - runtime_racewritepc(c, runtime_getcallerpc(&c), runtime_closechan); + runtime_racewritepc(c, pc, runtime_closechan); runtime_racerelease(c); } @@ -1330,6 +1369,8 @@ runtime_closechan(Hchan *c) break; gp = sg->g; gp->param = nil; + if(sg->releasetime) + sg->releasetime = runtime_cputicks(); runtime_ready(gp); } @@ -1340,6 +1381,8 @@ runtime_closechan(Hchan *c) break; gp = sg->g; gp->param = nil; + if(sg->releasetime) + sg->releasetime = runtime_cputicks(); runtime_ready(gp); } @@ -1353,17 +1396,6 @@ __go_builtin_close(Hchan *c) } // For reflect -// func chanclose(c chan) - -void reflect_chanclose(uintptr) __asm__ (GOSYM_PREFIX "reflect.chanclose"); - -void -reflect_chanclose(uintptr c) -{ - runtime_closechan((Hchan*)c); -} - -// For reflect // func chanlen(c chan) (len int) intgo reflect_chanlen(uintptr) __asm__ (GOSYM_PREFIX "reflect.chanlen"); diff --git a/libgo/runtime/cpuprof.c b/libgo/runtime/cpuprof.c index 516387396ea..a2a1a05ce3d 100644 --- a/libgo/runtime/cpuprof.c +++ b/libgo/runtime/cpuprof.c @@ -146,7 +146,7 @@ runtime_SetCPUProfileRate(intgo hz) runtime_lock(&lk); if(hz > 0) { if(prof == nil) { - prof = runtime_SysAlloc(sizeof *prof); + prof = runtime_SysAlloc(sizeof *prof, &mstats.other_sys); if(prof == nil) { runtime_printf("runtime: cpu profiling cannot allocate memory\n"); runtime_unlock(&lk); @@ -340,7 +340,7 @@ getprofile(Profile *p) if(p->wholding) { // Release previous log to signal handling side. - // Loop because we are racing against setprofile(off). + // Loop because we are racing against SetCPUProfileRate(0). 
for(;;) { n = p->handoff; if(n == 0) { @@ -367,9 +367,7 @@ getprofile(Profile *p) return ret; // Wait for new log. - runtime_entersyscallblock(); - runtime_notesleep(&p->wait); - runtime_exitsyscall(); + runtime_notetsleepg(&p->wait, -1); runtime_noteclear(&p->wait); n = p->handoff; diff --git a/libgo/runtime/env_posix.c b/libgo/runtime/env_posix.c index 7f3fa0d8e0f..3219550af99 100644 --- a/libgo/runtime/env_posix.c +++ b/libgo/runtime/env_posix.c @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin freebsd linux netbsd openbsd windows +// +build darwin dragonfly freebsd linux netbsd openbsd windows #include "runtime.h" #include "array.h" @@ -12,7 +12,8 @@ extern Slice syscall_Envs __asm__ (GOSYM_PREFIX "syscall.Envs"); const byte* runtime_getenv(const char *s) { - int32 i, j, len; + int32 i, j; + intgo len; const byte *v, *bs; String* envv; int32 envc; diff --git a/libgo/runtime/go-byte-array-to-string.c b/libgo/runtime/go-byte-array-to-string.c index 0cd63c76d8d..088b78690fe 100644 --- a/libgo/runtime/go-byte-array-to-string.c +++ b/libgo/runtime/go-byte-array-to-string.c @@ -16,7 +16,7 @@ __go_byte_array_to_string (const void* p, intgo len) String ret; bytes = (const unsigned char *) p; - retdata = runtime_mallocgc ((uintptr) len, FlagNoPointers, 1, 0); + retdata = runtime_mallocgc ((uintptr) len, 0, FlagNoScan); __builtin_memcpy (retdata, bytes, len); ret.str = retdata; ret.len = len; diff --git a/libgo/runtime/go-caller.c b/libgo/runtime/go-caller.c index d84580fa594..8ca3c7efcd7 100644 --- a/libgo/runtime/go-caller.c +++ b/libgo/runtime/go-caller.c @@ -228,3 +228,23 @@ runtime_funcline_go (Func *f __attribute__((unused)), uintptr targetpc) runtime_memclr (&ret, sizeof ret); return ret; } + +/* Return the name of a function. */ +String runtime_funcname_go (Func *f) + __asm__ (GOSYM_PREFIX "runtime.funcname_go"); + +String +runtime_funcname_go (Func *f) +{ + return f->name; +} + +/* Return the entry point of a function. 
*/ +uintptr runtime_funcentry_go(Func *f) + __asm__ (GOSYM_PREFIX "runtime.funcentry_go"); + +uintptr +runtime_funcentry_go (Func *f) +{ + return f->entry; +} diff --git a/libgo/runtime/go-int-array-to-string.c b/libgo/runtime/go-int-array-to-string.c index 6cae2fd8ccb..d93fe651d95 100644 --- a/libgo/runtime/go-int-array-to-string.c +++ b/libgo/runtime/go-int-array-to-string.c @@ -41,7 +41,7 @@ __go_int_array_to_string (const void* p, intgo len) slen += 4; } - retdata = runtime_mallocgc ((uintptr) slen, FlagNoPointers, 1, 0); + retdata = runtime_mallocgc ((uintptr) slen, 0, FlagNoScan); ret.str = retdata; ret.len = slen; diff --git a/libgo/runtime/go-int-to-string.c b/libgo/runtime/go-int-to-string.c index eb441674b6c..d90b1ddfed1 100644 --- a/libgo/runtime/go-int-to-string.c +++ b/libgo/runtime/go-int-to-string.c @@ -60,7 +60,7 @@ __go_int_to_string (intgo v) } } - retdata = runtime_mallocgc (len, FlagNoPointers, 1, 0); + retdata = runtime_mallocgc (len, 0, FlagNoScan); __builtin_memcpy (retdata, buf, len); ret.str = retdata; ret.len = len; diff --git a/libgo/runtime/go-make-slice.c b/libgo/runtime/go-make-slice.c index f08cb012dc8..855bb17ce59 100644 --- a/libgo/runtime/go-make-slice.c +++ b/libgo/runtime/go-make-slice.c @@ -55,15 +55,15 @@ __go_make_slice2 (const struct __go_type_descriptor *td, uintptr_t len, if (size == 0) ret.__values = &runtime_zerobase; else if ((std->__element_type->__code & GO_NO_POINTERS) != 0) - ret.__values = runtime_mallocgc (size, FlagNoPointers, 1, 1); + ret.__values = + runtime_mallocgc (size, + (uintptr) std->__element_type | TypeInfo_Array, + FlagNoScan); else - { - ret.__values = runtime_mallocgc (size, 0, 1, 1); - - if (UseSpanType) - runtime_settype (ret.__values, - (uintptr) std->__element_type | TypeInfo_Array); - } + ret.__values = + runtime_mallocgc (size, + (uintptr) std->__element_type | TypeInfo_Array, + 0); return ret; } diff --git a/libgo/runtime/go-new.c b/libgo/runtime/go-new.c index b1af5f22473..9d46706eaa4 100644 --- a/libgo/runtime/go-new.c +++ b/libgo/runtime/go-new.c @@ -12,11 +12,11 @@ void * __go_new (uintptr_t size) { - return runtime_mallocgc (size, 0, 1, 1); + return runtime_mallocgc (size, 0, 0); } void * __go_new_nopointers (uintptr_t size) { - return runtime_mallocgc (size, FlagNoPointers, 1, 1); + return runtime_mallocgc (size, 0, FlagNoScan); } diff --git a/libgo/runtime/go-reflect-call.c b/libgo/runtime/go-reflect-call.c index 5cf370798bf..0fed68a50e7 100644 --- a/libgo/runtime/go-reflect-call.c +++ b/libgo/runtime/go-reflect-call.c @@ -271,7 +271,21 @@ go_func_return_ffi (const struct __go_func_type *func) types = (const struct __go_type_descriptor **) func->__out.__values; if (count == 1) - return go_type_to_ffi (types[0]); + { + +#if defined (__i386__) && !defined (__x86_64__) + /* FFI does not support complex types. On 32-bit x86, a + complex64 will be returned in %eax/%edx. We normally tell + FFI that a complex64 is a struct of two floats. On 32-bit + x86 a struct of two floats is returned via a hidden first + pointer parameter. Fortunately we can make everything work + by pretending that complex64 is int64. 
*/ + if ((types[0]->__code & GO_CODE_MASK) == GO_COMPLEX64) + return &ffi_type_sint64; +#endif + + return go_type_to_ffi (types[0]); + } ret = (ffi_type *) __go_alloc (sizeof (ffi_type)); ret->type = FFI_TYPE_STRUCT; diff --git a/libgo/runtime/go-signal.c b/libgo/runtime/go-signal.c index 23a94db4157..4f0dcc78c17 100644 --- a/libgo/runtime/go-signal.c +++ b/libgo/runtime/go-signal.c @@ -139,22 +139,6 @@ SigTab runtime_sigtab[] = { #undef P #undef D - -static int8 badsignal[] = "runtime: signal received on thread not created by Go.\n"; - -static void -runtime_badsignal(int32 sig) -{ - // Avoid -D_FORTIFY_SOURCE problems. - int rv __attribute__((unused)); - - if (sig == SIGPROF) { - return; // Ignore SIGPROFs intended for a non-Go thread. - } - rv = runtime_write(2, badsignal, sizeof badsignal - 1); - runtime_exit(1); -} - /* Handle a signal, for cases where we don't panic. We can split the stack here. */ diff --git a/libgo/runtime/go-string-to-byte-array.c b/libgo/runtime/go-string-to-byte-array.c index 75fac1dbfe6..5e030330f29 100644 --- a/libgo/runtime/go-string-to-byte-array.c +++ b/libgo/runtime/go-string-to-byte-array.c @@ -15,7 +15,8 @@ __go_string_to_byte_array (String str) unsigned char *data; struct __go_open_array ret; - data = (unsigned char *) runtime_mallocgc (str.len, FlagNoPointers, 1, 0); + data = (unsigned char *) runtime_mallocgc (str.len, 0, + FlagNoScan | FlagNoZero); __builtin_memcpy (data, str.str, str.len); ret.__values = (void *) data; ret.__count = str.len; diff --git a/libgo/runtime/go-string-to-int-array.c b/libgo/runtime/go-string-to-int-array.c index 16970bdd042..d91c9e2df82 100644 --- a/libgo/runtime/go-string-to-int-array.c +++ b/libgo/runtime/go-string-to-int-array.c @@ -32,8 +32,8 @@ __go_string_to_int_array (String str) p += __go_get_rune (p, pend - p, &rune); } - data = (uint32_t *) runtime_mallocgc (c * sizeof (uint32_t), FlagNoPointers, - 1, 0); + data = (uint32_t *) runtime_mallocgc (c * sizeof (uint32_t), 0, + FlagNoScan | FlagNoZero); p = str.str; pd = data; while (p < pend) diff --git a/libgo/runtime/go-strplus.c b/libgo/runtime/go-strplus.c index d6e6df67fce..13915e3e673 100644 --- a/libgo/runtime/go-strplus.c +++ b/libgo/runtime/go-strplus.c @@ -21,7 +21,7 @@ __go_string_plus (String s1, String s2) return s1; len = s1.len + s2.len; - retdata = runtime_mallocgc (len, FlagNoPointers, 1, 0); + retdata = runtime_mallocgc (len, 0, FlagNoScan | FlagNoZero); __builtin_memcpy (retdata, s1.str, s1.len); __builtin_memcpy (retdata + s1.len, s2.str, s2.len); ret.str = retdata; diff --git a/libgo/runtime/lfstack.c b/libgo/runtime/lfstack.c index 230ed87c43f..132783c3644 100644 --- a/libgo/runtime/lfstack.c +++ b/libgo/runtime/lfstack.c @@ -41,10 +41,10 @@ runtime_lfstackpush(uint64 *head, LFNode *node) node->pushcnt++; new = (uint64)(uintptr)node|(((uint64)node->pushcnt&CNT_MASK)<<PTR_BITS); - old = runtime_atomicload64(head); for(;;) { + old = runtime_atomicload64(head); node->next = (LFNode*)(uintptr)(old&PTR_MASK); - if(runtime_cas64(head, &old, new)) + if(runtime_cas64(head, old, new)) break; } } @@ -55,8 +55,8 @@ runtime_lfstackpop(uint64 *head) LFNode *node, *node2; uint64 old, new; - old = runtime_atomicload64(head); for(;;) { + old = runtime_atomicload64(head); if(old == 0) return nil; node = (LFNode*)(uintptr)(old&PTR_MASK); @@ -64,7 +64,7 @@ runtime_lfstackpop(uint64 *head) new = 0; if(node2 != nil) new = (uint64)(uintptr)node2|(((uint64)node2->pushcnt&CNT_MASK)<<PTR_BITS); - if(runtime_cas64(head, &old, new)) + if(runtime_cas64(head, old, new)) 
return node; } } diff --git a/libgo/runtime/lock_futex.c b/libgo/runtime/lock_futex.c index 4b9651a75de..fa270132895 100644 --- a/libgo/runtime/lock_futex.c +++ b/libgo/runtime/lock_futex.c @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build freebsd linux +// +build dragonfly freebsd linux #include "runtime.h" @@ -91,14 +91,14 @@ runtime_unlock(Lock *l) { uint32 v; - if(--runtime_m()->locks < 0) - runtime_throw("runtime_unlock: lock count"); - v = runtime_xchg((uint32*)&l->key, MUTEX_UNLOCKED); if(v == MUTEX_UNLOCKED) runtime_throw("unlock of unlocked lock"); if(v == MUTEX_SLEEPING) runtime_futexwakeup((uint32*)&l->key, 1); + + if(--runtime_m()->locks < 0) + runtime_throw("runtime_unlock: lock count"); } // One-time notifications. @@ -111,37 +111,45 @@ runtime_noteclear(Note *n) void runtime_notewakeup(Note *n) { - if(runtime_xchg((uint32*)&n->key, 1)) + uint32 old; + + old = runtime_xchg((uint32*)&n->key, 1); + if(old != 0) { + runtime_printf("notewakeup - double wakeup (%d)\n", old); runtime_throw("notewakeup - double wakeup"); + } runtime_futexwakeup((uint32*)&n->key, 1); } void runtime_notesleep(Note *n) { - if(runtime_m()->profilehz > 0) - runtime_setprof(false); + /* For gccgo it's OK to sleep in non-g0, and it happens in + stoptheworld because we have not implemented preemption. + + if(runtime_g() != runtime_m()->g0) + runtime_throw("notesleep not on g0"); + */ while(runtime_atomicload((uint32*)&n->key) == 0) runtime_futexsleep((uint32*)&n->key, 0, -1); - if(runtime_m()->profilehz > 0) - runtime_setprof(true); } -void -runtime_notetsleep(Note *n, int64 ns) +static bool +notetsleep(Note *n, int64 ns, int64 deadline, int64 now) { - int64 deadline, now; + // Conceptually, deadline and now are local variables. + // They are passed as arguments so that the space for them + // does not count against our nosplit stack sequence. 
if(ns < 0) { - runtime_notesleep(n); - return; + while(runtime_atomicload((uint32*)&n->key) == 0) + runtime_futexsleep((uint32*)&n->key, 0, -1); + return true; } if(runtime_atomicload((uint32*)&n->key) != 0) - return; + return true; - if(runtime_m()->profilehz > 0) - runtime_setprof(false); deadline = runtime_nanotime() + ns; for(;;) { runtime_futexsleep((uint32*)&n->key, 0, ns); @@ -152,6 +160,33 @@ runtime_notetsleep(Note *n, int64 ns) break; ns = deadline - now; } - if(runtime_m()->profilehz > 0) - runtime_setprof(true); + return runtime_atomicload((uint32*)&n->key) != 0; +} + +bool +runtime_notetsleep(Note *n, int64 ns) +{ + bool res; + + if(runtime_g() != runtime_m()->g0 && !runtime_m()->gcing) + runtime_throw("notetsleep not on g0"); + + res = notetsleep(n, ns, 0, 0); + return res; +} + +// same as runtime_notetsleep, but called on user g (not g0) +// calls only nosplit functions between entersyscallblock/exitsyscall +bool +runtime_notetsleepg(Note *n, int64 ns) +{ + bool res; + + if(runtime_g() == runtime_m()->g0) + runtime_throw("notetsleepg on g0"); + + runtime_entersyscallblock(); + res = notetsleep(n, ns, 0, 0); + runtime_exitsyscall(); + return res; } diff --git a/libgo/runtime/lock_sema.c b/libgo/runtime/lock_sema.c index 2663c5463de..ce435119323 100644 --- a/libgo/runtime/lock_sema.c +++ b/libgo/runtime/lock_sema.c @@ -95,9 +95,6 @@ runtime_unlock(Lock *l) uintptr v; M *mp; - if(--runtime_m()->locks < 0) - runtime_throw("runtime_unlock: lock count"); - for(;;) { v = (uintptr)runtime_atomicloadp((void**)&l->key); if(v == LOCKED) { @@ -114,6 +111,9 @@ runtime_unlock(Lock *l) } } } + + if(--runtime_m()->locks < 0) + runtime_throw("runtime_unlock: lock count"); } // One-time notifications. @@ -151,6 +151,10 @@ runtime_notesleep(Note *n) M *m; m = runtime_m(); + + if(runtime_g() != m->g0) + runtime_throw("notesleep not on g0"); + if(m->waitsema == 0) m->waitsema = runtime_semacreate(); if(!runtime_casp((void**)&n->key, nil, m)) { // must be LOCKED (got wakeup) @@ -159,61 +163,49 @@ runtime_notesleep(Note *n) return; } // Queued. Sleep. - if(m->profilehz > 0) - runtime_setprof(false); runtime_semasleep(-1); - if(m->profilehz > 0) - runtime_setprof(true); } -void -runtime_notetsleep(Note *n, int64 ns) +static bool +notetsleep(Note *n, int64 ns, int64 deadline, M *mp) { M *m; - M *mp; - int64 deadline, now; - - if(ns < 0) { - runtime_notesleep(n); - return; - } m = runtime_m(); - if(m->waitsema == 0) - m->waitsema = runtime_semacreate(); + + // Conceptually, deadline and mp are local variables. + // They are passed as arguments so that the space for them + // does not count against our nosplit stack sequence. // Register for wakeup on n->waitm. if(!runtime_casp((void**)&n->key, nil, m)) { // must be LOCKED (got wakeup already) if(n->key != LOCKED) runtime_throw("notetsleep - waitm out of sync"); - return; + return true; + } + + if(ns < 0) { + // Queued. Sleep. + runtime_semasleep(-1); + return true; } - if(m->profilehz > 0) - runtime_setprof(false); deadline = runtime_nanotime() + ns; for(;;) { // Registered. Sleep. if(runtime_semasleep(ns) >= 0) { // Acquired semaphore, semawakeup unregistered us. // Done. - if(m->profilehz > 0) - runtime_setprof(true); - return; + return true; } // Interrupted or timed out. Still registered. Semaphore not acquired. - now = runtime_nanotime(); - if(now >= deadline) + ns = deadline - runtime_nanotime(); + if(ns <= 0) break; - // Deadline hasn't arrived. Keep sleeping. 
- ns = deadline - now; } - if(m->profilehz > 0) - runtime_setprof(true); - // Deadline arrived. Still registered. Semaphore not acquired. // Want to give up and return, but have to unregister first, // so that any notewakeup racing with the return does not @@ -223,15 +215,54 @@ runtime_notetsleep(Note *n, int64 ns) if(mp == m) { // No wakeup yet; unregister if possible. if(runtime_casp((void**)&n->key, mp, nil)) - return; + return false; } else if(mp == (M*)LOCKED) { // Wakeup happened so semaphore is available. // Grab it to avoid getting out of sync. if(runtime_semasleep(-1) < 0) runtime_throw("runtime: unable to acquire - semaphore out of sync"); - return; - } else { + return true; + } else runtime_throw("runtime: unexpected waitm - semaphore out of sync"); - } } } + +bool +runtime_notetsleep(Note *n, int64 ns) +{ + M *m; + bool res; + + m = runtime_m(); + + if(runtime_g() != m->g0 && !m->gcing) + runtime_throw("notetsleep not on g0"); + + if(m->waitsema == 0) + m->waitsema = runtime_semacreate(); + + res = notetsleep(n, ns, 0, nil); + return res; +} + +// same as runtime_notetsleep, but called on user g (not g0) +// calls only nosplit functions between entersyscallblock/exitsyscall +bool +runtime_notetsleepg(Note *n, int64 ns) +{ + M *m; + bool res; + + m = runtime_m(); + + if(runtime_g() == m->g0) + runtime_throw("notetsleepg on g0"); + + if(m->waitsema == 0) + m->waitsema = runtime_semacreate(); + + runtime_entersyscallblock(); + res = notetsleep(n, ns, 0, nil); + runtime_exitsyscall(); + return res; +} diff --git a/libgo/runtime/malloc.goc b/libgo/runtime/malloc.goc index 8ccaa6b888c..d349f4749fa 100644 --- a/libgo/runtime/malloc.goc +++ b/libgo/runtime/malloc.goc @@ -18,7 +18,17 @@ package runtime #include "go-type.h" #include "race.h" -MHeap *runtime_mheap; +// Map gccgo field names to gc field names. +// Eface aka __go_empty_interface. +#define type __type_descriptor +// Type aka __go_type_descriptor +#define kind __code +#define string __reflection +#define KindPtr GO_PTR +#define KindNoPointers GO_NO_POINTERS + +// Mark mheap as 'no pointers', it does not contain interesting pointers but occupies ~45K. +MHeap runtime_mheap; int32 runtime_checking; @@ -30,19 +40,28 @@ extern volatile intgo runtime_MemProfileRate // Allocate an object of at least size bytes. // Small objects are allocated from the per-thread cache's free lists. // Large objects (> 32 kB) are allocated straight from the heap. +// If the block will be freed with runtime_free(), typ must be 0. void* -runtime_mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed) +runtime_mallocgc(uintptr size, uintptr typ, uint32 flag) { M *m; G *g; int32 sizeclass; intgo rate; MCache *c; + MCacheList *l; uintptr npages; MSpan *s; - void *v; + MLink *v; bool incallback; + if(size == 0) { + // All 0-length allocations use this pointer. + // The language does not require the allocations to + // have distinct values. + return &runtime_zerobase; + } + m = runtime_m(); g = runtime_g(); @@ -56,34 +75,45 @@ runtime_mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed) runtime_exitsyscall(); m = runtime_m(); incallback = true; - dogc = false; + flag |= FlagNoGC; } - if(runtime_gcwaiting && g != m->g0 && m->locks == 0 && dogc) { + if(runtime_gcwaiting() && g != m->g0 && m->locks == 0 && !(flag & FlagNoGC)) { runtime_gosched(); m = runtime_m(); } if(m->mallocing) runtime_throw("malloc/free - deadlock"); + // Disable preemption during settype_flush. + // We can not use m->mallocing for this, because settype_flush calls mallocgc. 
+ m->locks++; m->mallocing = 1; - if(size == 0) - size = 1; if(DebugTypeAtBlockEnd) size += sizeof(uintptr); c = m->mcache; - c->local_nmalloc++; if(size <= MaxSmallSize) { // Allocate from mcache free lists. - sizeclass = runtime_SizeToClass(size); + // Inlined version of SizeToClass(). + if(size <= 1024-8) + sizeclass = runtime_size_to_class8[(size+7)>>3]; + else + sizeclass = runtime_size_to_class128[(size-1024+127) >> 7]; size = runtime_class_to_size[sizeclass]; - v = runtime_MCache_Alloc(c, sizeclass, size, zeroed); - if(v == nil) - runtime_throw("out of memory"); - c->local_alloc += size; - c->local_total_alloc += size; - c->local_by_size[sizeclass].nmalloc++; + l = &c->list[sizeclass]; + if(l->list == nil) + runtime_MCache_Refill(c, sizeclass); + v = l->list; + l->list = v->next; + l->nlist--; + if(!(flag & FlagNoZero)) { + v->next = nil; + // block is zeroed iff second word is zero ... + if(size > sizeof(uintptr) && ((uintptr*)v)[1] != 0) + runtime_memclr((byte*)v, size); + } + c->local_cachealloc += size; } else { // TODO(rsc): Report tracebacks for very large allocations. @@ -91,32 +121,39 @@ runtime_mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed) npages = size >> PageShift; if((size & PageMask) != 0) npages++; - s = runtime_MHeap_Alloc(runtime_mheap, npages, 0, 1, zeroed); + s = runtime_MHeap_Alloc(&runtime_mheap, npages, 0, 1, !(flag & FlagNoZero)); if(s == nil) runtime_throw("out of memory"); + s->limit = (byte*)(s->start<<PageShift) + size; size = npages<<PageShift; - c->local_alloc += size; - c->local_total_alloc += size; v = (void*)(s->start << PageShift); // setup for mark sweep runtime_markspan(v, 0, 0, true); } - if (sizeof(void*) == 4 && c->local_total_alloc >= (1<<30)) { - // purge cache stats to prevent overflow - runtime_lock(runtime_mheap); - runtime_purgecachedstats(c); - runtime_unlock(runtime_mheap); - } - if(!(flag & FlagNoGC)) - runtime_markallocated(v, size, (flag&FlagNoPointers) != 0); + runtime_markallocated(v, size, (flag&FlagNoScan) != 0); if(DebugTypeAtBlockEnd) - *(uintptr*)((uintptr)v+size-sizeof(uintptr)) = 0; + *(uintptr*)((uintptr)v+size-sizeof(uintptr)) = typ; + + // TODO: save type even if FlagNoScan? Potentially expensive but might help + // heap profiling/tracing. + if(UseSpanType && !(flag & FlagNoScan) && typ != 0) { + uintptr *buf, i; + + buf = m->settype_buf; + i = m->settype_bufsize; + buf[i++] = (uintptr)v; + buf[i++] = typ; + m->settype_bufsize = i; + } m->mallocing = 0; + if(UseSpanType && !(flag & FlagNoScan) && typ != 0 && m->settype_bufsize == nelem(m->settype_buf)) + runtime_settype_flush(m); + m->locks--; if(!(flag & FlagNoProfiling) && (rate = runtime_MemProfileRate) > 0) { if(size >= (uint32) rate) @@ -135,13 +172,11 @@ runtime_mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed) } } - if(dogc && mstats.heap_alloc >= mstats.next_gc) + if(!(flag & FlagNoInvokeGC) && mstats.heap_alloc >= mstats.next_gc) runtime_gc(0); - if(raceenabled) { - runtime_racemalloc(v, size, m->racepc); - m->racepc = nil; - } + if(raceenabled) + runtime_racemalloc(v, size); if(incallback) runtime_entersyscall(); @@ -152,7 +187,7 @@ runtime_mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed) void* __go_alloc(uintptr size) { - return runtime_mallocgc(size, 0, 0, 1); + return runtime_mallocgc(size, 0, FlagNoInvokeGC); } // Free the object whose base pointer is v. @@ -197,7 +232,9 @@ __go_free(void *v) // they might coalesce v into other spans and change the bitmap further. 
runtime_markfreed(v, size); runtime_unmarkspan(v, 1<<PageShift); - runtime_MHeap_Free(runtime_mheap, s, 1); + runtime_MHeap_Free(&runtime_mheap, s, 1); + c->local_nlargefree++; + c->local_largefree += size; } else { // Small object. size = runtime_class_to_size[sizeclass]; @@ -207,11 +244,9 @@ __go_free(void *v) // it might coalesce v and other blocks into a bigger span // and change the bitmap further. runtime_markfreed(v, size); - c->local_by_size[sizeclass].nfree++; + c->local_nsmallfree[sizeclass]++; runtime_MCache_Free(c, v, sizeclass, size); } - c->local_nfree++; - c->local_alloc -= size; if(prof) runtime_MProf_Free(v, size); m->mallocing = 0; @@ -230,12 +265,12 @@ runtime_mlookup(void *v, byte **base, uintptr *size, MSpan **sp) m->mcache->local_nlookup++; if (sizeof(void*) == 4 && m->mcache->local_nlookup >= (1<<30)) { // purge cache stats to prevent overflow - runtime_lock(runtime_mheap); + runtime_lock(&runtime_mheap); runtime_purgecachedstats(m->mcache); - runtime_unlock(runtime_mheap); + runtime_unlock(&runtime_mheap); } - s = runtime_MHeap_LookupMaybe(runtime_mheap, v); + s = runtime_MHeap_LookupMaybe(&runtime_mheap, v); if(sp) *sp = s; if(s == nil) { @@ -257,11 +292,6 @@ runtime_mlookup(void *v, byte **base, uintptr *size, MSpan **sp) return 1; } - if((byte*)v >= (byte*)s->limit) { - // pointers past the last block do not count as pointers. - return 0; - } - n = s->elemsize; if(base) { i = ((byte*)v - p)/n; @@ -279,11 +309,9 @@ runtime_allocmcache(void) intgo rate; MCache *c; - runtime_lock(runtime_mheap); - c = runtime_FixAlloc_Alloc(&runtime_mheap->cachealloc); - mstats.mcache_inuse = runtime_mheap->cachealloc.inuse; - mstats.mcache_sys = runtime_mheap->cachealloc.sys; - runtime_unlock(runtime_mheap); + runtime_lock(&runtime_mheap); + c = runtime_FixAlloc_Alloc(&runtime_mheap.cachealloc); + runtime_unlock(&runtime_mheap); runtime_memclr((byte*)c, sizeof(*c)); // Set first allocation sample size. @@ -300,30 +328,32 @@ void runtime_freemcache(MCache *c) { runtime_MCache_ReleaseAll(c); - runtime_lock(runtime_mheap); + runtime_lock(&runtime_mheap); runtime_purgecachedstats(c); - runtime_FixAlloc_Free(&runtime_mheap->cachealloc, c); - runtime_unlock(runtime_mheap); + runtime_FixAlloc_Free(&runtime_mheap.cachealloc, c); + runtime_unlock(&runtime_mheap); } void runtime_purgecachedstats(MCache *c) { + MHeap *h; + int32 i; + // Protected by either heap or GC lock. 
+ h = &runtime_mheap; mstats.heap_alloc += c->local_cachealloc; c->local_cachealloc = 0; - mstats.heap_objects += c->local_objects; - c->local_objects = 0; - mstats.nmalloc += c->local_nmalloc; - c->local_nmalloc = 0; - mstats.nfree += c->local_nfree; - c->local_nfree = 0; mstats.nlookup += c->local_nlookup; c->local_nlookup = 0; - mstats.alloc += c->local_alloc; - c->local_alloc= 0; - mstats.total_alloc += c->local_total_alloc; - c->local_total_alloc= 0; + h->largefree += c->local_largefree; + c->local_largefree = 0; + h->nlargefree += c->local_nlargefree; + c->local_nlargefree = 0; + for(i=0; i<(int32)nelem(c->local_nsmallfree); i++) { + h->nsmallfree[i] += c->local_nsmallfree[i]; + c->local_nsmallfree[i] = 0; + } } extern uintptr runtime_sizeof_C_MStats @@ -335,24 +365,24 @@ void runtime_mallocinit(void) { byte *p; - uintptr arena_size, bitmap_size; + uintptr arena_size, bitmap_size, spans_size; extern byte _end[]; byte *want; uintptr limit; + uint64 i; runtime_sizeof_C_MStats = sizeof(MStats); p = nil; arena_size = 0; bitmap_size = 0; - + spans_size = 0; + // for 64-bit build USED(p); USED(arena_size); USED(bitmap_size); - - if((runtime_mheap = runtime_SysAlloc(sizeof(*runtime_mheap))) == nil) - runtime_throw("runtime: cannot allocate heap metadata"); + USED(spans_size); runtime_InitSizes(); @@ -369,15 +399,17 @@ runtime_mallocinit(void) // 128 GB (MaxMem) should be big enough for now. // // The code will work with the reservation at any address, but ask - // SysReserve to use 0x000000c000000000 if possible. + // SysReserve to use 0x0000XXc000000000 if possible (XX=00...7f). // Allocating a 128 GB region takes away 37 bits, and the amd64 // doesn't let us choose the top 17 bits, so that leaves the 11 bits // in the middle of 0x00c0 for us to choose. Choosing 0x00c0 means - // that the valid memory addresses will begin 0x00c0, 0x00c1, ..., 0x0x00df. + // that the valid memory addresses will begin 0x00c0, 0x00c1, ..., 0x00df. // In little-endian, that's c0 00, c1 00, ..., df 00. None of those are valid // UTF-8 sequences, and they are otherwise as far away from - // ff (likely a common byte) as possible. An earlier attempt to use 0x11f8 - // caused out of memory errors on OS X during thread allocations. + // ff (likely a common byte) as possible. If that fails, we try other 0xXXc0 + // addresses. An earlier attempt to use 0x11f8 caused out of memory errors + // on OS X during thread allocations. 0x00c0 causes conflicts with + // AddressSanitizer which reserves all memory up to 0x0100. // These choices are both for debuggability and to reduce the // odds of the conservative garbage collector not collecting memory // because some non-pointer block of memory had a bit pattern @@ -389,7 +421,14 @@ runtime_mallocinit(void) // If this fails we fall back to the 32 bit memory mechanism arena_size = MaxMem; bitmap_size = arena_size / (sizeof(void*)*8/4); - p = runtime_SysReserve((void*)(0x00c0ULL<<32), bitmap_size + arena_size); + spans_size = arena_size / PageSize * sizeof(runtime_mheap.spans[0]); + spans_size = ROUND(spans_size, PageSize); + for(i = 0; i <= 0x7f; i++) { + p = (void*)(uintptr)(i<<40 | 0x00c0ULL<<32); + p = runtime_SysReserve(p, bitmap_size + spans_size + arena_size); + if(p != nil) + break; + } } if (p == nil) { // On a 32-bit machine, we can't typically get away @@ -411,11 +450,14 @@ runtime_mallocinit(void) // of address space, which is probably too much in a 32-bit world. 
bitmap_size = MaxArena32 / (sizeof(void*)*8/4); arena_size = 512<<20; - if(limit > 0 && arena_size+bitmap_size > limit) { + spans_size = MaxArena32 / PageSize * sizeof(runtime_mheap.spans[0]); + if(limit > 0 && arena_size+bitmap_size+spans_size > limit) { bitmap_size = (limit / 9) & ~((1<<PageShift) - 1); arena_size = bitmap_size * 8; + spans_size = arena_size / PageSize * sizeof(runtime_mheap.spans[0]); } - + spans_size = ROUND(spans_size, PageSize); + // SysReserve treats the address we ask for, end, as a hint, // not as an absolute requirement. If we ask for the end // of the data segment but the operating system requires @@ -425,25 +467,27 @@ runtime_mallocinit(void) // So adjust it upward a little bit ourselves: 1/4 MB to get // away from the running binary image and then round up // to a MB boundary. - want = (byte*)(((uintptr)_end + (1<<18) + (1<<20) - 1)&~((1<<20)-1)); - if(0xffffffff - (uintptr)want <= bitmap_size + arena_size) + want = (byte*)ROUND((uintptr)_end + (1<<18), 1<<20); + if(0xffffffff - (uintptr)want <= bitmap_size + spans_size + arena_size) want = 0; - p = runtime_SysReserve(want, bitmap_size + arena_size); + p = runtime_SysReserve(want, bitmap_size + spans_size + arena_size); if(p == nil) runtime_throw("runtime: cannot reserve arena virtual address space"); if((uintptr)p & (((uintptr)1<<PageShift)-1)) - runtime_printf("runtime: SysReserve returned unaligned address %p; asked for %p", p, bitmap_size+arena_size); + runtime_printf("runtime: SysReserve returned unaligned address %p; asked for %p", p, + bitmap_size+spans_size+arena_size); } if((uintptr)p & (((uintptr)1<<PageShift)-1)) runtime_throw("runtime: SysReserve returned unaligned address"); - runtime_mheap->bitmap = p; - runtime_mheap->arena_start = p + bitmap_size; - runtime_mheap->arena_used = runtime_mheap->arena_start; - runtime_mheap->arena_end = runtime_mheap->arena_start + arena_size; + runtime_mheap.spans = (MSpan**)p; + runtime_mheap.bitmap = p + spans_size; + runtime_mheap.arena_start = p + spans_size + bitmap_size; + runtime_mheap.arena_used = runtime_mheap.arena_start; + runtime_mheap.arena_end = runtime_mheap.arena_start + arena_size; // Initialize the rest of the allocator. - runtime_MHeap_Init(runtime_mheap, runtime_SysAlloc); + runtime_MHeap_Init(&runtime_mheap); runtime_m()->mcache = runtime_allocmcache(); // See if it works. @@ -463,8 +507,7 @@ runtime_MHeap_SysAlloc(MHeap *h, uintptr n) uintptr needed; needed = (uintptr)h->arena_used + n - (uintptr)h->arena_end; - // Round wanted arena size to a multiple of 256MB. - needed = (needed + (256<<20) - 1) & ~((256<<20)-1); + needed = ROUND(needed, 256<<20); new_end = h->arena_end + needed; if(new_end <= h->arena_start + MaxArena32) { p = runtime_SysReserve(h->arena_end, new_end - h->arena_end); @@ -475,9 +518,10 @@ runtime_MHeap_SysAlloc(MHeap *h, uintptr n) if(n <= (uintptr)(h->arena_end - h->arena_used)) { // Keep taking from our reservation. p = h->arena_used; - runtime_SysMap(p, n); + runtime_SysMap(p, n, &mstats.heap_sys); h->arena_used += n; runtime_MHeap_MapBits(h); + runtime_MHeap_MapSpans(h); if(raceenabled) runtime_racemapshadow(p, n); return p; @@ -490,14 +534,14 @@ runtime_MHeap_SysAlloc(MHeap *h, uintptr n) // On 32-bit, once the reservation is gone we can // try to get memory at a location chosen by the OS // and hope that it is in the range we allocated bitmap for. 
- p = runtime_SysAlloc(n); + p = runtime_SysAlloc(n, &mstats.heap_sys); if(p == nil) return nil; if(p < h->arena_start || (uintptr)(p+n - h->arena_start) >= MaxArena32) { runtime_printf("runtime: memory allocated by OS (%p) not in usable range [%p,%p)\n", p, h->arena_start, h->arena_start+MaxArena32); - runtime_SysFree(p, n); + runtime_SysFree(p, n, &mstats.heap_sys); return nil; } @@ -506,6 +550,7 @@ runtime_MHeap_SysAlloc(MHeap *h, uintptr n) if(h->arena_used > h->arena_end) h->arena_end = h->arena_used; runtime_MHeap_MapBits(h); + runtime_MHeap_MapSpans(h); if(raceenabled) runtime_racemapshadow(p, n); } @@ -513,17 +558,68 @@ runtime_MHeap_SysAlloc(MHeap *h, uintptr n) return p; } +static struct +{ + Lock; + byte* pos; + byte* end; +} persistent; + +enum +{ + PersistentAllocChunk = 256<<10, + PersistentAllocMaxBlock = 64<<10, // VM reservation granularity is 64K on windows +}; + +// Wrapper around SysAlloc that can allocate small chunks. +// There is no associated free operation. +// Intended for things like function/type/debug-related persistent data. +// If align is 0, uses default align (currently 8). +void* +runtime_persistentalloc(uintptr size, uintptr align, uint64 *stat) +{ + byte *p; + + if(align != 0) { + if(align&(align-1)) + runtime_throw("persistentalloc: align is now a power of 2"); + if(align > PageSize) + runtime_throw("persistentalloc: align is too large"); + } else + align = 8; + if(size >= PersistentAllocMaxBlock) + return runtime_SysAlloc(size, stat); + runtime_lock(&persistent); + persistent.pos = (byte*)ROUND((uintptr)persistent.pos, align); + if(persistent.pos + size > persistent.end) { + persistent.pos = runtime_SysAlloc(PersistentAllocChunk, &mstats.other_sys); + if(persistent.pos == nil) { + runtime_unlock(&persistent); + runtime_throw("runtime: cannot allocate memory"); + } + persistent.end = persistent.pos + PersistentAllocChunk; + } + p = persistent.pos; + persistent.pos += size; + runtime_unlock(&persistent); + if(stat != &mstats.other_sys) { + // reaccount the allocation against provided stat + runtime_xadd64(stat, size); + runtime_xadd64(&mstats.other_sys, -(uint64)size); + } + return p; +} + static Lock settype_lock; void -runtime_settype_flush(M *mp, bool sysalloc) +runtime_settype_flush(M *mp) { uintptr *buf, *endbuf; uintptr size, ofs, j, t; uintptr ntypes, nbytes2, nbytes3; uintptr *data2; byte *data3; - bool sysalloc3; void *v; uintptr typ, p; MSpan *s; @@ -542,8 +638,8 @@ runtime_settype_flush(M *mp, bool sysalloc) // (Manually inlined copy of runtime_MHeap_Lookup) p = (uintptr)v>>PageShift; if(sizeof(void*) == 8) - p -= (uintptr)runtime_mheap->arena_start >> PageShift; - s = runtime_mheap->map[p]; + p -= (uintptr)runtime_mheap.arena_start >> PageShift; + s = runtime_mheap.spans[p]; if(s->sizeclass == 0) { s->types.compression = MTypes_Single; @@ -558,20 +654,9 @@ runtime_settype_flush(M *mp, bool sysalloc) case MTypes_Empty: ntypes = (s->npages << PageShift) / size; nbytes3 = 8*sizeof(uintptr) + 1*ntypes; - - if(!sysalloc) { - data3 = runtime_mallocgc(nbytes3, FlagNoProfiling|FlagNoPointers, 0, 1); - } else { - data3 = runtime_SysAlloc(nbytes3); - if(data3 == nil) - runtime_throw("runtime: cannot allocate memory"); - if(0) runtime_printf("settype(0->3): SysAlloc(%x) --> %p\n", (uint32)nbytes3, data3); - } - + data3 = runtime_mallocgc(nbytes3, 0, FlagNoProfiling|FlagNoScan|FlagNoInvokeGC); s->types.compression = MTypes_Bytes; - s->types.sysalloc = sysalloc; s->types.data = (uintptr)data3; - ((uintptr*)data3)[1] = typ; data3[8*sizeof(uintptr) + ofs] = 
1; break; @@ -596,20 +681,8 @@ runtime_settype_flush(M *mp, bool sysalloc) } else { ntypes = (s->npages << PageShift) / size; nbytes2 = ntypes * sizeof(uintptr); - - if(!sysalloc) { - data2 = runtime_mallocgc(nbytes2, FlagNoProfiling|FlagNoPointers, 0, 1); - } else { - data2 = runtime_SysAlloc(nbytes2); - if(data2 == nil) - runtime_throw("runtime: cannot allocate memory"); - if(0) runtime_printf("settype.(3->2): SysAlloc(%x) --> %p\n", (uint32)nbytes2, data2); - } - - sysalloc3 = s->types.sysalloc; - + data2 = runtime_mallocgc(nbytes2, 0, FlagNoProfiling|FlagNoScan|FlagNoInvokeGC); s->types.compression = MTypes_Words; - s->types.sysalloc = sysalloc; s->types.data = (uintptr)data2; // Move the contents of data3 to data2. Then deallocate data3. @@ -618,12 +691,6 @@ runtime_settype_flush(M *mp, bool sysalloc) t = ((uintptr*)data3)[t]; data2[j] = t; } - if(sysalloc3) { - nbytes3 = 8*sizeof(uintptr) + 1*ntypes; - if(0) runtime_printf("settype.(3->2): SysFree(%p,%x)\n", data3, (uint32)nbytes3); - runtime_SysFree(data3, nbytes3); - } - data2[ofs] = typ; } break; @@ -634,64 +701,6 @@ runtime_settype_flush(M *mp, bool sysalloc) mp->settype_bufsize = 0; } -// It is forbidden to use this function if it is possible that -// explicit deallocation via calling runtime_free(v) may happen. -void -runtime_settype(void *v, uintptr t) -{ - M *mp; - uintptr *buf; - uintptr i; - MSpan *s; - - if(t == 0) - runtime_throw("settype: zero type"); - - mp = runtime_m(); - buf = mp->settype_buf; - i = mp->settype_bufsize; - buf[i+0] = (uintptr)v; - buf[i+1] = t; - i += 2; - mp->settype_bufsize = i; - - if(i == nelem(mp->settype_buf)) { - runtime_settype_flush(mp, false); - } - - if(DebugTypeAtBlockEnd) { - s = runtime_MHeap_Lookup(runtime_mheap, v); - *(uintptr*)((uintptr)v+s->elemsize-sizeof(uintptr)) = t; - } -} - -void -runtime_settype_sysfree(MSpan *s) -{ - uintptr ntypes, nbytes; - - if(!s->types.sysalloc) - return; - - nbytes = (uintptr)-1; - - switch (s->types.compression) { - case MTypes_Words: - ntypes = (s->npages << PageShift) / s->elemsize; - nbytes = ntypes * sizeof(uintptr); - break; - case MTypes_Bytes: - ntypes = (s->npages << PageShift) / s->elemsize; - nbytes = 8*sizeof(uintptr) + 1*ntypes; - break; - } - - if(nbytes != (uintptr)-1) { - if(0) runtime_printf("settype: SysFree(%p,%x)\n", (void*)s->types.data, (uint32)nbytes); - runtime_SysFree((void*)s->types.data, nbytes); - } -} - uintptr runtime_gettype(void *v) { @@ -699,7 +708,7 @@ runtime_gettype(void *v) uintptr t, ofs; byte *data; - s = runtime_MHeap_LookupMaybe(runtime_mheap, v); + s = runtime_MHeap_LookupMaybe(&runtime_mheap, v); if(s != nil) { t = 0; switch(s->types.compression) { @@ -736,61 +745,23 @@ runtime_gettype(void *v) void* runtime_mal(uintptr n) { - return runtime_mallocgc(n, 0, 1, 1); + return runtime_mallocgc(n, 0, 0); } void * runtime_new(const Type *typ) { - void *ret; - uint32 flag; - - if(raceenabled) - runtime_m()->racepc = runtime_getcallerpc(&typ); - - if(typ->__size == 0) { - // All 0-length allocations use this pointer. - // The language does not require the allocations to - // have distinct values. - ret = (uint8*)&runtime_zerobase; - } else { - flag = typ->__code&GO_NO_POINTERS ? 
FlagNoPointers : 0; - ret = runtime_mallocgc(typ->__size, flag, 1, 1); - - if(UseSpanType && !flag) { - if(false) - runtime_printf("new %S: %p\n", *typ->__reflection, ret); - runtime_settype(ret, (uintptr)typ | TypeInfo_SingleObject); - } - } - - return ret; + return runtime_mallocgc(typ->__size, (uintptr)typ | TypeInfo_SingleObject, typ->kind&KindNoPointers ? FlagNoScan : 0); } static void* cnew(const Type *typ, intgo n, int32 objtyp) { - uint32 flag; - void *ret; - if((objtyp&(PtrSize-1)) != objtyp) runtime_throw("runtime: invalid objtyp"); if(n < 0 || (typ->__size > 0 && (uintptr)n > (MaxMem/typ->__size))) runtime_panicstring("runtime: allocation size out of range"); - if(typ->__size == 0 || n == 0) { - // All 0-length allocations use this pointer. - // The language does not require the allocations to - // have distinct values. - return &runtime_zerobase; - } - flag = typ->__code&GO_NO_POINTERS ? FlagNoPointers : 0; - ret = runtime_mallocgc(typ->__size*n, flag, 1, 1); - if(UseSpanType && !flag) { - if(false) - runtime_printf("cnew [%D]%S: %p\n", (int64)n, *typ->__reflection, ret); - runtime_settype(ret, (uintptr)typ | TypeInfo_SingleObject); - } - return ret; + return runtime_mallocgc(typ->__size*n, (uintptr)typ | objtyp, typ->kind&KindNoPointers ? FlagNoScan : 0); } // same as runtime_new, but callable from C @@ -814,6 +785,8 @@ func SetFinalizer(obj Eface, finalizer Eface) { byte *base; uintptr size; const FuncType *ft; + const Type *fint; + const PtrType *ot; if(obj.__type_descriptor == nil) { runtime_printf("runtime.SetFinalizer: first argument is nil interface\n"); @@ -828,22 +801,36 @@ func SetFinalizer(obj Eface, finalizer Eface) { goto throw; } ft = nil; + ot = (const PtrType*)obj.__type_descriptor; + fint = nil; if(finalizer.__type_descriptor != nil) { if(finalizer.__type_descriptor->__code != GO_FUNC) goto badfunc; ft = (const FuncType*)finalizer.__type_descriptor; - if(ft->__dotdotdot || ft->__in.__count != 1 || !__go_type_descriptors_equal(*(Type**)ft->__in.__values, obj.__type_descriptor)) + if(ft->__dotdotdot || ft->__in.__count != 1) + goto badfunc; + fint = *(Type**)ft->__in.__values; + if(__go_type_descriptors_equal(fint, obj.__type_descriptor)) { + // ok - same type + } else if(fint->__code == GO_PTR && (fint->__uncommon == nil || fint->__uncommon->__name == nil || obj.type->__uncommon == nil || obj.type->__uncommon->__name == nil) && __go_type_descriptors_equal(((const PtrType*)fint)->__element_type, ((const PtrType*)obj.type)->__element_type)) { + // ok - not same type, but both pointers, + // one or the other is unnamed, and same element type, so assignable. + } else if(fint->kind == GO_INTERFACE && ((const InterfaceType*)fint)->__methods.__count == 0) { + // ok - satisfies empty interface + } else if(fint->kind == GO_INTERFACE && __go_convert_interface_2(fint, obj.__type_descriptor, 1) != nil) { + // ok - satisfies non-empty interface + } else goto badfunc; } - if(!runtime_addfinalizer(obj.__object, finalizer.__type_descriptor != nil ? *(void**)finalizer.__object : nil, ft)) { + if(!runtime_addfinalizer(obj.__object, finalizer.__type_descriptor != nil ? 
*(void**)finalizer.__object : nil, ft, ot)) { runtime_printf("runtime.SetFinalizer: finalizer already set\n"); goto throw; } return; badfunc: - runtime_printf("runtime.SetFinalizer: second argument is %S, not func(%S)\n", *finalizer.__type_descriptor->__reflection, *obj.__type_descriptor->__reflection); + runtime_printf("runtime.SetFinalizer: cannot pass %S to finalizer %S\n", *obj.__type_descriptor->__reflection, *finalizer.__type_descriptor->__reflection); throw: runtime_throw("runtime.SetFinalizer"); } diff --git a/libgo/runtime/malloc.h b/libgo/runtime/malloc.h index ebea34eb32c..45c4c09c147 100644 --- a/libgo/runtime/malloc.h +++ b/libgo/runtime/malloc.h @@ -108,9 +108,7 @@ enum // Tunable constants. MaxSmallSize = 32<<10, - FixAllocChunk = 128<<10, // Chunk size for FixAlloc - MaxMCacheListLen = 256, // Maximum objects on MCacheList - MaxMCacheSize = 2<<20, // Maximum bytes in one MCache + FixAllocChunk = 16<<10, // Chunk size for FixAlloc MaxMHeapList = 1<<(20 - PageShift), // Maximum page length for fixed-size list in MHeap. HeapAllocChunk = 1<<20, // Chunk size for heap growth @@ -155,13 +153,13 @@ struct MLink // SysAlloc obtains a large chunk of zeroed memory from the // operating system, typically on the order of a hundred kilobytes -// or a megabyte. If the pointer argument is non-nil, the caller -// wants a mapping there or nowhere. +// or a megabyte. // // SysUnused notifies the operating system that the contents // of the memory region are no longer needed and can be reused -// for other purposes. The program reserves the right to start -// accessing those pages in the future. +// for other purposes. +// SysUsed notifies the operating system that the contents +// of the memory region are needed again. // // SysFree returns it unconditionally; this is only used if // an out-of-memory error has been detected midway through @@ -174,10 +172,11 @@ struct MLink // // SysMap maps previously reserved address space for use. -void* runtime_SysAlloc(uintptr nbytes); -void runtime_SysFree(void *v, uintptr nbytes); +void* runtime_SysAlloc(uintptr nbytes, uint64 *stat); +void runtime_SysFree(void *v, uintptr nbytes, uint64 *stat); void runtime_SysUnused(void *v, uintptr nbytes); -void runtime_SysMap(void *v, uintptr nbytes); +void runtime_SysUsed(void *v, uintptr nbytes); +void runtime_SysMap(void *v, uintptr nbytes, uint64 *stat); void* runtime_SysReserve(void *v, uintptr nbytes); // FixAlloc is a simple free-list allocator for fixed size objects. @@ -190,18 +189,17 @@ void* runtime_SysReserve(void *v, uintptr nbytes); // smashed by freeing and reallocating. 
struct FixAlloc { - uintptr size; - void *(*alloc)(uintptr); - void (*first)(void *arg, byte *p); // called first time p is returned - void *arg; - MLink *list; - byte *chunk; - uint32 nchunk; - uintptr inuse; // in-use bytes now - uintptr sys; // bytes obtained from system + uintptr size; + void (*first)(void *arg, byte *p); // called first time p is returned + void* arg; + MLink* list; + byte* chunk; + uint32 nchunk; + uintptr inuse; // in-use bytes now + uint64* stat; }; -void runtime_FixAlloc_Init(FixAlloc *f, uintptr size, void *(*alloc)(uintptr), void (*first)(void*, byte*), void *arg); +void runtime_FixAlloc_Init(FixAlloc *f, uintptr size, void (*first)(void*, byte*), void *arg, uint64 *stat); void* runtime_FixAlloc_Alloc(FixAlloc *f); void runtime_FixAlloc_Free(FixAlloc *f, void *p); @@ -236,6 +234,8 @@ struct MStats uint64 mcache_inuse; // MCache structures uint64 mcache_sys; uint64 buckhash_sys; // profiling bucket hash table + uint64 gc_sys; + uint64 other_sys; // Statistics about garbage collector. // Protected by mheap or stopping the world during GC. @@ -267,14 +267,12 @@ extern MStats mstats // class_to_size[i] = largest size in class i // class_to_allocnpages[i] = number of pages to allocate when // making new objects in class i -// class_to_transfercount[i] = number of objects to move when -// taking a bunch of objects out of the central lists -// and putting them in the thread free list. int32 runtime_SizeToClass(int32); extern int32 runtime_class_to_size[NumSizeClasses]; extern int32 runtime_class_to_allocnpages[NumSizeClasses]; -extern int32 runtime_class_to_transfercount[NumSizeClasses]; +extern int8 runtime_size_to_class8[1024/8 + 1]; +extern int8 runtime_size_to_class128[(MaxSmallSize-1024)/128 + 1]; extern void runtime_InitSizes(void); @@ -285,30 +283,24 @@ struct MCacheList { MLink *list; uint32 nlist; - uint32 nlistmin; }; struct MCache { - MCacheList list[NumSizeClasses]; - uintptr size; + // The following members are accessed on every malloc, + // so they are grouped here for better caching. + int32 next_sample; // trigger heap sample after allocating this many bytes intptr local_cachealloc; // bytes allocated (or freed) from cache since last lock of heap - intptr local_objects; // objects allocated (or freed) from cache since last lock of heap - intptr local_alloc; // bytes allocated (or freed) since last lock of heap - uintptr local_total_alloc; // bytes allocated (even if freed) since last lock of heap - uintptr local_nmalloc; // number of mallocs since last lock of heap - uintptr local_nfree; // number of frees since last lock of heap - uintptr local_nlookup; // number of pointer lookups since last lock of heap - int32 next_sample; // trigger heap sample after allocating this many bytes - // Statistics about allocation size classes since last lock of heap - struct { - uintptr nmalloc; - uintptr nfree; - } local_by_size[NumSizeClasses]; - + // The rest is not accessed on every malloc. + MCacheList list[NumSizeClasses]; + // Local allocator stats, flushed during GC. 
+ uintptr local_nlookup; // number of pointer lookups + uintptr local_largefree; // bytes freed for large objects (>MaxSmallSize) + uintptr local_nlargefree; // number of frees for large objects (>MaxSmallSize) + uintptr local_nsmallfree[NumSizeClasses]; // number of frees for small objects (<=MaxSmallSize) }; -void* runtime_MCache_Alloc(MCache *c, int32 sizeclass, uintptr size, int32 zeroed); +void runtime_MCache_Refill(MCache *c, int32 sizeclass); void runtime_MCache_Free(MCache *c, void *p, int32 sizeclass, uintptr size); void runtime_MCache_ReleaseAll(MCache *c); @@ -346,7 +338,6 @@ enum struct MTypes { byte compression; // one of MTypes_* - bool sysalloc; // whether (void*)data is from runtime_SysAlloc uintptr data; }; @@ -397,8 +388,8 @@ struct MCentral }; void runtime_MCentral_Init(MCentral *c, int32 sizeclass); -int32 runtime_MCentral_AllocList(MCentral *c, int32 n, MLink **first); -void runtime_MCentral_FreeList(MCentral *c, int32 n, MLink *first); +int32 runtime_MCentral_AllocList(MCentral *c, MLink **first); +void runtime_MCentral_FreeList(MCentral *c, MLink *first); void runtime_MCentral_FreeSpan(MCentral *c, MSpan *s, int32 n, MLink *start, MLink *end); // Main malloc heap. @@ -414,7 +405,8 @@ struct MHeap uint32 nspancap; // span lookup - MSpan *map[1<<MHeapMap_Bits]; + MSpan** spans; + uintptr spans_mapped; // range of addresses we might see in the heap byte *bitmap; @@ -434,10 +426,15 @@ struct MHeap FixAlloc spanalloc; // allocator for Span* FixAlloc cachealloc; // allocator for MCache* + + // Malloc stats. + uint64 largefree; // bytes freed for large objects (>MaxSmallSize) + uint64 nlargefree; // number of frees for large objects (>MaxSmallSize) + uint64 nsmallfree[NumSizeClasses]; // number of frees for small objects (<=MaxSmallSize) }; -extern MHeap *runtime_mheap; +extern MHeap runtime_mheap; -void runtime_MHeap_Init(MHeap *h, void *(*allocator)(uintptr)); +void runtime_MHeap_Init(MHeap *h); MSpan* runtime_MHeap_Alloc(MHeap *h, uintptr npage, int32 sizeclass, int32 acct, int32 zeroed); void runtime_MHeap_Free(MHeap *h, MSpan *s, int32 acct); MSpan* runtime_MHeap_Lookup(MHeap *h, void *v); @@ -445,9 +442,11 @@ MSpan* runtime_MHeap_LookupMaybe(MHeap *h, void *v); void runtime_MGetSizeClassInfo(int32 sizeclass, uintptr *size, int32 *npages, int32 *nobj); void* runtime_MHeap_SysAlloc(MHeap *h, uintptr n); void runtime_MHeap_MapBits(MHeap *h); +void runtime_MHeap_MapSpans(MHeap *h); void runtime_MHeap_Scavenger(void*); -void* runtime_mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed); +void* runtime_mallocgc(uintptr size, uintptr typ, uint32 flag); +void* runtime_persistentalloc(uintptr size, uintptr align, uint64 *stat); int32 runtime_mlookup(void *v, byte **base, uintptr *size, MSpan **s); void runtime_gc(int32 force); void runtime_markallocated(void *v, uintptr n, bool noptr); @@ -463,17 +462,18 @@ void runtime_purgecachedstats(MCache*); void* runtime_cnew(const Type*); void* runtime_cnewarray(const Type*, intgo); -void runtime_settype(void*, uintptr); -void runtime_settype_flush(M*, bool); +void runtime_settype_flush(M*); void runtime_settype_sysfree(MSpan*); uintptr runtime_gettype(void*); enum { // flags to malloc - FlagNoPointers = 1<<0, // no pointers here - FlagNoProfiling = 1<<1, // must not profile - FlagNoGC = 1<<2, // must not free or scan for pointers + FlagNoScan = 1<<0, // GC doesn't have to scan object + FlagNoProfiling = 1<<1, // must not profile + FlagNoGC = 1<<2, // must not free or scan for pointers + FlagNoZero = 1<<3, // don't zero memory 
+ FlagNoInvokeGC = 1<<4, // don't invoke GC }; typedef struct Obj Obj; @@ -493,15 +493,15 @@ void runtime_helpgc(int32 nproc); void runtime_gchelper(void); struct __go_func_type; -bool runtime_getfinalizer(void *p, bool del, FuncVal **fn, const struct __go_func_type **ft); +struct __go_ptr_type; +bool runtime_getfinalizer(void *p, bool del, FuncVal **fn, const struct __go_func_type **ft, const struct __go_ptr_type **ot); void runtime_walkfintab(void (*fn)(void*), void (*scan)(Obj)); enum { TypeInfo_SingleObject = 0, TypeInfo_Array = 1, - TypeInfo_Map = 2, - TypeInfo_Chan = 3, + TypeInfo_Chan = 2, // Enables type information at the end of blocks allocated from heap DebugTypeAtBlockEnd = 0, diff --git a/libgo/runtime/mcache.c b/libgo/runtime/mcache.c index 45bac4ffbce..38f824a139b 100644 --- a/libgo/runtime/mcache.c +++ b/libgo/runtime/mcache.c @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// Per-thread (in Go, per-M) malloc cache for small objects. +// Per-P malloc cache for small objects. // // See malloc.h for an overview. @@ -10,48 +10,23 @@ #include "arch.h" #include "malloc.h" -void* -runtime_MCache_Alloc(MCache *c, int32 sizeclass, uintptr size, int32 zeroed) +void +runtime_MCache_Refill(MCache *c, int32 sizeclass) { MCacheList *l; - MLink *first, *v; - int32 n; - // Allocate from list. + // Replenish using central lists. l = &c->list[sizeclass]; - if(l->list == nil) { - // Replenish using central lists. - n = runtime_MCentral_AllocList(&runtime_mheap->central[sizeclass], - runtime_class_to_transfercount[sizeclass], &first); - if(n == 0) - runtime_throw("out of memory"); - l->list = first; - l->nlist = n; - c->size += n*size; - } - v = l->list; - l->list = v->next; - l->nlist--; - if(l->nlist < l->nlistmin) - l->nlistmin = l->nlist; - c->size -= size; - - // v is zeroed except for the link pointer - // that we used above; zero that. - v->next = nil; - if(zeroed) { - // block is zeroed iff second word is zero ... - if(size > sizeof(uintptr) && ((uintptr*)v)[1] != 0) - runtime_memclr((byte*)v, size); - } - c->local_cachealloc += size; - c->local_objects++; - return v; + if(l->list) + runtime_throw("MCache_Refill: the list is not empty"); + l->nlist = runtime_MCentral_AllocList(&runtime_mheap.central[sizeclass], &l->list); + if(l->list == nil) + runtime_throw("out of memory"); } // Take n elements off l and return them to the central free list. static void -ReleaseN(MCache *c, MCacheList *l, int32 n, int32 sizeclass) +ReleaseN(MCacheList *l, int32 n, int32 sizeclass) { MLink *first, **lp; int32 i; @@ -64,18 +39,14 @@ ReleaseN(MCache *c, MCacheList *l, int32 n, int32 sizeclass) l->list = *lp; *lp = nil; l->nlist -= n; - if(l->nlist < l->nlistmin) - l->nlistmin = l->nlist; - c->size -= n*runtime_class_to_size[sizeclass]; // Return them to central free list. - runtime_MCentral_FreeList(&runtime_mheap->central[sizeclass], n, first); + runtime_MCentral_FreeList(&runtime_mheap.central[sizeclass], first); } void runtime_MCache_Free(MCache *c, void *v, int32 sizeclass, uintptr size) { - int32 i, n; MCacheList *l; MLink *p; @@ -85,34 +56,12 @@ runtime_MCache_Free(MCache *c, void *v, int32 sizeclass, uintptr size) p->next = l->list; l->list = p; l->nlist++; - c->size += size; c->local_cachealloc -= size; - c->local_objects--; - - if(l->nlist >= MaxMCacheListLen) { - // Release a chunk back. - ReleaseN(c, l, runtime_class_to_transfercount[sizeclass], sizeclass); - } - - if(c->size >= MaxMCacheSize) { - // Scavenge. 
- for(i=0; i<NumSizeClasses; i++) { - l = &c->list[i]; - n = l->nlistmin; - // n is the minimum number of elements we've seen on - // the list since the last scavenge. If n > 0, it means that - // we could have gotten by with n fewer elements - // without needing to consult the central free list. - // Move toward that situation by releasing n/2 of them. - if(n > 0) { - if(n > 1) - n /= 2; - ReleaseN(c, l, n, i); - } - l->nlistmin = l->nlist; - } - } + // We transfer span at a time from MCentral to MCache, + // if we have 2 times more than that, release a half back. + if(l->nlist >= 2*(runtime_class_to_allocnpages[sizeclass]<<PageShift)/size) + ReleaseN(l, l->nlist/2, sizeclass); } void @@ -123,7 +72,10 @@ runtime_MCache_ReleaseAll(MCache *c) for(i=0; i<NumSizeClasses; i++) { l = &c->list[i]; - ReleaseN(c, l, l->nlist, i); - l->nlistmin = 0; + if(l->list) { + runtime_MCentral_FreeList(&runtime_mheap.central[i], l->list); + l->list = nil; + l->nlist = 0; + } } } diff --git a/libgo/runtime/mcentral.c b/libgo/runtime/mcentral.c index b3108a1c061..81916101e46 100644 --- a/libgo/runtime/mcentral.c +++ b/libgo/runtime/mcentral.c @@ -30,16 +30,15 @@ runtime_MCentral_Init(MCentral *c, int32 sizeclass) runtime_MSpanList_Init(&c->empty); } -// Allocate up to n objects from the central free list. +// Allocate a list of objects from the central free list. // Return the number of objects allocated. // The objects are linked together by their first words. -// On return, *pstart points at the first object. +// On return, *pfirst points at the first object. int32 -runtime_MCentral_AllocList(MCentral *c, int32 n, MLink **pfirst) +runtime_MCentral_AllocList(MCentral *c, MLink **pfirst) { MSpan *s; - MLink *first, *last; - int32 cap, avail, i; + int32 cap, n; runtime_lock(c); // Replenish central list if empty. @@ -52,49 +51,27 @@ runtime_MCentral_AllocList(MCentral *c, int32 n, MLink **pfirst) } s = c->nonempty.next; cap = (s->npages << PageShift) / s->elemsize; - avail = cap - s->ref; - if(avail < n) - n = avail; - - // First one is guaranteed to work, because we just grew the list. - first = s->freelist; - last = first; - for(i=1; i<n; i++) { - last = last->next; - } - s->freelist = last->next; - last->next = nil; + n = cap - s->ref; + *pfirst = s->freelist; + s->freelist = nil; s->ref += n; c->nfree -= n; - - if(n == avail) { - if(s->freelist != nil || s->ref != (uint32)cap) { - runtime_throw("invalid freelist"); - } - runtime_MSpanList_Remove(s); - runtime_MSpanList_Insert(&c->empty, s); - } - + runtime_MSpanList_Remove(s); + runtime_MSpanList_Insert(&c->empty, s); runtime_unlock(c); - *pfirst = first; return n; } -// Free n objects back into the central free list. +// Free the list of objects back into the central free list. void -runtime_MCentral_FreeList(MCentral *c, int32 n, MLink *start) +runtime_MCentral_FreeList(MCentral *c, MLink *start) { - MLink *v, *next; - - // Assume next == nil marks end of list. - // n and end would be useful if we implemented - // the transfer cache optimization in the TODO above. - USED(n); + MLink *next; runtime_lock(c); - for(v=start; v; v=next) { - next = v->next; - MCentral_Free(c, v); + for(; start != nil; start = next) { + next = start->next; + MCentral_Free(c, start); } runtime_unlock(c); } @@ -108,7 +85,7 @@ MCentral_Free(MCentral *c, void *v) int32 size; // Find span for v. 
- s = runtime_MHeap_Lookup(runtime_mheap, v); + s = runtime_MHeap_Lookup(&runtime_mheap, v); if(s == nil || s->ref == 0) runtime_throw("invalid free"); @@ -133,7 +110,7 @@ MCentral_Free(MCentral *c, void *v) s->freelist = nil; c->nfree -= (s->npages << PageShift) / size; runtime_unlock(c); - runtime_MHeap_Free(runtime_mheap, s, 0); + runtime_MHeap_Free(&runtime_mheap, s, 0); runtime_lock(c); } } @@ -168,7 +145,7 @@ runtime_MCentral_FreeSpan(MCentral *c, MSpan *s, int32 n, MLink *start, MLink *e c->nfree -= (s->npages << PageShift) / size; runtime_unlock(c); runtime_unmarkspan((byte*)(s->start<<PageShift), s->npages<<PageShift); - runtime_MHeap_Free(runtime_mheap, s, 0); + runtime_MHeap_Free(&runtime_mheap, s, 0); } else { runtime_unlock(c); } @@ -200,7 +177,7 @@ MCentral_Grow(MCentral *c) runtime_unlock(c); runtime_MGetSizeClassInfo(c->sizeclass, &size, &npages, &n); - s = runtime_MHeap_Alloc(runtime_mheap, npages, c->sizeclass, 0, 1); + s = runtime_MHeap_Alloc(&runtime_mheap, npages, c->sizeclass, 0, 1); if(s == nil) { // TODO(rsc): Log out of memory runtime_lock(c); diff --git a/libgo/runtime/mem.c b/libgo/runtime/mem.c index 8481e950750..78f7c51faf2 100644 --- a/libgo/runtime/mem.c +++ b/libgo/runtime/mem.c @@ -60,13 +60,11 @@ mmap_fixed(byte *v, uintptr n, int32 prot, int32 flags, int32 fd, uint32 offset) } void* -runtime_SysAlloc(uintptr n) +runtime_SysAlloc(uintptr n, uint64 *stat) { void *p; int fd = -1; - mstats.sys += n; - #ifdef USE_DEV_ZERO if (dev_zero == -1) { dev_zero = open("/dev/zero", O_RDONLY); @@ -91,6 +89,7 @@ runtime_SysAlloc(uintptr n) } return nil; } + runtime_xadd64(stat, n); return p; } @@ -103,9 +102,16 @@ runtime_SysUnused(void *v __attribute__ ((unused)), uintptr n __attribute__ ((un } void -runtime_SysFree(void *v, uintptr n) +runtime_SysUsed(void *v, uintptr n) +{ + USED(v); + USED(n); +} + +void +runtime_SysFree(void *v, uintptr n, uint64 *stat) { - mstats.sys -= n; + runtime_xadd64(stat, -(uint64)n); runtime_munmap(v, n); } @@ -132,8 +138,10 @@ runtime_SysReserve(void *v, uintptr n) // Only user-mode Linux (UML) rejects these requests. if(sizeof(void*) == 8 && (uintptr)v >= 0xffffffffU) { p = mmap_fixed(v, 64<<10, PROT_NONE, MAP_ANON|MAP_PRIVATE, fd, 0); - if (p != v) + if (p != v) { + runtime_munmap(p, 64<<10); return nil; + } runtime_munmap(p, 64<<10); return v; } @@ -149,12 +157,12 @@ runtime_SysReserve(void *v, uintptr n) } void -runtime_SysMap(void *v, uintptr n) +runtime_SysMap(void *v, uintptr n, uint64 *stat) { void *p; int fd = -1; - mstats.sys += n; + runtime_xadd64(stat, n); #ifdef USE_DEV_ZERO if (dev_zero == -1) { diff --git a/libgo/runtime/mfinal.c b/libgo/runtime/mfinal.c index 407092bf392..625af528e1e 100644 --- a/libgo/runtime/mfinal.c +++ b/libgo/runtime/mfinal.c @@ -5,6 +5,7 @@ #include "runtime.h" #include "arch.h" #include "malloc.h" +#include "go-type.h" enum { debug = 0 }; @@ -13,6 +14,7 @@ struct Fin { FuncVal *fn; const struct __go_func_type *ft; + const struct __go_ptr_type *ot; }; // Finalizer hash table. Direct hash, linear scan, at most 3/4 full. 
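[Editor's note] One theme running through the mem.c hunks above is that the single mstats.sys counter is gone: every SysAlloc/SysFree/SysMap call now names the per-category counter it should charge and updates it with an atomic add. The following is a minimal, standalone C sketch of that calling convention, not the runtime's code: sys_alloc, sys_free, xadd64 and other_sys are illustrative names, and GCC's __sync builtin stands in for runtime_xadd64.

#include <stdint.h>
#include <sys/mman.h>

/* Stand-in for one of the per-category counters in MStats
   (heap_sys, gc_sys, buckhash_sys, other_sys, ...). */
static uint64_t other_sys;

/* Atomic 64-bit add, analogous to runtime_xadd64. */
static void
xadd64(uint64_t *stat, int64_t delta)
{
	__sync_fetch_and_add(stat, (uint64_t)delta);
}

/* Ask the OS for n bytes and charge them to *stat, mirroring the new
   runtime_SysAlloc(n, stat) signature. */
static void*
sys_alloc(uintptr_t n, uint64_t *stat)
{
	void *p;

	p = mmap(NULL, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0);
	if(p == MAP_FAILED)
		return NULL;
	xadd64(stat, (int64_t)n);
	return p;
}

/* Give the memory back and credit the same counter, as SysFree now does. */
static void
sys_free(void *v, uintptr_t n, uint64_t *stat)
{
	xadd64(stat, -(int64_t)n);
	munmap(v, n);
}

A caller picks the counter for its subsystem, e.g. p = sys_alloc(64*1024, &other_sys). Keeping one counter per subsystem is what lets updatememstats, later in this patch, rebuild mstats.sys as the sum of heap_sys, stacks_sys, mspan_sys, mcache_sys, buckhash_sys, gc_sys and other_sys.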
@@ -42,7 +44,7 @@ static struct { } fintab[TABSZ]; static void -addfintab(Fintab *t, void *k, FuncVal *fn, const struct __go_func_type *ft) +addfintab(Fintab *t, void *k, FuncVal *fn, const struct __go_func_type *ft, const struct __go_ptr_type *ot) { int32 i, j; @@ -67,6 +69,7 @@ ret: t->fkey[i] = k; t->val[i].fn = fn; t->val[i].ft = ft; + t->val[i].ot = ot; } static bool @@ -87,6 +90,7 @@ lookfintab(Fintab *t, void *k, bool del, Fin *f) t->fkey[i] = (void*)-1; t->val[i].fn = nil; t->val[i].ft = nil; + t->val[i].ot = nil; t->ndead++; } return true; @@ -117,13 +121,13 @@ resizefintab(Fintab *tab) newtab.max *= 3; } - newtab.fkey = runtime_mallocgc(newtab.max*sizeof newtab.fkey[0], FlagNoPointers, 0, 1); - newtab.val = runtime_mallocgc(newtab.max*sizeof newtab.val[0], 0, 0, 1); + newtab.fkey = runtime_mallocgc(newtab.max*sizeof newtab.fkey[0], 0, FlagNoInvokeGC|FlagNoScan); + newtab.val = runtime_mallocgc(newtab.max*sizeof newtab.val[0], 0, FlagNoInvokeGC); for(i=0; i<tab->max; i++) { k = tab->fkey[i]; if(k != nil && k != (void*)-1) - addfintab(&newtab, k, tab->val[i].fn, tab->val[i].ft); + addfintab(&newtab, k, tab->val[i].fn, tab->val[i].ft, tab->val[i].ot); } runtime_free(tab->fkey); @@ -137,7 +141,7 @@ resizefintab(Fintab *tab) } bool -runtime_addfinalizer(void *p, FuncVal *f, const struct __go_func_type *ft) +runtime_addfinalizer(void *p, FuncVal *f, const struct __go_func_type *ft, const struct __go_ptr_type *ot) { Fintab *tab; byte *base; @@ -166,7 +170,7 @@ runtime_addfinalizer(void *p, FuncVal *f, const struct __go_func_type *ft) resizefintab(tab); } - addfintab(tab, p, f, ft); + addfintab(tab, p, f, ft, ot); runtime_setblockspecial(p, true); runtime_unlock(tab); return true; @@ -175,7 +179,7 @@ runtime_addfinalizer(void *p, FuncVal *f, const struct __go_func_type *ft) // get finalizer; if del, delete finalizer. // caller is responsible for updating RefHasFinalizer (special) bit. bool -runtime_getfinalizer(void *p, bool del, FuncVal **fn, const struct __go_func_type **ft) +runtime_getfinalizer(void *p, bool del, FuncVal **fn, const struct __go_func_type **ft, const struct __go_ptr_type **ot) { Fintab *tab; bool res; @@ -189,6 +193,7 @@ runtime_getfinalizer(void *p, bool del, FuncVal **fn, const struct __go_func_typ return false; *fn = f.fn; *ft = f.ft; + *ot = f.ot; return true; } diff --git a/libgo/runtime/mfixalloc.c b/libgo/runtime/mfixalloc.c index 6e4f0c6e607..9d0b3bbda7e 100644 --- a/libgo/runtime/mfixalloc.c +++ b/libgo/runtime/mfixalloc.c @@ -13,17 +13,16 @@ // Initialize f to allocate objects of the given size, // using the allocator to obtain chunks of memory. 
void -runtime_FixAlloc_Init(FixAlloc *f, uintptr size, void *(*alloc)(uintptr), void (*first)(void*, byte*), void *arg) +runtime_FixAlloc_Init(FixAlloc *f, uintptr size, void (*first)(void*, byte*), void *arg, uint64 *stat) { f->size = size; - f->alloc = alloc; f->first = first; f->arg = arg; f->list = nil; f->chunk = nil; f->nchunk = 0; f->inuse = 0; - f->sys = 0; + f->stat = stat; } void* @@ -43,10 +42,7 @@ runtime_FixAlloc_Alloc(FixAlloc *f) return v; } if(f->nchunk < f->size) { - f->sys += FixAllocChunk; - f->chunk = f->alloc(FixAllocChunk); - if(f->chunk == nil) - runtime_throw("out of memory (FixAlloc)"); + f->chunk = runtime_persistentalloc(FixAllocChunk, 0, f->stat); f->nchunk = FixAllocChunk; } v = f->chunk; diff --git a/libgo/runtime/mgc0.c b/libgo/runtime/mgc0.c index c3b32111ca0..3edcee9c397 100644 --- a/libgo/runtime/mgc0.c +++ b/libgo/runtime/mgc0.c @@ -59,6 +59,13 @@ enum { PRECISE = 1, LOOP = 2, PC_BITS = PRECISE | LOOP, + + // Pointer map + BitsPerPointer = 2, + BitsNoPointer = 0, + BitsPointer = 1, + BitsIface = 2, + BitsEface = 3, }; // Bits in per-word bitmap. @@ -70,7 +77,7 @@ enum { // The bits in the word are packed together by type first, then by // heap location, so each 64-bit bitmap word consists of, from top to bottom, // the 16 bitSpecial bits for the corresponding heap words, then the 16 bitMarked bits, -// then the 16 bitNoPointers/bitBlockBoundary bits, then the 16 bitAllocated bits. +// then the 16 bitNoScan/bitBlockBoundary bits, then the 16 bitAllocated bits. // This layout makes it easier to iterate over the bits of a given type. // // The bitmap starts at mheap.arena_start and extends *backward* from @@ -87,7 +94,7 @@ enum { // /* then test bits & bitAllocated, bits & bitMarked, etc. */ // #define bitAllocated ((uintptr)1<<(bitShift*0)) -#define bitNoPointers ((uintptr)1<<(bitShift*1)) /* when bitAllocated is set */ +#define bitNoScan ((uintptr)1<<(bitShift*1)) /* when bitAllocated is set */ #define bitMarked ((uintptr)1<<(bitShift*2)) /* when bitAllocated is set */ #define bitSpecial ((uintptr)1<<(bitShift*3)) /* when bitAllocated is set - has finalizer or being profiled */ #define bitBlockBoundary ((uintptr)1<<(bitShift*1)) /* when bitAllocated is NOT set */ @@ -109,8 +116,6 @@ enum { // uint32 runtime_worldsema = 1; -static int32 gctrace; - // The size of Workbuf is N*PageSize. typedef struct Workbuf Workbuf; struct Workbuf @@ -129,6 +134,7 @@ struct Finalizer FuncVal *fn; void *arg; const struct __go_func_type *ft; + const struct __go_ptr_type *ot; }; typedef struct FinBlock FinBlock; @@ -178,7 +184,6 @@ static struct { enum { GC_DEFAULT_PTR = GC_NUM_INSTR, - GC_MAP_NEXT, GC_CHAN, GC_NUM_INSTR2 @@ -201,6 +206,16 @@ static struct { uint64 instr[GC_NUM_INSTR2]; uint64 putempty; uint64 getfull; + struct { + uint64 foundbit; + uint64 foundword; + uint64 foundspan; + } flushptrbuf; + struct { + uint64 foundbit; + uint64 foundword; + uint64 foundspan; + } markonly; } gcstats; // markonly marks an object. It returns true if the object @@ -210,12 +225,12 @@ static bool markonly(void *obj) { byte *p; - uintptr *bitp, bits, shift, x, xbits, off; + uintptr *bitp, bits, shift, x, xbits, off, j; MSpan *s; PageID k; // Words outside the arena cannot be pointers. - if((byte*)obj < runtime_mheap->arena_start || (byte*)obj >= runtime_mheap->arena_used) + if((byte*)obj < runtime_mheap.arena_start || (byte*)obj >= runtime_mheap.arena_used) return false; // obj may be a pointer to a live object. 
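[Editor's note] The bitmap layout comment and the markonly/flushptrbuf/markallocated hunks in mgc0.c keep repeating the same three-line address computation. As a reading aid, here is a self-contained C sketch of that lookup; heap_bits is a hypothetical helper name, and the constants are taken from the layout described in the comment above (4 GC bits per heap word, so 16 heap words per 64-bit bitmap word).

#include <stdint.h>

enum {
	PtrSize = sizeof(void*),
	bitShift = PtrSize*8/4,            /* 4 GC bits per heap word */
	wordsPerBitmapWord = PtrSize*8/4,  /* 16 heap words per bitmap word on 64-bit */
};

#define bitAllocated ((uintptr_t)1<<(bitShift*0))
#define bitNoScan    ((uintptr_t)1<<(bitShift*1))  /* when bitAllocated is set */
#define bitMarked    ((uintptr_t)1<<(bitShift*2))
#define bitSpecial   ((uintptr_t)1<<(bitShift*3))

/* Locate the bitmap word and in-word shift holding a heap pointer's GC bits.
   The bitmap hangs *backward* off arena_start, so the word index is
   subtracted.  The caller then tests bits & bitAllocated, bits & bitMarked,
   and so on, exactly as markonly and flushptrbuf do. */
static uintptr_t
heap_bits(void *obj, void *arena_start, uintptr_t **bitp, uintptr_t *shift)
{
	uintptr_t off;

	off = (uintptr_t*)obj - (uintptr_t*)arena_start;  /* word offset into the arena */
	*bitp = (uintptr_t*)arena_start - off/wordsPerBitmapWord - 1;
	*shift = off % wordsPerBitmapWord;
	return **bitp >> *shift;
}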
@@ -225,42 +240,57 @@ markonly(void *obj) obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1)); // Find bits for this word. - off = (uintptr*)obj - (uintptr*)runtime_mheap->arena_start; - bitp = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1; + off = (uintptr*)obj - (uintptr*)runtime_mheap.arena_start; + bitp = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1; shift = off % wordsPerBitmapWord; xbits = *bitp; bits = xbits >> shift; // Pointing at the beginning of a block? - if((bits & (bitAllocated|bitBlockBoundary)) != 0) + if((bits & (bitAllocated|bitBlockBoundary)) != 0) { + if(CollectStats) + runtime_xadd64(&gcstats.markonly.foundbit, 1); goto found; + } + + // Pointing just past the beginning? + // Scan backward a little to find a block boundary. + for(j=shift; j-->0; ) { + if(((xbits>>j) & (bitAllocated|bitBlockBoundary)) != 0) { + shift = j; + bits = xbits>>shift; + if(CollectStats) + runtime_xadd64(&gcstats.markonly.foundword, 1); + goto found; + } + } // Otherwise consult span table to find beginning. // (Manually inlined copy of MHeap_LookupMaybe.) k = (uintptr)obj>>PageShift; x = k; if(sizeof(void*) == 8) - x -= (uintptr)runtime_mheap->arena_start>>PageShift; - s = runtime_mheap->map[x]; - if(s == nil || k < s->start || k - s->start >= s->npages || s->state != MSpanInUse) + x -= (uintptr)runtime_mheap.arena_start>>PageShift; + s = runtime_mheap.spans[x]; + if(s == nil || k < s->start || (byte*)obj >= s->limit || s->state != MSpanInUse) return false; p = (byte*)((uintptr)s->start<<PageShift); if(s->sizeclass == 0) { obj = p; } else { - if((byte*)obj >= (byte*)s->limit) - return false; uintptr size = s->elemsize; int32 i = ((byte*)obj - p)/size; obj = p+i*size; } // Now that we know the object header, reload bits. - off = (uintptr*)obj - (uintptr*)runtime_mheap->arena_start; - bitp = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1; + off = (uintptr*)obj - (uintptr*)runtime_mheap.arena_start; + bitp = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1; shift = off % wordsPerBitmapWord; xbits = *bitp; bits = xbits >> shift; + if(CollectStats) + runtime_xadd64(&gcstats.markonly.foundspan, 1); found: // Now we have bits, bitp, and shift correct for @@ -338,7 +368,7 @@ flushptrbuf(PtrTarget *ptrbuf, PtrTarget **ptrbufpos, Obj **_wp, Workbuf **_wbuf Workbuf *wbuf; PtrTarget *ptrbuf_end; - arena_start = runtime_mheap->arena_start; + arena_start = runtime_mheap.arena_start; wp = *_wp; wbuf = *_wbuf; @@ -377,7 +407,7 @@ flushptrbuf(PtrTarget *ptrbuf, PtrTarget **ptrbufpos, Obj **_wp, Workbuf **_wbuf // obj belongs to interval [mheap.arena_start, mheap.arena_used). if(Debug > 1) { - if(obj < runtime_mheap->arena_start || obj >= runtime_mheap->arena_used) + if(obj < runtime_mheap.arena_start || obj >= runtime_mheap.arena_used) runtime_throw("object is outside of mheap"); } @@ -398,8 +428,11 @@ flushptrbuf(PtrTarget *ptrbuf, PtrTarget **ptrbufpos, Obj **_wp, Workbuf **_wbuf bits = xbits >> shift; // Pointing at the beginning of a block? 
- if((bits & (bitAllocated|bitBlockBoundary)) != 0) + if((bits & (bitAllocated|bitBlockBoundary)) != 0) { + if(CollectStats) + runtime_xadd64(&gcstats.flushptrbuf.foundbit, 1); goto found; + } ti = 0; @@ -410,6 +443,8 @@ flushptrbuf(PtrTarget *ptrbuf, PtrTarget **ptrbufpos, Obj **_wp, Workbuf **_wbuf obj = (byte*)obj - (shift-j)*PtrSize; shift = j; bits = xbits>>shift; + if(CollectStats) + runtime_xadd64(&gcstats.flushptrbuf.foundword, 1); goto found; } } @@ -420,15 +455,13 @@ flushptrbuf(PtrTarget *ptrbuf, PtrTarget **ptrbufpos, Obj **_wp, Workbuf **_wbuf x = k; if(sizeof(void*) == 8) x -= (uintptr)arena_start>>PageShift; - s = runtime_mheap->map[x]; - if(s == nil || k < s->start || k - s->start >= s->npages || s->state != MSpanInUse) + s = runtime_mheap.spans[x]; + if(s == nil || k < s->start || obj >= s->limit || s->state != MSpanInUse) continue; p = (byte*)((uintptr)s->start<<PageShift); if(s->sizeclass == 0) { obj = p; } else { - if((byte*)obj >= (byte*)s->limit) - continue; size = s->elemsize; int32 i = ((byte*)obj - p)/size; obj = p+i*size; @@ -440,6 +473,8 @@ flushptrbuf(PtrTarget *ptrbuf, PtrTarget **ptrbufpos, Obj **_wp, Workbuf **_wbuf shift = off % wordsPerBitmapWord; xbits = *bitp; bits = xbits >> shift; + if(CollectStats) + runtime_xadd64(&gcstats.flushptrbuf.foundspan, 1); found: // Now we have bits, bitp, and shift correct for @@ -460,7 +495,7 @@ flushptrbuf(PtrTarget *ptrbuf, PtrTarget **ptrbufpos, Obj **_wp, Workbuf **_wbuf } // If object has no pointers, don't need to scan further. - if((bits & bitNoPointers) != 0) + if((bits & bitNoScan) != 0) continue; // Ask span about size class. @@ -468,7 +503,7 @@ flushptrbuf(PtrTarget *ptrbuf, PtrTarget **ptrbufpos, Obj **_wp, Workbuf **_wbuf x = (uintptr)obj >> PageShift; if(sizeof(void*) == 8) x -= (uintptr)arena_start>>PageShift; - s = runtime_mheap->map[x]; + s = runtime_mheap.spans[x]; PREFETCH(obj); @@ -552,9 +587,6 @@ flushobjbuf(Obj *objbuf, Obj **objbufpos, Obj **_wp, Workbuf **_wbuf, uintptr *_ static uintptr defaultProg[2] = {PtrSize, GC_DEFAULT_PTR}; #if 0 -// Hashmap iterator program -static uintptr mapProg[2] = {0, GC_MAP_NEXT}; - // Hchan program static uintptr chanProg[2] = {0, GC_CHAN}; #endif @@ -578,7 +610,7 @@ checkptr(void *obj, uintptr objti) if(!Debug) runtime_throw("checkptr is debug only"); - if((byte*)obj < runtime_mheap->arena_start || (byte*)obj >= runtime_mheap->arena_used) + if((byte*)obj < runtime_mheap.arena_start || (byte*)obj >= runtime_mheap.arena_used) return; type = runtime_gettype(obj); t = (Type*)(type & ~(uintptr)(PtrSize-1)); @@ -586,8 +618,8 @@ checkptr(void *obj, uintptr objti) return; x = (uintptr)obj >> PageShift; if(sizeof(void*) == 8) - x -= (uintptr)(runtime_mheap->arena_start)>>PageShift; - s = runtime_mheap->map[x]; + x -= (uintptr)(runtime_mheap.arena_start)>>PageShift; + s = runtime_mheap.spans[x]; objstart = (byte*)((uintptr)s->start<<PageShift); if(s->sizeclass != 0) { i = ((byte*)obj - objstart)/s->elemsize; @@ -595,8 +627,11 @@ checkptr(void *obj, uintptr objti) } tisize = *(uintptr*)objti; // Sanity check for object size: it should fit into the memory block. 
- if((byte*)obj + tisize > objstart + s->elemsize) + if((byte*)obj + tisize > objstart + s->elemsize) { + runtime_printf("object of type '%S' at %p/%p does not fit in block %p/%p\n", + *t->string, obj, tisize, objstart, s->elemsize); runtime_throw("invalid gc type info"); + } if(obj != objstart) return; // If obj points to the beginning of the memory block, @@ -613,7 +648,7 @@ checkptr(void *obj, uintptr objti) for(j = 1; pc1[j] != GC_END && pc2[j] != GC_END; j++) { if(pc1[j] != pc2[j]) { runtime_printf("invalid gc type info for '%s' at %p, type info %p, block info %p\n", - t->string ? (const int8*)t->string->str : (const int8*)"?", j, pc1[j], pc2[j]); + t->string ? (const int8*)t->string->str : (const int8*)"?", j, pc1[j], pc2[j]); runtime_throw("invalid gc type info"); } } @@ -638,7 +673,7 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking) uintptr n, i, end_b, elemsize, size, ti, objti, count /* , type */; uintptr *pc, precise_type, nominal_size; #if 0 - uintptr *map_ret, mapkey_size, mapval_size, mapkey_ti, mapval_ti, *chan_ret, chancap; + uintptr *chan_ret, chancap; #endif void *obj; const Type *t; @@ -650,11 +685,6 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking) Eface *eface; Iface *iface; #if 0 - Hmap *hmap; - MapType *maptype; - bool mapkey_kind, mapval_kind; - struct hash_gciter map_iter; - struct hash_gciter_data d; Hchan *chan; ChanType *chantype; #endif @@ -663,8 +693,8 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking) runtime_throw("scanblock: size of Workbuf is suboptimal"); // Memory arena parameters. - arena_start = runtime_mheap->arena_start; - arena_used = runtime_mheap->arena_used; + arena_start = runtime_mheap.arena_start; + arena_used = runtime_mheap.arena_used; stack_ptr = stack+nelem(stack)-1; @@ -685,10 +715,6 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking) // (Silence the compiler) #if 0 - map_ret = nil; - mapkey_size = mapval_size = 0; - mapkey_kind = mapval_kind = false; - mapkey_ti = mapval_ti = 0; chan = nil; chantype = nil; chan_ret = nil; @@ -759,23 +785,6 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking) stack_top.elemsize = pc[0]; stack_top.loop_or_ret = pc+1; break; - case TypeInfo_Map: - hmap = (Hmap*)b; - maptype = (MapType*)t; - if(hash_gciter_init(hmap, &map_iter)) { - mapkey_size = maptype->key->size; - mapkey_kind = maptype->key->kind; - mapkey_ti = (uintptr)maptype->key->gc | PRECISE; - mapval_size = maptype->elem->size; - mapval_kind = maptype->elem->kind; - mapval_ti = (uintptr)maptype->elem->gc | PRECISE; - - map_ret = nil; - pc = mapProg; - } else { - goto next_block; - } - break; case TypeInfo_Chan: chan = (Hchan*)b; chantype = (ChanType*)t; @@ -985,79 +994,6 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking) pc = (uintptr*)((byte*)pc + *(int32*)(pc+2)); // target of the CALL instruction continue; -#if 0 - case GC_MAP_PTR: - hmap = *(Hmap**)(stack_top.b + pc[1]); - if(hmap == nil) { - pc += 3; - continue; - } - if(markonly(hmap)) { - maptype = (MapType*)pc[2]; - if(hash_gciter_init(hmap, &map_iter)) { - mapkey_size = maptype->key->size; - mapkey_kind = maptype->key->kind; - mapkey_ti = (uintptr)maptype->key->gc | PRECISE; - mapval_size = maptype->elem->size; - mapval_kind = maptype->elem->kind; - mapval_ti = (uintptr)maptype->elem->gc | PRECISE; - - // Start mapProg. 
- map_ret = pc+3; - pc = mapProg+1; - } else { - pc += 3; - } - } else { - pc += 3; - } - continue; - - case GC_MAP_NEXT: - // Add all keys and values to buffers, mark all subtables. - while(hash_gciter_next(&map_iter, &d)) { - // buffers: reserve space for 2 objects. - if(ptrbufpos+2 >= ptrbuf_end) - flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj); - if(objbufpos+2 >= objbuf_end) - flushobjbuf(objbuf, &objbufpos, &wp, &wbuf, &nobj); - - if(d.st != nil) - markonly(d.st); - - if(d.key_data != nil) { - if(!(mapkey_kind & KindNoPointers) || d.indirectkey) { - if(!d.indirectkey) - *objbufpos++ = (Obj){d.key_data, mapkey_size, mapkey_ti}; - else { - if(Debug) { - obj = *(void**)d.key_data; - if(!(arena_start <= obj && obj < arena_used)) - runtime_throw("scanblock: inconsistent hashmap"); - } - *ptrbufpos++ = (struct PtrTarget){*(void**)d.key_data, mapkey_ti}; - } - } - if(!(mapval_kind & KindNoPointers) || d.indirectval) { - if(!d.indirectval) - *objbufpos++ = (Obj){d.val_data, mapval_size, mapval_ti}; - else { - if(Debug) { - obj = *(void**)d.val_data; - if(!(arena_start <= obj && obj < arena_used)) - runtime_throw("scanblock: inconsistent hashmap"); - } - *ptrbufpos++ = (struct PtrTarget){*(void**)d.val_data, mapval_ti}; - } - } - } - } - if(map_ret == nil) - goto next_block; - pc = map_ret; - continue; -#endif - case GC_REGION: obj = (void*)(stack_top.b + pc[1]); size = pc[2]; @@ -1071,7 +1007,6 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking) #if 0 case GC_CHAN_PTR: - // Similar to GC_MAP_PTR chan = *(Hchan**)(stack_top.b + pc[1]); if(chan == nil) { pc += 3; @@ -1191,14 +1126,14 @@ debug_scanblock(byte *b, uintptr n) obj = (byte*)vp[i]; // Words outside the arena cannot be pointers. - if((byte*)obj < runtime_mheap->arena_start || (byte*)obj >= runtime_mheap->arena_used) + if((byte*)obj < runtime_mheap.arena_start || (byte*)obj >= runtime_mheap.arena_used) continue; // Round down to word boundary. obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1)); // Consult span table to find beginning. - s = runtime_MHeap_LookupMaybe(runtime_mheap, obj); + s = runtime_MHeap_LookupMaybe(&runtime_mheap, obj); if(s == nil) continue; @@ -1207,15 +1142,13 @@ debug_scanblock(byte *b, uintptr n) if(s->sizeclass == 0) { obj = p; } else { - if((byte*)obj >= (byte*)s->limit) - continue; int32 i = ((byte*)obj - p)/size; obj = p+i*size; } // Now that we know the object header, reload bits. - off = (uintptr*)obj - (uintptr*)runtime_mheap->arena_start; - bitp = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1; + off = (uintptr*)obj - (uintptr*)runtime_mheap.arena_start; + bitp = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1; shift = off % wordsPerBitmapWord; xbits = *bitp; bits = xbits >> shift; @@ -1230,7 +1163,7 @@ debug_scanblock(byte *b, uintptr n) runtime_printf("found unmarked block %p in %p\n", obj, vp+i); // If object has no pointers, don't need to scan further. 
- if((bits & bitNoPointers) != 0) + if((bits & bitNoScan) != 0) continue; debug_scanblock(obj, size); @@ -1320,7 +1253,7 @@ getempty(Workbuf *b) runtime_lock(&work); if(work.nchunk < sizeof *b) { work.nchunk = 1<<20; - work.chunk = runtime_SysAlloc(work.nchunk); + work.chunk = runtime_SysAlloc(work.nchunk, &mstats.gc_sys); if(work.chunk == nil) runtime_throw("runtime: cannot allocate memory"); } @@ -1416,12 +1349,12 @@ addroot(Obj obj) cap = PageSize/sizeof(Obj); if(cap < 2*work.rootcap) cap = 2*work.rootcap; - new = (Obj*)runtime_SysAlloc(cap*sizeof(Obj)); + new = (Obj*)runtime_SysAlloc(cap*sizeof(Obj), &mstats.gc_sys); if(new == nil) runtime_throw("runtime: cannot allocate memory"); if(work.roots != nil) { runtime_memmove(new, work.roots, work.rootcap*sizeof(Obj)); - runtime_SysFree(work.roots, work.rootcap*sizeof(Obj)); + runtime_SysFree(work.roots, work.rootcap*sizeof(Obj), &mstats.gc_sys); } work.roots = new; work.rootcap = cap; @@ -1560,8 +1493,8 @@ addroots(void) runtime_time_scan(addroot); // MSpan.types - allspans = runtime_mheap->allspans; - for(spanidx=0; spanidx<runtime_mheap->nspan; spanidx++) { + allspans = runtime_mheap.allspans; + for(spanidx=0; spanidx<runtime_mheap.nspan; spanidx++) { s = allspans[spanidx]; if(s->state == MSpanInUse) { // The garbage collector ignores type pointers stored in MSpan.types: @@ -1589,10 +1522,7 @@ addroots(void) case Gdead: break; case Grunning: - if(gp != runtime_g()) - runtime_throw("mark - world not stopped"); - addstackroots(gp); - break; + runtime_throw("mark - world not stopped"); case Grunnable: case Gsyscall: case Gwaiting: @@ -1614,10 +1544,11 @@ handlespecial(byte *p, uintptr size) { FuncVal *fn; const struct __go_func_type *ft; + const struct __go_ptr_type *ot; FinBlock *block; Finalizer *f; - if(!runtime_getfinalizer(p, true, &fn, &ft)) { + if(!runtime_getfinalizer(p, true, &fn, &ft, &ot)) { runtime_setblockspecial(p, false); runtime_MProf_Free(p, size); return false; @@ -1626,9 +1557,7 @@ handlespecial(byte *p, uintptr size) runtime_lock(&finlock); if(finq == nil || finq->cnt == finq->cap) { if(finc == nil) { - finc = runtime_SysAlloc(PageSize); - if(finc == nil) - runtime_throw("runtime: cannot allocate memory"); + finc = runtime_persistentalloc(PageSize, 0, &mstats.gc_sys); finc->cap = (PageSize - sizeof(FinBlock)) / sizeof(Finalizer) + 1; finc->alllink = allfin; allfin = finc; @@ -1642,6 +1571,7 @@ handlespecial(byte *p, uintptr size) finq->cnt++; f->fn = fn; f->ft = ft; + f->ot = ot; f->arg = p; runtime_unlock(&finlock); return true; @@ -1668,10 +1598,10 @@ sweepspan(ParFor *desc, uint32 idx) m = runtime_m(); USED(&desc); - s = runtime_mheap->allspans[idx]; + s = runtime_mheap.allspans[idx]; if(s->state != MSpanInUse) return; - arena_start = runtime_mheap->arena_start; + arena_start = runtime_mheap.arena_start; p = (byte*)(s->start << PageShift); cl = s->sizeclass; size = s->elemsize; @@ -1735,9 +1665,9 @@ sweepspan(ParFor *desc, uint32 idx) // Free large span. runtime_unmarkspan(p, 1<<PageShift); *(uintptr*)p = (uintptr)0xdeaddeaddeaddeadll; // needs zeroing - runtime_MHeap_Free(runtime_mheap, s, 1); - c->local_alloc -= size; - c->local_nfree++; + runtime_MHeap_Free(&runtime_mheap, s, 1); + c->local_nlargefree++; + c->local_largefree += size; } else { // Free small object. 
switch(compression) { @@ -1758,12 +1688,9 @@ sweepspan(ParFor *desc, uint32 idx) } if(nfree) { - c->local_by_size[cl].nfree += nfree; - c->local_alloc -= size * nfree; - c->local_nfree += nfree; + c->local_nsmallfree[cl] += nfree; c->local_cachealloc -= nfree * size; - c->local_objects -= nfree; - runtime_MCentral_FreeSpan(&runtime_mheap->central[cl], s, nfree, head.next, end); + runtime_MCentral_FreeSpan(&runtime_mheap.central[cl], s, nfree, head.next, end); } } @@ -1777,10 +1704,10 @@ dumpspan(uint32 idx) MSpan *s; bool allocated, special; - s = runtime_mheap->allspans[idx]; + s = runtime_mheap.allspans[idx]; if(s->state != MSpanInUse) return; - arena_start = runtime_mheap->arena_start; + arena_start = runtime_mheap.arena_start; p = (byte*)(s->start << PageShift); sizeclass = s->sizeclass; size = s->elemsize; @@ -1838,7 +1765,7 @@ runtime_memorydump(void) { uint32 spanidx; - for(spanidx=0; spanidx<runtime_mheap->nspan; spanidx++) { + for(spanidx=0; spanidx<runtime_mheap.nspan; spanidx++) { dumpspan(spanidx); } } @@ -1880,13 +1807,28 @@ runtime_gchelper(void) static int32 gcpercent = GcpercentUnknown; static void -cachestats(GCStats *stats) +cachestats(void) +{ + MCache *c; + P *p, **pp; + + for(pp=runtime_allp; (p=*pp) != nil; pp++) { + c = p->mcache; + if(c==nil) + continue; + runtime_purgecachedstats(c); + } +} + +static void +updatememstats(GCStats *stats) { M *mp; + MSpan *s; MCache *c; P *p, **pp; uint32 i; - uint64 stacks_inuse; + uint64 stacks_inuse, smallfree; uint64 *src, *dst; if(stats) @@ -1902,29 +1844,80 @@ cachestats(GCStats *stats) runtime_memclr((byte*)&mp->gcstats, sizeof(mp->gcstats)); } } + mstats.stacks_inuse = stacks_inuse; + mstats.mcache_inuse = runtime_mheap.cachealloc.inuse; + mstats.mspan_inuse = runtime_mheap.spanalloc.inuse; + mstats.sys = mstats.heap_sys + mstats.stacks_sys + mstats.mspan_sys + + mstats.mcache_sys + mstats.buckhash_sys + mstats.gc_sys + mstats.other_sys; + + // Calculate memory allocator stats. + // During program execution we only count number of frees and amount of freed memory. + // Current number of alive object in the heap and amount of alive heap memory + // are calculated by scanning all spans. + // Total number of mallocs is calculated as number of frees plus number of alive objects. + // Similarly, total amount of allocated memory is calculated as amount of freed memory + // plus amount of alive heap memory. + mstats.alloc = 0; + mstats.total_alloc = 0; + mstats.nmalloc = 0; + mstats.nfree = 0; + for(i = 0; i < nelem(mstats.by_size); i++) { + mstats.by_size[i].nmalloc = 0; + mstats.by_size[i].nfree = 0; + } + + // Flush MCache's to MCentral. for(pp=runtime_allp; (p=*pp) != nil; pp++) { c = p->mcache; if(c==nil) continue; - runtime_purgecachedstats(c); - for(i=0; i<nelem(c->local_by_size); i++) { - mstats.by_size[i].nmalloc += c->local_by_size[i].nmalloc; - c->local_by_size[i].nmalloc = 0; - mstats.by_size[i].nfree += c->local_by_size[i].nfree; - c->local_by_size[i].nfree = 0; + runtime_MCache_ReleaseAll(c); + } + + // Aggregate local stats. + cachestats(); + + // Scan all spans and count number of alive objects. + for(i = 0; i < runtime_mheap.nspan; i++) { + s = runtime_mheap.allspans[i]; + if(s->state != MSpanInUse) + continue; + if(s->sizeclass == 0) { + mstats.nmalloc++; + mstats.alloc += s->elemsize; + } else { + mstats.nmalloc += s->ref; + mstats.by_size[s->sizeclass].nmalloc += s->ref; + mstats.alloc += s->ref*s->elemsize; } } - mstats.stacks_inuse = stacks_inuse; + + // Aggregate by size class. 
+ smallfree = 0; + mstats.nfree = runtime_mheap.nlargefree; + for(i = 0; i < nelem(mstats.by_size); i++) { + mstats.nfree += runtime_mheap.nsmallfree[i]; + mstats.by_size[i].nfree = runtime_mheap.nsmallfree[i]; + mstats.by_size[i].nmalloc += runtime_mheap.nsmallfree[i]; + smallfree += runtime_mheap.nsmallfree[i] * runtime_class_to_size[i]; + } + mstats.nmalloc += mstats.nfree; + + // Calculate derived stats. + mstats.total_alloc = mstats.alloc + runtime_mheap.largefree + smallfree; + mstats.heap_alloc = mstats.alloc; + mstats.heap_objects = mstats.nmalloc - mstats.nfree; } // Structure of arguments passed to function gc(). -// This allows the arguments to be passed via reflect_call. +// This allows the arguments to be passed via runtime_mcall. struct gc_args { - int32 force; + int64 start_time; // start time of GC in ns (just before stoptheworld) }; static void gc(struct gc_args *args); +static void mgc(G *gp); static int32 readgogc(void) @@ -1943,8 +1936,9 @@ void runtime_gc(int32 force) { M *m; - const byte *p; - struct gc_args a, *ap; + G *g; + struct gc_args a; + int32 i; // The atomic operations are not atomic if the uint64s // are not aligned on uint64 boundaries. This has been @@ -1967,30 +1961,77 @@ runtime_gc(int32 force) // while holding a lock. The next mallocgc // without a lock will do the gc instead. m = runtime_m(); - if(!mstats.enablegc || m->locks > 0 || runtime_panicking) + if(!mstats.enablegc || runtime_g() == m->g0 || m->locks > 0 || runtime_panicking) return; if(gcpercent == GcpercentUnknown) { // first time through - gcpercent = readgogc(); - - p = runtime_getenv("GOGCTRACE"); - if(p != nil) - gctrace = runtime_atoi(p); + runtime_lock(&runtime_mheap); + if(gcpercent == GcpercentUnknown) + gcpercent = readgogc(); + runtime_unlock(&runtime_mheap); } if(gcpercent < 0) return; - // Run gc on a bigger stack to eliminate - // a potentially large number of calls to runtime_morestack. - // But not when using gccgo. - a.force = force; - ap = &a; - gc(ap); + runtime_semacquire(&runtime_worldsema, false); + if(!force && mstats.heap_alloc < mstats.next_gc) { + // typically threads which lost the race to grab + // worldsema exit here when gc is done. + runtime_semrelease(&runtime_worldsema); + return; + } - if(gctrace > 1 && !force) { - a.force = 1; - gc(&a); + // Ok, we're doing it! Stop everybody else + a.start_time = runtime_nanotime(); + m->gcing = 1; + runtime_stoptheworld(); + + // Run gc on the g0 stack. We do this so that the g stack + // we're currently running on will no longer change. Cuts + // the root set down a bit (g0 stacks are not scanned, and + // we don't need to scan gc's internal state). Also an + // enabler for copyable stacks. + for(i = 0; i < (runtime_debug.gctrace > 1 ? 
2 : 1); i++) { + // switch to g0, call gc(&a), then switch back + g = runtime_g(); + g->param = &a; + g->status = Gwaiting; + g->waitreason = "garbage collection"; + runtime_mcall(mgc); + // record a new start time in case we're going around again + a.start_time = runtime_nanotime(); } + + // all done + m->gcing = 0; + m->locks++; + runtime_semrelease(&runtime_worldsema); + runtime_starttheworld(); + m->locks--; + + // now that gc is done, kick off finalizer thread if needed + if(finq != nil) { + runtime_lock(&finlock); + // kick off or wake up goroutine to run queued finalizers + if(fing == nil) + fing = __go_go(runfinq, nil); + else if(fingwait) { + fingwait = 0; + runtime_ready(fing); + } + runtime_unlock(&finlock); + } + // give the queued finalizers, if any, a chance to run + runtime_gosched(); +} + +static void +mgc(G *gp) +{ + gc(gp->param); + gp->param = nil; + gp->status = Grunning; + runtime_gogo(gp); } static void @@ -2004,29 +2045,20 @@ gc(struct gc_args *args) uint32 i; // Eface eface; - runtime_semacquire(&runtime_worldsema); - if(!args->force && mstats.heap_alloc < mstats.next_gc) { - runtime_semrelease(&runtime_worldsema); - return; - } - m = runtime_m(); - t0 = runtime_nanotime(); - - m->gcing = 1; - runtime_stoptheworld(); + t0 = args->start_time; if(CollectStats) runtime_memclr((byte*)&gcstats, sizeof(gcstats)); for(mp=runtime_allm; mp; mp=mp->alllink) - runtime_settype_flush(mp, false); + runtime_settype_flush(mp); heap0 = 0; obj0 = 0; - if(gctrace) { - cachestats(nil); + if(runtime_debug.gctrace) { + updatememstats(nil); heap0 = mstats.heap_alloc; obj0 = mstats.nmalloc - mstats.nfree; } @@ -2050,7 +2082,7 @@ gc(struct gc_args *args) work.nproc = runtime_gcprocs(); addroots(); runtime_parforsetup(work.markfor, work.nproc, work.nroot, nil, false, markroot); - runtime_parforsetup(work.sweepfor, work.nproc, runtime_mheap->nspan, nil, true, sweepspan); + runtime_parforsetup(work.sweepfor, work.nproc, runtime_mheap.nspan, nil, true, sweepspan); if(work.nproc > 1) { runtime_noteclear(&work.alldone); runtime_helpgc(work.nproc); @@ -2076,29 +2108,8 @@ gc(struct gc_args *args) if(work.nproc > 1) runtime_notesleep(&work.alldone); - cachestats(&stats); - - stats.nprocyield += work.sweepfor->nprocyield; - stats.nosyield += work.sweepfor->nosyield; - stats.nsleep += work.sweepfor->nsleep; - - mstats.next_gc = mstats.heap_alloc+(mstats.heap_alloc-runtime_stacks_sys)*gcpercent/100; - m->gcing = 0; - - if(finq != nil) { - m->locks++; // disable gc during the mallocs in newproc - // kick off or wake up goroutine to run queued finalizers - if(fing == nil) - fing = __go_go(runfinq, nil); - else if(fingwait) { - fingwait = 0; - runtime_ready(fing); - } - m->locks--; - } - - heap1 = mstats.heap_alloc; - obj1 = mstats.nmalloc - mstats.nfree; + cachestats(); + mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*gcpercent/100; t4 = runtime_nanotime(); mstats.last_gc = t4; @@ -2108,7 +2119,15 @@ gc(struct gc_args *args) if(mstats.debuggc) runtime_printf("pause %D\n", t4-t0); - if(gctrace) { + if(runtime_debug.gctrace) { + updatememstats(&stats); + heap1 = mstats.heap_alloc; + obj1 = mstats.nmalloc - mstats.nfree; + + stats.nprocyield += work.sweepfor->nprocyield; + stats.nosyield += work.sweepfor->nosyield; + stats.nsleep += work.sweepfor->nsleep; + runtime_printf("gc%d(%d): %D+%D+%D ms, %D -> %D MB %D -> %D (%D-%D) objects," " %D(%D) handoff, %D(%D) steal, %D/%D/%D yields\n", mstats.numgc, work.nproc, (t2-t1)/1000000, (t3-t2)/1000000, (t1-t0+t4-t3)/1000000, @@ -2137,16 +2156,13 @@ gc(struct 
gc_args *args) runtime_printf("\ttotal:\t%D\n", ninstr); runtime_printf("putempty: %D, getfull: %D\n", gcstats.putempty, gcstats.getfull); + + runtime_printf("markonly base lookup: bit %D word %D span %D\n", gcstats.markonly.foundbit, gcstats.markonly.foundword, gcstats.markonly.foundspan); + runtime_printf("flushptrbuf base lookup: bit %D word %D span %D\n", gcstats.flushptrbuf.foundbit, gcstats.flushptrbuf.foundword, gcstats.flushptrbuf.foundspan); } } runtime_MProf_GC(); - runtime_semrelease(&runtime_worldsema); - runtime_starttheworld(); - - // give the queued finalizers, if any, a chance to run - if(finq != nil) - runtime_gosched(); } void runtime_ReadMemStats(MStats *) @@ -2161,15 +2177,17 @@ runtime_ReadMemStats(MStats *stats) // because stoptheworld can only be used by // one goroutine at a time, and there might be // a pending garbage collection already calling it. - runtime_semacquire(&runtime_worldsema); + runtime_semacquire(&runtime_worldsema, false); m = runtime_m(); m->gcing = 1; runtime_stoptheworld(); - cachestats(nil); + updatememstats(nil); *stats = mstats; m->gcing = 0; + m->locks++; runtime_semrelease(&runtime_worldsema); runtime_starttheworld(); + m->locks--; } void runtime_debug_readGCStats(Slice*) @@ -2187,7 +2205,7 @@ runtime_debug_readGCStats(Slice *pauses) // Pass back: pauses, last gc (absolute time), number of gc, total pause ns. p = (uint64*)pauses->array; - runtime_lock(runtime_mheap); + runtime_lock(&runtime_mheap); n = mstats.numgc; if(n > nelem(mstats.pause_ns)) n = nelem(mstats.pause_ns); @@ -2202,7 +2220,7 @@ runtime_debug_readGCStats(Slice *pauses) p[n] = mstats.last_gc; p[n+1] = mstats.numgc; p[n+2] = mstats.pause_total_ns; - runtime_unlock(runtime_mheap); + runtime_unlock(&runtime_mheap); pauses->__count = n+3; } @@ -2214,14 +2232,14 @@ runtime_debug_setGCPercent(intgo in) { intgo out; - runtime_lock(runtime_mheap); + runtime_lock(&runtime_mheap); if(gcpercent == GcpercentUnknown) gcpercent = readgogc(); out = gcpercent; if(in < 0) in = -1; gcpercent = in; - runtime_unlock(runtime_mheap); + runtime_unlock(&runtime_mheap); return out; } @@ -2235,6 +2253,8 @@ gchelperstart(void) runtime_throw("gchelperstart: bad m->helpgc"); if(runtime_xchg(&bufferList[m->helpgc].busy, 1)) runtime_throw("gchelperstart: already busy"); + if(runtime_g() != m->g0) + runtime_throw("gchelper not running on g0 stack"); } static void @@ -2243,33 +2263,51 @@ runfinq(void* dummy __attribute__ ((unused))) Finalizer *f; FinBlock *fb, *next; uint32 i; + Eface ef; + Iface iface; for(;;) { - // There's no need for a lock in this section - // because it only conflicts with the garbage - // collector, and the garbage collector only - // runs when everyone else is stopped, and - // runfinq only stops at the gosched() or - // during the calls in the for loop. 
+ runtime_lock(&finlock); fb = finq; finq = nil; if(fb == nil) { fingwait = 1; - runtime_park(nil, nil, "finalizer wait"); + runtime_park(runtime_unlock, &finlock, "finalizer wait"); continue; } + runtime_unlock(&finlock); if(raceenabled) runtime_racefingo(); for(; fb; fb=next) { next = fb->next; for(i=0; i<(uint32)fb->cnt; i++) { + const Type *fint; void *param; f = &fb->fin[i]; - param = &f->arg; + fint = ((const Type**)f->ft->__in.array)[0]; + if(fint->kind == KindPtr) { + // direct use of pointer + param = &f->arg; + } else if(((const InterfaceType*)fint)->__methods.__count == 0) { + // convert to empty interface + ef.type = (const Type*)f->ot; + ef.__object = f->arg; + param = &ef; + } else { + // convert to interface with methods + iface.__methods = __go_convert_interface_2((const Type*)fint, + (const Type*)f->ot, + 1); + iface.__object = f->arg; + if(iface.__methods == nil) + runtime_throw("invalid type conversion in runfinq"); + param = &iface; + } reflect_call(f->ft, f->fn, 0, 0, ¶m, nil); f->fn = nil; f->arg = nil; + f->ot = nil; } fb->cnt = 0; fb->next = finc; @@ -2280,28 +2318,28 @@ runfinq(void* dummy __attribute__ ((unused))) } // mark the block at v of size n as allocated. -// If noptr is true, mark it as having no pointers. +// If noscan is true, mark it as not needing scanning. void -runtime_markallocated(void *v, uintptr n, bool noptr) +runtime_markallocated(void *v, uintptr n, bool noscan) { uintptr *b, obits, bits, off, shift; if(0) runtime_printf("markallocated %p+%p\n", v, n); - if((byte*)v+n > (byte*)runtime_mheap->arena_used || (byte*)v < runtime_mheap->arena_start) + if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start) runtime_throw("markallocated: bad pointer"); - off = (uintptr*)v - (uintptr*)runtime_mheap->arena_start; // word offset - b = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1; + off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start; // word offset + b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1; shift = off % wordsPerBitmapWord; for(;;) { obits = *b; bits = (obits & ~(bitMask<<shift)) | (bitAllocated<<shift); - if(noptr) - bits |= bitNoPointers<<shift; - if(runtime_singleproc) { + if(noscan) + bits |= bitNoScan<<shift; + if(runtime_gomaxprocs == 1) { *b = bits; break; } else { @@ -2319,19 +2357,19 @@ runtime_markfreed(void *v, uintptr n) uintptr *b, obits, bits, off, shift; if(0) - runtime_printf("markallocated %p+%p\n", v, n); + runtime_printf("markfreed %p+%p\n", v, n); - if((byte*)v+n > (byte*)runtime_mheap->arena_used || (byte*)v < runtime_mheap->arena_start) - runtime_throw("markallocated: bad pointer"); + if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start) + runtime_throw("markfreed: bad pointer"); - off = (uintptr*)v - (uintptr*)runtime_mheap->arena_start; // word offset - b = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1; + off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start; // word offset + b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1; shift = off % wordsPerBitmapWord; for(;;) { obits = *b; bits = (obits & ~(bitMask<<shift)) | (bitBlockBoundary<<shift); - if(runtime_singleproc) { + if(runtime_gomaxprocs == 1) { *b = bits; break; } else { @@ -2351,11 +2389,11 @@ runtime_checkfreed(void *v, uintptr n) if(!runtime_checking) return; - if((byte*)v+n > (byte*)runtime_mheap->arena_used || (byte*)v < runtime_mheap->arena_start) + if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v 
< runtime_mheap.arena_start) return; // not allocated, so okay - off = (uintptr*)v - (uintptr*)runtime_mheap->arena_start; // word offset - b = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1; + off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start; // word offset + b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1; shift = off % wordsPerBitmapWord; bits = *b>>shift; @@ -2374,7 +2412,7 @@ runtime_markspan(void *v, uintptr size, uintptr n, bool leftover) uintptr *b, off, shift; byte *p; - if((byte*)v+size*n > (byte*)runtime_mheap->arena_used || (byte*)v < runtime_mheap->arena_start) + if((byte*)v+size*n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start) runtime_throw("markspan: bad pointer"); p = v; @@ -2385,8 +2423,8 @@ runtime_markspan(void *v, uintptr size, uintptr n, bool leftover) // the entire span, and each bitmap word has bits for only // one span, so no other goroutines are changing these // bitmap words. - off = (uintptr*)p - (uintptr*)runtime_mheap->arena_start; // word offset - b = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1; + off = (uintptr*)p - (uintptr*)runtime_mheap.arena_start; // word offset + b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1; shift = off % wordsPerBitmapWord; *b = (*b & ~(bitMask<<shift)) | (bitBlockBoundary<<shift); } @@ -2398,14 +2436,14 @@ runtime_unmarkspan(void *v, uintptr n) { uintptr *p, *b, off; - if((byte*)v+n > (byte*)runtime_mheap->arena_used || (byte*)v < runtime_mheap->arena_start) + if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start) runtime_throw("markspan: bad pointer"); p = v; - off = p - (uintptr*)runtime_mheap->arena_start; // word offset + off = p - (uintptr*)runtime_mheap.arena_start; // word offset if(off % wordsPerBitmapWord != 0) runtime_throw("markspan: unaligned pointer"); - b = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1; + b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1; n /= PtrSize; if(n%wordsPerBitmapWord != 0) runtime_throw("unmarkspan: unaligned length"); @@ -2426,8 +2464,8 @@ runtime_blockspecial(void *v) if(DebugMark) return true; - off = (uintptr*)v - (uintptr*)runtime_mheap->arena_start; - b = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1; + off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start; + b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1; shift = off % wordsPerBitmapWord; return (*b & (bitSpecial<<shift)) != 0; @@ -2441,8 +2479,8 @@ runtime_setblockspecial(void *v, bool s) if(DebugMark) return; - off = (uintptr*)v - (uintptr*)runtime_mheap->arena_start; - b = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1; + off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start; + b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1; shift = off % wordsPerBitmapWord; for(;;) { @@ -2451,7 +2489,7 @@ runtime_setblockspecial(void *v, bool s) bits = obits | (bitSpecial<<shift); else bits = obits & ~(bitSpecial<<shift); - if(runtime_singleproc) { + if(runtime_gomaxprocs == 1) { *b = bits; break; } else { @@ -2476,13 +2514,13 @@ runtime_MHeap_MapBits(MHeap *h) uintptr n; n = (h->arena_used - h->arena_start) / wordsPerBitmapWord; - n = (n+bitmapChunk-1) & ~(bitmapChunk-1); + n = ROUND(n, bitmapChunk); if(h->bitmap_mapped >= n) return; page_size = getpagesize(); n = (n+page_size-1) & ~(page_size-1); - runtime_SysMap(h->arena_start - n, n - h->bitmap_mapped); + 
runtime_SysMap(h->arena_start - n, n - h->bitmap_mapped, &mstats.gc_sys); h->bitmap_mapped = n; } diff --git a/libgo/runtime/mgc0.h b/libgo/runtime/mgc0.h index d14fb37c209..f8abe6c9c1c 100644 --- a/libgo/runtime/mgc0.h +++ b/libgo/runtime/mgc0.h @@ -26,7 +26,6 @@ enum { GC_ARRAY_START, // Start an array with a fixed length. Args: (off, len, elemsize) GC_ARRAY_NEXT, // The next element of an array. Args: none GC_CALL, // Call a subroutine. Args: (off, objgcrel) - GC_MAP_PTR, // Go map. Args: (off, MapType*) GC_CHAN_PTR, // Go channel. Args: (off, ChanType*) GC_STRING, // Go string. Args: (off) GC_EFACE, // interface{}. Args: (off) diff --git a/libgo/runtime/mheap.c b/libgo/runtime/mheap.c index b4d94b68559..1b6cfd3dcde 100644 --- a/libgo/runtime/mheap.c +++ b/libgo/runtime/mheap.c @@ -36,12 +36,12 @@ RecordSpan(void *vh, byte *p) cap = 64*1024/sizeof(all[0]); if(cap < h->nspancap*3/2) cap = h->nspancap*3/2; - all = (MSpan**)runtime_SysAlloc(cap*sizeof(all[0])); + all = (MSpan**)runtime_SysAlloc(cap*sizeof(all[0]), &mstats.other_sys); if(all == nil) runtime_throw("runtime: cannot allocate memory"); if(h->allspans) { runtime_memmove(all, h->allspans, h->nspancap*sizeof(all[0])); - runtime_SysFree(h->allspans, h->nspancap*sizeof(all[0])); + runtime_SysFree(h->allspans, h->nspancap*sizeof(all[0]), &mstats.other_sys); } h->allspans = all; h->nspancap = cap; @@ -51,12 +51,12 @@ RecordSpan(void *vh, byte *p) // Initialize the heap; fetch memory using alloc. void -runtime_MHeap_Init(MHeap *h, void *(*alloc)(uintptr)) +runtime_MHeap_Init(MHeap *h) { uint32 i; - runtime_FixAlloc_Init(&h->spanalloc, sizeof(MSpan), alloc, RecordSpan, h); - runtime_FixAlloc_Init(&h->cachealloc, sizeof(MCache), alloc, nil, nil); + runtime_FixAlloc_Init(&h->spanalloc, sizeof(MSpan), RecordSpan, h, &mstats.mspan_sys); + runtime_FixAlloc_Init(&h->cachealloc, sizeof(MCache), nil, nil, &mstats.mcache_sys); // h->mapcache needs no init for(i=0; i<nelem(h->free); i++) runtime_MSpanList_Init(&h->free[i]); @@ -65,6 +65,23 @@ runtime_MHeap_Init(MHeap *h, void *(*alloc)(uintptr)) runtime_MCentral_Init(&h->central[i], i); } +void +runtime_MHeap_MapSpans(MHeap *h) +{ + uintptr n; + + // Map spans array, PageSize at a time. + n = (uintptr)h->arena_used; + if(sizeof(void*) == 8) + n -= (uintptr)h->arena_start; + n = n / PageSize * sizeof(h->spans[0]); + n = ROUND(n, PageSize); + if(h->spans_mapped >= n) + return; + runtime_SysMap((byte*)h->spans + h->spans_mapped, n - h->spans_mapped, &mstats.other_sys); + h->spans_mapped = n; +} + // Allocate a new span of npage pages from the heap // and record its size class in the HeapMap and HeapMapCache. MSpan* @@ -73,7 +90,8 @@ runtime_MHeap_Alloc(MHeap *h, uintptr npage, int32 sizeclass, int32 acct, int32 MSpan *s; runtime_lock(h); - runtime_purgecachedstats(runtime_m()->mcache); + mstats.heap_alloc += runtime_m()->mcache->local_cachealloc; + runtime_m()->mcache->local_cachealloc = 0; s = MHeap_AllocLocked(h, npage, sizeclass); if(s != nil) { mstats.heap_inuse += npage<<PageShift; @@ -138,6 +156,7 @@ HaveSpan: // is just a unique constant not seen elsewhere in the // runtime, as a clue in case it turns up unexpectedly in // memory or in a stack trace. + runtime_SysUsed((void*)(s->start<<PageShift), s->npages<<PageShift); *(uintptr*)(s->start<<PageShift) = (uintptr)0xbeadbeadbeadbeadULL; } s->npreleased = 0; @@ -145,17 +164,15 @@ HaveSpan: if(s->npages > npage) { // Trim extra and put it back in the heap. 
t = runtime_FixAlloc_Alloc(&h->spanalloc); - mstats.mspan_inuse = h->spanalloc.inuse; - mstats.mspan_sys = h->spanalloc.sys; runtime_MSpan_Init(t, s->start + npage, s->npages - npage); s->npages = npage; p = t->start; if(sizeof(void*) == 8) p -= ((uintptr)h->arena_start>>PageShift); if(p > 0) - h->map[p-1] = s; - h->map[p] = t; - h->map[p+t->npages-1] = t; + h->spans[p-1] = s; + h->spans[p] = t; + h->spans[p+t->npages-1] = t; *(uintptr*)(t->start<<PageShift) = *(uintptr*)(s->start<<PageShift); // copy "needs zeroing" mark t->state = MSpanInUse; MHeap_FreeLocked(h, t); @@ -172,7 +189,7 @@ HaveSpan: if(sizeof(void*) == 8) p -= ((uintptr)h->arena_start>>PageShift); for(n=0; n<npage; n++) - h->map[p+n] = s; + h->spans[p+n] = s; return s; } @@ -232,19 +249,16 @@ MHeap_Grow(MHeap *h, uintptr npage) return false; } } - mstats.heap_sys += ask; // Create a fake "in use" span and free it, so that the // right coalescing happens. s = runtime_FixAlloc_Alloc(&h->spanalloc); - mstats.mspan_inuse = h->spanalloc.inuse; - mstats.mspan_sys = h->spanalloc.sys; runtime_MSpan_Init(s, (uintptr)v>>PageShift, ask>>PageShift); p = s->start; if(sizeof(void*) == 8) p -= ((uintptr)h->arena_start>>PageShift); - h->map[p] = s; - h->map[p + s->npages - 1] = s; + h->spans[p] = s; + h->spans[p + s->npages - 1] = s; s->state = MSpanInUse; MHeap_FreeLocked(h, s); return true; @@ -261,7 +275,7 @@ runtime_MHeap_Lookup(MHeap *h, void *v) p = (uintptr)v; if(sizeof(void*) == 8) p -= (uintptr)h->arena_start; - return h->map[p >> PageShift]; + return h->spans[p >> PageShift]; } // Look up the span at the given address. @@ -283,10 +297,8 @@ runtime_MHeap_LookupMaybe(MHeap *h, void *v) q = p; if(sizeof(void*) == 8) q -= (uintptr)h->arena_start >> PageShift; - s = h->map[q]; - if(s == nil || p < s->start || p - s->start >= s->npages) - return nil; - if(s->state != MSpanInUse) + s = h->spans[q]; + if(s == nil || p < s->start || (byte*)v >= s->limit || s->state != MSpanInUse) return nil; return s; } @@ -296,7 +308,8 @@ void runtime_MHeap_Free(MHeap *h, MSpan *s, int32 acct) { runtime_lock(h); - runtime_purgecachedstats(runtime_m()->mcache); + mstats.heap_alloc += runtime_m()->mcache->local_cachealloc; + runtime_m()->mcache->local_cachealloc = 0; mstats.heap_inuse -= s->npages<<PageShift; if(acct) { mstats.heap_alloc -= s->npages<<PageShift; @@ -313,8 +326,6 @@ MHeap_FreeLocked(MHeap *h, MSpan *s) MSpan *t; PageID p; - if(s->types.sysalloc) - runtime_settype_sysfree(s); s->types.compression = MTypes_Empty; if(s->state != MSpanInUse || s->ref != 0) { @@ -334,31 +345,31 @@ MHeap_FreeLocked(MHeap *h, MSpan *s) p = s->start; if(sizeof(void*) == 8) p -= (uintptr)h->arena_start >> PageShift; - if(p > 0 && (t = h->map[p-1]) != nil && t->state != MSpanInUse) { - tp = (uintptr*)(t->start<<PageShift); - *tp |= *sp; // propagate "needs zeroing" mark + if(p > 0 && (t = h->spans[p-1]) != nil && t->state != MSpanInUse) { + if(t->npreleased == 0) { // cant't touch this otherwise + tp = (uintptr*)(t->start<<PageShift); + *tp |= *sp; // propagate "needs zeroing" mark + } s->start = t->start; s->npages += t->npages; s->npreleased = t->npreleased; // absorb released pages p -= t->npages; - h->map[p] = s; + h->spans[p] = s; runtime_MSpanList_Remove(t); t->state = MSpanDead; runtime_FixAlloc_Free(&h->spanalloc, t); - mstats.mspan_inuse = h->spanalloc.inuse; - mstats.mspan_sys = h->spanalloc.sys; } - if(p+s->npages < nelem(h->map) && (t = h->map[p+s->npages]) != nil && t->state != MSpanInUse) { - tp = (uintptr*)(t->start<<PageShift); - *sp |= *tp; // 
propagate "needs zeroing" mark + if((p+s->npages)*sizeof(h->spans[0]) < h->spans_mapped && (t = h->spans[p+s->npages]) != nil && t->state != MSpanInUse) { + if(t->npreleased == 0) { // cant't touch this otherwise + tp = (uintptr*)(t->start<<PageShift); + *sp |= *tp; // propagate "needs zeroing" mark + } s->npages += t->npages; s->npreleased += t->npreleased; - h->map[p + s->npages - 1] = s; + h->spans[p + s->npages - 1] = s; runtime_MSpanList_Remove(t); t->state = MSpanDead; runtime_FixAlloc_Free(&h->spanalloc, t); - mstats.mspan_inuse = h->spanalloc.inuse; - mstats.mspan_sys = h->spanalloc.sys; } // Insert s into appropriate list. @@ -388,7 +399,7 @@ scavengelist(MSpan *list, uint64 now, uint64 limit) sumreleased = 0; for(s=list->next; s != list; s=s->next) { - if((now - s->unusedsince) > limit) { + if((now - s->unusedsince) > limit && s->npreleased != s->npages) { released = (s->npages - s->npreleased) << PageShift; mstats.heap_released += released; sumreleased += released; @@ -399,19 +410,26 @@ scavengelist(MSpan *list, uint64 now, uint64 limit) return sumreleased; } -static uintptr -scavenge(uint64 now, uint64 limit) +static void +scavenge(int32 k, uint64 now, uint64 limit) { uint32 i; uintptr sumreleased; MHeap *h; - h = runtime_mheap; + h = &runtime_mheap; sumreleased = 0; for(i=0; i < nelem(h->free); i++) sumreleased += scavengelist(&h->free[i], now, limit); sumreleased += scavengelist(&h->large, now, limit); - return sumreleased; + + if(runtime_debug.gctrace > 0) { + if(sumreleased > 0) + runtime_printf("scvg%d: %D MB released\n", k, (uint64)sumreleased>>20); + runtime_printf("scvg%d: inuse: %D, idle: %D, sys: %D, released: %D, consumed: %D (MB)\n", + k, mstats.heap_inuse>>20, mstats.heap_idle>>20, mstats.heap_sys>>20, + mstats.heap_released>>20, (mstats.heap_sys - mstats.heap_released)>>20); + } } // Release (part of) unused memory to OS. 
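[Editor's note] Before the scavenger-loop hunk that follows, it may help to see the release policy of scavengelist in isolation. This is a simplified sketch rather than the runtime's code: the span list here is NULL-terminated instead of the circular MSpanList, madvise(MADV_DONTNEED) stands in for runtime_SysUnused, the page size is assumed to be 4K, and the mstats updates are left out.

#include <stdint.h>
#include <sys/mman.h>

enum { PageShift = 12 };          /* assumption: 4K pages for the sketch */

typedef struct Span Span;
struct Span {
	Span      *next;
	void      *base;          /* start address of the span's memory */
	uintptr_t  npages;        /* pages in the span */
	uintptr_t  npreleased;    /* pages already returned to the OS */
	uint64_t   unusedsince;   /* ns timestamp when the span went idle */
};

/* Release spans that have been idle longer than limit and still hold
   unreleased pages; returns the number of bytes handed back. */
static uintptr_t
scavenge_list(Span *list, uint64_t now, uint64_t limit)
{
	uintptr_t sumreleased, released;
	Span *s;

	sumreleased = 0;
	for(s = list; s != NULL; s = s->next) {
		if((now - s->unusedsince) <= limit || s->npreleased == s->npages)
			continue;
		released = (s->npages - s->npreleased) << PageShift;
		madvise(s->base, s->npages << PageShift, MADV_DONTNEED);  /* ~ runtime_SysUnused */
		s->npreleased = s->npages;
		sumreleased += released;
	}
	return sumreleased;
}

The extra s->npreleased == s->npages test corresponds to the guard the patch adds to scavengelist above, so a span whose pages were already handed back is not advised again on every pass.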
@@ -424,9 +442,6 @@ runtime_MHeap_Scavenger(void* dummy) MHeap *h; uint64 tick, now, forcegc, limit; uint32 k; - uintptr sumreleased; - const byte *env; - bool trace; Note note, *notep; USED(dummy); @@ -446,17 +461,10 @@ runtime_MHeap_Scavenger(void* dummy) else tick = limit/2; - trace = false; - env = runtime_getenv("GOGCTRACE"); - if(env != nil) - trace = runtime_atoi(env) > 0; - - h = runtime_mheap; + h = &runtime_mheap; for(k=0;; k++) { runtime_noteclear(&note); - runtime_entersyscallblock(); - runtime_notetsleep(&note, tick); - runtime_exitsyscall(); + runtime_notetsleepg(&note, tick); runtime_lock(h); now = runtime_nanotime(); @@ -468,24 +476,14 @@ runtime_MHeap_Scavenger(void* dummy) runtime_noteclear(&note); notep = &note; __go_go(forcegchelper, (void*)notep); - runtime_entersyscallblock(); - runtime_notesleep(&note); - runtime_exitsyscall(); - if(trace) + runtime_notetsleepg(&note, -1); + if(runtime_debug.gctrace > 0) runtime_printf("scvg%d: GC forced\n", k); runtime_lock(h); now = runtime_nanotime(); } - sumreleased = scavenge(now, limit); + scavenge(k, now, limit); runtime_unlock(h); - - if(trace) { - if(sumreleased > 0) - runtime_printf("scvg%d: %p MB released\n", k, sumreleased>>20); - runtime_printf("scvg%d: inuse: %D, idle: %D, sys: %D, released: %D, consumed: %D (MB)\n", - k, mstats.heap_inuse>>20, mstats.heap_idle>>20, mstats.heap_sys>>20, - mstats.heap_released>>20, (mstats.heap_sys - mstats.heap_released)>>20); - } } } @@ -495,9 +493,9 @@ void runtime_debug_freeOSMemory(void) { runtime_gc(1); - runtime_lock(runtime_mheap); - scavenge(~(uintptr)0, 0); - runtime_unlock(runtime_mheap); + runtime_lock(&runtime_mheap); + scavenge(-1, ~(uintptr)0, 0); + runtime_unlock(&runtime_mheap); } // Initialize a new span with the given start and npages. diff --git a/libgo/runtime/mprof.goc b/libgo/runtime/mprof.goc index 73d937908c6..7507dfc9173 100644 --- a/libgo/runtime/mprof.goc +++ b/libgo/runtime/mprof.goc @@ -14,44 +14,11 @@ package runtime #include "go-string.h" // NOTE(rsc): Everything here could use cas if contention became an issue. -static Lock proflock, alloclock; +static Lock proflock; // All memory allocations are local and do not escape outside of the profiler. // The profiler is forbidden from referring to garbage-collected memory. -static byte *pool; // memory allocation pool -static uintptr poolfree; // number of bytes left in the pool -enum { - Chunk = 32*PageSize, // initial size of the pool -}; - -// Memory allocation local to this file. -// There is no way to return the allocated memory back to the OS. -static void* -allocate(uintptr size) -{ - void *v; - - if(size == 0) - return nil; - - if(size >= Chunk/2) - return runtime_SysAlloc(size); - - runtime_lock(&alloclock); - if(size > poolfree) { - pool = runtime_SysAlloc(Chunk); - if(pool == nil) - runtime_throw("runtime: cannot allocate memory"); - poolfree = Chunk; - } - v = pool; - pool += size; - poolfree -= size; - runtime_unlock(&alloclock); - return v; -} - enum { MProf, BProf }; // profile types // Per-call-stack profiling information. @@ -104,10 +71,9 @@ stkbucket(int32 typ, Location *stk, int32 nstk, bool alloc) Bucket *b; if(buckhash == nil) { - buckhash = runtime_SysAlloc(BuckHashSize*sizeof buckhash[0]); + buckhash = runtime_SysAlloc(BuckHashSize*sizeof buckhash[0], &mstats.buckhash_sys); if(buckhash == nil) runtime_throw("runtime: cannot allocate memory"); - mstats.buckhash_sys += BuckHashSize*sizeof buckhash[0]; } // Hash stack.
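In the mprof.goc hunks above, the file-local allocate() pool (and its alloclock) is removed, and runtime_SysAlloc now takes a pointer to the statistic it should charge (&mstats.buckhash_sys for the bucket hash), so accounting happens inside the allocator instead of by hand at each call site. A minimal C sketch of that allocate-and-charge pattern — sys_alloc() is a stand-in built on calloc and the counters merely mimic mstats fields:

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

static uint64_t buckhash_sys;                   /* analogue of mstats.buckhash_sys */
static uint64_t other_sys;                      /* analogue of mstats.other_sys */

/* Allocate zeroed memory and bill it to the given statistic in one step,
 * so a caller can no longer allocate without updating the counter. */
static void*
sys_alloc(size_t n, uint64_t *stat)
{
	void *v = calloc(1, n);
	if(v != NULL)
		*stat += n;
	return v;
}

int
main(void)
{
	enum { BuckHashSize = 179999 };         /* illustrative table size */
	void *buckhash = sys_alloc(BuckHashSize*sizeof(void*), &buckhash_sys);
	void *scratch  = sys_alloc(4096, &other_sys);
	printf("buckhash_sys=%llu bytes, other_sys=%llu bytes\n",
	       (unsigned long long)buckhash_sys, (unsigned long long)other_sys);
	free(scratch);
	free(buckhash);
	return 0;
}

The remaining mprof hunks below switch the bucket and address-hash allocations to runtime_persistentalloc with the same counter.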
@@ -137,9 +103,7 @@ stkbucket(int32 typ, Location *stk, int32 nstk, bool alloc) if(!alloc) return nil; - b = allocate(sizeof *b + nstk*sizeof stk[0]); - if(b == nil) - runtime_throw("runtime: cannot allocate memory"); + b = runtime_persistentalloc(sizeof *b + nstk*sizeof stk[0], 0, &mstats.buckhash_sys); bucketmem += sizeof *b + nstk*sizeof stk[0]; runtime_memmove(b->stk, stk, nstk*sizeof stk[0]); b->typ = typ; @@ -241,7 +205,7 @@ setaddrbucket(uintptr addr, Bucket *b) if(ah->addr == (addr>>AddrHashShift)) goto found; - ah = allocate(sizeof *ah); + ah = runtime_persistentalloc(sizeof *ah, 0, &mstats.buckhash_sys); addrmem += sizeof *ah; ah->next = addrhash[h]; ah->addr = addr>>AddrHashShift; @@ -249,7 +213,7 @@ setaddrbucket(uintptr addr, Bucket *b) found: if((e = addrfree) == nil) { - e = allocate(64*sizeof *e); + e = runtime_persistentalloc(64*sizeof *e, 0, &mstats.buckhash_sys); addrmem += 64*sizeof *e; for(i=0; i+1<64; i++) e[i].next = &e[i+1]; @@ -296,16 +260,10 @@ found: void runtime_MProf_Malloc(void *p, uintptr size) { - M *m; int32 nstk; Location stk[32]; Bucket *b; - m = runtime_m(); - if(m->nomemprof > 0) - return; - - m->nomemprof++; nstk = runtime_callers(1, stk, 32); runtime_lock(&proflock); b = stkbucket(MProf, stk, nstk, true); @@ -313,22 +271,14 @@ runtime_MProf_Malloc(void *p, uintptr size) b->recent_alloc_bytes += size; setaddrbucket((uintptr)p, b); runtime_unlock(&proflock); - m = runtime_m(); - m->nomemprof--; } // Called when freeing a profiled block. void runtime_MProf_Free(void *p, uintptr size) { - M *m; Bucket *b; - m = runtime_m(); - if(m->nomemprof > 0) - return; - - m->nomemprof++; runtime_lock(&proflock); b = getaddrbucket((uintptr)p); if(b != nil) { @@ -336,8 +286,6 @@ runtime_MProf_Free(void *p, uintptr size) b->recent_free_bytes += size; } runtime_unlock(&proflock); - m = runtime_m(); - m->nomemprof--; } int64 runtime_blockprofilerate; // in CPU ticks @@ -347,7 +295,17 @@ void runtime_SetBlockProfileRate(intgo) __asm__ (GOSYM_PREFIX "runtime.SetBlockP void runtime_SetBlockProfileRate(intgo rate) { - runtime_atomicstore64((uint64*)&runtime_blockprofilerate, rate * runtime_tickspersecond() / (1000*1000*1000)); + int64 r; + + if(rate <= 0) + r = 0; // disable profiling + else { + // convert ns to cycles, use float64 to prevent overflow during multiplication + r = (float64)rate*runtime_tickspersecond()/(1000*1000*1000); + if(r == 0) + r = 1; + } + runtime_atomicstore64((uint64*)&runtime_blockprofilerate, r); } void @@ -510,10 +468,10 @@ func Stack(b Slice, all bool) (n int) { bool enablegc; sp = runtime_getcallersp(&b); - pc = runtime_getcallerpc(&b); + pc = (byte*)(uintptr)runtime_getcallerpc(&b); if(all) { - runtime_semacquire(&runtime_worldsema); + runtime_semacquire(&runtime_worldsema, false); runtime_m()->gcing = 1; runtime_stoptheworld(); enablegc = mstats.enablegc; @@ -530,7 +488,7 @@ func Stack(b Slice, all bool) (n int) { USED(sp); runtime_goroutineheader(g); runtime_traceback(); - runtime_goroutinetrailer(g); + runtime_printcreatedby(g); if(all) runtime_tracebackothers(g); n = b.__count - g->writenbuf; @@ -572,7 +530,7 @@ func GoroutineProfile(b Slice) (n int, ok bool) { ok = false; n = runtime_gcount(); if(n <= b.__count) { - runtime_semacquire(&runtime_worldsema); + runtime_semacquire(&runtime_worldsema, false); runtime_m()->gcing = 1; runtime_stoptheworld(); @@ -598,5 +556,5 @@ func GoroutineProfile(b Slice) (n int, ok bool) { void runtime_mprofinit(void) { - addrhash = allocate((1<<AddrHashBits)*sizeof *addrhash); + addrhash = 
runtime_persistentalloc((1<<AddrHashBits)*sizeof *addrhash, 0, &mstats.buckhash_sys); } diff --git a/libgo/runtime/msize.c b/libgo/runtime/msize.c index 3b5591c1b17..745a76958c8 100644 --- a/libgo/runtime/msize.c +++ b/libgo/runtime/msize.c @@ -31,7 +31,6 @@ int32 runtime_class_to_size[NumSizeClasses]; int32 runtime_class_to_allocnpages[NumSizeClasses]; -int32 runtime_class_to_transfercount[NumSizeClasses]; // The SizeToClass lookup is implemented using two arrays, // one mapping sizes <= 1024 to their class and one mapping @@ -42,17 +41,17 @@ int32 runtime_class_to_transfercount[NumSizeClasses]; // size divided by 128 (rounded up). The arrays are filled in // by InitSizes. -static int32 size_to_class8[1024/8 + 1]; -static int32 size_to_class128[(MaxSmallSize-1024)/128 + 1]; +int8 runtime_size_to_class8[1024/8 + 1]; +int8 runtime_size_to_class128[(MaxSmallSize-1024)/128 + 1]; -int32 -runtime_SizeToClass(int32 size) +static int32 +SizeToClass(int32 size) { if(size > MaxSmallSize) runtime_throw("SizeToClass - invalid size"); if(size > 1024-8) - return size_to_class128[(size-1024+127) >> 7]; - return size_to_class8[(size+7)>>3]; + return runtime_size_to_class128[(size-1024+127) >> 7]; + return runtime_size_to_class8[(size+7)>>3]; } void @@ -111,16 +110,16 @@ runtime_InitSizes(void) nextsize = 0; for (sizeclass = 1; sizeclass < NumSizeClasses; sizeclass++) { for(; nextsize < 1024 && nextsize <= runtime_class_to_size[sizeclass]; nextsize+=8) - size_to_class8[nextsize/8] = sizeclass; + runtime_size_to_class8[nextsize/8] = sizeclass; if(nextsize >= 1024) for(; nextsize <= runtime_class_to_size[sizeclass]; nextsize += 128) - size_to_class128[(nextsize-1024)/128] = sizeclass; + runtime_size_to_class128[(nextsize-1024)/128] = sizeclass; } // Double-check SizeToClass. if(0) { for(n=0; n < MaxSmallSize; n++) { - sizeclass = runtime_SizeToClass(n); + sizeclass = SizeToClass(n); if(sizeclass < 1 || sizeclass >= NumSizeClasses || runtime_class_to_size[sizeclass] < n) { runtime_printf("size=%d sizeclass=%d runtime_class_to_size=%d\n", n, sizeclass, runtime_class_to_size[sizeclass]); runtime_printf("incorrect SizeToClass"); @@ -137,16 +136,6 @@ runtime_InitSizes(void) // Copy out for statistics table. for(i=0; i<nelem(runtime_class_to_size); i++) mstats.by_size[i].size = runtime_class_to_size[i]; - - // Initialize the runtime_class_to_transfercount table. 
- for(sizeclass = 1; sizeclass < NumSizeClasses; sizeclass++) { - n = 64*1024 / runtime_class_to_size[sizeclass]; - if(n < 2) - n = 2; - if(n > 32) - n = 32; - runtime_class_to_transfercount[sizeclass] = n; - } return; dump: @@ -157,12 +146,14 @@ dump: runtime_printf(" %d", runtime_class_to_size[sizeclass]); runtime_printf("\n\n"); runtime_printf("size_to_class8:"); - for(i=0; i<nelem(size_to_class8); i++) - runtime_printf(" %d=>%d(%d)\n", i*8, size_to_class8[i], runtime_class_to_size[size_to_class8[i]]); + for(i=0; i<nelem(runtime_size_to_class8); i++) + runtime_printf(" %d=>%d(%d)\n", i*8, runtime_size_to_class8[i], + runtime_class_to_size[runtime_size_to_class8[i]]); runtime_printf("\n"); runtime_printf("size_to_class128:"); - for(i=0; i<nelem(size_to_class128); i++) - runtime_printf(" %d=>%d(%d)\n", i*128, size_to_class128[i], runtime_class_to_size[size_to_class128[i]]); + for(i=0; i<nelem(runtime_size_to_class128); i++) + runtime_printf(" %d=>%d(%d)\n", i*128, runtime_size_to_class128[i], + runtime_class_to_size[runtime_size_to_class128[i]]); runtime_printf("\n"); } runtime_throw("InitSizes failed"); diff --git a/libgo/runtime/netpoll.goc b/libgo/runtime/netpoll.goc index a0bd735f85c..02705734dd8 100644 --- a/libgo/runtime/netpoll.goc +++ b/libgo/runtime/netpoll.goc @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin linux +// +build darwin dragonfly freebsd linux netbsd openbsd windows package net @@ -19,7 +19,7 @@ package net // Integrated network poller (platform-independent part). // A particular implementation (epoll/kqueue) must define the following functions: // void runtime_netpollinit(void); // to initialize the poller -// int32 runtime_netpollopen(int32 fd, PollDesc *pd); // to arm edge-triggered notifications +// int32 runtime_netpollopen(uintptr fd, PollDesc *pd); // to arm edge-triggered notifications // and associate fd with pd. // An implementation must call the following function to denote that the pd is ready. // void runtime_netpollready(G **gpp, PollDesc *pd, int32 mode); @@ -30,7 +30,7 @@ struct PollDesc { PollDesc* link; // in pollcache, protected by pollcache.Lock Lock; // protectes the following fields - int32 fd; + uintptr fd; bool closing; uintptr seq; // protects from stale timers and ready notifications G* rg; // G waiting for read or READY (binary semaphore) @@ -52,8 +52,8 @@ static struct // seq is incremented when deadlines are changed or descriptor is reused. } pollcache; -static void netpollblock(PollDesc*, int32); -static G* netpollunblock(PollDesc*, int32); +static bool netpollblock(PollDesc*, int32); +static G* netpollunblock(PollDesc*, int32, bool); static void deadline(int64, Eface); static void readDeadline(int64, Eface); static void writeDeadline(int64, Eface); @@ -68,7 +68,7 @@ func runtime_pollServerInit() { runtime_netpollinit(); } -func runtime_pollOpen(fd int) (pd *PollDesc, errno int) { +func runtime_pollOpen(fd uintptr) (pd *PollDesc, errno int) { pd = allocPollDesc(); runtime_lock(pd); if(pd->wg != nil && pd->wg != READY) @@ -117,18 +117,35 @@ ret: func runtime_pollWait(pd *PollDesc, mode int) (err int) { runtime_lock(pd); err = checkerr(pd, mode); - if(err) - goto ret; - netpollblock(pd, mode); - err = checkerr(pd, mode); -ret: + if(err == 0) { + while(!netpollblock(pd, mode)) { + err = checkerr(pd, mode); + if(err != 0) + break; + // Can happen if timeout has fired and unblocked us, + // but before we had a chance to run, timeout has been reset. 
+ // Pretend it has not happened and retry. + } + } + runtime_unlock(pd); +} + +func runtime_pollWaitCanceled(pd *PollDesc, mode int) { + runtime_lock(pd); + // wait for ioready, ignore closing or timeouts. + while(!netpollblock(pd, mode)) + ; runtime_unlock(pd); } func runtime_pollSetDeadline(pd *PollDesc, d int64, mode int) { + G *rg, *wg; + runtime_lock(pd); - if(pd->closing) - goto ret; + if(pd->closing) { + runtime_unlock(pd); + return; + } pd->seq++; // invalidate current timers // Reset current timers. if(pd->rt.fv) { @@ -140,9 +157,8 @@ func runtime_pollSetDeadline(pd *PollDesc, d int64, mode int) { pd->wt.fv = nil; } // Setup new timers. - if(d != 0 && d <= runtime_nanotime()) { + if(d != 0 && d <= runtime_nanotime()) d = -1; - } if(mode == 'r' || mode == 'r'+'w') pd->rd = d; if(mode == 'w' || mode == 'r'+'w') @@ -172,8 +188,18 @@ func runtime_pollSetDeadline(pd *PollDesc, d int64, mode int) { runtime_addtimer(&pd->wt); } } -ret: + // If we set the new deadline in the past, unblock currently pending IO if any. + rg = nil; + wg = nil; + if(pd->rd < 0) + rg = netpollunblock(pd, 'r', false); + if(pd->wd < 0) + wg = netpollunblock(pd, 'w', false); runtime_unlock(pd); + if(rg) + runtime_ready(rg); + if(wg) + runtime_ready(wg); } func runtime_pollUnblock(pd *PollDesc) { @@ -184,8 +210,8 @@ func runtime_pollUnblock(pd *PollDesc) { runtime_throw("runtime_pollUnblock: already closing"); pd->closing = true; pd->seq++; - rg = netpollunblock(pd, 'r'); - wg = netpollunblock(pd, 'w'); + rg = netpollunblock(pd, 'r', false); + wg = netpollunblock(pd, 'w', false); if(pd->rt.fv) { runtime_deltimer(&pd->rt); pd->rt.fv = nil; @@ -201,6 +227,12 @@ func runtime_pollUnblock(pd *PollDesc) { runtime_ready(wg); } +uintptr +runtime_netpollfd(PollDesc *pd) +{ + return pd->fd; +} + // make pd ready, newly runnable goroutines (if any) are enqueued info gpp list void runtime_netpollready(G **gpp, PollDesc *pd, int32 mode) @@ -210,9 +242,9 @@ runtime_netpollready(G **gpp, PollDesc *pd, int32 mode) rg = wg = nil; runtime_lock(pd); if(mode == 'r' || mode == 'r'+'w') - rg = netpollunblock(pd, 'r'); + rg = netpollunblock(pd, 'r', true); if(mode == 'w' || mode == 'r'+'w') - wg = netpollunblock(pd, 'w'); + wg = netpollunblock(pd, 'w', true); runtime_unlock(pd); if(rg) { rg->schedlink = *gpp; @@ -234,7 +266,8 @@ checkerr(PollDesc *pd, int32 mode) return 0; } -static void +// returns true if IO is ready, or false if timedout or closed +static bool netpollblock(PollDesc *pd, int32 mode) { G **gpp; @@ -244,17 +277,20 @@ netpollblock(PollDesc *pd, int32 mode) gpp = &pd->wg; if(*gpp == READY) { *gpp = nil; - return; + return true; } if(*gpp != nil) - runtime_throw("epoll: double wait"); + runtime_throw("netpollblock: double wait"); *gpp = runtime_g(); runtime_park(runtime_unlock, &pd->Lock, "IO wait"); runtime_lock(pd); + if(runtime_g()->param) + return true; + return false; } static G* -netpollunblock(PollDesc *pd, int32 mode) +netpollunblock(PollDesc *pd, int32 mode, bool ioready) { G **gpp, *old; @@ -264,10 +300,15 @@ netpollunblock(PollDesc *pd, int32 mode) if(*gpp == READY) return nil; if(*gpp == nil) { - *gpp = READY; + // Only set READY for ioready. runtime_pollWait + // will check for timeout/cancel before waiting. 
+ if(ioready) + *gpp = READY; return nil; } old = *gpp; + // pass unblock reason onto blocked g + old->param = (void*)(uintptr)ioready; *gpp = nil; return old; } @@ -296,14 +337,14 @@ deadlineimpl(int64 now, Eface arg, bool read, bool write) runtime_throw("deadlineimpl: inconsistent read deadline"); pd->rd = -1; pd->rt.fv = nil; - rg = netpollunblock(pd, 'r'); + rg = netpollunblock(pd, 'r', false); } if(write) { if(pd->wd <= 0 || (pd->wt.fv == nil && !read)) runtime_throw("deadlineimpl: inconsistent write deadline"); pd->wd = -1; pd->wt.fv = nil; - wg = netpollunblock(pd, 'w'); + wg = netpollunblock(pd, 'w', false); } runtime_unlock(pd); if(rg) @@ -343,7 +384,7 @@ allocPollDesc(void) n = 1; // Must be in non-GC memory because can be referenced // only from epoll/kqueue internals. - pd = runtime_SysAlloc(n*sizeof(*pd)); + pd = runtime_persistentalloc(n*sizeof(*pd), 0, &mstats.other_sys); for(i = 0; i < n; i++) { pd[i].link = pollcache.first; pollcache.first = &pd[i]; diff --git a/libgo/runtime/netpoll_epoll.c b/libgo/runtime/netpoll_epoll.c index 98c5cbeb587..b98aa818c89 100644 --- a/libgo/runtime/netpoll_epoll.c +++ b/libgo/runtime/netpoll_epoll.c @@ -94,24 +94,24 @@ runtime_netpollinit(void) } int32 -runtime_netpollopen(int32 fd, PollDesc *pd) +runtime_netpollopen(uintptr fd, PollDesc *pd) { EpollEvent ev; int32 res; ev.events = EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLET; ev.data.ptr = (void*)pd; - res = runtime_epollctl(epfd, EPOLL_CTL_ADD, fd, &ev); + res = runtime_epollctl(epfd, EPOLL_CTL_ADD, (int32)fd, &ev); return -res; } int32 -runtime_netpollclose(int32 fd) +runtime_netpollclose(uintptr fd) { EpollEvent ev; int32 res; - res = runtime_epollctl(epfd, EPOLL_CTL_DEL, fd, &ev); + res = runtime_epollctl(epfd, EPOLL_CTL_DEL, (int32)fd, &ev); return -res; } diff --git a/libgo/runtime/netpoll_kqueue.c b/libgo/runtime/netpoll_kqueue.c index 9b79b2020df..78901611884 100644 --- a/libgo/runtime/netpoll_kqueue.c +++ b/libgo/runtime/netpoll_kqueue.c @@ -2,10 +2,11 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin +// +build darwin dragonfly freebsd netbsd openbsd #include "runtime.h" #include "defs_GOOS_GOARCH.h" +#include "os_GOOS.h" // Integrated network poller (kqueue-based implementation). @@ -27,7 +28,7 @@ runtime_netpollinit(void) } int32 -runtime_netpollopen(int32 fd, PollDesc *pd) +runtime_netpollopen(uintptr fd, PollDesc *pd) { Kevent ev[2]; int32 n; @@ -35,30 +36,22 @@ runtime_netpollopen(int32 fd, PollDesc *pd) // Arm both EVFILT_READ and EVFILT_WRITE in edge-triggered mode (EV_CLEAR) // for the whole fd lifetime. The notifications are automatically unregistered // when fd is closed. 
- ev[0].ident = fd; + ev[0].ident = (uint32)fd; ev[0].filter = EVFILT_READ; - ev[0].flags = EV_ADD|EV_RECEIPT|EV_CLEAR; + ev[0].flags = EV_ADD|EV_CLEAR; ev[0].fflags = 0; ev[0].data = 0; - ev[0].udata = (byte*)pd; + ev[0].udata = (kevent_udata)pd; ev[1] = ev[0]; ev[1].filter = EVFILT_WRITE; - n = runtime_kevent(kq, ev, 2, ev, 2, nil); + n = runtime_kevent(kq, ev, 2, nil, 0, nil); if(n < 0) return -n; - if(n != 2 || - (ev[0].flags&EV_ERROR) == 0 || ev[0].ident != fd || ev[0].filter != EVFILT_READ || - (ev[1].flags&EV_ERROR) == 0 || ev[1].ident != fd || ev[1].filter != EVFILT_WRITE) - return EFAULT; // just to mark out from other errors - if(ev[0].data != 0) - return ev[0].data; - if(ev[1].data != 0) - return ev[1].data; return 0; } int32 -runtime_netpollclose(int32 fd) +runtime_netpollclose(uintptr fd) { // Don't need to unregister because calling close() // on fd will remove any kevents that reference the descriptor. @@ -74,7 +67,7 @@ runtime_netpoll(bool block) static int32 lasterr; Kevent events[64], *ev; Timespec ts, *tp; - int32 n, i; + int32 n, i, mode; G *gp; if(kq == -1) @@ -97,10 +90,13 @@ retry: } for(i = 0; i < n; i++) { ev = &events[i]; + mode = 0; if(ev->filter == EVFILT_READ) - runtime_netpollready(&gp, (PollDesc*)ev->udata, 'r'); + mode += 'r'; if(ev->filter == EVFILT_WRITE) - runtime_netpollready(&gp, (PollDesc*)ev->udata, 'w'); + mode += 'w'; + if(mode) + runtime_netpollready(&gp, (PollDesc*)ev->udata, mode); } if(block && gp == nil) goto retry; diff --git a/libgo/runtime/netpoll_stub.c b/libgo/runtime/netpoll_stub.c index e28e38e2643..84eef754c8d 100644 --- a/libgo/runtime/netpoll_stub.c +++ b/libgo/runtime/netpoll_stub.c @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
-// +build freebsd netbsd openbsd plan9 windows +// +build plan9 #include "runtime.h" diff --git a/libgo/runtime/panic.c b/libgo/runtime/panic.c index 7d79256cf41..7a8d95b1ade 100644 --- a/libgo/runtime/panic.c +++ b/libgo/runtime/panic.c @@ -38,7 +38,7 @@ runtime_startpanic(void) M *m; m = runtime_m(); - if(runtime_mheap == 0 || runtime_mheap->cachealloc.size == 0) { // very early + if(runtime_mheap.cachealloc.size == 0) { // very early runtime_printf("runtime: panic before malloc heap initialized\n"); m->mallocing = 1; // tell rest of panic not to try to malloc } else if(m->mcache == nil) // can happen if called from signal handler or throw @@ -48,8 +48,13 @@ runtime_startpanic(void) runtime_exit(3); } m->dying = 1; + if(runtime_g() != nil) + runtime_g()->writebuf = nil; runtime_xadd(&runtime_panicking, 1); runtime_lock(&paniclk); + if(runtime_debug.schedtrace > 0 || runtime_debug.scheddetail > 0) + runtime_schedtrace(true); + runtime_freezetheworld(); } void @@ -58,18 +63,22 @@ runtime_dopanic(int32 unused __attribute__ ((unused))) G *g; static bool didothers; bool crash; + int32 t; g = runtime_g(); if(g->sig != 0) runtime_printf("[signal %x code=%p addr=%p]\n", g->sig, (void*)g->sigcode0, (void*)g->sigcode1); - if(runtime_gotraceback(&crash)){ + if((t = runtime_gotraceback(&crash)) > 0){ if(g != runtime_m()->g0) { runtime_printf("\n"); runtime_goroutineheader(g); runtime_traceback(); - runtime_goroutinetrailer(g); + runtime_printcreatedby(g); + } else if(t >= 2 || runtime_m()->throwing > 0) { + runtime_printf("\nruntime stack:\n"); + runtime_traceback(); } if(!didothers) { didothers = true; @@ -113,11 +122,15 @@ runtime_panicstring(const char *s) { Eface err; + if(runtime_m()->mallocing) { + runtime_printf("panic: %s\n", s); + runtime_throw("panic during malloc"); + } if(runtime_m()->gcing) { runtime_printf("panic: %s\n", s); runtime_throw("panic during gc"); } - runtime_newErrorString(runtime_gostringnocopy((const byte*)s), &err); + runtime_newErrorCString(s, &err); runtime_panic(err); } diff --git a/libgo/runtime/parfor.c b/libgo/runtime/parfor.c index c0e40f5081b..9489d8dc2ec 100644 --- a/libgo/runtime/parfor.c +++ b/libgo/runtime/parfor.c @@ -151,9 +151,9 @@ runtime_parfordo(ParFor *desc) if(victim >= tid) victim++; victimpos = &desc->thr[victim].pos; - pos = runtime_atomicload64(victimpos); for(;;) { // See if it has any work. + pos = runtime_atomicload64(victimpos); begin = (uint32)pos; end = (uint32)(pos>>32); if(begin+1 >= end) { @@ -166,7 +166,7 @@ runtime_parfordo(ParFor *desc) } begin2 = begin + (end-begin)/2; newpos = (uint64)begin | (uint64)begin2<<32; - if(runtime_cas64(victimpos, &pos, newpos)) { + if(runtime_cas64(victimpos, pos, newpos)) { begin = begin2; break; } diff --git a/libgo/runtime/print.c b/libgo/runtime/print.c index f5c6e82840e..766ddbdc499 100644 --- a/libgo/runtime/print.c +++ b/libgo/runtime/print.c @@ -5,6 +5,7 @@ #include <stdarg.h> #include "runtime.h" #include "array.h" +#include "go-type.h" //static Lock debuglock; @@ -13,7 +14,7 @@ static void go_vprintf(const char*, va_list); // write to goroutine-local buffer if diverting output, // or else standard error. 
static void -gwrite(const void *v, int32 n) +gwrite(const void *v, intgo n) { G* g = runtime_g(); @@ -301,8 +302,6 @@ runtime_printpointer(void *p) void runtime_printstring(String v) { - // extern uint32 runtime_maxstring; - // if(v.len > runtime_maxstring) { // gwrite("[string too long]", 17); // return; diff --git a/libgo/runtime/proc.c b/libgo/runtime/proc.c index 0e77a3e0603..ab7cde43863 100644 --- a/libgo/runtime/proc.c +++ b/libgo/runtime/proc.c @@ -231,8 +231,8 @@ kickoff(void) } // Switch context to a different goroutine. This is like longjmp. -static void runtime_gogo(G*) __attribute__ ((noinline)); -static void +void runtime_gogo(G*) __attribute__ ((noinline)); +void runtime_gogo(G* newg) { #ifdef USING_SPLIT_STACK @@ -249,8 +249,8 @@ runtime_gogo(G* newg) // setjmp. Because getcontext always returns 0, unlike setjmp, we use // g->fromgogo as a code. It will be true if we got here via // setcontext. g == nil the first time this is called in a new m. -static void runtime_mcall(void (*)(G*)) __attribute__ ((noinline)); -static void +void runtime_mcall(void (*)(G*)) __attribute__ ((noinline)); +void runtime_mcall(void (*pfn)(G*)) { M *mp; @@ -365,8 +365,9 @@ struct Sched { uint64 goidgen; M* midle; // idle m's waiting for work int32 nmidle; // number of idle m's waiting for work - int32 mlocked; // number of locked m's waiting for work + int32 nmidlelocked; // number of locked m's waiting for work int32 mcount; // number of m's that have been created + int32 maxmcount; // maximum number of m's allowed (or die) P* pidle; // idle P's uint32 npidle; @@ -381,6 +382,7 @@ struct Sched { Lock gflock; G* gfree; + uint32 gcwaiting; // gc is waiting to run int32 stopwait; Note stopnote; uint32 sysmonwait; @@ -396,10 +398,8 @@ enum { MaxGomaxprocs = 1<<8 }; Sched runtime_sched; int32 runtime_gomaxprocs; -bool runtime_singleproc; -bool runtime_iscgo = true; uint32 runtime_needextram = 1; -uint32 runtime_gcwaiting; +bool runtime_iscgo = true; M runtime_m0; G runtime_g0; // idle goroutine for m0 G* runtime_allg; @@ -409,6 +409,7 @@ P** runtime_allp; M* runtime_extram; int8* runtime_goos; int32 runtime_ncpu; +bool runtime_precisestack; static int32 newprocs; void* runtime_mstart(void*); @@ -431,21 +432,22 @@ static void wakep(void); static void stoplockedm(void); static void startlockedm(G*); static void sysmon(void); -static uint32 retake(uint32*); -static void inclocked(int32); +static uint32 retake(int64); +static void incidlelocked(int32); static void checkdead(void); static void exitsyscall0(G*); static void park0(G*); -static void gosched0(G*); static void goexit0(G*); static void gfput(P*, G*); static G* gfget(P*); static void gfpurge(P*); static void globrunqput(G*); -static G* globrunqget(P*); +static G* globrunqget(P*, int32); static P* pidleget(void); static void pidleput(P*); static void injectglist(G*); +static bool preemptall(void); +static bool exitsyscallfast(void); // The bootstrap sequence is: // @@ -460,6 +462,7 @@ runtime_schedinit(void) { int32 n, procs; const byte *p; + Eface i; m = &runtime_m0; g = &runtime_g0; @@ -470,18 +473,22 @@ runtime_schedinit(void) initcontext(); inittlssize(); - m->nomemprof++; + runtime_sched.maxmcount = 10000; + runtime_precisestack = 0; + runtime_mprofinit(); runtime_mallocinit(); mcommoninit(m); + + // Initialize the itable value for newErrorCString, + // so that the next time it gets called, possibly + // in a fault during a garbage collection, it will not + // need to allocated memory. 
+ runtime_newErrorCString(0, &i); runtime_goargs(); runtime_goenvs(); - - // For debugging: - // Allocate internal symbol table representation now, - // so that we don't need to call malloc when we crash. - // runtime_findfunc(0); + runtime_parsedebugvars(); runtime_sched.lastpoll = runtime_nanotime(); procs = 1; @@ -496,16 +503,26 @@ runtime_schedinit(void) // Can not enable GC until all roots are registered. // mstats.enablegc = 1; - m->nomemprof--; + + // if(raceenabled) + // g->racectx = runtime_raceinit(); } extern void main_init(void) __asm__ (GOSYM_PREFIX "__go_init_main"); extern void main_main(void) __asm__ (GOSYM_PREFIX "main.main"); +static void +initDone(void *arg __attribute__ ((unused))) { + runtime_unlockOSThread(); +}; + // The main goroutine. void runtime_main(void* dummy __attribute__((unused))) { + Defer d; + _Bool frame; + newm(sysmon, nil); // Lock the main goroutine onto this, the main OS thread, @@ -515,10 +532,24 @@ runtime_main(void* dummy __attribute__((unused))) // by calling runtime.LockOSThread during initialization // to preserve the lock. runtime_lockOSThread(); + + // Defer unlock so that runtime.Goexit during init does the unlock too. + d.__pfn = initDone; + d.__next = g->defer; + d.__arg = (void*)-1; + d.__panic = g->panic; + d.__retaddr = nil; + d.__frame = &frame; + g->defer = &d; + if(m != &runtime_m0) runtime_throw("runtime_main not on m0"); __go_go(runtime_MHeap_Scavenger, nil); main_init(); + + if(g->defer != &d || d.__pfn != initDone) + runtime_throw("runtime: bad defer entry after init"); + g->defer = d.__next; runtime_unlockOSThread(); // For gccgo we have to wait until after main is initialized @@ -574,7 +605,7 @@ runtime_goroutineheader(G *gp) } void -runtime_goroutinetrailer(G *g) +runtime_printcreatedby(G *g) { if(g != nil && g->gopc != 0 && g->goid != 1) { String fn; @@ -604,8 +635,28 @@ runtime_tracebackothers(G * volatile me) tb.gp = me; traceback = runtime_gotraceback(nil); + + // Show the current goroutine first, if we haven't already. + if((gp = m->curg) != nil && gp != me) { + runtime_printf("\n"); + runtime_goroutineheader(gp); + gp->traceback = &tb; + +#ifdef USING_SPLIT_STACK + __splitstack_getcontext(&me->stack_context[0]); +#endif + getcontext(&me->context); + + if(gp->traceback != nil) { + runtime_gogo(gp); + } + + runtime_printtrace(tb.locbuf, tb.c, false); + runtime_printcreatedby(gp); + } + for(gp = runtime_allg; gp != nil; gp = gp->alllink) { - if(gp == me || gp->status == Gdead) + if(gp == me || gp == m->curg || gp->status == Gdead) continue; if(gp->issystem && traceback < 2) continue; @@ -620,25 +671,38 @@ runtime_tracebackothers(G * volatile me) // This means that if g is running or in a syscall, we // can't reliably print a stack trace. FIXME. 
- if(gp->status == Gsyscall || gp->status == Grunning) { - runtime_printf("no stack trace available\n"); - runtime_goroutinetrailer(gp); - continue; - } - gp->traceback = &tb; + if(gp->status == Grunning) { + runtime_printf("\tgoroutine running on other thread; stack unavailable\n"); + runtime_printcreatedby(gp); + } else if(gp->status == Gsyscall) { + runtime_printf("\tgoroutine in C code; stack unavailable\n"); + runtime_printcreatedby(gp); + } else { + gp->traceback = &tb; #ifdef USING_SPLIT_STACK - __splitstack_getcontext(&me->stack_context[0]); + __splitstack_getcontext(&me->stack_context[0]); #endif - getcontext(&me->context); + getcontext(&me->context); - if(gp->traceback != nil) { - runtime_gogo(gp); + if(gp->traceback != nil) { + runtime_gogo(gp); + } + + runtime_printtrace(tb.locbuf, tb.c, false); + runtime_printcreatedby(gp); } + } +} - runtime_printtrace(tb.locbuf, tb.c, false); - runtime_goroutinetrailer(gp); +static void +checkmcount(void) +{ + // sched lock is held + if(runtime_sched.mcount > runtime_sched.maxmcount) { + runtime_printf("runtime: program exceeds %d-thread limit\n", runtime_sched.maxmcount); + runtime_throw("thread exhaustion"); } } @@ -669,7 +733,7 @@ mcommoninit(M *mp) runtime_lock(&runtime_sched); mp->id = runtime_sched.mcount++; - + checkmcount(); runtime_mpreinit(mp); // Add to runtime_allm so garbage collector doesn't free m @@ -686,6 +750,7 @@ void runtime_ready(G *gp) { // Mark runnable. + m->locks++; // disable preemption because it can be holding p in a local var if(gp->status != Gwaiting) { runtime_printf("goroutine %D has status %d\n", gp->goid, gp->status); runtime_throw("bad g->status in ready"); @@ -694,6 +759,7 @@ runtime_ready(G *gp) runqput(m->p, gp); if(runtime_atomicload(&runtime_sched.npidle) != 0 && runtime_atomicload(&runtime_sched.nmspinning) == 0) // TODO: fast atomic wakep(); + m->locks--; } int32 @@ -753,6 +819,34 @@ runtime_helpgc(int32 nproc) runtime_unlock(&runtime_sched); } +// Similar to stoptheworld but best-effort and can be called several times. +// There is no reverse operation, used during crashing. +// This function must not lock any mutexes. 
+void +runtime_freezetheworld(void) +{ + int32 i; + + if(runtime_gomaxprocs == 1) + return; + // stopwait and preemption requests can be lost + // due to races with concurrently executing threads, + // so try several times + for(i = 0; i < 5; i++) { + // this should tell the scheduler to not start any new goroutines + runtime_sched.stopwait = 0x7fffffff; + runtime_atomicstore((uint32*)&runtime_sched.gcwaiting, 1); + // this should stop running goroutines + if(!preemptall()) + break; // no running goroutines + runtime_usleep(1000); + } + // to be sure + runtime_usleep(1000); + preemptall(); + runtime_usleep(1000); +} + void runtime_stoptheworld(void) { @@ -763,7 +857,8 @@ runtime_stoptheworld(void) runtime_lock(&runtime_sched); runtime_sched.stopwait = runtime_gomaxprocs; - runtime_atomicstore((uint32*)&runtime_gcwaiting, 1); + runtime_atomicstore((uint32*)&runtime_sched.gcwaiting, 1); + preemptall(); // stop current P m->p->status = Pgcstop; runtime_sched.stopwait--; @@ -782,7 +877,7 @@ runtime_stoptheworld(void) wait = runtime_sched.stopwait > 0; runtime_unlock(&runtime_sched); - // wait for remaining P's to stop voluntary + // wait for remaining P's to stop voluntarily if(wait) { runtime_notesleep(&runtime_sched.stopnote); runtime_noteclear(&runtime_sched.stopnote); @@ -810,6 +905,7 @@ runtime_starttheworld(void) G *gp; bool add; + m->locks++; // disable preemption because it can be holding p in a local var gp = runtime_netpoll(false); // non-blocking injectglist(gp); add = needaddgcproc(); @@ -819,7 +915,7 @@ runtime_starttheworld(void) newprocs = 0; } else procresize(runtime_gomaxprocs); - runtime_gcwaiting = 0; + runtime_sched.gcwaiting = 0; p1 = nil; while((p = pidleget()) != nil) { @@ -829,16 +925,9 @@ runtime_starttheworld(void) pidleput(p); break; } - mp = mget(); - if(mp == nil) { - p->link = p1; - p1 = p; - continue; - } - if(mp->nextp) - runtime_throw("starttheworld: inconsistent mp->nextp"); - mp->nextp = p; - runtime_notewakeup(&mp->park); + p->m = mget(); + p->link = p1; + p1 = p; } if(runtime_sched.sysmonwait) { runtime_sched.sysmonwait = false; @@ -849,8 +938,18 @@ runtime_starttheworld(void) while(p1) { p = p1; p1 = p1->link; - add = false; - newm(nil, p); + if(p->m) { + mp = p->m; + p->m = nil; + if(mp->nextp) + runtime_throw("starttheworld: inconsistent mp->nextp"); + mp->nextp = p; + runtime_notewakeup(&mp->park); + } else { + // Start M to run P. Do not start another M below. + newm(nil, p); + add = false; + } } if(add) { @@ -863,6 +962,7 @@ runtime_starttheworld(void) // the maximum number of procs. newm(mhelpgc, nil); } + m->locks--; } // Called to start an M. @@ -909,11 +1009,8 @@ runtime_mstart(void* mp) // Install signal handlers; after minit so that minit can // prepare the thread to be able to handle the signals. - if(m == &runtime_m0) { + if(m == &runtime_m0) runtime_initsig(); - if(runtime_iscgo) - runtime_newextram(); - } if(m->mstartfn) m->mstartfn(); @@ -1015,6 +1112,14 @@ runtime_needm(void) { M *mp; + if(runtime_needextram) { + // Can happen if C/C++ code calls Go from a global ctor. + // Can not throw, because scheduler is not initialized yet. + runtime_write(2, "fatal error: cgo callback before cgo call\n", + sizeof("fatal error: cgo callback before cgo call\n")-1); + runtime_exit(1); + } + // Lock extra list, take head, unlock popped list. 
// nilokay=false is safe here because of the invariant above, // that the extra list always contains or will soon contain @@ -1090,6 +1195,7 @@ runtime_newextram(void) mp->locked = LockInternal; mp->lockedg = gp; gp->lockedm = mp; + gp->goid = runtime_xadd64(&runtime_sched.goidgen, 1); // put on allg for garbage collector runtime_lock(&runtime_sched); if(runtime_lastg == nil) @@ -1325,7 +1431,7 @@ handoffp(P *p) return; } runtime_lock(&runtime_sched); - if(runtime_gcwaiting) { + if(runtime_sched.gcwaiting) { p->status = Pgcstop; if(--runtime_sched.stopwait == 0) runtime_notewakeup(&runtime_sched.stopnote); @@ -1373,7 +1479,7 @@ stoplockedm(void) p = releasep(); handoffp(p); } - inclocked(1); + incidlelocked(1); // Wait until another thread schedules lockedg again. runtime_notesleep(&m->park); runtime_noteclear(&m->park); @@ -1396,7 +1502,7 @@ startlockedm(G *gp) if(mp->nextp) runtime_throw("startlockedm: m has p"); // directly handoff current P to the locked m - inclocked(-1); + incidlelocked(-1); p = releasep(); mp->nextp = p; runtime_notewakeup(&mp->park); @@ -1410,7 +1516,7 @@ gcstopm(void) { P *p; - if(!runtime_gcwaiting) + if(!runtime_sched.gcwaiting) runtime_throw("gcstopm: not waiting for gc"); if(m->spinning) { m->spinning = false; @@ -1437,7 +1543,7 @@ execute(G *gp) runtime_throw("execute: bad g status"); } gp->status = Grunning; - m->p->tick++; + m->p->schedtick++; m->curg = gp; gp->m = m; @@ -1459,7 +1565,7 @@ findrunnable(void) int32 i; top: - if(runtime_gcwaiting) { + if(runtime_sched.gcwaiting) { gcstopm(); goto top; } @@ -1470,7 +1576,7 @@ top: // global runq if(runtime_sched.runqsize) { runtime_lock(&runtime_sched); - gp = globrunqget(m->p); + gp = globrunqget(m->p, 0); runtime_unlock(&runtime_sched); if(gp) return gp; @@ -1493,7 +1599,7 @@ top: } // random steal from other P's for(i = 0; i < 2*runtime_gomaxprocs; i++) { - if(runtime_gcwaiting) + if(runtime_sched.gcwaiting) goto top; p = runtime_allp[runtime_fastrand1()%runtime_gomaxprocs]; if(p == m->p) @@ -1506,12 +1612,12 @@ top: stop: // return P and block runtime_lock(&runtime_sched); - if(runtime_gcwaiting) { + if(runtime_sched.gcwaiting) { runtime_unlock(&runtime_sched); goto top; } if(runtime_sched.runqsize) { - gp = globrunqget(m->p); + gp = globrunqget(m->p, 0); runtime_unlock(&runtime_sched); return gp; } @@ -1561,6 +1667,25 @@ stop: goto top; } +static void +resetspinning(void) +{ + int32 nmspinning; + + if(m->spinning) { + m->spinning = false; + nmspinning = runtime_xadd(&runtime_sched.nmspinning, -1); + if(nmspinning < 0) + runtime_throw("findrunnable: negative nmspinning"); + } else + nmspinning = runtime_atomicload(&runtime_sched.nmspinning); + + // M wakeup policy is deliberately somewhat conservative (see nmspinning handling), + // so see if we need to wakeup another P here. + if (nmspinning == 0 && runtime_atomicload(&runtime_sched.npidle) > 0) + wakep(); +} + // Injects the list of runnable G's into the scheduler. // Can run concurrently with GC. static void @@ -1590,33 +1715,44 @@ static void schedule(void) { G *gp; + uint32 tick; if(m->locks) runtime_throw("schedule: holding locks"); top: - if(runtime_gcwaiting) { + if(runtime_sched.gcwaiting) { gcstopm(); goto top; } - gp = runqget(m->p); - if(gp == nil) - gp = findrunnable(); - - if(m->spinning) { - m->spinning = false; - runtime_xadd(&runtime_sched.nmspinning, -1); + gp = nil; + // Check the global runnable queue once in a while to ensure fairness. 
+ // Otherwise two goroutines can completely occupy the local runqueue + // by constantly respawning each other. + tick = m->p->schedtick; + // This is a fancy way to say tick%61==0, + // it uses 2 MUL instructions instead of a single DIV and so is faster on modern processors. + if(tick - (((uint64)tick*0x4325c53fu)>>36)*61 == 0 && runtime_sched.runqsize > 0) { + runtime_lock(&runtime_sched); + gp = globrunqget(m->p, 1); + runtime_unlock(&runtime_sched); + if(gp) + resetspinning(); + } + if(gp == nil) { + gp = runqget(m->p); + if(gp && m->spinning) + runtime_throw("schedule: spinning with local work"); + } + if(gp == nil) { + gp = findrunnable(); // blocks until work is available + resetspinning(); } - - // M wakeup policy is deliberately somewhat conservative (see nmspinning handling), - // so see if we need to wakeup another M here. - if (m->p->runqhead != m->p->runqtail && - runtime_atomicload(&runtime_sched.nmspinning) == 0 && - runtime_atomicload(&runtime_sched.npidle) > 0) // TODO: fast atomic - wakep(); if(gp->lockedm) { + // Hands off own p to the locked m, + // then blocks waiting for a new p. startlockedm(gp); goto top; } @@ -1658,12 +1794,12 @@ park0(G *gp) void runtime_gosched(void) { - runtime_mcall(gosched0); + runtime_mcall(runtime_gosched0); } // runtime_gosched continuation on g0. -static void -gosched0(G *gp) +void +runtime_gosched0(G *gp) { gp->status = Grunnable; gp->m = nil; @@ -1679,6 +1815,9 @@ gosched0(G *gp) } // Finishes execution of the current goroutine. +// Need to mark it as nosplit, because it runs with sp > stackbase (as runtime_lessstack). +// Since it does not return it does not matter. But if it is preempted +// at the split stack check, GC will complain about inconsistent sp. void runtime_goexit(void) { @@ -1698,7 +1837,7 @@ goexit0(G *gp) m->curg = nil; m->lockedg = nil; if(m->locked & ~LockExternal) { - runtime_printf("invalid m->locked = %d", m->locked); + runtime_printf("invalid m->locked = %d\n", m->locked); runtime_throw("internal lockOSThread error"); } m->locked = 0; @@ -1720,10 +1859,11 @@ void runtime_entersyscall(void) __attribute__ ((no_split_stack)); void runtime_entersyscall() { - if(m->profilehz > 0) - runtime_setprof(false); + // Disable preemption because during this function g is in Gsyscall status, + // but can have inconsistent g->sched, do not let GC observe it. + m->locks++; - // Leave SP around for gc and traceback. + // Leave SP around for GC and traceback. #ifdef USING_SPLIT_STACK g->gcstack = __splitstack_find(nil, nil, &g->gcstack_size, &g->gcnext_segment, &g->gcnext_sp, @@ -1752,10 +1892,9 @@ runtime_entersyscall() } m->mcache = nil; - m->p->tick++; m->p->m = nil; runtime_atomicstore(&m->p->status, Psyscall); - if(runtime_gcwaiting) { + if(runtime_sched.gcwaiting) { runtime_lock(&runtime_sched); if (runtime_sched.stopwait > 0 && runtime_cas(&m->p->status, Psyscall, Pgcstop)) { if(--runtime_sched.stopwait == 0) @@ -1763,6 +1902,8 @@ runtime_entersyscall() } runtime_unlock(&runtime_sched); } + + m->locks--; } // The same as runtime_entersyscall(), but with a hint that the syscall is blocking. @@ -1771,10 +1912,9 @@ runtime_entersyscallblock(void) { P *p; - if(m->profilehz > 0) - runtime_setprof(false); + m->locks++; // see comment in entersyscall - // Leave SP around for gc and traceback. + // Leave SP around for GC and traceback. 
#ifdef USING_SPLIT_STACK g->gcstack = __splitstack_find(nil, nil, &g->gcstack_size, &g->gcnext_segment, &g->gcnext_sp, @@ -1792,7 +1932,9 @@ runtime_entersyscallblock(void) p = releasep(); handoffp(p); if(g->isbackground) // do not consider blocked scavenger for deadlock detection - inclocked(1); + incidlelocked(1); + + m->locks--; } // The goroutine g exited its system call. @@ -1803,19 +1945,16 @@ void runtime_exitsyscall(void) { G *gp; - P *p; - // Check whether the profiler needs to be turned on. - if(m->profilehz > 0) - runtime_setprof(true); + m->locks++; // see comment in entersyscall gp = g; - // Try to re-acquire the last P. - if(m->p && m->p->status == Psyscall && runtime_cas(&m->p->status, Psyscall, Prunning)) { + if(gp->isbackground) // do not consider blocked scavenger for deadlock detection + incidlelocked(-1); + + if(exitsyscallfast()) { // There's a cpu for us, so we can run. - m->mcache = m->p->mcache; - m->p->m = m; - m->p->tick++; + m->p->syscalltick++; gp->status = Grunning; // Garbage collector isn't running (since we are), // so okay to clear gcstack and gcsp. @@ -1824,27 +1963,11 @@ runtime_exitsyscall(void) #endif gp->gcnext_sp = nil; runtime_memclr(&gp->gcregs, sizeof gp->gcregs); + m->locks--; return; } - if(gp->isbackground) // do not consider blocked scavenger for deadlock detection - inclocked(-1); - // Try to get any other idle P. - m->p = nil; - if(runtime_sched.pidle) { - runtime_lock(&runtime_sched); - p = pidleget(); - runtime_unlock(&runtime_sched); - if(p) { - acquirep(p); -#ifdef USING_SPLIT_STACK - gp->gcstack = nil; -#endif - gp->gcnext_sp = nil; - runtime_memclr(&gp->gcregs, sizeof gp->gcregs); - return; - } - } + m->locks--; // Call the scheduler. runtime_mcall(exitsyscall0); @@ -1860,6 +1983,43 @@ runtime_exitsyscall(void) #endif gp->gcnext_sp = nil; runtime_memclr(&gp->gcregs, sizeof gp->gcregs); + m->p->syscalltick++; +} + +static bool +exitsyscallfast(void) +{ + P *p; + + // Freezetheworld sets stopwait but does not retake P's. + if(runtime_sched.stopwait) { + m->p = nil; + return false; + } + + // Try to re-acquire the last P. + if(m->p && m->p->status == Psyscall && runtime_cas(&m->p->status, Psyscall, Prunning)) { + // There's a cpu for us, so we can run. + m->mcache = m->p->mcache; + m->p->m = m; + return true; + } + // Try to get any other idle P. + m->p = nil; + if(runtime_sched.pidle) { + runtime_lock(&runtime_sched); + p = pidleget(); + if(p && runtime_atomicload(&runtime_sched.sysmonwait)) { + runtime_atomicstore(&runtime_sched.sysmonwait, 0); + runtime_notewakeup(&runtime_sched.sysmonnote); + } + runtime_unlock(&runtime_sched); + if(p) { + acquirep(p); + return true; + } + } + return false; } // runtime_exitsyscall slow path on g0. @@ -1876,6 +2036,10 @@ exitsyscall0(G *gp) p = pidleget(); if(p == nil) globrunqput(gp); + else if(runtime_atomicload(&runtime_sched.sysmonwait)) { + runtime_atomicstore(&runtime_sched.sysmonwait, 0); + runtime_notewakeup(&runtime_sched.sysmonnote); + } runtime_unlock(&runtime_sched); if(p) { acquirep(p); @@ -1890,6 +2054,33 @@ exitsyscall0(G *gp) schedule(); // Never returns. } +// Called from syscall package before fork. +void syscall_runtime_BeforeFork(void) + __asm__(GOSYM_PREFIX "syscall.runtime_BeforeFork"); +void +syscall_runtime_BeforeFork(void) +{ + // Fork can hang if preempted with signals frequently enough (see issue 5517). + // Ensure that we stay on the same M where we disable profiling. 
+ m->locks++; + if(m->profilehz != 0) + runtime_resetcpuprofiler(0); +} + +// Called from syscall package after fork in parent. +void syscall_runtime_AfterFork(void) + __asm__(GOSYM_PREFIX "syscall.runtime_AfterFork"); +void +syscall_runtime_AfterFork(void) +{ + int32 hz; + + hz = runtime_sched.profilehz; + if(hz != 0) + runtime_resetcpuprofiler(hz); + m->locks--; +} + // Allocate a new g, with a stack big enough for stacksize bytes. G* runtime_malg(int32 stacksize, byte** ret_stack, size_t* ret_stacksize) @@ -1919,9 +2110,16 @@ runtime_malg(int32 stacksize, byte** ret_stack, size_t* ret_stacksize) /* For runtime package testing. */ + +// Create a new g running fn with siz bytes of arguments. +// Put it on the queue of g's waiting to run. +// The compiler turns a go statement into a call to this. +// Cannot split the stack because it assumes that the arguments +// are available sequentially after &fn; they would not be +// copied if a stack split occurred. It's OK for this to call +// functions that split the stack. void runtime_testing_entersyscall(void) __asm__ (GOSYM_PREFIX "runtime.entersyscall"); - void runtime_testing_entersyscall() { @@ -1944,6 +2142,7 @@ __go_go(void (*fn)(void*), void* arg) size_t spsize; G *newg; +//runtime_printf("newproc1 %p %p narg=%d nret=%d\n", fn->fn, argp, narg, nret); m->locks++; // disable preemption because it can be holding p in a local var if((newg = gfget(m->p)) != nil) { @@ -2099,7 +2298,7 @@ runtime_gomaxprocsfunc(int32 n) } runtime_unlock(&runtime_sched); - runtime_semacquire(&runtime_worldsema); + runtime_semacquire(&runtime_worldsema, false); m->gcing = 1; runtime_stoptheworld(); newprocs = n; @@ -2110,8 +2309,11 @@ runtime_gomaxprocsfunc(int32 n) return ret; } +// lockOSThread is called by runtime.LockOSThread and runtime.lockOSThread below +// after they modify m->locked. Do not allow preemption during this call, +// or else the m might be different in this function than in the caller. static void -LockOSThread(void) +lockOSThread(void) { m->lockedg = g; g->lockedm = m; @@ -2122,18 +2324,22 @@ void runtime_LockOSThread(void) { m->locked |= LockExternal; - LockOSThread(); + lockOSThread(); } void runtime_lockOSThread(void) { m->locked += LockInternal; - LockOSThread(); + lockOSThread(); } + +// unlockOSThread is called by runtime.UnlockOSThread and runtime.unlockOSThread below +// after they update m->locked. Do not allow preemption during this call, +// or else the m might be in different in this function than in the caller. static void -UnlockOSThread(void) +unlockOSThread(void) { if(m->locked != 0) return; @@ -2147,7 +2353,7 @@ void runtime_UnlockOSThread(void) { m->locked &= ~LockExternal; - UnlockOSThread(); + unlockOSThread(); } void @@ -2156,7 +2362,7 @@ runtime_unlockOSThread(void) if(m->locked < LockInternal) runtime_throw("runtime: internal error: misuse of lockOSThread/unlockOSThread"); m->locked -= LockInternal; - UnlockOSThread(); + unlockOSThread(); } bool @@ -2176,13 +2382,6 @@ runtime_golockedOSThread(void) return runtime_lockedOSThread(); } -// for testing of wire, unwire -uint32 -runtime_mid() -{ - return m->id; -} - intgo runtime_NumGoroutine (void) __asm__ (GOSYM_PREFIX "runtime.NumGoroutine"); @@ -2227,28 +2426,42 @@ static struct { Location locbuf[100]; } prof; +static void +System(void) +{ +} + // Called if we receive a SIGPROF signal. void runtime_sigprof() { int32 n, i; + bool traceback; - // Windows does profiling in a dedicated thread w/o m. 
- if(!Windows && (m == nil || m->mcache == nil)) - return; if(prof.fn == nil || prof.hz == 0) return; - + traceback = true; + // Windows does profiling in a dedicated thread w/o m. + if(!Windows && (m == nil || m->mcache == nil)) + traceback = false; + runtime_lock(&prof); if(prof.fn == nil) { runtime_unlock(&prof); return; } - n = runtime_callers(0, prof.locbuf, nelem(prof.locbuf)); - for(i = 0; i < n; i++) - prof.pcbuf[i] = prof.locbuf[i].pc; - if(n > 0) - prof.fn(prof.pcbuf, n); + n = 0; + if(traceback) { + n = runtime_callers(0, prof.locbuf, nelem(prof.locbuf)); + for(i = 0; i < n; i++) + prof.pcbuf[i] = prof.locbuf[i].pc; + } + if (!traceback || n <= 0) { + n = 2; + prof.pcbuf[0] = (uintptr)runtime_getcallerpc(&n); + prof.pcbuf[1] = (uintptr)System + 1; + } + prof.fn(prof.pcbuf, n); runtime_unlock(&prof); } @@ -2264,7 +2477,11 @@ runtime_setcpuprofilerate(void (*fn)(uintptr*, int32), int32 hz) if(fn == nil) hz = 0; - // Stop profiler on this cpu so that it is safe to lock prof. + // Disable preemption, otherwise we can be rescheduled to another thread + // that has profiling enabled. + m->locks++; + + // Stop profiler on this thread so that it is safe to lock prof. // if a profiling signal came in while we had prof locked, // it would deadlock. runtime_resetcpuprofiler(0); @@ -2279,6 +2496,8 @@ runtime_setcpuprofilerate(void (*fn)(uintptr*, int32), int32 hz) if(hz != 0) runtime_resetcpuprofiler(hz); + + m->locks--; } // Change number of processors. The world is stopped, sched is locked. @@ -2296,7 +2515,8 @@ procresize(int32 new) for(i = 0; i < new; i++) { p = runtime_allp[i]; if(p == nil) { - p = (P*)runtime_mallocgc(sizeof(*p), 0, 0, 1); + p = (P*)runtime_mallocgc(sizeof(*p), 0, FlagNoInvokeGC); + p->id = i; p->status = Pgcstop; runtime_atomicstorep(&runtime_allp[i], p); } @@ -2308,7 +2528,7 @@ procresize(int32 new) } if(p->runq == nil) { p->runqsize = 128; - p->runq = (G**)runtime_mallocgc(p->runqsize*sizeof(G*), 0, 0, 1); + p->runq = (G**)runtime_mallocgc(p->runqsize*sizeof(G*), 0, FlagNoInvokeGC); } } @@ -2351,7 +2571,6 @@ procresize(int32 new) p->status = Pidle; pidleput(p); } - runtime_singleproc = new == 1; runtime_atomicstore((uint32*)&runtime_gomaxprocs, new); } @@ -2393,10 +2612,10 @@ releasep(void) } static void -inclocked(int32 v) +incidlelocked(int32 v) { runtime_lock(&runtime_sched); - runtime_sched.mlocked += v; + runtime_sched.nmidlelocked += v; if(v > 0) checkdead(); runtime_unlock(&runtime_sched); @@ -2411,12 +2630,12 @@ checkdead(void) int32 run, grunning, s; // -1 for sysmon - run = runtime_sched.mcount - runtime_sched.nmidle - runtime_sched.mlocked - 1 - countextra(); + run = runtime_sched.mcount - runtime_sched.nmidle - runtime_sched.nmidlelocked - 1 - countextra(); if(run > 0) return; if(run < 0) { - runtime_printf("checkdead: nmidle=%d mlocked=%d mcount=%d\n", - runtime_sched.nmidle, runtime_sched.mlocked, runtime_sched.mcount); + runtime_printf("checkdead: nmidle=%d nmidlelocked=%d mcount=%d\n", + runtime_sched.nmidle, runtime_sched.nmidlelocked, runtime_sched.mcount); runtime_throw("checkdead: inconsistent counts"); } grunning = 0; @@ -2441,10 +2660,10 @@ static void sysmon(void) { uint32 idle, delay; - int64 now, lastpoll; + int64 now, lastpoll, lasttrace; G *gp; - uint32 ticks[MaxGomaxprocs]; + lasttrace = 0; idle = 0; // how many cycles in succession we had not wokeup somebody delay = 0; for(;;) { @@ -2455,9 +2674,10 @@ sysmon(void) if(delay > 10*1000) // up to 10ms delay = 10*1000; runtime_usleep(delay); - if(runtime_gcwaiting || 
runtime_atomicload(&runtime_sched.npidle) == (uint32)runtime_gomaxprocs) { // TODO: fast atomic + if(runtime_debug.schedtrace <= 0 && + (runtime_sched.gcwaiting || runtime_atomicload(&runtime_sched.npidle) == (uint32)runtime_gomaxprocs)) { // TODO: fast atomic runtime_lock(&runtime_sched); - if(runtime_atomicload(&runtime_gcwaiting) || runtime_atomicload(&runtime_sched.npidle) == (uint32)runtime_gomaxprocs) { + if(runtime_atomicload(&runtime_sched.gcwaiting) || runtime_atomicload(&runtime_sched.npidle) == (uint32)runtime_gomaxprocs) { runtime_atomicstore(&runtime_sched.sysmonwait, 1); runtime_unlock(&runtime_sched); runtime_notesleep(&runtime_sched.sysmonnote); @@ -2470,53 +2690,198 @@ sysmon(void) // poll network if not polled for more than 10ms lastpoll = runtime_atomicload64(&runtime_sched.lastpoll); now = runtime_nanotime(); - if(lastpoll != 0 && lastpoll + 10*1000*1000 > now) { + if(lastpoll != 0 && lastpoll + 10*1000*1000 < now) { + runtime_cas64(&runtime_sched.lastpoll, lastpoll, now); gp = runtime_netpoll(false); // non-blocking - injectglist(gp); + if(gp) { + // Need to decrement number of idle locked M's + // (pretending that one more is running) before injectglist. + // Otherwise it can lead to the following situation: + // injectglist grabs all P's but before it starts M's to run the P's, + // another M returns from syscall, finishes running its G, + // observes that there is no work to do and no other running M's + // and reports deadlock. + incidlelocked(-1); + injectglist(gp); + incidlelocked(1); + } } // retake P's blocked in syscalls - if(retake(ticks)) + // and preempt long running G's + if(retake(now)) idle = 0; else idle++; + + if(runtime_debug.schedtrace > 0 && lasttrace + runtime_debug.schedtrace*1000000ll <= now) { + lasttrace = now; + runtime_schedtrace(runtime_debug.scheddetail); + } } } +typedef struct Pdesc Pdesc; +struct Pdesc +{ + uint32 schedtick; + int64 schedwhen; + uint32 syscalltick; + int64 syscallwhen; +}; +static Pdesc pdesc[MaxGomaxprocs]; + static uint32 -retake(uint32 *ticks) +retake(int64 now) { uint32 i, s, n; int64 t; P *p; + Pdesc *pd; n = 0; for(i = 0; i < (uint32)runtime_gomaxprocs; i++) { p = runtime_allp[i]; if(p==nil) continue; - t = p->tick; - if(ticks[i] != t) { - ticks[i] = t; - continue; - } + pd = &pdesc[i]; s = p->status; - if(s != Psyscall) - continue; - if(p->runqhead == p->runqtail && runtime_atomicload(&runtime_sched.nmspinning) + runtime_atomicload(&runtime_sched.npidle) > 0) // TODO: fast atomic - continue; - // Need to increment number of locked M's before the CAS. - // Otherwise the M from which we retake can exit the syscall, - // increment nmidle and report deadlock. - inclocked(-1); - if(runtime_cas(&p->status, s, Pidle)) { - n++; - handoffp(p); + if(s == Psyscall) { + // Retake P from syscall if it's there for more than 1 sysmon tick (20us). + // But only if there is other work to do. + t = p->syscalltick; + if(pd->syscalltick != t) { + pd->syscalltick = t; + pd->syscallwhen = now; + continue; + } + if(p->runqhead == p->runqtail && + runtime_atomicload(&runtime_sched.nmspinning) + runtime_atomicload(&runtime_sched.npidle) > 0) + continue; + // Need to decrement number of idle locked M's + // (pretending that one more is running) before the CAS. + // Otherwise the M from which we retake can exit the syscall, + // increment nmidle and report deadlock. 
+ incidlelocked(-1); + if(runtime_cas(&p->status, s, Pidle)) { + n++; + handoffp(p); + } + incidlelocked(1); + } else if(s == Prunning) { + // Preempt G if it's running for more than 10ms. + t = p->schedtick; + if(pd->schedtick != t) { + pd->schedtick = t; + pd->schedwhen = now; + continue; + } + if(pd->schedwhen + 10*1000*1000 > now) + continue; + // preemptone(p); } - inclocked(1); } return n; } +// Tell all goroutines that they have been preempted and they should stop. +// This function is purely best-effort. It can fail to inform a goroutine if a +// processor just started running it. +// No locks need to be held. +// Returns true if preemption request was issued to at least one goroutine. +static bool +preemptall(void) +{ + return false; +} + +void +runtime_schedtrace(bool detailed) +{ + static int64 starttime; + int64 now; + int64 id1, id2, id3; + int32 i, q, t, h, s; + const char *fmt; + M *mp, *lockedm; + G *gp, *lockedg; + P *p; + + now = runtime_nanotime(); + if(starttime == 0) + starttime = now; + + runtime_lock(&runtime_sched); + runtime_printf("SCHED %Dms: gomaxprocs=%d idleprocs=%d threads=%d idlethreads=%d runqueue=%d", + (now-starttime)/1000000, runtime_gomaxprocs, runtime_sched.npidle, runtime_sched.mcount, + runtime_sched.nmidle, runtime_sched.runqsize); + if(detailed) { + runtime_printf(" gcwaiting=%d nmidlelocked=%d nmspinning=%d stopwait=%d sysmonwait=%d\n", + runtime_sched.gcwaiting, runtime_sched.nmidlelocked, runtime_sched.nmspinning, + runtime_sched.stopwait, runtime_sched.sysmonwait); + } + // We must be careful while reading data from P's, M's and G's. + // Even if we hold schedlock, most data can be changed concurrently. + // E.g. (p->m ? p->m->id : -1) can crash if p->m changes from non-nil to nil. + for(i = 0; i < runtime_gomaxprocs; i++) { + p = runtime_allp[i]; + if(p == nil) + continue; + mp = p->m; + t = p->runqtail; + h = p->runqhead; + s = p->runqsize; + q = t - h; + if(q < 0) + q += s; + if(detailed) + runtime_printf(" P%d: status=%d schedtick=%d syscalltick=%d m=%d runqsize=%d/%d gfreecnt=%d\n", + i, p->status, p->schedtick, p->syscalltick, mp ? mp->id : -1, q, s, p->gfreecnt); + else { + // In non-detailed mode format lengths of per-P run queues as: + // [len1 len2 len3 len4] + fmt = " %d"; + if(runtime_gomaxprocs == 1) + fmt = " [%d]\n"; + else if(i == 0) + fmt = " [%d"; + else if(i == runtime_gomaxprocs-1) + fmt = " %d]\n"; + runtime_printf(fmt, q); + } + } + if(!detailed) { + runtime_unlock(&runtime_sched); + return; + } + for(mp = runtime_allm; mp; mp = mp->alllink) { + p = mp->p; + gp = mp->curg; + lockedg = mp->lockedg; + id1 = -1; + if(p) + id1 = p->id; + id2 = -1; + if(gp) + id2 = gp->goid; + id3 = -1; + if(lockedg) + id3 = lockedg->goid; + runtime_printf(" M%d: p=%D curg=%D mallocing=%d throwing=%d gcing=%d" + " locks=%d dying=%d helpgc=%d spinning=%d lockedg=%D\n", + mp->id, id1, id2, + mp->mallocing, mp->throwing, mp->gcing, mp->locks, mp->dying, mp->helpgc, + mp->spinning, id3); + } + for(gp = runtime_allg; gp; gp = gp->alllink) { + mp = gp->m; + lockedm = gp->lockedm; + runtime_printf(" G%D: status=%d(%s) m=%d lockedm=%d\n", + gp->goid, gp->status, gp->waitreason, mp ? mp->id : -1, + lockedm ? lockedm->id : -1); + } + runtime_unlock(&runtime_sched); +} + // Put mp on midle list. // Sched must be locked. static void @@ -2559,7 +2924,7 @@ globrunqput(G *gp) // Try get a batch of G's from the global runnable queue. // Sched must be locked. 
static G* -globrunqget(P *p) +globrunqget(P *p, int32 max) { G *gp, *gp1; int32 n; @@ -2569,6 +2934,8 @@ globrunqget(P *p) n = runtime_sched.runqsize/runtime_gomaxprocs+1; if(n > runtime_sched.runqsize) n = runtime_sched.runqsize; + if(max > 0 && n > max) + n = max; runtime_sched.runqsize -= n; if(runtime_sched.runqsize == 0) runtime_sched.runqtail = nil; @@ -2827,6 +3194,22 @@ runtime_testSchedLocalQueueSteal(void) } } +intgo runtime_debug_setMaxThreads(intgo) + __asm__(GOSYM_PREFIX "runtime_debug.setMaxThreads"); + +intgo +runtime_debug_setMaxThreads(intgo in) +{ + intgo out; + + runtime_lock(&runtime_sched); + out = runtime_sched.maxmcount; + runtime_sched.maxmcount = in; + checkmcount(); + runtime_unlock(&runtime_sched); + return out; +} + void runtime_proc_scan(void (*addroot)(Obj)) { @@ -2852,3 +3235,11 @@ __go_get_closure(void) { return g->closure; } + +// Return whether we are waiting for a GC. This gc toolchain uses +// preemption instead. +bool +runtime_gcwaiting(void) +{ + return runtime_sched.gcwaiting; +} diff --git a/libgo/runtime/race.h b/libgo/runtime/race.h index 3357bed312d..884245cedad 100644 --- a/libgo/runtime/race.h +++ b/libgo/runtime/race.h @@ -16,14 +16,14 @@ uintptr runtime_raceinit(void); void runtime_racefini(void); void runtime_racemapshadow(void *addr, uintptr size); -void runtime_racemalloc(void *p, uintptr sz, void *pc); +void runtime_racemalloc(void *p, uintptr sz); void runtime_racefree(void *p); uintptr runtime_racegostart(void *pc); void runtime_racegoend(void); void runtime_racewritepc(void *addr, void *callpc, void *pc); void runtime_racereadpc(void *addr, void *callpc, void *pc); -void runtime_racewriterangepc(void *addr, uintptr sz, uintptr step, void *callpc, void *pc); -void runtime_racereadrangepc(void *addr, uintptr sz, uintptr step, void *callpc, void *pc); +void runtime_racewriterangepc(void *addr, uintptr sz, void *callpc, void *pc); +void runtime_racereadrangepc(void *addr, uintptr sz, void *callpc, void *pc); void runtime_racefingo(void); void runtime_raceacquire(void *addr); void runtime_raceacquireg(G *gp, void *addr); diff --git a/libgo/runtime/runtime.c b/libgo/runtime/runtime.c index 1ff6d00e299..56fc045eac8 100644 --- a/libgo/runtime/runtime.c +++ b/libgo/runtime/runtime.c @@ -124,11 +124,12 @@ TestAtomic64(void) z64 = 42; x64 = 0; PREFETCH(&z64); - if(runtime_cas64(&z64, &x64, 1)) + if(runtime_cas64(&z64, x64, 1)) runtime_throw("cas64 failed"); - if(x64 != 42) + if(x64 != 0) runtime_throw("cas64 failed"); - if(!runtime_cas64(&z64, &x64, 1)) + x64 = 42; + if(!runtime_cas64(&z64, x64, 1)) runtime_throw("cas64 failed"); if(x64 != 42 || z64 != 1) runtime_throw("cas64 failed"); @@ -279,3 +280,79 @@ runtime_signalstack(byte *p, int32 n) if(sigaltstack(&st, nil) < 0) *(int *)0xf1 = 0xf1; } + +DebugVars runtime_debug; + +static struct { + const char* name; + int32* value; +} dbgvar[] = { + {"gctrace", &runtime_debug.gctrace}, + {"schedtrace", &runtime_debug.schedtrace}, + {"scheddetail", &runtime_debug.scheddetail}, +}; + +void +runtime_parsedebugvars(void) +{ + const byte *p; + intgo i, n; + + p = runtime_getenv("GODEBUG"); + if(p == nil) + return; + for(;;) { + for(i=0; i<(intgo)nelem(dbgvar); i++) { + n = runtime_findnull((const byte*)dbgvar[i].name); + if(runtime_mcmp(p, dbgvar[i].name, n) == 0 && p[n] == '=') + *dbgvar[i].value = runtime_atoi(p+n+1); + } + p = (const byte *)runtime_strstr((const char *)p, ","); + if(p == nil) + break; + p++; + } +} + +// Poor mans 64-bit division. 
+// This is a very special function, do not use it if you are not sure what you are doing. +// int64 division is lowered into _divv() call on 386, which does not fit into nosplit functions. +// Handles overflow in a time-specific manner. +int32 +runtime_timediv(int64 v, int32 div, int32 *rem) +{ + int32 res, bit; + + if(v >= (int64)div*0x7fffffffLL) { + if(rem != nil) + *rem = 0; + return 0x7fffffff; + } + res = 0; + for(bit = 30; bit >= 0; bit--) { + if(v >= ((int64)div<<bit)) { + v = v - ((int64)div<<bit); + res += 1<<bit; + } + } + if(rem != nil) + *rem = v; + return res; +} + +// Setting the max stack size doesn't really do anything for gccgo. + +uintptr runtime_maxstacksize = 1<<20; // enough until runtime.main sets it for real + +intgo runtime_debug_setMaxStack(intgo) + __asm__ (GOSYM_PREFIX "runtime_debug.setMaxStack"); + +intgo +runtime_debug_setMaxStack(intgo in) +{ + intgo out; + + out = runtime_maxstacksize; + runtime_maxstacksize = in; + return out; +} diff --git a/libgo/runtime/runtime.h b/libgo/runtime/runtime.h index d2e7d4c11bc..e82e83231e6 100644 --- a/libgo/runtime/runtime.h +++ b/libgo/runtime/runtime.h @@ -72,6 +72,7 @@ typedef struct ParFor ParFor; typedef struct ParForThread ParForThread; typedef struct CgoMal CgoMal; typedef struct PollDesc PollDesc; +typedef struct DebugVars DebugVars; typedef struct __go_open_array Slice; typedef struct __go_interface Iface; @@ -82,6 +83,7 @@ typedef struct __go_panic_stack Panic; typedef struct __go_ptr_type PtrType; typedef struct __go_func_type FuncType; +typedef struct __go_interface_type InterfaceType; typedef struct __go_map_type MapType; typedef struct __go_channel_type ChanType; @@ -206,21 +208,20 @@ struct G void* param; // passed parameter on wakeup bool fromgogo; // reached from gogo int16 status; - int64 goid; uint32 selgen; // valid sudog pointer + int64 goid; const char* waitreason; // if status==Gwaiting G* schedlink; bool ispanic; bool issystem; // do not output in stack dump bool isbackground; // ignore in deadlock detector - bool blockingsyscall; // hint that the next syscall will block M* m; // for debuggers, but offset not hard-coded M* lockedm; int32 sig; int32 writenbuf; byte* writebuf; - // DeferChunk *dchunk; - // DeferChunk *dchunknext; + // DeferChunk* dchunk; + // DeferChunk* dchunknext; uintptr sigcode0; uintptr sigcode1; // uintptr sigpc; @@ -243,6 +244,7 @@ struct M size_t gsignalstacksize; void (*mstartfn)(void); G* curg; // current running goroutine + G* caughtsig; // goroutine running during fatal signal P* p; // attached P for executing Go code (nil if not executing Go code) P* nextp; int32 id; @@ -250,11 +252,9 @@ struct M int32 throwing; int32 gcing; int32 locks; - int32 nomemprof; int32 dying; int32 profilehz; int32 helpgc; - bool blockingsyscall; bool spinning; uint32 fastrand; uint64 ncgocall; // number of cgo calls in total @@ -289,10 +289,12 @@ struct P { Lock; - uint32 status; // one of Pidle/Prunning/... + int32 id; + uint32 status; // one of Pidle/Prunning/... P* link; - uint32 tick; // incremented on every scheduler or system call - M* m; // back-link to associated M (nil if idle) + uint32 schedtick; // incremented on every scheduler call + uint32 syscalltick; // incremented on every system call + M* m; // back-link to associated M (nil if idle) MCache* mcache; // Queue of runnable goroutines. 
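Because runtime_timediv only shifts and subtracts, it is safe in nosplit paths where a real int64 division would be lowered into a library call; one consumer is the futexsleep rewrite in thread-linux.c further down. A minimal sketch of that use, with illustrative values:

	struct timespec ts;
	int32 nsec;
	int64 ns = 2500000000LL;                              // 2.5 second timeout

	ts.tv_sec = runtime_timediv(ns, 1000000000, &nsec);   // 2
	ts.tv_nsec = nsec;                                    // 500000000

	// Oversized inputs saturate rather than overflow: once
	// v >= (int64)div * 0x7fffffff, the result is 0x7fffffff and *rem is 0.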
@@ -308,9 +310,13 @@ struct P byte pad[64]; }; -// The m->locked word holds a single bit saying whether -// external calls to LockOSThread are in effect, and then a counter -// of the internal nesting depth of lockOSThread / unlockOSThread. +// The m->locked word holds two pieces of state counting active calls to LockOSThread/lockOSThread. +// The low bit (LockExternal) is a boolean reporting whether any LockOSThread call is active. +// External locks are not recursive; a second lock is silently ignored. +// The upper bits of m->lockedcount record the nesting depth of calls to lockOSThread +// (counting up by LockInternal), popped by unlockOSThread (counting down by LockInternal). +// Internal locks can be recursive. For instance, a lock for cgo can occur while the main +// goroutine is holding the lock during the initialization phase. enum { LockExternal = 1, @@ -333,19 +339,16 @@ enum SigIgnored = 1<<6, // the signal was ignored before we registered for it }; -#ifndef NSIG -#define NSIG 32 -#endif - -// NOTE(rsc): keep in sync with extern.go:/type.Func. -// Eventually, the loaded symbol table should be closer to this form. +// Layout of in-memory per-function information prepared by linker +// See http://golang.org/s/go12symtab. +// Keep in sync with linker and with ../../libmach/sym.c +// and with package debug/gosym. struct Func { String name; uintptr entry; // entry pc }; - #ifdef GOOS_windows enum { Windows = 1 @@ -372,7 +375,7 @@ struct Timers // If this struct changes, adjust ../time/sleep.go:/runtimeTimer. struct Timer { - int32 i; // heap index + int32 i; // heap index // Timer wakes up at when, and then at when+period, ... (period > 0 only) // each time calling f(now, arg) in the timer goroutine, so f must be @@ -420,6 +423,16 @@ struct CgoMal void *alloc; }; +// Holds variables parsed from GODEBUG env var. 
+struct DebugVars +{ + int32 gctrace; + int32 schedtrace; + int32 scheddetail; +}; + +extern bool runtime_precisestack; + /* * defined macros * you need super-gopher-guru privilege @@ -453,12 +466,11 @@ extern M* runtime_allm; extern P** runtime_allp; extern int32 runtime_gomaxprocs; extern uint32 runtime_needextram; -extern bool runtime_singleproc; extern uint32 runtime_panicking; -extern uint32 runtime_gcwaiting; // gc is waiting to run extern int8* runtime_goos; extern int32 runtime_ncpu; extern void (*runtime_sysargs)(int32, uint8**); +extern DebugVars runtime_debug; /* * common functions and data @@ -466,11 +478,13 @@ extern void (*runtime_sysargs)(int32, uint8**); #define runtime_strcmp(s1, s2) __builtin_strcmp((s1), (s2)) #define runtime_strstr(s1, s2) __builtin_strstr((s1), (s2)) intgo runtime_findnull(const byte*); +intgo runtime_findnullw(const uint16*); void runtime_dump(byte*, int32); /* * very low level c-called */ +void runtime_gogo(G*); struct __go_func_type; void runtime_args(int32, byte**); void runtime_osinit(); @@ -492,14 +506,13 @@ void runtime_sigenable(uint32 sig); void runtime_sigdisable(uint32 sig); int32 runtime_gotraceback(bool *crash); void runtime_goroutineheader(G*); -void runtime_goroutinetrailer(G*); void runtime_printtrace(Location*, int32, bool); #define runtime_open(p, f, m) open((p), (f), (m)) #define runtime_read(d, v, n) read((d), (v), (n)) #define runtime_write(d, v, n) write((d), (v), (n)) #define runtime_close(d) close(d) #define runtime_cas(pval, old, new) __sync_bool_compare_and_swap (pval, old, new) -#define runtime_cas64(pval, pold, new) __atomic_compare_exchange_n (pval, pold, new, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) +#define runtime_cas64(pval, old, new) __sync_bool_compare_and_swap (pval, old, new) #define runtime_casp(pval, old, new) __sync_bool_compare_and_swap (pval, old, new) // Don't confuse with XADD x86 instruction, // this one is actually 'addx', that is, add-and-fetch. 
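With the macro switched to __sync_bool_compare_and_swap, runtime_cas64 now has the same value-based contract as runtime_cas and runtime_casp: the old value is passed by value, the return reports whether the swap happened, and nothing is written back through a pointer. A minimal illustration of the calling pattern, mirroring the TestAtomic64 fixup above:

	uint64 x, old;

	x = 42;
	old = runtime_atomicload64(&x);
	// Succeeds: x still holds old (42), so it is swapped to 1.
	if(!runtime_cas64(&x, old, 1))
		runtime_throw("cas64 example: expected success");
	// Fails and leaves x untouched: x is now 1, which no longer matches old.
	if(runtime_cas64(&x, old, 7))
		runtime_throw("cas64 example: expected failure");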
@@ -530,17 +543,21 @@ void runtime_mallocinit(void); void runtime_mprofinit(void); #define runtime_malloc(s) __go_alloc(s) #define runtime_free(p) __go_free(p) -bool runtime_addfinalizer(void*, FuncVal *fn, const struct __go_func_type *); +bool runtime_addfinalizer(void*, FuncVal *fn, const struct __go_func_type *, const struct __go_ptr_type *); #define runtime_getcallersp(p) __builtin_frame_address(1) int32 runtime_mcount(void); int32 runtime_gcount(void); +void runtime_mcall(void(*)(G*)); uint32 runtime_fastrand1(void); +int32 runtime_timediv(int64, int32, int32*); void runtime_setmg(M*, G*); void runtime_newextram(void); #define runtime_exit(s) exit(s) #define runtime_breakpoint() __builtin_trap() void runtime_gosched(void); +void runtime_gosched0(G*); +void runtime_schedtrace(bool); void runtime_park(void(*)(Lock*), Lock*, const char*); void runtime_tsleep(int64, const char*); M* runtime_newm(void); @@ -555,6 +572,8 @@ int32 runtime_callers(int32, Location*, int32); int64 runtime_nanotime(void); void runtime_dopanic(int32) __attribute__ ((noreturn)); void runtime_startpanic(void); +void runtime_freezetheworld(void); +void runtime_unwindstack(G*, byte*); void runtime_sigprof(); void runtime_resetcpuprofiler(int32); void runtime_setcpuprofilerate(void(*)(uintptr*, int32), int32); @@ -567,10 +586,14 @@ void runtime_addtimer(Timer*); bool runtime_deltimer(Timer*); G* runtime_netpoll(bool); void runtime_netpollinit(void); -int32 runtime_netpollopen(int32, PollDesc*); -int32 runtime_netpollclose(int32); +int32 runtime_netpollopen(uintptr, PollDesc*); +int32 runtime_netpollclose(uintptr); void runtime_netpollready(G**, PollDesc*, int32); +uintptr runtime_netpollfd(PollDesc*); void runtime_crash(void); +void runtime_parsedebugvars(void); +void _rt0_go(void); +void* runtime_funcdata(Func*, int32); void runtime_stoptheworld(void); void runtime_starttheworld(void); @@ -603,11 +626,15 @@ void runtime_unlock(Lock*); * wake up early, it must wait to call noteclear until it * can be sure that no other goroutine is calling * notewakeup. + * + * notesleep/notetsleep are generally called on g0, + * notetsleepg is similar to notetsleep but is called on user g. */ void runtime_noteclear(Note*); void runtime_notesleep(Note*); void runtime_notewakeup(Note*); -void runtime_notetsleep(Note*, int64); +bool runtime_notetsleep(Note*, int64); // false - timeout +bool runtime_notetsleepg(Note*, int64); // false - timeout /* * low-level synchronization for implementing the above @@ -698,11 +725,13 @@ void runtime_newTypeAssertionError(const String*, const String*, const String*, __asm__ (GOSYM_PREFIX "runtime.NewTypeAssertionError"); void runtime_newErrorString(String, Eface*) __asm__ (GOSYM_PREFIX "runtime.NewErrorString"); +void runtime_newErrorCString(const char*, Eface*) + __asm__ (GOSYM_PREFIX "runtime.NewErrorCString"); /* * wrapped for go users */ -void runtime_semacquire(uint32 volatile *); +void runtime_semacquire(uint32 volatile *, bool); void runtime_semrelease(uint32 volatile *); int32 runtime_gomaxprocsfunc(int32 n); void runtime_procyield(uint32); @@ -711,19 +740,10 @@ void runtime_lockOSThread(void); void runtime_unlockOSThread(void); bool runtime_showframe(String, bool); +void runtime_printcreatedby(G*); uintptr runtime_memlimit(void); -// If appropriate, ask the operating system to control whether this -// thread should receive profiling signals. This is only necessary on OS X. 
-// An operating system should not deliver a profiling signal to a -// thread that is not actually executing (what good is that?), but that's -// what OS X prefers to do. When profiling is turned on, we mask -// away the profiling signal when threads go to sleep, so that OS X -// is forced to deliver the signal to a thread that's actually running. -// This is a no-op on other systems. -void runtime_setprof(bool); - #define ISNAN(f) __builtin_isnan(f) enum @@ -763,3 +783,6 @@ int32 getproccount(void); void __go_set_closure(void*); void* __go_get_closure(void); + +bool runtime_gcwaiting(void); +void runtime_badsignal(int); diff --git a/libgo/runtime/sema.goc b/libgo/runtime/sema.goc index be971bd1265..f5d5bc89e3d 100644 --- a/libgo/runtime/sema.goc +++ b/libgo/runtime/sema.goc @@ -21,22 +21,23 @@ package sync #include "runtime.h" #include "arch.h" -typedef struct Sema Sema; -struct Sema +typedef struct SemaWaiter SemaWaiter; +struct SemaWaiter { uint32 volatile* addr; G* g; int64 releasetime; - Sema* prev; - Sema* next; + int32 nrelease; // -1 for acquire + SemaWaiter* prev; + SemaWaiter* next; }; typedef struct SemaRoot SemaRoot; struct SemaRoot { Lock; - Sema* head; - Sema* tail; + SemaWaiter* head; + SemaWaiter* tail; // Number of waiters. Read w/o the lock. uint32 volatile nwait; }; @@ -58,7 +59,7 @@ semroot(uint32 volatile *addr) } static void -semqueue(SemaRoot *root, uint32 volatile *addr, Sema *s) +semqueue(SemaRoot *root, uint32 volatile *addr, SemaWaiter *s) { s->g = runtime_g(); s->addr = addr; @@ -72,7 +73,7 @@ semqueue(SemaRoot *root, uint32 volatile *addr, Sema *s) } static void -semdequeue(SemaRoot *root, Sema *s) +semdequeue(SemaRoot *root, SemaWaiter *s) { if(s->next) s->next->prev = s->prev; @@ -97,10 +98,10 @@ cansemacquire(uint32 volatile *addr) return 0; } -static void -semacquireimpl(uint32 volatile *addr, int32 profile) +void +runtime_semacquire(uint32 volatile *addr, bool profile) { - Sema s; // Needs to be allocated on stack, otherwise garbage collector could deallocate it + SemaWaiter s; // Needs to be allocated on stack, otherwise garbage collector could deallocate it SemaRoot *root; int64 t0; @@ -145,15 +146,9 @@ semacquireimpl(uint32 volatile *addr, int32 profile) } void -runtime_semacquire(uint32 volatile *addr) -{ - semacquireimpl(addr, 0); -} - -void runtime_semrelease(uint32 volatile *addr) { - Sema *s; + SemaWaiter *s; SemaRoot *root; root = semroot(addr); @@ -188,10 +183,117 @@ runtime_semrelease(uint32 volatile *addr) } } +// TODO(dvyukov): move to netpoll.goc once it's used by all OSes. +void net_runtime_Semacquire(uint32 *addr) + __asm__ (GOSYM_PREFIX "net.runtime_Semacquire"); + +void net_runtime_Semacquire(uint32 *addr) +{ + runtime_semacquire(addr, true); +} + +void net_runtime_Semrelease(uint32 *addr) + __asm__ (GOSYM_PREFIX "net.runtime_Semrelease"); + +void net_runtime_Semrelease(uint32 *addr) +{ + runtime_semrelease(addr); +} + func runtime_Semacquire(addr *uint32) { - semacquireimpl(addr, 1); + runtime_semacquire(addr, true); } func runtime_Semrelease(addr *uint32) { runtime_semrelease(addr); } + +typedef struct SyncSema SyncSema; +struct SyncSema +{ + Lock; + SemaWaiter* head; + SemaWaiter* tail; +}; + +func runtime_Syncsemcheck(size uintptr) { + if(size != sizeof(SyncSema)) { + runtime_printf("bad SyncSema size: sync:%D runtime:%D\n", (int64)size, (int64)sizeof(SyncSema)); + runtime_throw("bad SyncSema size"); + } +} + +// Syncsemacquire waits for a pairing Syncsemrelease on the same semaphore s. 
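In other words SyncSema is a strict rendezvous rather than a counter: a release carries an nrelease count that successive acquirers consume one by one, and whichever side arrives first parks until enough of the other side has shown up. A rough timeline under that contract (goroutine labels and ordering illustrative; the acquire half of the exchange is implemented next):

	// G1: Syncsemrelease(s, 2)  -> no acquirer queued; G1 parks ("semarelease") with nrelease=2
	// G2: Syncsemacquire(s)     -> consumes one pending release (2 -> 1) and returns at once
	// G3: Syncsemacquire(s)     -> consumes the last one (1 -> 0), readies G1, returns
	// G4: Syncsemacquire(s)     -> nothing pending; G4 parks ("semacquire")
	// G5: Syncsemrelease(s, 1)  -> pairs with G4, readies it, and returns without parking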
+func runtime_Syncsemacquire(s *SyncSema) { + SemaWaiter w, *wake; + int64 t0; + + w.g = runtime_g(); + w.nrelease = -1; + w.next = nil; + w.releasetime = 0; + t0 = 0; + if(runtime_blockprofilerate > 0) { + t0 = runtime_cputicks(); + w.releasetime = -1; + } + + runtime_lock(s); + if(s->head && s->head->nrelease > 0) { + // have pending release, consume it + wake = nil; + s->head->nrelease--; + if(s->head->nrelease == 0) { + wake = s->head; + s->head = wake->next; + if(s->head == nil) + s->tail = nil; + } + runtime_unlock(s); + if(wake) + runtime_ready(wake->g); + } else { + // enqueue itself + if(s->tail == nil) + s->head = &w; + else + s->tail->next = &w; + s->tail = &w; + runtime_park(runtime_unlock, s, "semacquire"); + if(t0) + runtime_blockevent(w.releasetime - t0, 2); + } +} + +// Syncsemrelease waits for n pairing Syncsemacquire on the same semaphore s. +func runtime_Syncsemrelease(s *SyncSema, n uint32) { + SemaWaiter w, *wake; + + w.g = runtime_g(); + w.nrelease = (int32)n; + w.next = nil; + w.releasetime = 0; + + runtime_lock(s); + while(w.nrelease > 0 && s->head && s->head->nrelease < 0) { + // have pending acquire, satisfy it + wake = s->head; + s->head = wake->next; + if(s->head == nil) + s->tail = nil; + if(wake->releasetime) + wake->releasetime = runtime_cputicks(); + runtime_ready(wake->g); + w.nrelease--; + } + if(w.nrelease > 0) { + // enqueue itself + if(s->tail == nil) + s->head = &w; + else + s->tail->next = &w; + s->tail = &w; + runtime_park(runtime_unlock, s, "semarelease"); + } else + runtime_unlock(s); +} diff --git a/libgo/runtime/signal_unix.c b/libgo/runtime/signal_unix.c index 5a506c8af3d..ea0a58f2ea2 100644 --- a/libgo/runtime/signal_unix.c +++ b/libgo/runtime/signal_unix.c @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build darwin freebsd linux openbsd netbsd +// +build darwin dragonfly freebsd linux openbsd netbsd #include <sys/time.h> @@ -100,13 +100,11 @@ runtime_resetcpuprofiler(int32 hz) runtime_memclr((byte*)&it, sizeof it); if(hz == 0) { runtime_setitimer(ITIMER_PROF, &it, nil); - runtime_setprof(false); } else { it.it_interval.tv_sec = 0; it.it_interval.tv_usec = 1000000 / hz; it.it_value = it.it_interval; runtime_setitimer(ITIMER_PROF, &it, nil); - runtime_setprof(true); } runtime_m()->profilehz = hz; } diff --git a/libgo/runtime/sigqueue.goc b/libgo/runtime/sigqueue.goc index 8657216d3f4..6769b239dc3 100644 --- a/libgo/runtime/sigqueue.goc +++ b/libgo/runtime/sigqueue.goc @@ -107,9 +107,7 @@ func signal_recv() (m uint32) { new = HASWAITER; if(runtime_cas(&sig.state, old, new)) { if (new == HASWAITER) { - runtime_entersyscallblock(); - runtime_notesleep(&sig); - runtime_exitsyscall(); + runtime_notetsleepg(&sig, -1); runtime_noteclear(&sig); } break; @@ -157,3 +155,10 @@ func signal_disable(s uint32) { sig.wanted[s/32] &= ~(1U<<(s&31)); runtime_sigdisable(s); } + +// This runs on a foreign stack, without an m or a g. No stack split. 
+void +runtime_badsignal(int sig) +{ + __go_sigsend(sig); +} diff --git a/libgo/runtime/string.goc b/libgo/runtime/string.goc index 64ed4f6ebaa..a7446e93c45 100644 --- a/libgo/runtime/string.goc +++ b/libgo/runtime/string.goc @@ -21,6 +21,18 @@ runtime_findnull(const byte *s) return __builtin_strlen((const char*) s); } +intgo +runtime_findnullw(const uint16 *s) +{ + intgo l; + + if(s == nil) + return 0; + for(l=0; s[l]!=0; l++) + ; + return l; +} + static String gostringsize(intgo l, byte** pmem) { @@ -32,7 +44,7 @@ gostringsize(intgo l, byte** pmem) return runtime_emptystring; } // leave room for NUL for C runtime (e.g., callers of getenv) - mem = runtime_mallocgc(l+1, FlagNoPointers, 1, 0); + mem = runtime_mallocgc(l+1, 0, FlagNoScan|FlagNoZero); s.str = mem; s.len = l; mem[l] = 0; @@ -63,6 +75,15 @@ runtime_gostringnocopy(const byte *str) return s; } +String runtime_cstringToGo(byte*) + __asm__ (GOSYM_PREFIX "runtime.cstringToGo"); + +String +runtime_cstringToGo(byte *str) +{ + return runtime_gostringnocopy(str); +} + enum { Runeself = 0x80, diff --git a/libgo/runtime/thread-linux.c b/libgo/runtime/thread-linux.c index 13d23c47b07..ae56261e6f5 100644 --- a/libgo/runtime/thread-linux.c +++ b/libgo/runtime/thread-linux.c @@ -4,6 +4,7 @@ #include "runtime.h" #include "defs.h" +#include "signal_unix.h" // Linux futex. // @@ -33,25 +34,22 @@ typedef struct timespec Timespec; void runtime_futexsleep(uint32 *addr, uint32 val, int64 ns) { - Timespec ts, *tsp; - - if(ns < 0) - tsp = nil; - else { - ts.tv_sec = ns/1000000000LL; - ts.tv_nsec = ns%1000000000LL; - // Avoid overflow - if(ts.tv_sec > 1<<30) - ts.tv_sec = 1<<30; - tsp = &ts; - } + Timespec ts; + int32 nsec; // Some Linux kernels have a bug where futex of // FUTEX_WAIT returns an internal error code // as an errno. Libpthread ignores the return value // here, and so can we: as it says a few lines up, // spurious wakeups are allowed. - syscall(__NR_futex, addr, FUTEX_WAIT, val, tsp, nil, 0); + + if(ns < 0) { + syscall(__NR_futex, addr, FUTEX_WAIT, val, nil, nil, 0); + return; + } + ts.tv_sec = runtime_timediv(ns, 1000000000LL, &nsec); + ts.tv_nsec = nsec; + syscall(__NR_futex, addr, FUTEX_WAIT, val, &ts, nil, 0); } // If any procs are sleeping on addr, wake up at most cnt. diff --git a/libgo/runtime/time.goc b/libgo/runtime/time.goc index 8d12fe01080..e4e35ec0846 100644 --- a/libgo/runtime/time.goc +++ b/libgo/runtime/time.goc @@ -12,8 +12,13 @@ package time #include "malloc.h" #include "race.h" +enum { + debug = 0, +}; + static Timers timers; static void addtimer(Timer*); +static void dumptimers(const char*); // Package time APIs. // Godoc uses the comments in package time, not these. @@ -92,6 +97,11 @@ addtimer(Timer *t) int32 n; Timer **nt; + // when must never be negative; otherwise timerproc will overflow + // during its delta calculation and never expire other timers. + if(t->when < 0) + t->when = (int64)((1ULL<<63)-1); + if(timers.len >= timers.cap) { // Grow slice. n = 16; @@ -121,8 +131,13 @@ addtimer(Timer *t) timers.timerproc = __go_go(timerproc, nil); timers.timerproc->issystem = true; } + if(debug) + dumptimers("addtimer"); } +// Used to force a dereference before the lock is acquired. +static int32 gi; + // Delete timer t from the heap. // Do not need to update the timerproc: // if it wakes up early, no big deal. @@ -131,6 +146,11 @@ runtime_deltimer(Timer *t) { int32 i; + // Dereference t so that any panic happens before the lock is held. + // Discard result, because t might be moving in the heap. 
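	// Writing the value into the global gi is what keeps the compiler
	// from treating the read of t->i below as dead and deleting it.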
+ i = t->i; + gi = i; + runtime_lock(&timers); // t may not be registered anymore and may have @@ -152,6 +172,8 @@ runtime_deltimer(Timer *t) siftup(i); siftdown(i); } + if(debug) + dumptimers("deltimer"); runtime_unlock(&timers); return true; } @@ -170,6 +192,7 @@ timerproc(void* dummy __attribute__ ((unused))) for(;;) { runtime_lock(&timers); + timers.sleeping = false; now = runtime_nanotime(); for(;;) { if(timers.len == 0) { @@ -210,9 +233,7 @@ timerproc(void* dummy __attribute__ ((unused))) timers.sleeping = true; runtime_noteclear(&timers.waitnote); runtime_unlock(&timers); - runtime_entersyscallblock(); - runtime_notetsleep(&timers.waitnote, delta); - runtime_exitsyscall(); + runtime_notetsleepg(&timers.waitnote, delta); } } @@ -222,18 +243,20 @@ static void siftup(int32 i) { int32 p; + int64 when; Timer **t, *tmp; t = timers.t; + when = t[i]->when; + tmp = t[i]; while(i > 0) { - p = (i-1)/2; // parent - if(t[i]->when >= t[p]->when) + p = (i-1)/4; // parent + if(when >= t[p]->when) break; - tmp = t[i]; t[i] = t[p]; - t[p] = tmp; t[i]->i = i; - t[p]->i = p; + t[p] = tmp; + tmp->i = p; i = p; } } @@ -241,29 +264,61 @@ siftup(int32 i) static void siftdown(int32 i) { - int32 c, len; + int32 c, c3, len; + int64 when, w, w3; Timer **t, *tmp; t = timers.t; len = timers.len; + when = t[i]->when; + tmp = t[i]; for(;;) { - c = i*2 + 1; // left child + c = i*4 + 1; // left child + c3 = c + 2; // mid child if(c >= len) { break; } - if(c+1 < len && t[c+1]->when < t[c]->when) + w = t[c]->when; + if(c+1 < len && t[c+1]->when < w) { + w = t[c+1]->when; c++; - if(t[c]->when >= t[i]->when) + } + if(c3 < len) { + w3 = t[c3]->when; + if(c3+1 < len && t[c3+1]->when < w3) { + w3 = t[c3+1]->when; + c3++; + } + if(w3 < w) { + w = w3; + c = c3; + } + } + if(w >= when) break; - tmp = t[i]; t[i] = t[c]; - t[c] = tmp; t[i]->i = i; - t[c]->i = c; + t[c] = tmp; + tmp->i = c; i = c; } } +static void +dumptimers(const char *msg) +{ + Timer *t; + int32 i; + + runtime_printf("timers: %s\n", msg); + for(i = 0; i < timers.len; i++) { + t = timers.t[i]; + runtime_printf("\t%d\t%p:\ti %d when %D period %D fn %p\n", + i, t, t->i, t->when, t->period, t->fv->fn); + } + runtime_printf("\n"); +} + void runtime_time_scan(void (*addroot)(Obj)) { |
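One more note on the reshaped timer heap: siftup and siftdown above move the timers to a 4-ary layout, so the parent of slot i is (i-1)/4 and its children are 4*i+1 through 4*i+4, with siftdown comparing the left child and the c+2 mid child and then their right-hand neighbours to find the smallest when. A small index sketch under that layout (illustrative):

	// 4-ary heap indexing used by the timer code above:
	//   parent(i)   = (i-1)/4
	//   children(i) = 4*i+1, 4*i+2, 4*i+3, 4*i+4
	//
	// With 11 timers (slots 0..10):
	//   slot 0 -> children 1,2,3,4
	//   slot 1 -> children 5,6,7,8
	//   slot 2 -> children 9,10      (the rest fall outside len)
	//   parent(7) = (7-1)/4 = 1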