author     ian <ian@138bc75d-0d04-0410-961f-82ee72b054a4>   2013-11-06 19:49:01 +0000
committer  ian <ian@138bc75d-0d04-0410-961f-82ee72b054a4>   2013-11-06 19:49:01 +0000
commit     0ce10ea1348e9afd5d0eec6bca986bfe58bac5ac (patch)
tree       39530b071991b2326f881b2a30a2d82d6c133fd6 /libgo/runtime
parent     57a8bf1b0c6057ccbacb0cf79eb84d1985c2c1fe (diff)
libgo: Update to October 24 version of master library.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@204466 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'libgo/runtime')
-rw-r--r--  libgo/runtime/chan.c | 80
-rw-r--r--  libgo/runtime/cpuprof.c | 8
-rw-r--r--  libgo/runtime/env_posix.c | 5
-rw-r--r--  libgo/runtime/go-byte-array-to-string.c | 2
-rw-r--r--  libgo/runtime/go-caller.c | 20
-rw-r--r--  libgo/runtime/go-int-array-to-string.c | 2
-rw-r--r--  libgo/runtime/go-int-to-string.c | 2
-rw-r--r--  libgo/runtime/go-make-slice.c | 16
-rw-r--r--  libgo/runtime/go-new.c | 4
-rw-r--r--  libgo/runtime/go-reflect-call.c | 16
-rw-r--r--  libgo/runtime/go-signal.c | 16
-rw-r--r--  libgo/runtime/go-string-to-byte-array.c | 3
-rw-r--r--  libgo/runtime/go-string-to-int-array.c | 4
-rw-r--r--  libgo/runtime/go-strplus.c | 2
-rw-r--r--  libgo/runtime/lfstack.c | 8
-rw-r--r--  libgo/runtime/lock_futex.c | 73
-rw-r--r--  libgo/runtime/lock_sema.c | 101
-rw-r--r--  libgo/runtime/malloc.goc | 441
-rw-r--r--  libgo/runtime/malloc.h | 112
-rw-r--r--  libgo/runtime/mcache.c | 88
-rw-r--r--  libgo/runtime/mcentral.c | 61
-rw-r--r--  libgo/runtime/mem.c | 24
-rw-r--r--  libgo/runtime/mfinal.c | 19
-rw-r--r--  libgo/runtime/mfixalloc.c | 10
-rw-r--r--  libgo/runtime/mgc0.c | 610
-rw-r--r--  libgo/runtime/mgc0.h | 1
-rw-r--r--  libgo/runtime/mheap.c | 138
-rw-r--r--  libgo/runtime/mprof.goc | 84
-rw-r--r--  libgo/runtime/msize.c | 39
-rw-r--r--  libgo/runtime/netpoll.goc | 97
-rw-r--r--  libgo/runtime/netpoll_epoll.c | 8
-rw-r--r--  libgo/runtime/netpoll_kqueue.c | 32
-rw-r--r--  libgo/runtime/netpoll_stub.c | 2
-rw-r--r--  libgo/runtime/panic.c | 21
-rw-r--r--  libgo/runtime/parfor.c | 4
-rw-r--r--  libgo/runtime/print.c | 5
-rw-r--r--  libgo/runtime/proc.c | 745
-rw-r--r--  libgo/runtime/race.h | 6
-rw-r--r--  libgo/runtime/runtime.c | 83
-rw-r--r--  libgo/runtime/runtime.h | 101
-rw-r--r--  libgo/runtime/sema.goc | 140
-rw-r--r--  libgo/runtime/signal_unix.c | 4
-rw-r--r--  libgo/runtime/sigqueue.goc | 11
-rw-r--r--  libgo/runtime/string.goc | 23
-rw-r--r--  libgo/runtime/thread-linux.c | 24
-rw-r--r--  libgo/runtime/time.goc | 85
46 files changed, 2062 insertions(+), 1318 deletions(-)
diff --git a/libgo/runtime/chan.c b/libgo/runtime/chan.c
index 6f52a1d5e31..1d9e6681d35 100644
--- a/libgo/runtime/chan.c
+++ b/libgo/runtime/chan.c
@@ -10,8 +10,6 @@
#define NOSELGEN 1
-static int32 debug = 0;
-
typedef struct WaitQ WaitQ;
typedef struct SudoG SudoG;
typedef struct Select Select;
@@ -42,8 +40,9 @@ struct Hchan
uintgo qcount; // total data in the q
uintgo dataqsiz; // size of the circular q
uint16 elemsize;
- bool closed;
uint8 elemalign;
+ uint8 pad; // ensures proper alignment of the buffer that follows Hchan in memory
+ bool closed;
uintgo sendx; // send index
uintgo recvx; // receive index
WaitQ recvq; // list of recv waiters
@@ -59,6 +58,8 @@ uint32 runtime_Hchansize = sizeof(Hchan);
enum
{
+ debug = 0,
+
// Scase.kind
CaseRecv,
CaseSend,
@@ -105,17 +106,17 @@ runtime_makechan_c(ChanType *t, int64 hint)
runtime_panicstring("makechan: size out of range");
n = sizeof(*c);
+ n = ROUND(n, elem->__align);
// allocate memory in one call
- c = (Hchan*)runtime_mal(n + hint*elem->__size);
+ c = (Hchan*)runtime_mallocgc(n + hint*elem->__size, (uintptr)t | TypeInfo_Chan, 0);
c->elemsize = elem->__size;
c->elemalign = elem->__align;
c->dataqsiz = hint;
- runtime_settype(c, (uintptr)t | TypeInfo_Chan);
if(debug)
- runtime_printf("makechan: chan=%p; elemsize=%D; elemalign=%d; dataqsiz=%D\n",
- c, (int64)elem->__size, elem->__align, (int64)c->dataqsiz);
+ runtime_printf("makechan: chan=%p; elemsize=%D; dataqsiz=%D\n",
+ c, (int64)elem->__size, (int64)c->dataqsiz);
return c;
}
@@ -185,7 +186,7 @@ runtime_chansend(ChanType *t, Hchan *c, byte *ep, bool *pres, void *pc)
return; // not reached
}
- if(runtime_gcwaiting)
+ if(runtime_gcwaiting())
runtime_gosched();
if(debug) {
@@ -200,7 +201,6 @@ runtime_chansend(ChanType *t, Hchan *c, byte *ep, bool *pres, void *pc)
}
runtime_lock(c);
- // TODO(dvyukov): add similar instrumentation to select.
if(raceenabled)
runtime_racereadpc(c, pc, runtime_chansend);
if(c->closed)
@@ -311,7 +311,7 @@ runtime_chanrecv(ChanType *t, Hchan* c, byte *ep, bool *selected, bool *received
int64 t0;
G *g;
- if(runtime_gcwaiting)
+ if(runtime_gcwaiting())
runtime_gosched();
if(debug)
@@ -927,6 +927,7 @@ selectgo(Select **selp)
{
Select *sel;
uint32 o, i, j, k;
+ int64 t0;
Scase *cas, *dfl;
Hchan *c;
SudoG *sg;
@@ -935,7 +936,7 @@ selectgo(Select **selp)
G *g;
sel = *selp;
- if(runtime_gcwaiting)
+ if(runtime_gcwaiting())
runtime_gosched();
if(debug)
@@ -943,6 +944,13 @@ selectgo(Select **selp)
g = runtime_g();
+ t0 = 0;
+ if(runtime_blockprofilerate > 0) {
+ t0 = runtime_cputicks();
+ for(i=0; i<sel->ncase; i++)
+ sel->scase[i].sg.releasetime = -1;
+ }
+
// The compiler rewrites selects that statically have
// only 0 or 1 cases plus default into simpler constructs.
// The only way we can end up with such small sel->ncase
@@ -1023,6 +1031,8 @@ loop:
break;
case CaseSend:
+ if(raceenabled)
+ runtime_racereadpc(c, runtime_selectgo, runtime_chansend);
if(c->closed)
goto sclose;
if(c->dataqsiz > 0) {
@@ -1124,6 +1134,8 @@ asyncrecv:
if(sg != nil) {
gp = sg->g;
selunlock(sel);
+ if(sg->releasetime)
+ sg->releasetime = runtime_cputicks();
runtime_ready(gp);
} else {
selunlock(sel);
@@ -1142,6 +1154,8 @@ asyncsend:
if(sg != nil) {
gp = sg->g;
selunlock(sel);
+ if(sg->releasetime)
+ sg->releasetime = runtime_cputicks();
runtime_ready(gp);
} else {
selunlock(sel);
@@ -1161,6 +1175,8 @@ syncrecv:
runtime_memmove(cas->sg.elem, sg->elem, c->elemsize);
gp = sg->g;
gp->param = sg;
+ if(sg->releasetime)
+ sg->releasetime = runtime_cputicks();
runtime_ready(gp);
goto retc;
@@ -1186,11 +1202,15 @@ syncsend:
runtime_memmove(sg->elem, cas->sg.elem, c->elemsize);
gp = sg->g;
gp->param = sg;
+ if(sg->releasetime)
+ sg->releasetime = runtime_cputicks();
runtime_ready(gp);
retc:
// return index corresponding to chosen case
index = cas->index;
+ if(cas->sg.releasetime > 0)
+ runtime_blockevent(cas->sg.releasetime - t0, 2);
runtime_free(sel);
return index;
@@ -1297,17 +1317,36 @@ reflect_rselect(Slice cases)
return ret;
}
+static void closechan(Hchan *c, void *pc);
+
// closechan(sel *byte);
void
runtime_closechan(Hchan *c)
{
+ closechan(c, runtime_getcallerpc(&c));
+}
+
+// For reflect
+// func chanclose(c chan)
+
+void reflect_chanclose(uintptr) __asm__ (GOSYM_PREFIX "reflect.chanclose");
+
+void
+reflect_chanclose(uintptr c)
+{
+ closechan((Hchan*)c, runtime_getcallerpc(&c));
+}
+
+static void
+closechan(Hchan *c, void *pc)
+{
SudoG *sg;
G* gp;
if(c == nil)
runtime_panicstring("close of nil channel");
- if(runtime_gcwaiting)
+ if(runtime_gcwaiting())
runtime_gosched();
runtime_lock(c);
@@ -1317,7 +1356,7 @@ runtime_closechan(Hchan *c)
}
if(raceenabled) {
- runtime_racewritepc(c, runtime_getcallerpc(&c), runtime_closechan);
+ runtime_racewritepc(c, pc, runtime_closechan);
runtime_racerelease(c);
}
@@ -1330,6 +1369,8 @@ runtime_closechan(Hchan *c)
break;
gp = sg->g;
gp->param = nil;
+ if(sg->releasetime)
+ sg->releasetime = runtime_cputicks();
runtime_ready(gp);
}
@@ -1340,6 +1381,8 @@ runtime_closechan(Hchan *c)
break;
gp = sg->g;
gp->param = nil;
+ if(sg->releasetime)
+ sg->releasetime = runtime_cputicks();
runtime_ready(gp);
}
@@ -1353,17 +1396,6 @@ __go_builtin_close(Hchan *c)
}
// For reflect
-// func chanclose(c chan)
-
-void reflect_chanclose(uintptr) __asm__ (GOSYM_PREFIX "reflect.chanclose");
-
-void
-reflect_chanclose(uintptr c)
-{
- runtime_closechan((Hchan*)c);
-}
-
-// For reflect
// func chanlen(c chan) (len int)
intgo reflect_chanlen(uintptr) __asm__ (GOSYM_PREFIX "reflect.chanlen");
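Note on the chan.c hunks above: they thread block-profiling support through select, mirroring what chansend/chanrecv already do elsewhere in this file. A minimal sketch of the pattern, using only names that appear in this patch and assuming the runtime's internal headers; it is illustrative, not code from the patch:

// Sketch of the block-profiling handshake added to select (illustrative only).
// The waiter asks for a timestamp by storing -1 in releasetime; the waking
// goroutine overwrites it with runtime_cputicks() just before runtime_ready();
// the waiter then reports the measured delay to runtime_blockevent.
static void
blockprofile_sketch(SudoG *sg)
{
	int64 t0;

	t0 = 0;
	if(runtime_blockprofilerate > 0) {
		t0 = runtime_cputicks();
		sg->releasetime = -1;   // request a release timestamp from the waker
	}
	// ... enqueue sg and park; the waking side stores runtime_cputicks()
	// into sg->releasetime before calling runtime_ready() ...
	if(sg->releasetime > 0)
		runtime_blockevent(sg->releasetime - t0, 2);
}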
diff --git a/libgo/runtime/cpuprof.c b/libgo/runtime/cpuprof.c
index 516387396ea..a2a1a05ce3d 100644
--- a/libgo/runtime/cpuprof.c
+++ b/libgo/runtime/cpuprof.c
@@ -146,7 +146,7 @@ runtime_SetCPUProfileRate(intgo hz)
runtime_lock(&lk);
if(hz > 0) {
if(prof == nil) {
- prof = runtime_SysAlloc(sizeof *prof);
+ prof = runtime_SysAlloc(sizeof *prof, &mstats.other_sys);
if(prof == nil) {
runtime_printf("runtime: cpu profiling cannot allocate memory\n");
runtime_unlock(&lk);
@@ -340,7 +340,7 @@ getprofile(Profile *p)
if(p->wholding) {
// Release previous log to signal handling side.
- // Loop because we are racing against setprofile(off).
+ // Loop because we are racing against SetCPUProfileRate(0).
for(;;) {
n = p->handoff;
if(n == 0) {
@@ -367,9 +367,7 @@ getprofile(Profile *p)
return ret;
// Wait for new log.
- runtime_entersyscallblock();
- runtime_notesleep(&p->wait);
- runtime_exitsyscall();
+ runtime_notetsleepg(&p->wait, -1);
runtime_noteclear(&p->wait);
n = p->handoff;
diff --git a/libgo/runtime/env_posix.c b/libgo/runtime/env_posix.c
index 7f3fa0d8e0f..3219550af99 100644
--- a/libgo/runtime/env_posix.c
+++ b/libgo/runtime/env_posix.c
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build darwin freebsd linux netbsd openbsd windows
+// +build darwin dragonfly freebsd linux netbsd openbsd windows
#include "runtime.h"
#include "array.h"
@@ -12,7 +12,8 @@ extern Slice syscall_Envs __asm__ (GOSYM_PREFIX "syscall.Envs");
const byte*
runtime_getenv(const char *s)
{
- int32 i, j, len;
+ int32 i, j;
+ intgo len;
const byte *v, *bs;
String* envv;
int32 envc;
diff --git a/libgo/runtime/go-byte-array-to-string.c b/libgo/runtime/go-byte-array-to-string.c
index 0cd63c76d8d..088b78690fe 100644
--- a/libgo/runtime/go-byte-array-to-string.c
+++ b/libgo/runtime/go-byte-array-to-string.c
@@ -16,7 +16,7 @@ __go_byte_array_to_string (const void* p, intgo len)
String ret;
bytes = (const unsigned char *) p;
- retdata = runtime_mallocgc ((uintptr) len, FlagNoPointers, 1, 0);
+ retdata = runtime_mallocgc ((uintptr) len, 0, FlagNoScan);
__builtin_memcpy (retdata, bytes, len);
ret.str = retdata;
ret.len = len;
diff --git a/libgo/runtime/go-caller.c b/libgo/runtime/go-caller.c
index d84580fa594..8ca3c7efcd7 100644
--- a/libgo/runtime/go-caller.c
+++ b/libgo/runtime/go-caller.c
@@ -228,3 +228,23 @@ runtime_funcline_go (Func *f __attribute__((unused)), uintptr targetpc)
runtime_memclr (&ret, sizeof ret);
return ret;
}
+
+/* Return the name of a function. */
+String runtime_funcname_go (Func *f)
+ __asm__ (GOSYM_PREFIX "runtime.funcname_go");
+
+String
+runtime_funcname_go (Func *f)
+{
+ return f->name;
+}
+
+/* Return the entry point of a function. */
+uintptr runtime_funcentry_go(Func *f)
+ __asm__ (GOSYM_PREFIX "runtime.funcentry_go");
+
+uintptr
+runtime_funcentry_go (Func *f)
+{
+ return f->entry;
+}
diff --git a/libgo/runtime/go-int-array-to-string.c b/libgo/runtime/go-int-array-to-string.c
index 6cae2fd8ccb..d93fe651d95 100644
--- a/libgo/runtime/go-int-array-to-string.c
+++ b/libgo/runtime/go-int-array-to-string.c
@@ -41,7 +41,7 @@ __go_int_array_to_string (const void* p, intgo len)
slen += 4;
}
- retdata = runtime_mallocgc ((uintptr) slen, FlagNoPointers, 1, 0);
+ retdata = runtime_mallocgc ((uintptr) slen, 0, FlagNoScan);
ret.str = retdata;
ret.len = slen;
diff --git a/libgo/runtime/go-int-to-string.c b/libgo/runtime/go-int-to-string.c
index eb441674b6c..d90b1ddfed1 100644
--- a/libgo/runtime/go-int-to-string.c
+++ b/libgo/runtime/go-int-to-string.c
@@ -60,7 +60,7 @@ __go_int_to_string (intgo v)
}
}
- retdata = runtime_mallocgc (len, FlagNoPointers, 1, 0);
+ retdata = runtime_mallocgc (len, 0, FlagNoScan);
__builtin_memcpy (retdata, buf, len);
ret.str = retdata;
ret.len = len;
diff --git a/libgo/runtime/go-make-slice.c b/libgo/runtime/go-make-slice.c
index f08cb012dc8..855bb17ce59 100644
--- a/libgo/runtime/go-make-slice.c
+++ b/libgo/runtime/go-make-slice.c
@@ -55,15 +55,15 @@ __go_make_slice2 (const struct __go_type_descriptor *td, uintptr_t len,
if (size == 0)
ret.__values = &runtime_zerobase;
else if ((std->__element_type->__code & GO_NO_POINTERS) != 0)
- ret.__values = runtime_mallocgc (size, FlagNoPointers, 1, 1);
+ ret.__values =
+ runtime_mallocgc (size,
+ (uintptr) std->__element_type | TypeInfo_Array,
+ FlagNoScan);
else
- {
- ret.__values = runtime_mallocgc (size, 0, 1, 1);
-
- if (UseSpanType)
- runtime_settype (ret.__values,
- (uintptr) std->__element_type | TypeInfo_Array);
- }
+ ret.__values =
+ runtime_mallocgc (size,
+ (uintptr) std->__element_type | TypeInfo_Array,
+ 0);
return ret;
}
diff --git a/libgo/runtime/go-new.c b/libgo/runtime/go-new.c
index b1af5f22473..9d46706eaa4 100644
--- a/libgo/runtime/go-new.c
+++ b/libgo/runtime/go-new.c
@@ -12,11 +12,11 @@
void *
__go_new (uintptr_t size)
{
- return runtime_mallocgc (size, 0, 1, 1);
+ return runtime_mallocgc (size, 0, 0);
}
void *
__go_new_nopointers (uintptr_t size)
{
- return runtime_mallocgc (size, FlagNoPointers, 1, 1);
+ return runtime_mallocgc (size, 0, FlagNoScan);
}
diff --git a/libgo/runtime/go-reflect-call.c b/libgo/runtime/go-reflect-call.c
index 5cf370798bf..0fed68a50e7 100644
--- a/libgo/runtime/go-reflect-call.c
+++ b/libgo/runtime/go-reflect-call.c
@@ -271,7 +271,21 @@ go_func_return_ffi (const struct __go_func_type *func)
types = (const struct __go_type_descriptor **) func->__out.__values;
if (count == 1)
- return go_type_to_ffi (types[0]);
+ {
+
+#if defined (__i386__) && !defined (__x86_64__)
+ /* FFI does not support complex types. On 32-bit x86, a
+ complex64 will be returned in %eax/%edx. We normally tell
+ FFI that a complex64 is a struct of two floats. On 32-bit
+ x86 a struct of two floats is returned via a hidden first
+ pointer parameter. Fortunately we can make everything work
+ by pretending that complex64 is int64. */
+ if ((types[0]->__code & GO_CODE_MASK) == GO_COMPLEX64)
+ return &ffi_type_sint64;
+#endif
+
+ return go_type_to_ffi (types[0]);
+ }
ret = (ffi_type *) __go_alloc (sizeof (ffi_type));
ret->type = FFI_TYPE_STRUCT;
diff --git a/libgo/runtime/go-signal.c b/libgo/runtime/go-signal.c
index 23a94db4157..4f0dcc78c17 100644
--- a/libgo/runtime/go-signal.c
+++ b/libgo/runtime/go-signal.c
@@ -139,22 +139,6 @@ SigTab runtime_sigtab[] = {
#undef P
#undef D
-
-static int8 badsignal[] = "runtime: signal received on thread not created by Go.\n";
-
-static void
-runtime_badsignal(int32 sig)
-{
- // Avoid -D_FORTIFY_SOURCE problems.
- int rv __attribute__((unused));
-
- if (sig == SIGPROF) {
- return; // Ignore SIGPROFs intended for a non-Go thread.
- }
- rv = runtime_write(2, badsignal, sizeof badsignal - 1);
- runtime_exit(1);
-}
-
/* Handle a signal, for cases where we don't panic. We can split the
stack here. */
diff --git a/libgo/runtime/go-string-to-byte-array.c b/libgo/runtime/go-string-to-byte-array.c
index 75fac1dbfe6..5e030330f29 100644
--- a/libgo/runtime/go-string-to-byte-array.c
+++ b/libgo/runtime/go-string-to-byte-array.c
@@ -15,7 +15,8 @@ __go_string_to_byte_array (String str)
unsigned char *data;
struct __go_open_array ret;
- data = (unsigned char *) runtime_mallocgc (str.len, FlagNoPointers, 1, 0);
+ data = (unsigned char *) runtime_mallocgc (str.len, 0,
+ FlagNoScan | FlagNoZero);
__builtin_memcpy (data, str.str, str.len);
ret.__values = (void *) data;
ret.__count = str.len;
diff --git a/libgo/runtime/go-string-to-int-array.c b/libgo/runtime/go-string-to-int-array.c
index 16970bdd042..d91c9e2df82 100644
--- a/libgo/runtime/go-string-to-int-array.c
+++ b/libgo/runtime/go-string-to-int-array.c
@@ -32,8 +32,8 @@ __go_string_to_int_array (String str)
p += __go_get_rune (p, pend - p, &rune);
}
- data = (uint32_t *) runtime_mallocgc (c * sizeof (uint32_t), FlagNoPointers,
- 1, 0);
+ data = (uint32_t *) runtime_mallocgc (c * sizeof (uint32_t), 0,
+ FlagNoScan | FlagNoZero);
p = str.str;
pd = data;
while (p < pend)
diff --git a/libgo/runtime/go-strplus.c b/libgo/runtime/go-strplus.c
index d6e6df67fce..13915e3e673 100644
--- a/libgo/runtime/go-strplus.c
+++ b/libgo/runtime/go-strplus.c
@@ -21,7 +21,7 @@ __go_string_plus (String s1, String s2)
return s1;
len = s1.len + s2.len;
- retdata = runtime_mallocgc (len, FlagNoPointers, 1, 0);
+ retdata = runtime_mallocgc (len, 0, FlagNoScan | FlagNoZero);
__builtin_memcpy (retdata, s1.str, s1.len);
__builtin_memcpy (retdata + s1.len, s2.str, s2.len);
ret.str = retdata;
diff --git a/libgo/runtime/lfstack.c b/libgo/runtime/lfstack.c
index 230ed87c43f..132783c3644 100644
--- a/libgo/runtime/lfstack.c
+++ b/libgo/runtime/lfstack.c
@@ -41,10 +41,10 @@ runtime_lfstackpush(uint64 *head, LFNode *node)
node->pushcnt++;
new = (uint64)(uintptr)node|(((uint64)node->pushcnt&CNT_MASK)<<PTR_BITS);
- old = runtime_atomicload64(head);
for(;;) {
+ old = runtime_atomicload64(head);
node->next = (LFNode*)(uintptr)(old&PTR_MASK);
- if(runtime_cas64(head, &old, new))
+ if(runtime_cas64(head, old, new))
break;
}
}
@@ -55,8 +55,8 @@ runtime_lfstackpop(uint64 *head)
LFNode *node, *node2;
uint64 old, new;
- old = runtime_atomicload64(head);
for(;;) {
+ old = runtime_atomicload64(head);
if(old == 0)
return nil;
node = (LFNode*)(uintptr)(old&PTR_MASK);
@@ -64,7 +64,7 @@ runtime_lfstackpop(uint64 *head)
new = 0;
if(node2 != nil)
new = (uint64)(uintptr)node2|(((uint64)node2->pushcnt&CNT_MASK)<<PTR_BITS);
- if(runtime_cas64(head, &old, new))
+ if(runtime_cas64(head, old, new))
return node;
}
}
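The lfstack.c hunks track a changed runtime_cas64 signature: the expected value is now passed by value rather than by pointer, so the loop reloads the head on every retry instead of relying on the CAS to write back the observed value. A minimal sketch of the retry shape, assuming the runtime's internal headers (illustrative, not from the patch):

// Sketch of the lock-free push retry loop under the new cas64 convention.
static void
lfstack_push_sketch(uint64 *head, LFNode *node, uint64 new)
{
	uint64 old;

	for(;;) {
		old = runtime_atomicload64(head);                // reload on every attempt
		node->next = (LFNode*)(uintptr)(old&PTR_MASK);   // link behind current top
		if(runtime_cas64(head, old, new))                // expected value passed by value
			break;                                   // published; done
		// lost the race: another thread changed *head, loop and retry
	}
}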
diff --git a/libgo/runtime/lock_futex.c b/libgo/runtime/lock_futex.c
index 4b9651a75de..fa270132895 100644
--- a/libgo/runtime/lock_futex.c
+++ b/libgo/runtime/lock_futex.c
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build freebsd linux
+// +build dragonfly freebsd linux
#include "runtime.h"
@@ -91,14 +91,14 @@ runtime_unlock(Lock *l)
{
uint32 v;
- if(--runtime_m()->locks < 0)
- runtime_throw("runtime_unlock: lock count");
-
v = runtime_xchg((uint32*)&l->key, MUTEX_UNLOCKED);
if(v == MUTEX_UNLOCKED)
runtime_throw("unlock of unlocked lock");
if(v == MUTEX_SLEEPING)
runtime_futexwakeup((uint32*)&l->key, 1);
+
+ if(--runtime_m()->locks < 0)
+ runtime_throw("runtime_unlock: lock count");
}
// One-time notifications.
@@ -111,37 +111,45 @@ runtime_noteclear(Note *n)
void
runtime_notewakeup(Note *n)
{
- if(runtime_xchg((uint32*)&n->key, 1))
+ uint32 old;
+
+ old = runtime_xchg((uint32*)&n->key, 1);
+ if(old != 0) {
+ runtime_printf("notewakeup - double wakeup (%d)\n", old);
runtime_throw("notewakeup - double wakeup");
+ }
runtime_futexwakeup((uint32*)&n->key, 1);
}
void
runtime_notesleep(Note *n)
{
- if(runtime_m()->profilehz > 0)
- runtime_setprof(false);
+ /* For gccgo it's OK to sleep in non-g0, and it happens in
+ stoptheworld because we have not implemented preemption.
+
+ if(runtime_g() != runtime_m()->g0)
+ runtime_throw("notesleep not on g0");
+ */
while(runtime_atomicload((uint32*)&n->key) == 0)
runtime_futexsleep((uint32*)&n->key, 0, -1);
- if(runtime_m()->profilehz > 0)
- runtime_setprof(true);
}
-void
-runtime_notetsleep(Note *n, int64 ns)
+static bool
+notetsleep(Note *n, int64 ns, int64 deadline, int64 now)
{
- int64 deadline, now;
+ // Conceptually, deadline and now are local variables.
+ // They are passed as arguments so that the space for them
+ // does not count against our nosplit stack sequence.
if(ns < 0) {
- runtime_notesleep(n);
- return;
+ while(runtime_atomicload((uint32*)&n->key) == 0)
+ runtime_futexsleep((uint32*)&n->key, 0, -1);
+ return true;
}
if(runtime_atomicload((uint32*)&n->key) != 0)
- return;
+ return true;
- if(runtime_m()->profilehz > 0)
- runtime_setprof(false);
deadline = runtime_nanotime() + ns;
for(;;) {
runtime_futexsleep((uint32*)&n->key, 0, ns);
@@ -152,6 +160,33 @@ runtime_notetsleep(Note *n, int64 ns)
break;
ns = deadline - now;
}
- if(runtime_m()->profilehz > 0)
- runtime_setprof(true);
+ return runtime_atomicload((uint32*)&n->key) != 0;
+}
+
+bool
+runtime_notetsleep(Note *n, int64 ns)
+{
+ bool res;
+
+ if(runtime_g() != runtime_m()->g0 && !runtime_m()->gcing)
+ runtime_throw("notetsleep not on g0");
+
+ res = notetsleep(n, ns, 0, 0);
+ return res;
+}
+
+// same as runtime_notetsleep, but called on user g (not g0)
+// calls only nosplit functions between entersyscallblock/exitsyscall
+bool
+runtime_notetsleepg(Note *n, int64 ns)
+{
+ bool res;
+
+ if(runtime_g() == runtime_m()->g0)
+ runtime_throw("notetsleepg on g0");
+
+ runtime_entersyscallblock();
+ res = notetsleep(n, ns, 0, 0);
+ runtime_exitsyscall();
+ return res;
}
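runtime_notetsleepg is the variant intended for user goroutines; the cpuprof.c hunk earlier in this patch shows the migration from the explicit entersyscallblock/notesleep/exitsyscall sequence. A hedged usage sketch with a hypothetical caller (wait_for_event_sketch is not part of the patch):

// Hypothetical example: wait up to 100ms for a wakeup from another thread.
static bool
wait_for_event_sketch(Note *n)
{
	bool ok;

	runtime_noteclear(n);                        // arm the note
	// ... hand n to the thread that will call runtime_notewakeup(n) ...
	ok = runtime_notetsleepg(n, 100*1000*1000);  // timeout in ns; -1 means no timeout
	return ok;                                   // true if woken, false if timed out
}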
diff --git a/libgo/runtime/lock_sema.c b/libgo/runtime/lock_sema.c
index 2663c5463de..ce435119323 100644
--- a/libgo/runtime/lock_sema.c
+++ b/libgo/runtime/lock_sema.c
@@ -95,9 +95,6 @@ runtime_unlock(Lock *l)
uintptr v;
M *mp;
- if(--runtime_m()->locks < 0)
- runtime_throw("runtime_unlock: lock count");
-
for(;;) {
v = (uintptr)runtime_atomicloadp((void**)&l->key);
if(v == LOCKED) {
@@ -114,6 +111,9 @@ runtime_unlock(Lock *l)
}
}
}
+
+ if(--runtime_m()->locks < 0)
+ runtime_throw("runtime_unlock: lock count");
}
// One-time notifications.
@@ -151,6 +151,10 @@ runtime_notesleep(Note *n)
M *m;
m = runtime_m();
+
+ if(runtime_g() != m->g0)
+ runtime_throw("notesleep not on g0");
+
if(m->waitsema == 0)
m->waitsema = runtime_semacreate();
if(!runtime_casp((void**)&n->key, nil, m)) { // must be LOCKED (got wakeup)
@@ -159,61 +163,49 @@ runtime_notesleep(Note *n)
return;
}
// Queued. Sleep.
- if(m->profilehz > 0)
- runtime_setprof(false);
runtime_semasleep(-1);
- if(m->profilehz > 0)
- runtime_setprof(true);
}
-void
-runtime_notetsleep(Note *n, int64 ns)
+static bool
+notetsleep(Note *n, int64 ns, int64 deadline, M *mp)
{
M *m;
- M *mp;
- int64 deadline, now;
-
- if(ns < 0) {
- runtime_notesleep(n);
- return;
- }
m = runtime_m();
- if(m->waitsema == 0)
- m->waitsema = runtime_semacreate();
+
+ // Conceptually, deadline and mp are local variables.
+ // They are passed as arguments so that the space for them
+ // does not count against our nosplit stack sequence.
// Register for wakeup on n->waitm.
if(!runtime_casp((void**)&n->key, nil, m)) { // must be LOCKED (got wakeup already)
if(n->key != LOCKED)
runtime_throw("notetsleep - waitm out of sync");
- return;
+ return true;
+ }
+
+ if(ns < 0) {
+ // Queued. Sleep.
+ runtime_semasleep(-1);
+ return true;
}
- if(m->profilehz > 0)
- runtime_setprof(false);
deadline = runtime_nanotime() + ns;
for(;;) {
// Registered. Sleep.
if(runtime_semasleep(ns) >= 0) {
// Acquired semaphore, semawakeup unregistered us.
// Done.
- if(m->profilehz > 0)
- runtime_setprof(true);
- return;
+ return true;
}
// Interrupted or timed out. Still registered. Semaphore not acquired.
- now = runtime_nanotime();
- if(now >= deadline)
+ ns = deadline - runtime_nanotime();
+ if(ns <= 0)
break;
-
// Deadline hasn't arrived. Keep sleeping.
- ns = deadline - now;
}
- if(m->profilehz > 0)
- runtime_setprof(true);
-
// Deadline arrived. Still registered. Semaphore not acquired.
// Want to give up and return, but have to unregister first,
// so that any notewakeup racing with the return does not
@@ -223,15 +215,54 @@ runtime_notetsleep(Note *n, int64 ns)
if(mp == m) {
// No wakeup yet; unregister if possible.
if(runtime_casp((void**)&n->key, mp, nil))
- return;
+ return false;
} else if(mp == (M*)LOCKED) {
// Wakeup happened so semaphore is available.
// Grab it to avoid getting out of sync.
if(runtime_semasleep(-1) < 0)
runtime_throw("runtime: unable to acquire - semaphore out of sync");
- return;
- } else {
+ return true;
+ } else
runtime_throw("runtime: unexpected waitm - semaphore out of sync");
- }
}
}
+
+bool
+runtime_notetsleep(Note *n, int64 ns)
+{
+ M *m;
+ bool res;
+
+ m = runtime_m();
+
+ if(runtime_g() != m->g0 && !m->gcing)
+ runtime_throw("notetsleep not on g0");
+
+ if(m->waitsema == 0)
+ m->waitsema = runtime_semacreate();
+
+ res = notetsleep(n, ns, 0, nil);
+ return res;
+}
+
+// same as runtime_notetsleep, but called on user g (not g0)
+// calls only nosplit functions between entersyscallblock/exitsyscall
+bool
+runtime_notetsleepg(Note *n, int64 ns)
+{
+ M *m;
+ bool res;
+
+ m = runtime_m();
+
+ if(runtime_g() == m->g0)
+ runtime_throw("notetsleepg on g0");
+
+ if(m->waitsema == 0)
+ m->waitsema = runtime_semacreate();
+
+ runtime_entersyscallblock();
+ res = notetsleep(n, ns, 0, nil);
+ runtime_exitsyscall();
+ return res;
+}
diff --git a/libgo/runtime/malloc.goc b/libgo/runtime/malloc.goc
index 8ccaa6b888c..d349f4749fa 100644
--- a/libgo/runtime/malloc.goc
+++ b/libgo/runtime/malloc.goc
@@ -18,7 +18,17 @@ package runtime
#include "go-type.h"
#include "race.h"
-MHeap *runtime_mheap;
+// Map gccgo field names to gc field names.
+// Eface aka __go_empty_interface.
+#define type __type_descriptor
+// Type aka __go_type_descriptor
+#define kind __code
+#define string __reflection
+#define KindPtr GO_PTR
+#define KindNoPointers GO_NO_POINTERS
+
+// Mark mheap as 'no pointers', it does not contain interesting pointers but occupies ~45K.
+MHeap runtime_mheap;
int32 runtime_checking;
@@ -30,19 +40,28 @@ extern volatile intgo runtime_MemProfileRate
// Allocate an object of at least size bytes.
// Small objects are allocated from the per-thread cache's free lists.
// Large objects (> 32 kB) are allocated straight from the heap.
+// If the block will be freed with runtime_free(), typ must be 0.
void*
-runtime_mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed)
+runtime_mallocgc(uintptr size, uintptr typ, uint32 flag)
{
M *m;
G *g;
int32 sizeclass;
intgo rate;
MCache *c;
+ MCacheList *l;
uintptr npages;
MSpan *s;
- void *v;
+ MLink *v;
bool incallback;
+ if(size == 0) {
+ // All 0-length allocations use this pointer.
+ // The language does not require the allocations to
+ // have distinct values.
+ return &runtime_zerobase;
+ }
+
m = runtime_m();
g = runtime_g();
@@ -56,34 +75,45 @@ runtime_mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed)
runtime_exitsyscall();
m = runtime_m();
incallback = true;
- dogc = false;
+ flag |= FlagNoGC;
}
- if(runtime_gcwaiting && g != m->g0 && m->locks == 0 && dogc) {
+ if(runtime_gcwaiting() && g != m->g0 && m->locks == 0 && !(flag & FlagNoGC)) {
runtime_gosched();
m = runtime_m();
}
if(m->mallocing)
runtime_throw("malloc/free - deadlock");
+ // Disable preemption during settype_flush.
+ // We can not use m->mallocing for this, because settype_flush calls mallocgc.
+ m->locks++;
m->mallocing = 1;
- if(size == 0)
- size = 1;
if(DebugTypeAtBlockEnd)
size += sizeof(uintptr);
c = m->mcache;
- c->local_nmalloc++;
if(size <= MaxSmallSize) {
// Allocate from mcache free lists.
- sizeclass = runtime_SizeToClass(size);
+ // Inlined version of SizeToClass().
+ if(size <= 1024-8)
+ sizeclass = runtime_size_to_class8[(size+7)>>3];
+ else
+ sizeclass = runtime_size_to_class128[(size-1024+127) >> 7];
size = runtime_class_to_size[sizeclass];
- v = runtime_MCache_Alloc(c, sizeclass, size, zeroed);
- if(v == nil)
- runtime_throw("out of memory");
- c->local_alloc += size;
- c->local_total_alloc += size;
- c->local_by_size[sizeclass].nmalloc++;
+ l = &c->list[sizeclass];
+ if(l->list == nil)
+ runtime_MCache_Refill(c, sizeclass);
+ v = l->list;
+ l->list = v->next;
+ l->nlist--;
+ if(!(flag & FlagNoZero)) {
+ v->next = nil;
+ // block is zeroed iff second word is zero ...
+ if(size > sizeof(uintptr) && ((uintptr*)v)[1] != 0)
+ runtime_memclr((byte*)v, size);
+ }
+ c->local_cachealloc += size;
} else {
// TODO(rsc): Report tracebacks for very large allocations.
@@ -91,32 +121,39 @@ runtime_mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed)
npages = size >> PageShift;
if((size & PageMask) != 0)
npages++;
- s = runtime_MHeap_Alloc(runtime_mheap, npages, 0, 1, zeroed);
+ s = runtime_MHeap_Alloc(&runtime_mheap, npages, 0, 1, !(flag & FlagNoZero));
if(s == nil)
runtime_throw("out of memory");
+ s->limit = (byte*)(s->start<<PageShift) + size;
size = npages<<PageShift;
- c->local_alloc += size;
- c->local_total_alloc += size;
v = (void*)(s->start << PageShift);
// setup for mark sweep
runtime_markspan(v, 0, 0, true);
}
- if (sizeof(void*) == 4 && c->local_total_alloc >= (1<<30)) {
- // purge cache stats to prevent overflow
- runtime_lock(runtime_mheap);
- runtime_purgecachedstats(c);
- runtime_unlock(runtime_mheap);
- }
-
if(!(flag & FlagNoGC))
- runtime_markallocated(v, size, (flag&FlagNoPointers) != 0);
+ runtime_markallocated(v, size, (flag&FlagNoScan) != 0);
if(DebugTypeAtBlockEnd)
- *(uintptr*)((uintptr)v+size-sizeof(uintptr)) = 0;
+ *(uintptr*)((uintptr)v+size-sizeof(uintptr)) = typ;
+
+ // TODO: save type even if FlagNoScan? Potentially expensive but might help
+ // heap profiling/tracing.
+ if(UseSpanType && !(flag & FlagNoScan) && typ != 0) {
+ uintptr *buf, i;
+
+ buf = m->settype_buf;
+ i = m->settype_bufsize;
+ buf[i++] = (uintptr)v;
+ buf[i++] = typ;
+ m->settype_bufsize = i;
+ }
m->mallocing = 0;
+ if(UseSpanType && !(flag & FlagNoScan) && typ != 0 && m->settype_bufsize == nelem(m->settype_buf))
+ runtime_settype_flush(m);
+ m->locks--;
if(!(flag & FlagNoProfiling) && (rate = runtime_MemProfileRate) > 0) {
if(size >= (uint32) rate)
@@ -135,13 +172,11 @@ runtime_mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed)
}
}
- if(dogc && mstats.heap_alloc >= mstats.next_gc)
+ if(!(flag & FlagNoInvokeGC) && mstats.heap_alloc >= mstats.next_gc)
runtime_gc(0);
- if(raceenabled) {
- runtime_racemalloc(v, size, m->racepc);
- m->racepc = nil;
- }
+ if(raceenabled)
+ runtime_racemalloc(v, size);
if(incallback)
runtime_entersyscall();
@@ -152,7 +187,7 @@ runtime_mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed)
void*
__go_alloc(uintptr size)
{
- return runtime_mallocgc(size, 0, 0, 1);
+ return runtime_mallocgc(size, 0, FlagNoInvokeGC);
}
// Free the object whose base pointer is v.
@@ -197,7 +232,9 @@ __go_free(void *v)
// they might coalesce v into other spans and change the bitmap further.
runtime_markfreed(v, size);
runtime_unmarkspan(v, 1<<PageShift);
- runtime_MHeap_Free(runtime_mheap, s, 1);
+ runtime_MHeap_Free(&runtime_mheap, s, 1);
+ c->local_nlargefree++;
+ c->local_largefree += size;
} else {
// Small object.
size = runtime_class_to_size[sizeclass];
@@ -207,11 +244,9 @@ __go_free(void *v)
// it might coalesce v and other blocks into a bigger span
// and change the bitmap further.
runtime_markfreed(v, size);
- c->local_by_size[sizeclass].nfree++;
+ c->local_nsmallfree[sizeclass]++;
runtime_MCache_Free(c, v, sizeclass, size);
}
- c->local_nfree++;
- c->local_alloc -= size;
if(prof)
runtime_MProf_Free(v, size);
m->mallocing = 0;
@@ -230,12 +265,12 @@ runtime_mlookup(void *v, byte **base, uintptr *size, MSpan **sp)
m->mcache->local_nlookup++;
if (sizeof(void*) == 4 && m->mcache->local_nlookup >= (1<<30)) {
// purge cache stats to prevent overflow
- runtime_lock(runtime_mheap);
+ runtime_lock(&runtime_mheap);
runtime_purgecachedstats(m->mcache);
- runtime_unlock(runtime_mheap);
+ runtime_unlock(&runtime_mheap);
}
- s = runtime_MHeap_LookupMaybe(runtime_mheap, v);
+ s = runtime_MHeap_LookupMaybe(&runtime_mheap, v);
if(sp)
*sp = s;
if(s == nil) {
@@ -257,11 +292,6 @@ runtime_mlookup(void *v, byte **base, uintptr *size, MSpan **sp)
return 1;
}
- if((byte*)v >= (byte*)s->limit) {
- // pointers past the last block do not count as pointers.
- return 0;
- }
-
n = s->elemsize;
if(base) {
i = ((byte*)v - p)/n;
@@ -279,11 +309,9 @@ runtime_allocmcache(void)
intgo rate;
MCache *c;
- runtime_lock(runtime_mheap);
- c = runtime_FixAlloc_Alloc(&runtime_mheap->cachealloc);
- mstats.mcache_inuse = runtime_mheap->cachealloc.inuse;
- mstats.mcache_sys = runtime_mheap->cachealloc.sys;
- runtime_unlock(runtime_mheap);
+ runtime_lock(&runtime_mheap);
+ c = runtime_FixAlloc_Alloc(&runtime_mheap.cachealloc);
+ runtime_unlock(&runtime_mheap);
runtime_memclr((byte*)c, sizeof(*c));
// Set first allocation sample size.
@@ -300,30 +328,32 @@ void
runtime_freemcache(MCache *c)
{
runtime_MCache_ReleaseAll(c);
- runtime_lock(runtime_mheap);
+ runtime_lock(&runtime_mheap);
runtime_purgecachedstats(c);
- runtime_FixAlloc_Free(&runtime_mheap->cachealloc, c);
- runtime_unlock(runtime_mheap);
+ runtime_FixAlloc_Free(&runtime_mheap.cachealloc, c);
+ runtime_unlock(&runtime_mheap);
}
void
runtime_purgecachedstats(MCache *c)
{
+ MHeap *h;
+ int32 i;
+
// Protected by either heap or GC lock.
+ h = &runtime_mheap;
mstats.heap_alloc += c->local_cachealloc;
c->local_cachealloc = 0;
- mstats.heap_objects += c->local_objects;
- c->local_objects = 0;
- mstats.nmalloc += c->local_nmalloc;
- c->local_nmalloc = 0;
- mstats.nfree += c->local_nfree;
- c->local_nfree = 0;
mstats.nlookup += c->local_nlookup;
c->local_nlookup = 0;
- mstats.alloc += c->local_alloc;
- c->local_alloc= 0;
- mstats.total_alloc += c->local_total_alloc;
- c->local_total_alloc= 0;
+ h->largefree += c->local_largefree;
+ c->local_largefree = 0;
+ h->nlargefree += c->local_nlargefree;
+ c->local_nlargefree = 0;
+ for(i=0; i<(int32)nelem(c->local_nsmallfree); i++) {
+ h->nsmallfree[i] += c->local_nsmallfree[i];
+ c->local_nsmallfree[i] = 0;
+ }
}
extern uintptr runtime_sizeof_C_MStats
@@ -335,24 +365,24 @@ void
runtime_mallocinit(void)
{
byte *p;
- uintptr arena_size, bitmap_size;
+ uintptr arena_size, bitmap_size, spans_size;
extern byte _end[];
byte *want;
uintptr limit;
+ uint64 i;
runtime_sizeof_C_MStats = sizeof(MStats);
p = nil;
arena_size = 0;
bitmap_size = 0;
-
+ spans_size = 0;
+
// for 64-bit build
USED(p);
USED(arena_size);
USED(bitmap_size);
-
- if((runtime_mheap = runtime_SysAlloc(sizeof(*runtime_mheap))) == nil)
- runtime_throw("runtime: cannot allocate heap metadata");
+ USED(spans_size);
runtime_InitSizes();
@@ -369,15 +399,17 @@ runtime_mallocinit(void)
// 128 GB (MaxMem) should be big enough for now.
//
// The code will work with the reservation at any address, but ask
- // SysReserve to use 0x000000c000000000 if possible.
+ // SysReserve to use 0x0000XXc000000000 if possible (XX=00...7f).
// Allocating a 128 GB region takes away 37 bits, and the amd64
// doesn't let us choose the top 17 bits, so that leaves the 11 bits
// in the middle of 0x00c0 for us to choose. Choosing 0x00c0 means
- // that the valid memory addresses will begin 0x00c0, 0x00c1, ..., 0x0x00df.
+ // that the valid memory addresses will begin 0x00c0, 0x00c1, ..., 0x00df.
// In little-endian, that's c0 00, c1 00, ..., df 00. None of those are valid
// UTF-8 sequences, and they are otherwise as far away from
- // ff (likely a common byte) as possible. An earlier attempt to use 0x11f8
- // caused out of memory errors on OS X during thread allocations.
+ // ff (likely a common byte) as possible. If that fails, we try other 0xXXc0
+ // addresses. An earlier attempt to use 0x11f8 caused out of memory errors
+ // on OS X during thread allocations. 0x00c0 causes conflicts with
+ // AddressSanitizer which reserves all memory up to 0x0100.
// These choices are both for debuggability and to reduce the
// odds of the conservative garbage collector not collecting memory
// because some non-pointer block of memory had a bit pattern
@@ -389,7 +421,14 @@ runtime_mallocinit(void)
// If this fails we fall back to the 32 bit memory mechanism
arena_size = MaxMem;
bitmap_size = arena_size / (sizeof(void*)*8/4);
- p = runtime_SysReserve((void*)(0x00c0ULL<<32), bitmap_size + arena_size);
+ spans_size = arena_size / PageSize * sizeof(runtime_mheap.spans[0]);
+ spans_size = ROUND(spans_size, PageSize);
+ for(i = 0; i <= 0x7f; i++) {
+ p = (void*)(uintptr)(i<<40 | 0x00c0ULL<<32);
+ p = runtime_SysReserve(p, bitmap_size + spans_size + arena_size);
+ if(p != nil)
+ break;
+ }
}
if (p == nil) {
// On a 32-bit machine, we can't typically get away
@@ -411,11 +450,14 @@ runtime_mallocinit(void)
// of address space, which is probably too much in a 32-bit world.
bitmap_size = MaxArena32 / (sizeof(void*)*8/4);
arena_size = 512<<20;
- if(limit > 0 && arena_size+bitmap_size > limit) {
+ spans_size = MaxArena32 / PageSize * sizeof(runtime_mheap.spans[0]);
+ if(limit > 0 && arena_size+bitmap_size+spans_size > limit) {
bitmap_size = (limit / 9) & ~((1<<PageShift) - 1);
arena_size = bitmap_size * 8;
+ spans_size = arena_size / PageSize * sizeof(runtime_mheap.spans[0]);
}
-
+ spans_size = ROUND(spans_size, PageSize);
+
// SysReserve treats the address we ask for, end, as a hint,
// not as an absolute requirement. If we ask for the end
// of the data segment but the operating system requires
@@ -425,25 +467,27 @@ runtime_mallocinit(void)
// So adjust it upward a little bit ourselves: 1/4 MB to get
// away from the running binary image and then round up
// to a MB boundary.
- want = (byte*)(((uintptr)_end + (1<<18) + (1<<20) - 1)&~((1<<20)-1));
- if(0xffffffff - (uintptr)want <= bitmap_size + arena_size)
+ want = (byte*)ROUND((uintptr)_end + (1<<18), 1<<20);
+ if(0xffffffff - (uintptr)want <= bitmap_size + spans_size + arena_size)
want = 0;
- p = runtime_SysReserve(want, bitmap_size + arena_size);
+ p = runtime_SysReserve(want, bitmap_size + spans_size + arena_size);
if(p == nil)
runtime_throw("runtime: cannot reserve arena virtual address space");
if((uintptr)p & (((uintptr)1<<PageShift)-1))
- runtime_printf("runtime: SysReserve returned unaligned address %p; asked for %p", p, bitmap_size+arena_size);
+ runtime_printf("runtime: SysReserve returned unaligned address %p; asked for %p", p,
+ bitmap_size+spans_size+arena_size);
}
if((uintptr)p & (((uintptr)1<<PageShift)-1))
runtime_throw("runtime: SysReserve returned unaligned address");
- runtime_mheap->bitmap = p;
- runtime_mheap->arena_start = p + bitmap_size;
- runtime_mheap->arena_used = runtime_mheap->arena_start;
- runtime_mheap->arena_end = runtime_mheap->arena_start + arena_size;
+ runtime_mheap.spans = (MSpan**)p;
+ runtime_mheap.bitmap = p + spans_size;
+ runtime_mheap.arena_start = p + spans_size + bitmap_size;
+ runtime_mheap.arena_used = runtime_mheap.arena_start;
+ runtime_mheap.arena_end = runtime_mheap.arena_start + arena_size;
// Initialize the rest of the allocator.
- runtime_MHeap_Init(runtime_mheap, runtime_SysAlloc);
+ runtime_MHeap_Init(&runtime_mheap);
runtime_m()->mcache = runtime_allocmcache();
// See if it works.
@@ -463,8 +507,7 @@ runtime_MHeap_SysAlloc(MHeap *h, uintptr n)
uintptr needed;
needed = (uintptr)h->arena_used + n - (uintptr)h->arena_end;
- // Round wanted arena size to a multiple of 256MB.
- needed = (needed + (256<<20) - 1) & ~((256<<20)-1);
+ needed = ROUND(needed, 256<<20);
new_end = h->arena_end + needed;
if(new_end <= h->arena_start + MaxArena32) {
p = runtime_SysReserve(h->arena_end, new_end - h->arena_end);
@@ -475,9 +518,10 @@ runtime_MHeap_SysAlloc(MHeap *h, uintptr n)
if(n <= (uintptr)(h->arena_end - h->arena_used)) {
// Keep taking from our reservation.
p = h->arena_used;
- runtime_SysMap(p, n);
+ runtime_SysMap(p, n, &mstats.heap_sys);
h->arena_used += n;
runtime_MHeap_MapBits(h);
+ runtime_MHeap_MapSpans(h);
if(raceenabled)
runtime_racemapshadow(p, n);
return p;
@@ -490,14 +534,14 @@ runtime_MHeap_SysAlloc(MHeap *h, uintptr n)
// On 32-bit, once the reservation is gone we can
// try to get memory at a location chosen by the OS
// and hope that it is in the range we allocated bitmap for.
- p = runtime_SysAlloc(n);
+ p = runtime_SysAlloc(n, &mstats.heap_sys);
if(p == nil)
return nil;
if(p < h->arena_start || (uintptr)(p+n - h->arena_start) >= MaxArena32) {
runtime_printf("runtime: memory allocated by OS (%p) not in usable range [%p,%p)\n",
p, h->arena_start, h->arena_start+MaxArena32);
- runtime_SysFree(p, n);
+ runtime_SysFree(p, n, &mstats.heap_sys);
return nil;
}
@@ -506,6 +550,7 @@ runtime_MHeap_SysAlloc(MHeap *h, uintptr n)
if(h->arena_used > h->arena_end)
h->arena_end = h->arena_used;
runtime_MHeap_MapBits(h);
+ runtime_MHeap_MapSpans(h);
if(raceenabled)
runtime_racemapshadow(p, n);
}
@@ -513,17 +558,68 @@ runtime_MHeap_SysAlloc(MHeap *h, uintptr n)
return p;
}
+static struct
+{
+ Lock;
+ byte* pos;
+ byte* end;
+} persistent;
+
+enum
+{
+ PersistentAllocChunk = 256<<10,
+ PersistentAllocMaxBlock = 64<<10, // VM reservation granularity is 64K on windows
+};
+
+// Wrapper around SysAlloc that can allocate small chunks.
+// There is no associated free operation.
+// Intended for things like function/type/debug-related persistent data.
+// If align is 0, uses default align (currently 8).
+void*
+runtime_persistentalloc(uintptr size, uintptr align, uint64 *stat)
+{
+ byte *p;
+
+ if(align != 0) {
+ if(align&(align-1))
+ runtime_throw("persistentalloc: align is not a power of 2");
+ if(align > PageSize)
+ runtime_throw("persistentalloc: align is too large");
+ } else
+ align = 8;
+ if(size >= PersistentAllocMaxBlock)
+ return runtime_SysAlloc(size, stat);
+ runtime_lock(&persistent);
+ persistent.pos = (byte*)ROUND((uintptr)persistent.pos, align);
+ if(persistent.pos + size > persistent.end) {
+ persistent.pos = runtime_SysAlloc(PersistentAllocChunk, &mstats.other_sys);
+ if(persistent.pos == nil) {
+ runtime_unlock(&persistent);
+ runtime_throw("runtime: cannot allocate memory");
+ }
+ persistent.end = persistent.pos + PersistentAllocChunk;
+ }
+ p = persistent.pos;
+ persistent.pos += size;
+ runtime_unlock(&persistent);
+ if(stat != &mstats.other_sys) {
+ // reaccount the allocation against provided stat
+ runtime_xadd64(stat, size);
+ runtime_xadd64(&mstats.other_sys, -(uint64)size);
+ }
+ return p;
+}
+
static Lock settype_lock;
void
-runtime_settype_flush(M *mp, bool sysalloc)
+runtime_settype_flush(M *mp)
{
uintptr *buf, *endbuf;
uintptr size, ofs, j, t;
uintptr ntypes, nbytes2, nbytes3;
uintptr *data2;
byte *data3;
- bool sysalloc3;
void *v;
uintptr typ, p;
MSpan *s;
@@ -542,8 +638,8 @@ runtime_settype_flush(M *mp, bool sysalloc)
// (Manually inlined copy of runtime_MHeap_Lookup)
p = (uintptr)v>>PageShift;
if(sizeof(void*) == 8)
- p -= (uintptr)runtime_mheap->arena_start >> PageShift;
- s = runtime_mheap->map[p];
+ p -= (uintptr)runtime_mheap.arena_start >> PageShift;
+ s = runtime_mheap.spans[p];
if(s->sizeclass == 0) {
s->types.compression = MTypes_Single;
@@ -558,20 +654,9 @@ runtime_settype_flush(M *mp, bool sysalloc)
case MTypes_Empty:
ntypes = (s->npages << PageShift) / size;
nbytes3 = 8*sizeof(uintptr) + 1*ntypes;
-
- if(!sysalloc) {
- data3 = runtime_mallocgc(nbytes3, FlagNoProfiling|FlagNoPointers, 0, 1);
- } else {
- data3 = runtime_SysAlloc(nbytes3);
- if(data3 == nil)
- runtime_throw("runtime: cannot allocate memory");
- if(0) runtime_printf("settype(0->3): SysAlloc(%x) --> %p\n", (uint32)nbytes3, data3);
- }
-
+ data3 = runtime_mallocgc(nbytes3, 0, FlagNoProfiling|FlagNoScan|FlagNoInvokeGC);
s->types.compression = MTypes_Bytes;
- s->types.sysalloc = sysalloc;
s->types.data = (uintptr)data3;
-
((uintptr*)data3)[1] = typ;
data3[8*sizeof(uintptr) + ofs] = 1;
break;
@@ -596,20 +681,8 @@ runtime_settype_flush(M *mp, bool sysalloc)
} else {
ntypes = (s->npages << PageShift) / size;
nbytes2 = ntypes * sizeof(uintptr);
-
- if(!sysalloc) {
- data2 = runtime_mallocgc(nbytes2, FlagNoProfiling|FlagNoPointers, 0, 1);
- } else {
- data2 = runtime_SysAlloc(nbytes2);
- if(data2 == nil)
- runtime_throw("runtime: cannot allocate memory");
- if(0) runtime_printf("settype.(3->2): SysAlloc(%x) --> %p\n", (uint32)nbytes2, data2);
- }
-
- sysalloc3 = s->types.sysalloc;
-
+ data2 = runtime_mallocgc(nbytes2, 0, FlagNoProfiling|FlagNoScan|FlagNoInvokeGC);
s->types.compression = MTypes_Words;
- s->types.sysalloc = sysalloc;
s->types.data = (uintptr)data2;
// Move the contents of data3 to data2. Then deallocate data3.
@@ -618,12 +691,6 @@ runtime_settype_flush(M *mp, bool sysalloc)
t = ((uintptr*)data3)[t];
data2[j] = t;
}
- if(sysalloc3) {
- nbytes3 = 8*sizeof(uintptr) + 1*ntypes;
- if(0) runtime_printf("settype.(3->2): SysFree(%p,%x)\n", data3, (uint32)nbytes3);
- runtime_SysFree(data3, nbytes3);
- }
-
data2[ofs] = typ;
}
break;
@@ -634,64 +701,6 @@ runtime_settype_flush(M *mp, bool sysalloc)
mp->settype_bufsize = 0;
}
-// It is forbidden to use this function if it is possible that
-// explicit deallocation via calling runtime_free(v) may happen.
-void
-runtime_settype(void *v, uintptr t)
-{
- M *mp;
- uintptr *buf;
- uintptr i;
- MSpan *s;
-
- if(t == 0)
- runtime_throw("settype: zero type");
-
- mp = runtime_m();
- buf = mp->settype_buf;
- i = mp->settype_bufsize;
- buf[i+0] = (uintptr)v;
- buf[i+1] = t;
- i += 2;
- mp->settype_bufsize = i;
-
- if(i == nelem(mp->settype_buf)) {
- runtime_settype_flush(mp, false);
- }
-
- if(DebugTypeAtBlockEnd) {
- s = runtime_MHeap_Lookup(runtime_mheap, v);
- *(uintptr*)((uintptr)v+s->elemsize-sizeof(uintptr)) = t;
- }
-}
-
-void
-runtime_settype_sysfree(MSpan *s)
-{
- uintptr ntypes, nbytes;
-
- if(!s->types.sysalloc)
- return;
-
- nbytes = (uintptr)-1;
-
- switch (s->types.compression) {
- case MTypes_Words:
- ntypes = (s->npages << PageShift) / s->elemsize;
- nbytes = ntypes * sizeof(uintptr);
- break;
- case MTypes_Bytes:
- ntypes = (s->npages << PageShift) / s->elemsize;
- nbytes = 8*sizeof(uintptr) + 1*ntypes;
- break;
- }
-
- if(nbytes != (uintptr)-1) {
- if(0) runtime_printf("settype: SysFree(%p,%x)\n", (void*)s->types.data, (uint32)nbytes);
- runtime_SysFree((void*)s->types.data, nbytes);
- }
-}
-
uintptr
runtime_gettype(void *v)
{
@@ -699,7 +708,7 @@ runtime_gettype(void *v)
uintptr t, ofs;
byte *data;
- s = runtime_MHeap_LookupMaybe(runtime_mheap, v);
+ s = runtime_MHeap_LookupMaybe(&runtime_mheap, v);
if(s != nil) {
t = 0;
switch(s->types.compression) {
@@ -736,61 +745,23 @@ runtime_gettype(void *v)
void*
runtime_mal(uintptr n)
{
- return runtime_mallocgc(n, 0, 1, 1);
+ return runtime_mallocgc(n, 0, 0);
}
void *
runtime_new(const Type *typ)
{
- void *ret;
- uint32 flag;
-
- if(raceenabled)
- runtime_m()->racepc = runtime_getcallerpc(&typ);
-
- if(typ->__size == 0) {
- // All 0-length allocations use this pointer.
- // The language does not require the allocations to
- // have distinct values.
- ret = (uint8*)&runtime_zerobase;
- } else {
- flag = typ->__code&GO_NO_POINTERS ? FlagNoPointers : 0;
- ret = runtime_mallocgc(typ->__size, flag, 1, 1);
-
- if(UseSpanType && !flag) {
- if(false)
- runtime_printf("new %S: %p\n", *typ->__reflection, ret);
- runtime_settype(ret, (uintptr)typ | TypeInfo_SingleObject);
- }
- }
-
- return ret;
+ return runtime_mallocgc(typ->__size, (uintptr)typ | TypeInfo_SingleObject, typ->kind&KindNoPointers ? FlagNoScan : 0);
}
static void*
cnew(const Type *typ, intgo n, int32 objtyp)
{
- uint32 flag;
- void *ret;
-
if((objtyp&(PtrSize-1)) != objtyp)
runtime_throw("runtime: invalid objtyp");
if(n < 0 || (typ->__size > 0 && (uintptr)n > (MaxMem/typ->__size)))
runtime_panicstring("runtime: allocation size out of range");
- if(typ->__size == 0 || n == 0) {
- // All 0-length allocations use this pointer.
- // The language does not require the allocations to
- // have distinct values.
- return &runtime_zerobase;
- }
- flag = typ->__code&GO_NO_POINTERS ? FlagNoPointers : 0;
- ret = runtime_mallocgc(typ->__size*n, flag, 1, 1);
- if(UseSpanType && !flag) {
- if(false)
- runtime_printf("cnew [%D]%S: %p\n", (int64)n, *typ->__reflection, ret);
- runtime_settype(ret, (uintptr)typ | TypeInfo_SingleObject);
- }
- return ret;
+ return runtime_mallocgc(typ->__size*n, (uintptr)typ | objtyp, typ->kind&KindNoPointers ? FlagNoScan : 0);
}
// same as runtime_new, but callable from C
@@ -814,6 +785,8 @@ func SetFinalizer(obj Eface, finalizer Eface) {
byte *base;
uintptr size;
const FuncType *ft;
+ const Type *fint;
+ const PtrType *ot;
if(obj.__type_descriptor == nil) {
runtime_printf("runtime.SetFinalizer: first argument is nil interface\n");
@@ -828,22 +801,36 @@ func SetFinalizer(obj Eface, finalizer Eface) {
goto throw;
}
ft = nil;
+ ot = (const PtrType*)obj.__type_descriptor;
+ fint = nil;
if(finalizer.__type_descriptor != nil) {
if(finalizer.__type_descriptor->__code != GO_FUNC)
goto badfunc;
ft = (const FuncType*)finalizer.__type_descriptor;
- if(ft->__dotdotdot || ft->__in.__count != 1 || !__go_type_descriptors_equal(*(Type**)ft->__in.__values, obj.__type_descriptor))
+ if(ft->__dotdotdot || ft->__in.__count != 1)
+ goto badfunc;
+ fint = *(Type**)ft->__in.__values;
+ if(__go_type_descriptors_equal(fint, obj.__type_descriptor)) {
+ // ok - same type
+ } else if(fint->__code == GO_PTR && (fint->__uncommon == nil || fint->__uncommon->__name == nil || obj.type->__uncommon == nil || obj.type->__uncommon->__name == nil) && __go_type_descriptors_equal(((const PtrType*)fint)->__element_type, ((const PtrType*)obj.type)->__element_type)) {
+ // ok - not same type, but both pointers,
+ // one or the other is unnamed, and same element type, so assignable.
+ } else if(fint->kind == GO_INTERFACE && ((const InterfaceType*)fint)->__methods.__count == 0) {
+ // ok - satisfies empty interface
+ } else if(fint->kind == GO_INTERFACE && __go_convert_interface_2(fint, obj.__type_descriptor, 1) != nil) {
+ // ok - satisfies non-empty interface
+ } else
goto badfunc;
}
- if(!runtime_addfinalizer(obj.__object, finalizer.__type_descriptor != nil ? *(void**)finalizer.__object : nil, ft)) {
+ if(!runtime_addfinalizer(obj.__object, finalizer.__type_descriptor != nil ? *(void**)finalizer.__object : nil, ft, ot)) {
runtime_printf("runtime.SetFinalizer: finalizer already set\n");
goto throw;
}
return;
badfunc:
- runtime_printf("runtime.SetFinalizer: second argument is %S, not func(%S)\n", *finalizer.__type_descriptor->__reflection, *obj.__type_descriptor->__reflection);
+ runtime_printf("runtime.SetFinalizer: cannot pass %S to finalizer %S\n", *obj.__type_descriptor->__reflection, *finalizer.__type_descriptor->__reflection);
throw:
runtime_throw("runtime.SetFinalizer");
}
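A note on the malloc API change that runs through this patch: runtime_mallocgc loses its dogc and zeroed arguments and gains a type word, with the old behaviour expressed through flags. A rough correspondence inferred from the hunks above (not an authoritative table; individual call sites vary):

/* Old runtime_mallocgc(size, flag, dogc, zeroed)  ->  new runtime_mallocgc(size, typ, flag)
 *
 *   FlagNoPointers                  ->  FlagNoScan       (GC does not scan the block)
 *   dogc == 0                       ->  FlagNoInvokeGC   (never trigger a collection here)
 *   zeroed == 0                     ->  FlagNoZero       (caller will overwrite the memory)
 *   runtime_settype(p, typ) after   ->  typ passed directly as the second argument,
 *   the allocation                      e.g. (uintptr)t | TypeInfo_Chan in makechan
 */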
diff --git a/libgo/runtime/malloc.h b/libgo/runtime/malloc.h
index ebea34eb32c..45c4c09c147 100644
--- a/libgo/runtime/malloc.h
+++ b/libgo/runtime/malloc.h
@@ -108,9 +108,7 @@ enum
// Tunable constants.
MaxSmallSize = 32<<10,
- FixAllocChunk = 128<<10, // Chunk size for FixAlloc
- MaxMCacheListLen = 256, // Maximum objects on MCacheList
- MaxMCacheSize = 2<<20, // Maximum bytes in one MCache
+ FixAllocChunk = 16<<10, // Chunk size for FixAlloc
MaxMHeapList = 1<<(20 - PageShift), // Maximum page length for fixed-size list in MHeap.
HeapAllocChunk = 1<<20, // Chunk size for heap growth
@@ -155,13 +153,13 @@ struct MLink
// SysAlloc obtains a large chunk of zeroed memory from the
// operating system, typically on the order of a hundred kilobytes
-// or a megabyte. If the pointer argument is non-nil, the caller
-// wants a mapping there or nowhere.
+// or a megabyte.
//
// SysUnused notifies the operating system that the contents
// of the memory region are no longer needed and can be reused
-// for other purposes. The program reserves the right to start
-// accessing those pages in the future.
+// for other purposes.
+// SysUsed notifies the operating system that the contents
+// of the memory region are needed again.
//
// SysFree returns it unconditionally; this is only used if
// an out-of-memory error has been detected midway through
@@ -174,10 +172,11 @@ struct MLink
//
// SysMap maps previously reserved address space for use.
-void* runtime_SysAlloc(uintptr nbytes);
-void runtime_SysFree(void *v, uintptr nbytes);
+void* runtime_SysAlloc(uintptr nbytes, uint64 *stat);
+void runtime_SysFree(void *v, uintptr nbytes, uint64 *stat);
void runtime_SysUnused(void *v, uintptr nbytes);
-void runtime_SysMap(void *v, uintptr nbytes);
+void runtime_SysUsed(void *v, uintptr nbytes);
+void runtime_SysMap(void *v, uintptr nbytes, uint64 *stat);
void* runtime_SysReserve(void *v, uintptr nbytes);
// FixAlloc is a simple free-list allocator for fixed size objects.
@@ -190,18 +189,17 @@ void* runtime_SysReserve(void *v, uintptr nbytes);
// smashed by freeing and reallocating.
struct FixAlloc
{
- uintptr size;
- void *(*alloc)(uintptr);
- void (*first)(void *arg, byte *p); // called first time p is returned
- void *arg;
- MLink *list;
- byte *chunk;
- uint32 nchunk;
- uintptr inuse; // in-use bytes now
- uintptr sys; // bytes obtained from system
+ uintptr size;
+ void (*first)(void *arg, byte *p); // called first time p is returned
+ void* arg;
+ MLink* list;
+ byte* chunk;
+ uint32 nchunk;
+ uintptr inuse; // in-use bytes now
+ uint64* stat;
};
-void runtime_FixAlloc_Init(FixAlloc *f, uintptr size, void *(*alloc)(uintptr), void (*first)(void*, byte*), void *arg);
+void runtime_FixAlloc_Init(FixAlloc *f, uintptr size, void (*first)(void*, byte*), void *arg, uint64 *stat);
void* runtime_FixAlloc_Alloc(FixAlloc *f);
void runtime_FixAlloc_Free(FixAlloc *f, void *p);
@@ -236,6 +234,8 @@ struct MStats
uint64 mcache_inuse; // MCache structures
uint64 mcache_sys;
uint64 buckhash_sys; // profiling bucket hash table
+ uint64 gc_sys;
+ uint64 other_sys;
// Statistics about garbage collector.
// Protected by mheap or stopping the world during GC.
@@ -267,14 +267,12 @@ extern MStats mstats
// class_to_size[i] = largest size in class i
// class_to_allocnpages[i] = number of pages to allocate when
// making new objects in class i
-// class_to_transfercount[i] = number of objects to move when
-// taking a bunch of objects out of the central lists
-// and putting them in the thread free list.
int32 runtime_SizeToClass(int32);
extern int32 runtime_class_to_size[NumSizeClasses];
extern int32 runtime_class_to_allocnpages[NumSizeClasses];
-extern int32 runtime_class_to_transfercount[NumSizeClasses];
+extern int8 runtime_size_to_class8[1024/8 + 1];
+extern int8 runtime_size_to_class128[(MaxSmallSize-1024)/128 + 1];
extern void runtime_InitSizes(void);
@@ -285,30 +283,24 @@ struct MCacheList
{
MLink *list;
uint32 nlist;
- uint32 nlistmin;
};
struct MCache
{
- MCacheList list[NumSizeClasses];
- uintptr size;
+ // The following members are accessed on every malloc,
+ // so they are grouped here for better caching.
+ int32 next_sample; // trigger heap sample after allocating this many bytes
intptr local_cachealloc; // bytes allocated (or freed) from cache since last lock of heap
- intptr local_objects; // objects allocated (or freed) from cache since last lock of heap
- intptr local_alloc; // bytes allocated (or freed) since last lock of heap
- uintptr local_total_alloc; // bytes allocated (even if freed) since last lock of heap
- uintptr local_nmalloc; // number of mallocs since last lock of heap
- uintptr local_nfree; // number of frees since last lock of heap
- uintptr local_nlookup; // number of pointer lookups since last lock of heap
- int32 next_sample; // trigger heap sample after allocating this many bytes
- // Statistics about allocation size classes since last lock of heap
- struct {
- uintptr nmalloc;
- uintptr nfree;
- } local_by_size[NumSizeClasses];
-
+ // The rest is not accessed on every malloc.
+ MCacheList list[NumSizeClasses];
+ // Local allocator stats, flushed during GC.
+ uintptr local_nlookup; // number of pointer lookups
+ uintptr local_largefree; // bytes freed for large objects (>MaxSmallSize)
+ uintptr local_nlargefree; // number of frees for large objects (>MaxSmallSize)
+ uintptr local_nsmallfree[NumSizeClasses]; // number of frees for small objects (<=MaxSmallSize)
};
-void* runtime_MCache_Alloc(MCache *c, int32 sizeclass, uintptr size, int32 zeroed);
+void runtime_MCache_Refill(MCache *c, int32 sizeclass);
void runtime_MCache_Free(MCache *c, void *p, int32 sizeclass, uintptr size);
void runtime_MCache_ReleaseAll(MCache *c);
@@ -346,7 +338,6 @@ enum
struct MTypes
{
byte compression; // one of MTypes_*
- bool sysalloc; // whether (void*)data is from runtime_SysAlloc
uintptr data;
};
@@ -397,8 +388,8 @@ struct MCentral
};
void runtime_MCentral_Init(MCentral *c, int32 sizeclass);
-int32 runtime_MCentral_AllocList(MCentral *c, int32 n, MLink **first);
-void runtime_MCentral_FreeList(MCentral *c, int32 n, MLink *first);
+int32 runtime_MCentral_AllocList(MCentral *c, MLink **first);
+void runtime_MCentral_FreeList(MCentral *c, MLink *first);
void runtime_MCentral_FreeSpan(MCentral *c, MSpan *s, int32 n, MLink *start, MLink *end);
// Main malloc heap.
@@ -414,7 +405,8 @@ struct MHeap
uint32 nspancap;
// span lookup
- MSpan *map[1<<MHeapMap_Bits];
+ MSpan** spans;
+ uintptr spans_mapped;
// range of addresses we might see in the heap
byte *bitmap;
@@ -434,10 +426,15 @@ struct MHeap
FixAlloc spanalloc; // allocator for Span*
FixAlloc cachealloc; // allocator for MCache*
+
+ // Malloc stats.
+ uint64 largefree; // bytes freed for large objects (>MaxSmallSize)
+ uint64 nlargefree; // number of frees for large objects (>MaxSmallSize)
+ uint64 nsmallfree[NumSizeClasses]; // number of frees for small objects (<=MaxSmallSize)
};
-extern MHeap *runtime_mheap;
+extern MHeap runtime_mheap;
-void runtime_MHeap_Init(MHeap *h, void *(*allocator)(uintptr));
+void runtime_MHeap_Init(MHeap *h);
MSpan* runtime_MHeap_Alloc(MHeap *h, uintptr npage, int32 sizeclass, int32 acct, int32 zeroed);
void runtime_MHeap_Free(MHeap *h, MSpan *s, int32 acct);
MSpan* runtime_MHeap_Lookup(MHeap *h, void *v);
@@ -445,9 +442,11 @@ MSpan* runtime_MHeap_LookupMaybe(MHeap *h, void *v);
void runtime_MGetSizeClassInfo(int32 sizeclass, uintptr *size, int32 *npages, int32 *nobj);
void* runtime_MHeap_SysAlloc(MHeap *h, uintptr n);
void runtime_MHeap_MapBits(MHeap *h);
+void runtime_MHeap_MapSpans(MHeap *h);
void runtime_MHeap_Scavenger(void*);
-void* runtime_mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed);
+void* runtime_mallocgc(uintptr size, uintptr typ, uint32 flag);
+void* runtime_persistentalloc(uintptr size, uintptr align, uint64 *stat);
int32 runtime_mlookup(void *v, byte **base, uintptr *size, MSpan **s);
void runtime_gc(int32 force);
void runtime_markallocated(void *v, uintptr n, bool noptr);
@@ -463,17 +462,18 @@ void runtime_purgecachedstats(MCache*);
void* runtime_cnew(const Type*);
void* runtime_cnewarray(const Type*, intgo);
-void runtime_settype(void*, uintptr);
-void runtime_settype_flush(M*, bool);
+void runtime_settype_flush(M*);
void runtime_settype_sysfree(MSpan*);
uintptr runtime_gettype(void*);
enum
{
// flags to malloc
- FlagNoPointers = 1<<0, // no pointers here
- FlagNoProfiling = 1<<1, // must not profile
- FlagNoGC = 1<<2, // must not free or scan for pointers
+ FlagNoScan = 1<<0, // GC doesn't have to scan object
+ FlagNoProfiling = 1<<1, // must not profile
+ FlagNoGC = 1<<2, // must not free or scan for pointers
+ FlagNoZero = 1<<3, // don't zero memory
+ FlagNoInvokeGC = 1<<4, // don't invoke GC
};
typedef struct Obj Obj;
@@ -493,15 +493,15 @@ void runtime_helpgc(int32 nproc);
void runtime_gchelper(void);
struct __go_func_type;
-bool runtime_getfinalizer(void *p, bool del, FuncVal **fn, const struct __go_func_type **ft);
+struct __go_ptr_type;
+bool runtime_getfinalizer(void *p, bool del, FuncVal **fn, const struct __go_func_type **ft, const struct __go_ptr_type **ot);
void runtime_walkfintab(void (*fn)(void*), void (*scan)(Obj));
enum
{
TypeInfo_SingleObject = 0,
TypeInfo_Array = 1,
- TypeInfo_Map = 2,
- TypeInfo_Chan = 3,
+ TypeInfo_Chan = 2,
// Enables type information at the end of blocks allocated from heap
DebugTypeAtBlockEnd = 0,
diff --git a/libgo/runtime/mcache.c b/libgo/runtime/mcache.c
index 45bac4ffbce..38f824a139b 100644
--- a/libgo/runtime/mcache.c
+++ b/libgo/runtime/mcache.c
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// Per-thread (in Go, per-M) malloc cache for small objects.
+// Per-P malloc cache for small objects.
//
// See malloc.h for an overview.
@@ -10,48 +10,23 @@
#include "arch.h"
#include "malloc.h"
-void*
-runtime_MCache_Alloc(MCache *c, int32 sizeclass, uintptr size, int32 zeroed)
+void
+runtime_MCache_Refill(MCache *c, int32 sizeclass)
{
MCacheList *l;
- MLink *first, *v;
- int32 n;
- // Allocate from list.
+ // Replenish using central lists.
l = &c->list[sizeclass];
- if(l->list == nil) {
- // Replenish using central lists.
- n = runtime_MCentral_AllocList(&runtime_mheap->central[sizeclass],
- runtime_class_to_transfercount[sizeclass], &first);
- if(n == 0)
- runtime_throw("out of memory");
- l->list = first;
- l->nlist = n;
- c->size += n*size;
- }
- v = l->list;
- l->list = v->next;
- l->nlist--;
- if(l->nlist < l->nlistmin)
- l->nlistmin = l->nlist;
- c->size -= size;
-
- // v is zeroed except for the link pointer
- // that we used above; zero that.
- v->next = nil;
- if(zeroed) {
- // block is zeroed iff second word is zero ...
- if(size > sizeof(uintptr) && ((uintptr*)v)[1] != 0)
- runtime_memclr((byte*)v, size);
- }
- c->local_cachealloc += size;
- c->local_objects++;
- return v;
+ if(l->list)
+ runtime_throw("MCache_Refill: the list is not empty");
+ l->nlist = runtime_MCentral_AllocList(&runtime_mheap.central[sizeclass], &l->list);
+ if(l->list == nil)
+ runtime_throw("out of memory");
}
// Take n elements off l and return them to the central free list.
static void
-ReleaseN(MCache *c, MCacheList *l, int32 n, int32 sizeclass)
+ReleaseN(MCacheList *l, int32 n, int32 sizeclass)
{
MLink *first, **lp;
int32 i;
@@ -64,18 +39,14 @@ ReleaseN(MCache *c, MCacheList *l, int32 n, int32 sizeclass)
l->list = *lp;
*lp = nil;
l->nlist -= n;
- if(l->nlist < l->nlistmin)
- l->nlistmin = l->nlist;
- c->size -= n*runtime_class_to_size[sizeclass];
// Return them to central free list.
- runtime_MCentral_FreeList(&runtime_mheap->central[sizeclass], n, first);
+ runtime_MCentral_FreeList(&runtime_mheap.central[sizeclass], first);
}
void
runtime_MCache_Free(MCache *c, void *v, int32 sizeclass, uintptr size)
{
- int32 i, n;
MCacheList *l;
MLink *p;
@@ -85,34 +56,12 @@ runtime_MCache_Free(MCache *c, void *v, int32 sizeclass, uintptr size)
p->next = l->list;
l->list = p;
l->nlist++;
- c->size += size;
c->local_cachealloc -= size;
- c->local_objects--;
-
- if(l->nlist >= MaxMCacheListLen) {
- // Release a chunk back.
- ReleaseN(c, l, runtime_class_to_transfercount[sizeclass], sizeclass);
- }
-
- if(c->size >= MaxMCacheSize) {
- // Scavenge.
- for(i=0; i<NumSizeClasses; i++) {
- l = &c->list[i];
- n = l->nlistmin;
- // n is the minimum number of elements we've seen on
- // the list since the last scavenge. If n > 0, it means that
- // we could have gotten by with n fewer elements
- // without needing to consult the central free list.
- // Move toward that situation by releasing n/2 of them.
- if(n > 0) {
- if(n > 1)
- n /= 2;
- ReleaseN(c, l, n, i);
- }
- l->nlistmin = l->nlist;
- }
- }
+ // We transfer a span's worth of objects at a time from MCentral to MCache;
+ // if we cache more than twice that, release half back.
+ if(l->nlist >= 2*(runtime_class_to_allocnpages[sizeclass]<<PageShift)/size)
+ ReleaseN(l, l->nlist/2, sizeclass);
}
void
@@ -123,7 +72,10 @@ runtime_MCache_ReleaseAll(MCache *c)
for(i=0; i<NumSizeClasses; i++) {
l = &c->list[i];
- ReleaseN(c, l, l->nlist, i);
- l->nlistmin = 0;
+ if(l->list) {
+ runtime_MCentral_FreeList(&runtime_mheap.central[i], l->list);
+ l->list = nil;
+ l->nlist = 0;
+ }
}
}
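
For orientation, a worked example of the release heuristic introduced in runtime_MCache_Free above; the size class is invented for illustration and assumes 4 KB pages (PageShift == 12), it is not taken from the patch:

/*
 * Size class: 64-byte objects backed by one-page spans.
 *
 *   span bytes      = runtime_class_to_allocnpages[cl] << PageShift = 1 << 12 = 4096
 *   objects/span    = 4096 / 64 = 64
 *   release trigger = 2 * objects/span = 128
 *
 * Once the local list holds 128 objects (two spans' worth), ReleaseN hands
 * half of them (64) back to the MCentral free list for this size class.
 */
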
diff --git a/libgo/runtime/mcentral.c b/libgo/runtime/mcentral.c
index b3108a1c061..81916101e46 100644
--- a/libgo/runtime/mcentral.c
+++ b/libgo/runtime/mcentral.c
@@ -30,16 +30,15 @@ runtime_MCentral_Init(MCentral *c, int32 sizeclass)
runtime_MSpanList_Init(&c->empty);
}
-// Allocate up to n objects from the central free list.
+// Allocate a list of objects from the central free list.
// Return the number of objects allocated.
// The objects are linked together by their first words.
-// On return, *pstart points at the first object.
+// On return, *pfirst points at the first object.
int32
-runtime_MCentral_AllocList(MCentral *c, int32 n, MLink **pfirst)
+runtime_MCentral_AllocList(MCentral *c, MLink **pfirst)
{
MSpan *s;
- MLink *first, *last;
- int32 cap, avail, i;
+ int32 cap, n;
runtime_lock(c);
// Replenish central list if empty.
@@ -52,49 +51,27 @@ runtime_MCentral_AllocList(MCentral *c, int32 n, MLink **pfirst)
}
s = c->nonempty.next;
cap = (s->npages << PageShift) / s->elemsize;
- avail = cap - s->ref;
- if(avail < n)
- n = avail;
-
- // First one is guaranteed to work, because we just grew the list.
- first = s->freelist;
- last = first;
- for(i=1; i<n; i++) {
- last = last->next;
- }
- s->freelist = last->next;
- last->next = nil;
+ n = cap - s->ref;
+ *pfirst = s->freelist;
+ s->freelist = nil;
s->ref += n;
c->nfree -= n;
-
- if(n == avail) {
- if(s->freelist != nil || s->ref != (uint32)cap) {
- runtime_throw("invalid freelist");
- }
- runtime_MSpanList_Remove(s);
- runtime_MSpanList_Insert(&c->empty, s);
- }
-
+ runtime_MSpanList_Remove(s);
+ runtime_MSpanList_Insert(&c->empty, s);
runtime_unlock(c);
- *pfirst = first;
return n;
}
-// Free n objects back into the central free list.
+// Free the list of objects back into the central free list.
void
-runtime_MCentral_FreeList(MCentral *c, int32 n, MLink *start)
+runtime_MCentral_FreeList(MCentral *c, MLink *start)
{
- MLink *v, *next;
-
- // Assume next == nil marks end of list.
- // n and end would be useful if we implemented
- // the transfer cache optimization in the TODO above.
- USED(n);
+ MLink *next;
runtime_lock(c);
- for(v=start; v; v=next) {
- next = v->next;
- MCentral_Free(c, v);
+ for(; start != nil; start = next) {
+ next = start->next;
+ MCentral_Free(c, start);
}
runtime_unlock(c);
}
@@ -108,7 +85,7 @@ MCentral_Free(MCentral *c, void *v)
int32 size;
// Find span for v.
- s = runtime_MHeap_Lookup(runtime_mheap, v);
+ s = runtime_MHeap_Lookup(&runtime_mheap, v);
if(s == nil || s->ref == 0)
runtime_throw("invalid free");
@@ -133,7 +110,7 @@ MCentral_Free(MCentral *c, void *v)
s->freelist = nil;
c->nfree -= (s->npages << PageShift) / size;
runtime_unlock(c);
- runtime_MHeap_Free(runtime_mheap, s, 0);
+ runtime_MHeap_Free(&runtime_mheap, s, 0);
runtime_lock(c);
}
}
@@ -168,7 +145,7 @@ runtime_MCentral_FreeSpan(MCentral *c, MSpan *s, int32 n, MLink *start, MLink *e
c->nfree -= (s->npages << PageShift) / size;
runtime_unlock(c);
runtime_unmarkspan((byte*)(s->start<<PageShift), s->npages<<PageShift);
- runtime_MHeap_Free(runtime_mheap, s, 0);
+ runtime_MHeap_Free(&runtime_mheap, s, 0);
} else {
runtime_unlock(c);
}
@@ -200,7 +177,7 @@ MCentral_Grow(MCentral *c)
runtime_unlock(c);
runtime_MGetSizeClassInfo(c->sizeclass, &size, &npages, &n);
- s = runtime_MHeap_Alloc(runtime_mheap, npages, c->sizeclass, 0, 1);
+ s = runtime_MHeap_Alloc(&runtime_mheap, npages, c->sizeclass, 0, 1);
if(s == nil) {
// TODO(rsc): Log out of memory
runtime_lock(c);
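
As a reading aid, a hedged sketch of what the reworked runtime_MCentral_AllocList amounts to in the common case, with locking and the MCentral_Grow path omitted (all names are taken from the hunks above):

// Hand the caller everything that is free in the first nonempty span,
// then move that span to the empty list.
static int32
central_alloc_whole_span(MCentral *c, MLink **pfirst)
{
	MSpan *s;
	int32 cap, n;

	s = c->nonempty.next;                          // first span with free objects
	cap = (s->npages << PageShift) / s->elemsize;  // objects the span can hold
	n = cap - s->ref;                              // objects currently free

	*pfirst = s->freelist;    // the entire free list goes to the MCache
	s->freelist = nil;
	s->ref += n;
	c->nfree -= n;

	// The span has no free objects left, so it always moves to c->empty now.
	runtime_MSpanList_Remove(s);
	runtime_MSpanList_Insert(&c->empty, s);
	return n;
}
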
diff --git a/libgo/runtime/mem.c b/libgo/runtime/mem.c
index 8481e950750..78f7c51faf2 100644
--- a/libgo/runtime/mem.c
+++ b/libgo/runtime/mem.c
@@ -60,13 +60,11 @@ mmap_fixed(byte *v, uintptr n, int32 prot, int32 flags, int32 fd, uint32 offset)
}
void*
-runtime_SysAlloc(uintptr n)
+runtime_SysAlloc(uintptr n, uint64 *stat)
{
void *p;
int fd = -1;
- mstats.sys += n;
-
#ifdef USE_DEV_ZERO
if (dev_zero == -1) {
dev_zero = open("/dev/zero", O_RDONLY);
@@ -91,6 +89,7 @@ runtime_SysAlloc(uintptr n)
}
return nil;
}
+ runtime_xadd64(stat, n);
return p;
}
@@ -103,9 +102,16 @@ runtime_SysUnused(void *v __attribute__ ((unused)), uintptr n __attribute__ ((un
}
void
-runtime_SysFree(void *v, uintptr n)
+runtime_SysUsed(void *v, uintptr n)
+{
+ USED(v);
+ USED(n);
+}
+
+void
+runtime_SysFree(void *v, uintptr n, uint64 *stat)
{
- mstats.sys -= n;
+ runtime_xadd64(stat, -(uint64)n);
runtime_munmap(v, n);
}
@@ -132,8 +138,10 @@ runtime_SysReserve(void *v, uintptr n)
// Only user-mode Linux (UML) rejects these requests.
if(sizeof(void*) == 8 && (uintptr)v >= 0xffffffffU) {
p = mmap_fixed(v, 64<<10, PROT_NONE, MAP_ANON|MAP_PRIVATE, fd, 0);
- if (p != v)
+ if (p != v) {
+ runtime_munmap(p, 64<<10);
return nil;
+ }
runtime_munmap(p, 64<<10);
return v;
}
@@ -149,12 +157,12 @@ runtime_SysReserve(void *v, uintptr n)
}
void
-runtime_SysMap(void *v, uintptr n)
+runtime_SysMap(void *v, uintptr n, uint64 *stat)
{
void *p;
int fd = -1;
- mstats.sys += n;
+ runtime_xadd64(stat, n);
#ifdef USE_DEV_ZERO
if (dev_zero == -1) {
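
The mem.c changes above replace the single mstats.sys counter with per-subsystem counters that are updated atomically; a minimal sketch of the resulting accounting pattern, assuming a caller that charges its mappings to mstats.gc_sys:

// Each caller passes the counter that should absorb this mapping.
void *buf = runtime_SysAlloc(1<<20, &mstats.gc_sys);    // runtime_xadd64(&mstats.gc_sys, 1<<20)
if(buf != nil) {
	// ... use the memory ...
	runtime_SysFree(buf, 1<<20, &mstats.gc_sys);    // runtime_xadd64(&mstats.gc_sys, -(uint64)(1<<20))
}
// mstats.sys is no longer bumped here; updatememstats() in mgc0.c recomputes it
// as heap_sys + stacks_sys + mspan_sys + mcache_sys + buckhash_sys + gc_sys + other_sys.
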
diff --git a/libgo/runtime/mfinal.c b/libgo/runtime/mfinal.c
index 407092bf392..625af528e1e 100644
--- a/libgo/runtime/mfinal.c
+++ b/libgo/runtime/mfinal.c
@@ -5,6 +5,7 @@
#include "runtime.h"
#include "arch.h"
#include "malloc.h"
+#include "go-type.h"
enum { debug = 0 };
@@ -13,6 +14,7 @@ struct Fin
{
FuncVal *fn;
const struct __go_func_type *ft;
+ const struct __go_ptr_type *ot;
};
// Finalizer hash table. Direct hash, linear scan, at most 3/4 full.
@@ -42,7 +44,7 @@ static struct {
} fintab[TABSZ];
static void
-addfintab(Fintab *t, void *k, FuncVal *fn, const struct __go_func_type *ft)
+addfintab(Fintab *t, void *k, FuncVal *fn, const struct __go_func_type *ft, const struct __go_ptr_type *ot)
{
int32 i, j;
@@ -67,6 +69,7 @@ ret:
t->fkey[i] = k;
t->val[i].fn = fn;
t->val[i].ft = ft;
+ t->val[i].ot = ot;
}
static bool
@@ -87,6 +90,7 @@ lookfintab(Fintab *t, void *k, bool del, Fin *f)
t->fkey[i] = (void*)-1;
t->val[i].fn = nil;
t->val[i].ft = nil;
+ t->val[i].ot = nil;
t->ndead++;
}
return true;
@@ -117,13 +121,13 @@ resizefintab(Fintab *tab)
newtab.max *= 3;
}
- newtab.fkey = runtime_mallocgc(newtab.max*sizeof newtab.fkey[0], FlagNoPointers, 0, 1);
- newtab.val = runtime_mallocgc(newtab.max*sizeof newtab.val[0], 0, 0, 1);
+ newtab.fkey = runtime_mallocgc(newtab.max*sizeof newtab.fkey[0], 0, FlagNoInvokeGC|FlagNoScan);
+ newtab.val = runtime_mallocgc(newtab.max*sizeof newtab.val[0], 0, FlagNoInvokeGC);
for(i=0; i<tab->max; i++) {
k = tab->fkey[i];
if(k != nil && k != (void*)-1)
- addfintab(&newtab, k, tab->val[i].fn, tab->val[i].ft);
+ addfintab(&newtab, k, tab->val[i].fn, tab->val[i].ft, tab->val[i].ot);
}
runtime_free(tab->fkey);
@@ -137,7 +141,7 @@ resizefintab(Fintab *tab)
}
bool
-runtime_addfinalizer(void *p, FuncVal *f, const struct __go_func_type *ft)
+runtime_addfinalizer(void *p, FuncVal *f, const struct __go_func_type *ft, const struct __go_ptr_type *ot)
{
Fintab *tab;
byte *base;
@@ -166,7 +170,7 @@ runtime_addfinalizer(void *p, FuncVal *f, const struct __go_func_type *ft)
resizefintab(tab);
}
- addfintab(tab, p, f, ft);
+ addfintab(tab, p, f, ft, ot);
runtime_setblockspecial(p, true);
runtime_unlock(tab);
return true;
@@ -175,7 +179,7 @@ runtime_addfinalizer(void *p, FuncVal *f, const struct __go_func_type *ft)
// get finalizer; if del, delete finalizer.
// caller is responsible for updating RefHasFinalizer (special) bit.
bool
-runtime_getfinalizer(void *p, bool del, FuncVal **fn, const struct __go_func_type **ft)
+runtime_getfinalizer(void *p, bool del, FuncVal **fn, const struct __go_func_type **ft, const struct __go_ptr_type **ot)
{
Fintab *tab;
bool res;
@@ -189,6 +193,7 @@ runtime_getfinalizer(void *p, bool del, FuncVal **fn, const struct __go_func_typ
return false;
*fn = f.fn;
*ft = f.ft;
+ *ot = f.ot;
return true;
}
diff --git a/libgo/runtime/mfixalloc.c b/libgo/runtime/mfixalloc.c
index 6e4f0c6e607..9d0b3bbda7e 100644
--- a/libgo/runtime/mfixalloc.c
+++ b/libgo/runtime/mfixalloc.c
@@ -13,17 +13,16 @@
// Initialize f to allocate objects of the given size,
// using the allocator to obtain chunks of memory.
void
-runtime_FixAlloc_Init(FixAlloc *f, uintptr size, void *(*alloc)(uintptr), void (*first)(void*, byte*), void *arg)
+runtime_FixAlloc_Init(FixAlloc *f, uintptr size, void (*first)(void*, byte*), void *arg, uint64 *stat)
{
f->size = size;
- f->alloc = alloc;
f->first = first;
f->arg = arg;
f->list = nil;
f->chunk = nil;
f->nchunk = 0;
f->inuse = 0;
- f->sys = 0;
+ f->stat = stat;
}
void*
@@ -43,10 +42,7 @@ runtime_FixAlloc_Alloc(FixAlloc *f)
return v;
}
if(f->nchunk < f->size) {
- f->sys += FixAllocChunk;
- f->chunk = f->alloc(FixAllocChunk);
- if(f->chunk == nil)
- runtime_throw("out of memory (FixAlloc)");
+ f->chunk = runtime_persistentalloc(FixAllocChunk, 0, f->stat);
f->nchunk = FixAllocChunk;
}
v = f->chunk;
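
A short, hedged sketch of how FixAlloc refills its chunk after this change: the allocator callback and the private sys counter are gone, and chunks come from runtime_persistentalloc, which charges the caller-supplied stat pointer (the usage below mirrors the patched runtime_MHeap_Init):

MHeap *h = &runtime_mheap;
runtime_FixAlloc_Init(&h->spanalloc, sizeof(MSpan), RecordSpan, h, &mstats.mspan_sys);

// Inside runtime_FixAlloc_Alloc, when the current chunk runs dry:
//     f->chunk  = runtime_persistentalloc(FixAllocChunk, 0, f->stat);
//     f->nchunk = FixAllocChunk;
// persistentalloc is meant for memory that is never freed, so FixAlloc keeps no
// sys counter of its own; the bytes land directly in *f->stat (mstats.mspan_sys here).
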
diff --git a/libgo/runtime/mgc0.c b/libgo/runtime/mgc0.c
index c3b32111ca0..3edcee9c397 100644
--- a/libgo/runtime/mgc0.c
+++ b/libgo/runtime/mgc0.c
@@ -59,6 +59,13 @@ enum {
PRECISE = 1,
LOOP = 2,
PC_BITS = PRECISE | LOOP,
+
+ // Pointer map
+ BitsPerPointer = 2,
+ BitsNoPointer = 0,
+ BitsPointer = 1,
+ BitsIface = 2,
+ BitsEface = 3,
};
// Bits in per-word bitmap.
@@ -70,7 +77,7 @@ enum {
// The bits in the word are packed together by type first, then by
// heap location, so each 64-bit bitmap word consists of, from top to bottom,
// the 16 bitSpecial bits for the corresponding heap words, then the 16 bitMarked bits,
-// then the 16 bitNoPointers/bitBlockBoundary bits, then the 16 bitAllocated bits.
+// then the 16 bitNoScan/bitBlockBoundary bits, then the 16 bitAllocated bits.
// This layout makes it easier to iterate over the bits of a given type.
//
// The bitmap starts at mheap.arena_start and extends *backward* from
@@ -87,7 +94,7 @@ enum {
// /* then test bits & bitAllocated, bits & bitMarked, etc. */
//
#define bitAllocated ((uintptr)1<<(bitShift*0))
-#define bitNoPointers ((uintptr)1<<(bitShift*1)) /* when bitAllocated is set */
+#define bitNoScan ((uintptr)1<<(bitShift*1)) /* when bitAllocated is set */
#define bitMarked ((uintptr)1<<(bitShift*2)) /* when bitAllocated is set */
#define bitSpecial ((uintptr)1<<(bitShift*3)) /* when bitAllocated is set - has finalizer or being profiled */
#define bitBlockBoundary ((uintptr)1<<(bitShift*1)) /* when bitAllocated is NOT set */
@@ -109,8 +116,6 @@ enum {
//
uint32 runtime_worldsema = 1;
-static int32 gctrace;
-
// The size of Workbuf is N*PageSize.
typedef struct Workbuf Workbuf;
struct Workbuf
@@ -129,6 +134,7 @@ struct Finalizer
FuncVal *fn;
void *arg;
const struct __go_func_type *ft;
+ const struct __go_ptr_type *ot;
};
typedef struct FinBlock FinBlock;
@@ -178,7 +184,6 @@ static struct {
enum {
GC_DEFAULT_PTR = GC_NUM_INSTR,
- GC_MAP_NEXT,
GC_CHAN,
GC_NUM_INSTR2
@@ -201,6 +206,16 @@ static struct {
uint64 instr[GC_NUM_INSTR2];
uint64 putempty;
uint64 getfull;
+ struct {
+ uint64 foundbit;
+ uint64 foundword;
+ uint64 foundspan;
+ } flushptrbuf;
+ struct {
+ uint64 foundbit;
+ uint64 foundword;
+ uint64 foundspan;
+ } markonly;
} gcstats;
// markonly marks an object. It returns true if the object
@@ -210,12 +225,12 @@ static bool
markonly(void *obj)
{
byte *p;
- uintptr *bitp, bits, shift, x, xbits, off;
+ uintptr *bitp, bits, shift, x, xbits, off, j;
MSpan *s;
PageID k;
// Words outside the arena cannot be pointers.
- if((byte*)obj < runtime_mheap->arena_start || (byte*)obj >= runtime_mheap->arena_used)
+ if((byte*)obj < runtime_mheap.arena_start || (byte*)obj >= runtime_mheap.arena_used)
return false;
// obj may be a pointer to a live object.
@@ -225,42 +240,57 @@ markonly(void *obj)
obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));
// Find bits for this word.
- off = (uintptr*)obj - (uintptr*)runtime_mheap->arena_start;
- bitp = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1;
+ off = (uintptr*)obj - (uintptr*)runtime_mheap.arena_start;
+ bitp = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
shift = off % wordsPerBitmapWord;
xbits = *bitp;
bits = xbits >> shift;
// Pointing at the beginning of a block?
- if((bits & (bitAllocated|bitBlockBoundary)) != 0)
+ if((bits & (bitAllocated|bitBlockBoundary)) != 0) {
+ if(CollectStats)
+ runtime_xadd64(&gcstats.markonly.foundbit, 1);
goto found;
+ }
+
+ // Pointing just past the beginning?
+ // Scan backward a little to find a block boundary.
+ for(j=shift; j-->0; ) {
+ if(((xbits>>j) & (bitAllocated|bitBlockBoundary)) != 0) {
+ shift = j;
+ bits = xbits>>shift;
+ if(CollectStats)
+ runtime_xadd64(&gcstats.markonly.foundword, 1);
+ goto found;
+ }
+ }
// Otherwise consult span table to find beginning.
// (Manually inlined copy of MHeap_LookupMaybe.)
k = (uintptr)obj>>PageShift;
x = k;
if(sizeof(void*) == 8)
- x -= (uintptr)runtime_mheap->arena_start>>PageShift;
- s = runtime_mheap->map[x];
- if(s == nil || k < s->start || k - s->start >= s->npages || s->state != MSpanInUse)
+ x -= (uintptr)runtime_mheap.arena_start>>PageShift;
+ s = runtime_mheap.spans[x];
+ if(s == nil || k < s->start || (byte*)obj >= s->limit || s->state != MSpanInUse)
return false;
p = (byte*)((uintptr)s->start<<PageShift);
if(s->sizeclass == 0) {
obj = p;
} else {
- if((byte*)obj >= (byte*)s->limit)
- return false;
uintptr size = s->elemsize;
int32 i = ((byte*)obj - p)/size;
obj = p+i*size;
}
// Now that we know the object header, reload bits.
- off = (uintptr*)obj - (uintptr*)runtime_mheap->arena_start;
- bitp = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1;
+ off = (uintptr*)obj - (uintptr*)runtime_mheap.arena_start;
+ bitp = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
shift = off % wordsPerBitmapWord;
xbits = *bitp;
bits = xbits >> shift;
+ if(CollectStats)
+ runtime_xadd64(&gcstats.markonly.foundspan, 1);
found:
// Now we have bits, bitp, and shift correct for
@@ -338,7 +368,7 @@ flushptrbuf(PtrTarget *ptrbuf, PtrTarget **ptrbufpos, Obj **_wp, Workbuf **_wbuf
Workbuf *wbuf;
PtrTarget *ptrbuf_end;
- arena_start = runtime_mheap->arena_start;
+ arena_start = runtime_mheap.arena_start;
wp = *_wp;
wbuf = *_wbuf;
@@ -377,7 +407,7 @@ flushptrbuf(PtrTarget *ptrbuf, PtrTarget **ptrbufpos, Obj **_wp, Workbuf **_wbuf
// obj belongs to interval [mheap.arena_start, mheap.arena_used).
if(Debug > 1) {
- if(obj < runtime_mheap->arena_start || obj >= runtime_mheap->arena_used)
+ if(obj < runtime_mheap.arena_start || obj >= runtime_mheap.arena_used)
runtime_throw("object is outside of mheap");
}
@@ -398,8 +428,11 @@ flushptrbuf(PtrTarget *ptrbuf, PtrTarget **ptrbufpos, Obj **_wp, Workbuf **_wbuf
bits = xbits >> shift;
// Pointing at the beginning of a block?
- if((bits & (bitAllocated|bitBlockBoundary)) != 0)
+ if((bits & (bitAllocated|bitBlockBoundary)) != 0) {
+ if(CollectStats)
+ runtime_xadd64(&gcstats.flushptrbuf.foundbit, 1);
goto found;
+ }
ti = 0;
@@ -410,6 +443,8 @@ flushptrbuf(PtrTarget *ptrbuf, PtrTarget **ptrbufpos, Obj **_wp, Workbuf **_wbuf
obj = (byte*)obj - (shift-j)*PtrSize;
shift = j;
bits = xbits>>shift;
+ if(CollectStats)
+ runtime_xadd64(&gcstats.flushptrbuf.foundword, 1);
goto found;
}
}
@@ -420,15 +455,13 @@ flushptrbuf(PtrTarget *ptrbuf, PtrTarget **ptrbufpos, Obj **_wp, Workbuf **_wbuf
x = k;
if(sizeof(void*) == 8)
x -= (uintptr)arena_start>>PageShift;
- s = runtime_mheap->map[x];
- if(s == nil || k < s->start || k - s->start >= s->npages || s->state != MSpanInUse)
+ s = runtime_mheap.spans[x];
+ if(s == nil || k < s->start || obj >= s->limit || s->state != MSpanInUse)
continue;
p = (byte*)((uintptr)s->start<<PageShift);
if(s->sizeclass == 0) {
obj = p;
} else {
- if((byte*)obj >= (byte*)s->limit)
- continue;
size = s->elemsize;
int32 i = ((byte*)obj - p)/size;
obj = p+i*size;
@@ -440,6 +473,8 @@ flushptrbuf(PtrTarget *ptrbuf, PtrTarget **ptrbufpos, Obj **_wp, Workbuf **_wbuf
shift = off % wordsPerBitmapWord;
xbits = *bitp;
bits = xbits >> shift;
+ if(CollectStats)
+ runtime_xadd64(&gcstats.flushptrbuf.foundspan, 1);
found:
// Now we have bits, bitp, and shift correct for
@@ -460,7 +495,7 @@ flushptrbuf(PtrTarget *ptrbuf, PtrTarget **ptrbufpos, Obj **_wp, Workbuf **_wbuf
}
// If object has no pointers, don't need to scan further.
- if((bits & bitNoPointers) != 0)
+ if((bits & bitNoScan) != 0)
continue;
// Ask span about size class.
@@ -468,7 +503,7 @@ flushptrbuf(PtrTarget *ptrbuf, PtrTarget **ptrbufpos, Obj **_wp, Workbuf **_wbuf
x = (uintptr)obj >> PageShift;
if(sizeof(void*) == 8)
x -= (uintptr)arena_start>>PageShift;
- s = runtime_mheap->map[x];
+ s = runtime_mheap.spans[x];
PREFETCH(obj);
@@ -552,9 +587,6 @@ flushobjbuf(Obj *objbuf, Obj **objbufpos, Obj **_wp, Workbuf **_wbuf, uintptr *_
static uintptr defaultProg[2] = {PtrSize, GC_DEFAULT_PTR};
#if 0
-// Hashmap iterator program
-static uintptr mapProg[2] = {0, GC_MAP_NEXT};
-
// Hchan program
static uintptr chanProg[2] = {0, GC_CHAN};
#endif
@@ -578,7 +610,7 @@ checkptr(void *obj, uintptr objti)
if(!Debug)
runtime_throw("checkptr is debug only");
- if((byte*)obj < runtime_mheap->arena_start || (byte*)obj >= runtime_mheap->arena_used)
+ if((byte*)obj < runtime_mheap.arena_start || (byte*)obj >= runtime_mheap.arena_used)
return;
type = runtime_gettype(obj);
t = (Type*)(type & ~(uintptr)(PtrSize-1));
@@ -586,8 +618,8 @@ checkptr(void *obj, uintptr objti)
return;
x = (uintptr)obj >> PageShift;
if(sizeof(void*) == 8)
- x -= (uintptr)(runtime_mheap->arena_start)>>PageShift;
- s = runtime_mheap->map[x];
+ x -= (uintptr)(runtime_mheap.arena_start)>>PageShift;
+ s = runtime_mheap.spans[x];
objstart = (byte*)((uintptr)s->start<<PageShift);
if(s->sizeclass != 0) {
i = ((byte*)obj - objstart)/s->elemsize;
@@ -595,8 +627,11 @@ checkptr(void *obj, uintptr objti)
}
tisize = *(uintptr*)objti;
// Sanity check for object size: it should fit into the memory block.
- if((byte*)obj + tisize > objstart + s->elemsize)
+ if((byte*)obj + tisize > objstart + s->elemsize) {
+ runtime_printf("object of type '%S' at %p/%p does not fit in block %p/%p\n",
+ *t->string, obj, tisize, objstart, s->elemsize);
runtime_throw("invalid gc type info");
+ }
if(obj != objstart)
return;
// If obj points to the beginning of the memory block,
@@ -613,7 +648,7 @@ checkptr(void *obj, uintptr objti)
for(j = 1; pc1[j] != GC_END && pc2[j] != GC_END; j++) {
if(pc1[j] != pc2[j]) {
runtime_printf("invalid gc type info for '%s' at %p, type info %p, block info %p\n",
- t->string ? (const int8*)t->string->str : (const int8*)"?", j, pc1[j], pc2[j]);
+ t->string ? (const int8*)t->string->str : (const int8*)"?", j, pc1[j], pc2[j]);
runtime_throw("invalid gc type info");
}
}
@@ -638,7 +673,7 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
uintptr n, i, end_b, elemsize, size, ti, objti, count /* , type */;
uintptr *pc, precise_type, nominal_size;
#if 0
- uintptr *map_ret, mapkey_size, mapval_size, mapkey_ti, mapval_ti, *chan_ret, chancap;
+ uintptr *chan_ret, chancap;
#endif
void *obj;
const Type *t;
@@ -650,11 +685,6 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
Eface *eface;
Iface *iface;
#if 0
- Hmap *hmap;
- MapType *maptype;
- bool mapkey_kind, mapval_kind;
- struct hash_gciter map_iter;
- struct hash_gciter_data d;
Hchan *chan;
ChanType *chantype;
#endif
@@ -663,8 +693,8 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
runtime_throw("scanblock: size of Workbuf is suboptimal");
// Memory arena parameters.
- arena_start = runtime_mheap->arena_start;
- arena_used = runtime_mheap->arena_used;
+ arena_start = runtime_mheap.arena_start;
+ arena_used = runtime_mheap.arena_used;
stack_ptr = stack+nelem(stack)-1;
@@ -685,10 +715,6 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
// (Silence the compiler)
#if 0
- map_ret = nil;
- mapkey_size = mapval_size = 0;
- mapkey_kind = mapval_kind = false;
- mapkey_ti = mapval_ti = 0;
chan = nil;
chantype = nil;
chan_ret = nil;
@@ -759,23 +785,6 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
stack_top.elemsize = pc[0];
stack_top.loop_or_ret = pc+1;
break;
- case TypeInfo_Map:
- hmap = (Hmap*)b;
- maptype = (MapType*)t;
- if(hash_gciter_init(hmap, &map_iter)) {
- mapkey_size = maptype->key->size;
- mapkey_kind = maptype->key->kind;
- mapkey_ti = (uintptr)maptype->key->gc | PRECISE;
- mapval_size = maptype->elem->size;
- mapval_kind = maptype->elem->kind;
- mapval_ti = (uintptr)maptype->elem->gc | PRECISE;
-
- map_ret = nil;
- pc = mapProg;
- } else {
- goto next_block;
- }
- break;
case TypeInfo_Chan:
chan = (Hchan*)b;
chantype = (ChanType*)t;
@@ -985,79 +994,6 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
pc = (uintptr*)((byte*)pc + *(int32*)(pc+2)); // target of the CALL instruction
continue;
-#if 0
- case GC_MAP_PTR:
- hmap = *(Hmap**)(stack_top.b + pc[1]);
- if(hmap == nil) {
- pc += 3;
- continue;
- }
- if(markonly(hmap)) {
- maptype = (MapType*)pc[2];
- if(hash_gciter_init(hmap, &map_iter)) {
- mapkey_size = maptype->key->size;
- mapkey_kind = maptype->key->kind;
- mapkey_ti = (uintptr)maptype->key->gc | PRECISE;
- mapval_size = maptype->elem->size;
- mapval_kind = maptype->elem->kind;
- mapval_ti = (uintptr)maptype->elem->gc | PRECISE;
-
- // Start mapProg.
- map_ret = pc+3;
- pc = mapProg+1;
- } else {
- pc += 3;
- }
- } else {
- pc += 3;
- }
- continue;
-
- case GC_MAP_NEXT:
- // Add all keys and values to buffers, mark all subtables.
- while(hash_gciter_next(&map_iter, &d)) {
- // buffers: reserve space for 2 objects.
- if(ptrbufpos+2 >= ptrbuf_end)
- flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj);
- if(objbufpos+2 >= objbuf_end)
- flushobjbuf(objbuf, &objbufpos, &wp, &wbuf, &nobj);
-
- if(d.st != nil)
- markonly(d.st);
-
- if(d.key_data != nil) {
- if(!(mapkey_kind & KindNoPointers) || d.indirectkey) {
- if(!d.indirectkey)
- *objbufpos++ = (Obj){d.key_data, mapkey_size, mapkey_ti};
- else {
- if(Debug) {
- obj = *(void**)d.key_data;
- if(!(arena_start <= obj && obj < arena_used))
- runtime_throw("scanblock: inconsistent hashmap");
- }
- *ptrbufpos++ = (struct PtrTarget){*(void**)d.key_data, mapkey_ti};
- }
- }
- if(!(mapval_kind & KindNoPointers) || d.indirectval) {
- if(!d.indirectval)
- *objbufpos++ = (Obj){d.val_data, mapval_size, mapval_ti};
- else {
- if(Debug) {
- obj = *(void**)d.val_data;
- if(!(arena_start <= obj && obj < arena_used))
- runtime_throw("scanblock: inconsistent hashmap");
- }
- *ptrbufpos++ = (struct PtrTarget){*(void**)d.val_data, mapval_ti};
- }
- }
- }
- }
- if(map_ret == nil)
- goto next_block;
- pc = map_ret;
- continue;
-#endif
-
case GC_REGION:
obj = (void*)(stack_top.b + pc[1]);
size = pc[2];
@@ -1071,7 +1007,6 @@ scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
#if 0
case GC_CHAN_PTR:
- // Similar to GC_MAP_PTR
chan = *(Hchan**)(stack_top.b + pc[1]);
if(chan == nil) {
pc += 3;
@@ -1191,14 +1126,14 @@ debug_scanblock(byte *b, uintptr n)
obj = (byte*)vp[i];
// Words outside the arena cannot be pointers.
- if((byte*)obj < runtime_mheap->arena_start || (byte*)obj >= runtime_mheap->arena_used)
+ if((byte*)obj < runtime_mheap.arena_start || (byte*)obj >= runtime_mheap.arena_used)
continue;
// Round down to word boundary.
obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));
// Consult span table to find beginning.
- s = runtime_MHeap_LookupMaybe(runtime_mheap, obj);
+ s = runtime_MHeap_LookupMaybe(&runtime_mheap, obj);
if(s == nil)
continue;
@@ -1207,15 +1142,13 @@ debug_scanblock(byte *b, uintptr n)
if(s->sizeclass == 0) {
obj = p;
} else {
- if((byte*)obj >= (byte*)s->limit)
- continue;
int32 i = ((byte*)obj - p)/size;
obj = p+i*size;
}
// Now that we know the object header, reload bits.
- off = (uintptr*)obj - (uintptr*)runtime_mheap->arena_start;
- bitp = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1;
+ off = (uintptr*)obj - (uintptr*)runtime_mheap.arena_start;
+ bitp = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
shift = off % wordsPerBitmapWord;
xbits = *bitp;
bits = xbits >> shift;
@@ -1230,7 +1163,7 @@ debug_scanblock(byte *b, uintptr n)
runtime_printf("found unmarked block %p in %p\n", obj, vp+i);
// If object has no pointers, don't need to scan further.
- if((bits & bitNoPointers) != 0)
+ if((bits & bitNoScan) != 0)
continue;
debug_scanblock(obj, size);
@@ -1320,7 +1253,7 @@ getempty(Workbuf *b)
runtime_lock(&work);
if(work.nchunk < sizeof *b) {
work.nchunk = 1<<20;
- work.chunk = runtime_SysAlloc(work.nchunk);
+ work.chunk = runtime_SysAlloc(work.nchunk, &mstats.gc_sys);
if(work.chunk == nil)
runtime_throw("runtime: cannot allocate memory");
}
@@ -1416,12 +1349,12 @@ addroot(Obj obj)
cap = PageSize/sizeof(Obj);
if(cap < 2*work.rootcap)
cap = 2*work.rootcap;
- new = (Obj*)runtime_SysAlloc(cap*sizeof(Obj));
+ new = (Obj*)runtime_SysAlloc(cap*sizeof(Obj), &mstats.gc_sys);
if(new == nil)
runtime_throw("runtime: cannot allocate memory");
if(work.roots != nil) {
runtime_memmove(new, work.roots, work.rootcap*sizeof(Obj));
- runtime_SysFree(work.roots, work.rootcap*sizeof(Obj));
+ runtime_SysFree(work.roots, work.rootcap*sizeof(Obj), &mstats.gc_sys);
}
work.roots = new;
work.rootcap = cap;
@@ -1560,8 +1493,8 @@ addroots(void)
runtime_time_scan(addroot);
// MSpan.types
- allspans = runtime_mheap->allspans;
- for(spanidx=0; spanidx<runtime_mheap->nspan; spanidx++) {
+ allspans = runtime_mheap.allspans;
+ for(spanidx=0; spanidx<runtime_mheap.nspan; spanidx++) {
s = allspans[spanidx];
if(s->state == MSpanInUse) {
// The garbage collector ignores type pointers stored in MSpan.types:
@@ -1589,10 +1522,7 @@ addroots(void)
case Gdead:
break;
case Grunning:
- if(gp != runtime_g())
- runtime_throw("mark - world not stopped");
- addstackroots(gp);
- break;
+ runtime_throw("mark - world not stopped");
case Grunnable:
case Gsyscall:
case Gwaiting:
@@ -1614,10 +1544,11 @@ handlespecial(byte *p, uintptr size)
{
FuncVal *fn;
const struct __go_func_type *ft;
+ const struct __go_ptr_type *ot;
FinBlock *block;
Finalizer *f;
- if(!runtime_getfinalizer(p, true, &fn, &ft)) {
+ if(!runtime_getfinalizer(p, true, &fn, &ft, &ot)) {
runtime_setblockspecial(p, false);
runtime_MProf_Free(p, size);
return false;
@@ -1626,9 +1557,7 @@ handlespecial(byte *p, uintptr size)
runtime_lock(&finlock);
if(finq == nil || finq->cnt == finq->cap) {
if(finc == nil) {
- finc = runtime_SysAlloc(PageSize);
- if(finc == nil)
- runtime_throw("runtime: cannot allocate memory");
+ finc = runtime_persistentalloc(PageSize, 0, &mstats.gc_sys);
finc->cap = (PageSize - sizeof(FinBlock)) / sizeof(Finalizer) + 1;
finc->alllink = allfin;
allfin = finc;
@@ -1642,6 +1571,7 @@ handlespecial(byte *p, uintptr size)
finq->cnt++;
f->fn = fn;
f->ft = ft;
+ f->ot = ot;
f->arg = p;
runtime_unlock(&finlock);
return true;
@@ -1668,10 +1598,10 @@ sweepspan(ParFor *desc, uint32 idx)
m = runtime_m();
USED(&desc);
- s = runtime_mheap->allspans[idx];
+ s = runtime_mheap.allspans[idx];
if(s->state != MSpanInUse)
return;
- arena_start = runtime_mheap->arena_start;
+ arena_start = runtime_mheap.arena_start;
p = (byte*)(s->start << PageShift);
cl = s->sizeclass;
size = s->elemsize;
@@ -1735,9 +1665,9 @@ sweepspan(ParFor *desc, uint32 idx)
// Free large span.
runtime_unmarkspan(p, 1<<PageShift);
*(uintptr*)p = (uintptr)0xdeaddeaddeaddeadll; // needs zeroing
- runtime_MHeap_Free(runtime_mheap, s, 1);
- c->local_alloc -= size;
- c->local_nfree++;
+ runtime_MHeap_Free(&runtime_mheap, s, 1);
+ c->local_nlargefree++;
+ c->local_largefree += size;
} else {
// Free small object.
switch(compression) {
@@ -1758,12 +1688,9 @@ sweepspan(ParFor *desc, uint32 idx)
}
if(nfree) {
- c->local_by_size[cl].nfree += nfree;
- c->local_alloc -= size * nfree;
- c->local_nfree += nfree;
+ c->local_nsmallfree[cl] += nfree;
c->local_cachealloc -= nfree * size;
- c->local_objects -= nfree;
- runtime_MCentral_FreeSpan(&runtime_mheap->central[cl], s, nfree, head.next, end);
+ runtime_MCentral_FreeSpan(&runtime_mheap.central[cl], s, nfree, head.next, end);
}
}
@@ -1777,10 +1704,10 @@ dumpspan(uint32 idx)
MSpan *s;
bool allocated, special;
- s = runtime_mheap->allspans[idx];
+ s = runtime_mheap.allspans[idx];
if(s->state != MSpanInUse)
return;
- arena_start = runtime_mheap->arena_start;
+ arena_start = runtime_mheap.arena_start;
p = (byte*)(s->start << PageShift);
sizeclass = s->sizeclass;
size = s->elemsize;
@@ -1838,7 +1765,7 @@ runtime_memorydump(void)
{
uint32 spanidx;
- for(spanidx=0; spanidx<runtime_mheap->nspan; spanidx++) {
+ for(spanidx=0; spanidx<runtime_mheap.nspan; spanidx++) {
dumpspan(spanidx);
}
}
@@ -1880,13 +1807,28 @@ runtime_gchelper(void)
static int32 gcpercent = GcpercentUnknown;
static void
-cachestats(GCStats *stats)
+cachestats(void)
+{
+ MCache *c;
+ P *p, **pp;
+
+ for(pp=runtime_allp; (p=*pp) != nil; pp++) {
+ c = p->mcache;
+ if(c==nil)
+ continue;
+ runtime_purgecachedstats(c);
+ }
+}
+
+static void
+updatememstats(GCStats *stats)
{
M *mp;
+ MSpan *s;
MCache *c;
P *p, **pp;
uint32 i;
- uint64 stacks_inuse;
+ uint64 stacks_inuse, smallfree;
uint64 *src, *dst;
if(stats)
@@ -1902,29 +1844,80 @@ cachestats(GCStats *stats)
runtime_memclr((byte*)&mp->gcstats, sizeof(mp->gcstats));
}
}
+ mstats.stacks_inuse = stacks_inuse;
+ mstats.mcache_inuse = runtime_mheap.cachealloc.inuse;
+ mstats.mspan_inuse = runtime_mheap.spanalloc.inuse;
+ mstats.sys = mstats.heap_sys + mstats.stacks_sys + mstats.mspan_sys +
+ mstats.mcache_sys + mstats.buckhash_sys + mstats.gc_sys + mstats.other_sys;
+
+ // Calculate memory allocator stats.
+ // During program execution we only count number of frees and amount of freed memory.
+ // Current number of alive objects in the heap and amount of alive heap memory
+ // are calculated by scanning all spans.
+ // Total number of mallocs is calculated as number of frees plus number of alive objects.
+ // Similarly, total amount of allocated memory is calculated as amount of freed memory
+ // plus amount of alive heap memory.
+ mstats.alloc = 0;
+ mstats.total_alloc = 0;
+ mstats.nmalloc = 0;
+ mstats.nfree = 0;
+ for(i = 0; i < nelem(mstats.by_size); i++) {
+ mstats.by_size[i].nmalloc = 0;
+ mstats.by_size[i].nfree = 0;
+ }
+
+ // Flush MCache's to MCentral.
for(pp=runtime_allp; (p=*pp) != nil; pp++) {
c = p->mcache;
if(c==nil)
continue;
- runtime_purgecachedstats(c);
- for(i=0; i<nelem(c->local_by_size); i++) {
- mstats.by_size[i].nmalloc += c->local_by_size[i].nmalloc;
- c->local_by_size[i].nmalloc = 0;
- mstats.by_size[i].nfree += c->local_by_size[i].nfree;
- c->local_by_size[i].nfree = 0;
+ runtime_MCache_ReleaseAll(c);
+ }
+
+ // Aggregate local stats.
+ cachestats();
+
+ // Scan all spans and count number of alive objects.
+ for(i = 0; i < runtime_mheap.nspan; i++) {
+ s = runtime_mheap.allspans[i];
+ if(s->state != MSpanInUse)
+ continue;
+ if(s->sizeclass == 0) {
+ mstats.nmalloc++;
+ mstats.alloc += s->elemsize;
+ } else {
+ mstats.nmalloc += s->ref;
+ mstats.by_size[s->sizeclass].nmalloc += s->ref;
+ mstats.alloc += s->ref*s->elemsize;
}
}
- mstats.stacks_inuse = stacks_inuse;
+
+ // Aggregate by size class.
+ smallfree = 0;
+ mstats.nfree = runtime_mheap.nlargefree;
+ for(i = 0; i < nelem(mstats.by_size); i++) {
+ mstats.nfree += runtime_mheap.nsmallfree[i];
+ mstats.by_size[i].nfree = runtime_mheap.nsmallfree[i];
+ mstats.by_size[i].nmalloc += runtime_mheap.nsmallfree[i];
+ smallfree += runtime_mheap.nsmallfree[i] * runtime_class_to_size[i];
+ }
+ mstats.nmalloc += mstats.nfree;
+
+ // Calculate derived stats.
+ mstats.total_alloc = mstats.alloc + runtime_mheap.largefree + smallfree;
+ mstats.heap_alloc = mstats.alloc;
+ mstats.heap_objects = mstats.nmalloc - mstats.nfree;
}
// Structure of arguments passed to function gc().
-// This allows the arguments to be passed via reflect_call.
+// This allows the arguments to be passed via runtime_mcall.
struct gc_args
{
- int32 force;
+ int64 start_time; // start time of GC in ns (just before stoptheworld)
};
static void gc(struct gc_args *args);
+static void mgc(G *gp);
static int32
readgogc(void)
@@ -1943,8 +1936,9 @@ void
runtime_gc(int32 force)
{
M *m;
- const byte *p;
- struct gc_args a, *ap;
+ G *g;
+ struct gc_args a;
+ int32 i;
// The atomic operations are not atomic if the uint64s
// are not aligned on uint64 boundaries. This has been
@@ -1967,30 +1961,77 @@ runtime_gc(int32 force)
// while holding a lock. The next mallocgc
// without a lock will do the gc instead.
m = runtime_m();
- if(!mstats.enablegc || m->locks > 0 || runtime_panicking)
+ if(!mstats.enablegc || runtime_g() == m->g0 || m->locks > 0 || runtime_panicking)
return;
if(gcpercent == GcpercentUnknown) { // first time through
- gcpercent = readgogc();
-
- p = runtime_getenv("GOGCTRACE");
- if(p != nil)
- gctrace = runtime_atoi(p);
+ runtime_lock(&runtime_mheap);
+ if(gcpercent == GcpercentUnknown)
+ gcpercent = readgogc();
+ runtime_unlock(&runtime_mheap);
}
if(gcpercent < 0)
return;
- // Run gc on a bigger stack to eliminate
- // a potentially large number of calls to runtime_morestack.
- // But not when using gccgo.
- a.force = force;
- ap = &a;
- gc(ap);
+ runtime_semacquire(&runtime_worldsema, false);
+ if(!force && mstats.heap_alloc < mstats.next_gc) {
+ // typically threads which lost the race to grab
+ // worldsema exit here when gc is done.
+ runtime_semrelease(&runtime_worldsema);
+ return;
+ }
- if(gctrace > 1 && !force) {
- a.force = 1;
- gc(&a);
+ // Ok, we're doing it! Stop everybody else
+ a.start_time = runtime_nanotime();
+ m->gcing = 1;
+ runtime_stoptheworld();
+
+ // Run gc on the g0 stack. We do this so that the g stack
+ // we're currently running on will no longer change. Cuts
+ // the root set down a bit (g0 stacks are not scanned, and
+ // we don't need to scan gc's internal state). Also an
+ // enabler for copyable stacks.
+ for(i = 0; i < (runtime_debug.gctrace > 1 ? 2 : 1); i++) {
+ // switch to g0, call gc(&a), then switch back
+ g = runtime_g();
+ g->param = &a;
+ g->status = Gwaiting;
+ g->waitreason = "garbage collection";
+ runtime_mcall(mgc);
+ // record a new start time in case we're going around again
+ a.start_time = runtime_nanotime();
}
+
+ // all done
+ m->gcing = 0;
+ m->locks++;
+ runtime_semrelease(&runtime_worldsema);
+ runtime_starttheworld();
+ m->locks--;
+
+ // now that gc is done, kick off finalizer thread if needed
+ if(finq != nil) {
+ runtime_lock(&finlock);
+ // kick off or wake up goroutine to run queued finalizers
+ if(fing == nil)
+ fing = __go_go(runfinq, nil);
+ else if(fingwait) {
+ fingwait = 0;
+ runtime_ready(fing);
+ }
+ runtime_unlock(&finlock);
+ }
+ // give the queued finalizers, if any, a chance to run
+ runtime_gosched();
+}
+
+static void
+mgc(G *gp)
+{
+ gc(gp->param);
+ gp->param = nil;
+ gp->status = Grunning;
+ runtime_gogo(gp);
}
static void
@@ -2004,29 +2045,20 @@ gc(struct gc_args *args)
uint32 i;
// Eface eface;
- runtime_semacquire(&runtime_worldsema);
- if(!args->force && mstats.heap_alloc < mstats.next_gc) {
- runtime_semrelease(&runtime_worldsema);
- return;
- }
-
m = runtime_m();
- t0 = runtime_nanotime();
-
- m->gcing = 1;
- runtime_stoptheworld();
+ t0 = args->start_time;
if(CollectStats)
runtime_memclr((byte*)&gcstats, sizeof(gcstats));
for(mp=runtime_allm; mp; mp=mp->alllink)
- runtime_settype_flush(mp, false);
+ runtime_settype_flush(mp);
heap0 = 0;
obj0 = 0;
- if(gctrace) {
- cachestats(nil);
+ if(runtime_debug.gctrace) {
+ updatememstats(nil);
heap0 = mstats.heap_alloc;
obj0 = mstats.nmalloc - mstats.nfree;
}
@@ -2050,7 +2082,7 @@ gc(struct gc_args *args)
work.nproc = runtime_gcprocs();
addroots();
runtime_parforsetup(work.markfor, work.nproc, work.nroot, nil, false, markroot);
- runtime_parforsetup(work.sweepfor, work.nproc, runtime_mheap->nspan, nil, true, sweepspan);
+ runtime_parforsetup(work.sweepfor, work.nproc, runtime_mheap.nspan, nil, true, sweepspan);
if(work.nproc > 1) {
runtime_noteclear(&work.alldone);
runtime_helpgc(work.nproc);
@@ -2076,29 +2108,8 @@ gc(struct gc_args *args)
if(work.nproc > 1)
runtime_notesleep(&work.alldone);
- cachestats(&stats);
-
- stats.nprocyield += work.sweepfor->nprocyield;
- stats.nosyield += work.sweepfor->nosyield;
- stats.nsleep += work.sweepfor->nsleep;
-
- mstats.next_gc = mstats.heap_alloc+(mstats.heap_alloc-runtime_stacks_sys)*gcpercent/100;
- m->gcing = 0;
-
- if(finq != nil) {
- m->locks++; // disable gc during the mallocs in newproc
- // kick off or wake up goroutine to run queued finalizers
- if(fing == nil)
- fing = __go_go(runfinq, nil);
- else if(fingwait) {
- fingwait = 0;
- runtime_ready(fing);
- }
- m->locks--;
- }
-
- heap1 = mstats.heap_alloc;
- obj1 = mstats.nmalloc - mstats.nfree;
+ cachestats();
+ mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*gcpercent/100;
t4 = runtime_nanotime();
mstats.last_gc = t4;
@@ -2108,7 +2119,15 @@ gc(struct gc_args *args)
if(mstats.debuggc)
runtime_printf("pause %D\n", t4-t0);
- if(gctrace) {
+ if(runtime_debug.gctrace) {
+ updatememstats(&stats);
+ heap1 = mstats.heap_alloc;
+ obj1 = mstats.nmalloc - mstats.nfree;
+
+ stats.nprocyield += work.sweepfor->nprocyield;
+ stats.nosyield += work.sweepfor->nosyield;
+ stats.nsleep += work.sweepfor->nsleep;
+
runtime_printf("gc%d(%d): %D+%D+%D ms, %D -> %D MB %D -> %D (%D-%D) objects,"
" %D(%D) handoff, %D(%D) steal, %D/%D/%D yields\n",
mstats.numgc, work.nproc, (t2-t1)/1000000, (t3-t2)/1000000, (t1-t0+t4-t3)/1000000,
@@ -2137,16 +2156,13 @@ gc(struct gc_args *args)
runtime_printf("\ttotal:\t%D\n", ninstr);
runtime_printf("putempty: %D, getfull: %D\n", gcstats.putempty, gcstats.getfull);
+
+ runtime_printf("markonly base lookup: bit %D word %D span %D\n", gcstats.markonly.foundbit, gcstats.markonly.foundword, gcstats.markonly.foundspan);
+ runtime_printf("flushptrbuf base lookup: bit %D word %D span %D\n", gcstats.flushptrbuf.foundbit, gcstats.flushptrbuf.foundword, gcstats.flushptrbuf.foundspan);
}
}
runtime_MProf_GC();
- runtime_semrelease(&runtime_worldsema);
- runtime_starttheworld();
-
- // give the queued finalizers, if any, a chance to run
- if(finq != nil)
- runtime_gosched();
}
void runtime_ReadMemStats(MStats *)
@@ -2161,15 +2177,17 @@ runtime_ReadMemStats(MStats *stats)
// because stoptheworld can only be used by
// one goroutine at a time, and there might be
// a pending garbage collection already calling it.
- runtime_semacquire(&runtime_worldsema);
+ runtime_semacquire(&runtime_worldsema, false);
m = runtime_m();
m->gcing = 1;
runtime_stoptheworld();
- cachestats(nil);
+ updatememstats(nil);
*stats = mstats;
m->gcing = 0;
+ m->locks++;
runtime_semrelease(&runtime_worldsema);
runtime_starttheworld();
+ m->locks--;
}
void runtime_debug_readGCStats(Slice*)
@@ -2187,7 +2205,7 @@ runtime_debug_readGCStats(Slice *pauses)
// Pass back: pauses, last gc (absolute time), number of gc, total pause ns.
p = (uint64*)pauses->array;
- runtime_lock(runtime_mheap);
+ runtime_lock(&runtime_mheap);
n = mstats.numgc;
if(n > nelem(mstats.pause_ns))
n = nelem(mstats.pause_ns);
@@ -2202,7 +2220,7 @@ runtime_debug_readGCStats(Slice *pauses)
p[n] = mstats.last_gc;
p[n+1] = mstats.numgc;
p[n+2] = mstats.pause_total_ns;
- runtime_unlock(runtime_mheap);
+ runtime_unlock(&runtime_mheap);
pauses->__count = n+3;
}
@@ -2214,14 +2232,14 @@ runtime_debug_setGCPercent(intgo in)
{
intgo out;
- runtime_lock(runtime_mheap);
+ runtime_lock(&runtime_mheap);
if(gcpercent == GcpercentUnknown)
gcpercent = readgogc();
out = gcpercent;
if(in < 0)
in = -1;
gcpercent = in;
- runtime_unlock(runtime_mheap);
+ runtime_unlock(&runtime_mheap);
return out;
}
@@ -2235,6 +2253,8 @@ gchelperstart(void)
runtime_throw("gchelperstart: bad m->helpgc");
if(runtime_xchg(&bufferList[m->helpgc].busy, 1))
runtime_throw("gchelperstart: already busy");
+ if(runtime_g() != m->g0)
+ runtime_throw("gchelper not running on g0 stack");
}
static void
@@ -2243,33 +2263,51 @@ runfinq(void* dummy __attribute__ ((unused)))
Finalizer *f;
FinBlock *fb, *next;
uint32 i;
+ Eface ef;
+ Iface iface;
for(;;) {
- // There's no need for a lock in this section
- // because it only conflicts with the garbage
- // collector, and the garbage collector only
- // runs when everyone else is stopped, and
- // runfinq only stops at the gosched() or
- // during the calls in the for loop.
+ runtime_lock(&finlock);
fb = finq;
finq = nil;
if(fb == nil) {
fingwait = 1;
- runtime_park(nil, nil, "finalizer wait");
+ runtime_park(runtime_unlock, &finlock, "finalizer wait");
continue;
}
+ runtime_unlock(&finlock);
if(raceenabled)
runtime_racefingo();
for(; fb; fb=next) {
next = fb->next;
for(i=0; i<(uint32)fb->cnt; i++) {
+ const Type *fint;
void *param;
f = &fb->fin[i];
- param = &f->arg;
+ fint = ((const Type**)f->ft->__in.array)[0];
+ if(fint->kind == KindPtr) {
+ // direct use of pointer
+ param = &f->arg;
+ } else if(((const InterfaceType*)fint)->__methods.__count == 0) {
+ // convert to empty interface
+ ef.type = (const Type*)f->ot;
+ ef.__object = f->arg;
+ param = &ef;
+ } else {
+ // convert to interface with methods
+ iface.__methods = __go_convert_interface_2((const Type*)fint,
+ (const Type*)f->ot,
+ 1);
+ iface.__object = f->arg;
+ if(iface.__methods == nil)
+ runtime_throw("invalid type conversion in runfinq");
+ param = &iface;
+ }
reflect_call(f->ft, f->fn, 0, 0, &param, nil);
f->fn = nil;
f->arg = nil;
+ f->ot = nil;
}
fb->cnt = 0;
fb->next = finc;
@@ -2280,28 +2318,28 @@ runfinq(void* dummy __attribute__ ((unused)))
}
// mark the block at v of size n as allocated.
-// If noptr is true, mark it as having no pointers.
+// If noscan is true, mark it as not needing scanning.
void
-runtime_markallocated(void *v, uintptr n, bool noptr)
+runtime_markallocated(void *v, uintptr n, bool noscan)
{
uintptr *b, obits, bits, off, shift;
if(0)
runtime_printf("markallocated %p+%p\n", v, n);
- if((byte*)v+n > (byte*)runtime_mheap->arena_used || (byte*)v < runtime_mheap->arena_start)
+ if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
runtime_throw("markallocated: bad pointer");
- off = (uintptr*)v - (uintptr*)runtime_mheap->arena_start; // word offset
- b = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1;
+ off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start; // word offset
+ b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
shift = off % wordsPerBitmapWord;
for(;;) {
obits = *b;
bits = (obits & ~(bitMask<<shift)) | (bitAllocated<<shift);
- if(noptr)
- bits |= bitNoPointers<<shift;
- if(runtime_singleproc) {
+ if(noscan)
+ bits |= bitNoScan<<shift;
+ if(runtime_gomaxprocs == 1) {
*b = bits;
break;
} else {
@@ -2319,19 +2357,19 @@ runtime_markfreed(void *v, uintptr n)
uintptr *b, obits, bits, off, shift;
if(0)
- runtime_printf("markallocated %p+%p\n", v, n);
+ runtime_printf("markfreed %p+%p\n", v, n);
- if((byte*)v+n > (byte*)runtime_mheap->arena_used || (byte*)v < runtime_mheap->arena_start)
- runtime_throw("markallocated: bad pointer");
+ if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
+ runtime_throw("markfreed: bad pointer");
- off = (uintptr*)v - (uintptr*)runtime_mheap->arena_start; // word offset
- b = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1;
+ off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start; // word offset
+ b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
shift = off % wordsPerBitmapWord;
for(;;) {
obits = *b;
bits = (obits & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
- if(runtime_singleproc) {
+ if(runtime_gomaxprocs == 1) {
*b = bits;
break;
} else {
@@ -2351,11 +2389,11 @@ runtime_checkfreed(void *v, uintptr n)
if(!runtime_checking)
return;
- if((byte*)v+n > (byte*)runtime_mheap->arena_used || (byte*)v < runtime_mheap->arena_start)
+ if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
return; // not allocated, so okay
- off = (uintptr*)v - (uintptr*)runtime_mheap->arena_start; // word offset
- b = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1;
+ off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start; // word offset
+ b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
shift = off % wordsPerBitmapWord;
bits = *b>>shift;
@@ -2374,7 +2412,7 @@ runtime_markspan(void *v, uintptr size, uintptr n, bool leftover)
uintptr *b, off, shift;
byte *p;
- if((byte*)v+size*n > (byte*)runtime_mheap->arena_used || (byte*)v < runtime_mheap->arena_start)
+ if((byte*)v+size*n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
runtime_throw("markspan: bad pointer");
p = v;
@@ -2385,8 +2423,8 @@ runtime_markspan(void *v, uintptr size, uintptr n, bool leftover)
// the entire span, and each bitmap word has bits for only
// one span, so no other goroutines are changing these
// bitmap words.
- off = (uintptr*)p - (uintptr*)runtime_mheap->arena_start; // word offset
- b = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1;
+ off = (uintptr*)p - (uintptr*)runtime_mheap.arena_start; // word offset
+ b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
shift = off % wordsPerBitmapWord;
*b = (*b & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
}
@@ -2398,14 +2436,14 @@ runtime_unmarkspan(void *v, uintptr n)
{
uintptr *p, *b, off;
- if((byte*)v+n > (byte*)runtime_mheap->arena_used || (byte*)v < runtime_mheap->arena_start)
+ if((byte*)v+n > (byte*)runtime_mheap.arena_used || (byte*)v < runtime_mheap.arena_start)
runtime_throw("markspan: bad pointer");
p = v;
- off = p - (uintptr*)runtime_mheap->arena_start; // word offset
+ off = p - (uintptr*)runtime_mheap.arena_start; // word offset
if(off % wordsPerBitmapWord != 0)
runtime_throw("markspan: unaligned pointer");
- b = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1;
+ b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
n /= PtrSize;
if(n%wordsPerBitmapWord != 0)
runtime_throw("unmarkspan: unaligned length");
@@ -2426,8 +2464,8 @@ runtime_blockspecial(void *v)
if(DebugMark)
return true;
- off = (uintptr*)v - (uintptr*)runtime_mheap->arena_start;
- b = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1;
+ off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;
+ b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
shift = off % wordsPerBitmapWord;
return (*b & (bitSpecial<<shift)) != 0;
@@ -2441,8 +2479,8 @@ runtime_setblockspecial(void *v, bool s)
if(DebugMark)
return;
- off = (uintptr*)v - (uintptr*)runtime_mheap->arena_start;
- b = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1;
+ off = (uintptr*)v - (uintptr*)runtime_mheap.arena_start;
+ b = (uintptr*)runtime_mheap.arena_start - off/wordsPerBitmapWord - 1;
shift = off % wordsPerBitmapWord;
for(;;) {
@@ -2451,7 +2489,7 @@ runtime_setblockspecial(void *v, bool s)
bits = obits | (bitSpecial<<shift);
else
bits = obits & ~(bitSpecial<<shift);
- if(runtime_singleproc) {
+ if(runtime_gomaxprocs == 1) {
*b = bits;
break;
} else {
@@ -2476,13 +2514,13 @@ runtime_MHeap_MapBits(MHeap *h)
uintptr n;
n = (h->arena_used - h->arena_start) / wordsPerBitmapWord;
- n = (n+bitmapChunk-1) & ~(bitmapChunk-1);
+ n = ROUND(n, bitmapChunk);
if(h->bitmap_mapped >= n)
return;
page_size = getpagesize();
n = (n+page_size-1) & ~(page_size-1);
- runtime_SysMap(h->arena_start - n, n - h->bitmap_mapped);
+ runtime_SysMap(h->arena_start - n, n - h->bitmap_mapped, &mstats.gc_sys);
h->bitmap_mapped = n;
}
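
To make the stats derivation in the new updatememstats() concrete, a worked example with invented numbers (not taken from the patch):

/*
 * Suppose the free counters and the span scan yield, at GC time:
 *
 *   frees so far:   nlargefree = 10, sum(nsmallfree[]) = 990    -> mstats.nfree = 1000
 *   alive objects counted by scanning in-use spans:                200
 *   bytes freed so far: largefree + smallfree                    = 64 KB
 *   alive heap bytes from the span scan:                           16 KB
 *
 * Then:
 *   mstats.nmalloc      = nfree + alive objects     = 1000 + 200 = 1200
 *   mstats.total_alloc  = freed bytes + alive bytes = 64 KB + 16 KB = 80 KB
 *   mstats.heap_alloc   = alive bytes               = 16 KB
 *   mstats.heap_objects = nmalloc - nfree            = 200
 */
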
diff --git a/libgo/runtime/mgc0.h b/libgo/runtime/mgc0.h
index d14fb37c209..f8abe6c9c1c 100644
--- a/libgo/runtime/mgc0.h
+++ b/libgo/runtime/mgc0.h
@@ -26,7 +26,6 @@ enum {
GC_ARRAY_START, // Start an array with a fixed length. Args: (off, len, elemsize)
GC_ARRAY_NEXT, // The next element of an array. Args: none
GC_CALL, // Call a subroutine. Args: (off, objgcrel)
- GC_MAP_PTR, // Go map. Args: (off, MapType*)
GC_CHAN_PTR, // Go channel. Args: (off, ChanType*)
GC_STRING, // Go string. Args: (off)
GC_EFACE, // interface{}. Args: (off)
diff --git a/libgo/runtime/mheap.c b/libgo/runtime/mheap.c
index b4d94b68559..1b6cfd3dcde 100644
--- a/libgo/runtime/mheap.c
+++ b/libgo/runtime/mheap.c
@@ -36,12 +36,12 @@ RecordSpan(void *vh, byte *p)
cap = 64*1024/sizeof(all[0]);
if(cap < h->nspancap*3/2)
cap = h->nspancap*3/2;
- all = (MSpan**)runtime_SysAlloc(cap*sizeof(all[0]));
+ all = (MSpan**)runtime_SysAlloc(cap*sizeof(all[0]), &mstats.other_sys);
if(all == nil)
runtime_throw("runtime: cannot allocate memory");
if(h->allspans) {
runtime_memmove(all, h->allspans, h->nspancap*sizeof(all[0]));
- runtime_SysFree(h->allspans, h->nspancap*sizeof(all[0]));
+ runtime_SysFree(h->allspans, h->nspancap*sizeof(all[0]), &mstats.other_sys);
}
h->allspans = all;
h->nspancap = cap;
@@ -51,12 +51,12 @@ RecordSpan(void *vh, byte *p)
// Initialize the heap; fetch memory using alloc.
void
-runtime_MHeap_Init(MHeap *h, void *(*alloc)(uintptr))
+runtime_MHeap_Init(MHeap *h)
{
uint32 i;
- runtime_FixAlloc_Init(&h->spanalloc, sizeof(MSpan), alloc, RecordSpan, h);
- runtime_FixAlloc_Init(&h->cachealloc, sizeof(MCache), alloc, nil, nil);
+ runtime_FixAlloc_Init(&h->spanalloc, sizeof(MSpan), RecordSpan, h, &mstats.mspan_sys);
+ runtime_FixAlloc_Init(&h->cachealloc, sizeof(MCache), nil, nil, &mstats.mcache_sys);
// h->mapcache needs no init
for(i=0; i<nelem(h->free); i++)
runtime_MSpanList_Init(&h->free[i]);
@@ -65,6 +65,23 @@ runtime_MHeap_Init(MHeap *h, void *(*alloc)(uintptr))
runtime_MCentral_Init(&h->central[i], i);
}
+void
+runtime_MHeap_MapSpans(MHeap *h)
+{
+ uintptr n;
+
+ // Map spans array, PageSize at a time.
+ n = (uintptr)h->arena_used;
+ if(sizeof(void*) == 8)
+ n -= (uintptr)h->arena_start;
+ n = n / PageSize * sizeof(h->spans[0]);
+ n = ROUND(n, PageSize);
+ if(h->spans_mapped >= n)
+ return;
+ runtime_SysMap((byte*)h->spans + h->spans_mapped, n - h->spans_mapped, &mstats.other_sys);
+ h->spans_mapped = n;
+}
+
// Allocate a new span of npage pages from the heap
// and record its size class in the HeapMap and HeapMapCache.
MSpan*
@@ -73,7 +90,8 @@ runtime_MHeap_Alloc(MHeap *h, uintptr npage, int32 sizeclass, int32 acct, int32
MSpan *s;
runtime_lock(h);
- runtime_purgecachedstats(runtime_m()->mcache);
+ mstats.heap_alloc += runtime_m()->mcache->local_cachealloc;
+ runtime_m()->mcache->local_cachealloc = 0;
s = MHeap_AllocLocked(h, npage, sizeclass);
if(s != nil) {
mstats.heap_inuse += npage<<PageShift;
@@ -138,6 +156,7 @@ HaveSpan:
// is just a unique constant not seen elsewhere in the
// runtime, as a clue in case it turns up unexpectedly in
// memory or in a stack trace.
+ runtime_SysUsed((void*)(s->start<<PageShift), s->npages<<PageShift);
*(uintptr*)(s->start<<PageShift) = (uintptr)0xbeadbeadbeadbeadULL;
}
s->npreleased = 0;
@@ -145,17 +164,15 @@ HaveSpan:
if(s->npages > npage) {
// Trim extra and put it back in the heap.
t = runtime_FixAlloc_Alloc(&h->spanalloc);
- mstats.mspan_inuse = h->spanalloc.inuse;
- mstats.mspan_sys = h->spanalloc.sys;
runtime_MSpan_Init(t, s->start + npage, s->npages - npage);
s->npages = npage;
p = t->start;
if(sizeof(void*) == 8)
p -= ((uintptr)h->arena_start>>PageShift);
if(p > 0)
- h->map[p-1] = s;
- h->map[p] = t;
- h->map[p+t->npages-1] = t;
+ h->spans[p-1] = s;
+ h->spans[p] = t;
+ h->spans[p+t->npages-1] = t;
*(uintptr*)(t->start<<PageShift) = *(uintptr*)(s->start<<PageShift); // copy "needs zeroing" mark
t->state = MSpanInUse;
MHeap_FreeLocked(h, t);
@@ -172,7 +189,7 @@ HaveSpan:
if(sizeof(void*) == 8)
p -= ((uintptr)h->arena_start>>PageShift);
for(n=0; n<npage; n++)
- h->map[p+n] = s;
+ h->spans[p+n] = s;
return s;
}
@@ -232,19 +249,16 @@ MHeap_Grow(MHeap *h, uintptr npage)
return false;
}
}
- mstats.heap_sys += ask;
// Create a fake "in use" span and free it, so that the
// right coalescing happens.
s = runtime_FixAlloc_Alloc(&h->spanalloc);
- mstats.mspan_inuse = h->spanalloc.inuse;
- mstats.mspan_sys = h->spanalloc.sys;
runtime_MSpan_Init(s, (uintptr)v>>PageShift, ask>>PageShift);
p = s->start;
if(sizeof(void*) == 8)
p -= ((uintptr)h->arena_start>>PageShift);
- h->map[p] = s;
- h->map[p + s->npages - 1] = s;
+ h->spans[p] = s;
+ h->spans[p + s->npages - 1] = s;
s->state = MSpanInUse;
MHeap_FreeLocked(h, s);
return true;
@@ -261,7 +275,7 @@ runtime_MHeap_Lookup(MHeap *h, void *v)
p = (uintptr)v;
if(sizeof(void*) == 8)
p -= (uintptr)h->arena_start;
- return h->map[p >> PageShift];
+ return h->spans[p >> PageShift];
}
// Look up the span at the given address.
@@ -283,10 +297,8 @@ runtime_MHeap_LookupMaybe(MHeap *h, void *v)
q = p;
if(sizeof(void*) == 8)
q -= (uintptr)h->arena_start >> PageShift;
- s = h->map[q];
- if(s == nil || p < s->start || p - s->start >= s->npages)
- return nil;
- if(s->state != MSpanInUse)
+ s = h->spans[q];
+ if(s == nil || p < s->start || (byte*)v >= s->limit || s->state != MSpanInUse)
return nil;
return s;
}
@@ -296,7 +308,8 @@ void
runtime_MHeap_Free(MHeap *h, MSpan *s, int32 acct)
{
runtime_lock(h);
- runtime_purgecachedstats(runtime_m()->mcache);
+ mstats.heap_alloc += runtime_m()->mcache->local_cachealloc;
+ runtime_m()->mcache->local_cachealloc = 0;
mstats.heap_inuse -= s->npages<<PageShift;
if(acct) {
mstats.heap_alloc -= s->npages<<PageShift;
@@ -313,8 +326,6 @@ MHeap_FreeLocked(MHeap *h, MSpan *s)
MSpan *t;
PageID p;
- if(s->types.sysalloc)
- runtime_settype_sysfree(s);
s->types.compression = MTypes_Empty;
if(s->state != MSpanInUse || s->ref != 0) {
@@ -334,31 +345,31 @@ MHeap_FreeLocked(MHeap *h, MSpan *s)
p = s->start;
if(sizeof(void*) == 8)
p -= (uintptr)h->arena_start >> PageShift;
- if(p > 0 && (t = h->map[p-1]) != nil && t->state != MSpanInUse) {
- tp = (uintptr*)(t->start<<PageShift);
- *tp |= *sp; // propagate "needs zeroing" mark
+ if(p > 0 && (t = h->spans[p-1]) != nil && t->state != MSpanInUse) {
+ if(t->npreleased == 0) { // can't touch this otherwise
+ tp = (uintptr*)(t->start<<PageShift);
+ *tp |= *sp; // propagate "needs zeroing" mark
+ }
s->start = t->start;
s->npages += t->npages;
s->npreleased = t->npreleased; // absorb released pages
p -= t->npages;
- h->map[p] = s;
+ h->spans[p] = s;
runtime_MSpanList_Remove(t);
t->state = MSpanDead;
runtime_FixAlloc_Free(&h->spanalloc, t);
- mstats.mspan_inuse = h->spanalloc.inuse;
- mstats.mspan_sys = h->spanalloc.sys;
}
- if(p+s->npages < nelem(h->map) && (t = h->map[p+s->npages]) != nil && t->state != MSpanInUse) {
- tp = (uintptr*)(t->start<<PageShift);
- *sp |= *tp; // propagate "needs zeroing" mark
+ if((p+s->npages)*sizeof(h->spans[0]) < h->spans_mapped && (t = h->spans[p+s->npages]) != nil && t->state != MSpanInUse) {
+ if(t->npreleased == 0) { // can't touch this otherwise
+ tp = (uintptr*)(t->start<<PageShift);
+ *sp |= *tp; // propagate "needs zeroing" mark
+ }
s->npages += t->npages;
s->npreleased += t->npreleased;
- h->map[p + s->npages - 1] = s;
+ h->spans[p + s->npages - 1] = s;
runtime_MSpanList_Remove(t);
t->state = MSpanDead;
runtime_FixAlloc_Free(&h->spanalloc, t);
- mstats.mspan_inuse = h->spanalloc.inuse;
- mstats.mspan_sys = h->spanalloc.sys;
}
// Insert s into appropriate list.
@@ -388,7 +399,7 @@ scavengelist(MSpan *list, uint64 now, uint64 limit)
sumreleased = 0;
for(s=list->next; s != list; s=s->next) {
- if((now - s->unusedsince) > limit) {
+ if((now - s->unusedsince) > limit && s->npreleased != s->npages) {
released = (s->npages - s->npreleased) << PageShift;
mstats.heap_released += released;
sumreleased += released;
@@ -399,19 +410,26 @@ scavengelist(MSpan *list, uint64 now, uint64 limit)
return sumreleased;
}
-static uintptr
-scavenge(uint64 now, uint64 limit)
+static void
+scavenge(int32 k, uint64 now, uint64 limit)
{
uint32 i;
uintptr sumreleased;
MHeap *h;
- h = runtime_mheap;
+ h = &runtime_mheap;
sumreleased = 0;
for(i=0; i < nelem(h->free); i++)
sumreleased += scavengelist(&h->free[i], now, limit);
sumreleased += scavengelist(&h->large, now, limit);
- return sumreleased;
+
+ if(runtime_debug.gctrace > 0) {
+ if(sumreleased > 0)
+ runtime_printf("scvg%d: %D MB released\n", k, (uint64)sumreleased>>20);
+ runtime_printf("scvg%d: inuse: %D, idle: %D, sys: %D, released: %D, consumed: %D (MB)\n",
+ k, mstats.heap_inuse>>20, mstats.heap_idle>>20, mstats.heap_sys>>20,
+ mstats.heap_released>>20, (mstats.heap_sys - mstats.heap_released)>>20);
+ }
}
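scavenge() now takes over the GODEBUG gctrace reporting that previously lived in the scavenger loop, and scavengelist skips spans that are already fully released (npreleased == npages). A minimal sketch of the per-span release decision, with assumed field names and an assumed 5-minute idle limit standing in for the real MSpan and scavenger settings:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12

/* Hypothetical idle-span record with just the fields the decision needs. */
struct idlespan {
    uint64_t unusedsince;   /* when the span became idle, in ns */
    uint64_t npages;        /* total pages in the span */
    uint64_t npreleased;    /* pages already returned to the OS */
};

/* Mirror of the scavengelist test: idle long enough and not already fully released. */
static bool should_release(const struct idlespan *s, uint64_t now, uint64_t limit) {
    return (now - s->unusedsince) > limit && s->npreleased != s->npages;
}

int main(void) {
    struct idlespan s = { .unusedsince = 0, .npages = 16, .npreleased = 0 };
    uint64_t now   = 6ULL * 60 * 1000 * 1000 * 1000;   /* 6 minutes, in ns */
    uint64_t limit = 5ULL * 60 * 1000 * 1000 * 1000;   /* assumed 5-minute idle limit */
    if (should_release(&s, now, limit))
        printf("release %llu KB\n",
               (unsigned long long)((s.npages - s.npreleased) << PAGE_SHIFT) >> 10);
    return 0;
}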
// Release (part of) unused memory to OS.
@@ -424,9 +442,6 @@ runtime_MHeap_Scavenger(void* dummy)
MHeap *h;
uint64 tick, now, forcegc, limit;
uint32 k;
- uintptr sumreleased;
- const byte *env;
- bool trace;
Note note, *notep;
USED(dummy);
@@ -446,17 +461,10 @@ runtime_MHeap_Scavenger(void* dummy)
else
tick = limit/2;
- trace = false;
- env = runtime_getenv("GOGCTRACE");
- if(env != nil)
- trace = runtime_atoi(env) > 0;
-
- h = runtime_mheap;
+ h = &runtime_mheap;
for(k=0;; k++) {
runtime_noteclear(&note);
- runtime_entersyscallblock();
- runtime_notetsleep(&note, tick);
- runtime_exitsyscall();
+ runtime_notetsleepg(&note, tick);
runtime_lock(h);
now = runtime_nanotime();
@@ -468,24 +476,14 @@ runtime_MHeap_Scavenger(void* dummy)
runtime_noteclear(&note);
notep = &note;
__go_go(forcegchelper, (void*)notep);
- runtime_entersyscallblock();
- runtime_notesleep(&note);
- runtime_exitsyscall();
- if(trace)
+ runtime_notetsleepg(&note, -1);
+ if(runtime_debug.gctrace > 0)
runtime_printf("scvg%d: GC forced\n", k);
runtime_lock(h);
now = runtime_nanotime();
}
- sumreleased = scavenge(now, limit);
+ scavenge(k, now, limit);
runtime_unlock(h);
-
- if(trace) {
- if(sumreleased > 0)
- runtime_printf("scvg%d: %p MB released\n", k, sumreleased>>20);
- runtime_printf("scvg%d: inuse: %D, idle: %D, sys: %D, released: %D, consumed: %D (MB)\n",
- k, mstats.heap_inuse>>20, mstats.heap_idle>>20, mstats.heap_sys>>20,
- mstats.heap_released>>20, (mstats.heap_sys - mstats.heap_released)>>20);
- }
}
}
@@ -495,9 +493,9 @@ void
runtime_debug_freeOSMemory(void)
{
runtime_gc(1);
- runtime_lock(runtime_mheap);
- scavenge(~(uintptr)0, 0);
- runtime_unlock(runtime_mheap);
+ runtime_lock(&runtime_mheap);
+ scavenge(-1, ~(uintptr)0, 0);
+ runtime_unlock(&runtime_mheap);
}
// Initialize a new span with the given start and npages.
diff --git a/libgo/runtime/mprof.goc b/libgo/runtime/mprof.goc
index 73d937908c6..7507dfc9173 100644
--- a/libgo/runtime/mprof.goc
+++ b/libgo/runtime/mprof.goc
@@ -14,44 +14,11 @@ package runtime
#include "go-string.h"
// NOTE(rsc): Everything here could use cas if contention became an issue.
-static Lock proflock, alloclock;
+static Lock proflock;
// All memory allocations are local and do not escape outside of the profiler.
// The profiler is forbidden from referring to garbage-collected memory.
-static byte *pool; // memory allocation pool
-static uintptr poolfree; // number of bytes left in the pool
-enum {
- Chunk = 32*PageSize, // initial size of the pool
-};
-
-// Memory allocation local to this file.
-// There is no way to return the allocated memory back to the OS.
-static void*
-allocate(uintptr size)
-{
- void *v;
-
- if(size == 0)
- return nil;
-
- if(size >= Chunk/2)
- return runtime_SysAlloc(size);
-
- runtime_lock(&alloclock);
- if(size > poolfree) {
- pool = runtime_SysAlloc(Chunk);
- if(pool == nil)
- runtime_throw("runtime: cannot allocate memory");
- poolfree = Chunk;
- }
- v = pool;
- pool += size;
- poolfree -= size;
- runtime_unlock(&alloclock);
- return v;
-}
-
enum { MProf, BProf }; // profile types
// Per-call-stack profiling information.
@@ -104,10 +71,9 @@ stkbucket(int32 typ, Location *stk, int32 nstk, bool alloc)
Bucket *b;
if(buckhash == nil) {
- buckhash = runtime_SysAlloc(BuckHashSize*sizeof buckhash[0]);
+ buckhash = runtime_SysAlloc(BuckHashSize*sizeof buckhash[0], &mstats.buckhash_sys);
if(buckhash == nil)
runtime_throw("runtime: cannot allocate memory");
- mstats.buckhash_sys += BuckHashSize*sizeof buckhash[0];
}
// Hash stack.
@@ -137,9 +103,7 @@ stkbucket(int32 typ, Location *stk, int32 nstk, bool alloc)
if(!alloc)
return nil;
- b = allocate(sizeof *b + nstk*sizeof stk[0]);
- if(b == nil)
- runtime_throw("runtime: cannot allocate memory");
+ b = runtime_persistentalloc(sizeof *b + nstk*sizeof stk[0], 0, &mstats.buckhash_sys);
bucketmem += sizeof *b + nstk*sizeof stk[0];
runtime_memmove(b->stk, stk, nstk*sizeof stk[0]);
b->typ = typ;
@@ -241,7 +205,7 @@ setaddrbucket(uintptr addr, Bucket *b)
if(ah->addr == (addr>>AddrHashShift))
goto found;
- ah = allocate(sizeof *ah);
+ ah = runtime_persistentalloc(sizeof *ah, 0, &mstats.buckhash_sys);
addrmem += sizeof *ah;
ah->next = addrhash[h];
ah->addr = addr>>AddrHashShift;
@@ -249,7 +213,7 @@ setaddrbucket(uintptr addr, Bucket *b)
found:
if((e = addrfree) == nil) {
- e = allocate(64*sizeof *e);
+ e = runtime_persistentalloc(64*sizeof *e, 0, &mstats.buckhash_sys);
addrmem += 64*sizeof *e;
for(i=0; i+1<64; i++)
e[i].next = &e[i+1];
@@ -296,16 +260,10 @@ found:
void
runtime_MProf_Malloc(void *p, uintptr size)
{
- M *m;
int32 nstk;
Location stk[32];
Bucket *b;
- m = runtime_m();
- if(m->nomemprof > 0)
- return;
-
- m->nomemprof++;
nstk = runtime_callers(1, stk, 32);
runtime_lock(&proflock);
b = stkbucket(MProf, stk, nstk, true);
@@ -313,22 +271,14 @@ runtime_MProf_Malloc(void *p, uintptr size)
b->recent_alloc_bytes += size;
setaddrbucket((uintptr)p, b);
runtime_unlock(&proflock);
- m = runtime_m();
- m->nomemprof--;
}
// Called when freeing a profiled block.
void
runtime_MProf_Free(void *p, uintptr size)
{
- M *m;
Bucket *b;
- m = runtime_m();
- if(m->nomemprof > 0)
- return;
-
- m->nomemprof++;
runtime_lock(&proflock);
b = getaddrbucket((uintptr)p);
if(b != nil) {
@@ -336,8 +286,6 @@ runtime_MProf_Free(void *p, uintptr size)
b->recent_free_bytes += size;
}
runtime_unlock(&proflock);
- m = runtime_m();
- m->nomemprof--;
}
int64 runtime_blockprofilerate; // in CPU ticks
@@ -347,7 +295,17 @@ void runtime_SetBlockProfileRate(intgo) __asm__ (GOSYM_PREFIX "runtime.SetBlockP
void
runtime_SetBlockProfileRate(intgo rate)
{
- runtime_atomicstore64((uint64*)&runtime_blockprofilerate, rate * runtime_tickspersecond() / (1000*1000*1000));
+ int64 r;
+
+ if(rate <= 0)
+ r = 0; // disable profiling
+ else {
+ // convert ns to cycles, use float64 to prevent overflow during multiplication
+ r = (float64)rate*runtime_tickspersecond()/(1000*1000*1000);
+ if(r == 0)
+ r = 1;
+ }
+ runtime_atomicstore64((uint64*)&runtime_blockprofilerate, r);
}
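SetBlockProfileRate takes the rate in nanoseconds but stores it in CPU ticks; the multiplication is done in float64 so rate*tickspersecond cannot overflow int64, and a nonzero rate that rounds to zero ticks is bumped to 1 so profiling still turns on. A standalone sketch of the same conversion with an assumed ticks-per-second value:

#include <stdint.h>
#include <stdio.h>

/* Convert a block-profile rate in nanoseconds to CPU ticks, as SetBlockProfileRate does.
   tickspersecond is assumed here; the runtime measures it at startup. */
static int64_t rate_ns_to_ticks(int64_t rate_ns, int64_t tickspersecond) {
    if (rate_ns <= 0)
        return 0;                                  /* disable profiling */
    /* float64 (double) keeps rate_ns * tickspersecond from overflowing int64 */
    int64_t r = (int64_t)((double)rate_ns * tickspersecond / (1000 * 1000 * 1000));
    return r == 0 ? 1 : r;                         /* round tiny rates up to 1 tick */
}

int main(void) {
    printf("%lld\n", (long long)rate_ns_to_ticks(100, 2500000000LL)); /* 100ns at 2.5GHz -> 250 ticks */
    printf("%lld\n", (long long)rate_ns_to_ticks(1, 500000000LL));    /* 1ns at 500MHz rounds up to 1 */
    return 0;
}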
void
@@ -510,10 +468,10 @@ func Stack(b Slice, all bool) (n int) {
bool enablegc;
sp = runtime_getcallersp(&b);
- pc = runtime_getcallerpc(&b);
+ pc = (byte*)(uintptr)runtime_getcallerpc(&b);
if(all) {
- runtime_semacquire(&runtime_worldsema);
+ runtime_semacquire(&runtime_worldsema, false);
runtime_m()->gcing = 1;
runtime_stoptheworld();
enablegc = mstats.enablegc;
@@ -530,7 +488,7 @@ func Stack(b Slice, all bool) (n int) {
USED(sp);
runtime_goroutineheader(g);
runtime_traceback();
- runtime_goroutinetrailer(g);
+ runtime_printcreatedby(g);
if(all)
runtime_tracebackothers(g);
n = b.__count - g->writenbuf;
@@ -572,7 +530,7 @@ func GoroutineProfile(b Slice) (n int, ok bool) {
ok = false;
n = runtime_gcount();
if(n <= b.__count) {
- runtime_semacquire(&runtime_worldsema);
+ runtime_semacquire(&runtime_worldsema, false);
runtime_m()->gcing = 1;
runtime_stoptheworld();
@@ -598,5 +556,5 @@ func GoroutineProfile(b Slice) (n int, ok bool) {
void
runtime_mprofinit(void)
{
- addrhash = allocate((1<<AddrHashBits)*sizeof *addrhash);
+ addrhash = runtime_persistentalloc((1<<AddrHashBits)*sizeof *addrhash, 0, &mstats.buckhash_sys);
}
diff --git a/libgo/runtime/msize.c b/libgo/runtime/msize.c
index 3b5591c1b17..745a76958c8 100644
--- a/libgo/runtime/msize.c
+++ b/libgo/runtime/msize.c
@@ -31,7 +31,6 @@
int32 runtime_class_to_size[NumSizeClasses];
int32 runtime_class_to_allocnpages[NumSizeClasses];
-int32 runtime_class_to_transfercount[NumSizeClasses];
// The SizeToClass lookup is implemented using two arrays,
// one mapping sizes <= 1024 to their class and one mapping
@@ -42,17 +41,17 @@ int32 runtime_class_to_transfercount[NumSizeClasses];
// size divided by 128 (rounded up). The arrays are filled in
// by InitSizes.
-static int32 size_to_class8[1024/8 + 1];
-static int32 size_to_class128[(MaxSmallSize-1024)/128 + 1];
+int8 runtime_size_to_class8[1024/8 + 1];
+int8 runtime_size_to_class128[(MaxSmallSize-1024)/128 + 1];
-int32
-runtime_SizeToClass(int32 size)
+static int32
+SizeToClass(int32 size)
{
if(size > MaxSmallSize)
runtime_throw("SizeToClass - invalid size");
if(size > 1024-8)
- return size_to_class128[(size-1024+127) >> 7];
- return size_to_class8[(size+7)>>3];
+ return runtime_size_to_class128[(size-1024+127) >> 7];
+ return runtime_size_to_class8[(size+7)>>3];
}
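The two lookup tables encode size classes in 8-byte steps up to 1024 bytes and 128-byte steps above that, which is exactly what (size+7)>>3 and (size-1024+127)>>7 index into. A tiny standalone illustration of that index arithmetic (made-up sizes, not the real class tables):

#include <stdio.h>

/* Index math used by SizeToClass: sizes <= 1024 use 8-byte buckets,
   larger small sizes use 128-byte buckets offset by 1024. */
static int class8_index(int size)   { return (size + 7) >> 3; }          /* ceil(size/8)          */
static int class128_index(int size) { return (size - 1024 + 127) >> 7; } /* ceil((size-1024)/128) */

int main(void) {
    printf("size 1    -> size_to_class8[%d]\n",   class8_index(1));      /* 1   */
    printf("size 1016 -> size_to_class8[%d]\n",   class8_index(1016));   /* 127 */
    printf("size 1025 -> size_to_class128[%d]\n", class128_index(1025)); /* 1   */
    printf("size 2048 -> size_to_class128[%d]\n", class128_index(2048)); /* 8   */
    return 0;
}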
void
@@ -111,16 +110,16 @@ runtime_InitSizes(void)
nextsize = 0;
for (sizeclass = 1; sizeclass < NumSizeClasses; sizeclass++) {
for(; nextsize < 1024 && nextsize <= runtime_class_to_size[sizeclass]; nextsize+=8)
- size_to_class8[nextsize/8] = sizeclass;
+ runtime_size_to_class8[nextsize/8] = sizeclass;
if(nextsize >= 1024)
for(; nextsize <= runtime_class_to_size[sizeclass]; nextsize += 128)
- size_to_class128[(nextsize-1024)/128] = sizeclass;
+ runtime_size_to_class128[(nextsize-1024)/128] = sizeclass;
}
// Double-check SizeToClass.
if(0) {
for(n=0; n < MaxSmallSize; n++) {
- sizeclass = runtime_SizeToClass(n);
+ sizeclass = SizeToClass(n);
if(sizeclass < 1 || sizeclass >= NumSizeClasses || runtime_class_to_size[sizeclass] < n) {
runtime_printf("size=%d sizeclass=%d runtime_class_to_size=%d\n", n, sizeclass, runtime_class_to_size[sizeclass]);
runtime_printf("incorrect SizeToClass");
@@ -137,16 +136,6 @@ runtime_InitSizes(void)
// Copy out for statistics table.
for(i=0; i<nelem(runtime_class_to_size); i++)
mstats.by_size[i].size = runtime_class_to_size[i];
-
- // Initialize the runtime_class_to_transfercount table.
- for(sizeclass = 1; sizeclass < NumSizeClasses; sizeclass++) {
- n = 64*1024 / runtime_class_to_size[sizeclass];
- if(n < 2)
- n = 2;
- if(n > 32)
- n = 32;
- runtime_class_to_transfercount[sizeclass] = n;
- }
return;
dump:
@@ -157,12 +146,14 @@ dump:
runtime_printf(" %d", runtime_class_to_size[sizeclass]);
runtime_printf("\n\n");
runtime_printf("size_to_class8:");
- for(i=0; i<nelem(size_to_class8); i++)
- runtime_printf(" %d=>%d(%d)\n", i*8, size_to_class8[i], runtime_class_to_size[size_to_class8[i]]);
+ for(i=0; i<nelem(runtime_size_to_class8); i++)
+ runtime_printf(" %d=>%d(%d)\n", i*8, runtime_size_to_class8[i],
+ runtime_class_to_size[runtime_size_to_class8[i]]);
runtime_printf("\n");
runtime_printf("size_to_class128:");
- for(i=0; i<nelem(size_to_class128); i++)
- runtime_printf(" %d=>%d(%d)\n", i*128, size_to_class128[i], runtime_class_to_size[size_to_class128[i]]);
+ for(i=0; i<nelem(runtime_size_to_class128); i++)
+ runtime_printf(" %d=>%d(%d)\n", i*128, runtime_size_to_class128[i],
+ runtime_class_to_size[runtime_size_to_class128[i]]);
runtime_printf("\n");
}
runtime_throw("InitSizes failed");
diff --git a/libgo/runtime/netpoll.goc b/libgo/runtime/netpoll.goc
index a0bd735f85c..02705734dd8 100644
--- a/libgo/runtime/netpoll.goc
+++ b/libgo/runtime/netpoll.goc
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build darwin linux
+// +build darwin dragonfly freebsd linux netbsd openbsd windows
package net
@@ -19,7 +19,7 @@ package net
// Integrated network poller (platform-independent part).
// A particular implementation (epoll/kqueue) must define the following functions:
// void runtime_netpollinit(void); // to initialize the poller
-// int32 runtime_netpollopen(int32 fd, PollDesc *pd); // to arm edge-triggered notifications
+// int32 runtime_netpollopen(uintptr fd, PollDesc *pd); // to arm edge-triggered notifications
// and associate fd with pd.
// An implementation must call the following function to denote that the pd is ready.
// void runtime_netpollready(G **gpp, PollDesc *pd, int32 mode);
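The contract above is all a platform poller has to provide: an init hook, open/close taking the fd as a uintptr, and a poll step that hands ready descriptors back through runtime_netpollready. A hypothetical skeleton in the spirit of netpoll_stub.c, just to show the shape of that interface (self-contained demo types, not the runtime's):

#include <stdint.h>
#include <stdio.h>

/* Hypothetical minimal types standing in for the runtime's PollDesc and G. */
typedef struct PollDesc { uintptr_t fd; } PollDesc;

/* A do-nothing poller with the shape described above: init, open(uintptr fd), close. */
static void    netpollinit(void) { }
static int32_t netpollopen(uintptr_t fd, PollDesc *pd) { pd->fd = fd; return 0; }
static int32_t netpollclose(uintptr_t fd) { (void)fd; return 0; }

/* A real implementation would call runtime_netpollready(&gp, pd, mode) for each
   descriptor that epoll/kqueue reports as readable ('r') or writable ('w'). */

int main(void) {
    PollDesc pd;
    netpollinit();
    printf("open: %d\n",  (int)netpollopen(3, &pd));  /* 3 is an arbitrary example fd */
    printf("close: %d\n", (int)netpollclose(pd.fd));
    return 0;
}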
@@ -30,7 +30,7 @@ struct PollDesc
{
PollDesc* link; // in pollcache, protected by pollcache.Lock
Lock; // protects the following fields
- int32 fd;
+ uintptr fd;
bool closing;
uintptr seq; // protects from stale timers and ready notifications
G* rg; // G waiting for read or READY (binary semaphore)
@@ -52,8 +52,8 @@ static struct
// seq is incremented when deadlines are changed or descriptor is reused.
} pollcache;
-static void netpollblock(PollDesc*, int32);
-static G* netpollunblock(PollDesc*, int32);
+static bool netpollblock(PollDesc*, int32);
+static G* netpollunblock(PollDesc*, int32, bool);
static void deadline(int64, Eface);
static void readDeadline(int64, Eface);
static void writeDeadline(int64, Eface);
@@ -68,7 +68,7 @@ func runtime_pollServerInit() {
runtime_netpollinit();
}
-func runtime_pollOpen(fd int) (pd *PollDesc, errno int) {
+func runtime_pollOpen(fd uintptr) (pd *PollDesc, errno int) {
pd = allocPollDesc();
runtime_lock(pd);
if(pd->wg != nil && pd->wg != READY)
@@ -117,18 +117,35 @@ ret:
func runtime_pollWait(pd *PollDesc, mode int) (err int) {
runtime_lock(pd);
err = checkerr(pd, mode);
- if(err)
- goto ret;
- netpollblock(pd, mode);
- err = checkerr(pd, mode);
-ret:
+ if(err == 0) {
+ while(!netpollblock(pd, mode)) {
+ err = checkerr(pd, mode);
+ if(err != 0)
+ break;
+ // Can happen if timeout has fired and unblocked us,
+ // but before we had a chance to run, timeout has been reset.
+ // Pretend it has not happened and retry.
+ }
+ }
+ runtime_unlock(pd);
+}
+
+func runtime_pollWaitCanceled(pd *PollDesc, mode int) {
+ runtime_lock(pd);
+ // wait for ioready, ignore closing or timeouts.
+ while(!netpollblock(pd, mode))
+ ;
runtime_unlock(pd);
}
func runtime_pollSetDeadline(pd *PollDesc, d int64, mode int) {
+ G *rg, *wg;
+
runtime_lock(pd);
- if(pd->closing)
- goto ret;
+ if(pd->closing) {
+ runtime_unlock(pd);
+ return;
+ }
pd->seq++; // invalidate current timers
// Reset current timers.
if(pd->rt.fv) {
@@ -140,9 +157,8 @@ func runtime_pollSetDeadline(pd *PollDesc, d int64, mode int) {
pd->wt.fv = nil;
}
// Setup new timers.
- if(d != 0 && d <= runtime_nanotime()) {
+ if(d != 0 && d <= runtime_nanotime())
d = -1;
- }
if(mode == 'r' || mode == 'r'+'w')
pd->rd = d;
if(mode == 'w' || mode == 'r'+'w')
@@ -172,8 +188,18 @@ func runtime_pollSetDeadline(pd *PollDesc, d int64, mode int) {
runtime_addtimer(&pd->wt);
}
}
-ret:
+ // If we set the new deadline in the past, unblock currently pending IO if any.
+ rg = nil;
+ wg = nil;
+ if(pd->rd < 0)
+ rg = netpollunblock(pd, 'r', false);
+ if(pd->wd < 0)
+ wg = netpollunblock(pd, 'w', false);
runtime_unlock(pd);
+ if(rg)
+ runtime_ready(rg);
+ if(wg)
+ runtime_ready(wg);
}
func runtime_pollUnblock(pd *PollDesc) {
@@ -184,8 +210,8 @@ func runtime_pollUnblock(pd *PollDesc) {
runtime_throw("runtime_pollUnblock: already closing");
pd->closing = true;
pd->seq++;
- rg = netpollunblock(pd, 'r');
- wg = netpollunblock(pd, 'w');
+ rg = netpollunblock(pd, 'r', false);
+ wg = netpollunblock(pd, 'w', false);
if(pd->rt.fv) {
runtime_deltimer(&pd->rt);
pd->rt.fv = nil;
@@ -201,6 +227,12 @@ func runtime_pollUnblock(pd *PollDesc) {
runtime_ready(wg);
}
+uintptr
+runtime_netpollfd(PollDesc *pd)
+{
+ return pd->fd;
+}
+
// make pd ready, newly runnable goroutines (if any) are enqueued into gpp list
void
runtime_netpollready(G **gpp, PollDesc *pd, int32 mode)
@@ -210,9 +242,9 @@ runtime_netpollready(G **gpp, PollDesc *pd, int32 mode)
rg = wg = nil;
runtime_lock(pd);
if(mode == 'r' || mode == 'r'+'w')
- rg = netpollunblock(pd, 'r');
+ rg = netpollunblock(pd, 'r', true);
if(mode == 'w' || mode == 'r'+'w')
- wg = netpollunblock(pd, 'w');
+ wg = netpollunblock(pd, 'w', true);
runtime_unlock(pd);
if(rg) {
rg->schedlink = *gpp;
@@ -234,7 +266,8 @@ checkerr(PollDesc *pd, int32 mode)
return 0;
}
-static void
+// returns true if IO is ready, or false if timed out or closed
+static bool
netpollblock(PollDesc *pd, int32 mode)
{
G **gpp;
@@ -244,17 +277,20 @@ netpollblock(PollDesc *pd, int32 mode)
gpp = &pd->wg;
if(*gpp == READY) {
*gpp = nil;
- return;
+ return true;
}
if(*gpp != nil)
- runtime_throw("epoll: double wait");
+ runtime_throw("netpollblock: double wait");
*gpp = runtime_g();
runtime_park(runtime_unlock, &pd->Lock, "IO wait");
runtime_lock(pd);
+ if(runtime_g()->param)
+ return true;
+ return false;
}
static G*
-netpollunblock(PollDesc *pd, int32 mode)
+netpollunblock(PollDesc *pd, int32 mode, bool ioready)
{
G **gpp, *old;
@@ -264,10 +300,15 @@ netpollunblock(PollDesc *pd, int32 mode)
if(*gpp == READY)
return nil;
if(*gpp == nil) {
- *gpp = READY;
+ // Only set READY for ioready. runtime_pollWait
+ // will check for timeout/cancel before waiting.
+ if(ioready)
+ *gpp = READY;
return nil;
}
old = *gpp;
+ // pass unblock reason onto blocked g
+ old->param = (void*)(uintptr)ioready;
*gpp = nil;
return old;
}
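netpollblock and netpollunblock now cooperate through the per-mode G slot in PollDesc: nil means no waiter, the READY sentinel means an event arrived before anyone blocked, and anything else is the parked G, which learns through g->param whether it was woken by real IO or by a timeout/close. A simplified, single-threaded sketch of that three-state protocol, using a hypothetical Waiter type rather than the runtime's G and locks:

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-ins: a slot holding nil, READY, or a parked waiter. */
typedef struct Waiter { bool io_ready; } Waiter;
#define READY ((Waiter *)1)

static Waiter *slot;                      /* plays the role of pd->rg or pd->wg */

/* Like netpollunblock: return the waiter to wake, or record READY if ioready. */
static Waiter *unblock(bool ioready) {
    Waiter *old = slot;
    if (old == READY)
        return NULL;                      /* event already recorded */
    if (old == NULL) {
        if (ioready)
            slot = READY;                 /* remember the event for a future waiter */
        return NULL;
    }
    old->io_ready = ioready;              /* pass the unblock reason, like g->param */
    slot = NULL;
    return old;
}

/* Like netpollblock: consume READY if present, otherwise record the waiter. */
static bool block(Waiter *w) {
    if (slot == READY) {
        slot = NULL;
        return true;                      /* IO was already ready */
    }
    slot = w;                             /* parked; the runtime would block the G here */
    return false;                         /* caller re-checks after being woken */
}

int main(void) {
    Waiter w = { false };
    unblock(true);                                      /* IO arrives first...           */
    printf("block after early IO: %d\n", block(&w));    /* ...so block() returns true (1) */
    return 0;
}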
@@ -296,14 +337,14 @@ deadlineimpl(int64 now, Eface arg, bool read, bool write)
runtime_throw("deadlineimpl: inconsistent read deadline");
pd->rd = -1;
pd->rt.fv = nil;
- rg = netpollunblock(pd, 'r');
+ rg = netpollunblock(pd, 'r', false);
}
if(write) {
if(pd->wd <= 0 || (pd->wt.fv == nil && !read))
runtime_throw("deadlineimpl: inconsistent write deadline");
pd->wd = -1;
pd->wt.fv = nil;
- wg = netpollunblock(pd, 'w');
+ wg = netpollunblock(pd, 'w', false);
}
runtime_unlock(pd);
if(rg)
@@ -343,7 +384,7 @@ allocPollDesc(void)
n = 1;
// Must be in non-GC memory because it can be referenced
// only from epoll/kqueue internals.
- pd = runtime_SysAlloc(n*sizeof(*pd));
+ pd = runtime_persistentalloc(n*sizeof(*pd), 0, &mstats.other_sys);
for(i = 0; i < n; i++) {
pd[i].link = pollcache.first;
pollcache.first = &pd[i];
diff --git a/libgo/runtime/netpoll_epoll.c b/libgo/runtime/netpoll_epoll.c
index 98c5cbeb587..b98aa818c89 100644
--- a/libgo/runtime/netpoll_epoll.c
+++ b/libgo/runtime/netpoll_epoll.c
@@ -94,24 +94,24 @@ runtime_netpollinit(void)
}
int32
-runtime_netpollopen(int32 fd, PollDesc *pd)
+runtime_netpollopen(uintptr fd, PollDesc *pd)
{
EpollEvent ev;
int32 res;
ev.events = EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLET;
ev.data.ptr = (void*)pd;
- res = runtime_epollctl(epfd, EPOLL_CTL_ADD, fd, &ev);
+ res = runtime_epollctl(epfd, EPOLL_CTL_ADD, (int32)fd, &ev);
return -res;
}
int32
-runtime_netpollclose(int32 fd)
+runtime_netpollclose(uintptr fd)
{
EpollEvent ev;
int32 res;
- res = runtime_epollctl(epfd, EPOLL_CTL_DEL, fd, &ev);
+ res = runtime_epollctl(epfd, EPOLL_CTL_DEL, (int32)fd, &ev);
return -res;
}
diff --git a/libgo/runtime/netpoll_kqueue.c b/libgo/runtime/netpoll_kqueue.c
index 9b79b2020df..78901611884 100644
--- a/libgo/runtime/netpoll_kqueue.c
+++ b/libgo/runtime/netpoll_kqueue.c
@@ -2,10 +2,11 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build darwin
+// +build darwin dragonfly freebsd netbsd openbsd
#include "runtime.h"
#include "defs_GOOS_GOARCH.h"
+#include "os_GOOS.h"
// Integrated network poller (kqueue-based implementation).
@@ -27,7 +28,7 @@ runtime_netpollinit(void)
}
int32
-runtime_netpollopen(int32 fd, PollDesc *pd)
+runtime_netpollopen(uintptr fd, PollDesc *pd)
{
Kevent ev[2];
int32 n;
@@ -35,30 +36,22 @@ runtime_netpollopen(int32 fd, PollDesc *pd)
// Arm both EVFILT_READ and EVFILT_WRITE in edge-triggered mode (EV_CLEAR)
// for the whole fd lifetime. The notifications are automatically unregistered
// when fd is closed.
- ev[0].ident = fd;
+ ev[0].ident = (uint32)fd;
ev[0].filter = EVFILT_READ;
- ev[0].flags = EV_ADD|EV_RECEIPT|EV_CLEAR;
+ ev[0].flags = EV_ADD|EV_CLEAR;
ev[0].fflags = 0;
ev[0].data = 0;
- ev[0].udata = (byte*)pd;
+ ev[0].udata = (kevent_udata)pd;
ev[1] = ev[0];
ev[1].filter = EVFILT_WRITE;
- n = runtime_kevent(kq, ev, 2, ev, 2, nil);
+ n = runtime_kevent(kq, ev, 2, nil, 0, nil);
if(n < 0)
return -n;
- if(n != 2 ||
- (ev[0].flags&EV_ERROR) == 0 || ev[0].ident != fd || ev[0].filter != EVFILT_READ ||
- (ev[1].flags&EV_ERROR) == 0 || ev[1].ident != fd || ev[1].filter != EVFILT_WRITE)
- return EFAULT; // just to mark out from other errors
- if(ev[0].data != 0)
- return ev[0].data;
- if(ev[1].data != 0)
- return ev[1].data;
return 0;
}
int32
-runtime_netpollclose(int32 fd)
+runtime_netpollclose(uintptr fd)
{
// Don't need to unregister because calling close()
// on fd will remove any kevents that reference the descriptor.
@@ -74,7 +67,7 @@ runtime_netpoll(bool block)
static int32 lasterr;
Kevent events[64], *ev;
Timespec ts, *tp;
- int32 n, i;
+ int32 n, i, mode;
G *gp;
if(kq == -1)
@@ -97,10 +90,13 @@ retry:
}
for(i = 0; i < n; i++) {
ev = &events[i];
+ mode = 0;
if(ev->filter == EVFILT_READ)
- runtime_netpollready(&gp, (PollDesc*)ev->udata, 'r');
+ mode += 'r';
if(ev->filter == EVFILT_WRITE)
- runtime_netpollready(&gp, (PollDesc*)ev->udata, 'w');
+ mode += 'w';
+ if(mode)
+ runtime_netpollready(&gp, (PollDesc*)ev->udata, mode);
}
if(block && gp == nil)
goto retry;
diff --git a/libgo/runtime/netpoll_stub.c b/libgo/runtime/netpoll_stub.c
index e28e38e2643..84eef754c8d 100644
--- a/libgo/runtime/netpoll_stub.c
+++ b/libgo/runtime/netpoll_stub.c
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build freebsd netbsd openbsd plan9 windows
+// +build plan9
#include "runtime.h"
diff --git a/libgo/runtime/panic.c b/libgo/runtime/panic.c
index 7d79256cf41..7a8d95b1ade 100644
--- a/libgo/runtime/panic.c
+++ b/libgo/runtime/panic.c
@@ -38,7 +38,7 @@ runtime_startpanic(void)
M *m;
m = runtime_m();
- if(runtime_mheap == 0 || runtime_mheap->cachealloc.size == 0) { // very early
+ if(runtime_mheap.cachealloc.size == 0) { // very early
runtime_printf("runtime: panic before malloc heap initialized\n");
m->mallocing = 1; // tell rest of panic not to try to malloc
} else if(m->mcache == nil) // can happen if called from signal handler or throw
@@ -48,8 +48,13 @@ runtime_startpanic(void)
runtime_exit(3);
}
m->dying = 1;
+ if(runtime_g() != nil)
+ runtime_g()->writebuf = nil;
runtime_xadd(&runtime_panicking, 1);
runtime_lock(&paniclk);
+ if(runtime_debug.schedtrace > 0 || runtime_debug.scheddetail > 0)
+ runtime_schedtrace(true);
+ runtime_freezetheworld();
}
void
@@ -58,18 +63,22 @@ runtime_dopanic(int32 unused __attribute__ ((unused)))
G *g;
static bool didothers;
bool crash;
+ int32 t;
g = runtime_g();
if(g->sig != 0)
runtime_printf("[signal %x code=%p addr=%p]\n",
g->sig, (void*)g->sigcode0, (void*)g->sigcode1);
- if(runtime_gotraceback(&crash)){
+ if((t = runtime_gotraceback(&crash)) > 0){
if(g != runtime_m()->g0) {
runtime_printf("\n");
runtime_goroutineheader(g);
runtime_traceback();
- runtime_goroutinetrailer(g);
+ runtime_printcreatedby(g);
+ } else if(t >= 2 || runtime_m()->throwing > 0) {
+ runtime_printf("\nruntime stack:\n");
+ runtime_traceback();
}
if(!didothers) {
didothers = true;
@@ -113,11 +122,15 @@ runtime_panicstring(const char *s)
{
Eface err;
+ if(runtime_m()->mallocing) {
+ runtime_printf("panic: %s\n", s);
+ runtime_throw("panic during malloc");
+ }
if(runtime_m()->gcing) {
runtime_printf("panic: %s\n", s);
runtime_throw("panic during gc");
}
- runtime_newErrorString(runtime_gostringnocopy((const byte*)s), &err);
+ runtime_newErrorCString(s, &err);
runtime_panic(err);
}
diff --git a/libgo/runtime/parfor.c b/libgo/runtime/parfor.c
index c0e40f5081b..9489d8dc2ec 100644
--- a/libgo/runtime/parfor.c
+++ b/libgo/runtime/parfor.c
@@ -151,9 +151,9 @@ runtime_parfordo(ParFor *desc)
if(victim >= tid)
victim++;
victimpos = &desc->thr[victim].pos;
- pos = runtime_atomicload64(victimpos);
for(;;) {
// See if it has any work.
+ pos = runtime_atomicload64(victimpos);
begin = (uint32)pos;
end = (uint32)(pos>>32);
if(begin+1 >= end) {
@@ -166,7 +166,7 @@ runtime_parfordo(ParFor *desc)
}
begin2 = begin + (end-begin)/2;
newpos = (uint64)begin | (uint64)begin2<<32;
- if(runtime_cas64(victimpos, &pos, newpos)) {
+ if(runtime_cas64(victimpos, pos, newpos)) {
begin = begin2;
break;
}
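Each ParFor thread publishes its remaining [begin, end) range as one uint64, begin in the low half and end in the high half, so a thief can split it with a single cas64; the change above also reloads pos on every retry of that CAS. A small non-atomic sketch of the packing and halving arithmetic:

#include <stdint.h>
#include <stdio.h>

/* Pack a [begin, end) range the way ParFor stores it: begin in the low
   32 bits, end in the high 32 bits, so both can be swapped with one cas64. */
static uint64_t pack(uint32_t begin, uint32_t end) {
    return (uint64_t)begin | ((uint64_t)end << 32);
}

int main(void) {
    uint64_t pos = pack(10, 50);                 /* victim owns iterations [10, 50) */

    uint32_t begin  = (uint32_t)pos;
    uint32_t end    = (uint32_t)(pos >> 32);
    uint32_t begin2 = begin + (end - begin) / 2; /* steal the second half */

    uint64_t newpos = pack(begin, begin2);       /* victim keeps [begin, begin2) */
    printf("victim keeps [%u, %u), thief takes [%u, %u)\n",
           (uint32_t)newpos, (uint32_t)(newpos >> 32), begin2, end);
    return 0;
}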
diff --git a/libgo/runtime/print.c b/libgo/runtime/print.c
index f5c6e82840e..766ddbdc499 100644
--- a/libgo/runtime/print.c
+++ b/libgo/runtime/print.c
@@ -5,6 +5,7 @@
#include <stdarg.h>
#include "runtime.h"
#include "array.h"
+#include "go-type.h"
//static Lock debuglock;
@@ -13,7 +14,7 @@ static void go_vprintf(const char*, va_list);
// write to goroutine-local buffer if diverting output,
// or else standard error.
static void
-gwrite(const void *v, int32 n)
+gwrite(const void *v, intgo n)
{
G* g = runtime_g();
@@ -301,8 +302,6 @@ runtime_printpointer(void *p)
void
runtime_printstring(String v)
{
- // extern uint32 runtime_maxstring;
-
// if(v.len > runtime_maxstring) {
// gwrite("[string too long]", 17);
// return;
diff --git a/libgo/runtime/proc.c b/libgo/runtime/proc.c
index 0e77a3e0603..ab7cde43863 100644
--- a/libgo/runtime/proc.c
+++ b/libgo/runtime/proc.c
@@ -231,8 +231,8 @@ kickoff(void)
}
// Switch context to a different goroutine. This is like longjmp.
-static void runtime_gogo(G*) __attribute__ ((noinline));
-static void
+void runtime_gogo(G*) __attribute__ ((noinline));
+void
runtime_gogo(G* newg)
{
#ifdef USING_SPLIT_STACK
@@ -249,8 +249,8 @@ runtime_gogo(G* newg)
// setjmp. Because getcontext always returns 0, unlike setjmp, we use
// g->fromgogo as a code. It will be true if we got here via
// setcontext. g == nil the first time this is called in a new m.
-static void runtime_mcall(void (*)(G*)) __attribute__ ((noinline));
-static void
+void runtime_mcall(void (*)(G*)) __attribute__ ((noinline));
+void
runtime_mcall(void (*pfn)(G*))
{
M *mp;
@@ -365,8 +365,9 @@ struct Sched {
uint64 goidgen;
M* midle; // idle m's waiting for work
int32 nmidle; // number of idle m's waiting for work
- int32 mlocked; // number of locked m's waiting for work
+ int32 nmidlelocked; // number of locked m's waiting for work
int32 mcount; // number of m's that have been created
+ int32 maxmcount; // maximum number of m's allowed (or die)
P* pidle; // idle P's
uint32 npidle;
@@ -381,6 +382,7 @@ struct Sched {
Lock gflock;
G* gfree;
+ uint32 gcwaiting; // gc is waiting to run
int32 stopwait;
Note stopnote;
uint32 sysmonwait;
@@ -396,10 +398,8 @@ enum { MaxGomaxprocs = 1<<8 };
Sched runtime_sched;
int32 runtime_gomaxprocs;
-bool runtime_singleproc;
-bool runtime_iscgo = true;
uint32 runtime_needextram = 1;
-uint32 runtime_gcwaiting;
+bool runtime_iscgo = true;
M runtime_m0;
G runtime_g0; // idle goroutine for m0
G* runtime_allg;
@@ -409,6 +409,7 @@ P** runtime_allp;
M* runtime_extram;
int8* runtime_goos;
int32 runtime_ncpu;
+bool runtime_precisestack;
static int32 newprocs;
void* runtime_mstart(void*);
@@ -431,21 +432,22 @@ static void wakep(void);
static void stoplockedm(void);
static void startlockedm(G*);
static void sysmon(void);
-static uint32 retake(uint32*);
-static void inclocked(int32);
+static uint32 retake(int64);
+static void incidlelocked(int32);
static void checkdead(void);
static void exitsyscall0(G*);
static void park0(G*);
-static void gosched0(G*);
static void goexit0(G*);
static void gfput(P*, G*);
static G* gfget(P*);
static void gfpurge(P*);
static void globrunqput(G*);
-static G* globrunqget(P*);
+static G* globrunqget(P*, int32);
static P* pidleget(void);
static void pidleput(P*);
static void injectglist(G*);
+static bool preemptall(void);
+static bool exitsyscallfast(void);
// The bootstrap sequence is:
//
@@ -460,6 +462,7 @@ runtime_schedinit(void)
{
int32 n, procs;
const byte *p;
+ Eface i;
m = &runtime_m0;
g = &runtime_g0;
@@ -470,18 +473,22 @@ runtime_schedinit(void)
initcontext();
inittlssize();
- m->nomemprof++;
+ runtime_sched.maxmcount = 10000;
+ runtime_precisestack = 0;
+
runtime_mprofinit();
runtime_mallocinit();
mcommoninit(m);
+
+ // Initialize the itable value for newErrorCString,
+ // so that the next time it gets called, possibly
+ // in a fault during a garbage collection, it will not
+ // need to allocate memory.
+ runtime_newErrorCString(0, &i);
runtime_goargs();
runtime_goenvs();
-
- // For debugging:
- // Allocate internal symbol table representation now,
- // so that we don't need to call malloc when we crash.
- // runtime_findfunc(0);
+ runtime_parsedebugvars();
runtime_sched.lastpoll = runtime_nanotime();
procs = 1;
@@ -496,16 +503,26 @@ runtime_schedinit(void)
// Can not enable GC until all roots are registered.
// mstats.enablegc = 1;
- m->nomemprof--;
+
+ // if(raceenabled)
+ // g->racectx = runtime_raceinit();
}
extern void main_init(void) __asm__ (GOSYM_PREFIX "__go_init_main");
extern void main_main(void) __asm__ (GOSYM_PREFIX "main.main");
+static void
+initDone(void *arg __attribute__ ((unused))) {
+ runtime_unlockOSThread();
+};
+
// The main goroutine.
void
runtime_main(void* dummy __attribute__((unused)))
{
+ Defer d;
+ _Bool frame;
+
newm(sysmon, nil);
// Lock the main goroutine onto this, the main OS thread,
@@ -515,10 +532,24 @@ runtime_main(void* dummy __attribute__((unused)))
// by calling runtime.LockOSThread during initialization
// to preserve the lock.
runtime_lockOSThread();
+
+ // Defer unlock so that runtime.Goexit during init does the unlock too.
+ d.__pfn = initDone;
+ d.__next = g->defer;
+ d.__arg = (void*)-1;
+ d.__panic = g->panic;
+ d.__retaddr = nil;
+ d.__frame = &frame;
+ g->defer = &d;
+
if(m != &runtime_m0)
runtime_throw("runtime_main not on m0");
__go_go(runtime_MHeap_Scavenger, nil);
main_init();
+
+ if(g->defer != &d || d.__pfn != initDone)
+ runtime_throw("runtime: bad defer entry after init");
+ g->defer = d.__next;
runtime_unlockOSThread();
// For gccgo we have to wait until after main is initialized
@@ -574,7 +605,7 @@ runtime_goroutineheader(G *gp)
}
void
-runtime_goroutinetrailer(G *g)
+runtime_printcreatedby(G *g)
{
if(g != nil && g->gopc != 0 && g->goid != 1) {
String fn;
@@ -604,8 +635,28 @@ runtime_tracebackothers(G * volatile me)
tb.gp = me;
traceback = runtime_gotraceback(nil);
+
+ // Show the current goroutine first, if we haven't already.
+ if((gp = m->curg) != nil && gp != me) {
+ runtime_printf("\n");
+ runtime_goroutineheader(gp);
+ gp->traceback = &tb;
+
+#ifdef USING_SPLIT_STACK
+ __splitstack_getcontext(&me->stack_context[0]);
+#endif
+ getcontext(&me->context);
+
+ if(gp->traceback != nil) {
+ runtime_gogo(gp);
+ }
+
+ runtime_printtrace(tb.locbuf, tb.c, false);
+ runtime_printcreatedby(gp);
+ }
+
for(gp = runtime_allg; gp != nil; gp = gp->alllink) {
- if(gp == me || gp->status == Gdead)
+ if(gp == me || gp == m->curg || gp->status == Gdead)
continue;
if(gp->issystem && traceback < 2)
continue;
@@ -620,25 +671,38 @@ runtime_tracebackothers(G * volatile me)
// This means that if g is running or in a syscall, we
// can't reliably print a stack trace. FIXME.
- if(gp->status == Gsyscall || gp->status == Grunning) {
- runtime_printf("no stack trace available\n");
- runtime_goroutinetrailer(gp);
- continue;
- }
- gp->traceback = &tb;
+ if(gp->status == Grunning) {
+ runtime_printf("\tgoroutine running on other thread; stack unavailable\n");
+ runtime_printcreatedby(gp);
+ } else if(gp->status == Gsyscall) {
+ runtime_printf("\tgoroutine in C code; stack unavailable\n");
+ runtime_printcreatedby(gp);
+ } else {
+ gp->traceback = &tb;
#ifdef USING_SPLIT_STACK
- __splitstack_getcontext(&me->stack_context[0]);
+ __splitstack_getcontext(&me->stack_context[0]);
#endif
- getcontext(&me->context);
+ getcontext(&me->context);
- if(gp->traceback != nil) {
- runtime_gogo(gp);
+ if(gp->traceback != nil) {
+ runtime_gogo(gp);
+ }
+
+ runtime_printtrace(tb.locbuf, tb.c, false);
+ runtime_printcreatedby(gp);
}
+ }
+}
- runtime_printtrace(tb.locbuf, tb.c, false);
- runtime_goroutinetrailer(gp);
+static void
+checkmcount(void)
+{
+ // sched lock is held
+ if(runtime_sched.mcount > runtime_sched.maxmcount) {
+ runtime_printf("runtime: program exceeds %d-thread limit\n", runtime_sched.maxmcount);
+ runtime_throw("thread exhaustion");
}
}
@@ -669,7 +733,7 @@ mcommoninit(M *mp)
runtime_lock(&runtime_sched);
mp->id = runtime_sched.mcount++;
-
+ checkmcount();
runtime_mpreinit(mp);
// Add to runtime_allm so garbage collector doesn't free m
@@ -686,6 +750,7 @@ void
runtime_ready(G *gp)
{
// Mark runnable.
+ m->locks++; // disable preemption because it can be holding p in a local var
if(gp->status != Gwaiting) {
runtime_printf("goroutine %D has status %d\n", gp->goid, gp->status);
runtime_throw("bad g->status in ready");
@@ -694,6 +759,7 @@ runtime_ready(G *gp)
runqput(m->p, gp);
if(runtime_atomicload(&runtime_sched.npidle) != 0 && runtime_atomicload(&runtime_sched.nmspinning) == 0) // TODO: fast atomic
wakep();
+ m->locks--;
}
int32
@@ -753,6 +819,34 @@ runtime_helpgc(int32 nproc)
runtime_unlock(&runtime_sched);
}
+// Similar to stoptheworld but best-effort and can be called several times.
+// There is no reverse operation, used during crashing.
+// This function must not lock any mutexes.
+void
+runtime_freezetheworld(void)
+{
+ int32 i;
+
+ if(runtime_gomaxprocs == 1)
+ return;
+ // stopwait and preemption requests can be lost
+ // due to races with concurrently executing threads,
+ // so try several times
+ for(i = 0; i < 5; i++) {
+ // this should tell the scheduler to not start any new goroutines
+ runtime_sched.stopwait = 0x7fffffff;
+ runtime_atomicstore((uint32*)&runtime_sched.gcwaiting, 1);
+ // this should stop running goroutines
+ if(!preemptall())
+ break; // no running goroutines
+ runtime_usleep(1000);
+ }
+ // to be sure
+ runtime_usleep(1000);
+ preemptall();
+ runtime_usleep(1000);
+}
+
void
runtime_stoptheworld(void)
{
@@ -763,7 +857,8 @@ runtime_stoptheworld(void)
runtime_lock(&runtime_sched);
runtime_sched.stopwait = runtime_gomaxprocs;
- runtime_atomicstore((uint32*)&runtime_gcwaiting, 1);
+ runtime_atomicstore((uint32*)&runtime_sched.gcwaiting, 1);
+ preemptall();
// stop current P
m->p->status = Pgcstop;
runtime_sched.stopwait--;
@@ -782,7 +877,7 @@ runtime_stoptheworld(void)
wait = runtime_sched.stopwait > 0;
runtime_unlock(&runtime_sched);
- // wait for remaining P's to stop voluntary
+ // wait for remaining P's to stop voluntarily
if(wait) {
runtime_notesleep(&runtime_sched.stopnote);
runtime_noteclear(&runtime_sched.stopnote);
@@ -810,6 +905,7 @@ runtime_starttheworld(void)
G *gp;
bool add;
+ m->locks++; // disable preemption because it can be holding p in a local var
gp = runtime_netpoll(false); // non-blocking
injectglist(gp);
add = needaddgcproc();
@@ -819,7 +915,7 @@ runtime_starttheworld(void)
newprocs = 0;
} else
procresize(runtime_gomaxprocs);
- runtime_gcwaiting = 0;
+ runtime_sched.gcwaiting = 0;
p1 = nil;
while((p = pidleget()) != nil) {
@@ -829,16 +925,9 @@ runtime_starttheworld(void)
pidleput(p);
break;
}
- mp = mget();
- if(mp == nil) {
- p->link = p1;
- p1 = p;
- continue;
- }
- if(mp->nextp)
- runtime_throw("starttheworld: inconsistent mp->nextp");
- mp->nextp = p;
- runtime_notewakeup(&mp->park);
+ p->m = mget();
+ p->link = p1;
+ p1 = p;
}
if(runtime_sched.sysmonwait) {
runtime_sched.sysmonwait = false;
@@ -849,8 +938,18 @@ runtime_starttheworld(void)
while(p1) {
p = p1;
p1 = p1->link;
- add = false;
- newm(nil, p);
+ if(p->m) {
+ mp = p->m;
+ p->m = nil;
+ if(mp->nextp)
+ runtime_throw("starttheworld: inconsistent mp->nextp");
+ mp->nextp = p;
+ runtime_notewakeup(&mp->park);
+ } else {
+ // Start M to run P. Do not start another M below.
+ newm(nil, p);
+ add = false;
+ }
}
if(add) {
@@ -863,6 +962,7 @@ runtime_starttheworld(void)
// the maximum number of procs.
newm(mhelpgc, nil);
}
+ m->locks--;
}
// Called to start an M.
@@ -909,11 +1009,8 @@ runtime_mstart(void* mp)
// Install signal handlers; after minit so that minit can
// prepare the thread to be able to handle the signals.
- if(m == &runtime_m0) {
+ if(m == &runtime_m0)
runtime_initsig();
- if(runtime_iscgo)
- runtime_newextram();
- }
if(m->mstartfn)
m->mstartfn();
@@ -1015,6 +1112,14 @@ runtime_needm(void)
{
M *mp;
+ if(runtime_needextram) {
+ // Can happen if C/C++ code calls Go from a global ctor.
+ // Can not throw, because scheduler is not initialized yet.
+ runtime_write(2, "fatal error: cgo callback before cgo call\n",
+ sizeof("fatal error: cgo callback before cgo call\n")-1);
+ runtime_exit(1);
+ }
+
// Lock extra list, take head, unlock popped list.
// nilokay=false is safe here because of the invariant above,
// that the extra list always contains or will soon contain
@@ -1090,6 +1195,7 @@ runtime_newextram(void)
mp->locked = LockInternal;
mp->lockedg = gp;
gp->lockedm = mp;
+ gp->goid = runtime_xadd64(&runtime_sched.goidgen, 1);
// put on allg for garbage collector
runtime_lock(&runtime_sched);
if(runtime_lastg == nil)
@@ -1325,7 +1431,7 @@ handoffp(P *p)
return;
}
runtime_lock(&runtime_sched);
- if(runtime_gcwaiting) {
+ if(runtime_sched.gcwaiting) {
p->status = Pgcstop;
if(--runtime_sched.stopwait == 0)
runtime_notewakeup(&runtime_sched.stopnote);
@@ -1373,7 +1479,7 @@ stoplockedm(void)
p = releasep();
handoffp(p);
}
- inclocked(1);
+ incidlelocked(1);
// Wait until another thread schedules lockedg again.
runtime_notesleep(&m->park);
runtime_noteclear(&m->park);
@@ -1396,7 +1502,7 @@ startlockedm(G *gp)
if(mp->nextp)
runtime_throw("startlockedm: m has p");
// directly handoff current P to the locked m
- inclocked(-1);
+ incidlelocked(-1);
p = releasep();
mp->nextp = p;
runtime_notewakeup(&mp->park);
@@ -1410,7 +1516,7 @@ gcstopm(void)
{
P *p;
- if(!runtime_gcwaiting)
+ if(!runtime_sched.gcwaiting)
runtime_throw("gcstopm: not waiting for gc");
if(m->spinning) {
m->spinning = false;
@@ -1437,7 +1543,7 @@ execute(G *gp)
runtime_throw("execute: bad g status");
}
gp->status = Grunning;
- m->p->tick++;
+ m->p->schedtick++;
m->curg = gp;
gp->m = m;
@@ -1459,7 +1565,7 @@ findrunnable(void)
int32 i;
top:
- if(runtime_gcwaiting) {
+ if(runtime_sched.gcwaiting) {
gcstopm();
goto top;
}
@@ -1470,7 +1576,7 @@ top:
// global runq
if(runtime_sched.runqsize) {
runtime_lock(&runtime_sched);
- gp = globrunqget(m->p);
+ gp = globrunqget(m->p, 0);
runtime_unlock(&runtime_sched);
if(gp)
return gp;
@@ -1493,7 +1599,7 @@ top:
}
// random steal from other P's
for(i = 0; i < 2*runtime_gomaxprocs; i++) {
- if(runtime_gcwaiting)
+ if(runtime_sched.gcwaiting)
goto top;
p = runtime_allp[runtime_fastrand1()%runtime_gomaxprocs];
if(p == m->p)
@@ -1506,12 +1612,12 @@ top:
stop:
// return P and block
runtime_lock(&runtime_sched);
- if(runtime_gcwaiting) {
+ if(runtime_sched.gcwaiting) {
runtime_unlock(&runtime_sched);
goto top;
}
if(runtime_sched.runqsize) {
- gp = globrunqget(m->p);
+ gp = globrunqget(m->p, 0);
runtime_unlock(&runtime_sched);
return gp;
}
@@ -1561,6 +1667,25 @@ stop:
goto top;
}
+static void
+resetspinning(void)
+{
+ int32 nmspinning;
+
+ if(m->spinning) {
+ m->spinning = false;
+ nmspinning = runtime_xadd(&runtime_sched.nmspinning, -1);
+ if(nmspinning < 0)
+ runtime_throw("findrunnable: negative nmspinning");
+ } else
+ nmspinning = runtime_atomicload(&runtime_sched.nmspinning);
+
+ // M wakeup policy is deliberately somewhat conservative (see nmspinning handling),
+ // so see if we need to wakeup another P here.
+ if (nmspinning == 0 && runtime_atomicload(&runtime_sched.npidle) > 0)
+ wakep();
+}
+
// Injects the list of runnable G's into the scheduler.
// Can run concurrently with GC.
static void
@@ -1590,33 +1715,44 @@ static void
schedule(void)
{
G *gp;
+ uint32 tick;
if(m->locks)
runtime_throw("schedule: holding locks");
top:
- if(runtime_gcwaiting) {
+ if(runtime_sched.gcwaiting) {
gcstopm();
goto top;
}
- gp = runqget(m->p);
- if(gp == nil)
- gp = findrunnable();
-
- if(m->spinning) {
- m->spinning = false;
- runtime_xadd(&runtime_sched.nmspinning, -1);
+ gp = nil;
+ // Check the global runnable queue once in a while to ensure fairness.
+ // Otherwise two goroutines can completely occupy the local runqueue
+ // by constantly respawning each other.
+ tick = m->p->schedtick;
+ // This is a fancy way to say tick%61==0,
+ // it uses 2 MUL instructions instead of a single DIV and so is faster on modern processors.
+ if(tick - (((uint64)tick*0x4325c53fu)>>36)*61 == 0 && runtime_sched.runqsize > 0) {
+ runtime_lock(&runtime_sched);
+ gp = globrunqget(m->p, 1);
+ runtime_unlock(&runtime_sched);
+ if(gp)
+ resetspinning();
+ }
+ if(gp == nil) {
+ gp = runqget(m->p);
+ if(gp && m->spinning)
+ runtime_throw("schedule: spinning with local work");
+ }
+ if(gp == nil) {
+ gp = findrunnable(); // blocks until work is available
+ resetspinning();
}
-
- // M wakeup policy is deliberately somewhat conservative (see nmspinning handling),
- // so see if we need to wakeup another M here.
- if (m->p->runqhead != m->p->runqtail &&
- runtime_atomicload(&runtime_sched.nmspinning) == 0 &&
- runtime_atomicload(&runtime_sched.npidle) > 0) // TODO: fast atomic
- wakep();
if(gp->lockedm) {
+ // Hands off own p to the locked m,
+ // then blocks waiting for a new p.
startlockedm(gp);
goto top;
}
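The fairness check above replaces tick % 61 with a reciprocal multiplication: 0x4325c53f is ceil(2^36/61), so ((uint64)tick*0x4325c53f)>>36 gives tick/61 and the subtraction recovers the remainder without a DIV. A small standalone check of that identity (illustration only, not runtime code):

#include <stdint.h>
#include <stdio.h>

int main(void) {
    /* Spot-check that the reciprocal-multiply form used in schedule() matches tick % 61. */
    for (uint32_t tick = 0; tick < 1000000; tick++) {
        uint32_t fast = tick - (uint32_t)(((uint64_t)tick * 0x4325c53fu) >> 36) * 61;
        if (fast != tick % 61) {
            printf("mismatch at %u: %u != %u\n", tick, fast, tick % 61);
            return 1;
        }
    }
    printf("reciprocal multiply matches %% 61 for all tested ticks\n");
    return 0;
}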
@@ -1658,12 +1794,12 @@ park0(G *gp)
void
runtime_gosched(void)
{
- runtime_mcall(gosched0);
+ runtime_mcall(runtime_gosched0);
}
// runtime_gosched continuation on g0.
-static void
-gosched0(G *gp)
+void
+runtime_gosched0(G *gp)
{
gp->status = Grunnable;
gp->m = nil;
@@ -1679,6 +1815,9 @@ gosched0(G *gp)
}
// Finishes execution of the current goroutine.
+// Need to mark it as nosplit, because it runs with sp > stackbase (as runtime_lessstack).
+// Since it does not return it does not matter. But if it is preempted
+// at the split stack check, GC will complain about inconsistent sp.
void
runtime_goexit(void)
{
@@ -1698,7 +1837,7 @@ goexit0(G *gp)
m->curg = nil;
m->lockedg = nil;
if(m->locked & ~LockExternal) {
- runtime_printf("invalid m->locked = %d", m->locked);
+ runtime_printf("invalid m->locked = %d\n", m->locked);
runtime_throw("internal lockOSThread error");
}
m->locked = 0;
@@ -1720,10 +1859,11 @@ void runtime_entersyscall(void) __attribute__ ((no_split_stack));
void
runtime_entersyscall()
{
- if(m->profilehz > 0)
- runtime_setprof(false);
+ // Disable preemption because during this function g is in Gsyscall status,
+ // but can have inconsistent g->sched, do not let GC observe it.
+ m->locks++;
- // Leave SP around for gc and traceback.
+ // Leave SP around for GC and traceback.
#ifdef USING_SPLIT_STACK
g->gcstack = __splitstack_find(nil, nil, &g->gcstack_size,
&g->gcnext_segment, &g->gcnext_sp,
@@ -1752,10 +1892,9 @@ runtime_entersyscall()
}
m->mcache = nil;
- m->p->tick++;
m->p->m = nil;
runtime_atomicstore(&m->p->status, Psyscall);
- if(runtime_gcwaiting) {
+ if(runtime_sched.gcwaiting) {
runtime_lock(&runtime_sched);
if (runtime_sched.stopwait > 0 && runtime_cas(&m->p->status, Psyscall, Pgcstop)) {
if(--runtime_sched.stopwait == 0)
@@ -1763,6 +1902,8 @@ runtime_entersyscall()
}
runtime_unlock(&runtime_sched);
}
+
+ m->locks--;
}
// The same as runtime_entersyscall(), but with a hint that the syscall is blocking.
@@ -1771,10 +1912,9 @@ runtime_entersyscallblock(void)
{
P *p;
- if(m->profilehz > 0)
- runtime_setprof(false);
+ m->locks++; // see comment in entersyscall
- // Leave SP around for gc and traceback.
+ // Leave SP around for GC and traceback.
#ifdef USING_SPLIT_STACK
g->gcstack = __splitstack_find(nil, nil, &g->gcstack_size,
&g->gcnext_segment, &g->gcnext_sp,
@@ -1792,7 +1932,9 @@ runtime_entersyscallblock(void)
p = releasep();
handoffp(p);
if(g->isbackground) // do not consider blocked scavenger for deadlock detection
- inclocked(1);
+ incidlelocked(1);
+
+ m->locks--;
}
// The goroutine g exited its system call.
@@ -1803,19 +1945,16 @@ void
runtime_exitsyscall(void)
{
G *gp;
- P *p;
- // Check whether the profiler needs to be turned on.
- if(m->profilehz > 0)
- runtime_setprof(true);
+ m->locks++; // see comment in entersyscall
gp = g;
- // Try to re-acquire the last P.
- if(m->p && m->p->status == Psyscall && runtime_cas(&m->p->status, Psyscall, Prunning)) {
+ if(gp->isbackground) // do not consider blocked scavenger for deadlock detection
+ incidlelocked(-1);
+
+ if(exitsyscallfast()) {
// There's a cpu for us, so we can run.
- m->mcache = m->p->mcache;
- m->p->m = m;
- m->p->tick++;
+ m->p->syscalltick++;
gp->status = Grunning;
// Garbage collector isn't running (since we are),
// so okay to clear gcstack and gcsp.
@@ -1824,27 +1963,11 @@ runtime_exitsyscall(void)
#endif
gp->gcnext_sp = nil;
runtime_memclr(&gp->gcregs, sizeof gp->gcregs);
+ m->locks--;
return;
}
- if(gp->isbackground) // do not consider blocked scavenger for deadlock detection
- inclocked(-1);
- // Try to get any other idle P.
- m->p = nil;
- if(runtime_sched.pidle) {
- runtime_lock(&runtime_sched);
- p = pidleget();
- runtime_unlock(&runtime_sched);
- if(p) {
- acquirep(p);
-#ifdef USING_SPLIT_STACK
- gp->gcstack = nil;
-#endif
- gp->gcnext_sp = nil;
- runtime_memclr(&gp->gcregs, sizeof gp->gcregs);
- return;
- }
- }
+ m->locks--;
// Call the scheduler.
runtime_mcall(exitsyscall0);
@@ -1860,6 +1983,43 @@ runtime_exitsyscall(void)
#endif
gp->gcnext_sp = nil;
runtime_memclr(&gp->gcregs, sizeof gp->gcregs);
+ m->p->syscalltick++;
+}
+
+static bool
+exitsyscallfast(void)
+{
+ P *p;
+
+ // Freezetheworld sets stopwait but does not retake P's.
+ if(runtime_sched.stopwait) {
+ m->p = nil;
+ return false;
+ }
+
+ // Try to re-acquire the last P.
+ if(m->p && m->p->status == Psyscall && runtime_cas(&m->p->status, Psyscall, Prunning)) {
+ // There's a cpu for us, so we can run.
+ m->mcache = m->p->mcache;
+ m->p->m = m;
+ return true;
+ }
+ // Try to get any other idle P.
+ m->p = nil;
+ if(runtime_sched.pidle) {
+ runtime_lock(&runtime_sched);
+ p = pidleget();
+ if(p && runtime_atomicload(&runtime_sched.sysmonwait)) {
+ runtime_atomicstore(&runtime_sched.sysmonwait, 0);
+ runtime_notewakeup(&runtime_sched.sysmonnote);
+ }
+ runtime_unlock(&runtime_sched);
+ if(p) {
+ acquirep(p);
+ return true;
+ }
+ }
+ return false;
}
// runtime_exitsyscall slow path on g0.
@@ -1876,6 +2036,10 @@ exitsyscall0(G *gp)
p = pidleget();
if(p == nil)
globrunqput(gp);
+ else if(runtime_atomicload(&runtime_sched.sysmonwait)) {
+ runtime_atomicstore(&runtime_sched.sysmonwait, 0);
+ runtime_notewakeup(&runtime_sched.sysmonnote);
+ }
runtime_unlock(&runtime_sched);
if(p) {
acquirep(p);
@@ -1890,6 +2054,33 @@ exitsyscall0(G *gp)
schedule(); // Never returns.
}
+// Called from syscall package before fork.
+void syscall_runtime_BeforeFork(void)
+ __asm__(GOSYM_PREFIX "syscall.runtime_BeforeFork");
+void
+syscall_runtime_BeforeFork(void)
+{
+ // Fork can hang if preempted with signals frequently enough (see issue 5517).
+ // Ensure that we stay on the same M where we disable profiling.
+ m->locks++;
+ if(m->profilehz != 0)
+ runtime_resetcpuprofiler(0);
+}
+
+// Called from syscall package after fork in parent.
+void syscall_runtime_AfterFork(void)
+ __asm__(GOSYM_PREFIX "syscall.runtime_AfterFork");
+void
+syscall_runtime_AfterFork(void)
+{
+ int32 hz;
+
+ hz = runtime_sched.profilehz;
+ if(hz != 0)
+ runtime_resetcpuprofiler(hz);
+ m->locks--;
+}
+
// Allocate a new g, with a stack big enough for stacksize bytes.
G*
runtime_malg(int32 stacksize, byte** ret_stack, size_t* ret_stacksize)
@@ -1919,9 +2110,16 @@ runtime_malg(int32 stacksize, byte** ret_stack, size_t* ret_stacksize)
/* For runtime package testing. */
+
+// Create a new g running fn with siz bytes of arguments.
+// Put it on the queue of g's waiting to run.
+// The compiler turns a go statement into a call to this.
+// Cannot split the stack because it assumes that the arguments
+// are available sequentially after &fn; they would not be
+// copied if a stack split occurred. It's OK for this to call
+// functions that split the stack.
void runtime_testing_entersyscall(void)
__asm__ (GOSYM_PREFIX "runtime.entersyscall");
-
void
runtime_testing_entersyscall()
{
@@ -1944,6 +2142,7 @@ __go_go(void (*fn)(void*), void* arg)
size_t spsize;
G *newg;
+//runtime_printf("newproc1 %p %p narg=%d nret=%d\n", fn->fn, argp, narg, nret);
m->locks++; // disable preemption because it can be holding p in a local var
if((newg = gfget(m->p)) != nil) {
@@ -2099,7 +2298,7 @@ runtime_gomaxprocsfunc(int32 n)
}
runtime_unlock(&runtime_sched);
- runtime_semacquire(&runtime_worldsema);
+ runtime_semacquire(&runtime_worldsema, false);
m->gcing = 1;
runtime_stoptheworld();
newprocs = n;
@@ -2110,8 +2309,11 @@ runtime_gomaxprocsfunc(int32 n)
return ret;
}
+// lockOSThread is called by runtime.LockOSThread and runtime.lockOSThread below
+// after they modify m->locked. Do not allow preemption during this call,
+// or else the m might be different in this function than in the caller.
static void
-LockOSThread(void)
+lockOSThread(void)
{
m->lockedg = g;
g->lockedm = m;
@@ -2122,18 +2324,22 @@ void
runtime_LockOSThread(void)
{
m->locked |= LockExternal;
- LockOSThread();
+ lockOSThread();
}
void
runtime_lockOSThread(void)
{
m->locked += LockInternal;
- LockOSThread();
+ lockOSThread();
}
+
+// unlockOSThread is called by runtime.UnlockOSThread and runtime.unlockOSThread below
+// after they update m->locked. Do not allow preemption during this call,
+// or else the m might be different in this function than in the caller.
static void
-UnlockOSThread(void)
+unlockOSThread(void)
{
if(m->locked != 0)
return;
@@ -2147,7 +2353,7 @@ void
runtime_UnlockOSThread(void)
{
m->locked &= ~LockExternal;
- UnlockOSThread();
+ unlockOSThread();
}
void
@@ -2156,7 +2362,7 @@ runtime_unlockOSThread(void)
if(m->locked < LockInternal)
runtime_throw("runtime: internal error: misuse of lockOSThread/unlockOSThread");
m->locked -= LockInternal;
- UnlockOSThread();
+ unlockOSThread();
}
bool
@@ -2176,13 +2382,6 @@ runtime_golockedOSThread(void)
return runtime_lockedOSThread();
}
-// for testing of wire, unwire
-uint32
-runtime_mid()
-{
- return m->id;
-}
-
intgo runtime_NumGoroutine (void)
__asm__ (GOSYM_PREFIX "runtime.NumGoroutine");
@@ -2227,28 +2426,42 @@ static struct {
Location locbuf[100];
} prof;
+static void
+System(void)
+{
+}
+
// Called if we receive a SIGPROF signal.
void
runtime_sigprof()
{
int32 n, i;
+ bool traceback;
- // Windows does profiling in a dedicated thread w/o m.
- if(!Windows && (m == nil || m->mcache == nil))
- return;
if(prof.fn == nil || prof.hz == 0)
return;
-
+ traceback = true;
+ // Windows does profiling in a dedicated thread w/o m.
+ if(!Windows && (m == nil || m->mcache == nil))
+ traceback = false;
+
runtime_lock(&prof);
if(prof.fn == nil) {
runtime_unlock(&prof);
return;
}
- n = runtime_callers(0, prof.locbuf, nelem(prof.locbuf));
- for(i = 0; i < n; i++)
- prof.pcbuf[i] = prof.locbuf[i].pc;
- if(n > 0)
- prof.fn(prof.pcbuf, n);
+ n = 0;
+ if(traceback) {
+ n = runtime_callers(0, prof.locbuf, nelem(prof.locbuf));
+ for(i = 0; i < n; i++)
+ prof.pcbuf[i] = prof.locbuf[i].pc;
+ }
+ if (!traceback || n <= 0) {
+ n = 2;
+ prof.pcbuf[0] = (uintptr)runtime_getcallerpc(&n);
+ prof.pcbuf[1] = (uintptr)System + 1;
+ }
+ prof.fn(prof.pcbuf, n);
runtime_unlock(&prof);
}
@@ -2264,7 +2477,11 @@ runtime_setcpuprofilerate(void (*fn)(uintptr*, int32), int32 hz)
if(fn == nil)
hz = 0;
- // Stop profiler on this cpu so that it is safe to lock prof.
+ // Disable preemption, otherwise we can be rescheduled to another thread
+ // that has profiling enabled.
+ m->locks++;
+
+ // Stop profiler on this thread so that it is safe to lock prof.
// if a profiling signal came in while we had prof locked,
// it would deadlock.
runtime_resetcpuprofiler(0);
@@ -2279,6 +2496,8 @@ runtime_setcpuprofilerate(void (*fn)(uintptr*, int32), int32 hz)
if(hz != 0)
runtime_resetcpuprofiler(hz);
+
+ m->locks--;
}
// Change number of processors. The world is stopped, sched is locked.
@@ -2296,7 +2515,8 @@ procresize(int32 new)
for(i = 0; i < new; i++) {
p = runtime_allp[i];
if(p == nil) {
- p = (P*)runtime_mallocgc(sizeof(*p), 0, 0, 1);
+ p = (P*)runtime_mallocgc(sizeof(*p), 0, FlagNoInvokeGC);
+ p->id = i;
p->status = Pgcstop;
runtime_atomicstorep(&runtime_allp[i], p);
}
@@ -2308,7 +2528,7 @@ procresize(int32 new)
}
if(p->runq == nil) {
p->runqsize = 128;
- p->runq = (G**)runtime_mallocgc(p->runqsize*sizeof(G*), 0, 0, 1);
+ p->runq = (G**)runtime_mallocgc(p->runqsize*sizeof(G*), 0, FlagNoInvokeGC);
}
}
@@ -2351,7 +2571,6 @@ procresize(int32 new)
p->status = Pidle;
pidleput(p);
}
- runtime_singleproc = new == 1;
runtime_atomicstore((uint32*)&runtime_gomaxprocs, new);
}
@@ -2393,10 +2612,10 @@ releasep(void)
}
static void
-inclocked(int32 v)
+incidlelocked(int32 v)
{
runtime_lock(&runtime_sched);
- runtime_sched.mlocked += v;
+ runtime_sched.nmidlelocked += v;
if(v > 0)
checkdead();
runtime_unlock(&runtime_sched);
@@ -2411,12 +2630,12 @@ checkdead(void)
int32 run, grunning, s;
// -1 for sysmon
- run = runtime_sched.mcount - runtime_sched.nmidle - runtime_sched.mlocked - 1 - countextra();
+ run = runtime_sched.mcount - runtime_sched.nmidle - runtime_sched.nmidlelocked - 1 - countextra();
if(run > 0)
return;
if(run < 0) {
- runtime_printf("checkdead: nmidle=%d mlocked=%d mcount=%d\n",
- runtime_sched.nmidle, runtime_sched.mlocked, runtime_sched.mcount);
+ runtime_printf("checkdead: nmidle=%d nmidlelocked=%d mcount=%d\n",
+ runtime_sched.nmidle, runtime_sched.nmidlelocked, runtime_sched.mcount);
runtime_throw("checkdead: inconsistent counts");
}
grunning = 0;
@@ -2441,10 +2660,10 @@ static void
sysmon(void)
{
uint32 idle, delay;
- int64 now, lastpoll;
+ int64 now, lastpoll, lasttrace;
G *gp;
- uint32 ticks[MaxGomaxprocs];
+ lasttrace = 0;
idle = 0; // how many cycles in succession we had not woken up somebody
delay = 0;
for(;;) {
@@ -2455,9 +2674,10 @@ sysmon(void)
if(delay > 10*1000) // up to 10ms
delay = 10*1000;
runtime_usleep(delay);
- if(runtime_gcwaiting || runtime_atomicload(&runtime_sched.npidle) == (uint32)runtime_gomaxprocs) { // TODO: fast atomic
+ if(runtime_debug.schedtrace <= 0 &&
+ (runtime_sched.gcwaiting || runtime_atomicload(&runtime_sched.npidle) == (uint32)runtime_gomaxprocs)) { // TODO: fast atomic
runtime_lock(&runtime_sched);
- if(runtime_atomicload(&runtime_gcwaiting) || runtime_atomicload(&runtime_sched.npidle) == (uint32)runtime_gomaxprocs) {
+ if(runtime_atomicload(&runtime_sched.gcwaiting) || runtime_atomicload(&runtime_sched.npidle) == (uint32)runtime_gomaxprocs) {
runtime_atomicstore(&runtime_sched.sysmonwait, 1);
runtime_unlock(&runtime_sched);
runtime_notesleep(&runtime_sched.sysmonnote);
@@ -2470,53 +2690,198 @@ sysmon(void)
// poll network if not polled for more than 10ms
lastpoll = runtime_atomicload64(&runtime_sched.lastpoll);
now = runtime_nanotime();
- if(lastpoll != 0 && lastpoll + 10*1000*1000 > now) {
+ if(lastpoll != 0 && lastpoll + 10*1000*1000 < now) {
+ runtime_cas64(&runtime_sched.lastpoll, lastpoll, now);
gp = runtime_netpoll(false); // non-blocking
- injectglist(gp);
+ if(gp) {
+ // Need to decrement number of idle locked M's
+ // (pretending that one more is running) before injectglist.
+ // Otherwise it can lead to the following situation:
+ // injectglist grabs all P's but before it starts M's to run the P's,
+ // another M returns from syscall, finishes running its G,
+ // observes that there is no work to do and no other running M's
+ // and reports deadlock.
+ incidlelocked(-1);
+ injectglist(gp);
+ incidlelocked(1);
+ }
}
// retake P's blocked in syscalls
- if(retake(ticks))
+ // and preempt long running G's
+ if(retake(now))
idle = 0;
else
idle++;
+
+ if(runtime_debug.schedtrace > 0 && lasttrace + runtime_debug.schedtrace*1000000ll <= now) {
+ lasttrace = now;
+ runtime_schedtrace(runtime_debug.scheddetail);
+ }
}
}
+typedef struct Pdesc Pdesc;
+struct Pdesc
+{
+ uint32 schedtick;
+ int64 schedwhen;
+ uint32 syscalltick;
+ int64 syscallwhen;
+};
+static Pdesc pdesc[MaxGomaxprocs];
+
static uint32
-retake(uint32 *ticks)
+retake(int64 now)
{
uint32 i, s, n;
int64 t;
P *p;
+ Pdesc *pd;
n = 0;
for(i = 0; i < (uint32)runtime_gomaxprocs; i++) {
p = runtime_allp[i];
if(p==nil)
continue;
- t = p->tick;
- if(ticks[i] != t) {
- ticks[i] = t;
- continue;
- }
+ pd = &pdesc[i];
s = p->status;
- if(s != Psyscall)
- continue;
- if(p->runqhead == p->runqtail && runtime_atomicload(&runtime_sched.nmspinning) + runtime_atomicload(&runtime_sched.npidle) > 0) // TODO: fast atomic
- continue;
- // Need to increment number of locked M's before the CAS.
- // Otherwise the M from which we retake can exit the syscall,
- // increment nmidle and report deadlock.
- inclocked(-1);
- if(runtime_cas(&p->status, s, Pidle)) {
- n++;
- handoffp(p);
+ if(s == Psyscall) {
+ // Retake P from syscall if it's there for more than 1 sysmon tick (20us).
+ // But only if there is other work to do.
+ t = p->syscalltick;
+ if(pd->syscalltick != t) {
+ pd->syscalltick = t;
+ pd->syscallwhen = now;
+ continue;
+ }
+ if(p->runqhead == p->runqtail &&
+ runtime_atomicload(&runtime_sched.nmspinning) + runtime_atomicload(&runtime_sched.npidle) > 0)
+ continue;
+ // Need to decrement number of idle locked M's
+ // (pretending that one more is running) before the CAS.
+ // Otherwise the M from which we retake can exit the syscall,
+ // increment nmidle and report deadlock.
+ incidlelocked(-1);
+ if(runtime_cas(&p->status, s, Pidle)) {
+ n++;
+ handoffp(p);
+ }
+ incidlelocked(1);
+ } else if(s == Prunning) {
+ // Preempt G if it's running for more than 10ms.
+ t = p->schedtick;
+ if(pd->schedtick != t) {
+ pd->schedtick = t;
+ pd->schedwhen = now;
+ continue;
+ }
+ if(pd->schedwhen + 10*1000*1000 > now)
+ continue;
+ // preemptone(p);
}
- inclocked(1);
}
return n;
}
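A hedged walkthrough of the Pdesc bookkeeping above (editorial, not part of the patch):

// For a P observed in Psyscall on consecutive sysmon wakeups:
//   wakeup k:   p->syscalltick differs from pd->syscalltick
//               -> record the new tick and pd->syscallwhen = now; do nothing yet
//   wakeup k+1: tick unchanged, so the same syscall is still in progress
//               -> retake the P (CAS Psyscall->Pidle, handoffp) unless its run
//                  queue is empty and at least one idle or spinning M exists,
//                  i.e. nobody is starved for a P
// The Prunning branch records schedtick/schedwhen the same way, but the actual
// preemption call (preemptone) is commented out in this port.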
+// Tell all goroutines that they have been preempted and they should stop.
+// This function is purely best-effort. It can fail to inform a goroutine if a
+// processor just started running it.
+// No locks need to be held.
+// Returns true if preemption request was issued to at least one goroutine.
+static bool
+preemptall(void)
+{
+ return false;
+}
+
+void
+runtime_schedtrace(bool detailed)
+{
+ static int64 starttime;
+ int64 now;
+ int64 id1, id2, id3;
+ int32 i, q, t, h, s;
+ const char *fmt;
+ M *mp, *lockedm;
+ G *gp, *lockedg;
+ P *p;
+
+ now = runtime_nanotime();
+ if(starttime == 0)
+ starttime = now;
+
+ runtime_lock(&runtime_sched);
+ runtime_printf("SCHED %Dms: gomaxprocs=%d idleprocs=%d threads=%d idlethreads=%d runqueue=%d",
+ (now-starttime)/1000000, runtime_gomaxprocs, runtime_sched.npidle, runtime_sched.mcount,
+ runtime_sched.nmidle, runtime_sched.runqsize);
+ if(detailed) {
+ runtime_printf(" gcwaiting=%d nmidlelocked=%d nmspinning=%d stopwait=%d sysmonwait=%d\n",
+ runtime_sched.gcwaiting, runtime_sched.nmidlelocked, runtime_sched.nmspinning,
+ runtime_sched.stopwait, runtime_sched.sysmonwait);
+ }
+ // We must be careful while reading data from P's, M's and G's.
+ // Even if we hold schedlock, most data can be changed concurrently.
+ // E.g. (p->m ? p->m->id : -1) can crash if p->m changes from non-nil to nil.
+ for(i = 0; i < runtime_gomaxprocs; i++) {
+ p = runtime_allp[i];
+ if(p == nil)
+ continue;
+ mp = p->m;
+ t = p->runqtail;
+ h = p->runqhead;
+ s = p->runqsize;
+ q = t - h;
+ if(q < 0)
+ q += s;
+ if(detailed)
+ runtime_printf(" P%d: status=%d schedtick=%d syscalltick=%d m=%d runqsize=%d/%d gfreecnt=%d\n",
+ i, p->status, p->schedtick, p->syscalltick, mp ? mp->id : -1, q, s, p->gfreecnt);
+ else {
+ // In non-detailed mode format lengths of per-P run queues as:
+ // [len1 len2 len3 len4]
+ fmt = " %d";
+ if(runtime_gomaxprocs == 1)
+ fmt = " [%d]\n";
+ else if(i == 0)
+ fmt = " [%d";
+ else if(i == runtime_gomaxprocs-1)
+ fmt = " %d]\n";
+ runtime_printf(fmt, q);
+ }
+ }
+ if(!detailed) {
+ runtime_unlock(&runtime_sched);
+ return;
+ }
+ for(mp = runtime_allm; mp; mp = mp->alllink) {
+ p = mp->p;
+ gp = mp->curg;
+ lockedg = mp->lockedg;
+ id1 = -1;
+ if(p)
+ id1 = p->id;
+ id2 = -1;
+ if(gp)
+ id2 = gp->goid;
+ id3 = -1;
+ if(lockedg)
+ id3 = lockedg->goid;
+ runtime_printf(" M%d: p=%D curg=%D mallocing=%d throwing=%d gcing=%d"
+ " locks=%d dying=%d helpgc=%d spinning=%d lockedg=%D\n",
+ mp->id, id1, id2,
+ mp->mallocing, mp->throwing, mp->gcing, mp->locks, mp->dying, mp->helpgc,
+ mp->spinning, id3);
+ }
+ for(gp = runtime_allg; gp; gp = gp->alllink) {
+ mp = gp->m;
+ lockedm = gp->lockedm;
+ runtime_printf(" G%D: status=%d(%s) m=%d lockedm=%d\n",
+ gp->goid, gp->status, gp->waitreason, mp ? mp->id : -1,
+ lockedm ? lockedm->id : -1);
+ }
+ runtime_unlock(&runtime_sched);
+}
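For context (editorial note; the values below are invented): runtime_schedtrace is driven from sysmon when GODEBUG=schedtrace=N is set, where N is the interval in milliseconds; adding scheddetail=1 selects the detailed per-P/M/G dump.

// With GODEBUG=schedtrace=1000, a non-detailed line printed once a second
// looks roughly like (numbers invented for illustration):
//
//   SCHED 2013ms: gomaxprocs=4 idleprocs=1 threads=6 idlethreads=2 runqueue=0 [1 0 2 0]
//
// where the bracketed values are the per-P local run queue lengths computed
// from runqtail/runqhead above.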
+
// Put mp on midle list.
// Sched must be locked.
static void
@@ -2559,7 +2924,7 @@ globrunqput(G *gp)
// Try get a batch of G's from the global runnable queue.
// Sched must be locked.
static G*
-globrunqget(P *p)
+globrunqget(P *p, int32 max)
{
G *gp, *gp1;
int32 n;
@@ -2569,6 +2934,8 @@ globrunqget(P *p)
n = runtime_sched.runqsize/runtime_gomaxprocs+1;
if(n > runtime_sched.runqsize)
n = runtime_sched.runqsize;
+ if(max > 0 && n > max)
+ n = max;
runtime_sched.runqsize -= n;
if(runtime_sched.runqsize == 0)
runtime_sched.runqtail = nil;
@@ -2827,6 +3194,22 @@ runtime_testSchedLocalQueueSteal(void)
}
}
+intgo runtime_debug_setMaxThreads(intgo)
+ __asm__(GOSYM_PREFIX "runtime_debug.setMaxThreads");
+
+intgo
+runtime_debug_setMaxThreads(intgo in)
+{
+ intgo out;
+
+ runtime_lock(&runtime_sched);
+ out = runtime_sched.maxmcount;
+ runtime_sched.maxmcount = in;
+ checkmcount();
+ runtime_unlock(&runtime_sched);
+ return out;
+}
+
void
runtime_proc_scan(void (*addroot)(Obj))
{
@@ -2852,3 +3235,11 @@ __go_get_closure(void)
{
return g->closure;
}
+
+// Return whether we are waiting for a GC. This gc toolchain uses
+// preemption instead.
+bool
+runtime_gcwaiting(void)
+{
+ return runtime_sched.gcwaiting;
+}
diff --git a/libgo/runtime/race.h b/libgo/runtime/race.h
index 3357bed312d..884245cedad 100644
--- a/libgo/runtime/race.h
+++ b/libgo/runtime/race.h
@@ -16,14 +16,14 @@ uintptr runtime_raceinit(void);
void runtime_racefini(void);
void runtime_racemapshadow(void *addr, uintptr size);
-void runtime_racemalloc(void *p, uintptr sz, void *pc);
+void runtime_racemalloc(void *p, uintptr sz);
void runtime_racefree(void *p);
uintptr runtime_racegostart(void *pc);
void runtime_racegoend(void);
void runtime_racewritepc(void *addr, void *callpc, void *pc);
void runtime_racereadpc(void *addr, void *callpc, void *pc);
-void runtime_racewriterangepc(void *addr, uintptr sz, uintptr step, void *callpc, void *pc);
-void runtime_racereadrangepc(void *addr, uintptr sz, uintptr step, void *callpc, void *pc);
+void runtime_racewriterangepc(void *addr, uintptr sz, void *callpc, void *pc);
+void runtime_racereadrangepc(void *addr, uintptr sz, void *callpc, void *pc);
void runtime_racefingo(void);
void runtime_raceacquire(void *addr);
void runtime_raceacquireg(G *gp, void *addr);
diff --git a/libgo/runtime/runtime.c b/libgo/runtime/runtime.c
index 1ff6d00e299..56fc045eac8 100644
--- a/libgo/runtime/runtime.c
+++ b/libgo/runtime/runtime.c
@@ -124,11 +124,12 @@ TestAtomic64(void)
z64 = 42;
x64 = 0;
PREFETCH(&z64);
- if(runtime_cas64(&z64, &x64, 1))
+ if(runtime_cas64(&z64, x64, 1))
runtime_throw("cas64 failed");
- if(x64 != 42)
+ if(x64 != 0)
runtime_throw("cas64 failed");
- if(!runtime_cas64(&z64, &x64, 1))
+ x64 = 42;
+ if(!runtime_cas64(&z64, x64, 1))
runtime_throw("cas64 failed");
if(x64 != 42 || z64 != 1)
runtime_throw("cas64 failed");
@@ -279,3 +280,79 @@ runtime_signalstack(byte *p, int32 n)
if(sigaltstack(&st, nil) < 0)
*(int *)0xf1 = 0xf1;
}
+
+DebugVars runtime_debug;
+
+static struct {
+ const char* name;
+ int32* value;
+} dbgvar[] = {
+ {"gctrace", &runtime_debug.gctrace},
+ {"schedtrace", &runtime_debug.schedtrace},
+ {"scheddetail", &runtime_debug.scheddetail},
+};
+
+void
+runtime_parsedebugvars(void)
+{
+ const byte *p;
+ intgo i, n;
+
+ p = runtime_getenv("GODEBUG");
+ if(p == nil)
+ return;
+ for(;;) {
+ for(i=0; i<(intgo)nelem(dbgvar); i++) {
+ n = runtime_findnull((const byte*)dbgvar[i].name);
+ if(runtime_mcmp(p, dbgvar[i].name, n) == 0 && p[n] == '=')
+ *dbgvar[i].value = runtime_atoi(p+n+1);
+ }
+ p = (const byte *)runtime_strstr((const char *)p, ",");
+ if(p == nil)
+ break;
+ p++;
+ }
+}
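A small worked example of the parser above (editorial, not part of the patch):

// With GODEBUG="gctrace=1,schedtrace=1000,scheddetail=1" in the environment,
// runtime_parsedebugvars() leaves:
//   runtime_debug.gctrace     == 1
//   runtime_debug.schedtrace  == 1000
//   runtime_debug.scheddetail == 1
// Unrecognized name=value pairs are silently ignored; parsing walks the string
// comma by comma via runtime_strstr and converts each value with runtime_atoi.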
+
+// Poor man's 64-bit division.
+// This is a very special function; do not use it unless you are sure what you are doing.
+// int64 division is lowered into _divv() call on 386, which does not fit into nosplit functions.
+// Handles overflow in a time-specific manner.
+int32
+runtime_timediv(int64 v, int32 div, int32 *rem)
+{
+ int32 res, bit;
+
+ if(v >= (int64)div*0x7fffffffLL) {
+ if(rem != nil)
+ *rem = 0;
+ return 0x7fffffff;
+ }
+ res = 0;
+ for(bit = 30; bit >= 0; bit--) {
+ if(v >= ((int64)div<<bit)) {
+ v = v - ((int64)div<<bit);
+ res += 1<<bit;
+ }
+ }
+ if(rem != nil)
+ *rem = v;
+ return res;
+}
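As a sanity check on the shift-subtract loop above, here is a standalone, hedged re-implementation in plain C (hypothetical file, not part of the patch) that splits a nanosecond count into seconds plus a remainder, the way runtime_futexsleep in thread-linux.c does further down:

/* timediv_check.c (illustrative): same algorithm as runtime_timediv above,
   exercised with a 1.5 second duration.  Build with: cc -o timediv_check timediv_check.c */
#include <stdio.h>
#include <stdint.h>

static int32_t
timediv(int64_t v, int32_t div, int32_t *rem)
{
	int32_t res, bit;

	if(v >= (int64_t)div*0x7fffffffLL) {	/* saturate instead of overflowing */
		if(rem != NULL)
			*rem = 0;
		return 0x7fffffff;
	}
	res = 0;
	for(bit = 30; bit >= 0; bit--) {	/* subtract div<<bit greedily, high bit first */
		if(v >= ((int64_t)div<<bit)) {
			v -= (int64_t)div<<bit;
			res += 1<<bit;
		}
	}
	if(rem != NULL)
		*rem = (int32_t)v;
	return res;
}

int
main(void)
{
	int32_t sec, nsec;

	sec = timediv(1500000000LL, 1000000000, &nsec);
	printf("%d sec + %d nsec (expect 1 sec + 500000000 nsec)\n", sec, nsec);
	return 0;
}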
+
+// Setting the max stack size doesn't really do anything for gccgo.
+
+uintptr runtime_maxstacksize = 1<<20; // enough until runtime.main sets it for real
+
+intgo runtime_debug_setMaxStack(intgo)
+ __asm__ (GOSYM_PREFIX "runtime_debug.setMaxStack");
+
+intgo
+runtime_debug_setMaxStack(intgo in)
+{
+ intgo out;
+
+ out = runtime_maxstacksize;
+ runtime_maxstacksize = in;
+ return out;
+}
diff --git a/libgo/runtime/runtime.h b/libgo/runtime/runtime.h
index d2e7d4c11bc..e82e83231e6 100644
--- a/libgo/runtime/runtime.h
+++ b/libgo/runtime/runtime.h
@@ -72,6 +72,7 @@ typedef struct ParFor ParFor;
typedef struct ParForThread ParForThread;
typedef struct CgoMal CgoMal;
typedef struct PollDesc PollDesc;
+typedef struct DebugVars DebugVars;
typedef struct __go_open_array Slice;
typedef struct __go_interface Iface;
@@ -82,6 +83,7 @@ typedef struct __go_panic_stack Panic;
typedef struct __go_ptr_type PtrType;
typedef struct __go_func_type FuncType;
+typedef struct __go_interface_type InterfaceType;
typedef struct __go_map_type MapType;
typedef struct __go_channel_type ChanType;
@@ -206,21 +208,20 @@ struct G
void* param; // passed parameter on wakeup
bool fromgogo; // reached from gogo
int16 status;
- int64 goid;
uint32 selgen; // valid sudog pointer
+ int64 goid;
const char* waitreason; // if status==Gwaiting
G* schedlink;
bool ispanic;
bool issystem; // do not output in stack dump
bool isbackground; // ignore in deadlock detector
- bool blockingsyscall; // hint that the next syscall will block
M* m; // for debuggers, but offset not hard-coded
M* lockedm;
int32 sig;
int32 writenbuf;
byte* writebuf;
- // DeferChunk *dchunk;
- // DeferChunk *dchunknext;
+ // DeferChunk* dchunk;
+ // DeferChunk* dchunknext;
uintptr sigcode0;
uintptr sigcode1;
// uintptr sigpc;
@@ -243,6 +244,7 @@ struct M
size_t gsignalstacksize;
void (*mstartfn)(void);
G* curg; // current running goroutine
+ G* caughtsig; // goroutine running during fatal signal
P* p; // attached P for executing Go code (nil if not executing Go code)
P* nextp;
int32 id;
@@ -250,11 +252,9 @@ struct M
int32 throwing;
int32 gcing;
int32 locks;
- int32 nomemprof;
int32 dying;
int32 profilehz;
int32 helpgc;
- bool blockingsyscall;
bool spinning;
uint32 fastrand;
uint64 ncgocall; // number of cgo calls in total
@@ -289,10 +289,12 @@ struct P
{
Lock;
- uint32 status; // one of Pidle/Prunning/...
+ int32 id;
+ uint32 status; // one of Pidle/Prunning/...
P* link;
- uint32 tick; // incremented on every scheduler or system call
- M* m; // back-link to associated M (nil if idle)
+ uint32 schedtick; // incremented on every scheduler call
+ uint32 syscalltick; // incremented on every system call
+ M* m; // back-link to associated M (nil if idle)
MCache* mcache;
// Queue of runnable goroutines.
@@ -308,9 +310,13 @@ struct P
byte pad[64];
};
-// The m->locked word holds a single bit saying whether
-// external calls to LockOSThread are in effect, and then a counter
-// of the internal nesting depth of lockOSThread / unlockOSThread.
+// The m->locked word holds two pieces of state counting active calls to LockOSThread/lockOSThread.
+// The low bit (LockExternal) is a boolean reporting whether any LockOSThread call is active.
+// External locks are not recursive; a second lock is silently ignored.
+// The remaining bits of m->locked record the nesting depth of calls to lockOSThread
+// (counting up by LockInternal), popped by unlockOSThread (counting down by LockInternal).
+// Internal locks can be recursive. For instance, a lock for cgo can occur while the main
+// goroutine is holding the lock during the initialization phase.
enum
{
LockExternal = 1,
@@ -333,19 +339,16 @@ enum
SigIgnored = 1<<6, // the signal was ignored before we registered for it
};
-#ifndef NSIG
-#define NSIG 32
-#endif
-
-// NOTE(rsc): keep in sync with extern.go:/type.Func.
-// Eventually, the loaded symbol table should be closer to this form.
+// Layout of in-memory per-function information prepared by linker
+// See http://golang.org/s/go12symtab.
+// Keep in sync with linker and with ../../libmach/sym.c
+// and with package debug/gosym.
struct Func
{
String name;
uintptr entry; // entry pc
};
-
#ifdef GOOS_windows
enum {
Windows = 1
@@ -372,7 +375,7 @@ struct Timers
// If this struct changes, adjust ../time/sleep.go:/runtimeTimer.
struct Timer
{
- int32 i; // heap index
+ int32 i; // heap index
// Timer wakes up at when, and then at when+period, ... (period > 0 only)
// each time calling f(now, arg) in the timer goroutine, so f must be
@@ -420,6 +423,16 @@ struct CgoMal
void *alloc;
};
+// Holds variables parsed from GODEBUG env var.
+struct DebugVars
+{
+ int32 gctrace;
+ int32 schedtrace;
+ int32 scheddetail;
+};
+
+extern bool runtime_precisestack;
+
/*
* defined macros
* you need super-gopher-guru privilege
@@ -453,12 +466,11 @@ extern M* runtime_allm;
extern P** runtime_allp;
extern int32 runtime_gomaxprocs;
extern uint32 runtime_needextram;
-extern bool runtime_singleproc;
extern uint32 runtime_panicking;
-extern uint32 runtime_gcwaiting; // gc is waiting to run
extern int8* runtime_goos;
extern int32 runtime_ncpu;
extern void (*runtime_sysargs)(int32, uint8**);
+extern DebugVars runtime_debug;
/*
* common functions and data
@@ -466,11 +478,13 @@ extern void (*runtime_sysargs)(int32, uint8**);
#define runtime_strcmp(s1, s2) __builtin_strcmp((s1), (s2))
#define runtime_strstr(s1, s2) __builtin_strstr((s1), (s2))
intgo runtime_findnull(const byte*);
+intgo runtime_findnullw(const uint16*);
void runtime_dump(byte*, int32);
/*
* very low level c-called
*/
+void runtime_gogo(G*);
struct __go_func_type;
void runtime_args(int32, byte**);
void runtime_osinit();
@@ -492,14 +506,13 @@ void runtime_sigenable(uint32 sig);
void runtime_sigdisable(uint32 sig);
int32 runtime_gotraceback(bool *crash);
void runtime_goroutineheader(G*);
-void runtime_goroutinetrailer(G*);
void runtime_printtrace(Location*, int32, bool);
#define runtime_open(p, f, m) open((p), (f), (m))
#define runtime_read(d, v, n) read((d), (v), (n))
#define runtime_write(d, v, n) write((d), (v), (n))
#define runtime_close(d) close(d)
#define runtime_cas(pval, old, new) __sync_bool_compare_and_swap (pval, old, new)
-#define runtime_cas64(pval, pold, new) __atomic_compare_exchange_n (pval, pold, new, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)
+#define runtime_cas64(pval, old, new) __sync_bool_compare_and_swap (pval, old, new)
#define runtime_casp(pval, old, new) __sync_bool_compare_and_swap (pval, old, new)
// Don't confuse with XADD x86 instruction,
// this one is actually 'addx', that is, add-and-fetch.
@@ -530,17 +543,21 @@ void runtime_mallocinit(void);
void runtime_mprofinit(void);
#define runtime_malloc(s) __go_alloc(s)
#define runtime_free(p) __go_free(p)
-bool runtime_addfinalizer(void*, FuncVal *fn, const struct __go_func_type *);
+bool runtime_addfinalizer(void*, FuncVal *fn, const struct __go_func_type *, const struct __go_ptr_type *);
#define runtime_getcallersp(p) __builtin_frame_address(1)
int32 runtime_mcount(void);
int32 runtime_gcount(void);
+void runtime_mcall(void(*)(G*));
uint32 runtime_fastrand1(void);
+int32 runtime_timediv(int64, int32, int32*);
void runtime_setmg(M*, G*);
void runtime_newextram(void);
#define runtime_exit(s) exit(s)
#define runtime_breakpoint() __builtin_trap()
void runtime_gosched(void);
+void runtime_gosched0(G*);
+void runtime_schedtrace(bool);
void runtime_park(void(*)(Lock*), Lock*, const char*);
void runtime_tsleep(int64, const char*);
M* runtime_newm(void);
@@ -555,6 +572,8 @@ int32 runtime_callers(int32, Location*, int32);
int64 runtime_nanotime(void);
void runtime_dopanic(int32) __attribute__ ((noreturn));
void runtime_startpanic(void);
+void runtime_freezetheworld(void);
+void runtime_unwindstack(G*, byte*);
void runtime_sigprof();
void runtime_resetcpuprofiler(int32);
void runtime_setcpuprofilerate(void(*)(uintptr*, int32), int32);
@@ -567,10 +586,14 @@ void runtime_addtimer(Timer*);
bool runtime_deltimer(Timer*);
G* runtime_netpoll(bool);
void runtime_netpollinit(void);
-int32 runtime_netpollopen(int32, PollDesc*);
-int32 runtime_netpollclose(int32);
+int32 runtime_netpollopen(uintptr, PollDesc*);
+int32 runtime_netpollclose(uintptr);
void runtime_netpollready(G**, PollDesc*, int32);
+uintptr runtime_netpollfd(PollDesc*);
void runtime_crash(void);
+void runtime_parsedebugvars(void);
+void _rt0_go(void);
+void* runtime_funcdata(Func*, int32);
void runtime_stoptheworld(void);
void runtime_starttheworld(void);
@@ -603,11 +626,15 @@ void runtime_unlock(Lock*);
* wake up early, it must wait to call noteclear until it
* can be sure that no other goroutine is calling
* notewakeup.
+ *
+ * notesleep/notetsleep are generally called on g0,
+ * notetsleepg is similar to notetsleep but is called on user g.
*/
void runtime_noteclear(Note*);
void runtime_notesleep(Note*);
void runtime_notewakeup(Note*);
-void runtime_notetsleep(Note*, int64);
+bool runtime_notetsleep(Note*, int64); // false - timeout
+bool runtime_notetsleepg(Note*, int64); // false - timeout
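A hedged, runtime-internal usage sketch (wait_example is an invented name; it assumes the declarations above and is not part of the patch): notetsleepg is meant to be called directly from a user goroutine, replacing the entersyscallblock/notetsleep/exitsyscall pattern removed from sigqueue.goc and time.goc below.

// Illustration only: wait on a Note from a user goroutine, up to 50ms.
static void
wait_example(Note *n)
{
	runtime_noteclear(n);
	if(!runtime_notetsleepg(n, 50*1000*1000))
		runtime_printf("note wait timed out\n");	// false return means the timeout fired
}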
/*
* low-level synchronization for implementing the above
@@ -698,11 +725,13 @@ void runtime_newTypeAssertionError(const String*, const String*, const String*,
__asm__ (GOSYM_PREFIX "runtime.NewTypeAssertionError");
void runtime_newErrorString(String, Eface*)
__asm__ (GOSYM_PREFIX "runtime.NewErrorString");
+void runtime_newErrorCString(const char*, Eface*)
+ __asm__ (GOSYM_PREFIX "runtime.NewErrorCString");
/*
* wrapped for go users
*/
-void runtime_semacquire(uint32 volatile *);
+void runtime_semacquire(uint32 volatile *, bool);
void runtime_semrelease(uint32 volatile *);
int32 runtime_gomaxprocsfunc(int32 n);
void runtime_procyield(uint32);
@@ -711,19 +740,10 @@ void runtime_lockOSThread(void);
void runtime_unlockOSThread(void);
bool runtime_showframe(String, bool);
+void runtime_printcreatedby(G*);
uintptr runtime_memlimit(void);
-// If appropriate, ask the operating system to control whether this
-// thread should receive profiling signals. This is only necessary on OS X.
-// An operating system should not deliver a profiling signal to a
-// thread that is not actually executing (what good is that?), but that's
-// what OS X prefers to do. When profiling is turned on, we mask
-// away the profiling signal when threads go to sleep, so that OS X
-// is forced to deliver the signal to a thread that's actually running.
-// This is a no-op on other systems.
-void runtime_setprof(bool);
-
#define ISNAN(f) __builtin_isnan(f)
enum
@@ -763,3 +783,6 @@ int32 getproccount(void);
void __go_set_closure(void*);
void* __go_get_closure(void);
+
+bool runtime_gcwaiting(void);
+void runtime_badsignal(int);
diff --git a/libgo/runtime/sema.goc b/libgo/runtime/sema.goc
index be971bd1265..f5d5bc89e3d 100644
--- a/libgo/runtime/sema.goc
+++ b/libgo/runtime/sema.goc
@@ -21,22 +21,23 @@ package sync
#include "runtime.h"
#include "arch.h"
-typedef struct Sema Sema;
-struct Sema
+typedef struct SemaWaiter SemaWaiter;
+struct SemaWaiter
{
uint32 volatile* addr;
G* g;
int64 releasetime;
- Sema* prev;
- Sema* next;
+ int32 nrelease; // -1 for acquire
+ SemaWaiter* prev;
+ SemaWaiter* next;
};
typedef struct SemaRoot SemaRoot;
struct SemaRoot
{
Lock;
- Sema* head;
- Sema* tail;
+ SemaWaiter* head;
+ SemaWaiter* tail;
// Number of waiters. Read w/o the lock.
uint32 volatile nwait;
};
@@ -58,7 +59,7 @@ semroot(uint32 volatile *addr)
}
static void
-semqueue(SemaRoot *root, uint32 volatile *addr, Sema *s)
+semqueue(SemaRoot *root, uint32 volatile *addr, SemaWaiter *s)
{
s->g = runtime_g();
s->addr = addr;
@@ -72,7 +73,7 @@ semqueue(SemaRoot *root, uint32 volatile *addr, Sema *s)
}
static void
-semdequeue(SemaRoot *root, Sema *s)
+semdequeue(SemaRoot *root, SemaWaiter *s)
{
if(s->next)
s->next->prev = s->prev;
@@ -97,10 +98,10 @@ cansemacquire(uint32 volatile *addr)
return 0;
}
-static void
-semacquireimpl(uint32 volatile *addr, int32 profile)
+void
+runtime_semacquire(uint32 volatile *addr, bool profile)
{
- Sema s; // Needs to be allocated on stack, otherwise garbage collector could deallocate it
+ SemaWaiter s; // Needs to be allocated on stack, otherwise garbage collector could deallocate it
SemaRoot *root;
int64 t0;
@@ -145,15 +146,9 @@ semacquireimpl(uint32 volatile *addr, int32 profile)
}
void
-runtime_semacquire(uint32 volatile *addr)
-{
- semacquireimpl(addr, 0);
-}
-
-void
runtime_semrelease(uint32 volatile *addr)
{
- Sema *s;
+ SemaWaiter *s;
SemaRoot *root;
root = semroot(addr);
@@ -188,10 +183,117 @@ runtime_semrelease(uint32 volatile *addr)
}
}
+// TODO(dvyukov): move to netpoll.goc once it's used by all OSes.
+void net_runtime_Semacquire(uint32 *addr)
+ __asm__ (GOSYM_PREFIX "net.runtime_Semacquire");
+
+void net_runtime_Semacquire(uint32 *addr)
+{
+ runtime_semacquire(addr, true);
+}
+
+void net_runtime_Semrelease(uint32 *addr)
+ __asm__ (GOSYM_PREFIX "net.runtime_Semrelease");
+
+void net_runtime_Semrelease(uint32 *addr)
+{
+ runtime_semrelease(addr);
+}
+
func runtime_Semacquire(addr *uint32) {
- semacquireimpl(addr, 1);
+ runtime_semacquire(addr, true);
}
func runtime_Semrelease(addr *uint32) {
runtime_semrelease(addr);
}
+
+typedef struct SyncSema SyncSema;
+struct SyncSema
+{
+ Lock;
+ SemaWaiter* head;
+ SemaWaiter* tail;
+};
+
+func runtime_Syncsemcheck(size uintptr) {
+ if(size != sizeof(SyncSema)) {
+ runtime_printf("bad SyncSema size: sync:%D runtime:%D\n", (int64)size, (int64)sizeof(SyncSema));
+ runtime_throw("bad SyncSema size");
+ }
+}
+
+// Syncsemacquire waits for a pairing Syncsemrelease on the same semaphore s.
+func runtime_Syncsemacquire(s *SyncSema) {
+ SemaWaiter w, *wake;
+ int64 t0;
+
+ w.g = runtime_g();
+ w.nrelease = -1;
+ w.next = nil;
+ w.releasetime = 0;
+ t0 = 0;
+ if(runtime_blockprofilerate > 0) {
+ t0 = runtime_cputicks();
+ w.releasetime = -1;
+ }
+
+ runtime_lock(s);
+ if(s->head && s->head->nrelease > 0) {
+ // have pending release, consume it
+ wake = nil;
+ s->head->nrelease--;
+ if(s->head->nrelease == 0) {
+ wake = s->head;
+ s->head = wake->next;
+ if(s->head == nil)
+ s->tail = nil;
+ }
+ runtime_unlock(s);
+ if(wake)
+ runtime_ready(wake->g);
+ } else {
+ // enqueue itself
+ if(s->tail == nil)
+ s->head = &w;
+ else
+ s->tail->next = &w;
+ s->tail = &w;
+ runtime_park(runtime_unlock, s, "semacquire");
+ if(t0)
+ runtime_blockevent(w.releasetime - t0, 2);
+ }
+}
+
+// Syncsemrelease waits for n pairing Syncsemacquire on the same semaphore s.
+func runtime_Syncsemrelease(s *SyncSema, n uint32) {
+ SemaWaiter w, *wake;
+
+ w.g = runtime_g();
+ w.nrelease = (int32)n;
+ w.next = nil;
+ w.releasetime = 0;
+
+ runtime_lock(s);
+ while(w.nrelease > 0 && s->head && s->head->nrelease < 0) {
+ // have pending acquire, satisfy it
+ wake = s->head;
+ s->head = wake->next;
+ if(s->head == nil)
+ s->tail = nil;
+ if(wake->releasetime)
+ wake->releasetime = runtime_cputicks();
+ runtime_ready(wake->g);
+ w.nrelease--;
+ }
+ if(w.nrelease > 0) {
+ // enqueue itself
+ if(s->tail == nil)
+ s->head = &w;
+ else
+ s->tail->next = &w;
+ s->tail = &w;
+ runtime_park(runtime_unlock, s, "semarelease");
+ } else
+ runtime_unlock(s);
+}
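An editorial sketch of the pairing semantics (not part of the patch); SyncSema stores no counter, so acquire and release rendezvous directly:

// goroutine A: runtime_Syncsemacquire(s)      // no pending release queued: A parks on s
// goroutine B: runtime_Syncsemrelease(s, 1)   // finds A (nrelease < 0), stamps its
//                                             // releasetime and readies it; B returns
//                                             // without blocking
//
// In the opposite order, B parks with nrelease = 1 and the next Syncsemacquire
// consumes that pending release instead of parking.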
diff --git a/libgo/runtime/signal_unix.c b/libgo/runtime/signal_unix.c
index 5a506c8af3d..ea0a58f2ea2 100644
--- a/libgo/runtime/signal_unix.c
+++ b/libgo/runtime/signal_unix.c
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build darwin freebsd linux openbsd netbsd
+// +build darwin dragonfly freebsd linux openbsd netbsd
#include <sys/time.h>
@@ -100,13 +100,11 @@ runtime_resetcpuprofiler(int32 hz)
runtime_memclr((byte*)&it, sizeof it);
if(hz == 0) {
runtime_setitimer(ITIMER_PROF, &it, nil);
- runtime_setprof(false);
} else {
it.it_interval.tv_sec = 0;
it.it_interval.tv_usec = 1000000 / hz;
it.it_value = it.it_interval;
runtime_setitimer(ITIMER_PROF, &it, nil);
- runtime_setprof(true);
}
runtime_m()->profilehz = hz;
}
diff --git a/libgo/runtime/sigqueue.goc b/libgo/runtime/sigqueue.goc
index 8657216d3f4..6769b239dc3 100644
--- a/libgo/runtime/sigqueue.goc
+++ b/libgo/runtime/sigqueue.goc
@@ -107,9 +107,7 @@ func signal_recv() (m uint32) {
new = HASWAITER;
if(runtime_cas(&sig.state, old, new)) {
if (new == HASWAITER) {
- runtime_entersyscallblock();
- runtime_notesleep(&sig);
- runtime_exitsyscall();
+ runtime_notetsleepg(&sig, -1);
runtime_noteclear(&sig);
}
break;
@@ -157,3 +155,10 @@ func signal_disable(s uint32) {
sig.wanted[s/32] &= ~(1U<<(s&31));
runtime_sigdisable(s);
}
+
+// This runs on a foreign stack, without an m or a g. No stack split.
+void
+runtime_badsignal(int sig)
+{
+ __go_sigsend(sig);
+}
diff --git a/libgo/runtime/string.goc b/libgo/runtime/string.goc
index 64ed4f6ebaa..a7446e93c45 100644
--- a/libgo/runtime/string.goc
+++ b/libgo/runtime/string.goc
@@ -21,6 +21,18 @@ runtime_findnull(const byte *s)
return __builtin_strlen((const char*) s);
}
+intgo
+runtime_findnullw(const uint16 *s)
+{
+ intgo l;
+
+ if(s == nil)
+ return 0;
+ for(l=0; s[l]!=0; l++)
+ ;
+ return l;
+}
+
static String
gostringsize(intgo l, byte** pmem)
{
@@ -32,7 +44,7 @@ gostringsize(intgo l, byte** pmem)
return runtime_emptystring;
}
// leave room for NUL for C runtime (e.g., callers of getenv)
- mem = runtime_mallocgc(l+1, FlagNoPointers, 1, 0);
+ mem = runtime_mallocgc(l+1, 0, FlagNoScan|FlagNoZero);
s.str = mem;
s.len = l;
mem[l] = 0;
@@ -63,6 +75,15 @@ runtime_gostringnocopy(const byte *str)
return s;
}
+String runtime_cstringToGo(byte*)
+ __asm__ (GOSYM_PREFIX "runtime.cstringToGo");
+
+String
+runtime_cstringToGo(byte *str)
+{
+ return runtime_gostringnocopy(str);
+}
+
enum
{
Runeself = 0x80,
diff --git a/libgo/runtime/thread-linux.c b/libgo/runtime/thread-linux.c
index 13d23c47b07..ae56261e6f5 100644
--- a/libgo/runtime/thread-linux.c
+++ b/libgo/runtime/thread-linux.c
@@ -4,6 +4,7 @@
#include "runtime.h"
#include "defs.h"
+#include "signal_unix.h"
// Linux futex.
//
@@ -33,25 +34,22 @@ typedef struct timespec Timespec;
void
runtime_futexsleep(uint32 *addr, uint32 val, int64 ns)
{
- Timespec ts, *tsp;
-
- if(ns < 0)
- tsp = nil;
- else {
- ts.tv_sec = ns/1000000000LL;
- ts.tv_nsec = ns%1000000000LL;
- // Avoid overflow
- if(ts.tv_sec > 1<<30)
- ts.tv_sec = 1<<30;
- tsp = &ts;
- }
+ Timespec ts;
+ int32 nsec;
// Some Linux kernels have a bug where futex of
// FUTEX_WAIT returns an internal error code
// as an errno. Libpthread ignores the return value
// here, and so can we: as it says a few lines up,
// spurious wakeups are allowed.
- syscall(__NR_futex, addr, FUTEX_WAIT, val, tsp, nil, 0);
+
+ if(ns < 0) {
+ syscall(__NR_futex, addr, FUTEX_WAIT, val, nil, nil, 0);
+ return;
+ }
+ ts.tv_sec = runtime_timediv(ns, 1000000000LL, &nsec);
+ ts.tv_nsec = nsec;
+ syscall(__NR_futex, addr, FUTEX_WAIT, val, &ts, nil, 0);
}
// If any procs are sleeping on addr, wake up at most cnt.
diff --git a/libgo/runtime/time.goc b/libgo/runtime/time.goc
index 8d12fe01080..e4e35ec0846 100644
--- a/libgo/runtime/time.goc
+++ b/libgo/runtime/time.goc
@@ -12,8 +12,13 @@ package time
#include "malloc.h"
#include "race.h"
+enum {
+ debug = 0,
+};
+
static Timers timers;
static void addtimer(Timer*);
+static void dumptimers(const char*);
// Package time APIs.
// Godoc uses the comments in package time, not these.
@@ -92,6 +97,11 @@ addtimer(Timer *t)
int32 n;
Timer **nt;
+ // when must never be negative; otherwise timerproc will overflow
+ // during its delta calculation and never expire other timers.
+ if(t->when < 0)
+ t->when = (int64)((1ULL<<63)-1);
+
if(timers.len >= timers.cap) {
// Grow slice.
n = 16;
@@ -121,8 +131,13 @@ addtimer(Timer *t)
timers.timerproc = __go_go(timerproc, nil);
timers.timerproc->issystem = true;
}
+ if(debug)
+ dumptimers("addtimer");
}
+// Used to force a dereference before the lock is acquired.
+static int32 gi;
+
// Delete timer t from the heap.
// Do not need to update the timerproc:
// if it wakes up early, no big deal.
@@ -131,6 +146,11 @@ runtime_deltimer(Timer *t)
{
int32 i;
+ // Dereference t so that any panic happens before the lock is held.
+ // Discard result, because t might be moving in the heap.
+ i = t->i;
+ gi = i;
+
runtime_lock(&timers);
// t may not be registered anymore and may have
@@ -152,6 +172,8 @@ runtime_deltimer(Timer *t)
siftup(i);
siftdown(i);
}
+ if(debug)
+ dumptimers("deltimer");
runtime_unlock(&timers);
return true;
}
@@ -170,6 +192,7 @@ timerproc(void* dummy __attribute__ ((unused)))
for(;;) {
runtime_lock(&timers);
+ timers.sleeping = false;
now = runtime_nanotime();
for(;;) {
if(timers.len == 0) {
@@ -210,9 +233,7 @@ timerproc(void* dummy __attribute__ ((unused)))
timers.sleeping = true;
runtime_noteclear(&timers.waitnote);
runtime_unlock(&timers);
- runtime_entersyscallblock();
- runtime_notetsleep(&timers.waitnote, delta);
- runtime_exitsyscall();
+ runtime_notetsleepg(&timers.waitnote, delta);
}
}
@@ -222,18 +243,20 @@ static void
siftup(int32 i)
{
int32 p;
+ int64 when;
Timer **t, *tmp;
t = timers.t;
+ when = t[i]->when;
+ tmp = t[i];
while(i > 0) {
- p = (i-1)/2; // parent
- if(t[i]->when >= t[p]->when)
+ p = (i-1)/4; // parent
+ if(when >= t[p]->when)
break;
- tmp = t[i];
t[i] = t[p];
- t[p] = tmp;
t[i]->i = i;
- t[p]->i = p;
+ t[p] = tmp;
+ tmp->i = p;
i = p;
}
}
@@ -241,29 +264,61 @@ siftup(int32 i)
static void
siftdown(int32 i)
{
- int32 c, len;
+ int32 c, c3, len;
+ int64 when, w, w3;
Timer **t, *tmp;
t = timers.t;
len = timers.len;
+ when = t[i]->when;
+ tmp = t[i];
for(;;) {
- c = i*2 + 1; // left child
+ c = i*4 + 1; // left child
+ c3 = c + 2; // mid child
if(c >= len) {
break;
}
- if(c+1 < len && t[c+1]->when < t[c]->when)
+ w = t[c]->when;
+ if(c+1 < len && t[c+1]->when < w) {
+ w = t[c+1]->when;
c++;
- if(t[c]->when >= t[i]->when)
+ }
+ if(c3 < len) {
+ w3 = t[c3]->when;
+ if(c3+1 < len && t[c3+1]->when < w3) {
+ w3 = t[c3+1]->when;
+ c3++;
+ }
+ if(w3 < w) {
+ w = w3;
+ c = c3;
+ }
+ }
+ if(w >= when)
break;
- tmp = t[i];
t[i] = t[c];
- t[c] = tmp;
t[i]->i = i;
- t[c]->i = c;
+ t[c] = tmp;
+ tmp->i = c;
i = c;
}
}
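An editorial note on the new heap shape (not part of the patch): the timer heap is now 4-ary instead of binary, which shortens the sift paths for large timer counts.

//   parent(i)   = (i-1)/4
//   children(i) = 4*i+1 .. 4*i+4   (c is the left pair, c3 = c+2 the right pair)
//
// siftdown first picks the smaller of (c, c+1), then the smaller of (c3, c3+1),
// takes the minimum of the two winners, and only then compares it against the
// cached `when` of the element being sifted; siftup compares against the single
// parent at (i-1)/4.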
+static void
+dumptimers(const char *msg)
+{
+ Timer *t;
+ int32 i;
+
+ runtime_printf("timers: %s\n", msg);
+ for(i = 0; i < timers.len; i++) {
+ t = timers.t[i];
+ runtime_printf("\t%d\t%p:\ti %d when %D period %D fn %p\n",
+ i, t, t->i, t->when, t->period, t->fv->fn);
+ }
+ runtime_printf("\n");
+}
+
void
runtime_time_scan(void (*addroot)(Obj))
{