author     Russ Cox <rsc@golang.org>  2014-12-05 11:45:03 -0500
committer  Russ Cox <rsc@golang.org>  2014-12-05 11:45:03 -0500
commit     b03c1034af12d0ddd1b99ef791425e07d75f3677 (patch)
tree       69e27f09db888f62c2aa9db29ab0c7233c1b286b
parent     8670c52abcdb260f5366d6fa205a7c7f9b5b1067 (diff)
parent     60e02bbbea5d3bf4a6b06551f09c984285f8139c (diff)
all: merge default (faa3ed1dc30e) into dev.garbage
-rw-r--r--  lib/codereview/codereview.py       4
-rw-r--r--  src/cmd/api/goapi.go                1
-rw-r--r--  src/cmd/gc/builtin.c               31
-rw-r--r--  src/cmd/gc/go.h                     1
-rw-r--r--  src/cmd/gc/popt.c                   4
-rw-r--r--  src/cmd/gc/runtime.go              33
-rw-r--r--  src/cmd/gc/typecheck.c              6
-rw-r--r--  src/cmd/gc/walk.c                  63
-rwxr-xr-x  src/run.bash                        3
-rw-r--r--  src/runtime/asm_386.s              20
-rw-r--r--  src/runtime/asm_amd64.s            20
-rw-r--r--  src/runtime/asm_amd64p32.s         21
-rw-r--r--  src/runtime/asm_arm.s              12
-rw-r--r--  src/runtime/asm_power64x.s         12
-rw-r--r--  src/runtime/export_test.go          2
-rw-r--r--  src/runtime/heapdump.go             4
-rw-r--r--  src/runtime/lfstack.go             10
-rw-r--r--  src/runtime/lfstack_test.go         2
-rw-r--r--  src/runtime/malloc.go              83
-rw-r--r--  src/runtime/malloc2.go             41
-rw-r--r--  src/runtime/mcache.go              11
-rw-r--r--  src/runtime/mcentral.go            30
-rw-r--r--  src/runtime/mgc.go               1261
-rw-r--r--  src/runtime/mgc0.go               127
-rw-r--r--  src/runtime/mgc0.h                  4
-rw-r--r--  src/runtime/mgc1.go                11
-rw-r--r--  src/runtime/mheap.go                6
-rw-r--r--  src/runtime/os_linux_386.go         3
-rw-r--r--  src/runtime/print1.go              30
-rw-r--r--  src/runtime/proc.go                 3
-rw-r--r--  src/runtime/proc1.go               56
-rw-r--r--  src/runtime/rt0_linux_386.s         1
-rw-r--r--  src/runtime/runtime1.go             5
-rw-r--r--  src/runtime/runtime2.go            17
-rw-r--r--  src/runtime/select.go               7
-rw-r--r--  src/runtime/stack1.go              78
-rw-r--r--  src/runtime/stubs.go                5
-rw-r--r--  src/runtime/vdso_none.go            1
-rw-r--r--  src/runtime/wbfat.go              190
-rw-r--r--  src/runtime/wbfat_gen.go           41
-rw-r--r--  src/sync/atomic/atomic_test.go      8
-rw-r--r--  test/live.go                      152
-rw-r--r--  test/live2.go                       6
43 files changed, 1874 insertions, 552 deletions
diff --git a/lib/codereview/codereview.py b/lib/codereview/codereview.py
index 9181f1df3..0c9b27a31 100644
--- a/lib/codereview/codereview.py
+++ b/lib/codereview/codereview.py
@@ -2024,13 +2024,13 @@ def submit(ui, repo, *pats, **opts):
# push to remote; if it fails for any reason, roll back
try:
new_heads = len(hg_heads(ui, repo).split())
- if old_heads != new_heads and not (old_heads == 0 and new_heads == 1):
+ if cl.desc.find("create new branch") < 0 and old_heads != new_heads and not (old_heads == 0 and new_heads == 1):
# Created new head, so we weren't up to date.
need_sync()
# Push changes to remote. If it works, we're committed. If not, roll back.
try:
- if hg_push(ui, repo):
+ if hg_push(ui, repo, new_branch=cl.desc.find("create new branch")>=0):
raise hg_util.Abort("push error")
except hg_error.Abort, e:
if e.message.find("push creates new heads") >= 0:
diff --git a/src/cmd/api/goapi.go b/src/cmd/api/goapi.go
index 5a8c87603..e49ba33bb 100644
--- a/src/cmd/api/goapi.go
+++ b/src/cmd/api/goapi.go
@@ -405,6 +405,7 @@ func (w *Walker) parseFile(dir, file string) (*ast.File, error) {
" note struct{};" +
" p struct{};" +
" parfor struct{};" +
+ " slice struct{};" +
" slicetype struct{};" +
" stkframe struct{};" +
" sudog struct{};" +
diff --git a/src/cmd/gc/builtin.c b/src/cmd/gc/builtin.c
index fbca4ee5f..aeeadedca 100644
--- a/src/cmd/gc/builtin.c
+++ b/src/cmd/gc/builtin.c
@@ -24,6 +24,8 @@ char *runtimeimport =
"func @\"\".printslice (? any)\n"
"func @\"\".printnl ()\n"
"func @\"\".printsp ()\n"
+ "func @\"\".printlock ()\n"
+ "func @\"\".printunlock ()\n"
"func @\"\".concatstring2 (? string, ? string) (? string)\n"
"func @\"\".concatstring3 (? string, ? string, ? string) (? string)\n"
"func @\"\".concatstring4 (? string, ? string, ? string, ? string) (? string)\n"
@@ -86,10 +88,33 @@ char *runtimeimport =
"func @\"\".writebarrierstring (@\"\".dst·1 *any, @\"\".src·2 any)\n"
"func @\"\".writebarrierslice (@\"\".dst·1 *any, @\"\".src·2 any)\n"
"func @\"\".writebarrieriface (@\"\".dst·1 *any, @\"\".src·2 any)\n"
- "func @\"\".writebarrierfat2 (@\"\".dst·1 *any, _ *byte, @\"\".src·3 any)\n"
- "func @\"\".writebarrierfat3 (@\"\".dst·1 *any, _ *byte, @\"\".src·3 any)\n"
- "func @\"\".writebarrierfat4 (@\"\".dst·1 *any, _ *byte, @\"\".src·3 any)\n"
+ "func @\"\".writebarrierfat01 (@\"\".dst·1 *any, _ *byte, @\"\".src·3 any)\n"
+ "func @\"\".writebarrierfat10 (@\"\".dst·1 *any, _ *byte, @\"\".src·3 any)\n"
+ "func @\"\".writebarrierfat11 (@\"\".dst·1 *any, _ *byte, @\"\".src·3 any)\n"
+ "func @\"\".writebarrierfat001 (@\"\".dst·1 *any, _ *byte, @\"\".src·3 any)\n"
+ "func @\"\".writebarrierfat010 (@\"\".dst·1 *any, _ *byte, @\"\".src·3 any)\n"
+ "func @\"\".writebarrierfat011 (@\"\".dst·1 *any, _ *byte, @\"\".src·3 any)\n"
+ "func @\"\".writebarrierfat100 (@\"\".dst·1 *any, _ *byte, @\"\".src·3 any)\n"
+ "func @\"\".writebarrierfat101 (@\"\".dst·1 *any, _ *byte, @\"\".src·3 any)\n"
+ "func @\"\".writebarrierfat110 (@\"\".dst·1 *any, _ *byte, @\"\".src·3 any)\n"
+ "func @\"\".writebarrierfat111 (@\"\".dst·1 *any, _ *byte, @\"\".src·3 any)\n"
+ "func @\"\".writebarrierfat0001 (@\"\".dst·1 *any, _ *byte, @\"\".src·3 any)\n"
+ "func @\"\".writebarrierfat0010 (@\"\".dst·1 *any, _ *byte, @\"\".src·3 any)\n"
+ "func @\"\".writebarrierfat0011 (@\"\".dst·1 *any, _ *byte, @\"\".src·3 any)\n"
+ "func @\"\".writebarrierfat0100 (@\"\".dst·1 *any, _ *byte, @\"\".src·3 any)\n"
+ "func @\"\".writebarrierfat0101 (@\"\".dst·1 *any, _ *byte, @\"\".src·3 any)\n"
+ "func @\"\".writebarrierfat0110 (@\"\".dst·1 *any, _ *byte, @\"\".src·3 any)\n"
+ "func @\"\".writebarrierfat0111 (@\"\".dst·1 *any, _ *byte, @\"\".src·3 any)\n"
+ "func @\"\".writebarrierfat1000 (@\"\".dst·1 *any, _ *byte, @\"\".src·3 any)\n"
+ "func @\"\".writebarrierfat1001 (@\"\".dst·1 *any, _ *byte, @\"\".src·3 any)\n"
+ "func @\"\".writebarrierfat1010 (@\"\".dst·1 *any, _ *byte, @\"\".src·3 any)\n"
+ "func @\"\".writebarrierfat1011 (@\"\".dst·1 *any, _ *byte, @\"\".src·3 any)\n"
+ "func @\"\".writebarrierfat1100 (@\"\".dst·1 *any, _ *byte, @\"\".src·3 any)\n"
+ "func @\"\".writebarrierfat1101 (@\"\".dst·1 *any, _ *byte, @\"\".src·3 any)\n"
+ "func @\"\".writebarrierfat1110 (@\"\".dst·1 *any, _ *byte, @\"\".src·3 any)\n"
+ "func @\"\".writebarrierfat1111 (@\"\".dst·1 *any, _ *byte, @\"\".src·3 any)\n"
"func @\"\".writebarrierfat (@\"\".typ·1 *byte, @\"\".dst·2 *any, @\"\".src·3 *any)\n"
+ "func @\"\".writebarriercopy (@\"\".typ·2 *byte, @\"\".dst·3 any, @\"\".src·4 any) (? int)\n"
"func @\"\".selectnbsend (@\"\".chanType·2 *byte, @\"\".hchan·3 chan<- any, @\"\".elem·4 *any) (? bool)\n"
"func @\"\".selectnbrecv (@\"\".chanType·2 *byte, @\"\".elem·3 *any, @\"\".hchan·4 <-chan any) (? bool)\n"
"func @\"\".selectnbrecv2 (@\"\".chanType·2 *byte, @\"\".elem·3 *any, @\"\".received·4 *bool, @\"\".hchan·5 <-chan any) (? bool)\n"
diff --git a/src/cmd/gc/go.h b/src/cmd/gc/go.h
index 6e326961d..5236305f8 100644
--- a/src/cmd/gc/go.h
+++ b/src/cmd/gc/go.h
@@ -1473,6 +1473,7 @@ void walk(Node *fn);
void walkexpr(Node **np, NodeList **init);
void walkexprlist(NodeList *l, NodeList **init);
void walkexprlistsafe(NodeList *l, NodeList **init);
+void walkexprlistcheap(NodeList *l, NodeList **init);
void walkstmt(Node **np);
void walkstmtlist(NodeList *l);
Node* conv(Node*, Type*);
diff --git a/src/cmd/gc/popt.c b/src/cmd/gc/popt.c
index 993bb2482..6e6db88ef 100644
--- a/src/cmd/gc/popt.c
+++ b/src/cmd/gc/popt.c
@@ -847,6 +847,10 @@ nilopt(Prog *firstp)
Graph *g;
int ncheck, nkill;
+ // TODO(minux): nilopt on power64 throws away a seemingly random segment of code.
+ if(thechar == '9')
+ return;
+
g = flowstart(firstp, sizeof(NilFlow));
if(g == nil)
return;
diff --git a/src/cmd/gc/runtime.go b/src/cmd/gc/runtime.go
index 0fb15c265..c6007714c 100644
--- a/src/cmd/gc/runtime.go
+++ b/src/cmd/gc/runtime.go
@@ -36,6 +36,8 @@ func printeface(any)
func printslice(any)
func printnl()
func printsp()
+func printlock()
+func printunlock()
func concatstring2(string, string) string
func concatstring3(string, string, string) string
@@ -115,10 +117,35 @@ func writebarrieriface(dst *any, src any)
// The unused *byte argument makes sure that src is 2-pointer-aligned,
// which is the maximum alignment on NaCl amd64p32
// (and possibly on 32-bit systems if we start 64-bit aligning uint64s).
-func writebarrierfat2(dst *any, _ *byte, src any)
-func writebarrierfat3(dst *any, _ *byte, src any)
-func writebarrierfat4(dst *any, _ *byte, src any)
+// The bitmap in the name tells which words being copied are pointers.
+func writebarrierfat01(dst *any, _ *byte, src any)
+func writebarrierfat10(dst *any, _ *byte, src any)
+func writebarrierfat11(dst *any, _ *byte, src any)
+func writebarrierfat001(dst *any, _ *byte, src any)
+func writebarrierfat010(dst *any, _ *byte, src any)
+func writebarrierfat011(dst *any, _ *byte, src any)
+func writebarrierfat100(dst *any, _ *byte, src any)
+func writebarrierfat101(dst *any, _ *byte, src any)
+func writebarrierfat110(dst *any, _ *byte, src any)
+func writebarrierfat111(dst *any, _ *byte, src any)
+func writebarrierfat0001(dst *any, _ *byte, src any)
+func writebarrierfat0010(dst *any, _ *byte, src any)
+func writebarrierfat0011(dst *any, _ *byte, src any)
+func writebarrierfat0100(dst *any, _ *byte, src any)
+func writebarrierfat0101(dst *any, _ *byte, src any)
+func writebarrierfat0110(dst *any, _ *byte, src any)
+func writebarrierfat0111(dst *any, _ *byte, src any)
+func writebarrierfat1000(dst *any, _ *byte, src any)
+func writebarrierfat1001(dst *any, _ *byte, src any)
+func writebarrierfat1010(dst *any, _ *byte, src any)
+func writebarrierfat1011(dst *any, _ *byte, src any)
+func writebarrierfat1100(dst *any, _ *byte, src any)
+func writebarrierfat1101(dst *any, _ *byte, src any)
+func writebarrierfat1110(dst *any, _ *byte, src any)
+func writebarrierfat1111(dst *any, _ *byte, src any)
+
func writebarrierfat(typ *byte, dst *any, src *any)
+func writebarriercopy(typ *byte, dst any, src any) int
func selectnbsend(chanType *byte, hchan chan<- any, elem *any) bool
func selectnbrecv(chanType *byte, elem *any, hchan <-chan any) bool
diff --git a/src/cmd/gc/typecheck.c b/src/cmd/gc/typecheck.c
index 714c66268..f05d8022d 100644
--- a/src/cmd/gc/typecheck.c
+++ b/src/cmd/gc/typecheck.c
@@ -2891,7 +2891,8 @@ typecheckas(Node *n)
case OSLICE3:
case OSLICESTR:
// For x = x[0:y], x can be updated in place, without touching pointer.
- if(samesafeexpr(n->left, n->right->left) && (n->right->right->left == N || iszero(n->right->right->left)))
+ // TODO(rsc): Reenable once it is actually updated in place without touching the pointer.
+ if(0 && samesafeexpr(n->left, n->right->left) && (n->right->right->left == N || iszero(n->right->right->left)))
n->right->reslice = 1;
break;
@@ -2899,7 +2900,8 @@ typecheckas(Node *n)
// For x = append(x, ...), x can be updated in place when there is capacity,
// without touching the pointer; otherwise the emitted code to growslice
// can take care of updating the pointer, and only in that case.
- if(n->right->list != nil && samesafeexpr(n->left, n->right->list->n))
+ // TODO(rsc): Reenable once the emitted code does update the pointer.
+ if(0 && n->right->list != nil && samesafeexpr(n->left, n->right->list->n))
n->right->reslice = 1;
break;
}
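For illustration (this snippet is not part of the change), these are the two assignment forms whose in-place "reslice" marking is switched off above; with the concurrent collector, the slice's data pointer may only bypass the write barrier once the emitted code provably leaves that pointer untouched, which is what both TODOs wait for.

package main

// Illustrative only: the assignment shapes typecheckas looks for.
func main() {
	x := make([]int, 4, 16)

	// x = x[0:y]: same backing array, only the length changes.
	x = x[0:2]

	// x = append(x, ...): the data pointer changes only when capacity runs out.
	x = append(x, 7)

	_ = x
}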
diff --git a/src/cmd/gc/walk.c b/src/cmd/gc/walk.c
index 77f9c80f9..061089349 100644
--- a/src/cmd/gc/walk.c
+++ b/src/cmd/gc/walk.c
@@ -6,6 +6,7 @@
#include <libc.h>
#include "go.h"
#include "../ld/textflag.h"
+#include "../../runtime/mgc0.h"
static Node* walkprint(Node*, NodeList**);
static Node* writebarrierfn(char*, Type*, Type*);
@@ -363,6 +364,15 @@ walkexprlistsafe(NodeList *l, NodeList **init)
}
void
+walkexprlistcheap(NodeList *l, NodeList **init)
+{
+ for(; l; l=l->next) {
+ l->n = cheapexpr(l->n, init);
+ walkexpr(&l->n, init);
+ }
+}
+
+void
walkexpr(Node **np, NodeList **init)
{
Node *r, *l, *var, *a;
@@ -1771,6 +1781,11 @@ walkprint(Node *nn, NodeList **init)
calls = nil;
notfirst = 0;
+ // Hoist all the argument evaluation up before the lock.
+ walkexprlistcheap(all, init);
+
+ calls = list(calls, mkcall("printlock", T, init));
+
for(l=all; l; l=l->next) {
if(notfirst) {
calls = list(calls, mkcall("printsp", T, init));
@@ -1851,6 +1866,9 @@ walkprint(Node *nn, NodeList **init)
if(op == OPRINTN)
calls = list(calls, mkcall("printnl", T, nil));
+
+ calls = list(calls, mkcall("printunlock", T, init));
+
typechecklist(calls, Etop);
walkexprlist(calls, init);
@@ -1987,6 +2005,9 @@ applywritebarrier(Node *n, NodeList **init)
{
Node *l, *r;
Type *t;
+ vlong x;
+ static Bvec *bv;
+ char name[32];
if(n->left && n->right && needwritebarrier(n->left, n->right)) {
t = n->left->type;
@@ -2004,14 +2025,35 @@ applywritebarrier(Node *n, NodeList **init)
} else if(isinter(t)) {
n = mkcall1(writebarrierfn("writebarrieriface", t, n->right->type), T, init,
l, n->right);
- } else if(t->width == 2*widthptr) {
- n = mkcall1(writebarrierfn("writebarrierfat2", t, n->right->type), T, init,
- l, nodnil(), n->right);
- } else if(t->width == 3*widthptr) {
- n = mkcall1(writebarrierfn("writebarrierfat3", t, n->right->type), T, init,
- l, nodnil(), n->right);
- } else if(t->width == 4*widthptr) {
- n = mkcall1(writebarrierfn("writebarrierfat4", t, n->right->type), T, init,
+ } else if(t->width <= 4*widthptr) {
+ x = 0;
+ if(bv == nil)
+ bv = bvalloc(BitsPerPointer*4);
+ bvresetall(bv);
+ twobitwalktype1(t, &x, bv);
+ // The bvgets are looking for BitsPointer in successive slots.
+ enum {
+ PtrBit = 1,
+ };
+ if(BitsPointer != (1<<PtrBit))
+ fatal("wrong PtrBit");
+ switch(t->width/widthptr) {
+ default:
+ fatal("found writebarrierfat for %d-byte object of type %T", (int)t->width, t);
+ case 2:
+ snprint(name, sizeof name, "writebarrierfat%d%d",
+ bvget(bv, PtrBit), bvget(bv, BitsPerPointer+PtrBit));
+ break;
+ case 3:
+ snprint(name, sizeof name, "writebarrierfat%d%d%d",
+ bvget(bv, PtrBit), bvget(bv, BitsPerPointer+PtrBit), bvget(bv, 2*BitsPerPointer+PtrBit));
+ break;
+ case 4:
+ snprint(name, sizeof name, "writebarrierfat%d%d%d%d",
+ bvget(bv, PtrBit), bvget(bv, BitsPerPointer+PtrBit), bvget(bv, 2*BitsPerPointer+PtrBit), bvget(bv, 3*BitsPerPointer+PtrBit));
+ break;
+ }
+ n = mkcall1(writebarrierfn(name, t, n->right->type), T, init,
l, nodnil(), n->right);
} else {
r = n->right;
@@ -2873,6 +2915,11 @@ copyany(Node *n, NodeList **init, int runtimecall)
{
Node *nl, *nr, *nfrm, *nto, *nif, *nlen, *nwid, *fn;
NodeList *l;
+
+ if(haspointers(n->left->type->type)) {
+ fn = writebarrierfn("writebarriercopy", n->left->type, n->right->type);
+ return mkcall1(fn, n->type, init, typename(n->left->type->type), n->left, n->right);
+ }
if(runtimecall) {
if(n->right->type->etype == TSTRING)
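The snprint above assembles the helper name from the per-word pointer bitmap (1 = pointer word, 0 = scalar word); the matching runtime bodies live in the new, generated src/runtime/wbfat.go listed in the diffstat but not shown here. Below is a self-contained sketch of the shape such a helper takes; shadePtr is a made-up stand-in for the runtime's single-pointer write barrier.

package main

// shadePtr is a hypothetical stand-in for the runtime's pointer write
// barrier; here it just performs the store.
func shadePtr(dst *uintptr, src uintptr) { *dst = src }

// writebarrierfat01 sketches a generated helper for a two-word value whose
// bitmap is "01": the first word is a scalar, the second is a pointer, so
// only the second slot is routed through the barrier.
func writebarrierfat01(dst *[2]uintptr, _ *byte, src [2]uintptr) {
	dst[0] = src[0]           // scalar word: plain store
	shadePtr(&dst[1], src[1]) // pointer word: barriered store
}

func main() {
	var d [2]uintptr
	writebarrierfat01(&d, nil, [2]uintptr{42, 0})
}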
diff --git a/src/run.bash b/src/run.bash
index 6b9ecc33c..b8ce417a0 100755
--- a/src/run.bash
+++ b/src/run.bash
@@ -162,7 +162,8 @@ esac
# Race detector only supported on Linux, FreeBSD and OS X,
# and only on amd64, and only when cgo is enabled.
# Delayed until here so we know whether to try external linking.
-case "$GOHOSTOS-$GOOS-$GOARCH-$CGO_ENABLED" in
+# DISABLED until we get garbage collection working.
+case "$GOHOSTOS-$GOOS-$GOARCH-$CGO_ENABLED-XXX-DISABLED" in
linux-linux-amd64-1 | freebsd-freebsd-amd64-1 | darwin-darwin-amd64-1)
echo
echo '# Testing race detector.'
diff --git a/src/runtime/asm_386.s b/src/runtime/asm_386.s
index a02bb5556..7cc64a3a4 100644
--- a/src/runtime/asm_386.s
+++ b/src/runtime/asm_386.s
@@ -2285,3 +2285,23 @@ TEXT runtime·getg(SB),NOSPLIT,$0-4
MOVL AX, ret+0(FP)
RET
+TEXT runtime·prefetcht0(SB),NOSPLIT,$0-4
+ MOVL addr+0(FP), AX
+ PREFETCHT0 (AX)
+ RET
+
+TEXT runtime·prefetcht1(SB),NOSPLIT,$0-4
+ MOVL addr+0(FP), AX
+ PREFETCHT1 (AX)
+ RET
+
+
+TEXT runtime·prefetcht2(SB),NOSPLIT,$0-4
+ MOVL addr+0(FP), AX
+ PREFETCHT2 (AX)
+ RET
+
+TEXT runtime·prefetchnta(SB),NOSPLIT,$0-4
+ MOVL addr+0(FP), AX
+ PREFETCHNTA (AX)
+ RET
diff --git a/src/runtime/asm_amd64.s b/src/runtime/asm_amd64.s
index 6e3f5ff6c..14be2fe92 100644
--- a/src/runtime/asm_amd64.s
+++ b/src/runtime/asm_amd64.s
@@ -2228,3 +2228,23 @@ TEXT runtime·getg(SB),NOSPLIT,$0-8
MOVQ g(CX), AX
MOVQ AX, ret+0(FP)
RET
+
+TEXT runtime·prefetcht0(SB),NOSPLIT,$0-8
+ MOVQ addr+0(FP), AX
+ PREFETCHT0 (AX)
+ RET
+
+TEXT runtime·prefetcht1(SB),NOSPLIT,$0-8
+ MOVQ addr+0(FP), AX
+ PREFETCHT1 (AX)
+ RET
+
+TEXT runtime·prefetcht2(SB),NOSPLIT,$0-8
+ MOVQ addr+0(FP), AX
+ PREFETCHT2 (AX)
+ RET
+
+TEXT runtime·prefetchnta(SB),NOSPLIT,$0-8
+ MOVQ addr+0(FP), AX
+ PREFETCHNTA (AX)
+ RET
diff --git a/src/runtime/asm_amd64p32.s b/src/runtime/asm_amd64p32.s
index b8370efd3..c87d848fe 100644
--- a/src/runtime/asm_amd64p32.s
+++ b/src/runtime/asm_amd64p32.s
@@ -1079,3 +1079,24 @@ TEXT runtime·getg(SB),NOSPLIT,$0-4
MOVL g(CX), AX
MOVL AX, ret+0(FP)
RET
+
+TEXT runtime·prefetcht0(SB),NOSPLIT,$0-4
+ MOVL addr+0(FP), AX
+ PREFETCHT0 (AX)
+ RET
+
+TEXT runtime·prefetcht1(SB),NOSPLIT,$0-4
+ MOVL addr+0(FP), AX
+ PREFETCHT1 (AX)
+ RET
+
+
+TEXT runtime·prefetcht2(SB),NOSPLIT,$0-4
+ MOVL addr+0(FP), AX
+ PREFETCHT2 (AX)
+ RET
+
+TEXT runtime·prefetchnta(SB),NOSPLIT,$0-4
+ MOVL addr+0(FP), AX
+ PREFETCHNTA (AX)
+ RET
diff --git a/src/runtime/asm_arm.s b/src/runtime/asm_arm.s
index 583c7ba50..c6c98b443 100644
--- a/src/runtime/asm_arm.s
+++ b/src/runtime/asm_arm.s
@@ -1320,3 +1320,15 @@ TEXT runtime·goexit(SB),NOSPLIT,$-4-0
TEXT runtime·getg(SB),NOSPLIT,$-4-4
MOVW g, ret+0(FP)
RET
+
+TEXT runtime·prefetcht0(SB),NOSPLIT,$0-4
+ RET
+
+TEXT runtime·prefetcht1(SB),NOSPLIT,$0-4
+ RET
+
+TEXT runtime·prefetcht2(SB),NOSPLIT,$0-4
+ RET
+
+TEXT runtime·prefetchnta(SB),NOSPLIT,$0-4
+ RET
diff --git a/src/runtime/asm_power64x.s b/src/runtime/asm_power64x.s
index 3f2ab6d0e..548c88e47 100644
--- a/src/runtime/asm_power64x.s
+++ b/src/runtime/asm_power64x.s
@@ -977,3 +977,15 @@ TEXT runtime·goexit(SB),NOSPLIT,$-8-0
TEXT runtime·getg(SB),NOSPLIT,$-8-8
MOVD g, ret+0(FP)
RETURN
+
+TEXT runtime·prefetcht0(SB),NOSPLIT,$0-8
+ RETURN
+
+TEXT runtime·prefetcht1(SB),NOSPLIT,$0-8
+ RETURN
+
+TEXT runtime·prefetcht2(SB),NOSPLIT,$0-8
+ RETURN
+
+TEXT runtime·prefetchnta(SB),NOSPLIT,$0-8
+ RETURN
diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go
index e871236e4..5ed255026 100644
--- a/src/runtime/export_test.go
+++ b/src/runtime/export_test.go
@@ -26,7 +26,7 @@ var Exitsyscall = exitsyscall
var LockedOSThread = lockedOSThread
type LFNode struct {
- Next *LFNode
+ Next uint64
Pushcnt uintptr
}
diff --git a/src/runtime/heapdump.go b/src/runtime/heapdump.go
index 0c1a60c8b..e1693d40f 100644
--- a/src/runtime/heapdump.go
+++ b/src/runtime/heapdump.go
@@ -464,8 +464,8 @@ func dumpobjs() {
if n > uintptr(len(freemark)) {
gothrow("freemark array doesn't have enough entries")
}
- for l := s.freelist; l != nil; l = l.next {
- freemark[(uintptr(unsafe.Pointer(l))-p)/size] = true
+ for l := s.freelist; l.ptr() != nil; l = l.ptr().next {
+ freemark[(uintptr(l)-p)/size] = true
}
for j := uintptr(0); j < n; j, p = j+1, p+size {
if freemark[j] {
diff --git a/src/runtime/lfstack.go b/src/runtime/lfstack.go
index 8a36a67b3..fd3325972 100644
--- a/src/runtime/lfstack.go
+++ b/src/runtime/lfstack.go
@@ -18,7 +18,7 @@ func lfstackpush(head *uint64, node *lfnode) {
}
for {
old := atomicload64(head)
- node.next, _ = lfstackUnpack(old)
+ node.next = old
if cas64(head, old, new) {
break
}
@@ -32,12 +32,8 @@ func lfstackpop(head *uint64) unsafe.Pointer {
return nil
}
node, _ := lfstackUnpack(old)
- node2 := (*lfnode)(atomicloadp(unsafe.Pointer(&node.next)))
- new := uint64(0)
- if node2 != nil {
- new = lfstackPack(node2, node2.pushcnt)
- }
- if cas64(head, old, new) {
+ next := atomicload64(&node.next)
+ if cas64(head, old, next) {
return unsafe.Pointer(node)
}
}
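Storing next as a raw uint64 instead of *lfnode keeps the lock-free stack opaque to the collector: the tracer never follows it and stores to it need no write barrier, which matters because these lists are manipulated from deep inside the GC itself. Below is a simplified user-space analogue of the pattern; it drops the push-count ABA tag the runtime packs into the word, and hiding pointers this way is only safe because the runtime's nodes are not ordinary garbage-collected objects.

package main

import (
	"fmt"
	"sync/atomic"
	"unsafe"
)

// node mirrors lfnode: next is a packed address, not a Go pointer,
// so the GC neither traces it nor requires a write barrier for it.
type node struct {
	next uint64
	data int
}

type stack struct{ head uint64 }

func (s *stack) push(n *node) {
	packed := uint64(uintptr(unsafe.Pointer(n)))
	for {
		old := atomic.LoadUint64(&s.head)
		n.next = old // plain store; with *lfnode this would need a barrier
		if atomic.CompareAndSwapUint64(&s.head, old, packed) {
			return
		}
	}
}

func (s *stack) pop() *node {
	for {
		old := atomic.LoadUint64(&s.head)
		if old == 0 {
			return nil
		}
		n := (*node)(unsafe.Pointer(uintptr(old)))
		if atomic.CompareAndSwapUint64(&s.head, old, atomic.LoadUint64(&n.next)) {
			return n
		}
	}
}

func main() {
	var s stack
	s.push(&node{data: 42})
	fmt.Println(s.pop().data)
}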
diff --git a/src/runtime/lfstack_test.go b/src/runtime/lfstack_test.go
index e51877704..68f221d6e 100644
--- a/src/runtime/lfstack_test.go
+++ b/src/runtime/lfstack_test.go
@@ -121,7 +121,7 @@ func TestLFStackStress(t *testing.T) {
}
cnt++
sum2 += node.data
- node.Next = nil
+ node.Next = 0
}
}
if cnt != K {
diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go
index d73d1ba6a..e9fec7bb1 100644
--- a/src/runtime/malloc.go
+++ b/src/runtime/malloc.go
@@ -140,14 +140,14 @@ func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer {
// Allocate a new maxTinySize block.
s = c.alloc[tinySizeClass]
v := s.freelist
- if v == nil {
+ if v.ptr() == nil {
systemstack(func() {
mCache_Refill(c, tinySizeClass)
})
s = c.alloc[tinySizeClass]
v = s.freelist
}
- s.freelist = v.next
+ s.freelist = v.ptr().next
s.ref++
//TODO: prefetch v.next
x = unsafe.Pointer(v)
@@ -170,19 +170,19 @@ func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer {
size = uintptr(class_to_size[sizeclass])
s = c.alloc[sizeclass]
v := s.freelist
- if v == nil {
+ if v.ptr() == nil {
systemstack(func() {
mCache_Refill(c, int32(sizeclass))
})
s = c.alloc[sizeclass]
v = s.freelist
}
- s.freelist = v.next
+ s.freelist = v.ptr().next
s.ref++
//TODO: prefetch
x = unsafe.Pointer(v)
if flags&flagNoZero == 0 {
- v.next = nil
+ v.ptr().next = 0
if size > 2*ptrSize && ((*[2]uintptr)(x))[1] != 0 {
memclr(unsafe.Pointer(v), size)
}
@@ -241,6 +241,8 @@ func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer {
masksize = masksize * pointersPerByte / 8 // 4 bits per word
masksize++ // unroll flag in the beginning
if masksize > maxGCMask && typ.gc[1] != 0 {
+ // write barriers have not been updated to deal with this case yet.
+ gothrow("maxGCMask too small for now")
// If the mask is too large, unroll the program directly
// into the GC bitmap. It's 7 times slower than copying
// from the pre-unrolled mask, but saves 1/16 of type size
@@ -295,6 +297,17 @@ func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer {
}
}
marked:
+
+ // GCmarktermination allocates black
+ // All slots hold nil so no scanning is needed.
+ // This may be racing with GC so do it atomically if there can be
+ // a race marking the bit.
+ if gcphase == _GCmarktermination {
+ systemstack(func() {
+ gcmarknewobject_m(uintptr(x))
+ })
+ }
+
if raceenabled {
racemalloc(x, size)
}
@@ -328,13 +341,43 @@ marked:
}
}
- if memstats.heap_alloc >= memstats.next_gc {
+ if memstats.heap_alloc >= memstats.next_gc/2 {
gogc(0)
}
return x
}
+func loadPtrMask(typ *_type) []uint8 {
+ var ptrmask *uint8
+ nptr := (uintptr(typ.size) + ptrSize - 1) / ptrSize
+ if typ.kind&kindGCProg != 0 {
+ masksize := nptr
+ if masksize%2 != 0 {
+ masksize *= 2 // repeated
+ }
+ masksize = masksize * pointersPerByte / 8 // 4 bits per word
+ masksize++ // unroll flag in the beginning
+ if masksize > maxGCMask && typ.gc[1] != 0 {
+ // write barriers have not been updated to deal with this case yet.
+ gothrow("maxGCMask too small for now")
+ }
+ ptrmask = (*uint8)(unsafe.Pointer(uintptr(typ.gc[0])))
+ // Check whether the program is already unrolled
+ // by checking if the unroll flag byte is set
+ maskword := uintptr(atomicloadp(unsafe.Pointer(ptrmask)))
+ if *(*uint8)(unsafe.Pointer(&maskword)) == 0 {
+ systemstack(func() {
+ unrollgcprog_m(typ)
+ })
+ }
+ ptrmask = (*uint8)(add(unsafe.Pointer(ptrmask), 1)) // skip the unroll flag byte
+ } else {
+ ptrmask = (*uint8)(unsafe.Pointer(typ.gc[0])) // pointer to unrolled mask
+ }
+ return (*[1 << 30]byte)(unsafe.Pointer(ptrmask))[:(nptr+1)/2]
+}
+
// implementation of new builtin
func newobject(typ *_type) unsafe.Pointer {
flags := uint32(0)
@@ -429,7 +472,21 @@ func gogc(force int32) {
mp = acquirem()
mp.gcing = 1
releasem(mp)
+
systemstack(stoptheworld)
+ systemstack(finishsweep_m) // finish sweep before we start concurrent scan.
+ if true { // To turn on concurrent scan and mark set to true...
+ systemstack(starttheworld)
+ // Do a concurrent heap scan before we stop the world.
+ systemstack(gcscan_m)
+ systemstack(stoptheworld)
+ systemstack(gcinstallmarkwb_m)
+ systemstack(starttheworld)
+ systemstack(gcmark_m)
+ systemstack(stoptheworld)
+ systemstack(gcinstalloffwb_m)
+ }
+
if mp != acquirem() {
gothrow("gogc: rescheduled")
}
@@ -445,17 +502,21 @@ func gogc(force int32) {
if debug.gctrace > 1 {
n = 2
}
+ eagersweep := force >= 2
for i := 0; i < n; i++ {
if i > 0 {
startTime = nanotime()
}
// switch to g0, call gc, then switch back
- eagersweep := force >= 2
systemstack(func() {
gc_m(startTime, eagersweep)
})
}
+ systemstack(func() {
+ gccheckmark_m(startTime, eagersweep)
+ })
+
// all done
mp.gcing = 0
semrelease(&worldsema)
@@ -470,6 +531,14 @@ func gogc(force int32) {
}
}
+func GCcheckmarkenable() {
+ systemstack(gccheckmarkenable_m)
+}
+
+func GCcheckmarkdisable() {
+ systemstack(gccheckmarkdisable_m)
+}
+
// GC runs a garbage collection.
func GC() {
gogc(2)
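Two things stand out in this file: the collection trigger drops from next_gc to next_gc/2, apparently to give the newly wired-in concurrent scan and mark phases room to finish before the goal is reached, and the checkmark machinery is exposed as runtime.GCcheckmarkenable / runtime.GCcheckmarkdisable. A hypothetical debugging session on this branch; checkmark re-marks the stopped world with an orthogonal bit pattern to catch objects the concurrent mark missed.

// Only meaningful against this dev.garbage runtime, where these exported
// hooks exist; gccheckmarkenable defaults to true in mgc.go below.
package main

import "runtime"

func main() {
	runtime.GCcheckmarkenable() // enable the checkmark verification pass
	runtime.GC()                // gogc(2): gc_m, then gccheckmark_m
	runtime.GCcheckmarkdisable()
}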
diff --git a/src/runtime/malloc2.go b/src/runtime/malloc2.go
index c175c2aec..a9d40de30 100644
--- a/src/runtime/malloc2.go
+++ b/src/runtime/malloc2.go
@@ -139,10 +139,35 @@ const (
)
// A generic linked list of blocks. (Typically the block is bigger than sizeof(MLink).)
+// Since assignments to mlink.next will result in a write barrier being performed,
+// this cannot be used by some of the internal GC structures. For example, when
+// the sweeper is placing an unmarked object on the free list it does not want the
+// write barrier to be called since that could result in the object being reachable.
type mlink struct {
next *mlink
}
+// A gclink is a node in a linked list of blocks, like mlink,
+// but it is opaque to the garbage collector.
+// The GC does not trace the pointers during collection,
+// and the compiler does not emit write barriers for assignments
+// of gclinkptr values. Code should store references to gclinks
+// as gclinkptr, not as *gclink.
+type gclink struct {
+ next gclinkptr
+}
+
+// A gclinkptr is a pointer to a gclink, but it is opaque
+// to the garbage collector.
+type gclinkptr uintptr
+
+// ptr returns the *gclink form of p.
+// The result should be used for accessing fields, not stored
+// in other data structures.
+func (p gclinkptr) ptr() *gclink {
+ return (*gclink)(unsafe.Pointer(p))
+}
+
// sysAlloc obtains a large chunk of zeroed memory from the
// operating system, typically on the order of a hundred kilobytes
// or a megabyte.
@@ -275,8 +300,8 @@ type mcachelist struct {
}
type stackfreelist struct {
- list *mlink // linked list of free stacks
- size uintptr // total size of stacks in list
+ list gclinkptr // linked list of free stacks
+ size uintptr // total size of stacks in list
}
// Per-thread (in Go, per-P) cache for small objects.
@@ -299,8 +324,6 @@ type mcache struct {
sudogcache *sudog
- gcworkbuf unsafe.Pointer
-
// Local allocator stats, flushed during GC.
local_nlookup uintptr // number of pointer lookups
local_largefree uintptr // bytes freed for large objects (>maxsmallsize)
@@ -348,11 +371,11 @@ const (
)
type mspan struct {
- next *mspan // in a span linked list
- prev *mspan // in a span linked list
- start pageID // starting page number
- npages uintptr // number of pages in span
- freelist *mlink // list of free objects
+ next *mspan // in a span linked list
+ prev *mspan // in a span linked list
+ start pageID // starting page number
+ npages uintptr // number of pages in span
+ freelist gclinkptr // list of free objects
// sweep generation:
// if sweepgen == h->sweepgen - 2, the span needs sweeping
// if sweepgen == h->sweepgen - 1, the span is currently being swept
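Below is a self-contained sketch of the gclinkptr idiom introduced in this file (names reused for clarity, but this is not runtime code): links are held as bare uintptrs, so stores to them are plain word writes with no write barrier and the tracer ignores them, and ptr() converts back only at the moment a field is touched. As with the runtime's free lists, this is only sound for memory whose liveness the collector does not depend on discovering through these links. The loop also mirrors how the reworked mCentral_Grow threads head/tail links through a fresh span.

package main

import (
	"fmt"
	"unsafe"
)

type gclinkptr uintptr

type gclink struct{ next gclinkptr }

// ptr converts the opaque link back into a usable *gclink.
func (p gclinkptr) ptr() *gclink { return (*gclink)(unsafe.Pointer(p)) }

func main() {
	// A raw block standing in for a span; the slice itself keeps it alive.
	const n, size = 4, 32
	block := make([]byte, n*size)
	base := uintptr(unsafe.Pointer(&block[0]))

	// Carve the block into a singly linked free list.
	head := gclinkptr(base)
	tail := head
	for i := uintptr(1); i < n; i++ {
		p := gclinkptr(base + i*size)
		tail.ptr().next = p // plain store: no write barrier on a uintptr
		tail = p
	}
	tail.ptr().next = 0

	// Walk the list the way the allocator pops from s.freelist.
	count := 0
	for l := head; l.ptr() != nil; l = l.ptr().next {
		count++
	}
	fmt.Println("free objects:", count)
}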
diff --git a/src/runtime/mcache.go b/src/runtime/mcache.go
index d3afef6be..f8389c5cb 100644
--- a/src/runtime/mcache.go
+++ b/src/runtime/mcache.go
@@ -38,7 +38,12 @@ func freemcache(c *mcache) {
systemstack(func() {
mCache_ReleaseAll(c)
stackcache_clear(c)
- gcworkbuffree(c.gcworkbuf)
+
+ // NOTE(rsc,rlh): If gcworkbuffree comes back, we need to coordinate
+ // with the stealing of gcworkbufs during garbage collection to avoid
+ // a race where the workbuf is double-freed.
+ // gcworkbuffree(c.gcworkbuf)
+
lock(&mheap_.lock)
purgecachedstats(c)
fixAlloc_Free(&mheap_.cachealloc, unsafe.Pointer(c))
@@ -54,7 +59,7 @@ func mCache_Refill(c *mcache, sizeclass int32) *mspan {
_g_.m.locks++
// Return the current cached span to the central lists.
s := c.alloc[sizeclass]
- if s.freelist != nil {
+ if s.freelist.ptr() != nil {
gothrow("refill on a nonempty span")
}
if s != &emptymspan {
@@ -66,7 +71,7 @@ func mCache_Refill(c *mcache, sizeclass int32) *mspan {
if s == nil {
gothrow("out of memory")
}
- if s.freelist == nil {
+ if s.freelist.ptr() == nil {
println(s.ref, (s.npages<<_PageShift)/s.elemsize)
gothrow("empty span")
}
diff --git a/src/runtime/mcentral.go b/src/runtime/mcentral.go
index 0d172a08b..ae5c6f1d5 100644
--- a/src/runtime/mcentral.go
+++ b/src/runtime/mcentral.go
@@ -55,7 +55,7 @@ retry:
mSpanList_InsertBack(&c.empty, s)
unlock(&c.lock)
mSpan_Sweep(s, true)
- if s.freelist != nil {
+ if s.freelist.ptr() != nil {
goto havespan
}
lock(&c.lock)
@@ -90,7 +90,7 @@ havespan:
if n == 0 {
gothrow("empty span")
}
- if s.freelist == nil {
+ if s.freelist.ptr() == nil {
gothrow("freelist empty")
}
s.incache = true
@@ -122,14 +122,14 @@ func mCentral_UncacheSpan(c *mcentral, s *mspan) {
// the latest generation.
// If preserve=true, don't return the span to heap nor relink in MCentral lists;
// caller takes care of it.
-func mCentral_FreeSpan(c *mcentral, s *mspan, n int32, start *mlink, end *mlink, preserve bool) bool {
+func mCentral_FreeSpan(c *mcentral, s *mspan, n int32, start gclinkptr, end gclinkptr, preserve bool) bool {
if s.incache {
gothrow("freespan into cached span")
}
// Add the objects back to s's free list.
- wasempty := s.freelist == nil
- end.next = s.freelist
+ wasempty := s.freelist.ptr() == nil
+ end.ptr().next = s.freelist
s.freelist = start
s.ref -= uint16(n)
@@ -165,7 +165,7 @@ func mCentral_FreeSpan(c *mcentral, s *mspan, n int32, start *mlink, end *mlink,
// s is completely freed, return it to the heap.
mSpanList_Remove(s)
s.needzero = 1
- s.freelist = nil
+ s.freelist = 0
unlock(&c.lock)
unmarkspan(uintptr(s.start)<<_PageShift, s.npages<<_PageShift)
mHeap_Free(&mheap_, s, 0)
@@ -183,17 +183,21 @@ func mCentral_Grow(c *mcentral) *mspan {
return nil
}
- // Carve span into sequence of blocks.
- tailp := &s.freelist
p := uintptr(s.start << _PageShift)
s.limit = p + size*n
- for i := uintptr(0); i < n; i++ {
- v := (*mlink)(unsafe.Pointer(p))
- *tailp = v
- tailp = &v.next
+ head := gclinkptr(p)
+ tail := gclinkptr(p)
+ // i==0 iteration already done
+ for i := uintptr(1); i < n; i++ {
p += size
+ tail.ptr().next = gclinkptr(p)
+ tail = gclinkptr(p)
}
- *tailp = nil
+ if s.freelist.ptr() != nil {
+ gothrow("freelist not empty")
+ }
+ tail.ptr().next = 0
+ s.freelist = head
markspan(unsafe.Pointer(uintptr(s.start)<<_PageShift), size, n, size*n < s.npages<<_PageShift)
return s
}
diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go
index f44d7ddbc..a13de0488 100644
--- a/src/runtime/mgc.go
+++ b/src/runtime/mgc.go
@@ -7,22 +7,72 @@
// Garbage collector (GC).
//
-// GC is:
-// - mark&sweep
-// - mostly precise (with the exception of some C-allocated objects, assembly frames/arguments, etc)
-// - parallel (up to MaxGcproc threads)
-// - partially concurrent (mark is stop-the-world, while sweep is concurrent)
-// - non-moving/non-compacting
-// - full (non-partial)
+// The GC runs concurrently with mutator threads, is type accurate (aka precise), and allows multiple
+// GC threads to run in parallel. It is a concurrent mark and sweep that uses a write barrier. It is
+// non-generational and non-compacting. Allocation is done using size segregated per P allocation
+// areas to minimize fragmentation while eliminating locks in the common case.
//
-// GC rate.
-// Next GC is after we've allocated an extra amount of memory proportional to
-// the amount already in use. The proportion is controlled by GOGC environment variable
-// (100 by default). If GOGC=100 and we're using 4M, we'll GC again when we get to 8M
-// (this mark is tracked in next_gc variable). This keeps the GC cost in linear
-// proportion to the allocation cost. Adjusting GOGC just changes the linear constant
-// (and also the amount of extra memory used).
+// The algorithm decomposes into several steps.
+// This is a high level description of the algorithm being used. For an overview of GC a good
+// place to start is Richard Jones' gchandbook.org.
+//
+// The algorithm's intellectual heritage includes Dijkstra's on-the-fly algorithm, see
+// Edsger W. Dijkstra, Leslie Lamport, A. J. Martin, C. S. Scholten, and E. F. M. Steffens. 1978.
+// On-the-fly garbage collection: an exercise in cooperation. Commun. ACM 21, 11 (November 1978), 966-975.
+// For journal quality proofs that these steps are complete, correct, and terminate see
+// Hudson, R., and Moss, J.E.B. Copying Garbage Collection without stopping the world.
+// Concurrency and Computation: Practice and Experience 15(3-5), 2003.
//
+// 0. Set phase = GCscan from GCoff.
+// 1. Wait for all P's to acknowledge phase change.
+// At this point all goroutines have passed through a GC safepoint and
+// know we are in the GCscan phase.
+// 2. GC scans all goroutine stacks, mark and enqueues all encountered pointers
+// (marking avoids most duplicate enqueuing but races may produce duplication which is benign).
+// Preempted goroutines are scanned before P schedules next goroutine.
+// 3. Set phase = GCmark.
+// 4. Wait for all P's to acknowledge phase change.
+// 5. Now write barrier marks and enqueues black, grey, or white to white pointers.
+// Malloc still allocates white (non-marked) objects.
+// 6. Meanwhile GC transitively walks the heap marking reachable objects.
+// 7. When GC finishes marking heap, it preempts P's one-by-one and
+// retakes partial wbufs (filled by write barrier or during a stack scan of the goroutine
+// currently scheduled on the P).
+// 8. Once the GC has exhausted all available marking work it sets phase = marktermination.
+// 9. Wait for all P's to acknowledge phase change.
+// 10. Malloc now allocates black objects, so number of unmarked reachable objects
+// monotonically decreases.
+// 11. GC preempts P's one-by-one taking partial wbufs and marks all unmarked yet reachable objects.
+// 12. When GC completes a full cycle over P's and discovers no new grey
+// objects, (which means all reachable objects are marked) set phase = GCsweep.
+// 13. Wait for all P's to acknowledge phase change.
+// 14. Now malloc allocates white (but sweeps spans before use).
+// Write barrier becomes nop.
+// 15. GC does background sweeping, see description below.
+// 16. When sweeping is complete set phase to GCoff.
+// 17. When sufficient allocation has taken place replay the sequence starting at 0 above,
+// see discussion of GC rate below.
+
+// Changing phases.
+// Phases are changed by setting the gcphase to the next phase and possibly calling ackgcphase.
+// All phase action must be benign in the presence of a change.
+// Starting with GCoff
+// GCoff to GCscan
+// GCscan scans stacks and globals, greying them, and never marks an object black.
+// Once all the P's are aware of the new phase they will scan gs on preemption.
+// This means that the scanning of preempted gs can't start until all the Ps
+// have acknowledged.
+// GCscan to GCmark
+// GCMark turns on the write barrier which also only greys objects. No scanning
+// of objects (making them black) can happen until all the Ps have acknowledged
+// the phase change.
+// GCmark to GCmarktermination
+// The only change here is that we start allocating black so the Ps must acknowledge
+// the change before we begin the termination algorithm
+// GCmarktermination to GCsweep
+// Object currently on the freelist must be marked black for this to work.
+// Are things on the free lists black or white? How does the sweep phase work?
+
// Concurrent sweep.
// The sweep phase proceeds concurrently with normal program execution.
// The heap is swept span-by-span both lazily (when a goroutine needs another span)
@@ -53,6 +103,14 @@
// The finalizer goroutine is kicked off only when all spans are swept.
// When the next GC starts, it sweeps all not-yet-swept spans (if any).
+// GC rate.
+// Next GC is after we've allocated an extra amount of memory proportional to
+// the amount already in use. The proportion is controlled by GOGC environment variable
+// (100 by default). If GOGC=100 and we're using 4M, we'll GC again when we get to 8M
+// (this mark is tracked in next_gc variable). This keeps the GC cost in linear
+// proportion to the allocation cost. Adjusting GOGC just changes the linear constant
+// (and also the amount of extra memory used).
+
package runtime
import "unsafe"
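The phase list above leans on the write barrier greying whatever pointer a mutator installs while marking is under way (the barrier comes on at step 3 and becomes a no-op again at step 14); this is an insertion-style barrier in the spirit of the Dijkstra et al. paper cited. Below is a conceptual sketch, not the runtime's writebarrierptr; the types and helpers are invented for illustration.

package main

type color int

const (
	white color = iota // not yet reached this cycle
	grey               // reached, fields not yet scanned (sitting in a workbuf)
	black              // reached and fully scanned
)

type object struct {
	mark  color
	field *object
}

// greyQueue stands in for the lock-free full/partial workbuf lists.
var greyQueue []*object

// shade greys a white object and queues it for scanning.
func shade(o *object) {
	if o != nil && o.mark == white {
		o.mark = grey
		greyQueue = append(greyQueue, o)
	}
}

// barrierOn is the moral equivalent of gcphase being GCmark or GCmarktermination.
var barrierOn bool

// writePointer is what a pointer store conceptually compiles to while the
// barrier is active: do the store, then make sure the new target cannot
// remain white behind the collector's back.
func writePointer(slot **object, val *object) {
	*slot = val
	if barrierOn {
		shade(val)
	}
}

func main() {
	barrierOn = true
	a, b := &object{}, &object{}
	writePointer(&a.field, b) // b is greyed instead of staying white
	_ = greyQueue
}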
@@ -75,7 +133,7 @@ const (
// ptrmask for an allocation containing a single pointer.
var oneptr = [...]uint8{bitsPointer}
-// Initialized from $GOGC. GOGC=off means no gc.
+// Initialized from $GOGC. GOGC=off means no GC.
var gcpercent int32
// Holding worldsema grants an M the right to try to stop the world.
@@ -93,6 +151,17 @@ var gcpercent int32
//
var worldsema uint32 = 1
+// It is a bug if bits does not have bitBoundary set but
+// there are still some cases where this happens related
+// to stack spans.
+type markbits struct {
+ bitp *byte // pointer to the byte holding xbits
+ shift uintptr // bits xbits needs to be shifted to get bits
+ xbits byte // byte holding all the bits from *bitp
+ bits byte // mark and boundary bits relevant to corresponding slot.
+ tbits byte // pointer||scalar bits relevant to corresponding slot.
+}
+
type workbuf struct {
node lfnode // must be first
nobj uintptr
@@ -121,6 +190,7 @@ var nbadblock int32
type workdata struct {
full uint64 // lock-free list of full blocks
empty uint64 // lock-free list of empty blocks
+ partial uint64 // lock-free list of partially filled blocks
pad0 [_CacheLineSize]uint8 // prevents false-sharing between full/empty and nproc/nwait
nproc uint32
tstart int64
@@ -143,293 +213,405 @@ func have_cgo_allocate() bool {
return &weak_cgo_allocate != nil
}
-// scanblock scans a block of n bytes starting at pointer b for references
-// to other objects, scanning any it finds recursively until there are no
-// unscanned objects left. Instead of using an explicit recursion, it keeps
-// a work list in the Workbuf* structures and loops in the main function
-// body. Keeping an explicit work list is easier on the stack allocator and
-// more efficient.
-func scanblock(b, n uintptr, ptrmask *uint8) {
- // Cache memory arena parameters in local vars.
- arena_start := mheap_.arena_start
- arena_used := mheap_.arena_used
-
- wbuf := getempty(nil)
- nobj := wbuf.nobj
- wp := &wbuf.obj[nobj]
- keepworking := b == 0
+// To help debug the concurrent GC we remark with the world
+// stopped, ensuring that any object encountered has its normal
+// mark bit set. To do this we use an orthogonal bit
+// pattern to indicate the object is marked. The following pattern
+// uses the upper two bits in the object's boundary nibble.
+// 01: scalar not marked
+// 10: pointer not marked
+// 11: pointer marked
+// 00: scalar marked
+// Xoring with 01 will flip the pattern from marked to unmarked and vice versa.
+// The higher bit is 1 for pointers and 0 for scalars, whether the object
+// is marked or not.
+// The first nibble no longer holds the bitsDead pattern indicating that
+// there are no more pointers in the object. This information is held
+// in the second nibble.
+
+// When marking an object if the bool checkmark is true one uses the above
+// encoding, otherwise one uses the bitMarked bit in the lower two bits
+// of the nibble.
+var (
+ checkmark = false
+ gccheckmarkenable = true
+)
- var ptrbitp unsafe.Pointer
+// Is address b in the known heap? If it doesn't have a valid gcmap,
+// this returns false. For example, pointers into stacks will return false.
+func inheap(b uintptr) bool {
+ if b == 0 || b < mheap_.arena_start || b >= mheap_.arena_used {
+ return false
+ }
+ // Not a beginning of a block, consult span table to find the block beginning.
+ k := b >> _PageShift
+ x := k
+ x -= mheap_.arena_start >> _PageShift
+ s := h_spans[x]
+ if s == nil || pageID(k) < s.start || b >= s.limit || s.state != mSpanInUse {
+ return false
+ }
+ return true
+}
- // ptrmask can have 2 possible values:
- // 1. nil - obtain pointer mask from GC bitmap.
- // 2. pointer to a compact mask (for stacks and data).
- goto_scanobj := b != 0
+// Given an address in the heap return the relevant byte from the gcmap. This routine
+// can be used on addresses to the start of an object or to the interior of an object.
+func slottombits(obj uintptr, mbits *markbits) {
+ off := (obj&^(ptrSize-1) - mheap_.arena_start) / ptrSize
+ mbits.bitp = (*byte)(unsafe.Pointer(mheap_.arena_start - off/wordsPerBitmapByte - 1))
+ mbits.shift = off % wordsPerBitmapByte * gcBits
+ mbits.xbits = *mbits.bitp
+ mbits.bits = (mbits.xbits >> mbits.shift) & bitMask
+ mbits.tbits = ((mbits.xbits >> mbits.shift) & bitPtrMask) >> 2
+}
+// b is a pointer into the heap.
+// Find the start of the object refered to by b.
+// Set mbits to the associated bits from the bit map.
+// If b is not a valid heap object return nil and
+// undefined values in mbits.
+func objectstart(b uintptr, mbits *markbits) uintptr {
+ obj := b &^ (ptrSize - 1)
for {
- if goto_scanobj {
- goto_scanobj = false
- } else {
- if nobj == 0 {
- // Out of work in workbuf.
- if !keepworking {
- putempty(wbuf)
- return
- }
+ slottombits(obj, mbits)
+ if mbits.bits&bitBoundary == bitBoundary {
+ break
+ }
- // Refill workbuf from global queue.
- wbuf = getfull(wbuf)
- if wbuf == nil {
- return
- }
- nobj = wbuf.nobj
- if nobj < uintptr(len(wbuf.obj)) {
- wp = &wbuf.obj[nobj]
- } else {
- wp = nil
- }
+ // Not a beginning of a block, consult span table to find the block beginning.
+ k := b >> _PageShift
+ x := k
+ x -= mheap_.arena_start >> _PageShift
+ s := h_spans[x]
+ if s == nil || pageID(k) < s.start || b >= s.limit || s.state != mSpanInUse {
+ if s != nil && s.state == _MSpanStack {
+ return 0 // This is legit.
}
- // If another proc wants a pointer, give it some.
- if work.nwait > 0 && nobj > 4 && work.full == 0 {
- wbuf.nobj = nobj
- wbuf = handoff(wbuf)
- nobj = wbuf.nobj
- if nobj < uintptr(len(wbuf.obj)) {
- wp = &wbuf.obj[nobj]
+ // The following ensures that we are rigorous about what data
+ // structures hold valid pointers
+ if false {
+ // Still happens sometimes. We don't know why.
+ printlock()
+ print("runtime:objectstart Span weird: obj=", hex(obj), " k=", hex(k))
+ if s == nil {
+ print(" s=nil\n")
} else {
- wp = nil
+ print(" s.start=", hex(s.start<<_PageShift), " s.limit=", hex(s.limit), " s.state=", s.state, "\n")
}
+ printunlock()
+ gothrow("objectstart: bad pointer in unexpected span")
}
-
- nobj--
- wp = &wbuf.obj[nobj]
- b = *wp
- n = arena_used - uintptr(b)
- ptrmask = nil // use GC bitmap for pointer info
+ return 0
}
- if _DebugGCPtrs {
- print("scanblock ", b, " +", hex(n), " ", ptrmask, "\n")
+ p := uintptr(s.start) << _PageShift
+ if s.sizeclass != 0 {
+ size := s.elemsize
+ idx := (obj - p) / size
+ p = p + idx*size
}
-
- // Find bits of the beginning of the object.
- if ptrmask == nil {
- off := (uintptr(b) - arena_start) / ptrSize
- ptrbitp = unsafe.Pointer(arena_start - off/wordsPerBitmapByte - 1)
+ if p == obj {
+ print("runtime: failed to find block beginning for ", hex(p), " s=", hex(s.start*_PageSize), " s.limit=", hex(s.limit), "\n")
+ gothrow("failed to find block beginning")
}
+ obj = p
+ }
- var i uintptr
- for i = 0; i < n; i += ptrSize {
- // Find bits for this word.
- var bits uintptr
- if ptrmask == nil {
- // Check if we have reached end of span.
- if (uintptr(b)+i)%_PageSize == 0 &&
- h_spans[(uintptr(b)-arena_start)>>_PageShift] != h_spans[(uintptr(b)+i-arena_start)>>_PageShift] {
- break
- }
+ // if size(obj.firstfield) < PtrSize, the &obj.secondfield could map to the boundary bit
+ // Clear any low bits to get to the start of the object.
+ // greyobject depends on this.
+ return obj
+}
- // Consult GC bitmap.
- bits = uintptr(*(*byte)(ptrbitp))
+// Slow for now as we serialize this, since this is on a debug path
+// speed is not critical at this point.
+var andlock mutex
- if wordsPerBitmapByte != 2 {
- gothrow("alg doesn't work for wordsPerBitmapByte != 2")
- }
- j := (uintptr(b) + i) / ptrSize & 1
- ptrbitp = add(ptrbitp, -j)
- bits >>= gcBits * j
+func atomicand8(src *byte, val byte) {
+ lock(&andlock)
+ *src &= val
+ unlock(&andlock)
+}
- if bits&bitBoundary != 0 && i != 0 {
- break // reached beginning of the next object
- }
- bits = (bits >> 2) & bitsMask
- if bits == bitsDead {
- break // reached no-scan part of the object
- }
- } else {
- // dense mask (stack or data)
- bits = (uintptr(*(*byte)(add(unsafe.Pointer(ptrmask), (i/ptrSize)/4))) >> (((i / ptrSize) % 4) * bitsPerPointer)) & bitsMask
- }
+// Mark using the checkmark scheme.
+func docheckmark(mbits *markbits) {
+ // xor 01 moves 01(scalar unmarked) to 00(scalar marked)
+ // and 10(pointer unmarked) to 11(pointer marked)
+ if mbits.tbits == _BitsScalar {
+ atomicand8(mbits.bitp, ^byte(_BitsCheckMarkXor<<mbits.shift<<2))
+ } else if mbits.tbits == _BitsPointer {
+ atomicor8(mbits.bitp, byte(_BitsCheckMarkXor<<mbits.shift<<2))
+ }
- if bits <= _BitsScalar { // BitsScalar || BitsDead
- continue
- }
+ // reload bits for ischeckmarked
+ mbits.xbits = *mbits.bitp
+ mbits.bits = (mbits.xbits >> mbits.shift) & bitMask
+ mbits.tbits = ((mbits.xbits >> mbits.shift) & bitPtrMask) >> 2
+}
- if bits != _BitsPointer {
- gothrow("unexpected garbage collection bits")
- }
+// In the default scheme does mbits refer to a marked object.
+func ismarked(mbits *markbits) bool {
+ if mbits.bits&bitBoundary != bitBoundary {
+ gothrow("ismarked: bits should have boundary bit set")
+ }
+ return mbits.bits&bitMarked == bitMarked
+}
- obj := *(*uintptr)(unsafe.Pointer(b + i))
- obj0 := obj
+// In the checkmark scheme does mbits refer to a marked object.
+func ischeckmarked(mbits *markbits) bool {
+ if mbits.bits&bitBoundary != bitBoundary {
+ gothrow("ischeckmarked: bits should have boundary bit set")
+ }
+ return mbits.tbits == _BitsScalarMarked || mbits.tbits == _BitsPointerMarked
+}
- markobj:
- var s *mspan
- var off, bitp, shift, xbits uintptr
+// When in GCmarkterminate phase we allocate black.
+func gcmarknewobject_m(obj uintptr) {
+ if gcphase != _GCmarktermination {
+ gothrow("marking new object while not in mark termination phase")
+ }
+ if checkmark { // The world should be stopped so this should not happen.
+ gothrow("gcmarknewobject called while doing checkmark")
+ }
- // At this point we have extracted the next potential pointer.
- // Check if it points into heap.
- if obj == 0 {
- continue
- }
- if obj < arena_start || arena_used <= obj {
- if uintptr(obj) < _PhysPageSize && invalidptr != 0 {
- s = nil
- goto badobj
- }
- continue
- }
+ var mbits markbits
+ slottombits(obj, &mbits)
+ if mbits.bits&bitMarked != 0 {
+ return
+ }
- // Mark the object.
- obj &^= ptrSize - 1
- off = (obj - arena_start) / ptrSize
- bitp = arena_start - off/wordsPerBitmapByte - 1
- shift = (off % wordsPerBitmapByte) * gcBits
- xbits = uintptr(*(*byte)(unsafe.Pointer(bitp)))
- bits = (xbits >> shift) & bitMask
- if (bits & bitBoundary) == 0 {
- // Not a beginning of a block, consult span table to find the block beginning.
- k := pageID(obj >> _PageShift)
- x := k
- x -= pageID(arena_start >> _PageShift)
- s = h_spans[x]
- if s == nil || k < s.start || s.limit <= obj || s.state != mSpanInUse {
- // Stack pointers lie within the arena bounds but are not part of the GC heap.
- // Ignore them.
- if s != nil && s.state == _MSpanStack {
- continue
- }
- goto badobj
- }
- p := uintptr(s.start) << _PageShift
- if s.sizeclass != 0 {
- size := s.elemsize
- idx := (obj - p) / size
- p = p + idx*size
- }
- if p == obj {
- print("runtime: failed to find block beginning for ", hex(p), " s=", hex(s.start*_PageSize), " s.limit=", hex(s.limit), "\n")
- gothrow("failed to find block beginning")
+ // Each byte of GC bitmap holds info for two words.
+ // If the current object is larger than two words, or if the object is one word
+ // but the object it shares the byte with is already marked,
+ // then all the possible concurrent updates are trying to set the same bit,
+ // so we can use a non-atomic update.
+ if mbits.xbits&(bitMask|(bitMask<<gcBits)) != bitBoundary|bitBoundary<<gcBits || work.nproc == 1 {
+ *mbits.bitp = mbits.xbits | bitMarked<<mbits.shift
+ } else {
+ atomicor8(mbits.bitp, bitMarked<<mbits.shift)
+ }
+}
+
+// obj is the start of an object with mark mbits.
+// If it isn't already marked, mark it and enqueue into workbuf.
+// Return possibly new workbuf to use.
+func greyobject(obj uintptr, mbits *markbits, wbuf *workbuf) *workbuf {
+ // obj should be start of allocation, and so must be at least pointer-aligned.
+ if obj&(ptrSize-1) != 0 {
+ gothrow("greyobject: obj not pointer-aligned")
+ }
+
+ if checkmark {
+ if !ismarked(mbits) {
+ print("runtime:greyobject: checkmarks finds unexpected unmarked object obj=", hex(obj), ", mbits->bits=", hex(mbits.bits), " *mbits->bitp=", hex(*mbits.bitp), "\n")
+
+ k := obj >> _PageShift
+ x := k
+ x -= mheap_.arena_start >> _PageShift
+ s := h_spans[x]
+ printlock()
+ print("runtime:greyobject Span: obj=", hex(obj), " k=", hex(k))
+ if s == nil {
+ print(" s=nil\n")
+ } else {
+ print(" s.start=", hex(s.start*_PageSize), " s.limit=", hex(s.limit), " s.sizeclass=", s.sizeclass, " s.elemsize=", s.elemsize, "\n")
+ // NOTE(rsc): This code is using s.sizeclass as an approximation of the
+ // number of pointer-sized words in an object. Perhaps not what was intended.
+ for i := 0; i < int(s.sizeclass); i++ {
+ print(" *(obj+", i*ptrSize, ") = ", hex(*(*uintptr)(unsafe.Pointer(obj + uintptr(i)*ptrSize))), "\n")
}
- obj = p
- goto markobj
}
+ gothrow("checkmark found unmarked object")
+ }
+ if ischeckmarked(mbits) {
+ return wbuf
+ }
+ docheckmark(mbits)
+ if !ischeckmarked(mbits) {
+ print("mbits xbits=", hex(mbits.xbits), " bits=", hex(mbits.bits), " tbits=", hex(mbits.tbits), " shift=", mbits.shift, "\n")
+ gothrow("docheckmark and ischeckmarked disagree")
+ }
+ } else {
+ // If marked we have nothing to do.
+ if mbits.bits&bitMarked != 0 {
+ return wbuf
+ }
- if _DebugGCPtrs {
- print("scan *", hex(b+i), " = ", hex(obj0), " => base ", hex(obj), "\n")
- }
+ // Each byte of GC bitmap holds info for two words.
+ // If the current object is larger than two words, or if the object is one word
+ // but the object it shares the byte with is already marked,
+ // then all the possible concurrent updates are trying to set the same bit,
+ // so we can use a non-atomic update.
+ if mbits.xbits&(bitMask|bitMask<<gcBits) != bitBoundary|bitBoundary<<gcBits || work.nproc == 1 {
+ *mbits.bitp = mbits.xbits | bitMarked<<mbits.shift
+ } else {
+ atomicor8(mbits.bitp, bitMarked<<mbits.shift)
+ }
+ }
- if nbadblock > 0 && obj == badblock[nbadblock-1] {
- // Running garbage collection again because
- // we want to find the path from a root to a bad pointer.
- // Found possible next step; extend or finish path.
- for j := int32(0); j < nbadblock; j++ {
- if badblock[j] == b {
- goto AlreadyBad
- }
- }
- print("runtime: found *(", hex(b), "+", hex(i), ") = ", hex(obj0), "+", hex(obj-obj0), "\n")
- if ptrmask != nil {
- gothrow("bad pointer")
- }
- if nbadblock >= int32(len(badblock)) {
- gothrow("badblock trace too long")
- }
- badblock[nbadblock] = uintptr(b)
- nbadblock++
- AlreadyBad:
+ if !checkmark && (mbits.xbits>>(mbits.shift+2))&_BitsMask == _BitsDead {
+ return wbuf // noscan object
+ }
+
+ // Queue the obj for scanning. The PREFETCH(obj) logic has been removed but
+ // seems like a nice optimization that can be added back in.
+ // There needs to be time between the PREFETCH and the use.
+ // Previously we put the obj in an 8 element buffer that is drained at a rate
+ // to give the PREFETCH time to do its work.
+ // Use of PREFETCHNTA might be more appropriate than PREFETCH
+
+ // If workbuf is full, obtain an empty one.
+ if wbuf.nobj >= uintptr(len(wbuf.obj)) {
+ wbuf = getempty(wbuf)
+ }
+
+ wbuf.obj[wbuf.nobj] = obj
+ wbuf.nobj++
+ return wbuf
+}
+
+// Scan the object b of size n, adding pointers to wbuf.
+// Return possibly new wbuf to use.
+// If ptrmask != nil, it specifies where pointers are in b.
+// If ptrmask == nil, the GC bitmap should be consulted.
+// In this case, n may be an overestimate of the size; the GC bitmap
+// must also be used to make sure the scan stops at the end of b.
+func scanobject(b, n uintptr, ptrmask *uint8, wbuf *workbuf) *workbuf {
+ arena_start := mheap_.arena_start
+ arena_used := mheap_.arena_used
+
+ // Find bits of the beginning of the object.
+ var ptrbitp unsafe.Pointer
+ var mbits markbits
+ if ptrmask == nil {
+ b = objectstart(b, &mbits)
+ if b == 0 {
+ return wbuf
+ }
+ ptrbitp = unsafe.Pointer(mbits.bitp)
+ }
+ for i := uintptr(0); i < n; i += ptrSize {
+ // Find bits for this word.
+ var bits uintptr
+ if ptrmask != nil {
+ // dense mask (stack or data)
+ bits = (uintptr(*(*byte)(add(unsafe.Pointer(ptrmask), (i/ptrSize)/4))) >> (((i / ptrSize) % 4) * bitsPerPointer)) & bitsMask
+ } else {
+ // Check if we have reached end of span.
+ // n is an overestimate of the size of the object.
+ if (b+i)%_PageSize == 0 && h_spans[(b-arena_start)>>_PageShift] != h_spans[(b+i-arena_start)>>_PageShift] {
+ break
}
- // Now we have bits, bitp, and shift correct for
- // obj pointing at the base of the object.
- // Only care about not marked objects.
- if bits&bitMarked != 0 {
- continue
+ // Consult GC bitmap.
+ bits = uintptr(*(*byte)(ptrbitp))
+ if wordsPerBitmapByte != 2 {
+ gothrow("alg doesn't work for wordsPerBitmapByte != 2")
}
+ j := (uintptr(b) + i) / ptrSize & 1 // j indicates upper nibble or lower nibble
+ bits >>= gcBits * j
+ if i == 0 {
+ bits &^= bitBoundary
+ }
+ ptrbitp = add(ptrbitp, -j)
- // If obj size is greater than 8, then each byte of GC bitmap
- // contains info for at most one object. In such case we use
- // non-atomic byte store to mark the object. This can lead
- // to double enqueue of the object for scanning, but scanning
- // is an idempotent operation, so it is OK. This cannot lead
- // to bitmap corruption because the single marked bit is the
- // only thing that can change in the byte.
- // For 8-byte objects we use non-atomic store, if the other
- // quadruple is already marked. Otherwise we resort to CAS
- // loop for marking.
- if xbits&(bitMask|bitMask<<gcBits) != bitBoundary|bitBoundary<<gcBits || work.nproc == 1 {
- *(*byte)(unsafe.Pointer(bitp)) = uint8(xbits | bitMarked<<shift)
- } else {
- atomicor8((*byte)(unsafe.Pointer(bitp)), bitMarked<<shift)
+ if bits&bitBoundary != 0 && i != 0 {
+ break // reached beginning of the next object
}
+ bits = (bits & bitPtrMask) >> 2 // bits refer to the type bits.
- if (xbits>>(shift+2))&bitsMask == bitsDead {
- continue // noscan object
+ if i != 0 && bits == bitsDead { // BitsDead in first nibble not valid during checkmark
+ break // reached no-scan part of the object
}
+ }
- // Queue the obj for scanning.
- // TODO: PREFETCH here.
+ if bits <= _BitsScalar { // _BitsScalar, _BitsDead, _BitsScalarMarked
+ continue
+ }
- // If workbuf is full, obtain an empty one.
- if nobj >= uintptr(len(wbuf.obj)) {
- wbuf.nobj = nobj
- wbuf = getempty(wbuf)
- nobj = wbuf.nobj
- wp = &wbuf.obj[nobj]
- }
- *wp = obj
- nobj++
- if nobj < uintptr(len(wbuf.obj)) {
- wp = &wbuf.obj[nobj]
- } else {
- wp = nil
- }
+ if bits&_BitsPointer != _BitsPointer {
+ print("gc checkmark=", checkmark, " b=", hex(b), " ptrmask=", ptrmask, " mbits.bitp=", mbits.bitp, " mbits.xbits=", hex(mbits.xbits), " bits=", hex(bits), "\n")
+ gothrow("unexpected garbage collection bits")
+ }
+
+ obj := *(*uintptr)(unsafe.Pointer(b + i))
+
+ // At this point we have extracted the next potential pointer.
+ // Check if it points into heap.
+ if obj == 0 || obj < arena_start || obj >= arena_used {
+ continue
+ }
+
+ // Mark the object and return some important bits.
+ // If we combine the following two routines we don't have to pass mbits or obj around.
+ var mbits markbits
+ obj = objectstart(obj, &mbits)
+ if obj == 0 {
continue
+ }
+ wbuf = greyobject(obj, &mbits, wbuf)
+ }
+ return wbuf
+}
- badobj:
- // If cgo_allocate is linked into the binary, it can allocate
- // memory as []unsafe.Pointer that may not contain actual
- // pointers and must be scanned conservatively.
- // In this case alone, allow the bad pointer.
- if have_cgo_allocate() && ptrmask == nil {
- continue
+// scanblock starts by scanning b as scanobject would.
+// If the gcphase is GCscan, that's all scanblock does.
+// Otherwise it traverses some fraction of the pointers it found in b, recursively.
+// As a special case, scanblock(nil, 0, nil) means to scan previously queued work,
+// stopping only when no work is left in the system.
+func scanblock(b, n uintptr, ptrmask *uint8) {
+ wbuf := getpartialorempty()
+ if b != 0 {
+ wbuf = scanobject(b, n, ptrmask, wbuf)
+ if gcphase == _GCscan {
+ if inheap(b) && ptrmask == nil {
+ // b is in heap, we are in GCscan so there should be a ptrmask.
+ gothrow("scanblock: In GCscan phase and inheap is true.")
}
+		// GCscan only goes one level deep since the mark write barrier is not turned on.
+ putpartial(wbuf)
+ return
+ }
+ }
+ if gcphase == _GCscan {
+ gothrow("scanblock: In GCscan phase but no b passed in.")
+ }
- // Anything else indicates a bug somewhere.
- // If we're in the middle of chasing down a different bad pointer,
- // don't confuse the trace by printing about this one.
- if nbadblock > 0 {
- continue
- }
+ keepworking := b == 0
- print("runtime: garbage collector found invalid heap pointer *(", hex(b), "+", hex(i), ")=", hex(obj))
- if s == nil {
- print(" s=nil\n")
- } else {
- print(" span=", uintptr(s.start)<<_PageShift, "-", s.limit, "-", (uintptr(s.start)+s.npages)<<_PageShift, " state=", s.state, "\n")
+ // ptrmask can have 2 possible values:
+ // 1. nil - obtain pointer mask from GC bitmap.
+ // 2. pointer to a compact mask (for stacks and data).
+ for {
+ if wbuf.nobj == 0 {
+ if !keepworking {
+ putempty(wbuf)
+ return
}
- if ptrmask != nil {
- gothrow("invalid heap pointer")
+ // Refill workbuf from global queue.
+ wbuf = getfull(wbuf)
+ if wbuf == nil { // nil means out of work barrier reached
+ return
}
- // Add to badblock list, which will cause the garbage collection
- // to keep repeating until it has traced the chain of pointers
- // leading to obj all the way back to a root.
- if nbadblock == 0 {
- badblock[nbadblock] = uintptr(b)
- nbadblock++
+
+ if wbuf.nobj <= 0 {
+ gothrow("runtime:scanblock getfull returns empty buffer")
}
}
- if _DebugGCPtrs {
- print("end scanblock ", hex(b), " +", hex(n), " ", ptrmask, "\n")
- }
- if _DebugGC > 0 && ptrmask == nil {
- // For heap objects ensure that we did not overscan.
- var p, n uintptr
- if mlookup(b, &p, &n, nil) == 0 || b != p || i > n {
- print("runtime: scanned (", hex(b), "+", hex(i), "), heap object (", hex(p), "+", hex(n), ")\n")
- gothrow("scanblock: scanned invalid object")
- }
+
+ // If another proc wants a pointer, give it some.
+ if work.nwait > 0 && wbuf.nobj > 4 && work.full == 0 {
+ wbuf = handoff(wbuf)
}
+
+ // This might be a good place to add prefetch code...
+ // if(wbuf->nobj > 4) {
+		//         PREFETCH(wbuf->obj[wbuf->nobj - 3]);
+ // }
+ wbuf.nobj--
+ b = wbuf.obj[wbuf.nobj]
+ wbuf = scanobject(b, mheap_.arena_used-b, nil, wbuf)
}
}
@@ -455,7 +637,8 @@ func markroot(desc *parfor, i uint32) {
if s.state != mSpanInUse {
continue
}
- if s.sweepgen != sg {
+ if !checkmark && s.sweepgen != sg {
+ // sweepgen was updated (+2) during non-checkmark GC pass
print("sweep ", s.sweepgen, " ", sg, "\n")
gothrow("gc: unswept span")
}
@@ -468,13 +651,17 @@ func markroot(desc *parfor, i uint32) {
spf := (*specialfinalizer)(unsafe.Pointer(sp))
// A finalizer can be set for an inner byte of an object, find object beginning.
p := uintptr(s.start<<_PageShift) + uintptr(spf.special.offset)/s.elemsize*s.elemsize
- scanblock(p, s.elemsize, nil)
+ if gcphase != _GCscan {
+ scanblock(p, s.elemsize, nil) // scanned during mark phase
+ }
scanblock(uintptr(unsafe.Pointer(&spf.fn)), ptrSize, &oneptr[0])
}
}
case _RootFlushCaches:
- flushallmcaches()
+ if gcphase != _GCscan { // Do not flush mcaches during GCscan phase.
+ flushallmcaches()
+ }
default:
// the rest is scanning goroutine stacks
@@ -482,21 +669,44 @@ func markroot(desc *parfor, i uint32) {
gothrow("markroot: bad index")
}
gp := allgs[i-_RootCount]
+
// remember when we've first observed the G blocked
// needed only to output in traceback
- status := readgstatus(gp)
+ status := readgstatus(gp) // We are not in a scan state
if (status == _Gwaiting || status == _Gsyscall) && gp.waitsince == 0 {
gp.waitsince = work.tstart
}
- // Shrink a stack if not much of it is being used.
- shrinkstack(gp)
+
+ // Shrink a stack if not much of it is being used but not in the scan phase.
+ if gcphase != _GCscan { // Do not shrink during GCscan phase.
+ shrinkstack(gp)
+ }
if readgstatus(gp) == _Gdead {
gp.gcworkdone = true
} else {
gp.gcworkdone = false
}
restart := stopg(gp)
- scanstack(gp)
+
+		// The goroutine will scan its own stack when it stops running.
+		// Wait until it has.
+ for readgstatus(gp) == _Grunning && !gp.gcworkdone {
+ }
+
+		// scanstack(gp) is done as part of gcphasework,
+		// but to make sure we have finished we need to make sure that
+		// the stack traps have all responded, so drop into
+		// this loop until they respond.
+ for !gp.gcworkdone {
+ status = readgstatus(gp)
+ if status == _Gdead {
+ gp.gcworkdone = true // scan is a noop
+ break
+ }
+ if status == _Gwaiting || status == _Grunnable {
+ restart = stopg(gp)
+ }
+ }
if restart {
restartg(gp)
}
@@ -506,48 +716,83 @@ func markroot(desc *parfor, i uint32) {
// Get an empty work buffer off the work.empty list,
// allocating new buffers as needed.
func getempty(b *workbuf) *workbuf {
- _g_ := getg()
if b != nil {
- lfstackpush(&work.full, &b.node)
+ putfull(b)
+ b = nil
}
- b = nil
- c := _g_.m.mcache
- if c.gcworkbuf != nil {
- b = (*workbuf)(c.gcworkbuf)
- c.gcworkbuf = nil
- }
- if b == nil {
+ if work.empty != 0 {
b = (*workbuf)(lfstackpop(&work.empty))
}
+ if b != nil && b.nobj != 0 {
+ _g_ := getg()
+ print("m", _g_.m.id, ": getempty: popped b=", b, " with non-zero b.nobj=", b.nobj, "\n")
+ gothrow("getempty: workbuffer not empty, b->nobj not 0")
+ }
if b == nil {
b = (*workbuf)(persistentalloc(unsafe.Sizeof(*b), _CacheLineSize, &memstats.gc_sys))
+ b.nobj = 0
}
- b.nobj = 0
return b
}
func putempty(b *workbuf) {
- _g_ := getg()
- c := _g_.m.mcache
- if c.gcworkbuf == nil {
- c.gcworkbuf = (unsafe.Pointer)(b)
- return
+ if b.nobj != 0 {
+ gothrow("putempty: b->nobj not 0")
}
lfstackpush(&work.empty, &b.node)
}
-func gcworkbuffree(b unsafe.Pointer) {
- if b != nil {
- putempty((*workbuf)(b))
+func putfull(b *workbuf) {
+ if b.nobj <= 0 {
+ gothrow("putfull: b->nobj <= 0")
+ }
+ lfstackpush(&work.full, &b.node)
+}
+
+// Get a partially empty work buffer;
+// if none is available, get an empty one.
+func getpartialorempty() *workbuf {
+ b := (*workbuf)(lfstackpop(&work.partial))
+ if b == nil {
+ b = getempty(nil)
+ }
+ return b
+}
+
+func putpartial(b *workbuf) {
+ if b.nobj == 0 {
+ lfstackpush(&work.empty, &b.node)
+ } else if b.nobj < uintptr(len(b.obj)) {
+ lfstackpush(&work.partial, &b.node)
+ } else if b.nobj == uintptr(len(b.obj)) {
+ lfstackpush(&work.full, &b.node)
+ } else {
+ print("b=", b, " b.nobj=", b.nobj, " len(b.obj)=", len(b.obj), "\n")
+ gothrow("putpartial: bad Workbuf b.nobj")
}
}
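// Editor's sketch, not part of this patch: the routing rule putpartial applies,
// shown on a simplified buffer type. The names workbufSketch and route are
// assumptions for illustration; the real lists are lock-free stacks (lfstack)
// and the real buffer holds uintptrs plus an lfnode.
package main

import "fmt"

type workbufSketch struct {
	obj  [4]uintptr // tiny capacity so every case is easy to hit
	nobj int
}

// route names the list a buffer with nobj objects would be pushed onto,
// mirroring putpartial above: empty, partially filled, or completely full.
func route(b *workbufSketch) string {
	switch {
	case b.nobj == 0:
		return "work.empty"
	case b.nobj < len(b.obj):
		return "work.partial"
	case b.nobj == len(b.obj):
		return "work.full"
	default:
		return "bad Workbuf b.nobj" // putpartial throws here
	}
}

func main() {
	for _, n := range []int{0, 2, 4} {
		fmt.Println(n, "objects ->", route(&workbufSketch{nobj: n}))
	}
}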
-// Get a full work buffer off the work.full list, or return nil.
+// Get a full work buffer off the work.full list, or a partially
+// filled one off the work.partial list. If nothing is available,
+// wait until all the other gc helpers have finished and then
+// return nil.
+// getfull acts as a barrier for work.nproc helpers. As long as one
+// gchelper is actively marking objects it
+// may create a workbuffer that the other helpers can work on.
+// The for loop either exits when a work buffer is found
+// or when _all_ of the work.nproc GC helpers are in the loop
+// looking for work and thus not capable of creating new work.
+// This is in fact the termination condition for the STW mark
+// phase.
func getfull(b *workbuf) *workbuf {
if b != nil {
- lfstackpush(&work.empty, &b.node)
+ putempty(b)
}
+
b = (*workbuf)(lfstackpop(&work.full))
+ if b == nil {
+ b = (*workbuf)(lfstackpop(&work.partial))
+ }
if b != nil || work.nproc == 1 {
return b
}
@@ -557,6 +802,9 @@ func getfull(b *workbuf) *workbuf {
if work.full != 0 {
xadd(&work.nwait, -1)
b = (*workbuf)(lfstackpop(&work.full))
+ if b == nil {
+ b = (*workbuf)(lfstackpop(&work.partial))
+ }
if b != nil {
return b
}
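// Editor's sketch, not part of this patch: the out-of-work barrier described in
// the getfull comment above, modeled with plain atomics on a toy work counter.
// barrierState and tryGetWork are assumptions for illustration; the runtime uses
// xadd/atomicload on work.nwait and the full/partial lfstacks instead.
package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

type barrierState struct {
	nproc int32 // number of helpers participating
	nwait int32 // helpers currently idle, looking for work
	work  int32 // units of work standing in for the full/partial lists
}

// tryGetWork behaves like getfull: a helper that finds no work registers itself
// as waiting; it returns false only once every helper is waiting and no work is
// left, which is the termination condition for the mark phase.
func (s *barrierState) tryGetWork() bool {
	for {
		if atomic.AddInt32(&s.work, -1) >= 0 {
			return true // took one unit of work
		}
		atomic.AddInt32(&s.work, 1) // undo; nothing was there
		atomic.AddInt32(&s.nwait, 1)
		for {
			if atomic.LoadInt32(&s.work) > 0 {
				atomic.AddInt32(&s.nwait, -1)
				break // work appeared; go back and try to take it
			}
			if atomic.LoadInt32(&s.nwait) == s.nproc {
				return false // all helpers idle: no one can create new work
			}
		}
	}
}

func main() {
	s := &barrierState{nproc: 4, work: 10}
	var wg sync.WaitGroup
	for i := int32(0); i < s.nproc; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			n := 0
			for s.tryGetWork() {
				n++
			}
			fmt.Println("helper exited the barrier after", n, "units")
		}()
	}
	wg.Wait()
}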
@@ -675,14 +923,11 @@ func scanframe(frame *stkframe, unused unsafe.Pointer) bool {
}
func scanstack(gp *g) {
- // TODO(rsc): Due to a precedence error, this was never checked in the original C version.
- // If you enable the check, the gothrow happens.
- /*
- if readgstatus(gp)&_Gscan == 0 {
- print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
- gothrow("mark - bad status")
- }
- */
+
+ if readgstatus(gp)&_Gscan == 0 {
+ print("runtime:scanstack: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", hex(readgstatus(gp)), "\n")
+ gothrow("scanstack - bad status")
+ }
switch readgstatus(gp) &^ _Gscan {
default:
@@ -692,7 +937,7 @@ func scanstack(gp *g) {
return
case _Grunning:
print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
- gothrow("mark - world not stopped")
+ gothrow("scanstack: goroutine not stopped")
case _Grunnable, _Gsyscall, _Gwaiting:
// ok
}
@@ -709,18 +954,114 @@ func scanstack(gp *g) {
tracebackdefers(gp, scanframe, nil)
}
-// The gp has been moved to a gc safepoint. If there is gcphase specific
-// work it is done here.
+// If the slot is grey or black, return true; if white, return false.
+// If the slot is not in the known heap and thus does not have a valid GC bitmap then
+// it is considered grey. Globals and stacks can hold such slots.
+// The slot is grey if its mark bit is set and it is enqueued to be scanned.
+// The slot is black if it has already been scanned.
+// It is white if it has a valid mark bit and the bit is not set.
+func shaded(slot uintptr) bool {
+ if !inheap(slot) { // non-heap slots considered grey
+ return true
+ }
+
+ var mbits markbits
+ valid := objectstart(slot, &mbits)
+ if valid == 0 {
+ return true
+ }
+
+ if checkmark {
+ return ischeckmarked(&mbits)
+ }
+
+ return mbits.bits&bitMarked != 0
+}
+
+// Shade the object if it isn't already.
+// The object is not nil and known to be in the heap.
+func shade(b uintptr) {
+ if !inheap(b) {
+ gothrow("shade: passed an address not in the heap")
+ }
+
+ wbuf := getpartialorempty()
+ // Mark the object, return some important bits.
+	// If we combine the following two routines we don't have to pass mbits or obj around.
+ var mbits markbits
+ obj := objectstart(b, &mbits)
+ if obj != 0 {
+ wbuf = greyobject(obj, &mbits, wbuf) // augments the wbuf
+ }
+ putpartial(wbuf)
+}
+
+// This is the Dijkstra barrier coarsened to always shade the ptr (dst) object.
+// The original Dijkstra barrier only shaded ptrs being placed in black slots.
+//
+// Shade indicates that it has seen a white pointer by adding the referent
+// to wbuf as well as marking it.
+//
+// slot is the destination (dst) in go code
+// ptr is the value that goes into the slot (src) in the go code
+//
+// Dijkstra pointed out that maintaining the invariant that there are
+// no black-to-white pointers means that white-to-white pointers need
+// not be noted by the write barrier. Furthermore, if either
+// white object dies before it is reached by the
+// GC then the object can be collected during this GC cycle
+// instead of waiting for the next cycle. Unfortunately the cost of
+// ensuring that the object holding the slot doesn't concurrently
+// change to black without the mutator noticing seems prohibitive.
+//
+// Consider the following example where the mutator writes into
+// a slot and then loads the slot's mark bit while the GC thread
+// writes to the slot's mark bit and then as part of scanning reads
+// the slot.
+//
+// Initially both [slot] and [slotmark] are 0 (nil)
+// Mutator thread GC thread
+// st [slot], ptr st [slotmark], 1
+//
+// ld r1, [slotmark] ld r2, [slot]
+//
+// This is a classic example of independent reads of independent writes,
+// aka IRIW. The question is if r1==r2==0 is allowed and for most HW the
+// answer is yes without inserting memory barriers between the st and the ld.
+// These barriers are expensive so we have decided that we will
+// always grey the ptr object regardless of the slot's color.
+func gcmarkwb_m(slot *uintptr, ptr uintptr) {
+ switch gcphase {
+ default:
+ gothrow("gcphasework in bad gcphase")
+
+ case _GCoff, _GCquiesce, _GCstw, _GCsweep, _GCscan:
+ // ok
+
+ case _GCmark, _GCmarktermination:
+ if ptr != 0 && inheap(ptr) {
+ shade(ptr)
+ }
+ }
+}
+
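// Editor's sketch, not part of this patch: the coarsened Dijkstra-style barrier
// described above, acted out on a toy object graph. object, colour, shade and
// writePointer are assumptions for illustration; the runtime greys an object by
// setting its mark bit and queueing it in a workbuf instead of a slice.
package main

import "fmt"

type colour int

const (
	white colour = iota // not yet reached by the collector
	grey                // reached, still to be scanned
	black               // reached and fully scanned
)

type object struct {
	colour colour
	ref    *object
}

var greyQueue []*object

// shade greys a white object exactly once and queues it for scanning.
func shade(o *object) {
	if o != nil && o.colour == white {
		o.colour = grey
		greyQueue = append(greyQueue, o)
	}
}

// writePointer is the barriered store: always shade the new referent,
// regardless of the colour of the object holding the slot.
func writePointer(slot **object, ptr *object) {
	shade(ptr)
	*slot = ptr
}

func main() {
	holder := &object{colour: black} // already scanned
	newObj := &object{colour: white}
	writePointer(&holder.ref, newObj) // without the barrier, newObj would be lost
	fmt.Println("newObj colour:", newObj.colour, "queued:", len(greyQueue))
}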
+// The gp has been moved to a GC safepoint. GC phase specific
+// work is done here.
func gcphasework(gp *g) {
switch gcphase {
default:
gothrow("gcphasework in bad gcphase")
case _GCoff, _GCquiesce, _GCstw, _GCsweep:
- // No work for now.
+ // No work.
+ case _GCscan:
+ // scan the stack, mark the objects, put pointers in work buffers
+ // hanging off the P where this is being run.
+ scanstack(gp)
case _GCmark:
- // Disabled until concurrent GC is implemented
- // but indicate the scan has been done.
- // scanstack(gp);
+ // No work.
+ case _GCmarktermination:
+ scanstack(gp)
+ // All available mark work will be emptied before returning.
}
gp.gcworkdone = true
}
@@ -797,6 +1138,7 @@ func iterate_finq(callback func(*funcval, unsafe.Pointer, uintptr, *_type, *ptrt
}
}
+// Returns only when span s has been swept.
func mSpan_EnsureSwept(s *mspan) {
// Caller must disable preemption.
// Otherwise when this function returns the span can become unswept again
@@ -810,6 +1152,7 @@ func mSpan_EnsureSwept(s *mspan) {
if atomicload(&s.sweepgen) == sg {
return
}
+	// The caller must be sure that the span is an MSpanInUse span.
if cas(&s.sweepgen, sg-2, sg-1) {
mSpan_Sweep(s, false)
return
@@ -826,6 +1169,10 @@ func mSpan_EnsureSwept(s *mspan) {
// If preserve=true, don't return it to heap nor relink in MCentral lists;
// caller takes care of it.
func mSpan_Sweep(s *mspan, preserve bool) bool {
+ if checkmark {
+ gothrow("MSpan_Sweep: checkmark only runs in STW and after the sweep")
+ }
+
// It's critical that we enter this function with preemption disabled,
// GC must not start while we are in the middle of this function.
_g_ := getg()
@@ -851,13 +1198,14 @@ func mSpan_Sweep(s *mspan, preserve bool) bool {
}
res := false
nfree := 0
- var head mlink
- end := &head
+
+ var head, end gclinkptr
+
c := _g_.m.mcache
sweepgenset := false
// Mark any free objects in this span so we don't collect them.
- for link := s.freelist; link != nil; link = link.next {
+ for link := s.freelist; link.ptr() != nil; link = link.ptr().next {
off := (uintptr(unsafe.Pointer(link)) - arena_start) / ptrSize
bitp := arena_start - off/wordsPerBitmapByte - 1
shift := (off % wordsPerBitmapByte) * gcBits
@@ -978,8 +1326,13 @@ func mSpan_Sweep(s *mspan, preserve bool) bool {
} else if size > ptrSize {
*(*uintptr)(unsafe.Pointer(p + ptrSize)) = 0
}
- end.next = (*mlink)(unsafe.Pointer(p))
- end = end.next
+ if head.ptr() == nil {
+ head = gclinkptr(p)
+ } else {
+ end.ptr().next = gclinkptr(p)
+ }
+ end = gclinkptr(p)
+ end.ptr().next = gclinkptr(0x0bade5)
nfree++
}
}
@@ -1002,7 +1355,7 @@ func mSpan_Sweep(s *mspan, preserve bool) bool {
c.local_nsmallfree[cl] += uintptr(nfree)
c.local_cachealloc -= intptr(uintptr(nfree) * size)
xadd64(&memstats.next_gc, -int64(nfree)*int64(size)*int64(gcpercent+100)/100)
- res = mCentral_FreeSpan(&mheap_.central[cl].mcentral, s, int32(nfree), head.next, end, preserve)
+ res = mCentral_FreeSpan(&mheap_.central[cl].mcentral, s, int32(nfree), head, end, preserve)
// MCentral_FreeSpan updates sweepgen
}
return res
@@ -1073,11 +1426,11 @@ func gchelper() {
_g_.m.traceback = 2
gchelperstart()
- // parallel mark for over gc roots
+	// parallel mark over GC roots
parfordo(work.markfor)
-
- // help other threads scan secondary blocks
- scanblock(0, 0, nil)
+ if gcphase != _GCscan {
+ scanblock(0, 0, nil) // blocks in getfull
+ }
nproc := work.nproc // work.nproc can change right after we increment work.ndone
if xadd(&work.ndone, +1) == nproc-1 {
@@ -1204,6 +1557,7 @@ func gcinit() {
gcbssmask = unrollglobgcprog((*byte)(unsafe.Pointer(&gcbss)), uintptr(unsafe.Pointer(&ebss))-uintptr(unsafe.Pointer(&bss)))
}
+// Called from malloc.go using onM, stopping and starting the world handled in caller.
func gc_m(start_time int64, eagersweep bool) {
_g_ := getg()
gp := _g_.m.curg
@@ -1211,18 +1565,266 @@ func gc_m(start_time int64, eagersweep bool) {
gp.waitreason = "garbage collection"
gc(start_time, eagersweep)
+ casgstatus(gp, _Gwaiting, _Grunning)
+}
+
+// Similar to clearcheckmarkbits but works on a single span.
+// It performs two tasks.
+// 1. When used before the checkmark phase it converts BitsDead (00) to BitsScalar (01)
+// for nibbles with the BoundaryBit set.
+// 2. When used after the checkmark phase it converts BitsPointerMarked (11) to BitsPointer (10) and
+// BitsScalarMarked (00) to BitsScalar (01), thus clearing the checkmark encoding.
+// For the second case it is possible to restore the BitsDead pattern, but since
+// the checkmark mode is a debug tool, performance has a lower priority than simplicity.
+// The span is MSpanInUse and the world is stopped.
+func clearcheckmarkbitsspan(s *mspan) {
+ if s.state != _MSpanInUse {
+ print("runtime:clearcheckmarkbitsspan: state=", s.state, "\n")
+ gothrow("clearcheckmarkbitsspan: bad span state")
+ }
- if nbadblock > 0 {
- // Work out path from root to bad block.
- for {
- gc(start_time, eagersweep)
- if nbadblock >= int32(len(badblock)) {
- gothrow("cannot find path to bad pointer")
+ arena_start := mheap_.arena_start
+ cl := s.sizeclass
+ size := s.elemsize
+ var n int32
+ if cl == 0 {
+ n = 1
+ } else {
+ // Chunk full of small blocks
+ npages := class_to_allocnpages[cl]
+ n = npages << _PageShift / int32(size)
+ }
+
+ // MSpan_Sweep has similar code but instead of overloading and
+ // complicating that routine we do a simpler walk here.
+ // Sweep through n objects of given size starting at p.
+ // This thread owns the span now, so it can manipulate
+ // the block bitmap without atomic operations.
+ p := uintptr(s.start) << _PageShift
+
+ // Find bits for the beginning of the span.
+ off := (p - arena_start) / ptrSize
+ bitp := (*byte)(unsafe.Pointer(arena_start - off/wordsPerBitmapByte - 1))
+ step := size / (ptrSize * wordsPerBitmapByte)
+
+ // The type bit values are:
+ // 00 - BitsDead, for us BitsScalarMarked
+ // 01 - BitsScalar
+ // 10 - BitsPointer
+ // 11 - unused, for us BitsPointerMarked
+ //
+ // When called to prepare for the checkmark phase (checkmark==1),
+ // we change BitsDead to BitsScalar, so that there are no BitsScalarMarked
+ // type bits anywhere.
+ //
+ // The checkmark phase marks by changing BitsScalar to BitsScalarMarked
+ // and BitsPointer to BitsPointerMarked.
+ //
+ // When called to clean up after the checkmark phase (checkmark==0),
+ // we unmark by changing BitsScalarMarked back to BitsScalar and
+ // BitsPointerMarked back to BitsPointer.
+ //
+ // There are two problems with the scheme as just described.
+ // First, the setup rewrites BitsDead to BitsScalar, but the type bits
+ // following a BitsDead are uninitialized and must not be used.
+ // Second, objects that are free are expected to have their type
+ // bits zeroed (BitsDead), so in the cleanup we need to restore
+ // any BitsDeads that were there originally.
+ //
+ // In a one-word object (8-byte allocation on 64-bit system),
+ // there is no difference between BitsScalar and BitsDead, because
+ // neither is a pointer and there are no more words in the object,
+ // so using BitsScalar during the checkmark is safe and mapping
+ // both back to BitsDead during cleanup is also safe.
+ //
+ // In a larger object, we need to be more careful. During setup,
+ // if the type of the first word is BitsDead, we change it to BitsScalar
+ // (as we must) but also initialize the type of the second
+ // word to BitsDead, so that a scan during the checkmark phase
+ // will still stop before seeing the uninitialized type bits in the
+ // rest of the object. The sequence 'BitsScalar BitsDead' never
+ // happens in real type bitmaps - BitsDead is always as early
+ // as possible, so immediately after the last BitsPointer.
+ // During cleanup, if we see a BitsScalar, we can check to see if it
+ // is followed by BitsDead. If so, it was originally BitsDead and
+ // we can change it back.
+
+ if step == 0 {
+ // updating top and bottom nibbles, all boundaries
+ for i := int32(0); i < n/2; i, bitp = i+1, addb(bitp, uintptrMask&-1) {
+ if *bitp&bitBoundary == 0 {
+ gothrow("missing bitBoundary")
+ }
+ b := (*bitp & bitPtrMask) >> 2
+ if !checkmark && (b == _BitsScalar || b == _BitsScalarMarked) {
+ *bitp &^= 0x0c // convert to _BitsDead
+ } else if b == _BitsScalarMarked || b == _BitsPointerMarked {
+ *bitp &^= _BitsCheckMarkXor << 2
+ }
+
+ if (*bitp>>gcBits)&bitBoundary == 0 {
+ gothrow("missing bitBoundary")
+ }
+ b = ((*bitp >> gcBits) & bitPtrMask) >> 2
+ if !checkmark && (b == _BitsScalar || b == _BitsScalarMarked) {
+ *bitp &^= 0xc0 // convert to _BitsDead
+ } else if b == _BitsScalarMarked || b == _BitsPointerMarked {
+ *bitp &^= _BitsCheckMarkXor << (2 + gcBits)
+ }
+ }
+ } else {
+ // updating bottom nibble for first word of each object
+ for i := int32(0); i < n; i, bitp = i+1, addb(bitp, -step) {
+ if *bitp&bitBoundary == 0 {
+ gothrow("missing bitBoundary")
+ }
+ b := (*bitp & bitPtrMask) >> 2
+
+ if checkmark && b == _BitsDead {
+ // move BitsDead into second word.
+ // set bits to BitsScalar in preparation for checkmark phase.
+ *bitp &^= 0xc0
+ *bitp |= _BitsScalar << 2
+ } else if !checkmark && (b == _BitsScalar || b == _BitsScalarMarked) && *bitp&0xc0 == 0 {
+ // Cleaning up after checkmark phase.
+ // First word is scalar or dead (we forgot)
+ // and second word is dead.
+ // First word might as well be dead too.
+ *bitp &^= 0x0c
+ } else if b == _BitsScalarMarked || b == _BitsPointerMarked {
+ *bitp ^= _BitsCheckMarkXor << 2
}
}
}
+}
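// Editor's sketch, not part of this patch: the 2-bit type codes and the XOR
// trick that the comments above describe. The constants mirror _BitsDead,
// _BitsScalar, _BitsPointer and _BitsCheckMarkXor; the names here are local
// stand-ins for illustration only.
package main

import "fmt"

const (
	bitsDead    = 0 // 00, reused as BitsScalarMarked during checkmark
	bitsScalar  = 1 // 01
	bitsPointer = 2 // 10
	// 11 is unused in normal marking, reused as BitsPointerMarked during checkmark.
	bitsCheckMarkXor = 1
)

func main() {
	// Checkmark "marks" a word by XORing its type code with 1:
	// Scalar (01) <-> ScalarMarked (00), Pointer (10) <-> PointerMarked (11).
	// Cleanup applies the same XOR to go back.
	for _, b := range []uint8{bitsScalar, bitsPointer} {
		marked := b ^ bitsCheckMarkXor
		fmt.Printf("%02b marks to %02b and clears back to %02b\n",
			b, marked, marked^bitsCheckMarkXor)
	}
}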
- casgstatus(gp, _Gwaiting, _Grunning)
+// clearcheckmarkbits performs two tasks.
+// 1. When used before the checkmark phase it converts BitsDead (00) to BitsScalar (01)
+// for nibbles with the BoundaryBit set.
+// 2. When used after the checkmark phase it converts BitsPointerMarked (11) to BitsPointer (10) and
+// BitsScalarMarked (00) to BitsScalar (01), thus clearing the checkmark encoding.
+// This is a bit expensive but preserves the BitsDead encoding during the normal marking.
+// BitsDead remains valid for every nibble except the ones with BitsBoundary set.
+func clearcheckmarkbits() {
+ for _, s := range work.spans {
+ if s.state == _MSpanInUse {
+ clearcheckmarkbitsspan(s)
+ }
+ }
+}
+
+// Called from malloc.go using onM.
+// The world is stopped. Rerun the scan and mark phases
+// using the bitMarkedCheck bit instead of the
+// bitMarked bit. If the marking encounters a
+// bitMarked bit that is not set then we throw.
+func gccheckmark_m(startTime int64, eagersweep bool) {
+ if !gccheckmarkenable {
+ return
+ }
+
+ if checkmark {
+ gothrow("gccheckmark_m, entered with checkmark already true")
+ }
+
+ checkmark = true
+ clearcheckmarkbits() // Converts BitsDead to BitsScalar.
+ gc_m(startTime, eagersweep) // turns off checkmark
+ // Work done, fixed up the GC bitmap to remove the checkmark bits.
+ clearcheckmarkbits()
+}
+
+func gccheckmarkenable_m() {
+ gccheckmarkenable = true
+}
+
+func gccheckmarkdisable_m() {
+ gccheckmarkenable = false
+}
+
+func finishsweep_m() {
+ // The world is stopped so we should be able to complete the sweeps
+ // quickly.
+ for sweepone() != ^uintptr(0) {
+ sweep.npausesweep++
+ }
+
+ // There may be some other spans being swept concurrently that
+ // we need to wait for. If finishsweep_m is done with the world stopped
+ // this code is not required.
+ sg := mheap_.sweepgen
+ for _, s := range work.spans {
+ if s.sweepgen != sg && s.state == _MSpanInUse {
+ mSpan_EnsureSwept(s)
+ }
+ }
+}
+
+// Scan all of the stacks, greying (or graying if in America) the referents
+// but not blackening them since the mark write barrier isn't installed.
+func gcscan_m() {
+ _g_ := getg()
+
+ // Grab the g that called us and potentially allow rescheduling.
+ // This allows it to be scanned like other goroutines.
+ mastergp := _g_.m.curg
+ casgstatus(mastergp, _Grunning, _Gwaiting)
+ mastergp.waitreason = "garbage collection scan"
+
+ // Span sweeping has been done by finishsweep_m.
+ // Long term we will want to make this goroutine runnable
+ // by placing it onto a scanenqueue state and then calling
+ // runtime·restartg(mastergp) to make it Grunnable.
+ // At the bottom we will want to return this p back to the scheduler.
+ oldphase := gcphase
+
+ // Prepare flag indicating that the scan has not been completed.
+ lock(&allglock)
+ local_allglen := allglen
+ for i := uintptr(0); i < local_allglen; i++ {
+ gp := allgs[i]
+ gp.gcworkdone = false // set to true in gcphasework
+ }
+ unlock(&allglock)
+
+ work.nwait = 0
+ work.ndone = 0
+ work.nproc = 1 // For now do not do this in parallel.
+ gcphase = _GCscan
+ // ackgcphase is not needed since we are not scanning running goroutines.
+ parforsetup(work.markfor, work.nproc, uint32(_RootCount+local_allglen), nil, false, markroot)
+ parfordo(work.markfor)
+
+ lock(&allglock)
+ // Check that gc work is done.
+ for i := uintptr(0); i < local_allglen; i++ {
+ gp := allgs[i]
+ if !gp.gcworkdone {
+ gothrow("scan missed a g")
+ }
+ }
+ unlock(&allglock)
+
+ gcphase = oldphase
+ casgstatus(mastergp, _Gwaiting, _Grunning)
+ // Let the g that called us continue to run.
+}
+
+// Mark all objects that are known about.
+func gcmark_m() {
+ scanblock(0, 0, nil)
+}
+
+// For now this must be bracketed with a stoptheworld and a starttheworld to ensure
+// all goroutines see the new barrier.
+func gcinstallmarkwb_m() {
+ gcphase = _GCmark
+}
+
+// For now this must be bracketed with a stoptheworld and a starttheworld to ensure
+// all goroutines see the new barrier.
+func gcinstalloffwb_m() {
+ gcphase = _GCoff
}
func gc(start_time int64, eagersweep bool) {
@@ -1244,9 +1846,8 @@ func gc(start_time int64, eagersweep bool) {
t1 = nanotime()
}
- // Sweep what is not sweeped by bgsweep.
- for sweepone() != ^uintptr(0) {
- sweep.npausesweep++
+ if !checkmark {
+ finishsweep_m() // skip during checkmark debug phase.
}
// Cache runtime.mheap_.allspans in work.spans to avoid conflicts with
@@ -1266,10 +1867,19 @@ func gc(start_time int64, eagersweep bool) {
mheap_.gcspans = mheap_.allspans
work.spans = h_allspans
unlock(&mheap_.lock)
+ oldphase := gcphase
work.nwait = 0
work.ndone = 0
work.nproc = uint32(gcprocs())
+ gcphase = _GCmarktermination
+
+ // World is stopped so allglen will not change.
+ for i := uintptr(0); i < allglen; i++ {
+ gp := allgs[i]
+ gp.gcworkdone = false // set to true in gcphasework
+ }
+
parforsetup(work.markfor, work.nproc, uint32(_RootCount+allglen), nil, false, markroot)
if work.nproc > 1 {
noteclear(&work.alldone)
@@ -1285,6 +1895,14 @@ func gc(start_time int64, eagersweep bool) {
parfordo(work.markfor)
scanblock(0, 0, nil)
+ if work.full != 0 {
+ gothrow("work.full != 0")
+ }
+ if work.partial != 0 {
+ gothrow("work.partial != 0")
+ }
+
+ gcphase = oldphase
var t3 int64
if debug.gctrace > 0 {
t3 = nanotime()
@@ -1349,6 +1967,15 @@ func gc(start_time int64, eagersweep bool) {
sysFree(unsafe.Pointer(&work.spans[0]), uintptr(len(work.spans))*unsafe.Sizeof(work.spans[0]), &memstats.other_sys)
}
+ if gccheckmarkenable {
+ if !checkmark {
+ // first half of two-pass; don't set up sweep
+ unlock(&mheap_.lock)
+ return
+ }
+ checkmark = false // done checking marks
+ }
+
// Cache the current array for sweeping.
mheap_.gcspans = mheap_.allspans
mheap_.sweepgen += 2
diff --git a/src/runtime/mgc0.go b/src/runtime/mgc0.go
index 38406f33a..7797894fc 100644
--- a/src/runtime/mgc0.go
+++ b/src/runtime/mgc0.go
@@ -97,54 +97,135 @@ func bgsweep() {
}
}
+const (
+ _PoisonGC = 0xf969696969696969 & (1<<(8*ptrSize) - 1)
+ _PoisonStack = 0x6868686868686868 & (1<<(8*ptrSize) - 1)
+)
+
// NOTE: Really dst *unsafe.Pointer, src unsafe.Pointer,
// but if we do that, Go inserts a write barrier on *dst = src.
//go:nosplit
func writebarrierptr(dst *uintptr, src uintptr) {
*dst = src
+ writebarrierptr_nostore(dst, src)
+}
+
+// Like writebarrierptr, but the store has already been applied.
+// Do not reapply.
+//go:nosplit
+func writebarrierptr_nostore(dst *uintptr, src uintptr) {
+ if getg() == nil { // very low-level startup
+ return
+ }
+
+ if src != 0 && (src < _PageSize || src == _PoisonGC || src == _PoisonStack) {
+ systemstack(func() { gothrow("bad pointer in write barrier") })
+ }
+
+ mp := acquirem()
+ if mp.inwb || mp.dying > 0 {
+ releasem(mp)
+ return
+ }
+ mp.inwb = true
+ systemstack(func() {
+ gcmarkwb_m(dst, src)
+ })
+ mp.inwb = false
+ releasem(mp)
}
//go:nosplit
func writebarrierstring(dst *[2]uintptr, src [2]uintptr) {
- dst[0] = src[0]
+ writebarrierptr(&dst[0], src[0])
dst[1] = src[1]
}
//go:nosplit
func writebarrierslice(dst *[3]uintptr, src [3]uintptr) {
- dst[0] = src[0]
+ writebarrierptr(&dst[0], src[0])
dst[1] = src[1]
dst[2] = src[2]
}
//go:nosplit
func writebarrieriface(dst *[2]uintptr, src [2]uintptr) {
- dst[0] = src[0]
- dst[1] = src[1]
-}
-
-//go:nosplit
-func writebarrierfat2(dst *[2]uintptr, _ *byte, src [2]uintptr) {
- dst[0] = src[0]
- dst[1] = src[1]
+ writebarrierptr(&dst[0], src[0])
+ writebarrierptr(&dst[1], src[1])
}
-//go:nosplit
-func writebarrierfat3(dst *[3]uintptr, _ *byte, src [3]uintptr) {
- dst[0] = src[0]
- dst[1] = src[1]
- dst[2] = src[2]
-}
+//go:generate go run wbfat_gen.go -- wbfat.go
+//
+// The above line generates multiword write barriers for
+// all the combinations of ptr+scalar up to four words.
+// The implementations are written to wbfat.go.
//go:nosplit
-func writebarrierfat4(dst *[4]uintptr, _ *byte, src [4]uintptr) {
- dst[0] = src[0]
- dst[1] = src[1]
- dst[2] = src[2]
- dst[3] = src[3]
+func writebarrierfat(typ *_type, dst, src unsafe.Pointer) {
+ mask := loadPtrMask(typ)
+ nptr := typ.size / ptrSize
+ for i := uintptr(0); i < nptr; i += 2 {
+ bits := mask[i/2]
+ if (bits>>2)&_BitsMask == _BitsPointer {
+ writebarrierptr((*uintptr)(dst), *(*uintptr)(src))
+ } else {
+ *(*uintptr)(dst) = *(*uintptr)(src)
+ }
+ dst = add(dst, ptrSize)
+ src = add(src, ptrSize)
+ if i+1 == nptr {
+ break
+ }
+ bits >>= 4
+ if (bits>>2)&_BitsMask == _BitsPointer {
+ writebarrierptr((*uintptr)(dst), *(*uintptr)(src))
+ } else {
+ *(*uintptr)(dst) = *(*uintptr)(src)
+ }
+ dst = add(dst, ptrSize)
+ src = add(src, ptrSize)
+ }
}
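// Editor's sketch, not part of this patch: how writebarrierfat above reads the
// compact mask — one nibble per word, with the 2-bit type code in bits 2..3 of
// each nibble (low nibble first). wordIsPointer and the mask bytes are made up
// for illustration; they are not the runtime's loadPtrMask output.
package main

import "fmt"

const (
	bitsScalar  = 1
	bitsPointer = 2
	bitsMask    = 3
)

// wordIsPointer reports whether word i of an object is a pointer according to
// a nibble-per-word mask laid out the way writebarrierfat expects.
func wordIsPointer(mask []uint8, i int) bool {
	b := mask[i/2]
	if i%2 == 1 {
		b >>= 4 // the second word of each pair lives in the high nibble
	}
	return (b>>2)&bitsMask == bitsPointer
}

func main() {
	// Two bytes describe four words: scalar, pointer, pointer, scalar.
	mask := []uint8{bitsScalar<<2 | bitsPointer<<6, bitsPointer<<2 | bitsScalar<<6}
	for i := 0; i < 4; i++ {
		fmt.Println("word", i, "pointer:", wordIsPointer(mask, i))
	}
}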
//go:nosplit
-func writebarrierfat(typ *_type, dst, src unsafe.Pointer) {
- memmove(dst, src, typ.size)
+func writebarriercopy(typ *_type, dst, src slice) int {
+ n := dst.len
+ if n > src.len {
+ n = src.len
+ }
+ if n == 0 {
+ return 0
+ }
+ dstp := unsafe.Pointer(dst.array)
+ srcp := unsafe.Pointer(src.array)
+
+ if uintptr(srcp) < uintptr(dstp) && uintptr(srcp)+uintptr(n)*typ.size > uintptr(dstp) {
+ // Overlap with src before dst.
+ // Copy backward, being careful not to move dstp/srcp
+ // out of the array they point into.
+ dstp = add(dstp, uintptr(n-1)*typ.size)
+ srcp = add(srcp, uintptr(n-1)*typ.size)
+ i := uint(0)
+ for {
+ writebarrierfat(typ, dstp, srcp)
+ if i++; i >= n {
+ break
+ }
+ dstp = add(dstp, -typ.size)
+ srcp = add(srcp, -typ.size)
+ }
+ } else {
+ // Copy forward, being careful not to move dstp/srcp
+ // out of the array they point into.
+ i := uint(0)
+ for {
+ writebarrierfat(typ, dstp, srcp)
+ if i++; i >= n {
+ break
+ }
+ dstp = add(dstp, typ.size)
+ srcp = add(srcp, typ.size)
+ }
+ }
+ return int(n)
}
diff --git a/src/runtime/mgc0.h b/src/runtime/mgc0.h
index 62726b4f0..dd0c46024 100644
--- a/src/runtime/mgc0.h
+++ b/src/runtime/mgc0.h
@@ -12,9 +12,11 @@ enum {
BitsPointer = 2,
BitsMask = 3,
PointersPerByte = 8/BitsPerPointer,
- MaxGCMask = 64,
insData = 1,
insArray,
insArrayEnd,
insEnd,
+
+ // 64 bytes cover objects of size 1024/512 on 64/32 bits, respectively.
+ MaxGCMask = 65536, // TODO(rsc): change back to 64
};
diff --git a/src/runtime/mgc1.go b/src/runtime/mgc1.go
index d1aab4554..04a5207e5 100644
--- a/src/runtime/mgc1.go
+++ b/src/runtime/mgc1.go
@@ -50,12 +50,15 @@ const (
// If you change these, also change scanblock.
// scanblock does "if(bits == BitsScalar || bits == BitsDead)" as "if(bits <= BitsScalar)".
- _BitsDead = 0
- _BitsScalar = 1
- _BitsPointer = 2
+ _BitsDead = 0
+ _BitsScalar = 1 // 01
+ _BitsPointer = 2 // 10
+ _BitsCheckMarkXor = 1 // 10
+ _BitsScalarMarked = _BitsScalar ^ _BitsCheckMarkXor // 00
+ _BitsPointerMarked = _BitsPointer ^ _BitsCheckMarkXor // 11
// 64 bytes cover objects of size 1024/512 on 64/32 bits, respectively.
- _MaxGCMask = 64
+ _MaxGCMask = 65536 // TODO(rsc): change back to 64
)
// Bits in per-word bitmap.
diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go
index fedcd69c5..30205d68d 100644
--- a/src/runtime/mheap.go
+++ b/src/runtime/mheap.go
@@ -196,7 +196,7 @@ func mHeap_Alloc_m(h *mheap, npage uintptr, sizeclass int32, large bool) *mspan
// able to map interior pointer to containing span.
atomicstore(&s.sweepgen, h.sweepgen)
s.state = _MSpanInUse
- s.freelist = nil
+ s.freelist = 0
s.ref = 0
s.sizeclass = uint8(sizeclass)
if sizeclass == 0 {
@@ -248,7 +248,7 @@ func mHeap_AllocStack(h *mheap, npage uintptr) *mspan {
s := mHeap_AllocSpanLocked(h, npage)
if s != nil {
s.state = _MSpanStack
- s.freelist = nil
+ s.freelist = 0
s.ref = 0
memstats.stacks_inuse += uint64(s.npages << _PageShift)
}
@@ -571,7 +571,7 @@ func mSpan_Init(span *mspan, start pageID, npages uintptr) {
span.prev = nil
span.start = start
span.npages = npages
- span.freelist = nil
+ span.freelist = 0
span.ref = 0
span.sizeclass = 0
span.incache = false
diff --git a/src/runtime/os_linux_386.go b/src/runtime/os_linux_386.go
index c4f95804a..adcd5a1c4 100644
--- a/src/runtime/os_linux_386.go
+++ b/src/runtime/os_linux_386.go
@@ -14,8 +14,7 @@ const (
var _vdso uint32
-//go:nosplit
-func linux_setup_vdso(argc int32, argv **byte) {
+func sysargs(argc int32, argv **byte) {
// skip over argv, envv to get to auxv
n := argc + 1
for argv_index(argv, n) != nil {
diff --git a/src/runtime/print1.go b/src/runtime/print1.go
index 8f8268873..3d812bd04 100644
--- a/src/runtime/print1.go
+++ b/src/runtime/print1.go
@@ -41,7 +41,31 @@ func snprintf(dst *byte, n int32, s *byte) {
gp.writebuf = nil
}
-//var debuglock mutex
+var debuglock mutex
+
+// The compiler emits calls to printlock and printunlock around
+// the multiple calls that implement a single Go print or println
+// statement. Some of the print helpers (printsp, for example)
+// call print recursively. There is also the problem of a crash
+// happening during the print routines and needing to acquire
+// the print lock to print information about the crash.
+// For both these reasons, let a thread acquire the printlock 'recursively'.
+
+func printlock() {
+ mp := getg().m
+ mp.printlock++
+ if mp.printlock == 1 {
+ lock(&debuglock)
+ }
+}
+
+func printunlock() {
+ mp := getg().m
+ mp.printlock--
+ if mp.printlock == 0 {
+ unlock(&debuglock)
+ }
+}
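// Editor's sketch, not part of this patch: the depth counter behind printlock
// and printunlock above, shown single-goroutine with a plain mutex. reentrant
// is an assumption for illustration; the runtime keys the counter off the
// current m, which is what makes the recursion per-thread rather than per-struct.
package main

import (
	"fmt"
	"sync"
)

type reentrant struct {
	mu    sync.Mutex
	depth int // nesting depth of lock calls by the owning goroutine
}

func (r *reentrant) lock() {
	r.depth++
	if r.depth == 1 {
		r.mu.Lock() // only the outermost call takes the real lock
	}
}

func (r *reentrant) unlock() {
	r.depth--
	if r.depth == 0 {
		r.mu.Unlock() // only the outermost call releases it
	}
}

func main() {
	var r reentrant
	r.lock()
	r.lock() // a nested print helper would do this; no self-deadlock
	fmt.Println("nested depth:", r.depth)
	r.unlock()
	r.unlock()
}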
// write to goroutine-local buffer if diverting output,
// or else standard error.
@@ -80,7 +104,7 @@ func printnl() {
// Very simple printf. Only for debugging prints.
// Do not add to this without checking with Rob.
func vprintf(str string, arg unsafe.Pointer) {
- //lock(&debuglock);
+ printlock()
s := bytes(str)
start := 0
@@ -160,7 +184,7 @@ func vprintf(str string, arg unsafe.Pointer) {
gwrite(s[start:i])
}
- //unlock(&debuglock);
+ printunlock()
}
func printpc(p unsafe.Pointer) {
diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index 295190cb4..64f6a3520 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -181,6 +181,9 @@ func acquireSudog() *sudog {
// which keeps the garbage collector from being invoked.
mp := acquirem()
p := new(sudog)
+ if p.elem != nil {
+ gothrow("acquireSudog: found p.elem != nil after new")
+ }
releasem(mp)
return p
}
diff --git a/src/runtime/proc1.go b/src/runtime/proc1.go
index aeded0e77..5a898ff41 100644
--- a/src/runtime/proc1.go
+++ b/src/runtime/proc1.go
@@ -316,6 +316,10 @@ func casfrom_Gscanstatus(gp *g, oldval, newval uint32) {
// Check that transition is valid.
switch oldval {
+ default:
+ print("runtime: casfrom_Gscanstatus bad oldval gp=", gp, ", oldval=", hex(oldval), ", newval=", hex(newval), "\n")
+ dumpgstatus(gp)
+ gothrow("casfrom_Gscanstatus:top gp->status is not in scan state")
case _Gscanrunnable,
_Gscanwaiting,
_Gscanrunning,
@@ -377,12 +381,12 @@ func casgstatus(gp *g, oldval, newval uint32) {
})
}
// Help GC if needed.
- if gp.preemptscan && !gp.gcworkdone && (oldval == _Grunning || oldval == _Gsyscall) {
- gp.preemptscan = false
- systemstack(func() {
- gcphasework(gp)
- })
- }
+ // if gp.preemptscan && !gp.gcworkdone && (oldval == _Grunning || oldval == _Gsyscall) {
+ // gp.preemptscan = false
+ // systemstack(func() {
+ // gcphasework(gp)
+ // })
+ // }
}
}
@@ -512,9 +516,10 @@ func stopscanstart(gp *g) {
// Runs on g0 and does the actual work after putting the g back on the run queue.
func mquiesce(gpmaster *g) {
- activeglen := len(allgs)
// enqueue the calling goroutine.
restartg(gpmaster)
+
+ activeglen := len(allgs)
for i := 0; i < activeglen; i++ {
gp := allgs[i]
if readgstatus(gp) == _Gdead {
@@ -1579,7 +1584,8 @@ func save(pc, sp uintptr) {
_g_.sched.lr = 0
_g_.sched.ret = 0
_g_.sched.ctxt = nil
- _g_.sched.g = _g_
+ // write as uintptr to avoid write barrier, which will smash _g_.sched.
+ *(*uintptr)(unsafe.Pointer(&_g_.sched.g)) = uintptr(unsafe.Pointer(_g_))
}
// The goroutine g is about to enter a system call.
@@ -1625,7 +1631,10 @@ func reentersyscall(pc, sp uintptr) {
_g_.syscallpc = pc
casgstatus(_g_, _Grunning, _Gsyscall)
if _g_.syscallsp < _g_.stack.lo || _g_.stack.hi < _g_.syscallsp {
- systemstack(entersyscall_bad)
+ systemstack(func() {
+ print("entersyscall inconsistent ", hex(_g_.syscallsp), " [", hex(_g_.stack.lo), ",", hex(_g_.stack.hi), "]\n")
+ gothrow("entersyscall")
+ })
}
if atomicload(&sched.sysmonwait) != 0 { // TODO: fast atomic
@@ -1654,13 +1663,6 @@ func entersyscall(dummy int32) {
reentersyscall(getcallerpc(unsafe.Pointer(&dummy)), getcallersp(unsafe.Pointer(&dummy)))
}
-func entersyscall_bad() {
- var gp *g
- gp = getg().m.curg
- print("entersyscall inconsistent ", hex(gp.syscallsp), " [", hex(gp.stack.lo), ",", hex(gp.stack.hi), "]\n")
- gothrow("entersyscall")
-}
-
func entersyscall_sysmon() {
lock(&sched.lock)
if atomicload(&sched.sysmonwait) != 0 {
@@ -1692,12 +1694,26 @@ func entersyscallblock(dummy int32) {
_g_.stackguard0 = stackPreempt // see comment in entersyscall
// Leave SP around for GC and traceback.
- save(getcallerpc(unsafe.Pointer(&dummy)), getcallersp(unsafe.Pointer(&dummy)))
+ pc := getcallerpc(unsafe.Pointer(&dummy))
+ sp := getcallersp(unsafe.Pointer(&dummy))
+ save(pc, sp)
_g_.syscallsp = _g_.sched.sp
_g_.syscallpc = _g_.sched.pc
+ if _g_.syscallsp < _g_.stack.lo || _g_.stack.hi < _g_.syscallsp {
+ sp1 := sp
+ sp2 := _g_.sched.sp
+ sp3 := _g_.syscallsp
+ systemstack(func() {
+ print("entersyscallblock inconsistent ", hex(sp1), " ", hex(sp2), " ", hex(sp3), " [", hex(_g_.stack.lo), ",", hex(_g_.stack.hi), "]\n")
+ gothrow("entersyscallblock")
+ })
+ }
casgstatus(_g_, _Grunning, _Gsyscall)
if _g_.syscallsp < _g_.stack.lo || _g_.stack.hi < _g_.syscallsp {
- systemstack(entersyscall_bad)
+ systemstack(func() {
+ print("entersyscallblock inconsistent ", hex(sp), " ", hex(_g_.sched.sp), " ", hex(_g_.syscallsp), " [", hex(_g_.stack.lo), ",", hex(_g_.stack.hi), "]\n")
+ gothrow("entersyscallblock")
+ })
}
systemstack(entersyscallblock_handoff)
@@ -1776,6 +1792,7 @@ func exitsyscallfast() bool {
// Freezetheworld sets stopwait but does not retake P's.
if sched.stopwait != 0 {
+ _g_.m.mcache = nil
_g_.m.p = nil
return false
}
@@ -1789,6 +1806,7 @@ func exitsyscallfast() bool {
}
// Try to get any other idle P.
+ _g_.m.mcache = nil
_g_.m.p = nil
if sched.pidle != nil {
var ok bool
@@ -2363,6 +2381,8 @@ func setcpuprofilerate_m(hz int32) {
}
// Change number of processors. The world is stopped, sched is locked.
+// gcworkbufs are not being modified by either the GC or
+// the write barrier code.
func procresize(new int32) {
old := gomaxprocs
if old < 0 || old > _MaxGomaxprocs || new <= 0 || new > _MaxGomaxprocs {
diff --git a/src/runtime/rt0_linux_386.s b/src/runtime/rt0_linux_386.s
index 352e594d5..47fd908e7 100644
--- a/src/runtime/rt0_linux_386.s
+++ b/src/runtime/rt0_linux_386.s
@@ -9,7 +9,6 @@ TEXT _rt0_386_linux(SB),NOSPLIT,$8
LEAL 12(SP), BX
MOVL AX, 0(SP)
MOVL BX, 4(SP)
- CALL runtime·linux_setup_vdso(SB)
CALL main(SB)
INT $3
diff --git a/src/runtime/runtime1.go b/src/runtime/runtime1.go
index 15dea01a3..9e19b68be 100644
--- a/src/runtime/runtime1.go
+++ b/src/runtime/runtime1.go
@@ -97,7 +97,10 @@ func testAtomic64() {
z64 = 42
x64 = 0
- // TODO: PREFETCH((unsafe.Pointer)(&z64))
+ prefetcht0(uintptr(unsafe.Pointer(&z64)))
+ prefetcht1(uintptr(unsafe.Pointer(&z64)))
+ prefetcht2(uintptr(unsafe.Pointer(&z64)))
+ prefetchnta(uintptr(unsafe.Pointer(&z64)))
if cas64(&z64, x64, 1) {
gothrow("cas64 failed")
}
diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go
index 7987a7373..d18178d09 100644
--- a/src/runtime/runtime2.go
+++ b/src/runtime/runtime2.go
@@ -227,6 +227,8 @@ type m struct {
helpgc int32
spinning bool // m is out of work and is actively looking for work
blocked bool // m is blocked on a note
+ inwb bool // m is executing a write barrier
+ printlock int8
fastrand uint32
ncgocall uint64 // number of cgo calls in total
ncgo int32 // number of cgo calls currently in progress
@@ -402,8 +404,9 @@ type itab struct {
}
// Lock-free stack node.
+// Also known to export_test.go.
type lfnode struct {
- next *lfnode
+ next uint64
pushcnt uintptr
}
@@ -448,11 +451,13 @@ type debugvars struct {
// Indicates to the write barrier and synchronization code which task to perform.
const (
- _GCoff = iota // stop and start nop
- _GCquiesce // stop and start nop
- _GCstw // stop the ps nop
- _GCmark // scan the stacks and start no white to black
- _GCsweep // stop and start nop
+ _GCoff = iota // GC not running, write barrier disabled
+ _GCquiesce // unused state
+ _GCstw // unused state
+ _GCscan // GC collecting roots into workbufs, write barrier disabled
+ _GCmark // GC marking from workbufs, write barrier ENABLED
+ _GCmarktermination // GC mark termination: allocate black, P's help GC, write barrier ENABLED
+ _GCsweep // GC mark completed; sweeping in background, write barrier disabled
)
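// Editor's sketch, not part of this patch: which of the phases above run with
// the write barrier enabled, matching the comments on the constants and the
// switch in gcmarkwb_m. The constant values and barrierEnabled are local
// stand-ins for illustration, not the runtime's.
package main

import "fmt"

const (
	gcOff = iota
	gcQuiesce
	gcStw
	gcScan
	gcMark
	gcMarkTermination
	gcSweep
)

// barrierEnabled reports whether stores must be shaded in the given phase.
func barrierEnabled(phase int) bool {
	return phase == gcMark || phase == gcMarkTermination
}

func main() {
	for p := gcOff; p <= gcSweep; p++ {
		fmt.Println("phase", p, "write barrier enabled:", barrierEnabled(p))
	}
}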
type forcegcstate struct {
diff --git a/src/runtime/select.go b/src/runtime/select.go
index 6717b9395..5e5047bc1 100644
--- a/src/runtime/select.go
+++ b/src/runtime/select.go
@@ -377,12 +377,7 @@ loop:
// iterating through the linked list they are in reverse order.
cas = nil
sglist = gp.waiting
- // Clear all selectdone and elem before unlinking from gp.waiting.
- // They must be cleared before being put back into the sudog cache.
- // Clear before unlinking, because if a stack copy happens after the unlink,
- // they will not be updated, they will be left pointing to the old stack,
- // which creates dangling pointers, which may be detected by the
- // garbage collector.
+ // Clear all elem before unlinking from gp.waiting.
for sg1 := gp.waiting; sg1 != nil; sg1 = sg1.waitlink {
sg1.selectdone = nil
sg1.elem = nil
diff --git a/src/runtime/stack1.go b/src/runtime/stack1.go
index 1fd61ce1a..28000864d 100644
--- a/src/runtime/stack1.go
+++ b/src/runtime/stack1.go
@@ -58,7 +58,7 @@ func stackinit() {
// Allocates a stack from the free pool. Must be called with
// stackpoolmu held.
-func stackpoolalloc(order uint8) *mlink {
+func stackpoolalloc(order uint8) gclinkptr {
list := &stackpool[order]
s := list.next
if s == list {
@@ -70,23 +70,23 @@ func stackpoolalloc(order uint8) *mlink {
if s.ref != 0 {
gothrow("bad ref")
}
- if s.freelist != nil {
+ if s.freelist.ptr() != nil {
gothrow("bad freelist")
}
for i := uintptr(0); i < _StackCacheSize; i += _FixedStack << order {
- x := (*mlink)(unsafe.Pointer(uintptr(s.start)<<_PageShift + i))
- x.next = s.freelist
+ x := gclinkptr(uintptr(s.start)<<_PageShift + i)
+ x.ptr().next = s.freelist
s.freelist = x
}
mSpanList_Insert(list, s)
}
x := s.freelist
- if x == nil {
+ if x.ptr() == nil {
gothrow("span has no free stacks")
}
- s.freelist = x.next
+ s.freelist = x.ptr().next
s.ref++
- if s.freelist == nil {
+ if s.freelist.ptr() == nil {
// all stacks in s are allocated.
mSpanList_Remove(s)
}
@@ -94,22 +94,22 @@ func stackpoolalloc(order uint8) *mlink {
}
// Adds stack x to the free pool. Must be called with stackpoolmu held.
-func stackpoolfree(x *mlink, order uint8) {
+func stackpoolfree(x gclinkptr, order uint8) {
s := mHeap_Lookup(&mheap_, (unsafe.Pointer)(x))
if s.state != _MSpanStack {
gothrow("freeing stack not in a stack span")
}
- if s.freelist == nil {
+ if s.freelist.ptr() == nil {
// s will now have a free stack
mSpanList_Insert(&stackpool[order], s)
}
- x.next = s.freelist
+ x.ptr().next = s.freelist
s.freelist = x
s.ref--
if s.ref == 0 {
// span is completely free - return to heap
mSpanList_Remove(s)
- s.freelist = nil
+ s.freelist = 0
mHeap_FreeStack(&mheap_, s)
}
}
@@ -123,12 +123,12 @@ func stackcacherefill(c *mcache, order uint8) {
// Grab some stacks from the global cache.
// Grab half of the allowed capacity (to prevent thrashing).
- var list *mlink
+ var list gclinkptr
var size uintptr
lock(&stackpoolmu)
for size < _StackCacheSize/2 {
x := stackpoolalloc(order)
- x.next = list
+ x.ptr().next = list
list = x
size += _FixedStack << order
}
@@ -145,7 +145,7 @@ func stackcacherelease(c *mcache, order uint8) {
size := c.stackcache[order].size
lock(&stackpoolmu)
for size > _StackCacheSize/2 {
- y := x.next
+ y := x.ptr().next
stackpoolfree(x, order)
x = y
size -= _FixedStack << order
@@ -162,12 +162,12 @@ func stackcache_clear(c *mcache) {
lock(&stackpoolmu)
for order := uint8(0); order < _NumStackOrders; order++ {
x := c.stackcache[order].list
- for x != nil {
- y := x.next
+ for x.ptr() != nil {
+ y := x.ptr().next
stackpoolfree(x, order)
x = y
}
- c.stackcache[order].list = nil
+ c.stackcache[order].list = 0
c.stackcache[order].size = 0
}
unlock(&stackpoolmu)
@@ -207,7 +207,7 @@ func stackalloc(n uint32) stack {
order++
n2 >>= 1
}
- var x *mlink
+ var x gclinkptr
c := thisg.m.mcache
if c == nil || thisg.m.gcing != 0 || thisg.m.helpgc != 0 {
// c == nil can happen in the guts of exitsyscall or
@@ -219,11 +219,11 @@ func stackalloc(n uint32) stack {
unlock(&stackpoolmu)
} else {
x = c.stackcache[order].list
- if x == nil {
+ if x.ptr() == nil {
stackcacherefill(c, order)
x = c.stackcache[order].list
}
- c.stackcache[order].list = x.next
+ c.stackcache[order].list = x.ptr().next
c.stackcache[order].size -= uintptr(n)
}
v = (unsafe.Pointer)(x)
@@ -270,7 +270,7 @@ func stackfree(stk stack) {
order++
n2 >>= 1
}
- x := (*mlink)(v)
+ x := gclinkptr(v)
c := gp.m.mcache
if c == nil || gp.m.gcing != 0 || gp.m.helpgc != 0 {
lock(&stackpoolmu)
@@ -280,7 +280,7 @@ func stackfree(stk stack) {
if c.stackcache[order].size >= _StackCacheSize {
stackcacherelease(c, order)
}
- x.next = c.stackcache[order].list
+ x.ptr().next = c.stackcache[order].list
c.stackcache[order].list = x
c.stackcache[order].size += n
}
@@ -526,6 +526,7 @@ func fillstack(stk stack, b byte) {
}
// Copies gp's stack to a new stack of a different size.
+// Caller must have changed gp status to Gcopystack.
func copystack(gp *g, newsize uintptr) {
if gp.syscallsp != 0 {
gothrow("stack growth not allowed in system call")
@@ -563,15 +564,11 @@ func copystack(gp *g, newsize uintptr) {
}
memmove(unsafe.Pointer(new.hi-used), unsafe.Pointer(old.hi-used), used)
- oldstatus := casgcopystack(gp) // cas from Gwaiting or Grunnable to Gcopystack, return old status
-
// Swap out old stack for new one
gp.stack = new
gp.stackguard0 = new.lo + _StackGuard // NOTE: might clobber a preempt request
gp.sched.sp = new.hi - used
- casgstatus(gp, _Gcopystack, oldstatus) // oldstatus is Gwaiting or Grunnable
-
// free old stack
if stackPoisonCopy != 0 {
fillstack(old, 0xfc)
@@ -669,6 +666,14 @@ func newstack() {
gothrow("runtime: split stack overflow")
}
+ if gp.sched.ctxt != nil {
+ // morestack wrote sched.ctxt on its way in here,
+ // without a write barrier. Run the write barrier now.
+ // It is not possible to be preempted between then
+ // and now, so it's okay.
+ writebarrierptr_nostore((*uintptr)(unsafe.Pointer(&gp.sched.ctxt)), uintptr(gp.sched.ctxt))
+ }
+
if gp.stackguard0 == stackPreempt {
if gp == thisg.m.g0 {
gothrow("runtime: preempt g0")
@@ -677,7 +682,12 @@ func newstack() {
gothrow("runtime: g is running but p is not")
}
if gp.preemptscan {
+ for !castogscanstatus(gp, _Gwaiting, _Gscanwaiting) {
+ // Likely to be racing with the GC as it sees a _Gwaiting and does the stack scan.
+ // If so this stack will be scanned twice which does not change correctness.
+ }
gcphasework(gp)
+ casfrom_Gscanstatus(gp, _Gscanwaiting, _Gwaiting)
casgstatus(gp, _Gwaiting, _Grunning)
gp.stackguard0 = gp.stack.lo + _StackGuard
gp.preempt = false
@@ -708,13 +718,15 @@ func newstack() {
gothrow("stack overflow")
}
- // Note that the concurrent GC might be scanning the stack as we try to replace it.
- // copystack takes care of the appropriate coordination with the stack scanner.
+ casgstatus(gp, _Gwaiting, _Gcopystack)
+
+ // The concurrent GC will not scan the stack while we are doing the copy since
+ // the gp is in a Gcopystack status.
copystack(gp, uintptr(newsize))
if stackDebug >= 1 {
print("stack grow done\n")
}
- casgstatus(gp, _Gwaiting, _Grunning)
+ casgstatus(gp, _Gcopystack, _Grunning)
gogo(&gp.sched)
}
@@ -767,17 +779,17 @@ func shrinkstack(gp *g) {
if gp.syscallsp != 0 {
return
}
-
- /* TODO
- if goos_windows && gp.m != nil && gp.m.libcallsp != 0 {
+ if goos_windows != 0 && gp.m != nil && gp.m.libcallsp != 0 {
return
}
- */
if stackDebug > 0 {
print("shrinking stack ", oldsize, "->", newsize, "\n")
}
+
+ oldstatus := casgcopystack(gp)
copystack(gp, newsize)
+ casgstatus(gp, _Gcopystack, oldstatus)
}
// Do any delayed stack freeing that was queued up during GC.
diff --git a/src/runtime/stubs.go b/src/runtime/stubs.go
index aa7577cf9..4408e22bf 100644
--- a/src/runtime/stubs.go
+++ b/src/runtime/stubs.go
@@ -231,3 +231,8 @@ func call536870912(fn, arg unsafe.Pointer, n, retoffset uint32)
func call1073741824(fn, arg unsafe.Pointer, n, retoffset uint32)
func systemstack_switch()
+
+func prefetcht0(addr uintptr)
+func prefetcht1(addr uintptr)
+func prefetcht2(addr uintptr)
+func prefetchnta(addr uintptr)
diff --git a/src/runtime/vdso_none.go b/src/runtime/vdso_none.go
index ac6f8cb18..6f83ecc89 100644
--- a/src/runtime/vdso_none.go
+++ b/src/runtime/vdso_none.go
@@ -3,6 +3,7 @@
// license that can be found in the LICENSE file.
// +build !linux !amd64
+// +build !linux !386
package runtime
diff --git a/src/runtime/wbfat.go b/src/runtime/wbfat.go
new file mode 100644
index 000000000..75c58b26b
--- /dev/null
+++ b/src/runtime/wbfat.go
@@ -0,0 +1,190 @@
+// generated by wbfat_gen.go; use go generate
+
+package runtime
+
+//go:nosplit
+func writebarrierfat01(dst *[2]uintptr, _ *byte, src [2]uintptr) {
+ dst[0] = src[0]
+ writebarrierptr(&dst[1], src[1])
+}
+
+//go:nosplit
+func writebarrierfat10(dst *[2]uintptr, _ *byte, src [2]uintptr) {
+ writebarrierptr(&dst[0], src[0])
+ dst[1] = src[1]
+}
+
+//go:nosplit
+func writebarrierfat11(dst *[2]uintptr, _ *byte, src [2]uintptr) {
+ writebarrierptr(&dst[0], src[0])
+ writebarrierptr(&dst[1], src[1])
+}
+
+//go:nosplit
+func writebarrierfat001(dst *[3]uintptr, _ *byte, src [3]uintptr) {
+ dst[0] = src[0]
+ dst[1] = src[1]
+ writebarrierptr(&dst[2], src[2])
+}
+
+//go:nosplit
+func writebarrierfat010(dst *[3]uintptr, _ *byte, src [3]uintptr) {
+ dst[0] = src[0]
+ writebarrierptr(&dst[1], src[1])
+ dst[2] = src[2]
+}
+
+//go:nosplit
+func writebarrierfat011(dst *[3]uintptr, _ *byte, src [3]uintptr) {
+ dst[0] = src[0]
+ writebarrierptr(&dst[1], src[1])
+ writebarrierptr(&dst[2], src[2])
+}
+
+//go:nosplit
+func writebarrierfat100(dst *[3]uintptr, _ *byte, src [3]uintptr) {
+ writebarrierptr(&dst[0], src[0])
+ dst[1] = src[1]
+ dst[2] = src[2]
+}
+
+//go:nosplit
+func writebarrierfat101(dst *[3]uintptr, _ *byte, src [3]uintptr) {
+ writebarrierptr(&dst[0], src[0])
+ dst[1] = src[1]
+ writebarrierptr(&dst[2], src[2])
+}
+
+//go:nosplit
+func writebarrierfat110(dst *[3]uintptr, _ *byte, src [3]uintptr) {
+ writebarrierptr(&dst[0], src[0])
+ writebarrierptr(&dst[1], src[1])
+ dst[2] = src[2]
+}
+
+//go:nosplit
+func writebarrierfat111(dst *[3]uintptr, _ *byte, src [3]uintptr) {
+ writebarrierptr(&dst[0], src[0])
+ writebarrierptr(&dst[1], src[1])
+ writebarrierptr(&dst[2], src[2])
+}
+
+//go:nosplit
+func writebarrierfat0001(dst *[4]uintptr, _ *byte, src [4]uintptr) {
+ dst[0] = src[0]
+ dst[1] = src[1]
+ dst[2] = src[2]
+ writebarrierptr(&dst[3], src[3])
+}
+
+//go:nosplit
+func writebarrierfat0010(dst *[4]uintptr, _ *byte, src [4]uintptr) {
+ dst[0] = src[0]
+ dst[1] = src[1]
+ writebarrierptr(&dst[2], src[2])
+ dst[3] = src[3]
+}
+
+//go:nosplit
+func writebarrierfat0011(dst *[4]uintptr, _ *byte, src [4]uintptr) {
+ dst[0] = src[0]
+ dst[1] = src[1]
+ writebarrierptr(&dst[2], src[2])
+ writebarrierptr(&dst[3], src[3])
+}
+
+//go:nosplit
+func writebarrierfat0100(dst *[4]uintptr, _ *byte, src [4]uintptr) {
+ dst[0] = src[0]
+ writebarrierptr(&dst[1], src[1])
+ dst[2] = src[2]
+ dst[3] = src[3]
+}
+
+//go:nosplit
+func writebarrierfat0101(dst *[4]uintptr, _ *byte, src [4]uintptr) {
+ dst[0] = src[0]
+ writebarrierptr(&dst[1], src[1])
+ dst[2] = src[2]
+ writebarrierptr(&dst[3], src[3])
+}
+
+//go:nosplit
+func writebarrierfat0110(dst *[4]uintptr, _ *byte, src [4]uintptr) {
+ dst[0] = src[0]
+ writebarrierptr(&dst[1], src[1])
+ writebarrierptr(&dst[2], src[2])
+ dst[3] = src[3]
+}
+
+//go:nosplit
+func writebarrierfat0111(dst *[4]uintptr, _ *byte, src [4]uintptr) {
+ dst[0] = src[0]
+ writebarrierptr(&dst[1], src[1])
+ writebarrierptr(&dst[2], src[2])
+ writebarrierptr(&dst[3], src[3])
+}
+
+//go:nosplit
+func writebarrierfat1000(dst *[4]uintptr, _ *byte, src [4]uintptr) {
+ writebarrierptr(&dst[0], src[0])
+ dst[1] = src[1]
+ dst[2] = src[2]
+ dst[3] = src[3]
+}
+
+//go:nosplit
+func writebarrierfat1001(dst *[4]uintptr, _ *byte, src [4]uintptr) {
+ writebarrierptr(&dst[0], src[0])
+ dst[1] = src[1]
+ dst[2] = src[2]
+ writebarrierptr(&dst[3], src[3])
+}
+
+//go:nosplit
+func writebarrierfat1010(dst *[4]uintptr, _ *byte, src [4]uintptr) {
+ writebarrierptr(&dst[0], src[0])
+ dst[1] = src[1]
+ writebarrierptr(&dst[2], src[2])
+ dst[3] = src[3]
+}
+
+//go:nosplit
+func writebarrierfat1011(dst *[4]uintptr, _ *byte, src [4]uintptr) {
+ writebarrierptr(&dst[0], src[0])
+ dst[1] = src[1]
+ writebarrierptr(&dst[2], src[2])
+ writebarrierptr(&dst[3], src[3])
+}
+
+//go:nosplit
+func writebarrierfat1100(dst *[4]uintptr, _ *byte, src [4]uintptr) {
+ writebarrierptr(&dst[0], src[0])
+ writebarrierptr(&dst[1], src[1])
+ dst[2] = src[2]
+ dst[3] = src[3]
+}
+
+//go:nosplit
+func writebarrierfat1101(dst *[4]uintptr, _ *byte, src [4]uintptr) {
+ writebarrierptr(&dst[0], src[0])
+ writebarrierptr(&dst[1], src[1])
+ dst[2] = src[2]
+ writebarrierptr(&dst[3], src[3])
+}
+
+//go:nosplit
+func writebarrierfat1110(dst *[4]uintptr, _ *byte, src [4]uintptr) {
+ writebarrierptr(&dst[0], src[0])
+ writebarrierptr(&dst[1], src[1])
+ writebarrierptr(&dst[2], src[2])
+ dst[3] = src[3]
+}
+
+//go:nosplit
+func writebarrierfat1111(dst *[4]uintptr, _ *byte, src [4]uintptr) {
+ writebarrierptr(&dst[0], src[0])
+ writebarrierptr(&dst[1], src[1])
+ writebarrierptr(&dst[2], src[2])
+ writebarrierptr(&dst[3], src[3])
+}
diff --git a/src/runtime/wbfat_gen.go b/src/runtime/wbfat_gen.go
new file mode 100644
index 000000000..78d5b6271
--- /dev/null
+++ b/src/runtime/wbfat_gen.go
@@ -0,0 +1,41 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+package main
+
+import (
+ "flag"
+ "fmt"
+ "log"
+ "os"
+)
+
+func main() {
+ flag.Parse()
+ if flag.NArg() > 0 {
+ f, err := os.Create(flag.Arg(0))
+ if err != nil {
+ log.Fatal(err)
+ }
+ os.Stdout = f
+ }
+ fmt.Printf("// generated by wbfat_gen.go; use go generate\n\n")
+ fmt.Printf("package runtime\n")
+ for i := uint(2); i <= 4; i++ {
+ for j := 1; j < 1<<i; j++ {
+ fmt.Printf("\n//go:nosplit\n")
+ fmt.Printf("func writebarrierfat%0*b(dst *[%d]uintptr, _ *byte, src [%d]uintptr) {\n", int(i), j, i, i)
+ for k := uint(0); k < i; k++ {
+ if j&(1<<(i-1-k)) != 0 {
+ fmt.Printf("\twritebarrierptr(&dst[%d], src[%d])\n", k, k)
+ } else {
+ fmt.Printf("\tdst[%d] = src[%d]\n", k, k)
+ }
+ }
+ fmt.Printf("}\n")
+ }
+ }
+}
diff --git a/src/sync/atomic/atomic_test.go b/src/sync/atomic/atomic_test.go
index 9f13af48b..ec573aa8c 100644
--- a/src/sync/atomic/atomic_test.go
+++ b/src/sync/atomic/atomic_test.go
@@ -164,7 +164,7 @@ func TestSwapPointer(t *testing.T) {
x.before = magicptr
x.after = magicptr
var j uintptr
- for delta := uintptr(1); delta+delta > delta; delta += delta {
+ for delta := uintptr(1 << 16); delta+delta > delta; delta += delta {
k := SwapPointer(&x.i, unsafe.Pointer(delta))
if uintptr(x.i) != delta || uintptr(k) != j {
t.Fatalf("delta=%d i=%d j=%d k=%d", delta, x.i, j, k)
@@ -456,7 +456,7 @@ func TestCompareAndSwapPointer(t *testing.T) {
magicptr := uintptr(m)
x.before = magicptr
x.after = magicptr
- for val := uintptr(1); val+val > val; val += val {
+ for val := uintptr(1 << 16); val+val > val; val += val {
x.i = unsafe.Pointer(val)
if !CompareAndSwapPointer(&x.i, unsafe.Pointer(val), unsafe.Pointer(val+1)) {
t.Fatalf("should have swapped %#x %#x", val, val+1)
@@ -595,7 +595,7 @@ func TestLoadPointer(t *testing.T) {
magicptr := uintptr(m)
x.before = magicptr
x.after = magicptr
- for delta := uintptr(1); delta+delta > delta; delta += delta {
+ for delta := uintptr(1 << 16); delta+delta > delta; delta += delta {
k := LoadPointer(&x.i)
if k != x.i {
t.Fatalf("delta=%d i=%d k=%d", delta, x.i, k)
@@ -731,7 +731,7 @@ func TestStorePointer(t *testing.T) {
x.before = magicptr
x.after = magicptr
v := unsafe.Pointer(uintptr(0))
- for delta := uintptr(1); delta+delta > delta; delta += delta {
+ for delta := uintptr(1 << 16); delta+delta > delta; delta += delta {
StorePointer(&x.i, unsafe.Pointer(v))
if x.i != v {
t.Fatalf("delta=%d i=%d v=%d", delta, x.i, v)
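
The four pointer-based atomic tests change only their starting delta, from 1 to 1<<16. The doubling loop itself is unchanged and still terminates when delta+delta wraps to zero; starting at 1<<16 presumably keeps the fabricated unsafe.Pointer values out of the low end of the address space, which the stricter pointer checks on this branch are most likely to reject. A quick stand-alone check of how many values the revised loop still exercises:

// Sketch: counts the powers of two visited by the revised test loop.
package main

import "fmt"

func main() {
	n := 0
	for delta := uintptr(1 << 16); delta+delta > delta; delta += delta {
		n++
	}
	// Expect 47 on a 64-bit platform (exponents 16 through 62) and 15 on
	// a 32-bit platform (exponents 16 through 30).
	fmt.Println("powers of two exercised:", n)
}
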
diff --git a/test/live.go b/test/live.go
index f69d0a4c1..62c6a0b0e 100644
--- a/test/live.go
+++ b/test/live.go
@@ -9,20 +9,39 @@
package main
+func printnl()
+
+//go:noescape
+func printpointer(**int)
+
+//go:noescape
+func printintpointer(*int)
+
+//go:noescape
+func printstringpointer(*string)
+
+//go:noescape
+func printstring(string)
+
+//go:noescape
+func printbytepointer(*byte)
+
+func printint(int)
+
func f1() {
var x *int
- print(&x) // ERROR "live at call to printpointer: x$"
- print(&x) // ERROR "live at call to printpointer: x$"
+ printpointer(&x) // ERROR "live at call to printpointer: x$"
+ printpointer(&x) // ERROR "live at call to printpointer: x$"
}
func f2(b bool) {
if b {
- print(0) // nothing live here
+ printint(0) // nothing live here
return
}
var x *int
- print(&x) // ERROR "live at call to printpointer: x$"
- print(&x) // ERROR "live at call to printpointer: x$"
+ printpointer(&x) // ERROR "live at call to printpointer: x$"
+ printpointer(&x) // ERROR "live at call to printpointer: x$"
}
func f3(b bool) {
@@ -30,22 +49,22 @@ func f3(b bool) {
// live throughout the function, to avoid being poisoned
// in GODEBUG=gcdead=1 mode.
- print(0) // ERROR "live at call to printint: x y$"
+ printint(0) // ERROR "live at call to printint: x y$"
if b == false {
- print(0) // ERROR "live at call to printint: x y$"
+ printint(0) // ERROR "live at call to printint: x y$"
return
}
if b {
var x *int
- print(&x) // ERROR "live at call to printpointer: x y$"
- print(&x) // ERROR "live at call to printpointer: x y$"
+ printpointer(&x) // ERROR "live at call to printpointer: x y$"
+ printpointer(&x) // ERROR "live at call to printpointer: x y$"
} else {
var y *int
- print(&y) // ERROR "live at call to printpointer: x y$"
- print(&y) // ERROR "live at call to printpointer: x y$"
+ printpointer(&y) // ERROR "live at call to printpointer: x y$"
+ printpointer(&y) // ERROR "live at call to printpointer: x y$"
}
- print(0) // ERROR "live at call to printint: x y$" "x \(type \*int\) is ambiguously live" "y \(type \*int\) is ambiguously live"
+ printint(0) // ERROR "live at call to printint: x y$" "x \(type \*int\) is ambiguously live" "y \(type \*int\) is ambiguously live"
}
// The old algorithm treated x as live on all code that
@@ -56,20 +75,20 @@ func f3(b bool) {
func f4(b1, b2 bool) { // x not live here
if b2 {
- print(0) // x not live here
+ printint(0) // x not live here
return
}
var z **int
x := new(int)
*x = 42
z = &x
- print(**z) // ERROR "live at call to printint: x z$"
+ printint(**z) // ERROR "live at call to printint: x z$"
if b2 {
- print(1) // ERROR "live at call to printint: x$"
+ printint(1) // ERROR "live at call to printint: x$"
return
}
for {
- print(**z) // ERROR "live at call to printint: x z$"
+ printint(**z) // ERROR "live at call to printint: x z$"
}
}
@@ -84,7 +103,7 @@ func f5(b1 bool) {
*y = 54
z = &y
}
- print(**z) // ERROR "live at call to printint: x y$" "x \(type \*int\) is ambiguously live" "y \(type \*int\) is ambiguously live"
+ printint(**z) // ERROR "live at call to printint: x y$" "x \(type \*int\) is ambiguously live" "y \(type \*int\) is ambiguously live"
}
// confusion about the _ result used to cause spurious "live at entry to f6: _".
@@ -155,8 +174,8 @@ func f11b() *int {
// At this point p is dead: the code here cannot
// get to the bottom of the function.
// This used to have a spurious "live at call to printint: p".
- print(1) // nothing live here!
- select { // ERROR "live at call to newselect: autotmp" "live at call to selectgo: autotmp"
+ printint(1) // nothing live here!
+ select { // ERROR "live at call to newselect: autotmp" "live at call to selectgo: autotmp"
case <-c: // ERROR "live at call to selectrecv: autotmp"
return nil
case <-c: // ERROR "live at call to selectrecv: autotmp"
@@ -172,8 +191,8 @@ func f11c() *int {
if b {
// Unlike previous, the cases in this select fall through,
// so we can get to the println, so p is not dead.
- print(1) // ERROR "live at call to printint: p"
- select { // ERROR "live at call to newselect: autotmp.* p" "live at call to selectgo: autotmp.* p"
+ printint(1) // ERROR "live at call to printint: p"
+ select { // ERROR "live at call to newselect: autotmp.* p" "live at call to selectgo: autotmp.* p"
case <-c: // ERROR "live at call to selectrecv: autotmp.* p"
case <-c: // ERROR "live at call to selectrecv: autotmp.* p"
}
@@ -209,7 +228,7 @@ func h13(string, string) string
func f14() {
x := g14()
- print(&x) // ERROR "live at call to printpointer: x"
+ printstringpointer(&x) // ERROR "live at call to printstringpointer: x"
}
func g14() string
@@ -217,8 +236,8 @@ func g14() string
func f15() {
var x string
_ = &x
- x = g15() // ERROR "live at call to g15: x"
- print(x) // ERROR "live at call to printstring: x"
+ x = g15() // ERROR "live at call to g15: x"
+ printstring(x) // ERROR "live at call to printstring: x"
}
func g15() string
@@ -282,7 +301,7 @@ func f18() {
}
z = m2[g18()] // ERROR "live at call to mapaccess1: autotmp_[0-9]+$"
z = m2[g18()] // ERROR "live at call to mapaccess1: autotmp_[0-9]+$"
- print(z)
+ printbytepointer(z)
}
var ch chan *byte
@@ -296,7 +315,7 @@ func f19() {
}
z = <-ch // ERROR "live at call to chanrecv1: autotmp_[0-9]+$"
z = <-ch // ERROR "live at call to chanrecv1: autotmp_[0-9]+$"
- print(z)
+ printbytepointer(z)
}
func f20() {
@@ -316,7 +335,7 @@ func f21() {
}
z = m2[[2]string{"x", "y"}] // ERROR "live at call to mapaccess1: autotmp_[0-9]+$"
z = m2[[2]string{"x", "y"}] // ERROR "live at call to mapaccess1: autotmp_[0-9]+$"
- print(z)
+ printbytepointer(z)
}
func f23() {
@@ -328,7 +347,8 @@ func f23() {
}
z, ok = m2[[2]string{"x", "y"}] // ERROR "live at call to mapaccess2: autotmp_[0-9]+$"
z, ok = m2[[2]string{"x", "y"}] // ERROR "live at call to mapaccess2: autotmp_[0-9]+$"
- print(z, ok)
+ printbytepointer(z)
+ print(ok)
}
func f24() {
@@ -350,8 +370,8 @@ func f25(b bool) {
}
var x string
_ = &x
- x = g15() // ERROR "live at call to g15: x"
- print(x) // ERROR "live at call to printstring: x"
+ x = g15() // ERROR "live at call to g15: x"
+ printstring(x) // ERROR "live at call to printstring: x"
} // ERROR "live at call to deferreturn: x"
func g25()
@@ -366,7 +386,7 @@ func f26(b bool) {
}
print26((*int)(nil), (*int)(nil), (*int)(nil)) // ERROR "live at call to print26: autotmp_[0-9]+$"
print26((*int)(nil), (*int)(nil), (*int)(nil)) // ERROR "live at call to print26: autotmp_[0-9]+$"
- println()
+ printnl()
}
//go:noescape
@@ -381,7 +401,7 @@ func f27(b bool) {
}
call27(func() { x++ }) // ERROR "live at call to call27: autotmp_[0-9]+$"
call27(func() { x++ }) // ERROR "live at call to call27: autotmp_[0-9]+$"
- println()
+ printnl()
}
// but defer does escape to later execution in the function
@@ -392,7 +412,7 @@ func f27defer(b bool) {
defer call27(func() { x++ }) // ERROR "live at call to deferproc: autotmp_[0-9]+$" "live at call to deferreturn: autotmp_[0-9]+$"
}
defer call27(func() { x++ }) // ERROR "live at call to deferproc: autotmp_[0-9]+ autotmp_[0-9]+$" "live at call to deferreturn: autotmp_[0-9]+ autotmp_[0-9]+$" "ambiguously live"
- println() // ERROR "live at call to printnl: autotmp_[0-9]+ autotmp_[0-9]+$"
+ printnl() // ERROR "live at call to printnl: autotmp_[0-9]+ autotmp_[0-9]+$"
} // ERROR "live at call to deferreturn: autotmp_[0-9]+ autotmp_[0-9]+$"
// and newproc (go) escapes to the heap
@@ -403,7 +423,7 @@ func f27go(b bool) {
go call27(func() { x++ }) // ERROR "live at call to newobject: &x" "live at call to newproc: &x$"
}
go call27(func() { x++ }) // ERROR "live at call to newobject: &x"
- println()
+ printnl()
}
//go:noescape
@@ -415,10 +435,10 @@ var s1, s2, s3, s4, s5, s6, s7, s8, s9, s10 string
func f28(b bool) {
if b {
- print(s1 + s2 + s3 + s4 + s5 + s6 + s7 + s8 + s9 + s10) // ERROR "live at call to concatstrings: autotmp_[0-9]+$" "live at call to printstring: autotmp_[0-9]+$"
+ printstring(s1 + s2 + s3 + s4 + s5 + s6 + s7 + s8 + s9 + s10) // ERROR "live at call to concatstrings: autotmp_[0-9]+$" "live at call to printstring: autotmp_[0-9]+$"
}
- print(s1 + s2 + s3 + s4 + s5 + s6 + s7 + s8 + s9 + s10) // ERROR "live at call to concatstrings: autotmp_[0-9]+$" "live at call to printstring: autotmp_[0-9]+$"
- print(s1 + s2 + s3 + s4 + s5 + s6 + s7 + s8 + s9 + s10) // ERROR "live at call to concatstrings: autotmp_[0-9]+$" "live at call to printstring: autotmp_[0-9]+$"
+ printstring(s1 + s2 + s3 + s4 + s5 + s6 + s7 + s8 + s9 + s10) // ERROR "live at call to concatstrings: autotmp_[0-9]+$" "live at call to printstring: autotmp_[0-9]+$"
+ printstring(s1 + s2 + s3 + s4 + s5 + s6 + s7 + s8 + s9 + s10) // ERROR "live at call to concatstrings: autotmp_[0-9]+$" "live at call to printstring: autotmp_[0-9]+$"
}
// map iterator should die on end of range loop
@@ -426,14 +446,14 @@ func f28(b bool) {
func f29(b bool) {
if b {
for k := range m { // ERROR "live at call to mapiterinit: autotmp_[0-9]+$" "live at call to mapiternext: autotmp_[0-9]+$"
- print(k) // ERROR "live at call to printstring: autotmp_[0-9]+$"
+ printstring(k) // ERROR "live at call to printstring: autotmp_[0-9]+$"
}
}
for k := range m { // ERROR "live at call to mapiterinit: autotmp_[0-9]+$" "live at call to mapiternext: autotmp_[0-9]+$"
- print(k) // ERROR "live at call to printstring: autotmp_[0-9]+$"
+ printstring(k) // ERROR "live at call to printstring: autotmp_[0-9]+$"
}
for k := range m { // ERROR "live at call to mapiterinit: autotmp_[0-9]+$" "live at call to mapiternext: autotmp_[0-9]+$"
- print(k) // ERROR "live at call to printstring: autotmp_[0-9]+$"
+ printstring(k) // ERROR "live at call to printstring: autotmp_[0-9]+$"
}
}
@@ -446,14 +466,14 @@ func f30(b bool) {
// the copy of ptrarr and the internal iterator pointer.
if b {
for _, p := range ptrarr {
- print(p) // ERROR "live at call to printpointer: autotmp_[0-9]+ autotmp_[0-9]+$"
+ printintpointer(p) // ERROR "live at call to printintpointer: autotmp_[0-9]+ autotmp_[0-9]+$"
}
}
for _, p := range ptrarr {
- print(p) // ERROR "live at call to printpointer: autotmp_[0-9]+ autotmp_[0-9]+$"
+ printintpointer(p) // ERROR "live at call to printintpointer: autotmp_[0-9]+ autotmp_[0-9]+$"
}
for _, p := range ptrarr {
- print(p) // ERROR "live at call to printpointer: autotmp_[0-9]+ autotmp_[0-9]+$"
+ printintpointer(p) // ERROR "live at call to printintpointer: autotmp_[0-9]+ autotmp_[0-9]+$"
}
}
@@ -503,44 +523,44 @@ var m33 map[interface{}]int
func f33() {
if m33[nil] == 0 { // ERROR "live at call to mapaccess1: autotmp_[0-9]+$"
- println()
+ printnl()
return
} else {
- println()
+ printnl()
}
- println()
+ printnl()
}
func f34() {
if m33[nil] == 0 { // ERROR "live at call to mapaccess1: autotmp_[0-9]+$"
- println()
+ printnl()
return
}
- println()
+ printnl()
}
func f35() {
if m33[nil] == 0 && m33[nil] == 0 { // ERROR "live at call to mapaccess1: autotmp_[0-9]+$"
- println()
+ printnl()
return
}
- println()
+ printnl()
}
func f36() {
if m33[nil] == 0 || m33[nil] == 0 { // ERROR "live at call to mapaccess1: autotmp_[0-9]+$"
- println()
+ printnl()
return
}
- println()
+ printnl()
}
func f37() {
if (m33[nil] == 0 || m33[nil] == 0) && m33[nil] == 0 { // ERROR "live at call to mapaccess1: autotmp_[0-9]+$"
- println()
+ printnl()
return
}
- println()
+ printnl()
}
// select temps should disappear in the case bodies
@@ -558,44 +578,44 @@ func f38(b bool) {
if b {
select { // ERROR "live at call"
case <-fc38(): // ERROR "live at call"
- println()
+ printnl()
case fc38() <- *fi38(1): // ERROR "live at call"
- println()
+ printnl()
case *fi38(2) = <-fc38(): // ERROR "live at call"
- println()
+ printnl()
case *fi38(3), *fb38() = <-fc38(): // ERROR "live at call"
- println()
+ printnl()
}
- println()
+ printnl()
}
- println()
+ printnl()
}
// issue 8097: mishandling of x = x during return.
func f39() (x []int) {
x = []int{1}
- println() // ERROR "live at call to printnl: x"
+ printnl() // ERROR "live at call to printnl: x"
return x
}
func f39a() (x []int) {
x = []int{1}
- println() // ERROR "live at call to printnl: x"
+ printnl() // ERROR "live at call to printnl: x"
return
}
func f39b() (x [10]*int) {
x = [10]*int{}
x[0] = new(int) // ERROR "live at call to newobject: x"
- println() // ERROR "live at call to printnl: x"
+ printnl() // ERROR "live at call to printnl: x"
return x
}
func f39c() (x [10]*int) {
x = [10]*int{}
x[0] = new(int) // ERROR "live at call to newobject: x"
- println() // ERROR "live at call to printnl: x"
+ printnl() // ERROR "live at call to printnl: x"
return
}
@@ -615,13 +635,13 @@ func newT40() *T40 {
func bad40() {
t := newT40()
_ = t
- println()
+ printnl()
}
func good40() {
ret := T40{}
ret.m = make(map[int]int) // ERROR "live at call to makemap: ret"
t := &ret
- println() // ERROR "live at call to printnl: ret"
+ printnl() // ERROR "live at call to printnl: ret"
_ = t
}
diff --git a/test/live2.go b/test/live2.go
index ef6ad994c..1bd0af2cc 100644
--- a/test/live2.go
+++ b/test/live2.go
@@ -12,6 +12,8 @@ package main
// issue 8142: lost 'addrtaken' bit on inlined variables.
// no inlining in this test, so just checking that non-inlined works.
+func printnl()
+
type T40 struct {
m map[int]int
}
@@ -24,7 +26,7 @@ func newT40() *T40 {
func bad40() {
t := newT40() // ERROR "live at call to makemap: ret"
- println() // ERROR "live at call to printnl: ret"
+ printnl() // ERROR "live at call to printnl: ret"
_ = t
}
@@ -32,6 +34,6 @@ func good40() {
ret := T40{}
ret.m = make(map[int]int) // ERROR "live at call to makemap: ret"
t := &ret
- println() // ERROR "live at call to printnl: ret"
+ printnl() // ERROR "live at call to printnl: ret"
_ = t
}