From 5756b3560141d0c09c4a27d2025f5438f49f59f2 Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Thu, 10 Sep 2020 21:20:46 +0000 Subject: runtime: align 12-byte objects to 8 bytes on 32-bit systems Currently on 32-bit systems 8-byte fields in a struct have an alignment of 4 bytes, which means that atomic instructions may fault. This issue is tracked in #36606. Our current workaround is to allocate memory and put any such atomically accessed fields at the beginning of the object. This workaround fails because the tiny allocator might not align the object right. This case specifically only happens with 12-byte objects because a type's size is rounded up to its alignment. So if e.g. we have a type like: type obj struct { a uint64 b byte } then its size will be 12 bytes, because "a" will require a 4 byte alignment. This argument may be extended to all objects of size 9-15 bytes. So, make this workaround work by specifically aligning such objects to 8 bytes on 32-bit systems. This change leaves a TODO to remove the code once #36606 gets resolved. It also adds a test which will presumably no longer be necessary (the compiler should enforce the right alignment) when it gets resolved as well. Fixes #37262. Change-Id: I3a34e5b014b3c37ed2e5e75e62d71d8640aa42bc Reviewed-on: https://go-review.googlesource.com/c/go/+/254057 Reviewed-by: Cherry Zhang Reviewed-by: Austin Clements Run-TryBot: Cherry Zhang TryBot-Result: Go Bot Trust: Michael Knyszek --- src/runtime/malloc.go | 8 +++++++ src/runtime/malloc_test.go | 57 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) (limited to 'src/runtime') diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go index 4fa14996c2..c71f856f09 100644 --- a/src/runtime/malloc.go +++ b/src/runtime/malloc.go @@ -1016,6 +1016,14 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer { // Align tiny pointer for required (conservative) alignment. if size&7 == 0 { off = alignUp(off, 8) + } else if sys.PtrSize == 4 && size == 12 { + // Conservatively align 12-byte objects to 8 bytes on 32-bit + // systems so that objects whose first field is a 64-bit + // value is aligned to 8 bytes and does not cause a fault on + // atomic access. See issue 37262. + // TODO(mknyszek): Remove this workaround if/when issue 36606 + // is resolved. + off = alignUp(off, 8) } else if size&3 == 0 { off = alignUp(off, 4) } else if size&1 == 0 { diff --git a/src/runtime/malloc_test.go b/src/runtime/malloc_test.go index 5c97f548fd..4ba94d0494 100644 --- a/src/runtime/malloc_test.go +++ b/src/runtime/malloc_test.go @@ -12,8 +12,10 @@ import ( "os" "os/exec" "reflect" + "runtime" . "runtime" "strings" + "sync/atomic" "testing" "time" "unsafe" @@ -168,6 +170,61 @@ func TestTinyAlloc(t *testing.T) { } } +var ( + tinyByteSink *byte + tinyUint32Sink *uint32 + tinyObj12Sink *obj12 +) + +type obj12 struct { + a uint64 + b uint32 +} + +func TestTinyAllocIssue37262(t *testing.T) { + // Try to cause an alignment access fault + // by atomically accessing the first 64-bit + // value of a tiny-allocated object. + // See issue 37262 for details. + + // GC twice, once to reach a stable heap state + // and again to make sure we finish the sweep phase. + runtime.GC() + runtime.GC() + + // Make 1-byte allocations until we get a fresh tiny slot. 
+ aligned := false + for i := 0; i < 16; i++ { + tinyByteSink = new(byte) + if uintptr(unsafe.Pointer(tinyByteSink))&0xf == 0xf { + aligned = true + break + } + } + if !aligned { + t.Fatal("unable to get a fresh tiny slot") + } + + // Create a 4-byte object so that the current + // tiny slot is partially filled. + tinyUint32Sink = new(uint32) + + // Create a 12-byte object, which fits into the + // tiny slot. If it actually gets place there, + // then the field "a" will be improperly aligned + // for atomic access on 32-bit architectures. + // This won't be true if issue 36606 gets resolved. + tinyObj12Sink = new(obj12) + + // Try to atomically access "x.a". + atomic.StoreUint64(&tinyObj12Sink.a, 10) + + // Clear the sinks. + tinyByteSink = nil + tinyUint32Sink = nil + tinyObj12Sink = nil +} + func TestPageCacheLeak(t *testing.T) { defer GOMAXPROCS(GOMAXPROCS(1)) leaked := PageCachePagesLeaked() -- cgit v1.2.1 From fe2cfb74ba6352990f5b41260b99e80f78e4a90a Mon Sep 17 00:00:00 2001 From: Keith Randall Date: Thu, 1 Oct 2020 14:49:33 -0700 Subject: all: drop 387 support My last 387 CL. So sad ... ... ... ... not! Fixes #40255 Change-Id: I8d4ddb744b234b8adc735db2f7c3c7b6d8bbdfa4 Reviewed-on: https://go-review.googlesource.com/c/go/+/258957 Trust: Keith Randall Run-TryBot: Keith Randall TryBot-Result: Go Bot Reviewed-by: Cherry Zhang --- src/runtime/mkpreempt.go | 33 +++++++++------------------------ src/runtime/preempt_386.s | 45 ++++++++++++++++++--------------------------- src/runtime/vlrt.go | 5 ++--- 3 files changed, 29 insertions(+), 54 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/mkpreempt.go b/src/runtime/mkpreempt.go index c2e14cdcd6..c5bfb0f207 100644 --- a/src/runtime/mkpreempt.go +++ b/src/runtime/mkpreempt.go @@ -190,40 +190,25 @@ func (l *layout) restore() { func gen386() { p("PUSHFL") - // Save general purpose registers. + // Assign stack offsets. var l = layout{sp: "SP"} for _, reg := range regNames386 { - if reg == "SP" || strings.HasPrefix(reg, "X") { + if reg == "SP" { continue } - l.add("MOVL", reg, 4) - } - - // Save the 387 state. - l.addSpecial( - "FSAVE %d(SP)\nFLDCW runtime·controlWord64(SB)", - "FRSTOR %d(SP)", - 108) - - // Save SSE state only if supported. 
- lSSE := layout{stack: l.stack, sp: "SP"} - for i := 0; i < 8; i++ { - lSSE.add("MOVUPS", fmt.Sprintf("X%d", i), 16) + if strings.HasPrefix(reg, "X") { + l.add("MOVUPS", reg, 16) + } else { + l.add("MOVL", reg, 4) + } } - p("ADJSP $%d", lSSE.stack) + p("ADJSP $%d", l.stack) p("NOP SP") l.save() - p("CMPB internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1\nJNE nosse") - lSSE.save() - label("nosse:") p("CALL ·asyncPreempt2(SB)") - p("CMPB internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1\nJNE nosse2") - lSSE.restore() - label("nosse2:") l.restore() - p("ADJSP $%d", -lSSE.stack) - + p("ADJSP $%d", -l.stack) p("POPFL") p("RET") } diff --git a/src/runtime/preempt_386.s b/src/runtime/preempt_386.s index a00ac8f385..5c9b8ea224 100644 --- a/src/runtime/preempt_386.s +++ b/src/runtime/preempt_386.s @@ -5,7 +5,7 @@ TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 PUSHFL - ADJSP $264 + ADJSP $156 NOP SP MOVL AX, 0(SP) MOVL CX, 4(SP) @@ -14,32 +14,23 @@ TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 MOVL BP, 16(SP) MOVL SI, 20(SP) MOVL DI, 24(SP) - FSAVE 28(SP) - FLDCW runtime·controlWord64(SB) - CMPB internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1 - JNE nosse - MOVUPS X0, 136(SP) - MOVUPS X1, 152(SP) - MOVUPS X2, 168(SP) - MOVUPS X3, 184(SP) - MOVUPS X4, 200(SP) - MOVUPS X5, 216(SP) - MOVUPS X6, 232(SP) - MOVUPS X7, 248(SP) -nosse: + MOVUPS X0, 28(SP) + MOVUPS X1, 44(SP) + MOVUPS X2, 60(SP) + MOVUPS X3, 76(SP) + MOVUPS X4, 92(SP) + MOVUPS X5, 108(SP) + MOVUPS X6, 124(SP) + MOVUPS X7, 140(SP) CALL ·asyncPreempt2(SB) - CMPB internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1 - JNE nosse2 - MOVUPS 248(SP), X7 - MOVUPS 232(SP), X6 - MOVUPS 216(SP), X5 - MOVUPS 200(SP), X4 - MOVUPS 184(SP), X3 - MOVUPS 168(SP), X2 - MOVUPS 152(SP), X1 - MOVUPS 136(SP), X0 -nosse2: - FRSTOR 28(SP) + MOVUPS 140(SP), X7 + MOVUPS 124(SP), X6 + MOVUPS 108(SP), X5 + MOVUPS 92(SP), X4 + MOVUPS 76(SP), X3 + MOVUPS 60(SP), X2 + MOVUPS 44(SP), X1 + MOVUPS 28(SP), X0 MOVL 24(SP), DI MOVL 20(SP), SI MOVL 16(SP), BP @@ -47,6 +38,6 @@ nosse2: MOVL 8(SP), DX MOVL 4(SP), CX MOVL 0(SP), AX - ADJSP $-264 + ADJSP $-156 POPFL RET diff --git a/src/runtime/vlrt.go b/src/runtime/vlrt.go index 38e0b32801..996c0611fd 100644 --- a/src/runtime/vlrt.go +++ b/src/runtime/vlrt.go @@ -263,7 +263,7 @@ func slowdodiv(n, d uint64) (q, r uint64) { return q, n } -// Floating point control word values for GOARCH=386 GO386=387. +// Floating point control word values. // Bits 0-5 are bits to disable floating-point exceptions. // Bits 8-9 are the precision control: // 0 = single precision a.k.a. float32 @@ -273,6 +273,5 @@ func slowdodiv(n, d uint64) (q, r uint64) { // 3 = round toward zero var ( controlWord64 uint16 = 0x3f + 2<<8 + 0<<10 - controlWord32 = 0x3f + 0<<8 + 0<<10 - controlWord64trunc = 0x3f + 2<<8 + 3<<10 + controlWord64trunc uint16 = 0x3f + 2<<8 + 3<<10 ) -- cgit v1.2.1 From d888f1d5c06828e9d7b0166f770a443f6315c2d1 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Thu, 1 Oct 2020 16:06:03 -0400 Subject: runtime: add debugging to TestTimePprof We've seen timeouts of TestTimePprof, but the tracebacks aren't useful because goroutines are running on other threads. Add GOTRACEBACK=crash to catch these in the future. For #41120. Change-Id: I97318172ef78d0cbab10df5e4ffcbfeadff579e3 Reviewed-on: https://go-review.googlesource.com/c/go/+/258802 Trust: Austin Clements Run-TryBot: Austin Clements TryBot-Result: Go Bot Reviewed-by: Bryan C. 
Mills --- src/runtime/crash_test.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/runtime') diff --git a/src/runtime/crash_test.go b/src/runtime/crash_test.go index 34f30c9a37..eae4f538c1 100644 --- a/src/runtime/crash_test.go +++ b/src/runtime/crash_test.go @@ -667,7 +667,9 @@ func TestBadTraceback(t *testing.T) { } func TestTimePprof(t *testing.T) { - fn := runTestProg(t, "testprog", "TimeProf") + // Pass GOTRACEBACK for issue #41120 to try to get more + // information on timeout. + fn := runTestProg(t, "testprog", "TimeProf", "GOTRACEBACK=crash") fn = strings.TrimSpace(fn) defer os.Remove(fn) -- cgit v1.2.1 From f89d05eb7ba1885474d03bb62f0a36a2d3cf56ea Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Thu, 1 Oct 2020 10:58:47 -0400 Subject: runtime: update and tidy cgo callback description The documentation on how cgo callbacks (C -> Go calls) works internally has gotten somewhat stale. This CL refreshes it. Change-Id: I1ab66225c9da52d698d97ebeb4f3c7b9b5ee97db Reviewed-on: https://go-review.googlesource.com/c/go/+/258937 Trust: Austin Clements Reviewed-by: Ian Lance Taylor Reviewed-by: Cherry Zhang --- src/runtime/cgocall.go | 42 +++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 19 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/cgocall.go b/src/runtime/cgocall.go index 427ed0ffb9..0b69ff3233 100644 --- a/src/runtime/cgocall.go +++ b/src/runtime/cgocall.go @@ -35,44 +35,48 @@ // cgo writes a gcc-compiled function named GoF (not p.GoF, since gcc doesn't // know about packages). The gcc-compiled C function f calls GoF. // -// GoF calls crosscall2(_cgoexp_GoF, frame, framesize). Crosscall2 -// (in cgo/gcc_$GOARCH.S, a gcc-compiled assembly file) is a two-argument -// adapter from the gcc function call ABI to the 6c function call ABI. -// It is called from gcc to call 6c functions. In this case it calls -// _cgoexp_GoF(frame, framesize), still running on m->g0's stack +// GoF calls crosscall2(_cgoexp_GoF, frame, framesize, ctxt). +// Crosscall2 (in cgo/asm_$GOARCH.s) is a four-argument adapter from +// the gcc function call ABI to the gc function call ABI. +// It is called from gcc to call gc functions. In this case it calls +// _cgoexp_GoF(frame, framesize), still running on m.g0's stack // and outside the $GOMAXPROCS limit. Thus, this code cannot yet // call arbitrary Go code directly and must be careful not to allocate -// memory or use up m->g0's stack. +// memory or use up m.g0's stack. // -// _cgoexp_GoF calls runtime.cgocallback(p.GoF, frame, framesize, ctxt). +// _cgoexp_GoF (generated by cmd/cgo) calls +// runtime.cgocallback(funcPC(p.GoF), frame, framesize, ctxt). // (The reason for having _cgoexp_GoF instead of writing a crosscall3 // to make this call directly is that _cgoexp_GoF, because it is compiled -// with 6c instead of gcc, can refer to dotted names like +// with gc instead of gcc, can refer to dotted names like // runtime.cgocallback and p.GoF.) // -// runtime.cgocallback (in asm_$GOARCH.s) switches from m->g0's -// stack to the original g (m->curg)'s stack, on which it calls +// runtime.cgocallback (in asm_$GOARCH.s) turns the raw PC of p.GoF +// into a Go function value and calls runtime.cgocallback_gofunc. +// +// runtime.cgocallback_gofunc (in asm_$GOARCH.s) switches from m.g0's +// stack to the original g (m.curg)'s stack, on which it calls // runtime.cgocallbackg(p.GoF, frame, framesize). 
// As part of the stack switch, runtime.cgocallback saves the current -// SP as m->g0->sched.sp, so that any use of m->g0's stack during the +// SP as m.g0.sched.sp, so that any use of m.g0's stack during the // execution of the callback will be done below the existing stack frames. -// Before overwriting m->g0->sched.sp, it pushes the old value on the -// m->g0 stack, so that it can be restored later. +// Before overwriting m.g0.sched.sp, it pushes the old value on the +// m.g0 stack, so that it can be restored later. // // runtime.cgocallbackg (below) is now running on a real goroutine -// stack (not an m->g0 stack). First it calls runtime.exitsyscall, which will +// stack (not an m.g0 stack). First it calls runtime.exitsyscall, which will // block until the $GOMAXPROCS limit allows running this goroutine. // Once exitsyscall has returned, it is safe to do things like call the memory // allocator or invoke the Go callback function p.GoF. runtime.cgocallbackg -// first defers a function to unwind m->g0.sched.sp, so that if p.GoF -// panics, m->g0.sched.sp will be restored to its old value: the m->g0 stack -// and the m->curg stack will be unwound in lock step. +// first defers a function to unwind m.g0.sched.sp, so that if p.GoF +// panics, m.g0.sched.sp will be restored to its old value: the m.g0 stack +// and the m.curg stack will be unwound in lock step. // Then it calls p.GoF. Finally it pops but does not execute the deferred // function, calls runtime.entersyscall, and returns to runtime.cgocallback. // // After it regains control, runtime.cgocallback switches back to -// m->g0's stack (the pointer is still in m->g0.sched.sp), restores the old -// m->g0.sched.sp value from the stack, and returns to _cgoexp_GoF. +// m.g0's stack (the pointer is still in m.g0.sched.sp), restores the old +// m.g0.sched.sp value from the stack, and returns to _cgoexp_GoF. // // _cgoexp_GoF immediately returns to crosscall2, which restores the // callee-save registers for gcc and returns to GoF, which returns to f. -- cgit v1.2.1 From 869c02ce1f635960bfc2f06bb52e2b4e17eaa199 Mon Sep 17 00:00:00 2001 From: Elias Naur Date: Wed, 16 Sep 2020 15:23:58 +0200 Subject: misc/ios: add support for running programs on the iOS simulator Update the README to mention the emulator. Remove reference to gomobile while here; there are multiple ways to develop for iOS today, including using the c-archive buildmode directly. Updates #38485 Change-Id: Iccef75e646ea8e1b9bc3fc37419cc2d6bf3dfdf4 Reviewed-on: https://go-review.googlesource.com/c/go/+/255257 Run-TryBot: Elias Naur TryBot-Result: Go Bot Trust: Elias Naur Reviewed-by: Cherry Zhang --- src/runtime/cgo/gcc_darwin_arm64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/runtime') diff --git a/src/runtime/cgo/gcc_darwin_arm64.c b/src/runtime/cgo/gcc_darwin_arm64.c index fd7d4084c9..9ea43ae4af 100644 --- a/src/runtime/cgo/gcc_darwin_arm64.c +++ b/src/runtime/cgo/gcc_darwin_arm64.c @@ -131,7 +131,7 @@ init_working_dir() fprintf(stderr, "runtime/cgo: chdir(%s) failed\n", dir); } - // The test harness in go_darwin_arm_exec passes the relative working directory + // The test harness in go_ios_exec passes the relative working directory // in the GoExecWrapperWorkingDirectory property of the app bundle. 
CFStringRef wd_ref = CFBundleGetValueForInfoDictionaryKey(bundle, CFSTR("GoExecWrapperWorkingDirectory")); if (wd_ref != NULL) { -- cgit v1.2.1 From 9dc65d7dc9268d5150174ec55cc4753fe18f554c Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Sat, 3 Oct 2020 16:44:22 -0400 Subject: runtime: correct signature of call16 The signature of call16 is currently missing the "typ" parameter. This CL fixes this. This wasn't caught by vet because call16 is defined by macro expansion (see #17544), and we didn't notice the mismatch with the other call* functions because call16 is defined only on 32-bit architectures and lives alone in stubs32.go. Unfortunately, this means its GC signature is also wrong: the "arg" parameter is treated as a scalar rather than a pointer, so GC won't trace it and stack copying won't adjust it. This turns out to matter in exactly one case right now: on 32-bit architectures (which are the only architectures where call16 is defined), a stack-allocated defer of a function with a 16-byte or smaller argument frame including a non-empty result area can corrupt memory if the deferred function grows the stack and is invoked during a panic. Whew. All other current uses of reflectcall pass a heap-allocated "arg" frame (which happens to be reachable from other stack roots, so tracing isn't a problem). Curiously, in 2016, the signatures of all call* functions were wrong in exactly this way. CL 31654 fixed all of them in stubs.go, but missed the one in stubs32.go. Fixes #41795. Change-Id: I31e3c0df201f79ee5707eeb8dc4ff0d13fc10ada Reviewed-on: https://go-review.googlesource.com/c/go/+/259338 Trust: Austin Clements Run-TryBot: Austin Clements TryBot-Result: Go Bot Reviewed-by: Cherry Zhang --- src/runtime/stubs32.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/runtime') diff --git a/src/runtime/stubs32.go b/src/runtime/stubs32.go index a7f52f6b9e..c4715fe989 100644 --- a/src/runtime/stubs32.go +++ b/src/runtime/stubs32.go @@ -11,4 +11,4 @@ import "unsafe" // Declarations for runtime services implemented in C or assembly that // are only present on 32 bit systems. -func call16(fn, arg unsafe.Pointer, n, retoffset uint32) +func call16(typ, fn, arg unsafe.Pointer, n, retoffset uint32) -- cgit v1.2.1 From 40bff82885b8de850f909f38357c53670562f815 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Sat, 3 Oct 2020 20:40:49 -0400 Subject: runtime: define and use call16 everywhere Currently, runtime.call16 is defined and used only on 32-bit architectures, while 64-bit architectures all start at call32 and go up from there. This led to unnecessary complexity because call16's prototype needed to be in a different file, separate from all of the other call* prototypes, which in turn led to it getting out of sync with the other call* prototypes. This CL adds call16 on 64-bit architectures, bringing them all into sync, and moves the call16 prototype to live with the others. Prior to CL 31655 (in 2016), call16 couldn't be implemented on 64-bit architectures because it needed at least four words of argument space to invoke "callwritebarrier" after copying back the results. CL 31655 changed the way call* invoked the write barrier in preparation for the hybrid barrier; since the hybrid barrier had to be invoked prior to copying back results, it needed a different solution that didn't reuse call*'s stack space. At this point, call16 was no longer a problem on 64-bit, but we never added it. Until now. 
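As a rough illustration of the failure mode behind the call16 signature fix in CL 259338 above (and the cleanup in this CL), the following hypothetical Go sketch shows the program shape that commit message describes; it is not code from either CL, and it assumes a 32-bit platform (e.g. 386):

    package main

    func grow(n int) {
        if n > 0 {
            grow(n - 1) // recurse enough to force stack growth
        }
    }

    func work() {
        x := new(int)
        // Deferred call with a pointer argument and a non-empty
        // result area. On a 32-bit platform the argument frame is
        // 12 bytes (a 4-byte *int plus an 8-byte error result), so
        // the panic machinery runs it through a small call* stub.
        defer func(p *int) error {
            grow(1 << 10) // grow the stack inside the deferred call
            *p = 42
            return nil
        }(x)
        panic("boom")
    }

    func main() {
        defer func() { recover() }()
        work()
    }

With the old prototype that omitted "typ", the "arg" frame pointer passed to call16 was recorded as a scalar, so the garbage collector would not trace it and stack copying would not adjust it while the deferred call grew the stack.
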
Change-Id: Id10ade0e4f75c6ea76afa6229ddaee2b994c27dd Reviewed-on: https://go-review.googlesource.com/c/go/+/259339 Trust: Austin Clements Reviewed-by: Cherry Zhang --- src/runtime/asm_amd64.s | 2 ++ src/runtime/asm_arm64.s | 2 ++ src/runtime/asm_mips64x.s | 1 + src/runtime/asm_ppc64x.s | 2 ++ src/runtime/asm_riscv64.s | 1 + src/runtime/asm_s390x.s | 2 ++ src/runtime/asm_wasm.s | 2 ++ src/runtime/stubs.go | 1 + src/runtime/stubs32.go | 14 -------------- 9 files changed, 13 insertions(+), 14 deletions(-) delete mode 100644 src/runtime/stubs32.go (limited to 'src/runtime') diff --git a/src/runtime/asm_amd64.s b/src/runtime/asm_amd64.s index fa25c55b96..256f4112cd 100644 --- a/src/runtime/asm_amd64.s +++ b/src/runtime/asm_amd64.s @@ -470,6 +470,7 @@ TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0 TEXT ·reflectcall(SB), NOSPLIT, $0-32 MOVLQZX argsize+24(FP), CX + DISPATCH(runtime·call16, 16) DISPATCH(runtime·call32, 32) DISPATCH(runtime·call64, 64) DISPATCH(runtime·call128, 128) @@ -537,6 +538,7 @@ TEXT callRet<>(SB), NOSPLIT, $32-0 CALL runtime·reflectcallmove(SB) RET +CALLFN(·call16, 16) CALLFN(·call32, 32) CALLFN(·call64, 64) CALLFN(·call128, 128) diff --git a/src/runtime/asm_arm64.s b/src/runtime/asm_arm64.s index 6b3d1e779e..5eda3063d7 100644 --- a/src/runtime/asm_arm64.s +++ b/src/runtime/asm_arm64.s @@ -331,6 +331,7 @@ TEXT runtime·morestack_noctxt(SB),NOSPLIT|NOFRAME,$0-0 TEXT ·reflectcall(SB), NOSPLIT|NOFRAME, $0-32 MOVWU argsize+24(FP), R16 + DISPATCH(runtime·call16, 16) DISPATCH(runtime·call32, 32) DISPATCH(runtime·call64, 64) DISPATCH(runtime·call128, 128) @@ -416,6 +417,7 @@ TEXT callRet<>(SB), NOSPLIT, $40-0 // These have 8 added to make the overall frame size a multiple of 16, // as required by the ABI. (There is another +8 for the saved LR.) 
+CALLFN(·call16, 24 ) CALLFN(·call32, 40 ) CALLFN(·call64, 72 ) CALLFN(·call128, 136 ) diff --git a/src/runtime/asm_mips64x.s b/src/runtime/asm_mips64x.s index 7330f40e85..0ff1b24225 100644 --- a/src/runtime/asm_mips64x.s +++ b/src/runtime/asm_mips64x.s @@ -294,6 +294,7 @@ TEXT runtime·morestack_noctxt(SB),NOSPLIT|NOFRAME,$0-0 TEXT ·reflectcall(SB), NOSPLIT|NOFRAME, $0-32 MOVWU argsize+24(FP), R1 + DISPATCH(runtime·call16, 16) DISPATCH(runtime·call32, 32) DISPATCH(runtime·call64, 64) DISPATCH(runtime·call128, 128) diff --git a/src/runtime/asm_ppc64x.s b/src/runtime/asm_ppc64x.s index 23387a2165..603058a61b 100644 --- a/src/runtime/asm_ppc64x.s +++ b/src/runtime/asm_ppc64x.s @@ -372,6 +372,7 @@ TEXT runtime·morestack_noctxt(SB),NOSPLIT|NOFRAME,$0-0 TEXT ·reflectcall(SB), NOSPLIT|NOFRAME, $0-32 MOVWZ argsize+24(FP), R3 + DISPATCH(runtime·call16, 16) DISPATCH(runtime·call32, 32) DISPATCH(runtime·call64, 64) DISPATCH(runtime·call128, 128) @@ -478,6 +479,7 @@ TEXT callRet<>(SB), NOSPLIT, $32-0 BL runtime·reflectcallmove(SB) RET +CALLFN(·call16, 16) CALLFN(·call32, 32) CALLFN(·call64, 64) CALLFN(·call128, 128) diff --git a/src/runtime/asm_riscv64.s b/src/runtime/asm_riscv64.s index 8f6c8773eb..4084ced7f8 100644 --- a/src/runtime/asm_riscv64.s +++ b/src/runtime/asm_riscv64.s @@ -342,6 +342,7 @@ TEXT reflect·call(SB), NOSPLIT, $0-0 // func reflectcall(argtype *_type, fn, arg unsafe.Pointer, argsize uint32, retoffset uint32) TEXT ·reflectcall(SB), NOSPLIT|NOFRAME, $0-32 MOVWU argsize+24(FP), T0 + DISPATCH(runtime·call16, 16) DISPATCH(runtime·call32, 32) DISPATCH(runtime·call64, 64) DISPATCH(runtime·call128, 128) diff --git a/src/runtime/asm_s390x.s b/src/runtime/asm_s390x.s index cb39451faa..46a434119b 100644 --- a/src/runtime/asm_s390x.s +++ b/src/runtime/asm_s390x.s @@ -383,6 +383,7 @@ TEXT runtime·morestack_noctxt(SB),NOSPLIT|NOFRAME,$0-0 TEXT ·reflectcall(SB), NOSPLIT, $-8-32 MOVWZ argsize+24(FP), R3 + DISPATCH(runtime·call16, 16) DISPATCH(runtime·call32, 32) DISPATCH(runtime·call64, 64) DISPATCH(runtime·call128, 128) @@ -461,6 +462,7 @@ TEXT callRet<>(SB), NOSPLIT, $32-0 BL runtime·reflectcallmove(SB) RET +CALLFN(·call16, 16) CALLFN(·call32, 32) CALLFN(·call64, 64) CALLFN(·call128, 128) diff --git a/src/runtime/asm_wasm.s b/src/runtime/asm_wasm.s index 7d88beb537..1275af136b 100644 --- a/src/runtime/asm_wasm.s +++ b/src/runtime/asm_wasm.s @@ -308,6 +308,7 @@ TEXT ·reflectcall(SB), NOSPLIT, $0-32 MOVW argsize+24(FP), R0 + DISPATCH(runtime·call16, 16) DISPATCH(runtime·call32, 32) DISPATCH(runtime·call64, 64) DISPATCH(runtime·call128, 128) @@ -398,6 +399,7 @@ TEXT callRet<>(SB), NOSPLIT, $32-0 CALL runtime·reflectcallmove(SB) RET +CALLFN(·call16, 16) CALLFN(·call32, 32) CALLFN(·call64, 64) CALLFN(·call128, 128) diff --git a/src/runtime/stubs.go b/src/runtime/stubs.go index b891a12fdd..bd2514e862 100644 --- a/src/runtime/stubs.go +++ b/src/runtime/stubs.go @@ -271,6 +271,7 @@ func return0() // in asm_*.s // not called directly; definitions here supply type information for traceback. +func call16(typ, fn, arg unsafe.Pointer, n, retoffset uint32) func call32(typ, fn, arg unsafe.Pointer, n, retoffset uint32) func call64(typ, fn, arg unsafe.Pointer, n, retoffset uint32) func call128(typ, fn, arg unsafe.Pointer, n, retoffset uint32) diff --git a/src/runtime/stubs32.go b/src/runtime/stubs32.go deleted file mode 100644 index c4715fe989..0000000000 --- a/src/runtime/stubs32.go +++ /dev/null @@ -1,14 +0,0 @@ -// Copyright 2015 The Go Authors. All rights reserved. 
-// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build 386 arm mips mipsle - -package runtime - -import "unsafe" - -// Declarations for runtime services implemented in C or assembly that -// are only present on 32 bit systems. - -func call16(typ, fn, arg unsafe.Pointer, n, retoffset uint32) -- cgit v1.2.1 From a517c3422e808ae51533a0700e05d59e8a799136 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Mon, 5 Oct 2020 12:17:30 -0400 Subject: runtime: clean up runtime.call* frame sizes on ARM64 ARM64 used to require that all assembly frame sizes were of the form 16*N+8 because ARM64 requires 16-byte SP alignment and the assembler added an 8 byte LR slot. This made all of the runtime.call* frame sizes wonky. The assembler now rounds up the frame size appropriately after adding any additional slots it needs, so this is no longer necessary. This CL cleans up the frame sizes of these functions so they look the way you'd expect and match all other architectures. Change-Id: I47819092296b8983c43eadf2e66c7c1e0d518555 Reviewed-on: https://go-review.googlesource.com/c/go/+/259448 Trust: Austin Clements Reviewed-by: Cherry Zhang --- src/runtime/asm_arm64.s | 56 ++++++++++++++++++++++++------------------------- 1 file changed, 27 insertions(+), 29 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/asm_arm64.s b/src/runtime/asm_arm64.s index 5eda3063d7..1f46d1962c 100644 --- a/src/runtime/asm_arm64.s +++ b/src/runtime/asm_arm64.s @@ -415,35 +415,33 @@ TEXT callRet<>(SB), NOSPLIT, $40-0 BL runtime·reflectcallmove(SB) RET -// These have 8 added to make the overall frame size a multiple of 16, -// as required by the ABI. (There is another +8 for the saved LR.) -CALLFN(·call16, 24 ) -CALLFN(·call32, 40 ) -CALLFN(·call64, 72 ) -CALLFN(·call128, 136 ) -CALLFN(·call256, 264 ) -CALLFN(·call512, 520 ) -CALLFN(·call1024, 1032 ) -CALLFN(·call2048, 2056 ) -CALLFN(·call4096, 4104 ) -CALLFN(·call8192, 8200 ) -CALLFN(·call16384, 16392 ) -CALLFN(·call32768, 32776 ) -CALLFN(·call65536, 65544 ) -CALLFN(·call131072, 131080 ) -CALLFN(·call262144, 262152 ) -CALLFN(·call524288, 524296 ) -CALLFN(·call1048576, 1048584 ) -CALLFN(·call2097152, 2097160 ) -CALLFN(·call4194304, 4194312 ) -CALLFN(·call8388608, 8388616 ) -CALLFN(·call16777216, 16777224 ) -CALLFN(·call33554432, 33554440 ) -CALLFN(·call67108864, 67108872 ) -CALLFN(·call134217728, 134217736 ) -CALLFN(·call268435456, 268435464 ) -CALLFN(·call536870912, 536870920 ) -CALLFN(·call1073741824, 1073741832 ) +CALLFN(·call16, 16) +CALLFN(·call32, 32) +CALLFN(·call64, 64) +CALLFN(·call128, 128) +CALLFN(·call256, 256) +CALLFN(·call512, 512) +CALLFN(·call1024, 1024) +CALLFN(·call2048, 2048) +CALLFN(·call4096, 4096) +CALLFN(·call8192, 8192) +CALLFN(·call16384, 16384) +CALLFN(·call32768, 32768) +CALLFN(·call65536, 65536) +CALLFN(·call131072, 131072) +CALLFN(·call262144, 262144) +CALLFN(·call524288, 524288) +CALLFN(·call1048576, 1048576) +CALLFN(·call2097152, 2097152) +CALLFN(·call4194304, 4194304) +CALLFN(·call8388608, 8388608) +CALLFN(·call16777216, 16777216) +CALLFN(·call33554432, 33554432) +CALLFN(·call67108864, 67108864) +CALLFN(·call134217728, 134217728) +CALLFN(·call268435456, 268435456) +CALLFN(·call536870912, 536870912) +CALLFN(·call1073741824, 1073741824) // func memhash32(p unsafe.Pointer, h uintptr) uintptr TEXT runtime·memhash32(SB),NOSPLIT|NOFRAME,$0-24 -- cgit v1.2.1 From 28e549dec3954b36d0c83442be913d8709d7e5ae Mon Sep 17 00:00:00 2001 From: Cherry Zhang Date: Sat, 12 Sep 2020 12:33:24 -0400 
Subject: runtime: use sigaltstack on macOS/ARM64 Currently we don't use sigaltstack on darwin/arm64, as is not supported on iOS. However, it is supported on macOS. Use it. (iOS remains unchanged.) Change-Id: Icc154c5e2edf2dbdc8ca68741ad9157fc15a72ee Reviewed-on: https://go-review.googlesource.com/c/go/+/256917 Trust: Cherry Zhang Reviewed-by: Ian Lance Taylor --- src/runtime/mkpreempt.go | 7 ++----- src/runtime/os_darwin.go | 8 ++++---- src/runtime/preempt_arm64.s | 3 --- src/runtime/stack.go | 2 +- src/runtime/sys_darwin_arm64.s | 22 ++++++++++++++++++---- 5 files changed, 25 insertions(+), 17 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/mkpreempt.go b/src/runtime/mkpreempt.go index c5bfb0f207..40683bb9d9 100644 --- a/src/runtime/mkpreempt.go +++ b/src/runtime/mkpreempt.go @@ -340,12 +340,9 @@ func genARM64() { p("MOVD R29, -8(RSP)") // save frame pointer (only used on Linux) p("SUB $8, RSP, R29") // set up new frame pointer p("#endif") - // On darwin, save the LR again after decrementing SP. We run the - // signal handler on the G stack (as it doesn't support SA_ONSTACK), + // On iOS, save the LR again after decrementing SP. We run the + // signal handler on the G stack (as it doesn't support sigaltstack), // so any writes below SP may be clobbered. - p("#ifdef GOOS_darwin") - p("MOVD R30, (RSP)") - p("#endif") p("#ifdef GOOS_ios") p("MOVD R30, (RSP)") p("#endif") diff --git a/src/runtime/os_darwin.go b/src/runtime/os_darwin.go index 01c40b4813..394bd6fb0f 100644 --- a/src/runtime/os_darwin.go +++ b/src/runtime/os_darwin.go @@ -289,9 +289,9 @@ func mpreinit(mp *m) { // Called to initialize a new m (including the bootstrap m). // Called on the new thread, cannot allocate memory. func minit() { - // The alternate signal stack is buggy on arm64. + // iOS does not support alternate signal stack. // The signal handler handles it directly. - if GOARCH != "arm64" { + if !(GOOS == "ios" && GOARCH == "arm64") { minitSignalStack() } minitSignalMask() @@ -301,9 +301,9 @@ func minit() { // Called from dropm to undo the effect of an minit. //go:nosplit func unminit() { - // The alternate signal stack is buggy on arm64. + // iOS does not support alternate signal stack. // See minit. - if GOARCH != "arm64" { + if !(GOOS == "ios" && GOARCH == "arm64") { unminitSignals() } } diff --git a/src/runtime/preempt_arm64.s b/src/runtime/preempt_arm64.s index d0e77659c3..36ee13282c 100644 --- a/src/runtime/preempt_arm64.s +++ b/src/runtime/preempt_arm64.s @@ -10,9 +10,6 @@ TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 MOVD R29, -8(RSP) SUB $8, RSP, R29 #endif - #ifdef GOOS_darwin - MOVD R30, (RSP) - #endif #ifdef GOOS_ios MOVD R30, (RSP) #endif diff --git a/src/runtime/stack.go b/src/runtime/stack.go index 3802cd049e..2afc2635aa 100644 --- a/src/runtime/stack.go +++ b/src/runtime/stack.go @@ -66,7 +66,7 @@ const ( // to each stack below the usual guard area for OS-specific // purposes like signal handling. Used on Windows, Plan 9, // and iOS because they do not use a separate stack. 
- _StackSystem = sys.GoosWindows*512*sys.PtrSize + sys.GoosPlan9*512 + (sys.GoosDarwin+sys.GoosIos)*sys.GoarchArm64*1024 + _StackSystem = sys.GoosWindows*512*sys.PtrSize + sys.GoosPlan9*512 + sys.GoosIos*sys.GoarchArm64*1024 // The minimum size of stack used by Go code _StackMin = 2048 diff --git a/src/runtime/sys_darwin_arm64.s b/src/runtime/sys_darwin_arm64.s index 585d4f2c64..427cb17781 100644 --- a/src/runtime/sys_darwin_arm64.s +++ b/src/runtime/sys_darwin_arm64.s @@ -202,6 +202,7 @@ TEXT runtime·sigtramp(SB),NOSPLIT,$192 BEQ 2(PC) BL runtime·load_g(SB) +#ifdef GOOS_ios MOVD RSP, R6 CMP $0, g BEQ nog @@ -226,16 +227,21 @@ nog: // Switch to gsignal stack. MOVD R6, RSP - // Call sigtrampgo. + // Save arguments. MOVW R0, (8*1)(RSP) MOVD R1, (8*2)(RSP) MOVD R2, (8*3)(RSP) +#endif + + // Call sigtrampgo. MOVD $runtime·sigtrampgo(SB), R11 BL (R11) +#ifdef GOOS_ios // Switch to old stack. MOVD (8*4)(RSP), R5 MOVD R5, RSP +#endif // Restore callee-save registers. MOVD (8*4)(RSP), R19 @@ -329,12 +335,20 @@ TEXT runtime·fcntl_trampoline(SB),NOSPLIT,$0 ADD $16, RSP RET -// sigaltstack on iOS is not supported and will always -// run the signal handler on the main stack, so our sigtramp has -// to do the stack switch ourselves. TEXT runtime·sigaltstack_trampoline(SB),NOSPLIT,$0 +#ifdef GOOS_ios + // sigaltstack on iOS is not supported and will always + // run the signal handler on the main stack, so our sigtramp has + // to do the stack switch ourselves. MOVW $43, R0 BL libc_exit(SB) +#else + MOVD 8(R0), R1 // arg 2 old + MOVD 0(R0), R0 // arg 1 new + CALL libc_sigaltstack(SB) + CBZ R0, 2(PC) + BL notok<>(SB) +#endif RET // Thread related functions -- cgit v1.2.1 From a739306ca7d9ea3a98acca59b853fe889f04c28c Mon Sep 17 00:00:00 2001 From: Cherry Zhang Date: Thu, 17 Sep 2020 10:53:10 -0400 Subject: runtime: enable more address bits on macOS/ARM64 Apparently macOS/ARM64 has 47-bit addresses, instead of 33-bit as on ios/ARM64. Enable more address bits. Updates #38485. Change-Id: I8aa64ba22a3933e3d9c4fffd17d902b5f31c30e3 Reviewed-on: https://go-review.googlesource.com/c/go/+/256918 Trust: Cherry Zhang Reviewed-by: Ian Lance Taylor Reviewed-by: Michael Knyszek --- src/runtime/malloc.go | 8 ++++---- src/runtime/mpagealloc_32bit.go | 4 ++-- src/runtime/mpagealloc_64bit.go | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go index c71f856f09..f7e9b7c4b4 100644 --- a/src/runtime/malloc.go +++ b/src/runtime/malloc.go @@ -198,7 +198,7 @@ const ( // mips32 only has access to the low 2GB of virtual memory, so // we further limit it to 31 bits. // - // On darwin/arm64, although 64-bit pointers are presumably + // On ios/arm64, although 64-bit pointers are presumably // available, pointers are truncated to 33 bits. Furthermore, // only the top 4 GiB of the address space are actually available // to the application, but we allow the whole 33 bits anyway for @@ -207,7 +207,7 @@ const ( // arenaBaseOffset to offset into the top 4 GiB. // // WebAssembly currently has a limit of 4GB linear memory. 
- heapAddrBits = (_64bit*(1-sys.GoarchWasm)*(1-(sys.GoosDarwin+sys.GoosIos)*sys.GoarchArm64))*48 + (1-_64bit+sys.GoarchWasm)*(32-(sys.GoarchMips+sys.GoarchMipsle)) + 33*(sys.GoosDarwin+sys.GoosIos)*sys.GoarchArm64 + heapAddrBits = (_64bit*(1-sys.GoarchWasm)*(1-sys.GoosIos*sys.GoarchArm64))*48 + (1-_64bit+sys.GoarchWasm)*(32-(sys.GoarchMips+sys.GoarchMipsle)) + 33*sys.GoosIos*sys.GoarchArm64 // maxAlloc is the maximum size of an allocation. On 64-bit, // it's theoretically possible to allocate 1<= 0; i-- { var p uintptr switch { - case GOARCH == "arm64" && (GOOS == "darwin" || GOOS == "ios"): + case GOARCH == "arm64" && GOOS == "ios": p = uintptr(i)<<40 | uintptrMask&(0x0013<<28) case GOARCH == "arm64": p = uintptr(i)<<40 | uintptrMask&(0x0040<<32) diff --git a/src/runtime/mpagealloc_32bit.go b/src/runtime/mpagealloc_32bit.go index 6658a900ac..90f1e54d6c 100644 --- a/src/runtime/mpagealloc_32bit.go +++ b/src/runtime/mpagealloc_32bit.go @@ -2,14 +2,14 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build 386 arm mips mipsle wasm darwin,arm64 +// +build 386 arm mips mipsle wasm ios,arm64 // wasm is a treated as a 32-bit architecture for the purposes of the page // allocator, even though it has 64-bit pointers. This is because any wasm // pointer always has its top 32 bits as zero, so the effective heap address // space is only 2^32 bytes in size (see heapAddrBits). -// darwin/arm64 is treated as a 32-bit architecture for the purposes of the +// ios/arm64 is treated as a 32-bit architecture for the purposes of the // page allocator, even though it has 64-bit pointers and a 33-bit address // space (see heapAddrBits). The 33 bit address space cannot be rounded up // to 64 bits because there are too many summary levels to fit in just 33 diff --git a/src/runtime/mpagealloc_64bit.go b/src/runtime/mpagealloc_64bit.go index 831626e4b2..a1691ba802 100644 --- a/src/runtime/mpagealloc_64bit.go +++ b/src/runtime/mpagealloc_64bit.go @@ -2,9 +2,9 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build amd64 !darwin,arm64 mips64 mips64le ppc64 ppc64le riscv64 s390x +// +build amd64 !ios,arm64 mips64 mips64le ppc64 ppc64le riscv64 s390x -// See mpagealloc_32bit.go for why darwin/arm64 is excluded here. +// See mpagealloc_32bit.go for why ios/arm64 is excluded here. package runtime -- cgit v1.2.1 From db428ad7b61ed757671162054252b4326045e96c Mon Sep 17 00:00:00 2001 From: Cherry Zhang Date: Thu, 17 Sep 2020 15:02:26 -0400 Subject: all: enable more tests on macOS/ARM64 Updates #38485. 
Change-Id: Iac96f5ffe88521fcb11eab306d0df6463bdce046 Reviewed-on: https://go-review.googlesource.com/c/go/+/256920 Trust: Cherry Zhang Reviewed-by: Dmitri Shuralyov Reviewed-by: Ian Lance Taylor --- src/runtime/debug/panic_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/debug/panic_test.go b/src/runtime/debug/panic_test.go index 93be216985..b67a3de4f9 100644 --- a/src/runtime/debug/panic_test.go +++ b/src/runtime/debug/panic_test.go @@ -20,8 +20,8 @@ func TestPanicOnFault(t *testing.T) { if runtime.GOARCH == "s390x" { t.Skip("s390x fault addresses are missing the low order bits") } - if (runtime.GOOS == "darwin" || runtime.GOOS == "ios") && runtime.GOARCH == "arm64" { - t.Skip("darwin/arm64 doesn't provide fault addresses") + if runtime.GOOS == "ios" { + t.Skip("iOS doesn't provide fault addresses") } m, err := syscall.Mmap(-1, 0, 0x1000, syscall.PROT_READ /* Note: no PROT_WRITE */, syscall.MAP_SHARED|syscall.MAP_ANON) if err != nil { -- cgit v1.2.1 From 3923460dda205721d9bee2714a7f0dd403082a90 Mon Sep 17 00:00:00 2001 From: Cherry Zhang Date: Sat, 3 Oct 2020 16:18:43 -0400 Subject: runtime/cgo: only build xx_cgo_panicmem on iOS On iOS, when running under lldb, we install xx_cgo_panicmem as EXC_BAD_ACCESS handler so we can get a proper Go panic for SIGSEGV. Only build it on iOS. Updates #38485. Change-Id: I801c477439e05920a4bb8fdf5eae6f4923ab8274 Reviewed-on: https://go-review.googlesource.com/c/go/+/259440 Trust: Cherry Zhang Reviewed-by: Ian Lance Taylor --- src/runtime/cgo/gcc_signal2_darwin_arm64.c | 11 -- src/runtime/cgo/gcc_signal2_ios_arm64.c | 11 ++ src/runtime/cgo/gcc_signal_darwin_arm64.c | 213 ----------------------------- src/runtime/cgo/gcc_signal_darwin_lldb.c | 12 -- src/runtime/cgo/gcc_signal_darwin_nolldb.c | 12 ++ src/runtime/cgo/gcc_signal_ios_arm64.c | 213 +++++++++++++++++++++++++++++ src/runtime/cgo/signal_darwin_arm64.go | 10 -- src/runtime/cgo/signal_darwin_arm64.s | 56 -------- src/runtime/cgo/signal_ios_arm64.go | 10 ++ src/runtime/cgo/signal_ios_arm64.s | 56 ++++++++ 10 files changed, 302 insertions(+), 302 deletions(-) delete mode 100644 src/runtime/cgo/gcc_signal2_darwin_arm64.c create mode 100644 src/runtime/cgo/gcc_signal2_ios_arm64.c delete mode 100644 src/runtime/cgo/gcc_signal_darwin_arm64.c delete mode 100644 src/runtime/cgo/gcc_signal_darwin_lldb.c create mode 100644 src/runtime/cgo/gcc_signal_darwin_nolldb.c create mode 100644 src/runtime/cgo/gcc_signal_ios_arm64.c delete mode 100644 src/runtime/cgo/signal_darwin_arm64.go delete mode 100644 src/runtime/cgo/signal_darwin_arm64.s create mode 100644 src/runtime/cgo/signal_ios_arm64.go create mode 100644 src/runtime/cgo/signal_ios_arm64.s (limited to 'src/runtime') diff --git a/src/runtime/cgo/gcc_signal2_darwin_arm64.c b/src/runtime/cgo/gcc_signal2_darwin_arm64.c deleted file mode 100644 index 5b8a18ffd6..0000000000 --- a/src/runtime/cgo/gcc_signal2_darwin_arm64.c +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright 2017 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build lldb - -// Used by gcc_signal_darwin_arm64.c when doing the test build during cgo. -// We hope that for real binaries the definition provided by Go will take precedence -// and the linker will drop this .o file altogether, which is why this definition -// is all by itself in its own file. 
-void __attribute__((weak)) xx_cgo_panicmem(void) {} diff --git a/src/runtime/cgo/gcc_signal2_ios_arm64.c b/src/runtime/cgo/gcc_signal2_ios_arm64.c new file mode 100644 index 0000000000..5b8a18ffd6 --- /dev/null +++ b/src/runtime/cgo/gcc_signal2_ios_arm64.c @@ -0,0 +1,11 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build lldb + +// Used by gcc_signal_darwin_arm64.c when doing the test build during cgo. +// We hope that for real binaries the definition provided by Go will take precedence +// and the linker will drop this .o file altogether, which is why this definition +// is all by itself in its own file. +void __attribute__((weak)) xx_cgo_panicmem(void) {} diff --git a/src/runtime/cgo/gcc_signal_darwin_arm64.c b/src/runtime/cgo/gcc_signal_darwin_arm64.c deleted file mode 100644 index 6519edd4cc..0000000000 --- a/src/runtime/cgo/gcc_signal_darwin_arm64.c +++ /dev/null @@ -1,213 +0,0 @@ -// Copyright 2015 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Emulation of the Unix signal SIGSEGV. -// -// On iOS, Go tests and apps under development are run by lldb. -// The debugger uses a task-level exception handler to intercept signals. -// Despite having a 'handle' mechanism like gdb, lldb will not allow a -// SIGSEGV to pass to the running program. For Go, this means we cannot -// generate a panic, which cannot be recovered, and so tests fail. -// -// We work around this by registering a thread-level mach exception handler -// and intercepting EXC_BAD_ACCESS. The kernel offers thread handlers a -// chance to resolve exceptions before the task handler, so we can generate -// the panic and avoid lldb's SIGSEGV handler. -// -// The dist tool enables this by build flag when testing. - -// +build lldb - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "libcgo.h" -#include "libcgo_unix.h" - -void xx_cgo_panicmem(void); -uintptr_t x_cgo_panicmem = (uintptr_t)xx_cgo_panicmem; - -static pthread_mutex_t mach_exception_handler_port_set_mu; -static mach_port_t mach_exception_handler_port_set = MACH_PORT_NULL; - -kern_return_t -catch_exception_raise( - mach_port_t exception_port, - mach_port_t thread, - mach_port_t task, - exception_type_t exception, - exception_data_t code_vector, - mach_msg_type_number_t code_count) -{ - kern_return_t ret; - arm_unified_thread_state_t thread_state; - mach_msg_type_number_t state_count = ARM_UNIFIED_THREAD_STATE_COUNT; - - // Returning KERN_SUCCESS intercepts the exception. - // - // Returning KERN_FAILURE lets the exception fall through to the - // next handler, which is the standard signal emulation code - // registered on the task port. - - if (exception != EXC_BAD_ACCESS) { - return KERN_FAILURE; - } - - ret = thread_get_state(thread, ARM_UNIFIED_THREAD_STATE, (thread_state_t)&thread_state, &state_count); - if (ret) { - fprintf(stderr, "runtime/cgo: thread_get_state failed: %d\n", ret); - abort(); - } - - // Bounce call to sigpanic through asm that makes it look like - // we call sigpanic directly from the faulting code. 
-#ifdef __arm64__ - thread_state.ts_64.__x[1] = thread_state.ts_64.__lr; - thread_state.ts_64.__x[2] = thread_state.ts_64.__pc; - thread_state.ts_64.__pc = x_cgo_panicmem; -#else - thread_state.ts_32.__r[1] = thread_state.ts_32.__lr; - thread_state.ts_32.__r[2] = thread_state.ts_32.__pc; - thread_state.ts_32.__pc = x_cgo_panicmem; -#endif - - if (0) { - // Useful debugging logic when panicmem is broken. - // - // Sends the first SIGSEGV and lets lldb catch the - // second one, avoiding a loop that locks up iOS - // devices requiring a hard reboot. - fprintf(stderr, "runtime/cgo: caught exc_bad_access\n"); - fprintf(stderr, "__lr = %llx\n", thread_state.ts_64.__lr); - fprintf(stderr, "__pc = %llx\n", thread_state.ts_64.__pc); - static int pass1 = 0; - if (pass1) { - return KERN_FAILURE; - } - pass1 = 1; - } - - ret = thread_set_state(thread, ARM_UNIFIED_THREAD_STATE, (thread_state_t)&thread_state, state_count); - if (ret) { - fprintf(stderr, "runtime/cgo: thread_set_state failed: %d\n", ret); - abort(); - } - - return KERN_SUCCESS; -} - -void -darwin_arm_init_thread_exception_port() -{ - // Called by each new OS thread to bind its EXC_BAD_ACCESS exception - // to mach_exception_handler_port_set. - int ret; - mach_port_t port = MACH_PORT_NULL; - - ret = mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE, &port); - if (ret) { - fprintf(stderr, "runtime/cgo: mach_port_allocate failed: %d\n", ret); - abort(); - } - ret = mach_port_insert_right( - mach_task_self(), - port, - port, - MACH_MSG_TYPE_MAKE_SEND); - if (ret) { - fprintf(stderr, "runtime/cgo: mach_port_insert_right failed: %d\n", ret); - abort(); - } - - ret = thread_set_exception_ports( - mach_thread_self(), - EXC_MASK_BAD_ACCESS, - port, - EXCEPTION_DEFAULT, - THREAD_STATE_NONE); - if (ret) { - fprintf(stderr, "runtime/cgo: thread_set_exception_ports failed: %d\n", ret); - abort(); - } - - ret = pthread_mutex_lock(&mach_exception_handler_port_set_mu); - if (ret) { - fprintf(stderr, "runtime/cgo: pthread_mutex_lock failed: %d\n", ret); - abort(); - } - ret = mach_port_move_member( - mach_task_self(), - port, - mach_exception_handler_port_set); - if (ret) { - fprintf(stderr, "runtime/cgo: mach_port_move_member failed: %d\n", ret); - abort(); - } - ret = pthread_mutex_unlock(&mach_exception_handler_port_set_mu); - if (ret) { - fprintf(stderr, "runtime/cgo: pthread_mutex_unlock failed: %d\n", ret); - abort(); - } -} - -static void* -mach_exception_handler(void *port) -{ - // Calls catch_exception_raise. - extern boolean_t exc_server(); - mach_msg_server(exc_server, 2048, (mach_port_t)port, 0); - abort(); // never returns -} - -void -darwin_arm_init_mach_exception_handler() -{ - pthread_mutex_init(&mach_exception_handler_port_set_mu, NULL); - - // Called once per process to initialize a mach port server, listening - // for EXC_BAD_ACCESS thread exceptions. - int ret; - pthread_t thr = NULL; - pthread_attr_t attr; - sigset_t ign, oset; - - ret = mach_port_allocate( - mach_task_self(), - MACH_PORT_RIGHT_PORT_SET, - &mach_exception_handler_port_set); - if (ret) { - fprintf(stderr, "runtime/cgo: mach_port_allocate failed for port_set: %d\n", ret); - abort(); - } - - // Block all signals to the exception handler thread - sigfillset(&ign); - pthread_sigmask(SIG_SETMASK, &ign, &oset); - - // Start a thread to handle exceptions. 
- uintptr_t port_set = (uintptr_t)mach_exception_handler_port_set; - pthread_attr_init(&attr); - pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); - ret = _cgo_try_pthread_create(&thr, &attr, mach_exception_handler, (void*)port_set); - - pthread_sigmask(SIG_SETMASK, &oset, nil); - - if (ret) { - fprintf(stderr, "runtime/cgo: pthread_create failed: %d\n", ret); - abort(); - } - pthread_attr_destroy(&attr); -} diff --git a/src/runtime/cgo/gcc_signal_darwin_lldb.c b/src/runtime/cgo/gcc_signal_darwin_lldb.c deleted file mode 100644 index 0ccdae324e..0000000000 --- a/src/runtime/cgo/gcc_signal_darwin_lldb.c +++ /dev/null @@ -1,12 +0,0 @@ -// Copyright 2015 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build !lldb -// +build darwin -// +build arm64 - -#include - -void darwin_arm_init_thread_exception_port() {} -void darwin_arm_init_mach_exception_handler() {} diff --git a/src/runtime/cgo/gcc_signal_darwin_nolldb.c b/src/runtime/cgo/gcc_signal_darwin_nolldb.c new file mode 100644 index 0000000000..26be71bd1d --- /dev/null +++ b/src/runtime/cgo/gcc_signal_darwin_nolldb.c @@ -0,0 +1,12 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !lldb !ios +// +build darwin +// +build arm64 + +#include + +void darwin_arm_init_thread_exception_port() {} +void darwin_arm_init_mach_exception_handler() {} diff --git a/src/runtime/cgo/gcc_signal_ios_arm64.c b/src/runtime/cgo/gcc_signal_ios_arm64.c new file mode 100644 index 0000000000..6519edd4cc --- /dev/null +++ b/src/runtime/cgo/gcc_signal_ios_arm64.c @@ -0,0 +1,213 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Emulation of the Unix signal SIGSEGV. +// +// On iOS, Go tests and apps under development are run by lldb. +// The debugger uses a task-level exception handler to intercept signals. +// Despite having a 'handle' mechanism like gdb, lldb will not allow a +// SIGSEGV to pass to the running program. For Go, this means we cannot +// generate a panic, which cannot be recovered, and so tests fail. +// +// We work around this by registering a thread-level mach exception handler +// and intercepting EXC_BAD_ACCESS. The kernel offers thread handlers a +// chance to resolve exceptions before the task handler, so we can generate +// the panic and avoid lldb's SIGSEGV handler. +// +// The dist tool enables this by build flag when testing. + +// +build lldb + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "libcgo.h" +#include "libcgo_unix.h" + +void xx_cgo_panicmem(void); +uintptr_t x_cgo_panicmem = (uintptr_t)xx_cgo_panicmem; + +static pthread_mutex_t mach_exception_handler_port_set_mu; +static mach_port_t mach_exception_handler_port_set = MACH_PORT_NULL; + +kern_return_t +catch_exception_raise( + mach_port_t exception_port, + mach_port_t thread, + mach_port_t task, + exception_type_t exception, + exception_data_t code_vector, + mach_msg_type_number_t code_count) +{ + kern_return_t ret; + arm_unified_thread_state_t thread_state; + mach_msg_type_number_t state_count = ARM_UNIFIED_THREAD_STATE_COUNT; + + // Returning KERN_SUCCESS intercepts the exception. 
+ // + // Returning KERN_FAILURE lets the exception fall through to the + // next handler, which is the standard signal emulation code + // registered on the task port. + + if (exception != EXC_BAD_ACCESS) { + return KERN_FAILURE; + } + + ret = thread_get_state(thread, ARM_UNIFIED_THREAD_STATE, (thread_state_t)&thread_state, &state_count); + if (ret) { + fprintf(stderr, "runtime/cgo: thread_get_state failed: %d\n", ret); + abort(); + } + + // Bounce call to sigpanic through asm that makes it look like + // we call sigpanic directly from the faulting code. +#ifdef __arm64__ + thread_state.ts_64.__x[1] = thread_state.ts_64.__lr; + thread_state.ts_64.__x[2] = thread_state.ts_64.__pc; + thread_state.ts_64.__pc = x_cgo_panicmem; +#else + thread_state.ts_32.__r[1] = thread_state.ts_32.__lr; + thread_state.ts_32.__r[2] = thread_state.ts_32.__pc; + thread_state.ts_32.__pc = x_cgo_panicmem; +#endif + + if (0) { + // Useful debugging logic when panicmem is broken. + // + // Sends the first SIGSEGV and lets lldb catch the + // second one, avoiding a loop that locks up iOS + // devices requiring a hard reboot. + fprintf(stderr, "runtime/cgo: caught exc_bad_access\n"); + fprintf(stderr, "__lr = %llx\n", thread_state.ts_64.__lr); + fprintf(stderr, "__pc = %llx\n", thread_state.ts_64.__pc); + static int pass1 = 0; + if (pass1) { + return KERN_FAILURE; + } + pass1 = 1; + } + + ret = thread_set_state(thread, ARM_UNIFIED_THREAD_STATE, (thread_state_t)&thread_state, state_count); + if (ret) { + fprintf(stderr, "runtime/cgo: thread_set_state failed: %d\n", ret); + abort(); + } + + return KERN_SUCCESS; +} + +void +darwin_arm_init_thread_exception_port() +{ + // Called by each new OS thread to bind its EXC_BAD_ACCESS exception + // to mach_exception_handler_port_set. + int ret; + mach_port_t port = MACH_PORT_NULL; + + ret = mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE, &port); + if (ret) { + fprintf(stderr, "runtime/cgo: mach_port_allocate failed: %d\n", ret); + abort(); + } + ret = mach_port_insert_right( + mach_task_self(), + port, + port, + MACH_MSG_TYPE_MAKE_SEND); + if (ret) { + fprintf(stderr, "runtime/cgo: mach_port_insert_right failed: %d\n", ret); + abort(); + } + + ret = thread_set_exception_ports( + mach_thread_self(), + EXC_MASK_BAD_ACCESS, + port, + EXCEPTION_DEFAULT, + THREAD_STATE_NONE); + if (ret) { + fprintf(stderr, "runtime/cgo: thread_set_exception_ports failed: %d\n", ret); + abort(); + } + + ret = pthread_mutex_lock(&mach_exception_handler_port_set_mu); + if (ret) { + fprintf(stderr, "runtime/cgo: pthread_mutex_lock failed: %d\n", ret); + abort(); + } + ret = mach_port_move_member( + mach_task_self(), + port, + mach_exception_handler_port_set); + if (ret) { + fprintf(stderr, "runtime/cgo: mach_port_move_member failed: %d\n", ret); + abort(); + } + ret = pthread_mutex_unlock(&mach_exception_handler_port_set_mu); + if (ret) { + fprintf(stderr, "runtime/cgo: pthread_mutex_unlock failed: %d\n", ret); + abort(); + } +} + +static void* +mach_exception_handler(void *port) +{ + // Calls catch_exception_raise. + extern boolean_t exc_server(); + mach_msg_server(exc_server, 2048, (mach_port_t)port, 0); + abort(); // never returns +} + +void +darwin_arm_init_mach_exception_handler() +{ + pthread_mutex_init(&mach_exception_handler_port_set_mu, NULL); + + // Called once per process to initialize a mach port server, listening + // for EXC_BAD_ACCESS thread exceptions. 
+ int ret; + pthread_t thr = NULL; + pthread_attr_t attr; + sigset_t ign, oset; + + ret = mach_port_allocate( + mach_task_self(), + MACH_PORT_RIGHT_PORT_SET, + &mach_exception_handler_port_set); + if (ret) { + fprintf(stderr, "runtime/cgo: mach_port_allocate failed for port_set: %d\n", ret); + abort(); + } + + // Block all signals to the exception handler thread + sigfillset(&ign); + pthread_sigmask(SIG_SETMASK, &ign, &oset); + + // Start a thread to handle exceptions. + uintptr_t port_set = (uintptr_t)mach_exception_handler_port_set; + pthread_attr_init(&attr); + pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); + ret = _cgo_try_pthread_create(&thr, &attr, mach_exception_handler, (void*)port_set); + + pthread_sigmask(SIG_SETMASK, &oset, nil); + + if (ret) { + fprintf(stderr, "runtime/cgo: pthread_create failed: %d\n", ret); + abort(); + } + pthread_attr_destroy(&attr); +} diff --git a/src/runtime/cgo/signal_darwin_arm64.go b/src/runtime/cgo/signal_darwin_arm64.go deleted file mode 100644 index 3425c448c4..0000000000 --- a/src/runtime/cgo/signal_darwin_arm64.go +++ /dev/null @@ -1,10 +0,0 @@ -// Copyright 2015 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package cgo - -import _ "unsafe" - -//go:cgo_export_static xx_cgo_panicmem xx_cgo_panicmem -func xx_cgo_panicmem() diff --git a/src/runtime/cgo/signal_darwin_arm64.s b/src/runtime/cgo/signal_darwin_arm64.s deleted file mode 100644 index 1ae00d13f3..0000000000 --- a/src/runtime/cgo/signal_darwin_arm64.s +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright 2015 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -#include "textflag.h" - -// xx_cgo_panicmem is the entrypoint for SIGSEGV as intercepted via a -// mach thread port as EXC_BAD_ACCESS. As the segfault may have happened -// in C code, we first need to load_g then call xx_cgo_panicmem. -// -// R1 - LR at moment of fault -// R2 - PC at moment of fault -TEXT xx_cgo_panicmem(SB),NOSPLIT|NOFRAME,$0 - // If in external C code, we need to load the g register. - BL runtime·load_g(SB) - CMP $0, g - BNE ongothread - - // On a foreign thread. - // TODO(crawshaw): call badsignal - MOVD.W $0, -16(RSP) - MOVW $139, R1 - MOVW R1, 8(RSP) - B runtime·exit(SB) - -ongothread: - // Trigger a SIGSEGV panic. - // - // The goal is to arrange the stack so it looks like the runtime - // function sigpanic was called from the PC that faulted. It has - // to be sigpanic, as the stack unwinding code in traceback.go - // looks explicitly for it. - // - // To do this we call into runtime·setsigsegv, which sets the - // appropriate state inside the g object. We give it the faulting - // PC on the stack, then put it in the LR before calling sigpanic. - - // Build a 32-byte stack frame for us for this call. - // Saved LR (none available) is at the bottom, - // then the PC argument for setsigsegv, - // then a copy of the LR for us to restore. - MOVD.W $0, -32(RSP) - MOVD R1, 8(RSP) - MOVD R2, 16(RSP) - BL runtime·setsigsegv(SB) - MOVD 8(RSP), R1 - MOVD 16(RSP), R2 - - // Build a 16-byte stack frame for the simulated - // call to sigpanic, by taking 16 bytes away from the - // 32-byte stack frame above. - // The saved LR in this frame is the LR at time of fault, - // and the LR on entry to sigpanic is the PC at time of fault. 
- MOVD.W R1, 16(RSP) - MOVD R2, R30 - B runtime·sigpanic(SB) diff --git a/src/runtime/cgo/signal_ios_arm64.go b/src/runtime/cgo/signal_ios_arm64.go new file mode 100644 index 0000000000..3425c448c4 --- /dev/null +++ b/src/runtime/cgo/signal_ios_arm64.go @@ -0,0 +1,10 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cgo + +import _ "unsafe" + +//go:cgo_export_static xx_cgo_panicmem xx_cgo_panicmem +func xx_cgo_panicmem() diff --git a/src/runtime/cgo/signal_ios_arm64.s b/src/runtime/cgo/signal_ios_arm64.s new file mode 100644 index 0000000000..1ae00d13f3 --- /dev/null +++ b/src/runtime/cgo/signal_ios_arm64.s @@ -0,0 +1,56 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +// xx_cgo_panicmem is the entrypoint for SIGSEGV as intercepted via a +// mach thread port as EXC_BAD_ACCESS. As the segfault may have happened +// in C code, we first need to load_g then call xx_cgo_panicmem. +// +// R1 - LR at moment of fault +// R2 - PC at moment of fault +TEXT xx_cgo_panicmem(SB),NOSPLIT|NOFRAME,$0 + // If in external C code, we need to load the g register. + BL runtime·load_g(SB) + CMP $0, g + BNE ongothread + + // On a foreign thread. + // TODO(crawshaw): call badsignal + MOVD.W $0, -16(RSP) + MOVW $139, R1 + MOVW R1, 8(RSP) + B runtime·exit(SB) + +ongothread: + // Trigger a SIGSEGV panic. + // + // The goal is to arrange the stack so it looks like the runtime + // function sigpanic was called from the PC that faulted. It has + // to be sigpanic, as the stack unwinding code in traceback.go + // looks explicitly for it. + // + // To do this we call into runtime·setsigsegv, which sets the + // appropriate state inside the g object. We give it the faulting + // PC on the stack, then put it in the LR before calling sigpanic. + + // Build a 32-byte stack frame for us for this call. + // Saved LR (none available) is at the bottom, + // then the PC argument for setsigsegv, + // then a copy of the LR for us to restore. + MOVD.W $0, -32(RSP) + MOVD R1, 8(RSP) + MOVD R2, 16(RSP) + BL runtime·setsigsegv(SB) + MOVD 8(RSP), R1 + MOVD 16(RSP), R2 + + // Build a 16-byte stack frame for the simulated + // call to sigpanic, by taking 16 bytes away from the + // 32-byte stack frame above. + // The saved LR in this frame is the LR at time of fault, + // and the LR on entry to sigpanic is the PC at time of fault. + MOVD.W R1, 16(RSP) + MOVD R2, R30 + B runtime·sigpanic(SB) -- cgit v1.2.1 From 67edc0ed81947a55adbcd0c9d2317abb93ac9510 Mon Sep 17 00:00:00 2001 From: Cherry Zhang Date: Tue, 6 Oct 2020 22:07:15 -0400 Subject: runtime: restore SSE guard in asyncPreempt on 386 So we don't use SSE instructions under GO386=softfloat. 
Change-Id: I8ecc92340ee567f84a22501df2543ec041d25ef2 Reviewed-on: https://go-review.googlesource.com/c/go/+/260137 Trust: Cherry Zhang Run-TryBot: Cherry Zhang TryBot-Result: Go Bot Reviewed-by: Keith Randall --- src/runtime/mkpreempt.go | 28 ++++++++++++++++++---------- src/runtime/preempt_386.s | 6 ++++++ 2 files changed, 24 insertions(+), 10 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/mkpreempt.go b/src/runtime/mkpreempt.go index 40683bb9d9..76237bc31b 100644 --- a/src/runtime/mkpreempt.go +++ b/src/runtime/mkpreempt.go @@ -189,26 +189,34 @@ func (l *layout) restore() { func gen386() { p("PUSHFL") - - // Assign stack offsets. + // Save general purpose registers. var l = layout{sp: "SP"} for _, reg := range regNames386 { - if reg == "SP" { + if reg == "SP" || strings.HasPrefix(reg, "X") { continue } - if strings.HasPrefix(reg, "X") { - l.add("MOVUPS", reg, 16) - } else { - l.add("MOVL", reg, 4) - } + l.add("MOVL", reg, 4) } - p("ADJSP $%d", l.stack) + // Save SSE state only if supported. + lSSE := layout{stack: l.stack, sp: "SP"} + for i := 0; i < 8; i++ { + lSSE.add("MOVUPS", fmt.Sprintf("X%d", i), 16) + } + + p("ADJSP $%d", lSSE.stack) p("NOP SP") l.save() + p("CMPB internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1\nJNE nosse") + lSSE.save() + label("nosse:") p("CALL ·asyncPreempt2(SB)") + p("CMPB internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1\nJNE nosse2") + lSSE.restore() + label("nosse2:") l.restore() - p("ADJSP $%d", -l.stack) + p("ADJSP $%d", -lSSE.stack) + p("POPFL") p("RET") } diff --git a/src/runtime/preempt_386.s b/src/runtime/preempt_386.s index 5c9b8ea224..c3a5fa1f36 100644 --- a/src/runtime/preempt_386.s +++ b/src/runtime/preempt_386.s @@ -14,6 +14,8 @@ TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 MOVL BP, 16(SP) MOVL SI, 20(SP) MOVL DI, 24(SP) + CMPB internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1 + JNE nosse MOVUPS X0, 28(SP) MOVUPS X1, 44(SP) MOVUPS X2, 60(SP) @@ -22,7 +24,10 @@ TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 MOVUPS X5, 108(SP) MOVUPS X6, 124(SP) MOVUPS X7, 140(SP) +nosse: CALL ·asyncPreempt2(SB) + CMPB internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1 + JNE nosse2 MOVUPS 140(SP), X7 MOVUPS 124(SP), X6 MOVUPS 108(SP), X5 @@ -31,6 +36,7 @@ TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 MOVUPS 60(SP), X2 MOVUPS 44(SP), X1 MOVUPS 28(SP), X0 +nosse2: MOVL 24(SP), DI MOVL 20(SP), SI MOVL 16(SP), BP -- cgit v1.2.1 From ade5161f51f2b7239705047875dc36c35139b253 Mon Sep 17 00:00:00 2001 From: Cherry Zhang Date: Tue, 6 Oct 2020 21:13:16 -0400 Subject: crypto/x509: use macOS/AMD64 implementation on macOS/ARM64 Updates #38485. Change-Id: I0582a53171ce803ca1b0237cfa9bc022fc1da6f9 Reviewed-on: https://go-review.googlesource.com/c/go/+/260340 Trust: Cherry Zhang Run-TryBot: Cherry Zhang TryBot-Result: Go Bot Reviewed-by: Ian Lance Taylor --- src/runtime/sys_darwin_arm64.s | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'src/runtime') diff --git a/src/runtime/sys_darwin_arm64.s b/src/runtime/sys_darwin_arm64.s index 427cb17781..f8d6f28dc7 100644 --- a/src/runtime/sys_darwin_arm64.s +++ b/src/runtime/sys_darwin_arm64.s @@ -707,3 +707,23 @@ TEXT runtime·syscall6X(SB),NOSPLIT,$0 MOVD R0, 72(R2) // save err ok: RET + +// syscallNoErr is like syscall6 but does not check for errors, and +// only returns one value, for use with standard C ABI library functions. 
+TEXT runtime·syscallNoErr(SB),NOSPLIT,$0 + SUB $16, RSP // push structure pointer + MOVD R0, (RSP) + + MOVD 0(R0), R12 // fn + MOVD 16(R0), R1 // a2 + MOVD 24(R0), R2 // a3 + MOVD 32(R0), R3 // a4 + MOVD 40(R0), R4 // a5 + MOVD 48(R0), R5 // a6 + MOVD 8(R0), R0 // a1 + BL (R12) + + MOVD (RSP), R2 // pop structure pointer + ADD $16, RSP + MOVD R0, 56(R2) // save r1 + RET -- cgit v1.2.1 From f8df205e74d5122c43f41923280451641e566ee2 Mon Sep 17 00:00:00 2001 From: Cherry Zhang Date: Wed, 7 Oct 2020 18:29:51 -0400 Subject: all: enable more tests on macOS/ARM64 On macOS, we can do "go build", can exec, and have the source tree available, so we can enable more tests. Skip ones that don't work. Most of them are due to that it requires external linking (for now) and some tests don't work with external linking (e.g. runtime deadlock detection). For them, helper functions CanInternalLink/MustInternalLink are introduced. I still want to have internal linking implemented, but it is still a good idea to identify which tests don't work with external linking. Updates #38485. Change-Id: I6b14697573cf3f371daf54b9ddd792acf232f2f2 Reviewed-on: https://go-review.googlesource.com/c/go/+/260719 Trust: Cherry Zhang Run-TryBot: Cherry Zhang TryBot-Result: Go Bot Reviewed-by: Brad Fitzpatrick Reviewed-by: Than McIntosh --- src/runtime/crash_test.go | 21 +++++++++++++++++++++ src/runtime/time_test.go | 4 ++++ 2 files changed, 25 insertions(+) (limited to 'src/runtime') diff --git a/src/runtime/crash_test.go b/src/runtime/crash_test.go index eae4f538c1..5e22b7593e 100644 --- a/src/runtime/crash_test.go +++ b/src/runtime/crash_test.go @@ -181,6 +181,9 @@ func TestCrashHandler(t *testing.T) { } func testDeadlock(t *testing.T, name string) { + // External linking brings in cgo, causing deadlock detection not working. + testenv.MustInternalLink(t) + output := runTestProg(t, "testprog", name) want := "fatal error: all goroutines are asleep - deadlock!\n" if !strings.HasPrefix(output, want) { @@ -205,6 +208,9 @@ func TestLockedDeadlock2(t *testing.T) { } func TestGoexitDeadlock(t *testing.T) { + // External linking brings in cgo, causing deadlock detection not working. + testenv.MustInternalLink(t) + output := runTestProg(t, "testprog", "GoexitDeadlock") want := "no goroutines (main called runtime.Goexit) - deadlock!" if !strings.Contains(output, want) { @@ -290,6 +296,9 @@ func TestRecursivePanic4(t *testing.T) { } func TestGoexitCrash(t *testing.T) { + // External linking brings in cgo, causing deadlock detection not working. + testenv.MustInternalLink(t) + output := runTestProg(t, "testprog", "GoexitExit") want := "no goroutines (main called runtime.Goexit) - deadlock!" if !strings.Contains(output, want) { @@ -348,6 +357,9 @@ func TestBreakpoint(t *testing.T) { } func TestGoexitInPanic(t *testing.T) { + // External linking brings in cgo, causing deadlock detection not working. + testenv.MustInternalLink(t) + // see issue 8774: this code used to trigger an infinite recursion output := runTestProg(t, "testprog", "GoexitInPanic") want := "fatal error: no goroutines (main called runtime.Goexit) - deadlock!" @@ -412,6 +424,9 @@ func TestPanicAfterGoexit(t *testing.T) { } func TestRecoveredPanicAfterGoexit(t *testing.T) { + // External linking brings in cgo, causing deadlock detection not working. + testenv.MustInternalLink(t) + output := runTestProg(t, "testprog", "RecoveredPanicAfterGoexit") want := "fatal error: no goroutines (main called runtime.Goexit) - deadlock!" 
if !strings.HasPrefix(output, want) { @@ -420,6 +435,9 @@ func TestRecoveredPanicAfterGoexit(t *testing.T) { } func TestRecoverBeforePanicAfterGoexit(t *testing.T) { + // External linking brings in cgo, causing deadlock detection not working. + testenv.MustInternalLink(t) + t.Parallel() output := runTestProg(t, "testprog", "RecoverBeforePanicAfterGoexit") want := "fatal error: no goroutines (main called runtime.Goexit) - deadlock!" @@ -429,6 +447,9 @@ func TestRecoverBeforePanicAfterGoexit(t *testing.T) { } func TestRecoverBeforePanicAfterGoexit2(t *testing.T) { + // External linking brings in cgo, causing deadlock detection not working. + testenv.MustInternalLink(t) + t.Parallel() output := runTestProg(t, "testprog", "RecoverBeforePanicAfterGoexit2") want := "fatal error: no goroutines (main called runtime.Goexit) - deadlock!" diff --git a/src/runtime/time_test.go b/src/runtime/time_test.go index a8dab7db8e..afd9af2af4 100644 --- a/src/runtime/time_test.go +++ b/src/runtime/time_test.go @@ -20,6 +20,10 @@ func TestFakeTime(t *testing.T) { t.Skip("faketime not supported on windows") } + // Faketime is advanced in checkdead. External linking brings in cgo, + // causing checkdead not working. + testenv.MustInternalLink(t) + t.Parallel() exe, err := buildTestProg(t, "testfaketime", "-tags=faketime") -- cgit v1.2.1 From 8f26b57f9afc238bdecb9b7030bc2f4364093885 Mon Sep 17 00:00:00 2001 From: Cuong Manh Le Date: Sat, 3 Oct 2020 01:23:47 +0700 Subject: cmd/compile: split exported/non-exported methods for interface type Currently, mhdr/methods is emitted with the same len/cap. There's no way to distinguish between exported and non-exported methods statically. This CL splits mhdr/methods into two parts, use "len" for number of exported methods, and "cap" for all methods. This fixes the bug in issue #22075, which intends to return the number of exported methods but currently return all methods. Note that with this encoding, we still can access either all/exported-only/non-exported-only methods: mhdr[:cap(mhdr)] // all methods mhdr // exported methods mhdr[len(mhdr):cap(mhdr)] // non-exported methods Thank to Matthew Dempsky (@mdempsky) for suggesting this encoding. 
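To make the encoding concrete, here is a minimal, self-contained sketch of the
same len/cap trick using an ordinary slice. The element type and method names
below are invented purely for illustration; they do not reflect the runtime's
actual imethod layout.

    package main

    import "fmt"

    type method struct{ name string }

    func main() {
        // Backing array holds all methods, exported ones first.
        all := []method{{"Read"}, {"Write"}, {"close"}, {"reset"}}
        numExported := 2

        // The encoding: len = exported methods, cap = all methods.
        mhdr := all[:numExported:len(all)]

        fmt.Println(len(mhdr), "exported,", cap(mhdr), "total")
        fmt.Println("exported:    ", mhdr)                      // mhdr
        fmt.Println("all:         ", mhdr[:cap(mhdr)])          // mhdr[:cap(mhdr)]
        fmt.Println("non-exported:", mhdr[len(mhdr):cap(mhdr)]) // mhdr[len(mhdr):cap(mhdr)]
    }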
Fixes #22075 Change-Id: If662adb03ccff27407d55a5578a0ed05a15e7cdd Reviewed-on: https://go-review.googlesource.com/c/go/+/259237 Trust: Cuong Manh Le Run-TryBot: Cuong Manh Le TryBot-Result: Go Bot Reviewed-by: Cherry Zhang Reviewed-by: Matthew Dempsky --- src/runtime/alg.go | 2 +- src/runtime/iface.go | 12 +++++++----- src/runtime/mfinal.go | 4 ++-- src/runtime/type.go | 26 ++++++++++++++++++++------ 4 files changed, 30 insertions(+), 14 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/alg.go b/src/runtime/alg.go index 0af48ab25c..4a98b84e4a 100644 --- a/src/runtime/alg.go +++ b/src/runtime/alg.go @@ -185,7 +185,7 @@ func typehash(t *_type, p unsafe.Pointer, h uintptr) uintptr { return strhash(p, h) case kindInterface: i := (*interfacetype)(unsafe.Pointer(t)) - if len(i.mhdr) == 0 { + if i.isEmpty() { return nilinterhash(p, h) } return interhash(p, h) diff --git a/src/runtime/iface.go b/src/runtime/iface.go index 0504b89363..f8b7d429a3 100644 --- a/src/runtime/iface.go +++ b/src/runtime/iface.go @@ -31,16 +31,17 @@ func itabHashFunc(inter *interfacetype, typ *_type) uintptr { } func getitab(inter *interfacetype, typ *_type, canfail bool) *itab { - if len(inter.mhdr) == 0 { + if inter.isEmpty() { throw("internal error - misuse of itab") } + imethods := inter.methods() // easy case if typ.tflag&tflagUncommon == 0 { if canfail { return nil } - name := inter.typ.nameOff(inter.mhdr[0].name) + name := inter.typ.nameOff(imethods[0].name) panic(&TypeAssertionError{nil, typ, &inter.typ, name.name()}) } @@ -63,7 +64,7 @@ func getitab(inter *interfacetype, typ *_type, canfail bool) *itab { } // Entry doesn't exist yet. Make a new entry & add it. - m = (*itab)(persistentalloc(unsafe.Sizeof(itab{})+uintptr(len(inter.mhdr)-1)*sys.PtrSize, 0, &memstats.other_sys)) + m = (*itab)(persistentalloc(unsafe.Sizeof(itab{})+uintptr(len(imethods)-1)*sys.PtrSize, 0, &memstats.other_sys)) m.inter = inter m._type = typ // The hash is used in type switches. However, compiler statically generates itab's @@ -197,7 +198,8 @@ func (m *itab) init() string { // and interface names are unique, // so can iterate over both in lock step; // the loop is O(ni+nt) not O(ni*nt). 
- ni := len(inter.mhdr) + imethods := inter.methods() + ni := len(imethods) nt := int(x.mcount) xmhdr := (*[1 << 16]method)(add(unsafe.Pointer(x), uintptr(x.moff)))[:nt:nt] j := 0 @@ -205,7 +207,7 @@ func (m *itab) init() string { var fun0 unsafe.Pointer imethods: for k := 0; k < ni; k++ { - i := &inter.mhdr[k] + i := &imethods[k] itype := inter.typ.typeOff(i.ityp) name := inter.typ.nameOff(i.name) iname := name.name() diff --git a/src/runtime/mfinal.go b/src/runtime/mfinal.go index cd6196dcab..6676ae6736 100644 --- a/src/runtime/mfinal.go +++ b/src/runtime/mfinal.go @@ -210,7 +210,7 @@ func runfinq() { // set up with empty interface (*eface)(frame)._type = &f.ot.typ (*eface)(frame).data = f.arg - if len(ityp.mhdr) != 0 { + if !ityp.isEmpty() { // convert to interface with methods // this conversion is guaranteed to succeed - we checked in SetFinalizer *(*iface)(frame) = assertE2I(ityp, *(*eface)(frame)) @@ -394,7 +394,7 @@ func SetFinalizer(obj interface{}, finalizer interface{}) { } case fint.kind&kindMask == kindInterface: ityp := (*interfacetype)(unsafe.Pointer(fint)) - if len(ityp.mhdr) == 0 { + if ityp.isEmpty() { // ok - satisfies empty interface goto okarg } diff --git a/src/runtime/type.go b/src/runtime/type.go index 81455f3532..36492619e1 100644 --- a/src/runtime/type.go +++ b/src/runtime/type.go @@ -366,7 +366,19 @@ type imethod struct { type interfacetype struct { typ _type pkgpath name - mhdr []imethod + // expMethods contains all interface methods. + // + // - len(expMethods) returns number of exported methods. + // - cap(expMethods) returns all interface methods, including both exported/non-exported methods. + expMethods []imethod +} + +func (it *interfacetype) methods() []imethod { + return it.expMethods[:cap(it.expMethods)] +} + +func (it *interfacetype) isEmpty() bool { + return cap(it.expMethods) == 0 } type maptype struct { @@ -664,13 +676,15 @@ func typesEqual(t, v *_type, seen map[_typePair]struct{}) bool { if it.pkgpath.name() != iv.pkgpath.name() { return false } - if len(it.mhdr) != len(iv.mhdr) { + itmethods := it.methods() + ivmethods := iv.methods() + if len(itmethods) != len(ivmethods) { return false } - for i := range it.mhdr { - tm := &it.mhdr[i] - vm := &iv.mhdr[i] - // Note the mhdr array can be relocated from + for i := range itmethods { + tm := &itmethods[i] + vm := &ivmethods[i] + // Note the expMethods array can be relocated from // another module. See #17724. tname := resolveNameOff(unsafe.Pointer(tm), tm.name) vname := resolveNameOff(unsafe.Pointer(vm), vm.name) -- cgit v1.2.1 From 39b527691495902279da7ac8405a070ded7dd4a2 Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Tue, 7 Jul 2020 09:07:16 -0400 Subject: net: remove dependency on math/rand Like we did for sync, let the runtime give net random numbers, to avoid forcing an import of math/rand for DNS. 
Change-Id: Iab3e64121d687d288a3961a8ccbcebe589047253 Reviewed-on: https://go-review.googlesource.com/c/go/+/241258 Trust: Russ Cox Run-TryBot: Russ Cox TryBot-Result: Go Bot Reviewed-by: Ian Lance Taylor --- src/runtime/stubs.go | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/runtime') diff --git a/src/runtime/stubs.go b/src/runtime/stubs.go index bd2514e862..6290142a41 100644 --- a/src/runtime/stubs.go +++ b/src/runtime/stubs.go @@ -130,6 +130,9 @@ func fastrandn(n uint32) uint32 { //go:linkname sync_fastrand sync.fastrand func sync_fastrand() uint32 { return fastrand() } +//go:linkname net_fastrand net.fastrand +func net_fastrand() uint32 { return fastrand() } + // in internal/bytealg/equal_*.s //go:noescape func memequal(a, b unsafe.Pointer, size uintptr) bool -- cgit v1.2.1 From e08059f4fcacce2ff18d1cfc5fa48c942f8d46aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20M=C3=B6hrmann?= Date: Tue, 13 Oct 2020 08:26:36 +0200 Subject: runtime: remove unused alg constants MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CL 191198 removed algarray from the runtime which used these constants as indices. Change-Id: Ia669cf410372ef5113dadccd115a39ff8d47e5c8 Reviewed-on: https://go-review.googlesource.com/c/go/+/261364 Reviewed-by: Keith Randall Run-TryBot: Martin Möhrmann TryBot-Result: Go Bot Trust: Emmanuel Odeke Trust: Martin Möhrmann --- src/runtime/alg.go | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/alg.go b/src/runtime/alg.go index 4a98b84e4a..2ec3fc3658 100644 --- a/src/runtime/alg.go +++ b/src/runtime/alg.go @@ -15,25 +15,6 @@ const ( c1 = uintptr((8-sys.PtrSize)/4*3267000013 + (sys.PtrSize-4)/4*23344194077549503) ) -// type algorithms - known to compiler -const ( - alg_NOEQ = iota - alg_MEM0 - alg_MEM8 - alg_MEM16 - alg_MEM32 - alg_MEM64 - alg_MEM128 - alg_STRING - alg_INTER - alg_NILINTER - alg_FLOAT32 - alg_FLOAT64 - alg_CPLX64 - alg_CPLX128 - alg_max -) - func memhash0(p unsafe.Pointer, h uintptr) uintptr { return h } -- cgit v1.2.1 From 7c58ef732efd9bf0d0882bb95371ce1909924a75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20M=C3=B6hrmann?= Date: Mon, 14 Sep 2020 16:55:34 +0200 Subject: runtime: implement GODEBUG=inittrace=1 support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Setting inittrace=1 causes the runtime to emit a single line to standard error for each package with init work, summarizing the execution time and memory allocation. The emitted debug information for init functions can be used to find bottlenecks or regressions in Go startup performance. Packages with no init function work (user defined or compiler generated) are omitted. Tracing plugin inits is not supported as they can execute concurrently. This would make the implementation of tracing more complex while adding support for a very rare use case. Plugin inits can be traced separately by testing a main package importing the plugins package imports explicitly. $ GODEBUG=inittrace=1 go test init internal/bytealg @0.008 ms, 0 ms clock, 0 bytes, 0 allocs init runtime @0.059 ms, 0.026 ms clock, 0 bytes, 0 allocs init math @0.19 ms, 0.001 ms clock, 0 bytes, 0 allocs init errors @0.22 ms, 0.004 ms clock, 0 bytes, 0 allocs init strconv @0.24 ms, 0.002 ms clock, 32 bytes, 2 allocs init sync @0.28 ms, 0.003 ms clock, 16 bytes, 1 allocs init unicode @0.44 ms, 0.11 ms clock, 23328 bytes, 24 allocs ... 
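For anyone who wants to try the flag, a minimal sketch follows. The program and
the imported package are arbitrary choices; any package with init work in its
dependency graph will produce trace lines.

    // inittrace_demo.go
    package main

    // Blank-import something with non-trivial init work among its
    // dependencies; net/http is just a convenient example.
    import _ "net/http"

    func main() {}

Build it and run it with the variable set:

    go build -o demo inittrace_demo.go
    GODEBUG=inittrace=1 ./demo

Each package with init work then prints one line to standard error in the
format shown above.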
Inspired by stapelberg@google.com who instrumented doInit in a prototype to measure init times with GDB. Fixes #41378 Change-Id: Ic37c6a0cfc95488de9e737f5e346b8dbb39174e1 Reviewed-on: https://go-review.googlesource.com/c/go/+/254659 Trust: Martin Möhrmann Run-TryBot: Martin Möhrmann TryBot-Result: Go Bot Reviewed-by: Keith Randall --- src/runtime/extern.go | 13 ++++++++++ src/runtime/malloc.go | 56 ++++++++++++++++++++++++--------------- src/runtime/proc.go | 69 ++++++++++++++++++++++++++++++++++++++++++++----- src/runtime/runtime1.go | 13 ++++++++-- src/runtime/symtab.go | 16 ++++++++++++ 5 files changed, 138 insertions(+), 29 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/extern.go b/src/runtime/extern.go index 7316503ed2..b75507b8f8 100644 --- a/src/runtime/extern.go +++ b/src/runtime/extern.go @@ -78,6 +78,19 @@ It is a comma-separated list of name=val pairs setting these named variables: If the line ends with "(forced)", this GC was forced by a runtime.GC() call. + inittrace: setting inittrace=1 causes the runtime to emit a single line to standard + error for each package with init work, summarizing the execution time and memory + allocation. No information is printed for inits executed as part of plugin loading + and for packages without both user defined and compiler generated init work. + The format of this line is subject to change. Currently, it is: + init # @#ms, # ms clock, # bytes, # allocs + where the fields are as follows: + init # the package name + @# ms time in milliseconds when the init started since program start + # clock wall-clock time for package initialization work + # bytes memory allocated on the heap + # allocs number of heap allocations + madvdontneed: setting madvdontneed=1 will use MADV_DONTNEED instead of MADV_FREE on Linux when returning memory to the kernel. This is less efficient, but causes RSS numbers to drop diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go index f7e9b7c4b4..b19d1f2671 100644 --- a/src/runtime/malloc.go +++ b/src/runtime/malloc.go @@ -909,27 +909,34 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer { return unsafe.Pointer(&zerobase) } - if debug.sbrk != 0 { - align := uintptr(16) - if typ != nil { - // TODO(austin): This should be just - // align = uintptr(typ.align) - // but that's only 4 on 32-bit platforms, - // even if there's a uint64 field in typ (see #599). - // This causes 64-bit atomic accesses to panic. - // Hence, we use stricter alignment that matches - // the normal allocator better. - if size&7 == 0 { - align = 8 - } else if size&3 == 0 { - align = 4 - } else if size&1 == 0 { - align = 2 - } else { - align = 1 + if debug.malloc { + if debug.sbrk != 0 { + align := uintptr(16) + if typ != nil { + // TODO(austin): This should be just + // align = uintptr(typ.align) + // but that's only 4 on 32-bit platforms, + // even if there's a uint64 field in typ (see #599). + // This causes 64-bit atomic accesses to panic. + // Hence, we use stricter alignment that matches + // the normal allocator better. + if size&7 == 0 { + align = 8 + } else if size&3 == 0 { + align = 4 + } else if size&1 == 0 { + align = 2 + } else { + align = 1 + } } + return persistentalloc(size, align, &memstats.other_sys) + } + + if inittrace.active && inittrace.id == getg().goid { + // Init functions are executed sequentially in a single Go routine. 
+ inittrace.allocs += 1 } - return persistentalloc(size, align, &memstats.other_sys) } // assistG is the G to charge for this allocation, or nil if @@ -1136,8 +1143,15 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer { mp.mallocing = 0 releasem(mp) - if debug.allocfreetrace != 0 { - tracealloc(x, size, typ) + if debug.malloc { + if debug.allocfreetrace != 0 { + tracealloc(x, size, typ) + } + + if inittrace.active && inittrace.id == getg().goid { + // Init functions are executed sequentially in a single Go routine. + inittrace.bytes += uint64(size) + } } if rate := MemProfileRate; rate > 0 { diff --git a/src/runtime/proc.go b/src/runtime/proc.go index a1e2ed0680..4872480314 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -154,11 +154,20 @@ func main() { throw("runtime.main not on m0") } - doInit(&runtime_inittask) // must be before defer - if nanotime() == 0 { + // Record when the world started. + // Must be before doInit for tracing init. + runtimeInitTime = nanotime() + if runtimeInitTime == 0 { throw("nanotime returning zero") } + if debug.inittrace != 0 { + inittrace.id = getg().goid + inittrace.active = true + } + + doInit(&runtime_inittask) // Must be before defer. + // Defer unlock so that runtime.Goexit during init does the unlock too. needUnlock := true defer func() { @@ -167,9 +176,6 @@ func main() { } }() - // Record when the world started. - runtimeInitTime = nanotime() - gcenable() main_init_done = make(chan bool) @@ -196,6 +202,10 @@ func main() { doInit(&main_inittask) + // Disable init tracing after main init done to avoid overhead + // of collecting statistics in malloc and newproc + inittrace.active = false + close(main_init_done) needUnlock = false @@ -5665,6 +5675,17 @@ type initTask struct { // followed by nfns pcs, one per init function to run } +// inittrace stores statistics for init functions which are +// updated by malloc and newproc when active is true. +var inittrace tracestat + +type tracestat struct { + active bool // init tracing activation status + id int64 // init go routine id + allocs uint64 // heap allocations + bytes uint64 // heap allocated bytes +} + func doInit(t *initTask) { switch t.state { case 2: // fully initialized @@ -5673,16 +5694,52 @@ func doInit(t *initTask) { throw("recursive call during initialization - linker skew") default: // not initialized yet t.state = 1 // initialization in progress + for i := uintptr(0); i < t.ndeps; i++ { p := add(unsafe.Pointer(t), (3+i)*sys.PtrSize) t2 := *(**initTask)(p) doInit(t2) } + + if t.nfns == 0 { + t.state = 2 // initialization done + return + } + + var ( + start int64 + before tracestat + ) + + if inittrace.active { + start = nanotime() + // Load stats non-atomically since tracinit is updated only by this init go routine. + before = inittrace + } + + firstFunc := add(unsafe.Pointer(t), (3+t.ndeps)*sys.PtrSize) for i := uintptr(0); i < t.nfns; i++ { - p := add(unsafe.Pointer(t), (3+t.ndeps+i)*sys.PtrSize) + p := add(firstFunc, i*sys.PtrSize) f := *(*func())(unsafe.Pointer(&p)) f() } + + if inittrace.active { + end := nanotime() + // Load stats non-atomically since tracinit is updated only by this init go routine. 
+ after := inittrace + + pkg := funcpkgpath(findfunc(funcPC(firstFunc))) + + var sbuf [24]byte + print("init ", pkg, " @") + print(string(fmtNSAsMS(sbuf[:], uint64(start-runtimeInitTime))), " ms, ") + print(string(fmtNSAsMS(sbuf[:], uint64(end-start))), " ms clock, ") + print(string(itoa(sbuf[:], after.bytes-before.bytes)), " bytes, ") + print(string(itoa(sbuf[:], after.allocs-before.allocs)), " allocs") + print("\n") + } + t.state = 2 // initialization done } } diff --git a/src/runtime/runtime1.go b/src/runtime/runtime1.go index 7c893aa25c..0f182ac58e 100644 --- a/src/runtime/runtime1.go +++ b/src/runtime/runtime1.go @@ -300,7 +300,6 @@ type dbgVar struct { // existing int var for that value, which may // already have an initial value. var debug struct { - allocfreetrace int32 cgocheck int32 clobberfree int32 efence int32 @@ -311,13 +310,20 @@ var debug struct { gctrace int32 invalidptr int32 madvdontneed int32 // for Linux; issue 28466 - sbrk int32 scavenge int32 scavtrace int32 scheddetail int32 schedtrace int32 tracebackancestors int32 asyncpreemptoff int32 + + // debug.malloc is used as a combined debug check + // in the malloc function and should be set + // if any of the below debug options is != 0. + malloc bool + allocfreetrace int32 + inittrace int32 + sbrk int32 } var dbgvars = []dbgVar{ @@ -339,6 +345,7 @@ var dbgvars = []dbgVar{ {"schedtrace", &debug.schedtrace}, {"tracebackancestors", &debug.tracebackancestors}, {"asyncpreemptoff", &debug.asyncpreemptoff}, + {"inittrace", &debug.inittrace}, } func parsedebugvars() { @@ -378,6 +385,8 @@ func parsedebugvars() { } } + debug.malloc = (debug.allocfreetrace | debug.inittrace | debug.sbrk) != 0 + setTraceback(gogetenv("GOTRACEBACK")) traceback_env = traceback_cache } diff --git a/src/runtime/symtab.go b/src/runtime/symtab.go index a14f5c13d9..84637376bf 100644 --- a/src/runtime/symtab.go +++ b/src/runtime/symtab.go @@ -844,6 +844,22 @@ func funcname(f funcInfo) string { return gostringnocopy(cfuncname(f)) } +func funcpkgpath(f funcInfo) string { + name := funcname(f) + i := len(name) - 1 + for ; i > 0; i-- { + if name[i] == '/' { + break + } + } + for ; i < len(name); i++ { + if name[i] == '.' { + break + } + } + return name[:i] +} + func cfuncnameFromNameoff(f funcInfo, nameoff int32) *byte { if !f.valid() { return nil -- cgit v1.2.1 From e4ec30965b9ca629922e83b8d335224ae4bdf062 Mon Sep 17 00:00:00 2001 From: Cherry Zhang Date: Mon, 12 Oct 2020 13:44:21 -0400 Subject: cmd/link: support internal linking on darwin/arm64 Add support of internal linking on darwin/arm64 (macOS). Still incomplete. Pure Go binaries work. Cgo doesn't. TLS is not set up when cgo is not used (as before) (so asynchronous preemption is not enabled). Internal linking is not enabled by default but can be requested via -ldflags=-linkmode=internal. Updates #38485. Change-Id: I1e0c81b6028edcb1ac26dcdafeb9bb3f788cf732 Reviewed-on: https://go-review.googlesource.com/c/go/+/261643 Trust: Cherry Zhang Reviewed-by: Than McIntosh --- src/runtime/rt0_darwin_arm64.s | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/rt0_darwin_arm64.s b/src/runtime/rt0_darwin_arm64.s index e3972f4924..0040361215 100644 --- a/src/runtime/rt0_darwin_arm64.s +++ b/src/runtime/rt0_darwin_arm64.s @@ -4,11 +4,14 @@ #include "textflag.h" -// No need for _rt0_arm64_darwin as darwin/arm64 only -// supports external linking. 
TEXT _rt0_arm64_darwin(SB),NOSPLIT|NOFRAME,$0 - MOVD $42, R0 - BL libc_exit(SB) + MOVD $runtime·rt0_go(SB), R2 + BL (R2) +exit: + MOVD $0, R0 + MOVD $1, R16 // sys_exit + SVC $0x80 + B exit // When linking with -buildmode=c-archive or -buildmode=c-shared, // this symbol is called from a global initialization function. @@ -86,11 +89,6 @@ GLOBL _rt0_arm64_darwin_lib_argc<>(SB),NOPTR, $8 DATA _rt0_arm64_darwin_lib_argv<>(SB)/8, $0 GLOBL _rt0_arm64_darwin_lib_argv<>(SB),NOPTR, $8 +// external linking entry point. TEXT main(SB),NOSPLIT|NOFRAME,$0 - MOVD $runtime·rt0_go(SB), R2 - BL (R2) -exit: - MOVD $0, R0 - MOVD $1, R16 // sys_exit - SVC $0x80 - B exit + JMP _rt0_arm64_darwin(SB) -- cgit v1.2.1 From 2517f4946b42b8deedb864c884f1b41311d45850 Mon Sep 17 00:00:00 2001 From: Michael Pratt Date: Wed, 14 Oct 2020 17:18:27 -0400 Subject: runtime: remove debugCachedWork debugCachedWork and all of its dependent fields and code were added to aid in debugging issue #27993. Now that the source of the problem is known and mitigated (via the extra work check after STW in gcMarkDone), these extra checks are no longer required and simply make the code more difficult to follow. Remove it all. Updates #27993 Change-Id: I594beedd5ca61733ba9cc9eaad8f80ea92df1a0d Reviewed-on: https://go-review.googlesource.com/c/go/+/262350 Trust: Michael Pratt Run-TryBot: Michael Pratt TryBot-Result: Go Bot Reviewed-by: Austin Clements --- src/runtime/mgc.go | 121 ++++++++++--------------------------------------- src/runtime/mgcwork.go | 74 ------------------------------ src/runtime/mwbbuf.go | 32 +------------ src/runtime/panic.go | 9 ---- 4 files changed, 26 insertions(+), 210 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index bd87144355..0a4d5616a5 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -1407,19 +1407,6 @@ func gcStart(trigger gcTrigger) { // This is protected by markDoneSema. var gcMarkDoneFlushed uint32 -// debugCachedWork enables extra checks for debugging premature mark -// termination. -// -// For debugging issue #27993. -const debugCachedWork = false - -// gcWorkPauseGen is for debugging the mark completion algorithm. -// gcWork put operations spin while gcWork.pauseGen == gcWorkPauseGen. -// Only used if debugCachedWork is true. -// -// For debugging issue #27993. -var gcWorkPauseGen uint32 = 1 - // gcMarkDone transitions the GC from mark to mark termination if all // reachable objects have been marked (that is, there are no grey // objects and can be no more in the future). Otherwise, it flushes @@ -1475,15 +1462,7 @@ top: // Flush the write barrier buffer, since this may add // work to the gcWork. wbBufFlush1(_p_) - // For debugging, shrink the write barrier - // buffer so it flushes immediately. - // wbBuf.reset will keep it at this size as - // long as throwOnGCWork is set. - if debugCachedWork { - b := &_p_.wbBuf - b.end = uintptr(unsafe.Pointer(&b.buf[wbBufEntryPointers])) - b.debugGen = gcWorkPauseGen - } + // Flush the gcWork, since this may create global work // and set the flushedWork flag. // @@ -1494,29 +1473,12 @@ top: if _p_.gcw.flushedWork { atomic.Xadd(&gcMarkDoneFlushed, 1) _p_.gcw.flushedWork = false - } else if debugCachedWork { - // For debugging, freeze the gcWork - // until we know whether we've reached - // completion or not. If we think - // we've reached completion, but - // there's a paused gcWork, then - // that's a bug. - _p_.gcw.pauseGen = gcWorkPauseGen - // Capture the G's stack. 
- for i := range _p_.gcw.pauseStack { - _p_.gcw.pauseStack[i] = 0 - } - callers(1, _p_.gcw.pauseStack[:]) } }) casgstatus(gp, _Gwaiting, _Grunning) }) if gcMarkDoneFlushed != 0 { - if debugCachedWork { - // Release paused gcWorks. - atomic.Xadd(&gcWorkPauseGen, 1) - } // More grey objects were discovered since the // previous termination check, so there may be more // work to do. Keep going. It's possible the @@ -1526,13 +1488,6 @@ top: goto top } - if debugCachedWork { - throwOnGCWork = true - // Release paused gcWorks. If there are any, they - // should now observe throwOnGCWork and panic. - atomic.Xadd(&gcWorkPauseGen, 1) - } - // There was no global work, no local work, and no Ps // communicated work since we took markDoneSema. Therefore // there are no grey objects and no more objects can be @@ -1549,59 +1504,33 @@ top: // below. The important thing is that the wb remains active until // all marking is complete. This includes writes made by the GC. - if debugCachedWork { - // For debugging, double check that no work was added after we - // went around above and disable write barrier buffering. + // There is sometimes work left over when we enter mark termination due + // to write barriers performed after the completion barrier above. + // Detect this and resume concurrent mark. This is obviously + // unfortunate. + // + // See issue #27993 for details. + // + // Switch to the system stack to call wbBufFlush1, though in this case + // it doesn't matter because we're non-preemptible anyway. + restart := false + systemstack(func() { for _, p := range allp { - gcw := &p.gcw - if !gcw.empty() { - printlock() - print("runtime: P ", p.id, " flushedWork ", gcw.flushedWork) - if gcw.wbuf1 == nil { - print(" wbuf1=") - } else { - print(" wbuf1.n=", gcw.wbuf1.nobj) - } - if gcw.wbuf2 == nil { - print(" wbuf2=") - } else { - print(" wbuf2.n=", gcw.wbuf2.nobj) - } - print("\n") - if gcw.pauseGen == gcw.putGen { - println("runtime: checkPut already failed at this generation") - } - throw("throwOnGCWork") + wbBufFlush1(p) + if !p.gcw.empty() { + restart = true + break } } - } else { - // For unknown reasons (see issue #27993), there is - // sometimes work left over when we enter mark - // termination. Detect this and resume concurrent - // mark. This is obviously unfortunate. - // - // Switch to the system stack to call wbBufFlush1, - // though in this case it doesn't matter because we're - // non-preemptible anyway. - restart := false + }) + if restart { + getg().m.preemptoff = "" systemstack(func() { - for _, p := range allp { - wbBufFlush1(p) - if !p.gcw.empty() { - restart = true - break - } - } + now := startTheWorldWithSema(true) + work.pauseNS += now - work.pauseStart }) - if restart { - getg().m.preemptoff = "" - systemstack(func() { - now := startTheWorldWithSema(true) - work.pauseNS += now - work.pauseStart - }) - semrelease(&worldsema) - goto top - } + semrelease(&worldsema) + goto top } // Disable assists and background workers. We must do @@ -2085,7 +2014,7 @@ func gcMark(start_time int64) { // ensured all reachable objects were marked, all of // these must be pointers to black objects. Hence we // can just discard the write barrier buffer. - if debug.gccheckmark > 0 || throwOnGCWork { + if debug.gccheckmark > 0 { // For debugging, flush the buffer and make // sure it really was all marked. wbBufFlush1(p) @@ -2117,8 +2046,6 @@ func gcMark(start_time int64) { gcw.dispose() } - throwOnGCWork = false - cachestats() // Update the marked heap stat. 
diff --git a/src/runtime/mgcwork.go b/src/runtime/mgcwork.go index 46101657d5..51e0fe9219 100644 --- a/src/runtime/mgcwork.go +++ b/src/runtime/mgcwork.go @@ -22,13 +22,6 @@ const ( workbufAlloc = 32 << 10 ) -// throwOnGCWork causes any operations that add pointers to a gcWork -// buffer to throw. -// -// TODO(austin): This is a temporary debugging measure for issue -// #27993. To be removed before release. -var throwOnGCWork bool - func init() { if workbufAlloc%pageSize != 0 || workbufAlloc%_WorkbufSize != 0 { throw("bad workbufAlloc") @@ -93,17 +86,6 @@ type gcWork struct { // termination check. Specifically, this indicates that this // gcWork may have communicated work to another gcWork. flushedWork bool - - // pauseGen causes put operations to spin while pauseGen == - // gcWorkPauseGen if debugCachedWork is true. - pauseGen uint32 - - // putGen is the pauseGen of the last putGen. - putGen uint32 - - // pauseStack is the stack at which this P was paused if - // debugCachedWork is true. - pauseStack [16]uintptr } // Most of the methods of gcWork are go:nowritebarrierrec because the @@ -122,60 +104,10 @@ func (w *gcWork) init() { w.wbuf2 = wbuf2 } -func (w *gcWork) checkPut(ptr uintptr, ptrs []uintptr) { - if debugCachedWork { - alreadyFailed := w.putGen == w.pauseGen - w.putGen = w.pauseGen - if !canPreemptM(getg().m) { - // If we were to spin, the runtime may - // deadlock. Since we can't be preempted, the - // spin could prevent gcMarkDone from - // finishing the ragged barrier, which is what - // releases us from the spin. - return - } - for atomic.Load(&gcWorkPauseGen) == w.pauseGen { - } - if throwOnGCWork { - printlock() - if alreadyFailed { - println("runtime: checkPut already failed at this generation") - } - println("runtime: late gcWork put") - if ptr != 0 { - gcDumpObject("ptr", ptr, ^uintptr(0)) - } - for _, ptr := range ptrs { - gcDumpObject("ptrs", ptr, ^uintptr(0)) - } - println("runtime: paused at") - for _, pc := range w.pauseStack { - if pc == 0 { - break - } - f := findfunc(pc) - if f.valid() { - // Obviously this doesn't - // relate to ancestor - // tracebacks, but this - // function prints what we - // want. - printAncestorTracebackFuncInfo(f, pc) - } else { - println("\tunknown PC ", hex(pc), "\n") - } - } - throw("throwOnGCWork") - } - } -} - // put enqueues a pointer for the garbage collector to trace. // obj must point to the beginning of a heap object or an oblet. //go:nowritebarrierrec func (w *gcWork) put(obj uintptr) { - w.checkPut(obj, nil) - flushed := false wbuf := w.wbuf1 // Record that this may acquire the wbufSpans or heap lock to @@ -214,8 +146,6 @@ func (w *gcWork) put(obj uintptr) { // otherwise it returns false and the caller needs to call put. 
//go:nowritebarrierrec func (w *gcWork) putFast(obj uintptr) bool { - w.checkPut(obj, nil) - wbuf := w.wbuf1 if wbuf == nil { return false @@ -237,8 +167,6 @@ func (w *gcWork) putBatch(obj []uintptr) { return } - w.checkPut(0, obj) - flushed := false wbuf := w.wbuf1 if wbuf == nil { @@ -360,12 +288,10 @@ func (w *gcWork) balance() { return } if wbuf := w.wbuf2; wbuf.nobj != 0 { - w.checkPut(0, wbuf.obj[:wbuf.nobj]) putfull(wbuf) w.flushedWork = true w.wbuf2 = getempty() } else if wbuf := w.wbuf1; wbuf.nobj > 4 { - w.checkPut(0, wbuf.obj[:wbuf.nobj]) w.wbuf1 = handoff(wbuf) w.flushedWork = true // handoff did putfull } else { diff --git a/src/runtime/mwbbuf.go b/src/runtime/mwbbuf.go index 632769c114..6efc00007d 100644 --- a/src/runtime/mwbbuf.go +++ b/src/runtime/mwbbuf.go @@ -57,12 +57,6 @@ type wbBuf struct { // on. This must be a multiple of wbBufEntryPointers because // the write barrier only checks for overflow once per entry. buf [wbBufEntryPointers * wbBufEntries]uintptr - - // debugGen causes the write barrier buffer to flush after - // every write barrier if equal to gcWorkPauseGen. This is for - // debugging #27993. This is only set if debugCachedWork is - // set. - debugGen uint32 } const ( @@ -86,7 +80,7 @@ const ( func (b *wbBuf) reset() { start := uintptr(unsafe.Pointer(&b.buf[0])) b.next = start - if writeBarrier.cgo || (debugCachedWork && (throwOnGCWork || b.debugGen == atomic.Load(&gcWorkPauseGen))) { + if writeBarrier.cgo { // Effectively disable the buffer by forcing a flush // on every barrier. b.end = uintptr(unsafe.Pointer(&b.buf[wbBufEntryPointers])) @@ -204,32 +198,10 @@ func wbBufFlush(dst *uintptr, src uintptr) { // Switch to the system stack so we don't have to worry about // the untyped stack slots or safe points. systemstack(func() { - if debugCachedWork { - // For debugging, include the old value of the - // slot and some other data in the traceback. - wbBuf := &getg().m.p.ptr().wbBuf - var old uintptr - if dst != nil { - // dst may be nil in direct calls to wbBufFlush. - old = *dst - } - wbBufFlush1Debug(old, wbBuf.buf[0], wbBuf.buf[1], &wbBuf.buf[0], wbBuf.next) - } else { - wbBufFlush1(getg().m.p.ptr()) - } + wbBufFlush1(getg().m.p.ptr()) }) } -// wbBufFlush1Debug is a temporary function for debugging issue -// #27993. It exists solely to add some context to the traceback. -// -//go:nowritebarrierrec -//go:systemstack -//go:noinline -func wbBufFlush1Debug(old, buf1, buf2 uintptr, start *uintptr, next uintptr) { - wbBufFlush1(getg().m.p.ptr()) -} - // wbBufFlush1 flushes p's write barrier buffer to the GC work queue. // // This must not have write barriers because it is part of the write diff --git a/src/runtime/panic.go b/src/runtime/panic.go index 6050a34d29..aed17d6fc6 100644 --- a/src/runtime/panic.go +++ b/src/runtime/panic.go @@ -421,15 +421,6 @@ func newdefer(siz int32) *_defer { total := roundupsize(totaldefersize(uintptr(siz))) d = (*_defer)(mallocgc(total, deferType, true)) }) - if debugCachedWork { - // Duplicate the tail below so if there's a - // crash in checkPut we can tell if d was just - // allocated or came from the pool. - d.siz = siz - d.link = gp._defer - gp._defer = d - return d - } } d.siz = siz d.heap = true -- cgit v1.2.1 From 64fb6ae95f1c322486cbfb758552bb8439a8e6e8 Mon Sep 17 00:00:00 2001 From: Ian Lance Taylor Date: Wed, 14 Oct 2020 16:03:48 -0700 Subject: runtime: stop preemption during syscall.Exec on Darwin On current macOS versions a program that receives a signal during an execve can fail with a SIGILL signal. 
This appears to be a macOS kernel bug. It has been reported to Apple. This CL partially works around the problem by using execLock to not send preemption signals during execve. Of course some other stray signal could occur, but at least we can avoid exacerbating the problem. We can't simply disable signals, as that would mean that the exec'ed process would start with all signals blocked, which it likely does not expect. Fixes #41702 Change-Id: I91b0add967b315671ddcf73269c4d30136e579b4 Reviewed-on: https://go-review.googlesource.com/c/go/+/262438 Trust: Ian Lance Taylor Run-TryBot: Ian Lance Taylor TryBot-Result: Go Bot Reviewed-by: Cherry Zhang --- src/runtime/signal_unix.go | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'src/runtime') diff --git a/src/runtime/signal_unix.go b/src/runtime/signal_unix.go index a3d6f34c88..c228de47b4 100644 --- a/src/runtime/signal_unix.go +++ b/src/runtime/signal_unix.go @@ -356,6 +356,13 @@ func preemptM(mp *m) { // required). return } + + // On Darwin, don't try to preempt threads during exec. + // Issue #41702. + if GOOS == "darwin" { + execLock.rlock() + } + if atomic.Cas(&mp.signalPending, 0, 1) { // If multiple threads are preempting the same M, it may send many // signals to the same M such that it hardly make progress, causing @@ -364,6 +371,10 @@ func preemptM(mp *m) { // Only send a signal if there isn't already one pending. signalM(mp, sigPreempt) } + + if GOOS == "darwin" { + execLock.runlock() + } } // sigFetchG fetches the value of G safely when running in a signal handler. -- cgit v1.2.1 From afba990169f41d9026c923da5235584db32cab67 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Thu, 15 Oct 2020 16:11:10 -0400 Subject: runtime/internal/atomic: drop package prefixes This drops package prefixes from the assembly code on 386 and arm. In addition to just being nicer, this allows the assembler to automatically pick up the argument stack map from the Go signatures of these functions. This doesn't matter right now because these functions never call back out to Go, but prepares us for the next CL. Change-Id: I90fed7d4dd63ad49274529c62804211b6390e2e9 Reviewed-on: https://go-review.googlesource.com/c/go/+/262777 Trust: Austin Clements Run-TryBot: Austin Clements TryBot-Result: Go Bot Reviewed-by: Cherry Zhang Reviewed-by: Michael Knyszek --- src/runtime/funcdata.h | 6 +-- src/runtime/internal/atomic/asm_386.s | 76 ++++++++++++++++----------------- src/runtime/internal/atomic/asm_arm.s | 80 +++++++++++++++++------------------ 3 files changed, 81 insertions(+), 81 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/funcdata.h b/src/runtime/funcdata.h index 0fb50ddfba..cd76c06992 100644 --- a/src/runtime/funcdata.h +++ b/src/runtime/funcdata.h @@ -32,9 +32,9 @@ // defines the pointer map for the function's arguments. // GO_ARGS should be the first instruction in a function that uses it. // It can be omitted if there are no arguments at all. -// GO_ARGS is inserted implicitly by the linker for any function -// that also has a Go prototype and therefore is usually not necessary -// to write explicitly. +// GO_ARGS is inserted implicitly by the linker for any function whose +// name starts with a middle-dot and that also has a Go prototype; it +// is therefore usually not necessary to write explicitly. 
#define GO_ARGS FUNCDATA $FUNCDATA_ArgsPointerMaps, go_args_stackmap(SB) // GO_RESULTS_INITIALIZED indicates that the assembly function diff --git a/src/runtime/internal/atomic/asm_386.s b/src/runtime/internal/atomic/asm_386.s index 9b9dc14a60..357ca95625 100644 --- a/src/runtime/internal/atomic/asm_386.s +++ b/src/runtime/internal/atomic/asm_386.s @@ -11,7 +11,7 @@ // return 1; // }else // return 0; -TEXT runtime∕internal∕atomic·Cas(SB), NOSPLIT, $0-13 +TEXT ·Cas(SB), NOSPLIT, $0-13 MOVL ptr+0(FP), BX MOVL old+4(FP), AX MOVL new+8(FP), CX @@ -20,32 +20,32 @@ TEXT runtime∕internal∕atomic·Cas(SB), NOSPLIT, $0-13 SETEQ ret+12(FP) RET -TEXT runtime∕internal∕atomic·Casuintptr(SB), NOSPLIT, $0-13 - JMP runtime∕internal∕atomic·Cas(SB) +TEXT ·Casuintptr(SB), NOSPLIT, $0-13 + JMP ·Cas(SB) -TEXT runtime∕internal∕atomic·CasRel(SB), NOSPLIT, $0-13 - JMP runtime∕internal∕atomic·Cas(SB) +TEXT ·CasRel(SB), NOSPLIT, $0-13 + JMP ·Cas(SB) -TEXT runtime∕internal∕atomic·Loaduintptr(SB), NOSPLIT, $0-8 - JMP runtime∕internal∕atomic·Load(SB) +TEXT ·Loaduintptr(SB), NOSPLIT, $0-8 + JMP ·Load(SB) -TEXT runtime∕internal∕atomic·Loaduint(SB), NOSPLIT, $0-8 - JMP runtime∕internal∕atomic·Load(SB) +TEXT ·Loaduint(SB), NOSPLIT, $0-8 + JMP ·Load(SB) -TEXT runtime∕internal∕atomic·Storeuintptr(SB), NOSPLIT, $0-8 - JMP runtime∕internal∕atomic·Store(SB) +TEXT ·Storeuintptr(SB), NOSPLIT, $0-8 + JMP ·Store(SB) -TEXT runtime∕internal∕atomic·Xadduintptr(SB), NOSPLIT, $0-12 - JMP runtime∕internal∕atomic·Xadd(SB) +TEXT ·Xadduintptr(SB), NOSPLIT, $0-12 + JMP ·Xadd(SB) -TEXT runtime∕internal∕atomic·Loadint64(SB), NOSPLIT, $0-12 - JMP runtime∕internal∕atomic·Load64(SB) +TEXT ·Loadint64(SB), NOSPLIT, $0-12 + JMP ·Load64(SB) -TEXT runtime∕internal∕atomic·Xaddint64(SB), NOSPLIT, $0-20 - JMP runtime∕internal∕atomic·Xadd64(SB) +TEXT ·Xaddint64(SB), NOSPLIT, $0-20 + JMP ·Xadd64(SB) -// bool runtime∕internal∕atomic·Cas64(uint64 *val, uint64 old, uint64 new) +// bool ·Cas64(uint64 *val, uint64 old, uint64 new) // Atomically: // if(*val == *old){ // *val = new; @@ -53,7 +53,7 @@ TEXT runtime∕internal∕atomic·Xaddint64(SB), NOSPLIT, $0-20 // } else { // return 0; // } -TEXT runtime∕internal∕atomic·Cas64(SB), NOSPLIT, $0-21 +TEXT ·Cas64(SB), NOSPLIT, $0-21 MOVL ptr+0(FP), BP TESTL $7, BP JZ 2(PC) @@ -74,7 +74,7 @@ TEXT runtime∕internal∕atomic·Cas64(SB), NOSPLIT, $0-21 // return 1; // }else // return 0; -TEXT runtime∕internal∕atomic·Casp1(SB), NOSPLIT, $0-13 +TEXT ·Casp1(SB), NOSPLIT, $0-13 MOVL ptr+0(FP), BX MOVL old+4(FP), AX MOVL new+8(FP), CX @@ -87,7 +87,7 @@ TEXT runtime∕internal∕atomic·Casp1(SB), NOSPLIT, $0-13 // Atomically: // *val += delta; // return *val; -TEXT runtime∕internal∕atomic·Xadd(SB), NOSPLIT, $0-12 +TEXT ·Xadd(SB), NOSPLIT, $0-12 MOVL ptr+0(FP), BX MOVL delta+4(FP), AX MOVL AX, CX @@ -97,7 +97,7 @@ TEXT runtime∕internal∕atomic·Xadd(SB), NOSPLIT, $0-12 MOVL AX, ret+8(FP) RET -TEXT runtime∕internal∕atomic·Xadd64(SB), NOSPLIT, $0-20 +TEXT ·Xadd64(SB), NOSPLIT, $0-20 // no XADDQ so use CMPXCHG8B loop MOVL ptr+0(FP), BP TESTL $7, BP @@ -133,17 +133,17 @@ addloop: MOVL CX, ret_hi+16(FP) RET -TEXT runtime∕internal∕atomic·Xchg(SB), NOSPLIT, $0-12 +TEXT ·Xchg(SB), NOSPLIT, $0-12 MOVL ptr+0(FP), BX MOVL new+4(FP), AX XCHGL AX, 0(BX) MOVL AX, ret+8(FP) RET -TEXT runtime∕internal∕atomic·Xchguintptr(SB), NOSPLIT, $0-12 - JMP runtime∕internal∕atomic·Xchg(SB) +TEXT ·Xchguintptr(SB), NOSPLIT, $0-12 + JMP ·Xchg(SB) -TEXT runtime∕internal∕atomic·Xchg64(SB),NOSPLIT,$0-20 +TEXT ·Xchg64(SB),NOSPLIT,$0-20 // no XCHGQ so use CMPXCHG8B loop MOVL 
ptr+0(FP), BP TESTL $7, BP @@ -171,23 +171,23 @@ swaploop: MOVL DX, ret_hi+16(FP) RET -TEXT runtime∕internal∕atomic·StorepNoWB(SB), NOSPLIT, $0-8 +TEXT ·StorepNoWB(SB), NOSPLIT, $0-8 MOVL ptr+0(FP), BX MOVL val+4(FP), AX XCHGL AX, 0(BX) RET -TEXT runtime∕internal∕atomic·Store(SB), NOSPLIT, $0-8 +TEXT ·Store(SB), NOSPLIT, $0-8 MOVL ptr+0(FP), BX MOVL val+4(FP), AX XCHGL AX, 0(BX) RET -TEXT runtime∕internal∕atomic·StoreRel(SB), NOSPLIT, $0-8 - JMP runtime∕internal∕atomic·Store(SB) +TEXT ·StoreRel(SB), NOSPLIT, $0-8 + JMP ·Store(SB) // uint64 atomicload64(uint64 volatile* addr); -TEXT runtime∕internal∕atomic·Load64(SB), NOSPLIT, $0-12 +TEXT ·Load64(SB), NOSPLIT, $0-12 MOVL ptr+0(FP), AX TESTL $7, AX JZ 2(PC) @@ -197,8 +197,8 @@ TEXT runtime∕internal∕atomic·Load64(SB), NOSPLIT, $0-12 EMMS RET -// void runtime∕internal∕atomic·Store64(uint64 volatile* addr, uint64 v); -TEXT runtime∕internal∕atomic·Store64(SB), NOSPLIT, $0-12 +// void ·Store64(uint64 volatile* addr, uint64 v); +TEXT ·Store64(SB), NOSPLIT, $0-12 MOVL ptr+0(FP), AX TESTL $7, AX JZ 2(PC) @@ -214,23 +214,23 @@ TEXT runtime∕internal∕atomic·Store64(SB), NOSPLIT, $0-12 XADDL AX, (SP) RET -// void runtime∕internal∕atomic·Or8(byte volatile*, byte); -TEXT runtime∕internal∕atomic·Or8(SB), NOSPLIT, $0-5 +// void ·Or8(byte volatile*, byte); +TEXT ·Or8(SB), NOSPLIT, $0-5 MOVL ptr+0(FP), AX MOVB val+4(FP), BX LOCK ORB BX, (AX) RET -// void runtime∕internal∕atomic·And8(byte volatile*, byte); -TEXT runtime∕internal∕atomic·And8(SB), NOSPLIT, $0-5 +// void ·And8(byte volatile*, byte); +TEXT ·And8(SB), NOSPLIT, $0-5 MOVL ptr+0(FP), AX MOVB val+4(FP), BX LOCK ANDB BX, (AX) RET -TEXT runtime∕internal∕atomic·Store8(SB), NOSPLIT, $0-5 +TEXT ·Store8(SB), NOSPLIT, $0-5 MOVL ptr+0(FP), BX MOVB val+4(FP), AX XCHGB AX, 0(BX) diff --git a/src/runtime/internal/atomic/asm_arm.s b/src/runtime/internal/atomic/asm_arm.s index d4ef11560e..db1267423d 100644 --- a/src/runtime/internal/atomic/asm_arm.s +++ b/src/runtime/internal/atomic/asm_arm.s @@ -12,13 +12,13 @@ // }else // return 0; // -// To implement runtime∕internal∕atomic·cas in sys_$GOOS_arm.s +// To implement ·cas in sys_$GOOS_arm.s // using the native instructions, use: // -// TEXT runtime∕internal∕atomic·cas(SB),NOSPLIT,$0 -// B runtime∕internal∕atomic·armcas(SB) +// TEXT ·cas(SB),NOSPLIT,$0 +// B ·armcas(SB) // -TEXT runtime∕internal∕atomic·armcas(SB),NOSPLIT,$0-13 +TEXT ·armcas(SB),NOSPLIT,$0-13 MOVW ptr+0(FP), R1 MOVW old+4(FP), R2 MOVW new+8(FP), R3 @@ -50,44 +50,44 @@ casfail: // stubs -TEXT runtime∕internal∕atomic·Loadp(SB),NOSPLIT|NOFRAME,$0-8 - B runtime∕internal∕atomic·Load(SB) +TEXT ·Loadp(SB),NOSPLIT|NOFRAME,$0-8 + B ·Load(SB) -TEXT runtime∕internal∕atomic·LoadAcq(SB),NOSPLIT|NOFRAME,$0-8 - B runtime∕internal∕atomic·Load(SB) +TEXT ·LoadAcq(SB),NOSPLIT|NOFRAME,$0-8 + B ·Load(SB) -TEXT runtime∕internal∕atomic·Casuintptr(SB),NOSPLIT,$0-13 - B runtime∕internal∕atomic·Cas(SB) +TEXT ·Casuintptr(SB),NOSPLIT,$0-13 + B ·Cas(SB) -TEXT runtime∕internal∕atomic·Casp1(SB),NOSPLIT,$0-13 - B runtime∕internal∕atomic·Cas(SB) +TEXT ·Casp1(SB),NOSPLIT,$0-13 + B ·Cas(SB) -TEXT runtime∕internal∕atomic·CasRel(SB),NOSPLIT,$0-13 - B runtime∕internal∕atomic·Cas(SB) +TEXT ·CasRel(SB),NOSPLIT,$0-13 + B ·Cas(SB) -TEXT runtime∕internal∕atomic·Loaduintptr(SB),NOSPLIT,$0-8 - B runtime∕internal∕atomic·Load(SB) +TEXT ·Loaduintptr(SB),NOSPLIT,$0-8 + B ·Load(SB) -TEXT runtime∕internal∕atomic·Loaduint(SB),NOSPLIT,$0-8 - B runtime∕internal∕atomic·Load(SB) +TEXT ·Loaduint(SB),NOSPLIT,$0-8 + B ·Load(SB) -TEXT 
runtime∕internal∕atomic·Storeuintptr(SB),NOSPLIT,$0-8 - B runtime∕internal∕atomic·Store(SB) +TEXT ·Storeuintptr(SB),NOSPLIT,$0-8 + B ·Store(SB) -TEXT runtime∕internal∕atomic·StorepNoWB(SB),NOSPLIT,$0-8 - B runtime∕internal∕atomic·Store(SB) +TEXT ·StorepNoWB(SB),NOSPLIT,$0-8 + B ·Store(SB) -TEXT runtime∕internal∕atomic·StoreRel(SB),NOSPLIT,$0-8 - B runtime∕internal∕atomic·Store(SB) +TEXT ·StoreRel(SB),NOSPLIT,$0-8 + B ·Store(SB) -TEXT runtime∕internal∕atomic·Xadduintptr(SB),NOSPLIT,$0-12 - B runtime∕internal∕atomic·Xadd(SB) +TEXT ·Xadduintptr(SB),NOSPLIT,$0-12 + B ·Xadd(SB) -TEXT runtime∕internal∕atomic·Loadint64(SB),NOSPLIT,$0-12 - B runtime∕internal∕atomic·Load64(SB) +TEXT ·Loadint64(SB),NOSPLIT,$0-12 + B ·Load64(SB) -TEXT runtime∕internal∕atomic·Xaddint64(SB),NOSPLIT,$0-20 - B runtime∕internal∕atomic·Xadd64(SB) +TEXT ·Xaddint64(SB),NOSPLIT,$0-20 + B ·Xadd64(SB) // 64-bit atomics // The native ARM implementations use LDREXD/STREXD, which are @@ -95,7 +95,7 @@ TEXT runtime∕internal∕atomic·Xaddint64(SB),NOSPLIT,$0-20 // On older ARM, we use Go implementations which simulate 64-bit // atomics with locks. -TEXT armCas64<>(SB),NOSPLIT,$0-21 +TEXT armCas64<>(SB),NOSPLIT,$0-21 MOVW addr+0(FP), R1 // make unaligned atomic access panic AND.S $7, R1, R2 @@ -128,7 +128,7 @@ cas64fail: MOVBU R0, swapped+20(FP) RET -TEXT armXadd64<>(SB),NOSPLIT,$0-20 +TEXT armXadd64<>(SB),NOSPLIT,$0-20 MOVW addr+0(FP), R1 // make unaligned atomic access panic AND.S $7, R1, R2 @@ -154,7 +154,7 @@ add64loop: MOVW R5, new_hi+16(FP) RET -TEXT armXchg64<>(SB),NOSPLIT,$0-20 +TEXT armXchg64<>(SB),NOSPLIT,$0-20 MOVW addr+0(FP), R1 // make unaligned atomic access panic AND.S $7, R1, R2 @@ -178,7 +178,7 @@ swap64loop: MOVW R5, old_hi+16(FP) RET -TEXT armLoad64<>(SB),NOSPLIT,$0-12 +TEXT armLoad64<>(SB),NOSPLIT,$0-12 MOVW addr+0(FP), R1 // make unaligned atomic access panic AND.S $7, R1, R2 @@ -192,7 +192,7 @@ TEXT armLoad64<>(SB),NOSPLIT,$0-12 MOVW R3, val_hi+8(FP) RET -TEXT armStore64<>(SB),NOSPLIT,$0-12 +TEXT armStore64<>(SB),NOSPLIT,$0-12 MOVW addr+0(FP), R1 // make unaligned atomic access panic AND.S $7, R1, R2 @@ -213,35 +213,35 @@ store64loop: DMB MB_ISH RET -TEXT ·Cas64(SB),NOSPLIT,$0-21 +TEXT ·Cas64(SB),NOSPLIT,$0-21 MOVB runtime·goarm(SB), R11 CMP $7, R11 BLT 2(PC) JMP armCas64<>(SB) JMP ·goCas64(SB) -TEXT ·Xadd64(SB),NOSPLIT,$0-20 +TEXT ·Xadd64(SB),NOSPLIT,$0-20 MOVB runtime·goarm(SB), R11 CMP $7, R11 BLT 2(PC) JMP armXadd64<>(SB) JMP ·goXadd64(SB) -TEXT ·Xchg64(SB),NOSPLIT,$0-20 +TEXT ·Xchg64(SB),NOSPLIT,$0-20 MOVB runtime·goarm(SB), R11 CMP $7, R11 BLT 2(PC) JMP armXchg64<>(SB) JMP ·goXchg64(SB) -TEXT ·Load64(SB),NOSPLIT,$0-12 +TEXT ·Load64(SB),NOSPLIT,$0-12 MOVB runtime·goarm(SB), R11 CMP $7, R11 BLT 2(PC) JMP armLoad64<>(SB) JMP ·goLoad64(SB) -TEXT ·Store64(SB),NOSPLIT,$0-12 +TEXT ·Store64(SB),NOSPLIT,$0-12 MOVB runtime·goarm(SB), R11 CMP $7, R11 BLT 2(PC) -- cgit v1.2.1 From 83317d9e3cb0674f71d1118d8814aefb31ac1239 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Thu, 15 Oct 2020 15:52:58 -0400 Subject: runtime/internal/atomic: panic nicely on unaligned 64-bit atomics On 386 and arm, unaligned 64-bit atomics aren't safe, so we check for this and panic. Currently, we panic by dereferencing nil, which may be expedient but is pretty user-hostile since it gives no hint of what the actual problem was. This CL replaces this with an actual panic. The only subtlety here is now the atomic assembly implementations are calling back into Go, so they have to play nicely with stack maps and stack scanning. 
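To make the user-visible effect concrete, here is a hedged sketch of the kind
of code that can reach this check on 386 and arm. The struct layout is
illustrative; whether the 64-bit field is actually misaligned depends on where
the variable ends up, and it is assumed here that sync/atomic's 64-bit
operations forward to these runtime routines on those ports.

    package main

    import "sync/atomic"

    type counters struct {
        n uint32
        v uint64 // on 32-bit platforms this field may be only 4-byte aligned
    }

    func main() {
        var x counters
        // Before this CL a misaligned address crashed with a bare nil
        // pointer dereference; after it, the runtime panics with a
        // descriptive unaligned-atomic message.
        atomic.AddUint64(&x.v, 1)
    }

As for the stack-scanning subtlety itself: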
On 386, this just requires declaring NO_LOCAL_POINTERS. On arm, this is somewhat more complicated: first, we have to move the alignment check into the functions that have Go signatures. Then we have to support both the tail call from these functions to the underlying implementation (which requires that they have no frame) and the call into Go to panic (which requires that they have a frame). We resolve this by forcing them to have no frame and setting up the frame manually just before the panic call. Change-Id: I19f1e860045df64088013db37a18acea47342c69 Reviewed-on: https://go-review.googlesource.com/c/go/+/262778 Trust: Austin Clements Run-TryBot: Austin Clements TryBot-Result: Go Bot Reviewed-by: Cherry Zhang Reviewed-by: Michael Knyszek --- src/runtime/internal/atomic/asm_386.s | 17 ++++-- src/runtime/internal/atomic/asm_arm.s | 89 +++++++++++++++++++---------- src/runtime/internal/atomic/atomic_mipsx.go | 2 +- src/runtime/internal/atomic/atomic_test.go | 9 ++- src/runtime/internal/atomic/unaligned.go | 9 +++ 5 files changed, 88 insertions(+), 38 deletions(-) create mode 100644 src/runtime/internal/atomic/unaligned.go (limited to 'src/runtime') diff --git a/src/runtime/internal/atomic/asm_386.s b/src/runtime/internal/atomic/asm_386.s index 357ca95625..bcefff373f 100644 --- a/src/runtime/internal/atomic/asm_386.s +++ b/src/runtime/internal/atomic/asm_386.s @@ -3,6 +3,7 @@ // license that can be found in the LICENSE file. #include "textflag.h" +#include "funcdata.h" // bool Cas(int32 *val, int32 old, int32 new) // Atomically: @@ -44,7 +45,6 @@ TEXT ·Loadint64(SB), NOSPLIT, $0-12 TEXT ·Xaddint64(SB), NOSPLIT, $0-20 JMP ·Xadd64(SB) - // bool ·Cas64(uint64 *val, uint64 old, uint64 new) // Atomically: // if(*val == *old){ @@ -54,10 +54,11 @@ TEXT ·Xaddint64(SB), NOSPLIT, $0-20 // return 0; // } TEXT ·Cas64(SB), NOSPLIT, $0-21 + NO_LOCAL_POINTERS MOVL ptr+0(FP), BP TESTL $7, BP JZ 2(PC) - MOVL 0, BP // crash with nil ptr deref + CALL ·panicUnaligned(SB) MOVL old_lo+4(FP), AX MOVL old_hi+8(FP), DX MOVL new_lo+12(FP), BX @@ -98,11 +99,12 @@ TEXT ·Xadd(SB), NOSPLIT, $0-12 RET TEXT ·Xadd64(SB), NOSPLIT, $0-20 + NO_LOCAL_POINTERS // no XADDQ so use CMPXCHG8B loop MOVL ptr+0(FP), BP TESTL $7, BP JZ 2(PC) - MOVL 0, AX // crash when unaligned + CALL ·panicUnaligned(SB) // DI:SI = delta MOVL delta_lo+4(FP), SI MOVL delta_hi+8(FP), DI @@ -144,11 +146,12 @@ TEXT ·Xchguintptr(SB), NOSPLIT, $0-12 JMP ·Xchg(SB) TEXT ·Xchg64(SB),NOSPLIT,$0-20 + NO_LOCAL_POINTERS // no XCHGQ so use CMPXCHG8B loop MOVL ptr+0(FP), BP TESTL $7, BP JZ 2(PC) - MOVL 0, AX // crash when unaligned + CALL ·panicUnaligned(SB) // CX:BX = new MOVL new_lo+4(FP), BX MOVL new_hi+8(FP), CX @@ -188,10 +191,11 @@ TEXT ·StoreRel(SB), NOSPLIT, $0-8 // uint64 atomicload64(uint64 volatile* addr); TEXT ·Load64(SB), NOSPLIT, $0-12 + NO_LOCAL_POINTERS MOVL ptr+0(FP), AX TESTL $7, AX JZ 2(PC) - MOVL 0, AX // crash with nil ptr deref + CALL ·panicUnaligned(SB) MOVQ (AX), M0 MOVQ M0, ret+4(FP) EMMS @@ -199,10 +203,11 @@ TEXT ·Load64(SB), NOSPLIT, $0-12 // void ·Store64(uint64 volatile* addr, uint64 v); TEXT ·Store64(SB), NOSPLIT, $0-12 + NO_LOCAL_POINTERS MOVL ptr+0(FP), AX TESTL $7, AX JZ 2(PC) - MOVL 0, AX // crash with nil ptr deref + CALL ·panicUnaligned(SB) // MOVQ and EMMS were introduced on the Pentium MMX. 
MOVQ val+4(FP), M0 MOVQ M0, (AX) diff --git a/src/runtime/internal/atomic/asm_arm.s b/src/runtime/internal/atomic/asm_arm.s index db1267423d..c3d1d9025d 100644 --- a/src/runtime/internal/atomic/asm_arm.s +++ b/src/runtime/internal/atomic/asm_arm.s @@ -3,6 +3,7 @@ // license that can be found in the LICENSE file. #include "textflag.h" +#include "funcdata.h" // bool armcas(int32 *val, int32 old, int32 new) // Atomically: @@ -96,11 +97,7 @@ TEXT ·Xaddint64(SB),NOSPLIT,$0-20 // atomics with locks. TEXT armCas64<>(SB),NOSPLIT,$0-21 - MOVW addr+0(FP), R1 - // make unaligned atomic access panic - AND.S $7, R1, R2 - BEQ 2(PC) - MOVW R2, (R2) // crash. AND.S above left only low 3 bits in R2. + // addr is already in R1 MOVW old_lo+4(FP), R2 MOVW old_hi+8(FP), R3 MOVW new_lo+12(FP), R4 @@ -129,11 +126,7 @@ cas64fail: RET TEXT armXadd64<>(SB),NOSPLIT,$0-20 - MOVW addr+0(FP), R1 - // make unaligned atomic access panic - AND.S $7, R1, R2 - BEQ 2(PC) - MOVW R2, (R2) // crash. AND.S above left only low 3 bits in R2. + // addr is already in R1 MOVW delta_lo+4(FP), R2 MOVW delta_hi+8(FP), R3 @@ -155,11 +148,7 @@ add64loop: RET TEXT armXchg64<>(SB),NOSPLIT,$0-20 - MOVW addr+0(FP), R1 - // make unaligned atomic access panic - AND.S $7, R1, R2 - BEQ 2(PC) - MOVW R2, (R2) // crash. AND.S above left only low 3 bits in R2. + // addr is already in R1 MOVW new_lo+4(FP), R2 MOVW new_hi+8(FP), R3 @@ -179,11 +168,7 @@ swap64loop: RET TEXT armLoad64<>(SB),NOSPLIT,$0-12 - MOVW addr+0(FP), R1 - // make unaligned atomic access panic - AND.S $7, R1, R2 - BEQ 2(PC) - MOVW R2, (R2) // crash. AND.S above left only low 3 bits in R2. + // addr is already in R1 LDREXD (R1), R2 // loads R2 and R3 DMB MB_ISH @@ -193,11 +178,7 @@ TEXT armLoad64<>(SB),NOSPLIT,$0-12 RET TEXT armStore64<>(SB),NOSPLIT,$0-12 - MOVW addr+0(FP), R1 - // make unaligned atomic access panic - AND.S $7, R1, R2 - BEQ 2(PC) - MOVW R2, (R2) // crash. AND.S above left only low 3 bits in R2. + // addr is already in R1 MOVW val_lo+4(FP), R2 MOVW val_hi+8(FP), R3 @@ -213,35 +194,83 @@ store64loop: DMB MB_ISH RET -TEXT ·Cas64(SB),NOSPLIT,$0-21 +// The following functions all panic if their address argument isn't +// 8-byte aligned. Since we're calling back into Go code to do this, +// we have to cooperate with stack unwinding. In the normal case, the +// functions tail-call into the appropriate implementation, which +// means they must not open a frame. Hence, when they go down the +// panic path, at that point they push the LR to create a real frame +// (they don't need to pop it because panic won't return). 
+ +TEXT ·Cas64(SB),NOSPLIT,$-4-21 + NO_LOCAL_POINTERS + MOVW addr+0(FP), R1 + // make unaligned atomic access panic + AND.S $7, R1, R2 + BEQ 3(PC) + MOVW.W R14, -4(R13) // prepare a real frame + BL ·panicUnaligned(SB) + MOVB runtime·goarm(SB), R11 CMP $7, R11 BLT 2(PC) JMP armCas64<>(SB) JMP ·goCas64(SB) -TEXT ·Xadd64(SB),NOSPLIT,$0-20 +TEXT ·Xadd64(SB),NOSPLIT,$-4-20 + NO_LOCAL_POINTERS + MOVW addr+0(FP), R1 + // make unaligned atomic access panic + AND.S $7, R1, R2 + BEQ 3(PC) + MOVW.W R14, -4(R13) // prepare a real frame + BL ·panicUnaligned(SB) + MOVB runtime·goarm(SB), R11 CMP $7, R11 BLT 2(PC) JMP armXadd64<>(SB) JMP ·goXadd64(SB) -TEXT ·Xchg64(SB),NOSPLIT,$0-20 +TEXT ·Xchg64(SB),NOSPLIT,$-4-20 + NO_LOCAL_POINTERS + MOVW addr+0(FP), R1 + // make unaligned atomic access panic + AND.S $7, R1, R2 + BEQ 3(PC) + MOVW.W R14, -4(R13) // prepare a real frame + BL ·panicUnaligned(SB) + MOVB runtime·goarm(SB), R11 CMP $7, R11 BLT 2(PC) JMP armXchg64<>(SB) JMP ·goXchg64(SB) -TEXT ·Load64(SB),NOSPLIT,$0-12 +TEXT ·Load64(SB),NOSPLIT,$-4-12 + NO_LOCAL_POINTERS + MOVW addr+0(FP), R1 + // make unaligned atomic access panic + AND.S $7, R1, R2 + BEQ 3(PC) + MOVW.W R14, -4(R13) // prepare a real frame + BL ·panicUnaligned(SB) + MOVB runtime·goarm(SB), R11 CMP $7, R11 BLT 2(PC) JMP armLoad64<>(SB) JMP ·goLoad64(SB) -TEXT ·Store64(SB),NOSPLIT,$0-12 +TEXT ·Store64(SB),NOSPLIT,$-4-12 + NO_LOCAL_POINTERS + MOVW addr+0(FP), R1 + // make unaligned atomic access panic + AND.S $7, R1, R2 + BEQ 3(PC) + MOVW.W R14, -4(R13) // prepare a real frame + BL ·panicUnaligned(SB) + MOVB runtime·goarm(SB), R11 CMP $7, R11 BLT 2(PC) diff --git a/src/runtime/internal/atomic/atomic_mipsx.go b/src/runtime/internal/atomic/atomic_mipsx.go index 0e2d77ade1..b99bfe7dbf 100644 --- a/src/runtime/internal/atomic/atomic_mipsx.go +++ b/src/runtime/internal/atomic/atomic_mipsx.go @@ -34,7 +34,7 @@ func spinUnlock(state *uint32) func lockAndCheck(addr *uint64) { // ensure 8-byte alignment if uintptr(unsafe.Pointer(addr))&7 != 0 { - addr = nil + panicUnaligned() } // force dereference before taking lock _ = *addr diff --git a/src/runtime/internal/atomic/atomic_test.go b/src/runtime/internal/atomic/atomic_test.go index b0a8fa0610..a9f95077c0 100644 --- a/src/runtime/internal/atomic/atomic_test.go +++ b/src/runtime/internal/atomic/atomic_test.go @@ -73,8 +73,15 @@ func TestXadduintptrOnUint64(t *testing.T) { func shouldPanic(t *testing.T, name string, f func()) { defer func() { - if recover() == nil { + // Check that all GC maps are sane. + runtime.GC() + + err := recover() + want := "unaligned 64-bit atomic operation" + if err == nil { t.Errorf("%s did not panic", name) + } else if s, _ := err.(string); s != want { + t.Errorf("%s: wanted panic %q, got %q", name, want, err) } }() f() diff --git a/src/runtime/internal/atomic/unaligned.go b/src/runtime/internal/atomic/unaligned.go new file mode 100644 index 0000000000..a859de4144 --- /dev/null +++ b/src/runtime/internal/atomic/unaligned.go @@ -0,0 +1,9 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package atomic + +func panicUnaligned() { + panic("unaligned 64-bit atomic operation") +} -- cgit v1.2.1 From 9cec50f50c29f5ef7264bf06ee7ac0991b4b36d6 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Fri, 16 Oct 2020 19:35:09 +0200 Subject: internal/poll, net, syscall: use accept4 on illumos Illumos supports the accept4 syscall, use it in internal/poll.accept like on other platforms. Add Accept4 to package syscall despite the package being frozen. The other option would have been to add this to internal/syscall/unix, but adding it to syscall avoids duplicating a lot of code in internal/poll and net/internal/socktest. Also, all other platforms supporting the accept4 syscall already export Accept4. Follow CL 97196, CL 40895 and CL 94295 Change-Id: I13b32f0163a683840c02b16722730d9dfdb98f56 Reviewed-on: https://go-review.googlesource.com/c/go/+/256101 Trust: Tobias Klauser Run-TryBot: Tobias Klauser TryBot-Result: Go Bot Reviewed-by: Brad Fitzpatrick --- src/runtime/cgo/cgo.go | 1 + 1 file changed, 1 insertion(+) (limited to 'src/runtime') diff --git a/src/runtime/cgo/cgo.go b/src/runtime/cgo/cgo.go index c02b837978..4d2caf6c4f 100644 --- a/src/runtime/cgo/cgo.go +++ b/src/runtime/cgo/cgo.go @@ -21,6 +21,7 @@ package cgo #cgo openbsd LDFLAGS: -lpthread #cgo aix LDFLAGS: -Wl,-berok #cgo solaris LDFLAGS: -lxnet +#cgo illumos LDFLAGS: -lsocket // Issue 35247. #cgo darwin CFLAGS: -Wno-nullability-completeness -- cgit v1.2.1 From 05739d6f17c57f09264272621b88725a463234d0 Mon Sep 17 00:00:00 2001 From: Ian Lance Taylor Date: Thu, 15 Oct 2020 14:39:12 -0700 Subject: runtime: wait for preemption signals before syscall.Exec Fixes #41702 Fixes #42023 Change-Id: If07f40b1d73b8f276ee28ffb8b7214175e56c24d Reviewed-on: https://go-review.googlesource.com/c/go/+/262817 Trust: Ian Lance Taylor Trust: Bryan C. Mills Run-TryBot: Ian Lance Taylor TryBot-Result: Go Bot Reviewed-by: Cherry Zhang --- src/runtime/proc.go | 21 +++++++++++++++++++++ src/runtime/signal_unix.go | 11 +++++++++++ 2 files changed, 32 insertions(+) (limited to 'src/runtime') diff --git a/src/runtime/proc.go b/src/runtime/proc.go index 4872480314..e1de70a997 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -1311,6 +1311,14 @@ found: checkdead() unlock(&sched.lock) + if GOOS == "darwin" { + // Make sure pendingPreemptSignals is correct when an M exits. + // For #41702. + if atomic.Load(&m.signalPending) != 0 { + atomic.Xadd(&pendingPreemptSignals, -1) + } + } + if osStack { // Return from mstart and let the system thread // library free the g0 stack and terminate the thread. @@ -3510,11 +3518,24 @@ func syscall_runtime_AfterForkInChild() { inForkedChild = false } +// pendingPreemptSignals is the number of preemption signals +// that have been sent but not received. This is only used on Darwin. +// For #41702. +var pendingPreemptSignals uint32 + // Called from syscall package before Exec. //go:linkname syscall_runtime_BeforeExec syscall.runtime_BeforeExec func syscall_runtime_BeforeExec() { // Prevent thread creation during exec. execLock.lock() + + // On Darwin, wait for all pending preemption signals to + // be received. See issue #41702. + if GOOS == "darwin" { + for int32(atomic.Load(&pendingPreemptSignals)) > 0 { + osyield() + } + } } // Called from syscall package after Exec. 
diff --git a/src/runtime/signal_unix.go b/src/runtime/signal_unix.go index c228de47b4..e8b6f95d8f 100644 --- a/src/runtime/signal_unix.go +++ b/src/runtime/signal_unix.go @@ -335,6 +335,10 @@ func doSigPreempt(gp *g, ctxt *sigctxt) { // Acknowledge the preemption. atomic.Xadd(&gp.m.preemptGen, 1) atomic.Store(&gp.m.signalPending, 0) + + if GOOS == "darwin" { + atomic.Xadd(&pendingPreemptSignals, -1) + } } const preemptMSupported = true @@ -364,6 +368,10 @@ func preemptM(mp *m) { } if atomic.Cas(&mp.signalPending, 0, 1) { + if GOOS == "darwin" { + atomic.Xadd(&pendingPreemptSignals, 1) + } + // If multiple threads are preempting the same M, it may send many // signals to the same M such that it hardly make progress, causing // live-lock problem. Apparently this could happen on darwin. See @@ -435,6 +443,9 @@ func sigtrampgo(sig uint32, info *siginfo, ctx unsafe.Pointer) { // no non-Go signal handler for sigPreempt. // The default behavior for sigPreempt is to ignore // the signal, so badsignal will be a no-op anyway. + if GOOS == "darwin" { + atomic.Xadd(&pendingPreemptSignals, -1) + } return } c.fixsigcode(sig) -- cgit v1.2.1 From f1e3c8f14232cde8da8666ad68df493563287634 Mon Sep 17 00:00:00 2001 From: Cherry Zhang Date: Wed, 14 Oct 2020 19:26:20 -0400 Subject: runtime/cgo: build iOS-specific code only on iOS Don't build them on macOS/ARM64. Updates #38485. Change-Id: I9fbea838fdce52db22742487926879761dea0d6a Reviewed-on: https://go-review.googlesource.com/c/go/+/262559 Trust: Cherry Zhang Run-TryBot: Cherry Zhang Reviewed-by: Ian Lance Taylor --- src/runtime/cgo/gcc_darwin_arm64.c | 21 ++++++++++++++++----- src/runtime/cgo/gcc_signal_darwin_nolldb.c | 12 ------------ src/runtime/cgo/gcc_signal_ios_nolldb.c | 12 ++++++++++++ 3 files changed, 28 insertions(+), 17 deletions(-) delete mode 100644 src/runtime/cgo/gcc_signal_darwin_nolldb.c create mode 100644 src/runtime/cgo/gcc_signal_ios_nolldb.c (limited to 'src/runtime') diff --git a/src/runtime/cgo/gcc_darwin_arm64.c b/src/runtime/cgo/gcc_darwin_arm64.c index 9ea43ae4af..dbe848b4ee 100644 --- a/src/runtime/cgo/gcc_darwin_arm64.c +++ b/src/runtime/cgo/gcc_darwin_arm64.c @@ -10,12 +10,16 @@ #include #include -#include -#include - #include "libcgo.h" #include "libcgo_unix.h" +#include + +#if TARGET_OS_IPHONE +#include +#include +#endif + #define magic (0xc476c475c47957UL) // inittls allocates a thread-local storage slot for g. @@ -87,14 +91,18 @@ threadentry(void *v) ts = *(ThreadStart*)v; free(v); +#if TARGET_OS_IPHONE darwin_arm_init_thread_exception_port(); +#endif crosscall1(ts.fn, setg_gcc, (void*)ts.g); return nil; } +#if TARGET_OS_IPHONE + // init_working_dir sets the current working directory to the app root. -// By default darwin/arm64 processes start in "/". +// By default ios/arm64 processes start in "/". static void init_working_dir() { @@ -145,6 +153,8 @@ init_working_dir() } } +#endif // TARGET_OS_IPHONE + void x_cgo_init(G *g, void (*setg)(void*), void **tlsg, void **tlsbase) { @@ -161,8 +171,9 @@ x_cgo_init(G *g, void (*setg)(void*), void **tlsg, void **tlsbase) // yes, tlsbase from mrs might not be correctly aligned. 
inittls(tlsg, (void**)((uintptr)tlsbase & ~7)); +#if TARGET_OS_IPHONE darwin_arm_init_mach_exception_handler(); darwin_arm_init_thread_exception_port(); - init_working_dir(); +#endif } diff --git a/src/runtime/cgo/gcc_signal_darwin_nolldb.c b/src/runtime/cgo/gcc_signal_darwin_nolldb.c deleted file mode 100644 index 26be71bd1d..0000000000 --- a/src/runtime/cgo/gcc_signal_darwin_nolldb.c +++ /dev/null @@ -1,12 +0,0 @@ -// Copyright 2015 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build !lldb !ios -// +build darwin -// +build arm64 - -#include - -void darwin_arm_init_thread_exception_port() {} -void darwin_arm_init_mach_exception_handler() {} diff --git a/src/runtime/cgo/gcc_signal_ios_nolldb.c b/src/runtime/cgo/gcc_signal_ios_nolldb.c new file mode 100644 index 0000000000..cfa4025414 --- /dev/null +++ b/src/runtime/cgo/gcc_signal_ios_nolldb.c @@ -0,0 +1,12 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !lldb +// +build ios +// +build arm64 + +#include + +void darwin_arm_init_thread_exception_port() {} +void darwin_arm_init_mach_exception_handler() {} -- cgit v1.2.1 From 689a7a13780dc7a5138215aa4d369bdcf789fee8 Mon Sep 17 00:00:00 2001 From: Cherry Zhang Date: Fri, 16 Oct 2020 19:11:42 -0400 Subject: runtime/cgo: fix build tag placement vet warning Change-Id: Ie6583b46213caae897fc2189d4973c88759f5f4b Reviewed-on: https://go-review.googlesource.com/c/go/+/263258 Trust: Cherry Zhang Run-TryBot: Cherry Zhang TryBot-Result: Go Bot Reviewed-by: Ian Lance Taylor --- src/runtime/cgo/gcc_libinit_windows.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/runtime') diff --git a/src/runtime/cgo/gcc_libinit_windows.c b/src/runtime/cgo/gcc_libinit_windows.c index 9fd7d36bfb..2732248bdc 100644 --- a/src/runtime/cgo/gcc_libinit_windows.c +++ b/src/runtime/cgo/gcc_libinit_windows.c @@ -3,6 +3,7 @@ // license that can be found in the LICENSE file. // +build cgo + #define WIN64_LEAN_AND_MEAN #include #include -- cgit v1.2.1 From 515e6a9b12dfe654c86cfd070ee5d6ac144fe116 Mon Sep 17 00:00:00 2001 From: Alex Brainman Date: Sun, 19 Jul 2020 16:06:48 +1000 Subject: runtime: use CreateWaitableTimerEx to implement usleep MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit @jstarks suggested that recent versions of Windows provide access to high resolution timers. See https://github.com/golang/go/issues/8687#issuecomment-656259353 for details. I tried to run this C program on my Windows 10 computer ``` #include #include #pragma comment(lib, "Winmm.lib") // Apparently this is already defined when I use msvc cl. 
//#define CREATE_WAITABLE_TIMER_HIGH_RESOLUTION = 0x00000002; int usleep(HANDLE timer, LONGLONG d) { LARGE_INTEGER liDueTime; DWORD ret; LARGE_INTEGER StartingTime, EndingTime, ElapsedMicroseconds; LARGE_INTEGER Frequency; QueryPerformanceFrequency(&Frequency); QueryPerformanceCounter(&StartingTime); liDueTime.QuadPart = d; liDueTime.QuadPart = liDueTime.QuadPart * 10; // us into 100 of ns units liDueTime.QuadPart = -liDueTime.QuadPart; // negative for relative dure time if (!SetWaitableTimer(timer, &liDueTime, 0, NULL, NULL, 0)) { printf("SetWaitableTimer failed: errno=%d\n", GetLastError()); return 1; } ret = WaitForSingleObject(timer, INFINITE); if (ret != WAIT_OBJECT_0) { printf("WaitForSingleObject failed: ret=%d errno=%d\n", ret, GetLastError()); return 1; } QueryPerformanceCounter(&EndingTime); ElapsedMicroseconds.QuadPart = EndingTime.QuadPart - StartingTime.QuadPart; ElapsedMicroseconds.QuadPart *= 1000000; ElapsedMicroseconds.QuadPart /= Frequency.QuadPart; printf("delay is %lld us - slept for %lld us\n", d, ElapsedMicroseconds.QuadPart); return 0; } int testTimer(DWORD createFlag) { HANDLE timer; timer = CreateWaitableTimerEx(NULL, NULL, createFlag, TIMER_ALL_ACCESS); if (timer == NULL) { printf("CreateWaitableTimerEx failed: errno=%d\n", GetLastError()); return 1; } usleep(timer, 1000LL); usleep(timer, 100LL); usleep(timer, 10LL); usleep(timer, 1LL); CloseHandle(timer); return 0; } int main() { printf("\n1. CREATE_WAITABLE_TIMER_HIGH_RESOLUTION is off - timeBeginPeriod is off\n"); testTimer(0); printf("\n2. CREATE_WAITABLE_TIMER_HIGH_RESOLUTION is on - timeBeginPeriod is off\n"); testTimer(CREATE_WAITABLE_TIMER_HIGH_RESOLUTION); timeBeginPeriod(1); printf("\n3. CREATE_WAITABLE_TIMER_HIGH_RESOLUTION is off - timeBeginPeriod is on\n"); testTimer(0); printf("\n4. CREATE_WAITABLE_TIMER_HIGH_RESOLUTION is on - timeBeginPeriod is on\n"); testTimer(CREATE_WAITABLE_TIMER_HIGH_RESOLUTION); } ``` and I see this output ``` 1. CREATE_WAITABLE_TIMER_HIGH_RESOLUTION is off - timeBeginPeriod is off delay is 1000 us - slept for 4045 us delay is 100 us - slept for 3915 us delay is 10 us - slept for 3291 us delay is 1 us - slept for 2234 us 2. CREATE_WAITABLE_TIMER_HIGH_RESOLUTION is on - timeBeginPeriod is off delay is 1000 us - slept for 1076 us delay is 100 us - slept for 569 us delay is 10 us - slept for 585 us delay is 1 us - slept for 17 us 3. CREATE_WAITABLE_TIMER_HIGH_RESOLUTION is off - timeBeginPeriod is on delay is 1000 us - slept for 742 us delay is 100 us - slept for 893 us delay is 10 us - slept for 414 us delay is 1 us - slept for 920 us 4. CREATE_WAITABLE_TIMER_HIGH_RESOLUTION is on - timeBeginPeriod is on delay is 1000 us - slept for 1466 us delay is 100 us - slept for 559 us delay is 10 us - slept for 535 us delay is 1 us - slept for 5 us ``` That shows, that indeed using CREATE_WAITABLE_TIMER_HIGH_RESOLUTION will provide sleeps as low as about 500 microseconds, while our current approach provides about 1 millisecond sleep. New approach also does not require for timeBeginPeriod to be on, so this change solves long standing problem with go programs draining laptop battery, because it calls timeBeginPeriod. This change will only run on systems where CREATE_WAITABLE_TIMER_HIGH_RESOLUTION flag is available. If not available, the runtime will fallback to original code that uses timeBeginPeriod. 
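Whether a particular machine takes the new path can be probed from ordinary Go code with the same API call the runtime now makes at startup. A standalone sketch (Windows only, illustrative; the constants are local to the snippet and mirror the values used in os_windows.go):

```
package main

import (
	"fmt"
	"syscall"
)

const (
	_CREATE_WAITABLE_TIMER_HIGH_RESOLUTION = 0x00000002
	_SYNCHRONIZE                           = 0x00100000
	_TIMER_QUERY_STATE                     = 0x0001
	_TIMER_MODIFY_STATE                    = 0x0002
)

func main() {
	k32 := syscall.NewLazyDLL("kernel32.dll")
	createTimer := k32.NewProc("CreateWaitableTimerExW")
	h, _, callErr := createTimer.Call(0, 0,
		_CREATE_WAITABLE_TIMER_HIGH_RESOLUTION,
		_SYNCHRONIZE|_TIMER_QUERY_STATE|_TIMER_MODIFY_STATE)
	if h == 0 {
		// Older Windows versions reject the flag; the runtime falls back
		// to the timeBeginPeriod-based path in that case.
		fmt.Println("high resolution timer not available:", callErr)
		return
	}
	syscall.CloseHandle(syscall.Handle(h))
	fmt.Println("high resolution timer available")
}
```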
This is how this change affects benchmark reported in issue #14790 name               old time/op  new time/op  delta ChanToSyscallPing  1.05ms ± 2%  0.68ms ±11%  -35.43%  (p=0.000 n=10+10) The benchmark was run with GOMAXPROCS set to 1. Fixes #8687 Updates #14790 Change-Id: I5b97ba58289c088c17c05292e12e45285c467eae Reviewed-on: https://go-review.googlesource.com/c/go/+/248699 Run-TryBot: Alex Brainman TryBot-Result: Go Bot Trust: Alex Brainman Reviewed-by: Austin Clements --- src/runtime/os_windows.go | 73 +++++++++++++++++++++++++++++++++++++++-- src/runtime/sys_windows_386.s | 36 ++++++++++++++++++++ src/runtime/sys_windows_amd64.s | 32 ++++++++++++++++++ src/runtime/sys_windows_arm.s | 5 +++ 4 files changed, 144 insertions(+), 2 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/os_windows.go b/src/runtime/os_windows.go index a62e941229..9dd140c952 100644 --- a/src/runtime/os_windows.go +++ b/src/runtime/os_windows.go @@ -21,6 +21,7 @@ const ( //go:cgo_import_dynamic runtime._CreateIoCompletionPort CreateIoCompletionPort%4 "kernel32.dll" //go:cgo_import_dynamic runtime._CreateThread CreateThread%6 "kernel32.dll" //go:cgo_import_dynamic runtime._CreateWaitableTimerA CreateWaitableTimerA%3 "kernel32.dll" +//go:cgo_import_dynamic runtime._CreateWaitableTimerExW CreateWaitableTimerExW%4 "kernel32.dll" //go:cgo_import_dynamic runtime._DuplicateHandle DuplicateHandle%7 "kernel32.dll" //go:cgo_import_dynamic runtime._ExitProcess ExitProcess%1 "kernel32.dll" //go:cgo_import_dynamic runtime._FreeEnvironmentStringsW FreeEnvironmentStringsW%1 "kernel32.dll" @@ -68,6 +69,7 @@ var ( _CreateIoCompletionPort, _CreateThread, _CreateWaitableTimerA, + _CreateWaitableTimerExW, _DuplicateHandle, _ExitProcess, _FreeEnvironmentStringsW, @@ -151,6 +153,8 @@ type mOS struct { waitsema uintptr // semaphore for parking on locks resumesema uintptr // semaphore to indicate suspend/resume + highResTimer uintptr // high resolution timer handle used in usleep + // preemptExtLock synchronizes preemptM with entry/exit from // external C code. // @@ -402,11 +406,21 @@ const osRelaxMinNS = 60 * 1e6 // osRelax is called by the scheduler when transitioning to and from // all Ps being idle. // -// On Windows, it adjusts the system-wide timer resolution. Go needs a +// Some versions of Windows have high resolution timer. For those +// versions osRelax is noop. +// For Windows versions without high resolution timer, osRelax +// adjusts the system-wide timer resolution. Go needs a // high resolution timer while running and there's little extra cost // if we're already using the CPU, but if all Ps are idle there's no // need to consume extra power to drive the high-res timer. func osRelax(relax bool) uint32 { + if haveHighResTimer { + // If the high resolution timer is available, the runtime uses the timer + // to sleep for short durations. This means there's no need to adjust + // the global clock frequency. + return 0 + } + if relax { return uint32(stdcall1(_timeEndPeriod, 1)) } else { @@ -414,6 +428,42 @@ func osRelax(relax bool) uint32 { } } +// haveHighResTimer indicates that the CreateWaitableTimerEx +// CREATE_WAITABLE_TIMER_HIGH_RESOLUTION flag is available. +var haveHighResTimer = false + +// createHighResTimer calls CreateWaitableTimerEx with +// CREATE_WAITABLE_TIMER_HIGH_RESOLUTION flag to create high +// resolution timer. createHighResTimer returns new timer +// handle or 0, if CreateWaitableTimerEx failed. 
+func createHighResTimer() uintptr { + const ( + // As per @jstarks, see + // https://github.com/golang/go/issues/8687#issuecomment-656259353 + _CREATE_WAITABLE_TIMER_HIGH_RESOLUTION = 0x00000002 + + _SYNCHRONIZE = 0x00100000 + _TIMER_QUERY_STATE = 0x0001 + _TIMER_MODIFY_STATE = 0x0002 + ) + return stdcall4(_CreateWaitableTimerExW, 0, 0, + _CREATE_WAITABLE_TIMER_HIGH_RESOLUTION, + _SYNCHRONIZE|_TIMER_QUERY_STATE|_TIMER_MODIFY_STATE) +} + +func initHighResTimer() { + if GOARCH == "arm" { + // TODO: Not yet implemented. + return + } + h := createHighResTimer() + if h != 0 { + haveHighResTimer = true + usleep2Addr = unsafe.Pointer(funcPC(usleep2HighRes)) + stdcall1(_CloseHandle, h) + } +} + func osinit() { asmstdcallAddr = unsafe.Pointer(funcPC(asmstdcall)) usleep2Addr = unsafe.Pointer(funcPC(usleep2)) @@ -429,6 +479,7 @@ func osinit() { stdcall2(_SetConsoleCtrlHandler, funcPC(ctrlhandler), 1) + initHighResTimer() timeBeginPeriodRetValue = osRelax(false) ncpu = getproccount() @@ -844,9 +895,20 @@ func minit() { var thandle uintptr stdcall7(_DuplicateHandle, currentProcess, currentThread, currentProcess, uintptr(unsafe.Pointer(&thandle)), 0, 0, _DUPLICATE_SAME_ACCESS) + // Configure usleep timer, if possible. + var timer uintptr + if haveHighResTimer { + timer = createHighResTimer() + if timer == 0 { + print("runtime: CreateWaitableTimerEx failed; errno=", getlasterror(), "\n") + throw("CreateWaitableTimerEx when creating timer failed") + } + } + mp := getg().m lock(&mp.threadLock) mp.thread = thandle + mp.highResTimer = timer unlock(&mp.threadLock) // Query the true stack base from the OS. Currently we're @@ -884,6 +946,10 @@ func unminit() { lock(&mp.threadLock) stdcall1(_CloseHandle, mp.thread) mp.thread = 0 + if mp.highResTimer != 0 { + stdcall1(_CloseHandle, mp.highResTimer) + mp.highResTimer = 0 + } unlock(&mp.threadLock) } @@ -976,9 +1042,12 @@ func stdcall7(fn stdFunction, a0, a1, a2, a3, a4, a5, a6 uintptr) uintptr { return stdcall(fn) } -// in sys_windows_386.s and sys_windows_amd64.s +// In sys_windows_386.s and sys_windows_amd64.s. func onosstack(fn unsafe.Pointer, arg uint32) + +// These are not callable functions. They should only be called via onosstack. func usleep2(usec uint32) +func usleep2HighRes(usec uint32) func switchtothread() var usleep2Addr unsafe.Pointer diff --git a/src/runtime/sys_windows_386.s b/src/runtime/sys_windows_386.s index 9e1f40925d..4ac1527ab1 100644 --- a/src/runtime/sys_windows_386.s +++ b/src/runtime/sys_windows_386.s @@ -428,6 +428,42 @@ TEXT runtime·usleep2(SB),NOSPLIT,$20 MOVL BP, SP RET +// Runs on OS stack. duration (in 100ns units) is in BX. +TEXT runtime·usleep2HighRes(SB),NOSPLIT,$36 + // Want negative 100ns units. + NEGL BX + MOVL $-1, hi-4(SP) + MOVL BX, lo-8(SP) + + get_tls(CX) + MOVL g(CX), CX + MOVL g_m(CX), CX + MOVL (m_mOS+mOS_highResTimer)(CX), CX + MOVL CX, saved_timer-12(SP) + + MOVL $0, fResume-16(SP) + MOVL $0, lpArgToCompletionRoutine-20(SP) + MOVL $0, pfnCompletionRoutine-24(SP) + MOVL $0, lPeriod-28(SP) + LEAL lo-8(SP), BX + MOVL BX, lpDueTime-32(SP) + MOVL CX, hTimer-36(SP) + MOVL SP, BP + MOVL runtime·_SetWaitableTimer(SB), AX + CALL AX + MOVL BP, SP + + MOVL $0, ptime-28(SP) + MOVL $0, alertable-32(SP) + MOVL saved_timer-12(SP), CX + MOVL CX, handle-36(SP) + MOVL SP, BP + MOVL runtime·_NtWaitForSingleObject(SB), AX + CALL AX + MOVL BP, SP + + RET + // Runs on OS stack. 
TEXT runtime·switchtothread(SB),NOSPLIT,$0 MOVL SP, BP diff --git a/src/runtime/sys_windows_amd64.s b/src/runtime/sys_windows_amd64.s index 6c8eecd4e7..847542592b 100644 --- a/src/runtime/sys_windows_amd64.s +++ b/src/runtime/sys_windows_amd64.s @@ -457,6 +457,38 @@ TEXT runtime·usleep2(SB),NOSPLIT|NOFRAME,$48 MOVQ 40(SP), SP RET +// Runs on OS stack. duration (in 100ns units) is in BX. +TEXT runtime·usleep2HighRes(SB),NOSPLIT|NOFRAME,$72 + MOVQ SP, AX + ANDQ $~15, SP // alignment as per Windows requirement + MOVQ AX, 64(SP) + + get_tls(CX) + MOVQ g(CX), CX + MOVQ g_m(CX), CX + MOVQ (m_mOS+mOS_highResTimer)(CX), CX // hTimer + MOVQ CX, 48(SP) // save hTimer for later + // Want negative 100ns units. + NEGQ BX + LEAQ 56(SP), DX // lpDueTime + MOVQ BX, (DX) + MOVQ $0, R8 // lPeriod + MOVQ $0, R9 // pfnCompletionRoutine + MOVQ $0, AX + MOVQ AX, 32(SP) // lpArgToCompletionRoutine + MOVQ AX, 40(SP) // fResume + MOVQ runtime·_SetWaitableTimer(SB), AX + CALL AX + + MOVQ 48(SP), CX // handle + MOVQ $0, DX // alertable + MOVQ $0, R8 // ptime + MOVQ runtime·_NtWaitForSingleObject(SB), AX + CALL AX + + MOVQ 64(SP), SP + RET + // Runs on OS stack. TEXT runtime·switchtothread(SB),NOSPLIT|NOFRAME,$0 MOVQ SP, AX diff --git a/src/runtime/sys_windows_arm.s b/src/runtime/sys_windows_arm.s index 256b5ff7f0..57415e1306 100644 --- a/src/runtime/sys_windows_arm.s +++ b/src/runtime/sys_windows_arm.s @@ -468,6 +468,11 @@ TEXT runtime·usleep2(SB),NOSPLIT|NOFRAME,$0 MOVW R4, R13 // Restore SP MOVM.IA.W (R13), [R4, R15] // pop {R4, pc} +// Runs on OS stack. Duration (in 100ns units) is in R0. +// TODO: neeeds to be implemented properly. +TEXT runtime·usleep2HighRes(SB),NOSPLIT|NOFRAME,$0 + B runtime·abort(SB) + // Runs on OS stack. TEXT runtime·switchtothread(SB),NOSPLIT|NOFRAME,$0 MOVM.DB.W [R4, R14], (R13) // push {R4, lr} -- cgit v1.2.1 From 0040adfd0f98e1012837b0317fabf69cf1f8855b Mon Sep 17 00:00:00 2001 From: Cherry Zhang Date: Fri, 16 Oct 2020 21:39:36 -0400 Subject: runtime: define ios/arm64 entry points Updates #38485. Change-Id: I030346c7f0c3ce89209588525b210284fdea4efd Reviewed-on: https://go-review.googlesource.com/c/go/+/263638 Trust: Cherry Zhang Run-TryBot: Cherry Zhang TryBot-Result: Go Bot Reviewed-by: Than McIntosh --- src/runtime/rt0_ios_arm64.s | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 src/runtime/rt0_ios_arm64.s (limited to 'src/runtime') diff --git a/src/runtime/rt0_ios_arm64.s b/src/runtime/rt0_ios_arm64.s new file mode 100644 index 0000000000..dcc83656e2 --- /dev/null +++ b/src/runtime/rt0_ios_arm64.s @@ -0,0 +1,14 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +// internal linking executable entry point. +// ios/arm64 only supports external linking. +TEXT _rt0_arm64_ios(SB),NOSPLIT|NOFRAME,$0 + UNDEF + +// library entry point. +TEXT _rt0_arm64_ios_lib(SB),NOSPLIT|NOFRAME,$0 + JMP _rt0_arm64_darwin_lib(SB) -- cgit v1.2.1 From 2754d911641c3a4569f48d61c541fc2ac395d23b Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Mon, 19 Oct 2020 17:55:55 +0000 Subject: runtime: add lock rank partial-order edge between fin and mheap finlock may be held across a write barrier, which could then acquire the mheap lock. Notably, this occurs in the mp.unlockf write in gopark where finlock is held by the finalizer goroutines and is going to sleep. Fixes #42062. 
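The edge being added records that holding finlock is now an admissible state when the mheap lock is taken. Roughly, the rank checker walks the locks currently held and requires each of their ranks to appear in the acquired lock's allowed-predecessor list; a self-contained sketch of that invariant (simplified, with made-up rank names; the real checker in the runtime uses its own types and throws on a violation):

```
package main

import "fmt"

type lockRank int

const (
	rankFin lockRank = iota
	rankMheap
)

// allowedBefore[r] lists ranks that may already be held when a lock of
// rank r is acquired. This change effectively adds rankFin to rankMheap's
// list, so a write barrier taken while finlock is held is no longer flagged.
var allowedBefore = map[lockRank][]lockRank{
	rankMheap: {rankFin},
}

func checkAcquire(held []lockRank, acquiring lockRank) error {
	for _, h := range held {
		ok := false
		for _, a := range allowedBefore[acquiring] {
			if h == a {
				ok = true
				break
			}
		}
		if !ok {
			return fmt.Errorf("lock ordering problem: rank %d held while acquiring rank %d", h, acquiring)
		}
	}
	return nil
}

func main() {
	// finlock held by a finalizer goroutine going to sleep, then the write
	// barrier acquires the mheap lock.
	fmt.Println(checkAcquire([]lockRank{rankFin}, rankMheap)) // <nil> with the new edge
}
```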
Change-Id: Icf76637ae6fc12795436272633dca3d473780875 Reviewed-on: https://go-review.googlesource.com/c/go/+/263678 Trust: Michael Knyszek Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Reviewed-by: Dan Scales --- src/runtime/lockrank.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/runtime') diff --git a/src/runtime/lockrank.go b/src/runtime/lockrank.go index 042f10b1d3..0cbbfc4f45 100644 --- a/src/runtime/lockrank.go +++ b/src/runtime/lockrank.go @@ -231,7 +231,7 @@ var lockPartialOrder [][]lockRank = [][]lockRank{ lockRankDefer: {}, lockRankSudog: {lockRankNotifyList, lockRankHchan}, lockRankWbufSpans: {lockRankSysmon, lockRankScavenge, lockRankSweepWaiters, lockRankAssistQueue, lockRankSweep, lockRankSched, lockRankAllg, lockRankPollDesc, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankHchan, lockRankFin, lockRankNotifyList, lockRankTraceStrings, lockRankMspanSpecial, lockRankProf, lockRankRoot, lockRankGscan, lockRankDefer, lockRankSudog}, - lockRankMheap: {lockRankSysmon, lockRankScavenge, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankAllg, lockRankAllp, lockRankPollDesc, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankHchan, lockRankMspanSpecial, lockRankProf, lockRankGcBitsArenas, lockRankRoot, lockRankGscan, lockRankStackpool, lockRankStackLarge, lockRankDefer, lockRankSudog, lockRankWbufSpans, lockRankSpanSetSpine}, + lockRankMheap: {lockRankSysmon, lockRankScavenge, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankAllg, lockRankAllp, lockRankFin, lockRankPollDesc, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankHchan, lockRankMspanSpecial, lockRankProf, lockRankGcBitsArenas, lockRankRoot, lockRankGscan, lockRankStackpool, lockRankStackLarge, lockRankDefer, lockRankSudog, lockRankWbufSpans, lockRankSpanSetSpine}, lockRankMheapSpecial: {lockRankSysmon, lockRankScavenge, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankSched, lockRankAllg, lockRankAllp, lockRankTimers, lockRankItab, lockRankReflectOffs, lockRankNotifyList, lockRankTraceBuf, lockRankTraceStrings, lockRankHchan}, lockRankGlobalAlloc: {lockRankProf, lockRankSpanSetSpine, lockRankMheap, lockRankMheapSpecial}, -- cgit v1.2.1 From 7bb721b9384bdd196befeaed593b185f7f2a5589 Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Tue, 7 Jul 2020 13:49:21 -0400 Subject: all: update references to symbols moved from os to io/fs The old os references are still valid, but update our code to reflect best practices and get used to the new locations. Code compiled with the bootstrap toolchain (cmd/asm, cmd/dist, cmd/compile, debug/elf) must remain Go 1.4-compatible and is excluded. For #41190. 
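Within src/runtime the change is confined to testdata/testprogcgo/exec.go, where error inspection that named *os.PathError now names *fs.PathError; the old spellings keep compiling. A small standalone example of the updated style (illustrative, not taken from the CL):

```
package main

import (
	"errors"
	"fmt"
	"io/fs"
	"os"
)

func main() {
	_, err := os.Open("no-such-file")

	var pe *fs.PathError // previously written as *os.PathError
	fmt.Println(errors.As(err, &pe))            // true
	fmt.Println(errors.Is(err, fs.ErrNotExist)) // true; os.IsNotExist(err) still works too
}
```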
Change-Id: I8f9526977867c10a221e2f392f78d7dec073f1bd Reviewed-on: https://go-review.googlesource.com/c/go/+/243907 Trust: Russ Cox Run-TryBot: Russ Cox TryBot-Result: Go Bot Reviewed-by: Rob Pike --- src/runtime/testdata/testprogcgo/exec.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/runtime') diff --git a/src/runtime/testdata/testprogcgo/exec.go b/src/runtime/testdata/testprogcgo/exec.go index 94da5dc526..15723c7369 100644 --- a/src/runtime/testdata/testprogcgo/exec.go +++ b/src/runtime/testdata/testprogcgo/exec.go @@ -31,6 +31,7 @@ import "C" import ( "fmt" + "io/fs" "os" "os/exec" "os/signal" @@ -98,7 +99,7 @@ func CgoExecSignalMask() { // isEAGAIN reports whether err is an EAGAIN error from a process execution. func isEAGAIN(err error) bool { - if p, ok := err.(*os.PathError); ok { + if p, ok := err.(*fs.PathError); ok { err = p.Err } return err == syscall.EAGAIN -- cgit v1.2.1 From de932da453f68b8fc04e9c2ab25136748173c806 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20M=C3=B6hrmann?= Date: Tue, 13 Oct 2020 22:30:23 +0200 Subject: internal/cpu: consolidate arm64 feature detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move code to detect and mask arm64 CPU features from runtime to internal/cpu. Change-Id: Ib784e2ff056e8def125d68827b852f07a3eff0db Reviewed-on: https://go-review.googlesource.com/c/go/+/261878 Trust: Martin Möhrmann Trust: Tobias Klauser Run-TryBot: Tobias Klauser TryBot-Result: Go Bot Reviewed-by: Tobias Klauser Reviewed-by: Benny Siegert --- src/runtime/auxv_none.go | 1 - src/runtime/os_freebsd_arm64.go | 143 --------------------------------------- src/runtime/os_freebsd_noauxv.go | 2 +- src/runtime/os_linux_arm64.go | 14 +--- src/runtime/os_netbsd.go | 1 - src/runtime/os_netbsd_386.go | 3 - src/runtime/os_netbsd_amd64.go | 3 - src/runtime/os_netbsd_arm.go | 3 - src/runtime/os_netbsd_arm64.go | 12 +--- src/runtime/os_openbsd_arm64.go | 11 --- src/runtime/sys_freebsd_arm64.s | 21 ------ 11 files changed, 3 insertions(+), 211 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/auxv_none.go b/src/runtime/auxv_none.go index 3a560a1793..3ca617b21e 100644 --- a/src/runtime/auxv_none.go +++ b/src/runtime/auxv_none.go @@ -7,7 +7,6 @@ // +build !dragonfly // +build !freebsd // +build !netbsd -// +build !openbsd !arm64 // +build !solaris package runtime diff --git a/src/runtime/os_freebsd_arm64.go b/src/runtime/os_freebsd_arm64.go index 51ebf9d478..b5b25f0dc5 100644 --- a/src/runtime/os_freebsd_arm64.go +++ b/src/runtime/os_freebsd_arm64.go @@ -4,149 +4,6 @@ package runtime -import "internal/cpu" - -const ( - hwcap_FP = 1 << 0 - hwcap_ASIMD = 1 << 1 - hwcap_EVTSTRM = 1 << 2 - hwcap_AES = 1 << 3 - hwcap_PMULL = 1 << 4 - hwcap_SHA1 = 1 << 5 - hwcap_SHA2 = 1 << 6 - hwcap_CRC32 = 1 << 7 - hwcap_ATOMICS = 1 << 8 - hwcap_FPHP = 1 << 9 - hwcap_ASIMDHP = 1 << 10 - hwcap_CPUID = 1 << 11 - hwcap_ASIMDRDM = 1 << 12 - hwcap_JSCVT = 1 << 13 - hwcap_FCMA = 1 << 14 - hwcap_LRCPC = 1 << 15 - hwcap_DCPOP = 1 << 16 - hwcap_SHA3 = 1 << 17 - hwcap_SM3 = 1 << 18 - hwcap_SM4 = 1 << 19 - hwcap_ASIMDDP = 1 << 20 - hwcap_SHA512 = 1 << 21 - hwcap_SVE = 1 << 22 - hwcap_ASIMDFHM = 1 << 23 -) - -func getisar0() uint64 -func getisar1() uint64 -func getpfr0() uint64 - -// no hwcap support on FreeBSD aarch64, we need to retrieve the info from -// ID_AA64ISAR0_EL1, ID_AA64ISAR1_EL1 and ID_AA64PFR0_EL1 -func archauxv(tag, val uintptr) { - var isar0, isar1, pfr0 uint64 - - isar0 = getisar0() - isar1 = getisar1() - pfr0 = 
getpfr0() - - // ID_AA64ISAR0_EL1 - switch extractBits(isar0, 4, 7) { - case 1: - cpu.HWCap |= hwcap_AES - case 2: - cpu.HWCap |= hwcap_PMULL | hwcap_AES - } - - switch extractBits(isar0, 8, 11) { - case 1: - cpu.HWCap |= hwcap_SHA1 - } - - switch extractBits(isar0, 12, 15) { - case 1: - cpu.HWCap |= hwcap_SHA2 - case 2: - cpu.HWCap |= hwcap_SHA2 | hwcap_SHA512 - } - - switch extractBits(isar0, 16, 19) { - case 1: - cpu.HWCap |= hwcap_CRC32 - } - - switch extractBits(isar0, 20, 23) { - case 2: - cpu.HWCap |= hwcap_ATOMICS - } - - switch extractBits(isar0, 28, 31) { - case 1: - cpu.HWCap |= hwcap_ASIMDRDM - } - - switch extractBits(isar0, 32, 35) { - case 1: - cpu.HWCap |= hwcap_SHA3 - } - - switch extractBits(isar0, 36, 39) { - case 1: - cpu.HWCap |= hwcap_SM3 - } - - switch extractBits(isar0, 40, 43) { - case 1: - cpu.HWCap |= hwcap_SM4 - } - - switch extractBits(isar0, 44, 47) { - case 1: - cpu.HWCap |= hwcap_ASIMDDP - } - - // ID_AA64ISAR1_EL1 - switch extractBits(isar1, 0, 3) { - case 1: - cpu.HWCap |= hwcap_DCPOP - } - - switch extractBits(isar1, 12, 15) { - case 1: - cpu.HWCap |= hwcap_JSCVT - } - - switch extractBits(isar1, 16, 19) { - case 1: - cpu.HWCap |= hwcap_FCMA - } - - switch extractBits(isar1, 20, 23) { - case 1: - cpu.HWCap |= hwcap_LRCPC - } - - // ID_AA64PFR0_EL1 - switch extractBits(pfr0, 16, 19) { - case 0: - cpu.HWCap |= hwcap_FP - case 1: - cpu.HWCap |= hwcap_FP | hwcap_FPHP - } - - switch extractBits(pfr0, 20, 23) { - case 0: - cpu.HWCap |= hwcap_ASIMD - case 1: - cpu.HWCap |= hwcap_ASIMD | hwcap_ASIMDHP - } - - switch extractBits(pfr0, 32, 35) { - case 1: - cpu.HWCap |= hwcap_SVE - } -} - -func extractBits(data uint64, start, end uint) uint { - return (uint)(data>>start) & ((1 << (end - start + 1)) - 1) -} - //go:nosplit func cputicks() int64 { // Currently cputicks() is used in blocking profiler and to seed fastrand(). diff --git a/src/runtime/os_freebsd_noauxv.go b/src/runtime/os_freebsd_noauxv.go index c6a49927c8..01efb9b7c9 100644 --- a/src/runtime/os_freebsd_noauxv.go +++ b/src/runtime/os_freebsd_noauxv.go @@ -3,7 +3,7 @@ // license that can be found in the LICENSE file. // +build freebsd -// +build !arm,!arm64 +// +build !arm package runtime diff --git a/src/runtime/os_linux_arm64.go b/src/runtime/os_linux_arm64.go index 19968dc164..c5fd742048 100644 --- a/src/runtime/os_linux_arm64.go +++ b/src/runtime/os_linux_arm64.go @@ -11,19 +11,7 @@ import "internal/cpu" func archauxv(tag, val uintptr) { switch tag { case _AT_HWCAP: - // arm64 doesn't have a 'cpuid' instruction equivalent and relies on - // HWCAP/HWCAP2 bits for hardware capabilities. - hwcap := uint(val) - if GOOS == "android" { - // The Samsung S9+ kernel reports support for atomics, but not all cores - // actually support them, resulting in SIGILL. See issue #28431. - // TODO(elias.naur): Only disable the optimization on bad chipsets. 
- const hwcap_ATOMICS = 1 << 8 - hwcap &= ^uint(hwcap_ATOMICS) - } - cpu.HWCap = hwcap - case _AT_HWCAP2: - cpu.HWCap2 = uint(val) + cpu.HWCap = uint(val) } } diff --git a/src/runtime/os_netbsd.go b/src/runtime/os_netbsd.go index c4c3d8e2fe..f7f90cedc1 100644 --- a/src/runtime/os_netbsd.go +++ b/src/runtime/os_netbsd.go @@ -359,7 +359,6 @@ func sysargs(argc int32, argv **byte) { // now argv+n is auxv auxv := (*[1 << 28]uintptr)(add(unsafe.Pointer(argv), uintptr(n)*sys.PtrSize)) sysauxv(auxv[:]) - archauxv(auxv[:]) } const ( diff --git a/src/runtime/os_netbsd_386.go b/src/runtime/os_netbsd_386.go index c203af9cef..037f7e36dc 100644 --- a/src/runtime/os_netbsd_386.go +++ b/src/runtime/os_netbsd_386.go @@ -14,6 +14,3 @@ func lwp_mcontext_init(mc *mcontextt, stk unsafe.Pointer, mp *m, gp *g, fn uintp mc.__gregs[_REG_EDX] = uint32(uintptr(unsafe.Pointer(gp))) mc.__gregs[_REG_ESI] = uint32(fn) } - -func archauxv(auxv []uintptr) { -} diff --git a/src/runtime/os_netbsd_amd64.go b/src/runtime/os_netbsd_amd64.go index ea9d125492..5118b0c4ff 100644 --- a/src/runtime/os_netbsd_amd64.go +++ b/src/runtime/os_netbsd_amd64.go @@ -14,6 +14,3 @@ func lwp_mcontext_init(mc *mcontextt, stk unsafe.Pointer, mp *m, gp *g, fn uintp mc.__gregs[_REG_R9] = uint64(uintptr(unsafe.Pointer(gp))) mc.__gregs[_REG_R12] = uint64(fn) } - -func archauxv(auxv []uintptr) { -} diff --git a/src/runtime/os_netbsd_arm.go b/src/runtime/os_netbsd_arm.go index 646da9dc0b..b5ec23e45b 100644 --- a/src/runtime/os_netbsd_arm.go +++ b/src/runtime/os_netbsd_arm.go @@ -32,6 +32,3 @@ func cputicks() int64 { // runtime·nanotime() is a poor approximation of CPU ticks that is enough for the profiler. return nanotime() } - -func archauxv(auxv []uintptr) { -} diff --git a/src/runtime/os_netbsd_arm64.go b/src/runtime/os_netbsd_arm64.go index ae2638c778..8d21b0a430 100644 --- a/src/runtime/os_netbsd_arm64.go +++ b/src/runtime/os_netbsd_arm64.go @@ -4,10 +4,7 @@ package runtime -import ( - "internal/cpu" - "unsafe" -) +import "unsafe" func lwp_mcontext_init(mc *mcontextt, stk unsafe.Pointer, mp *m, gp *g, fn uintptr) { // Machine dependent mcontext initialisation for LWP. @@ -24,10 +21,3 @@ func cputicks() int64 { // runtime·nanotime() is a poor approximation of CPU ticks that is enough for the profiler. return nanotime() } - -func archauxv(auxv []uintptr) { - // NetBSD does not supply AT_HWCAP, however we still need to initialise cpu.HWCaps. - // For now specify the bare minimum until we add some form of capabilities - // detection. See issue https://golang.org/issue/30824#issuecomment-494901591 - cpu.HWCap = 1<<1 | 1<<0 // ASIMD, FP -} diff --git a/src/runtime/os_openbsd_arm64.go b/src/runtime/os_openbsd_arm64.go index d559a2a3e5..d71de7d196 100644 --- a/src/runtime/os_openbsd_arm64.go +++ b/src/runtime/os_openbsd_arm64.go @@ -4,20 +4,9 @@ package runtime -import ( - "internal/cpu" -) - //go:nosplit func cputicks() int64 { // Currently cputicks() is used in blocking profiler and to seed runtime·fastrand(). // runtime·nanotime() is a poor approximation of CPU ticks that is enough for the profiler. return nanotime() } - -func sysargs(argc int32, argv **byte) { - // OpenBSD does not have auxv, however we still need to initialise cpu.HWCaps. - // For now specify the bare minimum until we add some form of capabilities - // detection. See issue #31746. 
- cpu.HWCap = 1<<1 | 1<<0 // ASIMD, FP -} diff --git a/src/runtime/sys_freebsd_arm64.s b/src/runtime/sys_freebsd_arm64.s index 2330f2ffe2..8a4f9b7fa1 100644 --- a/src/runtime/sys_freebsd_arm64.s +++ b/src/runtime/sys_freebsd_arm64.s @@ -515,24 +515,3 @@ TEXT runtime·getCntxct(SB),NOSPLIT,$0 MOVW R0, ret+8(FP) RET - -// func getisar0() uint64 -TEXT runtime·getisar0(SB),NOSPLIT,$0 - // get Instruction Set Attributes 0 into R0 - MRS ID_AA64ISAR0_EL1, R0 - MOVD R0, ret+0(FP) - RET - -// func getisar1() uint64 -TEXT runtime·getisar1(SB),NOSPLIT,$0 - // get Instruction Set Attributes 1 into R0 - MRS ID_AA64ISAR1_EL1, R0 - MOVD R0, ret+0(FP) - RET - -// func getpfr0() uint64 -TEXT runtime·getpfr0(SB),NOSPLIT,$0 - // get Processor Feature Register 0 into R0 - MRS ID_AA64PFR0_EL1, R0 - MOVD R0, ret+0(FP) - RET -- cgit v1.2.1 From 05f5ae74bc95b0d77a512029bc1a6739c5d0f181 Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Tue, 20 Oct 2020 12:57:14 +0000 Subject: runtime: fix scavenging tests for pallocChunkBytes huge pages and larger Currently the scavenging tests implicitly assume that the system huge page size is always strictly less than 4 MiB, or pallocChunkBytes. This leads to failures on systems with huge pages of this size, and larger. Filter out those tests on such platforms and add a test for the 4 MiB case. The scavenger is already equipped to handle this case. Huge page sizes > 4 MiB are effectively ignored, so also add a test case to ensure that happens. Unfortunately we can't actually run these tests in our CI because they require the platform to provide the right huge page size, but we really should just parameterize this value so we can test it (there's a TODO about this already). Fixes #42053. Change-Id: Ia576cbf67e178a14a178a893967efbed27d6eb17 Reviewed-on: https://go-review.googlesource.com/c/go/+/263837 Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Reviewed-by: Keith Randall Trust: Michael Knyszek --- src/runtime/mgcscavenge_test.go | 53 +++++++++++++++++++++++++++-------------- 1 file changed, 35 insertions(+), 18 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/mgcscavenge_test.go b/src/runtime/mgcscavenge_test.go index 7f619b1e7d..250343077f 100644 --- a/src/runtime/mgcscavenge_test.go +++ b/src/runtime/mgcscavenge_test.go @@ -235,26 +235,43 @@ func TestPallocDataFindScavengeCandidate(t *testing.T) { if PhysHugePageSize > uintptr(PageSize) { // Check hugepage preserving behavior. bits := uint(PhysHugePageSize / uintptr(PageSize)) - tests["PreserveHugePageBottom"] = test{ - alloc: []BitRange{{bits + 2, PallocChunkPages - (bits + 2)}}, - min: 1, - max: 3, // Make it so that max would have us try to break the huge page. - want: BitRange{0, bits + 2}, - } - if 3*bits < PallocChunkPages { - // We need at least 3 huge pages in a chunk for this test to make sense. - tests["PreserveHugePageMiddle"] = test{ - alloc: []BitRange{{0, bits - 10}, {2*bits + 10, PallocChunkPages - (2*bits + 10)}}, + if bits < PallocChunkPages { + tests["PreserveHugePageBottom"] = test{ + alloc: []BitRange{{bits + 2, PallocChunkPages - (bits + 2)}}, min: 1, - max: 12, // Make it so that max would have us try to break the huge page. - want: BitRange{bits, bits + 10}, + max: 3, // Make it so that max would have us try to break the huge page. + want: BitRange{0, bits + 2}, + } + if 3*bits < PallocChunkPages { + // We need at least 3 huge pages in a chunk for this test to make sense. 
+ tests["PreserveHugePageMiddle"] = test{ + alloc: []BitRange{{0, bits - 10}, {2*bits + 10, PallocChunkPages - (2*bits + 10)}}, + min: 1, + max: 12, // Make it so that max would have us try to break the huge page. + want: BitRange{bits, bits + 10}, + } + } + tests["PreserveHugePageTop"] = test{ + alloc: []BitRange{{0, PallocChunkPages - bits}}, + min: 1, + max: 1, // Even one page would break a huge page in this case. + want: BitRange{PallocChunkPages - bits, bits}, + } + } else if bits == PallocChunkPages { + tests["PreserveHugePageAll"] = test{ + min: 1, + max: 1, // Even one page would break a huge page in this case. + want: BitRange{0, PallocChunkPages}, + } + } else { + // The huge page size is greater than pallocChunkPages, so it should + // be effectively disabled. There's no way we can possible scavenge + // a huge page out of this bitmap chunk. + tests["PreserveHugePageNone"] = test{ + min: 1, + max: 1, + want: BitRange{PallocChunkPages - 1, 1}, } - } - tests["PreserveHugePageTop"] = test{ - alloc: []BitRange{{0, PallocChunkPages - bits}}, - min: 1, - max: 1, // Even one page would break a huge page in this case. - want: BitRange{PallocChunkPages - bits, bits}, } } for name, v := range tests { -- cgit v1.2.1 From 1b09d430678d4a6f73b2443463d11f75851aba8a Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Fri, 16 Oct 2020 00:49:02 -0400 Subject: all: update references to symbols moved from io/ioutil to io The old ioutil references are still valid, but update our code to reflect best practices and get used to the new locations. Code compiled with the bootstrap toolchain (cmd/asm, cmd/dist, cmd/compile, debug/elf) must remain Go 1.4-compatible and is excluded. Also excluded vendored code. For #41190. Change-Id: I6d86f2bf7bc37a9d904b6cee3fe0c7af6d94d5b1 Reviewed-on: https://go-review.googlesource.com/c/go/+/263142 Trust: Russ Cox Run-TryBot: Russ Cox TryBot-Result: Go Bot Reviewed-by: Emmanuel Odeke --- src/runtime/crash_unix_test.go | 2 +- src/runtime/testdata/testprogcgo/eintr.go | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/crash_unix_test.go b/src/runtime/crash_unix_test.go index 8ef52aba48..fc87f37408 100644 --- a/src/runtime/crash_unix_test.go +++ b/src/runtime/crash_unix_test.go @@ -241,7 +241,7 @@ func TestPanicSystemstack(t *testing.T) { } // Get traceback. - tb, err := ioutil.ReadAll(pr) + tb, err := io.ReadAll(pr) if err != nil { t.Fatal("reading traceback from pipe: ", err) } diff --git a/src/runtime/testdata/testprogcgo/eintr.go b/src/runtime/testdata/testprogcgo/eintr.go index 791ff1bedc..1722a75eb9 100644 --- a/src/runtime/testdata/testprogcgo/eintr.go +++ b/src/runtime/testdata/testprogcgo/eintr.go @@ -32,7 +32,6 @@ import ( "errors" "fmt" "io" - "io/ioutil" "log" "net" "os" @@ -242,5 +241,5 @@ func testExec(wg *sync.WaitGroup) { // Block blocks until stdin is closed. func Block() { - io.Copy(ioutil.Discard, os.Stdin) + io.Copy(io.Discard, os.Stdin) } -- cgit v1.2.1 From 7f736694fe9b254efa7155a0a5da87c2c18e6078 Mon Sep 17 00:00:00 2001 From: Ian Lance Taylor Date: Tue, 20 Oct 2020 12:54:20 -0700 Subject: runtime: use GOTRACEBACK=system for TestCgoExecSignalMask Try to get a bit more information to understand #42093. For #42093 Change-Id: I818feb08d7561151d52eba3e88c418b55b9f9c1e Reviewed-on: https://go-review.googlesource.com/c/go/+/264018 Trust: Ian Lance Taylor Run-TryBot: Ian Lance Taylor Reviewed-by: Bryan C. 
Mills TryBot-Result: Go Bot --- src/runtime/crash_cgo_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/runtime') diff --git a/src/runtime/crash_cgo_test.go b/src/runtime/crash_cgo_test.go index 4872189f16..b200984050 100644 --- a/src/runtime/crash_cgo_test.go +++ b/src/runtime/crash_cgo_test.go @@ -154,7 +154,7 @@ func TestCgoExecSignalMask(t *testing.T) { case "windows", "plan9": t.Skipf("skipping signal mask test on %s", runtime.GOOS) } - got := runTestProg(t, "testprogcgo", "CgoExecSignalMask") + got := runTestProg(t, "testprogcgo", "CgoExecSignalMask", "GOTRACEBACK=system") want := "OK\n" if got != want { t.Errorf("expected %q, got %v", want, got) -- cgit v1.2.1 From 15ead857dbc638b9d83a7686acf0dc746fc45918 Mon Sep 17 00:00:00 2001 From: "Paul E. Murphy" Date: Wed, 9 Sep 2020 17:24:23 -0500 Subject: cmd/compiler,cmd/go,sync: add internal {LoadAcq,StoreRel}64 on ppc64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add an internal atomic intrinsic for load with acquire semantics (extending LoadAcq to 64b) and add LoadAcquintptr for internal use within the sync package. For other arches, this remaps to the appropriate atomic.Load{,64} intrinsic which should not alter code generation. Similarly, add StoreRel{uintptr,64} for consistency, and inline. Finally, add an exception to allow sync to directly use the runtime/internal/atomic package which avoids more convoluted workarounds (contributed by Lynn Boger). In an extreme example, sync.(*Pool).pin consumes 20% of wall time during fmt tests. This is reduced to 5% on ppc64le/power9. From the fmt benchmarks on ppc64le: name old time/op new time/op delta SprintfPadding 468ns ± 0% 451ns ± 0% -3.63% SprintfEmpty 73.3ns ± 0% 51.9ns ± 0% -29.20% SprintfString 135ns ± 0% 122ns ± 0% -9.63% SprintfTruncateString 232ns ± 0% 214ns ± 0% -7.76% SprintfTruncateBytes 216ns ± 0% 202ns ± 0% -6.48% SprintfSlowParsingPath 162ns ± 0% 142ns ± 0% -12.35% SprintfQuoteString 1.00µs ± 0% 0.99µs ± 0% -1.39% SprintfInt 117ns ± 0% 104ns ± 0% -11.11% SprintfIntInt 190ns ± 0% 175ns ± 0% -7.89% SprintfPrefixedInt 232ns ± 0% 212ns ± 0% -8.62% SprintfFloat 270ns ± 0% 255ns ± 0% -5.56% SprintfComplex 1.01µs ± 0% 0.99µs ± 0% -1.68% SprintfBoolean 127ns ± 0% 111ns ± 0% -12.60% SprintfHexString 220ns ± 0% 198ns ± 0% -10.00% SprintfHexBytes 261ns ± 0% 252ns ± 0% -3.45% SprintfBytes 600ns ± 0% 590ns ± 0% -1.67% SprintfStringer 684ns ± 0% 658ns ± 0% -3.80% SprintfStructure 2.57µs ± 0% 2.57µs ± 0% -0.12% ManyArgs 669ns ± 0% 646ns ± 0% -3.44% FprintInt 140ns ± 0% 136ns ± 0% -2.86% FprintfBytes 184ns ± 0% 181ns ± 0% -1.63% FprintIntNoAlloc 140ns ± 0% 136ns ± 0% -2.86% ScanInts 929µs ± 0% 921µs ± 0% -0.79% ScanRecursiveInt 122ms ± 0% 121ms ± 0% -0.11% ScanRecursiveIntReaderWrapper 122ms ± 0% 122ms ± 0% -0.18% Change-Id: I4d66780261b57b06ef600229e475462e7313f0d6 Reviewed-on: https://go-review.googlesource.com/c/go/+/253748 Run-TryBot: Lynn Boger Reviewed-by: Lynn Boger Reviewed-by: Keith Randall Trust: Lynn Boger TryBot-Result: Go Bot --- src/runtime/internal/atomic/asm_386.s | 3 +++ src/runtime/internal/atomic/asm_amd64.s | 6 ++++++ src/runtime/internal/atomic/asm_arm.s | 6 ++++++ src/runtime/internal/atomic/asm_mips64x.s | 6 ++++++ src/runtime/internal/atomic/asm_mipsx.s | 3 +++ src/runtime/internal/atomic/asm_ppc64x.s | 13 +++++++++++++ src/runtime/internal/atomic/atomic_386.go | 9 +++++++++ src/runtime/internal/atomic/atomic_amd64.go | 18 ++++++++++++++++++ src/runtime/internal/atomic/atomic_arm.go | 6 ++++++ 
src/runtime/internal/atomic/atomic_arm64.go | 12 ++++++++++++ src/runtime/internal/atomic/atomic_arm64.s | 14 ++++++++++++++ src/runtime/internal/atomic/atomic_mips64x.go | 12 ++++++++++++ src/runtime/internal/atomic/atomic_mips64x.s | 8 ++++++++ src/runtime/internal/atomic/atomic_mipsx.go | 6 ++++++ src/runtime/internal/atomic/atomic_ppc64x.go | 12 ++++++++++++ src/runtime/internal/atomic/atomic_ppc64x.s | 20 ++++++++++++++++++++ src/runtime/internal/atomic/atomic_riscv64.go | 12 ++++++++++++ src/runtime/internal/atomic/atomic_riscv64.s | 12 ++++++++++++ src/runtime/internal/atomic/atomic_s390x.go | 24 ++++++++++++++++++++++++ src/runtime/internal/atomic/atomic_wasm.go | 24 ++++++++++++++++++++++++ 20 files changed, 226 insertions(+) (limited to 'src/runtime') diff --git a/src/runtime/internal/atomic/asm_386.s b/src/runtime/internal/atomic/asm_386.s index bcefff373f..7ebf675ac5 100644 --- a/src/runtime/internal/atomic/asm_386.s +++ b/src/runtime/internal/atomic/asm_386.s @@ -189,6 +189,9 @@ TEXT ·Store(SB), NOSPLIT, $0-8 TEXT ·StoreRel(SB), NOSPLIT, $0-8 JMP ·Store(SB) +TEXT runtime∕internal∕atomic·StoreReluintptr(SB), NOSPLIT, $0-8 + JMP runtime∕internal∕atomic·Store(SB) + // uint64 atomicload64(uint64 volatile* addr); TEXT ·Load64(SB), NOSPLIT, $0-12 NO_LOCAL_POINTERS diff --git a/src/runtime/internal/atomic/asm_amd64.s b/src/runtime/internal/atomic/asm_amd64.s index 90c56424c9..80fb31285d 100644 --- a/src/runtime/internal/atomic/asm_amd64.s +++ b/src/runtime/internal/atomic/asm_amd64.s @@ -136,6 +136,12 @@ TEXT runtime∕internal∕atomic·Store(SB), NOSPLIT, $0-12 TEXT runtime∕internal∕atomic·StoreRel(SB), NOSPLIT, $0-12 JMP runtime∕internal∕atomic·Store(SB) +TEXT runtime∕internal∕atomic·StoreRel64(SB), NOSPLIT, $0-16 + JMP runtime∕internal∕atomic·Store64(SB) + +TEXT runtime∕internal∕atomic·StoreReluintptr(SB), NOSPLIT, $0-16 + JMP runtime∕internal∕atomic·Store64(SB) + TEXT runtime∕internal∕atomic·Store8(SB), NOSPLIT, $0-9 MOVQ ptr+0(FP), BX MOVB val+8(FP), AX diff --git a/src/runtime/internal/atomic/asm_arm.s b/src/runtime/internal/atomic/asm_arm.s index c3d1d9025d..274925ed60 100644 --- a/src/runtime/internal/atomic/asm_arm.s +++ b/src/runtime/internal/atomic/asm_arm.s @@ -57,6 +57,9 @@ TEXT ·Loadp(SB),NOSPLIT|NOFRAME,$0-8 TEXT ·LoadAcq(SB),NOSPLIT|NOFRAME,$0-8 B ·Load(SB) +TEXT ·LoadAcquintptr(SB),NOSPLIT|NOFRAME,$0-8 + B ·Load(SB) + TEXT ·Casuintptr(SB),NOSPLIT,$0-13 B ·Cas(SB) @@ -81,6 +84,9 @@ TEXT ·StorepNoWB(SB),NOSPLIT,$0-8 TEXT ·StoreRel(SB),NOSPLIT,$0-8 B ·Store(SB) +TEXT ·StoreReluintptr(SB),NOSPLIT,$0-8 + B ·Store(SB) + TEXT ·Xadduintptr(SB),NOSPLIT,$0-12 B ·Xadd(SB) diff --git a/src/runtime/internal/atomic/asm_mips64x.s b/src/runtime/internal/atomic/asm_mips64x.s index 3290fb726a..03fb822929 100644 --- a/src/runtime/internal/atomic/asm_mips64x.s +++ b/src/runtime/internal/atomic/asm_mips64x.s @@ -158,6 +158,12 @@ TEXT ·StorepNoWB(SB), NOSPLIT, $0-16 TEXT ·StoreRel(SB), NOSPLIT, $0-12 JMP ·Store(SB) +TEXT ·StoreRel64(SB), NOSPLIT, $0-16 + JMP ·Store64(SB) + +TEXT ·StoreReluintptr(SB), NOSPLIT, $0-16 + JMP ·Store64(SB) + TEXT ·Store(SB), NOSPLIT, $0-12 MOVV ptr+0(FP), R1 MOVW val+8(FP), R2 diff --git a/src/runtime/internal/atomic/asm_mipsx.s b/src/runtime/internal/atomic/asm_mipsx.s index 62811a6599..63bb548825 100644 --- a/src/runtime/internal/atomic/asm_mipsx.s +++ b/src/runtime/internal/atomic/asm_mipsx.s @@ -122,6 +122,9 @@ TEXT ·StorepNoWB(SB),NOSPLIT,$0-8 TEXT ·StoreRel(SB),NOSPLIT,$0-8 JMP ·Store(SB) +TEXT ·StoreReluintptr(SB),NOSPLIT,$0-8 + JMP ·Store(SB) + // void 
Or8(byte volatile*, byte); TEXT ·Or8(SB),NOSPLIT,$0-5 MOVW ptr+0(FP), R1 diff --git a/src/runtime/internal/atomic/asm_ppc64x.s b/src/runtime/internal/atomic/asm_ppc64x.s index 06dc931bf4..c0237de4d0 100644 --- a/src/runtime/internal/atomic/asm_ppc64x.s +++ b/src/runtime/internal/atomic/asm_ppc64x.s @@ -83,12 +83,18 @@ TEXT runtime∕internal∕atomic·Casuintptr(SB), NOSPLIT, $0-25 TEXT runtime∕internal∕atomic·Loaduintptr(SB), NOSPLIT|NOFRAME, $0-16 BR runtime∕internal∕atomic·Load64(SB) +TEXT runtime∕internal∕atomic·LoadAcquintptr(SB), NOSPLIT|NOFRAME, $0-16 + BR runtime∕internal∕atomic·LoadAcq64(SB) + TEXT runtime∕internal∕atomic·Loaduint(SB), NOSPLIT|NOFRAME, $0-16 BR runtime∕internal∕atomic·Load64(SB) TEXT runtime∕internal∕atomic·Storeuintptr(SB), NOSPLIT, $0-16 BR runtime∕internal∕atomic·Store64(SB) +TEXT runtime∕internal∕atomic·StoreReluintptr(SB), NOSPLIT, $0-16 + BR runtime∕internal∕atomic·StoreRel64(SB) + TEXT runtime∕internal∕atomic·Xadduintptr(SB), NOSPLIT, $0-24 BR runtime∕internal∕atomic·Xadd64(SB) @@ -191,6 +197,13 @@ TEXT runtime∕internal∕atomic·StoreRel(SB), NOSPLIT, $0-12 MOVW R4, 0(R3) RET +TEXT runtime∕internal∕atomic·StoreRel64(SB), NOSPLIT, $0-16 + MOVD ptr+0(FP), R3 + MOVD val+8(FP), R4 + LWSYNC + MOVD R4, 0(R3) + RET + // void runtime∕internal∕atomic·Or8(byte volatile*, byte); TEXT runtime∕internal∕atomic·Or8(SB), NOSPLIT, $0-9 MOVD ptr+0(FP), R3 diff --git a/src/runtime/internal/atomic/atomic_386.go b/src/runtime/internal/atomic/atomic_386.go index 8d002ebfe3..06ce6a5356 100644 --- a/src/runtime/internal/atomic/atomic_386.go +++ b/src/runtime/internal/atomic/atomic_386.go @@ -30,6 +30,12 @@ func LoadAcq(ptr *uint32) uint32 { return *ptr } +//go:nosplit +//go:noinline +func LoadAcquintptr(ptr *uintptr) uintptr { + return *ptr +} + //go:noescape func Xadd64(ptr *uint64, delta int64) uint64 @@ -83,5 +89,8 @@ func Store64(ptr *uint64, val uint64) //go:noescape func StoreRel(ptr *uint32, val uint32) +//go:noescape +func StoreReluintptr(ptr *uintptr, val uintptr) + // NO go:noescape annotation; see atomic_pointer.go. func StorepNoWB(ptr unsafe.Pointer, val unsafe.Pointer) diff --git a/src/runtime/internal/atomic/atomic_amd64.go b/src/runtime/internal/atomic/atomic_amd64.go index 14b8101720..1b71a16d94 100644 --- a/src/runtime/internal/atomic/atomic_amd64.go +++ b/src/runtime/internal/atomic/atomic_amd64.go @@ -35,6 +35,18 @@ func LoadAcq(ptr *uint32) uint32 { return *ptr } +//go:nosplit +//go:noinline +func LoadAcq64(ptr *uint64) uint64 { + return *ptr +} + +//go:nosplit +//go:noinline +func LoadAcquintptr(ptr *uintptr) uintptr { + return *ptr +} + //go:noescape func Xadd(ptr *uint32, delta int32) uint32 @@ -85,6 +97,12 @@ func Store64(ptr *uint64, val uint64) //go:noescape func StoreRel(ptr *uint32, val uint32) +//go:noescape +func StoreRel64(ptr *uint64, val uint64) + +//go:noescape +func StoreReluintptr(ptr *uintptr, val uintptr) + // StorepNoWB performs *ptr = val atomically and without a write // barrier. 
// diff --git a/src/runtime/internal/atomic/atomic_arm.go b/src/runtime/internal/atomic/atomic_arm.go index 95713afcc1..67d529c1cb 100644 --- a/src/runtime/internal/atomic/atomic_arm.go +++ b/src/runtime/internal/atomic/atomic_arm.go @@ -81,6 +81,9 @@ func Store(addr *uint32, v uint32) //go:noescape func StoreRel(addr *uint32, v uint32) +//go:noescape +func StoreReluintptr(addr *uintptr, v uintptr) + //go:nosplit func goCas64(addr *uint64, old, new uint64) bool { if uintptr(unsafe.Pointer(addr))&7 != 0 { @@ -194,6 +197,9 @@ func Load8(addr *uint8) uint8 //go:noescape func LoadAcq(addr *uint32) uint32 +//go:noescape +func LoadAcquintptr(ptr *uintptr) uintptr + //go:noescape func Cas64(addr *uint64, old, new uint64) bool diff --git a/src/runtime/internal/atomic/atomic_arm64.go b/src/runtime/internal/atomic/atomic_arm64.go index 26ca94d54c..c9b4322fe9 100644 --- a/src/runtime/internal/atomic/atomic_arm64.go +++ b/src/runtime/internal/atomic/atomic_arm64.go @@ -41,6 +41,12 @@ func Loadp(ptr unsafe.Pointer) unsafe.Pointer //go:noescape func LoadAcq(addr *uint32) uint32 +//go:noescape +func LoadAcq64(ptr *uint64) uint64 + +//go:noescape +func LoadAcquintptr(ptr *uintptr) uintptr + //go:noescape func Or8(ptr *uint8, val uint8) @@ -67,3 +73,9 @@ func StorepNoWB(ptr unsafe.Pointer, val unsafe.Pointer) //go:noescape func StoreRel(ptr *uint32, val uint32) + +//go:noescape +func StoreRel64(ptr *uint64, val uint64) + +//go:noescape +func StoreReluintptr(ptr *uintptr, val uintptr) diff --git a/src/runtime/internal/atomic/atomic_arm64.s b/src/runtime/internal/atomic/atomic_arm64.s index a2eb7568d2..36c7698b18 100644 --- a/src/runtime/internal/atomic/atomic_arm64.s +++ b/src/runtime/internal/atomic/atomic_arm64.s @@ -36,12 +36,26 @@ TEXT ·Loadp(SB),NOSPLIT,$0-16 TEXT ·LoadAcq(SB),NOSPLIT,$0-12 B ·Load(SB) +// uint64 runtime∕internal∕atomic·LoadAcquintptr(uint64 volatile* addr) +TEXT ·LoadAcq64(SB),NOSPLIT,$0-16 + B ·Load64(SB) + +// uintptr runtime∕internal∕atomic·LoadAcq64(uintptr volatile* addr) +TEXT ·LoadAcquintptr(SB),NOSPLIT,$0-16 + B ·Load64(SB) + TEXT runtime∕internal∕atomic·StorepNoWB(SB), NOSPLIT, $0-16 B runtime∕internal∕atomic·Store64(SB) TEXT runtime∕internal∕atomic·StoreRel(SB), NOSPLIT, $0-12 B runtime∕internal∕atomic·Store(SB) +TEXT runtime∕internal∕atomic·StoreRel64(SB), NOSPLIT, $0-16 + B runtime∕internal∕atomic·Store64(SB) + +TEXT runtime∕internal∕atomic·StoreReluintptr(SB), NOSPLIT, $0-16 + B runtime∕internal∕atomic·Store64(SB) + TEXT runtime∕internal∕atomic·Store(SB), NOSPLIT, $0-12 MOVD ptr+0(FP), R0 MOVW val+8(FP), R1 diff --git a/src/runtime/internal/atomic/atomic_mips64x.go b/src/runtime/internal/atomic/atomic_mips64x.go index 1d9977850b..fca2242514 100644 --- a/src/runtime/internal/atomic/atomic_mips64x.go +++ b/src/runtime/internal/atomic/atomic_mips64x.go @@ -41,6 +41,12 @@ func Loadp(ptr unsafe.Pointer) unsafe.Pointer //go:noescape func LoadAcq(ptr *uint32) uint32 +//go:noescape +func LoadAcq64(ptr *uint64) uint64 + +//go:noescape +func LoadAcquintptr(ptr *uintptr) uintptr + //go:noescape func And8(ptr *uint8, val uint8) @@ -69,3 +75,9 @@ func StorepNoWB(ptr unsafe.Pointer, val unsafe.Pointer) //go:noescape func StoreRel(ptr *uint32, val uint32) + +//go:noescape +func StoreRel64(ptr *uint64, val uint64) + +//go:noescape +func StoreReluintptr(ptr *uintptr, val uintptr) diff --git a/src/runtime/internal/atomic/atomic_mips64x.s b/src/runtime/internal/atomic/atomic_mips64x.s index 1ed90937c9..125c0c221c 100644 --- a/src/runtime/internal/atomic/atomic_mips64x.s +++ 
b/src/runtime/internal/atomic/atomic_mips64x.s @@ -47,3 +47,11 @@ TEXT ·Loadp(SB),NOSPLIT|NOFRAME,$0-16 // uint32 runtime∕internal∕atomic·LoadAcq(uint32 volatile* ptr) TEXT ·LoadAcq(SB),NOSPLIT|NOFRAME,$0-12 JMP atomic·Load(SB) + +// uint64 runtime∕internal∕atomic·LoadAcq64(uint64 volatile* ptr) +TEXT ·LoadAcq64(SB),NOSPLIT|NOFRAME,$0-16 + JMP atomic·Load64(SB) + +// uintptr runtime∕internal∕atomic·LoadAcquintptr(uintptr volatile* ptr) +TEXT ·LoadAcquintptr(SB),NOSPLIT|NOFRAME,$0-16 + JMP atomic·Load64(SB) diff --git a/src/runtime/internal/atomic/atomic_mipsx.go b/src/runtime/internal/atomic/atomic_mipsx.go index b99bfe7dbf..be1e6a038b 100644 --- a/src/runtime/internal/atomic/atomic_mipsx.go +++ b/src/runtime/internal/atomic/atomic_mipsx.go @@ -132,6 +132,9 @@ func Loadp(ptr unsafe.Pointer) unsafe.Pointer //go:noescape func LoadAcq(ptr *uint32) uint32 +//go:noescape +func LoadAcquintptr(ptr *uintptr) uintptr + //go:noescape func And8(ptr *uint8, val uint8) @@ -150,5 +153,8 @@ func StorepNoWB(ptr unsafe.Pointer, val unsafe.Pointer) //go:noescape func StoreRel(ptr *uint32, val uint32) +//go:noescape +func StoreReluintptr(ptr *uintptr, val uintptr) + //go:noescape func CasRel(addr *uint32, old, new uint32) bool diff --git a/src/runtime/internal/atomic/atomic_ppc64x.go b/src/runtime/internal/atomic/atomic_ppc64x.go index a48ecf5ee8..e759bb27a2 100644 --- a/src/runtime/internal/atomic/atomic_ppc64x.go +++ b/src/runtime/internal/atomic/atomic_ppc64x.go @@ -41,6 +41,12 @@ func Loadp(ptr unsafe.Pointer) unsafe.Pointer //go:noescape func LoadAcq(ptr *uint32) uint32 +//go:noescape +func LoadAcq64(ptr *uint64) uint64 + +//go:noescape +func LoadAcquintptr(ptr *uintptr) uintptr + //go:noescape func And8(ptr *uint8, val uint8) @@ -67,5 +73,11 @@ func Store64(ptr *uint64, val uint64) //go:noescape func StoreRel(ptr *uint32, val uint32) +//go:noescape +func StoreRel64(ptr *uint64, val uint64) + +//go:noescape +func StoreReluintptr(ptr *uintptr, val uintptr) + // NO go:noescape annotation; see atomic_pointer.go. func StorepNoWB(ptr unsafe.Pointer, val unsafe.Pointer) diff --git a/src/runtime/internal/atomic/atomic_ppc64x.s b/src/runtime/internal/atomic/atomic_ppc64x.s index c2f696fb34..b79cdbca34 100644 --- a/src/runtime/internal/atomic/atomic_ppc64x.s +++ b/src/runtime/internal/atomic/atomic_ppc64x.s @@ -6,6 +6,15 @@ #include "textflag.h" + +// For more details about how various memory models are +// enforced on POWER, the following paper provides more +// details about how they enforce C/C++ like models. This +// gives context about why the strange looking code +// sequences below work. 
+// +// http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html + // uint32 runtime∕internal∕atomic·Load(uint32 volatile* ptr) TEXT ·Load(SB),NOSPLIT|NOFRAME,$-8-12 MOVD ptr+0(FP), R3 @@ -56,5 +65,16 @@ TEXT ·LoadAcq(SB),NOSPLIT|NOFRAME,$-8-12 MOVWZ 0(R3), R3 CMPW R3, R3, CR7 BC 4, 30, 1(PC) // bne- cr7, 0x4 + ISYNC MOVW R3, ret+8(FP) RET + +// uint64 runtime∕internal∕atomic·LoadAcq64(uint64 volatile* ptr) +TEXT ·LoadAcq64(SB),NOSPLIT|NOFRAME,$-8-16 + MOVD ptr+0(FP), R3 + MOVD 0(R3), R3 + CMP R3, R3, CR7 + BC 4, 30, 1(PC) // bne- cr7, 0x4 + ISYNC + MOVD R3, ret+8(FP) + RET diff --git a/src/runtime/internal/atomic/atomic_riscv64.go b/src/runtime/internal/atomic/atomic_riscv64.go index d52512369e..617bc1a3eb 100644 --- a/src/runtime/internal/atomic/atomic_riscv64.go +++ b/src/runtime/internal/atomic/atomic_riscv64.go @@ -39,6 +39,12 @@ func Loadp(ptr unsafe.Pointer) unsafe.Pointer //go:noescape func LoadAcq(ptr *uint32) uint32 +//go:noescape +func LoadAcq64(ptr *uint64) uint64 + +//go:noescape +func LoadAcquintptr(ptr *uintptr) uintptr + //go:noescape func Or8(ptr *uint8, val uint8) @@ -65,3 +71,9 @@ func StorepNoWB(ptr unsafe.Pointer, val unsafe.Pointer) //go:noescape func StoreRel(ptr *uint32, val uint32) + +//go:noescape +func StoreRel64(ptr *uint64, val uint64) + +//go:noescape +func StoreReluintptr(ptr *uintptr, val uintptr) diff --git a/src/runtime/internal/atomic/atomic_riscv64.s b/src/runtime/internal/atomic/atomic_riscv64.s index d005325ca3..db139d690a 100644 --- a/src/runtime/internal/atomic/atomic_riscv64.s +++ b/src/runtime/internal/atomic/atomic_riscv64.s @@ -150,6 +150,12 @@ TEXT ·Xaddint64(SB),NOSPLIT,$0-24 TEXT ·LoadAcq(SB),NOSPLIT|NOFRAME,$0-12 JMP ·Load(SB) +TEXT ·LoadAcq64(SB),NOSPLIT|NOFRAME,$0-16 + JMP ·Load64(SB) + +TEXT ·LoadAcquintptr(SB),NOSPLIT|NOFRAME,$0-16 + JMP ·Load64(SB) + // func Loadp(ptr unsafe.Pointer) unsafe.Pointer TEXT ·Loadp(SB),NOSPLIT,$0-16 JMP ·Load64(SB) @@ -161,6 +167,12 @@ TEXT ·StorepNoWB(SB), NOSPLIT, $0-16 TEXT ·StoreRel(SB), NOSPLIT, $0-12 JMP ·Store(SB) +TEXT ·StoreRel64(SB), NOSPLIT, $0-16 + JMP ·Store64(SB) + +TEXT ·StoreReluintptr(SB), NOSPLIT, $0-16 + JMP ·Store64(SB) + // func Xchg(ptr *uint32, new uint32) uint32 TEXT ·Xchg(SB), NOSPLIT, $0-20 MOV ptr+0(FP), A0 diff --git a/src/runtime/internal/atomic/atomic_s390x.go b/src/runtime/internal/atomic/atomic_s390x.go index 4d73b39baf..b649caa39f 100644 --- a/src/runtime/internal/atomic/atomic_s390x.go +++ b/src/runtime/internal/atomic/atomic_s390x.go @@ -41,6 +41,18 @@ func LoadAcq(ptr *uint32) uint32 { return *ptr } +//go:nosplit +//go:noinline +func LoadAcq64(ptr *uint64) uint64 { + return *ptr +} + +//go:nosplit +//go:noinline +func LoadAcquintptr(ptr *uintptr) uintptr { + return *ptr +} + //go:noescape func Store(ptr *uint32, val uint32) @@ -59,6 +71,18 @@ func StoreRel(ptr *uint32, val uint32) { *ptr = val } +//go:nosplit +//go:noinline +func StoreRel64(ptr *uint64, val uint64) { + *ptr = val +} + +//go:nosplit +//go:noinline +func StoreReluintptr(ptr *uintptr, val uintptr) { + *ptr = val +} + //go:noescape func And8(ptr *uint8, val uint8) diff --git a/src/runtime/internal/atomic/atomic_wasm.go b/src/runtime/internal/atomic/atomic_wasm.go index 2c0c3a8174..60a4942884 100644 --- a/src/runtime/internal/atomic/atomic_wasm.go +++ b/src/runtime/internal/atomic/atomic_wasm.go @@ -45,6 +45,18 @@ func LoadAcq(ptr *uint32) uint32 { return *ptr } +//go:nosplit +//go:noinline +func LoadAcq64(ptr *uint64) uint64 { + return *ptr +} + +//go:nosplit +//go:noinline +func 
LoadAcquintptr(ptr *uintptr) uintptr { + return *ptr +} + //go:nosplit //go:noinline func Load8(ptr *uint8) uint8 { @@ -141,6 +153,18 @@ func StoreRel(ptr *uint32, val uint32) { *ptr = val } +//go:nosplit +//go:noinline +func StoreRel64(ptr *uint64, val uint64) { + *ptr = val +} + +//go:nosplit +//go:noinline +func StoreReluintptr(ptr *uintptr, val uintptr) { + *ptr = val +} + //go:nosplit //go:noinline func Store8(ptr *uint8, val uint8) { -- cgit v1.2.1 From 6f45b39e4dbabf0b179a60ffacf434e55b2d5eab Mon Sep 17 00:00:00 2001 From: Joel Sing Date: Tue, 19 May 2020 18:55:31 +1000 Subject: cmd/compile,cmd/internal/obj/riscv: move g register on riscv64 The original riscv64 port used the thread pointer (TP aka X4) register for the g pointer, however this register is also used when TLS support is required, resulting in a conflict (for example, when a signal is received we have no way of readily knowing if X4 contains a pointer to the TCB or a pointer to a g). In order to support cgo, free up the X4 register by moving g to X27. This unfortunately means that the X4 register is unused in non-cgo mode, however the alternative is to not support cgo on this platform. Update #36641 Change-Id: Idcaf3e8ccbe42972a1b8943aeefde7149d9c960a Reviewed-on: https://go-review.googlesource.com/c/go/+/263477 Trust: Joel Sing Reviewed-by: Cherry Zhang --- src/runtime/asm_riscv64.s | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/asm_riscv64.s b/src/runtime/asm_riscv64.s index 4084ced7f8..a136085084 100644 --- a/src/runtime/asm_riscv64.s +++ b/src/runtime/asm_riscv64.s @@ -519,12 +519,12 @@ flush: MOV T1, 16(X2) // Also second argument to wbBufFlush // TODO: Optimise - // R3 is g. - // R4 already saved (T0) - // R5 already saved (T1) - // R9 already saved (A0) - // R10 already saved (A1) - // R30 is tmp register. + // X5 already saved (T0) + // X6 already saved (T1) + // X10 already saved (A0) + // X11 already saved (A1) + // X27 is g. + // X31 is tmp register. MOV X0, 24(X2) MOV X1, 32(X2) MOV X2, 40(X2) -- cgit v1.2.1 From 9a49f772575f8009355ac7d64d51ef41bc960c3d Mon Sep 17 00:00:00 2001 From: Keith Randall Date: Tue, 20 Oct 2020 20:08:30 -0700 Subject: runtime/race: update race .syso files Fixes #39186 Change-Id: I624ab73b3083f190978c09716672ce1b712a5c81 Reviewed-on: https://go-review.googlesource.com/c/go/+/264082 Run-TryBot: Keith Randall TryBot-Result: Go Bot Reviewed-by: Ian Lance Taylor Trust: Keith Randall --- src/runtime/race/README | 14 +++++++------- src/runtime/race/race_darwin_amd64.syso | Bin 449292 -> 451280 bytes src/runtime/race/race_freebsd_amd64.syso | Bin 579744 -> 583264 bytes src/runtime/race/race_linux_amd64.syso | Bin 521752 -> 525176 bytes src/runtime/race/race_linux_arm64.syso | Bin 500584 -> 505224 bytes src/runtime/race/race_linux_ppc64le.syso | Bin 623824 -> 624648 bytes src/runtime/race/race_netbsd_amd64.syso | Bin 602664 -> 609424 bytes src/runtime/race/race_windows_amd64.syso | Bin 458427 -> 461185 bytes 8 files changed, 7 insertions(+), 7 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/race/README b/src/runtime/race/README index 34485f0fb2..b36d82ccfd 100644 --- a/src/runtime/race/README +++ b/src/runtime/race/README @@ -4,10 +4,10 @@ the LLVM project (https://github.com/llvm/llvm-project/tree/master/compiler-rt). To update the .syso files use golang.org/x/build/cmd/racebuild. 
-race_darwin_amd64.syso built with LLVM 3496d6e4bea9cb99cb382939b7e79a50a3b863a5 and Go 553e003414d3aa90cc39830ee22f08453d9f3408. -race_freebsd_amd64.syso built with LLVM 3496d6e4bea9cb99cb382939b7e79a50a3b863a5 and Go 553e003414d3aa90cc39830ee22f08453d9f3408. -race_linux_amd64.syso built with LLVM 6c75db8b4bc59eace18143ce086419d37da24746 and Go 7388956b76ce15a11346cebefcf6193db044caaf. -race_linux_ppc64le.syso built with LLVM 6c75db8b4bc59eace18143ce086419d37da24746 and Go 7388956b76ce15a11346cebefcf6193db044caaf. -race_netbsd_amd64.syso built with LLVM 3496d6e4bea9cb99cb382939b7e79a50a3b863a5 and Go 553e003414d3aa90cc39830ee22f08453d9f3408. -race_windows_amd64.syso built with LLVM 3496d6e4bea9cb99cb382939b7e79a50a3b863a5 and Go 553e003414d3aa90cc39830ee22f08453d9f3408. -race_linux_arm64.syso built with LLVM 6c75db8b4bc59eace18143ce086419d37da24746 and Go 7388956b76ce15a11346cebefcf6193db044caaf. +race_darwin_amd64.syso built with LLVM 89f7ccea6f6488c443655880229c54db1f180153 and Go f62d3202bf9dbb3a00ad2a2c63ff4fa4188c5d3b. +race_freebsd_amd64.syso built with LLVM 89f7ccea6f6488c443655880229c54db1f180153 and Go f62d3202bf9dbb3a00ad2a2c63ff4fa4188c5d3b. +race_linux_amd64.syso built with LLVM 89f7ccea6f6488c443655880229c54db1f180153 and Go f62d3202bf9dbb3a00ad2a2c63ff4fa4188c5d3b. +race_linux_ppc64le.syso built with LLVM 89f7ccea6f6488c443655880229c54db1f180153 and Go f62d3202bf9dbb3a00ad2a2c63ff4fa4188c5d3b. +race_netbsd_amd64.syso built with LLVM 89f7ccea6f6488c443655880229c54db1f180153 and Go f62d3202bf9dbb3a00ad2a2c63ff4fa4188c5d3b. +race_windows_amd64.syso built with LLVM 89f7ccea6f6488c443655880229c54db1f180153 and Go f62d3202bf9dbb3a00ad2a2c63ff4fa4188c5d3b. +race_linux_arm64.syso built with LLVM 89f7ccea6f6488c443655880229c54db1f180153 and Go f62d3202bf9dbb3a00ad2a2c63ff4fa4188c5d3b. 
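For context, these .syso objects are the prebuilt ThreadSanitizer runtime that the -race build mode links in; the lines above only record which LLVM and Go revisions produced them. A minimal program of the kind the updated runtime should still report (illustrative only, not derived from this change):

	// racy.go: build or run with the -race flag, e.g. go run -race racy.go
	package main

	import (
		"fmt"
		"sync"
	)

	func main() {
		var wg sync.WaitGroup
		n := 0
		for i := 0; i < 2; i++ {
			wg.Add(1)
			go func() {
				defer wg.Done()
				n++ // unsynchronized write: flagged by the race detector
			}()
		}
		wg.Wait()
		fmt.Println(n)
	}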
diff --git a/src/runtime/race/race_darwin_amd64.syso b/src/runtime/race/race_darwin_amd64.syso index d03a593f5a..3f95ecc8ee 100644 Binary files a/src/runtime/race/race_darwin_amd64.syso and b/src/runtime/race/race_darwin_amd64.syso differ diff --git a/src/runtime/race/race_freebsd_amd64.syso b/src/runtime/race/race_freebsd_amd64.syso index 573591c56f..2a5b46f4ce 100644 Binary files a/src/runtime/race/race_freebsd_amd64.syso and b/src/runtime/race/race_freebsd_amd64.syso differ diff --git a/src/runtime/race/race_linux_amd64.syso b/src/runtime/race/race_linux_amd64.syso index d31f85df56..e00398c964 100644 Binary files a/src/runtime/race/race_linux_amd64.syso and b/src/runtime/race/race_linux_amd64.syso differ diff --git a/src/runtime/race/race_linux_arm64.syso b/src/runtime/race/race_linux_arm64.syso index 7c74171b0f..9dae738700 100644 Binary files a/src/runtime/race/race_linux_arm64.syso and b/src/runtime/race/race_linux_arm64.syso differ diff --git a/src/runtime/race/race_linux_ppc64le.syso b/src/runtime/race/race_linux_ppc64le.syso index a3c72bec55..b562656d56 100644 Binary files a/src/runtime/race/race_linux_ppc64le.syso and b/src/runtime/race/race_linux_ppc64le.syso differ diff --git a/src/runtime/race/race_netbsd_amd64.syso b/src/runtime/race/race_netbsd_amd64.syso index 54e276bcff..11af16f046 100644 Binary files a/src/runtime/race/race_netbsd_amd64.syso and b/src/runtime/race/race_netbsd_amd64.syso differ diff --git a/src/runtime/race/race_windows_amd64.syso b/src/runtime/race/race_windows_amd64.syso index abaf42649f..9fbf9b4391 100644 Binary files a/src/runtime/race/race_windows_amd64.syso and b/src/runtime/race/race_windows_amd64.syso differ -- cgit v1.2.1 From b4a06b20897fe7ea3be715cb51040a2ccc52c15b Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Tue, 14 Jul 2020 20:27:27 +0000 Subject: runtime: define the AddrRange used for testing in terms of addrRange Currently the AddrRange used for testing is defined separately from addrRange in the runtime, making it difficult to test it as well as addrRanges. Redefine AddrRange in terms of addrRange instead. For #40191. Change-Id: I3aa5b8df3e4c9a3c494b46ab802dd574b2488141 Reviewed-on: https://go-review.googlesource.com/c/go/+/242677 Trust: Michael Knyszek Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Reviewed-by: Michael Pratt Reviewed-by: Austin Clements --- src/runtime/export_test.go | 30 ++++++++++--- src/runtime/mpagealloc_test.go | 98 +++++++++++++++++++++--------------------- 2 files changed, 72 insertions(+), 56 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go index f2fa11dc98..25b251f4ba 100644 --- a/src/runtime/export_test.go +++ b/src/runtime/export_test.go @@ -749,10 +749,7 @@ func (p *PageAlloc) Scavenge(nbytes uintptr, mayUnlock bool) (r uintptr) { func (p *PageAlloc) InUse() []AddrRange { ranges := make([]AddrRange, 0, len(p.inUse.ranges)) for _, r := range p.inUse.ranges { - ranges = append(ranges, AddrRange{ - Base: r.base.addr(), - Limit: r.limit.addr(), - }) + ranges = append(ranges, AddrRange{r}) } return ranges } @@ -763,10 +760,29 @@ func (p *PageAlloc) PallocData(i ChunkIdx) *PallocData { return (*PallocData)((*pageAlloc)(p).tryChunkOf(ci)) } -// AddrRange represents a range over addresses. -// Specifically, it represents the range [Base, Limit). +// AddrRange is a wrapper around addrRange for testing. type AddrRange struct { - Base, Limit uintptr + addrRange +} + +// MakeAddrRange creates a new address range. 
+func MakeAddrRange(base, limit uintptr) AddrRange { + return AddrRange{makeAddrRange(base, limit)} +} + +// Base returns the virtual base address of the address range. +func (a AddrRange) Base() uintptr { + return a.addrRange.base.addr() +} + +// Base returns the virtual address of the limit of the address range. +func (a AddrRange) Limit() uintptr { + return a.addrRange.limit.addr() +} + +// Equals returns true if the two address ranges are exactly equal. +func (a AddrRange) Equals(b AddrRange) bool { + return a == b } // BitRange represents a range over a bitmap. diff --git a/src/runtime/mpagealloc_test.go b/src/runtime/mpagealloc_test.go index 65ba71d459..5d979fa95b 100644 --- a/src/runtime/mpagealloc_test.go +++ b/src/runtime/mpagealloc_test.go @@ -54,7 +54,7 @@ func TestPageAllocGrow(t *testing.T) { BaseChunkIdx, }, inUse: []AddrRange{ - {PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+1, 0)}, + MakeAddrRange(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+1, 0)), }, }, "Contiguous2": { @@ -63,7 +63,7 @@ func TestPageAllocGrow(t *testing.T) { BaseChunkIdx + 1, }, inUse: []AddrRange{ - {PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+2, 0)}, + MakeAddrRange(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+2, 0)), }, }, "Contiguous5": { @@ -75,7 +75,7 @@ func TestPageAllocGrow(t *testing.T) { BaseChunkIdx + 4, }, inUse: []AddrRange{ - {PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+5, 0)}, + MakeAddrRange(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+5, 0)), }, }, "Discontiguous": { @@ -85,9 +85,9 @@ func TestPageAllocGrow(t *testing.T) { BaseChunkIdx + 4, }, inUse: []AddrRange{ - {PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+1, 0)}, - {PageBase(BaseChunkIdx+2, 0), PageBase(BaseChunkIdx+3, 0)}, - {PageBase(BaseChunkIdx+4, 0), PageBase(BaseChunkIdx+5, 0)}, + MakeAddrRange(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+1, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+2, 0), PageBase(BaseChunkIdx+3, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+4, 0), PageBase(BaseChunkIdx+5, 0)), }, }, "Mixed": { @@ -98,8 +98,8 @@ func TestPageAllocGrow(t *testing.T) { BaseChunkIdx + 4, }, inUse: []AddrRange{ - {PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+3, 0)}, - {PageBase(BaseChunkIdx+4, 0), PageBase(BaseChunkIdx+5, 0)}, + MakeAddrRange(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+3, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+4, 0), PageBase(BaseChunkIdx+5, 0)), }, }, "WildlyDiscontiguous": { @@ -110,9 +110,9 @@ func TestPageAllocGrow(t *testing.T) { BaseChunkIdx + 0x21, }, inUse: []AddrRange{ - {PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+2, 0)}, - {PageBase(BaseChunkIdx+0x10, 0), PageBase(BaseChunkIdx+0x11, 0)}, - {PageBase(BaseChunkIdx+0x21, 0), PageBase(BaseChunkIdx+0x22, 0)}, + MakeAddrRange(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+2, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+0x10, 0), PageBase(BaseChunkIdx+0x11, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+0x21, 0), PageBase(BaseChunkIdx+0x22, 0)), }, }, "ManyDiscontiguous": { @@ -129,39 +129,39 @@ func TestPageAllocGrow(t *testing.T) { BaseChunkIdx + 64, }, inUse: []AddrRange{ - {PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+1, 0)}, - {PageBase(BaseChunkIdx+2, 0), PageBase(BaseChunkIdx+3, 0)}, - {PageBase(BaseChunkIdx+4, 0), PageBase(BaseChunkIdx+5, 0)}, - {PageBase(BaseChunkIdx+6, 0), PageBase(BaseChunkIdx+7, 0)}, - {PageBase(BaseChunkIdx+8, 0), PageBase(BaseChunkIdx+9, 0)}, - {PageBase(BaseChunkIdx+10, 0), PageBase(BaseChunkIdx+11, 0)}, - {PageBase(BaseChunkIdx+12, 0), PageBase(BaseChunkIdx+13, 0)}, - 
{PageBase(BaseChunkIdx+14, 0), PageBase(BaseChunkIdx+15, 0)}, - {PageBase(BaseChunkIdx+16, 0), PageBase(BaseChunkIdx+17, 0)}, - {PageBase(BaseChunkIdx+18, 0), PageBase(BaseChunkIdx+19, 0)}, - {PageBase(BaseChunkIdx+20, 0), PageBase(BaseChunkIdx+21, 0)}, - {PageBase(BaseChunkIdx+22, 0), PageBase(BaseChunkIdx+23, 0)}, - {PageBase(BaseChunkIdx+24, 0), PageBase(BaseChunkIdx+25, 0)}, - {PageBase(BaseChunkIdx+26, 0), PageBase(BaseChunkIdx+27, 0)}, - {PageBase(BaseChunkIdx+28, 0), PageBase(BaseChunkIdx+29, 0)}, - {PageBase(BaseChunkIdx+30, 0), PageBase(BaseChunkIdx+31, 0)}, - {PageBase(BaseChunkIdx+32, 0), PageBase(BaseChunkIdx+33, 0)}, - {PageBase(BaseChunkIdx+34, 0), PageBase(BaseChunkIdx+35, 0)}, - {PageBase(BaseChunkIdx+36, 0), PageBase(BaseChunkIdx+37, 0)}, - {PageBase(BaseChunkIdx+38, 0), PageBase(BaseChunkIdx+39, 0)}, - {PageBase(BaseChunkIdx+40, 0), PageBase(BaseChunkIdx+41, 0)}, - {PageBase(BaseChunkIdx+42, 0), PageBase(BaseChunkIdx+43, 0)}, - {PageBase(BaseChunkIdx+44, 0), PageBase(BaseChunkIdx+45, 0)}, - {PageBase(BaseChunkIdx+46, 0), PageBase(BaseChunkIdx+47, 0)}, - {PageBase(BaseChunkIdx+48, 0), PageBase(BaseChunkIdx+49, 0)}, - {PageBase(BaseChunkIdx+50, 0), PageBase(BaseChunkIdx+51, 0)}, - {PageBase(BaseChunkIdx+52, 0), PageBase(BaseChunkIdx+53, 0)}, - {PageBase(BaseChunkIdx+54, 0), PageBase(BaseChunkIdx+55, 0)}, - {PageBase(BaseChunkIdx+56, 0), PageBase(BaseChunkIdx+57, 0)}, - {PageBase(BaseChunkIdx+58, 0), PageBase(BaseChunkIdx+59, 0)}, - {PageBase(BaseChunkIdx+60, 0), PageBase(BaseChunkIdx+61, 0)}, - {PageBase(BaseChunkIdx+62, 0), PageBase(BaseChunkIdx+63, 0)}, - {PageBase(BaseChunkIdx+64, 0), PageBase(BaseChunkIdx+65, 0)}, + MakeAddrRange(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+1, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+2, 0), PageBase(BaseChunkIdx+3, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+4, 0), PageBase(BaseChunkIdx+5, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+6, 0), PageBase(BaseChunkIdx+7, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+8, 0), PageBase(BaseChunkIdx+9, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+10, 0), PageBase(BaseChunkIdx+11, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+12, 0), PageBase(BaseChunkIdx+13, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+14, 0), PageBase(BaseChunkIdx+15, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+16, 0), PageBase(BaseChunkIdx+17, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+18, 0), PageBase(BaseChunkIdx+19, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+20, 0), PageBase(BaseChunkIdx+21, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+22, 0), PageBase(BaseChunkIdx+23, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+24, 0), PageBase(BaseChunkIdx+25, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+26, 0), PageBase(BaseChunkIdx+27, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+28, 0), PageBase(BaseChunkIdx+29, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+30, 0), PageBase(BaseChunkIdx+31, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+32, 0), PageBase(BaseChunkIdx+33, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+34, 0), PageBase(BaseChunkIdx+35, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+36, 0), PageBase(BaseChunkIdx+37, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+38, 0), PageBase(BaseChunkIdx+39, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+40, 0), PageBase(BaseChunkIdx+41, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+42, 0), PageBase(BaseChunkIdx+43, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+44, 0), PageBase(BaseChunkIdx+45, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+46, 0), PageBase(BaseChunkIdx+47, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+48, 0), 
PageBase(BaseChunkIdx+49, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+50, 0), PageBase(BaseChunkIdx+51, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+52, 0), PageBase(BaseChunkIdx+53, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+54, 0), PageBase(BaseChunkIdx+55, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+56, 0), PageBase(BaseChunkIdx+57, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+58, 0), PageBase(BaseChunkIdx+59, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+60, 0), PageBase(BaseChunkIdx+61, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+62, 0), PageBase(BaseChunkIdx+63, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+64, 0), PageBase(BaseChunkIdx+65, 0)), }, }, } @@ -172,8 +172,8 @@ func TestPageAllocGrow(t *testing.T) { BaseChunkIdx + 0x100000, // constant translates to O(TiB) }, inUse: []AddrRange{ - {PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+1, 0)}, - {PageBase(BaseChunkIdx+0x100000, 0), PageBase(BaseChunkIdx+0x100001, 0)}, + MakeAddrRange(PageBase(BaseChunkIdx, 0), PageBase(BaseChunkIdx+1, 0)), + MakeAddrRange(PageBase(BaseChunkIdx+0x100000, 0), PageBase(BaseChunkIdx+0x100001, 0)), }, } } @@ -197,7 +197,7 @@ func TestPageAllocGrow(t *testing.T) { t.Fail() } else { for i := range want { - if want[i] != got[i] { + if !want[i].Equals(got[i]) { t.Fail() break } @@ -207,11 +207,11 @@ func TestPageAllocGrow(t *testing.T) { t.Logf("found inUse mismatch") t.Logf("got:") for i, r := range got { - t.Logf("\t#%d [0x%x, 0x%x)", i, r.Base, r.Limit) + t.Logf("\t#%d [0x%x, 0x%x)", i, r.Base(), r.Limit()) } t.Logf("want:") for i, r := range want { - t.Logf("\t#%d [0x%x, 0x%x)", i, r.Base, r.Limit) + t.Logf("\t#%d [0x%x, 0x%x)", i, r.Base(), r.Limit()) } } }) -- cgit v1.2.1 From f8aecbbff5b85e67fee95033b3a14f3df665ea18 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20M=C3=B6hrmann?= Date: Tue, 20 Oct 2020 14:21:07 +0200 Subject: runtime: move s390x HWCap CPU feature detection to internal/cpu MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change-Id: I7d9e31c3b342731ddd7329962426fdfc80e9ed87 Reviewed-on: https://go-review.googlesource.com/c/go/+/263803 Trust: Martin Möhrmann Run-TryBot: Martin Möhrmann TryBot-Result: Go Bot Reviewed-by: Tobias Klauser --- src/runtime/os_linux_s390x.go | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/os_linux_s390x.go b/src/runtime/os_linux_s390x.go index ee18fd1dc2..b9651f186c 100644 --- a/src/runtime/os_linux_s390x.go +++ b/src/runtime/os_linux_s390x.go @@ -6,15 +6,10 @@ package runtime import "internal/cpu" -const ( - // bit masks taken from bits/hwcap.h - _HWCAP_S390_VX = 2048 // vector facility -) - func archauxv(tag, val uintptr) { switch tag { - case _AT_HWCAP: // CPU capability bit flags - cpu.S390X.HasVX = val&_HWCAP_S390_VX != 0 + case _AT_HWCAP: + cpu.HWCap = uint(val) } } -- cgit v1.2.1 From 431d58da69e8c36d654876e7808f971c5667649c Mon Sep 17 00:00:00 2001 From: Elias Naur Date: Tue, 20 Oct 2020 11:01:46 +0200 Subject: all: add GOOS=ios GOARCH=amd64 target for the ios simulator The Go toolchain has supported the simulator for years, but always in buildmode=c-archive which is intrinsically externally linked and PIE. This CL moves that support from GOOS=darwin GOARCH=amd64 -tags=ios to just GOOS=ios GOARCH=amd64 to match the change for iOS devices. This change also forces external linking and defaults to buildmode=pie to support Go binaries in the default buildmode to run on the simulator. CL 255257 added the necessary support to the exec wrapper. 
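As a rough illustration of what the new target means for build constraints (the file, package, and function names below are hypothetical, not part of this change): simulator-only code can now be selected by GOOS/GOARCH alone, for example via the _ios_amd64 file-name suffix, instead of combining GOOS=darwin with a custom ios build tag.

	// stub_ios_amd64.go: the _ios_amd64 suffix restricts this file to
	// GOOS=ios GOARCH=amd64 (the simulator); no +build comment is needed.
	package sensors // hypothetical package

	// usingSimulator reports that hardware-backed features are stubbed out
	// when the binary is built for the iOS simulator.
	func usingSimulator() bool { return true }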
Updates #38485 Fixes #42100 Change-Id: I6e6ee0e8d421be53b31e3d403880e5b9b880d031 Reviewed-on: https://go-review.googlesource.com/c/go/+/263798 Reviewed-by: Austin Clements Reviewed-by: Cherry Zhang Trust: Elias Naur --- src/runtime/rt0_ios_amd64.s | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 src/runtime/rt0_ios_amd64.s (limited to 'src/runtime') diff --git a/src/runtime/rt0_ios_amd64.s b/src/runtime/rt0_ios_amd64.s new file mode 100644 index 0000000000..c6990324f4 --- /dev/null +++ b/src/runtime/rt0_ios_amd64.s @@ -0,0 +1,14 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +// internal linking executable entry point. +// ios/amd64 only supports external linking. +TEXT _rt0_amd64_ios(SB),NOSPLIT|NOFRAME,$0 + UNDEF + +// library entry point. +TEXT _rt0_amd64_ios_lib(SB),NOSPLIT|NOFRAME,$0 + JMP _rt0_amd64_darwin_lib(SB) -- cgit v1.2.1 From ad61343f886cc5ce677e7bd62385144b2ba7b8f5 Mon Sep 17 00:00:00 2001 From: Michael Pratt Date: Thu, 8 Oct 2020 14:38:39 -0400 Subject: runtime/internal/atomic: add 32-bit And/Or These will be used in a following CL to perform larger bit clear and bit set than And8/Or8. Change-Id: I60f7b1099e29b69eb64add77564faee862880a8d Reviewed-on: https://go-review.googlesource.com/c/go/+/260977 Run-TryBot: Michael Pratt TryBot-Result: Go Bot Reviewed-by: Cherry Zhang Trust: Michael Pratt --- src/runtime/internal/atomic/asm_386.s | 16 ++++ src/runtime/internal/atomic/asm_amd64.s | 16 ++++ src/runtime/internal/atomic/asm_mips64x.s | 26 ++++++ src/runtime/internal/atomic/asm_mipsx.s | 26 ++++++ src/runtime/internal/atomic/asm_ppc64x.s | 30 ++++++- src/runtime/internal/atomic/asm_s390x.s | 22 ++++- src/runtime/internal/atomic/atomic_386.go | 6 ++ src/runtime/internal/atomic/atomic_amd64.go | 6 ++ src/runtime/internal/atomic/atomic_arm.go | 20 +++++ src/runtime/internal/atomic/atomic_arm64.go | 6 ++ src/runtime/internal/atomic/atomic_arm64.s | 19 ++++ src/runtime/internal/atomic/atomic_mips64x.go | 6 ++ src/runtime/internal/atomic/atomic_mipsx.go | 6 ++ src/runtime/internal/atomic/atomic_ppc64x.go | 6 ++ src/runtime/internal/atomic/atomic_riscv64.go | 6 ++ src/runtime/internal/atomic/atomic_riscv64.s | 14 +++ src/runtime/internal/atomic/atomic_s390x.go | 6 ++ src/runtime/internal/atomic/atomic_test.go | 119 +++++++++++++++++++++++++- src/runtime/internal/atomic/atomic_wasm.go | 12 +++ src/runtime/internal/atomic/bench_test.go | 40 +++++++++ 20 files changed, 400 insertions(+), 8 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/internal/atomic/asm_386.s b/src/runtime/internal/atomic/asm_386.s index 7ebf675ac5..d82faef1f0 100644 --- a/src/runtime/internal/atomic/asm_386.s +++ b/src/runtime/internal/atomic/asm_386.s @@ -243,3 +243,19 @@ TEXT ·Store8(SB), NOSPLIT, $0-5 MOVB val+4(FP), AX XCHGB AX, 0(BX) RET + +// func Or(addr *uint32, v uint32) +TEXT ·Or(SB), NOSPLIT, $0-8 + MOVL ptr+0(FP), AX + MOVL val+4(FP), BX + LOCK + ORL BX, (AX) + RET + +// func And(addr *uint32, v uint32) +TEXT ·And(SB), NOSPLIT, $0-8 + MOVL ptr+0(FP), AX + MOVL val+4(FP), BX + LOCK + ANDL BX, (AX) + RET diff --git a/src/runtime/internal/atomic/asm_amd64.s b/src/runtime/internal/atomic/asm_amd64.s index 80fb31285d..2cf7c55870 100644 --- a/src/runtime/internal/atomic/asm_amd64.s +++ b/src/runtime/internal/atomic/asm_amd64.s @@ -169,3 +169,19 @@ TEXT runtime∕internal∕atomic·And8(SB), NOSPLIT, $0-9 LOCK ANDB BX, (AX) RET + +// 
func Or(addr *uint32, v uint32) +TEXT runtime∕internal∕atomic·Or(SB), NOSPLIT, $0-12 + MOVQ ptr+0(FP), AX + MOVL val+8(FP), BX + LOCK + ORL BX, (AX) + RET + +// func And(addr *uint32, v uint32) +TEXT runtime∕internal∕atomic·And(SB), NOSPLIT, $0-12 + MOVQ ptr+0(FP), AX + MOVL val+8(FP), BX + LOCK + ANDL BX, (AX) + RET diff --git a/src/runtime/internal/atomic/asm_mips64x.s b/src/runtime/internal/atomic/asm_mips64x.s index 03fb822929..a515683ebb 100644 --- a/src/runtime/internal/atomic/asm_mips64x.s +++ b/src/runtime/internal/atomic/asm_mips64x.s @@ -243,3 +243,29 @@ TEXT ·And8(SB), NOSPLIT, $0-9 BEQ R4, -4(PC) SYNC RET + +// func Or(addr *uint32, v uint32) +TEXT ·Or(SB), NOSPLIT, $0-12 + MOVV ptr+0(FP), R1 + MOVW val+8(FP), R2 + + SYNC + LL (R1), R3 + OR R2, R3 + SC R3, (R1) + BEQ R3, -4(PC) + SYNC + RET + +// func And(addr *uint32, v uint32) +TEXT ·And(SB), NOSPLIT, $0-12 + MOVV ptr+0(FP), R1 + MOVW val+8(FP), R2 + + SYNC + LL (R1), R3 + AND R2, R3 + SC R3, (R1) + BEQ R3, -4(PC) + SYNC + RET diff --git a/src/runtime/internal/atomic/asm_mipsx.s b/src/runtime/internal/atomic/asm_mipsx.s index 63bb548825..2b2cfabe08 100644 --- a/src/runtime/internal/atomic/asm_mipsx.s +++ b/src/runtime/internal/atomic/asm_mipsx.s @@ -172,3 +172,29 @@ try_and8: BEQ R4, try_and8 SYNC RET + +// func Or(addr *uint32, v uint32) +TEXT ·Or(SB), NOSPLIT, $0-8 + MOVW ptr+0(FP), R1 + MOVW val+4(FP), R2 + + SYNC + LL (R1), R3 + OR R2, R3 + SC R3, (R1) + BEQ R3, -4(PC) + SYNC + RET + +// func And(addr *uint32, v uint32) +TEXT ·And(SB), NOSPLIT, $0-8 + MOVW ptr+0(FP), R1 + MOVW val+4(FP), R2 + + SYNC + LL (R1), R3 + AND R2, R3 + SC R3, (R1) + BEQ R3, -4(PC) + SYNC + RET diff --git a/src/runtime/internal/atomic/asm_ppc64x.s b/src/runtime/internal/atomic/asm_ppc64x.s index c0237de4d0..bb009ab34d 100644 --- a/src/runtime/internal/atomic/asm_ppc64x.s +++ b/src/runtime/internal/atomic/asm_ppc64x.s @@ -222,8 +222,32 @@ TEXT runtime∕internal∕atomic·And8(SB), NOSPLIT, $0-9 MOVBZ val+8(FP), R4 LWSYNC again: - LBAR (R3),R6 - AND R4,R6 - STBCCC R6,(R3) + LBAR (R3), R6 + AND R4, R6 + STBCCC R6, (R3) + BNE again + RET + +// func Or(addr *uint32, v uint32) +TEXT runtime∕internal∕atomic·Or(SB), NOSPLIT, $0-12 + MOVD ptr+0(FP), R3 + MOVW val+8(FP), R4 + LWSYNC +again: + LWAR (R3), R6 + OR R4, R6 + STWCCC R6, (R3) + BNE again + RET + +// func And(addr *uint32, v uint32) +TEXT runtime∕internal∕atomic·And(SB), NOSPLIT, $0-12 + MOVD ptr+0(FP), R3 + MOVW val+8(FP), R4 + LWSYNC +again: + LWAR (R3),R6 + AND R4, R6 + STWCCC R6, (R3) BNE again RET diff --git a/src/runtime/internal/atomic/asm_s390x.s b/src/runtime/internal/atomic/asm_s390x.s index 9a19bc0ece..daf1f3cc9f 100644 --- a/src/runtime/internal/atomic/asm_s390x.s +++ b/src/runtime/internal/atomic/asm_s390x.s @@ -174,8 +174,8 @@ TEXT ·Xchguintptr(SB), NOSPLIT, $0-24 // func Or8(addr *uint8, v uint8) TEXT ·Or8(SB), NOSPLIT, $0-9 - MOVD ptr+0(FP), R3 - MOVBZ val+8(FP), R4 + MOVD ptr+0(FP), R3 + MOVBZ val+8(FP), R4 // We don't have atomic operations that work on individual bytes so we // need to align addr down to a word boundary and create a mask // containing v to OR with the entire word atomically. @@ -188,8 +188,8 @@ TEXT ·Or8(SB), NOSPLIT, $0-9 // func And8(addr *uint8, v uint8) TEXT ·And8(SB), NOSPLIT, $0-9 - MOVD ptr+0(FP), R3 - MOVBZ val+8(FP), R4 + MOVD ptr+0(FP), R3 + MOVBZ val+8(FP), R4 // We don't have atomic operations that work on individual bytes so we // need to align addr down to a word boundary and create a mask // containing v to AND with the entire word atomically. 
@@ -200,3 +200,17 @@ TEXT ·And8(SB), NOSPLIT, $0-9 RLL R5, R4, R4 // R4 = rotl(R4, R5) LAN R4, R6, 0(R3) // R6 = *R3; *R3 &= R4; (atomic) RET + +// func Or(addr *uint32, v uint32) +TEXT ·Or(SB), NOSPLIT, $0-12 + MOVD ptr+0(FP), R3 + MOVW val+8(FP), R4 + LAO R4, R6, 0(R3) // R6 = *R3; *R3 |= R4; (atomic) + RET + +// func And(addr *uint32, v uint32) +TEXT ·And(SB), NOSPLIT, $0-12 + MOVD ptr+0(FP), R3 + MOVW val+8(FP), R4 + LAN R4, R6, 0(R3) // R6 = *R3; *R3 &= R4; (atomic) + RET diff --git a/src/runtime/internal/atomic/atomic_386.go b/src/runtime/internal/atomic/atomic_386.go index 06ce6a5356..1bfcb1143d 100644 --- a/src/runtime/internal/atomic/atomic_386.go +++ b/src/runtime/internal/atomic/atomic_386.go @@ -69,6 +69,12 @@ func And8(ptr *uint8, val uint8) //go:noescape func Or8(ptr *uint8, val uint8) +//go:noescape +func And(ptr *uint32, val uint32) + +//go:noescape +func Or(ptr *uint32, val uint32) + // NOTE: Do not add atomicxor8 (XOR is not idempotent). //go:noescape diff --git a/src/runtime/internal/atomic/atomic_amd64.go b/src/runtime/internal/atomic/atomic_amd64.go index 1b71a16d94..e36eb83a11 100644 --- a/src/runtime/internal/atomic/atomic_amd64.go +++ b/src/runtime/internal/atomic/atomic_amd64.go @@ -77,6 +77,12 @@ func And8(ptr *uint8, val uint8) //go:noescape func Or8(ptr *uint8, val uint8) +//go:noescape +func And(ptr *uint32, val uint32) + +//go:noescape +func Or(ptr *uint32, val uint32) + // NOTE: Do not add atomicxor8 (XOR is not idempotent). //go:noescape diff --git a/src/runtime/internal/atomic/atomic_arm.go b/src/runtime/internal/atomic/atomic_arm.go index 67d529c1cb..546b3d6120 100644 --- a/src/runtime/internal/atomic/atomic_arm.go +++ b/src/runtime/internal/atomic/atomic_arm.go @@ -182,6 +182,26 @@ func And8(addr *uint8, v uint8) { } } +//go:nosplit +func Or(addr *uint32, v uint32) { + for { + old := *addr + if Cas(addr, old, old|v) { + return + } + } +} + +//go:nosplit +func And(addr *uint32, v uint32) { + for { + old := *addr + if Cas(addr, old, old&v) { + return + } + } +} + //go:nosplit func armcas(ptr *uint32, old, new uint32) bool diff --git a/src/runtime/internal/atomic/atomic_arm64.go b/src/runtime/internal/atomic/atomic_arm64.go index c9b4322fe9..d49bee8936 100644 --- a/src/runtime/internal/atomic/atomic_arm64.go +++ b/src/runtime/internal/atomic/atomic_arm64.go @@ -53,6 +53,12 @@ func Or8(ptr *uint8, val uint8) //go:noescape func And8(ptr *uint8, val uint8) +//go:noescape +func And(ptr *uint32, val uint32) + +//go:noescape +func Or(ptr *uint32, val uint32) + //go:noescape func Cas64(ptr *uint64, old, new uint64) bool diff --git a/src/runtime/internal/atomic/atomic_arm64.s b/src/runtime/internal/atomic/atomic_arm64.s index 36c7698b18..0cf3c40223 100644 --- a/src/runtime/internal/atomic/atomic_arm64.s +++ b/src/runtime/internal/atomic/atomic_arm64.s @@ -164,3 +164,22 @@ TEXT ·Or8(SB), NOSPLIT, $0-9 CBNZ R3, -3(PC) RET +// func And(addr *uint32, v uint32) +TEXT ·And(SB), NOSPLIT, $0-12 + MOVD ptr+0(FP), R0 + MOVW val+8(FP), R1 + LDAXRW (R0), R2 + AND R1, R2 + STLXRW R2, (R0), R3 + CBNZ R3, -3(PC) + RET + +// func Or(addr *uint32, v uint32) +TEXT ·Or(SB), NOSPLIT, $0-12 + MOVD ptr+0(FP), R0 + MOVW val+8(FP), R1 + LDAXRW (R0), R2 + ORR R1, R2 + STLXRW R2, (R0), R3 + CBNZ R3, -3(PC) + RET diff --git a/src/runtime/internal/atomic/atomic_mips64x.go b/src/runtime/internal/atomic/atomic_mips64x.go index fca2242514..b0109d72b0 100644 --- a/src/runtime/internal/atomic/atomic_mips64x.go +++ b/src/runtime/internal/atomic/atomic_mips64x.go @@ -55,6 +55,12 @@ func Or8(ptr 
*uint8, val uint8) // NOTE: Do not add atomicxor8 (XOR is not idempotent). +//go:noescape +func And(ptr *uint32, val uint32) + +//go:noescape +func Or(ptr *uint32, val uint32) + //go:noescape func Cas64(ptr *uint64, old, new uint64) bool diff --git a/src/runtime/internal/atomic/atomic_mipsx.go b/src/runtime/internal/atomic/atomic_mipsx.go index be1e6a038b..1336b50121 100644 --- a/src/runtime/internal/atomic/atomic_mipsx.go +++ b/src/runtime/internal/atomic/atomic_mipsx.go @@ -141,6 +141,12 @@ func And8(ptr *uint8, val uint8) //go:noescape func Or8(ptr *uint8, val uint8) +//go:noescape +func And(ptr *uint32, val uint32) + +//go:noescape +func Or(ptr *uint32, val uint32) + //go:noescape func Store(ptr *uint32, val uint32) diff --git a/src/runtime/internal/atomic/atomic_ppc64x.go b/src/runtime/internal/atomic/atomic_ppc64x.go index e759bb27a2..e4b109f0ec 100644 --- a/src/runtime/internal/atomic/atomic_ppc64x.go +++ b/src/runtime/internal/atomic/atomic_ppc64x.go @@ -55,6 +55,12 @@ func Or8(ptr *uint8, val uint8) // NOTE: Do not add atomicxor8 (XOR is not idempotent). +//go:noescape +func And(ptr *uint32, val uint32) + +//go:noescape +func Or(ptr *uint32, val uint32) + //go:noescape func Cas64(ptr *uint64, old, new uint64) bool diff --git a/src/runtime/internal/atomic/atomic_riscv64.go b/src/runtime/internal/atomic/atomic_riscv64.go index 617bc1a3eb..8f24d61625 100644 --- a/src/runtime/internal/atomic/atomic_riscv64.go +++ b/src/runtime/internal/atomic/atomic_riscv64.go @@ -51,6 +51,12 @@ func Or8(ptr *uint8, val uint8) //go:noescape func And8(ptr *uint8, val uint8) +//go:noescape +func And(ptr *uint32, val uint32) + +//go:noescape +func Or(ptr *uint32, val uint32) + //go:noescape func Cas64(ptr *uint64, old, new uint64) bool diff --git a/src/runtime/internal/atomic/atomic_riscv64.s b/src/runtime/internal/atomic/atomic_riscv64.s index db139d690a..74c896cea6 100644 --- a/src/runtime/internal/atomic/atomic_riscv64.s +++ b/src/runtime/internal/atomic/atomic_riscv64.s @@ -242,3 +242,17 @@ TEXT ·Or8(SB), NOSPLIT, $0-9 SLL A2, A1 AMOORW A1, (A0), ZERO RET + +// func And(ptr *uint32, val uint32) +TEXT ·And(SB), NOSPLIT, $0-12 + MOV ptr+0(FP), A0 + MOVW val+8(FP), A1 + AMOANDW A1, (A0), ZERO + RET + +// func Or(ptr *uint32, val uint32) +TEXT ·Or(SB), NOSPLIT, $0-12 + MOV ptr+0(FP), A0 + MOVW val+8(FP), A1 + AMOORW A1, (A0), ZERO + RET diff --git a/src/runtime/internal/atomic/atomic_s390x.go b/src/runtime/internal/atomic/atomic_s390x.go index b649caa39f..a058d60102 100644 --- a/src/runtime/internal/atomic/atomic_s390x.go +++ b/src/runtime/internal/atomic/atomic_s390x.go @@ -91,6 +91,12 @@ func Or8(ptr *uint8, val uint8) // NOTE: Do not add atomicxor8 (XOR is not idempotent). +//go:noescape +func And(ptr *uint32, val uint32) + +//go:noescape +func Or(ptr *uint32, val uint32) + //go:noescape func Xadd(ptr *uint32, delta int32) uint32 diff --git a/src/runtime/internal/atomic/atomic_test.go b/src/runtime/internal/atomic/atomic_test.go index a9f95077c0..c9c2eba248 100644 --- a/src/runtime/internal/atomic/atomic_test.go +++ b/src/runtime/internal/atomic/atomic_test.go @@ -150,6 +150,45 @@ func TestAnd8(t *testing.T) { } } +func TestAnd(t *testing.T) { + // Basic sanity check. 
+ x := uint32(0xffffffff) + for i := uint32(0); i < 32; i++ { + atomic.And(&x, ^(1 << i)) + if r := uint32(0xffffffff) << (i + 1); x != r { + t.Fatalf("clearing bit %#x: want %#x, got %#x", uint32(1< Date: Thu, 1 Oct 2020 15:21:37 -0400 Subject: runtime: don't attempt to steal from idle Ps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Work stealing is a scalability bottleneck in the scheduler. Since each P has a work queue, work stealing must look at every P to determine if there is any work. The number of Ps scales linearly with GOMAXPROCS (i.e., the number of Ps _is_ GOMAXPROCS), thus this work scales linearly with GOMAXPROCS. Work stealing is a later attempt by a P to find work before it goes idle. Since the P has no work of its own, extra costs here tend not to directly affect application-level benchmarks. Where they show up is extra CPU usage by the process as a whole. These costs get particularly expensive for applications that transition between blocked and running frequently. Long term, we need a more scalable approach in general, but for now we can make a simple observation: idle Ps ([1]) cannot possibly have anything in their runq, so we need not bother checking at all. We track idle Ps via a new global bitmap, updated in pidleput/pidleget. This is already a slow path (requires sched.lock), so we don't expect high contention there. Using a single bitmap avoids the need to touch every P to read p.status. Currently, the bitmap approach is not significantly better than reading p.status. However, in a future CL I'd like to apply a similiar optimization to timers. Once done, findrunnable would not touch most Ps at all (in mostly idle programs), which will avoid memory latency to pull those Ps into cache. When reading this bitmap, we are racing with Ps going in and out of idle, so there are a few cases to consider: 1. _Prunning -> _Pidle: Running P goes idle after we check the bitmap. In this case, we will try to steal (and find nothing) so there is no harm. 2. _Pidle -> _Prunning while spinning: A P that starts running may queue new work that we miss. This is OK: (a) that P cannot go back to sleep without completing its work, and (b) more fundamentally, we will recheck after we drop our P. 3. _Pidle -> _Prunning after spinning: After spinning, we really can miss work from a newly woken P. (a) above still applies here as well, but this is also the same delicate dance case described in findrunnable: if nothing is spinning anymore, the other P will unpark a thread to run the work it submits. Benchmark results from WakeupParallel/syscall/pair/race/1ms (see golang.org/cl/228577): name old msec new msec delta Perf-task-clock-8 250 ± 1% 247 ± 4% ~ (p=0.690 n=5+5) Perf-task-clock-16 258 ± 2% 259 ± 2% ~ (p=0.841 n=5+5) Perf-task-clock-32 284 ± 2% 270 ± 4% -4.94% (p=0.032 n=5+5) Perf-task-clock-64 326 ± 3% 303 ± 2% -6.92% (p=0.008 n=5+5) Perf-task-clock-128 407 ± 2% 363 ± 5% -10.69% (p=0.008 n=5+5) Perf-task-clock-256 561 ± 1% 481 ± 1% -14.20% (p=0.016 n=4+5) Perf-task-clock-512 840 ± 5% 683 ± 2% -18.70% (p=0.008 n=5+5) Perf-task-clock-1024 1.38k ±14% 1.07k ± 2% -21.85% (p=0.008 n=5+5) [1] "Idle Ps" here refers to _Pidle Ps in the sched.pidle list. In other contexts, Ps may temporarily transition through _Pidle (e.g., in handoffp); those Ps may have work. 
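The scheme described above can be sketched in a small standalone program, using only sync/atomic rather than the runtime-internal atomic.Or/And added earlier in this series; the type and method names here are illustrative and are not the runtime's own, and the real mask is written only together with the _Pidle list under sched.lock:

	// idlemask_sketch.go: a minimal, self-contained model of a one-bit-per-P
	// idle mask that readers can consult without taking any lock.
	package main

	import (
		"fmt"
		"sync/atomic"
	)

	type idleMask []uint32

	// read reports whether P id is marked idle. Lock-free for readers.
	func (m idleMask) read(id uint32) bool {
		word, bit := id/32, uint32(1)<<(id%32)
		return atomic.LoadUint32(&m[word])&bit != 0
	}

	// set marks P id idle. In the runtime this happens together with the
	// sched.pidle update under sched.lock; the CAS loop stands in for the
	// internal atomic.Or.
	func (m idleMask) set(id uint32) {
		word, bit := id/32, uint32(1)<<(id%32)
		for {
			old := atomic.LoadUint32(&m[word])
			if atomic.CompareAndSwapUint32(&m[word], old, old|bit) {
				return
			}
		}
	}

	// clear marks P id runnable again (stand-in for the internal atomic.And).
	func (m idleMask) clear(id uint32) {
		word, bit := id/32, uint32(1)<<(id%32)
		for {
			old := atomic.LoadUint32(&m[word])
			if atomic.CompareAndSwapUint32(&m[word], old, old&^bit) {
				return
			}
		}
	}

	func main() {
		m := make(idleMask, 2) // 64 bits: enough for GOMAXPROCS up to 64
		m.set(37)
		fmt.Println(m.read(37), m.read(3)) // true false
		m.clear(37)
		fmt.Println(m.read(37)) // false
	}

A work-stealing loop can then skip any P whose bit is set, which is exactly the check the patch below adds in findrunnable.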
Updates #28808 Updates #18237 Change-Id: Ieeb958bd72e7d8fb375b0b1f414e8d7378b14e29 Reviewed-on: https://go-review.googlesource.com/c/go/+/259578 Run-TryBot: Michael Pratt TryBot-Result: Go Bot Reviewed-by: Michael Knyszek Reviewed-by: Austin Clements Trust: Michael Pratt --- src/runtime/proc.go | 78 +++++++++++++++++++++++++++++++++++++++++++++---- src/runtime/runtime2.go | 12 ++++++-- 2 files changed, 82 insertions(+), 8 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/proc.go b/src/runtime/proc.go index e1de70a997..d088b969c8 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -2295,8 +2295,12 @@ top: if _p_ == p2 { continue } - if gp := runqsteal(_p_, p2, stealRunNextG); gp != nil { - return gp, false + + // Don't bother to attempt to steal if p2 is idle. + if !idlepMask.read(enum.position()) { + if gp := runqsteal(_p_, p2, stealRunNextG); gp != nil { + return gp, false + } } // Consider stealing timers from p2. @@ -2307,8 +2311,13 @@ top: // and is not marked for preemption. If p2 is running // and not being preempted we assume it will handle its // own timers. + // // If we're still looking for work after checking all // the P's, then go ahead and steal from an active P. + // + // TODO(prattmic): Maintain a global look-aside similar + // to idlepMask to avoid looking at p2 if it can't + // possibly have timers. if i > 2 || (i > 1 && shouldStealTimers(p2)) { tnow, w, ran := checkTimers(p2, now) now = tnow @@ -2379,6 +2388,9 @@ stop: // safe-points. We don't need to snapshot the contents because // everything up to cap(allp) is immutable. allpSnapshot := allp + // Also snapshot idlepMask. Value changes are OK, but we can't allow + // len to change out from under us. + idlepMaskSnapshot := idlepMask // return P and block lock(&sched.lock) @@ -2419,8 +2431,8 @@ stop: } // check all runqueues once again - for _, _p_ := range allpSnapshot { - if !runqempty(_p_) { + for id, _p_ := range allpSnapshot { + if !idlepMaskSnapshot.read(uint32(id)) && !runqempty(_p_) { lock(&sched.lock) _p_ = pidleget() unlock(&sched.lock) @@ -4398,6 +4410,8 @@ func procresize(nprocs int32) *p { } sched.procresizetime = now + maskWords := (nprocs+31) / 32 + // Grow allp if necessary. if nprocs > int32(len(allp)) { // Synchronize with retake, which could be running @@ -4412,6 +4426,15 @@ func procresize(nprocs int32) *p { copy(nallp, allp[:cap(allp)]) allp = nallp } + + if maskWords <= int32(cap(idlepMask)) { + idlepMask = idlepMask[:maskWords] + } else { + nidlepMask := make([]uint32, maskWords) + // No need to copy beyond len, old Ps are irrelevant. + copy(nidlepMask, idlepMask) + idlepMask = nidlepMask + } unlock(&allpLock) } @@ -4470,6 +4493,7 @@ func procresize(nprocs int32) *p { if int32(len(allp)) != nprocs { lock(&allpLock) allp = allp[:nprocs] + idlepMask = idlepMask[:maskWords] unlock(&allpLock) } @@ -5153,8 +5177,46 @@ func globrunqget(_p_ *p, max int32) *g { return gp } -// Put p to on _Pidle list. +// pIdleMask is a bitmap of of Ps in the _Pidle list, one bit per P. +type pIdleMask []uint32 + +// read returns true if P id is in the _Pidle list, and thus cannot have work. +func (p pIdleMask) read(id uint32) bool { + word := id / 32 + mask := uint32(1) << (id % 32) + return (atomic.Load(&p[word]) & mask) != 0 +} + +// set sets P id as idle in mask. +// +// Must be called only for a P owned by the caller. 
In order to maintain +// consistency, a P going idle must the idle mask simultaneously with updates +// to the idle P list under the sched.lock, otherwise a racing pidleget may +// clear the mask before pidleput sets the mask, corrupting the bitmap. +// +// N.B., procresize takes ownership of all Ps in stopTheWorldWithSema. +func (p pIdleMask) set(id int32) { + word := id / 32 + mask := uint32(1) << (id % 32) + atomic.Or(&p[word], mask) +} + +// clear sets P id as non-idle in mask. +// +// See comment on set. +func (p pIdleMask) clear(id int32) { + word := id / 32 + mask := uint32(1) << (id % 32) + atomic.And(&p[word], ^mask) +} + +// pidleput puts p to on the _Pidle list. +// +// This releases ownership of p. Once sched.lock is released it is no longer +// safe to use p. +// // sched.lock must be held. +// // May run during STW, so write barriers are not allowed. //go:nowritebarrierrec func pidleput(_p_ *p) { @@ -5163,13 +5225,16 @@ func pidleput(_p_ *p) { if !runqempty(_p_) { throw("pidleput: P has non-empty run queue") } + idlepMask.set(_p_.id) _p_.link = sched.pidle sched.pidle.set(_p_) atomic.Xadd(&sched.npidle, 1) // TODO: fast atomic } -// Try get a p from _Pidle list. +// pidleget tries to get a p from the _Pidle list, acquiring ownership. +// // sched.lock must be held. +// // May run during STW, so write barriers are not allowed. //go:nowritebarrierrec func pidleget() *p { @@ -5177,6 +5242,7 @@ func pidleget() *p { _p_ := sched.pidle.ptr() if _p_ != nil { + idlepMask.clear(_p_.id) sched.pidle = _p_.link atomic.Xadd(&sched.npidle, -1) // TODO: fast atomic } diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go index 519872b8e2..0758a35e01 100644 --- a/src/runtime/runtime2.go +++ b/src/runtime/runtime2.go @@ -1035,14 +1035,22 @@ func (w waitReason) String() string { var ( allglen uintptr allm *m - allp []*p // len(allp) == gomaxprocs; may change at safe points, otherwise immutable - allpLock mutex // Protects P-less reads of allp and all writes gomaxprocs int32 ncpu int32 forcegc forcegcstate sched schedt newprocs int32 + // allpLock protects P-less reads and size changes of allp and + // idlepMask, and all writes to allp. + allpLock mutex + // len(allp) == gomaxprocs; may change at safe points, otherwise + // immutable. + allp []*p + // Bitmask of Ps in _Pidle list, one bit per P. Reads and writes must + // be atomic. Length may change at safe points. + idlepMask pIdleMask + // Information about what cpu features are available. // Packages outside the runtime should not use these // as they are not an external api. -- cgit v1.2.1 From e313fd7448ed0dabf98dc725bee2361e905f208b Mon Sep 17 00:00:00 2001 From: Michael Pratt Date: Thu, 9 Jul 2020 16:20:48 -0400 Subject: runtime: drop unused work.ndone field This field is unused since golang.org/cl/134785 and thus can be trivially removed. Change-Id: I1a87f8e78ffdf662440409404f0251c40bc56a4f Reviewed-on: https://go-review.googlesource.com/c/go/+/241741 Trust: Michael Pratt Run-TryBot: Michael Pratt TryBot-Result: Go Bot Reviewed-by: Michael Knyszek --- src/runtime/mgc.go | 1 - 1 file changed, 1 deletion(-) (limited to 'src/runtime') diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index 0a4d5616a5..65ac654b14 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -983,7 +983,6 @@ var work struct { nproc uint32 tstart int64 nwait uint32 - ndone uint32 // Number of roots of various root types. Set by gcMarkRootPrepare. 
nFlushCacheRoots int -- cgit v1.2.1 From ad642727247383079c8546ca365172859641a800 Mon Sep 17 00:00:00 2001 From: Michael Pratt Date: Tue, 25 Aug 2020 12:34:02 -0400 Subject: runtime: rename pageAlloc receiver The history of pageAlloc using 's' as a receiver are lost to the depths of time (perhaps it used to be called summary?), but it doesn't make much sense anymore. Rename it to 'p'. Generated with: $ cd src/runtime $ grep -R -b "func (s \*pageAlloc" . | awk -F : '{ print $1 ":#" $2+6 }' | xargs -n 1 -I {} env GOROOT=$(pwd)/../../ gorename -offset {} -to p -v $ grep -R -b "func (s \*pageAlloc" . | awk -F : '{ print $1 ":#" $2+6 }' | xargs -n 1 -I {} env GOROOT=$(pwd)/../../ GOARCH=386 gorename -offset {} -to p -v $ GOROOT=$(pwd)/../../ gorename -offset mpagecache.go:#2397 -to p -v ($2+6 to advance past "func (".) Plus manual comment fixups. Change-Id: I2d521a1cbf6ebe2ef6aae92e654bfc33c63d1aa9 Reviewed-on: https://go-review.googlesource.com/c/go/+/250517 Trust: Michael Pratt Run-TryBot: Michael Pratt TryBot-Result: Go Bot Reviewed-by: Michael Knyszek --- src/runtime/mgcscavenge.go | 102 ++++++++++---------- src/runtime/mpagealloc.go | 200 ++++++++++++++++++++-------------------- src/runtime/mpagealloc_32bit.go | 14 +-- src/runtime/mpagealloc_64bit.go | 30 +++--- src/runtime/mpagecache.go | 42 ++++----- 5 files changed, 194 insertions(+), 194 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/mgcscavenge.go b/src/runtime/mgcscavenge.go index 9d6f551768..34646828e5 100644 --- a/src/runtime/mgcscavenge.go +++ b/src/runtime/mgcscavenge.go @@ -390,13 +390,13 @@ func bgscavenge(c chan int) { // // Returns the amount of memory scavenged in bytes. // -// s.mheapLock must be held, but may be temporarily released if +// p.mheapLock must be held, but may be temporarily released if // mayUnlock == true. // -// Must run on the system stack because s.mheapLock must be held. +// Must run on the system stack because p.mheapLock must be held. // //go:systemstack -func (s *pageAlloc) scavenge(nbytes uintptr, mayUnlock bool) uintptr { +func (p *pageAlloc) scavenge(nbytes uintptr, mayUnlock bool) uintptr { var ( addrs addrRange gen uint32 @@ -404,17 +404,17 @@ func (s *pageAlloc) scavenge(nbytes uintptr, mayUnlock bool) uintptr { released := uintptr(0) for released < nbytes { if addrs.size() == 0 { - if addrs, gen = s.scavengeReserve(); addrs.size() == 0 { + if addrs, gen = p.scavengeReserve(); addrs.size() == 0 { break } } - r, a := s.scavengeOne(addrs, nbytes-released, mayUnlock) + r, a := p.scavengeOne(addrs, nbytes-released, mayUnlock) released += r addrs = a } // Only unreserve the space which hasn't been scavenged or searched // to ensure we always make progress. - s.scavengeUnreserve(addrs, gen) + p.scavengeUnreserve(addrs, gen) return released } @@ -440,46 +440,46 @@ func printScavTrace(gen uint32, released uintptr, forced bool) { // scavengeStartGen starts a new scavenge generation, resetting // the scavenger's search space to the full in-use address space. // -// s.mheapLock must be held. +// p.mheapLock must be held. // -// Must run on the system stack because s.mheapLock must be held. +// Must run on the system stack because p.mheapLock must be held. 
// //go:systemstack -func (s *pageAlloc) scavengeStartGen() { +func (p *pageAlloc) scavengeStartGen() { if debug.scavtrace > 0 { - printScavTrace(s.scav.gen, s.scav.released, false) + printScavTrace(p.scav.gen, p.scav.released, false) } - s.inUse.cloneInto(&s.scav.inUse) + p.inUse.cloneInto(&p.scav.inUse) // Pick the new starting address for the scavenger cycle. var startAddr offAddr - if s.scav.scavLWM.lessThan(s.scav.freeHWM) { + if p.scav.scavLWM.lessThan(p.scav.freeHWM) { // The "free" high watermark exceeds the "scavenged" low watermark, // so there are free scavengable pages in parts of the address space // that the scavenger already searched, the high watermark being the // highest one. Pick that as our new starting point to ensure we // see those pages. - startAddr = s.scav.freeHWM + startAddr = p.scav.freeHWM } else { // The "free" high watermark does not exceed the "scavenged" low // watermark. This means the allocator didn't free any memory in // the range we scavenged last cycle, so we might as well continue // scavenging from where we were. - startAddr = s.scav.scavLWM + startAddr = p.scav.scavLWM } - s.scav.inUse.removeGreaterEqual(startAddr.addr()) + p.scav.inUse.removeGreaterEqual(startAddr.addr()) - // reservationBytes may be zero if s.inUse.totalBytes is small, or if + // reservationBytes may be zero if p.inUse.totalBytes is small, or if // scavengeReservationShards is large. This case is fine as the scavenger // will simply be turned off, but it does mean that scavengeReservationShards, // in concert with pallocChunkBytes, dictates the minimum heap size at which // the scavenger triggers. In practice this minimum is generally less than an // arena in size, so virtually every heap has the scavenger on. - s.scav.reservationBytes = alignUp(s.inUse.totalBytes, pallocChunkBytes) / scavengeReservationShards - s.scav.gen++ - s.scav.released = 0 - s.scav.freeHWM = minOffAddr - s.scav.scavLWM = maxOffAddr + p.scav.reservationBytes = alignUp(p.inUse.totalBytes, pallocChunkBytes) / scavengeReservationShards + p.scav.gen++ + p.scav.released = 0 + p.scav.freeHWM = minOffAddr + p.scav.scavLWM = maxOffAddr } // scavengeReserve reserves a contiguous range of the address space @@ -489,19 +489,19 @@ func (s *pageAlloc) scavengeStartGen() { // // Returns the reserved range and the scavenge generation number for it. // -// s.mheapLock must be held. +// p.mheapLock must be held. // -// Must run on the system stack because s.mheapLock must be held. +// Must run on the system stack because p.mheapLock must be held. // //go:systemstack -func (s *pageAlloc) scavengeReserve() (addrRange, uint32) { +func (p *pageAlloc) scavengeReserve() (addrRange, uint32) { // Start by reserving the minimum. - r := s.scav.inUse.removeLast(s.scav.reservationBytes) + r := p.scav.inUse.removeLast(p.scav.reservationBytes) // Return early if the size is zero; we don't want to use // the bogus address below. if r.size() == 0 { - return r, s.scav.gen + return r, p.scav.gen } // The scavenger requires that base be aligned to a @@ -511,27 +511,27 @@ func (s *pageAlloc) scavengeReserve() (addrRange, uint32) { newBase := alignDown(r.base.addr(), pallocChunkBytes) // Remove from inUse however much extra we just pulled out. - s.scav.inUse.removeGreaterEqual(newBase) + p.scav.inUse.removeGreaterEqual(newBase) r.base = offAddr{newBase} - return r, s.scav.gen + return r, p.scav.gen } // scavengeUnreserve returns an unscavenged portion of a range that was // previously reserved with scavengeReserve. 
// -// s.mheapLock must be held. +// p.mheapLock must be held. // -// Must run on the system stack because s.mheapLock must be held. +// Must run on the system stack because p.mheapLock must be held. // //go:systemstack -func (s *pageAlloc) scavengeUnreserve(r addrRange, gen uint32) { - if r.size() == 0 || gen != s.scav.gen { +func (p *pageAlloc) scavengeUnreserve(r addrRange, gen uint32) { + if r.size() == 0 || gen != p.scav.gen { return } if r.base.addr()%pallocChunkBytes != 0 { throw("unreserving unaligned region") } - s.scav.inUse.add(r) + p.scav.inUse.add(r) } // scavengeOne walks over address range work until it finds @@ -545,13 +545,13 @@ func (s *pageAlloc) scavengeUnreserve(r addrRange, gen uint32) { // // work's base address must be aligned to pallocChunkBytes. // -// s.mheapLock must be held, but may be temporarily released if +// p.mheapLock must be held, but may be temporarily released if // mayUnlock == true. // -// Must run on the system stack because s.mheapLock must be held. +// Must run on the system stack because p.mheapLock must be held. // //go:systemstack -func (s *pageAlloc) scavengeOne(work addrRange, max uintptr, mayUnlock bool) (uintptr, addrRange) { +func (p *pageAlloc) scavengeOne(work addrRange, max uintptr, mayUnlock bool) (uintptr, addrRange) { // Defensively check if we've recieved an empty address range. // If so, just return. if work.size() == 0 { @@ -586,12 +586,12 @@ func (s *pageAlloc) scavengeOne(work addrRange, max uintptr, mayUnlock bool) (ui // Helpers for locking and unlocking only if mayUnlock == true. lockHeap := func() { if mayUnlock { - lock(s.mheapLock) + lock(p.mheapLock) } } unlockHeap := func() { if mayUnlock { - unlock(s.mheapLock) + unlock(p.mheapLock) } } @@ -602,14 +602,14 @@ func (s *pageAlloc) scavengeOne(work addrRange, max uintptr, mayUnlock bool) (ui // by subtracting 1. maxAddr := work.limit.addr() - 1 maxChunk := chunkIndex(maxAddr) - if s.summary[len(s.summary)-1][maxChunk].max() >= uint(minPages) { + if p.summary[len(p.summary)-1][maxChunk].max() >= uint(minPages) { // We only bother looking for a candidate if there at least // minPages free pages at all. - base, npages := s.chunkOf(maxChunk).findScavengeCandidate(chunkPageIndex(maxAddr), minPages, maxPages) + base, npages := p.chunkOf(maxChunk).findScavengeCandidate(chunkPageIndex(maxAddr), minPages, maxPages) // If we found something, scavenge it and return! if npages != 0 { - work.limit = offAddr{s.scavengeRangeLocked(maxChunk, base, npages)} + work.limit = offAddr{p.scavengeRangeLocked(maxChunk, base, npages)} return uintptr(npages) * pageSize, work } } @@ -631,7 +631,7 @@ func (s *pageAlloc) scavengeOne(work addrRange, max uintptr, mayUnlock bool) (ui // that's fine. We're being optimistic anyway. // Check quickly if there are enough free pages at all. - if s.summary[len(s.summary)-1][i].max() < uint(minPages) { + if p.summary[len(p.summary)-1][i].max() < uint(minPages) { continue } @@ -641,7 +641,7 @@ func (s *pageAlloc) scavengeOne(work addrRange, max uintptr, mayUnlock bool) (ui // avoid races with heap growth. It may or may not be possible to also // see a nil pointer in this case if we do race with heap growth, but // just defensively ignore the nils. This operation is optimistic anyway. 
- l2 := (*[1 << pallocChunksL2Bits]pallocData)(atomic.Loadp(unsafe.Pointer(&s.chunks[i.l1()]))) + l2 := (*[1 << pallocChunksL2Bits]pallocData)(atomic.Loadp(unsafe.Pointer(&p.chunks[i.l1()]))) if l2 != nil && l2[i.l2()].hasScavengeCandidate(minPages) { return i, true } @@ -670,10 +670,10 @@ func (s *pageAlloc) scavengeOne(work addrRange, max uintptr, mayUnlock bool) (ui } // Find, verify, and scavenge if we can. - chunk := s.chunkOf(candidateChunkIdx) + chunk := p.chunkOf(candidateChunkIdx) base, npages := chunk.findScavengeCandidate(pallocChunkPages-1, minPages, maxPages) if npages > 0 { - work.limit = offAddr{s.scavengeRangeLocked(candidateChunkIdx, base, npages)} + work.limit = offAddr{p.scavengeRangeLocked(candidateChunkIdx, base, npages)} return uintptr(npages) * pageSize, work } @@ -690,21 +690,21 @@ func (s *pageAlloc) scavengeOne(work addrRange, max uintptr, mayUnlock bool) (ui // // Returns the base address of the scavenged region. // -// s.mheapLock must be held. -func (s *pageAlloc) scavengeRangeLocked(ci chunkIdx, base, npages uint) uintptr { - s.chunkOf(ci).scavenged.setRange(base, npages) +// p.mheapLock must be held. +func (p *pageAlloc) scavengeRangeLocked(ci chunkIdx, base, npages uint) uintptr { + p.chunkOf(ci).scavenged.setRange(base, npages) // Compute the full address for the start of the range. addr := chunkBase(ci) + uintptr(base)*pageSize // Update the scavenge low watermark. - if oAddr := (offAddr{addr}); oAddr.lessThan(s.scav.scavLWM) { - s.scav.scavLWM = oAddr + if oAddr := (offAddr{addr}); oAddr.lessThan(p.scav.scavLWM) { + p.scav.scavLWM = oAddr } // Only perform the actual scavenging if we're not in a test. // It's dangerous to do so otherwise. - if s.test { + if p.test { return addr } sysUnused(unsafe.Pointer(addr), uintptr(npages)*pageSize) diff --git a/src/runtime/mpagealloc.go b/src/runtime/mpagealloc.go index c90a6378bd..560babed03 100644 --- a/src/runtime/mpagealloc.go +++ b/src/runtime/mpagealloc.go @@ -299,7 +299,7 @@ type pageAlloc struct { test bool } -func (s *pageAlloc) init(mheapLock *mutex, sysStat *uint64) { +func (p *pageAlloc) init(mheapLock *mutex, sysStat *uint64) { if levelLogPages[0] > logMaxPackedValue { // We can't represent 1< s.end { - s.end = end + if end > p.end { + p.end = end } // Note that [base, limit) will never overlap with any existing // range inUse because grow only ever adds never-used memory // regions to the page allocator. - s.inUse.add(makeAddrRange(base, limit)) + p.inUse.add(makeAddrRange(base, limit)) // A grow operation is a lot like a free operation, so if our - // chunk ends up below s.searchAddr, update s.searchAddr to the + // chunk ends up below p.searchAddr, update p.searchAddr to the // new address, just like in free. - if b := (offAddr{base}); b.lessThan(s.searchAddr) { - s.searchAddr = b + if b := (offAddr{base}); b.lessThan(p.searchAddr) { + p.searchAddr = b } // Add entries into chunks, which is sparse, if needed. Then, @@ -387,21 +387,21 @@ func (s *pageAlloc) grow(base, size uintptr) { // Newly-grown memory is always considered scavenged. // Set all the bits in the scavenged bitmaps high. for c := chunkIndex(base); c < chunkIndex(limit); c++ { - if s.chunks[c.l1()] == nil { + if p.chunks[c.l1()] == nil { // Create the necessary l2 entry. // // Store it atomically to avoid races with readers which // don't acquire the heap lock. 
- r := sysAlloc(unsafe.Sizeof(*s.chunks[0]), s.sysStat) - atomic.StorepNoWB(unsafe.Pointer(&s.chunks[c.l1()]), r) + r := sysAlloc(unsafe.Sizeof(*p.chunks[0]), p.sysStat) + atomic.StorepNoWB(unsafe.Pointer(&p.chunks[c.l1()]), r) } - s.chunkOf(c).scavenged.setRange(0, pallocChunkPages) + p.chunkOf(c).scavenged.setRange(0, pallocChunkPages) } // Update summaries accordingly. The grow acts like a free, so // we need to ensure this newly-free memory is visible in the // summaries. - s.update(base, size/pageSize, true, false) + p.update(base, size/pageSize, true, false) } // update updates heap metadata. It must be called each time the bitmap @@ -411,8 +411,8 @@ func (s *pageAlloc) grow(base, size uintptr) { // a contiguous allocation or free between addr and addr+npages. alloc indicates // whether the operation performed was an allocation or a free. // -// s.mheapLock must be held. -func (s *pageAlloc) update(base, npages uintptr, contig, alloc bool) { +// p.mheapLock must be held. +func (p *pageAlloc) update(base, npages uintptr, contig, alloc bool) { // base, limit, start, and end are inclusive. limit := base + npages*pageSize - 1 sc, ec := chunkIndex(base), chunkIndex(limit) @@ -421,23 +421,23 @@ func (s *pageAlloc) update(base, npages uintptr, contig, alloc bool) { if sc == ec { // Fast path: the allocation doesn't span more than one chunk, // so update this one and if the summary didn't change, return. - x := s.summary[len(s.summary)-1][sc] - y := s.chunkOf(sc).summarize() + x := p.summary[len(p.summary)-1][sc] + y := p.chunkOf(sc).summarize() if x == y { return } - s.summary[len(s.summary)-1][sc] = y + p.summary[len(p.summary)-1][sc] = y } else if contig { // Slow contiguous path: the allocation spans more than one chunk // and at least one summary is guaranteed to change. - summary := s.summary[len(s.summary)-1] + summary := p.summary[len(p.summary)-1] // Update the summary for chunk sc. - summary[sc] = s.chunkOf(sc).summarize() + summary[sc] = p.chunkOf(sc).summarize() // Update the summaries for chunks in between, which are // either totally allocated or freed. - whole := s.summary[len(s.summary)-1][sc+1 : ec] + whole := p.summary[len(p.summary)-1][sc+1 : ec] if alloc { // Should optimize into a memclr. for i := range whole { @@ -450,22 +450,22 @@ func (s *pageAlloc) update(base, npages uintptr, contig, alloc bool) { } // Update the summary for chunk ec. - summary[ec] = s.chunkOf(ec).summarize() + summary[ec] = p.chunkOf(ec).summarize() } else { // Slow general path: the allocation spans more than one chunk // and at least one summary is guaranteed to change. // // We can't assume a contiguous allocation happened, so walk over // every chunk in the range and manually recompute the summary. - summary := s.summary[len(s.summary)-1] + summary := p.summary[len(p.summary)-1] for c := sc; c <= ec; c++ { - summary[c] = s.chunkOf(c).summarize() + summary[c] = p.chunkOf(c).summarize() } } // Walk up the radix tree and update the summaries appropriately. changed := true - for l := len(s.summary) - 2; l >= 0 && changed; l-- { + for l := len(p.summary) - 2; l >= 0 && changed; l-- { // Update summaries at level l from summaries at level l+1. changed = false @@ -479,12 +479,12 @@ func (s *pageAlloc) update(base, npages uintptr, contig, alloc bool) { // Iterate over each block, updating the corresponding summary in the less-granular level. 
for i := lo; i < hi; i++ { - children := s.summary[l+1][i<= s.end { + if chunkIndex(p.searchAddr.addr()) >= p.end { return 0, 0 } // If npages has a chance of fitting in the chunk where the searchAddr is, // search it directly. searchAddr := minOffAddr - if pallocChunkPages-chunkPageIndex(s.searchAddr.addr()) >= uint(npages) { + if pallocChunkPages-chunkPageIndex(p.searchAddr.addr()) >= uint(npages) { // npages is guaranteed to be no greater than pallocChunkPages here. - i := chunkIndex(s.searchAddr.addr()) - if max := s.summary[len(s.summary)-1][i].max(); max >= uint(npages) { - j, searchIdx := s.chunkOf(i).find(npages, chunkPageIndex(s.searchAddr.addr())) + i := chunkIndex(p.searchAddr.addr()) + if max := p.summary[len(p.summary)-1][i].max(); max >= uint(npages) { + j, searchIdx := p.chunkOf(i).find(npages, chunkPageIndex(p.searchAddr.addr())) if j == ^uint(0) { print("runtime: max = ", max, ", npages = ", npages, "\n") - print("runtime: searchIdx = ", chunkPageIndex(s.searchAddr.addr()), ", s.searchAddr = ", hex(s.searchAddr.addr()), "\n") + print("runtime: searchIdx = ", chunkPageIndex(p.searchAddr.addr()), ", p.searchAddr = ", hex(p.searchAddr.addr()), "\n") throw("bad summary data") } addr = chunkBase(i) + uintptr(j)*pageSize @@ -813,7 +813,7 @@ func (s *pageAlloc) alloc(npages uintptr) (addr uintptr, scav uintptr) { } // We failed to use a searchAddr for one reason or another, so try // the slow path. - addr, searchAddr = s.find(npages) + addr, searchAddr = p.find(npages) if addr == 0 { if npages == 1 { // We failed to find a single free page, the smallest unit @@ -821,41 +821,41 @@ func (s *pageAlloc) alloc(npages uintptr) (addr uintptr, scav uintptr) { // exhausted. Otherwise, the heap still might have free // space in it, just not enough contiguous space to // accommodate npages. - s.searchAddr = maxSearchAddr + p.searchAddr = maxSearchAddr } return 0, 0 } Found: // Go ahead and actually mark the bits now that we have an address. - scav = s.allocRange(addr, npages) + scav = p.allocRange(addr, npages) // If we found a higher searchAddr, we know that all the // heap memory before that searchAddr in an offset address space is - // allocated, so bump s.searchAddr up to the new one. - if s.searchAddr.lessThan(searchAddr) { - s.searchAddr = searchAddr + // allocated, so bump p.searchAddr up to the new one. + if p.searchAddr.lessThan(searchAddr) { + p.searchAddr = searchAddr } return addr, scav } // free returns npages worth of memory starting at base back to the page heap. // -// s.mheapLock must be held. -func (s *pageAlloc) free(base, npages uintptr) { - // If we're freeing pages below the s.searchAddr, update searchAddr. - if b := (offAddr{base}); b.lessThan(s.searchAddr) { - s.searchAddr = b +// p.mheapLock must be held. +func (p *pageAlloc) free(base, npages uintptr) { + // If we're freeing pages below the p.searchAddr, update searchAddr. + if b := (offAddr{base}); b.lessThan(p.searchAddr) { + p.searchAddr = b } // Update the free high watermark for the scavenger. limit := base + npages*pageSize - 1 - if offLimit := (offAddr{limit}); s.scav.freeHWM.lessThan(offLimit) { - s.scav.freeHWM = offLimit + if offLimit := (offAddr{limit}); p.scav.freeHWM.lessThan(offLimit) { + p.scav.freeHWM = offLimit } if npages == 1 { // Fast path: we're clearing a single bit, and we know exactly // where it is, so mark it directly. 
i := chunkIndex(base) - s.chunkOf(i).free1(chunkPageIndex(base)) + p.chunkOf(i).free1(chunkPageIndex(base)) } else { // Slow path: we're clearing more bits so we may need to iterate. sc, ec := chunkIndex(base), chunkIndex(limit) @@ -863,17 +863,17 @@ func (s *pageAlloc) free(base, npages uintptr) { if sc == ec { // The range doesn't cross any chunk boundaries. - s.chunkOf(sc).free(si, ei+1-si) + p.chunkOf(sc).free(si, ei+1-si) } else { // The range crosses at least one chunk boundary. - s.chunkOf(sc).free(si, pallocChunkPages-si) + p.chunkOf(sc).free(si, pallocChunkPages-si) for c := sc + 1; c < ec; c++ { - s.chunkOf(c).freeAll() + p.chunkOf(c).freeAll() } - s.chunkOf(ec).free(0, ei+1) + p.chunkOf(ec).free(0, ei+1) } } - s.update(base, npages, true, false) + p.update(base, npages, true, false) } const ( diff --git a/src/runtime/mpagealloc_32bit.go b/src/runtime/mpagealloc_32bit.go index 90f1e54d6c..331dadade9 100644 --- a/src/runtime/mpagealloc_32bit.go +++ b/src/runtime/mpagealloc_32bit.go @@ -60,7 +60,7 @@ var levelLogPages = [summaryLevels]uint{ } // See mpagealloc_64bit.go for details. -func (s *pageAlloc) sysInit() { +func (p *pageAlloc) sysInit() { // Calculate how much memory all our entries will take up. // // This should be around 12 KiB or less. @@ -76,7 +76,7 @@ func (s *pageAlloc) sysInit() { throw("failed to reserve page summary memory") } // There isn't much. Just map it and mark it as used immediately. - sysMap(reservation, totalSize, s.sysStat) + sysMap(reservation, totalSize, p.sysStat) sysUsed(reservation, totalSize) // Iterate over the reservation and cut it up into slices. @@ -88,29 +88,29 @@ func (s *pageAlloc) sysInit() { // Put this reservation into a slice. sl := notInHeapSlice{(*notInHeap)(reservation), 0, entries} - s.summary[l] = *(*[]pallocSum)(unsafe.Pointer(&sl)) + p.summary[l] = *(*[]pallocSum)(unsafe.Pointer(&sl)) reservation = add(reservation, uintptr(entries)*pallocSumBytes) } } // See mpagealloc_64bit.go for details. -func (s *pageAlloc) sysGrow(base, limit uintptr) { +func (p *pageAlloc) sysGrow(base, limit uintptr) { if base%pallocChunkBytes != 0 || limit%pallocChunkBytes != 0 { print("runtime: base = ", hex(base), ", limit = ", hex(limit), "\n") throw("sysGrow bounds not aligned to pallocChunkBytes") } // Walk up the tree and update the summary slices. - for l := len(s.summary) - 1; l >= 0; l-- { + for l := len(p.summary) - 1; l >= 0; l-- { // Figure out what part of the summary array this new address space needs. // Note that we need to align the ranges to the block width (1< len(s.summary[l]) { - s.summary[l] = s.summary[l][:hi] + if hi > len(p.summary[l]) { + p.summary[l] = p.summary[l][:hi] } } } diff --git a/src/runtime/mpagealloc_64bit.go b/src/runtime/mpagealloc_64bit.go index a1691ba802..ffacb46c18 100644 --- a/src/runtime/mpagealloc_64bit.go +++ b/src/runtime/mpagealloc_64bit.go @@ -67,7 +67,7 @@ var levelLogPages = [summaryLevels]uint{ // sysInit performs architecture-dependent initialization of fields // in pageAlloc. pageAlloc should be uninitialized except for sysStat // if any runtime statistic should be updated. -func (s *pageAlloc) sysInit() { +func (p *pageAlloc) sysInit() { // Reserve memory for each level. This will get mapped in // as R/W by setArenas. for l, shift := range levelShift { @@ -82,21 +82,21 @@ func (s *pageAlloc) sysInit() { // Put this reservation into a slice. 
sl := notInHeapSlice{(*notInHeap)(r), 0, entries} - s.summary[l] = *(*[]pallocSum)(unsafe.Pointer(&sl)) + p.summary[l] = *(*[]pallocSum)(unsafe.Pointer(&sl)) } } // sysGrow performs architecture-dependent operations on heap // growth for the page allocator, such as mapping in new memory // for summaries. It also updates the length of the slices in -// s.summary. +// p.summary. // // base is the base of the newly-added heap memory and limit is // the first address past the end of the newly-added heap memory. // Both must be aligned to pallocChunkBytes. // -// The caller must update s.start and s.end after calling sysGrow. -func (s *pageAlloc) sysGrow(base, limit uintptr) { +// The caller must update p.start and p.end after calling sysGrow. +func (p *pageAlloc) sysGrow(base, limit uintptr) { if base%pallocChunkBytes != 0 || limit%pallocChunkBytes != 0 { print("runtime: base = ", hex(base), ", limit = ", hex(limit), "\n") throw("sysGrow bounds not aligned to pallocChunkBytes") @@ -111,12 +111,12 @@ func (s *pageAlloc) sysGrow(base, limit uintptr) { } // summaryRangeToSumAddrRange converts a range of indices in any - // level of s.summary into page-aligned addresses which cover that + // level of p.summary into page-aligned addresses which cover that // range of indices. summaryRangeToSumAddrRange := func(level, sumIdxBase, sumIdxLimit int) addrRange { baseOffset := alignDown(uintptr(sumIdxBase)*pallocSumBytes, physPageSize) limitOffset := alignUp(uintptr(sumIdxLimit)*pallocSumBytes, physPageSize) - base := unsafe.Pointer(&s.summary[level][0]) + base := unsafe.Pointer(&p.summary[level][0]) return addrRange{ offAddr{uintptr(add(base, baseOffset))}, offAddr{uintptr(add(base, limitOffset))}, @@ -140,10 +140,10 @@ func (s *pageAlloc) sysGrow(base, limit uintptr) { // // This will be used to look at what memory in the summary array is already // mapped before and after this new range. - inUseIndex := s.inUse.findSucc(base) + inUseIndex := p.inUse.findSucc(base) // Walk up the radix tree and map summaries in as needed. - for l := range s.summary { + for l := range p.summary { // Figure out what part of the summary array this new address space needs. needIdxBase, needIdxLimit := addrRangeToSummaryRange(l, makeAddrRange(base, limit)) // Update the summary slices with a new upper-bound. This ensures // we get tight bounds checks on at least the top bound. // // We must do this regardless of whether we map new memory. - if needIdxLimit > len(s.summary[l]) { - s.summary[l] = s.summary[l][:needIdxLimit] + if needIdxLimit > len(p.summary[l]) { + p.summary[l] = p.summary[l][:needIdxLimit] } // Compute the needed address range in the summary array for level l. need := summaryRangeToSumAddrRange(l, needIdxBase, needIdxLimit) // Prune need down to what needs to be newly mapped. Some parts of it may // already be mapped by what inUse describes due to page alignment requirements // for mapping. prune's invariants are guaranteed by the fact that this // function will never be asked to remap the same memory twice. if inUseIndex > 0 { - need = need.subtract(addrRangeToSumAddrRange(l, s.inUse.ranges[inUseIndex-1])) + need = need.subtract(addrRangeToSumAddrRange(l, p.inUse.ranges[inUseIndex-1])) } - if inUseIndex < len(s.inUse.ranges) { - need = need.subtract(addrRangeToSumAddrRange(l, s.inUse.ranges[inUseIndex])) + if inUseIndex < len(p.inUse.ranges) { + need = need.subtract(addrRangeToSumAddrRange(l, p.inUse.ranges[inUseIndex])) } // It's possible that after our pruning above, there's nothing new to map. if need.size() == 0 { continue } // Map and commit need. 
- sysMap(unsafe.Pointer(need.base.addr()), need.size(), s.sysStat) + sysMap(unsafe.Pointer(need.base.addr()), need.size(), p.sysStat) sysUsed(unsafe.Pointer(need.base.addr()), need.size()) } } diff --git a/src/runtime/mpagecache.go b/src/runtime/mpagecache.go index 683a997136..5f76501a1c 100644 --- a/src/runtime/mpagecache.go +++ b/src/runtime/mpagecache.go @@ -71,8 +71,8 @@ func (c *pageCache) allocN(npages uintptr) (uintptr, uintptr) { // into s. Then, it clears the cache, such that empty returns // true. // -// s.mheapLock must be held or the world must be stopped. -func (c *pageCache) flush(s *pageAlloc) { +// p.mheapLock must be held or the world must be stopped. +func (c *pageCache) flush(p *pageAlloc) { if c.empty() { return } @@ -83,18 +83,18 @@ func (c *pageCache) flush(s *pageAlloc) { // slower, safer thing by iterating over each bit individually. for i := uint(0); i < 64; i++ { if c.cache&(1<= s.end { + if chunkIndex(p.searchAddr.addr()) >= p.end { return pageCache{} } c := pageCache{} - ci := chunkIndex(s.searchAddr.addr()) // chunk index - if s.summary[len(s.summary)-1][ci] != 0 { + ci := chunkIndex(p.searchAddr.addr()) // chunk index + if p.summary[len(p.summary)-1][ci] != 0 { // Fast path: there's free pages at or near the searchAddr address. - chunk := s.chunkOf(ci) - j, _ := chunk.find(1, chunkPageIndex(s.searchAddr.addr())) + chunk := p.chunkOf(ci) + j, _ := chunk.find(1, chunkPageIndex(p.searchAddr.addr())) if j == ^uint(0) { throw("bad summary data") } @@ -126,15 +126,15 @@ func (s *pageAlloc) allocToCache() pageCache { } else { // Slow path: the searchAddr address had nothing there, so go find // the first free page the slow way. - addr, _ := s.find(1) + addr, _ := p.find(1) if addr == 0 { // We failed to find adequate free space, so mark the searchAddr as OoM // and return an empty pageCache. - s.searchAddr = maxSearchAddr + p.searchAddr = maxSearchAddr return pageCache{} } ci := chunkIndex(addr) - chunk := s.chunkOf(ci) + chunk := p.chunkOf(ci) c = pageCache{ base: alignDown(addr, 64*pageSize), cache: ^chunk.pages64(chunkPageIndex(addr)), @@ -143,19 +143,19 @@ func (s *pageAlloc) allocToCache() pageCache { } // Set the bits as allocated and clear the scavenged bits. - s.allocRange(c.base, pageCachePages) + p.allocRange(c.base, pageCachePages) // Update as an allocation, but note that it's not contiguous. - s.update(c.base, pageCachePages, false, true) + p.update(c.base, pageCachePages, false, true) // Set the search address to the last page represented by the cache. // Since all of the pages in this block are going to the cache, and we // searched for the first free page, we can confidently start at the // next page. // - // However, s.searchAddr is not allowed to point into unmapped heap memory + // However, p.searchAddr is not allowed to point into unmapped heap memory // unless it is maxSearchAddr, so make it the last page as opposed to // the page after. - s.searchAddr = offAddr{c.base + pageSize*(pageCachePages-1)} + p.searchAddr = offAddr{c.base + pageSize*(pageCachePages-1)} return c } -- cgit v1.2.1 From d1b1145cace8b968307f9311ff611e4bb810710c Mon Sep 17 00:00:00 2001 From: "Andrew G. Morgan" Date: Mon, 9 Dec 2019 21:50:16 -0800 Subject: syscall: support POSIX semantics for Linux syscalls This change adds two new methods for invoking system calls under Linux: syscall.AllThreadsSyscall() and syscall.AllThreadsSyscall6(). These system call wrappers ensure that all OSThreads mirror a common system call. 
The wrappers serialize execution of the runtime to ensure no race conditions where any Go code observes a non-atomic OS state change. As such, the syscalls have higher runtime overhead than regular system calls, and only need to be used where such thread (or 'm' in the parlance of the runtime sources) consistency is required. The new support is used to enable these functions under Linux: syscall.Setegid(), syscall.Seteuid(), syscall.Setgroups(), syscall.Setgid(), syscall.Setregid(), syscall.Setreuid(), syscall.Setresgid(), syscall.Setresuid() and syscall.Setuid(). They work identically to their glibc counterparts. Extensive discussion of the background issue addressed in this patch can be found here: https://github.com/golang/go/issues/1435 In the case where cgo is used, the C runtime can launch pthreads that are not managed by the Go runtime. As such, the added syscall.AllThreadsSyscall*() return ENOTSUP when cgo is enabled. However, for the 9 syscall.Set*() functions listed above, when cgo is active, these functions redirect to invoke their C.set*() equivalents in glibc, which wraps the raw system calls with a nptl:setxid fixup mechanism. This achieves POSIX semantics for these functions in the combined Go and C runtime. As a side note, the glibc/nptl:setxid support (2019-11-30) does not extend to all security related system calls under Linux so using native Go (CGO_ENABLED=0) and these AllThreadsSyscall*()s, where needed, will yield more well defined/consistent behavior over all threads of a Go program. That is, using the syscall.AllThreadsSyscall*() wrappers for things like setting state through SYS_PRCTL and SYS_CAPSET etc. Fixes #1435 Change-Id: Ib1a3e16b9180f64223196a32fc0f9dce14d9105c Reviewed-on: https://go-review.googlesource.com/c/go/+/210639 Trust: Emmanuel Odeke Trust: Ian Lance Taylor Trust: Michael Pratt Run-TryBot: Emmanuel Odeke Reviewed-by: Michael Pratt Reviewed-by: Austin Clements --- src/runtime/cgo/linux.go | 74 ++++++++++++++ src/runtime/cgo/linux_syscall.c | 85 ++++++++++++++++ src/runtime/cgocall.go | 16 +++ src/runtime/proc.go | 210 +++++++++++++++++++++++++++++++++++++++- src/runtime/runtime2.go | 12 +++ 5 files changed, 392 insertions(+), 5 deletions(-) create mode 100644 src/runtime/cgo/linux.go create mode 100644 src/runtime/cgo/linux_syscall.c (limited to 'src/runtime') diff --git a/src/runtime/cgo/linux.go b/src/runtime/cgo/linux.go new file mode 100644 index 0000000000..76c0192c20 --- /dev/null +++ b/src/runtime/cgo/linux.go @@ -0,0 +1,74 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Linux system call wrappers that provide POSIX semantics through the +// corresponding cgo->libc (nptl) wrappers for various system calls. + +// +build linux + +package cgo + +import "unsafe" + +// Each of the following entries is needed to ensure that the +// syscall.syscall_linux code can conditionally call these +// function pointers: +// +// 1. find the C-defined function start +// 2. force the local byte alias to be mapped to that location +// 3. 
map the Go pointer to the function to the syscall package + +//go:cgo_import_static _cgo_libc_setegid +//go:linkname _cgo_libc_setegid _cgo_libc_setegid +//go:linkname cgo_libc_setegid syscall.cgo_libc_setegid +var _cgo_libc_setegid byte +var cgo_libc_setegid = unsafe.Pointer(&_cgo_libc_setegid) + +//go:cgo_import_static _cgo_libc_seteuid +//go:linkname _cgo_libc_seteuid _cgo_libc_seteuid +//go:linkname cgo_libc_seteuid syscall.cgo_libc_seteuid +var _cgo_libc_seteuid byte +var cgo_libc_seteuid = unsafe.Pointer(&_cgo_libc_seteuid) + +//go:cgo_import_static _cgo_libc_setregid +//go:linkname _cgo_libc_setregid _cgo_libc_setregid +//go:linkname cgo_libc_setregid syscall.cgo_libc_setregid +var _cgo_libc_setregid byte +var cgo_libc_setregid = unsafe.Pointer(&_cgo_libc_setregid) + +//go:cgo_import_static _cgo_libc_setresgid +//go:linkname _cgo_libc_setresgid _cgo_libc_setresgid +//go:linkname cgo_libc_setresgid syscall.cgo_libc_setresgid +var _cgo_libc_setresgid byte +var cgo_libc_setresgid = unsafe.Pointer(&_cgo_libc_setresgid) + +//go:cgo_import_static _cgo_libc_setresuid +//go:linkname _cgo_libc_setresuid _cgo_libc_setresuid +//go:linkname cgo_libc_setresuid syscall.cgo_libc_setresuid +var _cgo_libc_setresuid byte +var cgo_libc_setresuid = unsafe.Pointer(&_cgo_libc_setresuid) + +//go:cgo_import_static _cgo_libc_setreuid +//go:linkname _cgo_libc_setreuid _cgo_libc_setreuid +//go:linkname cgo_libc_setreuid syscall.cgo_libc_setreuid +var _cgo_libc_setreuid byte +var cgo_libc_setreuid = unsafe.Pointer(&_cgo_libc_setreuid) + +//go:cgo_import_static _cgo_libc_setgroups +//go:linkname _cgo_libc_setgroups _cgo_libc_setgroups +//go:linkname cgo_libc_setgroups syscall.cgo_libc_setgroups +var _cgo_libc_setgroups byte +var cgo_libc_setgroups = unsafe.Pointer(&_cgo_libc_setgroups) + +//go:cgo_import_static _cgo_libc_setgid +//go:linkname _cgo_libc_setgid _cgo_libc_setgid +//go:linkname cgo_libc_setgid syscall.cgo_libc_setgid +var _cgo_libc_setgid byte +var cgo_libc_setgid = unsafe.Pointer(&_cgo_libc_setgid) + +//go:cgo_import_static _cgo_libc_setuid +//go:linkname _cgo_libc_setuid _cgo_libc_setuid +//go:linkname cgo_libc_setuid syscall.cgo_libc_setuid +var _cgo_libc_setuid byte +var cgo_libc_setuid = unsafe.Pointer(&_cgo_libc_setuid) diff --git a/src/runtime/cgo/linux_syscall.c b/src/runtime/cgo/linux_syscall.c new file mode 100644 index 0000000000..c8e91918a1 --- /dev/null +++ b/src/runtime/cgo/linux_syscall.c @@ -0,0 +1,85 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build linux + +#ifndef _GNU_SOURCE // setres[ug]id() API. +#define _GNU_SOURCE +#endif + +#include +#include +#include +#include +#include "libcgo.h" + +/* + * Assumed POSIX compliant libc system call wrappers. For linux, the + * glibc/nptl/setxid mechanism ensures that POSIX semantics are + * honored for all pthreads (by default), and this in turn with cgo + * ensures that all Go threads launched with cgo are kept in sync for + * these function calls. + */ + +// argset_t matches runtime/cgocall.go:argset. +typedef struct { + uintptr_t* args; + uintptr_t retval; +} argset_t; + +// libc backed posix-compliant syscalls. 
+ +#define SET_RETVAL(fn) \ + uintptr_t ret = (uintptr_t) fn ; \ + if (ret == -1) { \ + x->retval = (uintptr_t) errno; \ + } else \ + x->retval = ret + +void +_cgo_libc_setegid(argset_t* x) { + SET_RETVAL(setegid((gid_t) x->args[0])); +} + +void +_cgo_libc_seteuid(argset_t* x) { + SET_RETVAL(seteuid((uid_t) x->args[0])); +} + +void +_cgo_libc_setgid(argset_t* x) { + SET_RETVAL(setgid((gid_t) x->args[0])); +} + +void +_cgo_libc_setgroups(argset_t* x) { + SET_RETVAL(setgroups((size_t) x->args[0], (const gid_t *) x->args[1])); +} + +void +_cgo_libc_setregid(argset_t* x) { + SET_RETVAL(setregid((gid_t) x->args[0], (gid_t) x->args[1])); +} + +void +_cgo_libc_setresgid(argset_t* x) { + SET_RETVAL(setresgid((gid_t) x->args[0], (gid_t) x->args[1], + (gid_t) x->args[2])); +} + +void +_cgo_libc_setresuid(argset_t* x) { + SET_RETVAL(setresuid((uid_t) x->args[0], (uid_t) x->args[1], + (uid_t) x->args[2])); +} + +void +_cgo_libc_setreuid(argset_t* x) { + SET_RETVAL(setreuid((uid_t) x->args[0], (uid_t) x->args[1])); +} + +void +_cgo_libc_setuid(argset_t* x) { + SET_RETVAL(setuid((uid_t) x->args[0])); +} diff --git a/src/runtime/cgocall.go b/src/runtime/cgocall.go index 0b69ff3233..7ab42a0ed0 100644 --- a/src/runtime/cgocall.go +++ b/src/runtime/cgocall.go @@ -93,6 +93,22 @@ import ( // Length must match arg.Max in x_cgo_callers in runtime/cgo/gcc_traceback.c. type cgoCallers [32]uintptr +// argset matches runtime/cgo/linux_syscall.c:argset_t +type argset struct { + args unsafe.Pointer + retval uintptr +} + +// wrapper for syscall package to call cgocall for libc (cgo) calls. +//go:linkname syscall_cgocaller syscall.cgocaller +//go:nosplit +//go:uintptrescapes +func syscall_cgocaller(fn unsafe.Pointer, args ...uintptr) uintptr { + as := argset{args: unsafe.Pointer(&args[0])} + cgocall(fn, unsafe.Pointer(&as)) + return as.retval +} + // Call from Go to C. // // This must be nosplit because it's used for syscalls on some diff --git a/src/runtime/proc.go b/src/runtime/proc.go index d088b969c8..aeacb23391 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -137,6 +137,10 @@ func main() { mainStarted = true if GOARCH != "wasm" { // no threads on wasm yet, so no sysmon + // For runtime_syscall_doAllThreadsSyscall, we + // register sysmon is not ready for the world to be + // stopped. + atomic.Store(&sched.sysmonStarting, 1) systemstack(func() { newm(sysmon, nil, -1) }) @@ -153,6 +157,7 @@ func main() { if g.m != &m0 { throw("runtime.main not on m0") } + m0.doesPark = true // Record when the world started. // Must be before doInit for tracing init. @@ -1226,6 +1231,21 @@ func mstartm0() { initsig(false) } +// mPark causes a thread to park itself - temporarily waking for +// fixups but otherwise waiting to be fully woken. This is the +// only way that m's should park themselves. +//go:nosplit +func mPark() { + g := getg() + for { + notesleep(&g.m.park) + noteclear(&g.m.park) + if !mDoFixup() { + return + } + } +} + // mexit tears down and exits the current thread. // // Don't call this directly to exit the thread, since it must run at @@ -1257,7 +1277,7 @@ func mexit(osStack bool) { sched.nmfreed++ checkdead() unlock(&sched.lock) - notesleep(&m.park) + mPark() throw("locked m0 woke up") } @@ -1424,6 +1444,127 @@ func forEachP(fn func(*p)) { releasem(mp) } +// syscall_runtime_doAllThreadsSyscall serializes Go execution and +// executes a specified fn() call on all m's. +// +// The boolean argument to fn() indicates whether the function's +// return value will be consulted or not. 
That is, fn(true) should +// return true if fn() succeeds, and fn(true) should return false if +// it failed. When fn(false) is called, its return status will be +// ignored. +// +// syscall_runtime_doAllThreadsSyscall first invokes fn(true) on a +// single, coordinating, m, and only if it returns true does it go on +// to invoke fn(false) on all of the other m's known to the process. +// +//go:linkname syscall_runtime_doAllThreadsSyscall syscall.runtime_doAllThreadsSyscall +func syscall_runtime_doAllThreadsSyscall(fn func(bool) bool) { + if iscgo { + panic("doAllThreadsSyscall not supported with cgo enabled") + } + if fn == nil { + return + } + for atomic.Load(&sched.sysmonStarting) != 0 { + osyield() + } + stopTheWorldGC("doAllThreadsSyscall") + if atomic.Load(&newmHandoff.haveTemplateThread) != 0 { + // Ensure that there are no in-flight thread + // creations: don't want to race with allm. + lock(&newmHandoff.lock) + for !newmHandoff.waiting { + unlock(&newmHandoff.lock) + osyield() + lock(&newmHandoff.lock) + } + unlock(&newmHandoff.lock) + } + if netpollinited() { + netpollBreak() + } + _g_ := getg() + if raceenabled { + // For m's running without racectx, we loan out the + // racectx of this call. + lock(&mFixupRace.lock) + mFixupRace.ctx = _g_.racectx + unlock(&mFixupRace.lock) + } + if ok := fn(true); ok { + tid := _g_.m.procid + for mp := allm; mp != nil; mp = mp.alllink { + if mp.procid == tid { + // This m has already completed fn() + // call. + continue + } + // Be wary of mp's without procid values if + // they are known not to park. If they are + // marked as parking with a zero procid, then + // they will be racing with this code to be + // allocated a procid and we will annotate + // them with the need to execute the fn when + // they acquire a procid to run it. + if mp.procid == 0 && !mp.doesPark { + // Reaching here, we are either + // running Windows, or cgo linked + // code. Neither of which are + // currently supported by this API. + throw("unsupported runtime environment") + } + // stopTheWorldGC() doesn't guarantee stopping + // all the threads, so we lock here to avoid + // the possibility of racing with mp. + lock(&mp.mFixup.lock) + mp.mFixup.fn = fn + if mp.doesPark { + // For non-service threads this will + // cause the wakeup to be short lived + // (once the mutex is unlocked). The + // next real wakeup will occur after + // startTheWorldGC() is called. + notewakeup(&mp.park) + } + unlock(&mp.mFixup.lock) + } + for { + done := true + for mp := allm; done && mp != nil; mp = mp.alllink { + if mp.procid == tid { + continue + } + lock(&mp.mFixup.lock) + done = done && (mp.mFixup.fn == nil) + unlock(&mp.mFixup.lock) + } + if done { + break + } + // if needed force sysmon and/or newmHandoff to wakeup. + lock(&sched.lock) + if atomic.Load(&sched.sysmonwait) != 0 { + atomic.Store(&sched.sysmonwait, 0) + notewakeup(&sched.sysmonnote) + } + unlock(&sched.lock) + lock(&newmHandoff.lock) + if newmHandoff.waiting { + newmHandoff.waiting = false + notewakeup(&newmHandoff.wake) + } + unlock(&newmHandoff.lock) + osyield() + } + } + if raceenabled { + lock(&mFixupRace.lock) + mFixupRace.ctx = 0 + unlock(&mFixupRace.lock) + } + startTheWorldGC() +} + // runSafePointFn runs the safe point function, if any, for this P. 
// This should be called like // @@ -1816,6 +1957,7 @@ var newmHandoff struct { //go:nowritebarrierrec func newm(fn func(), _p_ *p, id int64) { mp := allocm(_p_, fn, id) + mp.doesPark = (_p_ != nil) mp.nextp.set(_p_) mp.sigmask = initSigmask if gp := getg(); gp != nil && gp.m != nil && (gp.m.lockedExt != 0 || gp.m.incgo) && GOOS != "plan9" { @@ -1888,6 +2030,57 @@ func startTemplateThread() { releasem(mp) } +// mFixupRace is used to temporarily borrow the race context from the +// coordinating m during a syscall_runtime_doAllThreadsSyscall and +// loan it out to each of the m's of the runtime so they can execute a +// mFixup.fn in that context. +var mFixupRace struct { + lock mutex + ctx uintptr +} + +// mDoFixup runs any outstanding fixup function for the running m. +// Returns true if a fixup was outstanding and actually executed. +// +//go:nosplit +func mDoFixup() bool { + _g_ := getg() + lock(&_g_.m.mFixup.lock) + fn := _g_.m.mFixup.fn + if fn != nil { + if gcphase != _GCoff { + // We can't have a write barrier in this + // context since we may not have a P, but we + // clear fn to signal that we've executed the + // fixup. As long as fn is kept alive + // elsewhere, technically we should have no + // issues with the GC, but fn is likely + // generated in a different package altogether + // that may change independently. Just assert + // the GC is off so this lack of write barrier + // is more obviously safe. + throw("GC must be disabled to protect validity of fn value") + } + *(*uintptr)(unsafe.Pointer(&_g_.m.mFixup.fn)) = 0 + if _g_.racectx != 0 || !raceenabled { + fn(false) + } else { + // temporarily acquire the context of the + // originator of the + // syscall_runtime_doAllThreadsSyscall and + // block others from using it for the duration + // of the fixup call. + lock(&mFixupRace.lock) + _g_.racectx = mFixupRace.ctx + fn(false) + _g_.racectx = 0 + unlock(&mFixupRace.lock) + } + } + unlock(&_g_.m.mFixup.lock) + return fn != nil +} + // templateThread is a thread in a known-good state that exists solely // to start new threads in known-good states when the calling thread // may not be in a good state. @@ -1924,6 +2117,7 @@ func templateThread() { noteclear(&newmHandoff.wake) unlock(&newmHandoff.lock) notesleep(&newmHandoff.wake) + mDoFixup() } } @@ -1945,8 +2139,7 @@ func stopm() { lock(&sched.lock) mput(_g_.m) unlock(&sched.lock) - notesleep(&_g_.m.park) - noteclear(&_g_.m.park) + mPark() acquirep(_g_.m.nextp.ptr()) _g_.m.nextp = 0 } @@ -2106,8 +2299,7 @@ func stoplockedm() { } incidlelocked(1) // Wait until another thread schedules lockedg again. - notesleep(&_g_.m.park) - noteclear(&_g_.m.park) + mPark() status := readgstatus(_g_.m.lockedg.ptr()) if status&^_Gscan != _Grunnable { print("runtime:stoplockedm: g is not Grunnable or Gscanrunnable\n") @@ -4715,9 +4907,14 @@ func sysmon() { checkdead() unlock(&sched.lock) + // For syscall_runtime_doAllThreadsSyscall, sysmon is + // sufficiently up to participate in fixups. + atomic.Store(&sched.sysmonStarting, 0) + lasttrace := int64(0) idle := 0 // how many cycles in succession we had not wokeup somebody delay := uint32(0) + for { if idle == 0 { // start with 20us sleep... 
delay = 20 @@ -4728,6 +4925,7 @@ func sysmon() { delay = 10 * 1000 } usleep(delay) + mDoFixup() now := nanotime() next, _ := timeSleepUntil() if debug.schedtrace <= 0 && (sched.gcwaiting != 0 || atomic.Load(&sched.npidle) == uint32(gomaxprocs)) { @@ -4747,6 +4945,7 @@ func sysmon() { osRelax(true) } notetsleep(&sched.sysmonnote, sleep) + mDoFixup() if shouldRelax { osRelax(false) } @@ -4795,6 +4994,7 @@ func sysmon() { incidlelocked(1) } } + mDoFixup() if next < now { // There are timers that should have already run, // perhaps because there is an unpreemptible P. diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go index 0758a35e01..21dd7b3949 100644 --- a/src/runtime/runtime2.go +++ b/src/runtime/runtime2.go @@ -528,6 +528,7 @@ type m struct { ncgo int32 // number of cgo calls currently in progress cgoCallersUse uint32 // if non-zero, cgoCallers in use temporarily cgoCallers *cgoCallers // cgo traceback if crashing in cgo call + doesPark bool // non-P running threads: sysmon and newmHandoff never use .park park note alllink *m // on allm schedlink muintptr @@ -544,6 +545,13 @@ type m struct { syscalltick uint32 freelink *m // on sched.freem + // mFixup is used to synchronize OS related m state (credentials etc) + // use mutex to access. + mFixup struct { + lock mutex + fn func(bool) bool + } + // these are here because they are too large to be on the stack // of low-level NOSPLIT functions. libcall libcall @@ -768,6 +776,10 @@ type schedt struct { sysmonwait uint32 sysmonnote note + // While true, sysmon not ready for mFixup calls. + // Accessed atomically. + sysmonStarting uint32 + // safepointFn should be called on each P at the next GC // safepoint if p.runSafePointFn is set. safePointFn func(*p) -- cgit v1.2.1 From db185e543fe471c522790b7d93291e786dc54a84 Mon Sep 17 00:00:00 2001 From: Michael Pratt Date: Tue, 7 Jul 2020 17:55:40 -0400 Subject: runtime: drop redundant gcBlackenEnabled reset This reset of gcBlackenEnabled is a no-op because it was already reset almost immediately before in gcMarkDone, which is the only caller of gcMarkTermination. Adjust the comment to clarify setGCPhase a bit more. We are coming from _GCmark, so write barriers are already enabled. Change-Id: Ieac2dadf33c3c5a44e8a25a499dea8cfe03b8d73 Reviewed-on: https://go-review.googlesource.com/c/go/+/241357 Run-TryBot: Michael Pratt TryBot-Result: Go Bot Trust: Michael Pratt Reviewed-by: Michael Knyszek --- src/runtime/mgc.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index 65ac654b14..c42c7fbd29 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -1558,10 +1558,10 @@ top: gcMarkTermination(nextTriggerRatio) } +// World must be stopped and mark assists and background workers must be +// disabled. func gcMarkTermination(nextTriggerRatio float64) { - // World is stopped. - // Start marktermination which includes enabling the write barrier. - atomic.Store(&gcBlackenEnabled, 0) + // Start marktermination (write barrier remains enabled for now). setGCPhase(_GCmarktermination) work.heap1 = memstats.heap_live -- cgit v1.2.1 From e01a1c01f830e2398b773b803dce3238b1107ce9 Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Tue, 14 Jul 2020 21:39:52 +0000 Subject: runtime: add tests for addrRanges.findSucc This change adds a test suite for addrRanges.findSucc so we can change the implementation more safely. For #40191. 
Change-Id: I14a834b6d54836cbc676eb0edb292ba6176705cc Reviewed-on: https://go-review.googlesource.com/c/go/+/242678 Trust: Michael Knyszek Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Reviewed-by: Austin Clements Reviewed-by: Michael Pratt --- src/runtime/export_test.go | 24 +++++++ src/runtime/mranges_test.go | 172 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 196 insertions(+) create mode 100644 src/runtime/mranges_test.go (limited to 'src/runtime') diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go index 25b251f4ba..605bcb2294 100644 --- a/src/runtime/export_test.go +++ b/src/runtime/export_test.go @@ -785,6 +785,30 @@ func (a AddrRange) Equals(b AddrRange) bool { return a == b } +// AddrRanges is a wrapper around addrRanges for testing. +type AddrRanges struct { + addrRanges +} + +// MakeAddrRanges creates a new addrRanges populated with +// the ranges in a. +func MakeAddrRanges(a ...AddrRange) AddrRanges { + // Methods that manipulate the backing store of addrRanges.ranges should + // not be used on the result from this function (e.g. add) since they may + // trigger reallocation. + ranges := make([]addrRange, 0, len(a)) + for _, r := range a { + ranges = append(ranges, r.addrRange) + } + return AddrRanges{addrRanges{ranges: ranges, sysStat: new(uint64)}} +} + +// FindSucc returns the successor to base. See addrRanges.findSucc +// for more details. +func (a *AddrRanges) FindSucc(base uintptr) int { + return a.findSucc(base) +} + // BitRange represents a range over a bitmap. type BitRange struct { I, N uint // bit index and length in bits diff --git a/src/runtime/mranges_test.go b/src/runtime/mranges_test.go new file mode 100644 index 0000000000..3a9023adfa --- /dev/null +++ b/src/runtime/mranges_test.go @@ -0,0 +1,172 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package runtime_test + +import ( + . 
"runtime" + "testing" +) + +func TestAddrRangesFindSucc(t *testing.T) { + var large []AddrRange + for i := 0; i < 100; i++ { + large = append(large, MakeAddrRange(5+uintptr(i)*5, 5+uintptr(i)*5+3)) + } + + type testt struct { + name string + base uintptr + expect int + ranges []AddrRange + } + tests := []testt{ + { + name: "Empty", + base: 12, + expect: 0, + ranges: []AddrRange{}, + }, + { + name: "OneBefore", + base: 12, + expect: 0, + ranges: []AddrRange{ + MakeAddrRange(14, 16), + }, + }, + { + name: "OneWithin", + base: 14, + expect: 1, + ranges: []AddrRange{ + MakeAddrRange(14, 16), + }, + }, + { + name: "OneAfterLimit", + base: 16, + expect: 1, + ranges: []AddrRange{ + MakeAddrRange(14, 16), + }, + }, + { + name: "OneAfter", + base: 17, + expect: 1, + ranges: []AddrRange{ + MakeAddrRange(14, 16), + }, + }, + { + name: "ThreeBefore", + base: 3, + expect: 0, + ranges: []AddrRange{ + MakeAddrRange(6, 10), + MakeAddrRange(12, 16), + MakeAddrRange(19, 22), + }, + }, + { + name: "ThreeAfter", + base: 24, + expect: 3, + ranges: []AddrRange{ + MakeAddrRange(6, 10), + MakeAddrRange(12, 16), + MakeAddrRange(19, 22), + }, + }, + { + name: "ThreeBetween", + base: 11, + expect: 1, + ranges: []AddrRange{ + MakeAddrRange(6, 10), + MakeAddrRange(12, 16), + MakeAddrRange(19, 22), + }, + }, + { + name: "ThreeWithin", + base: 9, + expect: 1, + ranges: []AddrRange{ + MakeAddrRange(6, 10), + MakeAddrRange(12, 16), + MakeAddrRange(19, 22), + }, + }, + { + name: "Zero", + base: 0, + expect: 1, + ranges: []AddrRange{ + MakeAddrRange(0, 10), + }, + }, + { + name: "Max", + base: ^uintptr(0), + expect: 1, + ranges: []AddrRange{ + MakeAddrRange(^uintptr(0)-5, ^uintptr(0)), + }, + }, + { + name: "LargeBefore", + base: 2, + expect: 0, + ranges: large, + }, + { + name: "LargeAfter", + base: 5 + uintptr(len(large))*5 + 30, + expect: len(large), + ranges: large, + }, + { + name: "LargeBetweenLow", + base: 14, + expect: 2, + ranges: large, + }, + { + name: "LargeBetweenHigh", + base: 249, + expect: 49, + ranges: large, + }, + { + name: "LargeWithinLow", + base: 25, + expect: 5, + ranges: large, + }, + { + name: "LargeWithinHigh", + base: 396, + expect: 79, + ranges: large, + }, + { + name: "LargeWithinMiddle", + base: 250, + expect: 50, + ranges: large, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + a := MakeAddrRanges(test.ranges...) + i := a.FindSucc(test.base) + if i != test.expect { + t.Fatalf("expected %d, got %d", test.expect, i) + } + }) + } +} -- cgit v1.2.1 From fe70866d1dc8c44ab19180ecab2b5c5b8628265a Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Wed, 15 Jul 2020 18:56:39 +0000 Subject: runtime: throw on zero-sized range passed to addrRanges.add addrRanges represents a set of addresses. Currently, passing in a zero-sized range will cause that range to be added to the list, even though it doesn't represent any address (addrRanges.contains will still always return false, and findSucc will give surprising results). We could ignore this input, but it's almost always a bug for the calling code to pass in a zero-sized range, so just throw. 
Change-Id: I8ed09e15b79a3a33e2d0cf5ed55f9e497388e7a5 Reviewed-on: https://go-review.googlesource.com/c/go/+/242817 Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Trust: Michael Knyszek Reviewed-by: Michael Pratt Reviewed-by: Austin Clements --- src/runtime/mranges.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'src/runtime') diff --git a/src/runtime/mranges.go b/src/runtime/mranges.go index 2c0eb2c2dd..1109f506a6 100644 --- a/src/runtime/mranges.go +++ b/src/runtime/mranges.go @@ -218,7 +218,7 @@ func (a *addrRanges) contains(addr uintptr) bool { // add inserts a new address range to a. // -// r must not overlap with any address range in a. +// r must not overlap with any address range in a and r.size() must be > 0. func (a *addrRanges) add(r addrRange) { // The copies in this function are potentially expensive, but this data // structure is meant to represent the Go heap. At worst, copying this @@ -229,6 +229,12 @@ func (a *addrRanges) add(r addrRange) { // of 16) and Go heaps are usually mostly contiguous, so the chance that // an addrRanges even grows to that size is extremely low. + // An empty range has no effect on the set of addresses represented + // by a, but passing a zero-sized range is almost always a bug. + if r.size() == 0 { + print("runtime: range = {", hex(r.base.addr()), ", ", hex(r.limit.addr()), "}\n") + throw("attempted to add zero-sized address range") + } // Because we assume r is not currently represented in a, // findSucc gives us our insertion index. i := a.findSucc(r.base.addr()) -- cgit v1.2.1 From 64dc25b2db5d5be55f3f2fde3daac6c8a2873235 Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Tue, 14 Jul 2020 21:41:12 +0000 Subject: runtime: add tests for addrRanges.add Change-Id: I249deb482df74068b0538e9d773b9a87bc5a6df3 Reviewed-on: https://go-review.googlesource.com/c/go/+/242681 Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Trust: Michael Knyszek Reviewed-by: Austin Clements Reviewed-by: Michael Pratt --- src/runtime/export_test.go | 62 +++++++++++++++++++++++++- src/runtime/mranges_test.go | 103 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 163 insertions(+), 2 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go index 605bcb2294..e65b7b8ea7 100644 --- a/src/runtime/export_test.go +++ b/src/runtime/export_test.go @@ -785,22 +785,64 @@ func (a AddrRange) Equals(b AddrRange) bool { return a == b } +// Size returns the size in bytes of the address range. +func (a AddrRange) Size() uintptr { + return a.addrRange.size() +} + // AddrRanges is a wrapper around addrRanges for testing. type AddrRanges struct { addrRanges + mutable bool +} + +// NewAddrRanges creates a new empty addrRanges. +// +// Note that this initializes addrRanges just like in the +// runtime, so its memory is persistentalloc'd. Call this +// function sparingly since the memory it allocates is +// leaked. +// +// This AddrRanges is mutable, so we can test methods like +// Add. +func NewAddrRanges() AddrRanges { + r := addrRanges{} + r.init(new(uint64)) + return AddrRanges{r, true} } // MakeAddrRanges creates a new addrRanges populated with // the ranges in a. +// +// The returned AddrRanges is immutable, so methods like +// Add will fail. func MakeAddrRanges(a ...AddrRange) AddrRanges { // Methods that manipulate the backing store of addrRanges.ranges should // not be used on the result from this function (e.g. add) since they may - // trigger reallocation. + // trigger reallocation. 
That would normally be fine, except the new + // backing store won't come from the heap, but from persistentalloc, so + // we'll leak some memory implicitly. ranges := make([]addrRange, 0, len(a)) + total := uintptr(0) for _, r := range a { ranges = append(ranges, r.addrRange) + total += r.Size() + } + return AddrRanges{addrRanges{ + ranges: ranges, + totalBytes: total, + sysStat: new(uint64), + }, false} +} + +// Ranges returns a copy of the ranges described by the +// addrRanges. +func (a *AddrRanges) Ranges() []AddrRange { + result := make([]AddrRange, 0, len(a.addrRanges.ranges)) + for _, r := range a.addrRanges.ranges { + result = append(result, AddrRange{r}) } - return AddrRanges{addrRanges{ranges: ranges, sysStat: new(uint64)}} + return result } // FindSucc returns the successor to base. See addrRanges.findSucc @@ -809,6 +851,22 @@ func (a *AddrRanges) FindSucc(base uintptr) int { return a.findSucc(base) } +// Add adds a new AddrRange to the AddrRanges. +// +// The AddrRange must be mutable (i.e. created by NewAddrRanges), +// otherwise this method will throw. +func (a *AddrRanges) Add(r AddrRange) { + if !a.mutable { + throw("attempt to mutate immutable AddrRanges") + } + a.add(r.addrRange) +} + +// TotalBytes returns the totalBytes field of the addrRanges. +func (a *AddrRanges) TotalBytes() uintptr { + return a.addrRanges.totalBytes +} + // BitRange represents a range over a bitmap. type BitRange struct { I, N uint // bit index and length in bits diff --git a/src/runtime/mranges_test.go b/src/runtime/mranges_test.go index 3a9023adfa..ed439c56c2 100644 --- a/src/runtime/mranges_test.go +++ b/src/runtime/mranges_test.go @@ -9,6 +9,109 @@ import ( "testing" ) +func validateAddrRanges(t *testing.T, a *AddrRanges, want ...AddrRange) { + ranges := a.Ranges() + if len(ranges) != len(want) { + t.Errorf("want %v, got %v", want, ranges) + t.Fatal("different lengths") + } + gotTotalBytes := uintptr(0) + wantTotalBytes := uintptr(0) + for i := range ranges { + gotTotalBytes += ranges[i].Size() + wantTotalBytes += want[i].Size() + if ranges[i].Base() >= ranges[i].Limit() { + t.Error("empty range found") + } + // Ensure this is equivalent to what we want. + if !ranges[i].Equals(want[i]) { + t.Errorf("range %d: got [0x%x, 0x%x), want [0x%x, 0x%x)", i, + ranges[i].Base(), ranges[i].Limit(), + want[i].Base(), want[i].Limit(), + ) + } + if i != 0 { + // Ensure the ranges are sorted. + if ranges[i-1].Base() >= ranges[i].Base() { + t.Errorf("ranges %d and %d are out of sorted order", i-1, i) + } + // Check for a failure to coalesce. + if ranges[i-1].Limit() == ranges[i].Base() { + t.Errorf("ranges %d and %d should have coalesced", i-1, i) + } + // Check if any ranges overlap. Because the ranges are sorted + // by base, it's sufficient to just check neighbors. + if ranges[i-1].Limit() > ranges[i].Base() { + t.Errorf("ranges %d and %d overlap", i-1, i) + } + } + } + if wantTotalBytes != gotTotalBytes { + t.Errorf("expected %d total bytes, got %d", wantTotalBytes, gotTotalBytes) + } + if b := a.TotalBytes(); b != gotTotalBytes { + t.Errorf("inconsistent total bytes: want %d, got %d", gotTotalBytes, b) + } + if t.Failed() { + t.Errorf("addrRanges: %v", ranges) + t.Fatal("detected bad addrRanges") + } +} + +func TestAddrRangesAdd(t *testing.T) { + a := NewAddrRanges() + + // First range. + a.Add(MakeAddrRange(512, 1024)) + validateAddrRanges(t, &a, + MakeAddrRange(512, 1024), + ) + + // Coalesce up. 
+ a.Add(MakeAddrRange(1024, 2048)) + validateAddrRanges(t, &a, + MakeAddrRange(512, 2048), + ) + + // Add new independent range. + a.Add(MakeAddrRange(4096, 8192)) + validateAddrRanges(t, &a, + MakeAddrRange(512, 2048), + MakeAddrRange(4096, 8192), + ) + + // Coalesce down. + a.Add(MakeAddrRange(3776, 4096)) + validateAddrRanges(t, &a, + MakeAddrRange(512, 2048), + MakeAddrRange(3776, 8192), + ) + + // Coalesce up and down. + a.Add(MakeAddrRange(2048, 3776)) + validateAddrRanges(t, &a, + MakeAddrRange(512, 8192), + ) + + // Push a bunch of independent ranges to the end to try and force growth. + expectedRanges := []AddrRange{MakeAddrRange(512, 8192)} + for i := uintptr(0); i < 64; i++ { + dRange := MakeAddrRange(8192+(i+1)*2048, 8192+(i+1)*2048+10) + a.Add(dRange) + expectedRanges = append(expectedRanges, dRange) + validateAddrRanges(t, &a, expectedRanges...) + } + + // Push a bunch of independent ranges to the beginning to try and force growth. + var bottomRanges []AddrRange + for i := uintptr(0); i < 63; i++ { + dRange := MakeAddrRange(8+i*8, 8+i*8+4) + a.Add(dRange) + bottomRanges = append(bottomRanges, dRange) + validateAddrRanges(t, &a, append(bottomRanges, expectedRanges...)...) + } +} + func TestAddrRangesFindSucc(t *testing.T) { var large []AddrRange for i := 0; i < 100; i++ { -- cgit v1.2.1 From bc0b198bd75a8eef45d0965531ba6fa127d0e8ec Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Thu, 15 Oct 2020 01:43:51 +0000 Subject: runtime: dump the status of lockedg on error The dumpgstatus() will dump current g's status anyway. When lockedg's status is bad, it's more helpful to dump lockedg's status as well than dumping current g's status twice. Change-Id: If5248cb94b9cdcbf4ceea07562237e1d6ee28489 GitHub-Last-Rev: da814c51ff42f56fb28582f088f4d72b500061fe GitHub-Pull-Request: golang/go#40248 Reviewed-on: https://go-review.googlesource.com/c/go/+/243097 Reviewed-by: Keith Randall Trust: Emmanuel Odeke Run-TryBot: Emmanuel Odeke TryBot-Result: Go Bot --- src/runtime/proc.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/proc.go b/src/runtime/proc.go index aeacb23391..83d2a524e0 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -2302,8 +2302,8 @@ func stoplockedm() { mPark() status := readgstatus(_g_.m.lockedg.ptr()) if status&^_Gscan != _Grunnable { - print("runtime:stoplockedm: g is not Grunnable or Gscanrunnable\n") - dumpgstatus(_g_) + print("runtime:stoplockedm: lockedg (atomicstatus=", status, ") is not Grunnable or Gscanrunnable\n") + dumpgstatus(_g_.m.lockedg.ptr()) throw("stoplockedm: not runnable") } acquirep(_g_.m.nextp.ptr()) -- cgit v1.2.1 From 30c18878730434027dbefd343aad74963a1fdc48 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Thu, 1 Oct 2020 17:22:38 -0400 Subject: runtime,cmd/cgo: simplify C -> Go call path This redesigns the way calls work from C to exported Go functions. It removes several steps from the call path, makes cmd/cgo no longer sensitive to the Go calling convention, and eliminates the use of reflectcall from cgo. In order to avoid generating a large amount of FFI glue between the C and Go ABIs, the cgo tool has long depended on generating a C function that marshals the arguments into a struct, and then the actual ABI switch happens in functions with fixed signatures that simply take a pointer to this struct. In a way, this CL simply pushes this idea further. 
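As a concrete (if simplified) illustration of that pattern, here is a
pure-Go sketch -- an analogy only, not code that cmd/cgo actually
generates -- in which arguments and result slots are packed into a
single struct and a trampoline with a fixed signature sees only a
function and a frame pointer, never the callee's real signature:

	package main

	import (
		"fmt"
		"unsafe"
	)

	// addFrame plays the role of the marshalled argument struct:
	// inputs plus a slot for the result.
	type addFrame struct {
		a, b int32 // arguments
		r    int32 // result, filled in by the wrapper
	}

	// addWrapper has the fixed signature the trampoline expects:
	// it unpacks the frame, calls the real function, and writes
	// the result back into the frame.
	func addWrapper(frame unsafe.Pointer) {
		f := (*addFrame)(frame)
		f.r = add(f.a, f.b)
	}

	func add(a, b int32) int32 { return a + b }

	// trampoline stands in for the fixed-signature, ABI-switching
	// functions: it only ever handles func(unsafe.Pointer) plus a
	// frame pointer, regardless of the callee's actual signature.
	func trampoline(fn func(unsafe.Pointer), frame unsafe.Pointer) {
		fn(frame)
	}

	func main() {
		f := addFrame{a: 2, b: 40}
		trampoline(addWrapper, unsafe.Pointer(&f))
		fmt.Println(f.r) // prints 42
	}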
Currently, the cgo tool generates this argument struct in the exact layout of the Go stack frame and depends on reflectcall to unpack it into the appropriate Go call (even though it's actually reflectcall'ing a function generated by cgo). In this CL, we decouple this struct from the Go stack layout. Instead, cgo generates a Go function that takes the struct, unpacks it, and calls the exported function. Since this generated function has a generic signature (like the rest of the call path), we don't need reflectcall and can instead depend on the Go compiler itself to implement the call to the exported Go function. One complication is that syscall.NewCallback on Windows, which converts a Go function into a C function pointer, depends on cgocallback's current dynamic calling approach since the signatures of the callbacks aren't known statically. For this specific case, we continue to depend on reflectcall. Really, the current approach makes some overly simplistic assumptions about translating the C ABI to the Go ABI. Now we're at least in a much better position to do a proper ABI translation. For comparison, the current cgo call path looks like: GoF (generated C function) -> crosscall2 (in cgo/asm_*.s) -> _cgoexp_GoF (generated Go function) -> cgocallback (in asm_*.s) -> cgocallback_gofunc (in asm_*.s) -> cgocallbackg (in cgocall.go) -> cgocallbackg1 (in cgocall.go) -> reflectcall (in asm_*.s) -> _cgoexpwrap_GoF (generated Go function) -> p.GoF Now the call path looks like: GoF (generated C function) -> crosscall2 (in cgo/asm_*.s) -> cgocallback (in asm_*.s) -> cgocallbackg (in cgocall.go) -> cgocallbackg1 (in cgocall.go) -> _cgoexp_GoF (generated Go function) -> p.GoF Notably: 1. We combine _cgoexp_GoF and _cgoexpwrap_GoF and move the combined operation to the end of the sequence. This combined function also handles reflectcall's previous role. 2. We combined cgocallback and cgocallback_gofunc since the only purpose of having both was to convert a raw PC into a Go function value. We instead construct the Go function value in cgocallbackg1. 3. cgocallbackg1 no longer reaches backwards through the stack to get the arguments to cgocallback_gofunc. Instead, we just pass the arguments down. 4. Currently, we need an explicit msanwrite to mark the results struct as written because reflectcall doesn't do this. Now, the results are written by regular Go assignments, so the Go compiler generates the necessary MSAN annotations. This also means we no longer need to track the size of the arguments frame. Updates #40724, since now we don't need to teach cgo about the register ABI or change how it uses reflectcall. 
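For readers unfamiliar with the export machinery, a minimal,
hypothetical example of the kind of program that exercises this path
follows. The name Double and the round trip through C.Double are
illustrative only, not taken from this CL, and assume the usual
//export setup (cgo enabled, a C toolchain available):

	package main

	/*
	// Declaration of the gcc-compiled wrapper that cmd/cgo generates
	// for the exported Go function below (the "GoF" in the
	// description above).
	extern int Double(int x);
	*/
	import "C"
	import "fmt"

	//export Double
	func Double(x C.int) C.int {
		return x * 2
	}

	func main() {
		// Calling through C exercises the C -> Go path described
		// above: crosscall2 -> cgocallback -> cgocallbackg ->
		// the generated _cgoexp wrapper for Double -> Double.
		fmt.Println(C.Double(21)) // prints 42
	}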
Change-Id: I7840489a2597962aeb670e0c1798a16a7359c94f Reviewed-on: https://go-review.googlesource.com/c/go/+/258938 Trust: Austin Clements Run-TryBot: Austin Clements TryBot-Result: Go Bot Reviewed-by: Cherry Zhang --- src/runtime/asm_386.s | 56 ++++++---------- src/runtime/asm_amd64.s | 55 ++++++---------- src/runtime/asm_arm.s | 58 ++++++----------- src/runtime/asm_arm64.s | 48 +++++--------- src/runtime/asm_mips64x.s | 54 ++++++---------- src/runtime/asm_mipsx.s | 54 ++++++---------- src/runtime/asm_ppc64x.s | 55 ++++++---------- src/runtime/asm_riscv64.s | 5 +- src/runtime/asm_s390x.s | 54 ++++++---------- src/runtime/asm_wasm.s | 5 +- src/runtime/cgo/asm_386.s | 12 ++-- src/runtime/cgo/asm_amd64.s | 20 +++--- src/runtime/cgo/asm_arm.s | 61 ++++++++--------- src/runtime/cgo/asm_arm64.s | 19 +++--- src/runtime/cgo/asm_mips64x.s | 19 +++--- src/runtime/cgo/asm_mipsx.s | 19 +++--- src/runtime/cgo/asm_ppc64x.s | 20 +++--- src/runtime/cgo/asm_s390x.s | 14 ++-- src/runtime/cgo/callbacks.go | 29 ++++----- src/runtime/cgocall.go | 140 +++++++++++----------------------------- src/runtime/proc.go | 2 +- src/runtime/race/output_test.go | 2 +- src/runtime/stubs.go | 25 ++++--- src/runtime/symtab.go | 2 +- src/runtime/sys_windows_386.s | 67 ++++++++----------- src/runtime/sys_windows_amd64.s | 53 +++++++-------- src/runtime/sys_windows_arm.s | 3 + src/runtime/syscall_windows.go | 42 ++++++++++-- src/runtime/traceback.go | 2 +- 29 files changed, 412 insertions(+), 583 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/asm_386.s b/src/runtime/asm_386.s index 11863fba39..a54b68e03d 100644 --- a/src/runtime/asm_386.s +++ b/src/runtime/asm_386.s @@ -702,25 +702,9 @@ nosave: MOVL AX, ret+8(FP) RET -// cgocallback(void (*fn)(void*), void *frame, uintptr framesize, uintptr ctxt) -// Turn the fn into a Go func (by taking its address) and call -// cgocallback_gofunc. -TEXT runtime·cgocallback(SB),NOSPLIT,$16-16 - LEAL fn+0(FP), AX - MOVL AX, 0(SP) - MOVL frame+4(FP), AX - MOVL AX, 4(SP) - MOVL framesize+8(FP), AX - MOVL AX, 8(SP) - MOVL ctxt+12(FP), AX - MOVL AX, 12(SP) - MOVL $runtime·cgocallback_gofunc(SB), AX - CALL AX - RET - -// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize, uintptr ctxt) +// cgocallback(fn, frame unsafe.Pointer, ctxt uintptr) // See cgocall.go for more details. -TEXT ·cgocallback_gofunc(SB),NOSPLIT,$12-16 +TEXT ·cgocallback(SB),NOSPLIT,$16-12 // Frame size must match commented places below NO_LOCAL_POINTERS // If g is nil, Go did not create the current thread. @@ -780,34 +764,36 @@ havem: // save that information (m->curg->sched) so we can restore it. // We can restore m->curg->sched.sp easily, because calling // runtime.cgocallbackg leaves SP unchanged upon return. - // To save m->curg->sched.pc, we push it onto the stack. - // This has the added benefit that it looks to the traceback - // routine like cgocallbackg is going to return to that - // PC (because the frame we allocate below has the same - // size as cgocallback_gofunc's frame declared above) - // so that the traceback will seamlessly trace back into - // the earlier calls. + // To save m->curg->sched.pc, we push it onto the curg stack and + // open a frame the same size as cgocallback's g0 frame. + // Once we switch to the curg stack, the pushed PC will appear + // to be the return PC of cgocallback, so that the traceback + // will seamlessly trace back into the earlier calls. // - // In the new goroutine, 4(SP) holds the saved oldm (DX) register. - // 8(SP) is unused. 
+ // In the new goroutine, 12(SP) holds the saved oldm (DX) register. MOVL m_curg(BP), SI MOVL SI, g(CX) MOVL (g_sched+gobuf_sp)(SI), DI // prepare stack as DI MOVL (g_sched+gobuf_pc)(SI), BP - MOVL BP, -4(DI) - MOVL ctxt+12(FP), CX - LEAL -(4+12)(DI), SP - MOVL DX, 4(SP) - MOVL CX, 0(SP) + MOVL BP, -4(DI) // "push" return PC on the g stack + // Gather our arguments into registers. + MOVL fn+0(FP), AX + MOVL frame+4(FP), BX + MOVL ctxt+8(FP), CX + LEAL -(4+16)(DI), SP // Must match declared frame size + MOVL DX, 12(SP) + MOVL AX, 0(SP) + MOVL BX, 4(SP) + MOVL CX, 8(SP) CALL runtime·cgocallbackg(SB) - MOVL 4(SP), DX + MOVL 12(SP), DX // Restore g->sched (== m->curg->sched) from saved values. get_tls(CX) MOVL g(CX), SI - MOVL 12(SP), BP + MOVL 16(SP), BP // Must match declared frame size MOVL BP, (g_sched+gobuf_pc)(SI) - LEAL (12+4)(SP), DI + LEAL (16+4)(SP), DI // Must match declared frame size MOVL DI, (g_sched+gobuf_sp)(SI) // Switch back to m->g0's stack and restore m->g0->sched.sp. diff --git a/src/runtime/asm_amd64.s b/src/runtime/asm_amd64.s index 256f4112cd..3d5d9c4d58 100644 --- a/src/runtime/asm_amd64.s +++ b/src/runtime/asm_amd64.s @@ -691,25 +691,9 @@ nosave: MOVL AX, ret+16(FP) RET -// func cgocallback(fn, frame unsafe.Pointer, framesize, ctxt uintptr) -// Turn the fn into a Go func (by taking its address) and call -// cgocallback_gofunc. -TEXT runtime·cgocallback(SB),NOSPLIT,$32-32 - LEAQ fn+0(FP), AX - MOVQ AX, 0(SP) - MOVQ frame+8(FP), AX - MOVQ AX, 8(SP) - MOVQ framesize+16(FP), AX - MOVQ AX, 16(SP) - MOVQ ctxt+24(FP), AX - MOVQ AX, 24(SP) - MOVQ $runtime·cgocallback_gofunc(SB), AX - CALL AX - RET - -// func cgocallback_gofunc(fn, frame, framesize, ctxt uintptr) +// func cgocallback(fn, frame unsafe.Pointer, ctxt uintptr) // See cgocall.go for more details. -TEXT ·cgocallback_gofunc(SB),NOSPLIT,$16-32 +TEXT ·cgocallback(SB),NOSPLIT,$32-24 NO_LOCAL_POINTERS // If g is nil, Go did not create the current thread. @@ -769,37 +753,40 @@ havem: // save that information (m->curg->sched) so we can restore it. // We can restore m->curg->sched.sp easily, because calling // runtime.cgocallbackg leaves SP unchanged upon return. - // To save m->curg->sched.pc, we push it onto the stack. - // This has the added benefit that it looks to the traceback - // routine like cgocallbackg is going to return to that - // PC (because the frame we allocate below has the same - // size as cgocallback_gofunc's frame declared above) - // so that the traceback will seamlessly trace back into - // the earlier calls. + // To save m->curg->sched.pc, we push it onto the curg stack and + // open a frame the same size as cgocallback's g0 frame. + // Once we switch to the curg stack, the pushed PC will appear + // to be the return PC of cgocallback, so that the traceback + // will seamlessly trace back into the earlier calls. // - // In the new goroutine, 8(SP) holds the saved R8. + // In the new goroutine, 24(SP) holds the saved R8. MOVQ m_curg(BX), SI MOVQ SI, g(CX) MOVQ (g_sched+gobuf_sp)(SI), DI // prepare stack as DI MOVQ (g_sched+gobuf_pc)(SI), BX - MOVQ BX, -8(DI) + MOVQ BX, -8(DI) // "push" return PC on the g stack + // Gather our arguments into registers. 
+ MOVQ fn+0(FP), BX + MOVQ frame+8(FP), CX + MOVQ ctxt+16(FP), DX // Compute the size of the frame, including return PC and, if // GOEXPERIMENT=framepointer, the saved base pointer - MOVQ ctxt+24(FP), BX - LEAQ fv+0(FP), AX - SUBQ SP, AX - SUBQ AX, DI + LEAQ fn+0(FP), AX + SUBQ SP, AX // AX is our actual frame size + SUBQ AX, DI // Allocate the same frame size on the g stack MOVQ DI, SP - MOVQ R8, 8(SP) + MOVQ R8, 24(SP) MOVQ BX, 0(SP) + MOVQ CX, 8(SP) + MOVQ DX, 16(SP) CALL runtime·cgocallbackg(SB) - MOVQ 8(SP), R8 + MOVQ 24(SP), R8 // Compute the size of the frame again. FP and SP have // completely different values here than they did above, // but only their difference matters. - LEAQ fv+0(FP), AX + LEAQ fn+0(FP), AX SUBQ SP, AX // Restore g->sched (== m->curg->sched) from saved values. diff --git a/src/runtime/asm_arm.s b/src/runtime/asm_arm.s index 51a50c604c..c54b4eb006 100644 --- a/src/runtime/asm_arm.s +++ b/src/runtime/asm_arm.s @@ -643,25 +643,9 @@ nosave: MOVW R0, ret+8(FP) RET -// cgocallback(void (*fn)(void*), void *frame, uintptr framesize, uintptr ctxt) -// Turn the fn into a Go func (by taking its address) and call -// cgocallback_gofunc. -TEXT runtime·cgocallback(SB),NOSPLIT,$16-16 - MOVW $fn+0(FP), R0 - MOVW R0, 4(R13) - MOVW frame+4(FP), R0 - MOVW R0, 8(R13) - MOVW framesize+8(FP), R0 - MOVW R0, 12(R13) - MOVW ctxt+12(FP), R0 - MOVW R0, 16(R13) - MOVW $runtime·cgocallback_gofunc(SB), R0 - BL (R0) - RET - -// cgocallback_gofunc(void (*fn)(void*), void *frame, uintptr framesize, uintptr ctxt) +// cgocallback(fn, frame unsafe.Pointer, ctxt uintptr) // See cgocall.go for more details. -TEXT ·cgocallback_gofunc(SB),NOSPLIT,$8-16 +TEXT ·cgocallback(SB),NOSPLIT,$12-12 NO_LOCAL_POINTERS // Load m and g from thread-local storage. @@ -686,7 +670,7 @@ needm: MOVW $runtime·needm(SB), R0 BL (R0) - // Set m->sched.sp = SP, so that if a panic happens + // Set m->g0->sched.sp = SP, so that if a panic happens // during the function we are about to execute, it will // have a valid SP to run on the g0 stack. // The next few lines (after the havem label) @@ -706,10 +690,10 @@ havem: // Save current m->g0->sched.sp on stack and then set it to SP. // Save current sp in m->g0->sched.sp in preparation for // switch back to m->curg stack. - // NOTE: unwindm knows that the saved g->sched.sp is at 4(R13) aka savedsp-8(SP). + // NOTE: unwindm knows that the saved g->sched.sp is at 4(R13) aka savedsp-12(SP). MOVW m_g0(R8), R3 MOVW (g_sched+gobuf_sp)(R3), R4 - MOVW R4, savedsp-8(SP) + MOVW R4, savedsp-12(SP) // must match frame size MOVW R13, (g_sched+gobuf_sp)(R3) // Switch to m->curg stack and call runtime.cgocallbackg. @@ -718,30 +702,30 @@ havem: // save that information (m->curg->sched) so we can restore it. // We can restore m->curg->sched.sp easily, because calling // runtime.cgocallbackg leaves SP unchanged upon return. - // To save m->curg->sched.pc, we push it onto the stack. - // This has the added benefit that it looks to the traceback - // routine like cgocallbackg is going to return to that - // PC (because the frame we allocate below has the same - // size as cgocallback_gofunc's frame declared above) - // so that the traceback will seamlessly trace back into - // the earlier calls. - // - // In the new goroutine, -4(SP) is unused (where SP refers to - // m->curg's SP while we're setting it up, before we've adjusted it). + // To save m->curg->sched.pc, we push it onto the curg stack and + // open a frame the same size as cgocallback's g0 frame. 
+ // Once we switch to the curg stack, the pushed PC will appear + // to be the return PC of cgocallback, so that the traceback + // will seamlessly trace back into the earlier calls. MOVW m_curg(R8), R0 BL setg<>(SB) MOVW (g_sched+gobuf_sp)(g), R4 // prepare stack as R4 MOVW (g_sched+gobuf_pc)(g), R5 - MOVW R5, -12(R4) - MOVW ctxt+12(FP), R0 - MOVW R0, -8(R4) - MOVW $-12(R4), R13 + MOVW R5, -(12+4)(R4) // "saved LR"; must match frame size + // Gather our arguments into registers. + MOVW fn+0(FP), R1 + MOVW frame+4(FP), R2 + MOVW ctxt+8(FP), R3 + MOVW $-(12+4)(R4), R13 // switch stack; must match frame size + MOVW R1, 4(R13) + MOVW R2, 8(R13) + MOVW R3, 12(R13) BL runtime·cgocallbackg(SB) // Restore g->sched (== m->curg->sched) from saved values. MOVW 0(R13), R5 MOVW R5, (g_sched+gobuf_pc)(g) - MOVW $12(R13), R4 + MOVW $(12+4)(R13), R4 // must match frame size MOVW R4, (g_sched+gobuf_sp)(g) // Switch back to m->g0's stack and restore m->g0->sched.sp. @@ -751,7 +735,7 @@ havem: MOVW m_g0(R8), R0 BL setg<>(SB) MOVW (g_sched+gobuf_sp)(g), R13 - MOVW savedsp-8(SP), R4 + MOVW savedsp-12(SP), R4 // must match frame size MOVW R4, (g_sched+gobuf_sp)(g) // If the m on entry was nil, we called needm above to borrow an m diff --git a/src/runtime/asm_arm64.s b/src/runtime/asm_arm64.s index 1f46d1962c..a45e342478 100644 --- a/src/runtime/asm_arm64.s +++ b/src/runtime/asm_arm64.s @@ -958,25 +958,9 @@ nosave: MOVD R0, ret+16(FP) RET -// cgocallback(void (*fn)(void*), void *frame, uintptr framesize, uintptr ctxt) -// Turn the fn into a Go func (by taking its address) and call -// cgocallback_gofunc. -TEXT runtime·cgocallback(SB),NOSPLIT,$40-32 - MOVD $fn+0(FP), R0 - MOVD R0, 8(RSP) - MOVD frame+8(FP), R0 - MOVD R0, 16(RSP) - MOVD framesize+16(FP), R0 - MOVD R0, 24(RSP) - MOVD ctxt+24(FP), R0 - MOVD R0, 32(RSP) - MOVD $runtime·cgocallback_gofunc(SB), R0 - BL (R0) - RET - -// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize, uintptr ctxt) +// cgocallback(fn, frame unsafe.Pointer, ctxt uintptr) // See cgocall.go for more details. -TEXT ·cgocallback_gofunc(SB),NOSPLIT,$24-32 +TEXT ·cgocallback(SB),NOSPLIT,$24-24 NO_LOCAL_POINTERS // Load g from thread-local storage. @@ -1001,7 +985,7 @@ needm: MOVD $runtime·needm(SB), R0 BL (R0) - // Set m->sched.sp = SP, so that if a panic happens + // Set m->g0->sched.sp = SP, so that if a panic happens // during the function we are about to execute, it will // have a valid SP to run on the g0 stack. // The next few lines (after the havem label) @@ -1037,16 +1021,11 @@ havem: // save that information (m->curg->sched) so we can restore it. // We can restore m->curg->sched.sp easily, because calling // runtime.cgocallbackg leaves SP unchanged upon return. - // To save m->curg->sched.pc, we push it onto the stack. - // This has the added benefit that it looks to the traceback - // routine like cgocallbackg is going to return to that - // PC (because the frame we allocate below has the same - // size as cgocallback_gofunc's frame declared above) - // so that the traceback will seamlessly trace back into - // the earlier calls. - // - // In the new goroutine, -8(SP) is unused (where SP refers to - // m->curg's SP while we're setting it up, before we've adjusted it). + // To save m->curg->sched.pc, we push it onto the curg stack and + // open a frame the same size as cgocallback's g0 frame. 
+ // Once we switch to the curg stack, the pushed PC will appear + // to be the return PC of cgocallback, so that the traceback + // will seamlessly trace back into the earlier calls. MOVD m_curg(R8), g BL runtime·save_g(SB) MOVD (g_sched+gobuf_sp)(g), R4 // prepare stack as R4 @@ -1054,10 +1033,15 @@ havem: MOVD R5, -48(R4) MOVD (g_sched+gobuf_bp)(g), R5 MOVD R5, -56(R4) - MOVD ctxt+24(FP), R0 - MOVD R0, -40(R4) + // Gather our arguments into registers. + MOVD fn+0(FP), R1 + MOVD frame+8(FP), R2 + MOVD ctxt+16(FP), R3 MOVD $-48(R4), R0 // maintain 16-byte SP alignment - MOVD R0, RSP + MOVD R0, RSP // switch stack + MOVD R1, 8(RSP) + MOVD R2, 16(RSP) + MOVD R3, 24(RSP) BL runtime·cgocallbackg(SB) // Restore g->sched (== m->curg->sched) from saved values. diff --git a/src/runtime/asm_mips64x.s b/src/runtime/asm_mips64x.s index 0ff1b24225..19781f7885 100644 --- a/src/runtime/asm_mips64x.s +++ b/src/runtime/asm_mips64x.s @@ -471,25 +471,9 @@ g0: MOVW R2, ret+16(FP) RET -// cgocallback(void (*fn)(void*), void *frame, uintptr framesize, uintptr ctxt) -// Turn the fn into a Go func (by taking its address) and call -// cgocallback_gofunc. -TEXT runtime·cgocallback(SB),NOSPLIT,$32-32 - MOVV $fn+0(FP), R1 - MOVV R1, 8(R29) - MOVV frame+8(FP), R1 - MOVV R1, 16(R29) - MOVV framesize+16(FP), R1 - MOVV R1, 24(R29) - MOVV ctxt+24(FP), R1 - MOVV R1, 32(R29) - MOVV $runtime·cgocallback_gofunc(SB), R1 - JAL (R1) - RET - -// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize, uintptr ctxt) +// func cgocallback(fn, frame unsafe.Pointer, ctxt uintptr) // See cgocall.go for more details. -TEXT ·cgocallback_gofunc(SB),NOSPLIT,$16-32 +TEXT ·cgocallback(SB),NOSPLIT,$24-24 NO_LOCAL_POINTERS // Load m and g from thread-local storage. @@ -537,7 +521,7 @@ havem: // NOTE: unwindm knows that the saved g->sched.sp is at 8(R29) aka savedsp-16(SP). MOVV m_g0(R3), R1 MOVV (g_sched+gobuf_sp)(R1), R2 - MOVV R2, savedsp-16(SP) + MOVV R2, savedsp-24(SP) // must match frame size MOVV R29, (g_sched+gobuf_sp)(R1) // Switch to m->curg stack and call runtime.cgocallbackg. @@ -546,30 +530,30 @@ havem: // save that information (m->curg->sched) so we can restore it. // We can restore m->curg->sched.sp easily, because calling // runtime.cgocallbackg leaves SP unchanged upon return. - // To save m->curg->sched.pc, we push it onto the stack. - // This has the added benefit that it looks to the traceback - // routine like cgocallbackg is going to return to that - // PC (because the frame we allocate below has the same - // size as cgocallback_gofunc's frame declared above) - // so that the traceback will seamlessly trace back into - // the earlier calls. - // - // In the new goroutine, -8(SP) is unused (where SP refers to - // m->curg's SP while we're setting it up, before we've adjusted it). + // To save m->curg->sched.pc, we push it onto the curg stack and + // open a frame the same size as cgocallback's g0 frame. + // Once we switch to the curg stack, the pushed PC will appear + // to be the return PC of cgocallback, so that the traceback + // will seamlessly trace back into the earlier calls. MOVV m_curg(R3), g JAL runtime·save_g(SB) MOVV (g_sched+gobuf_sp)(g), R2 // prepare stack as R2 MOVV (g_sched+gobuf_pc)(g), R4 - MOVV R4, -24(R2) - MOVV ctxt+24(FP), R1 - MOVV R1, -16(R2) - MOVV $-24(R2), R29 + MOVV R4, -(24+8)(R2) // "saved LR"; must match frame size + // Gather our arguments into registers. 
+ MOVV fn+0(FP), R5 + MOVV frame+8(FP), R6 + MOVV ctxt+16(FP), R7 + MOVV $-(24+8)(R2), R29 // switch stack; must match frame size + MOVV R5, 8(R29) + MOVV R6, 16(R29) + MOVV R7, 24(R29) JAL runtime·cgocallbackg(SB) // Restore g->sched (== m->curg->sched) from saved values. MOVV 0(R29), R4 MOVV R4, (g_sched+gobuf_pc)(g) - MOVV $24(R29), R2 + MOVV $(24+8)(R29), R2 // must match frame size MOVV R2, (g_sched+gobuf_sp)(g) // Switch back to m->g0's stack and restore m->g0->sched.sp. @@ -579,7 +563,7 @@ havem: MOVV m_g0(R3), g JAL runtime·save_g(SB) MOVV (g_sched+gobuf_sp)(g), R29 - MOVV savedsp-16(SP), R2 + MOVV savedsp-24(SP), R2 // must match frame size MOVV R2, (g_sched+gobuf_sp)(g) // If the m on entry was nil, we called needm above to borrow an m diff --git a/src/runtime/asm_mipsx.s b/src/runtime/asm_mipsx.s index aca0510b69..ee87d81436 100644 --- a/src/runtime/asm_mipsx.s +++ b/src/runtime/asm_mipsx.s @@ -472,25 +472,9 @@ g0: MOVW R2, ret+8(FP) RET -// cgocallback(void (*fn)(void*), void *frame, uintptr framesize) -// Turn the fn into a Go func (by taking its address) and call -// cgocallback_gofunc. -TEXT runtime·cgocallback(SB),NOSPLIT,$16-16 - MOVW $fn+0(FP), R1 - MOVW R1, 4(R29) - MOVW frame+4(FP), R1 - MOVW R1, 8(R29) - MOVW framesize+8(FP), R1 - MOVW R1, 12(R29) - MOVW ctxt+12(FP), R1 - MOVW R1, 16(R29) - MOVW $runtime·cgocallback_gofunc(SB), R1 - JAL (R1) - RET - -// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize, uintptr ctxt) +// cgocallback(fn, frame unsafe.Pointer, ctxt uintptr) // See cgocall.go for more details. -TEXT ·cgocallback_gofunc(SB),NOSPLIT,$8-16 +TEXT ·cgocallback(SB),NOSPLIT,$12-12 NO_LOCAL_POINTERS // Load m and g from thread-local storage. @@ -538,7 +522,7 @@ havem: // NOTE: unwindm knows that the saved g->sched.sp is at 4(R29) aka savedsp-8(SP). MOVW m_g0(R3), R1 MOVW (g_sched+gobuf_sp)(R1), R2 - MOVW R2, savedsp-8(SP) + MOVW R2, savedsp-12(SP) // must match frame size MOVW R29, (g_sched+gobuf_sp)(R1) // Switch to m->curg stack and call runtime.cgocallbackg. @@ -547,30 +531,30 @@ havem: // save that information (m->curg->sched) so we can restore it. // We can restore m->curg->sched.sp easily, because calling // runtime.cgocallbackg leaves SP unchanged upon return. - // To save m->curg->sched.pc, we push it onto the stack. - // This has the added benefit that it looks to the traceback - // routine like cgocallbackg is going to return to that - // PC (because the frame we allocate below has the same - // size as cgocallback_gofunc's frame declared above) - // so that the traceback will seamlessly trace back into - // the earlier calls. - // - // In the new goroutine, -4(SP) is unused (where SP refers to - // m->curg's SP while we're setting it up, before we've adjusted it). + // To save m->curg->sched.pc, we push it onto the curg stack and + // open a frame the same size as cgocallback's g0 frame. + // Once we switch to the curg stack, the pushed PC will appear + // to be the return PC of cgocallback, so that the traceback + // will seamlessly trace back into the earlier calls. MOVW m_curg(R3), g JAL runtime·save_g(SB) MOVW (g_sched+gobuf_sp)(g), R2 // prepare stack as R2 MOVW (g_sched+gobuf_pc)(g), R4 - MOVW R4, -12(R2) - MOVW ctxt+12(FP), R1 - MOVW R1, -8(R2) - MOVW $-12(R2), R29 + MOVW R4, -(12+4)(R2) // "saved LR"; must match frame size + // Gather our arguments into registers. 
+ MOVW fn+0(FP), R5 + MOVW frame+4(FP), R6 + MOVW ctxt+8(FP), R7 + MOVW $-(12+4)(R2), R29 // switch stack; must match frame size + MOVW R5, 4(R29) + MOVW R6, 8(R29) + MOVW R7, 12(R29) JAL runtime·cgocallbackg(SB) // Restore g->sched (== m->curg->sched) from saved values. MOVW 0(R29), R4 MOVW R4, (g_sched+gobuf_pc)(g) - MOVW $12(R29), R2 + MOVW $(12+4)(R29), R2 // must match frame size MOVW R2, (g_sched+gobuf_sp)(g) // Switch back to m->g0's stack and restore m->g0->sched.sp. @@ -580,7 +564,7 @@ havem: MOVW m_g0(R3), g JAL runtime·save_g(SB) MOVW (g_sched+gobuf_sp)(g), R29 - MOVW savedsp-8(SP), R2 + MOVW savedsp-12(SP), R2 // must match frame size MOVW R2, (g_sched+gobuf_sp)(g) // If the m on entry was nil, we called needm above to borrow an m diff --git a/src/runtime/asm_ppc64x.s b/src/runtime/asm_ppc64x.s index 603058a61b..dc34c0e4c8 100644 --- a/src/runtime/asm_ppc64x.s +++ b/src/runtime/asm_ppc64x.s @@ -651,26 +651,9 @@ g0: MOVW R3, ret+16(FP) RET -// cgocallback(void (*fn)(void*), void *frame, uintptr framesize, uintptr ctxt) -// Turn the fn into a Go func (by taking its address) and call -// cgocallback_gofunc. -TEXT runtime·cgocallback(SB),NOSPLIT,$32-32 - MOVD $fn+0(FP), R3 - MOVD R3, FIXED_FRAME+0(R1) - MOVD frame+8(FP), R3 - MOVD R3, FIXED_FRAME+8(R1) - MOVD framesize+16(FP), R3 - MOVD R3, FIXED_FRAME+16(R1) - MOVD ctxt+24(FP), R3 - MOVD R3, FIXED_FRAME+24(R1) - MOVD $runtime·cgocallback_gofunc(SB), R12 - MOVD R12, CTR - BL (CTR) - RET - -// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize, uintptr ctxt) +// func cgocallback(fn, frame unsafe.Pointer, ctxt uintptr) // See cgocall.go for more details. -TEXT ·cgocallback_gofunc(SB),NOSPLIT,$16-32 +TEXT ·cgocallback(SB),NOSPLIT,$24-24 NO_LOCAL_POINTERS // Load m and g from thread-local storage. @@ -721,7 +704,7 @@ havem: // NOTE: unwindm knows that the saved g->sched.sp is at 8(R1) aka savedsp-16(SP). MOVD m_g0(R8), R3 MOVD (g_sched+gobuf_sp)(R3), R4 - MOVD R4, savedsp-16(SP) + MOVD R4, savedsp-24(SP) // must match frame size MOVD R1, (g_sched+gobuf_sp)(R3) // Switch to m->curg stack and call runtime.cgocallbackg. @@ -730,30 +713,30 @@ havem: // save that information (m->curg->sched) so we can restore it. // We can restore m->curg->sched.sp easily, because calling // runtime.cgocallbackg leaves SP unchanged upon return. - // To save m->curg->sched.pc, we push it onto the stack. - // This has the added benefit that it looks to the traceback - // routine like cgocallbackg is going to return to that - // PC (because the frame we allocate below has the same - // size as cgocallback_gofunc's frame declared above) - // so that the traceback will seamlessly trace back into - // the earlier calls. - // - // In the new goroutine, -8(SP) is unused (where SP refers to - // m->curg's SP while we're setting it up, before we've adjusted it). + // To save m->curg->sched.pc, we push it onto the curg stack and + // open a frame the same size as cgocallback's g0 frame. + // Once we switch to the curg stack, the pushed PC will appear + // to be the return PC of cgocallback, so that the traceback + // will seamlessly trace back into the earlier calls. MOVD m_curg(R8), g BL runtime·save_g(SB) MOVD (g_sched+gobuf_sp)(g), R4 // prepare stack as R4 MOVD (g_sched+gobuf_pc)(g), R5 - MOVD R5, -(FIXED_FRAME+16)(R4) - MOVD ctxt+24(FP), R3 - MOVD R3, -16(R4) - MOVD $-(FIXED_FRAME+16)(R4), R1 + MOVD R5, -(24+FIXED_FRAME)(R4) // "saved LR"; must match frame size + // Gather our arguments into registers. 
+ MOVD fn+0(FP), R5 + MOVD frame+8(FP), R6 + MOVD ctxt+16(FP), R7 + MOVD $-(24+FIXED_FRAME)(R4), R1 // switch stack; must match frame size + MOVD R5, FIXED_FRAME+0(R1) + MOVD R6, FIXED_FRAME+8(R1) + MOVD R7, FIXED_FRAME+16(R1) BL runtime·cgocallbackg(SB) // Restore g->sched (== m->curg->sched) from saved values. MOVD 0(R1), R5 MOVD R5, (g_sched+gobuf_pc)(g) - MOVD $(FIXED_FRAME+16)(R1), R4 + MOVD $(24+FIXED_FRAME)(R1), R4 // must match frame size MOVD R4, (g_sched+gobuf_sp)(g) // Switch back to m->g0's stack and restore m->g0->sched.sp. @@ -763,7 +746,7 @@ havem: MOVD m_g0(R8), g BL runtime·save_g(SB) MOVD (g_sched+gobuf_sp)(g), R1 - MOVD savedsp-16(SP), R4 + MOVD savedsp-24(SP), R4 // must match frame size MOVD R4, (g_sched+gobuf_sp)(g) // If the m on entry was nil, we called needm above to borrow an m diff --git a/src/runtime/asm_riscv64.s b/src/runtime/asm_riscv64.s index a136085084..fd01fd6f07 100644 --- a/src/runtime/asm_riscv64.s +++ b/src/runtime/asm_riscv64.s @@ -453,8 +453,9 @@ TEXT runtime·goexit(SB),NOSPLIT|NOFRAME|TOPFRAME,$0-0 // traceback from goexit1 must hit code range of goexit MOV ZERO, ZERO // NOP -// func cgocallback_gofunc(fv uintptr, frame uintptr, framesize, ctxt uintptr) -TEXT ·cgocallback_gofunc(SB),NOSPLIT,$24-32 +// cgocallback(fn, frame unsafe.Pointer, ctxt uintptr) +// See cgocall.go for more details. +TEXT ·cgocallback(SB),NOSPLIT,$0-24 // TODO(jsing): Add support for cgo - issue #36641. WORD $0 // crash diff --git a/src/runtime/asm_s390x.s b/src/runtime/asm_s390x.s index 46a434119b..7baef37324 100644 --- a/src/runtime/asm_s390x.s +++ b/src/runtime/asm_s390x.s @@ -575,25 +575,9 @@ g0: MOVW R2, ret+16(FP) RET -// cgocallback(void (*fn)(void*), void *frame, uintptr framesize, uintptr ctxt) -// Turn the fn into a Go func (by taking its address) and call -// cgocallback_gofunc. -TEXT runtime·cgocallback(SB),NOSPLIT,$32-32 - MOVD $fn+0(FP), R3 - MOVD R3, 8(R15) - MOVD frame+8(FP), R3 - MOVD R3, 16(R15) - MOVD framesize+16(FP), R3 - MOVD R3, 24(R15) - MOVD ctxt+24(FP), R3 - MOVD R3, 32(R15) - MOVD $runtime·cgocallback_gofunc(SB), R3 - BL (R3) - RET - -// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize, uintptr ctxt) +// cgocallback(fn, frame unsafe.Pointer, ctxt uintptr) // See cgocall.go for more details. -TEXT ·cgocallback_gofunc(SB),NOSPLIT,$16-32 +TEXT ·cgocallback(SB),NOSPLIT,$24-24 NO_LOCAL_POINTERS // Load m and g from thread-local storage. @@ -641,7 +625,7 @@ havem: // NOTE: unwindm knows that the saved g->sched.sp is at 8(R1) aka savedsp-16(SP). MOVD m_g0(R8), R3 MOVD (g_sched+gobuf_sp)(R3), R4 - MOVD R4, savedsp-16(SP) + MOVD R4, savedsp-24(SP) // must match frame size MOVD R15, (g_sched+gobuf_sp)(R3) // Switch to m->curg stack and call runtime.cgocallbackg. @@ -650,30 +634,30 @@ havem: // save that information (m->curg->sched) so we can restore it. // We can restore m->curg->sched.sp easily, because calling // runtime.cgocallbackg leaves SP unchanged upon return. - // To save m->curg->sched.pc, we push it onto the stack. - // This has the added benefit that it looks to the traceback - // routine like cgocallbackg is going to return to that - // PC (because the frame we allocate below has the same - // size as cgocallback_gofunc's frame declared above) - // so that the traceback will seamlessly trace back into - // the earlier calls. - // - // In the new goroutine, -8(SP) is unused (where SP refers to - // m->curg's SP while we're setting it up, before we've adjusted it). 
+ // To save m->curg->sched.pc, we push it onto the curg stack and + // open a frame the same size as cgocallback's g0 frame. + // Once we switch to the curg stack, the pushed PC will appear + // to be the return PC of cgocallback, so that the traceback + // will seamlessly trace back into the earlier calls. MOVD m_curg(R8), g BL runtime·save_g(SB) MOVD (g_sched+gobuf_sp)(g), R4 // prepare stack as R4 MOVD (g_sched+gobuf_pc)(g), R5 - MOVD R5, -24(R4) - MOVD ctxt+24(FP), R5 - MOVD R5, -16(R4) - MOVD $-24(R4), R15 + MOVD R5, -(24+8)(R4) // "saved LR"; must match frame size + // Gather our arguments into registers. + MOVD fn+0(FP), R1 + MOVD frame+8(FP), R2 + MOVD ctxt+16(FP), R3 + MOVD $-(24+8)(R4), R15 // switch stack; must match frame size + MOVD R1, 8(R15) + MOVD R2, 16(R15) + MOVD R3, 24(R15) BL runtime·cgocallbackg(SB) // Restore g->sched (== m->curg->sched) from saved values. MOVD 0(R15), R5 MOVD R5, (g_sched+gobuf_pc)(g) - MOVD $24(R15), R4 + MOVD $(24+8)(R15), R4 // must match frame size MOVD R4, (g_sched+gobuf_sp)(g) // Switch back to m->g0's stack and restore m->g0->sched.sp. @@ -683,7 +667,7 @@ havem: MOVD m_g0(R8), g BL runtime·save_g(SB) MOVD (g_sched+gobuf_sp)(g), R15 - MOVD savedsp-16(SP), R4 + MOVD savedsp-24(SP), R4 // must match frame size MOVD R4, (g_sched+gobuf_sp)(g) // If the m on entry was nil, we called needm above to borrow an m diff --git a/src/runtime/asm_wasm.s b/src/runtime/asm_wasm.s index 1275af136b..67e81adf0b 100644 --- a/src/runtime/asm_wasm.s +++ b/src/runtime/asm_wasm.s @@ -288,9 +288,6 @@ TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0 TEXT ·asmcgocall(SB), NOSPLIT, $0-0 UNDEF -TEXT ·cgocallback_gofunc(SB), NOSPLIT, $16-32 - UNDEF - #define DISPATCH(NAME, MAXSIZE) \ Get R0; \ I64Const $MAXSIZE; \ @@ -432,7 +429,7 @@ TEXT runtime·goexit(SB), NOSPLIT, $0-0 CALL runtime·goexit1(SB) // does not return UNDEF -TEXT runtime·cgocallback(SB), NOSPLIT, $32-32 +TEXT runtime·cgocallback(SB), NOSPLIT, $0-24 UNDEF // gcWriteBarrier performs a heap pointer write and informs the GC. diff --git a/src/runtime/cgo/asm_386.s b/src/runtime/cgo/asm_386.s index 7293c20bf8..2e7e9512e2 100644 --- a/src/runtime/cgo/asm_386.s +++ b/src/runtime/cgo/asm_386.s @@ -5,8 +5,9 @@ #include "textflag.h" // Called by C code generated by cmd/cgo. -// func crosscall2(fn func(a unsafe.Pointer, n int32, ctxt uintptr), a unsafe.Pointer, n int32, ctxt uintptr) -// Saves C callee-saved registers and calls fn with three arguments. +// func crosscall2(fn, a unsafe.Pointer, n int32, ctxt uintptr) +// Saves C callee-saved registers and calls cgocallback with three arguments. +// fn is the PC of a func(a unsafe.Pointer) function. TEXT crosscall2(SB),NOSPLIT,$28-16 MOVL BP, 24(SP) MOVL BX, 20(SP) @@ -15,12 +16,11 @@ TEXT crosscall2(SB),NOSPLIT,$28-16 MOVL ctxt+12(FP), AX MOVL AX, 8(SP) - MOVL n+8(FP), AX - MOVL AX, 4(SP) MOVL a+4(FP), AX - MOVL AX, 0(SP) + MOVL AX, 4(SP) MOVL fn+0(FP), AX - CALL AX + MOVL AX, 0(SP) + CALL runtime·cgocallback(SB) MOVL 12(SP), DI MOVL 16(SP), SI diff --git a/src/runtime/cgo/asm_amd64.s b/src/runtime/cgo/asm_amd64.s index 06c538b9bc..5dc8e2d235 100644 --- a/src/runtime/cgo/asm_amd64.s +++ b/src/runtime/cgo/asm_amd64.s @@ -5,8 +5,10 @@ #include "textflag.h" // Called by C code generated by cmd/cgo. -// func crosscall2(fn func(a unsafe.Pointer, n int32, ctxt uintptr), a unsafe.Pointer, n int32, ctxt uintptr) -// Saves C callee-saved registers and calls fn with three arguments. 
+// func crosscall2(fn, a unsafe.Pointer, n int32, ctxt uintptr) +// Saves C callee-saved registers and calls cgocallback with three arguments. +// fn is the PC of a func(a unsafe.Pointer) function. +// This signature is known to SWIG, so we can't change it. #ifndef GOOS_windows TEXT crosscall2(SB),NOSPLIT,$0x50-0 /* keeps stack pointer 32-byte aligned */ #else @@ -33,11 +35,12 @@ TEXT crosscall2(SB),NOSPLIT,$0x110-0 /* also need to save xmm6 - xmm15 */ MOVUPS X14, 0xe0(SP) MOVUPS X15, 0xf0(SP) - MOVQ DX, 0x0(SP) /* arg */ - MOVQ R8, 0x8(SP) /* argsize (includes padding) */ + MOVQ CX, 0x0(SP) /* fn */ + MOVQ DX, 0x8(SP) /* arg */ + // Skip n in R8. MOVQ R9, 0x10(SP) /* ctxt */ - CALL CX /* fn */ + CALL runtime·cgocallback(SB) MOVQ 0x48(SP), DI MOVQ 0x50(SP), SI @@ -52,11 +55,12 @@ TEXT crosscall2(SB),NOSPLIT,$0x110-0 /* also need to save xmm6 - xmm15 */ MOVUPS 0xe0(SP), X14 MOVUPS 0xf0(SP), X15 #else - MOVQ SI, 0x0(SP) /* arg */ - MOVQ DX, 0x8(SP) /* argsize (includes padding) */ + MOVQ DI, 0x0(SP) /* fn */ + MOVQ SI, 0x8(SP) /* arg */ + // Skip n in DX. MOVQ CX, 0x10(SP) /* ctxt */ - CALL DI /* fn */ + CALL runtime·cgocallback(SB) #endif MOVQ 0x18(SP), BX diff --git a/src/runtime/cgo/asm_arm.s b/src/runtime/cgo/asm_arm.s index 60132c14a8..ea55e173c1 100644 --- a/src/runtime/cgo/asm_arm.s +++ b/src/runtime/cgo/asm_arm.s @@ -5,51 +5,52 @@ #include "textflag.h" // Called by C code generated by cmd/cgo. -// func crosscall2(fn func(a unsafe.Pointer, n int32, ctxt uintptr), a unsafe.Pointer, n int32, ctxt uintptr) -// Saves C callee-saved registers and calls fn with three arguments. +// func crosscall2(fn, a unsafe.Pointer, n int32, ctxt uintptr) +// Saves C callee-saved registers and calls cgocallback with three arguments. +// fn is the PC of a func(a unsafe.Pointer) function. TEXT crosscall2(SB),NOSPLIT|NOFRAME,$0 - /* - * We still need to save all callee save register as before, and then - * push 3 args for fn (R1, R2, R3). - * Also note that at procedure entry in gc world, 4(R13) will be the - * first arg, so we must push another dummy reg (R0) for 0(R13). - * Additionally, runtime·load_g will clobber R0, so we need to save R0 - * nevertheless. - */ SUB $(8*9), R13 // Reserve space for the floating point registers. - MOVM.WP [R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, g, R11, R12, R14], (R13) + // The C arguments arrive in R0, R1, R2, and R3. We want to + // pass R0, R1, and R3 to Go, so we push those on the stack. + // Also, save C callee-save registers R4-R12. + MOVM.WP [R0, R1, R3, R4, R5, R6, R7, R8, R9, g, R11, R12], (R13) + // Finally, save the link register R14. This also puts the + // arguments we pushed for cgocallback where they need to be, + // starting at 4(R13). + MOVW.W R14, -4(R13) // Skip floating point registers on GOARM < 6. MOVB runtime·goarm(SB), R11 CMP $6, R11 BLT skipfpsave - MOVD F8, (14*4+8*1)(R13) - MOVD F9, (14*4+8*2)(R13) - MOVD F10, (14*4+8*3)(R13) - MOVD F11, (14*4+8*4)(R13) - MOVD F12, (14*4+8*5)(R13) - MOVD F13, (14*4+8*6)(R13) - MOVD F14, (14*4+8*7)(R13) - MOVD F15, (14*4+8*8)(R13) + MOVD F8, (13*4+8*1)(R13) + MOVD F9, (13*4+8*2)(R13) + MOVD F10, (13*4+8*3)(R13) + MOVD F11, (13*4+8*4)(R13) + MOVD F12, (13*4+8*5)(R13) + MOVD F13, (13*4+8*6)(R13) + MOVD F14, (13*4+8*7)(R13) + MOVD F15, (13*4+8*8)(R13) skipfpsave: BL runtime·load_g(SB) - MOVW R15, R14 // R15 is PC. - MOVW 0(R13), R15 + // We set up the arguments to cgocallback when saving registers above. 
+ BL runtime·cgocallback(SB) MOVB runtime·goarm(SB), R11 CMP $6, R11 BLT skipfprest - MOVD (14*4+8*1)(R13), F8 - MOVD (14*4+8*2)(R13), F9 - MOVD (14*4+8*3)(R13), F10 - MOVD (14*4+8*4)(R13), F11 - MOVD (14*4+8*5)(R13), F12 - MOVD (14*4+8*6)(R13), F13 - MOVD (14*4+8*7)(R13), F14 - MOVD (14*4+8*8)(R13), F15 + MOVD (13*4+8*1)(R13), F8 + MOVD (13*4+8*2)(R13), F9 + MOVD (13*4+8*3)(R13), F10 + MOVD (13*4+8*4)(R13), F11 + MOVD (13*4+8*5)(R13), F12 + MOVD (13*4+8*6)(R13), F13 + MOVD (13*4+8*7)(R13), F14 + MOVD (13*4+8*8)(R13), F15 skipfprest: - MOVM.IAW (R13), [R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, g, R11, R12, R14] + MOVW.P 4(R13), R14 + MOVM.IAW (R13), [R0, R1, R3, R4, R5, R6, R7, R8, R9, g, R11, R12] ADD $(8*9), R13 MOVW R14, R15 diff --git a/src/runtime/cgo/asm_arm64.s b/src/runtime/cgo/asm_arm64.s index ce56f9b1c7..1cb25cf89e 100644 --- a/src/runtime/cgo/asm_arm64.s +++ b/src/runtime/cgo/asm_arm64.s @@ -5,19 +5,20 @@ #include "textflag.h" // Called by C code generated by cmd/cgo. -// func crosscall2(fn func(a unsafe.Pointer, n int32, ctxt uintptr), a unsafe.Pointer, n int32, ctxt uintptr) -// Saves C callee-saved registers and calls fn with three arguments. +// func crosscall2(fn, a unsafe.Pointer, n int32, ctxt uintptr) +// Saves C callee-saved registers and calls cgocallback with three arguments. +// fn is the PC of a func(a unsafe.Pointer) function. TEXT crosscall2(SB),NOSPLIT|NOFRAME,$0 /* * We still need to save all callee save register as before, and then - * push 3 args for fn (R1, R2, R3). + * push 3 args for fn (R0, R1, R3), skipping R2. * Also note that at procedure entry in gc world, 8(RSP) will be the * first arg. * TODO(minux): use LDP/STP here if it matters. */ SUB $(8*24), RSP - MOVD R1, (8*1)(RSP) - MOVD R2, (8*2)(RSP) + MOVD R0, (8*1)(RSP) + MOVD R1, (8*2)(RSP) MOVD R3, (8*3)(RSP) MOVD R19, (8*4)(RSP) MOVD R20, (8*5)(RSP) @@ -40,15 +41,11 @@ TEXT crosscall2(SB),NOSPLIT|NOFRAME,$0 FMOVD F14, (8*22)(RSP) FMOVD F15, (8*23)(RSP) - MOVD R0, R19 - // Initialize Go ABI environment BL runtime·load_g(SB) - BL (R19) - MOVD (8*1)(RSP), R1 - MOVD (8*2)(RSP), R2 - MOVD (8*3)(RSP), R3 + BL runtime·cgocallback(SB) + MOVD (8*4)(RSP), R19 MOVD (8*5)(RSP), R20 MOVD (8*6)(RSP), R21 diff --git a/src/runtime/cgo/asm_mips64x.s b/src/runtime/cgo/asm_mips64x.s index 1235852dbe..e51cdf3d12 100644 --- a/src/runtime/cgo/asm_mips64x.s +++ b/src/runtime/cgo/asm_mips64x.s @@ -6,14 +6,14 @@ #include "textflag.h" -/* - * void crosscall2(void (*fn)(void*, int32, uintptr), void*, int32, uintptr) - * Save registers and call fn with two arguments. - */ +// Called by C code generated by cmd/cgo. +// func crosscall2(fn, a unsafe.Pointer, n int32, ctxt uintptr) +// Saves C callee-saved registers and calls cgocallback with three arguments. +// fn is the PC of a func(a unsafe.Pointer) function. TEXT crosscall2(SB),NOSPLIT|NOFRAME,$0 /* * We still need to save all callee save register as before, and then - * push 3 args for fn (R5, R6, R7). + * push 3 args for fn (R4, R5, R7), skipping R6. * Also note that at procedure entry in gc world, 8(R29) will be the * first arg. 
*/ @@ -22,9 +22,9 @@ TEXT crosscall2(SB),NOSPLIT|NOFRAME,$0 #else ADDV $(-8*15), R29 #endif - MOVV R5, (8*1)(R29) // void* - MOVW R6, (8*2)(R29) // int32 - MOVV R7, (8*3)(R29) // uintptr + MOVV R4, (8*1)(R29) // fn unsafe.Pointer + MOVV R5, (8*2)(R29) // a unsafe.Pointer + MOVV R7, (8*3)(R29) // ctxt uintptr MOVV R16, (8*4)(R29) MOVV R17, (8*5)(R29) MOVV R18, (8*6)(R29) @@ -52,7 +52,8 @@ TEXT crosscall2(SB),NOSPLIT|NOFRAME,$0 SRLV $32, R31, RSB SLLV $32, RSB JAL runtime·load_g(SB) - JAL (R4) + + JAL runtime·cgocallback(SB) MOVV (8*4)(R29), R16 MOVV (8*5)(R29), R17 diff --git a/src/runtime/cgo/asm_mipsx.s b/src/runtime/cgo/asm_mipsx.s index e3090da223..1127c8beb4 100644 --- a/src/runtime/cgo/asm_mipsx.s +++ b/src/runtime/cgo/asm_mipsx.s @@ -6,14 +6,14 @@ #include "textflag.h" -/* - * void crosscall2(void (*fn)(void*, int32, uintptr), void*, int32, uintptr) - * Save registers and call fn with two arguments. - */ +// Called by C code generated by cmd/cgo. +// func crosscall2(fn, a unsafe.Pointer, n int32, ctxt uintptr) +// Saves C callee-saved registers and calls cgocallback with three arguments. +// fn is the PC of a func(a unsafe.Pointer) function. TEXT crosscall2(SB),NOSPLIT|NOFRAME,$0 /* * We still need to save all callee save register as before, and then - * push 3 args for fn (R5, R6, R7). + * push 3 args for fn (R4, R5, R7), skipping R6. * Also note that at procedure entry in gc world, 4(R29) will be the * first arg. */ @@ -25,9 +25,9 @@ TEXT crosscall2(SB),NOSPLIT|NOFRAME,$0 #else SUBU $(4*14-16), R29 // For soft-float, no FPR. #endif - MOVW R5, (4*1)(R29) - MOVW R6, (4*2)(R29) - MOVW R7, (4*3)(R29) + MOVW R4, (4*1)(R29) // fn unsafe.Pointer + MOVW R5, (4*2)(R29) // a unsafe.Pointer + MOVW R7, (4*3)(R29) // ctxt uintptr MOVW R16, (4*4)(R29) MOVW R17, (4*5)(R29) MOVW R18, (4*6)(R29) @@ -47,7 +47,8 @@ TEXT crosscall2(SB),NOSPLIT|NOFRAME,$0 MOVD F30, (4*14+8*5)(R29) #endif JAL runtime·load_g(SB) - JAL (R4) + + JAL runtime·cgocallback(SB) MOVW (4*4)(R29), R16 MOVW (4*5)(R29), R17 diff --git a/src/runtime/cgo/asm_ppc64x.s b/src/runtime/cgo/asm_ppc64x.s index 3876f9389c..f4efc1e67d 100644 --- a/src/runtime/cgo/asm_ppc64x.s +++ b/src/runtime/cgo/asm_ppc64x.s @@ -8,8 +8,9 @@ #include "asm_ppc64x.h" // Called by C code generated by cmd/cgo. -// func crosscall2(fn func(a unsafe.Pointer, n int32, ctxt uintptr), a unsafe.Pointer, n int32, ctxt uintptr) -// Saves C callee-saved registers and calls fn with three arguments. +// func crosscall2(fn, a unsafe.Pointer, n int32, ctxt uintptr) +// Saves C callee-saved registers and calls cgocallback with three arguments. +// fn is the PC of a func(a unsafe.Pointer) function. TEXT crosscall2(SB),NOSPLIT|NOFRAME,$0 // Start with standard C stack frame layout and linkage MOVD LR, R0 @@ -26,19 +27,18 @@ TEXT crosscall2(SB),NOSPLIT|NOFRAME,$0 BL runtime·reginit(SB) BL runtime·load_g(SB) - MOVD R3, R12 #ifdef GOARCH_ppc64 // ppc64 use elf ABI v1. we must get the real entry address from // first slot of the function descriptor before call. // Same for AIX. 
- MOVD 8(R12), R2 - MOVD (R12), R12 + MOVD 8(R3), R2 + MOVD (R3), R3 #endif - MOVD R12, CTR - MOVD R4, FIXED_FRAME+0(R1) - MOVW R5, FIXED_FRAME+8(R1) - MOVD R6, FIXED_FRAME+16(R1) - BL (CTR) + MOVD R3, FIXED_FRAME+0(R1) // fn unsafe.Pointer + MOVD R4, FIXED_FRAME+8(R1) // a unsafe.Pointer + // Skip R5 = n uint32 + MOVD R6, FIXED_FRAME+16(R1) // ctxt uintptr + BL runtime·cgocallback(SB) ADD $(288+3*8+FIXED_FRAME), R1 diff --git a/src/runtime/cgo/asm_s390x.s b/src/runtime/cgo/asm_s390x.s index 7eab8f652a..8bf16e75e2 100644 --- a/src/runtime/cgo/asm_s390x.s +++ b/src/runtime/cgo/asm_s390x.s @@ -5,8 +5,9 @@ #include "textflag.h" // Called by C code generated by cmd/cgo. -// func crosscall2(fn func(a unsafe.Pointer, n int32, ctxt uintptr), a unsafe.Pointer, n int32, ctxt uintptr) -// Saves C callee-saved registers and calls fn with three arguments. +// func crosscall2(fn, a unsafe.Pointer, n int32, ctxt uintptr) +// Saves C callee-saved registers and calls cgocallback with three arguments. +// fn is the PC of a func(a unsafe.Pointer) function. TEXT crosscall2(SB),NOSPLIT|NOFRAME,$0 // Start with standard C stack frame layout and linkage. @@ -29,10 +30,11 @@ TEXT crosscall2(SB),NOSPLIT|NOFRAME,$0 // Initialize Go ABI environment. BL runtime·load_g(SB) - MOVD R3, 8(R15) // arg1 - MOVW R4, 16(R15) // arg2 - MOVD R5, 24(R15) // arg3 - BL (R2) // fn(arg1, arg2, arg3) + MOVD R2, 8(R15) // fn unsafe.Pointer + MOVD R3, 16(R15) // a unsafe.Pointer + // Skip R4 = n uint32 + MOVD R5, 24(R15) // ctxt uintptr + BL runtime·cgocallback(SB) FMOVD 32(R15), F8 FMOVD 40(R15), F9 diff --git a/src/runtime/cgo/callbacks.go b/src/runtime/cgo/callbacks.go index 14a218ec92..cd8b795387 100644 --- a/src/runtime/cgo/callbacks.go +++ b/src/runtime/cgo/callbacks.go @@ -9,20 +9,18 @@ import "unsafe" // These utility functions are available to be called from code // compiled with gcc via crosscall2. -// cgocallback is defined in runtime -//go:linkname _runtime_cgocallback runtime.cgocallback -func _runtime_cgocallback(unsafe.Pointer, unsafe.Pointer, uintptr, uintptr) - // The declaration of crosscall2 is: -// void crosscall2(void (*fn)(void *, int), void *, int); +// void crosscall2(void (*fn)(void *), void *, int); // // We need to export the symbol crosscall2 in order to support // callbacks from shared libraries. This applies regardless of // linking mode. // -// Compatibility note: crosscall2 actually takes four arguments, but -// it works to call it with three arguments when calling _cgo_panic. -// That is supported for backward compatibility. +// Compatibility note: SWIG uses crosscall2 in exactly one situation: +// to call _cgo_panic using the pattern shown below. We need to keep +// that pattern working. In particular, crosscall2 actually takes four +// arguments, but it works to call it with three arguments when +// calling _cgo_panic. //go:cgo_export_static crosscall2 //go:cgo_export_dynamic crosscall2 @@ -34,21 +32,18 @@ func _runtime_cgocallback(unsafe.Pointer, unsafe.Pointer, uintptr, uintptr) // crosscall2(_cgo_panic, &a, sizeof a); // /* The function call will not return. */ +// TODO: We should export a regular C function to panic, change SWIG +// to use that instead of the above pattern, and then we can drop +// backwards-compatibility from crosscall2 and stop exporting it. 
+ //go:linkname _runtime_cgo_panic_internal runtime._cgo_panic_internal func _runtime_cgo_panic_internal(p *byte) //go:linkname _cgo_panic _cgo_panic //go:cgo_export_static _cgo_panic //go:cgo_export_dynamic _cgo_panic -//go:nosplit -//go:norace -func _cgo_panic(a unsafe.Pointer, n int32) { - f := _runtime_cgo_panic_internal - type funcval struct { - pc unsafe.Pointer - } - fv := *(**funcval)(unsafe.Pointer(&f)) - _runtime_cgocallback(fv.pc, a, uintptr(n), 0) +func _cgo_panic(a *struct{ cstr *byte }) { + _runtime_cgo_panic_internal(a.cstr) } //go:cgo_import_static x_cgo_init diff --git a/src/runtime/cgocall.go b/src/runtime/cgocall.go index 7ab42a0ed0..9bca279318 100644 --- a/src/runtime/cgocall.go +++ b/src/runtime/cgocall.go @@ -35,31 +35,25 @@ // cgo writes a gcc-compiled function named GoF (not p.GoF, since gcc doesn't // know about packages). The gcc-compiled C function f calls GoF. // -// GoF calls crosscall2(_cgoexp_GoF, frame, framesize, ctxt). -// Crosscall2 (in cgo/asm_$GOARCH.s) is a four-argument adapter from -// the gcc function call ABI to the gc function call ABI. -// It is called from gcc to call gc functions. In this case it calls -// _cgoexp_GoF(frame, framesize), still running on m.g0's stack -// and outside the $GOMAXPROCS limit. Thus, this code cannot yet -// call arbitrary Go code directly and must be careful not to allocate -// memory or use up m.g0's stack. +// GoF initializes "frame", a structure containing all of its +// arguments and slots for p.GoF's results. It calls +// crosscall2(_cgoexp_GoF, frame, framesize, ctxt) using the gcc ABI. // -// _cgoexp_GoF (generated by cmd/cgo) calls -// runtime.cgocallback(funcPC(p.GoF), frame, framesize, ctxt). -// (The reason for having _cgoexp_GoF instead of writing a crosscall3 -// to make this call directly is that _cgoexp_GoF, because it is compiled -// with gc instead of gcc, can refer to dotted names like -// runtime.cgocallback and p.GoF.) +// crosscall2 (in cgo/asm_$GOARCH.s) is a four-argument adapter from +// the gcc function call ABI to the gc function call ABI. At this +// point we're in the Go runtime, but we're still running on m.g0's +// stack and outside the $GOMAXPROCS limit. crosscall2 calls +// runtime.cgocallback(_cgoexp_GoF, frame, ctxt) using the gc ABI. +// (crosscall2's framesize argument is no longer used, but there's one +// case where SWIG calls crosscall2 directly and expects to pass this +// argument. See _cgo_panic.) // -// runtime.cgocallback (in asm_$GOARCH.s) turns the raw PC of p.GoF -// into a Go function value and calls runtime.cgocallback_gofunc. -// -// runtime.cgocallback_gofunc (in asm_$GOARCH.s) switches from m.g0's -// stack to the original g (m.curg)'s stack, on which it calls -// runtime.cgocallbackg(p.GoF, frame, framesize). -// As part of the stack switch, runtime.cgocallback saves the current -// SP as m.g0.sched.sp, so that any use of m.g0's stack during the -// execution of the callback will be done below the existing stack frames. +// runtime.cgocallback (in asm_$GOARCH.s) switches from m.g0's stack +// to the original g (m.curg)'s stack, on which it calls +// runtime.cgocallbackg(_cgoexp_GoF, frame, ctxt). As part of the +// stack switch, runtime.cgocallback saves the current SP as +// m.g0.sched.sp, so that any use of m.g0's stack during the execution +// of the callback will be done below the existing stack frames. // Before overwriting m.g0.sched.sp, it pushes the old value on the // m.g0 stack, so that it can be restored later. 
// @@ -67,19 +61,26 @@ // stack (not an m.g0 stack). First it calls runtime.exitsyscall, which will // block until the $GOMAXPROCS limit allows running this goroutine. // Once exitsyscall has returned, it is safe to do things like call the memory -// allocator or invoke the Go callback function p.GoF. runtime.cgocallbackg +// allocator or invoke the Go callback function. runtime.cgocallbackg // first defers a function to unwind m.g0.sched.sp, so that if p.GoF // panics, m.g0.sched.sp will be restored to its old value: the m.g0 stack // and the m.curg stack will be unwound in lock step. -// Then it calls p.GoF. Finally it pops but does not execute the deferred -// function, calls runtime.entersyscall, and returns to runtime.cgocallback. +// Then it calls _cgoexp_GoF(frame). +// +// _cgoexp_GoF, which was generated by cmd/cgo, unpacks the arguments +// from frame, calls p.GoF, writes the results back to frame, and +// returns. Now we start unwinding this whole process. +// +// runtime.cgocallbackg pops but does not execute the deferred +// function to unwind m.g0.sched.sp, calls runtime.entersyscall, and +// returns to runtime.cgocallback. // // After it regains control, runtime.cgocallback switches back to // m.g0's stack (the pointer is still in m.g0.sched.sp), restores the old -// m.g0.sched.sp value from the stack, and returns to _cgoexp_GoF. +// m.g0.sched.sp value from the stack, and returns to crosscall2. // -// _cgoexp_GoF immediately returns to crosscall2, which restores the -// callee-save registers for gcc and returns to GoF, which returns to f. +// crosscall2 restores the callee-save registers for gcc and returns +// to GoF, which unpacks any result values and returns to f. package runtime @@ -196,7 +197,7 @@ func cgocall(fn, arg unsafe.Pointer) int32 { // Call from C back to Go. //go:nosplit -func cgocallbackg(ctxt uintptr) { +func cgocallbackg(fn, frame unsafe.Pointer, ctxt uintptr) { gp := getg() if gp != gp.m.curg { println("runtime: bad g in cgocallback") @@ -224,7 +225,7 @@ func cgocallbackg(ctxt uintptr) { osPreemptExtExit(gp.m) - cgocallbackg1(ctxt) + cgocallbackg1(fn, frame, ctxt) // At this point unlockOSThread has been called. // The following code must not change to a different m. @@ -239,7 +240,7 @@ func cgocallbackg(ctxt uintptr) { gp.m.syscall = syscall } -func cgocallbackg1(ctxt uintptr) { +func cgocallbackg1(fn, frame unsafe.Pointer, ctxt uintptr) { gp := getg() if gp.m.needextram || atomic.Load(&extraMWaiters) > 0 { gp.m.needextram = false @@ -283,79 +284,16 @@ func cgocallbackg1(ctxt uintptr) { raceacquire(unsafe.Pointer(&racecgosync)) } - type args struct { - fn *funcval - arg unsafe.Pointer - argsize uintptr - } - var cb *args - - // Location of callback arguments depends on stack frame layout - // and size of stack frame of cgocallback_gofunc. - sp := gp.m.g0.sched.sp - switch GOARCH { - default: - throw("cgocallbackg is unimplemented on arch") - case "arm": - // On arm, stack frame is two words and there's a saved LR between - // SP and the stack frame and between the stack frame and the arguments. - cb = (*args)(unsafe.Pointer(sp + 4*sys.PtrSize)) - case "arm64": - // On arm64, stack frame is four words and there's a saved LR between - // SP and the stack frame and between the stack frame and the arguments. - // Additional two words (16-byte alignment) are for saving FP. - cb = (*args)(unsafe.Pointer(sp + 7*sys.PtrSize)) - case "amd64": - // On amd64, stack frame is two words, plus caller PC and BP. 
- cb = (*args)(unsafe.Pointer(sp + 4*sys.PtrSize)) - case "386": - // On 386, stack frame is three words, plus caller PC. - cb = (*args)(unsafe.Pointer(sp + 4*sys.PtrSize)) - case "ppc64", "ppc64le", "s390x": - // On ppc64 and s390x, the callback arguments are in the arguments area of - // cgocallback's stack frame. The stack looks like this: - // +--------------------+------------------------------+ - // | | ... | - // | cgoexp_$fn +------------------------------+ - // | | fixed frame area | - // +--------------------+------------------------------+ - // | | arguments area | - // | cgocallback +------------------------------+ <- sp + 2*minFrameSize + 2*ptrSize - // | | fixed frame area | - // +--------------------+------------------------------+ <- sp + minFrameSize + 2*ptrSize - // | | local variables (2 pointers) | - // | cgocallback_gofunc +------------------------------+ <- sp + minFrameSize - // | | fixed frame area | - // +--------------------+------------------------------+ <- sp - cb = (*args)(unsafe.Pointer(sp + 2*sys.MinFrameSize + 2*sys.PtrSize)) - case "mips64", "mips64le": - // On mips64x, stack frame is two words and there's a saved LR between - // SP and the stack frame and between the stack frame and the arguments. - cb = (*args)(unsafe.Pointer(sp + 4*sys.PtrSize)) - case "mips", "mipsle": - // On mipsx, stack frame is two words and there's a saved LR between - // SP and the stack frame and between the stack frame and the arguments. - cb = (*args)(unsafe.Pointer(sp + 4*sys.PtrSize)) - } - - // Invoke callback. - // NOTE(rsc): passing nil for argtype means that the copying of the - // results back into cb.arg happens without any corresponding write barriers. - // For cgo, cb.arg points into a C stack frame and therefore doesn't - // hold any pointers that the GC can find anyway - the write barrier - // would be a no-op. - reflectcall(nil, unsafe.Pointer(cb.fn), cb.arg, uint32(cb.argsize), 0) + // Invoke callback. This function is generated by cmd/cgo and + // will unpack the argument frame and call the Go function. + var cb func(frame unsafe.Pointer) + cbFV := funcval{uintptr(fn)} + *(*unsafe.Pointer)(unsafe.Pointer(&cb)) = noescape(unsafe.Pointer(&cbFV)) + cb(frame) if raceenabled { racereleasemerge(unsafe.Pointer(&racecgosync)) } - if msanenabled { - // Tell msan that we wrote to the entire argument block. - // This tells msan that we set the results. - // Since we have already called the function it doesn't - // matter that we are writing to the non-result parameters. - msanwrite(cb.arg, cb.argsize) - } // Do not unwind m->g0->sched.sp. // Our caller, cgocallback, will do that. diff --git a/src/runtime/proc.go b/src/runtime/proc.go index 83d2a524e0..c629fd45f0 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -4243,7 +4243,7 @@ func sigprof(pc, sp, lr uintptr, gp *g, mp *m) { // First, it may be that the g switch has no PC update, because the SP // either corresponds to a user g throughout (as in asmcgocall) // or because it has been arranged to look like a user g frame - // (as in cgocallback_gofunc). In this case, since the entire + // (as in cgocallback). In this case, since the entire // transition is a g+SP update, a partial transition updating just one of // those will be detected by the stack bounds check. 
// diff --git a/src/runtime/race/output_test.go b/src/runtime/race/output_test.go index d3e7762175..b4b8936c7c 100644 --- a/src/runtime/race/output_test.go +++ b/src/runtime/race/output_test.go @@ -309,7 +309,7 @@ Read at 0x[0-9,a-f]+ by main goroutine: Previous write at 0x[0-9,a-f]+ by goroutine [0-9]: main\.goCallback\(\) .*/main\.go:27 \+0x[0-9,a-f]+ - main._cgoexpwrap_[0-9a-z]+_goCallback\(\) + _cgoexp_[0-9a-z]+_goCallback\(\) .*_cgo_gotypes\.go:[0-9]+ \+0x[0-9,a-f]+ Goroutine [0-9] \(running\) created at: diff --git a/src/runtime/stubs.go b/src/runtime/stubs.go index 6290142a41..d77cb4d460 100644 --- a/src/runtime/stubs.go +++ b/src/runtime/stubs.go @@ -148,7 +148,13 @@ func noescape(p unsafe.Pointer) unsafe.Pointer { return unsafe.Pointer(x ^ 0) } -func cgocallback(fn, frame unsafe.Pointer, framesize, ctxt uintptr) +// Not all cgocallback frames are actually cgocallback, +// so not all have these arguments. Mark them uintptr so that the GC +// does not misinterpret memory when the arguments are not present. +// cgocallback is not called from Go, only from crosscall2. +// This in turn calls cgocallbackg, which is where we'll find +// pointer-declared arguments. +func cgocallback(fn, frame, ctxt uintptr) func gogo(buf *gobuf) func gosave(buf *gobuf) @@ -163,10 +169,11 @@ func breakpoint() // back into arg+retoffset before returning. If copying result bytes back, // the caller should pass the argument frame type as argtype, so that // call can execute appropriate write barriers during the copy. -// Package reflect passes a frame type. In package runtime, there is only -// one call that copies results back, in cgocallbackg1, and it does NOT pass a -// frame type, meaning there are no write barriers invoked. See that call -// site for justification. +// +// Package reflect always passes a frame type. In package runtime, +// Windows callbacks are the only use of this that copies results +// back, and those cannot have pointers in their results, so runtime +// passes nil for the frame type. // // Package reflect accesses this symbol through a linkname. func reflectcall(argtype *_type, fn, arg unsafe.Pointer, argsize uint32, retoffset uint32) @@ -187,14 +194,6 @@ type neverCallThisFunction struct{} // prematurely and if there is leftover state it may panic. func goexit(neverCallThisFunction) -// Not all cgocallback_gofunc frames are actually cgocallback_gofunc, -// so not all have these arguments. Mark them uintptr so that the GC -// does not misinterpret memory when the arguments are not present. -// cgocallback_gofunc is not called from go, only from cgocallback, -// so the arguments will be found via cgocallback's pointer-declared arguments. -// See the assembly implementations for more details. -func cgocallback_gofunc(fv, frame, framesize, ctxt uintptr) - // publicationBarrier performs a store/store barrier (a "publication" // or "export" barrier). 
Some form of synchronization is required // between initializing an object and making that object accessible to diff --git a/src/runtime/symtab.go b/src/runtime/symtab.go index 84637376bf..932fba3de0 100644 --- a/src/runtime/symtab.go +++ b/src/runtime/symtab.go @@ -326,7 +326,7 @@ const ( funcID_gcBgMarkWorker funcID_systemstack_switch funcID_systemstack - funcID_cgocallback_gofunc + funcID_cgocallback funcID_gogo funcID_externalthreadhandler funcID_debugCallV1 diff --git a/src/runtime/sys_windows_386.s b/src/runtime/sys_windows_386.s index 4ac1527ab1..2e5e82879c 100644 --- a/src/runtime/sys_windows_386.s +++ b/src/runtime/sys_windows_386.s @@ -239,7 +239,7 @@ GLOBL runtime·cbctxts(SB), NOPTR, $4 TEXT runtime·callbackasm1(SB),NOSPLIT,$0 MOVL 0(SP), AX // will use to find our callback context - // remove return address from stack, we are not returning there + // remove return address from stack, we are not returning to callbackasm, but to its caller. ADDL $4, SP // address to callback parameters into CX @@ -251,50 +251,35 @@ TEXT runtime·callbackasm1(SB),NOSPLIT,$0 PUSHL BP PUSHL BX - // determine index into runtime·cbctxts table + // Go ABI requires DF flag to be cleared. + CLD + + // determine index into runtime·cbs table SUBL $runtime·callbackasm(SB), AX MOVL $0, DX MOVL $5, BX // divide by 5 because each call instruction in runtime·callbacks is 5 bytes long DIVL BX - - // find correspondent runtime·cbctxts table entry - MOVL runtime·cbctxts(SB), BX - MOVL -4(BX)(AX*4), BX - - // extract callback context - MOVL wincallbackcontext_gobody(BX), AX - MOVL wincallbackcontext_argsize(BX), DX - - // preserve whatever's at the memory location that - // the callback will use to store the return value - PUSHL 0(CX)(DX*1) - - // extend argsize by size of return value - ADDL $4, DX - - // remember how to restore stack on return - MOVL wincallbackcontext_restorestack(BX), BX - PUSHL BX - - // call target Go function - PUSHL DX // argsize (including return value) - PUSHL CX // callback parameters - PUSHL AX // address of target Go function - CLD - CALL runtime·cgocallback_gofunc(SB) - POPL AX - POPL CX - POPL DX - - // how to restore stack on return - POPL BX - - // return value into AX (as per Windows spec) - // and restore previously preserved value - MOVL -4(CX)(DX*1), AX - POPL -4(CX)(DX*1) - - MOVL BX, CX // cannot use BX anymore + SUBL $1, AX // subtract 1 because return PC is to the next slot + + // Create a struct callbackArgs on our stack. + SUBL $(12+callbackArgs__size), SP + MOVL AX, (12+callbackArgs_index)(SP) // callback index + MOVL CX, (12+callbackArgs_args)(SP) // address of args vector + MOVL $0, (12+callbackArgs_result)(SP) // result + LEAL 12(SP), AX // AX = &callbackArgs{...} + + // Call cgocallback, which will call callbackWrap(frame). + MOVL $0, 8(SP) // context + MOVL AX, 4(SP) // frame (address of callbackArgs) + LEAL ·callbackWrap(SB), AX + MOVL AX, 0(SP) // PC of function to call + CALL runtime·cgocallback(SB) + + // Get callback result. + MOVL (12+callbackArgs_result)(SP), AX + // Get popRet. 
+ MOVL (12+callbackArgs_retPop)(SP), CX // Can't use a callee-save register + ADDL $(12+callbackArgs__size), SP // restore registers as required for windows callback POPL BX diff --git a/src/runtime/sys_windows_amd64.s b/src/runtime/sys_windows_amd64.s index 847542592b..e9ec99a51d 100644 --- a/src/runtime/sys_windows_amd64.s +++ b/src/runtime/sys_windows_amd64.s @@ -291,31 +291,20 @@ TEXT runtime·callbackasm1(SB),NOSPLIT,$0 MOVQ DX, (16+8)(SP) MOVQ R8, (16+16)(SP) MOVQ R9, (16+24)(SP) + // R8 = address of args vector + LEAQ (16+0)(SP), R8 - // remove return address from stack, we are not returning there + // remove return address from stack, we are not returning to callbackasm, but to its caller. MOVQ 0(SP), AX ADDQ $8, SP - // determine index into runtime·cbctxts table + // determine index into runtime·cbs table MOVQ $runtime·callbackasm(SB), DX SUBQ DX, AX MOVQ $0, DX MOVQ $5, CX // divide by 5 because each call instruction in runtime·callbacks is 5 bytes long DIVL CX - - // find correspondent runtime·cbctxts table entry - MOVQ runtime·cbctxts(SB), CX - MOVQ -8(CX)(AX*8), AX - - // extract callback context - MOVQ wincallbackcontext_argsize(AX), DX - MOVQ wincallbackcontext_gobody(AX), AX - - // preserve whatever's at the memory location that - // the callback will use to store the return value - LEAQ 8(SP), CX // args vector, skip return address - PUSHQ 0(CX)(DX*1) // store 8 bytes from just after the args array - ADDQ $8, DX // extend argsize by size of return value + SUBQ $1, AX // subtract 1 because return PC is to the next slot // DI SI BP BX R12 R13 R14 R15 registers and DF flag are preserved // as required by windows callback convention. @@ -330,18 +319,25 @@ TEXT runtime·callbackasm1(SB),NOSPLIT,$0 MOVQ R14, 8(SP) MOVQ R15, 0(SP) - // prepare call stack. use SUBQ to hide from stack frame checks - // cgocallback(Go func, void *frame, uintptr framesize) - SUBQ $24, SP - MOVQ DX, 16(SP) // argsize (including return value) - MOVQ CX, 8(SP) // callback parameters - MOVQ AX, 0(SP) // address of target Go function + // Go ABI requires DF flag to be cleared. CLD - CALL runtime·cgocallback_gofunc(SB) - MOVQ 0(SP), AX - MOVQ 8(SP), CX - MOVQ 16(SP), DX - ADDQ $24, SP + + // Create a struct callbackArgs on our stack to be passed as + // the "frame" to cgocallback and on to callbackWrap. + SUBQ $(24+callbackArgs__size), SP + MOVQ AX, (24+callbackArgs_index)(SP) // callback index + MOVQ R8, (24+callbackArgs_args)(SP) // address of args vector + MOVQ $0, (24+callbackArgs_result)(SP) // result + LEAQ 24(SP), AX + // Call cgocallback, which will call callbackWrap(frame). + MOVQ $0, 16(SP) // context + MOVQ AX, 8(SP) // frame (address of callbackArgs) + LEAQ ·callbackWrap(SB), BX + MOVQ BX, 0(SP) // PC of function value to call (callbackWrap) + CALL ·cgocallback(SB) + // Get callback result. + MOVQ (24+callbackArgs_result)(SP), AX + ADDQ $(24+callbackArgs__size), SP // restore registers as required for windows callback MOVQ 0(SP), R15 @@ -355,8 +351,7 @@ TEXT runtime·callbackasm1(SB),NOSPLIT,$0 ADDQ $64, SP POPFQ - MOVQ -8(CX)(DX*1), AX // return value - POPQ -8(CX)(DX*1) // restore bytes just after the args + // The return value was placed in AX above. 
RET // uint32 tstart_stdcall(M *newm); diff --git a/src/runtime/sys_windows_arm.s b/src/runtime/sys_windows_arm.s index 57415e1306..3fc6d27cb0 100644 --- a/src/runtime/sys_windows_arm.s +++ b/src/runtime/sys_windows_arm.s @@ -314,6 +314,9 @@ TEXT runtime·externalthreadhandler(SB),NOSPLIT|NOFRAME,$0 GLOBL runtime·cbctxts(SB), NOPTR, $4 TEXT runtime·callbackasm1(SB),NOSPLIT|NOFRAME,$0 + // TODO(austin): This needs to be converted to match changes + // in cgocallback, but I have no way to test. See CL 258938, + // and callbackasm1 on amd64 and 386. MOVM.DB.W [R4-R11, R14], (R13) // push {r4-r11, lr} SUB $36, R13 // space for locals diff --git a/src/runtime/syscall_windows.go b/src/runtime/syscall_windows.go index 0e2fcfb02d..ff43e7cbed 100644 --- a/src/runtime/syscall_windows.go +++ b/src/runtime/syscall_windows.go @@ -5,6 +5,7 @@ package runtime import ( + "runtime/internal/sys" "unsafe" ) @@ -22,10 +23,7 @@ func (c *wincallbackcontext) setCleanstack(cleanstack bool) { c.cleanstack = cleanstack } -var ( - cbs callbacks - cbctxts **wincallbackcontext = &cbs.ctxt[0] // to simplify access to cbs.ctxt in sys_windows_*.s -) +var cbs callbacks func callbackasm() @@ -53,6 +51,8 @@ func callbackasmAddr(i int) uintptr { return funcPC(callbackasm) + uintptr(i*entrySize) } +const callbackMaxArgs = 64 + //go:linkname compileCallback syscall.compileCallback func compileCallback(fn eface, cleanstack bool) (code uintptr) { if fn._type == nil || (fn._type.kind&kindMask) != kindFunc { @@ -66,6 +66,9 @@ func compileCallback(fn eface, cleanstack bool) (code uintptr) { if ft.out()[0].size != uintptrSize { panic("compileCallback: expected function with one uintptr-sized result") } + if len(ft.in()) > callbackMaxArgs { + panic("compileCallback: too many function arguments") + } argsize := uintptr(0) for _, t := range ft.in() { if t.size > uintptrSize { @@ -106,6 +109,37 @@ func compileCallback(fn eface, cleanstack bool) (code uintptr) { return r } +type callbackArgs struct { + index uintptr + args *uintptr // Arguments in stdcall/cdecl convention, with registers spilled + // Below are out-args from callbackWrap + result uintptr + retPop uintptr // For 386 cdecl, how many bytes to pop on return +} + +// callbackWrap is called by callbackasm to invoke a registered C callback. +func callbackWrap(a *callbackArgs) { + c := cbs.ctxt[a.index] + a.retPop = c.restorestack + + // Convert from stdcall to Go ABI. We assume the stack layout + // is the same, and we just need to make room for the result. + // + // TODO: This isn't a good assumption. For example, a function + // that takes two uint16 arguments will be laid out + // differently by the stdcall and Go ABIs. We should implement + // proper ABI conversion. + var frame [callbackMaxArgs + 1]uintptr + memmove(unsafe.Pointer(&frame), unsafe.Pointer(a.args), c.argsize) + + // Even though this is copying back results, we can pass a nil + // type because those results must not require write barriers. + reflectcall(nil, c.gobody, noescape(unsafe.Pointer(&frame)), sys.PtrSize+uint32(c.argsize), uint32(c.argsize)) + + // Extract the result. 
+ a.result = frame[c.argsize/sys.PtrSize] +} + const _LOAD_LIBRARY_SEARCH_SYSTEM32 = 0x00000800 // When available, this function will use LoadLibraryEx with the filename diff --git a/src/runtime/traceback.go b/src/runtime/traceback.go index 94f4a44976..f3df152535 100644 --- a/src/runtime/traceback.go +++ b/src/runtime/traceback.go @@ -450,7 +450,7 @@ func gentraceback(pc0, sp0, lr0 uintptr, gp *g, skip int, pcbuf *uintptr, max in } n++ - if f.funcID == funcID_cgocallback_gofunc && len(cgoCtxt) > 0 { + if f.funcID == funcID_cgocallback && len(cgoCtxt) > 0 { ctxt := cgoCtxt[len(cgoCtxt)-1] cgoCtxt = cgoCtxt[:len(cgoCtxt)-1] -- cgit v1.2.1 From c91dffbc9aeaacd087eb0c0c3f718739bc5f8c4a Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Wed, 7 Oct 2020 22:53:52 -0400 Subject: runtime: tidy cgocallback On amd64 and 386, we have a very roundabout way of remembering that we need to dropm on return that currently involves saving a zero to needm's argument slot and later bringing it back. Just store the zero. This also makes amd64 and 386 more consistent with cgocallback on all other platforms: rather than saving the old M to the G stack, they now save it to a named slot on the G0 stack. The needm function no longer needs a dummy argument to get the SP, so we drop that. Change-Id: I7e84bb4a5ff9552de70dcf41d8accf02310535e7 Reviewed-on: https://go-review.googlesource.com/c/go/+/263268 Trust: Austin Clements Run-TryBot: Austin Clements TryBot-Result: Go Bot Reviewed-by: Cherry Zhang --- src/runtime/asm_386.s | 18 +++++++----------- src/runtime/asm_amd64.s | 16 ++++++---------- src/runtime/proc.go | 6 +++--- src/runtime/signal_unix.go | 6 +++--- 4 files changed, 19 insertions(+), 27 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/asm_386.s b/src/runtime/asm_386.s index a54b68e03d..fa3b1be339 100644 --- a/src/runtime/asm_386.s +++ b/src/runtime/asm_386.s @@ -704,7 +704,7 @@ nosave: // cgocallback(fn, frame unsafe.Pointer, ctxt uintptr) // See cgocall.go for more details. -TEXT ·cgocallback(SB),NOSPLIT,$16-12 // Frame size must match commented places below +TEXT ·cgocallback(SB),NOSPLIT,$12-12 // Frame size must match commented places below NO_LOCAL_POINTERS // If g is nil, Go did not create the current thread. @@ -722,13 +722,12 @@ TEXT ·cgocallback(SB),NOSPLIT,$16-12 // Frame size must match commented places CMPL BP, $0 JEQ needm MOVL g_m(BP), BP - MOVL BP, DX // saved copy of oldm + MOVL BP, savedm-4(SP) // saved copy of oldm JMP havem needm: - MOVL $0, 0(SP) MOVL $runtime·needm(SB), AX CALL AX - MOVL 0(SP), DX + MOVL $0, savedm-4(SP) // dropm on return get_tls(CX) MOVL g(CX), BP MOVL g_m(BP), BP @@ -769,8 +768,6 @@ havem: // Once we switch to the curg stack, the pushed PC will appear // to be the return PC of cgocallback, so that the traceback // will seamlessly trace back into the earlier calls. - // - // In the new goroutine, 12(SP) holds the saved oldm (DX) register. MOVL m_curg(BP), SI MOVL SI, g(CX) MOVL (g_sched+gobuf_sp)(SI), DI // prepare stack as DI @@ -780,20 +777,18 @@ havem: MOVL fn+0(FP), AX MOVL frame+4(FP), BX MOVL ctxt+8(FP), CX - LEAL -(4+16)(DI), SP // Must match declared frame size - MOVL DX, 12(SP) + LEAL -(4+12)(DI), SP // Must match declared frame size MOVL AX, 0(SP) MOVL BX, 4(SP) MOVL CX, 8(SP) CALL runtime·cgocallbackg(SB) - MOVL 12(SP), DX // Restore g->sched (== m->curg->sched) from saved values. 
get_tls(CX) MOVL g(CX), SI - MOVL 16(SP), BP // Must match declared frame size + MOVL 12(SP), BP // Must match declared frame size MOVL BP, (g_sched+gobuf_pc)(SI) - LEAL (16+4)(SP), DI // Must match declared frame size + LEAL (12+4)(SP), DI // Must match declared frame size MOVL DI, (g_sched+gobuf_sp)(SI) // Switch back to m->g0's stack and restore m->g0->sched.sp. @@ -809,6 +804,7 @@ havem: // If the m on entry was nil, we called needm above to borrow an m // for the duration of the call. Since the call is over, return it with dropm. + MOVL savedm-4(SP), DX CMPL DX, $0 JNE 3(PC) MOVL $runtime·dropm(SB), AX diff --git a/src/runtime/asm_amd64.s b/src/runtime/asm_amd64.s index 3d5d9c4d58..19a3bb2d7d 100644 --- a/src/runtime/asm_amd64.s +++ b/src/runtime/asm_amd64.s @@ -693,7 +693,7 @@ nosave: // func cgocallback(fn, frame unsafe.Pointer, ctxt uintptr) // See cgocall.go for more details. -TEXT ·cgocallback(SB),NOSPLIT,$32-24 +TEXT ·cgocallback(SB),NOSPLIT,$24-24 NO_LOCAL_POINTERS // If g is nil, Go did not create the current thread. @@ -711,13 +711,12 @@ TEXT ·cgocallback(SB),NOSPLIT,$32-24 CMPQ BX, $0 JEQ needm MOVQ g_m(BX), BX - MOVQ BX, R8 // holds oldm until end of function + MOVQ BX, savedm-8(SP) // saved copy of oldm JMP havem needm: - MOVQ $0, 0(SP) - MOVQ $runtime·needm(SB), AX + MOVQ $runtime·needm(SB), AX CALL AX - MOVQ 0(SP), R8 + MOVQ $0, savedm-8(SP) // dropm on return get_tls(CX) MOVQ g(CX), BX MOVQ g_m(BX), BX @@ -758,8 +757,6 @@ havem: // Once we switch to the curg stack, the pushed PC will appear // to be the return PC of cgocallback, so that the traceback // will seamlessly trace back into the earlier calls. - // - // In the new goroutine, 24(SP) holds the saved R8. MOVQ m_curg(BX), SI MOVQ SI, g(CX) MOVQ (g_sched+gobuf_sp)(SI), DI // prepare stack as DI @@ -776,12 +773,10 @@ havem: SUBQ AX, DI // Allocate the same frame size on the g stack MOVQ DI, SP - MOVQ R8, 24(SP) MOVQ BX, 0(SP) MOVQ CX, 8(SP) MOVQ DX, 16(SP) CALL runtime·cgocallbackg(SB) - MOVQ 24(SP), R8 // Compute the size of the frame again. FP and SP have // completely different values here than they did above, @@ -811,7 +806,8 @@ havem: // If the m on entry was nil, we called needm above to borrow an m // for the duration of the call. Since the call is over, return it with dropm. - CMPQ R8, $0 + MOVQ savedm-8(SP), BX + CMPQ BX, $0 JNE 3(PC) MOVQ $runtime·dropm(SB), AX CALL AX diff --git a/src/runtime/proc.go b/src/runtime/proc.go index c629fd45f0..ec4e6d8751 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -1695,7 +1695,7 @@ func allocm(_p_ *p, fn func(), id int64) *m { // When the callback is done with the m, it calls dropm to // put the m back on the list. //go:nosplit -func needm(x byte) { +func needm() { if (iscgo || GOOS == "windows") && !cgoHasExtraM { // Can happen if C/C++ code calls Go from a global ctor. // Can also happen on Windows if a global ctor uses a @@ -1740,8 +1740,8 @@ func needm(x byte) { // which is more than enough for us. setg(mp.g0) _g_ := getg() - _g_.stack.hi = uintptr(noescape(unsafe.Pointer(&x))) + 1024 - _g_.stack.lo = uintptr(noescape(unsafe.Pointer(&x))) - 32*1024 + _g_.stack.hi = getcallersp() + 1024 + _g_.stack.lo = getcallersp() - 32*1024 _g_.stackguard0 = _g_.stack.lo + _StackGuard // Initialize this thread to use the m. 
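The needm hunk above now derives the borrowed thread's guessed stack bounds from getcallersp() instead of the address of a dummy byte argument. As a rough standalone sketch of that bound-guessing step (illustrative only: guessStackBounds, its constants, and the sample address are not runtime names or values), the calculation amounts to:

	package main

	import "fmt"

	// guessStackBounds mimics how needm picks conservative bounds from a
	// single known stack address: assume little lives above it and at
	// most 32 KiB below it.
	func guessStackBounds(sp uintptr) (lo, hi uintptr) {
		hi = sp + 1024
		lo = sp - 32*1024
		return lo, hi
	}

	func main() {
		lo, hi := guessStackBounds(0x7fff0000)
		fmt.Printf("lo=%#x hi=%#x\n", lo, hi)
	}

The bounds remain deliberately conservative guesses; as the surrounding proc.go comment notes, 32 KiB is assumed to be more than enough for a C-created thread.
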
diff --git a/src/runtime/signal_unix.go b/src/runtime/signal_unix.go index e8b6f95d8f..9318a9b8bc 100644 --- a/src/runtime/signal_unix.go +++ b/src/runtime/signal_unix.go @@ -504,14 +504,14 @@ func adjustSignalStack(sig uint32, mp *m, gsigStack *gsignalStack) bool { sigaltstack(nil, &st) if st.ss_flags&_SS_DISABLE != 0 { setg(nil) - needm(0) + needm() noSignalStack(sig) dropm() } stsp := uintptr(unsafe.Pointer(st.ss_sp)) if sp < stsp || sp >= stsp+st.ss_size { setg(nil) - needm(0) + needm() sigNotOnStack(sig) dropm() } @@ -951,7 +951,7 @@ func badsignal(sig uintptr, c *sigctxt) { exit(2) *(*uintptr)(unsafe.Pointer(uintptr(123))) = 2 } - needm(0) + needm() if !sigsend(uint32(sig)) { // A foreign thread received the signal sig, and the // Go code does not want to handle it. -- cgit v1.2.1 From bda37a0b8a4e89318901a68492b79cf6531fa2ff Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Sat, 3 Oct 2020 19:52:08 -0400 Subject: runtime: tidy compileCallback This makes a few minor cleanups and simplifications to compileCallback. Change-Id: Ibebf4b5ed66fb68bba7c84129c127cd4d8a691fe Reviewed-on: https://go-review.googlesource.com/c/go/+/263269 Trust: Austin Clements Trust: Alex Brainman Run-TryBot: Austin Clements TryBot-Result: Go Bot Reviewed-by: Alex Brainman --- src/runtime/runtime2.go | 8 ---- src/runtime/syscall_windows.go | 86 ++++++++++++++++++++++++++---------------- 2 files changed, 53 insertions(+), 41 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go index 21dd7b3949..7bac5fd38d 100644 --- a/src/runtime/runtime2.go +++ b/src/runtime/runtime2.go @@ -387,14 +387,6 @@ type libcall struct { err uintptr // error number } -// describes how to handle callback -type wincallbackcontext struct { - gobody unsafe.Pointer // go function to call - argsize uintptr // callback arguments size (in bytes) - restorestack uintptr // adjust stack on return by (in bytes) (386 only) - cleanstack bool -} - // Stack describes a Go execution stack. // The bounds of the stack are exactly [lo, hi), // with no implicit data structures on either side. diff --git a/src/runtime/syscall_windows.go b/src/runtime/syscall_windows.go index ff43e7cbed..3a34d9ddba 100644 --- a/src/runtime/syscall_windows.go +++ b/src/runtime/syscall_windows.go @@ -9,22 +9,26 @@ import ( "unsafe" ) -type callbacks struct { - lock mutex - ctxt [cb_max]*wincallbackcontext - n int +// cbs stores all registered Go callbacks. +var cbs struct { + lock mutex + ctxt [cb_max]winCallback + index map[winCallbackKey]int + n int } -func (c *wincallbackcontext) isCleanstack() bool { - return c.cleanstack +// winCallback records information about a registered Go callback. +type winCallback struct { + fn *funcval // Go function + argsize uintptr // Callback arguments size (in bytes) + cdecl bool // C function uses cdecl calling convention } -func (c *wincallbackcontext) setCleanstack(cleanstack bool) { - c.cleanstack = cleanstack +type winCallbackKey struct { + fn *funcval + cdecl bool } -var cbs callbacks - func callbackasm() // callbackasmAddr returns address of runtime.callbackasm @@ -53,8 +57,20 @@ func callbackasmAddr(i int) uintptr { const callbackMaxArgs = 64 +// compileCallback converts a Go function fn into a C function pointer +// that can be passed to Windows APIs. +// +// On 386, if cdecl is true, the returned C function will use the +// cdecl calling convention; otherwise, it will use stdcall. On amd64, +// it always uses fastcall. On arm, it always uses the ARM convention. 
+// //go:linkname compileCallback syscall.compileCallback -func compileCallback(fn eface, cleanstack bool) (code uintptr) { +func compileCallback(fn eface, cdecl bool) (code uintptr) { + if GOARCH != "386" { + // cdecl is only meaningful on 386. + cdecl = false + } + if fn._type == nil || (fn._type.kind&kindMask) != kindFunc { panic("compileCallback: expected function with one uintptr-sized result") } @@ -77,36 +93,32 @@ func compileCallback(fn eface, cleanstack bool) (code uintptr) { argsize += uintptrSize } + key := winCallbackKey{(*funcval)(fn.data), cdecl} + lock(&cbs.lock) // We don't unlock this in a defer because this is used from the system stack. - n := cbs.n - for i := 0; i < n; i++ { - if cbs.ctxt[i].gobody == fn.data && cbs.ctxt[i].isCleanstack() == cleanstack { - r := callbackasmAddr(i) - unlock(&cbs.lock) - return r - } - } - if n >= cb_max { + // Check if this callback is already registered. + if n, ok := cbs.index[key]; ok { unlock(&cbs.lock) - throw("too many callback functions") + return callbackasmAddr(n) } - c := new(wincallbackcontext) - c.gobody = fn.data - c.argsize = argsize - c.setCleanstack(cleanstack) - if cleanstack && argsize != 0 { - c.restorestack = argsize - } else { - c.restorestack = 0 + // Register the callback. + if cbs.index == nil { + cbs.index = make(map[winCallbackKey]int) } + n := cbs.n + if n >= len(cbs.ctxt) { + unlock(&cbs.lock) + throw("too many callback functions") + } + c := winCallback{key.fn, argsize, cdecl} cbs.ctxt[n] = c + cbs.index[key] = n cbs.n++ - r := callbackasmAddr(n) unlock(&cbs.lock) - return r + return callbackasmAddr(n) } type callbackArgs struct { @@ -120,7 +132,15 @@ type callbackArgs struct { // callbackWrap is called by callbackasm to invoke a registered C callback. func callbackWrap(a *callbackArgs) { c := cbs.ctxt[a.index] - a.retPop = c.restorestack + if GOARCH == "386" { + if c.cdecl { + // In cdecl, the callee is responsible for + // popping its arguments. + a.retPop = c.argsize + } else { + a.retPop = 0 + } + } // Convert from stdcall to Go ABI. We assume the stack layout // is the same, and we just need to make room for the result. @@ -134,7 +154,7 @@ func callbackWrap(a *callbackArgs) { // Even though this is copying back results, we can pass a nil // type because those results must not require write barriers. - reflectcall(nil, c.gobody, noescape(unsafe.Pointer(&frame)), sys.PtrSize+uint32(c.argsize), uint32(c.argsize)) + reflectcall(nil, unsafe.Pointer(c.fn), noescape(unsafe.Pointer(&frame)), sys.PtrSize+uint32(c.argsize), uint32(c.argsize)) // Extract the result. a.result = frame[c.argsize/sys.PtrSize] -- cgit v1.2.1 From 614a8b7c8ad42ff8a9bc363f813af2aae046fd0c Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Fri, 16 Oct 2020 22:22:20 -0400 Subject: runtime: tidy Windows callback test This simplifies the systematic test of Windows callbacks with different signatures and prepares it for expanded coverage of function signatures. It now returns a result from the Go function and threads it back through C. This simplifies things, but also previously the code could have succeeded by simply not calling the callbacks at all (though other tests would have caught that). It bundles together the C function description and the Go function it's intended to call. Now the test source generation and the test running both loop over a single slice of test functions. Since the C function and Go function are now bundled, it generates the C function by reflectively inspecting the signature of the Go function. 
For the moment, we keep the same test suite, which is entirely functions with "uintptr" arguments, but we'll expand this shortly. It now use sub-tests. This way tests automatically get useful diagnostic labels in failures and the tests don't have to catch panics on their own. It eliminates the DLL function argument. I honestly couldn't figure out what the point of this was, and it added what appeared to be an unnecessary loop level to the tests. Change-Id: I120dfd4785057cc2c392bd2c821302f276bd128e Reviewed-on: https://go-review.googlesource.com/c/go/+/263270 Trust: Austin Clements Trust: Alex Brainman Run-TryBot: Austin Clements TryBot-Result: Go Bot Reviewed-by: Alex Brainman --- src/runtime/syscall_windows_test.go | 224 +++++++++++++++--------------------- 1 file changed, 90 insertions(+), 134 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/syscall_windows_test.go b/src/runtime/syscall_windows_test.go index 2e74546e38..cb942beb3e 100644 --- a/src/runtime/syscall_windows_test.go +++ b/src/runtime/syscall_windows_test.go @@ -9,11 +9,13 @@ import ( "fmt" "internal/syscall/windows/sysdll" "internal/testenv" + "io" "io/ioutil" "math" "os" "os/exec" "path/filepath" + "reflect" "runtime" "strconv" "strings" @@ -285,99 +287,85 @@ func TestCallbackInAnotherThread(t *testing.T) { } } -type cbDLLFunc int // int determines number of callback parameters - -func (f cbDLLFunc) stdcallName() string { - return fmt.Sprintf("stdcall%d", f) +type cbFunc struct { + goFunc interface{} } -func (f cbDLLFunc) cdeclName() string { - return fmt.Sprintf("cdecl%d", f) +func (f cbFunc) cName(cdecl bool) string { + name := "stdcall" + if cdecl { + name = "cdecl" + } + t := reflect.TypeOf(f.goFunc) + for i := 0; i < t.NumIn(); i++ { + name += "_" + t.In(i).Name() + } + return name } -func (f cbDLLFunc) buildOne(stdcall bool) string { - var funcname, attr string - if stdcall { - funcname = f.stdcallName() - attr = "__stdcall" - } else { - funcname = f.cdeclName() +func (f cbFunc) cSrc(w io.Writer, cdecl bool) { + // Construct a C function that takes a callback with + // f.goFunc's signature, and calls it with integers 1..N. 
+ funcname := f.cName(cdecl) + attr := "__stdcall" + if cdecl { attr = "__cdecl" } typename := "t" + funcname - p := make([]string, f) - for i := range p { - p[i] = "uintptr_t" - } - params := strings.Join(p, ",") - for i := range p { - p[i] = fmt.Sprintf("%d", i+1) - } - args := strings.Join(p, ",") - return fmt.Sprintf(` -typedef void %s (*%s)(%s); -void %s(%s f, uintptr_t n) { - uintptr_t i; - for(i=0;i\n\n" + f.buildOne(false) + f.buildOne(true) +func (f cbFunc) testOne(t *testing.T, dll *syscall.DLL, cdecl bool, cb uintptr) { + r1, _, _ := dll.MustFindProc(f.cName(cdecl)).Call(cb) + + want := 0 + for i := 0; i < reflect.TypeOf(f.goFunc).NumIn(); i++ { + want += i + 1 + } + if int(r1) != want { + t.Errorf("wanted result %d; got %d", want, r1) + } } -var cbFuncs = [...]interface{}{ - 2: func(i1, i2 uintptr) uintptr { - if i1+i2 != 3 { - panic("bad input") - } - return 0 - }, - 3: func(i1, i2, i3 uintptr) uintptr { - if i1+i2+i3 != 6 { - panic("bad input") - } - return 0 - }, - 4: func(i1, i2, i3, i4 uintptr) uintptr { - if i1+i2+i3+i4 != 10 { - panic("bad input") - } - return 0 - }, - 5: func(i1, i2, i3, i4, i5 uintptr) uintptr { - if i1+i2+i3+i4+i5 != 15 { - panic("bad input") - } - return 0 - }, - 6: func(i1, i2, i3, i4, i5, i6 uintptr) uintptr { - if i1+i2+i3+i4+i5+i6 != 21 { - panic("bad input") - } - return 0 - }, - 7: func(i1, i2, i3, i4, i5, i6, i7 uintptr) uintptr { - if i1+i2+i3+i4+i5+i6+i7 != 28 { - panic("bad input") - } - return 0 - }, - 8: func(i1, i2, i3, i4, i5, i6, i7, i8 uintptr) uintptr { - if i1+i2+i3+i4+i5+i6+i7+i8 != 36 { - panic("bad input") - } - return 0 - }, - 9: func(i1, i2, i3, i4, i5, i6, i7, i8, i9 uintptr) uintptr { - if i1+i2+i3+i4+i5+i6+i7+i8+i9 != 45 { - panic("bad input") - } - return 0 - }, +var cbFuncs = []cbFunc{ + {func(i1, i2 uintptr) uintptr { + return i1 + i2 + }}, + {func(i1, i2, i3 uintptr) uintptr { + return i1 + i2 + i3 + }}, + {func(i1, i2, i3, i4 uintptr) uintptr { + return i1 + i2 + i3 + i4 + }}, + {func(i1, i2, i3, i4, i5 uintptr) uintptr { + return i1 + i2 + i3 + i4 + i5 + }}, + {func(i1, i2, i3, i4, i5, i6 uintptr) uintptr { + return i1 + i2 + i3 + i4 + i5 + i6 + }}, + {func(i1, i2, i3, i4, i5, i6, i7 uintptr) uintptr { + return i1 + i2 + i3 + i4 + i5 + i6 + i7 + }}, + {func(i1, i2, i3, i4, i5, i6, i7, i8 uintptr) uintptr { + return i1 + i2 + i3 + i4 + i5 + i6 + i7 + i8 + }}, + {func(i1, i2, i3, i4, i5, i6, i7, i8, i9 uintptr) uintptr { + return i1 + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + }}, } type cbDLL struct { @@ -385,21 +373,23 @@ type cbDLL struct { buildArgs func(out, src string) []string } -func (d *cbDLL) buildSrc(t *testing.T, path string) { +func (d *cbDLL) makeSrc(t *testing.T, path string) { f, err := os.Create(path) if err != nil { t.Fatalf("failed to create source file: %v", err) } defer f.Close() - for i := 2; i < 10; i++ { - fmt.Fprint(f, cbDLLFunc(i).build()) + fmt.Fprintf(f, "#include \n\n") + for _, cbf := range cbFuncs { + cbf.cSrc(f, false) + cbf.cSrc(f, true) } } func (d *cbDLL) build(t *testing.T, dir string) string { srcname := d.name + ".c" - d.buildSrc(t, filepath.Join(dir, srcname)) + d.makeSrc(t, filepath.Join(dir, srcname)) outname := d.name + ".dll" args := d.buildArgs(outname, srcname) cmd := exec.Command(args[0], args[1:]...) 
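Between these hunks, it may help to see the reflection step that cbFunc.cName (earlier in this diff) relies on in isolation. The following standalone program is illustrative only: cNameFor and the sample callback are inventions for this sketch, not part of the test.

	package main

	import (
		"fmt"
		"reflect"
	)

	// cNameFor builds a C-side symbol name from a Go callback's parameter
	// types, mirroring the naming scheme used by the test's cbFunc.cName.
	func cNameFor(goFunc interface{}, cdecl bool) string {
		name := "stdcall"
		if cdecl {
			name = "cdecl"
		}
		t := reflect.TypeOf(goFunc)
		for i := 0; i < t.NumIn(); i++ {
			name += "_" + t.In(i).Name()
		}
		return name
	}

	func main() {
		cb := func(a, b uintptr) uintptr { return a + b }
		fmt.Println(cNameFor(cb, false)) // stdcall_uintptr_uintptr
		fmt.Println(cNameFor(cb, true))  // cdecl_uintptr_uintptr
	}

Since each parameter contributes its Go type name, every entry in cbFuncs gets its own stdcall and cdecl entry point in the generated DLL.
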
@@ -426,51 +416,6 @@ var cbDLLs = []cbDLL{ }, } -type cbTest struct { - n int // number of callback parameters - param uintptr // dll function parameter -} - -func (test *cbTest) run(t *testing.T, dllpath string) { - dll := syscall.MustLoadDLL(dllpath) - defer dll.Release() - cb := cbFuncs[test.n] - stdcall := syscall.NewCallback(cb) - f := cbDLLFunc(test.n) - test.runOne(t, dll, f.stdcallName(), stdcall) - cdecl := syscall.NewCallbackCDecl(cb) - test.runOne(t, dll, f.cdeclName(), cdecl) -} - -func (test *cbTest) runOne(t *testing.T, dll *syscall.DLL, proc string, cb uintptr) { - defer func() { - if r := recover(); r != nil { - t.Errorf("dll call %v(..., %d) failed: %v", proc, test.param, r) - } - }() - dll.MustFindProc(proc).Call(cb, test.param) -} - -var cbTests = []cbTest{ - {2, 1}, - {2, 10000}, - {3, 3}, - {4, 5}, - {4, 6}, - {5, 2}, - {6, 7}, - {6, 8}, - {7, 6}, - {8, 1}, - {9, 8}, - {9, 10000}, - {3, 4}, - {5, 3}, - {7, 7}, - {8, 2}, - {9, 9}, -} - func TestStdcallAndCDeclCallbacks(t *testing.T) { if _, err := exec.LookPath("gcc"); err != nil { t.Skip("skipping test: gcc is missing") @@ -482,10 +427,21 @@ func TestStdcallAndCDeclCallbacks(t *testing.T) { defer os.RemoveAll(tmp) for _, dll := range cbDLLs { - dllPath := dll.build(t, tmp) - for _, test := range cbTests { - test.run(t, dllPath) - } + t.Run(dll.name, func(t *testing.T) { + dllPath := dll.build(t, tmp) + dll := syscall.MustLoadDLL(dllPath) + defer dll.Release() + for _, cbf := range cbFuncs { + t.Run(cbf.cName(false), func(t *testing.T) { + stdcall := syscall.NewCallback(cbf.goFunc) + cbf.testOne(t, dll, false, stdcall) + }) + t.Run(cbf.cName(true), func(t *testing.T) { + cdecl := syscall.NewCallbackCDecl(cbf.goFunc) + cbf.testOne(t, dll, true, cdecl) + }) + } + }) } } -- cgit v1.2.1 From 532c199ee56cdbc2cfd12da1c1cfb3359b122c7c Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Sat, 17 Oct 2020 18:42:03 -0400 Subject: runtime: fix sub-uintptr-sized Windows callback arguments The Windows callback support accepts Go functions with arguments that are uintptr-sized or smaller. However, it doesn't implement smaller arguments correctly. It assumes the Windows arguments layout is equivalent to the Go argument layout. This is often true, but because Windows C ABIs pad arguments to word size, while Go packs arguments, the layout is different if there are multiple sub-word-size arguments in a row. For example, a function with two uint16 arguments will have a two-word C argument frame, but only a 4 byte Go argument frame. There are also subtleties surrounding floating-point register arguments that it doesn't handle correctly. To fix this, when constructing a callback, we examine the Go function's signature to construct a mapping between the C argument frame and the Go argument frame. When the callback is invoked, we use this mapping to build the Go argument frame and copy the result back. This adds several test cases to TestStdcallAndCDeclCallbacks that exercise more complex function signatures. These all fail with the current code, but work with this CL. In addition to fixing these callback types, this is also a step toward the Go register ABI (#40724), which is going to make the ABI translation more complex. 
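To make the padding mismatch concrete, here is a toy standalone program, not part of this CL and with all names invented for the example, that contrasts the two frame sizes for a callback taking two uint16 arguments on 64-bit Windows:

	package main

	import (
		"fmt"
		"unsafe"
	)

	func main() {
		word := unsafe.Sizeof(uintptr(0)) // 8 on amd64
		// C side: stdcall/cdecl/fastcall pad each argument to a word,
		// so two uint16 arguments occupy two words on the C stack.
		cFrame := 2 * word
		// Go side: the Go ABI packs arguments, so the same two uint16s
		// occupy only four bytes of the Go argument frame.
		goFrame := 2 * unsafe.Sizeof(uint16(0))
		fmt.Println(cFrame, goFrame) // 16 4 on amd64
	}

Bridging exactly this gap is what the per-argument copy plan (abiMap) built in the diff below is for.
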
Change-Id: I19fb1681b659d9fd528ffd5e88912bebb95da052 Reviewed-on: https://go-review.googlesource.com/c/go/+/263271 Trust: Austin Clements Trust: Alex Brainman Run-TryBot: Austin Clements TryBot-Result: Go Bot Reviewed-by: Michael Knyszek Reviewed-by: Alex Brainman --- src/runtime/syscall_windows.go | 143 +++++++++++++++++++++++++++--------- src/runtime/syscall_windows_test.go | 32 +++++++- 2 files changed, 136 insertions(+), 39 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/syscall_windows.go b/src/runtime/syscall_windows.go index 3a34d9ddba..21f2452b5a 100644 --- a/src/runtime/syscall_windows.go +++ b/src/runtime/syscall_windows.go @@ -19,9 +19,32 @@ var cbs struct { // winCallback records information about a registered Go callback. type winCallback struct { - fn *funcval // Go function - argsize uintptr // Callback arguments size (in bytes) - cdecl bool // C function uses cdecl calling convention + fn *funcval // Go function + retPop uintptr // For 386 cdecl, how many bytes to pop on return + + // abiMap specifies how to translate from a C frame to a Go + // frame. This does not specify how to translate back because + // the result is always a uintptr. If the C ABI is fastcall, + // this assumes the four fastcall registers were first spilled + // to the shadow space. + abiMap []abiPart + // retOffset is the offset of the uintptr-sized result in the Go + // frame. + retOffset uintptr +} + +// abiPart encodes a step in translating between calling ABIs. +type abiPart struct { + src, dst uintptr + len uintptr +} + +func (a *abiPart) tryMerge(b abiPart) bool { + if a.src+a.len == b.src && a.dst+a.len == b.dst { + a.len += b.len + return true + } + return false } type winCallbackKey struct { @@ -55,7 +78,7 @@ func callbackasmAddr(i int) uintptr { return funcPC(callbackasm) + uintptr(i*entrySize) } -const callbackMaxArgs = 64 +const callbackMaxFrame = 64 * sys.PtrSize // compileCallback converts a Go function fn into a C function pointer // that can be passed to Windows APIs. @@ -75,22 +98,81 @@ func compileCallback(fn eface, cdecl bool) (code uintptr) { panic("compileCallback: expected function with one uintptr-sized result") } ft := (*functype)(unsafe.Pointer(fn._type)) + + // Check arguments and construct ABI translation. + var abiMap []abiPart + var src, dst uintptr + for _, t := range ft.in() { + if t.size > sys.PtrSize { + // We don't support this right now. In + // stdcall/cdecl, 64-bit ints and doubles are + // passed as two words (little endian); and + // structs are pushed on the stack. In + // fastcall, arguments larger than the word + // size are passed by reference. + panic("compileCallback: argument size is larger than uintptr") + } + if k := t.kind & kindMask; GOARCH == "amd64" && (k == kindFloat32 || k == kindFloat64) { + // In fastcall, floating-point arguments in + // the first four positions are passed in + // floating-point registers, which we don't + // currently spill. + panic("compileCallback: float arguments not supported") + } + + // The Go ABI aligns arguments. + dst = alignUp(dst, uintptr(t.align)) + // In the C ABI, we're already on a word boundary. + // Also, sub-word-sized fastcall register arguments + // are stored to the least-significant bytes of the + // argument word and all supported Windows + // architectures are little endian, so src is already + // pointing to the right place for smaller arguments. + + // Copy just the size of the argument. 
Note that this + // could be a small by-value struct, but C and Go + // struct layouts are compatible, so we can copy these + // directly, too. + part := abiPart{src, dst, t.size} + // Add this step to the adapter. + if len(abiMap) == 0 || !abiMap[len(abiMap)-1].tryMerge(part) { + abiMap = append(abiMap, part) + } + + // cdecl, stdcall, and fastcall pad arguments to word size. + src += sys.PtrSize + // The Go ABI packs arguments. + dst += t.size + } + // The Go ABI aligns the result to the word size. src is + // already aligned. + dst = alignUp(dst, sys.PtrSize) + retOffset := dst + if len(ft.out()) != 1 { panic("compileCallback: expected function with one uintptr-sized result") } - uintptrSize := unsafe.Sizeof(uintptr(0)) - if ft.out()[0].size != uintptrSize { + if ft.out()[0].size != sys.PtrSize { panic("compileCallback: expected function with one uintptr-sized result") } - if len(ft.in()) > callbackMaxArgs { - panic("compileCallback: too many function arguments") + if k := ft.out()[0].kind & kindMask; k == kindFloat32 || k == kindFloat64 { + // In cdecl and stdcall, float results are returned in + // ST(0). In fastcall, they're returned in XMM0. + // Either way, it's not AX. + panic("compileCallback: float results not supported") } - argsize := uintptr(0) - for _, t := range ft.in() { - if t.size > uintptrSize { - panic("compileCallback: argument size is larger than uintptr") - } - argsize += uintptrSize + // Make room for the uintptr-sized result. + dst += sys.PtrSize + + if dst > callbackMaxFrame { + panic("compileCallback: function argument frame too large") + } + + // For cdecl, the callee is responsible for popping its + // arguments from the C stack. + var retPop uintptr + if cdecl { + retPop = src } key := winCallbackKey{(*funcval)(fn.data), cdecl} @@ -112,7 +194,7 @@ func compileCallback(fn eface, cdecl bool) (code uintptr) { unlock(&cbs.lock) throw("too many callback functions") } - c := winCallback{key.fn, argsize, cdecl} + c := winCallback{key.fn, retPop, abiMap, retOffset} cbs.ctxt[n] = c cbs.index[key] = n cbs.n++ @@ -123,7 +205,7 @@ func compileCallback(fn eface, cdecl bool) (code uintptr) { type callbackArgs struct { index uintptr - args *uintptr // Arguments in stdcall/cdecl convention, with registers spilled + args unsafe.Pointer // Arguments in stdcall/cdecl convention, with registers spilled // Below are out-args from callbackWrap result uintptr retPop uintptr // For 386 cdecl, how many bytes to pop on return @@ -132,32 +214,21 @@ type callbackArgs struct { // callbackWrap is called by callbackasm to invoke a registered C callback. func callbackWrap(a *callbackArgs) { c := cbs.ctxt[a.index] - if GOARCH == "386" { - if c.cdecl { - // In cdecl, the callee is responsible for - // popping its arguments. - a.retPop = c.argsize - } else { - a.retPop = 0 - } - } + a.retPop = c.retPop - // Convert from stdcall to Go ABI. We assume the stack layout - // is the same, and we just need to make room for the result. - // - // TODO: This isn't a good assumption. For example, a function - // that takes two uint16 arguments will be laid out - // differently by the stdcall and Go ABIs. We should implement - // proper ABI conversion. - var frame [callbackMaxArgs + 1]uintptr - memmove(unsafe.Pointer(&frame), unsafe.Pointer(a.args), c.argsize) + // Convert from stdcall to Go ABI. 
+ var frame [callbackMaxFrame]byte + goArgs := unsafe.Pointer(&frame) + for _, part := range c.abiMap { + memmove(add(goArgs, part.dst), add(a.args, part.src), part.len) + } // Even though this is copying back results, we can pass a nil // type because those results must not require write barriers. - reflectcall(nil, unsafe.Pointer(c.fn), noescape(unsafe.Pointer(&frame)), sys.PtrSize+uint32(c.argsize), uint32(c.argsize)) + reflectcall(nil, unsafe.Pointer(c.fn), noescape(goArgs), uint32(c.retOffset)+sys.PtrSize, uint32(c.retOffset)) // Extract the result. - a.result = frame[c.argsize/sys.PtrSize] + a.result = *(*uintptr)(unsafe.Pointer(&frame[c.retOffset])) } const _LOAD_LIBRARY_SEARCH_SYSTEM32 = 0x00000800 diff --git a/src/runtime/syscall_windows_test.go b/src/runtime/syscall_windows_test.go index cb942beb3e..7705d2a017 100644 --- a/src/runtime/syscall_windows_test.go +++ b/src/runtime/syscall_windows_test.go @@ -317,9 +317,13 @@ func (f cbFunc) cSrc(w io.Writer, cdecl bool) { cArgs := make([]string, t.NumIn()) for i := range cTypes { // We included stdint.h, so this works for all sized - // integer types. + // integer types, and uint8Pair_t. cTypes[i] = t.In(i).Name() + "_t" - cArgs[i] = fmt.Sprintf("%d", i+1) + if t.In(i).Name() == "uint8Pair" { + cArgs[i] = fmt.Sprintf("(uint8Pair_t){%d,1}", i) + } else { + cArgs[i] = fmt.Sprintf("%d", i+1) + } } fmt.Fprintf(w, ` typedef uintptr_t %s (*%s)(%s); @@ -341,6 +345,8 @@ func (f cbFunc) testOne(t *testing.T, dll *syscall.DLL, cdecl bool, cb uintptr) } } +type uint8Pair struct{ x, y uint8 } + var cbFuncs = []cbFunc{ {func(i1, i2 uintptr) uintptr { return i1 + i2 @@ -366,6 +372,23 @@ var cbFuncs = []cbFunc{ {func(i1, i2, i3, i4, i5, i6, i7, i8, i9 uintptr) uintptr { return i1 + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 }}, + + // Non-uintptr parameters. + {func(i1, i2, i3, i4, i5, i6, i7, i8, i9 uint8) uintptr { + return uintptr(i1 + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9) + }}, + {func(i1, i2, i3, i4, i5, i6, i7, i8, i9 uint16) uintptr { + return uintptr(i1 + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9) + }}, + {func(i1, i2, i3, i4, i5, i6, i7, i8, i9 int8) uintptr { + return uintptr(i1 + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9) + }}, + {func(i1 int8, i2 int16, i3 int32, i4, i5 uintptr) uintptr { + return uintptr(i1) + uintptr(i2) + uintptr(i3) + i4 + i5 + }}, + {func(i1, i2, i3, i4, i5 uint8Pair) uintptr { + return uintptr(i1.x + i1.y + i2.x + i2.y + i3.x + i3.y + i4.x + i4.y + i5.x + i5.y) + }}, } type cbDLL struct { @@ -380,7 +403,10 @@ func (d *cbDLL) makeSrc(t *testing.T, path string) { } defer f.Close() - fmt.Fprintf(f, "#include \n\n") + fmt.Fprint(f, ` +#include +typedef struct { uint8_t x, y; } uint8Pair_t; +`) for _, cbf := range cbFuncs { cbf.cSrc(f, false) cbf.cSrc(f, true) -- cgit v1.2.1 From 8cc280aa727bc7159adfdd083861472aa3066a35 Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Thu, 23 Jul 2020 20:13:49 +0000 Subject: runtime: define and enforce synchronization on heap_scan Currently heap_scan is mostly protected by the heap lock, but gcControllerState.revise sometimes accesses it without a lock. In an effort to make gcControllerState.revise callable from more contexts (and have its synchronization guarantees actually respected), make heap_scan atomically read from and written to, unless the world is stopped. Note that we don't update gcControllerState.revise's erroneous doc comment here because this change isn't about revise's guarantees, just about heap_scan. The comment is updated in a later change. 
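As a minimal sketch of the access policy described above (illustrative only: the type and method names below are stand-ins, not the runtime's), the pattern is an atomic add on the flush path and an atomic load on the reader path, with plain access reserved for stop-the-world windows:

	package main

	import (
		"fmt"
		"sync/atomic"
	)

	type heapStats struct {
		// scan is read and written atomically, unless the world is stopped.
		scan uint64
	}

	// addScan is the concurrent writer path: fold in a signed delta with an
	// atomic add (two's-complement handles negative deltas).
	func (s *heapStats) addScan(delta int64) {
		atomic.AddUint64(&s.scan, uint64(delta))
	}

	// loadScan is the concurrent reader path, e.g. a pacer revision step.
	func (s *heapStats) loadScan() uint64 {
		return atomic.LoadUint64(&s.scan)
	}

	func main() {
		var s heapStats
		s.addScan(4096)
		fmt.Println(s.loadScan()) // 4096
	}

The change itself applies this to memstats.heap_scan with atomic.Xadd64 and atomic.Load64, as the diff that follows shows.
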
Change-Id: Iddbbeb954767c704c2bd1d221f36e6c4fc9948a6 Reviewed-on: https://go-review.googlesource.com/c/go/+/246960 Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Trust: Emmanuel Odeke Reviewed-by: Michael Pratt --- src/runtime/mgc.go | 5 +++-- src/runtime/mheap.go | 4 ++-- src/runtime/mstats.go | 4 +++- 3 files changed, 8 insertions(+), 5 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index c42c7fbd29..94539dd770 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -494,6 +494,7 @@ func (c *gcControllerState) revise() { gcpercent = 100000 } live := atomic.Load64(&memstats.heap_live) + scan := atomic.Load64(&memstats.heap_scan) // Assume we're under the soft goal. Pace GC to complete at // next_gc assuming the heap is in steady-state. @@ -508,7 +509,7 @@ func (c *gcControllerState) revise() { // // (This is a float calculation to avoid overflowing on // 100*heap_scan.) - scanWorkExpected := int64(float64(memstats.heap_scan) * 100 / float64(100+gcpercent)) + scanWorkExpected := int64(float64(scan) * 100 / float64(100+gcpercent)) if live > memstats.next_gc || c.scanWork > scanWorkExpected { // We're past the soft goal, or we've already done more scan @@ -518,7 +519,7 @@ func (c *gcControllerState) revise() { heapGoal = int64(float64(memstats.next_gc) * maxOvershoot) // Compute the upper bound on the scan work remaining. - scanWorkExpected = int64(memstats.heap_scan) + scanWorkExpected = int64(scan) } // Compute the remaining scan work estimate. diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go index 1a57bcd66e..124bbacd1d 100644 --- a/src/runtime/mheap.go +++ b/src/runtime/mheap.go @@ -1168,7 +1168,7 @@ func (h *mheap) allocSpan(npages uintptr, manual bool, spanclass spanClass, sysS throw("mheap.allocSpan called with no P") } } - memstats.heap_scan += uint64(c.local_scan) + atomic.Xadd64(&memstats.heap_scan, int64(c.local_scan)) c.local_scan = 0 memstats.tinyallocs += uint64(c.local_tinyallocs) c.local_tinyallocs = 0 @@ -1375,7 +1375,7 @@ func (h *mheap) freeSpan(s *mspan) { systemstack(func() { c := getg().m.p.ptr().mcache lock(&h.lock) - memstats.heap_scan += uint64(c.local_scan) + atomic.Xadd64(&memstats.heap_scan, int64(c.local_scan)) c.local_scan = 0 memstats.tinyallocs += uint64(c.local_tinyallocs) c.local_tinyallocs = 0 diff --git a/src/runtime/mstats.go b/src/runtime/mstats.go index b95b332134..2c217ecf84 100644 --- a/src/runtime/mstats.go +++ b/src/runtime/mstats.go @@ -139,6 +139,8 @@ type mstats struct { // no-scan objects and no-scan tails of objects. // // Whenever this is updated, call gcController.revise(). + // + // Read and written atomically or with the world stopped. heap_scan uint64 // heap_marked is the number of bytes marked by the previous @@ -635,7 +637,7 @@ func flushallmcaches() { func purgecachedstats(c *mcache) { // Protected by either heap or GC lock. h := &mheap_ - memstats.heap_scan += uint64(c.local_scan) + atomic.Xadd64(&memstats.heap_scan, int64(c.local_scan)) c.local_scan = 0 memstats.tinyallocs += uint64(c.local_tinyallocs) c.local_tinyallocs = 0 -- cgit v1.2.1 From 93d7d1685ee9e9f296e20f6c712796e54602e891 Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Thu, 23 Jul 2020 20:17:40 +0000 Subject: runtime: load gcControllerState.scanWork atomically in revise gcControllerState.scanWork's docs state that it must be accessed atomically during a GC cycle, but gcControllerState.revise does not do this (even when called with the heap lock held). 
This change makes it so that gcControllerState.revise accesses scanWork atomically and explicitly. Note that we don't update gcControllerState.revise's erroneous doc comment here because this change isn't about revise's guarantees, just about heap_scan. The comment is updated in a later change. Change-Id: Iafc3ad214e517190bfd8a219896d23da19f7659d Reviewed-on: https://go-review.googlesource.com/c/go/+/246961 Trust: Michael Knyszek Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Reviewed-by: Michael Pratt --- src/runtime/mgc.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index 94539dd770..4b9a6da3b3 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -495,6 +495,7 @@ func (c *gcControllerState) revise() { } live := atomic.Load64(&memstats.heap_live) scan := atomic.Load64(&memstats.heap_scan) + work := atomic.Loadint64(&c.scanWork) // Assume we're under the soft goal. Pace GC to complete at // next_gc assuming the heap is in steady-state. @@ -511,7 +512,7 @@ func (c *gcControllerState) revise() { // 100*heap_scan.) scanWorkExpected := int64(float64(scan) * 100 / float64(100+gcpercent)) - if live > memstats.next_gc || c.scanWork > scanWorkExpected { + if live > memstats.next_gc || work > scanWorkExpected { // We're past the soft goal, or we've already done more scan // work than we expected. Pace GC so that in the worst case it // will complete by the hard goal. @@ -529,7 +530,7 @@ func (c *gcControllerState) revise() { // (scanWork), so allocation will change this difference // slowly in the soft regime and not at all in the hard // regime. - scanWorkRemaining := scanWorkExpected - c.scanWork + scanWorkRemaining := scanWorkExpected - work if scanWorkRemaining < 1000 { // We set a somewhat arbitrary lower bound on // remaining scan work since if we aim a little high, -- cgit v1.2.1 From f5c6875f3228951afa1fcf2ec01c614e0fb7e2dd Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Thu, 23 Jul 2020 20:24:56 +0000 Subject: runtime: make next_gc atomically accessed next_gc is mostly updated only during a STW, but may occasionally be updated by calls to e.g. debug.SetGCPercent. In this case the update is supposed to be protected by the heap lock, but in reality it's accessed by gcController.revise which may be called without the heap lock held (despite its documentation, which will be updated in a later change). Change the synchronization policy on next_gc so that it's atomically accessed when the world is not stopped to aid in making revise safe for concurrent use. Change-Id: I79657a72f91563f3241aaeda66e8a7757d399529 Reviewed-on: https://go-review.googlesource.com/c/go/+/246962 Trust: Michael Knyszek Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Reviewed-by: Michael Pratt --- src/runtime/mgc.go | 13 +++++++------ src/runtime/mgcscavenge.go | 2 +- src/runtime/mstats.go | 10 ++++++++-- src/runtime/trace.go | 5 +++-- 4 files changed, 19 insertions(+), 11 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index 4b9a6da3b3..5c565a5853 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -409,7 +409,8 @@ type gcControllerState struct { } // startCycle resets the GC controller's state and computes estimates -// for a new GC cycle. The caller must hold worldsema. +// for a new GC cycle. The caller must hold worldsema and the world +// must be stopped. 
func (c *gcControllerState) startCycle() { c.scanWork = 0 c.bgScanCredit = 0 @@ -499,7 +500,7 @@ func (c *gcControllerState) revise() { // Assume we're under the soft goal. Pace GC to complete at // next_gc assuming the heap is in steady-state. - heapGoal := int64(memstats.next_gc) + heapGoal := int64(atomic.Load64(&memstats.next_gc)) // Compute the expected scan work remaining. // @@ -512,12 +513,12 @@ func (c *gcControllerState) revise() { // 100*heap_scan.) scanWorkExpected := int64(float64(scan) * 100 / float64(100+gcpercent)) - if live > memstats.next_gc || work > scanWorkExpected { + if int64(live) > heapGoal || work > scanWorkExpected { // We're past the soft goal, or we've already done more scan // work than we expected. Pace GC so that in the worst case it // will complete by the hard goal. const maxOvershoot = 1.1 - heapGoal = int64(float64(memstats.next_gc) * maxOvershoot) + heapGoal = int64(float64(heapGoal) * maxOvershoot) // Compute the upper bound on the scan work remaining. scanWorkExpected = int64(scan) @@ -846,7 +847,7 @@ func gcSetTriggerRatio(triggerRatio float64) { // Commit to the trigger and goal. memstats.gc_trigger = trigger - memstats.next_gc = goal + atomic.Store64(&memstats.next_gc, goal) if trace.enabled { traceNextGC() } @@ -903,7 +904,7 @@ func gcSetTriggerRatio(triggerRatio float64) { // // mheap_.lock must be held or the world must be stopped. func gcEffectiveGrowthRatio() float64 { - egogc := float64(memstats.next_gc-memstats.heap_marked) / float64(memstats.heap_marked) + egogc := float64(atomic.Load64(&memstats.next_gc)-memstats.heap_marked) / float64(memstats.heap_marked) if egogc < 0 { // Shouldn't happen, but just in case. egogc = 0 diff --git a/src/runtime/mgcscavenge.go b/src/runtime/mgcscavenge.go index 34646828e5..6328b295ca 100644 --- a/src/runtime/mgcscavenge.go +++ b/src/runtime/mgcscavenge.go @@ -123,7 +123,7 @@ func gcPaceScavenger() { return } // Compute our scavenging goal. - goalRatio := float64(memstats.next_gc) / float64(memstats.last_next_gc) + goalRatio := float64(atomic.Load64(&memstats.next_gc)) / float64(memstats.last_next_gc) retainedGoal := uint64(float64(memstats.last_heap_inuse) * goalRatio) // Add retainExtraPercent overhead to retainedGoal. This calculation // looks strange but the purpose is to arrive at an integer division diff --git a/src/runtime/mstats.go b/src/runtime/mstats.go index 2c217ecf84..8cc20552fb 100644 --- a/src/runtime/mstats.go +++ b/src/runtime/mstats.go @@ -57,9 +57,15 @@ type mstats struct { gc_sys uint64 // updated atomically or during STW other_sys uint64 // updated atomically or during STW - // Statistics about garbage collector. + // Statistics about the garbage collector. + + // next_gc is the goal heap_live for when next GC ends. + // Set to ^uint64(0) if disabled. + // + // Read and written atomically, unless the world is stopped. + next_gc uint64 + // Protected by mheap or stopping the world during GC. 
- next_gc uint64 // goal heap_live for when next GC ends; ^0 if disabled last_gc_unix uint64 // last gc (in unix time) pause_total_ns uint64 pause_ns [256]uint64 // circular buffer of recent gc pause lengths diff --git a/src/runtime/trace.go b/src/runtime/trace.go index 169b650eb4..d3ecd148be 100644 --- a/src/runtime/trace.go +++ b/src/runtime/trace.go @@ -13,6 +13,7 @@ package runtime import ( + "runtime/internal/atomic" "runtime/internal/sys" "unsafe" ) @@ -1146,11 +1147,11 @@ func traceHeapAlloc() { } func traceNextGC() { - if memstats.next_gc == ^uint64(0) { + if nextGC := atomic.Load64(&memstats.next_gc); nextGC == ^uint64(0) { // Heap-based triggering is disabled. traceEvent(traceEvNextGC, -1, 0) } else { - traceEvent(traceEvNextGC, -1, memstats.next_gc) + traceEvent(traceEvNextGC, -1, nextGC) } } -- cgit v1.2.1 From ce46f197b6c75281b77ee93338e2559671e28b01 Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Thu, 23 Jul 2020 20:48:06 +0000 Subject: runtime: access the assist ratio atomically This change makes it so that the GC assist ratio (the pair of gcControllerState fields assistBytesPerWork and assistWorkPerByte) is updated atomically. Note that the pair of fields are not updated together atomically, but that's OK. The code here was already racy for some time and in practice the assist ratio moves very slowly. The purpose of this change is so that we can document gcController.revise to be safe for concurrent use, which will be useful in further changes. Change-Id: Ie25d630207c88e4f85f2b8953f6a0051ebf1b4ea Reviewed-on: https://go-review.googlesource.com/c/go/+/246963 Trust: Michael Knyszek Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Reviewed-by: Michael Pratt --- src/runtime/mgc.go | 51 ++++++++++++++++++++++++++++++++++++++++++-------- src/runtime/mgcmark.go | 17 +++++++++++------ src/runtime/proc.go | 3 ++- 3 files changed, 56 insertions(+), 15 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index 5c565a5853..c54f893689 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -388,10 +388,24 @@ type gcControllerState struct { // bytes that should be performed by mutator assists. This is // computed at the beginning of each cycle and updated every // time heap_scan is updated. - assistWorkPerByte float64 + // + // Stored as a uint64, but it's actually a float64. Use + // float64frombits to get the value. + // + // Read and written atomically. + assistWorkPerByte uint64 // assistBytesPerWork is 1/assistWorkPerByte. - assistBytesPerWork float64 + // + // Stored as a uint64, but it's actually a float64. Use + // float64frombits to get the value. + // + // Read and written atomically. + // + // Note that because this is read and written independently + // from assistWorkPerByte users may notice a skew between + // the two values, and such a state should be safe. 
+ assistBytesPerWork uint64 // fractionalUtilizationGoal is the fraction of wall clock // time that should be spent in the fractional mark worker on @@ -470,7 +484,8 @@ func (c *gcControllerState) startCycle() { c.revise() if debug.gcpacertrace > 0 { - print("pacer: assist ratio=", c.assistWorkPerByte, + assistRatio := float64frombits(atomic.Load64(&c.assistWorkPerByte)) + print("pacer: assist ratio=", assistRatio, " (scan ", memstats.heap_scan>>20, " MB in ", work.initialHeapLive>>20, "->", memstats.next_gc>>20, " MB)", @@ -480,9 +495,22 @@ func (c *gcControllerState) startCycle() { } // revise updates the assist ratio during the GC cycle to account for -// improved estimates. This should be called either under STW or -// whenever memstats.heap_scan, memstats.heap_live, or -// memstats.next_gc is updated (with mheap_.lock held). +// improved estimates. This should be called whenever memstats.heap_scan, +// memstats.heap_live, or memstats.next_gc is updated. It is safe to +// call concurrently, but it may race with other calls to revise. +// +// The result of this race is that the two assist ratio values may not line +// up or may be stale. In practice this is OK because the assist ratio +// moves slowly throughout a GC cycle, and the assist ratio is a best-effort +// heuristic anyway. Furthermore, no part of the heuristic depends on +// the two assist ratio values being exact reciprocals of one another, since +// the two values are used to convert values from different sources. +// +// The worst case result of this raciness is that we may miss a larger shift +// in the ratio (say, if we decide to pace more aggressively against the +// hard heap goal) but even this "hard goal" is best-effort (see #40460). +// The dedicated GC should ensure we don't exceed the hard goal by too much +// in the rare case we do exceed it. // // It should only be called when gcBlackenEnabled != 0 (because this // is when assists are enabled and the necessary statistics are @@ -555,8 +583,15 @@ func (c *gcControllerState) revise() { // Compute the mutator assist ratio so by the time the mutator // allocates the remaining heap bytes up to next_gc, it will // have done (or stolen) the remaining amount of scan work. - c.assistWorkPerByte = float64(scanWorkRemaining) / float64(heapRemaining) - c.assistBytesPerWork = float64(heapRemaining) / float64(scanWorkRemaining) + // Note that the assist ratio values are updated atomically + // but not together. This means there may be some degree of + // skew between the two values. This is generally OK as the + // values shift relatively slowly over the course of a GC + // cycle. + assistWorkPerByte := float64(scanWorkRemaining) / float64(heapRemaining) + assistBytesPerWork := float64(heapRemaining) / float64(scanWorkRemaining) + atomic.Store64(&c.assistWorkPerByte, float64bits(assistWorkPerByte)) + atomic.Store64(&c.assistBytesPerWork, float64bits(assistBytesPerWork)) } // endCycle computes the trigger ratio for the next cycle. diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go index 79df59d6d6..c71c0e58d3 100644 --- a/src/runtime/mgcmark.go +++ b/src/runtime/mgcmark.go @@ -400,11 +400,13 @@ retry: // balance positive. When the required amount of work is low, // we over-assist to build up credit for future allocations // and amortize the cost of assisting. 
+ assistWorkPerByte := float64frombits(atomic.Load64(&gcController.assistWorkPerByte)) + assistBytesPerWork := float64frombits(atomic.Load64(&gcController.assistBytesPerWork)) debtBytes := -gp.gcAssistBytes - scanWork := int64(gcController.assistWorkPerByte * float64(debtBytes)) + scanWork := int64(assistWorkPerByte * float64(debtBytes)) if scanWork < gcOverAssistWork { scanWork = gcOverAssistWork - debtBytes = int64(gcController.assistBytesPerWork * float64(scanWork)) + debtBytes = int64(assistBytesPerWork * float64(scanWork)) } // Steal as much credit as we can from the background GC's @@ -418,7 +420,7 @@ retry: if bgScanCredit > 0 { if bgScanCredit < scanWork { stolen = bgScanCredit - gp.gcAssistBytes += 1 + int64(gcController.assistBytesPerWork*float64(stolen)) + gp.gcAssistBytes += 1 + int64(assistBytesPerWork*float64(stolen)) } else { stolen = scanWork gp.gcAssistBytes += debtBytes @@ -543,7 +545,8 @@ func gcAssistAlloc1(gp *g, scanWork int64) { // this scan work counts for. The "1+" is a poor man's // round-up, to ensure this adds credit even if // assistBytesPerWork is very low. - gp.gcAssistBytes += 1 + int64(gcController.assistBytesPerWork*float64(workDone)) + assistBytesPerWork := float64frombits(atomic.Load64(&gcController.assistBytesPerWork)) + gp.gcAssistBytes += 1 + int64(assistBytesPerWork*float64(workDone)) // If this is the last worker and we ran out of work, // signal a completion point. @@ -637,7 +640,8 @@ func gcFlushBgCredit(scanWork int64) { return } - scanBytes := int64(float64(scanWork) * gcController.assistBytesPerWork) + assistBytesPerWork := float64frombits(atomic.Load64(&gcController.assistBytesPerWork)) + scanBytes := int64(float64(scanWork) * assistBytesPerWork) lock(&work.assistQueue.lock) for !work.assistQueue.q.empty() && scanBytes > 0 { @@ -670,7 +674,8 @@ func gcFlushBgCredit(scanWork int64) { if scanBytes > 0 { // Convert from scan bytes back to work. - scanWork = int64(float64(scanBytes) * gcController.assistWorkPerByte) + assistWorkPerByte := float64frombits(atomic.Load64(&gcController.assistWorkPerByte)) + scanWork = int64(float64(scanBytes) * assistWorkPerByte) atomic.Xaddint64(&gcController.bgScanCredit, scanWork) } unlock(&work.assistQueue.lock) diff --git a/src/runtime/proc.go b/src/runtime/proc.go index ec4e6d8751..ebecc92745 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -3208,7 +3208,8 @@ func goexit0(gp *g) { // Flush assist credit to the global pool. This gives // better information to pacing if the application is // rapidly creating an exiting goroutines. - scanCredit := int64(gcController.assistWorkPerByte * float64(gp.gcAssistBytes)) + assistWorkPerByte := float64frombits(atomic.Load64(&gcController.assistWorkPerByte)) + scanCredit := int64(assistWorkPerByte * float64(gp.gcAssistBytes)) atomic.Xaddint64(&gcController.bgScanCredit, scanCredit) gp.gcAssistBytes = 0 } -- cgit v1.2.1 From 42019613df2d9b6ad39e8ccf80861e75666025a0 Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Thu, 23 Jul 2020 21:02:05 +0000 Subject: runtime: make distributed/local malloc stats the source-of-truth This change makes it so that various local malloc stats (excluding heap_scan and local_tinyallocs) are no longer written first to mheap fields but are instead accessed directly from each mcache. This change is part of a move toward having stats be distributed, and cleaning up some old code related to the stats. Note that because there's no central source-of-truth, when an mcache dies, it must donate its stats to another mcache. 
It's always safe to donate to the mcache for the 0th P, so do that. Change-Id: I2556093dbc27357cb9621c9b97671f3c00aa1173 Reviewed-on: https://go-review.googlesource.com/c/go/+/246964 Trust: Michael Knyszek Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Reviewed-by: Michael Pratt --- src/runtime/export_test.go | 26 ++++++++++++++++++-------- src/runtime/mcache.go | 31 +++++++++++++++++++++++++++++-- src/runtime/mheap.go | 7 ++----- src/runtime/mstats.go | 41 +++++++++++++++++++---------------------- src/runtime/proc.go | 2 +- 5 files changed, 69 insertions(+), 38 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go index e65b7b8ea7..d5a90ca65b 100644 --- a/src/runtime/export_test.go +++ b/src/runtime/export_test.go @@ -339,18 +339,28 @@ func ReadMemStatsSlow() (base, slow MemStats) { // Add in frees. readmemstats_m flushed the cached stats, so // these are up-to-date. - var smallFree uint64 - slow.Frees = mheap_.nlargefree - for i := range mheap_.nsmallfree { - slow.Frees += mheap_.nsmallfree[i] - bySize[i].Frees = mheap_.nsmallfree[i] - bySize[i].Mallocs += mheap_.nsmallfree[i] - smallFree += mheap_.nsmallfree[i] * uint64(class_to_size[i]) + var largeFree, smallFree uint64 + for _, p := range allp { + c := p.mcache + if c == nil { + continue + } + // Collect large allocation stats. + largeFree += uint64(c.local_largefree) + slow.Frees += uint64(c.local_nlargefree) + + // Collect per-sizeclass stats. + for i := 0; i < _NumSizeClasses; i++ { + slow.Frees += uint64(c.local_nsmallfree[i]) + bySize[i].Frees += uint64(c.local_nsmallfree[i]) + bySize[i].Mallocs += uint64(c.local_nsmallfree[i]) + smallFree += uint64(c.local_nsmallfree[i]) * uint64(class_to_size[i]) + } } slow.Frees += memstats.tinyallocs slow.Mallocs += slow.Frees - slow.TotalAlloc = slow.Alloc + mheap_.largefree + smallFree + slow.TotalAlloc = slow.Alloc + largeFree + smallFree for i := range slow.BySize { slow.BySize[i].Mallocs = bySize[i].Mallocs diff --git a/src/runtime/mcache.go b/src/runtime/mcache.go index 7a7d33ccae..5baa7b3da8 100644 --- a/src/runtime/mcache.go +++ b/src/runtime/mcache.go @@ -41,7 +41,13 @@ type mcache struct { stackcache [_NumStackOrders]stackfreelist - // Local allocator stats, flushed during GC. + // Allocator stats (source-of-truth). + // Only the P that owns this mcache may write to these + // variables, so it's safe for that P to read non-atomically. + // + // When read with stats from other mcaches and with the world + // stopped, the result will accurately reflect the state of the + // application. local_largefree uintptr // bytes freed for large objects (>maxsmallsize) local_nlargefree uintptr // number of frees for large objects (>maxsmallsize) local_nsmallfree [_NumSizeClasses]uintptr // number of frees for small objects (<=maxsmallsize) @@ -97,7 +103,13 @@ func allocmcache() *mcache { return c } -func freemcache(c *mcache) { +// freemcache releases resources associated with this +// mcache and puts the object onto a free list. +// +// In some cases there is no way to simply release +// resources, such as statistics, so donate them to +// a different mcache (the recipient). +func freemcache(c *mcache, recipient *mcache) { systemstack(func() { c.releaseAll() stackcache_clear(c) @@ -109,11 +121,26 @@ func freemcache(c *mcache) { lock(&mheap_.lock) purgecachedstats(c) + // Donate anything else that's left. 
+ c.donate(recipient) mheap_.cachealloc.free(unsafe.Pointer(c)) unlock(&mheap_.lock) }) } +// donate flushes data and resources which have no global +// pool to another mcache. +func (c *mcache) donate(d *mcache) { + d.local_largefree += c.local_largefree + c.local_largefree = 0 + d.local_nlargefree += c.local_nlargefree + c.local_nlargefree = 0 + for i := range c.local_nsmallfree { + d.local_nsmallfree[i] += c.local_nsmallfree[i] + c.local_nsmallfree[i] = 0 + } +} + // refill acquires a new span of span class spc for c. This span will // have at least one free object. The current span in c must be full. // diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go index 124bbacd1d..1b41b204ab 100644 --- a/src/runtime/mheap.go +++ b/src/runtime/mheap.go @@ -129,11 +129,8 @@ type mheap struct { reclaimCredit uintptr // Malloc stats. - largealloc uint64 // bytes allocated for large objects - nlargealloc uint64 // number of large object allocations - largefree uint64 // bytes freed for large objects (>maxsmallsize) - nlargefree uint64 // number of frees for large objects (>maxsmallsize) - nsmallfree [_NumSizeClasses]uint64 // number of frees for small objects (<=maxsmallsize) + largealloc uint64 // bytes allocated for large objects + nlargealloc uint64 // number of large object allocations // arenas is the heap arena map. It points to the metadata for // the heap for every arena frame of the entire usable virtual diff --git a/src/runtime/mstats.go b/src/runtime/mstats.go index 8cc20552fb..d81d2ebe81 100644 --- a/src/runtime/mstats.go +++ b/src/runtime/mstats.go @@ -571,21 +571,27 @@ func updatememstats() { memstats.by_size[i].nmalloc += c.nmalloc totalAlloc += c.nmalloc * uint64(class_to_size[i]) } - // Collect per-sizeclass stats. - for i := 0; i < _NumSizeClasses; i++ { - if i == 0 { - memstats.nmalloc += mheap_.nlargealloc - totalAlloc += mheap_.largealloc - totalFree += mheap_.largefree - memstats.nfree += mheap_.nlargefree + + for _, p := range allp { + c := p.mcache + if c == nil { continue } - - // The mcache stats have been flushed to mheap_. - memstats.nfree += mheap_.nsmallfree[i] - memstats.by_size[i].nfree = mheap_.nsmallfree[i] - smallFree += mheap_.nsmallfree[i] * uint64(class_to_size[i]) + // Collect large allocation stats. + totalFree += uint64(c.local_largefree) + memstats.nfree += uint64(c.local_nlargefree) + + // Collect per-sizeclass stats. + for i := 0; i < _NumSizeClasses; i++ { + memstats.nfree += uint64(c.local_nsmallfree[i]) + memstats.by_size[i].nfree += uint64(c.local_nsmallfree[i]) + smallFree += uint64(c.local_nsmallfree[i]) * uint64(class_to_size[i]) + } } + // Collect remaining large allocation stats. + memstats.nmalloc += mheap_.nlargealloc + totalAlloc += mheap_.largealloc + totalFree += smallFree memstats.nfree += memstats.tinyallocs @@ -641,20 +647,11 @@ func flushallmcaches() { //go:nosplit func purgecachedstats(c *mcache) { - // Protected by either heap or GC lock. - h := &mheap_ + // Protected by heap lock. atomic.Xadd64(&memstats.heap_scan, int64(c.local_scan)) c.local_scan = 0 memstats.tinyallocs += uint64(c.local_tinyallocs) c.local_tinyallocs = 0 - h.largefree += uint64(c.local_largefree) - c.local_largefree = 0 - h.nlargefree += uint64(c.local_nlargefree) - c.local_nlargefree = 0 - for i := 0; i < len(c.local_nsmallfree); i++ { - h.nsmallfree[i] += uint64(c.local_nsmallfree[i]) - c.local_nsmallfree[i] = 0 - } } // Atomically increases a given *system* memory stat. 
We are counting on this diff --git a/src/runtime/proc.go b/src/runtime/proc.go index ebecc92745..4f4cff38aa 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -4550,7 +4550,7 @@ func (pp *p) destroy() { pp.mspancache.len = 0 pp.pcache.flush(&mheap_.pages) }) - freemcache(pp.mcache) + freemcache(pp.mcache, allp[0].mcache) pp.mcache = nil gfpurge(pp) traceProcFree(pp) -- cgit v1.2.1 From e63716bc76d3264f669843434bc365a78f2141d2 Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Thu, 23 Jul 2020 21:10:29 +0000 Subject: runtime: make nlargealloc and largealloc mcache fields This change makes nlargealloc and largealloc into mcache fields just like nlargefree and largefree. These local fields become the new source-of-truth. This change also moves the accounting for these fields out of allocSpan (which is an inappropriate place for it -- this accounting generally happens much closer to the point of allocation) and into largeAlloc. This move is partially possible now that we can call gcController.revise at that point. Furthermore, this change moves largeAlloc into mcache.go and makes it a method of mcache. While there's a little bit of a mismatch here because largeAlloc barely interacts with the mcache, it helps solidify the mcache as the first allocation layer and provides a clear place to aggregate and manage statistics. Change-Id: I37b5e648710733bb4c04430b71e96700e438587a Reviewed-on: https://go-review.googlesource.com/c/go/+/246965 Trust: Michael Knyszek Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Reviewed-by: Michael Pratt --- src/runtime/malloc.go | 33 +------------------------------ src/runtime/mcache.go | 54 ++++++++++++++++++++++++++++++++++++++++++++++++--- src/runtime/mheap.go | 18 +---------------- src/runtime/mstats.go | 4 ++-- 4 files changed, 55 insertions(+), 54 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go index b19d1f2671..ec601ccb39 100644 --- a/src/runtime/malloc.go +++ b/src/runtime/malloc.go @@ -1082,9 +1082,7 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer { } } else { shouldhelpgc = true - systemstack(func() { - span = largeAlloc(size, needzero, noscan) - }) + span = c.largeAlloc(size, needzero, noscan) span.freeindex = 1 span.allocCount = 1 x = unsafe.Pointer(span.base()) @@ -1179,35 +1177,6 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer { return x } -func largeAlloc(size uintptr, needzero bool, noscan bool) *mspan { - // print("largeAlloc size=", size, "\n") - - if size+_PageSize < size { - throw("out of memory") - } - npages := size >> _PageShift - if size&_PageMask != 0 { - npages++ - } - - // Deduct credit for this span allocation and sweep if - // necessary. mHeap_Alloc will also sweep npages, so this only - // pays the debt down to npage pages. - deductSweepCredit(npages*_PageSize, npages) - - spc := makeSpanClass(0, noscan) - s := mheap_.alloc(npages, spc, needzero) - if s == nil { - throw("out of memory") - } - // Put the large span in the mcentral swept list so that it's - // visible to the background sweeper. 
- mheap_.central[spc].mcentral.fullSwept(mheap_.sweepgen).push(s) - s.limit = s.base() + size - heapBitsForAddr(s.base()).initSpan(s) - return s -} - // implementation of new builtin // compiler (both frontend and SSA backend) knows the signature // of this function diff --git a/src/runtime/mcache.go b/src/runtime/mcache.go index 5baa7b3da8..3657c0b86a 100644 --- a/src/runtime/mcache.go +++ b/src/runtime/mcache.go @@ -10,6 +10,7 @@ import ( ) // Per-thread (in Go, per-P) cache for small objects. +// This includes a small object cache and local allocation stats. // No locking needed because it is per-thread (per-P). // // mcaches are allocated from non-GC'd memory, so any heap pointers @@ -48,9 +49,11 @@ type mcache struct { // When read with stats from other mcaches and with the world // stopped, the result will accurately reflect the state of the // application. - local_largefree uintptr // bytes freed for large objects (>maxsmallsize) - local_nlargefree uintptr // number of frees for large objects (>maxsmallsize) - local_nsmallfree [_NumSizeClasses]uintptr // number of frees for small objects (<=maxsmallsize) + local_largealloc uintptr // bytes allocated for large objects + local_nlargealloc uintptr // number of large object allocations + local_largefree uintptr // bytes freed for large objects (>maxsmallsize) + local_nlargefree uintptr // number of frees for large objects (>maxsmallsize) + local_nsmallfree [_NumSizeClasses]uintptr // number of frees for small objects (<=maxsmallsize) // flushGen indicates the sweepgen during which this mcache // was last flushed. If flushGen != mheap_.sweepgen, the spans @@ -131,6 +134,10 @@ func freemcache(c *mcache, recipient *mcache) { // donate flushes data and resources which have no global // pool to another mcache. func (c *mcache) donate(d *mcache) { + d.local_largealloc += c.local_largealloc + c.local_largealloc = 0 + d.local_nlargealloc += c.local_nlargealloc + c.local_nlargealloc = 0 d.local_largefree += c.local_largefree c.local_largefree = 0 d.local_nlargefree += c.local_nlargefree @@ -178,6 +185,47 @@ func (c *mcache) refill(spc spanClass) { c.alloc[spc] = s } +// largeAlloc allocates a span for a large object. +func (c *mcache) largeAlloc(size uintptr, needzero bool, noscan bool) *mspan { + if size+_PageSize < size { + throw("out of memory") + } + npages := size >> _PageShift + if size&_PageMask != 0 { + npages++ + } + + // Deduct credit for this span allocation and sweep if + // necessary. mHeap_Alloc will also sweep npages, so this only + // pays the debt down to npage pages. + deductSweepCredit(npages*_PageSize, npages) + + spc := makeSpanClass(0, noscan) + s := mheap_.alloc(npages, spc, needzero) + if s == nil { + throw("out of memory") + } + c.local_largealloc += npages * pageSize + c.local_nlargealloc++ + + // Update heap_live and revise pacing if needed. + atomic.Xadd64(&memstats.heap_live, int64(npages*pageSize)) + if trace.enabled { + // Trace that a heap alloc occurred because heap_live changed. + traceHeapAlloc() + } + if gcBlackenEnabled != 0 { + gcController.revise() + } + + // Put the large span in the mcentral swept list so that it's + // visible to the background sweeper. 
+ mheap_.central[spc].mcentral.fullSwept(mheap_.sweepgen).push(s) + s.limit = s.base() + size + heapBitsForAddr(s.base()).initSpan(s) + return s +} + func (c *mcache) releaseAll() { for i := range c.alloc { s := c.alloc[i] diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go index 1b41b204ab..5635dc6784 100644 --- a/src/runtime/mheap.go +++ b/src/runtime/mheap.go @@ -128,10 +128,6 @@ type mheap struct { // This is accessed atomically. reclaimCredit uintptr - // Malloc stats. - largealloc uint64 // bytes allocated for large objects - nlargealloc uint64 // number of large object allocations - // arenas is the heap arena map. It points to the metadata for // the heap for every arena frame of the entire usable virtual // address space. @@ -1170,14 +1166,7 @@ func (h *mheap) allocSpan(npages uintptr, manual bool, spanclass spanClass, sysS memstats.tinyallocs += uint64(c.local_tinyallocs) c.local_tinyallocs = 0 - // Do some additional accounting if it's a large allocation. - if spanclass.sizeclass() == 0 { - mheap_.largealloc += uint64(npages * pageSize) - mheap_.nlargealloc++ - atomic.Xadd64(&memstats.heap_live, int64(npages*pageSize)) - } - - // Either heap_live or heap_scan could have been updated. + // heap_scan was been updated. if gcBlackenEnabled != 0 { gcController.revise() } @@ -1277,11 +1266,6 @@ HaveSpan: // Update related page sweeper stats. atomic.Xadd64(&h.pagesInUse, int64(npages)) - - if trace.enabled { - // Trace that a heap alloc occurred. - traceHeapAlloc() - } } // Make sure the newly allocated span will be observed diff --git a/src/runtime/mstats.go b/src/runtime/mstats.go index d81d2ebe81..d9acb361d5 100644 --- a/src/runtime/mstats.go +++ b/src/runtime/mstats.go @@ -578,6 +578,8 @@ func updatememstats() { continue } // Collect large allocation stats. + memstats.nmalloc += uint64(c.local_nlargealloc) + totalAlloc += uint64(c.local_largealloc) totalFree += uint64(c.local_largefree) memstats.nfree += uint64(c.local_nlargefree) @@ -589,8 +591,6 @@ func updatememstats() { } } // Collect remaining large allocation stats. - memstats.nmalloc += mheap_.nlargealloc - totalAlloc += mheap_.largealloc totalFree += smallFree -- cgit v1.2.1 From a5088e76f108f6470d2a9b3ac56a58ddb9376e4f Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Thu, 23 Jul 2020 22:07:44 +0000 Subject: runtime: remove mcentral.nmalloc and add mcache.local_nsmallalloc This change removes mcentral.nmalloc and adds mcache.local_nsmallalloc which fulfills the same role but may be accessed non-atomically. It also moves responsibility for updating heap_live and local_nsmallalloc into mcache functions. As a result of this change, mcache is now the sole source-of-truth for malloc stats. It is also solely responsible for updating heap_live and performing the various operations required as a result of updating heap_live. The overall improvement here is in code organization: previously malloc stats were fairly scattered, and now they have one single home, and nearly all the required manipulations exist in a single file. 
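To make the pattern concrete, here is a minimal, self-contained sketch -- invented names, not the runtime's actual code -- of what "distributed stats as the source of truth" means: each P owns plain counters that only it writes on the allocation path, and a reader sums them while the world is stopped (assumed below) instead of maintaining a central, atomically updated total.

	package main

	import "fmt"

	const numSizeClasses = 68 // stand-in for the runtime's _NumSizeClasses; value illustrative

	// pCache stands in for an mcache: counters written only by the owning P,
	// so no atomics or locks are needed on the allocation fast path.
	type pCache struct {
		smallAllocCount [numSizeClasses]uintptr
	}

	// readStats stands in for a stats reader: with the world stopped,
	// plain reads of every P's counters yield a consistent total.
	func readStats(allP []*pCache) (nmalloc uint64) {
		for _, c := range allP {
			for _, n := range c.smallAllocCount {
				nmalloc += uint64(n)
			}
		}
		return nmalloc
	}

	func main() {
		p0, p1 := &pCache{}, &pCache{}
		p0.smallAllocCount[5] = 3
		p1.smallAllocCount[5] = 4
		fmt.Println(readStats([]*pCache{p0, p1})) // prints 7
	}

The sketch only shows the ownership/aggregation idea; it deliberately omits the heap_live and pacing interactions handled in the real change.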
Change-Id: I7e93fa297c1debf17e3f2a0d68aeed28a9c6af00 Reviewed-on: https://go-review.googlesource.com/c/go/+/246966 Trust: Michael Knyszek Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Reviewed-by: Michael Pratt --- src/runtime/mcache.go | 34 ++++++++++++++++++++++++++++++++++ src/runtime/mcentral.go | 41 +---------------------------------------- src/runtime/mstats.go | 18 ++++++------------ 3 files changed, 41 insertions(+), 52 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/mcache.go b/src/runtime/mcache.go index 3657c0b86a..4d2ba6dff0 100644 --- a/src/runtime/mcache.go +++ b/src/runtime/mcache.go @@ -51,6 +51,7 @@ type mcache struct { // application. local_largealloc uintptr // bytes allocated for large objects local_nlargealloc uintptr // number of large object allocations + local_nsmallalloc [_NumSizeClasses]uintptr // number of allocs for small objects local_largefree uintptr // bytes freed for large objects (>maxsmallsize) local_nlargefree uintptr // number of frees for large objects (>maxsmallsize) local_nsmallfree [_NumSizeClasses]uintptr // number of frees for small objects (<=maxsmallsize) @@ -138,6 +139,10 @@ func (c *mcache) donate(d *mcache) { c.local_largealloc = 0 d.local_nlargealloc += c.local_nlargealloc c.local_nlargealloc = 0 + for i := range c.local_nsmallalloc { + d.local_nsmallalloc[i] += c.local_nsmallalloc[i] + c.local_nsmallalloc[i] = 0 + } d.local_largefree += c.local_largefree c.local_largefree = 0 d.local_nlargefree += c.local_nlargefree @@ -182,6 +187,20 @@ func (c *mcache) refill(spc spanClass) { // sweeping in the next sweep phase. s.sweepgen = mheap_.sweepgen + 3 + // Assume all objects from this span will be allocated in the + // mcache. If it gets uncached, we'll adjust this. + c.local_nsmallalloc[spc.sizeclass()] += uintptr(s.nelems) - uintptr(s.allocCount) + usedBytes := uintptr(s.allocCount) * s.elemsize + atomic.Xadd64(&memstats.heap_live, int64(s.npages*pageSize)-int64(usedBytes)) + if trace.enabled { + // heap_live changed. + traceHeapAlloc() + } + if gcBlackenEnabled != 0 { + // heap_live changed. + gcController.revise() + } + c.alloc[spc] = s } @@ -227,9 +246,24 @@ func (c *mcache) largeAlloc(size uintptr, needzero bool, noscan bool) *mspan { } func (c *mcache) releaseAll() { + sg := mheap_.sweepgen for i := range c.alloc { s := c.alloc[i] if s != &emptymspan { + // Adjust nsmallalloc in case the span wasn't fully allocated. + n := uintptr(s.nelems) - uintptr(s.allocCount) + c.local_nsmallalloc[spanClass(i).sizeclass()] -= n + if s.sweepgen != sg+1 { + // refill conservatively counted unallocated slots in heap_live. + // Undo this. + // + // If this span was cached before sweep, then + // heap_live was totally recomputed since + // caching this span, so we don't do this for + // stale spans. + atomic.Xadd64(&memstats.heap_live, -int64(n)*int64(s.elemsize)) + } + // Release the span to the mcentral. mheap_.central[i].mcentral.uncacheSpan(s) c.alloc[i] = &emptymspan } diff --git a/src/runtime/mcentral.go b/src/runtime/mcentral.go index ed49e01677..97fe92c2ab 100644 --- a/src/runtime/mcentral.go +++ b/src/runtime/mcentral.go @@ -44,11 +44,6 @@ type mcentral struct { // encounter swept spans, and these should be ignored. partial [2]spanSet // list of spans with a free object full [2]spanSet // list of spans with no free objects - - // nmalloc is the cumulative count of objects allocated from - // this mcentral, assuming all spans in mcaches are - // fully-allocated. Written atomically, read under STW. 
- nmalloc uint64 } // Initialize a single central free list. @@ -178,19 +173,6 @@ havespan: if n == 0 || s.freeindex == s.nelems || uintptr(s.allocCount) == s.nelems { throw("span has no free objects") } - // Assume all objects from this span will be allocated in the - // mcache. If it gets uncached, we'll adjust this. - atomic.Xadd64(&c.nmalloc, int64(n)) - usedBytes := uintptr(s.allocCount) * s.elemsize - atomic.Xadd64(&memstats.heap_live, int64(spanBytes)-int64(usedBytes)) - if trace.enabled { - // heap_live changed. - traceHeapAlloc() - } - if gcBlackenEnabled != 0 { - // heap_live changed. - gcController.revise() - } freeByteBase := s.freeindex &^ (64 - 1) whichByte := freeByteBase / 8 // Init alloc bits cache. @@ -228,27 +210,6 @@ func (c *mcentral) uncacheSpan(s *mspan) { // Indicate that s is no longer cached. atomic.Store(&s.sweepgen, sg) } - n := int(s.nelems) - int(s.allocCount) - - // Fix up statistics. - if n > 0 { - // cacheSpan updated alloc assuming all objects on s - // were going to be allocated. Adjust for any that - // weren't. We must do this before potentially - // sweeping the span. - atomic.Xadd64(&c.nmalloc, -int64(n)) - - if !stale { - // (*mcentral).cacheSpan conservatively counted - // unallocated slots in heap_live. Undo this. - // - // If this span was cached before sweep, then - // heap_live was totally recomputed since - // caching this span, so we don't do this for - // stale spans. - atomic.Xadd64(&memstats.heap_live, -int64(n)*int64(s.elemsize)) - } - } // Put the span in the appropriate place. if stale { @@ -256,7 +217,7 @@ func (c *mcentral) uncacheSpan(s *mspan) { // the right list. s.sweep(false) } else { - if n > 0 { + if int(s.nelems)-int(s.allocCount) > 0 { // Put it back on the partial swept list. c.partialSwept(sg).push(s) } else { diff --git a/src/runtime/mstats.go b/src/runtime/mstats.go index d9acb361d5..44cf17c85b 100644 --- a/src/runtime/mstats.go +++ b/src/runtime/mstats.go @@ -561,17 +561,6 @@ func updatememstats() { // Collect allocation stats. This is safe and consistent // because the world is stopped. var smallFree, totalAlloc, totalFree uint64 - // Collect per-spanclass stats. - for spc := range mheap_.central { - // The mcaches are now empty, so mcentral stats are - // up-to-date. - c := &mheap_.central[spc].mcentral - memstats.nmalloc += c.nmalloc - i := spanClass(spc).sizeclass() - memstats.by_size[i].nmalloc += c.nmalloc - totalAlloc += c.nmalloc * uint64(class_to_size[i]) - } - for _, p := range allp { c := p.mcache if c == nil { @@ -585,12 +574,17 @@ func updatememstats() { // Collect per-sizeclass stats. for i := 0; i < _NumSizeClasses; i++ { + // Malloc stats. + memstats.nmalloc += uint64(c.local_nsmallalloc[i]) + memstats.by_size[i].nmalloc += uint64(c.local_nsmallalloc[i]) + totalAlloc += uint64(c.local_nsmallalloc[i]) * uint64(class_to_size[i]) + + // Free stats. memstats.nfree += uint64(c.local_nsmallfree[i]) memstats.by_size[i].nfree += uint64(c.local_nsmallfree[i]) smallFree += uint64(c.local_nsmallfree[i]) * uint64(class_to_size[i]) } } - // Collect remaining large allocation stats. totalFree += smallFree -- cgit v1.2.1 From cca3d1e5533cb40beb9ef55bbc332b733adcc6ba Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Thu, 23 Jul 2020 22:16:46 +0000 Subject: runtime: don't flush local_tinyallocs This change makes local_tinyallocs work like the rest of the malloc stats and doesn't flush local_tinyallocs, instead making that the source-of-truth. 
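For illustration only (hypothetical names, not the runtime's code), the difference between flushing a local counter into a global statistic and letting the local counter itself be the source of truth looks roughly like this:

	package main

	import "fmt"

	type cache struct {
		tinyAllocCount uintptr // written only by the owning P
	}

	// Old style: periodically fold the local count into a global and reset it,
	// which forces flush points and makes the global the source of truth.
	func flushOldStyle(global *uint64, c *cache) {
		*global += uint64(c.tinyAllocCount)
		c.tinyAllocCount = 0
	}

	// New style: never flush; a reader (with the world stopped, as assumed
	// here) derives the total directly from the per-P counters.
	func readTinyAllocs(caches []*cache) (total uint64) {
		for _, c := range caches {
			total += uint64(c.tinyAllocCount)
		}
		return total
	}

	func main() {
		cs := []*cache{{tinyAllocCount: 2}, {tinyAllocCount: 5}}
		fmt.Println(readTinyAllocs(cs)) // prints 7

		var global uint64
		flushOldStyle(&global, cs[0])
		fmt.Println(global, cs[0].tinyAllocCount) // prints 2 0
	}

Dropping the flush removes the need to zero the counter at arbitrary points, which is exactly why the commit can stop touching it outside the owning P.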
Change-Id: I3e6cb5f1b3d086e432ce7d456895511a48e3617a Reviewed-on: https://go-review.googlesource.com/c/go/+/246967 Trust: Michael Knyszek Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Reviewed-by: Michael Pratt --- src/runtime/export_test.go | 7 +++++-- src/runtime/mcache.go | 8 +++++--- src/runtime/mheap.go | 4 ---- src/runtime/mstats.go | 6 ++++-- 4 files changed, 14 insertions(+), 11 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go index d5a90ca65b..d71b180f76 100644 --- a/src/runtime/export_test.go +++ b/src/runtime/export_test.go @@ -339,7 +339,7 @@ func ReadMemStatsSlow() (base, slow MemStats) { // Add in frees. readmemstats_m flushed the cached stats, so // these are up-to-date. - var largeFree, smallFree uint64 + var tinyAllocs, largeFree, smallFree uint64 for _, p := range allp { c := p.mcache if c == nil { @@ -349,6 +349,9 @@ func ReadMemStatsSlow() (base, slow MemStats) { largeFree += uint64(c.local_largefree) slow.Frees += uint64(c.local_nlargefree) + // Collect tiny allocation stats. + tinyAllocs += uint64(c.local_tinyallocs) + // Collect per-sizeclass stats. for i := 0; i < _NumSizeClasses; i++ { slow.Frees += uint64(c.local_nsmallfree[i]) @@ -357,7 +360,7 @@ func ReadMemStatsSlow() (base, slow MemStats) { smallFree += uint64(c.local_nsmallfree[i]) * uint64(class_to_size[i]) } } - slow.Frees += memstats.tinyallocs + slow.Frees += tinyAllocs slow.Mallocs += slow.Frees slow.TotalAlloc = slow.Alloc + largeFree + smallFree diff --git a/src/runtime/mcache.go b/src/runtime/mcache.go index 4d2ba6dff0..fe603116a2 100644 --- a/src/runtime/mcache.go +++ b/src/runtime/mcache.go @@ -32,9 +32,8 @@ type mcache struct { // tiny is a heap pointer. Since mcache is in non-GC'd memory, // we handle it by clearing it in releaseAll during mark // termination. - tiny uintptr - tinyoffset uintptr - local_tinyallocs uintptr // number of tiny allocs not counted in other stats + tiny uintptr + tinyoffset uintptr // The rest is not accessed on every malloc. @@ -49,6 +48,7 @@ type mcache struct { // When read with stats from other mcaches and with the world // stopped, the result will accurately reflect the state of the // application. + local_tinyallocs uintptr // number of tiny allocs not counted in other stats local_largealloc uintptr // bytes allocated for large objects local_nlargealloc uintptr // number of large object allocations local_nsmallalloc [_NumSizeClasses]uintptr // number of allocs for small objects @@ -151,6 +151,8 @@ func (c *mcache) donate(d *mcache) { d.local_nsmallfree[i] += c.local_nsmallfree[i] c.local_nsmallfree[i] = 0 } + d.local_tinyallocs += c.local_tinyallocs + c.local_tinyallocs = 0 } // refill acquires a new span of span class spc for c. This span will diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go index 5635dc6784..47f86ee38c 100644 --- a/src/runtime/mheap.go +++ b/src/runtime/mheap.go @@ -1163,8 +1163,6 @@ func (h *mheap) allocSpan(npages uintptr, manual bool, spanclass spanClass, sysS } atomic.Xadd64(&memstats.heap_scan, int64(c.local_scan)) c.local_scan = 0 - memstats.tinyallocs += uint64(c.local_tinyallocs) - c.local_tinyallocs = 0 // heap_scan was been updated. if gcBlackenEnabled != 0 { @@ -1358,8 +1356,6 @@ func (h *mheap) freeSpan(s *mspan) { lock(&h.lock) atomic.Xadd64(&memstats.heap_scan, int64(c.local_scan)) c.local_scan = 0 - memstats.tinyallocs += uint64(c.local_tinyallocs) - c.local_tinyallocs = 0 if msanenabled { // Tell msan that this entire span is no longer in use. 
base := unsafe.Pointer(s.base()) diff --git a/src/runtime/mstats.go b/src/runtime/mstats.go index 44cf17c85b..341906fced 100644 --- a/src/runtime/mstats.go +++ b/src/runtime/mstats.go @@ -550,6 +550,7 @@ func updatememstats() { memstats.total_alloc = 0 memstats.nmalloc = 0 memstats.nfree = 0 + memstats.tinyallocs = 0 for i := 0; i < len(memstats.by_size); i++ { memstats.by_size[i].nmalloc = 0 memstats.by_size[i].nfree = 0 @@ -572,6 +573,9 @@ func updatememstats() { totalFree += uint64(c.local_largefree) memstats.nfree += uint64(c.local_nlargefree) + // Collect tiny allocation stats. + memstats.tinyallocs += uint64(c.local_tinyallocs) + // Collect per-sizeclass stats. for i := 0; i < _NumSizeClasses; i++ { // Malloc stats. @@ -644,8 +648,6 @@ func purgecachedstats(c *mcache) { // Protected by heap lock. atomic.Xadd64(&memstats.heap_scan, int64(c.local_scan)) c.local_scan = 0 - memstats.tinyallocs += uint64(c.local_tinyallocs) - c.local_tinyallocs = 0 } // Atomically increases a given *system* memory stat. We are counting on this -- cgit v1.2.1 From d677899e903c4741920846f1af2c14c56f6e710e Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Thu, 23 Jul 2020 22:36:58 +0000 Subject: runtime: flush local_scan directly and more often Now that local_scan is the last mcache-based statistic that is flushed by purgecachedstats, and heap_scan and gcController.revise may be interacted with concurrently, we don't need to flush heap_scan at arbitrary locations where the heap is locked, and we don't need purgecachedstats and cachestats anymore. Instead, we can flush local_scan at the same time we update heap_live in refill, so the two updates may share the same revise call. Clean up unused functions, remove code that would cause the heap to get locked in the allocSpan when it didn't need to (other than to flush local_scan), and flush local_scan explicitly in a few important places. Notably we need to flush local_scan whenever we flush the other stats, but it doesn't need to be donated anywhere, so have releaseAll do the flushing. Also, we need to flush local_scan before we set heap_scan at the end of a GC, which was previously handled by cachestats. Just do so explicitly -- it's not much code and it becomes a lot more clear why we need to do so. Change-Id: I35ac081784df7744d515479896a41d530653692d Reviewed-on: https://go-review.googlesource.com/c/go/+/246968 Run-TryBot: Michael Knyszek Trust: Michael Knyszek TryBot-Result: Go Bot Reviewed-by: Michael Pratt --- src/runtime/mcache.go | 22 ++++++++++++++++++++-- src/runtime/mgc.go | 14 ++++++++++++-- src/runtime/mheap.go | 49 +++---------------------------------------------- src/runtime/mstats.go | 25 ------------------------- 4 files changed, 35 insertions(+), 75 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/mcache.go b/src/runtime/mcache.go index fe603116a2..b8e388cc4f 100644 --- a/src/runtime/mcache.go +++ b/src/runtime/mcache.go @@ -124,7 +124,6 @@ func freemcache(c *mcache, recipient *mcache) { // gcworkbuffree(c.gcworkbuf) lock(&mheap_.lock) - purgecachedstats(c) // Donate anything else that's left. c.donate(recipient) mheap_.cachealloc.free(unsafe.Pointer(c)) @@ -135,6 +134,8 @@ func freemcache(c *mcache, recipient *mcache) { // donate flushes data and resources which have no global // pool to another mcache. func (c *mcache) donate(d *mcache) { + // local_scan is handled separately because it's not + // like these stats -- it's used for GC pacing. 
d.local_largealloc += c.local_largealloc c.local_largealloc = 0 d.local_nlargealloc += c.local_nlargealloc @@ -192,14 +193,22 @@ func (c *mcache) refill(spc spanClass) { // Assume all objects from this span will be allocated in the // mcache. If it gets uncached, we'll adjust this. c.local_nsmallalloc[spc.sizeclass()] += uintptr(s.nelems) - uintptr(s.allocCount) + + // Update heap_live with the same assumption. usedBytes := uintptr(s.allocCount) * s.elemsize atomic.Xadd64(&memstats.heap_live, int64(s.npages*pageSize)-int64(usedBytes)) + + // While we're here, flush local_scan, since we have to call + // revise anyway. + atomic.Xadd64(&memstats.heap_scan, int64(c.local_scan)) + c.local_scan = 0 + if trace.enabled { // heap_live changed. traceHeapAlloc() } if gcBlackenEnabled != 0 { - // heap_live changed. + // heap_live and heap_scan changed. gcController.revise() } @@ -248,6 +257,10 @@ func (c *mcache) largeAlloc(size uintptr, needzero bool, noscan bool) *mspan { } func (c *mcache) releaseAll() { + // Take this opportunity to flush local_scan. + atomic.Xadd64(&memstats.heap_scan, int64(c.local_scan)) + c.local_scan = 0 + sg := mheap_.sweepgen for i := range c.alloc { s := c.alloc[i] @@ -273,6 +286,11 @@ func (c *mcache) releaseAll() { // Clear tinyalloc pool. c.tiny = 0 c.tinyoffset = 0 + + // Updated heap_scan and possible heap_live. + if gcBlackenEnabled != 0 { + gcController.revise() + } } // prepareForSweep flushes c if the system has entered a new sweep phase diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index c54f893689..55554c117c 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -2083,11 +2083,21 @@ func gcMark(start_time int64) { gcw.dispose() } - cachestats() - // Update the marked heap stat. memstats.heap_marked = work.bytesMarked + // Flush local_scan from each mcache since we're about to modify + // heap_scan directly. If we were to flush this later, then local_scan + // might have incorrect information. + for _, p := range allp { + c := p.mcache + if c == nil { + continue + } + memstats.heap_scan += uint64(c.local_scan) + c.local_scan = 0 + } + // Update other GC heap size stats. This must happen after // cachestats (which flushes local statistics to these) and // flushallmcaches (which modifies heap_live). diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go index 47f86ee38c..40fd58b0ef 100644 --- a/src/runtime/mheap.go +++ b/src/runtime/mheap.go @@ -1102,23 +1102,11 @@ func (h *mheap) allocSpan(npages uintptr, manual bool, spanclass spanClass, sysS base, scav = c.alloc(npages) if base != 0 { s = h.tryAllocMSpan() - - if s != nil && gcBlackenEnabled == 0 && (manual || spanclass.sizeclass() != 0) { + if s != nil { goto HaveSpan } - // We're either running duing GC, failed to acquire a mspan, - // or the allocation is for a large object. This means we - // have to lock the heap and do a bunch of extra work, - // so go down the HaveBaseLocked path. - // - // We must do this during GC to avoid skew with heap_scan - // since we flush mcache stats whenever we lock. - // - // TODO(mknyszek): It would be nice to not have to - // lock the heap if it's a large allocation, but - // it's fine for now. The critical section here is - // short and large object allocations are relatively - // infrequent. + // We have a base but no mspan, so we need + // to lock the heap. } } @@ -1145,30 +1133,6 @@ func (h *mheap) allocSpan(npages uintptr, manual bool, spanclass spanClass, sysS // one now that we have the heap lock. 
s = h.allocMSpanLocked() } - if !manual { - // This is a heap span, so we should do some additional accounting - // which may only be done with the heap locked. - - // Transfer stats from mcache to global. - var c *mcache - if gp.m.p != 0 { - c = gp.m.p.ptr().mcache - } else { - // This case occurs while bootstrapping. - // See the similar code in mallocgc. - c = mcache0 - if c == nil { - throw("mheap.allocSpan called with no P") - } - } - atomic.Xadd64(&memstats.heap_scan, int64(c.local_scan)) - c.local_scan = 0 - - // heap_scan was been updated. - if gcBlackenEnabled != 0 { - gcController.revise() - } - } unlock(&h.lock) HaveSpan: @@ -1352,20 +1316,13 @@ func (h *mheap) grow(npage uintptr) bool { // Free the span back into the heap. func (h *mheap) freeSpan(s *mspan) { systemstack(func() { - c := getg().m.p.ptr().mcache lock(&h.lock) - atomic.Xadd64(&memstats.heap_scan, int64(c.local_scan)) - c.local_scan = 0 if msanenabled { // Tell msan that this entire span is no longer in use. base := unsafe.Pointer(s.base()) bytes := s.npages << _PageShift msanfree(base, bytes) } - if gcBlackenEnabled != 0 { - // heap_scan changed. - gcController.revise() - } h.freeSpanLocked(s, true, true) unlock(&h.lock) }) diff --git a/src/runtime/mstats.go b/src/runtime/mstats.go index 341906fced..5eeb173640 100644 --- a/src/runtime/mstats.go +++ b/src/runtime/mstats.go @@ -556,9 +556,6 @@ func updatememstats() { memstats.by_size[i].nfree = 0 } - // Aggregate local stats. - cachestats() - // Collect allocation stats. This is safe and consistent // because the world is stopped. var smallFree, totalAlloc, totalFree uint64 @@ -602,21 +599,6 @@ func updatememstats() { memstats.heap_objects = memstats.nmalloc - memstats.nfree } -// cachestats flushes all mcache stats. -// -// The world must be stopped. -// -//go:nowritebarrier -func cachestats() { - for _, p := range allp { - c := p.mcache - if c == nil { - continue - } - purgecachedstats(c) - } -} - // flushmcache flushes the mcache of allp[i]. // // The world must be stopped. @@ -643,13 +625,6 @@ func flushallmcaches() { } } -//go:nosplit -func purgecachedstats(c *mcache) { - // Protected by heap lock. - atomic.Xadd64(&memstats.heap_scan, int64(c.local_scan)) - c.local_scan = 0 -} - // Atomically increases a given *system* memory stat. We are counting on this // stat never overflowing a uintptr, so this function must only be used for // system memory stats. -- cgit v1.2.1 From c8638498008f9874dc5a48734418e0fbea08cee9 Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Fri, 24 Jul 2020 19:58:31 +0000 Subject: runtime: rename mcache fields to match Go style This change renames a bunch of malloc statistics stored in the mcache that are all named with the "local_" prefix. It also renames largeAlloc to allocLarge to prevent a naming conflict, and next_sample because it would be the last mcache field with the old C naming style. 
Change-Id: I29695cb83b397a435ede7e9ad5c3c9be72767ea3 Reviewed-on: https://go-review.googlesource.com/c/go/+/246969 Trust: Michael Knyszek Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Reviewed-by: Michael Pratt --- src/runtime/export_test.go | 14 ++++---- src/runtime/malloc.go | 12 +++---- src/runtime/mcache.go | 78 ++++++++++++++++++++--------------------- src/runtime/mgc.go | 8 ++--- src/runtime/mgcsweep.go | 6 ++-- src/runtime/mstats.go | 22 ++++++------ src/runtime/pprof/mprof_test.go | 2 +- 7 files changed, 71 insertions(+), 71 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go index d71b180f76..47cbc286f6 100644 --- a/src/runtime/export_test.go +++ b/src/runtime/export_test.go @@ -346,18 +346,18 @@ func ReadMemStatsSlow() (base, slow MemStats) { continue } // Collect large allocation stats. - largeFree += uint64(c.local_largefree) - slow.Frees += uint64(c.local_nlargefree) + largeFree += uint64(c.largeFree) + slow.Frees += uint64(c.largeFreeCount) // Collect tiny allocation stats. - tinyAllocs += uint64(c.local_tinyallocs) + tinyAllocs += uint64(c.tinyAllocCount) // Collect per-sizeclass stats. for i := 0; i < _NumSizeClasses; i++ { - slow.Frees += uint64(c.local_nsmallfree[i]) - bySize[i].Frees += uint64(c.local_nsmallfree[i]) - bySize[i].Mallocs += uint64(c.local_nsmallfree[i]) - smallFree += uint64(c.local_nsmallfree[i]) * uint64(class_to_size[i]) + slow.Frees += uint64(c.smallFreeCount[i]) + bySize[i].Frees += uint64(c.smallFreeCount[i]) + bySize[i].Mallocs += uint64(c.smallFreeCount[i]) + smallFree += uint64(c.smallFreeCount[i]) * uint64(class_to_size[i]) } } slow.Frees += tinyAllocs diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go index ec601ccb39..0f48d7f68e 100644 --- a/src/runtime/malloc.go +++ b/src/runtime/malloc.go @@ -1040,7 +1040,7 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer { // The object fits into existing tiny block. x = unsafe.Pointer(c.tiny + off) c.tinyoffset = off + size - c.local_tinyallocs++ + c.tinyAllocCount++ mp.mallocing = 0 releasem(mp) return x @@ -1082,7 +1082,7 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer { } } else { shouldhelpgc = true - span = c.largeAlloc(size, needzero, noscan) + span = c.allocLarge(size, needzero, noscan) span.freeindex = 1 span.allocCount = 1 x = unsafe.Pointer(span.base()) @@ -1111,7 +1111,7 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer { } else { scanSize = typ.ptrdata } - c.local_scan += scanSize + c.scanAlloc += scanSize } // Ensure that the stores above that initialize x to @@ -1153,8 +1153,8 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer { } if rate := MemProfileRate; rate > 0 { - if rate != 1 && size < c.next_sample { - c.next_sample -= size + if rate != 1 && size < c.nextSample { + c.nextSample -= size } else { mp := acquirem() profilealloc(mp, x, size) @@ -1221,7 +1221,7 @@ func profilealloc(mp *m, x unsafe.Pointer, size uintptr) { throw("profilealloc called with no P") } } - c.next_sample = nextSample() + c.nextSample = nextSample() mProf_Malloc(x, size) } diff --git a/src/runtime/mcache.go b/src/runtime/mcache.go index b8e388cc4f..c3e0e5e1f7 100644 --- a/src/runtime/mcache.go +++ b/src/runtime/mcache.go @@ -20,8 +20,8 @@ import ( type mcache struct { // The following members are accessed on every malloc, // so they are grouped here for better caching. 
- next_sample uintptr // trigger heap sample after allocating this many bytes - local_scan uintptr // bytes of scannable heap allocated + nextSample uintptr // trigger heap sample after allocating this many bytes + scanAlloc uintptr // bytes of scannable heap allocated // Allocator cache for tiny objects w/o pointers. // See "Tiny allocator" comment in malloc.go. @@ -48,13 +48,13 @@ type mcache struct { // When read with stats from other mcaches and with the world // stopped, the result will accurately reflect the state of the // application. - local_tinyallocs uintptr // number of tiny allocs not counted in other stats - local_largealloc uintptr // bytes allocated for large objects - local_nlargealloc uintptr // number of large object allocations - local_nsmallalloc [_NumSizeClasses]uintptr // number of allocs for small objects - local_largefree uintptr // bytes freed for large objects (>maxsmallsize) - local_nlargefree uintptr // number of frees for large objects (>maxsmallsize) - local_nsmallfree [_NumSizeClasses]uintptr // number of frees for small objects (<=maxsmallsize) + tinyAllocCount uintptr // number of tiny allocs not counted in other stats + largeAlloc uintptr // bytes allocated for large objects + largeAllocCount uintptr // number of large object allocations + smallAllocCount [_NumSizeClasses]uintptr // number of allocs for small objects + largeFree uintptr // bytes freed for large objects (>maxSmallSize) + largeFreeCount uintptr // number of frees for large objects (>maxSmallSize) + smallFreeCount [_NumSizeClasses]uintptr // number of frees for small objects (<=maxSmallSize) // flushGen indicates the sweepgen during which this mcache // was last flushed. If flushGen != mheap_.sweepgen, the spans @@ -103,7 +103,7 @@ func allocmcache() *mcache { for i := range c.alloc { c.alloc[i] = &emptymspan } - c.next_sample = nextSample() + c.nextSample = nextSample() return c } @@ -134,26 +134,26 @@ func freemcache(c *mcache, recipient *mcache) { // donate flushes data and resources which have no global // pool to another mcache. func (c *mcache) donate(d *mcache) { - // local_scan is handled separately because it's not + // scanAlloc is handled separately because it's not // like these stats -- it's used for GC pacing. - d.local_largealloc += c.local_largealloc - c.local_largealloc = 0 - d.local_nlargealloc += c.local_nlargealloc - c.local_nlargealloc = 0 - for i := range c.local_nsmallalloc { - d.local_nsmallalloc[i] += c.local_nsmallalloc[i] - c.local_nsmallalloc[i] = 0 + d.largeAlloc += c.largeAlloc + c.largeAlloc = 0 + d.largeAllocCount += c.largeAllocCount + c.largeAllocCount = 0 + for i := range c.smallAllocCount { + d.smallAllocCount[i] += c.smallAllocCount[i] + c.smallAllocCount[i] = 0 } - d.local_largefree += c.local_largefree - c.local_largefree = 0 - d.local_nlargefree += c.local_nlargefree - c.local_nlargefree = 0 - for i := range c.local_nsmallfree { - d.local_nsmallfree[i] += c.local_nsmallfree[i] - c.local_nsmallfree[i] = 0 + d.largeFree += c.largeFree + c.largeFree = 0 + d.largeFreeCount += c.largeFreeCount + c.largeFreeCount = 0 + for i := range c.smallFreeCount { + d.smallFreeCount[i] += c.smallFreeCount[i] + c.smallFreeCount[i] = 0 } - d.local_tinyallocs += c.local_tinyallocs - c.local_tinyallocs = 0 + d.tinyAllocCount += c.tinyAllocCount + c.tinyAllocCount = 0 } // refill acquires a new span of span class spc for c. This span will @@ -192,16 +192,16 @@ func (c *mcache) refill(spc spanClass) { // Assume all objects from this span will be allocated in the // mcache. 
If it gets uncached, we'll adjust this. - c.local_nsmallalloc[spc.sizeclass()] += uintptr(s.nelems) - uintptr(s.allocCount) + c.smallAllocCount[spc.sizeclass()] += uintptr(s.nelems) - uintptr(s.allocCount) // Update heap_live with the same assumption. usedBytes := uintptr(s.allocCount) * s.elemsize atomic.Xadd64(&memstats.heap_live, int64(s.npages*pageSize)-int64(usedBytes)) - // While we're here, flush local_scan, since we have to call + // While we're here, flush scanAlloc, since we have to call // revise anyway. - atomic.Xadd64(&memstats.heap_scan, int64(c.local_scan)) - c.local_scan = 0 + atomic.Xadd64(&memstats.heap_scan, int64(c.scanAlloc)) + c.scanAlloc = 0 if trace.enabled { // heap_live changed. @@ -215,8 +215,8 @@ func (c *mcache) refill(spc spanClass) { c.alloc[spc] = s } -// largeAlloc allocates a span for a large object. -func (c *mcache) largeAlloc(size uintptr, needzero bool, noscan bool) *mspan { +// allocLarge allocates a span for a large object. +func (c *mcache) allocLarge(size uintptr, needzero bool, noscan bool) *mspan { if size+_PageSize < size { throw("out of memory") } @@ -235,8 +235,8 @@ func (c *mcache) largeAlloc(size uintptr, needzero bool, noscan bool) *mspan { if s == nil { throw("out of memory") } - c.local_largealloc += npages * pageSize - c.local_nlargealloc++ + c.largeAlloc += npages * pageSize + c.largeAllocCount++ // Update heap_live and revise pacing if needed. atomic.Xadd64(&memstats.heap_live, int64(npages*pageSize)) @@ -257,9 +257,9 @@ func (c *mcache) largeAlloc(size uintptr, needzero bool, noscan bool) *mspan { } func (c *mcache) releaseAll() { - // Take this opportunity to flush local_scan. - atomic.Xadd64(&memstats.heap_scan, int64(c.local_scan)) - c.local_scan = 0 + // Take this opportunity to flush scanAlloc. + atomic.Xadd64(&memstats.heap_scan, int64(c.scanAlloc)) + c.scanAlloc = 0 sg := mheap_.sweepgen for i := range c.alloc { @@ -267,7 +267,7 @@ func (c *mcache) releaseAll() { if s != &emptymspan { // Adjust nsmallalloc in case the span wasn't fully allocated. n := uintptr(s.nelems) - uintptr(s.allocCount) - c.local_nsmallalloc[spanClass(i).sizeclass()] -= n + c.smallAllocCount[spanClass(i).sizeclass()] -= n if s.sweepgen != sg+1 { // refill conservatively counted unallocated slots in heap_live. // Undo this. diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index 55554c117c..540c376f1c 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -2086,16 +2086,16 @@ func gcMark(start_time int64) { // Update the marked heap stat. memstats.heap_marked = work.bytesMarked - // Flush local_scan from each mcache since we're about to modify - // heap_scan directly. If we were to flush this later, then local_scan + // Flush scanAlloc from each mcache since we're about to modify + // heap_scan directly. If we were to flush this later, then scanAlloc // might have incorrect information. for _, p := range allp { c := p.mcache if c == nil { continue } - memstats.heap_scan += uint64(c.local_scan) - c.local_scan = 0 + memstats.heap_scan += uint64(c.scanAlloc) + c.scanAlloc = 0 } // Update other GC heap size stats. This must happen after diff --git a/src/runtime/mgcsweep.go b/src/runtime/mgcsweep.go index 6b8c56ce35..7103b08455 100644 --- a/src/runtime/mgcsweep.go +++ b/src/runtime/mgcsweep.go @@ -503,7 +503,7 @@ func (s *mspan) sweep(preserve bool) bool { // wasn't totally filled, but then swept, still has all of its // free slots zeroed. 
s.needzero = 1 - c.local_nsmallfree[spc.sizeclass()] += uintptr(nfreed) + c.smallFreeCount[spc.sizeclass()] += uintptr(nfreed) } if !preserve { // The caller may not have removed this span from whatever @@ -548,8 +548,8 @@ func (s *mspan) sweep(preserve bool) bool { } else { mheap_.freeSpan(s) } - c.local_nlargefree++ - c.local_largefree += size + c.largeFreeCount++ + c.largeFree += size return true } diff --git a/src/runtime/mstats.go b/src/runtime/mstats.go index 5eeb173640..64687c24e5 100644 --- a/src/runtime/mstats.go +++ b/src/runtime/mstats.go @@ -565,25 +565,25 @@ func updatememstats() { continue } // Collect large allocation stats. - memstats.nmalloc += uint64(c.local_nlargealloc) - totalAlloc += uint64(c.local_largealloc) - totalFree += uint64(c.local_largefree) - memstats.nfree += uint64(c.local_nlargefree) + memstats.nmalloc += uint64(c.largeAllocCount) + totalAlloc += uint64(c.largeAlloc) + totalFree += uint64(c.largeFree) + memstats.nfree += uint64(c.largeFreeCount) // Collect tiny allocation stats. - memstats.tinyallocs += uint64(c.local_tinyallocs) + memstats.tinyallocs += uint64(c.tinyAllocCount) // Collect per-sizeclass stats. for i := 0; i < _NumSizeClasses; i++ { // Malloc stats. - memstats.nmalloc += uint64(c.local_nsmallalloc[i]) - memstats.by_size[i].nmalloc += uint64(c.local_nsmallalloc[i]) - totalAlloc += uint64(c.local_nsmallalloc[i]) * uint64(class_to_size[i]) + memstats.nmalloc += uint64(c.smallAllocCount[i]) + memstats.by_size[i].nmalloc += uint64(c.smallAllocCount[i]) + totalAlloc += uint64(c.smallAllocCount[i]) * uint64(class_to_size[i]) // Free stats. - memstats.nfree += uint64(c.local_nsmallfree[i]) - memstats.by_size[i].nfree += uint64(c.local_nsmallfree[i]) - smallFree += uint64(c.local_nsmallfree[i]) * uint64(class_to_size[i]) + memstats.nfree += uint64(c.smallFreeCount[i]) + memstats.by_size[i].nfree += uint64(c.smallFreeCount[i]) + smallFree += uint64(c.smallFreeCount[i]) * uint64(class_to_size[i]) } } diff --git a/src/runtime/pprof/mprof_test.go b/src/runtime/pprof/mprof_test.go index f253f07def..c11a45fd69 100644 --- a/src/runtime/pprof/mprof_test.go +++ b/src/runtime/pprof/mprof_test.go @@ -70,7 +70,7 @@ func TestMemoryProfiler(t *testing.T) { runtime.MemProfileRate = oldRate }() - // Allocate a meg to ensure that mcache.next_sample is updated to 1. + // Allocate a meg to ensure that mcache.nextSample is updated to 1. for i := 0; i < 1024; i++ { memSink = make([]byte, 1024) } -- cgit v1.2.1 From dc02578ac8bb27359c7d0451ca249e47bdef2a9e Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Wed, 29 Jul 2020 19:00:37 +0000 Subject: runtime: make the span allocation purpose more explicit This change modifies mheap's span allocation API to have each caller declare a purpose, defined as a new enum called spanAllocType. The purpose behind this change is two-fold: 1. Tight control over who gets to allocate heap memory is, generally speaking, a good thing. Every codepath that allocates heap memory places additional implicit restrictions on the allocator. A notable example of a restriction is work bufs coming from heap memory: write barriers are not allowed in allocation paths because then we could have a situation where the allocator calls into the allocator. 2. Memory statistic updating is explicit. Instead of passing an opaque pointer for statistic updating, which places restrictions on how that statistic may be updated, we use the spanAllocType to determine which statistic to update and how. 
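As a rough illustration of point 2 -- a sketch with invented names, not the actual runtime API -- declaring a small allocation-purpose enum lets the allocator own the bookkeeping, rather than having each caller pass a pointer to whichever statistic it wants bumped:

	package main

	import "fmt"

	type spanAllocKind uint8 // illustrative stand-in for a spanAllocType-style enum

	const (
		allocHeap spanAllocKind = iota
		allocStack
		allocWorkBuf
	)

	type stats struct {
		heapInuse, stacksInuse, gcSys uint64
	}

	// Old shape: the caller passes an opaque stat pointer, so the allocator
	// cannot reason about who is allocating or why.
	func allocSpanOld(st *stats, stat *uint64, nbytes uint64) {
		*stat += nbytes
	}

	// New shape: the caller declares a purpose and the allocator decides
	// which statistic to update, keeping all accounting in one place.
	func allocSpanNew(st *stats, kind spanAllocKind, nbytes uint64) {
		switch kind {
		case allocHeap:
			st.heapInuse += nbytes
		case allocStack:
			st.stacksInuse += nbytes
		case allocWorkBuf:
			st.gcSys += nbytes
		}
	}

	func main() {
		var st stats
		allocSpanOld(&st, &st.heapInuse, 8192)
		allocSpanNew(&st, allocStack, 4096)
		fmt.Println(st.heapInuse, st.stacksInuse, st.gcSys) // prints 8192 4096 0
	}
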
We also take this opportunity to group all the statistic updating code together, which should make the accounting code a little easier to follow. Change-Id: Ic0b0898959ba2a776f67122f0e36c9d7d60e3085 Reviewed-on: https://go-review.googlesource.com/c/go/+/246970 Trust: Michael Knyszek Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Reviewed-by: Michael Pratt --- src/runtime/mbitmap.go | 4 +-- src/runtime/mgcwork.go | 4 +-- src/runtime/mheap.go | 78 +++++++++++++++++++++++++++++++++++++------------- src/runtime/stack.go | 12 ++++---- 4 files changed, 68 insertions(+), 30 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/mbitmap.go b/src/runtime/mbitmap.go index 51c3625c3d..fbfaae0f93 100644 --- a/src/runtime/mbitmap.go +++ b/src/runtime/mbitmap.go @@ -1868,12 +1868,12 @@ func materializeGCProg(ptrdata uintptr, prog *byte) *mspan { bitmapBytes := divRoundUp(ptrdata, 8*sys.PtrSize) // Compute the number of pages needed for bitmapBytes. pages := divRoundUp(bitmapBytes, pageSize) - s := mheap_.allocManual(pages, &memstats.gc_sys) + s := mheap_.allocManual(pages, spanAllocPtrScalarBits) runGCProg(addb(prog, 4), nil, (*byte)(unsafe.Pointer(s.startAddr)), 1) return s } func dematerializeGCProg(s *mspan) { - mheap_.freeManual(s, &memstats.gc_sys) + mheap_.freeManual(s, spanAllocPtrScalarBits) } func dumpGCProg(p *byte) { diff --git a/src/runtime/mgcwork.go b/src/runtime/mgcwork.go index 51e0fe9219..b3a068661e 100644 --- a/src/runtime/mgcwork.go +++ b/src/runtime/mgcwork.go @@ -371,7 +371,7 @@ func getempty() *workbuf { } if s == nil { systemstack(func() { - s = mheap_.allocManual(workbufAlloc/pageSize, &memstats.gc_sys) + s = mheap_.allocManual(workbufAlloc/pageSize, spanAllocWorkBuf) }) if s == nil { throw("out of memory") @@ -473,7 +473,7 @@ func freeSomeWbufs(preemptible bool) bool { break } work.wbufSpans.free.remove(span) - mheap_.freeManual(span, &memstats.gc_sys) + mheap_.freeManual(span, spanAllocWorkBuf) } }) more := !work.wbufSpans.free.isEmpty() diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go index 40fd58b0ef..df659e222b 100644 --- a/src/runtime/mheap.go +++ b/src/runtime/mheap.go @@ -861,6 +861,22 @@ func (h *mheap) reclaimChunk(arenas []arenaIdx, pageIdx, n uintptr) uintptr { return nFreed } +// spanAllocType represents the type of allocation to make, or +// the type of allocation to be freed. +type spanAllocType uint8 + +const ( + spanAllocHeap spanAllocType = iota // heap span + spanAllocStack // stack span + spanAllocPtrScalarBits // unrolled GC prog bitmap span + spanAllocWorkBuf // work buf span +) + +// manual returns true if the span allocation is manually managed. +func (s spanAllocType) manual() bool { + return s != spanAllocHeap +} + // alloc allocates a new span of npage pages from the GC'd heap. // // spanclass indicates the span's size class and scannability. @@ -877,7 +893,7 @@ func (h *mheap) alloc(npages uintptr, spanclass spanClass, needzero bool) *mspan if h.sweepdone == 0 { h.reclaim(npages) } - s = h.allocSpan(npages, false, spanclass, &memstats.heap_inuse) + s = h.allocSpan(npages, spanAllocHeap, spanclass) }) if s != nil { @@ -902,9 +918,15 @@ func (h *mheap) alloc(npages uintptr, spanclass spanClass, needzero bool) *mspan // allocManual must be called on the system stack because it may // acquire the heap lock via allocSpan. See mheap for details. // +// If new code is written to call allocManual, do NOT use an +// existing spanAllocType value and instead declare a new one. 
+// //go:systemstack -func (h *mheap) allocManual(npages uintptr, stat *uint64) *mspan { - return h.allocSpan(npages, true, 0, stat) +func (h *mheap) allocManual(npages uintptr, typ spanAllocType) *mspan { + if !typ.manual() { + throw("manual span allocation called with non-manually-managed type") + } + return h.allocSpan(npages, typ, 0) } // setSpans modifies the span map so [spanOf(base), spanOf(base+npage*pageSize)) @@ -1066,7 +1088,7 @@ func (h *mheap) freeMSpanLocked(s *mspan) { // allocSpan allocates an mspan which owns npages worth of memory. // -// If manual == false, allocSpan allocates a heap span of class spanclass +// If typ.manual() == false, allocSpan allocates a heap span of class spanclass // and updates heap accounting. If manual == true, allocSpan allocates a // manually-managed span (spanclass is ignored), and the caller is // responsible for any accounting related to its use of the span. Either @@ -1081,7 +1103,7 @@ func (h *mheap) freeMSpanLocked(s *mspan) { // the heap lock and because it must block GC transitions. // //go:systemstack -func (h *mheap) allocSpan(npages uintptr, manual bool, spanclass spanClass, sysStat *uint64) (s *mspan) { +func (h *mheap) allocSpan(npages uintptr, typ spanAllocType, spanclass spanClass) (s *mspan) { // Function-global state. gp := getg() base, scav := uintptr(0), uintptr(0) @@ -1143,12 +1165,10 @@ HaveSpan: s.needzero = 1 } nbytes := npages * pageSize - if manual { + if typ.manual() { s.manualFreeList = 0 s.nelems = 0 s.limit = s.base() + s.npages*pageSize - // Manually managed memory doesn't count toward heap_sys. - mSysStatDec(&memstats.heap_sys, s.npages*pageSize) s.state.set(mSpanManual) } else { // We must set span properties before the span is published anywhere @@ -1205,7 +1225,18 @@ HaveSpan: mSysStatDec(&memstats.heap_released, scav) } // Update stats. - mSysStatInc(sysStat, nbytes) + switch typ { + case spanAllocHeap: + mSysStatInc(&memstats.heap_inuse, nbytes) + case spanAllocStack: + mSysStatInc(&memstats.stacks_inuse, nbytes) + case spanAllocPtrScalarBits, spanAllocWorkBuf: + mSysStatInc(&memstats.gc_sys, nbytes) + } + if typ.manual() { + // Manually managed memory doesn't count toward heap_sys. + mSysStatDec(&memstats.heap_sys, nbytes) + } mSysStatDec(&memstats.heap_idle, nbytes) // Publish the span in various locations. @@ -1217,7 +1248,7 @@ HaveSpan: // before that happens) or pageInUse is updated. h.setSpans(s.base(), npages, s) - if !manual { + if !typ.manual() { // Mark in-use span in arena page bitmap. // // This publishes the span to the page sweeper, so @@ -1323,13 +1354,13 @@ func (h *mheap) freeSpan(s *mspan) { bytes := s.npages << _PageShift msanfree(base, bytes) } - h.freeSpanLocked(s, true, true) + h.freeSpanLocked(s, spanAllocHeap) unlock(&h.lock) }) } // freeManual frees a manually-managed span returned by allocManual. -// stat must be the same as the stat passed to the allocManual that +// typ must be the same as the spanAllocType passed to the allocManual that // allocated s. // // This must only be called when gcphase == _GCoff. See mSpanState for @@ -1339,16 +1370,14 @@ func (h *mheap) freeSpan(s *mspan) { // the heap lock. See mheap for details. 
// //go:systemstack -func (h *mheap) freeManual(s *mspan, stat *uint64) { +func (h *mheap) freeManual(s *mspan, typ spanAllocType) { s.needzero = 1 lock(&h.lock) - mSysStatDec(stat, s.npages*pageSize) - mSysStatInc(&memstats.heap_sys, s.npages*pageSize) - h.freeSpanLocked(s, false, true) + h.freeSpanLocked(s, typ) unlock(&h.lock) } -func (h *mheap) freeSpanLocked(s *mspan, acctinuse, acctidle bool) { +func (h *mheap) freeSpanLocked(s *mspan, typ spanAllocType) { switch s.state.get() { case mSpanManual: if s.allocCount != 0 { @@ -1368,12 +1397,21 @@ func (h *mheap) freeSpanLocked(s *mspan, acctinuse, acctidle bool) { throw("mheap.freeSpanLocked - invalid span state") } - if acctinuse { + // Update stats. + // + // Mirrors the code in allocSpan. + switch typ { + case spanAllocHeap: mSysStatDec(&memstats.heap_inuse, s.npages*pageSize) + case spanAllocStack: + mSysStatDec(&memstats.stacks_inuse, s.npages*pageSize) + case spanAllocPtrScalarBits, spanAllocWorkBuf: + mSysStatDec(&memstats.gc_sys, s.npages*pageSize) } - if acctidle { - mSysStatInc(&memstats.heap_idle, s.npages*pageSize) + if typ.manual() { + mSysStatInc(&memstats.heap_sys, s.npages*pageSize) } + mSysStatInc(&memstats.heap_idle, s.npages*pageSize) // Mark the space as free. h.pages.free(s.base(), s.npages) diff --git a/src/runtime/stack.go b/src/runtime/stack.go index 2afc2635aa..7b9dce5393 100644 --- a/src/runtime/stack.go +++ b/src/runtime/stack.go @@ -187,7 +187,7 @@ func stackpoolalloc(order uint8) gclinkptr { lockWithRankMayAcquire(&mheap_.lock, lockRankMheap) if s == nil { // no free stacks. Allocate another span worth. - s = mheap_.allocManual(_StackCacheSize>>_PageShift, &memstats.stacks_inuse) + s = mheap_.allocManual(_StackCacheSize>>_PageShift, spanAllocStack) if s == nil { throw("out of memory") } @@ -251,7 +251,7 @@ func stackpoolfree(x gclinkptr, order uint8) { stackpool[order].item.span.remove(s) s.manualFreeList = 0 osStackFree(s) - mheap_.freeManual(s, &memstats.stacks_inuse) + mheap_.freeManual(s, spanAllocStack) } } @@ -396,7 +396,7 @@ func stackalloc(n uint32) stack { if s == nil { // Allocate a new stack from the heap. - s = mheap_.allocManual(npage, &memstats.stacks_inuse) + s = mheap_.allocManual(npage, spanAllocStack) if s == nil { throw("out of memory") } @@ -480,7 +480,7 @@ func stackfree(stk stack) { // Free the stack immediately if we're // sweeping. osStackFree(s) - mheap_.freeManual(s, &memstats.stacks_inuse) + mheap_.freeManual(s, spanAllocStack) } else { // If the GC is running, we can't return a // stack span to the heap because it could be @@ -1193,7 +1193,7 @@ func freeStackSpans() { list.remove(s) s.manualFreeList = 0 osStackFree(s) - mheap_.freeManual(s, &memstats.stacks_inuse) + mheap_.freeManual(s, spanAllocStack) } s = next } @@ -1207,7 +1207,7 @@ func freeStackSpans() { next := s.next stackLarge.free[i].remove(s) osStackFree(s) - mheap_.freeManual(s, &memstats.stacks_inuse) + mheap_.freeManual(s, spanAllocStack) s = next } } -- cgit v1.2.1 From 8ebc58452af3a586a3da1f68725bc83c78d4b073 Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Wed, 29 Jul 2020 20:25:05 +0000 Subject: runtime: delineate which memstats are system stats with a type This change modifies the type of several mstats fields to be a new type: sysMemStat. This type has the same structure as the fields used to have. The purpose of this change is to make it very clear which stats may be used in various functions for accounting (usually the platform-specific sys* functions, but there are others). 
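As a rough picture of what such a wrapper looks like (an editor's sketch with
simplified, hypothetical code; the real type is added in mstats.go in the diff
below), the essential property is that every read and update goes through atomics:

	package main

	import (
		"fmt"
		"sync/atomic"
	)

	// sysMemStat here is an editor's stand-in for the runtime type described
	// above: a 64-bit memory statistic that is only ever accessed atomically.
	type sysMemStat uint64

	// load atomically reads the current value of the statistic.
	func (s *sysMemStat) load() uint64 {
		return atomic.LoadUint64((*uint64)(s))
	}

	// add atomically adjusts the statistic by n, which may be negative.
	// Adding uint64(n) relies on two's-complement wraparound for subtraction.
	func (s *sysMemStat) add(n int64) {
		val := atomic.AddUint64((*uint64)(s), uint64(n))
		if int64(val) < 0 {
			panic("sysMemStat underflow")
		}
	}

	func main() {
		var heapSys sysMemStat
		heapSys.add(1 << 20)     // e.g. memory mapped for the heap
		heapSys.add(-(64 << 10)) // e.g. memory repurposed for non-heap use
		fmt.Println(heapSys.load())
	}
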
Currently there's an implicit understanding that the *uint64 value passed to these functions is some kind of statistic whose value is atomically managed. This understanding isn't inherently problematic, but we're about to change how some stats (which currently use mSysStatInc and mSysStatDec) work, so we want to make it very clear what the various requirements are around "sysStat". This change also removes mSysStatInc and mSysStatDec in favor of a method on sysMemStat. Note that those two functions were originally written the way they were because atomic 64-bit adds required a valid G on ARM, but this hasn't been the case for a very long time (since golang.org/cl/14204, but even before then it wasn't clear if mutexes required a valid G anymore). Today we implement 64-bit adds on ARM with a spinlock table. Change-Id: I4e9b37cf14afc2ae20cf736e874eb0064af086d7 Reviewed-on: https://go-review.googlesource.com/c/go/+/246971 Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Trust: Michael Knyszek Reviewed-by: Michael Pratt --- src/runtime/export_test.go | 4 +-- src/runtime/heapdump.go | 14 ++++---- src/runtime/malloc.go | 10 +++--- src/runtime/mem_aix.go | 12 +++---- src/runtime/mem_bsd.go | 12 +++---- src/runtime/mem_darwin.go | 12 +++---- src/runtime/mem_js.go | 10 +++--- src/runtime/mem_linux.go | 12 +++---- src/runtime/mem_plan9.go | 12 +++---- src/runtime/mem_windows.go | 12 +++---- src/runtime/mfixalloc.go | 4 +-- src/runtime/mgcscavenge.go | 4 +-- src/runtime/mheap.go | 28 ++++++++-------- src/runtime/mpagealloc.go | 4 +-- src/runtime/mranges.go | 4 +-- src/runtime/mstats.go | 82 +++++++++++++++++----------------------------- src/runtime/os_darwin.go | 3 +- 17 files changed, 109 insertions(+), 130 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go index 47cbc286f6..cb753ee819 100644 --- a/src/runtime/export_test.go +++ b/src/runtime/export_test.go @@ -820,7 +820,7 @@ type AddrRanges struct { // Add. 
func NewAddrRanges() AddrRanges { r := addrRanges{} - r.init(new(uint64)) + r.init(new(sysMemStat)) return AddrRanges{r, true} } @@ -844,7 +844,7 @@ func MakeAddrRanges(a ...AddrRange) AddrRanges { return AddrRanges{addrRanges{ ranges: ranges, totalBytes: total, - sysStat: new(uint64), + sysStat: new(sysMemStat), }, false} } diff --git a/src/runtime/heapdump.go b/src/runtime/heapdump.go index 4c35309211..495ecc5164 100644 --- a/src/runtime/heapdump.go +++ b/src/runtime/heapdump.go @@ -548,20 +548,20 @@ func dumpmemstats() { dumpint(memstats.nmalloc) dumpint(memstats.nfree) dumpint(memstats.heap_alloc) - dumpint(memstats.heap_sys) + dumpint(memstats.heap_sys.load()) dumpint(memstats.heap_idle) dumpint(memstats.heap_inuse) dumpint(memstats.heap_released) dumpint(memstats.heap_objects) dumpint(memstats.stacks_inuse) - dumpint(memstats.stacks_sys) + dumpint(memstats.stacks_sys.load()) dumpint(memstats.mspan_inuse) - dumpint(memstats.mspan_sys) + dumpint(memstats.mspan_sys.load()) dumpint(memstats.mcache_inuse) - dumpint(memstats.mcache_sys) - dumpint(memstats.buckhash_sys) - dumpint(memstats.gc_sys) - dumpint(memstats.other_sys) + dumpint(memstats.mcache_sys.load()) + dumpint(memstats.buckhash_sys.load()) + dumpint(memstats.gc_sys.load()) + dumpint(memstats.other_sys.load()) dumpint(memstats.next_gc) dumpint(memstats.last_gc_unix) dumpint(memstats.pause_total_ns) diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go index 0f48d7f68e..27d678d917 100644 --- a/src/runtime/malloc.go +++ b/src/runtime/malloc.go @@ -1313,7 +1313,7 @@ var persistentChunks *notInHeap // The returned memory will be zeroed. // // Consider marking persistentalloc'd types go:notinheap. -func persistentalloc(size, align uintptr, sysStat *uint64) unsafe.Pointer { +func persistentalloc(size, align uintptr, sysStat *sysMemStat) unsafe.Pointer { var p *notInHeap systemstack(func() { p = persistentalloc1(size, align, sysStat) @@ -1324,7 +1324,7 @@ func persistentalloc(size, align uintptr, sysStat *uint64) unsafe.Pointer { // Must run on system stack because stack growth can (re)invoke it. // See issue 9174. //go:systemstack -func persistentalloc1(size, align uintptr, sysStat *uint64) *notInHeap { +func persistentalloc1(size, align uintptr, sysStat *sysMemStat) *notInHeap { const ( maxBlock = 64 << 10 // VM reservation granularity is 64K on windows ) @@ -1383,8 +1383,8 @@ func persistentalloc1(size, align uintptr, sysStat *uint64) *notInHeap { } if sysStat != &memstats.other_sys { - mSysStatInc(sysStat, size) - mSysStatDec(&memstats.other_sys, size) + sysStat.add(int64(size)) + memstats.other_sys.add(-int64(size)) } return p } @@ -1425,7 +1425,7 @@ func (l *linearAlloc) init(base, size uintptr) { l.end = base + size } -func (l *linearAlloc) alloc(size, align uintptr, sysStat *uint64) unsafe.Pointer { +func (l *linearAlloc) alloc(size, align uintptr, sysStat *sysMemStat) unsafe.Pointer { p := alignUp(l.next, align) if p+size > l.end { return nil diff --git a/src/runtime/mem_aix.go b/src/runtime/mem_aix.go index 7e145b072a..957aa4dcc2 100644 --- a/src/runtime/mem_aix.go +++ b/src/runtime/mem_aix.go @@ -11,7 +11,7 @@ import ( // Don't split the stack as this method may be invoked without a valid G, which // prevents us from allocating more stack. 
//go:nosplit -func sysAlloc(n uintptr, sysStat *uint64) unsafe.Pointer { +func sysAlloc(n uintptr, sysStat *sysMemStat) unsafe.Pointer { p, err := mmap(nil, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0) if err != 0 { if err == _EACCES { @@ -24,7 +24,7 @@ func sysAlloc(n uintptr, sysStat *uint64) unsafe.Pointer { } return nil } - mSysStatInc(sysStat, n) + sysStat.add(int64(n)) return p } @@ -41,8 +41,8 @@ func sysHugePage(v unsafe.Pointer, n uintptr) { // Don't split the stack as this function may be invoked without a valid G, // which prevents us from allocating more stack. //go:nosplit -func sysFree(v unsafe.Pointer, n uintptr, sysStat *uint64) { - mSysStatDec(sysStat, n) +func sysFree(v unsafe.Pointer, n uintptr, sysStat *sysMemStat) { + sysStat.add(-int64(n)) munmap(v, n) } @@ -59,8 +59,8 @@ func sysReserve(v unsafe.Pointer, n uintptr) unsafe.Pointer { return p } -func sysMap(v unsafe.Pointer, n uintptr, sysStat *uint64) { - mSysStatInc(sysStat, n) +func sysMap(v unsafe.Pointer, n uintptr, sysStat *sysMemStat) { + sysStat.add(int64(n)) // AIX does not allow mapping a range that is already mapped. // So, call mprotect to change permissions. diff --git a/src/runtime/mem_bsd.go b/src/runtime/mem_bsd.go index 4d860e7bd3..bc672019fb 100644 --- a/src/runtime/mem_bsd.go +++ b/src/runtime/mem_bsd.go @@ -13,12 +13,12 @@ import ( // Don't split the stack as this function may be invoked without a valid G, // which prevents us from allocating more stack. //go:nosplit -func sysAlloc(n uintptr, sysStat *uint64) unsafe.Pointer { +func sysAlloc(n uintptr, sysStat *sysMemStat) unsafe.Pointer { v, err := mmap(nil, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0) if err != 0 { return nil } - mSysStatInc(sysStat, n) + sysStat.add(int64(n)) return v } @@ -35,8 +35,8 @@ func sysHugePage(v unsafe.Pointer, n uintptr) { // Don't split the stack as this function may be invoked without a valid G, // which prevents us from allocating more stack. //go:nosplit -func sysFree(v unsafe.Pointer, n uintptr, sysStat *uint64) { - mSysStatDec(sysStat, n) +func sysFree(v unsafe.Pointer, n uintptr, sysStat *sysMemStat) { + sysStat.add(-int64(n)) munmap(v, n) } @@ -65,8 +65,8 @@ func sysReserve(v unsafe.Pointer, n uintptr) unsafe.Pointer { const _sunosEAGAIN = 11 const _ENOMEM = 12 -func sysMap(v unsafe.Pointer, n uintptr, sysStat *uint64) { - mSysStatInc(sysStat, n) +func sysMap(v unsafe.Pointer, n uintptr, sysStat *sysMemStat) { + sysStat.add(int64(n)) p, err := mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, -1, 0) if err == _ENOMEM || ((GOOS == "solaris" || GOOS == "illumos") && err == _sunosEAGAIN) { diff --git a/src/runtime/mem_darwin.go b/src/runtime/mem_darwin.go index 3b5d565b0f..7fccd2bb8e 100644 --- a/src/runtime/mem_darwin.go +++ b/src/runtime/mem_darwin.go @@ -11,12 +11,12 @@ import ( // Don't split the stack as this function may be invoked without a valid G, // which prevents us from allocating more stack. //go:nosplit -func sysAlloc(n uintptr, sysStat *uint64) unsafe.Pointer { +func sysAlloc(n uintptr, sysStat *sysMemStat) unsafe.Pointer { v, err := mmap(nil, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0) if err != 0 { return nil } - mSysStatInc(sysStat, n) + sysStat.add(int64(n)) return v } @@ -39,8 +39,8 @@ func sysHugePage(v unsafe.Pointer, n uintptr) { // Don't split the stack as this function may be invoked without a valid G, // which prevents us from allocating more stack. 
//go:nosplit -func sysFree(v unsafe.Pointer, n uintptr, sysStat *uint64) { - mSysStatDec(sysStat, n) +func sysFree(v unsafe.Pointer, n uintptr, sysStat *sysMemStat) { + sysStat.add(-int64(n)) munmap(v, n) } @@ -58,8 +58,8 @@ func sysReserve(v unsafe.Pointer, n uintptr) unsafe.Pointer { const _ENOMEM = 12 -func sysMap(v unsafe.Pointer, n uintptr, sysStat *uint64) { - mSysStatInc(sysStat, n) +func sysMap(v unsafe.Pointer, n uintptr, sysStat *sysMemStat) { + sysStat.add(int64(n)) p, err := mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, -1, 0) if err == _ENOMEM { diff --git a/src/runtime/mem_js.go b/src/runtime/mem_js.go index 092b3d4fa2..957ed36ffa 100644 --- a/src/runtime/mem_js.go +++ b/src/runtime/mem_js.go @@ -13,7 +13,7 @@ import ( // Don't split the stack as this function may be invoked without a valid G, // which prevents us from allocating more stack. //go:nosplit -func sysAlloc(n uintptr, sysStat *uint64) unsafe.Pointer { +func sysAlloc(n uintptr, sysStat *sysMemStat) unsafe.Pointer { p := sysReserve(nil, n) sysMap(p, n, sysStat) return p @@ -31,8 +31,8 @@ func sysHugePage(v unsafe.Pointer, n uintptr) { // Don't split the stack as this function may be invoked without a valid G, // which prevents us from allocating more stack. //go:nosplit -func sysFree(v unsafe.Pointer, n uintptr, sysStat *uint64) { - mSysStatDec(sysStat, n) +func sysFree(v unsafe.Pointer, n uintptr, sysStat *sysMemStat) { + sysStat.add(-int64(n)) } func sysFault(v unsafe.Pointer, n uintptr) { @@ -80,6 +80,6 @@ func growMemory(pages int32) int32 // This allows the front-end to replace the old DataView object with a new one. func resetMemoryDataView() -func sysMap(v unsafe.Pointer, n uintptr, sysStat *uint64) { - mSysStatInc(sysStat, n) +func sysMap(v unsafe.Pointer, n uintptr, sysStat *sysMemStat) { + sysStat.add(int64(n)) } diff --git a/src/runtime/mem_linux.go b/src/runtime/mem_linux.go index 59b0bca970..3436851091 100644 --- a/src/runtime/mem_linux.go +++ b/src/runtime/mem_linux.go @@ -17,7 +17,7 @@ const ( // Don't split the stack as this method may be invoked without a valid G, which // prevents us from allocating more stack. //go:nosplit -func sysAlloc(n uintptr, sysStat *uint64) unsafe.Pointer { +func sysAlloc(n uintptr, sysStat *sysMemStat) unsafe.Pointer { p, err := mmap(nil, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0) if err != 0 { if err == _EACCES { @@ -30,7 +30,7 @@ func sysAlloc(n uintptr, sysStat *uint64) unsafe.Pointer { } return nil } - mSysStatInc(sysStat, n) + sysStat.add(int64(n)) return p } @@ -144,8 +144,8 @@ func sysHugePage(v unsafe.Pointer, n uintptr) { // Don't split the stack as this function may be invoked without a valid G, // which prevents us from allocating more stack. 
//go:nosplit -func sysFree(v unsafe.Pointer, n uintptr, sysStat *uint64) { - mSysStatDec(sysStat, n) +func sysFree(v unsafe.Pointer, n uintptr, sysStat *sysMemStat) { + sysStat.add(-int64(n)) munmap(v, n) } @@ -161,8 +161,8 @@ func sysReserve(v unsafe.Pointer, n uintptr) unsafe.Pointer { return p } -func sysMap(v unsafe.Pointer, n uintptr, sysStat *uint64) { - mSysStatInc(sysStat, n) +func sysMap(v unsafe.Pointer, n uintptr, sysStat *sysMemStat) { + sysStat.add(int64(n)) p, err := mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, -1, 0) if err == _ENOMEM { diff --git a/src/runtime/mem_plan9.go b/src/runtime/mem_plan9.go index 4fea851cdd..53d8e6dffa 100644 --- a/src/runtime/mem_plan9.go +++ b/src/runtime/mem_plan9.go @@ -140,19 +140,19 @@ func sbrk(n uintptr) unsafe.Pointer { return unsafe.Pointer(bl) } -func sysAlloc(n uintptr, sysStat *uint64) unsafe.Pointer { +func sysAlloc(n uintptr, sysStat *sysMemStat) unsafe.Pointer { lock(&memlock) p := memAlloc(n) memCheck() unlock(&memlock) if p != nil { - mSysStatInc(sysStat, n) + sysStat.add(int64(n)) } return p } -func sysFree(v unsafe.Pointer, n uintptr, sysStat *uint64) { - mSysStatDec(sysStat, n) +func sysFree(v unsafe.Pointer, n uintptr, sysStat *sysMemStat) { + sysStat.add(-int64(n)) lock(&memlock) if uintptr(v)+n == bloc { // Address range being freed is at the end of memory, @@ -176,10 +176,10 @@ func sysUsed(v unsafe.Pointer, n uintptr) { func sysHugePage(v unsafe.Pointer, n uintptr) { } -func sysMap(v unsafe.Pointer, n uintptr, sysStat *uint64) { +func sysMap(v unsafe.Pointer, n uintptr, sysStat *sysMemStat) { // sysReserve has already allocated all heap memory, // but has not adjusted stats. - mSysStatInc(sysStat, n) + sysStat.add(int64(n)) } func sysFault(v unsafe.Pointer, n uintptr) { diff --git a/src/runtime/mem_windows.go b/src/runtime/mem_windows.go index 165062ec27..3a805b9767 100644 --- a/src/runtime/mem_windows.go +++ b/src/runtime/mem_windows.go @@ -24,8 +24,8 @@ const ( // Don't split the stack as this function may be invoked without a valid G, // which prevents us from allocating more stack. //go:nosplit -func sysAlloc(n uintptr, sysStat *uint64) unsafe.Pointer { - mSysStatInc(sysStat, n) +func sysAlloc(n uintptr, sysStat *sysMemStat) unsafe.Pointer { + sysStat.add(int64(n)) return unsafe.Pointer(stdcall4(_VirtualAlloc, 0, n, _MEM_COMMIT|_MEM_RESERVE, _PAGE_READWRITE)) } @@ -97,8 +97,8 @@ func sysHugePage(v unsafe.Pointer, n uintptr) { // Don't split the stack as this function may be invoked without a valid G, // which prevents us from allocating more stack. 
//go:nosplit -func sysFree(v unsafe.Pointer, n uintptr, sysStat *uint64) { - mSysStatDec(sysStat, n) +func sysFree(v unsafe.Pointer, n uintptr, sysStat *sysMemStat) { + sysStat.add(-int64(n)) r := stdcall3(_VirtualFree, uintptr(v), 0, _MEM_RELEASE) if r == 0 { print("runtime: VirtualFree of ", n, " bytes failed with errno=", getlasterror(), "\n") @@ -124,6 +124,6 @@ func sysReserve(v unsafe.Pointer, n uintptr) unsafe.Pointer { return unsafe.Pointer(stdcall4(_VirtualAlloc, 0, n, _MEM_RESERVE, _PAGE_READWRITE)) } -func sysMap(v unsafe.Pointer, n uintptr, sysStat *uint64) { - mSysStatInc(sysStat, n) +func sysMap(v unsafe.Pointer, n uintptr, sysStat *sysMemStat) { + sysStat.add(int64(n)) } diff --git a/src/runtime/mfixalloc.go b/src/runtime/mfixalloc.go index f9dd6ca474..293c16b38b 100644 --- a/src/runtime/mfixalloc.go +++ b/src/runtime/mfixalloc.go @@ -32,7 +32,7 @@ type fixalloc struct { chunk uintptr // use uintptr instead of unsafe.Pointer to avoid write barriers nchunk uint32 inuse uintptr // in-use bytes now - stat *uint64 + stat *sysMemStat zero bool // zero allocations } @@ -49,7 +49,7 @@ type mlink struct { // Initialize f to allocate objects of the given size, // using the allocator to obtain chunks of memory. -func (f *fixalloc) init(size uintptr, first func(arg, p unsafe.Pointer), arg unsafe.Pointer, stat *uint64) { +func (f *fixalloc) init(size uintptr, first func(arg, p unsafe.Pointer), arg unsafe.Pointer, stat *sysMemStat) { f.size = size f.first = first f.arg = arg diff --git a/src/runtime/mgcscavenge.go b/src/runtime/mgcscavenge.go index 6328b295ca..8b1a0be353 100644 --- a/src/runtime/mgcscavenge.go +++ b/src/runtime/mgcscavenge.go @@ -100,7 +100,7 @@ const ( // heapRetained returns an estimate of the current heap RSS. func heapRetained() uint64 { - return atomic.Load64(&memstats.heap_sys) - atomic.Load64(&memstats.heap_released) + return memstats.heap_sys.load() - atomic.Load64(&memstats.heap_released) } // gcPaceScavenger updates the scavenger's pacing, particularly @@ -711,7 +711,7 @@ func (p *pageAlloc) scavengeRangeLocked(ci chunkIdx, base, npages uint) uintptr // Update global accounting only when not in test, otherwise // the runtime's accounting will be wrong. - mSysStatInc(&memstats.heap_released, uintptr(npages)*pageSize) + atomic.Xadd64(&memstats.heap_released, int64(npages)*pageSize) return addr } diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go index df659e222b..27c1bfbcf1 100644 --- a/src/runtime/mheap.go +++ b/src/runtime/mheap.go @@ -1222,22 +1222,22 @@ HaveSpan: // sysUsed all the pages that are actually available // in the span since some of them might be scavenged. sysUsed(unsafe.Pointer(base), nbytes) - mSysStatDec(&memstats.heap_released, scav) + atomic.Xadd64(&memstats.heap_released, -int64(scav)) } // Update stats. switch typ { case spanAllocHeap: - mSysStatInc(&memstats.heap_inuse, nbytes) + atomic.Xadd64(&memstats.heap_inuse, int64(nbytes)) case spanAllocStack: - mSysStatInc(&memstats.stacks_inuse, nbytes) + atomic.Xadd64(&memstats.stacks_inuse, int64(nbytes)) case spanAllocPtrScalarBits, spanAllocWorkBuf: - mSysStatInc(&memstats.gc_sys, nbytes) + memstats.gc_sys.add(int64(nbytes)) } if typ.manual() { // Manually managed memory doesn't count toward heap_sys. - mSysStatDec(&memstats.heap_sys, nbytes) + memstats.heap_sys.add(-int64(nbytes)) } - mSysStatDec(&memstats.heap_idle, nbytes) + atomic.Xadd64(&memstats.heap_idle, -int64(nbytes)) // Publish the span in various locations. 
@@ -1314,8 +1314,8 @@ func (h *mheap) grow(npage uintptr) bool { // The allocation is always aligned to the heap arena // size which is always > physPageSize, so its safe to // just add directly to heap_released. - mSysStatInc(&memstats.heap_released, asize) - mSysStatInc(&memstats.heap_idle, asize) + atomic.Xadd64(&memstats.heap_released, int64(asize)) + atomic.Xadd64(&memstats.heap_idle, int64(asize)) // Recalculate nBase. // We know this won't overflow, because sysAlloc returned @@ -1400,18 +1400,20 @@ func (h *mheap) freeSpanLocked(s *mspan, typ spanAllocType) { // Update stats. // // Mirrors the code in allocSpan. + nbytes := s.npages * pageSize switch typ { case spanAllocHeap: - mSysStatDec(&memstats.heap_inuse, s.npages*pageSize) + atomic.Xadd64(&memstats.heap_inuse, -int64(nbytes)) case spanAllocStack: - mSysStatDec(&memstats.stacks_inuse, s.npages*pageSize) + atomic.Xadd64(&memstats.stacks_inuse, -int64(nbytes)) case spanAllocPtrScalarBits, spanAllocWorkBuf: - mSysStatDec(&memstats.gc_sys, s.npages*pageSize) + memstats.gc_sys.add(-int64(nbytes)) } if typ.manual() { - mSysStatInc(&memstats.heap_sys, s.npages*pageSize) + // Manually managed memory doesn't count toward heap_sys, so add it back. + memstats.heap_sys.add(int64(nbytes)) } - mSysStatInc(&memstats.heap_idle, s.npages*pageSize) + atomic.Xadd64(&memstats.heap_idle, int64(nbytes)) // Mark the space as free. h.pages.free(s.base(), s.npages) diff --git a/src/runtime/mpagealloc.go b/src/runtime/mpagealloc.go index 560babed03..2af1c97e0b 100644 --- a/src/runtime/mpagealloc.go +++ b/src/runtime/mpagealloc.go @@ -293,13 +293,13 @@ type pageAlloc struct { // sysStat is the runtime memstat to update when new system // memory is committed by the pageAlloc for allocation metadata. - sysStat *uint64 + sysStat *sysMemStat // Whether or not this struct is being used in tests. test bool } -func (p *pageAlloc) init(mheapLock *mutex, sysStat *uint64) { +func (p *pageAlloc) init(mheapLock *mutex, sysStat *sysMemStat) { if levelLogPages[0] > logMaxPackedValue { // We can't represent 1< 0 && int64(val) < n) || (n < 0 && int64(val)+n < n) { + print("runtime: val=", val, " n=", n, "\n") + throw("sysMemStat overflow") } } diff --git a/src/runtime/os_darwin.go b/src/runtime/os_darwin.go index 394bd6fb0f..3f5bb7cf96 100644 --- a/src/runtime/os_darwin.go +++ b/src/runtime/os_darwin.go @@ -198,7 +198,6 @@ func newosproc(mp *m) { exit(1) } mp.g0.stack.hi = stacksize // for mstart - //mSysStatInc(&memstats.stacks_sys, stacksize) //TODO: do this? // Tell the pthread library we won't join with this thread. if pthread_attr_setdetachstate(&attr, _PTHREAD_CREATE_DETACHED) != 0 { @@ -247,7 +246,7 @@ func newosproc0(stacksize uintptr, fn uintptr) { exit(1) } g0.stack.hi = stacksize // for mstart - mSysStatInc(&memstats.stacks_sys, stacksize) + memstats.stacks_sys.add(int64(stacksize)) // Tell the pthread library we won't join with this thread. if pthread_attr_setdetachstate(&attr, _PTHREAD_CREATE_DETACHED) != 0 { -- cgit v1.2.1 From 39e335ac0618044bbd8ed2fca5e5b3583d8c444e Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Fri, 31 Jul 2020 21:32:26 +0000 Subject: runtime: copy in MemStats fields explicitly Currently MemStats is populated via an unsafe memmove from memstats, but this places unnecessary structural restrictions on memstats, is annoying to reason about, and tightly couples the two. Instead, just populate the fields of MemStats explicitly. 
Change-Id: I96f6a64326b1a91d4084e7b30169a4bbe6a331f9 Reviewed-on: https://go-review.googlesource.com/c/go/+/246972 Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Trust: Michael Knyszek Reviewed-by: Michael Pratt --- src/runtime/mstats.go | 70 ++++++++++++++++++++++++++++++++++----------------- 1 file changed, 47 insertions(+), 23 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/mstats.go b/src/runtime/mstats.go index 571a9c9ce3..466f33836c 100644 --- a/src/runtime/mstats.go +++ b/src/runtime/mstats.go @@ -12,8 +12,6 @@ import ( ) // Statistics. -// If you edit this structure, also edit type MemStats below. -// Their layouts must match exactly. // // For detailed descriptions see the documentation for MemStats. // Fields that differ from MemStats are further documented here. @@ -87,8 +85,6 @@ type mstats struct { // to 64 bits for atomic operations on 32 bit platforms. _ [1 - _NumSizeClasses%2]uint32 - // Statistics below here are not exported to MemStats directly. - last_gc_nanotime uint64 // last gc (monotonic time) tinyallocs uint64 // number of tiny allocations that didn't cause actual allocation; not exported to go directly last_next_gc uint64 // next_gc for the previous GC @@ -430,20 +426,7 @@ type MemStats struct { } } -// Size of the trailing by_size array differs between mstats and MemStats, -// and all data after by_size is local to runtime, not exported. -// NumSizeClasses was changed, but we cannot change MemStats because of backward compatibility. -// sizeof_C_MStats is the size of the prefix of mstats that -// corresponds to MemStats. It should match Sizeof(MemStats{}). -var sizeof_C_MStats = unsafe.Offsetof(memstats.by_size) + 61*unsafe.Sizeof(memstats.by_size[0]) - func init() { - var memStats MemStats - if sizeof_C_MStats != unsafe.Sizeof(memStats) { - println(sizeof_C_MStats, unsafe.Sizeof(memStats)) - throw("MStats vs MemStatsType size mismatch") - } - if unsafe.Offsetof(memstats.heap_live)%8 != 0 { println(unsafe.Offsetof(memstats.heap_live)) throw("memstats.heap_live not aligned to 8 bytes") @@ -469,14 +452,55 @@ func ReadMemStats(m *MemStats) { func readmemstats_m(stats *MemStats) { updatememstats() - // The size of the trailing by_size array differs between - // mstats and MemStats. NumSizeClasses was changed, but we - // cannot change MemStats because of backward compatibility. - memmove(unsafe.Pointer(stats), unsafe.Pointer(&memstats), sizeof_C_MStats) - + stats.Alloc = memstats.alloc + stats.TotalAlloc = memstats.total_alloc + stats.Sys = memstats.sys + stats.Mallocs = memstats.nmalloc + stats.Frees = memstats.nfree + stats.HeapAlloc = memstats.heap_alloc + stats.HeapSys = memstats.heap_sys.load() + stats.HeapIdle = memstats.heap_idle + stats.HeapInuse = memstats.heap_inuse + stats.HeapReleased = memstats.heap_released + stats.HeapObjects = memstats.heap_objects + stats.StackInuse = memstats.stacks_inuse // memstats.stacks_sys is only memory mapped directly for OS stacks. // Add in heap-allocated stack memory for user consumption. 
- stats.StackSys += stats.StackInuse + stats.StackSys = memstats.stacks_inuse + memstats.stacks_sys.load() + stats.MSpanInuse = memstats.mspan_inuse + stats.MSpanSys = memstats.mspan_sys.load() + stats.MCacheInuse = memstats.mcache_inuse + stats.MCacheSys = memstats.mcache_sys.load() + stats.BuckHashSys = memstats.buckhash_sys.load() + stats.GCSys = memstats.gc_sys.load() + stats.OtherSys = memstats.other_sys.load() + stats.NextGC = memstats.next_gc + stats.LastGC = memstats.last_gc_unix + stats.PauseTotalNs = memstats.pause_total_ns + stats.PauseNs = memstats.pause_ns + stats.PauseEnd = memstats.pause_end + stats.NumGC = memstats.numgc + stats.NumForcedGC = memstats.numforcedgc + stats.GCCPUFraction = memstats.gc_cpu_fraction + stats.EnableGC = true + + // Handle BySize. Copy N values, where N is + // the minimum of the lengths of the two arrays. + // Unfortunately copy() won't work here because + // the arrays have different structs. + // + // TODO(mknyszek): Consider renaming the fields + // of by_size's elements to align so we can use + // the copy built-in. + bySizeLen := len(stats.BySize) + if l := len(memstats.by_size); l < bySizeLen { + bySizeLen = l + } + for i := 0; i < bySizeLen; i++ { + stats.BySize[i].Size = memstats.by_size[i].size + stats.BySize[i].Mallocs = memstats.by_size[i].nmalloc + stats.BySize[i].Frees = memstats.by_size[i].nfree + } } //go:linkname readGCStats runtime/debug.readGCStats -- cgit v1.2.1 From ad863ba32a2ede207d708fa15897e9de1d14dd87 Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Mon, 3 Aug 2020 19:23:30 +0000 Subject: runtime: break down memstats.gc_sys This change breaks apart gc_sys into three distinct pieces. Two of those pieces are pieces which come from heap_sys since they're allocated from the page heap. The rest comes from memory mapped from e.g. persistentalloc which better fits the purpose of a sysMemStat. Also, rename gc_sys to gcMiscSys. Change-Id: I098789170052511e7b31edbcdc9a53e5c24573f7 Reviewed-on: https://go-review.googlesource.com/c/go/+/246973 Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Trust: Michael Knyszek Reviewed-by: Michael Pratt --- src/runtime/heapdump.go | 5 ++++- src/runtime/malloc.go | 6 +++--- src/runtime/mcheckmark.go | 2 +- src/runtime/mfinal.go | 2 +- src/runtime/mheap.go | 16 ++++++++++------ src/runtime/mspanset.go | 4 ++-- src/runtime/mstats.go | 31 ++++++++++++++++++------------- 7 files changed, 39 insertions(+), 27 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/heapdump.go b/src/runtime/heapdump.go index 495ecc5164..eed47930f0 100644 --- a/src/runtime/heapdump.go +++ b/src/runtime/heapdump.go @@ -540,6 +540,9 @@ func dumpms() { } func dumpmemstats() { + // These ints should be identical to the exported + // MemStats structure and should be ordered the same + // way too. 
dumpint(tagMemStats) dumpint(memstats.alloc) dumpint(memstats.total_alloc) @@ -560,7 +563,7 @@ func dumpmemstats() { dumpint(memstats.mcache_inuse) dumpint(memstats.mcache_sys.load()) dumpint(memstats.buckhash_sys.load()) - dumpint(memstats.gc_sys.load()) + dumpint(memstats.gcMiscSys.load() + memstats.gcWorkBufInUse + memstats.gcProgPtrScalarBitsInUse) dumpint(memstats.other_sys.load()) dumpint(memstats.next_gc) dumpint(memstats.last_gc_unix) diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go index 27d678d917..ee22bad58c 100644 --- a/src/runtime/malloc.go +++ b/src/runtime/malloc.go @@ -743,9 +743,9 @@ mapped: throw("arena already initialized") } var r *heapArena - r = (*heapArena)(h.heapArenaAlloc.alloc(unsafe.Sizeof(*r), sys.PtrSize, &memstats.gc_sys)) + r = (*heapArena)(h.heapArenaAlloc.alloc(unsafe.Sizeof(*r), sys.PtrSize, &memstats.gcMiscSys)) if r == nil { - r = (*heapArena)(persistentalloc(unsafe.Sizeof(*r), sys.PtrSize, &memstats.gc_sys)) + r = (*heapArena)(persistentalloc(unsafe.Sizeof(*r), sys.PtrSize, &memstats.gcMiscSys)) if r == nil { throw("out of memory allocating heap arena metadata") } @@ -757,7 +757,7 @@ mapped: if size == 0 { size = physPageSize } - newArray := (*notInHeap)(persistentalloc(size, sys.PtrSize, &memstats.gc_sys)) + newArray := (*notInHeap)(persistentalloc(size, sys.PtrSize, &memstats.gcMiscSys)) if newArray == nil { throw("out of memory allocating allArenas") } diff --git a/src/runtime/mcheckmark.go b/src/runtime/mcheckmark.go index 1fd8e4e78f..c0b028d715 100644 --- a/src/runtime/mcheckmark.go +++ b/src/runtime/mcheckmark.go @@ -41,7 +41,7 @@ func startCheckmarks() { if bitmap == nil { // Allocate bitmap on first use. - bitmap = (*checkmarksMap)(persistentalloc(unsafe.Sizeof(*bitmap), 0, &memstats.gc_sys)) + bitmap = (*checkmarksMap)(persistentalloc(unsafe.Sizeof(*bitmap), 0, &memstats.gcMiscSys)) if bitmap == nil { throw("out of memory allocating checkmarks bitmap") } diff --git a/src/runtime/mfinal.go b/src/runtime/mfinal.go index 6676ae6736..6ec5133be0 100644 --- a/src/runtime/mfinal.go +++ b/src/runtime/mfinal.go @@ -88,7 +88,7 @@ func queuefinalizer(p unsafe.Pointer, fn *funcval, nret uintptr, fint *_type, ot lock(&finlock) if finq == nil || finq.cnt == uint32(len(finq.fin)) { if finc == nil { - finc = (*finblock)(persistentalloc(_FinBlockSize, 0, &memstats.gc_sys)) + finc = (*finblock)(persistentalloc(_FinBlockSize, 0, &memstats.gcMiscSys)) finc.alllink = allfin allfin = finc if finptrmask[0] == 0 { diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go index 27c1bfbcf1..1624a04b9d 100644 --- a/src/runtime/mheap.go +++ b/src/runtime/mheap.go @@ -713,7 +713,7 @@ func (h *mheap) init() { h.central[i].mcentral.init(spanClass(i)) } - h.pages.init(&h.lock, &memstats.gc_sys) + h.pages.init(&h.lock, &memstats.gcMiscSys) } // reclaim sweeps and reclaims at least npage pages into the heap. @@ -1230,8 +1230,10 @@ HaveSpan: atomic.Xadd64(&memstats.heap_inuse, int64(nbytes)) case spanAllocStack: atomic.Xadd64(&memstats.stacks_inuse, int64(nbytes)) - case spanAllocPtrScalarBits, spanAllocWorkBuf: - memstats.gc_sys.add(int64(nbytes)) + case spanAllocWorkBuf: + atomic.Xadd64(&memstats.gcWorkBufInUse, int64(nbytes)) + case spanAllocPtrScalarBits: + atomic.Xadd64(&memstats.gcProgPtrScalarBitsInUse, int64(nbytes)) } if typ.manual() { // Manually managed memory doesn't count toward heap_sys. 
@@ -1406,8 +1408,10 @@ func (h *mheap) freeSpanLocked(s *mspan, typ spanAllocType) { atomic.Xadd64(&memstats.heap_inuse, -int64(nbytes)) case spanAllocStack: atomic.Xadd64(&memstats.stacks_inuse, -int64(nbytes)) - case spanAllocPtrScalarBits, spanAllocWorkBuf: - memstats.gc_sys.add(-int64(nbytes)) + case spanAllocWorkBuf: + atomic.Xadd64(&memstats.gcWorkBufInUse, -int64(nbytes)) + case spanAllocPtrScalarBits: + atomic.Xadd64(&memstats.gcProgPtrScalarBitsInUse, -int64(nbytes)) } if typ.manual() { // Manually managed memory doesn't count toward heap_sys, so add it back. @@ -1956,7 +1960,7 @@ func newArenaMayUnlock() *gcBitsArena { var result *gcBitsArena if gcBitsArenas.free == nil { unlock(&gcBitsArenas.lock) - result = (*gcBitsArena)(sysAlloc(gcBitsChunkBytes, &memstats.gc_sys)) + result = (*gcBitsArena)(sysAlloc(gcBitsChunkBytes, &memstats.gcMiscSys)) if result == nil { throw("runtime: cannot allocate memory") } diff --git a/src/runtime/mspanset.go b/src/runtime/mspanset.go index 490eed4549..10d2596c38 100644 --- a/src/runtime/mspanset.go +++ b/src/runtime/mspanset.go @@ -102,7 +102,7 @@ retry: if newCap == 0 { newCap = spanSetInitSpineCap } - newSpine := persistentalloc(newCap*sys.PtrSize, cpu.CacheLineSize, &memstats.gc_sys) + newSpine := persistentalloc(newCap*sys.PtrSize, cpu.CacheLineSize, &memstats.gcMiscSys) if b.spineCap != 0 { // Blocks are allocated off-heap, so // no write barriers. @@ -283,7 +283,7 @@ func (p *spanSetBlockAlloc) alloc() *spanSetBlock { if s := (*spanSetBlock)(p.stack.pop()); s != nil { return s } - return (*spanSetBlock)(persistentalloc(unsafe.Sizeof(spanSetBlock{}), cpu.CacheLineSize, &memstats.gc_sys)) + return (*spanSetBlock)(persistentalloc(unsafe.Sizeof(spanSetBlock{}), cpu.CacheLineSize, &memstats.gcMiscSys)) } // free returns a spanSetBlock back to the pool. diff --git a/src/runtime/mstats.go b/src/runtime/mstats.go index 466f33836c..967fe6e2be 100644 --- a/src/runtime/mstats.go +++ b/src/runtime/mstats.go @@ -44,15 +44,17 @@ type mstats struct { // Statistics about allocation of low-level fixed-size structures. // Protected by FixAlloc locks. - stacks_inuse uint64 // bytes in manually-managed stack spans; updated atomically or during STW - stacks_sys sysMemStat // only counts newosproc0 stack in mstats; differs from MemStats.StackSys - mspan_inuse uint64 // mspan structures - mspan_sys sysMemStat - mcache_inuse uint64 // mcache structures - mcache_sys sysMemStat - buckhash_sys sysMemStat // profiling bucket hash table - gc_sys sysMemStat // updated atomically or during STW - other_sys sysMemStat // updated atomically or during STW + stacks_inuse uint64 // bytes in manually-managed stack spans; updated atomically or during STW + stacks_sys sysMemStat // only counts newosproc0 stack in mstats; differs from MemStats.StackSys + mspan_inuse uint64 // mspan structures + mspan_sys sysMemStat + mcache_inuse uint64 // mcache structures + mcache_sys sysMemStat + buckhash_sys sysMemStat // profiling bucket hash table + gcWorkBufInUse uint64 // updated atomically or during STW + gcProgPtrScalarBitsInUse uint64 // updated atomically or during STW + gcMiscSys sysMemStat // updated atomically or during STW + other_sys sysMemStat // updated atomically or during STW // Statistics about the garbage collector. 
@@ -472,7 +474,10 @@ func readmemstats_m(stats *MemStats) { stats.MCacheInuse = memstats.mcache_inuse stats.MCacheSys = memstats.mcache_sys.load() stats.BuckHashSys = memstats.buckhash_sys.load() - stats.GCSys = memstats.gc_sys.load() + // MemStats defines GCSys as an aggregate of all memory related + // to the memory management system, but we track this memory + // at a more granular level in the runtime. + stats.GCSys = memstats.gcMiscSys.load() + memstats.gcWorkBufInUse + memstats.gcProgPtrScalarBitsInUse stats.OtherSys = memstats.other_sys.load() stats.NextGC = memstats.next_gc stats.LastGC = memstats.last_gc_unix @@ -557,11 +562,11 @@ func updatememstats() { memstats.mcache_inuse = uint64(mheap_.cachealloc.inuse) memstats.mspan_inuse = uint64(mheap_.spanalloc.inuse) memstats.sys = memstats.heap_sys.load() + memstats.stacks_sys.load() + memstats.mspan_sys.load() + - memstats.mcache_sys.load() + memstats.buckhash_sys.load() + memstats.gc_sys.load() + + memstats.mcache_sys.load() + memstats.buckhash_sys.load() + memstats.gcMiscSys.load() + memstats.other_sys.load() - // We also count stacks_inuse as sys memory. - memstats.sys += memstats.stacks_inuse + // We also count stacks_inuse, gcWorkBufInUse, and gcProgPtrScalarBitsInUse as sys memory. + memstats.sys += memstats.stacks_inuse + memstats.gcWorkBufInUse + memstats.gcProgPtrScalarBitsInUse // Calculate memory allocator stats. // During program execution we only count number of frees and amount of freed memory. -- cgit v1.2.1 From c5dea8f38726572ddc161e5d169a453639edb7b1 Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Mon, 3 Aug 2020 19:27:59 +0000 Subject: runtime: remove memstats.heap_idle This statistic is updated in many places but for MemStats may be computed from existing statistics. Specifically by definition heap_idle = heap_sys - heap_inuse since heap_sys is all memory allocated from the OS for use in the heap minus memory used for non-heap purposes. heap_idle is almost the same (since it explicitly includes memory that *could* be used for non-heap purposes) but also doesn't include memory that's actually used to hold heap objects. Although it has some utility as a sanity check, it complicates accounting and we want fewer, orthogonal statistics for upcoming metrics changes, so just drop it. Change-Id: I40af54a38e335f43249f6e218f35088bfd4380d1 Reviewed-on: https://go-review.googlesource.com/c/go/+/246974 Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Trust: Michael Knyszek Reviewed-by: Michael Pratt --- src/runtime/heapdump.go | 2 +- src/runtime/mheap.go | 3 --- src/runtime/mstats.go | 19 +++++++++++++++++-- 3 files changed, 18 insertions(+), 6 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/heapdump.go b/src/runtime/heapdump.go index eed47930f0..f96475e848 100644 --- a/src/runtime/heapdump.go +++ b/src/runtime/heapdump.go @@ -552,7 +552,7 @@ func dumpmemstats() { dumpint(memstats.nfree) dumpint(memstats.heap_alloc) dumpint(memstats.heap_sys.load()) - dumpint(memstats.heap_idle) + dumpint(memstats.heap_sys.load() - memstats.heap_inuse) dumpint(memstats.heap_inuse) dumpint(memstats.heap_released) dumpint(memstats.heap_objects) diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go index 1624a04b9d..87d2fd495b 100644 --- a/src/runtime/mheap.go +++ b/src/runtime/mheap.go @@ -1239,7 +1239,6 @@ HaveSpan: // Manually managed memory doesn't count toward heap_sys. memstats.heap_sys.add(-int64(nbytes)) } - atomic.Xadd64(&memstats.heap_idle, -int64(nbytes)) // Publish the span in various locations. 
@@ -1317,7 +1316,6 @@ func (h *mheap) grow(npage uintptr) bool { // size which is always > physPageSize, so its safe to // just add directly to heap_released. atomic.Xadd64(&memstats.heap_released, int64(asize)) - atomic.Xadd64(&memstats.heap_idle, int64(asize)) // Recalculate nBase. // We know this won't overflow, because sysAlloc returned @@ -1417,7 +1415,6 @@ func (h *mheap) freeSpanLocked(s *mspan, typ spanAllocType) { // Manually managed memory doesn't count toward heap_sys, so add it back. memstats.heap_sys.add(int64(nbytes)) } - atomic.Xadd64(&memstats.heap_idle, int64(nbytes)) // Mark the space as free. h.pages.free(s.base(), s.npages) diff --git a/src/runtime/mstats.go b/src/runtime/mstats.go index 967fe6e2be..43f74273f7 100644 --- a/src/runtime/mstats.go +++ b/src/runtime/mstats.go @@ -34,7 +34,6 @@ type mstats struct { // in manually-managed spans. heap_alloc uint64 // bytes allocated and not yet freed (same as alloc above) heap_sys sysMemStat // virtual address space obtained from system for GC'd heap - heap_idle uint64 // bytes in idle spans heap_inuse uint64 // bytes in mSpanInUse spans heap_released uint64 // bytes released to the os @@ -461,7 +460,23 @@ func readmemstats_m(stats *MemStats) { stats.Frees = memstats.nfree stats.HeapAlloc = memstats.heap_alloc stats.HeapSys = memstats.heap_sys.load() - stats.HeapIdle = memstats.heap_idle + // By definition, HeapIdle is memory that was mapped + // for the heap but is not currently used to hold heap + // objects. It also specifically is memory that can be + // used for other purposes, like stacks, but this memory + // is subtracted out of HeapSys before it makes that + // transition. Put another way: + // + // heap_sys = bytes allocated from the OS for the heap - bytes ultimately used for non-heap purposes + // heap_idle = bytes allocated from the OS for the heap - bytes ultimately used for any purpose + // + // or + // + // heap_sys = sys - stacks_inuse - gcWorkBufInUse - gcProgPtrScalarBitsInUse + // heap_idle = sys - stacks_inuse - gcWorkBufInUse - gcProgPtrScalarBitsInUse - heap_inuse + // + // => heap_idle = heap_sys - heap_inuse + stats.HeapIdle = memstats.heap_sys.load() - memstats.heap_inuse stats.HeapInuse = memstats.heap_inuse stats.HeapReleased = memstats.heap_released stats.HeapObjects = memstats.heap_objects -- cgit v1.2.1 From ae585ee52c2437bfd0e955ad6fc8911bf292f51d Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Mon, 3 Aug 2020 19:31:23 +0000 Subject: runtime: remove memstats.heap_alloc memstats.heap_alloc is 100% a duplicate and unnecessary copy of memstats.alloc which exists because MemStats used to be populated from memstats via a memmove. 
Change-Id: I995489f61be39786e573b8494a8ab6d4ea8bed9c Reviewed-on: https://go-review.googlesource.com/c/go/+/246975 Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Trust: Michael Knyszek Reviewed-by: Michael Pratt --- src/runtime/heapdump.go | 2 +- src/runtime/mstats.go | 13 +++++-------- 2 files changed, 6 insertions(+), 9 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/heapdump.go b/src/runtime/heapdump.go index f96475e848..6fcd9746af 100644 --- a/src/runtime/heapdump.go +++ b/src/runtime/heapdump.go @@ -550,7 +550,7 @@ func dumpmemstats() { dumpint(memstats.nlookup) dumpint(memstats.nmalloc) dumpint(memstats.nfree) - dumpint(memstats.heap_alloc) + dumpint(memstats.alloc) dumpint(memstats.heap_sys.load()) dumpint(memstats.heap_sys.load() - memstats.heap_inuse) dumpint(memstats.heap_inuse) diff --git a/src/runtime/mstats.go b/src/runtime/mstats.go index 43f74273f7..a6e38d1c1b 100644 --- a/src/runtime/mstats.go +++ b/src/runtime/mstats.go @@ -32,7 +32,6 @@ type mstats struct { // // Like MemStats, heap_sys and heap_inuse do not count memory // in manually-managed spans. - heap_alloc uint64 // bytes allocated and not yet freed (same as alloc above) heap_sys sysMemStat // virtual address space obtained from system for GC'd heap heap_inuse uint64 // bytes in mSpanInUse spans heap_released uint64 // bytes released to the os @@ -112,11 +111,10 @@ type mstats struct { // heap_live is the number of bytes considered live by the GC. // That is: retained by the most recent GC plus allocated - // since then. heap_live <= heap_alloc, since heap_alloc - // includes unmarked objects that have not yet been swept (and - // hence goes up as we allocate and down as we sweep) while - // heap_live excludes these objects (and hence only goes up - // between GCs). + // since then. heap_live <= alloc, since alloc includes unmarked + // objects that have not yet been swept (and hence goes up as we + // allocate and down as we sweep) while heap_live excludes these + // objects (and hence only goes up between GCs). // // This is updated atomically without locking. To reduce // contention, this is updated only when obtaining a span from @@ -458,7 +456,7 @@ func readmemstats_m(stats *MemStats) { stats.Sys = memstats.sys stats.Mallocs = memstats.nmalloc stats.Frees = memstats.nfree - stats.HeapAlloc = memstats.heap_alloc + stats.HeapAlloc = memstats.alloc stats.HeapSys = memstats.heap_sys.load() // By definition, HeapIdle is memory that was mapped // for the heap but is not currently used to hold heap @@ -639,7 +637,6 @@ func updatememstats() { // Calculate derived stats. memstats.total_alloc = totalAlloc memstats.alloc = totalAlloc - totalFree - memstats.heap_alloc = memstats.alloc memstats.heap_objects = memstats.nmalloc - memstats.nfree } -- cgit v1.2.1 From c02134abb01e019683daf051029d66b15dd11213 Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Mon, 3 Aug 2020 20:08:25 +0000 Subject: runtime: add helper for getting an mcache in allocation contexts This change adds a function getMCache which returns the current P's mcache if it's available, and otherwise tries to get mcache0 if we're bootstrapping. This function will come in handy as we need to replicate this behavior in multiple places in future changes. 
Change-Id: I536073d6f6dc6c6390269e613ead9f8bcb6e7f98 Reviewed-on: https://go-review.googlesource.com/c/go/+/246976 Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Trust: Michael Knyszek Reviewed-by: Michael Pratt --- src/runtime/malloc.go | 25 ++----------------------- src/runtime/mcache.go | 23 +++++++++++++++++++++++ 2 files changed, 25 insertions(+), 23 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go index ee22bad58c..6383c34817 100644 --- a/src/runtime/malloc.go +++ b/src/runtime/malloc.go @@ -972,19 +972,7 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer { shouldhelpgc := false dataSize := size - var c *mcache - if mp.p != 0 { - c = mp.p.ptr().mcache - } else { - // We will be called without a P while bootstrapping, - // in which case we use mcache0, which is set in mallocinit. - // mcache0 is cleared when bootstrapping is complete, - // by procresize. - c = mcache0 - if c == nil { - throw("malloc called with no P") - } - } + c := getMCache() var span *mspan var x unsafe.Pointer noscan := typ == nil || typ.ptrdata == 0 @@ -1212,16 +1200,7 @@ func reflect_unsafe_NewArray(typ *_type, n int) unsafe.Pointer { } func profilealloc(mp *m, x unsafe.Pointer, size uintptr) { - var c *mcache - if mp.p != 0 { - c = mp.p.ptr().mcache - } else { - c = mcache0 - if c == nil { - throw("profilealloc called with no P") - } - } - c.nextSample = nextSample() + getMCache().nextSample = nextSample() mProf_Malloc(x, size) } diff --git a/src/runtime/mcache.go b/src/runtime/mcache.go index c3e0e5e1f7..5564e4a47d 100644 --- a/src/runtime/mcache.go +++ b/src/runtime/mcache.go @@ -131,6 +131,29 @@ func freemcache(c *mcache, recipient *mcache) { }) } +// getMCache is a convenience function which tries to obtain an mcache. +// +// Must be running with a P when called (so the caller must be in a +// non-preemptible state) or must be called during bootstrapping. +func getMCache() *mcache { + // Grab the mcache, since that's where stats live. + pp := getg().m.p.ptr() + var c *mcache + if pp == nil { + // We will be called without a P while bootstrapping, + // in which case we use mcache0, which is set in mallocinit. + // mcache0 is cleared when bootstrapping is complete, + // by procresize. + c = mcache0 + if c == nil { + throw("getMCache called with no P or outside bootstrapping") + } + } else { + c = pp.mcache + } + return c +} + // donate flushes data and resources which have no global // pool to another mcache. func (c *mcache) donate(d *mcache) { -- cgit v1.2.1 From 2159c26ceb32bbfa86036431750c0752fca84ef6 Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Tue, 14 Apr 2020 21:06:26 +0000 Subject: runtime/metrics: add package interface This change creates the runtime/metrics package and adds the initial interface as laid out in the design document. For #37112. 
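To make the shape of the interface concrete, here is a sketch (an editor's
illustration, not part of the CL) of how a consumer is expected to use the package
once metrics are registered and Read is implemented in follow-up CLs; today All
returns an empty slice and Read panics with "unimplemented":

	package main

	import (
		"fmt"
		"runtime/metrics"
	)

	func main() {
		// Discover every metric supported by this Go implementation.
		descs := metrics.All()

		// Sample all of them in one call, re-using the slice across calls.
		samples := make([]metrics.Sample, len(descs))
		for i := range samples {
			samples[i].Name = descs[i].Name
		}
		metrics.Read(samples)

		for _, s := range samples {
			switch s.Value.Kind() {
			case metrics.KindUint64:
				fmt.Printf("%s: %d\n", s.Name, s.Value.Uint64())
			case metrics.KindFloat64:
				fmt.Printf("%s: %g\n", s.Name, s.Value.Float64())
			case metrics.KindFloat64Histogram:
				h := s.Value.Float64Histogram()
				fmt.Printf("%s: histogram with %d counts\n", s.Name, len(h.Counts))
			default:
				// KindBad: the name is unknown to this runtime.
				fmt.Printf("%s: unsupported\n", s.Name)
			}
		}
	}
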
Change-Id: I202dcee08ab008dd63bf96f7a4162f5b5f813637 Reviewed-on: https://go-review.googlesource.com/c/go/+/247040 Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Trust: Michael Knyszek Reviewed-by: Michael Pratt --- src/runtime/metrics/description.go | 52 ++++++++++++++++++++++++++++ src/runtime/metrics/doc.go | 49 +++++++++++++++++++++++++++ src/runtime/metrics/histogram.go | 30 +++++++++++++++++ src/runtime/metrics/sample.go | 29 ++++++++++++++++ src/runtime/metrics/value.go | 69 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 229 insertions(+) create mode 100644 src/runtime/metrics/description.go create mode 100644 src/runtime/metrics/doc.go create mode 100644 src/runtime/metrics/histogram.go create mode 100644 src/runtime/metrics/sample.go create mode 100644 src/runtime/metrics/value.go (limited to 'src/runtime') diff --git a/src/runtime/metrics/description.go b/src/runtime/metrics/description.go new file mode 100644 index 0000000000..32bb950a72 --- /dev/null +++ b/src/runtime/metrics/description.go @@ -0,0 +1,52 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package metrics + +// Description describes a runtime metric. +type Description struct { + // Name is the full name of the metric which includes the unit. + // + // The format of the metric may be described by the following regular expression. + // + // ^(?P/[^:]+):(?P[^:*\/]+(?:[*\/][^:*\/]+)*)$ + // + // The format splits the name into two components, separated by a colon: a path which always + // starts with a /, and a machine-parseable unit. The name may contain any valid Unicode + // codepoint in between / characters, but by convention will try to stick to lowercase + // characters and hyphens. An example of such a path might be "/memory/heap/free". + // + // The unit is by convention a series of lowercase English unit names (singular or plural) + // without prefixes delimited by '*' or '/'. The unit names may contain any valid Unicode + // codepoint that is not a delimiter. + // Examples of units might be "seconds", "bytes", "bytes/second", "cpu-seconds", + // "byte*cpu-seconds", and "bytes/second/second". + // + // A complete name might look like "/memory/heap/free:bytes". + Name string + + // Kind is the kind of value for this metric. + // + // The purpose of this field is to allow users to filter out metrics whose values are + // types which their application may not understand. + Kind ValueKind + + // Cumulative is whether or not the metric is cumulative. If a cumulative metric is just + // a single number, then it increases monotonically. If the metric is a distribution, + // then each bucket count increases monotonically. + // + // This flag thus indicates whether or not it's useful to compute a rate from this value. + Cumulative bool + + // StopTheWorld is whether or not the metric requires a stop-the-world + // event in order to collect it. + StopTheWorld bool +} + +var allDesc = []Description{} + +// All returns a slice of containing metric descriptions for all supported metrics. +func All() []Description { + return allDesc +} diff --git a/src/runtime/metrics/doc.go b/src/runtime/metrics/doc.go new file mode 100644 index 0000000000..b48c22ba30 --- /dev/null +++ b/src/runtime/metrics/doc.go @@ -0,0 +1,49 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +/* +Package metrics provides a stable interface to access implementation-defined +metrics exported by the Go runtime. This package is similar to existing functions +like runtime.ReadMemStats and debug.ReadGCStats, but significantly more general. + +The set of metrics defined by this package may evolve as the runtime itself +evolves, and also enables variation across Go implementations, whose relevant +metric sets may not intersect. + +Interface + +Metrics are designated by a string key, rather than, for example, a field name in +a struct. The full list of supported metrics is always available in the slice of +Descriptions returned by All. Each Description also includes useful information +about the metric, such as how to display it (e.g. gauge vs. counter) and how difficult +or disruptive it is to obtain it (e.g. do you need to stop the world?). + +Thus, users of this API are encouraged to sample supported metrics defined by the +slice returned by All to remain compatible across Go versions. Of course, situations +arise where reading specific metrics is critical. For these cases, users are +encouranged to use build tags, and although metrics may be deprecated and removed, +users should consider this to be an exceptional and rare event, coinciding with a +very large change in a particular Go implementation. + +Each metric key also has a "kind" that describes the format of the metric's value. +In the interest of not breaking users of this package, the "kind" for a given metric +is guaranteed not to change. If it must change, then a new metric will be introduced +with a new key and a new "kind." + +Metric key format + +As mentioned earlier, metric keys are strings. Their format is simple and well-defined, +designed to be both human and machine readable. It is split into two components, +separated by a colon: a rooted path and a unit. The choice to include the unit in +the key is motivated by compatibility: if a metric's unit changes, its semantics likely +did also, and a new key should be introduced. + +For more details on the precise definition of the metric key's path and unit formats, see +the documentation of the Name field of the Description struct. + +Supported metrics + +TODO(mknyszek): List them here as they're added. +*/ +package metrics diff --git a/src/runtime/metrics/histogram.go b/src/runtime/metrics/histogram.go new file mode 100644 index 0000000000..e1364e1e26 --- /dev/null +++ b/src/runtime/metrics/histogram.go @@ -0,0 +1,30 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package metrics + +// Float64Histogram represents a distribution of float64 values. +type Float64Histogram struct { + // Counts contains the weights for each histogram bucket. The length of + // Counts is equal to the length of Buckets (in the metric description) + // plus one to account for the implicit minimum bucket. + // + // Given N buckets, the following is the mathematical relationship between + // Counts and Buckets. + // count[0] is the weight of the range (-inf, bucket[0]) + // count[n] is the weight of the range [bucket[n], bucket[n+1]), for 0 < n < N-1 + // count[N-1] is the weight of the range [bucket[N-1], inf) + Counts []uint64 + + // Buckets contains the boundaries between histogram buckets, in increasing order. 
+ // + // Because this slice contains boundaries, there are len(Buckets)+1 counts: + // a count for all values less than the first boundary, a count covering each + // [slice[i], slice[i+1]) interval, and a count for all values greater than or + // equal to the last boundary. + // + // For a given metric name, the value of Buckets is guaranteed not to change + // between calls until program exit. + Buckets []float64 +} diff --git a/src/runtime/metrics/sample.go b/src/runtime/metrics/sample.go new file mode 100644 index 0000000000..c7a3fc424a --- /dev/null +++ b/src/runtime/metrics/sample.go @@ -0,0 +1,29 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package metrics + +// Sample captures a single metric sample. +type Sample struct { + // Name is the name of the metric sampled. + // + // It must correspond to a name in one of the metric descriptions + // returned by Descriptions. + Name string + + // Value is the value of the metric sample. + Value Value +} + +// Read populates each Value field in the given slice of metric samples. +// +// Desired metrics should be present in the slice with the appropriate name. +// The user of this API is encouraged to re-use the same slice between calls. +// +// Metric values with names not appearing in the value returned by Descriptions +// will have the value populated as KindBad to indicate that the name is +// unknown. +func Read(m []Sample) { + panic("unimplemented") +} diff --git a/src/runtime/metrics/value.go b/src/runtime/metrics/value.go new file mode 100644 index 0000000000..0b056b4ea8 --- /dev/null +++ b/src/runtime/metrics/value.go @@ -0,0 +1,69 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package metrics + +import ( + "math" + "unsafe" +) + +// ValueKind is a tag for a metric Value which indicates its type. +type ValueKind int + +const ( + // KindBad indicates that the Value has no type and should not be used. + KindBad ValueKind = iota + + // KindUint64 indicates that the type of the Value is a uint64. + KindUint64 + + // KindFloat64 indicates that the type of the Value is a float64. + KindFloat64 + + // KindFloat64Histogram indicates that the type of the Value is a *Float64Histogram. + KindFloat64Histogram +) + +// Value represents a metric value returned by the runtime. +type Value struct { + kind ValueKind + scalar uint64 // contains scalar values for scalar Kinds. + pointer unsafe.Pointer // contains non-scalar values. +} + +// Kind returns the a tag representing the kind of value this is. +func (v Value) Kind() ValueKind { + return v.kind +} + +// Uint64 returns the internal uint64 value for the metric. +// +// If v.Kind() != KindUint64, this method panics. +func (v Value) Uint64() uint64 { + if v.kind != KindUint64 { + panic("called Uint64 on non-uint64 metric value") + } + return v.scalar +} + +// Float64 returns the internal float64 value for the metric. +// +// If v.Kind() != KindFloat64, this method panics. +func (v Value) Float64() float64 { + if v.kind != KindFloat64 { + panic("called Float64 on non-float64 metric value") + } + return math.Float64frombits(v.scalar) +} + +// Float64Histogram returns the internal *Float64Histogram value for the metric. +// +// If v.Kind() != KindFloat64Histogram, this method panics. 
+func (v Value) Float64Histogram() *Float64Histogram { + if v.kind != KindFloat64Histogram { + panic("called Float64 on non-float64 metric value") + } + return (*Float64Histogram)(v.pointer) +} -- cgit v1.2.1 From fe7ff71185cf30f9bdee3e8d8897e8b6069ad02e Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Mon, 3 Aug 2020 20:11:04 +0000 Subject: runtime: add consistent heap statistics This change adds a global set of heap statistics which are similar to existing memory statistics. The purpose of these new statistics is to be able to read them and get a consistent result without stopping the world. The goal is to eventually replace as many of the existing memstats statistics with the sharded ones as possible. The consistent memory statistics use a tailor-made synchronization mechanism to allow writers (allocators) to proceed with minimal synchronization by using a sequence counter and a global generation counter to determine which set of statistics to update. Readers increment the global generation counter to effectively grab a snapshot of the statistics, and then iterate over all Ps using the sequence counter to ensure that they may safely read the snapshotted statistics. To keep statistics fresh, the reader also has a responsibility to merge sets of statistics. These consistent statistics are computed, but otherwise unused for now. Upcoming changes will integrate them with the rest of the codebase and will begin to phase out existing statistics. Change-Id: I637a11f2439e2049d7dccb8650c5d82500733ca5 Reviewed-on: https://go-review.googlesource.com/c/go/+/247037 Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Trust: Michael Knyszek Reviewed-by: Michael Pratt --- src/runtime/mcache.go | 4 + src/runtime/mgcscavenge.go | 11 ++- src/runtime/mheap.go | 34 +++++++++ src/runtime/mstats.go | 184 ++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 230 insertions(+), 3 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/mcache.go b/src/runtime/mcache.go index 5564e4a47d..e27a1c9ec0 100644 --- a/src/runtime/mcache.go +++ b/src/runtime/mcache.go @@ -61,6 +61,10 @@ type mcache struct { // in this mcache are stale and need to the flushed so they // can be swept. This is done in acquirep. flushGen uint32 + + // statsSeq is a counter indicating whether this P is currently + // writing any stats. Its value is even when not, odd when it is. + statsSeq uint32 } // A gclink is a node in a linked list of blocks, like mlink, diff --git a/src/runtime/mgcscavenge.go b/src/runtime/mgcscavenge.go index 8b1a0be353..5843ada981 100644 --- a/src/runtime/mgcscavenge.go +++ b/src/runtime/mgcscavenge.go @@ -711,7 +711,16 @@ func (p *pageAlloc) scavengeRangeLocked(ci chunkIdx, base, npages uint) uintptr // Update global accounting only when not in test, otherwise // the runtime's accounting will be wrong. - atomic.Xadd64(&memstats.heap_released, int64(npages)*pageSize) + nbytes := int64(npages) * pageSize + atomic.Xadd64(&memstats.heap_released, nbytes) + + // Update consistent accounting too. + c := getMCache() + stats := memstats.heapStats.acquire(c) + atomic.Xaddint64(&stats.committed, -nbytes) + atomic.Xaddint64(&stats.released, nbytes) + memstats.heapStats.release(c) + return addr } diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go index 87d2fd495b..d17b6fa284 100644 --- a/src/runtime/mheap.go +++ b/src/runtime/mheap.go @@ -1239,6 +1239,22 @@ HaveSpan: // Manually managed memory doesn't count toward heap_sys. memstats.heap_sys.add(-int64(nbytes)) } + // Update consistent stats. 
+ c := getMCache() + stats := memstats.heapStats.acquire(c) + atomic.Xaddint64(&stats.committed, int64(scav)) + atomic.Xaddint64(&stats.released, -int64(scav)) + switch typ { + case spanAllocHeap: + atomic.Xaddint64(&stats.inHeap, int64(nbytes)) + case spanAllocStack: + atomic.Xaddint64(&stats.inStacks, int64(nbytes)) + case spanAllocPtrScalarBits: + atomic.Xaddint64(&stats.inPtrScalarBits, int64(nbytes)) + case spanAllocWorkBuf: + atomic.Xaddint64(&stats.inWorkBufs, int64(nbytes)) + } + memstats.heapStats.release(c) // Publish the span in various locations. @@ -1316,6 +1332,10 @@ func (h *mheap) grow(npage uintptr) bool { // size which is always > physPageSize, so its safe to // just add directly to heap_released. atomic.Xadd64(&memstats.heap_released, int64(asize)) + c := getMCache() + stats := memstats.heapStats.acquire(c) + atomic.Xaddint64(&stats.released, int64(asize)) + memstats.heapStats.release(c) // Recalculate nBase. // We know this won't overflow, because sysAlloc returned @@ -1415,6 +1435,20 @@ func (h *mheap) freeSpanLocked(s *mspan, typ spanAllocType) { // Manually managed memory doesn't count toward heap_sys, so add it back. memstats.heap_sys.add(int64(nbytes)) } + // Update consistent stats. + c := getMCache() + stats := memstats.heapStats.acquire(c) + switch typ { + case spanAllocHeap: + atomic.Xaddint64(&stats.inHeap, -int64(nbytes)) + case spanAllocStack: + atomic.Xaddint64(&stats.inStacks, -int64(nbytes)) + case spanAllocPtrScalarBits: + atomic.Xaddint64(&stats.inPtrScalarBits, -int64(nbytes)) + case spanAllocWorkBuf: + atomic.Xaddint64(&stats.inWorkBufs, -int64(nbytes)) + } + memstats.heapStats.release(c) // Mark the space as free. h.pages.free(s.base(), s.npages) diff --git a/src/runtime/mstats.go b/src/runtime/mstats.go index a6e38d1c1b..76546c0f0c 100644 --- a/src/runtime/mstats.go +++ b/src/runtime/mstats.go @@ -148,6 +148,9 @@ type mstats struct { // unlike heap_live, heap_marked does not change until the // next mark termination. heap_marked uint64 + + // heapStats is a set of statistics + heapStats consistentHeapStats } var memstats mstats @@ -426,10 +429,20 @@ type MemStats struct { } func init() { - if unsafe.Offsetof(memstats.heap_live)%8 != 0 { - println(unsafe.Offsetof(memstats.heap_live)) + if offset := unsafe.Offsetof(memstats.heap_live); offset%8 != 0 { + println(offset) throw("memstats.heap_live not aligned to 8 bytes") } + if offset := unsafe.Offsetof(memstats.heapStats); offset%8 != 0 { + println(offset) + throw("memstats.heapStats not aligned to 8 bytes") + } + // Ensure the size of heapStatsDelta causes adjacent fields/slots (e.g. + // [3]heapStatsDelta) to be 8-byte aligned. + if size := unsafe.Sizeof(heapStatsDelta{}); size%8 != 0 { + println(size) + throw("heapStatsDelta not a multiple of 8 bytes in size") + } } // ReadMemStats populates m with memory allocator statistics. @@ -687,3 +700,170 @@ func (s *sysMemStat) add(n int64) { throw("sysMemStat overflow") } } + +// heapStatsDelta contains deltas of various runtime memory statistics +// that need to be updated together in order for them to be kept +// consistent with one another. 
+type heapStatsDelta struct { + committed int64 // byte delta of memory committed + released int64 // byte delta of released memory generated + inHeap int64 // byte delta of memory placed in the heap + inStacks int64 // byte delta of memory reserved for stacks + inWorkBufs int64 // byte delta of memory reserved for work bufs + inPtrScalarBits int64 // byte delta of memory reserved for unrolled GC prog bits +} + +// merge adds in the deltas from b into a. +func (a *heapStatsDelta) merge(b *heapStatsDelta) { + a.committed += b.committed + a.released += b.released + a.inHeap += b.inHeap + a.inStacks += b.inStacks + a.inWorkBufs += b.inWorkBufs + a.inPtrScalarBits += b.inPtrScalarBits +} + +// consistentHeapStats represents a set of various memory statistics +// whose updates must be viewed completely to get a consistent +// state of the world. +// +// To write updates to memory stats use the acquire and release +// methods. To obtain a consistent global snapshot of these statistics, +// use read. +type consistentHeapStats struct { + // stats is a ring buffer of heapStatsDelta values. + // Writers always atomically update the delta at index gen. + // + // Readers operate by rotating gen (0 -> 1 -> 2 -> 0 -> ...) + // and synchronizing with writers by observing each mcache's + // statsSeq field. If the reader observes a P (to which the + // mcache is bound) not writing, it can be sure that it will + // pick up the new gen value the next time it writes. + // The reader then takes responsibility by clearing space + // in the ring buffer for the next reader to rotate gen to + // that space (i.e. it merges in values from index (gen-2) mod 3 + // to index (gen-1) mod 3, then clears the former). + // + // Note that this means only one reader can be reading at a time. + // There is no way for readers to synchronize. + // + // This process is why we need ring buffer of size 3 instead + // of 2: one is for the writers, one contains the most recent + // data, and the last one is clear so writers can begin writing + // to it the moment gen is updated. + stats [3]heapStatsDelta + + // gen represents the current index into which writers + // are writing, and can take on the value of 0, 1, or 2. + // This value is updated atomically. + gen uint32 +} + +// acquire returns a heapStatsDelta to be updated. In effect, +// it acquires the shard for writing. release must be called +// as soon as the relevant deltas are updated. c must be +// a valid mcache not being used by any other thread. +// +// The returned heapStatsDelta must be updated atomically. +// +// Note however, that this is unsafe to call concurrently +// with other writers and there must be only one writer +// at a time. +func (m *consistentHeapStats) acquire(c *mcache) *heapStatsDelta { + seq := atomic.Xadd(&c.statsSeq, 1) + if seq%2 == 0 { + // Should have been incremented to odd. + print("runtime: seq=", seq, "\n") + throw("bad sequence number") + } + gen := atomic.Load(&m.gen) % 3 + return &m.stats[gen] +} + +// release indicates that the writer is done modifying +// the delta. The value returned by the corresponding +// acquire must no longer be accessed or modified after +// release is called. +// +// The mcache passed here must be the same as the one +// passed to acquire. +func (m *consistentHeapStats) release(c *mcache) { + seq := atomic.Xadd(&c.statsSeq, 1) + if seq%2 != 0 { + // Should have been incremented to even. 
+ print("runtime: seq=", seq, "\n") + throw("bad sequence number") + } +} + +// unsafeRead aggregates the delta for this shard into out. +// +// Unsafe because it does so without any synchronization. The +// only safe time to call this is if the world is stopped or +// we're freezing the world or going down anyway (and we just +// want _some_ estimate). +func (m *consistentHeapStats) unsafeRead(out *heapStatsDelta) { + for i := range m.stats { + out.merge(&m.stats[i]) + } +} + +// unsafeClear clears the shard. +// +// Unsafe because the world must be stopped and values should +// be donated elsewhere before clearing. +func (m *consistentHeapStats) unsafeClear() { + for i := range m.stats { + m.stats[i] = heapStatsDelta{} + } +} + +// read takes a globally consistent snapshot of m +// and puts the aggregated value in out. Even though out is a +// heapStatsDelta, the resulting values should be complete and +// valid statistic values. +// +// Not safe to call concurrently. +func (m *consistentHeapStats) read(out *heapStatsDelta) { + // Getting preempted after this point is not safe because + // we read allp. We need to make sure a STW can't happen + // so it doesn't change out from under us. + mp := acquirem() + + // Rotate gen, effectively taking a snapshot of the state of + // these statistics at the point of the exchange by moving + // writers to the next set of deltas. + // + // This exchange is safe to do because we won't race + // with anyone else trying to update this value. + currGen := atomic.Load(&m.gen) + atomic.Xchg(&m.gen, (currGen+1)%3) + prevGen := currGen - 1 + if currGen == 0 { + prevGen = 2 + } + for _, p := range allp { + c := p.mcache + if c == nil { + continue + } + // Spin until there are no more writers. + for atomic.Load(&c.statsSeq)%2 != 0 { + } + } + + // At this point we've observed that each sequence + // number is even, so any future writers will observe + // the new gen value. That means it's safe to read from + // the other deltas in the stats buffer. + + // Perform our responsibilities and free up + // stats[prevGen] for the next time we want to take + // a snapshot. + m.stats[currGen].merge(&m.stats[prevGen]) + m.stats[prevGen] = heapStatsDelta{} + + // Finally, copy out the complete delta. + *out = m.stats[currGen] + releasem(mp) +} -- cgit v1.2.1 From f77a9025f1e4bf4bb3e2b582d13cce5f19c1ca51 Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Mon, 3 Aug 2020 20:35:40 +0000 Subject: runtime: replace some memstats with consistent stats This change replaces stacks_inuse, gcWorkBufInUse and gcProgPtrScalarBitsInUse with their corresponding consistent stats. It also adds checks to make sure the rest of the sharded stats line up with existing stats in updatememstats. Change-Id: I17d0bd181aedb5c55e09c8dff18cef5b2a3a14e3 Reviewed-on: https://go-review.googlesource.com/c/go/+/247038 Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Trust: Michael Knyszek Reviewed-by: Michael Pratt --- src/runtime/mheap.go | 18 ++----------- src/runtime/mstats.go | 73 ++++++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 62 insertions(+), 29 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go index d17b6fa284..14a73c0491 100644 --- a/src/runtime/mheap.go +++ b/src/runtime/mheap.go @@ -1225,15 +1225,8 @@ HaveSpan: atomic.Xadd64(&memstats.heap_released, -int64(scav)) } // Update stats. 
- switch typ { - case spanAllocHeap: + if typ == spanAllocHeap { atomic.Xadd64(&memstats.heap_inuse, int64(nbytes)) - case spanAllocStack: - atomic.Xadd64(&memstats.stacks_inuse, int64(nbytes)) - case spanAllocWorkBuf: - atomic.Xadd64(&memstats.gcWorkBufInUse, int64(nbytes)) - case spanAllocPtrScalarBits: - atomic.Xadd64(&memstats.gcProgPtrScalarBitsInUse, int64(nbytes)) } if typ.manual() { // Manually managed memory doesn't count toward heap_sys. @@ -1421,15 +1414,8 @@ func (h *mheap) freeSpanLocked(s *mspan, typ spanAllocType) { // // Mirrors the code in allocSpan. nbytes := s.npages * pageSize - switch typ { - case spanAllocHeap: + if typ == spanAllocHeap { atomic.Xadd64(&memstats.heap_inuse, -int64(nbytes)) - case spanAllocStack: - atomic.Xadd64(&memstats.stacks_inuse, -int64(nbytes)) - case spanAllocWorkBuf: - atomic.Xadd64(&memstats.gcWorkBufInUse, -int64(nbytes)) - case spanAllocPtrScalarBits: - atomic.Xadd64(&memstats.gcProgPtrScalarBitsInUse, -int64(nbytes)) } if typ.manual() { // Manually managed memory doesn't count toward heap_sys, so add it back. diff --git a/src/runtime/mstats.go b/src/runtime/mstats.go index 76546c0f0c..4363eff1e0 100644 --- a/src/runtime/mstats.go +++ b/src/runtime/mstats.go @@ -40,19 +40,25 @@ type mstats struct { // computed on the fly by updatememstats. heap_objects uint64 // total number of allocated objects + // Statistics about stacks. + stacks_inuse uint64 // bytes in manually-managed stack spans; computed by updatememstats + stacks_sys sysMemStat // only counts newosproc0 stack in mstats; differs from MemStats.StackSys + // Statistics about allocation of low-level fixed-size structures. // Protected by FixAlloc locks. - stacks_inuse uint64 // bytes in manually-managed stack spans; updated atomically or during STW - stacks_sys sysMemStat // only counts newosproc0 stack in mstats; differs from MemStats.StackSys - mspan_inuse uint64 // mspan structures - mspan_sys sysMemStat - mcache_inuse uint64 // mcache structures - mcache_sys sysMemStat - buckhash_sys sysMemStat // profiling bucket hash table - gcWorkBufInUse uint64 // updated atomically or during STW - gcProgPtrScalarBitsInUse uint64 // updated atomically or during STW + mspan_inuse uint64 // mspan structures + mspan_sys sysMemStat + mcache_inuse uint64 // mcache structures + mcache_sys sysMemStat + buckhash_sys sysMemStat // profiling bucket hash table + + // Statistics about GC overhead. + gcWorkBufInUse uint64 // computed by updatememstats + gcProgPtrScalarBitsInUse uint64 // computed by updatememstats gcMiscSys sysMemStat // updated atomically or during STW - other_sys sysMemStat // updated atomically or during STW + + // Miscellaneous statistics. + other_sys sysMemStat // updated atomically or during STW // Statistics about the garbage collector. @@ -577,6 +583,10 @@ func readGCStats_m(pauses *[]uint64) { *pauses = p[:n+n+3] } +// Updates the memstats structure. +// +// The world must be stopped. +// //go:nowritebarrier func updatememstats() { // Flush mcaches to mcentral before doing anything else. @@ -591,9 +601,6 @@ func updatememstats() { memstats.mcache_sys.load() + memstats.buckhash_sys.load() + memstats.gcMiscSys.load() + memstats.other_sys.load() - // We also count stacks_inuse, gcWorkBufInUse, and gcProgPtrScalarBitsInUse as sys memory. - memstats.sys += memstats.stacks_inuse + memstats.gcWorkBufInUse + memstats.gcProgPtrScalarBitsInUse - // Calculate memory allocator stats. // During program execution we only count number of frees and amount of freed memory. 
// Current number of alive objects in the heap and amount of alive heap memory @@ -641,6 +648,9 @@ func updatememstats() { smallFree += uint64(c.smallFreeCount[i]) * uint64(class_to_size[i]) } } + // Collect consistent stats, which are the source-of-truth in the some cases. + var consStats heapStatsDelta + memstats.heapStats.unsafeRead(&consStats) totalFree += smallFree @@ -651,6 +661,43 @@ func updatememstats() { memstats.total_alloc = totalAlloc memstats.alloc = totalAlloc - totalFree memstats.heap_objects = memstats.nmalloc - memstats.nfree + + memstats.stacks_inuse = uint64(consStats.inStacks) + memstats.gcWorkBufInUse = uint64(consStats.inWorkBufs) + memstats.gcProgPtrScalarBitsInUse = uint64(consStats.inPtrScalarBits) + + // We also count stacks_inuse, gcWorkBufInUse, and gcProgPtrScalarBitsInUse as sys memory. + memstats.sys += memstats.stacks_inuse + memstats.gcWorkBufInUse + memstats.gcProgPtrScalarBitsInUse + + // The world is stopped, so the consistent stats (after aggregation) + // should be identical to some combination of memstats. In particular: + // + // * heap_inuse == inHeap + // * heap_released == released + // * heap_sys - heap_released == committed - inStacks - inWorkBufs - inPtrScalarBits + // + // Check if that's actually true. + // + // TODO(mknyszek): Maybe don't throw here. It would be bad if a + // bug in otherwise benign accounting caused the whole application + // to crash. + if memstats.heap_inuse != uint64(consStats.inHeap) { + print("runtime: heap_inuse=", memstats.heap_inuse, "\n") + print("runtime: consistent value=", consStats.inHeap, "\n") + throw("heap_inuse and consistent stats are not equal") + } + if memstats.heap_released != uint64(consStats.released) { + print("runtime: heap_released=", memstats.heap_released, "\n") + print("runtime: consistent value=", consStats.released, "\n") + throw("heap_released and consistent stats are not equal") + } + globalRetained := memstats.heap_sys.load() - memstats.heap_released + consRetained := uint64(consStats.committed - consStats.inStacks - consStats.inWorkBufs - consStats.inPtrScalarBits) + if globalRetained != consRetained { + print("runtime: global value=", globalRetained, "\n") + print("runtime: consistent value=", consRetained, "\n") + throw("measures of the retained heap are not equal") + } } // flushmcache flushes the mcache of allp[i]. -- cgit v1.2.1 From 79781e8dd382ac34e502ed6a088dff6860a08c05 Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Tue, 4 Aug 2020 17:29:03 +0000 Subject: runtime: move malloc stats into consistentHeapStats This change moves the mcache-local malloc stats into the consistentHeapStats structure so the malloc stats can be managed consistently with the memory stats. The one exception here is tinyAllocs for which moving that into the global stats would incur several atomic writes on the fast path. Microbenchmarks for just one CPU core have shown a 50% loss in throughput. Since tiny allocation counnt isn't exposed anyway and is always blindly added to both allocs and frees, let that stay inconsistent and flush the tiny allocation count every so often. 
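The shape of that trade-off, sketched outside the runtime with hypothetical names (worker, local, global, hotPath, flush): the hot path bumps a counter that only its owning worker touches, and the pending count is folded into the shared atomic total only at an existing slow-path synchronization point (for tinyAllocs, that point is span refill and releaseAll):

	package main

	import "sync/atomic"

	// global is the shared total; it is only ever updated atomically.
	var global uint64

	// worker models per-P state: local is owned by exactly one worker,
	// so the hot path can bump it without atomics or contention.
	type worker struct {
		local uint64
	}

	// hotPath is the frequent operation; it touches only owned state.
	func (w *worker) hotPath() {
		w.local++
	}

	// flush runs on an existing slow path and publishes the pending count.
	func (w *worker) flush() {
		atomic.AddUint64(&global, w.local)
		w.local = 0
	}

	func main() {
		w := &worker{}
		for i := 0; i < 1000; i++ {
			w.hotPath()
		}
		w.flush()
		_ = atomic.LoadUint64(&global)
	}
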
Change-Id: I2a4b75f209c0e659b9c0db081a3287bf227c10ca Reviewed-on: https://go-review.googlesource.com/c/go/+/247039 Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Trust: Michael Knyszek Reviewed-by: Michael Pratt --- src/runtime/export_test.go | 37 ++++++++-------------- src/runtime/malloc.go | 2 +- src/runtime/mcache.go | 70 ++++++++++++++--------------------------- src/runtime/mgcsweep.go | 10 ++++-- src/runtime/mstats.go | 78 ++++++++++++++++++++++++++-------------------- src/runtime/proc.go | 2 +- 6 files changed, 90 insertions(+), 109 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go index cb753ee819..ff901fd7be 100644 --- a/src/runtime/export_test.go +++ b/src/runtime/export_test.go @@ -337,33 +337,22 @@ func ReadMemStatsSlow() (base, slow MemStats) { } } - // Add in frees. readmemstats_m flushed the cached stats, so - // these are up-to-date. - var tinyAllocs, largeFree, smallFree uint64 - for _, p := range allp { - c := p.mcache - if c == nil { - continue - } - // Collect large allocation stats. - largeFree += uint64(c.largeFree) - slow.Frees += uint64(c.largeFreeCount) - - // Collect tiny allocation stats. - tinyAllocs += uint64(c.tinyAllocCount) - - // Collect per-sizeclass stats. - for i := 0; i < _NumSizeClasses; i++ { - slow.Frees += uint64(c.smallFreeCount[i]) - bySize[i].Frees += uint64(c.smallFreeCount[i]) - bySize[i].Mallocs += uint64(c.smallFreeCount[i]) - smallFree += uint64(c.smallFreeCount[i]) * uint64(class_to_size[i]) - } + // Add in frees by just reading the stats for those directly. + var m heapStatsDelta + memstats.heapStats.unsafeRead(&m) + + // Collect per-sizeclass free stats. + var smallFree uint64 + for i := 0; i < _NumSizeClasses; i++ { + slow.Frees += uint64(m.smallFreeCount[i]) + bySize[i].Frees += uint64(m.smallFreeCount[i]) + bySize[i].Mallocs += uint64(m.smallFreeCount[i]) + smallFree += uint64(m.smallFreeCount[i]) * uint64(class_to_size[i]) } - slow.Frees += tinyAllocs + slow.Frees += memstats.tinyallocs + uint64(m.largeFreeCount) slow.Mallocs += slow.Frees - slow.TotalAlloc = slow.Alloc + largeFree + smallFree + slow.TotalAlloc = slow.Alloc + uint64(m.largeFree) + smallFree for i := range slow.BySize { slow.BySize[i].Mallocs = bySize[i].Mallocs diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go index 6383c34817..d0b8c668c3 100644 --- a/src/runtime/malloc.go +++ b/src/runtime/malloc.go @@ -1028,7 +1028,7 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer { // The object fits into existing tiny block. x = unsafe.Pointer(c.tiny + off) c.tinyoffset = off + size - c.tinyAllocCount++ + c.tinyAllocs++ mp.mallocing = 0 releasem(mp) return x diff --git a/src/runtime/mcache.go b/src/runtime/mcache.go index e27a1c9ec0..c9342a41c9 100644 --- a/src/runtime/mcache.go +++ b/src/runtime/mcache.go @@ -32,8 +32,12 @@ type mcache struct { // tiny is a heap pointer. Since mcache is in non-GC'd memory, // we handle it by clearing it in releaseAll during mark // termination. + // + // tinyAllocs is the number of tiny allocations performed + // by the P that owns this mcache. tiny uintptr tinyoffset uintptr + tinyAllocs uintptr // The rest is not accessed on every malloc. @@ -41,21 +45,6 @@ type mcache struct { stackcache [_NumStackOrders]stackfreelist - // Allocator stats (source-of-truth). - // Only the P that owns this mcache may write to these - // variables, so it's safe for that P to read non-atomically. 
- // - // When read with stats from other mcaches and with the world - // stopped, the result will accurately reflect the state of the - // application. - tinyAllocCount uintptr // number of tiny allocs not counted in other stats - largeAlloc uintptr // bytes allocated for large objects - largeAllocCount uintptr // number of large object allocations - smallAllocCount [_NumSizeClasses]uintptr // number of allocs for small objects - largeFree uintptr // bytes freed for large objects (>maxSmallSize) - largeFreeCount uintptr // number of frees for large objects (>maxSmallSize) - smallFreeCount [_NumSizeClasses]uintptr // number of frees for small objects (<=maxSmallSize) - // flushGen indicates the sweepgen during which this mcache // was last flushed. If flushGen != mheap_.sweepgen, the spans // in this mcache are stale and need to the flushed so they @@ -117,7 +106,7 @@ func allocmcache() *mcache { // In some cases there is no way to simply release // resources, such as statistics, so donate them to // a different mcache (the recipient). -func freemcache(c *mcache, recipient *mcache) { +func freemcache(c *mcache) { systemstack(func() { c.releaseAll() stackcache_clear(c) @@ -128,8 +117,6 @@ func freemcache(c *mcache, recipient *mcache) { // gcworkbuffree(c.gcworkbuf) lock(&mheap_.lock) - // Donate anything else that's left. - c.donate(recipient) mheap_.cachealloc.free(unsafe.Pointer(c)) unlock(&mheap_.lock) }) @@ -158,31 +145,6 @@ func getMCache() *mcache { return c } -// donate flushes data and resources which have no global -// pool to another mcache. -func (c *mcache) donate(d *mcache) { - // scanAlloc is handled separately because it's not - // like these stats -- it's used for GC pacing. - d.largeAlloc += c.largeAlloc - c.largeAlloc = 0 - d.largeAllocCount += c.largeAllocCount - c.largeAllocCount = 0 - for i := range c.smallAllocCount { - d.smallAllocCount[i] += c.smallAllocCount[i] - c.smallAllocCount[i] = 0 - } - d.largeFree += c.largeFree - c.largeFree = 0 - d.largeFreeCount += c.largeFreeCount - c.largeFreeCount = 0 - for i := range c.smallFreeCount { - d.smallFreeCount[i] += c.smallFreeCount[i] - c.smallFreeCount[i] = 0 - } - d.tinyAllocCount += c.tinyAllocCount - c.tinyAllocCount = 0 -} - // refill acquires a new span of span class spc for c. This span will // have at least one free object. The current span in c must be full. // @@ -219,12 +181,20 @@ func (c *mcache) refill(spc spanClass) { // Assume all objects from this span will be allocated in the // mcache. If it gets uncached, we'll adjust this. - c.smallAllocCount[spc.sizeclass()] += uintptr(s.nelems) - uintptr(s.allocCount) + stats := memstats.heapStats.acquire(c) + atomic.Xadduintptr(&stats.smallAllocCount[spc.sizeclass()], uintptr(s.nelems)-uintptr(s.allocCount)) + memstats.heapStats.release(c) // Update heap_live with the same assumption. usedBytes := uintptr(s.allocCount) * s.elemsize atomic.Xadd64(&memstats.heap_live, int64(s.npages*pageSize)-int64(usedBytes)) + // Flush tinyAllocs. + if spc == tinySpanClass { + atomic.Xadd64(&memstats.tinyallocs, int64(c.tinyAllocs)) + c.tinyAllocs = 0 + } + // While we're here, flush scanAlloc, since we have to call // revise anyway. 
atomic.Xadd64(&memstats.heap_scan, int64(c.scanAlloc)) @@ -262,8 +232,10 @@ func (c *mcache) allocLarge(size uintptr, needzero bool, noscan bool) *mspan { if s == nil { throw("out of memory") } - c.largeAlloc += npages * pageSize - c.largeAllocCount++ + stats := memstats.heapStats.acquire(c) + atomic.Xadduintptr(&stats.largeAlloc, npages*pageSize) + atomic.Xadduintptr(&stats.largeAllocCount, 1) + memstats.heapStats.release(c) // Update heap_live and revise pacing if needed. atomic.Xadd64(&memstats.heap_live, int64(npages*pageSize)) @@ -294,7 +266,9 @@ func (c *mcache) releaseAll() { if s != &emptymspan { // Adjust nsmallalloc in case the span wasn't fully allocated. n := uintptr(s.nelems) - uintptr(s.allocCount) - c.smallAllocCount[spanClass(i).sizeclass()] -= n + stats := memstats.heapStats.acquire(c) + atomic.Xadduintptr(&stats.smallAllocCount[spanClass(i).sizeclass()], -n) + memstats.heapStats.release(c) if s.sweepgen != sg+1 { // refill conservatively counted unallocated slots in heap_live. // Undo this. @@ -313,6 +287,8 @@ func (c *mcache) releaseAll() { // Clear tinyalloc pool. c.tiny = 0 c.tinyoffset = 0 + atomic.Xadd64(&memstats.tinyallocs, int64(c.tinyAllocs)) + c.tinyAllocs = 0 // Updated heap_scan and possible heap_live. if gcBlackenEnabled != 0 { diff --git a/src/runtime/mgcsweep.go b/src/runtime/mgcsweep.go index 7103b08455..9b77ce635c 100644 --- a/src/runtime/mgcsweep.go +++ b/src/runtime/mgcsweep.go @@ -503,7 +503,9 @@ func (s *mspan) sweep(preserve bool) bool { // wasn't totally filled, but then swept, still has all of its // free slots zeroed. s.needzero = 1 - c.smallFreeCount[spc.sizeclass()] += uintptr(nfreed) + stats := memstats.heapStats.acquire(c) + atomic.Xadduintptr(&stats.smallFreeCount[spc.sizeclass()], uintptr(nfreed)) + memstats.heapStats.release(c) } if !preserve { // The caller may not have removed this span from whatever @@ -548,8 +550,10 @@ func (s *mspan) sweep(preserve bool) bool { } else { mheap_.freeSpan(s) } - c.largeFreeCount++ - c.largeFree += size + stats := memstats.heapStats.acquire(c) + atomic.Xadduintptr(&stats.largeFreeCount, 1) + atomic.Xadduintptr(&stats.largeFree, size) + memstats.heapStats.release(c) return true } diff --git a/src/runtime/mstats.go b/src/runtime/mstats.go index 4363eff1e0..a8eca85fe6 100644 --- a/src/runtime/mstats.go +++ b/src/runtime/mstats.go @@ -612,48 +612,36 @@ func updatememstats() { memstats.total_alloc = 0 memstats.nmalloc = 0 memstats.nfree = 0 - memstats.tinyallocs = 0 for i := 0; i < len(memstats.by_size); i++ { memstats.by_size[i].nmalloc = 0 memstats.by_size[i].nfree = 0 } - - // Collect allocation stats. This is safe and consistent - // because the world is stopped. - var smallFree, totalAlloc, totalFree uint64 - for _, p := range allp { - c := p.mcache - if c == nil { - continue - } - // Collect large allocation stats. - memstats.nmalloc += uint64(c.largeAllocCount) - totalAlloc += uint64(c.largeAlloc) - totalFree += uint64(c.largeFree) - memstats.nfree += uint64(c.largeFreeCount) - - // Collect tiny allocation stats. - memstats.tinyallocs += uint64(c.tinyAllocCount) - - // Collect per-sizeclass stats. - for i := 0; i < _NumSizeClasses; i++ { - // Malloc stats. - memstats.nmalloc += uint64(c.smallAllocCount[i]) - memstats.by_size[i].nmalloc += uint64(c.smallAllocCount[i]) - totalAlloc += uint64(c.smallAllocCount[i]) * uint64(class_to_size[i]) - - // Free stats. 
- memstats.nfree += uint64(c.smallFreeCount[i]) - memstats.by_size[i].nfree += uint64(c.smallFreeCount[i]) - smallFree += uint64(c.smallFreeCount[i]) * uint64(class_to_size[i]) - } - } // Collect consistent stats, which are the source-of-truth in the some cases. var consStats heapStatsDelta memstats.heapStats.unsafeRead(&consStats) - totalFree += smallFree + // Collect large allocation stats. + totalAlloc := uint64(consStats.largeAlloc) + memstats.nmalloc += uint64(consStats.largeAllocCount) + totalFree := uint64(consStats.largeFree) + memstats.nfree += uint64(consStats.largeFreeCount) + + // Collect per-sizeclass stats. + for i := 0; i < _NumSizeClasses; i++ { + // Malloc stats. + a := uint64(consStats.smallAllocCount[i]) + totalAlloc += a * uint64(class_to_size[i]) + memstats.nmalloc += a + memstats.by_size[i].nmalloc = a + + // Free stats. + f := uint64(consStats.smallFreeCount[i]) + totalFree += f * uint64(class_to_size[i]) + memstats.nfree += f + memstats.by_size[i].nfree = f + } + // Account for tiny allocations. memstats.nfree += memstats.tinyallocs memstats.nmalloc += memstats.tinyallocs @@ -752,12 +740,25 @@ func (s *sysMemStat) add(n int64) { // that need to be updated together in order for them to be kept // consistent with one another. type heapStatsDelta struct { + // Memory stats. committed int64 // byte delta of memory committed released int64 // byte delta of released memory generated inHeap int64 // byte delta of memory placed in the heap inStacks int64 // byte delta of memory reserved for stacks inWorkBufs int64 // byte delta of memory reserved for work bufs inPtrScalarBits int64 // byte delta of memory reserved for unrolled GC prog bits + + // Allocator stats. + largeAlloc uintptr // bytes allocated for large objects + largeAllocCount uintptr // number of large object allocations + smallAllocCount [_NumSizeClasses]uintptr // number of allocs for small objects + largeFree uintptr // bytes freed for large objects (>maxSmallSize) + largeFreeCount uintptr // number of frees for large objects (>maxSmallSize) + smallFreeCount [_NumSizeClasses]uintptr // number of frees for small objects (<=maxSmallSize) + + // Add a uint32 to ensure this struct is a multiple of 8 bytes in size. + // Only necessary on 32-bit platforms. + // _ [(sys.PtrSize / 4) % 2]uint32 } // merge adds in the deltas from b into a. 
@@ -768,6 +769,17 @@ func (a *heapStatsDelta) merge(b *heapStatsDelta) { a.inStacks += b.inStacks a.inWorkBufs += b.inWorkBufs a.inPtrScalarBits += b.inPtrScalarBits + + a.largeAlloc += b.largeAlloc + a.largeAllocCount += b.largeAllocCount + for i := range b.smallAllocCount { + a.smallAllocCount[i] += b.smallAllocCount[i] + } + a.largeFree += b.largeFree + a.largeFreeCount += b.largeFreeCount + for i := range b.smallFreeCount { + a.smallFreeCount[i] += b.smallFreeCount[i] + } } // consistentHeapStats represents a set of various memory statistics diff --git a/src/runtime/proc.go b/src/runtime/proc.go index 4f4cff38aa..ebecc92745 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -4550,7 +4550,7 @@ func (pp *p) destroy() { pp.mspancache.len = 0 pp.pcache.flush(&mheap_.pages) }) - freemcache(pp.mcache, allp[0].mcache) + freemcache(pp.mcache) pp.mcache = nil gfpurge(pp) traceProcFree(pp) -- cgit v1.2.1 From b08dfbaa439e4e396b979e02ea2e7d36972e8b7a Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Wed, 1 Jul 2020 16:02:42 +0000 Subject: runtime,runtime/metrics: add memory metrics This change adds support for a variety of runtime memory metrics and contains the base implementation of Read for the runtime/metrics package, which lives in the runtime. It also adds testing infrastructure for the metrics package, and a bunch of format and documentation tests. For #37112. Change-Id: I16a2c4781eeeb2de0abcb045c15105f1210e2d8a Reviewed-on: https://go-review.googlesource.com/c/go/+/247041 Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Reviewed-by: Michael Pratt Trust: Michael Knyszek --- src/runtime/export_test.go | 26 +++ src/runtime/metrics.go | 367 ++++++++++++++++++++++++++++++++ src/runtime/metrics/description.go | 80 ++++++- src/runtime/metrics/description_test.go | 125 +++++++++++ src/runtime/metrics/doc.go | 56 ++++- src/runtime/metrics/sample.go | 10 +- src/runtime/metrics_test.go | 114 ++++++++++ src/runtime/mstats.go | 3 +- 8 files changed, 776 insertions(+), 5 deletions(-) create mode 100644 src/runtime/metrics.go create mode 100644 src/runtime/metrics/description_test.go create mode 100644 src/runtime/metrics_test.go (limited to 'src/runtime') diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go index ff901fd7be..d043fe3ee5 100644 --- a/src/runtime/export_test.go +++ b/src/runtime/export_test.go @@ -298,6 +298,32 @@ func (p *ProfBuf) Close() { (*profBuf)(p).close() } +func ReadMetricsSlow(memStats *MemStats, samplesp unsafe.Pointer, len, cap int) { + stopTheWorld("ReadMetricsSlow") + + // Initialize the metrics beforehand because this could + // allocate and skew the stats. + semacquire(&metricsSema) + initMetrics() + semrelease(&metricsSema) + + systemstack(func() { + // Read memstats first. It's going to flush + // the mcaches which readMetrics does not do, so + // going the other way around may result in + // inconsistent statistics. + readmemstats_m(memStats) + }) + + // Read metrics off the system stack. + // + // The only part of readMetrics that could allocate + // and skew the stats is initMetrics. + readMetrics(samplesp, len, cap) + + startTheWorld() +} + // ReadMemStatsSlow returns both the runtime-computed MemStats and // MemStats accumulated by scanning the heap. func ReadMemStatsSlow() (base, slow MemStats) { diff --git a/src/runtime/metrics.go b/src/runtime/metrics.go new file mode 100644 index 0000000000..44b5a29751 --- /dev/null +++ b/src/runtime/metrics.go @@ -0,0 +1,367 @@ +// Copyright 2020 The Go Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package runtime + +// Metrics implementation exported to runtime/metrics. + +import ( + "unsafe" +) + +var ( + // metrics is a map of runtime/metrics keys to + // data used by the runtime to sample each metric's + // value. + metricsSema uint32 = 1 + metricsInit bool + metrics map[string]metricData +) + +type metricData struct { + // deps is the set of runtime statistics that this metric + // depends on. Before compute is called, the statAggregate + // which will be passed must ensure() these dependencies. + deps statDepSet + + // compute is a function that populates a metricValue + // given a populated statAggregate structure. + compute func(in *statAggregate, out *metricValue) +} + +// initMetrics initializes the metrics map if it hasn't been yet. +// +// metricsSema must be held. +func initMetrics() { + if metricsInit { + return + } + metrics = map[string]metricData{ + "/memory/classes/heap/free:bytes": { + deps: makeStatDepSet(heapStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = uint64(in.heapStats.committed - in.heapStats.inHeap - + in.heapStats.inStacks - in.heapStats.inWorkBufs - + in.heapStats.inPtrScalarBits) + }, + }, + "/memory/classes/heap/objects:bytes": { + deps: makeStatDepSet(heapStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = in.heapStats.inObjects + }, + }, + "/memory/classes/heap/released:bytes": { + deps: makeStatDepSet(heapStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = uint64(in.heapStats.released) + }, + }, + "/memory/classes/heap/stacks:bytes": { + deps: makeStatDepSet(heapStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = uint64(in.heapStats.inStacks) + }, + }, + "/memory/classes/heap/unused:bytes": { + deps: makeStatDepSet(heapStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = uint64(in.heapStats.inHeap) - in.heapStats.inObjects + }, + }, + "/memory/classes/metadata/mcache/free:bytes": { + deps: makeStatDepSet(sysStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = in.sysStats.mCacheSys - in.sysStats.mCacheInUse + }, + }, + "/memory/classes/metadata/mcache/inuse:bytes": { + deps: makeStatDepSet(sysStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = in.sysStats.mCacheInUse + }, + }, + "/memory/classes/metadata/mspan/free:bytes": { + deps: makeStatDepSet(sysStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = in.sysStats.mSpanSys - in.sysStats.mSpanInUse + }, + }, + "/memory/classes/metadata/mspan/inuse:bytes": { + deps: makeStatDepSet(sysStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = in.sysStats.mSpanInUse + }, + }, + "/memory/classes/metadata/other:bytes": { + deps: makeStatDepSet(heapStatsDep, sysStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = uint64(in.heapStats.inWorkBufs+in.heapStats.inPtrScalarBits) + in.sysStats.gcMiscSys + }, + }, + "/memory/classes/os-stacks:bytes": { + deps: makeStatDepSet(sysStatsDep), + compute: func(in *statAggregate, out 
*metricValue) { + out.kind = metricKindUint64 + out.scalar = in.sysStats.stacksSys + }, + }, + "/memory/classes/other:bytes": { + deps: makeStatDepSet(sysStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = in.sysStats.otherSys + }, + }, + "/memory/classes/profiling/buckets:bytes": { + deps: makeStatDepSet(sysStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = in.sysStats.buckHashSys + }, + }, + "/memory/classes/total:bytes": { + deps: makeStatDepSet(heapStatsDep, sysStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = uint64(in.heapStats.committed+in.heapStats.released) + + in.sysStats.stacksSys + in.sysStats.mSpanSys + + in.sysStats.mCacheSys + in.sysStats.buckHashSys + + in.sysStats.gcMiscSys + in.sysStats.otherSys + }, + }, + } + metricsInit = true +} + +// statDep is a dependency on a group of statistics +// that a metric might have. +type statDep uint + +const ( + heapStatsDep statDep = iota // corresponds to heapStatsAggregate + sysStatsDep // corresponds to sysStatsAggregate + numStatsDeps +) + +// statDepSet represents a set of statDeps. +// +// Under the hood, it's a bitmap. +type statDepSet [1]uint64 + +// makeStatDepSet creates a new statDepSet from a list of statDeps. +func makeStatDepSet(deps ...statDep) statDepSet { + var s statDepSet + for _, d := range deps { + s[d/64] |= 1 << (d % 64) + } + return s +} + +// differennce returns set difference of s from b as a new set. +func (s statDepSet) difference(b statDepSet) statDepSet { + var c statDepSet + for i := range s { + c[i] = s[i] &^ b[i] + } + return c +} + +// union returns the union of the two sets as a new set. +func (s statDepSet) union(b statDepSet) statDepSet { + var c statDepSet + for i := range s { + c[i] = s[i] | b[i] + } + return c +} + +// empty returns true if there are no dependencies in the set. +func (s *statDepSet) empty() bool { + for _, c := range s { + if c != 0 { + return false + } + } + return true +} + +// has returns true if the set contains a given statDep. +func (s *statDepSet) has(d statDep) bool { + return s[d/64]&(1<<(d%64)) != 0 +} + +// heapStatsAggregate represents memory stats obtained from the +// runtime. This set of stats is grouped together because they +// depend on each other in some way to make sense of the runtime's +// current heap memory use. They're also sharded across Ps, so it +// makes sense to grab them all at once. +type heapStatsAggregate struct { + heapStatsDelta + + // inObjects is the bytes of memory occupied by objects, + // derived from other values in heapStats. + inObjects uint64 +} + +// compute populates the heapStatsAggregate with values from the runtime. +func (a *heapStatsAggregate) compute() { + memstats.heapStats.read(&a.heapStatsDelta) + + // Calculate derived stats. + a.inObjects = uint64(a.largeAlloc - a.largeFree) + for i := range a.smallAllocCount { + a.inObjects += uint64(a.smallAllocCount[i]-a.smallFreeCount[i]) * uint64(class_to_size[i]) + } +} + +// sysStatsAggregate represents system memory stats obtained +// from the runtime. This set of stats is grouped together because +// they're all relatively cheap to acquire and generally independent +// of one another and other runtime memory stats. 
The fact that they +// may be acquired at different times, especially with respect to +// heapStatsAggregate, means there could be some skew, but because of +// these stats are independent, there's no real consistency issue here. +type sysStatsAggregate struct { + stacksSys uint64 + mSpanSys uint64 + mSpanInUse uint64 + mCacheSys uint64 + mCacheInUse uint64 + buckHashSys uint64 + gcMiscSys uint64 + otherSys uint64 +} + +// compute populates the sysStatsAggregate with values from the runtime. +func (a *sysStatsAggregate) compute() { + a.stacksSys = memstats.stacks_sys.load() + a.buckHashSys = memstats.buckhash_sys.load() + a.gcMiscSys = memstats.gcMiscSys.load() + a.otherSys = memstats.other_sys.load() + + systemstack(func() { + lock(&mheap_.lock) + a.mSpanSys = memstats.mspan_sys.load() + a.mSpanInUse = uint64(mheap_.spanalloc.inuse) + a.mCacheSys = memstats.mcache_sys.load() + a.mCacheInUse = uint64(mheap_.cachealloc.inuse) + unlock(&mheap_.lock) + }) +} + +// statAggregate is the main driver of the metrics implementation. +// +// It contains multiple aggregates of runtime statistics, as well +// as a set of these aggregates that it has populated. The aggergates +// are populated lazily by its ensure method. +type statAggregate struct { + ensured statDepSet + heapStats heapStatsAggregate + sysStats sysStatsAggregate +} + +// ensure populates statistics aggregates determined by deps if they +// haven't yet been populated. +func (a *statAggregate) ensure(deps *statDepSet) { + missing := deps.difference(a.ensured) + if missing.empty() { + return + } + for i := statDep(0); i < numStatsDeps; i++ { + if !missing.has(i) { + continue + } + switch i { + case heapStatsDep: + a.heapStats.compute() + case sysStatsDep: + a.sysStats.compute() + } + } + a.ensured = a.ensured.union(missing) +} + +// metricValidKind is a runtime copy of runtime/metrics.ValueKind and +// must be kept structurally identical to that type. +type metricKind int + +const ( + // These values must be kept identical to their corresponding Kind* values + // in the runtime/metrics package. + metricKindBad metricKind = iota + metricKindUint64 + metricKindFloat64 + metricKindFloat64Histogram +) + +// metricSample is a runtime copy of runtime/metrics.Sample and +// must be kept structurally identical to that type. +type metricSample struct { + name string + value metricValue +} + +// metricValue is a runtime copy of runtime/metrics.Sample and +// must be kept structurally identical to that type. +type metricValue struct { + kind metricKind + scalar uint64 // contains scalar values for scalar Kinds. + pointer unsafe.Pointer // contains non-scalar values. +} + +// agg is used by readMetrics, and is protected by metricsSema. +// +// Managed as a global variable because its pointer will be +// an argument to a dynamically-defined function, and we'd +// like to avoid it escaping to the heap. +var agg statAggregate + +// readMetrics is the implementation of runtime/metrics.Read. +// +//go:linkname readMetrics runtime/metrics.runtime_readMetrics +func readMetrics(samplesp unsafe.Pointer, len int, cap int) { + // Construct a slice from the args. + sl := slice{samplesp, len, cap} + samples := *(*[]metricSample)(unsafe.Pointer(&sl)) + + // Acquire the metricsSema but with handoff. This operation + // is expensive enough that queueing up goroutines and handing + // off between them will be noticably better-behaved. + semacquire1(&metricsSema, true, 0, 0) + + // Ensure the map is initialized. + initMetrics() + + // Clear agg defensively. 
+ agg = statAggregate{} + + // Sample. + for i := range samples { + sample := &samples[i] + data, ok := metrics[sample.name] + if !ok { + sample.value.kind = metricKindBad + continue + } + // Ensure we have all the stats we need. + // agg is populated lazily. + agg.ensure(&data.deps) + + // Compute the value based on the stats we have. + data.compute(&agg, &sample.value) + } + + semrelease(&metricsSema) +} diff --git a/src/runtime/metrics/description.go b/src/runtime/metrics/description.go index 32bb950a72..2e7df7e09f 100644 --- a/src/runtime/metrics/description.go +++ b/src/runtime/metrics/description.go @@ -10,7 +10,7 @@ type Description struct { // // The format of the metric may be described by the following regular expression. // - // ^(?P/[^:]+):(?P[^:*\/]+(?:[*\/][^:*\/]+)*)$ + // ^(?P/[^:]+):(?P[^:*/]+(?:[*/][^:*/]+)*)$ // // The format splits the name into two components, separated by a colon: a path which always // starts with a /, and a machine-parseable unit. The name may contain any valid Unicode @@ -26,6 +26,9 @@ type Description struct { // A complete name might look like "/memory/heap/free:bytes". Name string + // Description is an English language sentence describing the metric. + Description string + // Kind is the kind of value for this metric. // // The purpose of this field is to allow users to filter out metrics whose values are @@ -44,7 +47,80 @@ type Description struct { StopTheWorld bool } -var allDesc = []Description{} +// The English language descriptions below must be kept in sync with the +// descriptions of each metric in doc.go. +var allDesc = []Description{ + { + Name: "/memory/classes/heap/free:bytes", + Description: "Memory that is available for allocation, and may be returned to the underlying system.", + Kind: KindUint64, + }, + { + Name: "/memory/classes/heap/objects:bytes", + Description: "Memory occupied by live objects and dead objects that have not yet been collected.", + Kind: KindUint64, + }, + { + Name: "/memory/classes/heap/released:bytes", + Description: "Memory that has been returned to the underlying system.", + Kind: KindUint64, + }, + { + Name: "/memory/classes/heap/stacks:bytes", + Description: "Memory allocated from the heap that is occupied by stacks.", + Kind: KindUint64, + }, + { + Name: "/memory/classes/heap/unused:bytes", + Description: "Memory that is unavailable for allocation, but cannot be returned to the underlying system.", + Kind: KindUint64, + }, + { + Name: "/memory/classes/metadata/mcache/free:bytes", + Description: "Memory that is reserved for runtime mcache structures, but not in-use.", + Kind: KindUint64, + }, + { + Name: "/memory/classes/metadata/mcache/inuse:bytes", + Description: "Memory that is occupied by runtime mcache structures that are currently being used.", + Kind: KindUint64, + }, + { + Name: "/memory/classes/metadata/mspan/free:bytes", + Description: "Memory that is reserved for runtime mspan structures, but not in-use.", + Kind: KindUint64, + }, + { + Name: "/memory/classes/metadata/mspan/inuse:bytes", + Description: "Memory that is occupied by runtime mspan structures that are currently being used.", + Kind: KindUint64, + }, + { + Name: "/memory/classes/metadata/other:bytes", + Description: "Memory that is reserved for or used to hold runtime metadata.", + Kind: KindUint64, + }, + { + Name: "/memory/classes/os-stacks:bytes", + Description: "Stack memory allocated by the underlying operating system.", + Kind: KindUint64, + }, + { + Name: "/memory/classes/other:bytes", + Description: "Memory used by 
execution trace buffers, structures for debugging the runtime, finalizer and profiler specials, and more.", + Kind: KindUint64, + }, + { + Name: "/memory/classes/profiling/buckets:bytes", + Description: "Memory that is used by the stack trace hash map used for profiling.", + Kind: KindUint64, + }, + { + Name: "/memory/classes/total:bytes", + Description: "All memory mapped by the Go runtime into the current process as read-write. Note that this does not include memory mapped by code called via cgo or via the syscall package. Sum of all metrics in /memory/classes.", + Kind: KindUint64, + }, +} // All returns a slice of containing metric descriptions for all supported metrics. func All() []Description { diff --git a/src/runtime/metrics/description_test.go b/src/runtime/metrics/description_test.go new file mode 100644 index 0000000000..e966a281a1 --- /dev/null +++ b/src/runtime/metrics/description_test.go @@ -0,0 +1,125 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package metrics_test + +import ( + "bufio" + "os" + "path/filepath" + "regexp" + "runtime" + "runtime/metrics" + "strings" + "testing" +) + +func TestDescriptionNameFormat(t *testing.T) { + r := regexp.MustCompile("^(?P/[^:]+):(?P[^:*/]+(?:[*/][^:*/]+)*)$") + descriptions := metrics.All() + for _, desc := range descriptions { + if !r.MatchString(desc.Name) { + t.Errorf("metrics %q does not match regexp %s", desc.Name, r) + } + } +} + +func extractMetricDocs(t *testing.T) map[string]string { + if runtime.GOOS == "android" { + t.Skip("no access to Go source on android") + } + + // Get doc.go. + _, filename, _, _ := runtime.Caller(0) + filename = filepath.Join(filepath.Dir(filename), "doc.go") + + f, err := os.Open(filename) + if err != nil { + t.Fatal(err) + } + const ( + stateSearch = iota // look for list of metrics + stateNextMetric // look for next metric + stateNextDescription // build description + ) + state := stateSearch + s := bufio.NewScanner(f) + result := make(map[string]string) + var metric string + var prevMetric string + var desc strings.Builder + for s.Scan() { + line := strings.TrimSpace(s.Text()) + switch state { + case stateSearch: + if line == "Supported metrics" { + state = stateNextMetric + } + case stateNextMetric: + // Ignore empty lines until we find a non-empty + // one. This will be our metric name. + if len(line) != 0 { + prevMetric = metric + metric = line + if prevMetric > metric { + t.Errorf("metrics %s and %s are out of lexicographical order", prevMetric, metric) + } + state = stateNextDescription + } + case stateNextDescription: + if len(line) == 0 || line == `*/` { + // An empty line means we're done. + // Write down the description and look + // for a new metric. + result[metric] = desc.String() + desc.Reset() + state = stateNextMetric + } else { + // As long as we're seeing data, assume that's + // part of the description and append it. + if desc.Len() != 0 { + // Turn previous newlines into spaces. 
+ desc.WriteString(" ") + } + desc.WriteString(line) + } + } + if line == `*/` { + break + } + } + if state == stateSearch { + t.Fatalf("failed to find supported metrics docs in %s", filename) + } + return result +} + +func TestDescriptionDocs(t *testing.T) { + docs := extractMetricDocs(t) + descriptions := metrics.All() + for _, d := range descriptions { + want := d.Description + got, ok := docs[d.Name] + if !ok { + t.Errorf("no docs found for metric %s", d.Name) + continue + } + if got != want { + t.Errorf("mismatched description and docs for metric %s", d.Name) + t.Errorf("want: %q, got %q", want, got) + continue + } + } + if len(docs) > len(descriptions) { + docsLoop: + for name, _ := range docs { + for _, d := range descriptions { + if name == d.Name { + continue docsLoop + } + } + t.Errorf("stale documentation for non-existent metric: %s", name) + } + } +} diff --git a/src/runtime/metrics/doc.go b/src/runtime/metrics/doc.go index b48c22ba30..fb4e23a2b5 100644 --- a/src/runtime/metrics/doc.go +++ b/src/runtime/metrics/doc.go @@ -44,6 +44,60 @@ the documentation of the Name field of the Description struct. Supported metrics -TODO(mknyszek): List them here as they're added. + /memory/classes/heap/free:bytes + Memory that is available for allocation, and may be returned + to the underlying system. + + /memory/classes/heap/objects:bytes + Memory occupied by live objects and dead objects that have + not yet been collected. + + /memory/classes/heap/released:bytes + Memory that has been returned to the underlying system. + + /memory/classes/heap/stacks:bytes + Memory allocated from the heap that is occupied by stacks. + + /memory/classes/heap/unused:bytes + Memory that is unavailable for allocation, but cannot be + returned to the underlying system. + + /memory/classes/metadata/mcache/free:bytes + Memory that is reserved for runtime mcache structures, but + not in-use. + + /memory/classes/metadata/mcache/inuse:bytes + Memory that is occupied by runtime mcache structures that + are currently being used. + + /memory/classes/metadata/mspan/free:bytes + Memory that is reserved for runtime mspan structures, but + not in-use. + + /memory/classes/metadata/mspan/inuse:bytes + Memory that is occupied by runtime mspan structures that are + currently being used. + + /memory/classes/metadata/other:bytes + Memory that is reserved for or used to hold runtime + metadata. + + /memory/classes/os-stacks:bytes + Stack memory allocated by the underlying operating system. + + /memory/classes/other:bytes + Memory used by execution trace buffers, structures for + debugging the runtime, finalizer and profiler specials, and + more. + + /memory/classes/profiling/buckets:bytes + Memory that is used by the stack trace hash map used for + profiling. + + /memory/classes/total:bytes + All memory mapped by the Go runtime into the current process + as read-write. Note that this does not include memory mapped + by code called via cgo or via the syscall package. + Sum of all metrics in /memory/classes. */ package metrics diff --git a/src/runtime/metrics/sample.go b/src/runtime/metrics/sample.go index c7a3fc424a..b4b0979aa6 100644 --- a/src/runtime/metrics/sample.go +++ b/src/runtime/metrics/sample.go @@ -4,6 +4,11 @@ package metrics +import ( + _ "runtime" // depends on the runtime via a linkname'd function + "unsafe" +) + // Sample captures a single metric sample. type Sample struct { // Name is the name of the metric sampled. @@ -16,6 +21,9 @@ type Sample struct { Value Value } +// Implemented in the runtime. 
+func runtime_readMetrics(unsafe.Pointer, int, int) + // Read populates each Value field in the given slice of metric samples. // // Desired metrics should be present in the slice with the appropriate name. @@ -25,5 +33,5 @@ type Sample struct { // will have the value populated as KindBad to indicate that the name is // unknown. func Read(m []Sample) { - panic("unimplemented") + runtime_readMetrics(unsafe.Pointer(&m[0]), len(m), cap(m)) } diff --git a/src/runtime/metrics_test.go b/src/runtime/metrics_test.go new file mode 100644 index 0000000000..f00aad07c4 --- /dev/null +++ b/src/runtime/metrics_test.go @@ -0,0 +1,114 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package runtime_test + +import ( + "runtime" + "runtime/metrics" + "strings" + "testing" + "unsafe" +) + +func prepareAllMetricsSamples() (map[string]metrics.Description, []metrics.Sample) { + all := metrics.All() + samples := make([]metrics.Sample, len(all)) + descs := make(map[string]metrics.Description) + for i := range all { + samples[i].Name = all[i].Name + descs[all[i].Name] = all[i] + } + return descs, samples +} + +func TestReadMetrics(t *testing.T) { + // Tests whether readMetrics produces values aligning + // with ReadMemStats while the world is stopped. + var mstats runtime.MemStats + _, samples := prepareAllMetricsSamples() + runtime.ReadMetricsSlow(&mstats, unsafe.Pointer(&samples[0]), len(samples), cap(samples)) + + checkUint64 := func(t *testing.T, m string, got, want uint64) { + t.Helper() + if got != want { + t.Errorf("metric %q: got %d, want %d", m, got, want) + } + } + + // Check to make sure the values we read line up with other values we read. + for i := range samples { + switch name := samples[i].Name; name { + case "/memory/classes/heap/free:bytes": + checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapIdle-mstats.HeapReleased) + case "/memory/classes/heap/released:bytes": + checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapReleased) + case "/memory/classes/heap/objects:bytes": + checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapAlloc) + case "/memory/classes/heap/unused:bytes": + checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapInuse-mstats.HeapAlloc) + case "/memory/classes/heap/stacks:bytes": + checkUint64(t, name, samples[i].Value.Uint64(), mstats.StackInuse) + case "/memory/classes/metadata/mcache/free:bytes": + checkUint64(t, name, samples[i].Value.Uint64(), mstats.MCacheSys-mstats.MCacheInuse) + case "/memory/classes/metadata/mcache/inuse:bytes": + checkUint64(t, name, samples[i].Value.Uint64(), mstats.MCacheInuse) + case "/memory/classes/metadata/mspan/free:bytes": + checkUint64(t, name, samples[i].Value.Uint64(), mstats.MSpanSys-mstats.MSpanInuse) + case "/memory/classes/metadata/mspan/inuse:bytes": + checkUint64(t, name, samples[i].Value.Uint64(), mstats.MSpanInuse) + case "/memory/classes/metadata/other:bytes": + checkUint64(t, name, samples[i].Value.Uint64(), mstats.GCSys) + case "/memory/classes/os-stacks:bytes": + checkUint64(t, name, samples[i].Value.Uint64(), mstats.StackSys-mstats.StackInuse) + case "/memory/classes/other:bytes": + checkUint64(t, name, samples[i].Value.Uint64(), mstats.OtherSys) + case "/memory/classes/profiling/buckets:bytes": + checkUint64(t, name, samples[i].Value.Uint64(), mstats.BuckHashSys) + case "/memory/classes/total:bytes": + checkUint64(t, name, samples[i].Value.Uint64(), mstats.Sys) + } + } +} + +func 
TestReadMetricsConsistency(t *testing.T) { + // Tests whether readMetrics produces consistent, sensible values. + // The values are read concurrently with the runtime doing other + // things (e.g. allocating) so what we read can't reasonably compared + // to runtime values. + + // Read all the supported metrics through the metrics package. + descs, samples := prepareAllMetricsSamples() + metrics.Read(samples) + + // Check to make sure the values we read make sense. + var totalVirtual struct { + got, want uint64 + } + for i := range samples { + kind := samples[i].Value.Kind() + if want := descs[samples[i].Name].Kind; kind != want { + t.Errorf("supported metric %q has unexpected kind: got %d, want %d", samples[i].Name, kind, want) + continue + } + if samples[i].Name != "/memory/classes/total:bytes" && strings.HasPrefix(samples[i].Name, "/memory/classes") { + v := samples[i].Value.Uint64() + totalVirtual.want += v + + // None of these stats should ever get this big. + // If they do, there's probably overflow involved, + // usually due to bad accounting. + if int64(v) < 0 { + t.Errorf("%q has high/negative value: %d", samples[i].Name, v) + } + } + switch samples[i].Name { + case "/memory/classes/total:bytes": + totalVirtual.got = samples[i].Value.Uint64() + } + } + if totalVirtual.got != totalVirtual.want { + t.Errorf(`"/memory/classes/total:bytes" does not match sum of /memory/classes/**: got %d, want %d`, totalVirtual.got, totalVirtual.want) + } +} diff --git a/src/runtime/mstats.go b/src/runtime/mstats.go index a8eca85fe6..512a06cffa 100644 --- a/src/runtime/mstats.go +++ b/src/runtime/mstats.go @@ -882,7 +882,8 @@ func (m *consistentHeapStats) unsafeClear() { // heapStatsDelta, the resulting values should be complete and // valid statistic values. // -// Not safe to call concurrently. +// Not safe to call concurrently. The world must be stopped +// or metricsSema must be held. func (m *consistentHeapStats) read(out *heapStatsDelta) { // Getting preempted after this point is not safe because // we read allp. We need to make sure a STW can't happen -- cgit v1.2.1 From 74e566ed1dc52f7ef58093aff936a0931537a1ad Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Wed, 5 Aug 2020 23:10:46 +0000 Subject: runtime: add readMetrics latency benchmark This change adds a new benchmark to the runtime tests for measuring the latency of the new metrics implementation, based on the ReadMemStats latency benchmark. readMetrics will have more metrics added to it in the future, and this benchmark will serve as a way to measure the cost of adding additional metrics. Change-Id: Ib05e3ed4afa49a70863fc0c418eab35b72263e24 Reviewed-on: https://go-review.googlesource.com/c/go/+/247042 Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Trust: Michael Knyszek Reviewed-by: Emmanuel Odeke Reviewed-by: Michael Pratt --- src/runtime/gc_test.go | 17 ++++++++++++----- src/runtime/metrics_test.go | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 5 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/gc_test.go b/src/runtime/gc_test.go index 9edebdada6..7870f31ae9 100644 --- a/src/runtime/gc_test.go +++ b/src/runtime/gc_test.go @@ -518,7 +518,7 @@ func BenchmarkReadMemStats(b *testing.B) { hugeSink = nil } -func BenchmarkReadMemStatsLatency(b *testing.B) { +func applyGCLoad(b *testing.B) func() { // We’ll apply load to the runtime with maxProcs-1 goroutines // and use one more to actually benchmark. 
It doesn't make sense // to try to run this test with only 1 P (that's what @@ -563,6 +563,14 @@ func BenchmarkReadMemStatsLatency(b *testing.B) { runtime.KeepAlive(hold) }() } + return func() { + close(done) + wg.Wait() + } +} + +func BenchmarkReadMemStatsLatency(b *testing.B) { + stop := applyGCLoad(b) // Spend this much time measuring latencies. latencies := make([]time.Duration, 0, 1024) @@ -579,12 +587,11 @@ func BenchmarkReadMemStatsLatency(b *testing.B) { runtime.ReadMemStats(&ms) latencies = append(latencies, time.Now().Sub(start)) } - close(done) - // Make sure to stop the timer before we wait! The goroutines above - // are very heavy-weight and not easy to stop, so we could end up + // Make sure to stop the timer before we wait! The load created above + // is very heavy-weight and not easy to stop, so we could end up // confusing the benchmarking framework for small b.N. b.StopTimer() - wg.Wait() + stop() // Disable the default */op metrics. // ns/op doesn't mean anything because it's an average, but we diff --git a/src/runtime/metrics_test.go b/src/runtime/metrics_test.go index f00aad07c4..d925b057b0 100644 --- a/src/runtime/metrics_test.go +++ b/src/runtime/metrics_test.go @@ -7,8 +7,10 @@ package runtime_test import ( "runtime" "runtime/metrics" + "sort" "strings" "testing" + "time" "unsafe" ) @@ -112,3 +114,39 @@ func TestReadMetricsConsistency(t *testing.T) { t.Errorf(`"/memory/classes/total:bytes" does not match sum of /memory/classes/**: got %d, want %d`, totalVirtual.got, totalVirtual.want) } } + +func BenchmarkReadMetricsLatency(b *testing.B) { + stop := applyGCLoad(b) + + // Spend this much time measuring latencies. + latencies := make([]time.Duration, 0, 1024) + _, samples := prepareAllMetricsSamples() + + // Hit metrics.Read continuously and measure. + b.ResetTimer() + for i := 0; i < b.N; i++ { + start := time.Now() + metrics.Read(samples) + latencies = append(latencies, time.Now().Sub(start)) + } + // Make sure to stop the timer before we wait! The load created above + // is very heavy-weight and not easy to stop, so we could end up + // confusing the benchmarking framework for small b.N. + b.StopTimer() + stop() + + // Disable the default */op metrics. + // ns/op doesn't mean anything because it's an average, but we + // have a sleep in our b.N loop above which skews this significantly. + b.ReportMetric(0, "ns/op") + b.ReportMetric(0, "B/op") + b.ReportMetric(0, "allocs/op") + + // Sort latencies then report percentiles. + sort.Slice(latencies, func(i, j int) bool { + return latencies[i] < latencies[j] + }) + b.ReportMetric(float64(latencies[len(latencies)*50/100]), "p50-ns") + b.ReportMetric(float64(latencies[len(latencies)*90/100]), "p90-ns") + b.ReportMetric(float64(latencies[len(latencies)*99/100]), "p99-ns") +} -- cgit v1.2.1 From 07c3f65d53df7bb9f84bdbd2ab64c0ae12337e3e Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Thu, 6 Aug 2020 15:44:27 +0000 Subject: runtime,runtime/metrics: add heap object count metric For #37112. 
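As a rough usage sketch (not part of this change), the new count can be read through the runtime/metrics API built up in the preceding patches; all metric and type names below are the ones added in this series:

    package main

    import (
        "fmt"
        "runtime/metrics"
    )

    func main() {
        // Sample only the heap object count introduced by this change.
        s := []metrics.Sample{{Name: "/gc/heap/objects:objects"}}
        metrics.Read(s)
        if s[0].Value.Kind() == metrics.KindUint64 {
            fmt.Println("objects occupying heap memory:", s[0].Value.Uint64())
        }
    }

Internally the value is derived from the per-size-class allocation and free counts, as the metrics.go hunk below shows.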
Change-Id: Idd3dd5c84215ddd1ab05c2e76e848aa0a4d40fb0 Reviewed-on: https://go-review.googlesource.com/c/go/+/247043 Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Trust: Michael Knyszek Reviewed-by: Michael Pratt --- src/runtime/metrics.go | 18 ++++++++++++++++-- src/runtime/metrics/description.go | 5 +++++ src/runtime/metrics/doc.go | 3 +++ src/runtime/metrics_test.go | 2 ++ 4 files changed, 26 insertions(+), 2 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/metrics.go b/src/runtime/metrics.go index 44b5a29751..cf619cca4b 100644 --- a/src/runtime/metrics.go +++ b/src/runtime/metrics.go @@ -38,6 +38,13 @@ func initMetrics() { return } metrics = map[string]metricData{ + "/gc/heap/objects:objects": { + deps: makeStatDepSet(heapStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = in.heapStats.numObjects + }, + }, "/memory/classes/heap/free:bytes": { deps: makeStatDepSet(heapStatsDep), compute: func(in *statAggregate, out *metricValue) { @@ -210,9 +217,13 @@ func (s *statDepSet) has(d statDep) bool { type heapStatsAggregate struct { heapStatsDelta + // Derived from values in heapStatsDelta. + // inObjects is the bytes of memory occupied by objects, - // derived from other values in heapStats. inObjects uint64 + + // numObjects is the number of live objects in the heap. + numObjects uint64 } // compute populates the heapStatsAggregate with values from the runtime. @@ -221,8 +232,11 @@ func (a *heapStatsAggregate) compute() { // Calculate derived stats. a.inObjects = uint64(a.largeAlloc - a.largeFree) + a.numObjects = uint64(a.largeAllocCount - a.largeFreeCount) for i := range a.smallAllocCount { - a.inObjects += uint64(a.smallAllocCount[i]-a.smallFreeCount[i]) * uint64(class_to_size[i]) + n := uint64(a.smallAllocCount[i] - a.smallFreeCount[i]) + a.inObjects += n * uint64(class_to_size[i]) + a.numObjects += n } } diff --git a/src/runtime/metrics/description.go b/src/runtime/metrics/description.go index 2e7df7e09f..47013e1451 100644 --- a/src/runtime/metrics/description.go +++ b/src/runtime/metrics/description.go @@ -50,6 +50,11 @@ type Description struct { // The English language descriptions below must be kept in sync with the // descriptions of each metric in doc.go. var allDesc = []Description{ + { + Name: "/gc/heap/objects:objects", + Description: "Number of objects, live or unswept, occupying heap memory.", + Kind: KindUint64, + }, { Name: "/memory/classes/heap/free:bytes", Description: "Memory that is available for allocation, and may be returned to the underlying system.", diff --git a/src/runtime/metrics/doc.go b/src/runtime/metrics/doc.go index fb4e23a2b5..4ac44bb19c 100644 --- a/src/runtime/metrics/doc.go +++ b/src/runtime/metrics/doc.go @@ -44,6 +44,9 @@ the documentation of the Name field of the Description struct. Supported metrics + /gc/heap/objects:objects + Number of objects, live or unswept, occupying heap memory. + /memory/classes/heap/free:bytes Memory that is available for allocation, and may be returned to the underlying system. 
diff --git a/src/runtime/metrics_test.go b/src/runtime/metrics_test.go index d925b057b0..6c0be7dc0b 100644 --- a/src/runtime/metrics_test.go +++ b/src/runtime/metrics_test.go @@ -70,6 +70,8 @@ func TestReadMetrics(t *testing.T) { checkUint64(t, name, samples[i].Value.Uint64(), mstats.BuckHashSys) case "/memory/classes/total:bytes": checkUint64(t, name, samples[i].Value.Uint64(), mstats.Sys) + case "/gc/heap/objects:objects": + checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapObjects) } } } -- cgit v1.2.1 From a8b28ebc87854fb6f2ba99f415f046dc2ff63604 Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Thu, 6 Aug 2020 16:47:58 +0000 Subject: runtime,runtime/metrics: add heap goal and GC cycle metrics This change adds three new metrics: the heap goal, GC cycle count, and forced GC count. These metrics are identical to their MemStats counterparts. For #37112. Change-Id: I5a5e8dd550c0d646e5dcdbdf38274895e27cdd88 Reviewed-on: https://go-review.googlesource.com/c/go/+/247044 Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Trust: Michael Knyszek Reviewed-by: Michael Pratt --- src/runtime/metrics.go | 51 ++++++++++++++++++++++++++++++++------ src/runtime/metrics/description.go | 23 +++++++++++++++++ src/runtime/metrics/doc.go | 12 +++++++++ src/runtime/metrics_test.go | 8 ++++++ 4 files changed, 86 insertions(+), 8 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/metrics.go b/src/runtime/metrics.go index cf619cca4b..6595a4342c 100644 --- a/src/runtime/metrics.go +++ b/src/runtime/metrics.go @@ -7,6 +7,7 @@ package runtime // Metrics implementation exported to runtime/metrics. import ( + "runtime/internal/atomic" "unsafe" ) @@ -38,6 +39,34 @@ func initMetrics() { return } metrics = map[string]metricData{ + "/gc/cycles/automatic:gc-cycles": { + deps: makeStatDepSet(sysStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = in.sysStats.gcCyclesDone - in.sysStats.gcCyclesForced + }, + }, + "/gc/cycles/forced:gc-cycles": { + deps: makeStatDepSet(sysStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = in.sysStats.gcCyclesForced + }, + }, + "/gc/cycles/total:gc-cycles": { + deps: makeStatDepSet(sysStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = in.sysStats.gcCyclesDone + }, + }, + "/gc/heap/goal:bytes": { + deps: makeStatDepSet(sysStatsDep), + compute: func(in *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = in.sysStats.heapGoal + }, + }, "/gc/heap/objects:objects": { deps: makeStatDepSet(heapStatsDep), compute: func(in *statAggregate, out *metricValue) { @@ -248,14 +277,17 @@ func (a *heapStatsAggregate) compute() { // heapStatsAggregate, means there could be some skew, but because of // these stats are independent, there's no real consistency issue here. type sysStatsAggregate struct { - stacksSys uint64 - mSpanSys uint64 - mSpanInUse uint64 - mCacheSys uint64 - mCacheInUse uint64 - buckHashSys uint64 - gcMiscSys uint64 - otherSys uint64 + stacksSys uint64 + mSpanSys uint64 + mSpanInUse uint64 + mCacheSys uint64 + mCacheInUse uint64 + buckHashSys uint64 + gcMiscSys uint64 + otherSys uint64 + heapGoal uint64 + gcCyclesDone uint64 + gcCyclesForced uint64 } // compute populates the sysStatsAggregate with values from the runtime. 
@@ -264,6 +296,9 @@ func (a *sysStatsAggregate) compute() { a.buckHashSys = memstats.buckhash_sys.load() a.gcMiscSys = memstats.gcMiscSys.load() a.otherSys = memstats.other_sys.load() + a.heapGoal = atomic.Load64(&memstats.next_gc) + a.gcCyclesDone = uint64(memstats.numgc) + a.gcCyclesForced = uint64(memstats.numforcedgc) systemstack(func() { lock(&mheap_.lock) diff --git a/src/runtime/metrics/description.go b/src/runtime/metrics/description.go index 47013e1451..66d229c270 100644 --- a/src/runtime/metrics/description.go +++ b/src/runtime/metrics/description.go @@ -50,6 +50,29 @@ type Description struct { // The English language descriptions below must be kept in sync with the // descriptions of each metric in doc.go. var allDesc = []Description{ + { + Name: "/gc/cycles/automatic:gc-cycles", + Description: "Count of completed GC cycles generated by the Go runtime.", + Kind: KindUint64, + Cumulative: true, + }, + { + Name: "/gc/cycles/forced:gc-cycles", + Description: "Count of completed forced GC cycles.", + Kind: KindUint64, + Cumulative: true, + }, + { + Name: "/gc/cycles/total:gc-cycles", + Description: "Count of all completed GC cycles.", + Kind: KindUint64, + Cumulative: true, + }, + { + Name: "/gc/heap/goal:bytes", + Description: "Heap size target for the end of the GC cycle.", + Kind: KindUint64, + }, { Name: "/gc/heap/objects:objects", Description: "Number of objects, live or unswept, occupying heap memory.", diff --git a/src/runtime/metrics/doc.go b/src/runtime/metrics/doc.go index 4ac44bb19c..9b44e73ee6 100644 --- a/src/runtime/metrics/doc.go +++ b/src/runtime/metrics/doc.go @@ -44,6 +44,18 @@ the documentation of the Name field of the Description struct. Supported metrics + /gc/cycles/automatic:gc-cycles + Count of completed GC cycles generated by the Go runtime. + + /gc/cycles/forced:gc-cycles + Count of completed forced GC cycles. + + /gc/cycles/total:gc-cycles + Count of all completed GC cycles. + + /gc/heap/goal:bytes + Heap size target for the end of the GC cycle. + /gc/heap/objects:objects Number of objects, live or unswept, occupying heap memory. diff --git a/src/runtime/metrics_test.go b/src/runtime/metrics_test.go index 6c0be7dc0b..3724760294 100644 --- a/src/runtime/metrics_test.go +++ b/src/runtime/metrics_test.go @@ -72,6 +72,14 @@ func TestReadMetrics(t *testing.T) { checkUint64(t, name, samples[i].Value.Uint64(), mstats.Sys) case "/gc/heap/objects:objects": checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapObjects) + case "/gc/heap/goal:bytes": + checkUint64(t, name, samples[i].Value.Uint64(), mstats.NextGC) + case "/gc/cycles/automatic:gc-cycles": + checkUint64(t, name, samples[i].Value.Uint64(), uint64(mstats.NumGC-mstats.NumForcedGC)) + case "/gc/cycles/forced:gc-cycles": + checkUint64(t, name, samples[i].Value.Uint64(), uint64(mstats.NumForcedGC)) + case "/gc/cycles/total:gc-cycles": + checkUint64(t, name, samples[i].Value.Uint64(), uint64(mstats.NumGC)) } } } -- cgit v1.2.1 From 22d2b984a680900ebbec6268f93a839286b6f130 Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Mon, 26 Oct 2020 19:35:23 +0000 Subject: runtime: make sysMemStats' methods nosplit sysMemStats are updated early on in runtime initialization, so triggering a stack growth would be bad. Mark them nosplit. Thank you so much to cherryyz@google.com for finding this fix! Fixes #42218. 
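For background, //go:nosplit suppresses the stack-growth check in a function's prologue, which is what makes such functions safe to call before the scheduler and stack machinery are fully set up. A purely illustrative sketch of the directive's placement (using sync/atomic rather than the runtime-internal atomic package; the package and function names here are made up):

    package stats

    import "sync/atomic"

    // load reads the counter without a stack-split check in its prologue.
    // The directive must sit in the comment block directly above the func.
    //go:nosplit
    func load(p *uint64) uint64 {
        return atomic.LoadUint64(p)
    }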
Change-Id: Ic62db76e6a4f829355d7eaabed1727c51adfbd0f Reviewed-on: https://go-review.googlesource.com/c/go/+/265157 Trust: Michael Knyszek Run-TryBot: Michael Knyszek Reviewed-by: Michael Pratt Reviewed-by: Cherry Zhang Reviewed-by: Austin Clements TryBot-Result: Go Bot --- src/runtime/mstats.go | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/runtime') diff --git a/src/runtime/mstats.go b/src/runtime/mstats.go index 512a06cffa..07f466ec49 100644 --- a/src/runtime/mstats.go +++ b/src/runtime/mstats.go @@ -720,11 +720,17 @@ func flushallmcaches() { type sysMemStat uint64 // load atomically reads the value of the stat. +// +// Must be nosplit as it is called in runtime initialization, e.g. newosproc0. +//go:nosplit func (s *sysMemStat) load() uint64 { return atomic.Load64((*uint64)(s)) } // add atomically adds the sysMemStat by n. +// +// Must be nosplit as it is called in runtime initialization, e.g. newosproc0. +//go:nosplit func (s *sysMemStat) add(n int64) { if s == nil { return -- cgit v1.2.1 From 8e2370bf7f0c992ce1ea5dc54b43551cea71a485 Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Thu, 6 Aug 2020 19:04:46 +0000 Subject: runtime,runtime/metrics: add object size distribution metrics This change adds metrics for the distribution of objects allocated and freed by size, mirroring MemStats' BySize field. For #37112. Change-Id: Ibaf1812da93598b37265ec97abc6669c1a5efcbf Reviewed-on: https://go-review.googlesource.com/c/go/+/247045 Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Trust: Michael Knyszek Reviewed-by: Michael Pratt --- src/runtime/metrics.go | 52 ++++++++++++++++++++++++++++++++++++++ src/runtime/metrics/description.go | 10 ++++++++ src/runtime/metrics/doc.go | 6 +++++ src/runtime/metrics_test.go | 36 ++++++++++++++++++++++++++ 4 files changed, 104 insertions(+) (limited to 'src/runtime') diff --git a/src/runtime/metrics.go b/src/runtime/metrics.go index 6595a4342c..32d8ab461c 100644 --- a/src/runtime/metrics.go +++ b/src/runtime/metrics.go @@ -18,6 +18,8 @@ var ( metricsSema uint32 = 1 metricsInit bool metrics map[string]metricData + + sizeClassBuckets []float64 ) type metricData struct { @@ -38,6 +40,10 @@ func initMetrics() { if metricsInit { return } + sizeClassBuckets = make([]float64, _NumSizeClasses) + for i := range sizeClassBuckets { + sizeClassBuckets[i] = float64(class_to_size[i]) + } metrics = map[string]metricData{ "/gc/cycles/automatic:gc-cycles": { deps: makeStatDepSet(sysStatsDep), @@ -60,6 +66,26 @@ func initMetrics() { out.scalar = in.sysStats.gcCyclesDone }, }, + "/gc/heap/allocs-by-size:objects": { + deps: makeStatDepSet(heapStatsDep), + compute: func(in *statAggregate, out *metricValue) { + hist := out.float64HistOrInit(sizeClassBuckets) + hist.counts[len(hist.counts)-1] = uint64(in.heapStats.largeAllocCount) + for i := range hist.buckets { + hist.counts[i] = uint64(in.heapStats.smallAllocCount[i]) + } + }, + }, + "/gc/heap/frees-by-size:objects": { + deps: makeStatDepSet(heapStatsDep), + compute: func(in *statAggregate, out *metricValue) { + hist := out.float64HistOrInit(sizeClassBuckets) + hist.counts[len(hist.counts)-1] = uint64(in.heapStats.largeFreeCount) + for i := range hist.buckets { + hist.counts[i] = uint64(in.heapStats.smallFreeCount[i]) + } + }, + }, "/gc/heap/goal:bytes": { deps: makeStatDepSet(sysStatsDep), compute: func(in *statAggregate, out *metricValue) { @@ -370,6 +396,32 @@ type metricValue struct { pointer unsafe.Pointer // contains non-scalar values. 
} +// float64HistOrInit tries to pull out an existing float64Histogram +// from the value, but if none exists, then it allocates one with +// the given buckets. +func (v *metricValue) float64HistOrInit(buckets []float64) *metricFloat64Histogram { + var hist *metricFloat64Histogram + if v.kind == metricKindFloat64Histogram && v.pointer != nil { + hist = (*metricFloat64Histogram)(v.pointer) + } else { + v.kind = metricKindFloat64Histogram + hist = new(metricFloat64Histogram) + v.pointer = unsafe.Pointer(hist) + } + hist.buckets = buckets + if len(hist.counts) != len(hist.buckets)+1 { + hist.counts = make([]uint64, len(buckets)+1) + } + return hist +} + +// metricFloat64Histogram is a runtime copy of runtime/metrics.Float64Histogram +// and must be kept structurally identical to that type. +type metricFloat64Histogram struct { + counts []uint64 + buckets []float64 +} + // agg is used by readMetrics, and is protected by metricsSema. // // Managed as a global variable because its pointer will be diff --git a/src/runtime/metrics/description.go b/src/runtime/metrics/description.go index 66d229c270..e43904fc7d 100644 --- a/src/runtime/metrics/description.go +++ b/src/runtime/metrics/description.go @@ -68,6 +68,16 @@ var allDesc = []Description{ Kind: KindUint64, Cumulative: true, }, + { + Name: "/gc/heap/allocs-by-size:objects", + Description: "Distribution of all objects allocated by approximate size.", + Kind: KindFloat64Histogram, + }, + { + Name: "/gc/heap/frees-by-size:objects", + Description: "Distribution of all objects freed by approximate size.", + Kind: KindFloat64Histogram, + }, { Name: "/gc/heap/goal:bytes", Description: "Heap size target for the end of the GC cycle.", diff --git a/src/runtime/metrics/doc.go b/src/runtime/metrics/doc.go index 9b44e73ee6..5045a5b4c1 100644 --- a/src/runtime/metrics/doc.go +++ b/src/runtime/metrics/doc.go @@ -53,6 +53,12 @@ Supported metrics /gc/cycles/total:gc-cycles Count of all completed GC cycles. + /gc/heap/allocs-by-size:objects + Distribution of all objects allocated by approximate size. + + /gc/heap/frees-by-size:objects + Distribution of all objects freed by approximate size. + /gc/heap/goal:bytes Heap size target for the end of the GC cycle. 
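The doc.go entries above describe the two new size-class histograms. As a consumer-side sketch (field names as exercised by the test added below, where the counts are cumulative and the allocation counts never fall below the free counts), the number of outstanding objects can be recovered like this:

    package main

    import (
        "fmt"
        "runtime/metrics"
    )

    func main() {
        s := []metrics.Sample{
            {Name: "/gc/heap/allocs-by-size:objects"},
            {Name: "/gc/heap/frees-by-size:objects"},
        }
        metrics.Read(s)
        allocs := s[0].Value.Float64Histogram()
        frees := s[1].Value.Float64Histogram()

        // Both histograms share the same buckets, so the per-bucket
        // difference sums to the number of objects still live.
        live := uint64(0)
        for i := range allocs.Counts {
            live += allocs.Counts[i] - frees.Counts[i]
        }
        fmt.Println("approximately live objects:", live)
    }

This is essentially the cross-check that TestReadMetricsConsistency performs against /gc/heap/objects:objects.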
diff --git a/src/runtime/metrics_test.go b/src/runtime/metrics_test.go index 3724760294..1a30810544 100644 --- a/src/runtime/metrics_test.go +++ b/src/runtime/metrics_test.go @@ -98,6 +98,10 @@ func TestReadMetricsConsistency(t *testing.T) { var totalVirtual struct { got, want uint64 } + var objects struct { + alloc, free *metrics.Float64Histogram + total uint64 + } for i := range samples { kind := samples[i].Value.Kind() if want := descs[samples[i].Name].Kind; kind != want { @@ -118,11 +122,43 @@ func TestReadMetricsConsistency(t *testing.T) { switch samples[i].Name { case "/memory/classes/total:bytes": totalVirtual.got = samples[i].Value.Uint64() + case "/gc/heap/objects:objects": + objects.total = samples[i].Value.Uint64() + case "/gc/heap/allocs-by-size:objects": + objects.alloc = samples[i].Value.Float64Histogram() + case "/gc/heap/frees-by-size:objects": + objects.free = samples[i].Value.Float64Histogram() } } if totalVirtual.got != totalVirtual.want { t.Errorf(`"/memory/classes/total:bytes" does not match sum of /memory/classes/**: got %d, want %d`, totalVirtual.got, totalVirtual.want) } + if len(objects.alloc.Buckets) != len(objects.free.Buckets) { + t.Error("allocs-by-size and frees-by-size buckets don't match in length") + } else if len(objects.alloc.Counts) != len(objects.free.Counts) { + t.Error("allocs-by-size and frees-by-size counts don't match in length") + } else { + for i := range objects.alloc.Buckets { + ba := objects.alloc.Buckets[i] + bf := objects.free.Buckets[i] + if ba != bf { + t.Errorf("bucket %d is different for alloc and free hists: %f != %f", i, ba, bf) + } + } + if !t.Failed() { + got, want := uint64(0), objects.total + for i := range objects.alloc.Counts { + if objects.alloc.Counts[i] < objects.free.Counts[i] { + t.Errorf("found more allocs than frees in object dist bucket %d", i) + continue + } + got += objects.alloc.Counts[i] - objects.free.Counts[i] + } + if got != want { + t.Errorf("object distribution counts don't match count of live objects: got %d, want %d", got, want) + } + } + } } func BenchmarkReadMetricsLatency(b *testing.B) { -- cgit v1.2.1 From 36c5edd8d9e6c13af26733e5c820eae0598203fe Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Thu, 6 Aug 2020 20:36:49 +0000 Subject: runtime: add timeHistogram type This change adds a concurrent HDR time histogram to the runtime with tests. It also adds a function to generate boundaries for use by the metrics package. For #37112. 
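The histogram added below indexes a duration by its highest set bit (super-bucket) and by the next timeHistSubBucketBits bits (sub-bucket). A standalone sketch of that index computation, using math/bits in place of the runtime-internal sys.Len64 and omitting the overflow bucket:

    package main

    import (
        "fmt"
        "math/bits"
    )

    const (
        subBucketBits = 4
        numSubBuckets = 1 << subBucketBits
    )

    // bucketOf mirrors the index computation in timeHistogram.record.
    func bucketOf(duration int64) (super, sub uint) {
        if duration < numSubBuckets {
            // Small values land in super-bucket 0, indexed directly.
            return 0, uint(duration)
        }
        super = uint(bits.Len64(uint64(duration))) - subBucketBits
        sub = uint((duration >> (super - 1)) % numSubBuckets)
        return super, sub
    }

    func main() {
        fmt.Println(bucketOf(0b10110))  // 22ns -> super-bucket 1, sub-bucket 6
        fmt.Println(bucketOf(0b100010)) // 34ns -> super-bucket 2, sub-bucket 1
    }

The two printed cases match the worked examples in the histogram.go comment below.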
Change-Id: Ifbef8ddce8e3a965a0dcd58ccd4915c282ae2098 Reviewed-on: https://go-review.googlesource.com/c/go/+/247046 Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Trust: Michael Knyszek Reviewed-by: Michael Pratt --- src/runtime/export_test.go | 24 +++++++ src/runtime/histogram.go | 148 ++++++++++++++++++++++++++++++++++++++++++ src/runtime/histogram_test.go | 58 +++++++++++++++++ src/runtime/metrics.go | 2 + 4 files changed, 232 insertions(+) create mode 100644 src/runtime/histogram.go create mode 100644 src/runtime/histogram_test.go (limited to 'src/runtime') diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go index d043fe3ee5..4ca0420d2a 100644 --- a/src/runtime/export_test.go +++ b/src/runtime/export_test.go @@ -1141,3 +1141,27 @@ func MSpanCountAlloc(ms *MSpan, bits []byte) int { s.gcmarkBits = nil return result } + +const ( + TimeHistSubBucketBits = timeHistSubBucketBits + TimeHistNumSubBuckets = timeHistNumSubBuckets + TimeHistNumSuperBuckets = timeHistNumSuperBuckets +) + +type TimeHistogram timeHistogram + +// Counts returns the counts for the given bucket, subBucket indices. +// Returns true if the bucket was valid, otherwise returns the counts +// for the overflow bucket and false. +func (th *TimeHistogram) Count(bucket, subBucket uint) (uint64, bool) { + t := (*timeHistogram)(th) + i := bucket*TimeHistNumSubBuckets + subBucket + if i >= uint(len(t.counts)) { + return t.overflow, false + } + return t.counts[i], true +} + +func (th *TimeHistogram) Record(duration int64) { + (*timeHistogram)(th).record(duration) +} diff --git a/src/runtime/histogram.go b/src/runtime/histogram.go new file mode 100644 index 0000000000..4020969eb9 --- /dev/null +++ b/src/runtime/histogram.go @@ -0,0 +1,148 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package runtime + +import ( + "runtime/internal/atomic" + "runtime/internal/sys" +) + +const ( + // For the time histogram type, we use an HDR histogram. + // Values are placed in super-buckets based solely on the most + // significant set bit. Thus, super-buckets are power-of-2 sized. + // Values are then placed into sub-buckets based on the value of + // the next timeHistSubBucketBits most significant bits. Thus, + // sub-buckets are linear within a super-bucket. + // + // Therefore, the number of sub-buckets (timeHistNumSubBuckets) + // defines the error. This error may be computed as + // 1/timeHistNumSubBuckets*100%. For example, for 16 sub-buckets + // per super-bucket the error is approximately 6%. + // + // The number of super-buckets (timeHistNumSuperBuckets), on the + // other hand, defines the range. To reserve room for sub-buckets, + // bit timeHistSubBucketBits is the first bit considered for + // super-buckets, so super-bucket indicies are adjusted accordingly. + // + // As an example, consider 45 super-buckets with 16 sub-buckets. + // + // 00110 + // ^---- + // │ ^ + // │ └---- Lowest 4 bits -> sub-bucket 6 + // └------- Bit 4 unset -> super-bucket 0 + // + // 10110 + // ^---- + // │ ^ + // │ └---- Next 4 bits -> sub-bucket 6 + // └------- Bit 4 set -> super-bucket 1 + // 100010 + // ^----^ + // │ ^ └-- Lower bits ignored + // │ └---- Next 4 bits -> sub-bucket 1 + // └------- Bit 5 set -> super-bucket 2 + // + // Following this pattern, bucket 45 will have the bit 48 set. 
We don't + // have any buckets for higher values, so the highest sub-bucket will + // contain values of 2^48-1 nanoseconds or approx. 3 days. This range is + // more than enough to handle durations produced by the runtime. + timeHistSubBucketBits = 4 + timeHistNumSubBuckets = 1 << timeHistSubBucketBits + timeHistNumSuperBuckets = 45 + timeHistTotalBuckets = timeHistNumSuperBuckets*timeHistNumSubBuckets + 1 +) + +// timeHistogram represents a distribution of durations in +// nanoseconds. +// +// The accuracy and range of the histogram is defined by the +// timeHistSubBucketBits and timeHistNumSuperBuckets constants. +// +// It is an HDR histogram with exponentially-distributed +// buckets and linearly distributed sub-buckets. +// +// Counts in the histogram are updated atomically, so it is safe +// for concurrent use. It is also safe to read all the values +// atomically. +type timeHistogram struct { + counts [timeHistNumSuperBuckets * timeHistNumSubBuckets]uint64 + overflow uint64 +} + +// record adds the given duration to the distribution. +// +// Although the duration is an int64 to facilitate ease-of-use +// with e.g. nanotime, the duration must be non-negative. +func (h *timeHistogram) record(duration int64) { + if duration < 0 { + throw("timeHistogram encountered negative duration") + } + // The index of the exponential bucket is just the index + // of the highest set bit adjusted for how many bits we + // use for the subbucket. Note that it's timeHistSubBucketsBits-1 + // because we use the 0th bucket to hold values < timeHistNumSubBuckets. + var superBucket, subBucket uint + if duration >= timeHistNumSubBuckets { + // At this point, we know the duration value will always be + // at least timeHistSubBucketsBits long. + superBucket = uint(sys.Len64(uint64(duration))) - timeHistSubBucketBits + if superBucket*timeHistNumSubBuckets >= uint(len(h.counts)) { + // The bucket index we got is larger than what we support, so + // add into the special overflow bucket. + atomic.Xadd64(&h.overflow, 1) + return + } + // The linear subbucket index is just the timeHistSubBucketsBits + // bits after the top bit. To extract that value, shift down + // the duration such that we leave the top bit and the next bits + // intact, then extract the index. + subBucket = uint((duration >> (superBucket - 1)) % timeHistNumSubBuckets) + } else { + subBucket = uint(duration) + } + atomic.Xadd64(&h.counts[superBucket*timeHistNumSubBuckets+subBucket], 1) +} + +// timeHistogramMetricsBuckets generates a slice of boundaries for +// the timeHistogram. These boundaries are represented in seconds, +// not nanoseconds like the timeHistogram represents durations. +func timeHistogramMetricsBuckets() []float64 { + b := make([]float64, timeHistTotalBuckets-1) + for i := 0; i < timeHistNumSuperBuckets; i++ { + superBucketMin := uint64(0) + // The (inclusive) minimum for the first bucket is 0. + if i > 0 { + // The minimum for the second bucket will be + // 1 << timeHistSubBucketBits, indicating that all + // sub-buckets are represented by the next timeHistSubBucketBits + // bits. + // Thereafter, we shift up by 1 each time, so we can represent + // this pattern as (i-1)+timeHistSubBucketBits. + superBucketMin = uint64(1) << uint(i-1+timeHistSubBucketBits) + } + // subBucketShift is the amount that we need to shift the sub-bucket + // index to combine it with the bucketMin. 
+ subBucketShift := uint(0) + if i > 1 { + // The first two buckets are exact with respect to integers, + // so we'll never have to shift the sub-bucket index. Thereafter, + // we shift up by 1 with each subsequent bucket. + subBucketShift = uint(i - 2) + } + for j := 0; j < timeHistNumSubBuckets; j++ { + // j is the sub-bucket index. By shifting the index into position to + // combine with the bucket minimum, we obtain the minimum value for that + // sub-bucket. + subBucketMin := superBucketMin + (uint64(j) << subBucketShift) + + // Convert the subBucketMin which is in nanoseconds to a float64 seconds value. + // These values will all be exactly representable by a float64. + b[i*timeHistNumSubBuckets+j] = float64(subBucketMin) / 1e9 + } + } + return b +} diff --git a/src/runtime/histogram_test.go b/src/runtime/histogram_test.go new file mode 100644 index 0000000000..5f5b28f784 --- /dev/null +++ b/src/runtime/histogram_test.go @@ -0,0 +1,58 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package runtime_test + +import ( + . "runtime" + "testing" +) + +var dummyTimeHistogram TimeHistogram + +func TestTimeHistogram(t *testing.T) { + // We need to use a global dummy because this + // could get stack-allocated with a non-8-byte alignment. + // The result of this bad alignment is a segfault on + // 32-bit platforms when calling Record. + h := &dummyTimeHistogram + + // Record exactly one sample in each bucket. + for i := 0; i < TimeHistNumSuperBuckets; i++ { + var base int64 + if i > 0 { + base = int64(1) << (i + TimeHistSubBucketBits - 1) + } + for j := 0; j < TimeHistNumSubBuckets; j++ { + v := int64(j) + if i > 0 { + v <<= i - 1 + } + h.Record(base + v) + } + } + // Hit the overflow bucket. + h.Record(int64(^uint64(0) >> 1)) + + // Check to make sure there's exactly one count in each + // bucket. + for i := uint(0); i < TimeHistNumSuperBuckets; i++ { + for j := uint(0); j < TimeHistNumSubBuckets; j++ { + c, ok := h.Count(i, j) + if !ok { + t.Errorf("hit overflow bucket unexpectedly: (%d, %d)", i, j) + } else if c != 1 { + t.Errorf("bucket (%d, %d) has count that is not 1: %d", i, j, c) + } + } + } + c, ok := h.Count(TimeHistNumSuperBuckets, 0) + if ok { + t.Errorf("expected to hit overflow bucket: (%d, %d)", TimeHistNumSuperBuckets, 0) + } + if c != 1 { + t.Errorf("overflow bucket has count that is not 1: %d", c) + } + dummyTimeHistogram = TimeHistogram{} +} diff --git a/src/runtime/metrics.go b/src/runtime/metrics.go index 32d8ab461c..2be38ccaaa 100644 --- a/src/runtime/metrics.go +++ b/src/runtime/metrics.go @@ -20,6 +20,7 @@ var ( metrics map[string]metricData sizeClassBuckets []float64 + timeHistBuckets []float64 ) type metricData struct { @@ -44,6 +45,7 @@ func initMetrics() { for i := range sizeClassBuckets { sizeClassBuckets[i] = float64(class_to_size[i]) } + timeHistBuckets = timeHistogramMetricsBuckets() metrics = map[string]metricData{ "/gc/cycles/automatic:gc-cycles": { deps: makeStatDepSet(sysStatsDep), -- cgit v1.2.1 From d39a89fd5843f535d634620d27110b320431f584 Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Thu, 6 Aug 2020 21:59:13 +0000 Subject: runtime,runtime/metrics: add metric for distribution of GC pauses For #37112. 
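A consumer-side sketch of the new pause histogram (field names as exercised by the tests in this series); the consistency test added below expects at least two recorded pauses per completed GC cycle:

    package main

    import (
        "fmt"
        "runtime/metrics"
    )

    func main() {
        s := []metrics.Sample{{Name: "/gc/pauses:seconds"}}
        metrics.Read(s)
        h := s[0].Value.Float64Histogram()

        // Each stop-the-world pause is recorded individually, so the
        // total count grows by at least two per GC cycle.
        var pauses uint64
        for _, c := range h.Counts {
            pauses += c
        }
        fmt.Println("GC-related stop-the-world pauses:", pauses)
    }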
Change-Id: Ibb0425c9c582ae3da3b2662d5bbe830d7df9079c Reviewed-on: https://go-review.googlesource.com/c/go/+/247047 Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Trust: Michael Knyszek Reviewed-by: Michael Pratt --- src/runtime/metrics.go | 9 +++++++++ src/runtime/metrics/description.go | 5 +++++ src/runtime/metrics/doc.go | 3 +++ src/runtime/metrics_test.go | 22 ++++++++++++++++++++++ src/runtime/mgc.go | 3 +++ src/runtime/mstats.go | 12 ++++++++++++ 6 files changed, 54 insertions(+) (limited to 'src/runtime') diff --git a/src/runtime/metrics.go b/src/runtime/metrics.go index 2be38ccaaa..0e391472b2 100644 --- a/src/runtime/metrics.go +++ b/src/runtime/metrics.go @@ -102,6 +102,15 @@ func initMetrics() { out.scalar = in.heapStats.numObjects }, }, + "/gc/pauses:seconds": { + compute: func(_ *statAggregate, out *metricValue) { + hist := out.float64HistOrInit(timeHistBuckets) + hist.counts[len(hist.counts)-1] = atomic.Load64(&memstats.gcPauseDist.overflow) + for i := range hist.buckets { + hist.counts[i] = atomic.Load64(&memstats.gcPauseDist.counts[i]) + } + }, + }, "/memory/classes/heap/free:bytes": { deps: makeStatDepSet(heapStatsDep), compute: func(in *statAggregate, out *metricValue) { diff --git a/src/runtime/metrics/description.go b/src/runtime/metrics/description.go index e43904fc7d..47959e467c 100644 --- a/src/runtime/metrics/description.go +++ b/src/runtime/metrics/description.go @@ -88,6 +88,11 @@ var allDesc = []Description{ Description: "Number of objects, live or unswept, occupying heap memory.", Kind: KindUint64, }, + { + Name: "/gc/pauses:seconds", + Description: "Distribution individual GC-related stop-the-world pause latencies.", + Kind: KindFloat64Histogram, + }, { Name: "/memory/classes/heap/free:bytes", Description: "Memory that is available for allocation, and may be returned to the underlying system.", diff --git a/src/runtime/metrics/doc.go b/src/runtime/metrics/doc.go index 5045a5b4c1..1e12ade5a1 100644 --- a/src/runtime/metrics/doc.go +++ b/src/runtime/metrics/doc.go @@ -65,6 +65,9 @@ Supported metrics /gc/heap/objects:objects Number of objects, live or unswept, occupying heap memory. + /gc/pauses:seconds + Distribution individual GC-related stop-the-world pause latencies. + /memory/classes/heap/free:bytes Memory that is available for allocation, and may be returned to the underlying system. diff --git a/src/runtime/metrics_test.go b/src/runtime/metrics_test.go index 1a30810544..7b3132bc30 100644 --- a/src/runtime/metrics_test.go +++ b/src/runtime/metrics_test.go @@ -90,6 +90,11 @@ func TestReadMetricsConsistency(t *testing.T) { // things (e.g. allocating) so what we read can't reasonably compared // to runtime values. + // Run a few GC cycles to get some of the stats to be non-zero. + runtime.GC() + runtime.GC() + runtime.GC() + // Read all the supported metrics through the metrics package. 
descs, samples := prepareAllMetricsSamples() metrics.Read(samples) @@ -102,6 +107,10 @@ func TestReadMetricsConsistency(t *testing.T) { alloc, free *metrics.Float64Histogram total uint64 } + var gc struct { + numGC uint64 + pauses uint64 + } for i := range samples { kind := samples[i].Value.Kind() if want := descs[samples[i].Name].Kind; kind != want { @@ -128,6 +137,14 @@ func TestReadMetricsConsistency(t *testing.T) { objects.alloc = samples[i].Value.Float64Histogram() case "/gc/heap/frees-by-size:objects": objects.free = samples[i].Value.Float64Histogram() + case "/gc/cycles:gc-cycles": + gc.numGC = samples[i].Value.Uint64() + case "/gc/pauses:seconds": + h := samples[i].Value.Float64Histogram() + gc.pauses = 0 + for i := range h.Counts { + gc.pauses += h.Counts[i] + } } } if totalVirtual.got != totalVirtual.want { @@ -159,6 +176,11 @@ func TestReadMetricsConsistency(t *testing.T) { } } } + // The current GC has at least 2 pauses per GC. + // Check to see if that value makes sense. + if gc.pauses < gc.numGC*2 { + t.Errorf("fewer pauses than expected: got %d, want at least %d", gc.pauses, gc.numGC*2) + } } func BenchmarkReadMetricsLatency(b *testing.B) { diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index 540c376f1c..b0ab0ae6bb 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -1418,6 +1418,7 @@ func gcStart(trigger gcTrigger) { now = startTheWorldWithSema(trace.enabled) work.pauseNS += now - work.pauseStart work.tMark = now + memstats.gcPauseDist.record(now - work.pauseStart) }) // Release the world sema before Gosched() in STW mode @@ -1565,6 +1566,7 @@ top: systemstack(func() { now := startTheWorldWithSema(true) work.pauseNS += now - work.pauseStart + memstats.gcPauseDist.record(now - work.pauseStart) }) semrelease(&worldsema) goto top @@ -1677,6 +1679,7 @@ func gcMarkTermination(nextTriggerRatio float64) { unixNow := sec*1e9 + int64(nsec) work.pauseNS += now - work.pauseStart work.tEnd = now + memstats.gcPauseDist.record(now - work.pauseStart) atomic.Store64(&memstats.last_gc_unix, uint64(unixNow)) // must be Unix time to make sense to user atomic.Store64(&memstats.last_gc_nanotime, uint64(now)) // monotonic time for us memstats.pause_ns[memstats.numgc%uint32(len(memstats.pause_ns))] = uint64(work.pauseNS) diff --git a/src/runtime/mstats.go b/src/runtime/mstats.go index 07f466ec49..e0a417d213 100644 --- a/src/runtime/mstats.go +++ b/src/runtime/mstats.go @@ -157,6 +157,14 @@ type mstats struct { // heapStats is a set of statistics heapStats consistentHeapStats + + _ uint32 // ensure gcPauseDist is aligned + + // gcPauseDist represents the distribution of all GC-related + // application pauses in the runtime. + // + // Each individual pause is counted separately, unlike pause_ns. + gcPauseDist timeHistogram } var memstats mstats @@ -443,6 +451,10 @@ func init() { println(offset) throw("memstats.heapStats not aligned to 8 bytes") } + if offset := unsafe.Offsetof(memstats.gcPauseDist); offset%8 != 0 { + println(offset) + throw("memstats.gcPauseDist not aligned to 8 bytes") + } // Ensure the size of heapStatsDelta causes adjacent fields/slots (e.g. // [3]heapStatsDelta) to be 8-byte aligned. if size := unsafe.Sizeof(heapStatsDelta{}); size%8 != 0 { -- cgit v1.2.1 From 80c6b92ecb911409f57d06793a1213395b75ebe2 Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Fri, 7 Aug 2020 16:37:29 +0000 Subject: runtime,runtime/metrics: export goroutine count as a metric For #37112. 
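A usage sketch for the new goroutine metric; since the value comes from gcount(), it should track runtime.NumGoroutine closely, though both race with ongoing scheduling:

    package main

    import (
        "fmt"
        "runtime"
        "runtime/metrics"
    )

    func main() {
        s := []metrics.Sample{{Name: "/sched/goroutines:goroutines"}}
        metrics.Read(s)
        fmt.Println("via metrics:", s[0].Value.Uint64())
        fmt.Println("via runtime:", runtime.NumGoroutine())
    }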
Change-Id: I994dfe848605b95ef6aec24f53869e929247e987 Reviewed-on: https://go-review.googlesource.com/c/go/+/247049 Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Trust: Michael Knyszek Reviewed-by: Michael Pratt --- src/runtime/metrics.go | 6 ++++++ src/runtime/metrics/description.go | 5 +++++ src/runtime/metrics/doc.go | 3 +++ src/runtime/metrics_test.go | 4 ++++ 4 files changed, 18 insertions(+) (limited to 'src/runtime') diff --git a/src/runtime/metrics.go b/src/runtime/metrics.go index 0e391472b2..d3c0341aee 100644 --- a/src/runtime/metrics.go +++ b/src/runtime/metrics.go @@ -214,6 +214,12 @@ func initMetrics() { in.sysStats.gcMiscSys + in.sysStats.otherSys }, }, + "/sched/goroutines:goroutines": { + compute: func(_ *statAggregate, out *metricValue) { + out.kind = metricKindUint64 + out.scalar = uint64(gcount()) + }, + }, } metricsInit = true } diff --git a/src/runtime/metrics/description.go b/src/runtime/metrics/description.go index 47959e467c..bc2e0882db 100644 --- a/src/runtime/metrics/description.go +++ b/src/runtime/metrics/description.go @@ -163,6 +163,11 @@ var allDesc = []Description{ Description: "All memory mapped by the Go runtime into the current process as read-write. Note that this does not include memory mapped by code called via cgo or via the syscall package. Sum of all metrics in /memory/classes.", Kind: KindUint64, }, + { + Name: "/sched/goroutines:goroutines", + Description: "Count of live goroutines.", + Kind: KindUint64, + }, } // All returns a slice of containing metric descriptions for all supported metrics. diff --git a/src/runtime/metrics/doc.go b/src/runtime/metrics/doc.go index 1e12ade5a1..e340f3d0dd 100644 --- a/src/runtime/metrics/doc.go +++ b/src/runtime/metrics/doc.go @@ -123,5 +123,8 @@ Supported metrics as read-write. Note that this does not include memory mapped by code called via cgo or via the syscall package. Sum of all metrics in /memory/classes. + + /sched/goroutines:goroutines + Count of live goroutines. */ package metrics diff --git a/src/runtime/metrics_test.go b/src/runtime/metrics_test.go index 7b3132bc30..167edd57fd 100644 --- a/src/runtime/metrics_test.go +++ b/src/runtime/metrics_test.go @@ -145,6 +145,10 @@ func TestReadMetricsConsistency(t *testing.T) { for i := range h.Counts { gc.pauses += h.Counts[i] } + case "/sched/goroutines:goroutines": + if samples[i].Value.Uint64() < 1 { + t.Error("number of goroutines is less than one") + } } } if totalVirtual.got != totalVirtual.want { -- cgit v1.2.1 From 32d0eaa44e2d83cff6f0c1fa3d58af7627f3cd99 Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Thu, 17 Sep 2020 21:19:28 +0000 Subject: runtime: implement dumpmemstats in terms of readmemstats_m Since MemStats is now populated directly and some values are derived, avoid duplicating the logic by instead populating the heap dump directly from MemStats (external version) instead of memstats (runtime internal version). 
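The function being reworked here backs runtime/debug.WriteHeapDump; a minimal caller, for context (the output path is chosen arbitrarily for this sketch):

    package main

    import (
        "os"
        "runtime/debug"
    )

    func main() {
        f, err := os.Create("heapdump.out")
        if err != nil {
            panic(err)
        }
        defer f.Close()
        // Stops the world, then (after this change) fills a MemStats on the
        // caller's goroutine stack before dumping it to the file descriptor.
        debug.WriteHeapDump(f.Fd())
    }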
Change-Id: I0bec96bfa02d2ffd1b56475779c124a760e64238 Reviewed-on: https://go-review.googlesource.com/c/go/+/255817 Trust: Michael Knyszek Reviewed-by: Michael Pratt --- src/runtime/heapdump.go | 76 ++++++++++++++++++++++++++++--------------------- 1 file changed, 44 insertions(+), 32 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/heapdump.go b/src/runtime/heapdump.go index 6fcd9746af..33e224d587 100644 --- a/src/runtime/heapdump.go +++ b/src/runtime/heapdump.go @@ -20,8 +20,19 @@ import ( func runtime_debug_WriteHeapDump(fd uintptr) { stopTheWorld("write heap dump") + // Keep m on this G's stack instead of the system stack. + // Both readmemstats_m and writeheapdump_m have pretty large + // peak stack depths and we risk blowing the system stack. + // This is safe because the world is stopped, so we don't + // need to worry about anyone shrinking and therefore moving + // our stack. + var m MemStats systemstack(func() { - writeheapdump_m(fd) + // Call readmemstats_m here instead of deeper in + // writeheapdump_m because we might blow the system stack + // otherwise. + readmemstats_m(&m) + writeheapdump_m(fd, &m) }) startTheWorld() @@ -539,39 +550,40 @@ func dumpms() { } } -func dumpmemstats() { +//go:systemstack +func dumpmemstats(m *MemStats) { // These ints should be identical to the exported // MemStats structure and should be ordered the same // way too. dumpint(tagMemStats) - dumpint(memstats.alloc) - dumpint(memstats.total_alloc) - dumpint(memstats.sys) - dumpint(memstats.nlookup) - dumpint(memstats.nmalloc) - dumpint(memstats.nfree) - dumpint(memstats.alloc) - dumpint(memstats.heap_sys.load()) - dumpint(memstats.heap_sys.load() - memstats.heap_inuse) - dumpint(memstats.heap_inuse) - dumpint(memstats.heap_released) - dumpint(memstats.heap_objects) - dumpint(memstats.stacks_inuse) - dumpint(memstats.stacks_sys.load()) - dumpint(memstats.mspan_inuse) - dumpint(memstats.mspan_sys.load()) - dumpint(memstats.mcache_inuse) - dumpint(memstats.mcache_sys.load()) - dumpint(memstats.buckhash_sys.load()) - dumpint(memstats.gcMiscSys.load() + memstats.gcWorkBufInUse + memstats.gcProgPtrScalarBitsInUse) - dumpint(memstats.other_sys.load()) - dumpint(memstats.next_gc) - dumpint(memstats.last_gc_unix) - dumpint(memstats.pause_total_ns) + dumpint(m.Alloc) + dumpint(m.TotalAlloc) + dumpint(m.Sys) + dumpint(m.Lookups) + dumpint(m.Mallocs) + dumpint(m.Frees) + dumpint(m.HeapAlloc) + dumpint(m.HeapSys) + dumpint(m.HeapIdle) + dumpint(m.HeapInuse) + dumpint(m.HeapReleased) + dumpint(m.HeapObjects) + dumpint(m.StackInuse) + dumpint(m.StackSys) + dumpint(m.MSpanInuse) + dumpint(m.MSpanSys) + dumpint(m.MCacheInuse) + dumpint(m.MCacheSys) + dumpint(m.BuckHashSys) + dumpint(m.GCSys) + dumpint(m.OtherSys) + dumpint(m.NextGC) + dumpint(m.LastGC) + dumpint(m.PauseTotalNs) for i := 0; i < 256; i++ { - dumpint(memstats.pause_ns[i]) + dumpint(m.PauseNs[i]) } - dumpint(uint64(memstats.numgc)) + dumpint(uint64(m.NumGC)) } func dumpmemprof_callback(b *bucket, nstk uintptr, pstk *uintptr, size, allocs, frees uintptr) { @@ -642,7 +654,7 @@ func dumpmemprof() { var dumphdr = []byte("go1.7 heap dump\n") -func mdump() { +func mdump(m *MemStats) { // make sure we're done sweeping for _, s := range mheap_.allspans { if s.state.get() == mSpanInUse { @@ -657,13 +669,13 @@ func mdump() { dumpgs() dumpms() dumproots() - dumpmemstats() + dumpmemstats(m) dumpmemprof() dumpint(tagEOF) flush() } -func writeheapdump_m(fd uintptr) { +func writeheapdump_m(fd uintptr, m *MemStats) { _g_ := getg() casgstatus(_g_.m.curg, 
_Grunning, _Gwaiting) _g_.waitreason = waitReasonDumpingHeap @@ -677,7 +689,7 @@ func writeheapdump_m(fd uintptr) { dumpfd = fd // Call dump routine. - mdump() + mdump(m) // Reset dump file. dumpfd = 0 -- cgit v1.2.1 From 76bce1dd52b0c2a06d48bf7db4e89e8dec47c507 Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Tue, 14 Jul 2020 21:45:16 +0000 Subject: runtime: implement addrRanges.findSucc with a binary search This change modifies addrRanges.findSucc to more efficiently find the successor range in an addrRanges by using a binary search to narrow down large addrRanges and iterate over no more than 8 addrRanges. This change makes the runtime more robust against systems that may aggressively randomize the address space mappings it gives the runtime (e.g. Fuchsia). For #40191. Change-Id: If529df2abd2edb1b1496d8690ddd284ecd7138c2 Reviewed-on: https://go-review.googlesource.com/c/go/+/242679 Trust: Michael Knyszek Reviewed-by: Austin Clements Reviewed-by: Michael Pratt --- src/runtime/mranges.go | 40 +++++++++++++++++++++++++++++++++------- 1 file changed, 33 insertions(+), 7 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/mranges.go b/src/runtime/mranges.go index 16acadcff1..84a2c06dbb 100644 --- a/src/runtime/mranges.go +++ b/src/runtime/mranges.go @@ -172,20 +172,46 @@ func (a *addrRanges) init(sysStat *sysMemStat) { a.totalBytes = 0 } -// findSucc returns the first index in a such that base is +// findSucc returns the first index in a such that addr is // less than the base of the addrRange at that index. func (a *addrRanges) findSucc(addr uintptr) int { - // TODO(mknyszek): Consider a binary search for large arrays. - // While iterating over these ranges is potentially expensive, - // the expected number of ranges is small, ideally just 1, - // since Go heaps are usually mostly contiguous. base := offAddr{addr} - for i := range a.ranges { + + // Narrow down the search space via a binary search + // for large addrRanges until we have at most iterMax + // candidates left. + const iterMax = 8 + bot, top := 0, len(a.ranges) + for top-bot > iterMax { + i := ((top - bot) / 2) + bot + if a.ranges[i].contains(base.addr()) { + // a.ranges[i] contains base, so + // its successor is the next index. + return i + 1 + } + if base.lessThan(a.ranges[i].base) { + // In this case i might actually be + // the successor, but we can't be sure + // until we check the ones before it. + top = i + } else { + // In this case we know base is + // greater than or equal to a.ranges[i].limit-1, + // so i is definitely not the successor. + // We already checked i, so pick the next + // one. + bot = i + 1 + } + } + // There are top-bot candidates left, so + // iterate over them and find the first that + // base is strictly less than. + for i := bot; i < top; i++ { if base.lessThan(a.ranges[i].base) { return i } } - return len(a.ranges) + return top } // findAddrGreaterEqual returns the smallest address represented by a -- cgit v1.2.1 From 8fdc79e18a9704185bd6471b592db1e8004bd993 Mon Sep 17 00:00:00 2001 From: Chris Hines Date: Fri, 1 May 2020 17:04:36 -0400 Subject: runtime: reduce timer latency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change the scheduler to treat expired timers with the same approach it uses to steal runnable G's. Previously the scheduler ignored timers on P's not marked for preemption. That had the downside that any G's waiting on those expired timers starved until the G running on their P completed or was preempted. 
That could take as long as 20ms if sysmon was in a 10ms wake up cycle. In addition, a spinning P that ignored an expired timer and found no other work would stop despite there being available work, missing the opportunity for greater parallelism. With this change the scheduler no longer ignores timers on non-preemptable P's or relies on sysmon as a backstop to start threads when timers expire. Instead it wakes an idle P, if needed, when creating a new timer because it cannot predict if the current P will have a scheduling opportunity before the new timer expires. The P it wakes will determine how long to sleep and block on the netpoller for the required time, potentially stealing the new timer when it wakes. This change also eliminates a race between a spinning P transitioning to idle concurrently with timer creation using the same pattern used for submission of new goroutines in the same window. Benchmark analysis: CL 232199, which was included in Go 1.15 improved timer latency over Go 1.14 by allowing P's to steal timers from P's not marked for preemption. The benchmarks added in this CL measure that improvement in the ParallelTimerLatency benchmark seen below. However, Go 1.15 still relies on sysmon to notice expired timers in some situations and sysmon can sleep for up to 10ms before waking to check timers. This CL fixes that shortcoming with modest regression on other benchmarks. name \ avg-late-ns go14.time.bench go15.time.bench fix.time.bench ParallelTimerLatency-8 17.3M ± 3% 7.9M ± 0% 0.2M ± 3% StaggeredTickerLatency/work-dur=300µs/tickers-per-P=1-8 53.4k ±23% 50.7k ±31% 252.4k ± 9% StaggeredTickerLatency/work-dur=300µs/tickers-per-P=2-8 204k ±14% 90k ±58% 188k ±12% StaggeredTickerLatency/work-dur=300µs/tickers-per-P=3-8 1.17M ± 0% 0.11M ± 5% 0.11M ± 2% StaggeredTickerLatency/work-dur=300µs/tickers-per-P=4-8 1.81M ±44% 0.10M ± 4% 0.10M ± 2% StaggeredTickerLatency/work-dur=300µs/tickers-per-P=5-8 2.28M ±66% 0.09M ±13% 0.08M ±21% StaggeredTickerLatency/work-dur=300µs/tickers-per-P=6-8 2.84M ±85% 0.07M ±15% 0.07M ±18% StaggeredTickerLatency/work-dur=300µs/tickers-per-P=7-8 2.13M ±27% 0.06M ± 4% 0.06M ± 9% StaggeredTickerLatency/work-dur=300µs/tickers-per-P=8-8 2.63M ± 6% 0.06M ±11% 0.06M ± 9% StaggeredTickerLatency/work-dur=300µs/tickers-per-P=9-8 3.32M ±17% 0.06M ±16% 0.07M ±14% StaggeredTickerLatency/work-dur=300µs/tickers-per-P=10-8 8.46M ±20% 4.37M ±21% 5.03M ±23% StaggeredTickerLatency/work-dur=2ms/tickers-per-P=1-8 1.02M ± 1% 0.20M ± 2% 0.20M ± 2% name \ max-late-ns go14.time.bench go15.time.bench fix.time.bench ParallelTimerLatency-8 18.3M ± 1% 8.2M ± 0% 0.5M ±12% StaggeredTickerLatency/work-dur=300µs/tickers-per-P=1-8 141k ±19% 127k ±19% 1129k ± 3% StaggeredTickerLatency/work-dur=300µs/tickers-per-P=2-8 2.78M ± 4% 1.23M ±15% 1.26M ± 5% StaggeredTickerLatency/work-dur=300µs/tickers-per-P=3-8 6.05M ± 5% 0.67M ±56% 0.81M ±33% StaggeredTickerLatency/work-dur=300µs/tickers-per-P=4-8 7.93M ±20% 0.71M ±46% 0.76M ±41% StaggeredTickerLatency/work-dur=300µs/tickers-per-P=5-8 9.41M ±30% 0.92M ±23% 0.81M ±44% StaggeredTickerLatency/work-dur=300µs/tickers-per-P=6-8 10.8M ±42% 0.8M ±41% 0.8M ±30% StaggeredTickerLatency/work-dur=300µs/tickers-per-P=7-8 9.62M ±24% 0.77M ±38% 0.88M ±27% StaggeredTickerLatency/work-dur=300µs/tickers-per-P=8-8 10.6M ±10% 0.8M ±32% 0.7M ±27% StaggeredTickerLatency/work-dur=300µs/tickers-per-P=9-8 11.9M ±36% 0.6M ±46% 0.8M ±38% StaggeredTickerLatency/work-dur=300µs/tickers-per-P=10-8 36.8M ±21% 24.7M ±21% 27.5M ±16% 
StaggeredTickerLatency/work-dur=2ms/tickers-per-P=1-8 2.12M ± 2% 1.02M ±11% 1.03M ± 7% Other time benchmarks: name \ time/op go14.time.bench go15.time.bench fix.time.bench AfterFunc-8 137µs ± 4% 123µs ± 4% 131µs ± 2% After-8 212µs ± 3% 195µs ± 4% 204µs ± 7% Stop-8 165µs ± 6% 156µs ± 2% 151µs ±12% SimultaneousAfterFunc-8 260µs ± 3% 248µs ± 3% 284µs ± 2% StartStop-8 65.8µs ± 9% 64.4µs ± 7% 67.3µs ±15% Reset-8 13.6µs ± 2% 9.6µs ± 2% 9.1µs ± 4% Sleep-8 307µs ± 4% 306µs ± 3% 320µs ± 2% Ticker-8 53.0µs ± 5% 54.5µs ± 5% 57.0µs ±11% TickerReset-8 9.24µs ± 2% 9.51µs ± 3% TickerResetNaive-8 149µs ± 5% 145µs ± 5% Fixes #38860 Updates #25471 Updates #27707 Change-Id: If52680509b0f3b66dbd1d0c13fa574bd2d0bbd57 Reviewed-on: https://go-review.googlesource.com/c/go/+/232298 Run-TryBot: Alberto Donizetti TryBot-Result: Go Bot Reviewed-by: Austin Clements Trust: Ian Lance Taylor --- src/runtime/lockrank.go | 22 +++---- src/runtime/proc.go | 168 +++++++++++++++++++++++++++--------------------- 2 files changed, 107 insertions(+), 83 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/lockrank.go b/src/runtime/lockrank.go index 0cbbfc4f45..3f9b087856 100644 --- a/src/runtime/lockrank.go +++ b/src/runtime/lockrank.go @@ -41,12 +41,12 @@ const ( lockRankCpuprof lockRankSweep + lockRankPollDesc lockRankSched lockRankDeadlock lockRankPanic lockRankAllg lockRankAllp - lockRankPollDesc lockRankTimers // Multiple timers locked simultaneously in destroy() lockRankItab @@ -120,12 +120,12 @@ var lockNames = []string{ lockRankCpuprof: "cpuprof", lockRankSweep: "sweep", + lockRankPollDesc: "pollDesc", lockRankSched: "sched", lockRankDeadlock: "deadlock", lockRankPanic: "panic", lockRankAllg: "allg", lockRankAllp: "allp", - lockRankPollDesc: "pollDesc", lockRankTimers: "timers", lockRankItab: "itab", @@ -182,14 +182,14 @@ func (rank lockRank) String() string { return lockNames[rank] } -// lockPartialOrder is a partial order among the various lock types, listing the immediate -// ordering that has actually been observed in the runtime. Each entry (which -// corresponds to a particular lock rank) specifies the list of locks that can be -// already be held immediately "above" it. +// lockPartialOrder is a partial order among the various lock types, listing the +// immediate ordering that has actually been observed in the runtime. Each entry +// (which corresponds to a particular lock rank) specifies the list of locks +// that can already be held immediately "above" it. // -// So, for example, the lockRankSched entry shows that all the locks preceding it in -// rank can actually be held. The fin lock shows that only the sched, timers, or -// hchan lock can be held immediately above it when it is acquired. +// So, for example, the lockRankSched entry shows that all the locks preceding +// it in rank can actually be held. The allp lock shows that only the sysmon or +// sched lock can be held immediately above it when it is acquired. 
var lockPartialOrder [][]lockRank = [][]lockRank{ lockRankDummy: {}, lockRankSysmon: {}, @@ -199,12 +199,12 @@ var lockPartialOrder [][]lockRank = [][]lockRank{ lockRankAssistQueue: {}, lockRankCpuprof: {}, lockRankSweep: {}, - lockRankSched: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep}, + lockRankPollDesc: {}, + lockRankSched: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankPollDesc}, lockRankDeadlock: {lockRankDeadlock}, lockRankPanic: {lockRankDeadlock}, lockRankAllg: {lockRankSysmon, lockRankSched, lockRankPanic}, lockRankAllp: {lockRankSysmon, lockRankSched}, - lockRankPollDesc: {}, lockRankTimers: {lockRankSysmon, lockRankScavenge, lockRankSched, lockRankAllp, lockRankPollDesc, lockRankTimers}, lockRankItab: {}, lockRankReflectOffs: {lockRankItab}, diff --git a/src/runtime/proc.go b/src/runtime/proc.go index ebecc92745..6feecef985 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -2264,11 +2264,16 @@ func handoffp(_p_ *p) { startm(_p_, false) return } - if when := nobarrierWakeTime(_p_); when != 0 { - wakeNetPoller(when) - } + + // The scheduler lock cannot be held when calling wakeNetPoller below + // because wakeNetPoller may call wakep which may call startm. + when := nobarrierWakeTime(_p_) pidleput(_p_) unlock(&sched.lock) + + if when != 0 { + wakeNetPoller(when) + } } // Tries to add one more P to execute G's. @@ -2477,40 +2482,33 @@ top: _g_.m.spinning = true atomic.Xadd(&sched.nmspinning, 1) } - for i := 0; i < 4; i++ { + const stealTries = 4 + for i := 0; i < stealTries; i++ { + stealTimersOrRunNextG := i == stealTries-1 + for enum := stealOrder.start(fastrand()); !enum.done(); enum.next() { if sched.gcwaiting != 0 { goto top } - stealRunNextG := i > 2 // first look for ready queues with more than 1 g p2 := allp[enum.position()] if _p_ == p2 { continue } - // Don't bother to attempt to steal if p2 is idle. - if !idlepMask.read(enum.position()) { - if gp := runqsteal(_p_, p2, stealRunNextG); gp != nil { - return gp, false - } - } - - // Consider stealing timers from p2. - // This call to checkTimers is the only place where - // we hold a lock on a different P's timers. - // Lock contention can be a problem here, so - // initially avoid grabbing the lock if p2 is running - // and is not marked for preemption. If p2 is running - // and not being preempted we assume it will handle its - // own timers. + // Steal timers from p2. This call to checkTimers is the only place + // where we might hold a lock on a different P's timers. We do this + // once on the last pass before checking runnext because stealing + // from the other P's runnext should be the last resort, so if there + // are timers to steal do that first. // - // If we're still looking for work after checking all - // the P's, then go ahead and steal from an active P. + // We only check timers on one of the stealing iterations because + // the time stored in now doesn't change in this loop and checking + // the timers for each P more than once with the same value of now + // is probably a waste of time. // - // TODO(prattmic): Maintain a global look-aside similar - // to idlepMask to avoid looking at p2 if it can't - // possibly have timers. - if i > 2 || (i > 1 && shouldStealTimers(p2)) { + // TODO(prattmic): Maintain a global look-aside similar to idlepMask + // to avoid looking at p2 if it can't possibly have timers. 
+ if stealTimersOrRunNextG { tnow, w, ran := checkTimers(p2, now) now = tnow if w != 0 && (pollUntil == 0 || w < pollUntil) { @@ -2531,6 +2529,13 @@ top: ranTimer = true } } + + // Don't bother to attempt to steal if p2 is idle. + if !idlepMask.read(enum.position()) { + if gp := runqsteal(_p_, p2, stealTimersOrRunNextG); gp != nil { + return gp, false + } + } } } if ranTimer { @@ -2606,7 +2611,7 @@ stop: // drop nmspinning first and then check all per-P queues again (with // #StoreLoad memory barrier in between). If we do it the other way around, // another thread can submit a goroutine after we've checked all run queues - // but before we drop nmspinning; as the result nobody will unpark a thread + // but before we drop nmspinning; as a result nobody will unpark a thread // to run the goroutine. // If we discover new work below, we need to restore m.spinning as a signal // for resetspinning to unpark a new worker thread (because there can be more @@ -2640,6 +2645,35 @@ stop: } } + // Similar to above, check for timer creation or expiry concurrently with + // transitioning from spinning to non-spinning. Note that we cannot use + // checkTimers here because it calls adjusttimers which may need to allocate + // memory, and that isn't allowed when we don't have an active P. + for _, _p_ := range allpSnapshot { + // This is similar to nobarrierWakeTime, but minimizes calls to + // nanotime. + if atomic.Load(&_p_.adjustTimers) > 0 { + if now == 0 { + now = nanotime() + } + pollUntil = now + } else { + w := int64(atomic.Load64(&_p_.timer0When)) + if w != 0 && (pollUntil == 0 || w < pollUntil) { + pollUntil = w + } + } + } + if pollUntil != 0 { + if now == 0 { + now = nanotime() + } + delta = pollUntil - now + if delta < 0 { + delta = 0 + } + } + // Check for idle-priority GC work again. if gcBlackenEnabled != 0 && gcMarkWorkAvailable(nil) { lock(&sched.lock) @@ -2735,9 +2769,9 @@ func pollWork() bool { return false } -// wakeNetPoller wakes up the thread sleeping in the network poller, -// if there is one, and if it isn't going to wake up anyhow before -// the when argument. +// wakeNetPoller wakes up the thread sleeping in the network poller if it isn't +// going to wake up before the when argument; or it wakes an idle P to service +// timers and the network poller if there isn't one already. func wakeNetPoller(when int64) { if atomic.Load64(&sched.lastpoll) == 0 { // In findrunnable we ensure that when polling the pollUntil @@ -2748,6 +2782,10 @@ func wakeNetPoller(when int64) { if pollerPollUntil == 0 || pollerPollUntil > when { netpollBreak() } + } else { + // There are no threads in the network poller, try to get + // one there so it can handle new timers. + wakep() } } @@ -3034,25 +3072,6 @@ func checkTimers(pp *p, now int64) (rnow, pollUntil int64, ran bool) { return rnow, pollUntil, ran } -// shouldStealTimers reports whether we should try stealing the timers from p2. -// We don't steal timers from a running P that is not marked for preemption, -// on the assumption that it will run its own timers. This reduces -// contention on the timers lock. 
-func shouldStealTimers(p2 *p) bool { - if p2.status != _Prunning { - return true - } - mp := p2.m.ptr() - if mp == nil || mp.locks > 0 { - return false - } - gp := mp.curg - if gp == nil || gp.atomicstatus != _Grunning || !gp.preempt { - return false - } - return true -} - func parkunlock_c(gp *g, lock unsafe.Pointer) bool { unlock((*mutex)(lock)) return true @@ -4603,7 +4622,7 @@ func procresize(nprocs int32) *p { } sched.procresizetime = now - maskWords := (nprocs+31) / 32 + maskWords := (nprocs + 31) / 32 // Grow allp if necessary. if nprocs > int32(len(allp)) { @@ -4927,11 +4946,28 @@ func sysmon() { } usleep(delay) mDoFixup() + + // sysmon should not enter deep sleep if schedtrace is enabled so that + // it can print that information at the right time. + // + // It should also not enter deep sleep if there are any active P's so + // that it can retake P's from syscalls, preempt long running G's, and + // poll the network if all P's are busy for long stretches. + // + // It should wakeup from deep sleep if any P's become active either due + // to exiting a syscall or waking up due to a timer expiring so that it + // can resume performing those duties. If it wakes from a syscall it + // resets idle and delay as a bet that since it had retaken a P from a + // syscall before, it may need to do it again shortly after the + // application starts work again. It does not reset idle when waking + // from a timer to avoid adding system load to applications that spend + // most of their time sleeping. now := nanotime() - next, _ := timeSleepUntil() if debug.schedtrace <= 0 && (sched.gcwaiting != 0 || atomic.Load(&sched.npidle) == uint32(gomaxprocs)) { lock(&sched.lock) if atomic.Load(&sched.gcwaiting) != 0 || atomic.Load(&sched.npidle) == uint32(gomaxprocs) { + syscallWake := false + next, _ := timeSleepUntil() if next > now { atomic.Store(&sched.sysmonwait, 1) unlock(&sched.lock) @@ -4945,33 +4981,27 @@ func sysmon() { if shouldRelax { osRelax(true) } - notetsleep(&sched.sysmonnote, sleep) + syscallWake = notetsleep(&sched.sysmonnote, sleep) mDoFixup() if shouldRelax { osRelax(false) } - now = nanotime() - next, _ = timeSleepUntil() lock(&sched.lock) atomic.Store(&sched.sysmonwait, 0) noteclear(&sched.sysmonnote) } - idle = 0 - delay = 20 + if syscallWake { + idle = 0 + delay = 20 + } } unlock(&sched.lock) } + lock(&sched.sysmonlock) - { - // If we spent a long time blocked on sysmonlock - // then we want to update now and next since it's - // likely stale. - now1 := nanotime() - if now1-now > 50*1000 /* 50µs */ { - next, _ = timeSleepUntil() - } - now = now1 - } + // Update now in case we blocked on sysmonnote or spent a long time + // blocked on schedlock or sysmonlock above. + now = nanotime() // trigger libc interceptors if needed if *cgo_yield != nil { @@ -4996,12 +5026,6 @@ func sysmon() { } } mDoFixup() - if next < now { - // There are timers that should have already run, - // perhaps because there is an unpreemptible P. - // Try to start an M to run them. - startm(nil, false) - } if atomic.Load(&scavenge.sysmonWake) != 0 { // Kick the scavenger awake if someone requested it. 
wakeScavenger() -- cgit v1.2.1 From c515852732a490bab64f35d001ddc444b0f0f553 Mon Sep 17 00:00:00 2001 From: Heisenberg Date: Tue, 8 Sep 2020 14:31:39 +0800 Subject: runtime: add 2-byte and 8-byte sub-benchmarks for memmove load/store Change-Id: I6389d7efe90836b6ece44d2e75053d1ad9f35d08 Reviewed-on: https://go-review.googlesource.com/c/go/+/253417 Trust: Emmanuel Odeke Reviewed-by: Keith Randall --- src/runtime/memmove_test.go | 39 ++++++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 15 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/memmove_test.go b/src/runtime/memmove_test.go index 396c1304c5..b549433f71 100644 --- a/src/runtime/memmove_test.go +++ b/src/runtime/memmove_test.go @@ -538,21 +538,30 @@ func BenchmarkCopyFat1024(b *testing.B) { } } +// BenchmarkIssue18740 ensures that memmove uses 4 and 8 byte load/store to move 4 and 8 bytes. +// It used to do 2 2-byte load/stores, which leads to a pipeline stall +// when we try to read the result with one 4-byte load. func BenchmarkIssue18740(b *testing.B) { - // This tests that memmove uses one 4-byte load/store to move 4 bytes. - // It used to do 2 2-byte load/stores, which leads to a pipeline stall - // when we try to read the result with one 4-byte load. - var buf [4]byte - for j := 0; j < b.N; j++ { - s := uint32(0) - for i := 0; i < 4096; i += 4 { - copy(buf[:], g[i:]) - s += binary.LittleEndian.Uint32(buf[:]) - } - sink = uint64(s) + benchmarks := []struct { + name string + nbyte int + f func([]byte) uint64 + }{ + {"2byte", 2, func(buf []byte) uint64 { return uint64(binary.LittleEndian.Uint16(buf)) }}, + {"4byte", 4, func(buf []byte) uint64 { return uint64(binary.LittleEndian.Uint32(buf)) }}, + {"8byte", 8, func(buf []byte) uint64 { return binary.LittleEndian.Uint64(buf) }}, + } + + var g [4096]byte + for _, bm := range benchmarks { + buf := make([]byte, bm.nbyte) + b.Run(bm.name, func(b *testing.B) { + for j := 0; j < b.N; j++ { + for i := 0; i < 4096; i += bm.nbyte { + copy(buf[:], g[i:]) + sink += bm.f(buf[:]) + } + } + }) } } - -// TODO: 2 byte and 8 byte benchmarks also. - -var g [4096]byte -- cgit v1.2.1 From 009d71409821a6ac4f1b32aaae2c856c20a29f92 Mon Sep 17 00:00:00 2001 From: Keith Randall Date: Thu, 22 Oct 2020 16:37:19 -0700 Subject: cmd/compile, runtime: store pointers to go:notinheap types indirectly pointers to go:notinheap types should be treated as scalars. That means they shouldn't be stored directly in interfaces, or directly in reflect.Value.ptr. Also be sure to use uintpr to compare such pointers in reflect.DeepEqual. 
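As a rough user-level sketch of the indirection (not part of this CL and only an illustration: it assumes the gc toolchain's two-word empty-interface layout, and the type and helper names here are made up, since go:notinheap cannot be used outside the runtime), the difference between storing a pointer directly in an interface and storing it behind an extra field looks like this:

    package main

    import (
        "fmt"
        "unsafe"
    )

    // eface mirrors the assumed empty-interface layout: a type word
    // followed by a data word.
    type eface struct {
        typ, data unsafe.Pointer
    }

    type T struct{ x uint64 }

    // holder keeps the pointer in an ordinary field so that an interface
    // can carry &h.self (a pointer to the pointer) instead of the pointer
    // itself -- the same shape as the pollDesc.self/makeArg change below.
    type holder struct{ self *T }

    func main() {
        t := &T{x: 1}
        var direct interface{} = t // data word is t itself

        h := &holder{self: t}
        var indirect interface{} = &h.self // data word is &h.self

        d := (*eface)(unsafe.Pointer(&direct))
        i := (*eface)(unsafe.Pointer(&indirect))
        fmt.Println(d.data == unsafe.Pointer(t))       // true: pointer stored directly
        fmt.Println(i.data == unsafe.Pointer(&h.self)) // true: one extra hop
        fmt.Println(*(**T)(i.data) == t)               // the original pointer is still reachable
    }

In the change below, pollDesc keeps that extra hop in its own self field, so the conversion to interface{} needs no allocation.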
Fixes #42076 Change-Id: I53735f6d434e9c3108d4940bd1bae14c61ef2a74 Reviewed-on: https://go-review.googlesource.com/c/go/+/264480 Trust: Keith Randall Reviewed-by: Ian Lance Taylor --- src/runtime/netpoll.go | 47 +++++++++++++++++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 14 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/netpoll.go b/src/runtime/netpoll.go index 34ea82a7fa..77eb3aa4c6 100644 --- a/src/runtime/netpoll.go +++ b/src/runtime/netpoll.go @@ -79,16 +79,17 @@ type pollDesc struct { lock mutex // protects the following fields fd uintptr closing bool - everr bool // marks event scanning error happened - user uint32 // user settable cookie - rseq uintptr // protects from stale read timers - rg uintptr // pdReady, pdWait, G waiting for read or nil - rt timer // read deadline timer (set if rt.f != nil) - rd int64 // read deadline - wseq uintptr // protects from stale write timers - wg uintptr // pdReady, pdWait, G waiting for write or nil - wt timer // write deadline timer - wd int64 // write deadline + everr bool // marks event scanning error happened + user uint32 // user settable cookie + rseq uintptr // protects from stale read timers + rg uintptr // pdReady, pdWait, G waiting for read or nil + rt timer // read deadline timer (set if rt.f != nil) + rd int64 // read deadline + wseq uintptr // protects from stale write timers + wg uintptr // pdReady, pdWait, G waiting for write or nil + wt timer // write deadline timer + wd int64 // write deadline + self *pollDesc // storage for indirect interface. See (*pollDesc).makeArg. } type pollCache struct { @@ -157,6 +158,7 @@ func poll_runtime_pollOpen(fd uintptr) (*pollDesc, int) { pd.wseq++ pd.wg = 0 pd.wd = 0 + pd.self = pd unlock(&pd.lock) var errno int32 @@ -271,14 +273,14 @@ func poll_runtime_pollSetDeadline(pd *pollDesc, d int64, mode int) { // Copy current seq into the timer arg. // Timer func will check the seq against current descriptor seq, // if they differ the descriptor was reused or timers were reset. - pd.rt.arg = pd + pd.rt.arg = pd.makeArg() pd.rt.seq = pd.rseq resettimer(&pd.rt, pd.rd) } } else if pd.rd != rd0 || combo != combo0 { pd.rseq++ // invalidate current timers if pd.rd > 0 { - modtimer(&pd.rt, pd.rd, 0, rtf, pd, pd.rseq) + modtimer(&pd.rt, pd.rd, 0, rtf, pd.makeArg(), pd.rseq) } else { deltimer(&pd.rt) pd.rt.f = nil @@ -287,14 +289,14 @@ func poll_runtime_pollSetDeadline(pd *pollDesc, d int64, mode int) { if pd.wt.f == nil { if pd.wd > 0 && !combo { pd.wt.f = netpollWriteDeadline - pd.wt.arg = pd + pd.wt.arg = pd.makeArg() pd.wt.seq = pd.wseq resettimer(&pd.wt, pd.wd) } } else if pd.wd != wd0 || combo != combo0 { pd.wseq++ // invalidate current timers if pd.wd > 0 && !combo { - modtimer(&pd.wt, pd.wd, 0, netpollWriteDeadline, pd, pd.wseq) + modtimer(&pd.wt, pd.wd, 0, netpollWriteDeadline, pd.makeArg(), pd.wseq) } else { deltimer(&pd.wt) pd.wt.f = nil @@ -547,3 +549,20 @@ func (c *pollCache) alloc() *pollDesc { unlock(&c.lock) return pd } + +// makeArg converts pd to an interface{}. +// makeArg does not do any allocation. Normally, such +// a conversion requires an allocation because pointers to +// go:notinheap types (which pollDesc is) must be stored +// in interfaces indirectly. See issue 42076. 
+func (pd *pollDesc) makeArg() (i interface{}) { + x := (*eface)(unsafe.Pointer(&i)) + x._type = pdType + x.data = unsafe.Pointer(&pd.self) + return +} + +var ( + pdEface interface{} = (*pollDesc)(nil) + pdType *_type = efaceOf(&pdEface)._type +) -- cgit v1.2.1 From 091257def92b0280b07bde9536b7cdf5f3b02aec Mon Sep 17 00:00:00 2001 From: Keith Randall Date: Tue, 27 Oct 2020 14:15:00 -0700 Subject: cmd/compile: print pointers to go:notinheap types without converting to unsafe.Pointer Pretty minor concern, but after auditing the compiler/runtime for conversions from pointers to go:notinheap types to unsafe.Pointer, this is the only remaining one I found. Update #42076 Change-Id: I81d5b893c9ada2fc19a51c2559262f2e9ff71c35 Reviewed-on: https://go-review.googlesource.com/c/go/+/265757 Trust: Keith Randall Run-TryBot: Keith Randall TryBot-Result: Go Bot Reviewed-by: Matthew Dempsky --- src/runtime/print.go | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/runtime') diff --git a/src/runtime/print.go b/src/runtime/print.go index e605eb34cb..64055a34cc 100644 --- a/src/runtime/print.go +++ b/src/runtime/print.go @@ -237,6 +237,9 @@ func printhex(v uint64) { func printpointer(p unsafe.Pointer) { printhex(uint64(uintptr(p))) } +func printuintptr(p uintptr) { + printhex(uint64(p)) +} func printstring(s string) { gwrite(bytes(s)) -- cgit v1.2.1 From b4b014465216790e01aa66f9120d03230e4aff46 Mon Sep 17 00:00:00 2001 From: Ian Lance Taylor Date: Tue, 29 Sep 2020 17:01:33 -0700 Subject: runtime: don't always adjust timers Some programs have a lot of timers that they adjust both forward and backward in time. This can cause a large number of timerModifiedEarlier timers. In practice these timers are used for I/O deadlines and are rarely reached. The effect is that the runtime spends a lot of time in adjusttimers making sure that there are no timerModifiedEarlier timers, but the effort is wasted because none of the adjusted timers are near the top of the timer heap anyhow. Avoid much of this extra work by keeping track of the earliest known timerModifiedEarlier timer. This lets us skip adjusttimers if we know that none of the timers will be ready to run anyhow. We will still eventually run it, when we reach the deadline of the earliest known timerModifiedEarlier, although in practice that timer has likely been removed. When we do run adjusttimers, we will reset all of the timerModifiedEarlier timers, and clear our notion of when we need to run adjusttimers again. This effect should be to significantly reduce the number of times we walk through the timer list in adjusttimers. Fixes #41699 Change-Id: I38eb2be611fb34e3017bb33d0a9ed40d75fb414f Reviewed-on: https://go-review.googlesource.com/c/go/+/258303 Trust: Ian Lance Taylor Trust: Emmanuel Odeke Run-TryBot: Ian Lance Taylor TryBot-Result: Go Bot Reviewed-by: Michael Knyszek --- src/runtime/proc.go | 52 +++++++++++++------------- src/runtime/runtime2.go | 7 ++++ src/runtime/time.go | 99 +++++++++++++++++++++++++++---------------------- 3 files changed, 88 insertions(+), 70 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/proc.go b/src/runtime/proc.go index 6feecef985..87d4b6e568 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -3017,40 +3017,40 @@ func dropg() { // We pass now in and out to avoid extra calls of nanotime. 
//go:yeswritebarrierrec func checkTimers(pp *p, now int64) (rnow, pollUntil int64, ran bool) { - // If there are no timers to adjust, and the first timer on - // the heap is not yet ready to run, then there is nothing to do. - if atomic.Load(&pp.adjustTimers) == 0 { - next := int64(atomic.Load64(&pp.timer0When)) - if next == 0 { - return now, 0, false - } - if now == 0 { - now = nanotime() - } - if now < next { - // Next timer is not ready to run. - // But keep going if we would clear deleted timers. - // This corresponds to the condition below where - // we decide whether to call clearDeletedTimers. - if pp != getg().m.p.ptr() || int(atomic.Load(&pp.deletedTimers)) <= int(atomic.Load(&pp.numTimers)/4) { - return now, next, false - } + // If it's not yet time for the first timer, or the first adjusted + // timer, then there is nothing to do. + next := int64(atomic.Load64(&pp.timer0When)) + nextAdj := int64(atomic.Load64(&pp.timerModifiedEarliest)) + if next == 0 || (nextAdj != 0 && nextAdj < next) { + next = nextAdj + } + + if next == 0 { + // No timers to run or adjust. + return now, 0, false + } + + if now == 0 { + now = nanotime() + } + if now < next { + // Next timer is not ready to run, but keep going + // if we would clear deleted timers. + // This corresponds to the condition below where + // we decide whether to call clearDeletedTimers. + if pp != getg().m.p.ptr() || int(atomic.Load(&pp.deletedTimers)) <= int(atomic.Load(&pp.numTimers)/4) { + return now, next, false } } lock(&pp.timersLock) - adjusttimers(pp) - - rnow = now if len(pp.timers) > 0 { - if rnow == 0 { - rnow = nanotime() - } + adjusttimers(pp, now) for len(pp.timers) > 0 { // Note that runtimer may temporarily unlock // pp.timersLock. - if tw := runtimer(pp, rnow); tw != 0 { + if tw := runtimer(pp, now); tw != 0 { if tw > 0 { pollUntil = tw } @@ -3069,7 +3069,7 @@ func checkTimers(pp *p, now int64) (rnow, pollUntil int64, ran bool) { unlock(&pp.timersLock) - return rnow, pollUntil, ran + return now, pollUntil, ran } func parkunlock_c(gp *g, lock unsafe.Pointer) bool { diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go index 7bac5fd38d..a2e4411c7d 100644 --- a/src/runtime/runtime2.go +++ b/src/runtime/runtime2.go @@ -646,6 +646,13 @@ type p struct { // This is 0 if the timer heap is empty. timer0When uint64 + // The earliest known nextwhen field of a timer with + // timerModifiedEarlier status. Because the timer may have been + // modified again, there need not be any timer with this value. + // This is updated using atomic functions. + // This is 0 if the value is unknown. + timerModifiedEarliest uint64 + // Per-P GC state gcAssistTime int64 // Nanoseconds in assistAlloc gcFractionalMarkTime int64 // Nanoseconds in fractional mark worker (atomic) diff --git a/src/runtime/time.go b/src/runtime/time.go index f895bf8443..99290f66d0 100644 --- a/src/runtime/time.go +++ b/src/runtime/time.go @@ -491,6 +491,8 @@ loop: newStatus = timerModifiedEarlier } + tpp := t.pp.ptr() + // Update the adjustTimers field. Subtract one if we // are removing a timerModifiedEarlier, add one if we // are adding a timerModifiedEarlier. @@ -500,9 +502,10 @@ loop: } if newStatus == timerModifiedEarlier { adjust++ + updateTimerModifiedEarliest(tpp, when) } if adjust != 0 { - atomic.Xadd(&t.pp.ptr().adjustTimers, adjust) + atomic.Xadd(&tpp.adjustTimers, adjust) } // Set the new status of the timer. @@ -637,16 +640,36 @@ func moveTimers(pp *p, timers []*timer) { // the correct place in the heap. 
While looking for those timers, // it also moves timers that have been modified to run later, // and removes deleted timers. The caller must have locked the timers for pp. -func adjusttimers(pp *p) { - if len(pp.timers) == 0 { - return - } +func adjusttimers(pp *p, now int64) { if atomic.Load(&pp.adjustTimers) == 0 { if verifyTimers { verifyTimerHeap(pp) } + // There are no timers to adjust, so it is safe to clear + // timerModifiedEarliest. Do so in case it is stale. + // Everything will work if we don't do this, + // but clearing here may save future calls to adjusttimers. + atomic.Store64(&pp.timerModifiedEarliest, 0) return } + + // If we haven't yet reached the time of the first timerModifiedEarlier + // timer, don't do anything. This speeds up programs that adjust + // a lot of timers back and forth if the timers rarely expire. + // We'll postpone looking through all the adjusted timers until + // one would actually expire. + if first := atomic.Load64(&pp.timerModifiedEarliest); first != 0 { + if int64(first) > now { + if verifyTimers { + verifyTimerHeap(pp) + } + return + } + + // We are going to clear all timerModifiedEarlier timers. + atomic.Store64(&pp.timerModifiedEarliest, 0) + } + var moved []*timer loop: for i := 0; i < len(pp.timers); i++ { @@ -868,6 +891,10 @@ func runOneTimer(pp *p, t *timer, now int64) { // // The caller must have locked the timers for pp. func clearDeletedTimers(pp *p) { + // We are going to clear all timerModifiedEarlier timers. + // Do this now in case new ones show up while we are looping. + atomic.Store64(&pp.timerModifiedEarliest, 0) + cdel := int32(0) cearlier := int32(0) to := 0 @@ -977,6 +1004,21 @@ func updateTimer0When(pp *p) { } } +// updateTimerModifiedEarliest updates the recorded nextwhen field of the +// earlier timerModifiedEarier value. +// The timers for pp will not be locked. +func updateTimerModifiedEarliest(pp *p, nextwhen int64) { + for { + old := atomic.Load64(&pp.timerModifiedEarliest) + if old != 0 && int64(old) < nextwhen { + return + } + if atomic.Cas64(&pp.timerModifiedEarliest, old, uint64(nextwhen)) { + return + } + } +} + // timeSleepUntil returns the time when the next timer should fire, // and the P that holds the timer heap that that timer is on. // This is only called by sysmon and checkdead. @@ -993,48 +1035,17 @@ func timeSleepUntil() (int64, *p) { continue } - c := atomic.Load(&pp.adjustTimers) - if c == 0 { - w := int64(atomic.Load64(&pp.timer0When)) - if w != 0 && w < next { - next = w - pret = pp - } - continue + w := int64(atomic.Load64(&pp.timer0When)) + if w != 0 && w < next { + next = w + pret = pp } - lock(&pp.timersLock) - for _, t := range pp.timers { - switch s := atomic.Load(&t.status); s { - case timerWaiting: - if t.when < next { - next = t.when - } - case timerModifiedEarlier, timerModifiedLater: - if t.nextwhen < next { - next = t.nextwhen - } - if s == timerModifiedEarlier { - c-- - } - } - // The timers are sorted, so we only have to check - // the first timer for each P, unless there are - // some timerModifiedEarlier timers. The number - // of timerModifiedEarlier timers is in the adjustTimers - // field, used to initialize c, above. - // - // We don't worry about cases like timerModifying. - // New timers can show up at any time, - // so this function is necessarily imprecise. - // Do a signed check here since we aren't - // synchronizing the read of pp.adjustTimers - // with the check of a timer status. 
- if int32(c) <= 0 { - break - } + w = int64(atomic.Load64(&pp.timerModifiedEarliest)) + if w != 0 && w < next { + next = w + pret = pp } - unlock(&pp.timersLock) } unlock(&allpLock) -- cgit v1.2.1 From 49b017fe59bf628795f2c4fdbcb5db942e865fa9 Mon Sep 17 00:00:00 2001 From: George Tsilias Date: Thu, 4 Jun 2020 23:11:56 +0300 Subject: runtime: handle signal 34 for musl setgid It has been observed that setgid hangs when using cgo with musl. This fix ensures that signal 34 gets handled in an appropriate way, like signal 33 when using glibc. Fixes #39343 Change-Id: I89565663e2c361f62cbccfe80aaedf290bd58d57 Reviewed-on: https://go-review.googlesource.com/c/go/+/236518 Run-TryBot: Tobias Klauser TryBot-Result: Go Bot Trust: Tobias Klauser Reviewed-by: Ian Lance Taylor --- src/runtime/sigtab_linux_generic.go | 2 +- src/runtime/sigtab_linux_mipsx.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/sigtab_linux_generic.go b/src/runtime/sigtab_linux_generic.go index b26040b803..38d686544f 100644 --- a/src/runtime/sigtab_linux_generic.go +++ b/src/runtime/sigtab_linux_generic.go @@ -45,7 +45,7 @@ var sigtable = [...]sigTabT{ /* 31 */ {_SigThrow, "SIGSYS: bad system call"}, /* 32 */ {_SigSetStack + _SigUnblock, "signal 32"}, /* SIGCANCEL; see issue 6997 */ /* 33 */ {_SigSetStack + _SigUnblock, "signal 33"}, /* SIGSETXID; see issues 3871, 9400, 12498 */ - /* 34 */ {_SigNotify, "signal 34"}, + /* 34 */ {_SigSetStack + _SigUnblock, "signal 34"}, /* musl SIGSYNCCALL; see issue 39343 */ /* 35 */ {_SigNotify, "signal 35"}, /* 36 */ {_SigNotify, "signal 36"}, /* 37 */ {_SigNotify, "signal 37"}, diff --git a/src/runtime/sigtab_linux_mipsx.go b/src/runtime/sigtab_linux_mipsx.go index 81dd2314c5..51ef470ce7 100644 --- a/src/runtime/sigtab_linux_mipsx.go +++ b/src/runtime/sigtab_linux_mipsx.go @@ -42,7 +42,7 @@ var sigtable = [...]sigTabT{ /* 31 */ {_SigNotify, "SIGXFSZ: file size limit exceeded"}, /* 32 */ {_SigSetStack + _SigUnblock, "signal 32"}, /* SIGCANCEL; see issue 6997 */ /* 33 */ {_SigSetStack + _SigUnblock, "signal 33"}, /* SIGSETXID; see issues 3871, 9400, 12498 */ - /* 34 */ {_SigNotify, "signal 34"}, + /* 34 */ {_SigSetStack + _SigUnblock, "signal 34"}, /* musl SIGSYNCCALL; see issue 39343 */ /* 35 */ {_SigNotify, "signal 35"}, /* 36 */ {_SigNotify, "signal 36"}, /* 37 */ {_SigNotify, "signal 37"}, -- cgit v1.2.1 From 368c40116434532dc0b53b72fa04788ca6742898 Mon Sep 17 00:00:00 2001 From: Ian Lance Taylor Date: Tue, 27 Oct 2020 16:09:40 -0700 Subject: runtime: block signals in needm before allocating M Otherwise, if a signal occurs just after we allocated the M, we can deadlock if the signal handler needs to allocate an M itself. Fixes #42207 Change-Id: I76f44547f419e8b1c14cbf49bf602c6e645d8c14 Reviewed-on: https://go-review.googlesource.com/c/go/+/265759 Trust: Ian Lance Taylor Run-TryBot: Ian Lance Taylor TryBot-Result: Go Bot Reviewed-by: Bryan C. 
Mills --- src/runtime/crash_unix_test.go | 9 +++ src/runtime/os_js.go | 2 +- src/runtime/os_plan9.go | 2 +- src/runtime/os_windows.go | 2 +- src/runtime/proc.go | 26 ++++--- src/runtime/signal_unix.go | 8 +- src/runtime/testdata/testprogcgo/needmdeadlock.go | 95 +++++++++++++++++++++++ 7 files changed, 127 insertions(+), 17 deletions(-) create mode 100644 src/runtime/testdata/testprogcgo/needmdeadlock.go (limited to 'src/runtime') diff --git a/src/runtime/crash_unix_test.go b/src/runtime/crash_unix_test.go index fc87f37408..7aba3d4846 100644 --- a/src/runtime/crash_unix_test.go +++ b/src/runtime/crash_unix_test.go @@ -358,3 +358,12 @@ func TestSignalM(t *testing.T) { t.Fatalf("signal sent to M %d, but received on M %d", want, got) } } + +// Issue #42207. +func TestNeedmDeadlock(t *testing.T) { + output := runTestProg(t, "testprogcgo", "NeedmDeadlock") + want := "OK\n" + if output != want { + t.Fatalf("want %s, got %s\n", want, output) + } +} diff --git a/src/runtime/os_js.go b/src/runtime/os_js.go index ff0ee3aa6b..94983b358d 100644 --- a/src/runtime/os_js.go +++ b/src/runtime/os_js.go @@ -59,7 +59,7 @@ func mpreinit(mp *m) { } //go:nosplit -func msigsave(mp *m) { +func sigsave(p *sigset) { } //go:nosplit diff --git a/src/runtime/os_plan9.go b/src/runtime/os_plan9.go index f3037a7508..62aecea060 100644 --- a/src/runtime/os_plan9.go +++ b/src/runtime/os_plan9.go @@ -184,7 +184,7 @@ func mpreinit(mp *m) { mp.errstr = (*byte)(mallocgc(_ERRMAX, nil, true)) } -func msigsave(mp *m) { +func sigsave(p *sigset) { } func msigrestore(sigmask sigset) { diff --git a/src/runtime/os_windows.go b/src/runtime/os_windows.go index 9dd140c952..ffb087f9db 100644 --- a/src/runtime/os_windows.go +++ b/src/runtime/os_windows.go @@ -873,7 +873,7 @@ func mpreinit(mp *m) { } //go:nosplit -func msigsave(mp *m) { +func sigsave(p *sigset) { } //go:nosplit diff --git a/src/runtime/proc.go b/src/runtime/proc.go index 87d4b6e568..b335e1184d 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -598,7 +598,7 @@ func schedinit() { typelinksinit() // uses maps, activeModules itabsinit() // uses activeModules - msigsave(_g_.m) + sigsave(&_g_.m.sigmask) initSigmask = _g_.m.sigmask goargs() @@ -1707,6 +1707,18 @@ func needm() { exit(1) } + // Save and block signals before getting an M. + // The signal handler may call needm itself, + // and we must avoid a deadlock. Also, once g is installed, + // any incoming signals will try to execute, + // but we won't have the sigaltstack settings and other data + // set up appropriately until the end of minit, which will + // unblock the signals. This is the same dance as when + // starting a new m to run Go code via newosproc. + var sigmask sigset + sigsave(&sigmask) + sigblock() + // Lock extra list, take head, unlock popped list. // nilokay=false is safe here because of the invariant above, // that the extra list always contains or will soon contain @@ -1724,14 +1736,8 @@ func needm() { extraMCount-- unlockextra(mp.schedlink.ptr()) - // Save and block signals before installing g. - // Once g is installed, any incoming signals will try to execute, - // but we won't have the sigaltstack settings and other data - // set up appropriately until the end of minit, which will - // unblock the signals. This is the same dance as when - // starting a new m to run Go code via newosproc. - msigsave(mp) - sigblock() + // Store the original signal mask for use by minit. + mp.sigmask = sigmask // Install g (= m->g0) and set the stack bounds // to match the current stack. 
We don't actually know @@ -3676,7 +3682,7 @@ func beforefork() { // a signal handler before exec if a signal is sent to the process // group. See issue #18600. gp.m.locks++ - msigsave(gp.m) + sigsave(&gp.m.sigmask) sigblock() // This function is called before fork in syscall package. diff --git a/src/runtime/signal_unix.go b/src/runtime/signal_unix.go index 9318a9b8bc..bf4a319b37 100644 --- a/src/runtime/signal_unix.go +++ b/src/runtime/signal_unix.go @@ -1031,15 +1031,15 @@ func sigfwdgo(sig uint32, info *siginfo, ctx unsafe.Pointer) bool { return true } -// msigsave saves the current thread's signal mask into mp.sigmask. +// sigsave saves the current thread's signal mask into *p. // This is used to preserve the non-Go signal mask when a non-Go // thread calls a Go function. // This is nosplit and nowritebarrierrec because it is called by needm // which may be called on a non-Go thread with no g available. //go:nosplit //go:nowritebarrierrec -func msigsave(mp *m) { - sigprocmask(_SIG_SETMASK, nil, &mp.sigmask) +func sigsave(p *sigset) { + sigprocmask(_SIG_SETMASK, nil, p) } // msigrestore sets the current thread's signal mask to sigmask. @@ -1111,7 +1111,7 @@ func minitSignalStack() { // thread's signal mask. When this is called all signals have been // blocked for the thread. This starts with m.sigmask, which was set // either from initSigmask for a newly created thread or by calling -// msigsave if this is a non-Go thread calling a Go function. It +// sigsave if this is a non-Go thread calling a Go function. It // removes all essential signals from the mask, thus causing those // signals to not be blocked. Then it sets the thread's signal mask. // After this is called the thread can receive signals. diff --git a/src/runtime/testdata/testprogcgo/needmdeadlock.go b/src/runtime/testdata/testprogcgo/needmdeadlock.go new file mode 100644 index 0000000000..5a9c359006 --- /dev/null +++ b/src/runtime/testdata/testprogcgo/needmdeadlock.go @@ -0,0 +1,95 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !plan9,!windows + +package main + +// This is for issue #42207. +// During a call to needm we could get a SIGCHLD signal +// which would itself call needm, causing a deadlock. + +/* +#include +#include +#include +#include + +extern void GoNeedM(); + +#define SIGNALERS 10 + +static void* needmSignalThread(void* p) { + pthread_t* pt = (pthread_t*)(p); + int i; + + for (i = 0; i < 100; i++) { + if (pthread_kill(*pt, SIGCHLD) < 0) { + return NULL; + } + usleep(1); + } + return NULL; +} + +// We don't need many calls, as the deadlock is only likely +// to occur the first couple of times that needm is called. +// After that there will likely be an extra M available. +#define CALLS 10 + +static void* needmCallbackThread(void* p) { + int i; + + for (i = 0; i < SIGNALERS; i++) { + sched_yield(); // Help the signal threads get started. 
+ } + for (i = 0; i < CALLS; i++) { + GoNeedM(); + } + return NULL; +} + +static void runNeedmSignalThread() { + int i; + pthread_t caller; + pthread_t s[SIGNALERS]; + + pthread_create(&caller, NULL, needmCallbackThread, NULL); + for (i = 0; i < SIGNALERS; i++) { + pthread_create(&s[i], NULL, needmSignalThread, &caller); + } + for (i = 0; i < SIGNALERS; i++) { + pthread_join(s[i], NULL); + } + pthread_join(caller, NULL); +} +*/ +import "C" + +import ( + "fmt" + "os" + "time" +) + +func init() { + register("NeedmDeadlock", NeedmDeadlock) +} + +//export GoNeedM +func GoNeedM() { +} + +func NeedmDeadlock() { + // The failure symptom is that the program hangs because of a + // deadlock in needm, so set an alarm. + go func() { + time.Sleep(5 * time.Second) + fmt.Println("Hung for 5 seconds") + os.Exit(1) + }() + + C.runNeedmSignalThread() + fmt.Println("OK") +} -- cgit v1.2.1 From 02335cf4131f4eb1869f50f906e993676f7f414a Mon Sep 17 00:00:00 2001 From: Ian Lance Taylor Date: Tue, 27 Oct 2020 21:05:13 -0700 Subject: runtime: move TestNeedmDeadlock to crash_cgo_test.go It requires cgo. Also, skip the test on windows and plan9. For #42207 Change-Id: I8522773f93bc3f9826506a41a08b86a083262e31 Reviewed-on: https://go-review.googlesource.com/c/go/+/265778 Trust: Ian Lance Taylor Run-TryBot: Ian Lance Taylor Reviewed-by: Brad Fitzpatrick --- src/runtime/crash_cgo_test.go | 13 +++++++++++++ src/runtime/crash_unix_test.go | 9 --------- 2 files changed, 13 insertions(+), 9 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/crash_cgo_test.go b/src/runtime/crash_cgo_test.go index b200984050..0680d07a32 100644 --- a/src/runtime/crash_cgo_test.go +++ b/src/runtime/crash_cgo_test.go @@ -600,3 +600,16 @@ func TestEINTR(t *testing.T) { t.Fatalf("want %s, got %s\n", want, output) } } + +// Issue #42207. +func TestNeedmDeadlock(t *testing.T) { + switch runtime.GOOS { + case "plan9", "windows": + t.Skipf("no signals on %s", runtime.GOOS) + } + output := runTestProg(t, "testprogcgo", "NeedmDeadlock") + want := "OK\n" + if output != want { + t.Fatalf("want %s, got %s\n", want, output) + } +} diff --git a/src/runtime/crash_unix_test.go b/src/runtime/crash_unix_test.go index 7aba3d4846..fc87f37408 100644 --- a/src/runtime/crash_unix_test.go +++ b/src/runtime/crash_unix_test.go @@ -358,12 +358,3 @@ func TestSignalM(t *testing.T) { t.Fatalf("signal sent to M %d, but received on M %d", want, got) } } - -// Issue #42207. -func TestNeedmDeadlock(t *testing.T) { - output := runTestProg(t, "testprogcgo", "NeedmDeadlock") - want := "OK\n" - if output != want { - t.Fatalf("want %s, got %s\n", want, output) - } -} -- cgit v1.2.1 From 150d2448e5a213cd679396371c0a147918dc2125 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Derkacz?= Date: Sun, 14 Jun 2020 00:06:24 +0200 Subject: cmd/compile,cmd/internal/obj/riscv,runtime: use Duff's devices on riscv64 Implement runtime.duffzero and runtime.duffcopy for riscv64. Use obj.ADUFFZERO/obj.ADUFFCOPY for medium size, word aligned zeroing/moving. 
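For readers unfamiliar with the pattern: duffzero and duffcopy are single long unrolled runs of store-and-advance (duffzero) or load/store-and-advance (duffcopy) steps ending in RET, and the compiler emits a call that enters the body partway through, so only as many steps as the object needs actually execute. A minimal Go sketch of that control-flow idea (a hypothetical helper using fallthrough, since Go has no computed goto; it is not the generated riscv64 code):

    package main

    import "fmt"

    // zeroTail zeroes the last n words of dst (0 <= n <= 4) by entering an
    // unrolled sequence of stores partway through and falling through to
    // the end -- the control-flow idea behind a Duff's device.
    func zeroTail(dst *[4]uint64, n int) {
        switch n {
        case 4:
            dst[0] = 0
            fallthrough
        case 3:
            dst[1] = 0
            fallthrough
        case 2:
            dst[2] = 0
            fallthrough
        case 1:
            dst[3] = 0
        }
    }

    func main() {
        a := [4]uint64{1, 2, 3, 4}
        zeroTail(&a, 3) // zero the last three words
        fmt.Println(a)  // [1 0 0 0]
    }

Entering one shared unrolled body at an offset keeps these medium-size, word-aligned zero/copy operations free of per-iteration loop overhead while keeping code size bounded.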
Change-Id: I42ec622055630c94cb77e286d8d33dbe7c9f846c Reviewed-on: https://go-review.googlesource.com/c/go/+/237797 Run-TryBot: Cherry Zhang Reviewed-by: Joel Sing Reviewed-by: Cherry Zhang --- src/runtime/duff_riscv64.s | 907 +++++++++++++++++++++++++++++++++++++++++++++ src/runtime/mkduff.go | 28 ++ 2 files changed, 935 insertions(+) create mode 100644 src/runtime/duff_riscv64.s (limited to 'src/runtime') diff --git a/src/runtime/duff_riscv64.s b/src/runtime/duff_riscv64.s new file mode 100644 index 0000000000..f7bd3f326e --- /dev/null +++ b/src/runtime/duff_riscv64.s @@ -0,0 +1,907 @@ +// Code generated by mkduff.go; DO NOT EDIT. +// Run go generate from src/runtime to update. +// See mkduff.go for comments. + +#include "textflag.h" + +TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV 
ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + MOV ZERO, (X10) + ADD $8, X10 + RET + +TEXT runtime·duffcopy(SB), NOSPLIT|NOFRAME, $0-0 + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, (X11) + ADD $8, X11 + + MOV (X10), X31 + ADD $8, X10 + MOV X31, 
(X11)
+	ADD	$8, X11
+
+	MOV	(X10), X31
+	ADD	$8, X10
+	MOV	X31, (X11)
+	ADD	$8, X11
+
	[... the same four-instruction copy-and-advance block repeats for the remaining unrolled iterations of the generated duffcopy ...]
+	RET
diff --git a/src/runtime/mkduff.go b/src/runtime/mkduff.go
index 8859ed68cc..6ddf0256e9 100644
--- a/src/runtime/mkduff.go
+++ b/src/runtime/mkduff.go
@@ -38,6 +38,7 @@ func main() {
 	gen("arm64", notags, zeroARM64, copyARM64)
 	gen("ppc64x", tagsPPC64x, zeroPPC64x, copyPPC64x)
 	gen("mips64x", tagsMIPS64x, zeroMIPS64x, copyMIPS64x)
+	gen("riscv64", notags, zeroRISCV64, copyRISCV64)
 }
 
 func gen(arch string, tags, zero, copy func(io.Writer)) {
@@ -227,3 +228,30 @@ func copyMIPS64x(w io.Writer) {
 	}
 	fmt.Fprintln(w, "\tRET")
 }
+
+func zeroRISCV64(w io.Writer) {
+	// ZERO: always zero
+	// X10: ptr to memory to be zeroed
+	// X10 is updated as a side effect.
+	fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0")
+	for i := 0; i < 128; i++ {
+		fmt.Fprintln(w, "\tMOV\tZERO, (X10)")
+		fmt.Fprintln(w, "\tADD\t$8, X10")
+	}
+	fmt.Fprintln(w, "\tRET")
+}
+
+func copyRISCV64(w io.Writer) {
+	// X10: ptr to source memory
+	// X11: ptr to destination memory
+	// X10 and X11 are updated as a side effect
+	fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT|NOFRAME, $0-0")
+	for i := 0; i < 128; i++ {
+		fmt.Fprintln(w, "\tMOV\t(X10), X31")
+		fmt.Fprintln(w, "\tADD\t$8, X10")
+		fmt.Fprintln(w, "\tMOV\tX31, (X11)")
+		fmt.Fprintln(w, "\tADD\t$8, X11")
+		fmt.Fprintln(w)
+	}
+	fmt.Fprintln(w, "\tRET")
+}
-- cgit v1.2.1
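
A note for readers following the generator change: the small standalone program below is not part of the CL; the helper name copySketch and the two-iteration count are illustrative only. It mirrors the shape of copyRISCV64 above and prints the kind of unrolled duffcopy body that mkduff.go writes into duff_riscv64.s. The compiler enters that unrolled body at a computed offset so that only as many iterations as needed run; each iteration loads one 8-byte word through X10 into the temporary register X31, stores it through X11, and advances both pointers.

// copysketch.go: a minimal sketch of a copyRISCV64-style generator.
// Not part of the patch; it only illustrates the output format.
package main

import (
	"fmt"
	"io"
	"os"
)

// copySketch writes an unrolled duffcopy-like body to w.
// The real generator in mkduff.go unrolls 128 iterations;
// the count here is a parameter so the output stays short.
func copySketch(w io.Writer, iterations int) {
	fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT|NOFRAME, $0-0")
	for i := 0; i < iterations; i++ {
		fmt.Fprintln(w, "\tMOV\t(X10), X31") // load an 8-byte word from the source pointer
		fmt.Fprintln(w, "\tADD\t$8, X10")    // advance the source pointer
		fmt.Fprintln(w, "\tMOV\tX31, (X11)") // store the word at the destination pointer
		fmt.Fprintln(w, "\tADD\t$8, X11")    // advance the destination pointer
		fmt.Fprintln(w)
	}
	fmt.Fprintln(w, "\tRET")
}

func main() {
	// Print a two-iteration body to stdout for inspection.
	copySketch(os.Stdout, 2)
}

Swapping in the zeroRISCV64 pattern (MOV ZERO, (X10) followed by ADD $8, X10) yields the corresponding duffzero body.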