diff options
author | Jordan Rhee <jordanrh@microsoft.com> | 2018-12-11 14:24:07 -0800 |
---|---|---|
committer | Ian Lance Taylor <iant@golang.org> | 2018-12-12 01:35:51 +0000 |
commit | 6764d7aeacd24f48875ce88a8e3c789cedfb2e28 (patch) | |
tree | 4972b0705557064b414b17434ad0a53702048bb7 /src | |
parent | 12c0f1b3e6ace3972bb3d328b2bdcadb6dceb198 (diff) | |
download | go-git-6764d7aeacd24f48875ce88a8e3c789cedfb2e28.tar.gz |
runtime: fix profiling on windows/ARM
Fix profiling handler to get the correct g for the m being profiled.
Store a pointer to the TLS slot holding g in the thread's m. This
enables the profiling handler to get the current g for the thread,
even if the thread is executing external code or system code.
Updates #26148
Signed-off-by: Jordan Rhee <jordanrh@microsoft.com>
Change-Id: Ie061284c12341c76c7d96cc0c2d5bac969230829
Reviewed-on: https://go-review.googlesource.com/c/153718
Run-TryBot: Ian Lance Taylor <iant@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
Diffstat (limited to 'src')
-rw-r--r-- | src/runtime/os_windows.go | 16 | ||||
-rw-r--r-- | src/runtime/sys_windows_arm.s | 95 | ||||
-rw-r--r-- | src/runtime/tls_arm.s | 45 |
3 files changed, 101 insertions, 55 deletions
diff --git a/src/runtime/os_windows.go b/src/runtime/os_windows.go index 03dd95bf17..5870a342c2 100644 --- a/src/runtime/os_windows.go +++ b/src/runtime/os_windows.go @@ -872,24 +872,14 @@ func profilem(mp *m, thread uintptr) { default: panic("unsupported architecture") case "arm": - // TODO(jordanrh1): this is incorrect when Go is executing - // on the system or signal stacks because curg returns - // the current user g. The true g is stored in thread - // local storage, which we cannot access from another CPU. - // We cannot pull R10 from the thread context because - // it might be executing C code, in which case R10 - // would not be g. - gp = mp.curg + tls := &mp.tls[0] + gp = **((***g)(unsafe.Pointer(tls))) case "386", "amd64": tls := &mp.tls[0] gp = *((**g)(unsafe.Pointer(tls))) } - if gp == nil { - sigprofNonGoPC(r.ip()) - } else { - sigprof(r.ip(), r.sp(), 0, gp, mp) - } + sigprof(r.ip(), r.sp(), 0, gp, mp) } func profileloop1(param uintptr) uint32 { diff --git a/src/runtime/sys_windows_arm.s b/src/runtime/sys_windows_arm.s index 60a85b8ffb..60be74b95c 100644 --- a/src/runtime/sys_windows_arm.s +++ b/src/runtime/sys_windows_arm.s @@ -362,6 +362,9 @@ TEXT runtime·tstart_stdcall(SB),NOSPLIT|NOFRAME,$0 MOVW R0, g_m(g) BL runtime·save_g(SB) + // do per-thread TLS initialization + BL runtime·init_thread_tls(SB) + // Layout new m scheduler stack on os stack. MOVW R13, R0 MOVW R0, g_stack+stack_hi(g) @@ -595,3 +598,95 @@ useQPC: B runtime·nanotimeQPC(SB) // tail call RET +// save_g saves the g register (R10) into thread local memory +// so that we can call externally compiled +// ARM code that will overwrite those registers. +// NOTE: runtime.gogo assumes that R1 is preserved by this function. +// runtime.mcall assumes this function only clobbers R0 and R11. +// Returns with g in R0. +// Save the value in the _TEB->TlsSlots array. +// Effectively implements TlsSetValue(). +// tls_g stores the TLS slot allocated TlsAlloc(). +TEXT runtime·save_g(SB),NOSPLIT|NOFRAME,$0 + MRC 15, 0, R0, C13, C0, 2 + ADD $0xe10, R0 + MOVW $runtime·tls_g(SB), R11 + MOVW (R11), R11 + MOVW g, R11<<2(R0) + MOVW g, R0 // preserve R0 across call to setg<> + RET + +// load_g loads the g register from thread-local memory, +// for use after calling externally compiled +// ARM code that overwrote those registers. +// Get the value from the _TEB->TlsSlots array. +// Effectively implements TlsGetValue(). +TEXT runtime·load_g(SB),NOSPLIT|NOFRAME,$0 + MRC 15, 0, R0, C13, C0, 2 + ADD $0xe10, R0 + MOVW $runtime·tls_g(SB), g + MOVW (g), g + MOVW g<<2(R0), g + RET + +// This is called from rt0_go, which runs on the system stack +// using the initial stack allocated by the OS. +// It calls back into standard C using the BL below. +// To do that, the stack pointer must be 8-byte-aligned. +TEXT runtime·_initcgo(SB),NOSPLIT|NOFRAME,$0 + MOVM.DB.W [R4, R14], (R13) // push {r4, lr} + + // Ensure stack is 8-byte aligned before calling C code + MOVW R13, R4 + BIC $0x7, R13 + + // Allocate a TLS slot to hold g across calls to external code + MOVW $runtime·_TlsAlloc(SB), R0 + MOVW (R0), R0 + BL (R0) + + // Assert that slot is less than 64 so we can use _TEB->TlsSlots + CMP $64, R0 + MOVW $runtime·abort(SB), R1 + BL.GE (R1) + + // Save Slot into tls_g + MOVW $runtime·tls_g(SB), R1 + MOVW R0, (R1) + + BL runtime·init_thread_tls(SB) + + MOVW R4, R13 + MOVM.IA.W (R13), [R4, R15] // pop {r4, pc} + +// void init_thread_tls() +// +// Does per-thread TLS initialization. Saves a pointer to the TLS slot +// holding G, in the current m. +// +// g->m->tls[0] = &_TEB->TlsSlots[tls_g] +// +// The purpose of this is to enable the profiling handler to get the +// current g associated with the thread. We cannot use m->curg because curg +// only holds the current user g. If the thread is executing system code or +// external code, m->curg will be NULL. The thread's TLS slot always holds +// the current g, so save a reference to this location so the profiling +// handler can get the real g from the thread's m. +// +// Clobbers R0-R3 +TEXT runtime·init_thread_tls(SB),NOSPLIT|NOFRAME,$0 + // compute &_TEB->TlsSlots[tls_g] + MRC 15, 0, R0, C13, C0, 2 + ADD $0xe10, R0 + MOVW $runtime·tls_g(SB), R1 + MOVW (R1), R1 + MOVW R1<<2, R1 + ADD R1, R0 + + // save in g->m->tls[0] + MOVW g_m(g), R1 + MOVW R0, m_tls(R1) + RET + +// Holds the TLS Slot, which was allocated by TlsAlloc() +GLOBL runtime·tls_g+0(SB), NOPTR, $4 diff --git a/src/runtime/tls_arm.s b/src/runtime/tls_arm.s index e2c945d183..400c16a177 100644 --- a/src/runtime/tls_arm.s +++ b/src/runtime/tls_arm.s @@ -2,6 +2,8 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +// +build !windows + #include "go_asm.h" #include "go_tls.h" #include "funcdata.h" @@ -23,9 +25,6 @@ #ifdef GOOS_darwin #define TLSG_IS_VARIABLE #endif -#ifdef GOOS_windows -#define TLSG_IS_VARIABLE -#endif // save_g saves the g register into pthread-provided // thread-local memory, so that we can call externally compiled @@ -39,17 +38,6 @@ TEXT runtime·save_g(SB),NOSPLIT|NOFRAME,$0 MOVW g, R0 // preserve R0 across call to setg<> RET #else -#ifdef GOOS_windows - // Save the value in the _TEB->TlsSlots array. - // Effectively implements TlsSetValue(). - MRC 15, 0, R0, C13, C0, 2 - ADD $0xe10, R0 - MOVW $runtime·tls_g(SB), R11 - MOVW (R11), R11 - MOVW g, R11<<2(R0) - MOVW g, R0 // preserve R0 accross call to setg<> - RET -#else // If the host does not support MRC the linker will replace it with // a call to runtime.read_tls_fallback which jumps to __kuser_get_tls. // The replacement function saves LR in R11 over the call to read_tls_fallback. @@ -61,7 +49,6 @@ TEXT runtime·save_g(SB),NOSPLIT|NOFRAME,$0 MOVW g, R0 // preserve R0 across call to setg<> RET #endif -#endif // load_g loads the g register from pthread-provided // thread-local memory, for use after calling externally compiled @@ -71,16 +58,6 @@ TEXT runtime·load_g(SB),NOSPLIT,$0 // nothing to do as nacl/arm does not use TLS at all. RET #else -#ifdef GOOS_windows - // Get the value from the _TEB->TlsSlots array. - // Effectively implements TlsGetValue(). - MRC 15, 0, R0, C13, C0, 2 - ADD $0xe10, R0 - MOVW $runtime·tls_g(SB), g - MOVW (g), g - MOVW g<<2(R0), g - RET -#else // See save_g MRC 15, 0, R0, C13, C0, 3 // fetch TLS base pointer BIC $3, R0 // Darwin/ARM might return unaligned pointer @@ -89,7 +66,6 @@ TEXT runtime·load_g(SB),NOSPLIT,$0 MOVW 0(R0), g RET #endif -#endif // This is called from rt0_go, which runs on the system stack // using the initial stack allocated by the OS. @@ -102,20 +78,6 @@ TEXT runtime·load_g(SB),NOSPLIT,$0 // Declare a dummy word ($4, not $0) to make sure the // frame is 8 bytes and stays 8-byte-aligned. TEXT runtime·_initcgo(SB),NOSPLIT,$4 -#ifdef GOOS_windows - MOVW R13, R4 - BIC $0x7, R13 - MOVW $runtime·_TlsAlloc(SB), R0 - MOVW (R0), R0 - BL (R0) - // Assert that slot is less than 64 so we can use _TEB->TlsSlots - CMP $64, R0 - MOVW $runtime·abort(SB), R1 - BL.GE (R1) - MOVW $runtime·tls_g(SB), R1 - MOVW R0, (R1) - MOVW R4, R13 -#else #ifndef GOOS_nacl // if there is an _cgo_init, call it. MOVW _cgo_init(SB), R4 @@ -131,8 +93,7 @@ TEXT runtime·_initcgo(SB),NOSPLIT,$4 MOVW $setg_gcc<>(SB), R1 // arg 1: setg MOVW g, R0 // arg 0: G BL (R4) // will clobber R0-R3 -#endif // GOOS_nacl -#endif // GOOS_windows +#endif nocgo: RET |