From 15ead857dbc638b9d83a7686acf0dc746fc45918 Mon Sep 17 00:00:00 2001 From: "Paul E. Murphy" Date: Wed, 9 Sep 2020 17:24:23 -0500 Subject: cmd/compiler,cmd/go,sync: add internal {LoadAcq,StoreRel}64 on ppc64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add an internal atomic intrinsic for load with acquire semantics (extending LoadAcq to 64b) and add LoadAcquintptr for internal use within the sync package. For other arches, this remaps to the appropriate atomic.Load{,64} intrinsic which should not alter code generation. Similarly, add StoreRel{uintptr,64} for consistency, and inline. Finally, add an exception to allow sync to directly use the runtime/internal/atomic package which avoids more convoluted workarounds (contributed by Lynn Boger). In an extreme example, sync.(*Pool).pin consumes 20% of wall time during fmt tests. This is reduced to 5% on ppc64le/power9. From the fmt benchmarks on ppc64le: name old time/op new time/op delta SprintfPadding 468ns ± 0% 451ns ± 0% -3.63% SprintfEmpty 73.3ns ± 0% 51.9ns ± 0% -29.20% SprintfString 135ns ± 0% 122ns ± 0% -9.63% SprintfTruncateString 232ns ± 0% 214ns ± 0% -7.76% SprintfTruncateBytes 216ns ± 0% 202ns ± 0% -6.48% SprintfSlowParsingPath 162ns ± 0% 142ns ± 0% -12.35% SprintfQuoteString 1.00µs ± 0% 0.99µs ± 0% -1.39% SprintfInt 117ns ± 0% 104ns ± 0% -11.11% SprintfIntInt 190ns ± 0% 175ns ± 0% -7.89% SprintfPrefixedInt 232ns ± 0% 212ns ± 0% -8.62% SprintfFloat 270ns ± 0% 255ns ± 0% -5.56% SprintfComplex 1.01µs ± 0% 0.99µs ± 0% -1.68% SprintfBoolean 127ns ± 0% 111ns ± 0% -12.60% SprintfHexString 220ns ± 0% 198ns ± 0% -10.00% SprintfHexBytes 261ns ± 0% 252ns ± 0% -3.45% SprintfBytes 600ns ± 0% 590ns ± 0% -1.67% SprintfStringer 684ns ± 0% 658ns ± 0% -3.80% SprintfStructure 2.57µs ± 0% 2.57µs ± 0% -0.12% ManyArgs 669ns ± 0% 646ns ± 0% -3.44% FprintInt 140ns ± 0% 136ns ± 0% -2.86% FprintfBytes 184ns ± 0% 181ns ± 0% -1.63% FprintIntNoAlloc 140ns ± 0% 136ns ± 0% -2.86% ScanInts 929µs ± 0% 921µs ± 0% -0.79% ScanRecursiveInt 122ms ± 0% 121ms ± 0% -0.11% ScanRecursiveIntReaderWrapper 122ms ± 0% 122ms ± 0% -0.18% Change-Id: I4d66780261b57b06ef600229e475462e7313f0d6 Reviewed-on: https://go-review.googlesource.com/c/go/+/253748 Run-TryBot: Lynn Boger Reviewed-by: Lynn Boger Reviewed-by: Keith Randall Trust: Lynn Boger TryBot-Result: Go Bot --- src/runtime/internal/atomic/atomic_wasm.go | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'src/runtime/internal/atomic/atomic_wasm.go') diff --git a/src/runtime/internal/atomic/atomic_wasm.go b/src/runtime/internal/atomic/atomic_wasm.go index 2c0c3a8174..60a4942884 100644 --- a/src/runtime/internal/atomic/atomic_wasm.go +++ b/src/runtime/internal/atomic/atomic_wasm.go @@ -45,6 +45,18 @@ func LoadAcq(ptr *uint32) uint32 { return *ptr } +//go:nosplit +//go:noinline +func LoadAcq64(ptr *uint64) uint64 { + return *ptr +} + +//go:nosplit +//go:noinline +func LoadAcquintptr(ptr *uintptr) uintptr { + return *ptr +} + //go:nosplit //go:noinline func Load8(ptr *uint8) uint8 { @@ -141,6 +153,18 @@ func StoreRel(ptr *uint32, val uint32) { *ptr = val } +//go:nosplit +//go:noinline +func StoreRel64(ptr *uint64, val uint64) { + *ptr = val +} + +//go:nosplit +//go:noinline +func StoreReluintptr(ptr *uintptr, val uintptr) { + *ptr = val +} + //go:nosplit //go:noinline func Store8(ptr *uint8, val uint8) { -- cgit v1.2.1 From ad61343f886cc5ce677e7bd62385144b2ba7b8f5 Mon Sep 17 00:00:00 2001 From: Michael Pratt Date: Thu, 8 Oct 2020 14:38:39 -0400 Subject: runtime/internal/atomic: add 32-bit And/Or These will be used in a following CL to perform larger bit clear and bit set than And8/Or8. Change-Id: I60f7b1099e29b69eb64add77564faee862880a8d Reviewed-on: https://go-review.googlesource.com/c/go/+/260977 Run-TryBot: Michael Pratt TryBot-Result: Go Bot Reviewed-by: Cherry Zhang Trust: Michael Pratt --- src/runtime/internal/atomic/atomic_wasm.go | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'src/runtime/internal/atomic/atomic_wasm.go') diff --git a/src/runtime/internal/atomic/atomic_wasm.go b/src/runtime/internal/atomic/atomic_wasm.go index 60a4942884..b05d98ed51 100644 --- a/src/runtime/internal/atomic/atomic_wasm.go +++ b/src/runtime/internal/atomic/atomic_wasm.go @@ -131,6 +131,18 @@ func Or8(ptr *uint8, val uint8) { // NOTE: Do not add atomicxor8 (XOR is not idempotent). +//go:nosplit +//go:noinline +func And(ptr *uint32, val uint32) { + *ptr = *ptr & val +} + +//go:nosplit +//go:noinline +func Or(ptr *uint32, val uint32) { + *ptr = *ptr | val +} + //go:nosplit //go:noinline func Cas64(ptr *uint64, old, new uint64) bool { -- cgit v1.2.1