From adef4deeb85ede59201f37f5145763ed55a807f7 Mon Sep 17 00:00:00 2001 From: David Chase Date: Fri, 7 Aug 2020 22:46:43 -0400 Subject: cmd/compile: enable late expansion for interface calls Includes a few tweaks to Value.copyOf(a) (make it a no-op for a self-copy) and new pattern hack "___" (3 underscores) is like ellipsis, except the replacement doesn't need to have matching ellipsis/underscores. Moved the arg-length check in generated pattern-matching code BEFORE the args are probed, because not all instances of variable length OpFoo will have all the args mentioned in some rule for OpFoo, and when that happens, the compiler panics without the early check. Change-Id: I66de40672b3794a6427890ff96c805a488d783f4 Reviewed-on: https://go-review.googlesource.com/c/go/+/247537 Trust: David Chase Run-TryBot: David Chase TryBot-Result: Go Bot Reviewed-by: Cherry Zhang --- src/cmd/compile/internal/ssa/rewrite.go | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'src/cmd/compile/internal/ssa/rewrite.go') diff --git a/src/cmd/compile/internal/ssa/rewrite.go b/src/cmd/compile/internal/ssa/rewrite.go index d9c3e455a0..9f4de83a77 100644 --- a/src/cmd/compile/internal/ssa/rewrite.go +++ b/src/cmd/compile/internal/ssa/rewrite.go @@ -764,6 +764,36 @@ func devirt(v *Value, aux interface{}, sym Sym, offset int64) *AuxCall { return StaticAuxCall(lsym, va.args, va.results) } +// de-virtualize an InterLECall +// 'sym' is the symbol for the itab +func devirtLESym(v *Value, aux interface{}, sym Sym, offset int64) *obj.LSym { + n, ok := sym.(*obj.LSym) + if !ok { + return nil + } + + f := v.Block.Func + lsym := f.fe.DerefItab(n, offset) + if f.pass.debug > 0 { + if lsym != nil { + f.Warnl(v.Pos, "de-virtualizing call") + } else { + f.Warnl(v.Pos, "couldn't de-virtualize call") + } + } + if lsym == nil { + return nil + } + return lsym +} + +func devirtLECall(v *Value, sym *obj.LSym) *Value { + v.Op = OpStaticLECall + v.Aux.(*AuxCall).Fn = sym + v.RemoveArg(0) 
+ return v +} + // isSamePtr reports whether p1 and p2 point to the same address. func isSamePtr(p1, p2 *Value) bool { if p1 == p2 { -- cgit v1.2.1 From cc2a5cf4b8b0aeaccd3dd439f8d3d68f25eef358 Mon Sep 17 00:00:00 2001 From: Lynn Boger Date: Mon, 28 Sep 2020 18:20:12 -0400 Subject: cmd/compile,cmd/internal/obj/ppc64: fix some shift rules due to a regression A recent change to improve shifts was generating some invalid cases when the rule was based on an AND. The extended mnemonics CLRLSLDI and CLRLSLWI only allow certain values for the operands and in the mask case those values were not being checked properly. This adds a check to those rules to verify that the 'b' and 'n' values used when an AND was part of the rule have correct values. There was a bug in some diag messages in asm9. The message expected 3 values but only provided 2. Those are corrected here also. The test/codegen/shift.go was updated to add a few more cases to check for the case mentioned here. Some of the comments that mention the order of operands in these extended mnemonics were wrong and those have been corrected. Fixes #41683. Change-Id: If5bb860acaa5051b9e0cd80784b2868b85898c31 Reviewed-on: https://go-review.googlesource.com/c/go/+/258138 Run-TryBot: Lynn Boger Reviewed-by: Paul Murphy Reviewed-by: Carlos Eduardo Seo TryBot-Result: Go Bot Trust: Lynn Boger --- src/cmd/compile/internal/ssa/rewrite.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/cmd/compile/internal/ssa/rewrite.go') diff --git a/src/cmd/compile/internal/ssa/rewrite.go b/src/cmd/compile/internal/ssa/rewrite.go index 9f4de83a77..5d8b3ddc4e 100644 --- a/src/cmd/compile/internal/ssa/rewrite.go +++ b/src/cmd/compile/internal/ssa/rewrite.go @@ -1380,8 +1380,8 @@ func GetPPC64Shiftme(auxint int64) int64 { return int64(int8(auxint)) } -// Catch the simple ones first -// TODO: Later catch more cases +// This verifies that the mask occupies the +// rightmost bits. 
func isPPC64ValidShiftMask(v int64) bool { if ((v + 1) & v) == 0 { return true -- cgit v1.2.1 From 7bda6154caa6f0c527f4a8302e38d450b44ae68b Mon Sep 17 00:00:00 2001 From: David Chase Date: Wed, 12 Aug 2020 23:47:57 -0400 Subject: cmd/compile: add generic optimization patterns for late-expanded calls. Repeats existing patterns for old calls, so that these will apply during the optimization phases that precede call expansion. Change-Id: I1ca0a78c159aa1a51004db217edde4ecc772b646 Reviewed-on: https://go-review.googlesource.com/c/go/+/248190 Trust: David Chase Run-TryBot: David Chase Reviewed-by: Cherry Zhang TryBot-Result: Go Bot --- src/cmd/compile/internal/ssa/rewrite.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/cmd/compile/internal/ssa/rewrite.go') diff --git a/src/cmd/compile/internal/ssa/rewrite.go b/src/cmd/compile/internal/ssa/rewrite.go index 5d8b3ddc4e..e5f858a339 100644 --- a/src/cmd/compile/internal/ssa/rewrite.go +++ b/src/cmd/compile/internal/ssa/rewrite.go @@ -395,7 +395,8 @@ func canMergeLoad(target, load *Value) bool { // isSameCall reports whether sym is the same as the given named symbol func isSameCall(sym interface{}, name string) bool { - return sym.(*AuxCall).Fn.String() == name + fn := sym.(*AuxCall).Fn + return fn != nil && fn.String() == name } // nlz returns the number of leading zeros. -- cgit v1.2.1 From c3c6fbf31419d37b0ae7d99b5378f6f8e9080b24 Mon Sep 17 00:00:00 2001 From: "Paul E. Murphy" Date: Fri, 23 Oct 2020 12:12:34 -0500 Subject: cmd/compile: combine more 32 bit shift and mask operations on ppc64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Combine (AND m (SRWconst x)) or (SRWconst (AND m x)) when the mask m and the shift value produce a constant which can be encoded into an RLWINM instruction. Combine (CLRLSLDI (SRWconst x)) if the combining of the underlying rotate masks produces a constant which can be encoded into RLWINM. 
Likewise for (SLDconst (SRWconst x)) and (CLRLSLDI (RLWINM x)). Combine rotate word + and operations which can be encoded as a single RLWINM/RLWNM instruction. The most notable performance improvements arise from the crypto benchmarks below (GOARCH=power8 on a ppc64le/linux): pkg:golang.org/x/crypto/blowfish goos:linux goarch:ppc64le ExpandKeyWithSalt 52.2µs ± 0% 47.5µs ± 0% -8.88% ExpandKey 44.4µs ± 0% 40.3µs ± 0% -9.15% pkg:golang.org/x/crypto/ssh/internal/bcrypt_pbkdf goos:linux goarch:ppc64le Key 57.6ms ± 0% 52.3ms ± 0% -9.13% pkg:golang.org/x/crypto/bcrypt goos:linux goarch:ppc64le Equal 90.9ms ± 0% 82.6ms ± 0% -9.13% DefaultCost 91.0ms ± 0% 82.7ms ± 0% -9.12% Change-Id: I59a0ca29face38f4ab46e37124c32906f216c4ce Reviewed-on: https://go-review.googlesource.com/c/go/+/260798 Run-TryBot: Carlos Eduardo Seo TryBot-Result: Go Bot Reviewed-by: Lynn Boger Reviewed-by: Carlos Eduardo Seo Trust: Lynn Boger --- src/cmd/compile/internal/ssa/rewrite.go | 137 ++++++++++++++++++++++++++++++++ 1 file changed, 137 insertions(+) (limited to 'src/cmd/compile/internal/ssa/rewrite.go') diff --git a/src/cmd/compile/internal/ssa/rewrite.go b/src/cmd/compile/internal/ssa/rewrite.go index e5f858a339..9b3c83d1cf 100644 --- a/src/cmd/compile/internal/ssa/rewrite.go +++ b/src/cmd/compile/internal/ssa/rewrite.go @@ -1381,6 +1381,71 @@ func GetPPC64Shiftme(auxint int64) int64 { return int64(int8(auxint)) } +// Test if this value can be encoded as a mask for a rlwinm like +// operation. Masks can also extend from the msb and wrap to +// the lsb too. That is, the valid masks are 32 bit strings +// of the form: 0..01..10..0 or 1..10..01..1 or 1...1 +func isPPC64WordRotateMask(v64 int64) bool { + // Isolate rightmost 1 (if none 0) and add. + v := uint32(v64) + vp := (v & -v) + v + // Likewise, for the wrapping case. 
+ vn := ^v + vpn := (vn & -vn) + vn + return (v&vp == 0 || vn&vpn == 0) && v != 0 +} + +// Compress mask and and shift into single value of the form +// me | mb<<8 | rotate<<16 | nbits<<24 where me and mb can +// be used to regenerate the input mask. +func encodePPC64RotateMask(rotate, mask, nbits int64) int64 { + var mb, me, mbn, men int + + // Determine boundaries and then decode them + if mask == 0 || ^mask == 0 || rotate >= nbits { + panic("Invalid PPC64 rotate mask") + } else if nbits == 32 { + mb = bits.LeadingZeros32(uint32(mask)) + me = 32 - bits.TrailingZeros32(uint32(mask)) + mbn = bits.LeadingZeros32(^uint32(mask)) + men = 32 - bits.TrailingZeros32(^uint32(mask)) + } else { + mb = bits.LeadingZeros64(uint64(mask)) + me = 64 - bits.TrailingZeros64(uint64(mask)) + mbn = bits.LeadingZeros64(^uint64(mask)) + men = 64 - bits.TrailingZeros64(^uint64(mask)) + } + // Check for a wrapping mask (e.g bits at 0 and 63) + if mb == 0 && me == int(nbits) { + // swap the inverted values + mb, me = men, mbn + } + + return int64(me) | int64(mb<<8) | int64(rotate<<16) | int64(nbits<<24) +} + +// The inverse operation of encodePPC64RotateMask. The values returned as +// mb and me satisfy the POWER ISA definition of MASK(x,y) where MASK(mb,me) = mask. +func DecodePPC64RotateMask(sauxint int64) (rotate, mb, me int64, mask uint64) { + auxint := uint64(sauxint) + rotate = int64((auxint >> 16) & 0xFF) + mb = int64((auxint >> 8) & 0xFF) + me = int64((auxint >> 0) & 0xFF) + nbits := int64((auxint >> 24) & 0xFF) + mask = ((1 << uint(nbits-mb)) - 1) ^ ((1 << uint(nbits-me)) - 1) + if mb > me { + mask = ^mask + } + if nbits == 32 { + mask = uint64(uint32(mask)) + } + + // Fixup ME to match ISA definition. The second argument to MASK(..,me) + // is inclusive. + me = (me - 1) & (nbits - 1) + return +} + // This verifies that the mask occupies the // rightmost bits. 
func isPPC64ValidShiftMask(v int64) bool { @@ -1394,6 +1459,78 @@ func getPPC64ShiftMaskLength(v int64) int64 { return int64(bits.Len64(uint64(v))) } +// Decompose a shift right into an equivalent rotate/mask, +// and return mask & m. +func mergePPC64RShiftMask(m, s, nbits int64) int64 { + smask := uint64((1<<uint(nbits))-1) >> uint(s) + return m & int64(smask) +} + +// Combine (ANDconst [m] (SRWconst [s])) into (RLWINM [y]) or return 0 +func mergePPC64AndSrwi(m, s int64) int64 { + mask := mergePPC64RShiftMask(m, s, 32) + if !isPPC64WordRotateMask(mask) { + return 0 + } + return encodePPC64RotateMask(32-s, mask, 32) +} + +// Test if a shift right feeding into a CLRLSLDI can be merged into RLWINM. +// Return the encoded RLWINM constant, or 0 if they cannot be merged. +func mergePPC64ClrlsldiSrw(sld, srw int64) int64 { + mask_1 := uint64(0xFFFFFFFF >> uint(srw)) + // for CLRLSLDI, it's more convenient to think of it as a mask left bits then rotate left. + mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld))) + + // Rewrite mask to apply after the final left shift. + mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld)) + + r_1 := 32 - srw + r_2 := GetPPC64Shiftsh(sld) + r_3 := (r_1 + r_2) & 31 // This can wrap. + + if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 { + return 0 + } + return encodePPC64RotateMask(int64(r_3), int64(mask_3), 32) +} + +// Test if a RLWINM feeding into a CLRLSLDI can be merged into RLWINM. Return +// the encoded RLWINM constant, or 0 if they cannot be merged. +func mergePPC64ClrlsldiRlwinm(sld int32, rlw int64) int64 { + r_1, _, _, mask_1 := DecodePPC64RotateMask(rlw) + // for CLRLSLDI, it's more convenient to think of it as a mask left bits then rotate left. + mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld))) + + // combine the masks, and adjust for the final left shift. 
+ mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(int64(sld))) + r_2 := GetPPC64Shiftsh(int64(sld)) + r_3 := (r_1 + r_2) & 31 // This can wrap. + + // Verify the result is still a valid bitmask of <= 32 bits. + if !isPPC64WordRotateMask(int64(mask_3)) || uint64(uint32(mask_3)) != mask_3 { + return 0 + } + return encodePPC64RotateMask(r_3, int64(mask_3), 32) +} + +// Compute the encoded RLWINM constant from combining (SLDconst [sld] (SRWconst [srw] x)), +// or return 0 if they cannot be combined. +func mergePPC64SldiSrw(sld, srw int64) int64 { + if sld > srw || srw >= 32 { + return 0 + } + mask_r := uint32(0xFFFFFFFF) >> uint(srw) + mask_l := uint32(0xFFFFFFFF) >> uint(sld) + mask := (mask_r & mask_l) << uint(sld) + return encodePPC64RotateMask((32-srw+sld)&31, int64(mask), 32) +} + +// Convenience function to rotate a 32 bit constant value by another constant. +func rotateLeft32(v, rotate int64) int64 { + return int64(bits.RotateLeft32(uint32(v), int(rotate))) +} + // encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format. func armBFAuxInt(lsb, width int64) arm64BitField { if lsb < 0 || lsb > 63 { -- cgit v1.2.1 From 5c1122b52895c1d7f2d41ea16d354bce636496d0 Mon Sep 17 00:00:00 2001 From: Alberto Donizetti Date: Tue, 27 Oct 2020 11:03:21 +0100 Subject: cmd/compile: delete isPowerOfTwo, switch to isPowerOfTwo64 rewrite.go has two identical functions isPowerOfTwo and isPowerOfTwo64; the former has been there for a while, while the latter was added together with isPowerOfTwo{8,16,32} for use in typed rules. This change deletes isPowerOfTwo and switch to using isPowerOfTwo64 everywhere. 
Change-Id: If26c94565d2393fac6f0ba117ee7ee2fc915f7cd Reviewed-on: https://go-review.googlesource.com/c/go/+/265417 Trust: Alberto Donizetti Run-TryBot: Alberto Donizetti TryBot-Result: Go Bot Reviewed-by: Keith Randall --- src/cmd/compile/internal/ssa/rewrite.go | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'src/cmd/compile/internal/ssa/rewrite.go') diff --git a/src/cmd/compile/internal/ssa/rewrite.go b/src/cmd/compile/internal/ssa/rewrite.go index 9b3c83d1cf..ab6d020942 100644 --- a/src/cmd/compile/internal/ssa/rewrite.go +++ b/src/cmd/compile/internal/ssa/rewrite.go @@ -449,10 +449,7 @@ func log2uint32(n int64) int64 { return int64(bits.Len32(uint32(n))) - 1 } -// isPowerOfTwo reports whether n is a power of 2. -func isPowerOfTwo(n int64) bool { - return n > 0 && n&(n-1) == 0 -} +// isPowerOfTwo functions report whether n is a power of 2. func isPowerOfTwo8(n int8) bool { return n > 0 && n&(n-1) == 0 } @@ -1555,7 +1552,7 @@ func (bfc arm64BitField) getARM64BFwidth() int64 { // checks if mask >> rshift applied at lsb is a valid arm64 bitfield op mask. func isARM64BFMask(lsb, mask, rshift int64) bool { shiftedMask := int64(uint64(mask) >> uint64(rshift)) - return shiftedMask != 0 && isPowerOfTwo(shiftedMask+1) && nto(shiftedMask)+lsb < 64 + return shiftedMask != 0 && isPowerOfTwo64(shiftedMask+1) && nto(shiftedMask)+lsb < 64 } // returns the bitfield width of mask >> rshift for arm64 bitfield ops -- cgit v1.2.1