diff options
author | eric fang <eric.fang@arm.com> | 2022-07-29 04:05:03 +0000 |
---|---|---|
committer | Eric Fang <eric.fang@arm.com> | 2022-08-10 02:13:53 +0000 |
commit | efe5929dbd23054395ea128325edba8d23b6d5fc (patch) | |
tree | 5085d9d0206273ab96162d39380b02b0510561de /test/codegen/arithmetic.go | |
parent | 8dc7710faeda33b03fe32d4e7c800f0dcf27c698 (diff) | |
download | go-git-efe5929dbd23054395ea128325edba8d23b6d5fc.tar.gz |
cmd/compile/internal/ssa: optimize ARM64 code with TST
For signed comparisons, the following four optimization rules hold:
(CMPconst [0] z:(AND x y)) && z.Uses == 1 => (TST x y)
(CMPWconst [0] z:(AND x y)) && z.Uses == 1 => (TSTW x y)
(CMPconst [0] x:(ANDconst [c] y)) && x.Uses == 1 => (TSTconst [c] y)
(CMPWconst [0] x:(ANDconst [c] y)) && x.Uses == 1 => (TSTWconst [int32(c)] y)
But currently they only apply to jump instructions, not to conditional
instructions within a block, such as cset, csel, etc. This CL extends
the above rules into blocks so that conditional instructions can also be
optimized.
```
name                       old time/op  new time/op  delta
DivisiblePow2constI64-160  1.04ns ± 0%  0.86ns ± 0%  -17.30%  (p=0.008 n=5+5)
DivisiblePow2constI32-160  1.04ns ± 0%  0.87ns ± 0%  -16.16%  (p=0.016 n=4+5)
DivisiblePow2constI16-160  1.04ns ± 0%  0.87ns ± 0%  -16.03%  (p=0.008 n=5+5)
DivisiblePow2constI8-160   1.04ns ± 0%  0.86ns ± 0%  -17.15%  (p=0.008 n=5+5)
```
Change-Id: I6bc34bff30862210e8dd001e0340b8fe502fe3de
Reviewed-on: https://go-review.googlesource.com/c/go/+/420434
Reviewed-by: Cherry Mui <cherryyz@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Run-TryBot: Eric Fang <eric.fang@arm.com>
Diffstat (limited to 'test/codegen/arithmetic.go')
-rw-r--r-- | test/codegen/arithmetic.go | 22 |
1 files changed, 11 insertions, 11 deletions
```diff
diff --git a/test/codegen/arithmetic.go b/test/codegen/arithmetic.go
index 00841d52ae..3fb9ce646b 100644
--- a/test/codegen/arithmetic.go
+++ b/test/codegen/arithmetic.go
@@ -176,7 +176,7 @@ func MergeMuls2(n int) int {
 	// amd64:"IMUL3Q\t[$]23","(ADDQ\t[$]29)|(LEAQ\t29)"
 	// 386:"IMUL3L\t[$]23","ADDL\t[$]29"
 	// ppc64le/power9:"MADDLD",-"MULLD\t[$]23",-"ADD\t[$]29"
-	// ppc64le/power8:"MULLD\t[$]23","ADD\t[$]29"
+	// ppc64le/power8:"MULLD\t[$]23","ADD\t[$]29"
 	return 5*n + 7*(n+1) + 11*(n+2) // 23n + 29
 }
@@ -282,7 +282,7 @@ func Pow2DivisibleSigned(n1, n2 int) (bool, bool) {
 	// 386:"TESTL\t[$]63",-"DIVL",-"SHRL"
 	// amd64:"TESTQ\t[$]63",-"DIVQ",-"SHRQ"
 	// arm:"AND\t[$]63",-".*udiv",-"SRA"
-	// arm64:"AND\t[$]63",-"UDIV",-"ASR"
+	// arm64:"TST\t[$]63",-"UDIV",-"ASR",-"AND"
 	// ppc64:"ANDCC\t[$]63",-"SRAD"
 	// ppc64le:"ANDCC\t[$]63",-"SRAD"
 	a := n1%64 == 0 // signed divisible
@@ -290,7 +290,7 @@ func Pow2DivisibleSigned(n1, n2 int) (bool, bool) {
 	// 386:"TESTL\t[$]63",-"DIVL",-"SHRL"
 	// amd64:"TESTQ\t[$]63",-"DIVQ",-"SHRQ"
 	// arm:"AND\t[$]63",-".*udiv",-"SRA"
-	// arm64:"AND\t[$]63",-"UDIV",-"ASR"
+	// arm64:"TST\t[$]63",-"UDIV",-"ASR",-"AND"
 	// ppc64:"ANDCC\t[$]63",-"SRAD"
 	// ppc64le:"ANDCC\t[$]63",-"SRAD"
 	b := n2%64 != 0 // signed indivisible
@@ -572,16 +572,16 @@ func divInt(v int64) int64 {
 // "(z + C) -x -> C + (z - x)" can optimize the following cases.
 func constantFold1(i0, j0, i1, j1, i2, j2, i3, j3 int) (int, int, int, int) {
 	// arm64:"SUB","ADD\t[$]2"
-	// ppc64:"SUB","ADD\t[$]2"
-	// ppc64le:"SUB","ADD\t[$]2"
+	// ppc64:"SUB","ADD\t[$]2"
+	// ppc64le:"SUB","ADD\t[$]2"
 	r0 := (i0 + 3) - (j0 + 1)
 	// arm64:"SUB","SUB\t[$]4"
-	// ppc64:"SUB","ADD\t[$]-4"
-	// ppc64le:"SUB","ADD\t[$]-4"
+	// ppc64:"SUB","ADD\t[$]-4"
+	// ppc64le:"SUB","ADD\t[$]-4"
 	r1 := (i1 - 3) - (j1 + 1)
 	// arm64:"SUB","ADD\t[$]4"
-	// ppc64:"SUB","ADD\t[$]4"
-	// ppc64le:"SUB","ADD\t[$]4"
+	// ppc64:"SUB","ADD\t[$]4"
+	// ppc64le:"SUB","ADD\t[$]4"
 	r2 := (i2 + 3) - (j2 - 1)
 	// arm64:"SUB","SUB\t[$]2"
 	// ppc64:"SUB","ADD\t[$]-2"
 	// ppc64le:"SUB","ADD\t[$]-2"
@@ -606,8 +606,8 @@ func constantFold2(i0, j0, i1, j1 int) (int, int) {
 func constantFold3(i, j int) int {
 	// arm64: "MOVD\t[$]30","MUL",-"ADD",-"LSL"
-	// ppc64:"MULLD\t[$]30","MULLD"
-	// ppc64le:"MULLD\t[$]30","MULLD"
+	// ppc64:"MULLD\t[$]30","MULLD"
+	// ppc64le:"MULLD\t[$]30","MULLD"
 	r := (5 * i) * (6 * j)
 	return r
 }
```