summary | refs | log | tree | commit | diff
path: root/test/codegen/arithmetic.go
diff options
context:
space:
mode:
author: eric fang <eric.fang@arm.com> 2022-07-29 04:05:03 +0000
committer: Eric Fang <eric.fang@arm.com> 2022-08-10 02:13:53 +0000
commit: efe5929dbd23054395ea128325edba8d23b6d5fc (patch)
tree: 5085d9d0206273ab96162d39380b02b0510561de /test/codegen/arithmetic.go
parent: 8dc7710faeda33b03fe32d4e7c800f0dcf27c698 (diff)
download: go-git-efe5929dbd23054395ea128325edba8d23b6d5fc.tar.gz
cmd/compile/internal/ssa: optimize ARM64 code with TST
For signed comparisons, the following four optimization rules hold:

(CMPconst [0] z:(AND x y)) && z.Uses == 1 => (TST x y)
(CMPWconst [0] z:(AND x y)) && z.Uses == 1 => (TSTW x y)
(CMPconst [0] x:(ANDconst [c] y)) && x.Uses == 1 => (TSTconst [c] y)
(CMPWconst [0] x:(ANDconst [c] y)) && x.Uses == 1 => (TSTWconst [int32(c)] y)

But currently they only apply to jump instructions, not to conditional
instructions within a block, such as cset, csel, etc. This CL extends the
above rules into blocks so that conditional instructions can also be
optimized.

name                       old time/op  new time/op  delta
DivisiblePow2constI64-160  1.04ns ± 0%  0.86ns ± 0%  -17.30%  (p=0.008 n=5+5)
DivisiblePow2constI32-160  1.04ns ± 0%  0.87ns ± 0%  -16.16%  (p=0.016 n=4+5)
DivisiblePow2constI16-160  1.04ns ± 0%  0.87ns ± 0%  -16.03%  (p=0.008 n=5+5)
DivisiblePow2constI8-160   1.04ns ± 0%  0.86ns ± 0%  -17.15%  (p=0.008 n=5+5)

Change-Id: I6bc34bff30862210e8dd001e0340b8fe502fe3de
Reviewed-on: https://go-review.googlesource.com/c/go/+/420434
Reviewed-by: Cherry Mui <cherryyz@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Run-TryBot: Eric Fang <eric.fang@arm.com>
Diffstat (limited to 'test/codegen/arithmetic.go')
-rw-r--r-- test/codegen/arithmetic.go 22
1 files changed, 11 insertions, 11 deletions
diff --git a/test/codegen/arithmetic.go b/test/codegen/arithmetic.go
index 00841d52ae..3fb9ce646b 100644
--- a/test/codegen/arithmetic.go
+++ b/test/codegen/arithmetic.go
@@ -176,7 +176,7 @@ func MergeMuls2(n int) int {
// amd64:"IMUL3Q\t[$]23","(ADDQ\t[$]29)|(LEAQ\t29)"
// 386:"IMUL3L\t[$]23","ADDL\t[$]29"
// ppc64le/power9:"MADDLD",-"MULLD\t[$]23",-"ADD\t[$]29"
- // ppc64le/power8:"MULLD\t[$]23","ADD\t[$]29"
+ // ppc64le/power8:"MULLD\t[$]23","ADD\t[$]29"
return 5*n + 7*(n+1) + 11*(n+2) // 23n + 29
}
@@ -282,7 +282,7 @@ func Pow2DivisibleSigned(n1, n2 int) (bool, bool) {
// 386:"TESTL\t[$]63",-"DIVL",-"SHRL"
// amd64:"TESTQ\t[$]63",-"DIVQ",-"SHRQ"
// arm:"AND\t[$]63",-".*udiv",-"SRA"
- // arm64:"AND\t[$]63",-"UDIV",-"ASR"
+ // arm64:"TST\t[$]63",-"UDIV",-"ASR",-"AND"
// ppc64:"ANDCC\t[$]63",-"SRAD"
// ppc64le:"ANDCC\t[$]63",-"SRAD"
a := n1%64 == 0 // signed divisible
@@ -290,7 +290,7 @@ func Pow2DivisibleSigned(n1, n2 int) (bool, bool) {
// 386:"TESTL\t[$]63",-"DIVL",-"SHRL"
// amd64:"TESTQ\t[$]63",-"DIVQ",-"SHRQ"
// arm:"AND\t[$]63",-".*udiv",-"SRA"
- // arm64:"AND\t[$]63",-"UDIV",-"ASR"
+ // arm64:"TST\t[$]63",-"UDIV",-"ASR",-"AND"
// ppc64:"ANDCC\t[$]63",-"SRAD"
// ppc64le:"ANDCC\t[$]63",-"SRAD"
b := n2%64 != 0 // signed indivisible
@@ -572,16 +572,16 @@ func divInt(v int64) int64 {
// "(z + C) -x -> C + (z - x)" can optimize the following cases.
func constantFold1(i0, j0, i1, j1, i2, j2, i3, j3 int) (int, int, int, int) {
// arm64:"SUB","ADD\t[$]2"
- // ppc64:"SUB","ADD\t[$]2"
- // ppc64le:"SUB","ADD\t[$]2"
+ // ppc64:"SUB","ADD\t[$]2"
+ // ppc64le:"SUB","ADD\t[$]2"
r0 := (i0 + 3) - (j0 + 1)
// arm64:"SUB","SUB\t[$]4"
- // ppc64:"SUB","ADD\t[$]-4"
- // ppc64le:"SUB","ADD\t[$]-4"
+ // ppc64:"SUB","ADD\t[$]-4"
+ // ppc64le:"SUB","ADD\t[$]-4"
r1 := (i1 - 3) - (j1 + 1)
// arm64:"SUB","ADD\t[$]4"
- // ppc64:"SUB","ADD\t[$]4"
- // ppc64le:"SUB","ADD\t[$]4"
+ // ppc64:"SUB","ADD\t[$]4"
+ // ppc64le:"SUB","ADD\t[$]4"
r2 := (i2 + 3) - (j2 - 1)
// arm64:"SUB","SUB\t[$]2"
// ppc64:"SUB","ADD\t[$]-2"
@@ -606,8 +606,8 @@ func constantFold2(i0, j0, i1, j1 int) (int, int) {
func constantFold3(i, j int) int {
// arm64: "MOVD\t[$]30","MUL",-"ADD",-"LSL"
- // ppc64:"MULLD\t[$]30","MULLD"
- // ppc64le:"MULLD\t[$]30","MULLD"
+ // ppc64:"MULLD\t[$]30","MULLD"
+ // ppc64le:"MULLD\t[$]30","MULLD"
r := (5 * i) * (6 * j)
return r
}