summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKeith Randall <khr@golang.org>2020-04-11 22:15:58 -0700
committerKeith Randall <khr@golang.org>2020-04-30 17:19:57 +0000
commit882ec701d2bc5df807637418d726f644ed2e0ef6 (patch)
tree3cc6a88d461ea7a70b5e96dbf5c1b140f4cf3a19
parent553e003414d3aa90cc39830ee22f08453d9f3408 (diff)
downloadgo-git-882ec701d2bc5df807637418d726f644ed2e0ef6.tar.gz
cmd/compile: add indexed load+op operations to amd64
name old time/op new time/op delta LoadAdd-16 545ns ± 0% 456ns ± 0% -16.31% (p=0.000 n=10+10) Update #36468 Change-Id: I84f390d55490648fa1f58cdbc24fd74c4f1bc8c1 Reviewed-on: https://go-review.googlesource.com/c/go/+/227960 Run-TryBot: Keith Randall <khr@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Josh Bleecher Snyder <josharian@gmail.com>
-rw-r--r--src/cmd/compile/internal/amd64/ssa.go22
-rw-r--r--src/cmd/compile/internal/gc/bench_test.go21
-rw-r--r--src/cmd/compile/internal/ssa/addressingmodes.go37
-rw-r--r--src/cmd/compile/internal/ssa/gen/AMD64Ops.go35
-rw-r--r--src/cmd/compile/internal/ssa/opGen.go525
-rw-r--r--test/codegen/memops.go22
6 files changed, 657 insertions, 5 deletions
diff --git a/src/cmd/compile/internal/amd64/ssa.go b/src/cmd/compile/internal/amd64/ssa.go
index 2b75bd6549..e348e24a95 100644
--- a/src/cmd/compile/internal/amd64/ssa.go
+++ b/src/cmd/compile/internal/amd64/ssa.go
@@ -846,6 +846,28 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
if v.Reg() != v.Args[0].Reg() {
v.Fatalf("input[0] and output not in same register %s", v.LongString())
}
+ case ssa.OpAMD64ADDLloadidx1, ssa.OpAMD64ADDLloadidx4, ssa.OpAMD64ADDLloadidx8, ssa.OpAMD64ADDQloadidx1, ssa.OpAMD64ADDQloadidx8,
+ ssa.OpAMD64SUBLloadidx1, ssa.OpAMD64SUBLloadidx4, ssa.OpAMD64SUBLloadidx8, ssa.OpAMD64SUBQloadidx1, ssa.OpAMD64SUBQloadidx8,
+ ssa.OpAMD64ANDLloadidx1, ssa.OpAMD64ANDLloadidx4, ssa.OpAMD64ANDLloadidx8, ssa.OpAMD64ANDQloadidx1, ssa.OpAMD64ANDQloadidx8,
+ ssa.OpAMD64ORLloadidx1, ssa.OpAMD64ORLloadidx4, ssa.OpAMD64ORLloadidx8, ssa.OpAMD64ORQloadidx1, ssa.OpAMD64ORQloadidx8,
+ ssa.OpAMD64XORLloadidx1, ssa.OpAMD64XORLloadidx4, ssa.OpAMD64XORLloadidx8, ssa.OpAMD64XORQloadidx1, ssa.OpAMD64XORQloadidx8:
+ p := s.Prog(v.Op.Asm())
+
+ r, i := v.Args[1].Reg(), v.Args[2].Reg()
+ p.From.Type = obj.TYPE_MEM
+ p.From.Scale = v.Op.Scale()
+ if p.From.Scale == 1 && i == x86.REG_SP {
+ r, i = i, r
+ }
+ p.From.Reg = r
+ p.From.Index = i
+
+ gc.AddAux(&p.From, v)
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = v.Reg()
+ if v.Reg() != v.Args[0].Reg() {
+ v.Fatalf("input[0] and output not in same register %s", v.LongString())
+ }
case ssa.OpAMD64DUFFZERO:
off := duffStart(v.AuxInt)
adj := duffAdj(v.AuxInt)
diff --git a/src/cmd/compile/internal/gc/bench_test.go b/src/cmd/compile/internal/gc/bench_test.go
new file mode 100644
index 0000000000..b20adefa43
--- /dev/null
+++ b/src/cmd/compile/internal/gc/bench_test.go
@@ -0,0 +1,21 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gc
+
+import "testing"
+
+var globl int64
+
+func BenchmarkLoadAdd(b *testing.B) {
+ x := make([]int64, 1024)
+ y := make([]int64, 1024)
+ for i := 0; i < b.N; i++ {
+ var s int64
+ for i := range x {
+ s ^= x[i] + y[i]
+ }
+ globl = s
+ }
+}
diff --git a/src/cmd/compile/internal/ssa/addressingmodes.go b/src/cmd/compile/internal/ssa/addressingmodes.go
index eff0f8686a..0f68923e1f 100644
--- a/src/cmd/compile/internal/ssa/addressingmodes.go
+++ b/src/cmd/compile/internal/ssa/addressingmodes.go
@@ -217,6 +217,43 @@ var combine = map[[2]Op]Op{
[2]Op{OpAMD64CMPQconstload, OpAMD64LEAQ1}: OpAMD64CMPQconstloadidx1,
[2]Op{OpAMD64CMPQconstload, OpAMD64LEAQ8}: OpAMD64CMPQconstloadidx8,
+ [2]Op{OpAMD64ADDLload, OpAMD64ADDQ}: OpAMD64ADDLloadidx1,
+ [2]Op{OpAMD64ADDQload, OpAMD64ADDQ}: OpAMD64ADDQloadidx1,
+ [2]Op{OpAMD64SUBLload, OpAMD64ADDQ}: OpAMD64SUBLloadidx1,
+ [2]Op{OpAMD64SUBQload, OpAMD64ADDQ}: OpAMD64SUBQloadidx1,
+ [2]Op{OpAMD64ANDLload, OpAMD64ADDQ}: OpAMD64ANDLloadidx1,
+ [2]Op{OpAMD64ANDQload, OpAMD64ADDQ}: OpAMD64ANDQloadidx1,
+ [2]Op{OpAMD64ORLload, OpAMD64ADDQ}: OpAMD64ORLloadidx1,
+ [2]Op{OpAMD64ORQload, OpAMD64ADDQ}: OpAMD64ORQloadidx1,
+ [2]Op{OpAMD64XORLload, OpAMD64ADDQ}: OpAMD64XORLloadidx1,
+ [2]Op{OpAMD64XORQload, OpAMD64ADDQ}: OpAMD64XORQloadidx1,
+
+ [2]Op{OpAMD64ADDLload, OpAMD64LEAQ1}: OpAMD64ADDLloadidx1,
+ [2]Op{OpAMD64ADDLload, OpAMD64LEAQ4}: OpAMD64ADDLloadidx4,
+ [2]Op{OpAMD64ADDLload, OpAMD64LEAQ8}: OpAMD64ADDLloadidx8,
+ [2]Op{OpAMD64ADDQload, OpAMD64LEAQ1}: OpAMD64ADDQloadidx1,
+ [2]Op{OpAMD64ADDQload, OpAMD64LEAQ8}: OpAMD64ADDQloadidx8,
+ [2]Op{OpAMD64SUBLload, OpAMD64LEAQ1}: OpAMD64SUBLloadidx1,
+ [2]Op{OpAMD64SUBLload, OpAMD64LEAQ4}: OpAMD64SUBLloadidx4,
+ [2]Op{OpAMD64SUBLload, OpAMD64LEAQ8}: OpAMD64SUBLloadidx8,
+ [2]Op{OpAMD64SUBQload, OpAMD64LEAQ1}: OpAMD64SUBQloadidx1,
+ [2]Op{OpAMD64SUBQload, OpAMD64LEAQ8}: OpAMD64SUBQloadidx8,
+ [2]Op{OpAMD64ANDLload, OpAMD64LEAQ1}: OpAMD64ANDLloadidx1,
+ [2]Op{OpAMD64ANDLload, OpAMD64LEAQ4}: OpAMD64ANDLloadidx4,
+ [2]Op{OpAMD64ANDLload, OpAMD64LEAQ8}: OpAMD64ANDLloadidx8,
+ [2]Op{OpAMD64ANDQload, OpAMD64LEAQ1}: OpAMD64ANDQloadidx1,
+ [2]Op{OpAMD64ANDQload, OpAMD64LEAQ8}: OpAMD64ANDQloadidx8,
+ [2]Op{OpAMD64ORLload, OpAMD64LEAQ1}: OpAMD64ORLloadidx1,
+ [2]Op{OpAMD64ORLload, OpAMD64LEAQ4}: OpAMD64ORLloadidx4,
+ [2]Op{OpAMD64ORLload, OpAMD64LEAQ8}: OpAMD64ORLloadidx8,
+ [2]Op{OpAMD64ORQload, OpAMD64LEAQ1}: OpAMD64ORQloadidx1,
+ [2]Op{OpAMD64ORQload, OpAMD64LEAQ8}: OpAMD64ORQloadidx8,
+ [2]Op{OpAMD64XORLload, OpAMD64LEAQ1}: OpAMD64XORLloadidx1,
+ [2]Op{OpAMD64XORLload, OpAMD64LEAQ4}: OpAMD64XORLloadidx4,
+ [2]Op{OpAMD64XORLload, OpAMD64LEAQ8}: OpAMD64XORLloadidx8,
+ [2]Op{OpAMD64XORQload, OpAMD64LEAQ1}: OpAMD64XORQloadidx1,
+ [2]Op{OpAMD64XORQload, OpAMD64LEAQ8}: OpAMD64XORQloadidx8,
+
// 386
[2]Op{Op386MOVBload, Op386ADDL}: Op386MOVBloadidx1,
[2]Op{Op386MOVWload, Op386ADDL}: Op386MOVWloadidx1,
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
index a5c1e5c84d..0ecbc940e5 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
@@ -136,10 +136,11 @@ func init() {
readflags = regInfo{inputs: nil, outputs: gponly}
flagsgpax = regInfo{inputs: nil, clobbers: ax, outputs: []regMask{gp &^ ax}}
- gpload = regInfo{inputs: []regMask{gpspsb, 0}, outputs: gponly}
- gp21load = regInfo{inputs: []regMask{gp, gpspsb, 0}, outputs: gponly}
- gploadidx = regInfo{inputs: []regMask{gpspsb, gpsp, 0}, outputs: gponly}
- gp21pax = regInfo{inputs: []regMask{gp &^ ax, gp}, outputs: []regMask{gp &^ ax}, clobbers: ax}
+ gpload = regInfo{inputs: []regMask{gpspsb, 0}, outputs: gponly}
+ gp21load = regInfo{inputs: []regMask{gp, gpspsb, 0}, outputs: gponly}
+ gploadidx = regInfo{inputs: []regMask{gpspsb, gpsp, 0}, outputs: gponly}
+ gp21loadidx = regInfo{inputs: []regMask{gp, gpspsb, gpsp, 0}, outputs: gponly}
+ gp21pax = regInfo{inputs: []regMask{gp &^ ax, gp}, outputs: []regMask{gp &^ ax}, clobbers: ax}
gpstore = regInfo{inputs: []regMask{gpspsb, gpsp, 0}}
gpstoreconst = regInfo{inputs: []regMask{gpspsb, 0}}
@@ -409,6 +410,32 @@ func init() {
{name: "XORQload", argLength: 3, reg: gp21load, asm: "XORQ", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 ^ tmp, tmp loaded from arg1+auxint+aux, arg2 = mem
{name: "XORLload", argLength: 3, reg: gp21load, asm: "XORL", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 ^ tmp, tmp loaded from arg1+auxint+aux, arg2 = mem
+ {name: "ADDLloadidx1", argLength: 4, reg: gp21loadidx, asm: "ADDL", scale: 1, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 + tmp, tmp loaded from arg1+ arg2+auxint+aux, arg3 = mem
+ {name: "ADDLloadidx4", argLength: 4, reg: gp21loadidx, asm: "ADDL", scale: 4, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 + tmp, tmp loaded from arg1+4*arg2+auxint+aux, arg3 = mem
+ {name: "ADDLloadidx8", argLength: 4, reg: gp21loadidx, asm: "ADDL", scale: 8, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 + tmp, tmp loaded from arg1+8*arg2+auxint+aux, arg3 = mem
+ {name: "ADDQloadidx1", argLength: 4, reg: gp21loadidx, asm: "ADDQ", scale: 1, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 + tmp, tmp loaded from arg1+ arg2+auxint+aux, arg3 = mem
+ {name: "ADDQloadidx8", argLength: 4, reg: gp21loadidx, asm: "ADDQ", scale: 8, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 + tmp, tmp loaded from arg1+8*arg2+auxint+aux, arg3 = mem
+ {name: "SUBLloadidx1", argLength: 4, reg: gp21loadidx, asm: "SUBL", scale: 1, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 - tmp, tmp loaded from arg1+ arg2+auxint+aux, arg3 = mem
+ {name: "SUBLloadidx4", argLength: 4, reg: gp21loadidx, asm: "SUBL", scale: 4, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 - tmp, tmp loaded from arg1+4*arg2+auxint+aux, arg3 = mem
+ {name: "SUBLloadidx8", argLength: 4, reg: gp21loadidx, asm: "SUBL", scale: 8, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 - tmp, tmp loaded from arg1+8*arg2+auxint+aux, arg3 = mem
+ {name: "SUBQloadidx1", argLength: 4, reg: gp21loadidx, asm: "SUBQ", scale: 1, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 - tmp, tmp loaded from arg1+ arg2+auxint+aux, arg3 = mem
+ {name: "SUBQloadidx8", argLength: 4, reg: gp21loadidx, asm: "SUBQ", scale: 8, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 - tmp, tmp loaded from arg1+8*arg2+auxint+aux, arg3 = mem
+ {name: "ANDLloadidx1", argLength: 4, reg: gp21loadidx, asm: "ANDL", scale: 1, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 & tmp, tmp loaded from arg1+ arg2+auxint+aux, arg3 = mem
+ {name: "ANDLloadidx4", argLength: 4, reg: gp21loadidx, asm: "ANDL", scale: 4, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 & tmp, tmp loaded from arg1+4*arg2+auxint+aux, arg3 = mem
+ {name: "ANDLloadidx8", argLength: 4, reg: gp21loadidx, asm: "ANDL", scale: 8, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 & tmp, tmp loaded from arg1+8*arg2+auxint+aux, arg3 = mem
+ {name: "ANDQloadidx1", argLength: 4, reg: gp21loadidx, asm: "ANDQ", scale: 1, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 & tmp, tmp loaded from arg1+ arg2+auxint+aux, arg3 = mem
+ {name: "ANDQloadidx8", argLength: 4, reg: gp21loadidx, asm: "ANDQ", scale: 8, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 & tmp, tmp loaded from arg1+8*arg2+auxint+aux, arg3 = mem
+ {name: "ORLloadidx1", argLength: 4, reg: gp21loadidx, asm: "ORL", scale: 1, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 | tmp, tmp loaded from arg1+ arg2+auxint+aux, arg3 = mem
+ {name: "ORLloadidx4", argLength: 4, reg: gp21loadidx, asm: "ORL", scale: 4, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 | tmp, tmp loaded from arg1+4*arg2+auxint+aux, arg3 = mem
+ {name: "ORLloadidx8", argLength: 4, reg: gp21loadidx, asm: "ORL", scale: 8, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 | tmp, tmp loaded from arg1+8*arg2+auxint+aux, arg3 = mem
+ {name: "ORQloadidx1", argLength: 4, reg: gp21loadidx, asm: "ORQ", scale: 1, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 | tmp, tmp loaded from arg1+ arg2+auxint+aux, arg3 = mem
+ {name: "ORQloadidx8", argLength: 4, reg: gp21loadidx, asm: "ORQ", scale: 8, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 | tmp, tmp loaded from arg1+8*arg2+auxint+aux, arg3 = mem
+ {name: "XORLloadidx1", argLength: 4, reg: gp21loadidx, asm: "XORL", scale: 1, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 ^ tmp, tmp loaded from arg1+ arg2+auxint+aux, arg3 = mem
+ {name: "XORLloadidx4", argLength: 4, reg: gp21loadidx, asm: "XORL", scale: 4, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 ^ tmp, tmp loaded from arg1+4*arg2+auxint+aux, arg3 = mem
+ {name: "XORLloadidx8", argLength: 4, reg: gp21loadidx, asm: "XORL", scale: 8, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 ^ tmp, tmp loaded from arg1+8*arg2+auxint+aux, arg3 = mem
+ {name: "XORQloadidx1", argLength: 4, reg: gp21loadidx, asm: "XORQ", scale: 1, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 ^ tmp, tmp loaded from arg1+ arg2+auxint+aux, arg3 = mem
+ {name: "XORQloadidx8", argLength: 4, reg: gp21loadidx, asm: "XORQ", scale: 8, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 ^ tmp, tmp loaded from arg1+8*arg2+auxint+aux, arg3 = mem
+
// direct binary-op on memory (read-modify-write)
{name: "ADDQmodify", argLength: 3, reg: gpstore, asm: "ADDQ", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // *(arg0+auxint+aux) += arg1, arg2=mem
{name: "SUBQmodify", argLength: 3, reg: gpstore, asm: "SUBQ", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // *(arg0+auxint+aux) -= arg1, arg2=mem
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index 614147ff2d..2bc3a5bc1d 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -718,6 +718,31 @@ const (
OpAMD64ORLload
OpAMD64XORQload
OpAMD64XORLload
+ OpAMD64ADDLloadidx1
+ OpAMD64ADDLloadidx4
+ OpAMD64ADDLloadidx8
+ OpAMD64ADDQloadidx1
+ OpAMD64ADDQloadidx8
+ OpAMD64SUBLloadidx1
+ OpAMD64SUBLloadidx4
+ OpAMD64SUBLloadidx8
+ OpAMD64SUBQloadidx1
+ OpAMD64SUBQloadidx8
+ OpAMD64ANDLloadidx1
+ OpAMD64ANDLloadidx4
+ OpAMD64ANDLloadidx8
+ OpAMD64ANDQloadidx1
+ OpAMD64ANDQloadidx8
+ OpAMD64ORLloadidx1
+ OpAMD64ORLloadidx4
+ OpAMD64ORLloadidx8
+ OpAMD64ORQloadidx1
+ OpAMD64ORQloadidx8
+ OpAMD64XORLloadidx1
+ OpAMD64XORLloadidx4
+ OpAMD64XORLloadidx8
+ OpAMD64XORQloadidx1
+ OpAMD64XORQloadidx8
OpAMD64ADDQmodify
OpAMD64SUBQmodify
OpAMD64ANDQmodify
@@ -9022,6 +9047,506 @@ var opcodeTable = [...]opInfo{
},
},
{
+ name: "ADDLloadidx1",
+ auxType: auxSymOff,
+ argLen: 4,
+ resultInArg0: true,
+ clobberFlags: true,
+ symEffect: SymRead,
+ asm: x86.AADDL,
+ scale: 1,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {2, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {1, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+ },
+ outputs: []outputInfo{
+ {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ },
+ },
+ },
+ {
+ name: "ADDLloadidx4",
+ auxType: auxSymOff,
+ argLen: 4,
+ resultInArg0: true,
+ clobberFlags: true,
+ symEffect: SymRead,
+ asm: x86.AADDL,
+ scale: 4,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {2, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {1, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+ },
+ outputs: []outputInfo{
+ {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ },
+ },
+ },
+ {
+ name: "ADDLloadidx8",
+ auxType: auxSymOff,
+ argLen: 4,
+ resultInArg0: true,
+ clobberFlags: true,
+ symEffect: SymRead,
+ asm: x86.AADDL,
+ scale: 8,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {2, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {1, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+ },
+ outputs: []outputInfo{
+ {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ },
+ },
+ },
+ {
+ name: "ADDQloadidx1",
+ auxType: auxSymOff,
+ argLen: 4,
+ resultInArg0: true,
+ clobberFlags: true,
+ symEffect: SymRead,
+ asm: x86.AADDQ,
+ scale: 1,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {2, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {1, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+ },
+ outputs: []outputInfo{
+ {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ },
+ },
+ },
+ {
+ name: "ADDQloadidx8",
+ auxType: auxSymOff,
+ argLen: 4,
+ resultInArg0: true,
+ clobberFlags: true,
+ symEffect: SymRead,
+ asm: x86.AADDQ,
+ scale: 8,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {2, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {1, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+ },
+ outputs: []outputInfo{
+ {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ },
+ },
+ },
+ {
+ name: "SUBLloadidx1",
+ auxType: auxSymOff,
+ argLen: 4,
+ resultInArg0: true,
+ clobberFlags: true,
+ symEffect: SymRead,
+ asm: x86.ASUBL,
+ scale: 1,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {2, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {1, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+ },
+ outputs: []outputInfo{
+ {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ },
+ },
+ },
+ {
+ name: "SUBLloadidx4",
+ auxType: auxSymOff,
+ argLen: 4,
+ resultInArg0: true,
+ clobberFlags: true,
+ symEffect: SymRead,
+ asm: x86.ASUBL,
+ scale: 4,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {2, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {1, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+ },
+ outputs: []outputInfo{
+ {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ },
+ },
+ },
+ {
+ name: "SUBLloadidx8",
+ auxType: auxSymOff,
+ argLen: 4,
+ resultInArg0: true,
+ clobberFlags: true,
+ symEffect: SymRead,
+ asm: x86.ASUBL,
+ scale: 8,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {2, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {1, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+ },
+ outputs: []outputInfo{
+ {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ },
+ },
+ },
+ {
+ name: "SUBQloadidx1",
+ auxType: auxSymOff,
+ argLen: 4,
+ resultInArg0: true,
+ clobberFlags: true,
+ symEffect: SymRead,
+ asm: x86.ASUBQ,
+ scale: 1,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {2, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {1, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+ },
+ outputs: []outputInfo{
+ {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ },
+ },
+ },
+ {
+ name: "SUBQloadidx8",
+ auxType: auxSymOff,
+ argLen: 4,
+ resultInArg0: true,
+ clobberFlags: true,
+ symEffect: SymRead,
+ asm: x86.ASUBQ,
+ scale: 8,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {2, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {1, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+ },
+ outputs: []outputInfo{
+ {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ },
+ },
+ },
+ {
+ name: "ANDLloadidx1",
+ auxType: auxSymOff,
+ argLen: 4,
+ resultInArg0: true,
+ clobberFlags: true,
+ symEffect: SymRead,
+ asm: x86.AANDL,
+ scale: 1,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {2, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {1, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+ },
+ outputs: []outputInfo{
+ {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ },
+ },
+ },
+ {
+ name: "ANDLloadidx4",
+ auxType: auxSymOff,
+ argLen: 4,
+ resultInArg0: true,
+ clobberFlags: true,
+ symEffect: SymRead,
+ asm: x86.AANDL,
+ scale: 4,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {2, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {1, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+ },
+ outputs: []outputInfo{
+ {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ },
+ },
+ },
+ {
+ name: "ANDLloadidx8",
+ auxType: auxSymOff,
+ argLen: 4,
+ resultInArg0: true,
+ clobberFlags: true,
+ symEffect: SymRead,
+ asm: x86.AANDL,
+ scale: 8,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {2, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {1, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+ },
+ outputs: []outputInfo{
+ {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ },
+ },
+ },
+ {
+ name: "ANDQloadidx1",
+ auxType: auxSymOff,
+ argLen: 4,
+ resultInArg0: true,
+ clobberFlags: true,
+ symEffect: SymRead,
+ asm: x86.AANDQ,
+ scale: 1,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {2, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {1, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+ },
+ outputs: []outputInfo{
+ {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ },
+ },
+ },
+ {
+ name: "ANDQloadidx8",
+ auxType: auxSymOff,
+ argLen: 4,
+ resultInArg0: true,
+ clobberFlags: true,
+ symEffect: SymRead,
+ asm: x86.AANDQ,
+ scale: 8,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {2, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {1, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+ },
+ outputs: []outputInfo{
+ {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ },
+ },
+ },
+ {
+ name: "ORLloadidx1",
+ auxType: auxSymOff,
+ argLen: 4,
+ resultInArg0: true,
+ clobberFlags: true,
+ symEffect: SymRead,
+ asm: x86.AORL,
+ scale: 1,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {2, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {1, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+ },
+ outputs: []outputInfo{
+ {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ },
+ },
+ },
+ {
+ name: "ORLloadidx4",
+ auxType: auxSymOff,
+ argLen: 4,
+ resultInArg0: true,
+ clobberFlags: true,
+ symEffect: SymRead,
+ asm: x86.AORL,
+ scale: 4,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {2, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {1, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+ },
+ outputs: []outputInfo{
+ {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ },
+ },
+ },
+ {
+ name: "ORLloadidx8",
+ auxType: auxSymOff,
+ argLen: 4,
+ resultInArg0: true,
+ clobberFlags: true,
+ symEffect: SymRead,
+ asm: x86.AORL,
+ scale: 8,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {2, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {1, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+ },
+ outputs: []outputInfo{
+ {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ },
+ },
+ },
+ {
+ name: "ORQloadidx1",
+ auxType: auxSymOff,
+ argLen: 4,
+ resultInArg0: true,
+ clobberFlags: true,
+ symEffect: SymRead,
+ asm: x86.AORQ,
+ scale: 1,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {2, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {1, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+ },
+ outputs: []outputInfo{
+ {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ },
+ },
+ },
+ {
+ name: "ORQloadidx8",
+ auxType: auxSymOff,
+ argLen: 4,
+ resultInArg0: true,
+ clobberFlags: true,
+ symEffect: SymRead,
+ asm: x86.AORQ,
+ scale: 8,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {2, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {1, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+ },
+ outputs: []outputInfo{
+ {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ },
+ },
+ },
+ {
+ name: "XORLloadidx1",
+ auxType: auxSymOff,
+ argLen: 4,
+ resultInArg0: true,
+ clobberFlags: true,
+ symEffect: SymRead,
+ asm: x86.AXORL,
+ scale: 1,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {2, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {1, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+ },
+ outputs: []outputInfo{
+ {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ },
+ },
+ },
+ {
+ name: "XORLloadidx4",
+ auxType: auxSymOff,
+ argLen: 4,
+ resultInArg0: true,
+ clobberFlags: true,
+ symEffect: SymRead,
+ asm: x86.AXORL,
+ scale: 4,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {2, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {1, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+ },
+ outputs: []outputInfo{
+ {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ },
+ },
+ },
+ {
+ name: "XORLloadidx8",
+ auxType: auxSymOff,
+ argLen: 4,
+ resultInArg0: true,
+ clobberFlags: true,
+ symEffect: SymRead,
+ asm: x86.AXORL,
+ scale: 8,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {2, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {1, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+ },
+ outputs: []outputInfo{
+ {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ },
+ },
+ },
+ {
+ name: "XORQloadidx1",
+ auxType: auxSymOff,
+ argLen: 4,
+ resultInArg0: true,
+ clobberFlags: true,
+ symEffect: SymRead,
+ asm: x86.AXORQ,
+ scale: 1,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {2, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {1, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+ },
+ outputs: []outputInfo{
+ {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ },
+ },
+ },
+ {
+ name: "XORQloadidx8",
+ auxType: auxSymOff,
+ argLen: 4,
+ resultInArg0: true,
+ clobberFlags: true,
+ symEffect: SymRead,
+ asm: x86.AXORQ,
+ scale: 8,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {2, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {1, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+ },
+ outputs: []outputInfo{
+ {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ },
+ },
+ },
+ {
name: "ADDQmodify",
auxType: auxSymOff,
argLen: 3,
diff --git a/test/codegen/memops.go b/test/codegen/memops.go
index dbe4263d8d..701735f875 100644
--- a/test/codegen/memops.go
+++ b/test/codegen/memops.go
@@ -205,23 +205,43 @@ func idxFloat64(x, y []float64, i int) {
y[16*i+1] = t
}
-func idxLoadPlusOp(x []int32, i int) int32 {
+func idxLoadPlusOp32(x []int32, i int) int32 {
s := x[0]
// 386: `ADDL\t4\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+`
+ // amd64: `ADDL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
s += x[i+1]
// 386: `SUBL\t8\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+`
+ // amd64: `SUBL\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
s -= x[i+2]
// 386: `IMULL\t12\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+`
s *= x[i+3]
// 386: `ANDL\t16\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+`
+ // amd64: `ANDL\t16\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
s &= x[i+4]
// 386: `ORL\t20\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+`
+ // amd64: `ORL\t20\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
s |= x[i+5]
// 386: `XORL\t24\([A-Z]+\)\([A-Z]+\*4\), [A-Z]+`
+ // amd64: `XORL\t24\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
s ^= x[i+6]
return s
}
+func idxLoadPlusOp64(x []int64, i int) int64 {
+ s := x[0]
+ // amd64: `ADDQ\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
+ s += x[i+1]
+ // amd64: `SUBQ\t16\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
+ s -= x[i+2]
+ // amd64: `ANDQ\t24\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
+ s &= x[i+3]
+ // amd64: `ORQ\t32\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
+ s |= x[i+4]
+ // amd64: `XORQ\t40\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
+ s ^= x[i+5]
+ return s
+}
+
func idxStorePlusOp(x []int32, i int, v int32) {
// 386: `ADDL\t[A-Z]+, 4\([A-Z]+\)\([A-Z]+\*4\)`
x[i+1] += v