From 6f45b39e4dbabf0b179a60ffacf434e55b2d5eab Mon Sep 17 00:00:00 2001 From: Joel Sing Date: Tue, 19 May 2020 18:55:31 +1000 Subject: cmd/compile,cmd/internal/obj/riscv: move g register on riscv64 The original riscv64 port used the thread pointer (TP aka X4) register for the g pointer, however this register is also used when TLS support is required, resulting in a conflict (for example, when a signal is received we have no way of readily knowing if X4 contains a pointer to the TCB or a pointer to a g). In order to support cgo, free up the X4 register by moving g to X27. This unfortunately means that the X4 register is unused in non-cgo mode, however the alternative is to not support cgo on this platform. Update #36641 Change-Id: Idcaf3e8ccbe42972a1b8943aeefde7149d9c960a Reviewed-on: https://go-review.googlesource.com/c/go/+/263477 Trust: Joel Sing Reviewed-by: Cherry Zhang --- src/cmd/compile/internal/ssa/gen/RISCV64Ops.go | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'src/cmd/compile/internal/ssa/gen/RISCV64Ops.go') diff --git a/src/cmd/compile/internal/ssa/gen/RISCV64Ops.go b/src/cmd/compile/internal/ssa/gen/RISCV64Ops.go index b06b86075e..fb944f3132 100644 --- a/src/cmd/compile/internal/ssa/gen/RISCV64Ops.go +++ b/src/cmd/compile/internal/ssa/gen/RISCV64Ops.go @@ -24,10 +24,11 @@ import ( // L = 64 bit int, used when the opcode starts with F const ( - riscv64REG_G = 4 + riscv64REG_G = 27 riscv64REG_CTXT = 20 riscv64REG_LR = 1 riscv64REG_SP = 2 + riscv64REG_TP = 4 riscv64REG_TMP = 31 riscv64REG_ZERO = 0 ) @@ -78,8 +79,8 @@ func init() { // Add general purpose registers to gpMask. switch r { - // ZERO, and TMP are not in any gp mask. - case riscv64REG_ZERO, riscv64REG_TMP: + // ZERO, TP and TMP are not in any gp mask. 
+ case riscv64REG_ZERO, riscv64REG_TP, riscv64REG_TMP: case riscv64REG_G: gpgMask |= mask gpspsbgMask |= mask -- cgit v1.2.1 From 8a013233ac53d934e53cd7d118b5ff7836d8973a Mon Sep 17 00:00:00 2001 From: Joel Sing Date: Sun, 25 Oct 2020 00:32:23 +1100 Subject: cmd/compile: use MOV pseudo-instructions for sign/zero extension Rather than handling sign and zero extension via rules, defer to the assembler and use MOV pseudo-instructions. The instruction can also be omitted where the type and size is already correct. This change results in more than 6,000 instructions being removed from the go binary (in part due to omitted instructions, in part due to MOVBU having a more efficient implementation in the assembler than what is used in the current ZeroExt8to{16,32,64} rules). This will also allow for further rewriting to remove redundant sign/zero extension. Change-Id: I05e42fd9f09f40a69948be7de772cce8946c8744 Reviewed-on: https://go-review.googlesource.com/c/go/+/264658 Trust: Joel Sing Reviewed-by: Cherry Zhang --- src/cmd/compile/internal/ssa/gen/RISCV64Ops.go | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'src/cmd/compile/internal/ssa/gen/RISCV64Ops.go') diff --git a/src/cmd/compile/internal/ssa/gen/RISCV64Ops.go b/src/cmd/compile/internal/ssa/gen/RISCV64Ops.go index fb944f3132..48be8e2c26 100644 --- a/src/cmd/compile/internal/ssa/gen/RISCV64Ops.go +++ b/src/cmd/compile/internal/ssa/gen/RISCV64Ops.go @@ -193,6 +193,15 @@ func init() { {name: "MOVWstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVW", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // 32 bits {name: "MOVDstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOV", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // 64 bits + // Conversions + {name: "MOVBreg", argLength: 1, reg: gp11, asm: "MOVB"}, // move from arg0, sign-extended from byte + {name: "MOVHreg", argLength: 1, reg: gp11, asm: "MOVH"}, // move from arg0, sign-extended from half + {name: "MOVWreg", 
argLength: 1, reg: gp11, asm: "MOVW"}, // move from arg0, sign-extended from word + {name: "MOVDreg", argLength: 1, reg: gp11, asm: "MOV"}, // move from arg0 + {name: "MOVBUreg", argLength: 1, reg: gp11, asm: "MOVBU"}, // move from arg0, unsign-extended from byte + {name: "MOVHUreg", argLength: 1, reg: gp11, asm: "MOVHU"}, // move from arg0, unsign-extended from half + {name: "MOVWUreg", argLength: 1, reg: gp11, asm: "MOVWU"}, // move from arg0, unsign-extended from word + // Shift ops {name: "SLL", argLength: 2, reg: gp21, asm: "SLL"}, // arg0 << (aux1 & 63) {name: "SRA", argLength: 2, reg: gp21, asm: "SRA"}, // arg0 >> (aux1 & 63), signed -- cgit v1.2.1 From 320cc79002b5ce5f8d7f667f0aa78a1fdce59eb4 Mon Sep 17 00:00:00 2001 From: Joel Sing Date: Sun, 25 Oct 2020 01:34:17 +1100 Subject: cmd/compile: eliminate unnecessary sign/zero extension for riscv64 Add additional rules to eliminate unnecessary sign/zero extension for riscv64. Also where possible, replace an extension following a load with a different typed load. This removes almost another 8,000 instructions from the go binary. Of particular note, change Eq16/Eq8/Neq16/Neq8 to zero extend each value before subtraction, rather than zero extending after subtraction. While this appears to double the number of zero extensions, it often lets us completely eliminate them as the load can already be performed in a properly typed manner. 
As an example, prior to this change runtime.memequal16 was: 0000000000013028 <runtime.memequal16>: 13028: 00813183 ld gp,8(sp) 1302c: 00019183 lh gp,0(gp) 13030: 01013283 ld t0,16(sp) 13034: 00029283 lh t0,0(t0) 13038: 405181b3 sub gp,gp,t0 1303c: 03019193 slli gp,gp,0x30 13040: 0301d193 srli gp,gp,0x30 13044: 0011b193 seqz gp,gp 13048: 00310c23 sb gp,24(sp) 1304c: 00008067 ret Whereas it now becomes: 0000000000012fa8 <runtime.memequal16>: 12fa8: 00813183 ld gp,8(sp) 12fac: 0001d183 lhu gp,0(gp) 12fb0: 01013283 ld t0,16(sp) 12fb4: 0002d283 lhu t0,0(t0) 12fb8: 405181b3 sub gp,gp,t0 12fbc: 0011b193 seqz gp,gp 12fc0: 00310c23 sb gp,24(sp) 12fc4: 00008067 ret Change-Id: I16321feb18381241cab121c0097a126104c56c2c Reviewed-on: https://go-review.googlesource.com/c/go/+/264659 Trust: Joel Sing Run-TryBot: Joel Sing TryBot-Result: Go Bot Reviewed-by: Cherry Zhang --- src/cmd/compile/internal/ssa/gen/RISCV64Ops.go | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/cmd/compile/internal/ssa/gen/RISCV64Ops.go') diff --git a/src/cmd/compile/internal/ssa/gen/RISCV64Ops.go b/src/cmd/compile/internal/ssa/gen/RISCV64Ops.go index 48be8e2c26..ebd515b7fc 100644 --- a/src/cmd/compile/internal/ssa/gen/RISCV64Ops.go +++ b/src/cmd/compile/internal/ssa/gen/RISCV64Ops.go @@ -202,6 +202,8 @@ func init() { {name: "MOVHUreg", argLength: 1, reg: gp11, asm: "MOVHU"}, // move from arg0, unsign-extended from half {name: "MOVWUreg", argLength: 1, reg: gp11, asm: "MOVWU"}, // move from arg0, unsign-extended from word + {name: "MOVDnop", argLength: 1, reg: regInfo{inputs: []regMask{gpMask}, outputs: []regMask{gpMask}}, resultInArg0: true}, // nop, return arg0 in same register + // Shift ops {name: "SLL", argLength: 2, reg: gp21, asm: "SLL"}, // arg0 << (aux1 & 63) {name: "SRA", argLength: 2, reg: gp21, asm: "SRA"}, // arg0 >> (aux1 & 63), signed -- cgit v1.2.1 From 150d2448e5a213cd679396371c0a147918dc2125 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Derkacz?= Date: Sun, 14 Jun 2020 00:06:24 +0200 Subject: 
cmd/compile,cmd/internal/obj/riscv,runtime: use Duff's devices on riscv64 Implement runtime.duffzero and runtime.duffcopy for riscv64. Use obj.ADUFFZERO/obj.ADUFFCOPY for medium size, word aligned zeroing/moving. Change-Id: I42ec622055630c94cb77e286d8d33dbe7c9f846c Reviewed-on: https://go-review.googlesource.com/c/go/+/237797 Run-TryBot: Cherry Zhang Reviewed-by: Joel Sing Reviewed-by: Cherry Zhang --- src/cmd/compile/internal/ssa/gen/RISCV64Ops.go | 38 ++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'src/cmd/compile/internal/ssa/gen/RISCV64Ops.go') diff --git a/src/cmd/compile/internal/ssa/gen/RISCV64Ops.go b/src/cmd/compile/internal/ssa/gen/RISCV64Ops.go index ebd515b7fc..f64319230b 100644 --- a/src/cmd/compile/internal/ssa/gen/RISCV64Ops.go +++ b/src/cmd/compile/internal/ssa/gen/RISCV64Ops.go @@ -240,6 +240,44 @@ func init() { {name: "CALLclosure", argLength: 3, reg: callClosure, aux: "CallOff", call: true}, // call function via closure. arg0=codeptr, arg1=closure, arg2=mem, auxint=argsize, returns mem {name: "CALLinter", argLength: 2, reg: callInter, aux: "CallOff", call: true}, // call fn by pointer. 
arg0=codeptr, arg1=mem, auxint=argsize, returns mem + // duffzero + // arg0 = address of memory to zero (in X10, changed as side effect) + // arg1 = mem + // auxint = offset into duffzero code to start executing + // X1 (link register) changed because of function call + // returns mem + { + name: "DUFFZERO", + aux: "Int64", + argLength: 2, + reg: regInfo{ + inputs: []regMask{regNamed["X10"]}, + clobbers: regNamed["X1"] | regNamed["X10"], + }, + typ: "Mem", + faultOnNilArg0: true, + }, + + // duffcopy + // arg0 = address of dst memory (in X11, changed as side effect) + // arg1 = address of src memory (in X10, changed as side effect) + // arg2 = mem + // auxint = offset into duffcopy code to start executing + // X1 (link register) changed because of function call + // returns mem + { + name: "DUFFCOPY", + aux: "Int64", + argLength: 3, + reg: regInfo{ + inputs: []regMask{regNamed["X11"], regNamed["X10"]}, + clobbers: regNamed["X1"] | regNamed["X10"] | regNamed["X11"], + }, + typ: "Mem", + faultOnNilArg0: true, + faultOnNilArg1: true, + }, + // Generic moves and zeros // general unaligned zeroing -- cgit v1.2.1