diff options
author | Philip Reames <preames@rivosinc.com> | 2023-05-17 11:06:24 -0700 |
---|---|---|
committer | Philip Reames <listmail@philipreames.com> | 2023-05-17 11:13:57 -0700 |
commit | 0457f506fddf47cfe842b398c7f522057cef8163 (patch) | |
tree | 76a81a28f1721626ba5e0f3bfdab0f21dec3a8f5 | |
parent | 6c59f399a6ca66b7ed3298ab5bcc594aa2930043 (diff) | |
download | llvm-0457f506fddf47cfe842b398c7f522057cef8163.tar.gz |
[RISCV] Implement storeOfVectorConstantIsCheap hook to prevent store merging at VL=2
In general, VL=2 vectors are very questionable profitability-wise. For constants specifically, our inability to materialize many vector constants cheaply biases us strongly towards unprofitability at VL=2.
This hook is very close to the x86 implementation. The difference is that X86 whitelists stores of zeros, and we're better off letting that stay scalar at VL=2.
Differential Revision: https://reviews.llvm.org/D150798
-rw-r--r-- | llvm/lib/Target/RISCV/RISCVISelLowering.h | 7 | ||||
-rw-r--r-- | llvm/test/CodeGen/RISCV/rvv/combine-store.ll | 53 |
2 files changed, 26 insertions, 34 deletions
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index 3936c51884cb..6bf3a811b266 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -531,6 +531,13 @@ public: return TargetLowering::shouldFormOverflowOp(Opcode, VT, MathUsed); } + bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem, + unsigned AddrSpace) const override { + // If we can replace 4 or more scalar stores, there will be a reduction + // in instructions even after we add a vector constant load. + return NumElem >= 4; + } + bool convertSetCCLogicToBitwiseLogic(EVT VT) const override { return VT.isScalarInteger(); } diff --git a/llvm/test/CodeGen/RISCV/rvv/combine-store.ll b/llvm/test/CodeGen/RISCV/rvv/combine-store.ll index 9640d7591a9b..c7187148f571 100644 --- a/llvm/test/CodeGen/RISCV/rvv/combine-store.ll +++ b/llvm/test/CodeGen/RISCV/rvv/combine-store.ll @@ -31,9 +31,8 @@ define void @combine_zero_stores_4xi8(ptr %p) { define void @combine_zero_stores_8xi8(ptr %p) { ; RV32-LABEL: combine_zero_stores_8xi8: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; RV32-NEXT: vmv.v.i v8, 0 -; RV32-NEXT: vse32.v v8, (a0) +; RV32-NEXT: sw zero, 0(a0) +; RV32-NEXT: sw zero, 4(a0) ; RV32-NEXT: ret ; ; RV64-LABEL: combine_zero_stores_8xi8: @@ -72,9 +71,8 @@ define void @combine_zero_stores_2xi16(ptr %p) { define void @combine_zero_stores_4xi16(ptr %p) { ; RV32-LABEL: combine_zero_stores_4xi16: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; RV32-NEXT: vmv.v.i v8, 0 -; RV32-NEXT: vse32.v v8, (a0) +; RV32-NEXT: sw zero, 0(a0) +; RV32-NEXT: sw zero, 4(a0) ; RV32-NEXT: ret ; ; RV64-LABEL: combine_zero_stores_4xi16: @@ -104,9 +102,8 @@ define void @combine_zero_stores_8xi16(ptr %p) { ; ; RV64-LABEL: combine_zero_stores_8xi16: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV64-NEXT: vmv.v.i v8, 0 -; RV64-NEXT: vse64.v v8, (a0) +; 
RV64-NEXT: sd zero, 0(a0) +; RV64-NEXT: sd zero, 8(a0) ; RV64-NEXT: ret store i16 zeroinitializer, ptr %p, align 16 %gep1 = getelementptr i16, ptr %p, i64 1 @@ -129,9 +126,8 @@ define void @combine_zero_stores_8xi16(ptr %p) { define void @combine_zero_stores_2xi32(ptr %p) { ; RV32-LABEL: combine_zero_stores_2xi32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; RV32-NEXT: vmv.v.i v8, 0 -; RV32-NEXT: vse32.v v8, (a0) +; RV32-NEXT: sw zero, 0(a0) +; RV32-NEXT: sw zero, 4(a0) ; RV32-NEXT: ret ; ; RV64-LABEL: combine_zero_stores_2xi32: @@ -154,9 +150,8 @@ define void @combine_zero_stores_4xi32(ptr %p) { ; ; RV64-LABEL: combine_zero_stores_4xi32: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV64-NEXT: vmv.v.i v8, 0 -; RV64-NEXT: vse64.v v8, (a0) +; RV64-NEXT: sd zero, 0(a0) +; RV64-NEXT: sd zero, 8(a0) ; RV64-NEXT: ret store i32 zeroinitializer, ptr %p, align 16 %gep1 = getelementptr i32, ptr %p, i64 1 @@ -201,18 +196,11 @@ define void @combine_zero_stores_8xi32(ptr %p) { } define void @combine_zero_stores_2xi32_unaligned(ptr %p) { -; RV32-LABEL: combine_zero_stores_2xi32_unaligned: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; RV32-NEXT: vmv.v.i v8, 0 -; RV32-NEXT: vse32.v v8, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: combine_zero_stores_2xi32_unaligned: -; RV64: # %bb.0: -; RV64-NEXT: sw zero, 0(a0) -; RV64-NEXT: sw zero, 4(a0) -; RV64-NEXT: ret +; CHECK-LABEL: combine_zero_stores_2xi32_unaligned: +; CHECK: # %bb.0: +; CHECK-NEXT: sw zero, 0(a0) +; CHECK-NEXT: sw zero, 4(a0) +; CHECK-NEXT: ret store i32 zeroinitializer, ptr %p %gep = getelementptr i8, ptr %p, i64 4 store i32 zeroinitializer, ptr %gep @@ -230,9 +218,8 @@ define void @combine_zero_stores_2xi64(ptr %p) { ; ; RV64-LABEL: combine_zero_stores_2xi64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV64-NEXT: vmv.v.i v8, 0 -; RV64-NEXT: vse64.v v8, (a0) +; RV64-NEXT: sd zero, 0(a0) +; RV64-NEXT: sd zero, 8(a0) ; RV64-NEXT: 
ret store i64 zeroinitializer, ptr %p %gep = getelementptr i8, ptr %p, i64 8 @@ -243,10 +230,8 @@ define void @combine_zero_stores_2xi64(ptr %p) { define void @combine_fp_zero_stores_crash(ptr %ptr) { ; CHECK-LABEL: combine_fp_zero_stores_crash: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, a0, 4 -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: sw zero, 4(a0) +; CHECK-NEXT: sw zero, 8(a0) ; CHECK-NEXT: ret %addr1 = getelementptr float, ptr %ptr, i64 1 %addr2 = getelementptr float, ptr %ptr, i64 2 |