diff options
author | Rhys Perry <pendingchaos02@gmail.com> | 2022-08-25 20:04:01 +0100 |
---|---|---|
committer | Dylan Baker <dylan.c.baker@intel.com> | 2022-10-04 14:54:26 -0700 |
commit | 9566c44069cf454d275e0dacb7115570a61f4af6 (patch) | |
tree | 98f713d4cdb90a3312a52fb3b702213dedfe7871 | |
parent | 642b4d6b6e126101c2a41956ad16db423e52c4f1 (diff) | |
download | mesa-9566c44069cf454d275e0dacb7115570a61f4af6.tar.gz |
aco: fix VMEMtoScalarWriteHazard s_waitcnt mitigation
It doesn't make sense for a "s_waitcnt vmcnt(0)" to affect a store or DS
instruction.
LLVM checks for "s_waitcnt vmcnt(0) lgkmcnt(0) expcnt(0)" but ignores
s_waitcnt_vscnt (which I assume is a bug).
No fossil-db changes.
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Fixes: bcf94bb933e ("aco: properly recognize that s_waitcnt mitigates VMEMtoScalarWriteHazard")
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18270>
(cherry picked from commit 2bd16256a6a8f830dc43aa7224879d11edb9583a)
-rw-r--r-- | .pick_status.json | 2 | ||||
-rw-r--r-- | src/amd/compiler/README-ISA.md | 2 | ||||
-rw-r--r-- | src/amd/compiler/aco_insert_NOPs.cpp | 51 |
3 files changed, 39 insertions, 16 deletions
diff --git a/.pick_status.json b/.pick_status.json index 8a92b46ca62..bed17cbf4f0 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -1120,7 +1120,7 @@ "description": "aco: fix VMEMtoScalarWriteHazard s_waitcnt mitigation", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": "bcf94bb933e8ccc0b91305ed8189a35e8938abbf" }, diff --git a/src/amd/compiler/README-ISA.md b/src/amd/compiler/README-ISA.md index d78cf7bdc5c..f27927615b0 100644 --- a/src/amd/compiler/README-ISA.md +++ b/src/amd/compiler/README-ISA.md @@ -220,7 +220,7 @@ VMEM/FLAT/GLOBAL/SCRATCH/DS instruction reads an SGPR (or EXEC, or M0). Then, a SALU/SMEM instruction writes the same SGPR. Mitigated by: -A VALU instruction or an `s_waitcnt vmcnt(0)` between the two instructions. +A VALU instruction or an `s_waitcnt` between the two instructions. ### SMEMtoVectorWriteHazard diff --git a/src/amd/compiler/aco_insert_NOPs.cpp b/src/amd/compiler/aco_insert_NOPs.cpp index 6d07813b736..01012a78e4c 100644 --- a/src/amd/compiler/aco_insert_NOPs.cpp +++ b/src/amd/compiler/aco_insert_NOPs.cpp @@ -159,6 +159,8 @@ struct NOP_ctx_gfx10 { bool has_NSA_MIMG = false; bool has_writelane = false; std::bitset<128> sgprs_read_by_VMEM; + std::bitset<128> sgprs_read_by_VMEM_store; + std::bitset<128> sgprs_read_by_DS; std::bitset<128> sgprs_read_by_SMEM; void join(const NOP_ctx_gfx10& other) @@ -172,6 +174,8 @@ struct NOP_ctx_gfx10 { has_NSA_MIMG |= other.has_NSA_MIMG; has_writelane |= other.has_writelane; sgprs_read_by_VMEM |= other.sgprs_read_by_VMEM; + sgprs_read_by_DS |= other.sgprs_read_by_DS; + sgprs_read_by_VMEM_store |= other.sgprs_read_by_VMEM_store; sgprs_read_by_SMEM |= other.sgprs_read_by_SMEM; } @@ -182,6 +186,8 @@ struct NOP_ctx_gfx10 { has_DS == other.has_DS && has_branch_after_DS == other.has_branch_after_DS && has_NSA_MIMG == other.has_NSA_MIMG && has_writelane == other.has_writelane && sgprs_read_by_VMEM == other.sgprs_read_by_VMEM && + sgprs_read_by_DS == other.sgprs_read_by_DS && + sgprs_read_by_VMEM_store == other.sgprs_read_by_VMEM_store && sgprs_read_by_SMEM == other.sgprs_read_by_SMEM; } }; @@ -582,6 +588,16 @@ mark_read_regs(const aco_ptr<Instruction>& instr, std::bitset<N>& reg_reads) } } +template <std::size_t N> +void +mark_read_regs_exec(State& state, const aco_ptr<Instruction>& instr, std::bitset<N>& reg_reads) +{ + mark_read_regs(instr, reg_reads); + reg_reads.set(exec); + if (state.program->wave_size == 64) + reg_reads.set(exec_hi); +} + bool VALU_writes_sgpr(aco_ptr<Instruction>& instr) { @@ -638,31 +654,36 @@ handle_instruction_gfx10(State& state, NOP_ctx_gfx10& ctx, aco_ptr<Instruction>& // TODO: s_dcache_inv needs to be in it's own group on GFX10 /* VMEMtoScalarWriteHazard - * Handle EXEC/M0/SGPR write following a VMEM instruction without a VALU or "waitcnt vmcnt(0)" + * Handle EXEC/M0/SGPR write following a VMEM/DS instruction without a VALU or "waitcnt vmcnt(0)" * in-between. */ if (instr->isVMEM() || instr->isFlatLike() || instr->isDS()) { - /* Remember all SGPRs that are read by the VMEM instruction */ - mark_read_regs(instr, ctx.sgprs_read_by_VMEM); - ctx.sgprs_read_by_VMEM.set(exec); - if (state.program->wave_size == 64) - ctx.sgprs_read_by_VMEM.set(exec_hi); + /* Remember all SGPRs that are read by the VMEM/DS instruction */ + if (instr->isVMEM() || instr->isFlatLike()) + mark_read_regs_exec( + state, instr, + instr->definitions.empty() ? ctx.sgprs_read_by_VMEM_store : ctx.sgprs_read_by_VMEM); + if (instr->isFlat() || instr->isDS()) + mark_read_regs_exec(state, instr, ctx.sgprs_read_by_DS); } else if (instr->isSALU() || instr->isSMEM()) { if (instr->opcode == aco_opcode::s_waitcnt) { - /* Hazard is mitigated by "s_waitcnt vmcnt(0)" */ - uint16_t imm = instr->sopp().imm; - unsigned vmcnt = (imm & 0xF) | ((imm & (0x3 << 14)) >> 10); - if (vmcnt == 0) + wait_imm imm(state.program->gfx_level, instr->sopp().imm); + if (imm.vm == 0) ctx.sgprs_read_by_VMEM.reset(); - } else if (instr->opcode == aco_opcode::s_waitcnt_depctr) { + } else if (instr->opcode == aco_opcode::s_waitcnt_depctr && instr->sopp().imm == 0xffe3) { /* Hazard is mitigated by a s_waitcnt_depctr with a magic imm */ - if (instr->sopp().imm == 0xffe3) - ctx.sgprs_read_by_VMEM.reset(); + ctx.sgprs_read_by_VMEM.reset(); + ctx.sgprs_read_by_DS.reset(); + ctx.sgprs_read_by_VMEM_store.reset(); } /* Check if SALU writes an SGPR that was previously read by the VALU */ - if (check_written_regs(instr, ctx.sgprs_read_by_VMEM)) { + if (check_written_regs(instr, ctx.sgprs_read_by_VMEM) || + check_written_regs(instr, ctx.sgprs_read_by_DS) || + check_written_regs(instr, ctx.sgprs_read_by_VMEM_store)) { ctx.sgprs_read_by_VMEM.reset(); + ctx.sgprs_read_by_DS.reset(); + ctx.sgprs_read_by_VMEM_store.reset(); /* Insert s_waitcnt_depctr instruction with magic imm to mitigate the problem */ aco_ptr<SOPP_instruction> depctr{ @@ -674,6 +695,8 @@ handle_instruction_gfx10(State& state, NOP_ctx_gfx10& ctx, aco_ptr<Instruction>& } else if (instr->isVALU()) { /* Hazard is mitigated by any VALU instruction */ ctx.sgprs_read_by_VMEM.reset(); + ctx.sgprs_read_by_DS.reset(); + ctx.sgprs_read_by_VMEM_store.reset(); } /* VcmpxPermlaneHazard |