diff options
author | Rhys Perry <pendingchaos02@gmail.com> | 2022-05-25 17:21:10 +0100 |
---|---|---|
committer | Marge Bot <emma+marge@anholt.net> | 2022-07-08 14:49:03 +0000 |
commit | c66206cbedddc21e724c987d48267f5304453b63 (patch) | |
tree | ed9db69d7cd553b838de15c5f019f97543e96931 /src/amd/compiler/aco_insert_waitcnt.cpp | |
parent | 7d3404490865a8b81fc4ea05927c3ac0187b74a8 (diff) | |
download | mesa-c66206cbedddc21e724c987d48267f5304453b63.tar.gz |
aco: avoid WAW hazard with BVH MIMG and other VMEM
According to LLVM, image_bvh64_intersect_ray does not write results in
order with other VMEM instructions.
fossil-db (navi21):
Totals from 7 (0.00% of 162293) affected shaders:
Instrs: 39978 -> 39985 (+0.02%)
CodeSize: 219356 -> 219384 (+0.01%)
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17079>
Diffstat (limited to 'src/amd/compiler/aco_insert_waitcnt.cpp')
-rw-r--r-- | src/amd/compiler/aco_insert_waitcnt.cpp | 61 |
1 files changed, 34 insertions, 27 deletions
diff --git a/src/amd/compiler/aco_insert_waitcnt.cpp b/src/amd/compiler/aco_insert_waitcnt.cpp index 076f636f266..9d6f0991316 100644 --- a/src/amd/compiler/aco_insert_waitcnt.cpp +++ b/src/amd/compiler/aco_insert_waitcnt.cpp @@ -79,6 +79,12 @@ enum counter_type : uint8_t { num_counters = 4, }; +enum vmem_type : uint8_t { + vmem_nosampler = 1 << 0, + vmem_sampler = 1 << 1, + vmem_bvh = 1 << 2, +}; + static const uint16_t exp_events = event_exp_pos | event_exp_param | event_exp_mrt_null | event_gds_gpr_lock | event_vmem_gpr_lock; static const uint16_t lgkm_events = event_smem | event_lds | event_gds | event_flat | event_sendmsg; @@ -111,27 +117,22 @@ struct wait_entry { uint8_t counters; /* use counter_type notion */ bool wait_on_read : 1; bool logical : 1; - bool has_vmem_nosampler : 1; - bool has_vmem_sampler : 1; + uint8_t vmem_types : 4; wait_entry(wait_event event_, wait_imm imm_, bool logical_, bool wait_on_read_) : imm(imm_), events(event_), counters(get_counters_for_event(event_)), - wait_on_read(wait_on_read_), logical(logical_), has_vmem_nosampler(false), - has_vmem_sampler(false) + wait_on_read(wait_on_read_), logical(logical_), vmem_types(0) {} bool join(const wait_entry& other) { bool changed = (other.events & ~events) || (other.counters & ~counters) || - (other.wait_on_read && !wait_on_read) || - (other.has_vmem_nosampler && !has_vmem_nosampler) || - (other.has_vmem_sampler && !has_vmem_sampler); + (other.wait_on_read && !wait_on_read) || (other.vmem_types & !vmem_types); events |= other.events; counters |= other.counters; changed |= imm.combine(other.imm); wait_on_read |= other.wait_on_read; - has_vmem_nosampler |= other.has_vmem_nosampler; - has_vmem_sampler |= other.has_vmem_sampler; + vmem_types |= other.vmem_types; assert(logical == other.logical); return changed; } @@ -148,8 +149,7 @@ struct wait_entry { if (counter == counter_vm) { imm.vm = wait_imm::unset_counter; events &= ~event_vmem; - has_vmem_nosampler = false; - has_vmem_sampler = false; + vmem_types = 0; } if (counter == counter_exp) { @@ -242,6 +242,19 @@ struct wait_ctx { } }; +uint8_t +get_vmem_type(Instruction* instr) +{ + if (instr->opcode == aco_opcode::image_bvh64_intersect_ray) + return vmem_bvh; + else if (instr->isMIMG() && !instr->operands[1].isUndefined() && + instr->operands[1].regClass() == s4) + return vmem_sampler; + else if (instr->isVMEM()) + return vmem_nosampler; + return 0; +} + void check_instr(wait_ctx& ctx, wait_imm& wait, Instruction* instr) { @@ -270,11 +283,9 @@ check_instr(wait_ctx& ctx, wait_imm& wait, Instruction* instr) continue; /* Vector Memory reads and writes return in the order they were issued */ - bool has_sampler = instr->isMIMG() && !instr->operands[1].isUndefined() && - instr->operands[1].regClass() == s4; - if (instr->isVMEM() && ((it->second.events & vm_events) == event_vmem) && - it->second.has_vmem_nosampler == !has_sampler && - it->second.has_vmem_sampler == has_sampler) + uint8_t vmem_type = get_vmem_type(instr); + if (vmem_type && ((it->second.events & vm_events) == event_vmem) && + it->second.vmem_types == vmem_type) continue; /* LDS reads and writes return in the order they were issued. same for GDS */ @@ -568,7 +579,7 @@ update_counters_for_flat_load(wait_ctx& ctx, memory_sync_info sync = memory_sync void insert_wait_entry(wait_ctx& ctx, PhysReg reg, RegClass rc, wait_event event, bool wait_on_read, - bool has_sampler = false) + uint8_t vmem_types = 0) { uint16_t counters = get_counters_for_event(event); wait_imm imm; @@ -582,8 +593,7 @@ insert_wait_entry(wait_ctx& ctx, PhysReg reg, RegClass rc, wait_event event, boo imm.vs = 0; wait_entry new_entry(event, imm, !rc.is_linear(), wait_on_read); - new_entry.has_vmem_nosampler = (event & event_vmem) && !has_sampler; - new_entry.has_vmem_sampler = (event & event_vmem) && has_sampler; + new_entry.vmem_types |= vmem_types; for (unsigned i = 0; i < rc.size(); i++) { auto it = ctx.gpr_map.emplace(PhysReg{reg.reg() + i}, new_entry); @@ -593,16 +603,16 @@ insert_wait_entry(wait_ctx& ctx, PhysReg reg, RegClass rc, wait_event event, boo } void -insert_wait_entry(wait_ctx& ctx, Operand op, wait_event event, bool has_sampler = false) +insert_wait_entry(wait_ctx& ctx, Operand op, wait_event event, uint8_t vmem_types = 0) { if (!op.isConstant() && !op.isUndefined()) - insert_wait_entry(ctx, op.physReg(), op.regClass(), event, false, has_sampler); + insert_wait_entry(ctx, op.physReg(), op.regClass(), event, false, vmem_types); } void -insert_wait_entry(wait_ctx& ctx, Definition def, wait_event event, bool has_sampler = false) +insert_wait_entry(wait_ctx& ctx, Definition def, wait_event event, uint8_t vmem_types = 0) { - insert_wait_entry(ctx, def.physReg(), def.regClass(), event, true, has_sampler); + insert_wait_entry(ctx, def.physReg(), def.regClass(), event, true, vmem_types); } void @@ -679,11 +689,8 @@ gen(Instruction* instr, wait_ctx& ctx) !instr->definitions.empty() || ctx.gfx_level < GFX10 ? event_vmem : event_vmem_store; update_counters(ctx, ev, get_sync_info(instr)); - bool has_sampler = instr->isMIMG() && !instr->operands[1].isUndefined() && - instr->operands[1].regClass() == s4; - if (!instr->definitions.empty()) - insert_wait_entry(ctx, instr->definitions[0], ev, has_sampler); + insert_wait_entry(ctx, instr->definitions[0], ev, get_vmem_type(instr)); if (ctx.gfx_level == GFX6 && instr->format != Format::MIMG && instr->operands.size() == 4) { ctx.exp_cnt++; |