summaryrefslogtreecommitdiff
path: root/src/amd/compiler/aco_insert_waitcnt.cpp
diff options
context:
space:
mode:
author: Rhys Perry <pendingchaos02@gmail.com> 2022-05-25 17:21:10 +0100
committer: Marge Bot <emma+marge@anholt.net> 2022-07-08 14:49:03 +0000
commit: c66206cbedddc21e724c987d48267f5304453b63 (patch)
tree: ed9db69d7cd553b838de15c5f019f97543e96931 /src/amd/compiler/aco_insert_waitcnt.cpp
parent: 7d3404490865a8b81fc4ea05927c3ac0187b74a8 (diff)
download: mesa-c66206cbedddc21e724c987d48267f5304453b63.tar.gz
aco: avoid WAW hazard with BVH MIMG and other VMEM
According to LLVM, image_bvh64_intersect_ray does not write results in order with other VMEM instructions. fossil-db (navi21): Totals from 7 (0.00% of 162293) affected shaders: Instrs: 39978 -> 39985 (+0.02%) CodeSize: 219356 -> 219384 (+0.01%) Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17079>
Diffstat (limited to 'src/amd/compiler/aco_insert_waitcnt.cpp')
-rw-r--r-- src/amd/compiler/aco_insert_waitcnt.cpp 61
1 files changed, 34 insertions, 27 deletions
diff --git a/src/amd/compiler/aco_insert_waitcnt.cpp b/src/amd/compiler/aco_insert_waitcnt.cpp
index 076f636f266..9d6f0991316 100644
--- a/src/amd/compiler/aco_insert_waitcnt.cpp
+++ b/src/amd/compiler/aco_insert_waitcnt.cpp
@@ -79,6 +79,12 @@ enum counter_type : uint8_t {
num_counters = 4,
};
+enum vmem_type : uint8_t { /* one bit per kind, so several kinds can be OR-ed together in wait_entry::vmem_types */
+ vmem_nosampler = 1 << 0, /* get_vmem_type(): a VMEM instruction that matches neither of the two cases below */
+ vmem_sampler = 1 << 1, /* get_vmem_type(): MIMG whose operand 1 is defined and has register class s4 */
+ vmem_bvh = 1 << 2, /* get_vmem_type(): image_bvh64_intersect_ray */
+};
+
static const uint16_t exp_events =
event_exp_pos | event_exp_param | event_exp_mrt_null | event_gds_gpr_lock | event_vmem_gpr_lock;
static const uint16_t lgkm_events = event_smem | event_lds | event_gds | event_flat | event_sendmsg;
@@ -111,27 +117,22 @@ struct wait_entry {
uint8_t counters; /* use counter_type notion */
bool wait_on_read : 1;
bool logical : 1;
- bool has_vmem_nosampler : 1;
- bool has_vmem_sampler : 1;
+ uint8_t vmem_types : 4;
wait_entry(wait_event event_, wait_imm imm_, bool logical_, bool wait_on_read_)
: imm(imm_), events(event_), counters(get_counters_for_event(event_)),
- wait_on_read(wait_on_read_), logical(logical_), has_vmem_nosampler(false),
- has_vmem_sampler(false)
+ wait_on_read(wait_on_read_), logical(logical_), vmem_types(0)
{}
bool join(const wait_entry& other) /* Merges other into this entry. Returns true if other contributed events, counters, wait_on_read or vmem type bits not already present, or if imm.combine(other.imm) returned true. */
{
bool changed = (other.events & ~events) || (other.counters & ~counters) ||
- (other.wait_on_read && !wait_on_read) ||
- (other.has_vmem_nosampler && !has_vmem_nosampler) ||
- (other.has_vmem_sampler && !has_vmem_sampler);
+ (other.wait_on_read && !wait_on_read) || (other.vmem_types & ~vmem_types); /* bitwise ~, not logical !: vmem_types is a bitmask, so this must detect any bit set in other that is missing here */
events |= other.events;
counters |= other.counters;
changed |= imm.combine(other.imm);
wait_on_read |= other.wait_on_read;
- has_vmem_nosampler |= other.has_vmem_nosampler;
- has_vmem_sampler |= other.has_vmem_sampler;
+ vmem_types |= other.vmem_types;
assert(logical == other.logical); /* entries being merged are expected to agree on the logical flag */
return changed;
}
@@ -148,8 +149,7 @@ struct wait_entry {
if (counter == counter_vm) {
imm.vm = wait_imm::unset_counter;
events &= ~event_vmem;
- has_vmem_nosampler = false;
- has_vmem_sampler = false;
+ vmem_types = 0;
}
if (counter == counter_exp) {
@@ -242,6 +242,19 @@ struct wait_ctx {
}
};
+uint8_t
+get_vmem_type(Instruction* instr) /* Classifies instr as exactly one vmem_type bit, or returns 0 when none of the cases below match. */
+{
+ if (instr->opcode == aco_opcode::image_bvh64_intersect_ray) /* tested first, so this opcode never reaches the MIMG/VMEM cases below */
+ return vmem_bvh;
+ else if (instr->isMIMG() && !instr->operands[1].isUndefined() &&
+ instr->operands[1].regClass() == s4) /* MIMG whose operand 1 is defined with class s4; presumably the sampler descriptor - TODO confirm */
+ return vmem_sampler;
+ else if (instr->isVMEM()) /* any remaining VMEM instruction */
+ return vmem_nosampler;
+ return 0; /* 0 makes the `vmem_type &&` test in check_instr() false */
+}
+
void
check_instr(wait_ctx& ctx, wait_imm& wait, Instruction* instr)
{
@@ -270,11 +283,9 @@ check_instr(wait_ctx& ctx, wait_imm& wait, Instruction* instr)
continue;
/* Vector Memory reads and writes return in the order they were issued */
- bool has_sampler = instr->isMIMG() && !instr->operands[1].isUndefined() &&
- instr->operands[1].regClass() == s4;
- if (instr->isVMEM() && ((it->second.events & vm_events) == event_vmem) &&
- it->second.has_vmem_nosampler == !has_sampler &&
- it->second.has_vmem_sampler == has_sampler)
+ uint8_t vmem_type = get_vmem_type(instr);
+ if (vmem_type && ((it->second.events & vm_events) == event_vmem) &&
+ it->second.vmem_types == vmem_type)
continue;
/* LDS reads and writes return in the order they were issued. same for GDS */
@@ -568,7 +579,7 @@ update_counters_for_flat_load(wait_ctx& ctx, memory_sync_info sync = memory_sync
void
insert_wait_entry(wait_ctx& ctx, PhysReg reg, RegClass rc, wait_event event, bool wait_on_read,
- bool has_sampler = false)
+ uint8_t vmem_types = 0)
{
uint16_t counters = get_counters_for_event(event);
wait_imm imm;
@@ -582,8 +593,7 @@ insert_wait_entry(wait_ctx& ctx, PhysReg reg, RegClass rc, wait_event event, boo
imm.vs = 0;
wait_entry new_entry(event, imm, !rc.is_linear(), wait_on_read);
- new_entry.has_vmem_nosampler = (event & event_vmem) && !has_sampler;
- new_entry.has_vmem_sampler = (event & event_vmem) && has_sampler;
+ new_entry.vmem_types |= vmem_types;
for (unsigned i = 0; i < rc.size(); i++) {
auto it = ctx.gpr_map.emplace(PhysReg{reg.reg() + i}, new_entry);
@@ -593,16 +603,16 @@ insert_wait_entry(wait_ctx& ctx, PhysReg reg, RegClass rc, wait_event event, boo
}
void
-insert_wait_entry(wait_ctx& ctx, Operand op, wait_event event, bool has_sampler = false)
+insert_wait_entry(wait_ctx& ctx, Operand op, wait_event event, uint8_t vmem_types = 0) /* Operand overload: forwards op's register and class to the PhysReg overload with wait_on_read = false. */
{
if (!op.isConstant() && !op.isUndefined()) /* constant and undefined operands are skipped: no entry is inserted for them */
- insert_wait_entry(ctx, op.physReg(), op.regClass(), event, false, has_sampler);
+ insert_wait_entry(ctx, op.physReg(), op.regClass(), event, false, vmem_types);
}
void
-insert_wait_entry(wait_ctx& ctx, Definition def, wait_event event, bool has_sampler = false)
+insert_wait_entry(wait_ctx& ctx, Definition def, wait_event event, uint8_t vmem_types = 0) /* Definition overload: forwards def's register and class to the PhysReg overload with wait_on_read = true. */
{
- insert_wait_entry(ctx, def.physReg(), def.regClass(), event, true, has_sampler);
+ insert_wait_entry(ctx, def.physReg(), def.regClass(), event, true, vmem_types);
}
void
@@ -679,11 +689,8 @@ gen(Instruction* instr, wait_ctx& ctx)
!instr->definitions.empty() || ctx.gfx_level < GFX10 ? event_vmem : event_vmem_store;
update_counters(ctx, ev, get_sync_info(instr));
- bool has_sampler = instr->isMIMG() && !instr->operands[1].isUndefined() &&
- instr->operands[1].regClass() == s4;
-
if (!instr->definitions.empty())
- insert_wait_entry(ctx, instr->definitions[0], ev, has_sampler);
+ insert_wait_entry(ctx, instr->definitions[0], ev, get_vmem_type(instr));
if (ctx.gfx_level == GFX6 && instr->format != Format::MIMG && instr->operands.size() == 4) {
ctx.exp_cnt++;