summaryrefslogtreecommitdiff
path: root/src/amd/compiler/aco_insert_waitcnt.cpp
diff options
context:
space:
mode:
authorRhys Perry <pendingchaos02@gmail.com>2023-05-11 17:08:39 +0100
committerMarge Bot <emma+marge@anholt.net>2023-05-12 20:19:21 +0000
commit88f6d7f4bdf90bcfdb17e4aadddec3c855a12b13 (patch)
tree90dcd777785caea5195c9378685a0383c6463cc9 /src/amd/compiler/aco_insert_waitcnt.cpp
parent8fc5dd935f4925897cd0bebd117699628f57a645 (diff)
downloadmesa-88f6d7f4bdf90bcfdb17e4aadddec3c855a12b13.tar.gz
aco/gfx11: fix VMEM/DS->VALU WaW/RaW hazard
Previously, we could safely read/write unused lanes of VMEM/DS destination VGPRs without waiting for the load to finish. That doesn't seem to be the case on GFX11. fossil-db (gfx1100): Totals from 6698 (4.94% of 135636) affected shaders: Instrs: 11184274 -> 11199420 (+0.14%); split: -0.00%, +0.14% CodeSize: 57578344 -> 57638928 (+0.11%); split: -0.00%, +0.11% Latency: 198348808 -> 198382472 (+0.02%); split: -0.00%, +0.02% InvThroughput: 24376324 -> 24378439 (+0.01%); split: -0.00%, +0.01% VClause: 192420 -> 192559 (+0.07%) Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Georg Lehmann <dadschoorse@gmail.com> Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/8722 Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/8239 Cc: mesa-stable Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22965>
Diffstat (limited to 'src/amd/compiler/aco_insert_waitcnt.cpp')
-rw-r--r--src/amd/compiler/aco_insert_waitcnt.cpp18
1 files changed, 13 insertions, 5 deletions
diff --git a/src/amd/compiler/aco_insert_waitcnt.cpp b/src/amd/compiler/aco_insert_waitcnt.cpp
index 9b34fb00307..0a87cccde28 100644
--- a/src/amd/compiler/aco_insert_waitcnt.cpp
+++ b/src/amd/compiler/aco_insert_waitcnt.cpp
@@ -224,14 +224,15 @@ struct wait_entry {
bool join(const wait_entry& other)
{
bool changed = (other.events & ~events) || (other.counters & ~counters) ||
- (other.wait_on_read && !wait_on_read) || (other.vmem_types & !vmem_types);
+ (other.wait_on_read && !wait_on_read) || (other.vmem_types & !vmem_types) ||
+ (!other.logical && logical);
events |= other.events;
counters |= other.counters;
changed |= imm.combine(other.imm);
changed |= delay.combine(other.delay);
wait_on_read |= other.wait_on_read;
vmem_types |= other.vmem_types;
- assert(logical == other.logical);
+ logical &= other.logical;
return changed;
}
@@ -736,7 +737,7 @@ update_counters_for_flat_load(wait_ctx& ctx, memory_sync_info sync = memory_sync
void
insert_wait_entry(wait_ctx& ctx, PhysReg reg, RegClass rc, wait_event event, bool wait_on_read,
- uint8_t vmem_types = 0, unsigned cycles = 0)
+ uint8_t vmem_types = 0, unsigned cycles = 0, bool force_linear = false)
{
uint16_t counters = get_counters_for_event(event);
wait_imm imm;
@@ -760,7 +761,7 @@ insert_wait_entry(wait_ctx& ctx, PhysReg reg, RegClass rc, wait_event event, boo
delay.salu_cycles = cycles;
}
- wait_entry new_entry(event, imm, delay, !rc.is_linear(), wait_on_read);
+ wait_entry new_entry(event, imm, delay, !rc.is_linear() && !force_linear, wait_on_read);
new_entry.vmem_types |= vmem_types;
for (unsigned i = 0; i < rc.size(); i++) {
@@ -781,7 +782,14 @@ void
insert_wait_entry(wait_ctx& ctx, Definition def, wait_event event, uint8_t vmem_types = 0,
unsigned cycles = 0)
{
- insert_wait_entry(ctx, def.physReg(), def.regClass(), event, true, vmem_types, cycles);
+ /* We can't safely write to unwritten destination VGPR lanes on GFX11 without waiting for
+ * the load to finish.
+ */
+ bool force_linear =
+ ctx.gfx_level >= GFX11 && (event & (event_lds | event_gds | event_vmem | event_flat));
+
+ insert_wait_entry(ctx, def.physReg(), def.regClass(), event, true, vmem_types, cycles,
+ force_linear);
}
void