summaryrefslogtreecommitdiff
path: root/src/amd/compiler/aco_insert_waitcnt.cpp
diff options
context:
space:
mode:
author: Rhys Perry <pendingchaos02@gmail.com> 2023-01-03 18:14:16 +0000
committer: Marge Bot <emma+marge@anholt.net> 2023-01-09 18:22:59 +0000
commit: c8357136d4054e194e266610c3870026527af737 (patch)
tree: 41b2e054c2fc0a24bd4d878f4f30e1fc82a23b74 /src/amd/compiler/aco_insert_waitcnt.cpp
parent: bbad550f3d454bde016eb1b7d7acc4ea8b9eef6e (diff)
download: mesa-c8357136d4054e194e266610c3870026527af737.tar.gz
aco: improve parse_delay_alu
Use gpr_map to determine how many cycles each dependency of the s_delay_alu needs. This information helps the pass avoid further s_delay_alu instructions.

fossil-db (gfx1100):
Totals from 13097 (9.73% of 134574) affected shaders:
Instrs: 30711894 -> 30702692 (-0.03%)
CodeSize: 153462500 -> 153425692 (-0.02%)
Latency: 372758612 -> 372741922 (-0.00%)
InvThroughput: 50164111 -> 50160717 (-0.01%); split: -0.01%, +0.00%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20512>
Diffstat (limited to 'src/amd/compiler/aco_insert_waitcnt.cpp')
-rw-r--r-- src/amd/compiler/aco_insert_waitcnt.cpp 12
1 files changed, 12 insertions, 0 deletions
diff --git a/src/amd/compiler/aco_insert_waitcnt.cpp b/src/amd/compiler/aco_insert_waitcnt.cpp
index c3ff61a6f27..fdaea72e5a1 100644
--- a/src/amd/compiler/aco_insert_waitcnt.cpp
+++ b/src/amd/compiler/aco_insert_waitcnt.cpp
@@ -432,6 +432,18 @@ parse_delay_alu(wait_ctx& ctx, alu_delay_info& delay, Instruction* instr)
else if (wait >= alu_delay_wait::SALU_CYCLE_1)
delay.salu_cycles = imm[i] - (uint32_t)alu_delay_wait::SALU_CYCLE_1 + 1;
}
+
+ for (std::pair<const PhysReg, wait_entry>& e : ctx.gpr_map) {
+ wait_entry& entry = e.second;
+
+ if (delay.valu_instrs <= entry.delay.valu_instrs)
+ delay.valu_cycles = std::max(delay.valu_cycles, entry.delay.valu_cycles);
+ if (delay.trans_instrs <= entry.delay.trans_instrs)
+ delay.trans_cycles = std::max(delay.trans_cycles, entry.delay.trans_cycles);
+ if (delay.salu_cycles <= entry.delay.salu_cycles)
+ delay.salu_cycles = std::max(delay.salu_cycles, entry.delay.salu_cycles);
+ }
+
return true;
}