summary refs log tree commit diff
path: root/src/amd/compiler/aco_insert_waitcnt.cpp
diff options
context:
space:
mode:
author Rhys Perry <pendingchaos02@gmail.com> 2023-01-03 16:35:17 +0000
committer Marge Bot <emma+marge@anholt.net> 2023-01-09 18:22:59 +0000
commit b64afc1d37a2ed165f0f3bb059c71a298253c9a5 (patch)
tree 929f8653834fa563264dc79235d6247cac116e18 /src/amd/compiler/aco_insert_waitcnt.cpp
parent e2f083c0a7d64506f7a1509f05bc4daf000e102c (diff)
download mesa-b64afc1d37a2ed165f0f3bb059c71a298253c9a5.tar.gz
aco: use s_delay_alu skip field
fossil-db (gfx1100):
Totals from 130066 (96.65% of 134574) affected shaders:
Instrs: 80208817 -> 71420648 (-10.96%)
CodeSize: 403523036 -> 368370360 (-8.71%)
Latency: 658064779 -> 657935384 (-0.02%); split: -0.02%, +0.00%
InvThroughput: 87698268 -> 87693326 (-0.01%); split: -0.01%, +0.00%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20512>
Diffstat (limited to 'src/amd/compiler/aco_insert_waitcnt.cpp')
-rw-r--r-- src/amd/compiler/aco_insert_waitcnt.cpp 27
1 files changed, 27 insertions, 0 deletions
diff --git a/src/amd/compiler/aco_insert_waitcnt.cpp b/src/amd/compiler/aco_insert_waitcnt.cpp
index fdaea72e5a1..7ac2c822ff0 100644
--- a/src/amd/compiler/aco_insert_waitcnt.cpp
+++ b/src/amd/compiler/aco_insert_waitcnt.cpp
@@ -1097,6 +1097,33 @@ insert_wait_states(Program* program)
out_ctx[current.index] = std::move(ctx);
}
+
+ /* Combine s_delay_alu using the skip field. */
+ if (program->gfx_level >= GFX11) {
+ for (Block& block : program->blocks) {
+ int i = 0;
+ int prev_delay_alu = -1;
+ for (aco_ptr<Instruction>& instr : block.instructions) {
+ if (instr->opcode != aco_opcode::s_delay_alu) {
+ block.instructions[i++] = std::move(instr);
+ continue;
+ }
+
+ uint16_t imm = instr->sopp().imm;
+ int skip = i - prev_delay_alu - 1;
+ if (imm >> 7 || prev_delay_alu < 0 || skip >= 6) {
+ if (imm >> 7 == 0)
+ prev_delay_alu = i;
+ block.instructions[i++] = std::move(instr);
+ continue;
+ }
+
+ block.instructions[prev_delay_alu]->sopp().imm |= (skip << 4) | (imm << 7);
+ prev_delay_alu = -1;
+ }
+ block.instructions.resize(i);
+ }
+ }
}
} // namespace aco