diff options
author | Rhys Perry <pendingchaos02@gmail.com> | 2023-01-03 16:35:17 +0000 |
---|---|---|
committer | Marge Bot <emma+marge@anholt.net> | 2023-01-09 18:22:59 +0000 |
commit | b64afc1d37a2ed165f0f3bb059c71a298253c9a5 (patch) | |
tree | 929f8653834fa563264dc79235d6247cac116e18 /src/amd/compiler/aco_insert_waitcnt.cpp | |
parent | e2f083c0a7d64506f7a1509f05bc4daf000e102c (diff) | |
download | mesa-b64afc1d37a2ed165f0f3bb059c71a298253c9a5.tar.gz |
aco: use s_delay_alu skip field
fossil-db (gfx1100):
Totals from 130066 (96.65% of 134574) affected shaders:
Instrs: 80208817 -> 71420648 (-10.96%)
CodeSize: 403523036 -> 368370360 (-8.71%)
Latency: 658064779 -> 657935384 (-0.02%); split: -0.02%, +0.00%
InvThroughput: 87698268 -> 87693326 (-0.01%); split: -0.01%, +0.00%
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20512>
Diffstat (limited to 'src/amd/compiler/aco_insert_waitcnt.cpp')
-rw-r--r-- | src/amd/compiler/aco_insert_waitcnt.cpp | 27 |
1 files changed, 27 insertions, 0 deletions
diff --git a/src/amd/compiler/aco_insert_waitcnt.cpp b/src/amd/compiler/aco_insert_waitcnt.cpp
index fdaea72e5a1..7ac2c822ff0 100644
--- a/src/amd/compiler/aco_insert_waitcnt.cpp
+++ b/src/amd/compiler/aco_insert_waitcnt.cpp
@@ -1097,6 +1097,33 @@ insert_wait_states(Program* program)
 
       out_ctx[current.index] = std::move(ctx);
    }
+
+   /* Combine s_delay_alu using the skip field. */
+   if (program->gfx_level >= GFX11) {
+      for (Block& block : program->blocks) {
+         int i = 0;
+         int prev_delay_alu = -1;
+         for (aco_ptr<Instruction>& instr : block.instructions) {
+            if (instr->opcode != aco_opcode::s_delay_alu) {
+               block.instructions[i++] = std::move(instr);
+               continue;
+            }
+
+            uint16_t imm = instr->sopp().imm;
+            int skip = i - prev_delay_alu - 1;
+            if (imm >> 7 || prev_delay_alu < 0 || skip >= 6) {
+               if (imm >> 7 == 0)
+                  prev_delay_alu = i;
+               block.instructions[i++] = std::move(instr);
+               continue;
+            }
+
+            block.instructions[prev_delay_alu]->sopp().imm |= (skip << 4) | (imm << 7);
+            prev_delay_alu = -1;
+         }
+         block.instructions.resize(i);
+      }
+   }
 }
 
 } // namespace aco