summaryrefslogtreecommitdiff
path: root/src/amd/compiler/aco_ssa_elimination.cpp
diff options
context:
space:
mode:
authorGeorg Lehmann <dadschoorse@gmail.com>2022-09-10 11:35:56 +0200
committerMarge Bot <emma+marge@anholt.net>2022-11-11 16:02:12 +0000
commite21448d0d31808059fab3a4d35e44ddcb9683cf6 (patch)
tree050c6ec3142e273935fc6ba49cf721b542728f84 /src/amd/compiler/aco_ssa_elimination.cpp
parenta653a390e11a55f25235af1d0b39a6ceae30016f (diff)
downloadmesa-e21448d0d31808059fab3a4d35e44ddcb9683cf6.tar.gz
aco: Don't create useless exec movs while creating v_cmpx.
In a lot of situations the previous exec value was already copied from the same registers that exec should be saved to. In that case we don't have to insert an extra copy to save exec.

This breaks SSA, but this pass is going out of SSA anyway.

Foz-DB Navi21:
Totals from 16129 (11.96% of 134913) affected shaders:
CodeSize: 128184044 -> 128054468 (-0.10%)
Instrs: 23902694 -> 23870325 (-0.14%)
Latency: 387124324 -> 387095955 (-0.01%); split: -0.01%, +0.00%
InvThroughput: 79949118 -> 79945859 (-0.00%); split: -0.01%, +0.00%
Copies: 1620768 -> 1588752 (-1.98%); split: -1.99%, +0.01%

Foz-DB Vega10:
Totals from 15546 (11.51% of 135041) affected shaders:
CodeSize: 120322524 -> 120200568 (-0.10%)
Instrs: 23448344 -> 23417855 (-0.13%)
Latency: 414018749 -> 413639289 (-0.09%); split: -0.09%, +0.00%
InvThroughput: 183819363 -> 183726539 (-0.05%); split: -0.05%, +0.00%
Copies: 2194937 -> 2164448 (-1.39%)

Signed-off-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18528>
Diffstat (limited to 'src/amd/compiler/aco_ssa_elimination.cpp')
-rw-r--r--src/amd/compiler/aco_ssa_elimination.cpp32
1 files changed, 28 insertions, 4 deletions
diff --git a/src/amd/compiler/aco_ssa_elimination.cpp b/src/amd/compiler/aco_ssa_elimination.cpp
index 1d75aa067dc..98a00b050c0 100644
--- a/src/amd/compiler/aco_ssa_elimination.cpp
+++ b/src/amd/compiler/aco_ssa_elimination.cpp
@@ -364,7 +364,34 @@ try_optimize_branching_sequence(ssa_elimination_ctx& ctx, Block& block, const in
const bool vopc = v_cmpx_op != aco_opcode::num_opcodes;
/* If s_and_saveexec is used, we'll need to insert a new instruction to save the old exec. */
- const bool save_original_exec = exec_copy->opcode == and_saveexec;
+ bool save_original_exec = exec_copy->opcode == and_saveexec;
+
+ const Definition exec_wr_def = exec_val->definitions[0];
+ const Definition exec_copy_def = exec_copy->definitions[0];
+
+ if (save_original_exec) {
+ for (int i = exec_copy_idx - 1; i >= 0; i--) {
+ const aco_ptr<Instruction>& instr = block.instructions[i];
+ if (instr->opcode == aco_opcode::p_parallelcopy &&
+ instr->definitions[0].physReg() == exec &&
+ instr->definitions[0].regClass() == ctx.program->lane_mask &&
+ instr->operands[0].physReg() == exec_copy_def.physReg()) {
+ /* The register that we should save exec to already contains the same value as exec. */
+ save_original_exec = false;
+ break;
+ }
+ /* exec_copy_def is clobbered or exec written before we found a copy. */
+ if ((i != exec_val_idx || !vcmpx_exec_only) &&
+ std::any_of(instr->definitions.begin(), instr->definitions.end(),
+ [&exec_copy_def, &ctx](const Definition& def) -> bool
+ {
+ return regs_intersect(exec_copy_def, def) ||
+ regs_intersect(Definition(exec, ctx.program->lane_mask), def);
+ }))
+ break;
+ }
+ }
+
/* Position where the original exec mask copy should be inserted. */
const int save_original_exec_idx = exec_val_idx;
/* The copy can be removed when it kills its operand.
@@ -386,9 +413,6 @@ try_optimize_branching_sequence(ssa_elimination_ctx& ctx, Block& block, const in
if (!can_reassign || (save_original_exec && !can_remove_copy))
return;
- const Definition exec_wr_def = exec_val->definitions[0];
- const Definition exec_copy_def = exec_copy->definitions[0];
-
/* When exec_val and exec_copy are non-adjacent, check whether there are any
* instructions inbetween (besides p_logical_end) which may inhibit the optimization.
*/