summary | refs | log | tree | commit | diff
path: root/src/amd/compiler/aco_ssa_elimination.cpp
diff options
context:
space:
mode:
author: Georg Lehmann <dadschoorse@gmail.com> 2022-08-14 13:56:30 +0200
committer: Marge Bot <emma+marge@anholt.net> 2022-08-15 13:25:38 +0000
commit: 410eff4d2f3e37d2581d35eb11c82ee1355481f3 (patch)
tree: 8ca74e6970c4f70a50be080b99a866a40f0f0194 /src/amd/compiler/aco_ssa_elimination.cpp
parent: 5ffc73896f3a7b96d0053ee3ac302e05dbc4f5cc (diff)
download: mesa-410eff4d2f3e37d2581d35eb11c82ee1355481f3.tar.gz
aco: Fix optimizing branching sequence with s_and_saveexec.
This optimization was broken for two reasons:
- s_and_saveexec has two operands, the copy value and exec
- s_and_saveexec has an exec read, so exec_write_used will always be true
  before we find branch_exec_val_idx

Foz-DB Navi21:
Totals from 31453 (23.31% of 134913) affected shaders:
CodeSize: 204831260 -> 204831156 (-0.00%)
Instrs: 38157117 -> 38157091 (-0.00%)
Latency: 533708882 -> 531211721 (-0.47%); split: -0.47%, +0.00%
InvThroughput: 107088408 -> 106719188 (-0.34%); split: -0.35%, +0.00%
Copies: 2326179 -> 2502490 (+7.58%)

Signed-off-by: Georg Lehmann <dadschoorse@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18049>
Diffstat (limited to 'src/amd/compiler/aco_ssa_elimination.cpp')
-rw-r--r-- src/amd/compiler/aco_ssa_elimination.cpp 13
1 files changed, 8 insertions, 5 deletions
diff --git a/src/amd/compiler/aco_ssa_elimination.cpp b/src/amd/compiler/aco_ssa_elimination.cpp
index f82058a6c44..2d8c3a10dfc 100644
--- a/src/amd/compiler/aco_ssa_elimination.cpp
+++ b/src/amd/compiler/aco_ssa_elimination.cpp
@@ -544,7 +544,7 @@ eliminate_useless_exec_writes_in_block(ssa_elimination_ctx& ctx, Block& block)
/* For a newly encountered exec write, clear the used flag. */
if (writes_exec) {
- if (!logical_end_found && branch_reads_exec && instr->operands.size() == 1) {
+ if (!logical_end_found && branch_reads_exec && instr->operands.size()) {
/* We are in a branch that jumps according to exec.
* We just found the instruction that copies to exec before the branch.
*/
@@ -560,13 +560,16 @@ eliminate_useless_exec_writes_in_block(ssa_elimination_ctx& ctx, Block& block)
}
exec_write_used = false;
- }
-
- if (branch_exec_tempid && !exec_write_used && instr->definitions.size() &&
- instr->definitions[0].tempId() == branch_exec_tempid) {
+ } else if (branch_exec_tempid && instr->definitions.size() &&
+ instr->definitions[0].tempId() == branch_exec_tempid) {
/* We just found the instruction that produces the exec mask that is copied. */
assert(branch_exec_val_idx == -1);
branch_exec_val_idx = i;
+ } else if (branch_exec_tempid && branch_exec_val_idx == -1 && needs_exec) {
+ /* There is an instruction that needs the original exec mask before
+ * branch_exec_val_idx was found, so we can't optimize the branching sequence. */
+ branch_exec_copy_idx = -1;
+ branch_exec_tempid = 0;
}
/* If the current instruction needs exec, mark it as used. */