summary | refs | log | tree | commit | diff
path: root/src/amd/compiler/aco_insert_exec_mask.cpp
diff options
context:
space:
mode:
author: Rhys Perry <pendingchaos02@gmail.com> 2022-02-23 17:29:25 +0000
committer: Marge Bot <emma+marge@anholt.net> 2022-03-08 12:49:59 +0000
commit: d068eb53e84ca1e44ad96c31dab63476880b3c72 (patch)
tree: b8c933592201ca9e9fab99d477715da46d619517 /src/amd/compiler/aco_insert_exec_mask.cpp
parent: 42a5be975adc51d1029f25719b979bf2b3bae8a9 (diff)
download: mesa-d068eb53e84ca1e44ad96c31dab63476880b3c72.tar.gz
aco/insert_exec_mask: optimize top-level transition to exact before demote
fossil-db (Sienna Cichlid):
Totals from 5767 (3.55% of 162293) affected shaders:
Instrs: 3264949 -> 3257527 (-0.23%); split: -0.23%, +0.00%
CodeSize: 17835692 -> 17806004 (-0.17%); split: -0.17%, +0.00%
Latency: 45990060 -> 45987924 (-0.00%); split: -0.00%, +0.00%
InvThroughput: 7643850 -> 7643835 (-0.00%); split: -0.00%, +0.00%
Copies: 193641 -> 186219 (-3.83%); split: -3.84%, +0.01%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15244>
Diffstat (limited to 'src/amd/compiler/aco_insert_exec_mask.cpp')
-rw-r--r-- src/amd/compiler/aco_insert_exec_mask.cpp | 13
1 file changed, 10 insertions, 3 deletions
diff --git a/src/amd/compiler/aco_insert_exec_mask.cpp b/src/amd/compiler/aco_insert_exec_mask.cpp
index 04928c3c722..e1dd3929910 100644
--- a/src/amd/compiler/aco_insert_exec_mask.cpp
+++ b/src/amd/compiler/aco_insert_exec_mask.cpp
@@ -585,8 +585,7 @@ process_instructions(exec_ctx& ctx, Block* block, std::vector<aco_ptr<Instructio
}
} else if (instr->opcode == aco_opcode::p_demote_to_helper) {
/* turn demote into discard_if with only exact masks */
- assert((ctx.info[block->index].exec[0].second & (mask_type_exact | mask_type_global)) ==
- (mask_type_exact | mask_type_global));
+ assert(ctx.info[block->index].exec[0].second == (mask_type_exact | mask_type_global));
int num;
Temp cond, exit_cond;
@@ -605,7 +604,15 @@ process_instructions(exec_ctx& ctx, Block* block, std::vector<aco_ptr<Instructio
}
} else {
/* demote_if: transition to exact */
- transition_to_Exact(ctx, bld, block->index);
+ if (block->kind & block_kind_top_level && ctx.info[block->index].exec.size() == 2 &&
+ ctx.info[block->index].exec.back().second & mask_type_global) {
+ /* We don't need to actually copy anything into exact, since the s_andn2
+ * instructions later will do that.
+ */
+ ctx.info[block->index].exec.pop_back();
+ } else {
+ transition_to_Exact(ctx, bld, block->index);
+ }
assert(instr->operands[0].isTemp());
cond = instr->operands[0].getTemp();
num = ctx.info[block->index].exec.size() - 1;