summaryrefslogtreecommitdiff
path: root/src/amd/compiler/aco_insert_exec_mask.cpp
diff options
context:
space:
mode:
authorRhys Perry <pendingchaos02@gmail.com>2021-01-15 16:00:35 +0000
committerMarge Bot <eric+marge@anholt.net>2021-02-09 17:52:17 +0000
commited020008b57744f7b9093da0f4c13bdedffb712c (patch)
tree4051a63db606af36e4b1d3b8cf4d48c960d0fe23 /src/amd/compiler/aco_insert_exec_mask.cpp
parentf0074a6f0532196f5d9d2be00a9d884100401816 (diff)
downloadmesa-ed020008b57744f7b9093da0f4c13bdedffb712c.tar.gz
aco: rewrite setting of Exact_Branch
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8446>
Diffstat (limited to 'src/amd/compiler/aco_insert_exec_mask.cpp')
-rw-r--r--src/amd/compiler/aco_insert_exec_mask.cpp76
1 files changed, 65 insertions, 11 deletions
diff --git a/src/amd/compiler/aco_insert_exec_mask.cpp b/src/amd/compiler/aco_insert_exec_mask.cpp
index 9c184f956ce..633b8e3422f 100644
--- a/src/amd/compiler/aco_insert_exec_mask.cpp
+++ b/src/amd/compiler/aco_insert_exec_mask.cpp
@@ -154,16 +154,6 @@ void get_block_needs(wqm_ctx &ctx, exec_ctx &exec_ctx, Block* block)
mark_block_wqm(ctx, block_idx);
block_idx++;
}
- } else if (ctx.loop && !ctx.wqm) {
- /* Ensure a branch never results in an exec mask with only helper
- * invocations (which can cause a loop to repeat infinitively if it's
- * break branches are done in exact). */
- unsigned block_idx = block->index;
- do {
- if ((ctx.program->blocks[block_idx].kind & block_kind_branch))
- exec_ctx.info[block_idx].block_needs |= Exact_Branch;
- block_idx++;
- } while (!(ctx.program->blocks[block_idx].kind & block_kind_top_level));
}
ctx.loop = false;
@@ -235,6 +225,50 @@ void get_block_needs(wqm_ctx &ctx, exec_ctx &exec_ctx, Block* block)
ctx.loop = true;
}
+void handle_exact_loops(wqm_ctx& ctx, exec_ctx& exec_ctx, unsigned preheader)
+{
+ unsigned header = preheader + 1;
+ assert(exec_ctx.program->blocks[header].kind & block_kind_loop_header);
+
+ unsigned exit = header + 1;
+ for (; exit < exec_ctx.program->blocks.size(); exit++) {
+ Block& exit_block = exec_ctx.program->blocks[exit];
+ if ((exit_block.kind & block_kind_loop_exit) && exit_block.loop_nest_depth == 0)
+ break;
+ }
+ assert(exit != exec_ctx.program->blocks.size());
+
+ int parent_branch = preheader;
+ unsigned rel_branch_depth = 0;
+ for (; parent_branch >= 0; parent_branch--) {
+ Block& branch = exec_ctx.program->blocks[parent_branch];
+ if (branch.kind & block_kind_branch) {
+ if (rel_branch_depth == 0)
+ break;
+ rel_branch_depth--;
+ }
+
+ /* top-level blocks should never have empty exact exec masks */
+ if (branch.kind & block_kind_top_level)
+ return;
+
+ if (branch.kind & block_kind_merge)
+ rel_branch_depth++;
+ }
+ assert(parent_branch >= 0);
+
+ Block& branch = exec_ctx.program->blocks[parent_branch];
+ assert(branch.kind & block_kind_branch);
+ if (ctx.branch_wqm[parent_branch]) {
+ /* The branch can't be done in Exact because some other blocks in it
+ * are in WQM. So instead, ensure that the loop breaks are done in WQM. */
+ for (unsigned pred_idx : exec_ctx.program->blocks[exit].logical_preds)
+ mark_block_wqm(ctx, pred_idx);
+ } else {
+ exec_ctx.info[parent_branch].block_needs |= Exact_Branch;
+ }
+}
+
void calculate_wqm_needs(exec_ctx& exec_ctx)
{
wqm_ctx ctx(exec_ctx.program);
@@ -243,7 +277,27 @@ void calculate_wqm_needs(exec_ctx& exec_ctx)
unsigned block_index = *std::prev(ctx.worklist.end());
ctx.worklist.erase(std::prev(ctx.worklist.end()));
- get_block_needs(ctx, exec_ctx, &exec_ctx.program->blocks[block_index]);
+ Block& block = exec_ctx.program->blocks[block_index];
+ get_block_needs(ctx, exec_ctx, &block);
+
+ /* If an outer loop and it's nested loops does not need WQM,
+ * add_branch_code() will ensure that it enters in Exact. We have to
+ * ensure that the exact exec mask is not empty by adding Exact_Branch to
+ * the outer divergent branch.
+ *
+ * If the loop or a nested loop needs WQM, branch_wqm will be true for the
+ * preheader.
+ */
+ if (block.kind & block_kind_top_level && block.index != exec_ctx.program->blocks.size() - 1) {
+ unsigned preheader = block.index;
+ do {
+ Block& preheader_block = exec_ctx.program->blocks[preheader];
+ if ((preheader_block.kind & block_kind_loop_preheader) &&
+ preheader_block.loop_nest_depth == 0 && !ctx.branch_wqm[preheader])
+ handle_exact_loops(ctx, exec_ctx, preheader);
+ preheader++;
+ } while (!(exec_ctx.program->blocks[preheader].kind & block_kind_top_level));
+ }
}
uint8_t ever_again_needs = 0;