diff options
author | Rhys Perry <pendingchaos02@gmail.com> | 2021-01-15 16:00:35 +0000 |
---|---|---|
committer | Marge Bot <eric+marge@anholt.net> | 2021-02-09 17:52:17 +0000 |
commit | ed020008b57744f7b9093da0f4c13bdedffb712c (patch) | |
tree | 4051a63db606af36e4b1d3b8cf4d48c960d0fe23 /src/amd/compiler/aco_insert_exec_mask.cpp | |
parent | f0074a6f0532196f5d9d2be00a9d884100401816 (diff) | |
download | mesa-ed020008b57744f7b9093da0f4c13bdedffb712c.tar.gz |
aco: rewrite setting of Exact_Branch
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8446>
Diffstat (limited to 'src/amd/compiler/aco_insert_exec_mask.cpp')
-rw-r--r-- | src/amd/compiler/aco_insert_exec_mask.cpp | 76 |
1 files changed, 65 insertions, 11 deletions
```diff
diff --git a/src/amd/compiler/aco_insert_exec_mask.cpp b/src/amd/compiler/aco_insert_exec_mask.cpp
index 9c184f956ce..633b8e3422f 100644
--- a/src/amd/compiler/aco_insert_exec_mask.cpp
+++ b/src/amd/compiler/aco_insert_exec_mask.cpp
@@ -154,16 +154,6 @@ void get_block_needs(wqm_ctx &ctx, exec_ctx &exec_ctx, Block* block)
             mark_block_wqm(ctx, block_idx);
             block_idx++;
          }
-      } else if (ctx.loop && !ctx.wqm) {
-         /* Ensure a branch never results in an exec mask with only helper
-          * invocations (which can cause a loop to repeat infinitively if it's
-          * break branches are done in exact). */
-         unsigned block_idx = block->index;
-         do {
-            if ((ctx.program->blocks[block_idx].kind & block_kind_branch))
-               exec_ctx.info[block_idx].block_needs |= Exact_Branch;
-            block_idx++;
-         } while (!(ctx.program->blocks[block_idx].kind & block_kind_top_level));
       }
 
       ctx.loop = false;
@@ -235,6 +225,50 @@ void get_block_needs(wqm_ctx &ctx, exec_ctx &exec_ctx, Block* block)
       ctx.loop = true;
 }
 
+void handle_exact_loops(wqm_ctx& ctx, exec_ctx& exec_ctx, unsigned preheader)
+{
+   unsigned header = preheader + 1;
+   assert(exec_ctx.program->blocks[header].kind & block_kind_loop_header);
+
+   unsigned exit = header + 1;
+   for (; exit < exec_ctx.program->blocks.size(); exit++) {
+      Block& exit_block = exec_ctx.program->blocks[exit];
+      if ((exit_block.kind & block_kind_loop_exit) && exit_block.loop_nest_depth == 0)
+         break;
+   }
+   assert(exit != exec_ctx.program->blocks.size());
+
+   int parent_branch = preheader;
+   unsigned rel_branch_depth = 0;
+   for (; parent_branch >= 0; parent_branch--) {
+      Block& branch = exec_ctx.program->blocks[parent_branch];
+      if (branch.kind & block_kind_branch) {
+         if (rel_branch_depth == 0)
+            break;
+         rel_branch_depth--;
+      }
+
+      /* top-level blocks should never have empty exact exec masks */
+      if (branch.kind & block_kind_top_level)
+         return;
+
+      if (branch.kind & block_kind_merge)
+         rel_branch_depth++;
+   }
+   assert(parent_branch >= 0);
+
+   Block& branch = exec_ctx.program->blocks[parent_branch];
+   assert(branch.kind & block_kind_branch);
+   if (ctx.branch_wqm[parent_branch]) {
+      /* The branch can't be done in Exact because some other blocks in it
+       * are in WQM. So instead, ensure that the loop breaks are done in WQM. */
+      for (unsigned pred_idx : exec_ctx.program->blocks[exit].logical_preds)
+         mark_block_wqm(ctx, pred_idx);
+   } else {
+      exec_ctx.info[parent_branch].block_needs |= Exact_Branch;
+   }
+}
+
 void calculate_wqm_needs(exec_ctx& exec_ctx)
 {
    wqm_ctx ctx(exec_ctx.program);
@@ -243,7 +277,27 @@ void calculate_wqm_needs(exec_ctx& exec_ctx)
       unsigned block_index = *std::prev(ctx.worklist.end());
       ctx.worklist.erase(std::prev(ctx.worklist.end()));
 
-      get_block_needs(ctx, exec_ctx, &exec_ctx.program->blocks[block_index]);
+      Block& block = exec_ctx.program->blocks[block_index];
+      get_block_needs(ctx, exec_ctx, &block);
+
+      /* If an outer loop and it's nested loops does not need WQM,
+       * add_branch_code() will ensure that it enters in Exact. We have to
+       * ensure that the exact exec mask is not empty by adding Exact_Branch to
+       * the outer divergent branch.
+       *
+       * If the loop or a nested loop needs WQM, branch_wqm will be true for the
+       * preheader.
+       */
+      if (block.kind & block_kind_top_level && block.index != exec_ctx.program->blocks.size() - 1) {
+         unsigned preheader = block.index;
+         do {
+            Block& preheader_block = exec_ctx.program->blocks[preheader];
+            if ((preheader_block.kind & block_kind_loop_preheader) &&
+                preheader_block.loop_nest_depth == 0 && !ctx.branch_wqm[preheader])
+               handle_exact_loops(ctx, exec_ctx, preheader);
+            preheader++;
+         } while (!(exec_ctx.program->blocks[preheader].kind & block_kind_top_level));
+      }
    }
 
    uint8_t ever_again_needs = 0;
```