summaryrefslogtreecommitdiff
path: root/src/amd/compiler/aco_ssa_elimination.cpp
diff options
context:
space:
mode:
authorDaniel Schürmann <daniel@schuermann.dev>2020-01-14 13:14:38 +0100
committerDaniel Schürmann <daniel@schuermann.dev>2020-01-16 16:01:59 +0100
commit3bca0af25dbf6d6b162463138100abb20bc1a1cc (patch)
treeee1fdbf285154f4dace51674a4c50650e1964dbe /src/amd/compiler/aco_ssa_elimination.cpp
parent427e5eeb02053745f4f1c7a655058ce3d804c206 (diff)
downloadmesa-3bca0af25dbf6d6b162463138100abb20bc1a1cc.tar.gz
aco: ignore parallelcopies to the same register on jump threading
The more conservative lowering to CSSA inserts unnecessary parallelcopies, which might get coalesced (source and destination end up in the same register) and can then be ignored during jump threading.

v2: outline is_empty_block() check.

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3385>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3385>
Diffstat (limited to 'src/amd/compiler/aco_ssa_elimination.cpp')
-rw-r--r--src/amd/compiler/aco_ssa_elimination.cpp69
1 files changed, 38 insertions, 31 deletions
diff --git a/src/amd/compiler/aco_ssa_elimination.cpp b/src/amd/compiler/aco_ssa_elimination.cpp
index 95e2e842a9c..94f5a1ade83 100644
--- a/src/amd/compiler/aco_ssa_elimination.cpp
+++ b/src/amd/compiler/aco_ssa_elimination.cpp
@@ -111,6 +111,35 @@ void insert_parallelcopies(ssa_elimination_ctx& ctx)
}
}
+bool is_empty_block(Block* block, bool ignore_exec_writes)
+{
+ /* Return true if every instruction in the block is one this check treats as a no-op: phis, logical start/end markers, branches, parallelcopies whose definitions already live in the same register as their operands, and (only when ignore_exec_writes is set) parallelcopy or s_andn2 writes to exec. Any other instruction makes the block non-empty. */
+ for (aco_ptr<Instruction>& instr : block->instructions) {
+ switch (instr->opcode) {
+ case aco_opcode::p_linear_phi:
+ case aco_opcode::p_phi:
+ case aco_opcode::p_logical_start:
+ case aco_opcode::p_logical_end:
+ case aco_opcode::p_branch:
+ break; /* these opcodes never make the block count as non-empty */
+ case aco_opcode::p_parallelcopy:
+ for (unsigned i = 0; i < instr->definitions.size(); i++) { /* definition i is paired with operand i */
+ if (ignore_exec_writes && instr->definitions[i].physReg() == exec)
+ continue; /* caller asked to disregard copies into exec */
+ if (instr->definitions[i].physReg() != instr->operands[i].physReg())
+ return false; /* copy between two different registers: block is not empty */
+ }
+ break;
+ case aco_opcode::s_andn2_b64:
+ case aco_opcode::s_andn2_b32:
+ if (ignore_exec_writes && instr->definitions[0].physReg() == exec)
+ break; /* exec write tolerated; when the condition fails, control intentionally falls through to default */
+ default:
+ return false; /* any other instruction makes the block non-empty */
+ }
+ }
+ return true;
+}
void try_remove_merge_block(ssa_elimination_ctx& ctx, Block* block)
{
@@ -120,22 +149,9 @@ void try_remove_merge_block(ssa_elimination_ctx& ctx, Block* block)
!(ctx.program->blocks[block->linear_succs[0]].kind & block_kind_merge))
return;
- /* check if this block is empty and the exec mask is not needed */
- for (aco_ptr<Instruction>& instr : block->instructions) {
- if (instr->opcode == aco_opcode::p_parallelcopy) {
- if (instr->definitions[0].physReg() == exec)
- continue;
- else
- return;
- }
-
- if (instr->opcode != aco_opcode::p_linear_phi &&
- instr->opcode != aco_opcode::p_phi &&
- instr->opcode != aco_opcode::p_logical_start &&
- instr->opcode != aco_opcode::p_logical_end &&
- instr->opcode != aco_opcode::p_branch)
- return;
- }
+ /* check if this block is empty */
+ if (!is_empty_block(block, true))
+ return;
/* keep the branch instruction and remove the rest */
aco_ptr<Instruction> branch = std::move(block->instructions.back());
@@ -146,18 +162,13 @@ void try_remove_merge_block(ssa_elimination_ctx& ctx, Block* block)
void try_remove_invert_block(ssa_elimination_ctx& ctx, Block* block)
{
assert(block->linear_succs.size() == 2);
+ /* only remove this block if the successor got removed as well */
if (block->linear_succs[0] != block->linear_succs[1])
return;
- /* check if we can remove this block */
- for (aco_ptr<Instruction>& instr : block->instructions) {
- if (instr->opcode != aco_opcode::p_linear_phi &&
- instr->opcode != aco_opcode::p_phi &&
- (instr->opcode != aco_opcode::s_andn2_b64 || ctx.program->wave_size != 64) &&
- (instr->opcode != aco_opcode::s_andn2_b32 || ctx.program->wave_size != 32) &&
- instr->opcode != aco_opcode::p_branch)
- return;
- }
+ /* check if block is otherwise empty */
+ if (!is_empty_block(block, true))
+ return;
unsigned succ_idx = block->linear_succs[0];
assert(block->linear_preds.size() == 2);
@@ -179,12 +190,8 @@ void try_remove_invert_block(ssa_elimination_ctx& ctx, Block* block)
void try_remove_simple_block(ssa_elimination_ctx& ctx, Block* block)
{
- for (aco_ptr<Instruction>& instr : block->instructions) {
- if (instr->opcode != aco_opcode::p_logical_start &&
- instr->opcode != aco_opcode::p_logical_end &&
- instr->opcode != aco_opcode::p_branch)
- return;
- }
+ if (!is_empty_block(block, false))
+ return;
Block& pred = ctx.program->blocks[block->linear_preds[0]];
Block& succ = ctx.program->blocks[block->linear_succs[0]];