diff options
author | Daniel Schürmann <daniel@schuermann.dev> | 2020-01-14 13:14:38 +0100 |
---|---|---|
committer | Daniel Schürmann <daniel@schuermann.dev> | 2020-01-16 16:01:59 +0100 |
commit | 3bca0af25dbf6d6b162463138100abb20bc1a1cc (patch) | |
tree | ee1fdbf285154f4dace51674a4c50650e1964dbe /src/amd/compiler/aco_ssa_elimination.cpp | |
parent | 427e5eeb02053745f4f1c7a655058ce3d804c206 (diff) | |
download | mesa-3bca0af25dbf6d6b162463138100abb20bc1a1cc.tar.gz |
aco: ignore parallelcopies to the same register on jump threading
The more conservative lowering to CSSA inserts unnecessary parallelcopies.
If such a copy's source and destination get coalesced into the same register, it is a no-op and can be ignored on jump threading.
v2: outline is_empty_block() check.
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3385>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3385>
Diffstat (limited to 'src/amd/compiler/aco_ssa_elimination.cpp')
-rw-r--r-- | src/amd/compiler/aco_ssa_elimination.cpp | 69 |
1 files changed, 38 insertions, 31 deletions
diff --git a/src/amd/compiler/aco_ssa_elimination.cpp b/src/amd/compiler/aco_ssa_elimination.cpp index 95e2e842a9c..94f5a1ade83 100644 --- a/src/amd/compiler/aco_ssa_elimination.cpp +++ b/src/amd/compiler/aco_ssa_elimination.cpp @@ -111,6 +111,35 @@ void insert_parallelcopies(ssa_elimination_ctx& ctx) } } +bool is_empty_block(Block* block, bool ignore_exec_writes) +{ + /* check if this block is empty and the exec mask is not needed */ + for (aco_ptr<Instruction>& instr : block->instructions) { + switch (instr->opcode) { + case aco_opcode::p_linear_phi: + case aco_opcode::p_phi: + case aco_opcode::p_logical_start: + case aco_opcode::p_logical_end: + case aco_opcode::p_branch: + break; + case aco_opcode::p_parallelcopy: + for (unsigned i = 0; i < instr->definitions.size(); i++) { + if (ignore_exec_writes && instr->definitions[i].physReg() == exec) + continue; + if (instr->definitions[i].physReg() != instr->operands[i].physReg()) + return false; + } + break; + case aco_opcode::s_andn2_b64: + case aco_opcode::s_andn2_b32: + if (ignore_exec_writes && instr->definitions[0].physReg() == exec) + break; + default: + return false; + } + } + return true; +} void try_remove_merge_block(ssa_elimination_ctx& ctx, Block* block) { @@ -120,22 +149,9 @@ void try_remove_merge_block(ssa_elimination_ctx& ctx, Block* block) !(ctx.program->blocks[block->linear_succs[0]].kind & block_kind_merge)) return; - /* check if this block is empty and the exec mask is not needed */ - for (aco_ptr<Instruction>& instr : block->instructions) { - if (instr->opcode == aco_opcode::p_parallelcopy) { - if (instr->definitions[0].physReg() == exec) - continue; - else - return; - } - - if (instr->opcode != aco_opcode::p_linear_phi && - instr->opcode != aco_opcode::p_phi && - instr->opcode != aco_opcode::p_logical_start && - instr->opcode != aco_opcode::p_logical_end && - instr->opcode != aco_opcode::p_branch) - return; - } + /* check if this block is empty */ + if (!is_empty_block(block, true)) + return; /* 
keep the branch instruction and remove the rest */ aco_ptr<Instruction> branch = std::move(block->instructions.back()); @@ -146,18 +162,13 @@ void try_remove_merge_block(ssa_elimination_ctx& ctx, Block* block) void try_remove_invert_block(ssa_elimination_ctx& ctx, Block* block) { assert(block->linear_succs.size() == 2); + /* only remove this block if the successor got removed as well */ if (block->linear_succs[0] != block->linear_succs[1]) return; - /* check if we can remove this block */ - for (aco_ptr<Instruction>& instr : block->instructions) { - if (instr->opcode != aco_opcode::p_linear_phi && - instr->opcode != aco_opcode::p_phi && - (instr->opcode != aco_opcode::s_andn2_b64 || ctx.program->wave_size != 64) && - (instr->opcode != aco_opcode::s_andn2_b32 || ctx.program->wave_size != 32) && - instr->opcode != aco_opcode::p_branch) - return; - } + /* check if block is otherwise empty */ + if (!is_empty_block(block, true)) + return; unsigned succ_idx = block->linear_succs[0]; assert(block->linear_preds.size() == 2); @@ -179,12 +190,8 @@ void try_remove_invert_block(ssa_elimination_ctx& ctx, Block* block) void try_remove_simple_block(ssa_elimination_ctx& ctx, Block* block) { - for (aco_ptr<Instruction>& instr : block->instructions) { - if (instr->opcode != aco_opcode::p_logical_start && - instr->opcode != aco_opcode::p_logical_end && - instr->opcode != aco_opcode::p_branch) - return; - } + if (!is_empty_block(block, false)) + return; Block& pred = ctx.program->blocks[block->linear_preds[0]]; Block& succ = ctx.program->blocks[block->linear_succs[0]]; |