summaryrefslogtreecommitdiff
path: root/src/amd/compiler/aco_ssa_elimination.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/amd/compiler/aco_ssa_elimination.cpp')
-rw-r--r--src/amd/compiler/aco_ssa_elimination.cpp291
1 files changed, 291 insertions, 0 deletions
diff --git a/src/amd/compiler/aco_ssa_elimination.cpp b/src/amd/compiler/aco_ssa_elimination.cpp
new file mode 100644
index 00000000000..3d76dcd8867
--- /dev/null
+++ b/src/amd/compiler/aco_ssa_elimination.cpp
@@ -0,0 +1,291 @@
+/*
+ * Copyright © 2018 Valve Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+
+#include "aco_ir.h"
+
+#include <map>
+
+namespace aco {
+namespace {
+
+/* map: block-id -> pair (dest, src) to store phi information */
+typedef std::map<uint32_t, std::vector<std::pair<Definition, Operand>>> phi_info;
+
+struct ssa_elimination_ctx {
+ phi_info logical_phi_info;
+ phi_info linear_phi_info;
+ std::vector<bool> empty_blocks;
+ Program* program;
+
+ ssa_elimination_ctx(Program* program) : empty_blocks(program->blocks.size(), true), program(program) {}
+};
+
+void collect_phi_info(ssa_elimination_ctx& ctx)
+{
+ for (Block& block : ctx.program->blocks) {
+ for (aco_ptr<Instruction>& phi : block.instructions) {
+ if (phi->opcode != aco_opcode::p_phi && phi->opcode != aco_opcode::p_linear_phi)
+ break;
+
+ for (unsigned i = 0; i < phi->operands.size(); i++) {
+ if (phi->operands[i].isUndefined())
+ continue;
+ if (phi->operands[i].isTemp() && phi->operands[i].physReg() == phi->definitions[0].physReg())
+ continue;
+
+ std::vector<unsigned>& preds = phi->opcode == aco_opcode::p_phi ? block.logical_preds : block.linear_preds;
+ phi_info& info = phi->opcode == aco_opcode::p_phi ? ctx.logical_phi_info : ctx.linear_phi_info;
+ const auto result = info.emplace(preds[i], std::vector<std::pair<Definition, Operand>>());
+ result.first->second.emplace_back(phi->definitions[0], phi->operands[i]);
+ ctx.empty_blocks[preds[i]] = false;
+ }
+ }
+ }
+}
+
+void insert_parallelcopies(ssa_elimination_ctx& ctx)
+{
+ /* insert the parallelcopies from logical phis before p_logical_end */
+ for (auto&& entry : ctx.logical_phi_info) {
+ Block& block = ctx.program->blocks[entry.first];
+ unsigned idx = block.instructions.size() - 1;
+ while (block.instructions[idx]->opcode != aco_opcode::p_logical_end) {
+ assert(idx > 0);
+ idx--;
+ }
+
+ std::vector<aco_ptr<Instruction>>::iterator it = std::next(block.instructions.begin(), idx);
+ aco_ptr<Pseudo_instruction> pc{create_instruction<Pseudo_instruction>(aco_opcode::p_parallelcopy, Format::PSEUDO, entry.second.size(), entry.second.size())};
+ unsigned i = 0;
+ for (std::pair<Definition, Operand>& pair : entry.second)
+ {
+ pc->definitions[i] = pair.first;
+ pc->operands[i] = pair.second;
+ i++;
+ }
+ /* this shouldn't be needed since we're only copying vgprs */
+ pc->tmp_in_scc = false;
+ block.instructions.insert(it, std::move(pc));
+ }
+
+ /* insert parallelcopies for the linear phis at the end of blocks just before the branch */
+ for (auto&& entry : ctx.linear_phi_info) {
+ Block& block = ctx.program->blocks[entry.first];
+ std::vector<aco_ptr<Instruction>>::iterator it = block.instructions.end();
+ --it;
+ assert((*it)->format == Format::PSEUDO_BRANCH);
+ aco_ptr<Pseudo_instruction> pc{create_instruction<Pseudo_instruction>(aco_opcode::p_parallelcopy, Format::PSEUDO, entry.second.size(), entry.second.size())};
+ unsigned i = 0;
+ for (std::pair<Definition, Operand>& pair : entry.second)
+ {
+ pc->definitions[i] = pair.first;
+ pc->operands[i] = pair.second;
+ i++;
+ }
+ pc->tmp_in_scc = block.scc_live_out;
+ pc->scratch_sgpr = block.scratch_sgpr;
+ block.instructions.insert(it, std::move(pc));
+ }
+}
+
+
+void try_remove_merge_block(ssa_elimination_ctx& ctx, Block* block)
+{
+ /* check if the successor is another merge block which restores exec */
+ // TODO: divergent loops also restore exec
+ if (block->linear_succs.size() != 1 ||
+ !(ctx.program->blocks[block->linear_succs[0]].kind & block_kind_merge))
+ return;
+
+ /* check if this block is empty and the exec mask is not needed */
+ for (aco_ptr<Instruction>& instr : block->instructions) {
+ if (instr->opcode == aco_opcode::p_parallelcopy) {
+ if (instr->definitions[0].physReg() == exec)
+ continue;
+ else
+ return;
+ }
+
+ if (instr->opcode != aco_opcode::p_linear_phi &&
+ instr->opcode != aco_opcode::p_phi &&
+ instr->opcode != aco_opcode::p_logical_start &&
+ instr->opcode != aco_opcode::p_logical_end &&
+ instr->opcode != aco_opcode::p_branch)
+ return;
+ }
+
+ /* keep the branch instruction and remove the rest */
+ aco_ptr<Instruction> branch = std::move(block->instructions.back());
+ block->instructions.clear();
+ block->instructions.emplace_back(std::move(branch));
+}
+
+void try_remove_invert_block(ssa_elimination_ctx& ctx, Block* block)
+{
+ assert(block->linear_succs.size() == 2);
+ if (block->linear_succs[0] != block->linear_succs[1])
+ return;
+
+ /* check if we can remove this block */
+ for (aco_ptr<Instruction>& instr : block->instructions) {
+ if (instr->opcode != aco_opcode::p_linear_phi &&
+ instr->opcode != aco_opcode::p_phi &&
+ instr->opcode != aco_opcode::s_andn2_b64 &&
+ instr->opcode != aco_opcode::p_branch)
+ return;
+ }
+
+ unsigned succ_idx = block->linear_succs[0];
+ assert(block->linear_preds.size() == 2);
+ for (unsigned i = 0; i < 2; i++) {
+ Block *pred = &ctx.program->blocks[block->linear_preds[i]];
+ pred->linear_succs[0] = succ_idx;
+ ctx.program->blocks[succ_idx].linear_preds[i] = pred->index;
+
+ Pseudo_branch_instruction *branch = static_cast<Pseudo_branch_instruction*>(pred->instructions.back().get());
+ assert(branch->format == Format::PSEUDO_BRANCH);
+ branch->target[0] = succ_idx;
+ branch->target[1] = succ_idx;
+ }
+
+ block->instructions.clear();
+ block->linear_preds.clear();
+ block->linear_succs.clear();
+}
+
+void try_remove_simple_block(ssa_elimination_ctx& ctx, Block* block)
+{
+ for (aco_ptr<Instruction>& instr : block->instructions) {
+ if (instr->opcode != aco_opcode::p_logical_start &&
+ instr->opcode != aco_opcode::p_logical_end &&
+ instr->opcode != aco_opcode::p_branch)
+ return;
+ }
+
+ Block& pred = ctx.program->blocks[block->linear_preds[0]];
+ Block& succ = ctx.program->blocks[block->linear_succs[0]];
+ Pseudo_branch_instruction* branch = static_cast<Pseudo_branch_instruction*>(pred.instructions.back().get());
+ if (branch->opcode == aco_opcode::p_branch) {
+ branch->target[0] = succ.index;
+ branch->target[1] = succ.index;
+ } else if (branch->target[0] == block->index) {
+ branch->target[0] = succ.index;
+ } else if (branch->target[0] == succ.index) {
+ assert(branch->target[1] == block->index);
+ branch->target[1] = succ.index;
+ branch->opcode = aco_opcode::p_branch;
+ } else if (branch->target[1] == block->index) {
+ /* check if there is a fall-through path from block to succ */
+ bool falls_through = true;
+ for (unsigned j = block->index + 1; falls_through && j < succ.index; j++) {
+ assert(ctx.program->blocks[j].index == j);
+ if (!ctx.program->blocks[j].instructions.empty())
+ falls_through = false;
+ }
+ if (falls_through) {
+ branch->target[1] = succ.index;
+ } else {
+ /* check if there is a fall-through path for the alternative target */
+ for (unsigned j = block->index + 1; j < branch->target[0]; j++) {
+ if (!ctx.program->blocks[j].instructions.empty())
+ return;
+ }
+
+ /* This is a (uniform) break or continue block. The branch condition has to be inverted. */
+ if (branch->opcode == aco_opcode::p_cbranch_z)
+ branch->opcode = aco_opcode::p_cbranch_nz;
+ else if (branch->opcode == aco_opcode::p_cbranch_nz)
+ branch->opcode = aco_opcode::p_cbranch_z;
+ else
+ assert(false);
+ /* also invert the linear successors */
+ pred.linear_succs[0] = pred.linear_succs[1];
+ pred.linear_succs[1] = succ.index;
+ branch->target[1] = branch->target[0];
+ branch->target[0] = succ.index;
+ }
+ } else {
+ assert(false);
+ }
+
+ if (branch->target[0] == branch->target[1])
+ branch->opcode = aco_opcode::p_branch;
+
+ for (unsigned i = 0; i < pred.linear_succs.size(); i++)
+ if (pred.linear_succs[i] == block->index)
+ pred.linear_succs[i] = succ.index;
+
+ for (unsigned i = 0; i < succ.linear_preds.size(); i++)
+ if (succ.linear_preds[i] == block->index)
+ succ.linear_preds[i] = pred.index;
+
+ block->instructions.clear();
+ block->linear_preds.clear();
+ block->linear_succs.clear();
+}
+
+void jump_threading(ssa_elimination_ctx& ctx)
+{
+ for (int i = ctx.program->blocks.size() - 1; i >= 0; i--) {
+ Block* block = &ctx.program->blocks[i];
+
+ if (!ctx.empty_blocks[i])
+ continue;
+
+ if (block->kind & block_kind_invert) {
+ try_remove_invert_block(ctx, block);
+ continue;
+ }
+
+ if (block->linear_succs.size() > 1)
+ continue;
+
+ if (block->kind & block_kind_merge ||
+ block->kind & block_kind_loop_exit)
+ try_remove_merge_block(ctx, block);
+
+ if (block->linear_preds.size() == 1)
+ try_remove_simple_block(ctx, block);
+ }
+}
+
+} /* end namespace */
+
+
+void ssa_elimination(Program* program)
+{
+ ssa_elimination_ctx ctx(program);
+
+ /* Collect information about every phi-instruction */
+ collect_phi_info(ctx);
+
+ /* eliminate empty blocks */
+ jump_threading(ctx);
+
+ /* insert parallelcopies from SSA elimination */
+ insert_parallelcopies(ctx);
+
+}
+}