summaryrefslogtreecommitdiff
path: root/src/amd/compiler/aco_scheduler.cpp
diff options
context:
space:
mode:
authorTony Wasserka <tony.wasserka@gmx.de>2021-06-07 12:02:43 +0200
committerTony Wasserka <tony.wasserka@gmx.de>2021-06-07 12:09:39 +0200
commit3c390e2eb654dc9b89e79ce8606dd718ed51dcc5 (patch)
treeaed117ad413528614c781d4ae15bb03c2c90b07a /src/amd/compiler/aco_scheduler.cpp
parent81761a311e1af3ca91b3c45d7031bacd2d78cb6e (diff)
downloadmesa-3c390e2eb654dc9b89e79ce8606dd718ed51dcc5.tar.gz
aco/scheduler: Move cursor handling state to dedicated interfaces
This clarifies the semantics of the index variables compared to the previous version, which used the same variables in a slightly different way depending on whether they were used for downwards moves or upwards ones. Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10885>
Diffstat (limited to 'src/amd/compiler/aco_scheduler.cpp')
-rw-r--r--src/amd/compiler/aco_scheduler.cpp258
1 files changed, 145 insertions, 113 deletions
diff --git a/src/amd/compiler/aco_scheduler.cpp b/src/amd/compiler/aco_scheduler.cpp
index f2a13eac892..bdd27bdb0c7 100644
--- a/src/amd/compiler/aco_scheduler.cpp
+++ b/src/amd/compiler/aco_scheduler.cpp
@@ -47,12 +47,60 @@ enum MoveResult {
move_fail_pressure,
};
+/**
+ * Cursor for downwards moves, where a single instruction is moved towards
+ * or below a group of instruction that hardware can execute as a clause.
+ */
+struct DownwardsCursor {
+ int source_idx; /* Current instruction to consider for moving */
+
+ int insert_idx_clause; /* First clause instruction */
+ int insert_idx; /* First instruction *after* the clause */
+
+ /* Maximum demand of all clause instructions,
+ * i.e. from insert_idx_clause (inclusive) to insert_idx (exclusive) */
+ RegisterDemand clause_demand;
+ /* Maximum demand of instructions from source_idx to insert_idx_clause (both exclusive) */
+ RegisterDemand total_demand;
+
+ DownwardsCursor(int current_idx, RegisterDemand initial_clause_demand)
+ : source_idx(current_idx - 1),
+ insert_idx_clause(current_idx),
+ insert_idx(current_idx + 1),
+ clause_demand(initial_clause_demand) {
+ }
+
+ void verify_invariants(const RegisterDemand* register_demand);
+};
+
+/**
+ * Cursor for upwards moves, where a single instruction is moved below
+ * another instruction.
+ */
+struct UpwardsCursor {
+ int source_idx; /* Current instruction to consider for moving */
+ int insert_idx; /* Instruction to move in front of */
+
+ /* Maximum demand of instructions from insert_idx (inclusive) to source_idx (exclusive) */
+ RegisterDemand total_demand;
+
+ UpwardsCursor(int source_idx_) : source_idx(source_idx_)
+ {
+ insert_idx = -1; /* to be initialized later */
+ }
+
+ bool has_insert_idx() const {
+ return insert_idx != -1;
+ }
+ void verify_invariants(const RegisterDemand* register_demand);
+};
+
struct MoveState {
RegisterDemand max_registers;
Block *block;
Instruction *current;
- RegisterDemand *register_demand;
+ RegisterDemand *register_demand; /* demand per instruction */
bool improved_rar;
std::vector<bool> depends_on;
@@ -62,25 +110,17 @@ struct MoveState {
std::vector<bool> RAR_dependencies;
std::vector<bool> RAR_dependencies_clause;
- int source_idx;
- int insert_idx, insert_idx_clause;
- RegisterDemand clause_demand, total_demand;
-
/* for moving instructions before the current instruction to after it */
- void downwards_init(int current_idx, bool improved_rar, bool may_form_clauses);
- MoveResult downwards_move(bool clause);
- void downwards_skip();
+ DownwardsCursor downwards_init(int current_idx, bool improved_rar, bool may_form_clauses);
+ MoveResult downwards_move(DownwardsCursor&, bool clause);
+ void downwards_skip(DownwardsCursor&);
/* for moving instructions after the first use of the current instruction upwards */
- void upwards_init(int source_idx, bool improved_rar);
- bool upwards_check_deps();
- void upwards_update_insert_idx();
- MoveResult upwards_move();
- void upwards_skip();
-
-private:
- void downwards_advance_helper();
- void upwards_verify_invariants();
+ UpwardsCursor upwards_init(int source_idx, bool improved_rar);
+ bool upwards_check_deps(UpwardsCursor&);
+ void upwards_update_insert_idx(UpwardsCursor&);
+ MoveResult upwards_move(UpwardsCursor&);
+ void upwards_skip(UpwardsCursor&);
};
struct sched_ctx {
@@ -113,10 +153,8 @@ void move_element(T begin_it, size_t idx, size_t before) {
}
}
-void MoveState::downwards_advance_helper()
+void DownwardsCursor::verify_invariants(const RegisterDemand* register_demand)
{
- source_idx--;
-
assert(source_idx < insert_idx_clause);
assert(insert_idx_clause < insert_idx);
@@ -135,16 +173,9 @@ void MoveState::downwards_advance_helper()
#endif
}
-void MoveState::downwards_init(int current_idx, bool improved_rar_, bool may_form_clauses)
+DownwardsCursor MoveState::downwards_init(int current_idx, bool improved_rar_, bool may_form_clauses)
{
improved_rar = improved_rar_;
- source_idx = current_idx;
-
- insert_idx = current_idx + 1;
- insert_idx_clause = current_idx;
-
- clause_demand = register_demand[current_idx];
- total_demand = {};
std::fill(depends_on.begin(), depends_on.end(), false);
if (improved_rar) {
@@ -161,16 +192,17 @@ void MoveState::downwards_init(int current_idx, bool improved_rar_, bool may_for
}
}
- /* update total_demand/source_idx */
- downwards_advance_helper();
+ DownwardsCursor cursor(current_idx, register_demand[current_idx]);
+ cursor.verify_invariants(register_demand);
+ return cursor;
}
/* If add_to_clause is true, the current clause is extended by moving the
* instruction at source_idx in front of the clause. Otherwise, the instruction
* is moved past the end of the clause without extending it */
-MoveResult MoveState::downwards_move(bool add_to_clause)
+MoveResult MoveState::downwards_move(DownwardsCursor& cursor, bool add_to_clause)
{
- aco_ptr<Instruction>& instr = block->instructions[source_idx];
+ aco_ptr<Instruction>& instr = block->instructions[cursor.source_idx];
for (const Definition& def : instr->definitions)
if (def.isTemp() && depends_on[def.tempId()])
@@ -195,10 +227,10 @@ MoveResult MoveState::downwards_move(bool add_to_clause)
}
}
- const int dest_insert_idx = add_to_clause ? insert_idx_clause : insert_idx;
- RegisterDemand register_pressure = total_demand;
+ const int dest_insert_idx = add_to_clause ? cursor.insert_idx_clause : cursor.insert_idx;
+ RegisterDemand register_pressure = cursor.total_demand;
if (!add_to_clause) {
- register_pressure.update(clause_demand);
+ register_pressure.update(cursor.clause_demand);
}
/* Check the new demand of the instructions being moved over */
@@ -214,34 +246,35 @@ MoveResult MoveState::downwards_move(bool add_to_clause)
return move_fail_pressure;
/* move the candidate below the memory load */
- move_element(block->instructions.begin(), source_idx, dest_insert_idx);
+ move_element(block->instructions.begin(), cursor.source_idx, dest_insert_idx);
/* update register pressure */
- move_element(register_demand, source_idx, dest_insert_idx);
- for (int i = source_idx; i < dest_insert_idx - 1; i++)
+ move_element(register_demand, cursor.source_idx, dest_insert_idx);
+ for (int i = cursor.source_idx; i < dest_insert_idx - 1; i++)
register_demand[i] -= candidate_diff;
register_demand[dest_insert_idx - 1] = new_demand;
- insert_idx_clause--;
- if (source_idx != insert_idx_clause) {
+ cursor.insert_idx_clause--;
+ if (cursor.source_idx != cursor.insert_idx_clause) {
/* Update demand if we moved over any instructions before the clause */
- total_demand -= candidate_diff;
+ cursor.total_demand -= candidate_diff;
} else {
- assert(total_demand == RegisterDemand{});
+ assert(cursor.total_demand == RegisterDemand{});
}
if (add_to_clause) {
- clause_demand.update(new_demand);
+ cursor.clause_demand.update(new_demand);
} else {
- clause_demand -= candidate_diff;
- insert_idx--;
+ cursor.clause_demand -= candidate_diff;
+ cursor.insert_idx--;
}
- downwards_advance_helper();
+ cursor.source_idx--;
+ cursor.verify_invariants(register_demand);
return move_success;
}
-void MoveState::downwards_skip()
+void MoveState::downwards_skip(DownwardsCursor& cursor)
{
- aco_ptr<Instruction>& instr = block->instructions[source_idx];
+ aco_ptr<Instruction>& instr = block->instructions[cursor.source_idx];
for (const Operand& op : instr->operands) {
if (op.isTemp()) {
@@ -252,14 +285,14 @@ void MoveState::downwards_skip()
}
}
}
- total_demand.update(register_demand[source_idx]);
-
- downwards_advance_helper();
+ cursor.total_demand.update(register_demand[cursor.source_idx]);
+ cursor.source_idx--;
+ cursor.verify_invariants(register_demand);
}
-void MoveState::upwards_verify_invariants() {
+void UpwardsCursor::verify_invariants(const RegisterDemand* register_demand) {
#ifndef NDEBUG
- if (insert_idx < 0) {
+ if (!has_insert_idx()) {
return;
}
@@ -273,13 +306,10 @@ void MoveState::upwards_verify_invariants() {
#endif
}
-void MoveState::upwards_init(int source_idx_, bool improved_rar_)
+UpwardsCursor MoveState::upwards_init(int source_idx, bool improved_rar_)
{
- source_idx = source_idx_;
improved_rar = improved_rar_;
- insert_idx = -1;
-
std::fill(depends_on.begin(), depends_on.end(), false);
std::fill(RAR_dependencies.begin(), RAR_dependencies.end(), false);
@@ -287,11 +317,13 @@ void MoveState::upwards_init(int source_idx_, bool improved_rar_)
if (def.isTemp())
depends_on[def.tempId()] = true;
}
+
+ return UpwardsCursor(source_idx);
}
-bool MoveState::upwards_check_deps()
+bool MoveState::upwards_check_deps(UpwardsCursor& cursor)
{
- aco_ptr<Instruction>& instr = block->instructions[source_idx];
+ aco_ptr<Instruction>& instr = block->instructions[cursor.source_idx];
for (const Operand& op : instr->operands) {
if (op.isTemp() && depends_on[op.tempId()])
return false;
@@ -299,17 +331,17 @@ bool MoveState::upwards_check_deps()
return true;
}
-void MoveState::upwards_update_insert_idx()
+void MoveState::upwards_update_insert_idx(UpwardsCursor& cursor)
{
- insert_idx = source_idx;
- total_demand = register_demand[insert_idx];
+ cursor.insert_idx = cursor.source_idx;
+ cursor.total_demand = register_demand[cursor.insert_idx];
}
-MoveResult MoveState::upwards_move()
+MoveResult MoveState::upwards_move(UpwardsCursor& cursor)
{
- assert(insert_idx >= 0);
+ assert(cursor.has_insert_idx());
- aco_ptr<Instruction>& instr = block->instructions[source_idx];
+ aco_ptr<Instruction>& instr = block->instructions[cursor.source_idx];
for (const Operand& op : instr->operands) {
if (op.isTemp() && depends_on[op.tempId()])
return move_fail_ssa;
@@ -324,37 +356,37 @@ MoveResult MoveState::upwards_move()
/* check if register pressure is low enough: the diff is negative if register pressure is decreased */
const RegisterDemand candidate_diff = get_live_changes(instr);
const RegisterDemand temp = get_temp_registers(instr);
- if (RegisterDemand(total_demand + candidate_diff).exceeds(max_registers))
+ if (RegisterDemand(cursor.total_demand + candidate_diff).exceeds(max_registers))
return move_fail_pressure;
- const RegisterDemand temp2 = get_temp_registers(block->instructions[insert_idx - 1]);
- const RegisterDemand new_demand = register_demand[insert_idx - 1] - temp2 + candidate_diff + temp;
+ const RegisterDemand temp2 = get_temp_registers(block->instructions[cursor.insert_idx - 1]);
+ const RegisterDemand new_demand = register_demand[cursor.insert_idx - 1] - temp2 + candidate_diff + temp;
if (new_demand.exceeds(max_registers))
return move_fail_pressure;
/* move the candidate above the insert_idx */
- move_element(block->instructions.begin(), source_idx, insert_idx);
+ move_element(block->instructions.begin(), cursor.source_idx, cursor.insert_idx);
/* update register pressure */
- move_element(register_demand, source_idx, insert_idx);
- for (int i = insert_idx + 1; i <= source_idx; i++)
+ move_element(register_demand, cursor.source_idx, cursor.insert_idx);
+ register_demand[cursor.insert_idx] = new_demand;
+ for (int i = cursor.insert_idx + 1; i <= cursor.source_idx; i++)
register_demand[i] += candidate_diff;
- register_demand[insert_idx] = new_demand;
- total_demand += candidate_diff;
+ cursor.total_demand += candidate_diff;
- insert_idx++;
+ cursor.total_demand.update(register_demand[cursor.source_idx]);
- total_demand.update(register_demand[source_idx]);
- source_idx++;
+ cursor.insert_idx++;
+ cursor.source_idx++;
- upwards_verify_invariants();
+ cursor.verify_invariants(register_demand);
return move_success;
}
-void MoveState::upwards_skip()
+void MoveState::upwards_skip(UpwardsCursor& cursor)
{
- if (insert_idx >= 0) {
- aco_ptr<Instruction>& instr = block->instructions[source_idx];
+ if (cursor.has_insert_idx()) {
+ aco_ptr<Instruction>& instr = block->instructions[cursor.source_idx];
for (const Definition& def : instr->definitions) {
if (def.isTemp())
depends_on[def.tempId()] = true;
@@ -363,12 +395,12 @@ void MoveState::upwards_skip()
if (op.isTemp())
RAR_dependencies[op.tempId()] = true;
}
- total_demand.update(register_demand[source_idx]);
+ cursor.total_demand.update(register_demand[cursor.source_idx]);
}
- source_idx++;
+ cursor.source_idx++;
- upwards_verify_invariants();
+ cursor.verify_invariants(register_demand);
}
bool is_gs_or_done_sendmsg(const Instruction *instr)
@@ -598,11 +630,11 @@ void schedule_SMEM(sched_ctx& ctx, Block* block,
init_hazard_query(&hq);
add_to_hazard_query(&hq, current);
- ctx.mv.downwards_init(idx, false, false);
+ DownwardsCursor cursor = ctx.mv.downwards_init(idx, false, false);
for (int candidate_idx = idx - 1; k < max_moves && candidate_idx > (int) idx - window_size; candidate_idx--) {
assert(candidate_idx >= 0);
- assert(candidate_idx == ctx.mv.source_idx);
+ assert(candidate_idx == cursor.source_idx);
aco_ptr<Instruction>& candidate = block->instructions[candidate_idx];
/* break if we'd make the previous SMEM instruction stall */
@@ -628,14 +660,14 @@ void schedule_SMEM(sched_ctx& ctx, Block* block,
* significanly worsen LDS scheduling */
if (candidate->isDS() || !can_move_down) {
add_to_hazard_query(&hq, candidate.get());
- ctx.mv.downwards_skip();
+ ctx.mv.downwards_skip(cursor);
continue;
}
- MoveResult res = ctx.mv.downwards_move(false);
+ MoveResult res = ctx.mv.downwards_move(cursor, false);
if (res == move_fail_ssa || res == move_fail_rar) {
add_to_hazard_query(&hq, candidate.get());
- ctx.mv.downwards_skip();
+ ctx.mv.downwards_skip(cursor);
continue;
} else if (res == move_fail_pressure) {
break;
@@ -647,12 +679,12 @@ void schedule_SMEM(sched_ctx& ctx, Block* block,
}
/* find the first instruction depending on current or find another MEM */
- ctx.mv.upwards_init(idx + 1, false);
+ UpwardsCursor up_cursor = ctx.mv.upwards_init(idx + 1, false);
bool found_dependency = false;
/* second, check if we have instructions after current to move up */
for (int candidate_idx = idx + 1; k < max_moves && candidate_idx < (int) idx + window_size; candidate_idx++) {
- assert(candidate_idx == ctx.mv.source_idx);
+ assert(candidate_idx == up_cursor.source_idx);
assert(candidate_idx < (int) block->instructions.size());
aco_ptr<Instruction>& candidate = block->instructions[candidate_idx];
@@ -660,7 +692,7 @@ void schedule_SMEM(sched_ctx& ctx, Block* block,
break;
/* check if candidate depends on current */
- bool is_dependency = !found_dependency && !ctx.mv.upwards_check_deps();
+ bool is_dependency = !found_dependency && !ctx.mv.upwards_check_deps(up_cursor);
/* no need to steal from following VMEM instructions */
if (is_dependency && candidate->isVMEM())
break;
@@ -677,7 +709,7 @@ void schedule_SMEM(sched_ctx& ctx, Block* block,
if (is_dependency) {
if (!found_dependency) {
- ctx.mv.upwards_update_insert_idx();
+ ctx.mv.upwards_update_insert_idx(up_cursor);
init_hazard_query(&hq);
found_dependency = true;
}
@@ -688,17 +720,17 @@ void schedule_SMEM(sched_ctx& ctx, Block* block,
add_to_hazard_query(&hq, candidate.get());
else
k++;
- ctx.mv.upwards_skip();
+ ctx.mv.upwards_skip(up_cursor);
continue;
}
- MoveResult res = ctx.mv.upwards_move();
+ MoveResult res = ctx.mv.upwards_move(up_cursor);
if (res == move_fail_ssa || res == move_fail_rar) {
/* no need to steal from following VMEM instructions */
if (res == move_fail_ssa && candidate->isVMEM())
break;
add_to_hazard_query(&hq, candidate.get());
- ctx.mv.upwards_skip();
+ ctx.mv.upwards_skip(up_cursor);
continue;
} else if (res == move_fail_pressure) {
break;
@@ -706,7 +738,7 @@ void schedule_SMEM(sched_ctx& ctx, Block* block,
k++;
}
- ctx.last_SMEM_dep_idx = found_dependency ? ctx.mv.insert_idx : 0;
+ ctx.last_SMEM_dep_idx = found_dependency ? up_cursor.insert_idx : 0;
ctx.last_SMEM_stall = 10 - ctx.num_waves - k;
}
@@ -727,10 +759,10 @@ void schedule_VMEM(sched_ctx& ctx, Block* block,
init_hazard_query(&clause_hq);
add_to_hazard_query(&indep_hq, current);
- ctx.mv.downwards_init(idx, true, true);
+ DownwardsCursor cursor = ctx.mv.downwards_init(idx, true, true);
for (int candidate_idx = idx - 1; k < max_moves && candidate_idx > (int) idx - window_size; candidate_idx--) {
- assert(candidate_idx == ctx.mv.source_idx);
+ assert(candidate_idx == cursor.source_idx);
assert(candidate_idx >= 0);
aco_ptr<Instruction>& candidate = block->instructions[candidate_idx];
bool is_vmem = candidate->isVMEM() || candidate->isFlatLike();
@@ -746,7 +778,7 @@ void schedule_VMEM(sched_ctx& ctx, Block* block,
bool part_of_clause = false;
if (current->isVMEM() == candidate->isVMEM()) {
- int grab_dist = ctx.mv.insert_idx_clause - candidate_idx;
+ int grab_dist = cursor.insert_idx_clause - candidate_idx;
/* We can't easily tell how much this will decrease the def-to-use
* distances, so just use how far it will be moved as a heuristic. */
part_of_clause = grab_dist < clause_max_grab_dist &&
@@ -767,16 +799,16 @@ void schedule_VMEM(sched_ctx& ctx, Block* block,
if (!can_move_down) {
add_to_hazard_query(&indep_hq, candidate.get());
add_to_hazard_query(&clause_hq, candidate.get());
- ctx.mv.downwards_skip();
+ ctx.mv.downwards_skip(cursor);
continue;
}
Instruction *candidate_ptr = candidate.get();
- MoveResult res = ctx.mv.downwards_move(part_of_clause);
+ MoveResult res = ctx.mv.downwards_move(cursor, part_of_clause);
if (res == move_fail_ssa || res == move_fail_rar) {
add_to_hazard_query(&indep_hq, candidate.get());
add_to_hazard_query(&clause_hq, candidate.get());
- ctx.mv.downwards_skip();
+ ctx.mv.downwards_skip(cursor);
continue;
} else if (res == move_fail_pressure) {
break;
@@ -790,12 +822,12 @@ void schedule_VMEM(sched_ctx& ctx, Block* block,
}
/* find the first instruction depending on current or find another VMEM */
- ctx.mv.upwards_init(idx + 1, true);
+ UpwardsCursor up_cursor = ctx.mv.upwards_init(idx + 1, true);
bool found_dependency = false;
/* second, check if we have instructions after current to move up */
for (int candidate_idx = idx + 1; k < max_moves && candidate_idx < (int) idx + window_size; candidate_idx++) {
- assert(candidate_idx == ctx.mv.source_idx);
+ assert(candidate_idx == up_cursor.source_idx);
assert(candidate_idx < (int) block->instructions.size());
aco_ptr<Instruction>& candidate = block->instructions[candidate_idx];
bool is_vmem = candidate->isVMEM() || candidate->isFlatLike();
@@ -815,10 +847,10 @@ void schedule_VMEM(sched_ctx& ctx, Block* block,
break;
}
- is_dependency |= !found_dependency && !ctx.mv.upwards_check_deps();
+ is_dependency |= !found_dependency && !ctx.mv.upwards_check_deps(up_cursor);
if (is_dependency) {
if (!found_dependency) {
- ctx.mv.upwards_update_insert_idx();
+ ctx.mv.upwards_update_insert_idx(up_cursor);
init_hazard_query(&indep_hq);
found_dependency = true;
}
@@ -835,14 +867,14 @@ void schedule_VMEM(sched_ctx& ctx, Block* block,
add_to_hazard_query(&indep_hq, candidate.get());
else
k++;
- ctx.mv.upwards_skip();
+ ctx.mv.upwards_skip(up_cursor);
continue;
}
- MoveResult res = ctx.mv.upwards_move();
+ MoveResult res = ctx.mv.upwards_move(up_cursor);
if (res == move_fail_ssa || res == move_fail_rar) {
add_to_hazard_query(&indep_hq, candidate.get());
- ctx.mv.upwards_skip();
+ ctx.mv.upwards_skip(up_cursor);
continue;
} else if (res == move_fail_pressure) {
break;
@@ -860,7 +892,7 @@ void schedule_position_export(sched_ctx& ctx, Block* block,
int max_moves = POS_EXP_MAX_MOVES;
int16_t k = 0;
- ctx.mv.downwards_init(idx, true, false);
+ DownwardsCursor cursor = ctx.mv.downwards_init(idx, true, false);
hazard_query hq;
init_hazard_query(&hq);
@@ -881,14 +913,14 @@ void schedule_position_export(sched_ctx& ctx, Block* block,
if (haz != hazard_success) {
add_to_hazard_query(&hq, candidate.get());
- ctx.mv.downwards_skip();
+ ctx.mv.downwards_skip(cursor);
continue;
}
- MoveResult res = ctx.mv.downwards_move(false);
+ MoveResult res = ctx.mv.downwards_move(cursor, false);
if (res == move_fail_ssa || res == move_fail_rar) {
add_to_hazard_query(&hq, candidate.get());
- ctx.mv.downwards_skip();
+ ctx.mv.downwards_skip(cursor);
continue;
} else if (res == move_fail_pressure) {
break;