summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Timur Kristóf <timur.kristof@gmail.com> 2023-03-02 17:30:49 -0800
committer: Marge Bot <emma+marge@anholt.net> 2023-03-08 04:39:18 +0000
commit: 3058ab6090725273e9109c13c8a9798e328923e8 (patch)
tree: 771afc5967e724139dc09e4bb96abdbfd5f4843b
parent: 1583bea9dab571069790fd773e1b4e9cb8a382a6 (diff)
download: mesa-3058ab6090725273e9109c13c8a9798e328923e8.tar.gz
aco: Generalize vs_inputs to args_pending_vmem.
Handle arguments that need a waitcnt without relying on RADV-specific VS input information.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21696>
-rw-r--r-- src/amd/compiler/aco_insert_waitcnt.cpp 8
-rw-r--r-- src/amd/compiler/aco_instruction_selection.cpp 23
-rw-r--r-- src/amd/compiler/aco_ir.h 2
-rw-r--r-- src/amd/compiler/aco_statistics.cpp 12
4 files changed, 17 insertions, 28 deletions
diff --git a/src/amd/compiler/aco_insert_waitcnt.cpp b/src/amd/compiler/aco_insert_waitcnt.cpp
index 948799f1f3e..9b34fb00307 100644
--- a/src/amd/compiler/aco_insert_waitcnt.cpp
+++ b/src/amd/compiler/aco_insert_waitcnt.cpp
@@ -1032,11 +1032,9 @@ insert_wait_states(Program* program)
std::stack<unsigned, std::vector<unsigned>> loop_header_indices;
unsigned loop_progress = 0;
- if (program->stage.has(SWStage::VS) && program->info.vs.dynamic_inputs) {
- for (Definition def : program->vs_inputs) {
- update_counters(in_ctx[0], event_vmem);
- insert_wait_entry(in_ctx[0], def, event_vmem);
- }
+ for (Definition def : program->args_pending_vmem) {
+ update_counters(in_ctx[0], event_vmem);
+ insert_wait_entry(in_ctx[0], def, event_vmem);
}
for (unsigned i = 0; i < program->blocks.size();) {
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 73971c5d439..7e02cfd8a92 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -11271,10 +11271,15 @@ add_startpgm(struct isel_context* ctx)
ctx->arg_temps[i] = create_vec_from_array(ctx, elems, size, RegType::sgpr, 4);
} else {
Temp dst = ctx->program->allocateTmp(type);
+ Definition def(dst);
+ def.setFixed(PhysReg{file == AC_ARG_SGPR ? reg : reg + 256});
ctx->arg_temps[i] = dst;
- startpgm->definitions[arg] = Definition(dst);
- startpgm->definitions[arg].setFixed(PhysReg{file == AC_ARG_SGPR ? reg : reg + 256});
- arg++;
+ startpgm->definitions[arg++] = def;
+
+ if (ctx->args->ac.args[i].pending_vmem) {
+ assert(file == AC_ARG_VGPR);
+ ctx->program->args_pending_vmem.push_back(def);
+ }
}
}
@@ -11294,18 +11299,6 @@ add_startpgm(struct isel_context* ctx)
}
}
- if (ctx->stage.has(SWStage::VS) && ctx->program->info.vs.dynamic_inputs) {
- unsigned num_attributes = util_last_bit(ctx->program->info.vs.input_slot_usage_mask);
- for (unsigned i = 0; i < num_attributes; i++) {
- Definition def(get_arg(ctx, ctx->args->vs_inputs[i]));
-
- unsigned idx = ctx->args->vs_inputs[i].arg_index;
- def.setFixed(PhysReg(256 + ctx->args->ac.args[idx].offset));
-
- ctx->program->vs_inputs.push_back(def);
- }
- }
-
return startpgm;
}
diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h
index 6eda59892fa..d512de53b11 100644
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@@ -2120,7 +2120,7 @@ public:
unsigned next_divergent_if_logical_depth = 0;
unsigned next_uniform_if_depth = 0;
- std::vector<Definition> vs_inputs;
+ std::vector<Definition> args_pending_vmem;
struct {
FILE* output = stderr;
diff --git a/src/amd/compiler/aco_statistics.cpp b/src/amd/compiler/aco_statistics.cpp
index 5662bc7af4c..34037604765 100644
--- a/src/amd/compiler/aco_statistics.cpp
+++ b/src/amd/compiler/aco_statistics.cpp
@@ -544,13 +544,11 @@ collect_preasm_stats(Program* program)
double usage[(int)BlockCycleEstimator::resource_count] = {0};
std::vector<BlockCycleEstimator> blocks(program->blocks.size(), program);
- if (program->stage.has(SWStage::VS) && program->info.vs.has_prolog) {
- unsigned vs_input_latency = 320;
- for (Definition def : program->vs_inputs) {
- blocks[0].vm.push_back(vs_input_latency);
- for (unsigned i = 0; i < def.size(); i++)
- blocks[0].reg_available[def.physReg().reg() + i] = vs_input_latency;
- }
+ constexpr const unsigned vmem_latency = 320;
+ for (const Definition def : program->args_pending_vmem) {
+ blocks[0].vm.push_back(vmem_latency);
+ for (unsigned i = 0; i < def.size(); i++)
+ blocks[0].reg_available[def.physReg().reg() + i] = vmem_latency;
}
for (Block& block : program->blocks) {