summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Qiang Yu <yuq825@gmail.com> 2023-04-26 14:58:58 +0800
committer Qiang Yu <yuq825@gmail.com> 2023-04-28 11:33:28 +0800
commit 3c59df73185852516c6ebcd9f62ee2cad07d0eb4 (patch)
tree 4a572b9a3fd9cd861c6c405022cee50c470f4849
parent 360176b671fc7ec30a75eb45557bd2c1fc019fec (diff)
download mesa-3c59df73185852516c6ebcd9f62ee2cad07d0eb4.tar.gz
aco: get scratch addr from symbol for radeonsi
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Signed-off-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22727>
-rw-r--r-- src/amd/compiler/aco_instruction_selection.cpp 40
-rw-r--r-- src/amd/compiler/aco_lower_to_hw_instr.cpp 9
-rw-r--r-- src/amd/compiler/aco_shader_info.h 2
-rw-r--r-- src/amd/compiler/aco_spill.cpp 10
-rw-r--r-- src/amd/compiler/aco_validate.cpp 3
5 files changed, 46 insertions, 18 deletions
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 4e3990e4fb6..c9b78c520d6 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -7519,9 +7519,16 @@ get_scratch_resource(isel_context* ctx)
{
Builder bld(ctx->program, ctx->block);
Temp scratch_addr = ctx->program->private_segment_buffer;
- if (ctx->stage.hw != HWStage::CS)
+ if (!scratch_addr.bytes()) {
+ Temp addr_lo = bld.sop1(aco_opcode::p_load_symbol, bld.def(s1),
+ Operand::c32(aco_symbol_scratch_addr_lo));
+ Temp addr_hi = bld.sop1(aco_opcode::p_load_symbol, bld.def(s1),
+ Operand::c32(aco_symbol_scratch_addr_hi));
+ scratch_addr = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), addr_lo, addr_hi);
+ } else if (ctx->stage.hw != HWStage::CS) {
scratch_addr =
bld.smem(aco_opcode::s_load_dwordx2, bld.def(s2), scratch_addr, Operand::zero());
+ }
uint32_t rsrc_conf =
S_008F0C_ADD_TID_ENABLE(1) | S_008F0C_INDEX_STRIDE(ctx->program->wave_size == 64 ? 3 : 2);
@@ -11138,22 +11145,25 @@ add_startpgm(struct isel_context* ctx)
}
}
- if (ctx->args->ring_offsets.used) {
- if (ctx->program->gfx_level < GFX9) {
- /* Stash these in the program so that they can be accessed later when
- * handling spilling.
- */
+ if (ctx->program->gfx_level < GFX9) {
+ /* Stash these in the program so that they can be accessed later when
+ * handling spilling.
+ */
+ if (ctx->args->ring_offsets.used)
ctx->program->private_segment_buffer = get_arg(ctx, ctx->args->ring_offsets);
- ctx->program->scratch_offset = get_arg(ctx, ctx->args->scratch_offset);
- } else if (ctx->program->gfx_level <= GFX10_3 && ctx->program->stage != raytracing_cs) {
- /* Manually initialize scratch. For RT stages scratch initialization is done in the prolog. */
- Operand scratch_offset = Operand(get_arg(ctx, ctx->args->scratch_offset));
- scratch_offset.setLateKill(true);
- Builder bld(ctx->program, ctx->block);
- bld.pseudo(aco_opcode::p_init_scratch, bld.def(s2), bld.def(s1, scc),
- get_arg(ctx, ctx->args->ring_offsets), scratch_offset);
- }
+ ctx->program->scratch_offset = get_arg(ctx, ctx->args->scratch_offset);
+ } else if (ctx->program->gfx_level <= GFX10_3 && ctx->program->stage != raytracing_cs) {
+ /* Manually initialize scratch. For RT stages scratch initialization is done in the prolog. */
+ Operand scratch_offset = Operand(get_arg(ctx, ctx->args->scratch_offset));
+ scratch_offset.setLateKill(true);
+
+ Operand scratch_addr = ctx->args->ring_offsets.used ?
+ Operand(get_arg(ctx, ctx->args->ring_offsets)) : Operand(s2);
+
+ Builder bld(ctx->program, ctx->block);
+ bld.pseudo(aco_opcode::p_init_scratch, bld.def(s2), bld.def(s1, scc),
+ scratch_addr, scratch_offset);
}
return startpgm;
diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp
index 19e8ec70609..a008cc04d7d 100644
--- a/src/amd/compiler/aco_lower_to_hw_instr.cpp
+++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp
@@ -2489,7 +2489,14 @@ lower_to_hw_instr(Program* program)
break;
Operand scratch_addr = instr->operands[0];
- if (program->stage.hw != HWStage::CS) {
+ if (scratch_addr.isUndefined()) {
+ PhysReg reg = instr->definitions[0].physReg();
+ bld.sop1(aco_opcode::p_load_symbol, Definition(reg, s1),
+ Operand::c32(aco_symbol_scratch_addr_lo));
+ bld.sop1(aco_opcode::p_load_symbol, Definition(reg.advance(4), s1),
+ Operand::c32(aco_symbol_scratch_addr_hi));
+ scratch_addr.setFixed(reg);
+ } else if (program->stage.hw != HWStage::CS) {
bld.smem(aco_opcode::s_load_dwordx2, instr->definitions[0], scratch_addr,
Operand::zero());
scratch_addr.setFixed(instr->definitions[0].physReg());
diff --git a/src/amd/compiler/aco_shader_info.h b/src/amd/compiler/aco_shader_info.h
index 0d1dcd96392..af741d1d840 100644
--- a/src/amd/compiler/aco_shader_info.h
+++ b/src/amd/compiler/aco_shader_info.h
@@ -162,6 +162,8 @@ enum aco_statistic {
enum aco_symbol_id {
aco_symbol_invalid,
+ aco_symbol_scratch_addr_lo,
+ aco_symbol_scratch_addr_hi,
};
struct aco_symbol {
diff --git a/src/amd/compiler/aco_spill.cpp b/src/amd/compiler/aco_spill.cpp
index d2cbca24405..3f8426921ae 100644
--- a/src/amd/compiler/aco_spill.cpp
+++ b/src/amd/compiler/aco_spill.cpp
@@ -1416,9 +1416,17 @@ load_scratch_resource(spill_ctx& ctx, Temp& scratch_offset, Block& block,
return bld.copy(bld.def(s1), Operand::c32(offset));
Temp private_segment_buffer = ctx.program->private_segment_buffer;
- if (ctx.program->stage.hw != HWStage::CS)
+ if (!private_segment_buffer.bytes()) {
+ Temp addr_lo = bld.sop1(aco_opcode::p_load_symbol, bld.def(s1),
+ Operand::c32(aco_symbol_scratch_addr_lo));
+ Temp addr_hi = bld.sop1(aco_opcode::p_load_symbol, bld.def(s1),
+ Operand::c32(aco_symbol_scratch_addr_hi));
+ private_segment_buffer =
+ bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), addr_lo, addr_hi);
+ } else if (ctx.program->stage.hw != HWStage::CS) {
private_segment_buffer =
bld.smem(aco_opcode::s_load_dwordx2, bld.def(s2), private_segment_buffer, Operand::zero());
+ }
if (offset)
scratch_offset = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.def(s1, scc),
diff --git a/src/amd/compiler/aco_validate.cpp b/src/amd/compiler/aco_validate.cpp
index abf0a592591..d4073296ae9 100644
--- a/src/amd/compiler/aco_validate.cpp
+++ b/src/amd/compiler/aco_validate.cpp
@@ -270,7 +270,8 @@ validate_ir(Program* program)
(instr->opcode == aco_opcode::p_bpermute_gfx11w64 && i == 0) ||
(flat && i == 1) || (instr->isMIMG() && (i == 1 || i == 2)) ||
((instr->isMUBUF() || instr->isMTBUF()) && i == 1) ||
- (instr->isScratch() && i == 0);
+ (instr->isScratch() && i == 0) ||
+ (instr->opcode == aco_opcode::p_init_scratch && i == 0);
check(can_be_undef, "Undefs can only be used in certain operands", instr.get());
} else {
check(instr->operands[i].isFixed() || instr->operands[i].isTemp() ||