diff options
author | Qiang Yu <yuq825@gmail.com> | 2023-04-26 14:58:58 +0800 |
---|---|---|
committer | Qiang Yu <yuq825@gmail.com> | 2023-04-28 11:33:28 +0800 |
commit | 3c59df73185852516c6ebcd9f62ee2cad07d0eb4 (patch) | |
tree | 4a572b9a3fd9cd861c6c405022cee50c470f4849 | |
parent | 360176b671fc7ec30a75eb45557bd2c1fc019fec (diff) | |
download | mesa-3c59df73185852516c6ebcd9f62ee2cad07d0eb4.tar.gz |
aco: get scratch addr from symbol for radeonsi
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Signed-off-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22727>
-rw-r--r-- | src/amd/compiler/aco_instruction_selection.cpp | 40 | ||||
-rw-r--r-- | src/amd/compiler/aco_lower_to_hw_instr.cpp | 9 | ||||
-rw-r--r-- | src/amd/compiler/aco_shader_info.h | 2 | ||||
-rw-r--r-- | src/amd/compiler/aco_spill.cpp | 10 | ||||
-rw-r--r-- | src/amd/compiler/aco_validate.cpp | 3 |
5 files changed, 46 insertions, 18 deletions
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 4e3990e4fb6..c9b78c520d6 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -7519,9 +7519,16 @@ get_scratch_resource(isel_context* ctx) { Builder bld(ctx->program, ctx->block); Temp scratch_addr = ctx->program->private_segment_buffer; - if (ctx->stage.hw != HWStage::CS) + if (!scratch_addr.bytes()) { + Temp addr_lo = bld.sop1(aco_opcode::p_load_symbol, bld.def(s1), + Operand::c32(aco_symbol_scratch_addr_lo)); + Temp addr_hi = bld.sop1(aco_opcode::p_load_symbol, bld.def(s1), + Operand::c32(aco_symbol_scratch_addr_hi)); + scratch_addr = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), addr_lo, addr_hi); + } else if (ctx->stage.hw != HWStage::CS) { scratch_addr = bld.smem(aco_opcode::s_load_dwordx2, bld.def(s2), scratch_addr, Operand::zero()); + } uint32_t rsrc_conf = S_008F0C_ADD_TID_ENABLE(1) | S_008F0C_INDEX_STRIDE(ctx->program->wave_size == 64 ? 3 : 2); @@ -11138,22 +11145,25 @@ add_startpgm(struct isel_context* ctx) } } - if (ctx->args->ring_offsets.used) { - if (ctx->program->gfx_level < GFX9) { - /* Stash these in the program so that they can be accessed later when - * handling spilling. - */ + if (ctx->program->gfx_level < GFX9) { + /* Stash these in the program so that they can be accessed later when + * handling spilling. + */ + if (ctx->args->ring_offsets.used) ctx->program->private_segment_buffer = get_arg(ctx, ctx->args->ring_offsets); - ctx->program->scratch_offset = get_arg(ctx, ctx->args->scratch_offset); - } else if (ctx->program->gfx_level <= GFX10_3 && ctx->program->stage != raytracing_cs) { - /* Manually initialize scratch. For RT stages scratch initialization is done in the prolog. */ - Operand scratch_offset = Operand(get_arg(ctx, ctx->args->scratch_offset)); - scratch_offset.setLateKill(true); - Builder bld(ctx->program, ctx->block); - bld.pseudo(aco_opcode::p_init_scratch, bld.def(s2), bld.def(s1, scc), - get_arg(ctx, ctx->args->ring_offsets), scratch_offset); - } + ctx->program->scratch_offset = get_arg(ctx, ctx->args->scratch_offset); + } else if (ctx->program->gfx_level <= GFX10_3 && ctx->program->stage != raytracing_cs) { + /* Manually initialize scratch. For RT stages scratch initialization is done in the prolog. */ + Operand scratch_offset = Operand(get_arg(ctx, ctx->args->scratch_offset)); + scratch_offset.setLateKill(true); + + Operand scratch_addr = ctx->args->ring_offsets.used ? + Operand(get_arg(ctx, ctx->args->ring_offsets)) : Operand(s2); + + Builder bld(ctx->program, ctx->block); + bld.pseudo(aco_opcode::p_init_scratch, bld.def(s2), bld.def(s1, scc), + scratch_addr, scratch_offset); } return startpgm; diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp index 19e8ec70609..a008cc04d7d 100644 --- a/src/amd/compiler/aco_lower_to_hw_instr.cpp +++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp @@ -2489,7 +2489,14 @@ lower_to_hw_instr(Program* program) break; Operand scratch_addr = instr->operands[0]; - if (program->stage.hw != HWStage::CS) { + if (scratch_addr.isUndefined()) { + PhysReg reg = instr->definitions[0].physReg(); + bld.sop1(aco_opcode::p_load_symbol, Definition(reg, s1), + Operand::c32(aco_symbol_scratch_addr_lo)); + bld.sop1(aco_opcode::p_load_symbol, Definition(reg.advance(4), s1), + Operand::c32(aco_symbol_scratch_addr_hi)); + scratch_addr.setFixed(reg); + } else if (program->stage.hw != HWStage::CS) { bld.smem(aco_opcode::s_load_dwordx2, instr->definitions[0], scratch_addr, Operand::zero()); scratch_addr.setFixed(instr->definitions[0].physReg()); diff --git a/src/amd/compiler/aco_shader_info.h b/src/amd/compiler/aco_shader_info.h index 0d1dcd96392..af741d1d840 100644 --- a/src/amd/compiler/aco_shader_info.h +++ b/src/amd/compiler/aco_shader_info.h @@ -162,6 +162,8 @@ enum aco_statistic { enum aco_symbol_id { aco_symbol_invalid, + aco_symbol_scratch_addr_lo, + aco_symbol_scratch_addr_hi, }; struct aco_symbol { diff --git a/src/amd/compiler/aco_spill.cpp b/src/amd/compiler/aco_spill.cpp index d2cbca24405..3f8426921ae 100644 --- a/src/amd/compiler/aco_spill.cpp +++ b/src/amd/compiler/aco_spill.cpp @@ -1416,9 +1416,17 @@ load_scratch_resource(spill_ctx& ctx, Temp& scratch_offset, Block& block, return bld.copy(bld.def(s1), Operand::c32(offset)); Temp private_segment_buffer = ctx.program->private_segment_buffer; - if (ctx.program->stage.hw != HWStage::CS) + if (!private_segment_buffer.bytes()) { + Temp addr_lo = bld.sop1(aco_opcode::p_load_symbol, bld.def(s1), + Operand::c32(aco_symbol_scratch_addr_lo)); + Temp addr_hi = bld.sop1(aco_opcode::p_load_symbol, bld.def(s1), + Operand::c32(aco_symbol_scratch_addr_hi)); + private_segment_buffer = + bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), addr_lo, addr_hi); + } else if (ctx.program->stage.hw != HWStage::CS) { private_segment_buffer = bld.smem(aco_opcode::s_load_dwordx2, bld.def(s2), private_segment_buffer, Operand::zero()); + } if (offset) scratch_offset = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.def(s1, scc), diff --git a/src/amd/compiler/aco_validate.cpp b/src/amd/compiler/aco_validate.cpp index abf0a592591..d4073296ae9 100644 --- a/src/amd/compiler/aco_validate.cpp +++ b/src/amd/compiler/aco_validate.cpp @@ -270,7 +270,8 @@ validate_ir(Program* program) (instr->opcode == aco_opcode::p_bpermute_gfx11w64 && i == 0) || (flat && i == 1) || (instr->isMIMG() && (i == 1 || i == 2)) || ((instr->isMUBUF() || instr->isMTBUF()) && i == 1) || - (instr->isScratch() && i == 0); + (instr->isScratch() && i == 0) || + (instr->opcode == aco_opcode::p_init_scratch && i == 0); check(can_be_undef, "Undefs can only be used in certain operands", instr.get()); } else { check(instr->operands[i].isFixed() || instr->operands[i].isTemp() || |