diff options
author | Rhys Perry <pendingchaos02@gmail.com> | 2022-05-19 16:09:13 +0100 |
---|---|---|
committer | Marge Bot <emma+marge@anholt.net> | 2022-07-08 14:49:03 +0000 |
commit | 98a65eafb72f6d2ba452c52b1867078e7cb059b2 (patch) | |
tree | 02c315c6ee0c08e7ce7913f934347006ac0084f2 /src/amd/compiler/aco_spill.cpp | |
parent | 0e783d687a3e13284eeae4081f16ee22033ff89b (diff) | |
download | mesa-98a65eafb72f6d2ba452c52b1867078e7cb059b2.tar.gz |
aco: use scratch_* for VGPR spill/reload on GFX9+
fossil-db (navi21):
Totals from 12 (0.01% of 162293) affected shaders:
Instrs: 122808 -> 122782 (-0.02%); split: -0.11%, +0.09%
CodeSize: 711248 -> 710788 (-0.06%); split: -0.16%, +0.10%
SpillSGPRs: 928 -> 831 (-10.45%)
SpillVGPRs: 1626 -> 1624 (-0.12%)
Latency: 4960285 -> 4932547 (-0.56%)
InvThroughput: 2574083 -> 2559953 (-0.55%)
VClause: 3404 -> 3402 (-0.06%)
Copies: 36992 -> 37181 (+0.51%); split: -0.05%, +0.56%
Branches: 3582 -> 3585 (+0.08%)
PreVGPRs: 3055 -> 3057 (+0.07%)
fossil-db (vega10):
Totals from 12 (0.01% of 161355) affected shaders:
Instrs: 124817 -> 124383 (-0.35%); split: -0.46%, +0.12%
CodeSize: 705116 -> 703664 (-0.21%); split: -0.44%, +0.23%
SpillSGPRs: 1012 -> 898 (-11.26%)
SpillVGPRs: 1632 -> 1624 (-0.49%)
Scratch: 201728 -> 200704 (-0.51%)
Latency: 6160115 -> 6266025 (+1.72%); split: -0.34%, +2.06%
InvThroughput: 6440203 -> 6544595 (+1.62%); split: -0.35%, +1.97%
VClause: 3409 -> 3423 (+0.41%)
Copies: 37929 -> 37748 (-0.48%); split: -1.16%, +0.69%
Branches: 3851 -> 3855 (+0.10%); split: -0.13%, +0.23%
PreVGPRs: 3053 -> 3055 (+0.07%)
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17079>
Diffstat (limited to 'src/amd/compiler/aco_spill.cpp')
-rw-r--r-- | src/amd/compiler/aco_spill.cpp | 76 |
1 file changed, 56 insertions, 20 deletions
diff --git a/src/amd/compiler/aco_spill.cpp b/src/amd/compiler/aco_spill.cpp index 848df3dfe95..4b1099e8b28 100644 --- a/src/amd/compiler/aco_spill.cpp +++ b/src/amd/compiler/aco_spill.cpp @@ -1408,6 +1408,10 @@ load_scratch_resource(spill_ctx& ctx, Temp& scratch_offset, Block& block, } } + /* GFX9+ uses scratch_* instructions, which don't use a resource. Return a SADDR instead. */ + if (ctx.program->gfx_level >= GFX9) + return bld.copy(bld.def(s1), Operand::c32(offset)); + Temp private_segment_buffer = ctx.program->private_segment_buffer; if (ctx.program->stage.hw != HWStage::CS) private_segment_buffer = @@ -1445,17 +1449,29 @@ setup_vgpr_spill_reload(spill_ctx& ctx, Block& block, Temp scratch_offset = ctx.program->scratch_offset; *offset = spill_slot * 4; - - bool add_offset_to_sgpr = ctx.program->config->scratch_bytes_per_wave / ctx.program->wave_size + - ctx.vgpr_spill_slots * 4 > - 4096; - if (!add_offset_to_sgpr) - *offset += ctx.program->config->scratch_bytes_per_wave / ctx.program->wave_size; - - if (ctx.scratch_rsrc == Temp()) { - unsigned rsrc_offset = add_offset_to_sgpr ? ctx.program->config->scratch_bytes_per_wave : 0; - ctx.scratch_rsrc = - load_scratch_resource(ctx, scratch_offset, block, instructions, rsrc_offset); + if (ctx.program->gfx_level >= GFX9) { + *offset += ctx.program->dev.scratch_global_offset_min; + + if (ctx.scratch_rsrc == Temp()) { + int32_t saddr = ctx.program->config->scratch_bytes_per_wave / ctx.program->wave_size - + ctx.program->dev.scratch_global_offset_min; + ctx.scratch_rsrc = + load_scratch_resource(ctx, scratch_offset, block, instructions, saddr); + } + } else { + bool add_offset_to_sgpr = + ctx.program->config->scratch_bytes_per_wave / ctx.program->wave_size + + ctx.vgpr_spill_slots * 4 > + 4096; + if (!add_offset_to_sgpr) + *offset += ctx.program->config->scratch_bytes_per_wave / ctx.program->wave_size; + + if (ctx.scratch_rsrc == Temp()) { + unsigned rsrc_offset = + add_offset_to_sgpr ? 
ctx.program->config->scratch_bytes_per_wave : 0; + ctx.scratch_rsrc = + load_scratch_resource(ctx, scratch_offset, block, instructions, rsrc_offset); + } } } @@ -1485,11 +1501,19 @@ spill_vgpr(spill_ctx& ctx, Block& block, std::vector<aco_ptr<Instruction>>& inst bld.insert(split); for (unsigned i = 0; i < temp.size(); i++, offset += 4) { Temp elem = split->definitions[i].getTemp(); - Instruction* instr = - bld.mubuf(aco_opcode::buffer_store_dword, ctx.scratch_rsrc, Operand(v1), - ctx.program->scratch_offset, elem, offset, false, true); - instr->mubuf().sync = memory_sync_info(storage_vgpr_spill, semantic_private); + if (ctx.program->gfx_level >= GFX9) { + bld.scratch(aco_opcode::scratch_store_dword, Operand(v1), ctx.scratch_rsrc, elem, + offset, memory_sync_info(storage_vgpr_spill, semantic_private)); + } else { + Instruction* instr = + bld.mubuf(aco_opcode::buffer_store_dword, ctx.scratch_rsrc, Operand(v1), + ctx.program->scratch_offset, elem, offset, false, true); + instr->mubuf().sync = memory_sync_info(storage_vgpr_spill, semantic_private); + } } + } else if (ctx.program->gfx_level >= GFX9) { + bld.scratch(aco_opcode::scratch_store_dword, Operand(v1), ctx.scratch_rsrc, temp, offset, + memory_sync_info(storage_vgpr_spill, semantic_private)); } else { Instruction* instr = bld.mubuf(aco_opcode::buffer_store_dword, ctx.scratch_rsrc, Operand(v1), ctx.program->scratch_offset, temp, offset, false, true); @@ -1517,12 +1541,21 @@ reload_vgpr(spill_ctx& ctx, Block& block, std::vector<aco_ptr<Instruction>>& ins for (unsigned i = 0; i < def.size(); i++, offset += 4) { Temp tmp = bld.tmp(v1); vec->operands[i] = Operand(tmp); - Instruction* instr = - bld.mubuf(aco_opcode::buffer_load_dword, Definition(tmp), ctx.scratch_rsrc, Operand(v1), - ctx.program->scratch_offset, offset, false, true); - instr->mubuf().sync = memory_sync_info(storage_vgpr_spill, semantic_private); + if (ctx.program->gfx_level >= GFX9) { + bld.scratch(aco_opcode::scratch_load_dword, Definition(tmp), 
Operand(v1), + ctx.scratch_rsrc, offset, + memory_sync_info(storage_vgpr_spill, semantic_private)); + } else { + Instruction* instr = + bld.mubuf(aco_opcode::buffer_load_dword, Definition(tmp), ctx.scratch_rsrc, + Operand(v1), ctx.program->scratch_offset, offset, false, true); + instr->mubuf().sync = memory_sync_info(storage_vgpr_spill, semantic_private); + } } bld.insert(vec); + } else if (ctx.program->gfx_level >= GFX9) { + bld.scratch(aco_opcode::scratch_load_dword, def, Operand(v1), ctx.scratch_rsrc, offset, + memory_sync_info(storage_vgpr_spill, semantic_private)); } else { Instruction* instr = bld.mubuf(aco_opcode::buffer_load_dword, def, ctx.scratch_rsrc, Operand(v1), ctx.program->scratch_offset, offset, false, true); @@ -1907,7 +1940,10 @@ spill(Program* program, live& live_vars) } /* add extra SGPRs required for spilling VGPRs */ if (demand.vgpr + extra_vgprs > vgpr_limit) { - extra_sgprs = 5; /* scratch_resource (s4) + scratch_offset (s1) */ + if (program->gfx_level >= GFX9) + extra_sgprs = 1; /* SADDR */ + else + extra_sgprs = 5; /* scratch_resource (s4) + scratch_offset (s1) */ if (demand.sgpr + extra_sgprs > sgpr_limit) { /* re-calculate in case something has changed */ unsigned sgpr_spills = demand.sgpr + extra_sgprs - sgpr_limit; |