diff options
author | Rhys Perry <pendingchaos02@gmail.com> | 2023-04-27 16:22:52 +0100 |
---|---|---|
committer | Marge Bot <emma+marge@anholt.net> | 2023-05-04 19:08:58 +0000 |
commit | d5398b62da1913e7224c826da0dbd5fa88436f18 (patch) | |
tree | 2b5d21384cfe41fe53cb4d261a49b95efd836e22 /src/amd | |
parent | 9dd6fcd9ece4433229181241bdaba9b2d0baa232 (diff) | |
download | mesa-d5398b62da1913e7224c826da0dbd5fa88436f18.tar.gz |
aco/ra: create M0-affinities for s_sendmsg
v2 by Timur Kristóf:
Do not add the affinity for instructions that can't write m0
reliably, such as readlane-like instructions on GFX8.
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22690>
Diffstat (limited to 'src/amd')
-rw-r--r-- | src/amd/compiler/aco_ir.cpp | 21 |
-rw-r--r-- | src/amd/compiler/aco_ir.h | 1 |
-rw-r--r-- | src/amd/compiler/aco_register_allocation.cpp | 8 |
3 files changed, 30 insertions, 0 deletions
diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp index ef04a89c780..03f6931be1c 100644 --- a/src/amd/compiler/aco_ir.cpp +++ b/src/amd/compiler/aco_ir.cpp @@ -480,6 +480,27 @@ can_use_opsel(amd_gfx_level gfx_level, aco_opcode op, int idx) } bool +can_write_m0(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr) +{ + if (instr->isSALU()) + return true; + + if (instr->isVALU()) + return gfx_level >= GFX9; + + switch (instr->opcode) { + case aco_opcode::p_parallelcopy: + case aco_opcode::p_extract: + case aco_opcode::p_insert: + return true; + case aco_opcode::p_reload: + return gfx_level >= GFX9; + default: + return false; + } +} + +bool instr_is_16bit(amd_gfx_level gfx_level, aco_opcode op) { /* partial register writes are GFX9+, only */ diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index 6ae4571abd8..a39e949847f 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -1804,6 +1804,7 @@ bool instr_is_16bit(amd_gfx_level gfx_level, aco_opcode op); uint8_t get_gfx11_true16_mask(aco_opcode op); bool can_use_SDWA(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr, bool pre_ra); bool can_use_DPP(const aco_ptr<Instruction>& instr, bool pre_ra, bool dpp8); +bool can_write_m0(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr); /* updates "instr" and returns the old instruction (or NULL if no update was needed) */ aco_ptr<Instruction> convert_to_SDWA(amd_gfx_level gfx_level, aco_ptr<Instruction>& instr); aco_ptr<Instruction> convert_to_DPP(aco_ptr<Instruction>& instr, bool dpp8); diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp index ca9b8702419..fba6e1c012c 100644 --- a/src/amd/compiler/aco_register_allocation.cpp +++ b/src/amd/compiler/aco_register_allocation.cpp @@ -55,6 +55,7 @@ struct assignment { struct { bool assigned : 1; bool vcc : 1; + bool m0 : 1; }; uint8_t _ = 0; }; @@ -1653,6 +1654,11 @@ get_reg(ra_ctx& ctx, RegisterFile& 
reg_file, Temp temp, if (get_reg_specified(ctx, reg_file, temp.regClass(), instr, vcc)) return vcc; } + if (ctx.assignments[temp.id()].m0) { + if (get_reg_specified(ctx, reg_file, temp.regClass(), instr, m0) && + can_write_m0(ctx.program->gfx_level, instr)) + return m0; + } std::optional<PhysReg> res; @@ -2461,6 +2467,8 @@ get_affinities(ra_ctx& ctx, std::vector<IDSet>& live_out_per_block) if (!instr->definitions[1].isKill() && instr->operands[0].isTemp() && instr->operands[1].isFixed() && instr->operands[1].physReg() == exec) ctx.assignments[instr->operands[0].tempId()].vcc = true; + } else if (instr->opcode == aco_opcode::s_sendmsg) { + ctx.assignments[instr->operands[0].tempId()].m0 = true; } /* add operands to live variables */ |