summary | refs | log | tree | commit | diff
path: root/src/amd/compiler/aco_lower_to_hw_instr.cpp
diff options
context:
space:
mode:
author: Rhys Perry <pendingchaos02@gmail.com> 2022-10-26 21:11:31 +0100
committer: Marge Bot <emma+marge@anholt.net> 2022-11-01 12:42:43 +0000
commit: 6113ee650a272dc737a200e276de474e083a2fdf (patch)
tree: 1e2a309793c6662db91a431f2a659c7ad47a7039 /src/amd/compiler/aco_lower_to_hw_instr.cpp
parent: 16d2c7ad557b46104f91365ab3405f0a3ed7e36d (diff)
download: mesa-6113ee650a272dc737a200e276de474e083a2fdf.tar.gz
aco/gfx11: fix FS input loads in quad-divergent control flow
This is not ideal and it would be great to somehow make it better some day.

fossil-db (gfx1100):
Totals from 5208 (3.86% of 135032) affected shaders:
MaxWaves: 127058 -> 126962 (-0.08%); split: +0.01%, -0.09%
Instrs: 3983440 -> 4072736 (+2.24%); split: -0.00%, +2.24%
CodeSize: 21872468 -> 22230852 (+1.64%); split: -0.00%, +1.64%
VGPRs: 206688 -> 206984 (+0.14%); split: -0.05%, +0.20%
Latency: 37447383 -> 37491197 (+0.12%); split: -0.05%, +0.17%
InvThroughput: 6421955 -> 6422348 (+0.01%); split: -0.03%, +0.03%
VClause: 71579 -> 71545 (-0.05%); split: -0.09%, +0.04%
SClause: 148289 -> 147146 (-0.77%); split: -0.84%, +0.07%
Copies: 259011 -> 258084 (-0.36%); split: -0.61%, +0.25%
Branches: 101366 -> 101314 (-0.05%); split: -0.10%, +0.05%
PreSGPRs: 223482 -> 223460 (-0.01%); split: -0.21%, +0.20%
PreVGPRs: 184448 -> 184744 (+0.16%)

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19370>
Diffstat (limited to 'src/amd/compiler/aco_lower_to_hw_instr.cpp')
-rw-r--r-- src/amd/compiler/aco_lower_to_hw_instr.cpp 48
1 file changed, 48 insertions, 0 deletions
diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp
index 028876e31da..9732933cd84 100644
--- a/src/amd/compiler/aco_lower_to_hw_instr.cpp
+++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp
@@ -2378,6 +2378,54 @@ lower_to_hw_instr(Program* program)
bld.sop1(aco_opcode::s_setpc_b64, instr->operands[0]);
break;
}
+ case aco_opcode::p_interp_gfx11: {
+ assert(instr->definitions[0].regClass() == v1 ||
+ instr->definitions[0].regClass() == v2b);
+ assert(instr->definitions[1].regClass() == bld.lm);
+ assert(instr->operands[0].regClass() == v1.as_linear());
+ assert(instr->operands[1].isConstant());
+ assert(instr->operands[2].isConstant());
+ assert(instr->operands.back().physReg() == m0);
+ Definition dst = instr->definitions[0];
+ PhysReg exec_tmp = instr->definitions[1].physReg();
+ PhysReg lin_vgpr = instr->operands[0].physReg();
+ unsigned attribute = instr->operands[1].constantValue();
+ unsigned component = instr->operands[2].constantValue();
+ uint16_t dpp_ctrl = 0;
+ Operand coord1, coord2;
+ if (instr->operands.size() == 6) {
+ assert(instr->operands[3].regClass() == v1);
+ assert(instr->operands[4].regClass() == v1);
+ coord1 = instr->operands[3];
+ coord2 = instr->operands[4];
+ } else {
+ assert(instr->operands[3].isConstant());
+ dpp_ctrl = instr->operands[3].constantValue();
+ }
+
+ bld.sop1(Builder::s_mov, Definition(exec_tmp, bld.lm), Operand(exec, bld.lm));
+ bld.sop1(Builder::s_wqm, Definition(exec, bld.lm), Operand(exec, bld.lm));
+ bld.ldsdir(aco_opcode::lds_param_load, Definition(lin_vgpr, v1), Operand(m0, s1),
+ attribute, component);
+ bld.sop1(Builder::s_mov, Definition(exec, bld.lm), Operand(exec_tmp, bld.lm));
+
+ Operand p(lin_vgpr, v1);
+ Operand dst_op(dst.physReg(), v1);
+ if (instr->operands.size() == 5) {
+ bld.vop1_dpp(aco_opcode::v_mov_b32, Definition(dst), p, dpp_ctrl);
+ } else if (dst.regClass() == v2b) {
+ bld.vinterp_inreg(aco_opcode::v_interp_p10_f16_f32_inreg, Definition(dst), p,
+ coord1, p);
+ bld.vinterp_inreg(aco_opcode::v_interp_p2_f16_f32_inreg, Definition(dst), p,
+ coord2, dst_op);
+ } else {
+ bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, Definition(dst), p, coord1,
+ p);
+ bld.vinterp_inreg(aco_opcode::v_interp_p2_f32_inreg, Definition(dst), p, coord2,
+ dst_op);
+ }
+ break;
+ }
default: break;
}
} else if (instr->isBranch()) {