summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Samuel Pitoiset <samuel.pitoiset@gmail.com> 2022-11-16 15:19:32 +0100
committer: Eric Engestrom <eric@engestrom.ch> 2022-11-17 14:05:04 +0000
commit: 1f23f529d1d0cd29c63e9615fc939727f8a2e08e (patch)
tree: 24fca6d00aa3d749ce662f10a6fd02b5e7db525f
parent: 578f84373ba360f3192c39515932da33d6190a7b (diff)
download: mesa-1f23f529d1d0cd29c63e9615fc939727f8a2e08e.tar.gz
aco: fix dual source blending on GFX11
Assembly looks similar to LLVM.

Cc: 22.3 mesa-stable
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19643>
(cherry picked from commit fb781bfb0a5a401b5210d613479bbdfb90e94790)
-rw-r--r-- .pick_status.json 2
-rw-r--r-- src/amd/compiler/aco_instruction_selection.cpp 95
-rw-r--r-- src/amd/compiler/aco_shader_info.h 3
-rw-r--r-- src/amd/vulkan/radv_aco_shader_info.h 2
-rw-r--r-- src/amd/vulkan/radv_pipeline.c 2
-rw-r--r-- src/amd/vulkan/radv_shader.h 1
6 files changed, 94 insertions, 11 deletions
diff --git a/.pick_status.json b/.pick_status.json
index e223d99de87..eeb9f939399 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -409,7 +409,7 @@
"description": "aco: fix dual source blending on GFX11",
"nominated": true,
"nomination_type": 0,
- "resolution": 0,
+ "resolution": 1,
"main_sha": null,
"because_sha": null
},
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index bdc338c9742..66078d8f4eb 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -11189,9 +11189,25 @@ struct mrt_color_export {
bool enable_mrt_output_nan_fixup;
};
+struct aco_export_mrt {
+ Operand out[4];
+ unsigned enabled_channels;
+ unsigned target;
+ bool compr;
+};
+
+static void
+export_mrt(isel_context* ctx, const struct aco_export_mrt* mrt)
+{
+ Builder bld(ctx->program, ctx->block);
+
+ bld.exp(aco_opcode::exp, mrt->out[0], mrt->out[1], mrt->out[2], mrt->out[3],
+ mrt->enabled_channels, mrt->target, mrt->compr);
+}
+
static bool
-export_fs_mrt_color(isel_context* ctx, const struct mrt_color_export *out,
- bool is_ps_epilog)
+export_fs_mrt_color(isel_context* ctx, const struct mrt_color_export* out, bool is_ps_epilog,
+ struct aco_export_mrt* mrt)
{
Builder bld(ctx->program, ctx->block);
Operand values[4];
@@ -11359,8 +11375,12 @@ export_fs_mrt_color(isel_context* ctx, const struct mrt_color_export *out,
compr = false;
}
- bld.exp(aco_opcode::exp, values[0], values[1], values[2], values[3], enabled_channels, target,
- compr);
+ for (unsigned i = 0; i < 4; i++)
+ mrt->out[i] = values[i];
+ mrt->target = target;
+ mrt->enabled_channels = enabled_channels;
+ mrt->compr = compr;
+
return true;
}
@@ -11429,6 +11449,31 @@ create_fs_jump_to_epilog(isel_context* ctx)
}
static void
+create_fs_dual_src_export_gfx11(isel_context* ctx, const struct aco_export_mrt* mrt0,
+ const struct aco_export_mrt* mrt1)
+{
+ Builder bld(ctx->program, ctx->block);
+
+ aco_ptr<Pseudo_instruction> exp{create_instruction<Pseudo_instruction>(
+ aco_opcode::p_dual_src_export_gfx11, Format::PSEUDO, 8, 6)};
+ for (unsigned i = 0; i < 4; i++) {
+ exp->operands[i] = mrt0 ? mrt0->out[i] : Operand(v1);
+ exp->operands[i].setLateKill(true);
+ exp->operands[i + 4] = mrt1 ? mrt1->out[i] : Operand(v1);
+ exp->operands[i + 4].setLateKill(true);
+ }
+
+ RegClass type = RegClass(RegType::vgpr, util_bitcount(mrt0->enabled_channels));
+ exp->definitions[0] = bld.def(type); /* mrt0 */
+ exp->definitions[1] = bld.def(type); /* mrt1 */
+ exp->definitions[2] = bld.def(v1);
+ exp->definitions[3] = bld.def(bld.lm);
+ exp->definitions[4] = bld.def(bld.lm, vcc);
+ exp->definitions[5] = bld.def(s1, scc);
+ ctx->block->instructions.emplace_back(std::move(exp));
+}
+
+static void
create_fs_exports(isel_context* ctx)
{
Builder bld(ctx->program, ctx->block);
@@ -11442,10 +11487,15 @@ create_fs_exports(isel_context* ctx)
if (ctx->program->info.ps.has_epilog) {
create_fs_jump_to_epilog(ctx);
} else {
+ struct aco_export_mrt mrts[8];
unsigned compacted_mrt_index = 0;
/* Export all color render targets. */
for (unsigned i = FRAG_RESULT_DATA0; i < FRAG_RESULT_DATA7 + 1; ++i) {
+ unsigned idx = i - FRAG_RESULT_DATA0;
+
+ mrts[idx].enabled_channels = 0;
+
if (!ctx->outputs.mask[i])
continue;
@@ -11453,7 +11503,7 @@ create_fs_exports(isel_context* ctx)
out.slot = compacted_mrt_index;
out.write_mask = ctx->outputs.mask[i];
- out.col_format = (ctx->options->key.ps.col_format >> (4 * (i - FRAG_RESULT_DATA0))) & 0xf;
+ out.col_format = (ctx->options->key.ps.col_format >> (4 * idx)) & 0xf;
for (unsigned c = 0; c < 4; ++c) {
if (out.write_mask & (1 << c)) {
@@ -11463,14 +11513,25 @@ create_fs_exports(isel_context* ctx)
}
}
- if (export_fs_mrt_color(ctx, &out, false)) {
+ if (export_fs_mrt_color(ctx, &out, false, &mrts[compacted_mrt_index])) {
compacted_mrt_index++;
exported = true;
}
}
- if (!exported)
+ if (exported) {
+ if (ctx->options->gfx_level >= GFX11 && ctx->options->key.ps.mrt0_is_dual_src) {
+ struct aco_export_mrt* mrt0 = mrts[0].enabled_channels ? &mrts[0] : NULL;
+ struct aco_export_mrt* mrt1 = mrts[1].enabled_channels ? &mrts[1] : NULL;
+ create_fs_dual_src_export_gfx11(ctx, mrt0, mrt1);
+ } else {
+ for (unsigned i = 0; i < compacted_mrt_index; i++) {
+ export_mrt(ctx, &mrts[i]);
+ }
+ }
+ } else {
create_fs_null_export(ctx);
+ }
}
ctx->block->kind |= block_kind_export_end;
@@ -12583,7 +12644,8 @@ select_ps_epilog(Program* program, const struct aco_ps_epilog_key* key, ac_shade
Builder bld(ctx.program, ctx.block);
/* Export all color render targets */
- bool exported = false;
+ struct aco_export_mrt mrts[8];
+ uint8_t exported_mrts = 0;
for (unsigned i = 0; i < 8; i++) {
unsigned col_format = (key->spi_shader_col_format >> (i * 4)) & 0xf;
@@ -12605,11 +12667,24 @@ select_ps_epilog(Program* program, const struct aco_ps_epilog_key* key, ac_shade
out.values[c] = Operand(emit_extract_vector(&ctx, inputs, c, v1));
}
- exported |= export_fs_mrt_color(&ctx, &out, true);
+ if (export_fs_mrt_color(&ctx, &out, true, &mrts[i])) {
+ exported_mrts |= 1 << i;
+ }
}
- if (!exported)
+ if (exported_mrts) {
+ if (ctx.options->gfx_level >= GFX11 && key->mrt0_is_dual_src) {
+ struct aco_export_mrt* mrt0 = (exported_mrts & BITFIELD_BIT(0)) ? &mrts[0] : NULL;
+ struct aco_export_mrt* mrt1 = (exported_mrts & BITFIELD_BIT(1)) ? &mrts[1] : NULL;
+ create_fs_dual_src_export_gfx11(&ctx, mrt0, mrt1);
+ } else {
+ u_foreach_bit (i, exported_mrts) {
+ export_mrt(&ctx, &mrts[i]);
+ }
+ }
+ } else {
create_fs_null_export(&ctx);
+ }
program->config->float_mode = program->blocks[0].fp_mode.val;
diff --git a/src/amd/compiler/aco_shader_info.h b/src/amd/compiler/aco_shader_info.h
index b09e25c1a8d..43592cf3d7d 100644
--- a/src/amd/compiler/aco_shader_info.h
+++ b/src/amd/compiler/aco_shader_info.h
@@ -67,6 +67,8 @@ struct aco_ps_epilog_key {
uint8_t color_is_int8;
uint8_t color_is_int10;
uint8_t enable_mrt_output_nan_fixup;
+
+ bool mrt0_is_dual_src;
};
struct aco_vp_output_info {
@@ -173,6 +175,7 @@ struct aco_stage_input {
/* Used to export alpha through MRTZ for alpha-to-coverage (GFX11+). */
bool alpha_to_coverage_via_mrtz;
+ bool mrt0_is_dual_src;
} ps;
};
diff --git a/src/amd/vulkan/radv_aco_shader_info.h b/src/amd/vulkan/radv_aco_shader_info.h
index 175a2ab53f1..79597389885 100644
--- a/src/amd/vulkan/radv_aco_shader_info.h
+++ b/src/amd/vulkan/radv_aco_shader_info.h
@@ -128,6 +128,7 @@ radv_aco_convert_ps_epilog_key(struct aco_ps_epilog_key *aco_info,
ASSIGN_FIELD(color_is_int8);
ASSIGN_FIELD(color_is_int10);
ASSIGN_FIELD(enable_mrt_output_nan_fixup);
+ ASSIGN_FIELD(mrt0_is_dual_src);
}
static inline void
@@ -146,6 +147,7 @@ radv_aco_convert_pipe_key(struct aco_stage_input *aco_info,
ASSIGN_FIELD(tcs.tess_input_vertices);
ASSIGN_FIELD(ps.col_format);
ASSIGN_FIELD(ps.alpha_to_coverage_via_mrtz);
+ ASSIGN_FIELD(ps.mrt0_is_dual_src);
}
static inline void
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 6280eb8d73c..fe132c10fb3 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -3972,6 +3972,7 @@ radv_pipeline_create_ps_epilog(struct radv_graphics_pipeline *pipeline,
.color_is_int8 = pipeline_key->ps.is_int8,
.color_is_int10 = pipeline_key->ps.is_int10,
.enable_mrt_output_nan_fixup = pipeline_key->ps.enable_mrt_output_nan_fixup,
+ .mrt0_is_dual_src = pipeline_key->ps.mrt0_is_dual_src,
};
pipeline->ps_epilog = radv_create_ps_epilog(device, &epilog_key);
@@ -6339,6 +6340,7 @@ radv_graphics_lib_pipeline_init(struct radv_graphics_lib_pipeline *pipeline,
.color_is_int8 = blend.col_format_is_int8,
.color_is_int10 = blend.col_format_is_int10,
.enable_mrt_output_nan_fixup = key.ps.enable_mrt_output_nan_fixup,
+ .mrt0_is_dual_src = blend.mrt0_is_dual_src,
};
pipeline->base.ps_epilog = radv_create_ps_epilog(device, &epilog_key);
diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
index bb8bf159391..8d7d6df3948 100644
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -417,6 +417,7 @@ struct radv_ps_epilog_key {
uint8_t color_is_int10;
uint8_t enable_mrt_output_nan_fixup;
+ bool mrt0_is_dual_src;
bool wave32;
};