diff options
author | Samuel Pitoiset <samuel.pitoiset@gmail.com> | 2022-11-16 15:19:32 +0100 |
---|---|---|
committer | Eric Engestrom <eric@engestrom.ch> | 2022-11-17 14:05:04 +0000 |
commit | 1f23f529d1d0cd29c63e9615fc939727f8a2e08e (patch) | |
tree | 24fca6d00aa3d749ce662f10a6fd02b5e7db525f | |
parent | 578f84373ba360f3192c39515932da33d6190a7b (diff) | |
download | mesa-1f23f529d1d0cd29c63e9615fc939727f8a2e08e.tar.gz |
aco: fix dual source blending on GFX11
Assembly looks similar to LLVM.
Cc: 22.3 mesa-stable
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19643>
(cherry picked from commit fb781bfb0a5a401b5210d613479bbdfb90e94790)
-rw-r--r-- | .pick_status.json | 2 | ||||
-rw-r--r-- | src/amd/compiler/aco_instruction_selection.cpp | 95 | ||||
-rw-r--r-- | src/amd/compiler/aco_shader_info.h | 3 | ||||
-rw-r--r-- | src/amd/vulkan/radv_aco_shader_info.h | 2 | ||||
-rw-r--r-- | src/amd/vulkan/radv_pipeline.c | 2 | ||||
-rw-r--r-- | src/amd/vulkan/radv_shader.h | 1 |
6 files changed, 94 insertions, 11 deletions
diff --git a/.pick_status.json b/.pick_status.json index e223d99de87..eeb9f939399 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -409,7 +409,7 @@ "description": "aco: fix dual source blending on GFX11", "nominated": true, "nomination_type": 0, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": null }, diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index bdc338c9742..66078d8f4eb 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -11189,9 +11189,25 @@ struct mrt_color_export { bool enable_mrt_output_nan_fixup; }; +struct aco_export_mrt { + Operand out[4]; + unsigned enabled_channels; + unsigned target; + bool compr; +}; + +static void +export_mrt(isel_context* ctx, const struct aco_export_mrt* mrt) +{ + Builder bld(ctx->program, ctx->block); + + bld.exp(aco_opcode::exp, mrt->out[0], mrt->out[1], mrt->out[2], mrt->out[3], + mrt->enabled_channels, mrt->target, mrt->compr); +} + static bool -export_fs_mrt_color(isel_context* ctx, const struct mrt_color_export *out, - bool is_ps_epilog) +export_fs_mrt_color(isel_context* ctx, const struct mrt_color_export* out, bool is_ps_epilog, + struct aco_export_mrt* mrt) { Builder bld(ctx->program, ctx->block); Operand values[4]; @@ -11359,8 +11375,12 @@ export_fs_mrt_color(isel_context* ctx, const struct mrt_color_export *out, compr = false; } - bld.exp(aco_opcode::exp, values[0], values[1], values[2], values[3], enabled_channels, target, - compr); + for (unsigned i = 0; i < 4; i++) + mrt->out[i] = values[i]; + mrt->target = target; + mrt->enabled_channels = enabled_channels; + mrt->compr = compr; + return true; } @@ -11429,6 +11449,31 @@ create_fs_jump_to_epilog(isel_context* ctx) } static void +create_fs_dual_src_export_gfx11(isel_context* ctx, const struct aco_export_mrt* mrt0, + const struct aco_export_mrt* mrt1) +{ + Builder bld(ctx->program, ctx->block); + + aco_ptr<Pseudo_instruction> exp{create_instruction<Pseudo_instruction>( + aco_opcode::p_dual_src_export_gfx11, Format::PSEUDO, 8, 6)}; + for (unsigned i = 0; i < 4; i++) { + exp->operands[i] = mrt0 ? mrt0->out[i] : Operand(v1); + exp->operands[i].setLateKill(true); + exp->operands[i + 4] = mrt1 ? mrt1->out[i] : Operand(v1); + exp->operands[i + 4].setLateKill(true); + } + + RegClass type = RegClass(RegType::vgpr, util_bitcount(mrt0->enabled_channels)); + exp->definitions[0] = bld.def(type); /* mrt0 */ + exp->definitions[1] = bld.def(type); /* mrt1 */ + exp->definitions[2] = bld.def(v1); + exp->definitions[3] = bld.def(bld.lm); + exp->definitions[4] = bld.def(bld.lm, vcc); + exp->definitions[5] = bld.def(s1, scc); + ctx->block->instructions.emplace_back(std::move(exp)); +} + +static void create_fs_exports(isel_context* ctx) { Builder bld(ctx->program, ctx->block); @@ -11442,10 +11487,15 @@ create_fs_exports(isel_context* ctx) if (ctx->program->info.ps.has_epilog) { create_fs_jump_to_epilog(ctx); } else { + struct aco_export_mrt mrts[8]; unsigned compacted_mrt_index = 0; /* Export all color render targets. */ for (unsigned i = FRAG_RESULT_DATA0; i < FRAG_RESULT_DATA7 + 1; ++i) { + unsigned idx = i - FRAG_RESULT_DATA0; + + mrts[idx].enabled_channels = 0; + if (!ctx->outputs.mask[i]) continue; @@ -11453,7 +11503,7 @@ create_fs_exports(isel_context* ctx) out.slot = compacted_mrt_index; out.write_mask = ctx->outputs.mask[i]; - out.col_format = (ctx->options->key.ps.col_format >> (4 * (i - FRAG_RESULT_DATA0))) & 0xf; + out.col_format = (ctx->options->key.ps.col_format >> (4 * idx)) & 0xf; for (unsigned c = 0; c < 4; ++c) { if (out.write_mask & (1 << c)) { @@ -11463,14 +11513,25 @@ create_fs_exports(isel_context* ctx) } } - if (export_fs_mrt_color(ctx, &out, false)) { + if (export_fs_mrt_color(ctx, &out, false, &mrts[compacted_mrt_index])) { compacted_mrt_index++; exported = true; } } - if (!exported) + if (exported) { + if (ctx->options->gfx_level >= GFX11 && ctx->options->key.ps.mrt0_is_dual_src) { + struct aco_export_mrt* mrt0 = mrts[0].enabled_channels ? &mrts[0] : NULL; + struct aco_export_mrt* mrt1 = mrts[1].enabled_channels ? &mrts[1] : NULL; + create_fs_dual_src_export_gfx11(ctx, mrt0, mrt1); + } else { + for (unsigned i = 0; i < compacted_mrt_index; i++) { + export_mrt(ctx, &mrts[i]); + } + } + } else { create_fs_null_export(ctx); + } } ctx->block->kind |= block_kind_export_end; @@ -12583,7 +12644,8 @@ select_ps_epilog(Program* program, const struct aco_ps_epilog_key* key, ac_shade Builder bld(ctx.program, ctx.block); /* Export all color render targets */ - bool exported = false; + struct aco_export_mrt mrts[8]; + uint8_t exported_mrts = 0; for (unsigned i = 0; i < 8; i++) { unsigned col_format = (key->spi_shader_col_format >> (i * 4)) & 0xf; @@ -12605,11 +12667,24 @@ select_ps_epilog(Program* program, const struct aco_ps_epilog_key* key, ac_shade out.values[c] = Operand(emit_extract_vector(&ctx, inputs, c, v1)); } - exported |= export_fs_mrt_color(&ctx, &out, true); + if (export_fs_mrt_color(&ctx, &out, true, &mrts[i])) { + exported_mrts |= 1 << i; + } } - if (!exported) + if (exported_mrts) { + if (ctx.options->gfx_level >= GFX11 && key->mrt0_is_dual_src) { + struct aco_export_mrt* mrt0 = (exported_mrts & BITFIELD_BIT(0)) ? &mrts[0] : NULL; + struct aco_export_mrt* mrt1 = (exported_mrts & BITFIELD_BIT(1)) ? &mrts[1] : NULL; + create_fs_dual_src_export_gfx11(&ctx, mrt0, mrt1); + } else { + u_foreach_bit (i, exported_mrts) { + export_mrt(&ctx, &mrts[i]); + } + } + } else { create_fs_null_export(&ctx); + } program->config->float_mode = program->blocks[0].fp_mode.val; diff --git a/src/amd/compiler/aco_shader_info.h b/src/amd/compiler/aco_shader_info.h index b09e25c1a8d..43592cf3d7d 100644 --- a/src/amd/compiler/aco_shader_info.h +++ b/src/amd/compiler/aco_shader_info.h @@ -67,6 +67,8 @@ struct aco_ps_epilog_key { uint8_t color_is_int8; uint8_t color_is_int10; uint8_t enable_mrt_output_nan_fixup; + + bool mrt0_is_dual_src; }; struct aco_vp_output_info { @@ -173,6 +175,7 @@ struct aco_stage_input { /* Used to export alpha through MRTZ for alpha-to-coverage (GFX11+). */ bool alpha_to_coverage_via_mrtz; + bool mrt0_is_dual_src; } ps; }; diff --git a/src/amd/vulkan/radv_aco_shader_info.h b/src/amd/vulkan/radv_aco_shader_info.h index 175a2ab53f1..79597389885 100644 --- a/src/amd/vulkan/radv_aco_shader_info.h +++ b/src/amd/vulkan/radv_aco_shader_info.h @@ -128,6 +128,7 @@ radv_aco_convert_ps_epilog_key(struct aco_ps_epilog_key *aco_info, ASSIGN_FIELD(color_is_int8); ASSIGN_FIELD(color_is_int10); ASSIGN_FIELD(enable_mrt_output_nan_fixup); + ASSIGN_FIELD(mrt0_is_dual_src); } static inline void @@ -146,6 +147,7 @@ radv_aco_convert_pipe_key(struct aco_stage_input *aco_info, ASSIGN_FIELD(tcs.tess_input_vertices); ASSIGN_FIELD(ps.col_format); ASSIGN_FIELD(ps.alpha_to_coverage_via_mrtz); + ASSIGN_FIELD(ps.mrt0_is_dual_src); } static inline void diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 6280eb8d73c..fe132c10fb3 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -3972,6 +3972,7 @@ radv_pipeline_create_ps_epilog(struct radv_graphics_pipeline *pipeline, .color_is_int8 = pipeline_key->ps.is_int8, .color_is_int10 = pipeline_key->ps.is_int10, .enable_mrt_output_nan_fixup = pipeline_key->ps.enable_mrt_output_nan_fixup, + .mrt0_is_dual_src = pipeline_key->ps.mrt0_is_dual_src, }; pipeline->ps_epilog = radv_create_ps_epilog(device, &epilog_key); @@ -6339,6 +6340,7 @@ radv_graphics_lib_pipeline_init(struct radv_graphics_lib_pipeline *pipeline, .color_is_int8 = blend.col_format_is_int8, .color_is_int10 = blend.col_format_is_int10, .enable_mrt_output_nan_fixup = key.ps.enable_mrt_output_nan_fixup, + .mrt0_is_dual_src = blend.mrt0_is_dual_src, }; pipeline->base.ps_epilog = radv_create_ps_epilog(device, &epilog_key); diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h index bb8bf159391..8d7d6df3948 100644 --- a/src/amd/vulkan/radv_shader.h +++ b/src/amd/vulkan/radv_shader.h @@ -417,6 +417,7 @@ struct radv_ps_epilog_key { uint8_t color_is_int10; uint8_t enable_mrt_output_nan_fixup; + bool mrt0_is_dual_src; bool wave32; }; |