diff options
author | Lionel Landwerlin <lionel.g.landwerlin@intel.com> | 2022-12-05 19:26:40 +0200 |
---|---|---|
committer | Eric Engestrom <eric@engestrom.ch> | 2022-12-14 20:47:01 +0000 |
commit | e2fc0b33cdf3bb491e019e07f4b5c3880180628e (patch) | |
tree | 4658a7de764868e3accb40bb9feaed7369a2cfb9 | |
parent | 2510c6c502285b7ca2583454960137f887459f85 (diff) | |
download | mesa-e2fc0b33cdf3bb491e019e07f4b5c3880180628e.tar.gz |
intel: factor out dispatch PS enabling logic
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Ivan Briano <ivan.briano@intel.com>
Tested-by: Mark Janes <markjanes@swizzler.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20169>
(cherry picked from commit b9403b1c477e7af04114ae6a4e16ca370e22253c)
-rw-r--r-- | .pick_status.json | 2 |
-rw-r--r-- | src/gallium/drivers/crocus/crocus_state.c | 8 |
-rw-r--r-- | src/gallium/drivers/iris/iris_state.c | 22 |
-rw-r--r-- | src/intel/blorp/blorp_genX_exec.h | 24 |
-rw-r--r-- | src/intel/compiler/brw_compiler.h | 51 |
-rw-r--r-- | src/intel/compiler/brw_fs.cpp | 20 |
-rw-r--r-- | src/intel/vulkan/genX_pipeline.c | 22 |
-rw-r--r-- | src/intel/vulkan_hasvk/genX_pipeline.c | 8 |
8 files changed, 78 insertions, 79 deletions
diff --git a/.pick_status.json b/.pick_status.json index 69810687b29..6020112696c 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -3613,7 +3613,7 @@ "description": "intel: factor out dispatch PS enabling logic", "nominated": false, "nomination_type": null, - "resolution": 4, + "resolution": 1, "main_sha": null, "because_sha": null }, diff --git a/src/gallium/drivers/crocus/crocus_state.c b/src/gallium/drivers/crocus/crocus_state.c index a44fc3e06e6..270dfcf45eb 100644 --- a/src/gallium/drivers/crocus/crocus_state.c +++ b/src/gallium/drivers/crocus/crocus_state.c @@ -6446,9 +6446,11 @@ crocus_upload_dirty_render_state(struct crocus_context *ice, */ ps.VectorMaskEnable = GFX_VER >= 8 && wm_prog_data->uses_vmask; - ps._8PixelDispatchEnable = wm_prog_data->dispatch_8; - ps._16PixelDispatchEnable = wm_prog_data->dispatch_16; - ps._32PixelDispatchEnable = wm_prog_data->dispatch_32; + brw_fs_get_dispatch_enables(&batch->screen->devinfo, wm_prog_data, + ice->state.framebuffer.samples, + &ps._8PixelDispatchEnable, + &ps._16PixelDispatchEnable, + &ps._32PixelDispatchEnable); ps.DispatchGRFStartRegisterForConstantSetupData0 = brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0); diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c index 062a2f3aa20..2fe43613f50 100644 --- a/src/gallium/drivers/iris/iris_state.c +++ b/src/gallium/drivers/iris/iris_state.c @@ -6225,23 +6225,11 @@ iris_upload_dirty_render_state(struct iris_context *ice, uint32_t ps_state[GENX(3DSTATE_PS_length)] = {0}; _iris_pack_command(batch, GENX(3DSTATE_PS), ps_state, ps) { - ps._8PixelDispatchEnable = wm_prog_data->dispatch_8; - ps._16PixelDispatchEnable = wm_prog_data->dispatch_16; - ps._32PixelDispatchEnable = wm_prog_data->dispatch_32; - - /* The docs for 3DSTATE_PS::32 Pixel Dispatch Enable say: - * - * "When NUM_MULTISAMPLES = 16 or FORCE_SAMPLE_COUNT = 16, - * SIMD32 Dispatch must not be enabled for PER_PIXEL dispatch - * mode." 
- * - * 16x MSAA only exists on Gfx9+, so we can skip this on Gfx8. - */ - if (GFX_VER >= 9 && cso_fb->samples == 16 && - !wm_prog_data->persample_dispatch) { - assert(ps._8PixelDispatchEnable || ps._16PixelDispatchEnable); - ps._32PixelDispatchEnable = false; - } + brw_fs_get_dispatch_enables(&screen->devinfo, wm_prog_data, + cso_fb->samples, + &ps._8PixelDispatchEnable, + &ps._16PixelDispatchEnable, + &ps._32PixelDispatchEnable); ps.DispatchGRFStartRegisterForConstantSetupData0 = brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0); diff --git a/src/intel/blorp/blorp_genX_exec.h b/src/intel/blorp/blorp_genX_exec.h index 0f68e2c225c..2cb8fb0baee 100644 --- a/src/intel/blorp/blorp_genX_exec.h +++ b/src/intel/blorp/blorp_genX_exec.h @@ -838,6 +838,7 @@ blorp_emit_ps_config(struct blorp_batch *batch, */ #if GFX_VER >= 8 + const struct intel_device_info *devinfo = batch->blorp->compiler->devinfo; blorp_emit(batch, GENX(3DSTATE_WM), wm); @@ -854,23 +855,11 @@ blorp_emit_ps_config(struct blorp_batch *batch, ps.SamplerCount = 0; if (prog_data) { - ps._8PixelDispatchEnable = prog_data->dispatch_8; - ps._16PixelDispatchEnable = prog_data->dispatch_16; - ps._32PixelDispatchEnable = prog_data->dispatch_32; - - /* From the Sky Lake PRM 3DSTATE_PS::32 Pixel Dispatch Enable: - * - * "When NUM_MULTISAMPLES = 16 or FORCE_SAMPLE_COUNT = 16, SIMD32 - * Dispatch must not be enabled for PER_PIXEL dispatch mode." - * - * Since 16x MSAA is first introduced on SKL, we don't need to apply - * the workaround on any older hardware. 
- */ - if (GFX_VER >= 9 && !prog_data->persample_dispatch && - params->num_samples == 16) { - assert(ps._8PixelDispatchEnable || ps._16PixelDispatchEnable); - ps._32PixelDispatchEnable = false; - } + brw_fs_get_dispatch_enables(devinfo, prog_data, + params->num_samples, + &ps._8PixelDispatchEnable, + &ps._16PixelDispatchEnable, + &ps._32PixelDispatchEnable); ps.DispatchGRFStartRegisterForConstantSetupData0 = brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 0); @@ -894,7 +883,6 @@ blorp_emit_ps_config(struct blorp_batch *batch, * * In Gfx8 the format is U8-2 whereas in Gfx9+ it is U9-1. */ - const struct intel_device_info *devinfo = batch->blorp->compiler->devinfo; ps.MaximumNumberofThreadsPerPSD = devinfo->max_threads_per_psd - (GFX_VER == 8 ? 2 : 1); diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h index ca9a79794c3..06d0c4d26ea 100644 --- a/src/intel/compiler/brw_compiler.h +++ b/src/intel/compiler/brw_compiler.h @@ -984,6 +984,57 @@ brw_fs_simd_width_for_ksp(unsigned ksp_idx, bool simd8_enabled, } } +static inline void +brw_fs_get_dispatch_enables(const struct intel_device_info *devinfo, + const struct brw_wm_prog_data *prog_data, + unsigned rasterization_samples, + bool *enable_8, + bool *enable_16, + bool *enable_32) +{ + assert(rasterization_samples != 0); + + *enable_8 = prog_data->dispatch_8; + *enable_16 = prog_data->dispatch_16; + *enable_32 = prog_data->dispatch_32; + + if (prog_data->persample_dispatch) { + /* Starting with SandyBridge (where we first get MSAA), the different + * pixel dispatch combinations are grouped into classifications A + * through F (SNB PRM Vol. 2 Part 1 Section 7.7.1). On most hardware + * generations, the only configurations supporting persample dispatch + * are those in which only one dispatch width is enabled. 
+ * + * The Gfx12 hardware spec has a similar dispatch grouping table, but + * the following conflicting restriction applies (from the page on + * "Structure_3DSTATE_PS_BODY"), so we need to keep the SIMD16 shader: + * + * "SIMD32 may only be enabled if SIMD16 or (dual)SIMD8 is also + * enabled." + */ + if (*enable_32 || *enable_16) + *enable_8 = false; + if (devinfo->ver < 12 && *enable_32) + *enable_16 = false; + } + + /* The docs for 3DSTATE_PS::32 Pixel Dispatch Enable say: + * + * "When NUM_MULTISAMPLES = 16 or FORCE_SAMPLE_COUNT = 16, + * SIMD32 Dispatch must not be enabled for PER_PIXEL dispatch + * mode." + * + * 16x MSAA only exists on Gfx9+, so we can skip this on Gfx8. + */ + if (devinfo->ver >= 9 && rasterization_samples == 16 && + !prog_data->persample_dispatch) { + assert(*enable_8 || *enable_16); + *enable_32 = false; + } + + assert(*enable_8 || *enable_16 || *enable_32); +} + #define brw_wm_state_simd_width_for_ksp(wm_state, ksp_idx) \ brw_fs_simd_width_for_ksp((ksp_idx), (wm_state)._8PixelDispatchEnable, \ (wm_state)._16PixelDispatchEnable, \ diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 62445467603..761b79cf05a 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -7445,26 +7445,6 @@ brw_compile_fs(const struct brw_compiler *compiler, } } - if (prog_data->persample_dispatch) { - /* Starting with SandyBridge (where we first get MSAA), the different - * pixel dispatch combinations are grouped into classifications A - * through F (SNB PRM Vol. 2 Part 1 Section 7.7.1). On most hardware - * generations, the only configurations supporting persample dispatch - * are those in which only one dispatch width is enabled. 
- * - * The Gfx12 hardware spec has a similar dispatch grouping table, but - * the following conflicting restriction applies (from the page on - * "Structure_3DSTATE_PS_BODY"), so we need to keep the SIMD16 shader: - * - * "SIMD32 may only be enabled if SIMD16 or (dual)SIMD8 is also - * enabled." - */ - if (simd32_cfg || simd16_cfg) - simd8_cfg = NULL; - if (simd32_cfg && devinfo->ver < 12) - simd16_cfg = NULL; - } - fs_generator g(compiler, params->log_data, mem_ctx, &prog_data->base, v8->runtime_check_aads_emit, MESA_SHADER_FRAGMENT); diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c index 08489b21601..1261a31d346 100644 --- a/src/intel/vulkan/genX_pipeline.c +++ b/src/intel/vulkan/genX_pipeline.c @@ -1487,23 +1487,11 @@ emit_3dstate_ps(struct anv_graphics_pipeline *pipeline, const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_PS), ps) { - ps._8PixelDispatchEnable = wm_prog_data->dispatch_8; - ps._16PixelDispatchEnable = wm_prog_data->dispatch_16; - ps._32PixelDispatchEnable = wm_prog_data->dispatch_32; - - /* From the Sky Lake PRM 3DSTATE_PS::32 Pixel Dispatch Enable: - * - * "When NUM_MULTISAMPLES = 16 or FORCE_SAMPLE_COUNT = 16, SIMD32 - * Dispatch must not be enabled for PER_PIXEL dispatch mode." - * - * Since 16x MSAA is first introduced on SKL, we don't need to apply - * the workaround on any older hardware. - */ - if (!wm_prog_data->persample_dispatch && - ms != NULL && ms->rasterization_samples == 16) { - assert(ps._8PixelDispatchEnable || ps._16PixelDispatchEnable); - ps._32PixelDispatchEnable = false; - } + brw_fs_get_dispatch_enables(devinfo, wm_prog_data, + ms != NULL ? 
ms->rasterization_samples : 1, + &ps._8PixelDispatchEnable, + &ps._16PixelDispatchEnable, + &ps._32PixelDispatchEnable); ps.KernelStartPointer0 = fs_bin->kernel.offset + brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0); diff --git a/src/intel/vulkan_hasvk/genX_pipeline.c b/src/intel/vulkan_hasvk/genX_pipeline.c index b35d5759c3d..8763a4144e3 100644 --- a/src/intel/vulkan_hasvk/genX_pipeline.c +++ b/src/intel/vulkan_hasvk/genX_pipeline.c @@ -1806,9 +1806,11 @@ emit_3dstate_ps(struct anv_graphics_pipeline *pipeline, #endif anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_PS), ps) { - ps._8PixelDispatchEnable = wm_prog_data->dispatch_8; - ps._16PixelDispatchEnable = wm_prog_data->dispatch_16; - ps._32PixelDispatchEnable = wm_prog_data->dispatch_32; + brw_fs_get_dispatch_enables(devinfo, wm_prog_data, + ms != NULL ? ms->rasterization_samples : 1, + &ps._8PixelDispatchEnable, + &ps._16PixelDispatchEnable, + &ps._32PixelDispatchEnable); /* From the Sky Lake PRM 3DSTATE_PS::32 Pixel Dispatch Enable: * |