diff options
author | Tapani Pälli <tapani.palli@intel.com> | 2022-12-06 18:11:10 +0200 |
---|---|---|
committer | Eric Engestrom <eric@engestrom.ch> | 2022-12-14 20:47:01 +0000 |
commit | 5b6718728bd869ceab37f833ef5b53b485fae969 (patch) | |
tree | a927e6f86bc9092975bd8b7e03322e86019c633f | |
parent | d936394cf49f896429bf53d3ff95dd026c32b2ce (diff) | |
download | mesa-5b6718728bd869ceab37f833ef5b53b485fae969.tar.gz |
intel/fs: implement Wa_14017989577
The first instruction of any kernel should have a non-zero emask
(execution mask). This restriction needs to be obeyed to avoid GPU hangs.
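This patch adds a function that inserts a dummy mov as the first
instruction to make sure this requirement is fulfilled. The dummy mov is
skipped when the existing first instruction already has
force_writemask_all set or its exec_size equals the dispatch width.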
Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Cc: mesa-stable
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20194>
(cherry picked from commit bc4b7de0d0469e296f7ec4626fccdf97926b1c8e)
-rw-r--r-- | .pick_status.json | 2 | ||||
-rw-r--r-- | src/intel/compiler/brw_fs.cpp | 64 | ||||
-rw-r--r-- | src/intel/compiler/brw_fs.h | 1 |
3 files changed, 66 insertions, 1 deletions
diff --git a/.pick_status.json b/.pick_status.json index a9dd9b86685..bdfb0f2ccce 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -2587,7 +2587,7 @@ "description": "intel/fs: implement Wa_14017989577", "nominated": true, "nomination_type": 0, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": null }, diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 761b79cf05a..db4806b50f2 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -6197,6 +6197,35 @@ needs_dummy_fence(const intel_device_info *devinfo, fs_inst *inst) return false; } +/* Wa_14017989577 + * + * The first instruction of any kernel should have non-zero emask. + * Make sure this happens by introducing a dummy mov instruction. + */ +void +fs_visitor::emit_dummy_mov_instruction() +{ + if (devinfo->verx10 < 120) + return; + + struct backend_instruction *first_inst = + cfg->first_block()->start(); + + /* We can skip the WA if first instruction is marked with + * force_writemask_all or exec_size equals dispatch_width. + */ + if (first_inst->force_writemask_all || + first_inst->exec_size == dispatch_width) + return; + + /* Insert dummy mov as first instruction. 
*/ + const fs_builder ubld = + bld.at(cfg->first_block(), first_inst).exec_all().group(8, 0); + ubld.MOV(bld.null_reg_ud(), brw_imm_ud(0u)); + + invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES); +} + /* Wa_22013689345 * * We need to emit UGM fence message before EOT, if shader has any UGM write @@ -6557,6 +6586,10 @@ fs_visitor::run_vs() fixup_3src_null_dest(); emit_dummy_memory_fence_before_eot(); + + /* Wa_14017989577 */ + emit_dummy_mov_instruction(); + allocate_registers(true /* allow_spilling */); return !failed; @@ -6679,6 +6712,10 @@ fs_visitor::run_tcs() fixup_3src_null_dest(); emit_dummy_memory_fence_before_eot(); + + /* Wa_14017989577 */ + emit_dummy_mov_instruction(); + allocate_registers(true /* allow_spilling */); return !failed; @@ -6707,6 +6744,10 @@ fs_visitor::run_tes() fixup_3src_null_dest(); emit_dummy_memory_fence_before_eot(); + + /* Wa_14017989577 */ + emit_dummy_mov_instruction(); + allocate_registers(true /* allow_spilling */); return !failed; @@ -6751,6 +6792,10 @@ fs_visitor::run_gs() fixup_3src_null_dest(); emit_dummy_memory_fence_before_eot(); + + /* Wa_14017989577 */ + emit_dummy_mov_instruction(); + allocate_registers(true /* allow_spilling */); return !failed; @@ -6851,6 +6896,9 @@ fs_visitor::run_fs(bool allow_spilling, bool do_rep_send) fixup_3src_null_dest(); emit_dummy_memory_fence_before_eot(); + /* Wa_14017989577 */ + emit_dummy_mov_instruction(); + allocate_registers(allow_spilling); } @@ -6887,6 +6935,10 @@ fs_visitor::run_cs(bool allow_spilling) fixup_3src_null_dest(); emit_dummy_memory_fence_before_eot(); + + /* Wa_14017989577 */ + emit_dummy_mov_instruction(); + allocate_registers(allow_spilling); return !failed; @@ -6915,6 +6967,10 @@ fs_visitor::run_bs(bool allow_spilling) fixup_3src_null_dest(); emit_dummy_memory_fence_before_eot(); + + /* Wa_14017989577 */ + emit_dummy_mov_instruction(); + allocate_registers(allow_spilling); return !failed; @@ -6944,6 +7000,10 @@ fs_visitor::run_task(bool 
allow_spilling) fixup_3src_null_dest(); emit_dummy_memory_fence_before_eot(); + + /* Wa_14017989577 */ + emit_dummy_mov_instruction(); + allocate_registers(allow_spilling); return !failed; @@ -6973,6 +7033,10 @@ fs_visitor::run_mesh(bool allow_spilling) fixup_3src_null_dest(); emit_dummy_memory_fence_before_eot(); + + /* Wa_14017989577 */ + emit_dummy_mov_instruction(); + allocate_registers(allow_spilling); return !failed; diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index 155e563211c..6b2f5932a5f 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -224,6 +224,7 @@ public: bool fixup_sends_duplicate_payload(); void fixup_3src_null_dest(); void emit_dummy_memory_fence_before_eot(); + void emit_dummy_mov_instruction(); bool fixup_nomask_control_flow(); void assign_curb_setup(); void assign_urb_setup(); |