summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Tapani Pälli <tapani.palli@intel.com> 2022-12-06 18:11:10 +0200
committer Eric Engestrom <eric@engestrom.ch> 2022-12-14 20:47:01 +0000
commit 5b6718728bd869ceab37f833ef5b53b485fae969 (patch)
tree a927e6f86bc9092975bd8b7e03322e86019c633f
parent d936394cf49f896429bf53d3ff95dd026c32b2ce (diff)
download mesa-5b6718728bd869ceab37f833ef5b53b485fae969.tar.gz
intel/fs: implement Wa_14017989577
The first instruction of any kernel should have non-zero emask. This
restriction needs to be obeyed to avoid GPU hangs. Patch adds a function
to insert dummy mov as first instruction to make sure this requirement is
fulfilled.

Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Cc: mesa-stable
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20194>
(cherry picked from commit bc4b7de0d0469e296f7ec4626fccdf97926b1c8e)
-rw-r--r-- .pick_status.json 2
-rw-r--r-- src/intel/compiler/brw_fs.cpp 64
-rw-r--r-- src/intel/compiler/brw_fs.h 1
3 files changed, 66 insertions, 1 deletions
diff --git a/.pick_status.json b/.pick_status.json
index a9dd9b86685..bdfb0f2ccce 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -2587,7 +2587,7 @@
"description": "intel/fs: implement Wa_14017989577",
"nominated": true,
"nomination_type": 0,
- "resolution": 0,
+ "resolution": 1,
"main_sha": null,
"because_sha": null
},
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 761b79cf05a..db4806b50f2 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -6197,6 +6197,35 @@ needs_dummy_fence(const intel_device_info *devinfo, fs_inst *inst)
return false;
}
+/* Wa_14017989577
+ *
+ * The first instruction of any kernel should have non-zero emask, otherwise the
+ * GPU may hang. On verx10 >= 120, prepend a dummy mov when that is not guaranteed.
+ */
+void
+fs_visitor::emit_dummy_mov_instruction()
+{
+ if (devinfo->verx10 < 120)
+ return;
+
+ struct backend_instruction *first_inst =
+ cfg->first_block()->start();
+
+ /* Nothing to do when the first instruction already has
+ * force_writemask_all set or its exec_size equals dispatch_width.
+ */
+ if (first_inst->force_writemask_all ||
+ first_inst->exec_size == dispatch_width)
+ return;
+
+ /* Insert an exec_all, group(8, 0) mov of immediate 0 to the null register as the new first instruction. */
+ const fs_builder ubld =
+ bld.at(cfg->first_block(), first_inst).exec_all().group(8, 0);
+ ubld.MOV(bld.null_reg_ud(), brw_imm_ud(0u));
+
+ invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES);
+}
+
/* Wa_22013689345
*
* We need to emit UGM fence message before EOT, if shader has any UGM write
@@ -6557,6 +6586,10 @@ fs_visitor::run_vs()
fixup_3src_null_dest();
emit_dummy_memory_fence_before_eot();
+
+ /* Wa_14017989577 */
+ emit_dummy_mov_instruction();
+
allocate_registers(true /* allow_spilling */);
return !failed;
@@ -6679,6 +6712,10 @@ fs_visitor::run_tcs()
fixup_3src_null_dest();
emit_dummy_memory_fence_before_eot();
+
+ /* Wa_14017989577 */
+ emit_dummy_mov_instruction();
+
allocate_registers(true /* allow_spilling */);
return !failed;
@@ -6707,6 +6744,10 @@ fs_visitor::run_tes()
fixup_3src_null_dest();
emit_dummy_memory_fence_before_eot();
+
+ /* Wa_14017989577 */
+ emit_dummy_mov_instruction();
+
allocate_registers(true /* allow_spilling */);
return !failed;
@@ -6751,6 +6792,10 @@ fs_visitor::run_gs()
fixup_3src_null_dest();
emit_dummy_memory_fence_before_eot();
+
+ /* Wa_14017989577 */
+ emit_dummy_mov_instruction();
+
allocate_registers(true /* allow_spilling */);
return !failed;
@@ -6851,6 +6896,9 @@ fs_visitor::run_fs(bool allow_spilling, bool do_rep_send)
fixup_3src_null_dest();
emit_dummy_memory_fence_before_eot();
+ /* Wa_14017989577 */
+ emit_dummy_mov_instruction();
+
allocate_registers(allow_spilling);
}
@@ -6887,6 +6935,10 @@ fs_visitor::run_cs(bool allow_spilling)
fixup_3src_null_dest();
emit_dummy_memory_fence_before_eot();
+
+ /* Wa_14017989577 */
+ emit_dummy_mov_instruction();
+
allocate_registers(allow_spilling);
return !failed;
@@ -6915,6 +6967,10 @@ fs_visitor::run_bs(bool allow_spilling)
fixup_3src_null_dest();
emit_dummy_memory_fence_before_eot();
+
+ /* Wa_14017989577 */
+ emit_dummy_mov_instruction();
+
allocate_registers(allow_spilling);
return !failed;
@@ -6944,6 +7000,10 @@ fs_visitor::run_task(bool allow_spilling)
fixup_3src_null_dest();
emit_dummy_memory_fence_before_eot();
+
+ /* Wa_14017989577 */
+ emit_dummy_mov_instruction();
+
allocate_registers(allow_spilling);
return !failed;
@@ -6973,6 +7033,10 @@ fs_visitor::run_mesh(bool allow_spilling)
fixup_3src_null_dest();
emit_dummy_memory_fence_before_eot();
+
+ /* Wa_14017989577 */
+ emit_dummy_mov_instruction();
+
allocate_registers(allow_spilling);
return !failed;
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index 155e563211c..6b2f5932a5f 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -224,6 +224,7 @@ public:
bool fixup_sends_duplicate_payload();
void fixup_3src_null_dest();
void emit_dummy_memory_fence_before_eot();
+ void emit_dummy_mov_instruction();
bool fixup_nomask_control_flow();
void assign_curb_setup();
void assign_urb_setup();