intel/compiler: insert URB fence before task/mesh termination

Bspec 53421 says:
"A URB fence memory is typically performed prior the thread exit message, so that the next thread dispatch that reads that URB memory will see it."

Cc: 22.1 <mesa-stable>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16665>
(cherry picked from commit 7ebae85955cbef4dae83c7888052d2c495d5c364)
author: Marcin Ślusarz <marcin.slusarz@intel.com> 2022-05-18 17:05:53 +0200
committer: Dylan Baker <dylan.c.baker@intel.com> 2022-08-10 14:16:02 -0700
commit: a8788b239297910ef4927d3f58773b29829cad44 (patch)
tree: ce6580bd3c3a29ccb8e8124d737d72e4b027418f /src
parent: acca7892340328663996c4ed06a6aef9c698b209 (diff)
download: mesa-a8788b239297910ef4927d3f58773b29829cad44.tar.gz
3 files changed, 24 insertions, 0 deletions
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index b2e24014501..3172a79fe98 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -9442,6 +9442,8 @@ fs_visitor::run_task(bool allow_spilling)
    if (failed)
       return false;
 
+   emit_urb_fence();
+
    emit_cs_terminate();
 
    calculate_cfg();
@@ -9487,6 +9489,8 @@ fs_visitor::run_mesh(bool allow_spilling)
    if (failed)
       return false;
 
+   emit_urb_fence();
+
    emit_cs_terminate();
 
    calculate_cfg();
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index 4afb77146bc..b07fa2c807b 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -324,6 +324,7 @@ public:
    void emit_gs_input_load(const fs_reg &dst, const nir_src &vertex_src,
                            unsigned base_offset, const nir_src &offset_src,
                            unsigned num_components, unsigned first_component);
+   void emit_urb_fence();
    void emit_cs_terminate();
    fs_reg emit_work_group_id_setup();
 
diff --git a/src/intel/compiler/brw_fs_visitor.cpp b/src/intel/compiler/brw_fs_visitor.cpp
index 5da019bf938..c7ae9fc6e29 100644
--- a/src/intel/compiler/brw_fs_visitor.cpp
+++ b/src/intel/compiler/brw_fs_visitor.cpp
@@ -27,6 +27,7 @@
  * makes it easier to do backend-specific optimizations than doing so
  * in the GLSL IR or in the native code.
  */
+#include "brw_eu.h"
 #include "brw_fs.h"
 #include "compiler/glsl_types.h"
 
@@ -1040,6 +1041,24 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
 }
 
 void
+fs_visitor::emit_urb_fence()
+{  /* Bspec 53421: fence URB memory before thread exit so later readers see it. */
+   fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD);   /* destination of the fence */
+   fs_inst *fence = bld.emit(SHADER_OPCODE_MEMORY_FENCE, dst,
+                             brw_vec8_grf(0, 0),
+                             brw_imm_ud(true),   /* NOTE(review): presumably commit-enable; confirm */
+                             brw_imm_ud(0));
+   fence->sfid = BRW_SFID_URB;   /* direct the fence message at the URB SFID */
+   fence->desc = lsc_fence_msg_desc(devinfo, LSC_FENCE_LOCAL,
+                                    LSC_FLUSH_TYPE_NONE, true);
+   /* The scheduling fence below takes dst as its only source (count 1). */
+   bld.exec_all().group(1, 0).emit(FS_OPCODE_SCHEDULING_FENCE,
+                                   bld.null_reg_ud(),
+                                   &dst,
+                                   1);
+}
+
+void
 fs_visitor::emit_cs_terminate()
 {
    assert(devinfo->ver >= 7);
author	Marcin Ślusarz <marcin.slusarz@intel.com>	2022-05-18 17:05:53 +0200
committer	Dylan Baker <dylan.c.baker@intel.com>	2022-08-10 14:16:02 -0700
commit	a8788b239297910ef4927d3f58773b29829cad44 (patch)
tree	ce6580bd3c3a29ccb8e8124d737d72e4b027418f /src
parent	acca7892340328663996c4ed06a6aef9c698b209 (diff)
download	mesa-a8788b239297910ef4927d3f58773b29829cad44.tar.gz