summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorMarcin Ślusarz <marcin.slusarz@intel.com>2022-05-18 17:05:53 +0200
committerDylan Baker <dylan.c.baker@intel.com>2022-08-10 14:16:02 -0700
commita8788b239297910ef4927d3f58773b29829cad44 (patch)
treece6580bd3c3a29ccb8e8124d737d72e4b027418f /src
parentacca7892340328663996c4ed06a6aef9c698b209 (diff)
downloadmesa-a8788b239297910ef4927d3f58773b29829cad44.tar.gz
intel/compiler: insert URB fence before task/mesh termination
Bspec 53421 says: "A URB fence memory is typically performed prior the thread exit message, so that the next thread dispatch that reads that URB memory will see it."

Cc: 22.1 <mesa-stable>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16665>
(cherry picked from commit 7ebae85955cbef4dae83c7888052d2c495d5c364)
Diffstat (limited to 'src')
-rw-r--r--src/intel/compiler/brw_fs.cpp4
-rw-r--r--src/intel/compiler/brw_fs.h1
-rw-r--r--src/intel/compiler/brw_fs_visitor.cpp19
3 files changed, 24 insertions, 0 deletions
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index b2e24014501..3172a79fe98 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -9442,6 +9442,8 @@ fs_visitor::run_task(bool allow_spilling)
if (failed)
return false;
+ emit_urb_fence();
+
emit_cs_terminate();
calculate_cfg();
@@ -9487,6 +9489,8 @@ fs_visitor::run_mesh(bool allow_spilling)
if (failed)
return false;
+ emit_urb_fence();
+
emit_cs_terminate();
calculate_cfg();
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index 4afb77146bc..b07fa2c807b 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -324,6 +324,7 @@ public:
void emit_gs_input_load(const fs_reg &dst, const nir_src &vertex_src,
unsigned base_offset, const nir_src &offset_src,
unsigned num_components, unsigned first_component);
+ void emit_urb_fence();
void emit_cs_terminate();
fs_reg emit_work_group_id_setup();
diff --git a/src/intel/compiler/brw_fs_visitor.cpp b/src/intel/compiler/brw_fs_visitor.cpp
index 5da019bf938..c7ae9fc6e29 100644
--- a/src/intel/compiler/brw_fs_visitor.cpp
+++ b/src/intel/compiler/brw_fs_visitor.cpp
@@ -27,6 +27,7 @@
* makes it easier to do backend-specific optimizations than doing so
* in the GLSL IR or in the native code.
*/
+#include "brw_eu.h"
#include "brw_fs.h"
#include "compiler/glsl_types.h"
@@ -1040,6 +1041,24 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
}
void
+fs_visitor::emit_urb_fence()
+{
+ fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD);
+ fs_inst *fence = bld.emit(SHADER_OPCODE_MEMORY_FENCE, dst,
+ brw_vec8_grf(0, 0),
+ brw_imm_ud(true),
+ brw_imm_ud(0));
+ fence->sfid = BRW_SFID_URB;
+ fence->desc = lsc_fence_msg_desc(devinfo, LSC_FENCE_LOCAL,
+ LSC_FLUSH_TYPE_NONE, true);
+
+ bld.exec_all().group(1, 0).emit(FS_OPCODE_SCHEDULING_FENCE,
+ bld.null_reg_ud(),
+ &dst,
+ 1);
+}
+
+void
fs_visitor::emit_cs_terminate()
{
assert(devinfo->ver >= 7);