summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Paulo Zanoni <paulo.r.zanoni@intel.com> 2019-08-09 15:40:33 -0700
committer Jason Ekstrand <jason@jlekstrand.net> 2019-09-19 02:47:17 +0000
commit d9ddf5076d13ccfdc0ef690ed64f9fabfc0b313a (patch)
tree 6c4c2aa9a381607541ff771ffc27038ccb113bd1
parent 7f07046dbcb0effec79625cd598fa98ddc748bbf (diff)
download mesa-d9ddf5076d13ccfdc0ef690ed64f9fabfc0b313a.tar.gz
intel/fs: make scan/reduce work with SIMD32 when it fits 2 registers
When dealing with uint16_t and uint8_t on SIMD32 we can do all the operations using just 2 registers, so we don't hit the recursion at the beginning of emit_scan(). Because of that, we need to actually compute scan/reduce for channels 31:16.

v2: Still missed instructions (Jason).

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
-rw-r--r-- src/intel/compiler/brw_fs_builder.h 23
1 file changed, 23 insertions, 0 deletions
diff --git a/src/intel/compiler/brw_fs_builder.h b/src/intel/compiler/brw_fs_builder.h
index 0c1b6f5d6c7..1000d956d6f 100644
--- a/src/intel/compiler/brw_fs_builder.h
+++ b/src/intel/compiler/brw_fs_builder.h
@@ -514,6 +514,16 @@ namespace brw {
right = horiz_offset(tmp, 8 + 4);
set_condmod(mod, ubld.emit(opcode, right, left, right));
}
+
+ if (dispatch_width() > 16) {
+ left = component(tmp, 16 + 3);
+ right = horiz_offset(tmp, 16 + 4);
+ set_condmod(mod, ubld.emit(opcode, right, left, right));
+
+ left = component(tmp, 24 + 3);
+ right = horiz_offset(tmp, 24 + 4);
+ set_condmod(mod, ubld.emit(opcode, right, left, right));
+ }
}
if (cluster_size > 8 && dispatch_width() > 8) {
@@ -521,6 +531,19 @@ namespace brw {
src_reg left = component(tmp, 7);
dst_reg right = horiz_offset(tmp, 8);
set_condmod(mod, ubld.emit(opcode, right, left, right));
+
+ if (dispatch_width() > 16) {
+ left = component(tmp, 16 + 7);
+ right = horiz_offset(tmp, 16 + 8);
+ set_condmod(mod, ubld.emit(opcode, right, left, right));
+ }
+ }
+
+ if (cluster_size > 16 && dispatch_width() > 16) {
+ const fs_builder ubld = exec_all().group(16, 0);
+ src_reg left = component(tmp, 15);
+ dst_reg right = horiz_offset(tmp, 16);
+ set_condmod(mod, ubld.emit(opcode, right, left, right));
}
}