summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jason Ekstrand <jason@jlekstrand.net> 2019-09-24 17:06:12 -0500
committer: Juan A. Suarez Romero <jasuarez@igalia.com> 2019-10-02 09:41:27 -0400
commit: f04184036737b81415de6d34442ff4749abe0cfd (patch)
tree: 1d20bee8366fb548aa0e5253cc6413674e6cbaf6
parent: 4a50b8add1b6bb7601a995caa67469f2bef97f59 (diff)
download: mesa-f04184036737b81415de6d34442ff4749abe0cfd.tar.gz
intel/fs: Fix fs_inst::flags_read for ANY/ALL predicates
Without this, we were DCEing flag writes: we assumed their results were unused, because we did not account for the fact that an ANY32 predicate actually reads all of the flags.
Fixes: df1aec763eb "i965/fs: Define methods to calculate the flag..."
Reviewed-by: Matt Turner <mattst88@gmail.com>
(cherry picked from commit 6c858b9a915b54d127a64ab817f111a82716a037)
-rw-r--r-- src/intel/compiler/brw_fs.cpp | 34
1 files changed, 28 insertions, 6 deletions
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index f7e37d57b22..fa98758a0eb 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -1011,15 +1011,37 @@ fs_inst::size_read(int arg) const
}
namespace {
+ unsigned
+ predicate_width(brw_predicate predicate)
+ {
+ switch (predicate) {
+ case BRW_PREDICATE_NONE: return 1;
+ case BRW_PREDICATE_NORMAL: return 1;
+ case BRW_PREDICATE_ALIGN1_ANY2H: return 2;
+ case BRW_PREDICATE_ALIGN1_ALL2H: return 2;
+ case BRW_PREDICATE_ALIGN1_ANY4H: return 4;
+ case BRW_PREDICATE_ALIGN1_ALL4H: return 4;
+ case BRW_PREDICATE_ALIGN1_ANY8H: return 8;
+ case BRW_PREDICATE_ALIGN1_ALL8H: return 8;
+ case BRW_PREDICATE_ALIGN1_ANY16H: return 16;
+ case BRW_PREDICATE_ALIGN1_ALL16H: return 16;
+ case BRW_PREDICATE_ALIGN1_ANY32H: return 32;
+ case BRW_PREDICATE_ALIGN1_ALL32H: return 32;
+ default: unreachable("Unsupported predicate");
+ }
+ }
+
/* Return the subset of flag registers that an instruction could
* potentially read or write based on the execution controls and flag
* subregister number of the instruction.
*/
unsigned
- flag_mask(const fs_inst *inst)
+ flag_mask(const fs_inst *inst, unsigned width)
{
- const unsigned start = inst->flag_subreg * 16 + inst->group;
- const unsigned end = start + inst->exec_size;
+ assert(util_is_power_of_two_nonzero(width));
+ const unsigned start = (inst->flag_subreg * 16 + inst->group) &
+ ~(width - 1);
+ const unsigned end = start + ALIGN(inst->exec_size, width);
return ((1 << DIV_ROUND_UP(end, 8)) - 1) & ~((1 << (start / 8)) - 1);
}
@@ -1051,9 +1073,9 @@ fs_inst::flags_read(const gen_device_info *devinfo) const
* f0.0 and f1.0 on Gen7+, and f0.0 and f0.1 on older hardware.
*/
const unsigned shift = devinfo->gen >= 7 ? 4 : 2;
- return flag_mask(this) << shift | flag_mask(this);
+ return flag_mask(this, 1) << shift | flag_mask(this, 1);
} else if (predicate) {
- return flag_mask(this);
+ return flag_mask(this, predicate_width(predicate));
} else {
unsigned mask = 0;
for (int i = 0; i < sources; i++) {
@@ -1072,7 +1094,7 @@ fs_inst::flags_written() const
opcode != BRW_OPCODE_WHILE)) ||
opcode == SHADER_OPCODE_FIND_LIVE_CHANNEL ||
opcode == FS_OPCODE_FB_WRITE) {
- return flag_mask(this);
+ return flag_mask(this, 1);
} else {
return flag_mask(dst, size_written);
}