diff options
author | Rhys Perry <pendingchaos02@gmail.com> | 2019-11-11 19:48:54 +0000 |
---|---|---|
committer | Rhys Perry <pendingchaos02@gmail.com> | 2019-11-19 18:58:04 +0000 |
commit | 56c06c79fcf32fdec67d6bc6141b6fa76a773c16 (patch) | |
tree | aebe3a66a05c619f4d0e2a8d97f3cdd9986838ca /src/amd/compiler/aco_reduce_assign.cpp | |
parent | 33277bd66e32d50a96b7cd5dfe73a6a962138ea2 (diff) | |
download | mesa-56c06c79fcf32fdec67d6bc6141b6fa76a773c16.tar.gz |
aco: implement 64-bit integer reductions
The multiplication reduction is larger than it could be, but it should be
easier to implement this way.
No failures with dEQP-VK.subgroups.*int64* except those caused by LLVM
being used for other stages.
v2: don't call setFixed() for v_add carry-out, since setHint sets physReg
v3: add and use emit_vadd32() helper
v4: use num_opcodes instead of last_opcode
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> (v3)
Diffstat (limited to 'src/amd/compiler/aco_reduce_assign.cpp')
-rw-r--r-- | src/amd/compiler/aco_reduce_assign.cpp | 14 |
1 file changed, 12 insertions, 2 deletions
```diff
diff --git a/src/amd/compiler/aco_reduce_assign.cpp b/src/amd/compiler/aco_reduce_assign.cpp
index d9c762a65db..28a779580a2 100644
--- a/src/amd/compiler/aco_reduce_assign.cpp
+++ b/src/amd/compiler/aco_reduce_assign.cpp
@@ -117,10 +117,14 @@ void setup_reduce_temp(Program* program)
          /* same as before, except for the vector temporary instead of the reduce temporary */
          unsigned cluster_size = static_cast<Pseudo_reduction_instruction *>(instr)->cluster_size;
          bool need_vtmp = op == imul32 || op == fadd64 || op == fmul64 ||
-                          op == fmin64 || op == fmax64;
+                          op == fmin64 || op == fmax64 || op == umin64 ||
+                          op == umax64 || op == imin64 || op == imax64 ||
+                          op == imul64;
 
          if (program->chip_class >= GFX10 && cluster_size == 64 && op != gfx10_wave64_bpermute)
             need_vtmp = true;
+         if (program->chip_class >= GFX10 && op == iadd64)
+            need_vtmp = true;
 
          need_vtmp |= cluster_size == 32;
 
@@ -161,7 +165,13 @@ void setup_reduce_temp(Program* program)
          }
 
          /* vcc clobber */
-         if (op == iadd32 && program->chip_class < GFX9)
+         bool clobber_vcc = false;
+         if ((op == iadd32 || op == imul64) && program->chip_class < GFX9)
+            clobber_vcc = true;
+         if (op == iadd64 || op == umin64 || op == umax64 || op == imin64 || op == imax64)
+            clobber_vcc = true;
+
+         if (clobber_vcc)
             instr->definitions[4] = Definition(vcc, s2);
       }
    }
```