aco: implement 64-bit integer reductions

The multiplication reduction is larger than it could be, but it should be easier to implement this way.

No failures with dEQP-VK.subgroups.*int64* except those caused by LLVM being used for other stages.

v2: don't call setFixed() for v_add carry-out, since setHint sets physReg
v3: add and use emit_vadd32() helper
v4: use num_opcodes instead of last_opcode

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> (v3)
author: Rhys Perry <pendingchaos02@gmail.com> 2019-11-11 19:48:54 +0000
committer: Rhys Perry <pendingchaos02@gmail.com> 2019-11-19 18:58:04 +0000
commit: 56c06c79fcf32fdec67d6bc6141b6fa76a773c16 (patch)
tree: aebe3a66a05c619f4d0e2a8d97f3cdd9986838ca /src/amd/compiler/aco_reduce_assign.cpp
parent: 33277bd66e32d50a96b7cd5dfe73a6a962138ea2 (diff)
download: mesa-56c06c79fcf32fdec67d6bc6141b6fa76a773c16.tar.gz
1 files changed, 12 insertions, 2 deletions
diff --git a/src/amd/compiler/aco_reduce_assign.cpp b/src/amd/compiler/aco_reduce_assign.cpp
index d9c762a65db..28a779580a2 100644
--- a/src/amd/compiler/aco_reduce_assign.cpp
+++ b/src/amd/compiler/aco_reduce_assign.cpp
@@ -117,10 +117,14 @@ void setup_reduce_temp(Program* program)
          /* same as before, except for the vector temporary instead of the reduce temporary */
          unsigned cluster_size = static_cast<Pseudo_reduction_instruction *>(instr)->cluster_size;
          bool need_vtmp = op == imul32 || op == fadd64 || op == fmul64 ||
-                          op == fmin64 || op == fmax64;
+                          op == fmin64 || op == fmax64 || op == umin64 ||
+                          op == umax64 || op == imin64 || op == imax64 ||
+                          op == imul64;
 
          if (program->chip_class >= GFX10 && cluster_size == 64 && op != gfx10_wave64_bpermute)
             need_vtmp = true;
+         if (program->chip_class >= GFX10 && op == iadd64)
+            need_vtmp = true;
 
          need_vtmp |= cluster_size == 32;
 
@@ -161,7 +165,13 @@ void setup_reduce_temp(Program* program)
          }
 
          /* vcc clobber */
-         if (op == iadd32 && program->chip_class < GFX9)
+         bool clobber_vcc = false;
+         if ((op == iadd32 || op == imul64) && program->chip_class < GFX9)
+            clobber_vcc = true;
+         if (op == iadd64 || op == umin64 || op == umax64 || op == imin64 || op == imax64)
+            clobber_vcc = true;
+
+         if (clobber_vcc)
             instr->definitions[4] = Definition(vcc, s2);
       }
    }
author	Rhys Perry <pendingchaos02@gmail.com>	2019-11-11 19:48:54 +0000
committer	Rhys Perry <pendingchaos02@gmail.com>	2019-11-19 18:58:04 +0000
commit	56c06c79fcf32fdec67d6bc6141b6fa76a773c16 (patch)
tree	aebe3a66a05c619f4d0e2a8d97f3cdd9986838ca /src/amd/compiler/aco_reduce_assign.cpp
parent	33277bd66e32d50a96b7cd5dfe73a6a962138ea2 (diff)
download	mesa-56c06c79fcf32fdec67d6bc6141b6fa76a773c16.tar.gz