summaryrefslogtreecommitdiff
path: root/src/amd/compiler/aco_reduce_assign.cpp
diff options
context:
space:
mode:
author Rhys Perry <pendingchaos02@gmail.com> 2019-11-11 19:48:54 +0000
committer Rhys Perry <pendingchaos02@gmail.com> 2019-11-19 18:58:04 +0000
commit 56c06c79fcf32fdec67d6bc6141b6fa76a773c16 (patch)
tree aebe3a66a05c619f4d0e2a8d97f3cdd9986838ca /src/amd/compiler/aco_reduce_assign.cpp
parent 33277bd66e32d50a96b7cd5dfe73a6a962138ea2 (diff)
download mesa-56c06c79fcf32fdec67d6bc6141b6fa76a773c16.tar.gz
aco: implement 64-bit integer reductions
The multiplication reduction is larger than it could be, but it should be easier to implement this way.

No failures with dEQP-VK.subgroups.*int64* except those caused by LLVM being used for other stages.

v2: don't call setFixed() for v_add carry-out, since setHint sets physReg
v3: add and use emit_vadd32() helper
v4: use num_opcodes instead of last_opcode

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> (v3)
Diffstat (limited to 'src/amd/compiler/aco_reduce_assign.cpp')
-rw-r--r-- src/amd/compiler/aco_reduce_assign.cpp 14
1 file changed, 12 insertions, 2 deletions
diff --git a/src/amd/compiler/aco_reduce_assign.cpp b/src/amd/compiler/aco_reduce_assign.cpp
index d9c762a65db..28a779580a2 100644
--- a/src/amd/compiler/aco_reduce_assign.cpp
+++ b/src/amd/compiler/aco_reduce_assign.cpp
@@ -117,10 +117,14 @@ void setup_reduce_temp(Program* program)
/* same as before, except for the vector temporary instead of the reduce temporary */
unsigned cluster_size = static_cast<Pseudo_reduction_instruction *>(instr)->cluster_size;
bool need_vtmp = op == imul32 || op == fadd64 || op == fmul64 ||
- op == fmin64 || op == fmax64;
+ op == fmin64 || op == fmax64 || op == umin64 ||
+ op == umax64 || op == imin64 || op == imax64 ||
+ op == imul64;
if (program->chip_class >= GFX10 && cluster_size == 64 && op != gfx10_wave64_bpermute)
need_vtmp = true;
+ if (program->chip_class >= GFX10 && op == iadd64)
+ need_vtmp = true;
need_vtmp |= cluster_size == 32;
@@ -161,7 +165,13 @@ void setup_reduce_temp(Program* program)
}
/* vcc clobber */
- if (op == iadd32 && program->chip_class < GFX9)
+ bool clobber_vcc = false;
+ if ((op == iadd32 || op == imul64) && program->chip_class < GFX9)
+ clobber_vcc = true;
+ if (op == iadd64 || op == umin64 || op == umax64 || op == imin64 || op == imax64)
+ clobber_vcc = true;
+
+ if (clobber_vcc)
instr->definitions[4] = Definition(vcc, s2);
}
}