summaryrefslogtreecommitdiff
path: root/src/freedreno
diff options
context:
space:
mode:
authorHyunjun Ko <zzoon@igalia.com>2019-03-21 17:30:11 +0900
committerRob Clark <robdclark@chromium.org>2019-06-03 12:44:03 -0700
commitcbd1f47433b7d735e3be5c8126f7f2b9343a1cdf (patch)
tree238eecd7e38d0f57af302f25ae61bb4753bab819 /src/freedreno
parenta9b556d3a041a817fc02c94e705fb865ffde86aa (diff)
downloadmesa-cbd1f47433b7d735e3be5c8126f7f2b9343a1cdf.tar.gz
freedreno/ir3: convert back to 32-bit values for half constant registers.
According to the blob driver, the hardware seems to handle only 32-bit values for half constant registers within floating-point opcodes. So, when a lower-precision pass is in effect, we need to convert the 16-bit values back to 32-bit values. Signed-off-by: Rob Clark <robdclark@chromium.org>
Diffstat (limited to 'src/freedreno')
-rw-r--r--src/freedreno/ir3/ir3.h35
-rw-r--r--src/freedreno/ir3/ir3_cp.c23
2 files changed, 54 insertions, 4 deletions
diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h
index 05245cd54bb..4d1a44b1dec 100644
--- a/src/freedreno/ir3/ir3.h
+++ b/src/freedreno/ir3/ir3.h
@@ -877,6 +877,41 @@ static inline bool ir3_cat2_int(opc_t opc)
}
}
+static inline bool ir3_cat2_float(opc_t opc)
+{
+ switch (opc) {
+ case OPC_ADD_F:
+ case OPC_MIN_F:
+ case OPC_MAX_F:
+ case OPC_MUL_F:
+ case OPC_SIGN_F:
+ case OPC_CMPS_F:
+ case OPC_ABSNEG_F:
+ case OPC_CMPV_F:
+ case OPC_FLOOR_F:
+ case OPC_CEIL_F:
+ case OPC_RNDNE_F:
+ case OPC_RNDAZ_F:
+ case OPC_TRUNC_F:
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+static inline bool ir3_cat3_float(opc_t opc)
+{
+ switch (opc) {
+ case OPC_MAD_F16:
+ case OPC_MAD_F32:
+ case OPC_SEL_F16:
+ case OPC_SEL_F32:
+ return true;
+ default:
+ return false;
+ }
+}
/* map cat2 instruction to valid abs/neg flags: */
static inline unsigned ir3_cat2_absneg(opc_t opc)
diff --git a/src/freedreno/ir3/ir3_cp.c b/src/freedreno/ir3/ir3_cp.c
index dedbd8dbb1d..9bd97b690e3 100644
--- a/src/freedreno/ir3/ir3_cp.c
+++ b/src/freedreno/ir3/ir3_cp.c
@@ -25,6 +25,8 @@
*/
#include <math.h>
+#include "util/half_float.h"
+#include "util/u_math.h"
#include "ir3.h"
#include "ir3_compiler.h"
@@ -268,7 +270,7 @@ static void combine_flags(unsigned *dstflags, struct ir3_instruction *src)
}
static struct ir3_register *
-lower_immed(struct ir3_cp_ctx *ctx, struct ir3_register *reg, unsigned new_flags)
+lower_immed(struct ir3_cp_ctx *ctx, struct ir3_register *reg, unsigned new_flags, bool f_opcode)
{
unsigned swiz, idx, i;
@@ -318,6 +320,13 @@ lower_immed(struct ir3_cp_ctx *ctx, struct ir3_register *reg, unsigned new_flags
/* need to generate a new immediate: */
swiz = i % 4;
idx = i / 4;
+
+ /* Half constant registers seem to handle only 32-bit values
+ * within floating-point opcodes, so convert back to 32-bit values. */
+ if (f_opcode && (new_flags & IR3_REG_HALF)) {
+ reg->uim_val = fui(_mesa_half_to_float(reg->uim_val));
+ }
+
const_state->immediates[idx].val[swiz] = reg->uim_val;
const_state->immediates_count = idx + 1;
const_state->immediate_idx++;
@@ -398,8 +407,12 @@ reg_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr,
if (!valid_flags(instr, n, new_flags)) {
/* See if lowering an immediate to const would help. */
if (valid_flags(instr, n, (new_flags & ~IR3_REG_IMMED) | IR3_REG_CONST)) {
+ bool f_opcode = (ir3_cat2_float(instr->opc) ||
+ ir3_cat3_float(instr->opc)) ? true : false;
+
debug_assert(new_flags & IR3_REG_IMMED);
- instr->regs[n + 1] = lower_immed(ctx, src_reg, new_flags);
+
+ instr->regs[n + 1] = lower_immed(ctx, src_reg, new_flags, f_opcode);
return;
}
@@ -504,10 +517,12 @@ reg_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr,
src_reg->iim_val = iim_val;
instr->regs[n+1] = src_reg;
} else if (valid_flags(instr, n, (new_flags & ~IR3_REG_IMMED) | IR3_REG_CONST)) {
+ bool f_opcode = (ir3_cat2_float(instr->opc) ||
+ ir3_cat3_float(instr->opc)) ? true : false;
+
/* See if lowering an immediate to const would help. */
- instr->regs[n+1] = lower_immed(ctx, src_reg, new_flags);
+ instr->regs[n+1] = lower_immed(ctx, src_reg, new_flags, f_opcode);
}
-
return;
}
}