diff options
author | Hyunjun Ko <zzoon@igalia.com> | 2019-03-21 17:30:11 +0900 |
---|---|---|
committer | Rob Clark <robdclark@chromium.org> | 2019-06-03 12:44:03 -0700 |
commit | cbd1f47433b7d735e3be5c8126f7f2b9343a1cdf (patch) | |
tree | 238eecd7e38d0f57af302f25ae61bb4753bab819 /src/freedreno | |
parent | a9b556d3a041a817fc02c94e705fb865ffde86aa (diff) | |
download | mesa-cbd1f47433b7d735e3be5c8126f7f2b9343a1cdf.tar.gz |
freedreno/ir3: convert back to 32-bit values for half constant registers.
According to the blob driver, half constant registers seem to handle only
32-bit values within floating-point opcodes.
So when a lower-precision pass is in effect, we need to convert the 16-bit
values back to 32-bit values.
Signed-off-by: Rob Clark <robdclark@chromium.org>
Diffstat (limited to 'src/freedreno')
-rw-r--r-- | src/freedreno/ir3/ir3.h | 35 | ||||
-rw-r--r-- | src/freedreno/ir3/ir3_cp.c | 23 |
2 files changed, 54 insertions, 4 deletions
diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h index 05245cd54bb..4d1a44b1dec 100644 --- a/src/freedreno/ir3/ir3.h +++ b/src/freedreno/ir3/ir3.h @@ -877,6 +877,41 @@ static inline bool ir3_cat2_int(opc_t opc) } } +static inline bool ir3_cat2_float(opc_t opc) +{ + switch (opc) { + case OPC_ADD_F: + case OPC_MIN_F: + case OPC_MAX_F: + case OPC_MUL_F: + case OPC_SIGN_F: + case OPC_CMPS_F: + case OPC_ABSNEG_F: + case OPC_CMPV_F: + case OPC_FLOOR_F: + case OPC_CEIL_F: + case OPC_RNDNE_F: + case OPC_RNDAZ_F: + case OPC_TRUNC_F: + return true; + + default: + return false; + } +} + +static inline bool ir3_cat3_float(opc_t opc) +{ + switch (opc) { + case OPC_MAD_F16: + case OPC_MAD_F32: + case OPC_SEL_F16: + case OPC_SEL_F32: + return true; + default: + return false; + } +} /* map cat2 instruction to valid abs/neg flags: */ static inline unsigned ir3_cat2_absneg(opc_t opc) diff --git a/src/freedreno/ir3/ir3_cp.c b/src/freedreno/ir3/ir3_cp.c index dedbd8dbb1d..9bd97b690e3 100644 --- a/src/freedreno/ir3/ir3_cp.c +++ b/src/freedreno/ir3/ir3_cp.c @@ -25,6 +25,8 @@ */ #include <math.h> +#include "util/half_float.h" +#include "util/u_math.h" #include "ir3.h" #include "ir3_compiler.h" @@ -268,7 +270,7 @@ static void combine_flags(unsigned *dstflags, struct ir3_instruction *src) } static struct ir3_register * -lower_immed(struct ir3_cp_ctx *ctx, struct ir3_register *reg, unsigned new_flags) +lower_immed(struct ir3_cp_ctx *ctx, struct ir3_register *reg, unsigned new_flags, bool f_opcode) { unsigned swiz, idx, i; @@ -318,6 +320,13 @@ lower_immed(struct ir3_cp_ctx *ctx, struct ir3_register *reg, unsigned new_flags /* need to generate a new immediate: */ swiz = i % 4; idx = i / 4; + + /* Half constant registers seems to handle only 32-bit values + * within floating-point opcodes. So convert back to 32-bit values. 
*/ + if (f_opcode && (new_flags & IR3_REG_HALF)) { + reg->uim_val = fui(_mesa_half_to_float(reg->uim_val)); + } + const_state->immediates[idx].val[swiz] = reg->uim_val; const_state->immediates_count = idx + 1; const_state->immediate_idx++; @@ -398,8 +407,12 @@ reg_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr, if (!valid_flags(instr, n, new_flags)) { /* See if lowering an immediate to const would help. */ if (valid_flags(instr, n, (new_flags & ~IR3_REG_IMMED) | IR3_REG_CONST)) { + bool f_opcode = (ir3_cat2_float(instr->opc) || + ir3_cat3_float(instr->opc)) ? true : false; + debug_assert(new_flags & IR3_REG_IMMED); - instr->regs[n + 1] = lower_immed(ctx, src_reg, new_flags); + + instr->regs[n + 1] = lower_immed(ctx, src_reg, new_flags, f_opcode); return; } @@ -504,10 +517,12 @@ reg_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr, src_reg->iim_val = iim_val; instr->regs[n+1] = src_reg; } else if (valid_flags(instr, n, (new_flags & ~IR3_REG_IMMED) | IR3_REG_CONST)) { + bool f_opcode = (ir3_cat2_float(instr->opc) || + ir3_cat3_float(instr->opc)) ? true : false; + /* See if lowering an immediate to const would help. */ - instr->regs[n+1] = lower_immed(ctx, src_reg, new_flags); + instr->regs[n+1] = lower_immed(ctx, src_reg, new_flags, f_opcode); } - return; } } |