summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVasily Khoruzhick <anarsoul@gmail.com>2021-11-16 22:43:52 -0800
committerMarge Bot <emma+marge@anholt.net>2021-11-24 02:26:08 +0000
commit3b15fb35753763a0611d1209f7f55742228a2bca (patch)
tree0f980370b98a1581cf4398a28e1f7280813c8dbd
parent98a7c4c6f8e0dd8aca665ff1ae475ab3cdd53b12 (diff)
downloadmesa-3b15fb35753763a0611d1209f7f55742228a2bca.tar.gz
lima/ppir: implement gl_FragDepth support
Mali4x0 supports writing depth and stencil from the fragment shader, and we've been using it for quite a while for depth/stencil buffer reload. The missing part was specifying the output register for depth/stencil. To figure it out, I changed the reload shader to use register $4 as output and poked RSW bits (or rather consecutive 4-bit groups) until tests that rely on reload started to pass again. It turns out that the register number for gl_FragDepth/gl_FragStencil is in rsw->depth_test and the register number for gl_FragColor is in rsw->multi_sample, where it's repeated 4 times for some reason (likely for MSAA?). With this knowledge we can now modify the ppir compiler to support multiple store_output intrinsics. To do that, just add the destination SSA for each store_output to the register list for regalloc and mark it explicitly as an output. Since it's never read in the shader, we have to take care of it in liveness analysis - basically just mark it alive from the time it's written to the end of the block. If it's live only in the last instruction, mark it as live_internal, so regalloc doesn't clobber it. Then just let regalloc do its job, copy the register number to the shader state and program it in the RSW. The tricky part is gl_FragStencil, since it resides in the same register as gl_FragDepth and with the current design of the compiler it's hard to merge them. However, gl_FragStencil doesn't seem to be part of GL2 or GLES2, so we can just leave it unimplemented. We also need to take care of the stop bit for instructions - now we can't just set it in every instruction that stores an output, since there may be several outputs. So if there are any store_output instructions in the block, just mark the block as having a stop, and set the stop bit in the last instruction of the block. The only exception is discard - we always need to set the stop bit in the discard instruction. 
Reviewed-by: Andreas Baierl <ichgeh@imkreisrum.de> Reviewed-by: Erico Nunes <nunes.erico@gmail.com> Signed-off-by: Vasily Khoruzhick <anarsoul@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13830>
-rw-r--r--src/gallium/drivers/lima/ir/pp/codegen.c7
-rw-r--r--src/gallium/drivers/lima/ir/pp/instr.c2
-rw-r--r--src/gallium/drivers/lima/ir/pp/liveness.c17
-rw-r--r--src/gallium/drivers/lima/ir/pp/nir.c42
-rw-r--r--src/gallium/drivers/lima/ir/pp/node.c6
-rw-r--r--src/gallium/drivers/lima/ir/pp/node_to_instr.c11
-rw-r--r--src/gallium/drivers/lima/ir/pp/ppir.h40
-rw-r--r--src/gallium/drivers/lima/ir/pp/regalloc.c28
-rw-r--r--src/gallium/drivers/lima/lima_context.h2
-rw-r--r--src/gallium/drivers/lima/lima_draw.c15
-rw-r--r--src/gallium/drivers/lima/lima_parser.c11
11 files changed, 144 insertions, 37 deletions
diff --git a/src/gallium/drivers/lima/ir/pp/codegen.c b/src/gallium/drivers/lima/ir/pp/codegen.c
index 5a5ee14c78e..0760e19c1b9 100644
--- a/src/gallium/drivers/lima/ir/pp/codegen.c
+++ b/src/gallium/drivers/lima/ir/pp/codegen.c
@@ -773,7 +773,7 @@ static int encode_instr(ppir_instr *instr, void *code, void *last_code)
size = align_to_word(size) + 1;
ctrl->count = size;
- if (instr->is_end)
+ if (instr->stop)
ctrl->stop = true;
if (last_code) {
@@ -818,6 +818,11 @@ bool ppir_codegen_prog(ppir_compiler *comp)
instr->encode_size = get_instr_encode_size(instr);
size += instr->encode_size;
}
+ /* Set stop flag for the last instruction if block has stop flag */
+ if (block->stop) {
+ ppir_instr *instr = list_last_entry(&block->instr_list, ppir_instr, list);
+ instr->stop = true;
+ }
}
uint32_t *prog = rzalloc_size(comp->prog, size * sizeof(uint32_t));
diff --git a/src/gallium/drivers/lima/ir/pp/instr.c b/src/gallium/drivers/lima/ir/pp/instr.c
index 8e1bc95158d..fc64de8812f 100644
--- a/src/gallium/drivers/lima/ir/pp/instr.c
+++ b/src/gallium/drivers/lima/ir/pp/instr.c
@@ -284,7 +284,7 @@ void ppir_instr_print_list(ppir_compiler *comp)
list_for_each_entry(ppir_block, block, &comp->block_list, list) {
printf("-------block %3d-------\n", block->index);
list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
- printf("%c%03d: ", instr->is_end ? '*' : ' ', instr->index);
+ printf("%c%03d: ", instr->stop ? '*' : ' ', instr->index);
for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) {
ppir_node *node = instr->slots[i];
if (node)
diff --git a/src/gallium/drivers/lima/ir/pp/liveness.c b/src/gallium/drivers/lima/ir/pp/liveness.c
index 1799a53b165..8f642e2d05c 100644
--- a/src/gallium/drivers/lima/ir/pp/liveness.c
+++ b/src/gallium/drivers/lima/ir/pp/liveness.c
@@ -121,7 +121,7 @@ ppir_liveness_instr_srcs(ppir_compiler *comp, ppir_instr *instr)
/* Update the liveness information of the instruction by removing its
* dests from the live_in set. */
static void
-ppir_liveness_instr_dest(ppir_compiler *comp, ppir_instr *instr)
+ppir_liveness_instr_dest(ppir_compiler *comp, ppir_instr *instr, ppir_instr *last)
{
for (int i = PPIR_INSTR_SLOT_NUM-1; i >= 0; i--) {
ppir_node *node = instr->slots[i];
@@ -146,9 +146,18 @@ ppir_liveness_instr_dest(ppir_compiler *comp, ppir_instr *instr)
unsigned int index = reg->regalloc_index;
bool live = BITSET_TEST(instr->live_set, index);
+ /* If it's an out reg, it's alive till the end of the block, so add it
+ * to live_set of the last instruction */
+ if (!live && reg->out_reg && (instr != last)) {
+ BITSET_SET(last->live_set, index);
+ BITSET_CLEAR(instr->live_set, index);
+ continue;
+ }
+
/* If a register is written but wasn't read in a later instruction, it is
- * either dead code or a bug. For now, assign an interference to it to
- * ensure it doesn't get assigned a live register and overwrites it. */
+ * either an output register in last instruction, dead code or a bug.
+ * For now, assign an interference to it to ensure it doesn't get assigned
+ * a live register and overwrites it. */
if (!live) {
BITSET_SET(instr->live_internal, index);
continue;
@@ -230,7 +239,7 @@ ppir_liveness_compute_live_sets(ppir_compiler *comp)
instr->live_mask, next_instr->live_mask);
}
- ppir_liveness_instr_dest(comp, instr);
+ ppir_liveness_instr_dest(comp, instr, last);
ppir_liveness_instr_srcs(comp, instr);
cont |= !ppir_liveness_set_equal(comp,
diff --git a/src/gallium/drivers/lima/ir/pp/nir.c b/src/gallium/drivers/lima/ir/pp/nir.c
index 6f61a0986fb..fc8030fc745 100644
--- a/src/gallium/drivers/lima/ir/pp/nir.c
+++ b/src/gallium/drivers/lima/ir/pp/nir.c
@@ -345,6 +345,18 @@ static bool ppir_emit_intrinsic(ppir_block *block, nir_instr *ni)
* back to inserting a mov at the end.
* If the source node will only be able to output to pipeline
* registers, fall back to the mov as well. */
+ assert(nir_src_is_const(instr->src[1]) &&
+ "lima doesn't support indirect outputs");
+
+ nir_io_semantics io = nir_intrinsic_io_semantics(instr);
+ unsigned offset = nir_src_as_uint(instr->src[1]);
+ unsigned slot = io.location + offset;
+ ppir_output_type out_type = ppir_nir_output_to_ppir(slot);
+ if (out_type == ppir_output_invalid) {
+ ppir_debug("Unsupported output type: %d\n", slot);
+ return false;
+ }
+
if (!block->comp->uses_discard && instr->src->is_ssa) {
node = block->comp->var_nodes[instr->src->ssa->index];
switch (node->op) {
@@ -352,9 +364,12 @@ static bool ppir_emit_intrinsic(ppir_block *block, nir_instr *ni)
case ppir_op_load_texture:
case ppir_op_const:
break;
- default:
- node->is_end = 1;
+ default: {
+ ppir_dest *dest = ppir_node_get_dest(node);
+ dest->ssa.out_type = out_type;
+ node->is_out = 1;
return true;
+ }
}
}
@@ -367,6 +382,7 @@ static bool ppir_emit_intrinsic(ppir_block *block, nir_instr *ni)
dest->ssa.num_components = instr->num_components;
dest->ssa.index = 0;
dest->write_mask = u_bit_consecutive(0, instr->num_components);
+ dest->ssa.out_type = out_type;
alu_node->num_src = 1;
@@ -376,7 +392,7 @@ static bool ppir_emit_intrinsic(ppir_block *block, nir_instr *ni)
ppir_node_add_src(block->comp, &alu_node->node, alu_node->src, instr->src,
u_bit_consecutive(0, instr->num_components));
- alu_node->node.is_end = 1;
+ alu_node->node.is_out = 1;
list_addtail(&alu_node->node.list, &block->node_list);
return true;
@@ -798,6 +814,7 @@ static ppir_compiler *ppir_compiler_create(void *prog, unsigned num_reg, unsigne
comp->var_nodes = (ppir_node **)(comp + 1);
comp->reg_base = num_ssa;
comp->prog = prog;
+
return comp;
}
@@ -833,7 +850,7 @@ static void ppir_add_ordering_deps(ppir_compiler *comp)
if (prev_node && ppir_node_is_root(node) && node->op != ppir_op_const) {
ppir_node_add_dep(prev_node, node, ppir_dep_sequence);
}
- if (node->is_end ||
+ if (node->is_out ||
node->op == ppir_op_discard ||
node->op == ppir_op_store_temp ||
node->op == ppir_op_branch) {
@@ -930,18 +947,11 @@ bool ppir_compile_nir(struct lima_fs_compiled_shader *prog, struct nir_shader *n
}
}
- /* Validate outputs, we support only gl_FragColor */
- nir_foreach_shader_out_variable(var, nir) {
- switch (var->data.location) {
- case FRAG_RESULT_COLOR:
- case FRAG_RESULT_DATA0:
- break;
- default:
- ppir_error("unsupported output type\n");
- goto err_out0;
- break;
- }
- }
+ comp->out_type_to_reg = rzalloc_size(comp, sizeof(int) * ppir_output_num);
+
+ /* -1 means reg is not written by the shader */
+ for (int i = 0; i < ppir_output_num; i++)
+ comp->out_type_to_reg[i] = -1;
foreach_list_typed(nir_register, reg, node, &func->registers) {
ppir_reg *r = rzalloc(comp, ppir_reg);
diff --git a/src/gallium/drivers/lima/ir/pp/node.c b/src/gallium/drivers/lima/ir/pp/node.c
index 99d025e2c05..cc51448bfb3 100644
--- a/src/gallium/drivers/lima/ir/pp/node.c
+++ b/src/gallium/drivers/lima/ir/pp/node.c
@@ -618,9 +618,9 @@ static ppir_node *ppir_node_insert_mov_local(ppir_node *node)
ppir_node_add_dep(move, node, ppir_dep_src);
list_addtail(&move->list, &node->list);
- if (node->is_end) {
- node->is_end = false;
- move->is_end = true;
+ if (node->is_out) {
+ node->is_out = false;
+ move->is_out = true;
}
return move;
diff --git a/src/gallium/drivers/lima/ir/pp/node_to_instr.c b/src/gallium/drivers/lima/ir/pp/node_to_instr.c
index 0d5d37f4927..ff8d735d9a7 100644
--- a/src/gallium/drivers/lima/ir/pp/node_to_instr.c
+++ b/src/gallium/drivers/lima/ir/pp/node_to_instr.c
@@ -203,7 +203,7 @@ static bool ppir_do_one_node_to_instr(ppir_block *block, ppir_node *node)
case ppir_node_type_discard:
if (!create_new_instr(block, node))
return false;
- node->instr->is_end = true;
+ block->stop = true;
break;
case ppir_node_type_branch:
if (!create_new_instr(block, node))
@@ -276,8 +276,13 @@ static bool ppir_do_node_to_instr(ppir_block *block, ppir_node *root)
if (!ppir_do_one_node_to_instr(block, node))
return false;
- if (node->is_end)
- node->instr->is_end = true;
+ /* The node writes output register. We can't stop at this exact
+ * instruction because there may be another node that writes another
+ * output, so set stop flag for the block. We will set stop flag on
+ * the last instruction of the block during codegen
+ */
+ if (node->is_out)
+ block->stop = true;
ppir_node_foreach_pred(node, dep) {
ppir_node *pred = dep->pred;
diff --git a/src/gallium/drivers/lima/ir/pp/ppir.h b/src/gallium/drivers/lima/ir/pp/ppir.h
index 74a508e504e..f434786f4cf 100644
--- a/src/gallium/drivers/lima/ir/pp/ppir.h
+++ b/src/gallium/drivers/lima/ir/pp/ppir.h
@@ -161,7 +161,7 @@ typedef struct ppir_node {
struct ppir_instr *instr;
int instr_pos;
struct ppir_block *block;
- bool is_end;
+ bool is_out;
bool succ_different_block;
/* for scheduler */
@@ -179,9 +179,42 @@ typedef enum {
ppir_pipeline_reg_discard, /* varying load */
} ppir_pipeline;
+typedef enum {
+ ppir_output_color,
+ ppir_output_depth,
+ ppir_output_num,
+ ppir_output_invalid = -1,
+} ppir_output_type;
+
+static inline const char *ppir_output_type_to_str(ppir_output_type type)
+{
+ switch (type) {
+ case ppir_output_color:
+ return "OUTPUT_COLOR";
+ case ppir_output_depth:
+ return "OUTPUT_DEPTH";
+ default:
+ return "INVALID";
+ }
+}
+
+static inline ppir_output_type ppir_nir_output_to_ppir(gl_frag_result res)
+{
+ switch (res) {
+ case FRAG_RESULT_COLOR:
+ case FRAG_RESULT_DATA0:
+ return ppir_output_color;
+ case FRAG_RESULT_DEPTH:
+ return ppir_output_depth;
+ default:
+ return ppir_output_invalid;
+ }
+}
+
typedef struct ppir_reg {
struct list_head list;
int index;
+ ppir_output_type out_type;
int regalloc_index;
int num_components;
@@ -191,6 +224,7 @@ typedef struct ppir_reg {
bool is_head;
bool spilled;
bool undef;
+ bool out_reg;
} ppir_reg;
typedef enum {
@@ -316,7 +350,7 @@ typedef struct ppir_instr {
ppir_node *slots[PPIR_INSTR_SLOT_NUM];
ppir_const constant[2];
- bool is_end;
+ bool stop;
/* for scheduler */
struct list_head succ_list;
@@ -340,6 +374,7 @@ typedef struct ppir_block {
struct list_head list;
struct list_head node_list;
struct list_head instr_list;
+ bool stop;
struct ppir_block *successors[2];
@@ -370,6 +405,7 @@ typedef struct ppir_compiler {
struct hash_table_u64 *blocks;
int cur_index;
int cur_instr_index;
+ int *out_type_to_reg;
struct list_head reg_list;
int reg_num;
diff --git a/src/gallium/drivers/lima/ir/pp/regalloc.c b/src/gallium/drivers/lima/ir/pp/regalloc.c
index 3ea136b5660..37b54b5a4bf 100644
--- a/src/gallium/drivers/lima/ir/pp/regalloc.c
+++ b/src/gallium/drivers/lima/ir/pp/regalloc.c
@@ -82,9 +82,6 @@ static void ppir_regalloc_update_reglist_ssa(ppir_compiler *comp)
{
list_for_each_entry(ppir_block, block, &comp->block_list, list) {
list_for_each_entry(ppir_node, node, &block->node_list, list) {
- if (node->is_end)
- continue;
-
if (!node->instr || node->op == ppir_op_const)
continue;
@@ -94,6 +91,8 @@ static void ppir_regalloc_update_reglist_ssa(ppir_compiler *comp)
if (dest->type == ppir_target_ssa) {
reg = &dest->ssa;
+ if (node->is_out)
+ reg->out_reg = true;
list_addtail(&reg->list, &comp->reg_list);
comp->reg_num++;
}
@@ -133,6 +132,14 @@ static void ppir_regalloc_print_result(ppir_compiler *comp)
}
}
printf("--------------------------\n");
+
+ printf("======ppir output regs======\n");
+ for (int i = 0; i < ppir_output_num; i++) {
+ if (comp->out_type_to_reg[i] != -1)
+ printf("%s: $%d\n", ppir_output_type_to_str(i),
+ (int)comp->out_type_to_reg[i]);
+ }
+ printf("--------------------------\n");
}
static bool create_new_instr_after(ppir_block *block, ppir_instr *ref,
@@ -578,6 +585,11 @@ static bool ppir_regalloc_prog_try(ppir_compiler *comp, bool *spilled)
n = 0;
list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
reg->index = ra_get_node_reg(g, n++);
+ if (reg->out_reg) {
+ /* We need actual reg number, we don't have swizzle for output regs */
+ assert(!(reg->index & 0x3) && "ppir: output regs don't have swizzle");
+ comp->out_type_to_reg[reg->out_type] = reg->index / 4;
+ }
}
ralloc_free(g);
@@ -604,8 +616,11 @@ bool ppir_regalloc_prog(ppir_compiler *comp)
ppir_regalloc_update_reglist_ssa(comp);
/* No registers? Probably shader consists of discard instruction */
- if (list_is_empty(&comp->reg_list))
+ if (list_is_empty(&comp->reg_list)) {
+ comp->prog->state.frag_color_reg = 0;
+ comp->prog->state.frag_depth_reg = -1;
return true;
+ }
/* this will most likely succeed in the first
* try, except for very complicated shaders */
@@ -613,5 +628,10 @@ bool ppir_regalloc_prog(ppir_compiler *comp)
if (!spilled)
return false;
+ comp->prog->state.frag_color_reg =
+ comp->out_type_to_reg[ppir_output_color];
+ comp->prog->state.frag_depth_reg =
+ comp->out_type_to_reg[ppir_output_depth];
+
return true;
}
diff --git a/src/gallium/drivers/lima/lima_context.h b/src/gallium/drivers/lima/lima_context.h
index e871d0b952d..bea1ddeb477 100644
--- a/src/gallium/drivers/lima/lima_context.h
+++ b/src/gallium/drivers/lima/lima_context.h
@@ -49,6 +49,8 @@ struct lima_fs_compiled_shader {
struct {
int shader_size;
int stack_size;
+ int frag_color_reg;
+ int frag_depth_reg;
bool uses_discard;
} state;
};
diff --git a/src/gallium/drivers/lima/lima_draw.c b/src/gallium/drivers/lima/lima_draw.c
index 5c9f03b1d1c..889f0192179 100644
--- a/src/gallium/drivers/lima/lima_draw.c
+++ b/src/gallium/drivers/lima/lima_draw.c
@@ -677,6 +677,12 @@ lima_pack_render_state(struct lima_context *ctx, const struct pipe_draw_info *in
if (!rst->depth_clip_far || ctx->viewport.far == 1.0f)
render->depth_test |= 0x20; /* don't clip depth far */
+ if (fs->state.frag_depth_reg != -1) {
+ render->depth_test |= (fs->state.frag_depth_reg << 6);
+ /* Shader writes depth */
+ render->depth_test |= 0x801;
+ }
+
ushort far, near;
near = float_to_ushort(ctx->viewport.near);
@@ -729,6 +735,12 @@ lima_pack_render_state(struct lima_context *ctx, const struct pipe_draw_info *in
if (ctx->framebuffer.base.samples)
render->multi_sample |= 0x68;
+ /* Set gl_FragColor register, need to specify it 4 times */
+ render->multi_sample |= (fs->state.frag_color_reg << 28) |
+ (fs->state.frag_color_reg << 24) |
+ (fs->state.frag_color_reg << 20) |
+ (fs->state.frag_color_reg << 16);
+
/* alpha test */
if (ctx->zsa->base.alpha_enabled) {
render->multi_sample |= ctx->zsa->base.alpha_func;
@@ -755,7 +767,8 @@ lima_pack_render_state(struct lima_context *ctx, const struct pipe_draw_info *in
render->aux1 |= 0x00002000;
if (fs->state.uses_discard ||
- ctx->zsa->base.alpha_enabled) {
+ ctx->zsa->base.alpha_enabled ||
+ fs->state.frag_depth_reg != -1) {
early_z = false;
pixel_kill = false;
}
diff --git a/src/gallium/drivers/lima/lima_parser.c b/src/gallium/drivers/lima/lima_parser.c
index 3e69daee1b2..bcacd290aff 100644
--- a/src/gallium/drivers/lima/lima_parser.c
+++ b/src/gallium/drivers/lima/lima_parser.c
@@ -525,7 +525,7 @@ parse_rsw(FILE *fp, uint32_t *value, int i, uint32_t *helper)
fprintf(fp, ": ignore depth clip near");
if ((*value & 0x00000020) == 0x00000020)
fprintf(fp, ", ignore depth clip far");
- fprintf(fp, ", unknown bits 6-9: 0x%08x", *value & 0x000003c0);
+ fprintf(fp, ", register for gl_FragDepth: $%d", (*value & 0x000003c0) >> 6);
fprintf(fp, ", unknown bits 13-15: 0x%08x */\n", *value & 0x00000e000);
break;
case 4: /* DEPTH RANGE */
@@ -594,7 +594,14 @@ parse_rsw(FILE *fp, uint32_t *value, int i, uint32_t *helper)
fprintf(fp, " */\n");
else
fprintf(fp, ", UNKNOWN\n");
- fprintf(fp, "\t\t\t\t\t\t/* %s(2)", render_state_infos[i].info);
+
+ fprintf(fp, "\t\t\t\t\t\t/* %s(3)", render_state_infos[i].info);
+ fprintf(fp, ", register for gl_FragColor: $%d $%d $%d $%d */\n",
+ (*value & 0xf0000000) >> 28,
+ (*value & 0x0f000000) >> 24,
+ (*value & 0x00f00000) >> 20,
+ (*value & 0x000f0000) >> 16);
+ fprintf(fp, "\t\t\t\t\t\t/* %s(3)", render_state_infos[i].info);
fprintf(fp, ": alpha_test_func: %d (%s) */\n",
(*value & 0x00000007),
lima_get_compare_func_string((*value & 0x00000007))); /* alpha_test_func */