summary refs log tree commit diff
path: root/src/freedreno
diff options
context:
space:
mode:
author Rob Clark <robdclark@chromium.org> 2019-05-06 14:52:27 -0700
committer Rob Clark <robdclark@chromium.org> 2019-05-07 07:26:00 -0700
commit 23e7a34466c448c4c7c9a2c2e4d200dedf2584f7 (patch)
tree 9feaa00be35bce3642c01a29588956ae237c6b09 /src/freedreno
parent ef3eecd66bdcaa3991dd2b53cb3e7285bed6d718 (diff)
download mesa-23e7a34466c448c4c7c9a2c2e4d200dedf2584f7.tar.gz
freedreno/ir3: consolidate const state
Combine the offsets of different parts of the constant space with (what was formerly known as) ir3_driver_const_layout. Bunch of churn, but no functional change. Signed-off-by: Rob Clark <robdclark@chromium.org>
Diffstat (limited to 'src/freedreno')
-rw-r--r-- src/freedreno/ir3/ir3_a4xx.c 7
-rw-r--r-- src/freedreno/ir3/ir3_compiler_nir.c 22
-rw-r--r-- src/freedreno/ir3/ir3_context.c 53
-rw-r--r-- src/freedreno/ir3/ir3_cp.c 4
-rw-r--r-- src/freedreno/ir3/ir3_nir.c 2
-rw-r--r-- src/freedreno/ir3/ir3_nir.h 2
-rw-r--r-- src/freedreno/ir3/ir3_shader.c 3
-rw-r--r-- src/freedreno/ir3/ir3_shader.h 72
8 files changed, 90 insertions, 75 deletions
diff --git a/src/freedreno/ir3/ir3_a4xx.c b/src/freedreno/ir3/ir3_a4xx.c
index 426a143acfb..5fe15cf8e27 100644
--- a/src/freedreno/ir3/ir3_a4xx.c
+++ b/src/freedreno/ir3/ir3_a4xx.c
@@ -217,10 +217,11 @@ get_image_offset(struct ir3_context *ctx, const nir_variable *var,
/* to calculate the byte offset (yes, uggg) we need (up to) three
* const values to know the bytes per pixel, and y and z stride:
*/
- unsigned cb = regid(ctx->so->constbase.image_dims, 0) +
- ctx->so->const_layout.image_dims.off[var->data.driver_location];
+ struct ir3_const_state *const_state = &ctx->so->const_state;
+ unsigned cb = regid(const_state->offsets.image_dims, 0) +
+ const_state->image_dims.off[var->data.driver_location];
- debug_assert(ctx->so->const_layout.image_dims.mask &
+ debug_assert(const_state->image_dims.mask &
(1 << var->data.driver_location));
/* offset = coords.x * bytes_per_pixel: */
diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
index 7a3b4a19ad7..3eb34f44b14 100644
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -107,7 +107,8 @@ create_driver_param(struct ir3_context *ctx, enum ir3_driver_param dp)
{
/* first four vec4 sysval's reserved for UBOs: */
/* NOTE: dp is in scalar, but there can be >4 dp components: */
- unsigned n = ctx->so->constbase.driver_param;
+ struct ir3_const_state *const_state = &ctx->so->const_state;
+ unsigned n = const_state->offsets.driver_param;
unsigned r = regid(n + dp / 4, dp % 4);
return create_uniform(ctx->block, r);
}
@@ -683,7 +684,8 @@ emit_intrinsic_load_ubo(struct ir3_context *ctx, nir_intrinsic_instr *intr,
/* UBO addresses are the first driver params, but subtract 2 here to
* account for nir_lower_uniforms_to_ubo rebasing the UBOs such that UBO 0
* is the uniforms: */
- unsigned ubo = regid(ctx->so->constbase.ubo, 0) - 2;
+ struct ir3_const_state *const_state = &ctx->so->const_state;
+ unsigned ubo = regid(const_state->offsets.ubo, 0) - 2;
const unsigned ptrsz = ir3_pointer_size(ctx->compiler);
int off = 0;
@@ -751,11 +753,12 @@ emit_intrinsic_ssbo_size(struct ir3_context *ctx, nir_intrinsic_instr *intr,
struct ir3_instruction **dst)
{
/* SSBO size stored as a const starting at ssbo_sizes: */
+ struct ir3_const_state *const_state = &ctx->so->const_state;
unsigned blk_idx = nir_src_as_uint(intr->src[0]);
- unsigned idx = regid(ctx->so->constbase.ssbo_sizes, 0) +
- ctx->so->const_layout.ssbo_size.off[blk_idx];
+ unsigned idx = regid(const_state->offsets.ssbo_sizes, 0) +
+ const_state->ssbo_size.off[blk_idx];
- debug_assert(ctx->so->const_layout.ssbo_size.mask & (1 << blk_idx));
+ debug_assert(const_state->ssbo_size.mask & (1 << blk_idx));
dst[0] = create_uniform(ctx->block, idx);
}
@@ -1006,8 +1009,9 @@ emit_intrinsic_image_size(struct ir3_context *ctx, nir_intrinsic_instr *intr,
* bytes-per-pixel should have been emitted in 2nd slot of
* image_dims. See ir3_shader::emit_image_dims().
*/
- unsigned cb = regid(ctx->so->constbase.image_dims, 0) +
- ctx->so->const_layout.image_dims.off[var->data.driver_location];
+ struct ir3_const_state *const_state = &ctx->so->const_state;
+ unsigned cb = regid(const_state->offsets.image_dims, 0) +
+ const_state->image_dims.off[var->data.driver_location];
struct ir3_instruction *aux = create_uniform(b, cb + 1);
tmp[0] = ir3_SHR_B(b, tmp[0], 0, aux, 0);
@@ -2225,7 +2229,6 @@ emit_cf_list(struct ir3_context *ctx, struct exec_list *list)
static void
emit_stream_out(struct ir3_context *ctx)
{
- struct ir3_shader_variant *v = ctx->so;
struct ir3 *ir = ctx->ir;
struct ir3_stream_output_info *strmout =
&ctx->so->shader->stream_output;
@@ -2283,10 +2286,11 @@ emit_stream_out(struct ir3_context *ctx)
* stripped out in the backend.
*/
for (unsigned i = 0; i < IR3_MAX_SO_BUFFERS; i++) {
+ struct ir3_const_state *const_state = &ctx->so->const_state;
unsigned stride = strmout->stride[i];
struct ir3_instruction *base, *off;
- base = create_uniform(ctx->block, regid(v->constbase.tfbo, i));
+ base = create_uniform(ctx->block, regid(const_state->offsets.tfbo, i));
/* 24-bit should be enough: */
off = ir3_MUL_U(ctx->block, vtxcnt, 0,
diff --git a/src/freedreno/ir3/ir3_context.c b/src/freedreno/ir3/ir3_context.c
index 8c7d9a33f3a..d2210184a60 100644
--- a/src/freedreno/ir3/ir3_context.c
+++ b/src/freedreno/ir3/ir3_context.c
@@ -101,51 +101,34 @@ ir3_context_init(struct ir3_compiler *compiler,
nir_print_shader(ctx->s, stderr);
}
- ir3_nir_scan_driver_consts(ctx->s, &so->const_layout);
+ ir3_ibo_mapping_init(&so->image_mapping, ctx->s->info.num_textures);
- so->num_uniforms = ctx->s->num_uniforms;
- so->num_ubos = ctx->s->info.num_ubos;
+ struct ir3_const_state *const_state = &so->const_state;
+ memset(&const_state->offsets, ~0, sizeof(const_state->offsets));
- ir3_ibo_mapping_init(&so->image_mapping, ctx->s->info.num_textures);
+ ir3_nir_scan_driver_consts(ctx->s, const_state);
+
+ const_state->num_uniforms = ctx->s->num_uniforms;
+ const_state->num_ubos = ctx->s->info.num_ubos;
- /* Layout of constant registers, each section aligned to vec4. Note
- * that pointer size (ubo, etc) changes depending on generation.
- *
- * user consts
- * UBO addresses
- * SSBO sizes
- * if (vertex shader) {
- * driver params (IR3_DP_*)
- * if (stream_output.num_outputs > 0)
- * stream-out addresses
- * }
- * immediates
- *
- * Immediates go last mostly because they are inserted in the CP pass
- * after the nir -> ir3 frontend.
- *
- * Note UBO size in bytes should be aligned to vec4
- */
debug_assert((ctx->so->shader->ubo_state.size % 16) == 0);
unsigned constoff = align(ctx->so->shader->ubo_state.size / 16, 4);
unsigned ptrsz = ir3_pointer_size(ctx->compiler);
- memset(&so->constbase, ~0, sizeof(so->constbase));
-
- if (so->num_ubos > 0) {
- so->constbase.ubo = constoff;
+ if (const_state->num_ubos > 0) {
+ const_state->offsets.ubo = constoff;
constoff += align(ctx->s->info.num_ubos * ptrsz, 4) / 4;
}
- if (so->const_layout.ssbo_size.count > 0) {
- unsigned cnt = so->const_layout.ssbo_size.count;
- so->constbase.ssbo_sizes = constoff;
+ if (const_state->ssbo_size.count > 0) {
+ unsigned cnt = const_state->ssbo_size.count;
+ const_state->offsets.ssbo_sizes = constoff;
constoff += align(cnt, 4) / 4;
}
- if (so->const_layout.image_dims.count > 0) {
- unsigned cnt = so->const_layout.image_dims.count;
- so->constbase.image_dims = constoff;
+ if (const_state->image_dims.count > 0) {
+ unsigned cnt = const_state->image_dims.count;
+ const_state->offsets.image_dims = constoff;
constoff += align(cnt, 4) / 4;
}
@@ -156,17 +139,17 @@ ir3_context_init(struct ir3_compiler *compiler,
num_driver_params = IR3_DP_CS_COUNT;
}
- so->constbase.driver_param = constoff;
+ const_state->offsets.driver_param = constoff;
constoff += align(num_driver_params, 4) / 4;
if ((so->type == MESA_SHADER_VERTEX) &&
(compiler->gpu_id < 500) &&
so->shader->stream_output.num_outputs > 0) {
- so->constbase.tfbo = constoff;
+ const_state->offsets.tfbo = constoff;
constoff += align(IR3_MAX_SO_BUFFERS * ptrsz, 4) / 4;
}
- so->constbase.immediate = constoff;
+ const_state->offsets.immediate = constoff;
return ctx;
}
diff --git a/src/freedreno/ir3/ir3_cp.c b/src/freedreno/ir3/ir3_cp.c
index 28ba43f09ee..983c5fa61f2 100644
--- a/src/freedreno/ir3/ir3_cp.c
+++ b/src/freedreno/ir3/ir3_cp.c
@@ -323,10 +323,12 @@ lower_immed(struct ir3_cp_ctx *ctx, struct ir3_register *reg, unsigned new_flags
ctx->immediate_idx++;
}
+ struct ir3_const_state *const_state = &ctx->so->const_state;
+
new_flags &= ~IR3_REG_IMMED;
new_flags |= IR3_REG_CONST;
reg->flags = new_flags;
- reg->num = i + (4 * ctx->so->constbase.immediate);
+ reg->num = i + (4 * const_state->offsets.immediate);
return reg;
}
diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c
index 744fd958fc6..804196f63e9 100644
--- a/src/freedreno/ir3/ir3_nir.c
+++ b/src/freedreno/ir3/ir3_nir.c
@@ -278,7 +278,7 @@ ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s,
void
ir3_nir_scan_driver_consts(nir_shader *shader,
- struct ir3_driver_const_layout *layout)
+ struct ir3_const_state *layout)
{
nir_foreach_function(function, shader) {
if (!function->impl)
diff --git a/src/freedreno/ir3/ir3_nir.h b/src/freedreno/ir3/ir3_nir.h
index b60374410bc..bc0d496adfb 100644
--- a/src/freedreno/ir3/ir3_nir.h
+++ b/src/freedreno/ir3/ir3_nir.h
@@ -33,7 +33,7 @@
#include "ir3_shader.h"
-void ir3_nir_scan_driver_consts(nir_shader *shader, struct ir3_driver_const_layout *layout);
+void ir3_nir_scan_driver_consts(nir_shader *shader, struct ir3_const_state *layout);
bool ir3_nir_apply_trig_workarounds(nir_shader *shader);
bool ir3_nir_lower_tg4_to_tex(nir_shader *shader);
diff --git a/src/freedreno/ir3/ir3_shader.c b/src/freedreno/ir3/ir3_shader.c
index 92e3e7b251d..63cad3ee414 100644
--- a/src/freedreno/ir3/ir3_shader.c
+++ b/src/freedreno/ir3/ir3_shader.c
@@ -350,8 +350,9 @@ ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin, FILE *out)
(regid >> 2), "xyzw"[regid & 0x3], i);
}
+ struct ir3_const_state *const_state = &so->const_state;
for (i = 0; i < so->immediates_count; i++) {
- fprintf(out, "@const(c%d.x)\t", so->constbase.immediate + i);
+ fprintf(out, "@const(c%d.x)\t", const_state->offsets.immediate + i);
fprintf(out, "0x%08x, 0x%08x, 0x%08x, 0x%08x\n",
so->immediates[i].val[0],
so->immediates[i].val[1],
diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h
index 7c1dc38de23..448f6052194 100644
--- a/src/freedreno/ir3/ir3_shader.h
+++ b/src/freedreno/ir3/ir3_shader.h
@@ -71,6 +71,14 @@ enum ir3_driver_param {
/**
+ * Describes the layout of shader consts. This includes:
+ * + Driver lowered UBO ranges
+ * + SSBO sizes
+ * + Image sizes/dimensions
+ * + Driver params (ie. IR3_DP_*)
+ * + TFBO addresses (for generations that do not have hardware streamout)
+ * + Lowered immediates
+ *
* For consts needed to pass internal values to shader which may or may not
* be required, rather than allocating worst-case const space, we scan the
* shader and allocate consts as-needed:
@@ -80,8 +88,46 @@ enum ir3_driver_param {
*
* + Image dimensions: needed to calculate pixel offset, but only for
* images that have a image_store intrinsic
+ *
+ * Layout of constant registers, each section aligned to vec4. Note
+ * that pointer size (ubo, etc) changes depending on generation.
+ *
+ * user consts
+ * UBO addresses
+ * SSBO sizes
+ * if (vertex shader) {
+ * driver params (IR3_DP_*)
+ * if (stream_output.num_outputs > 0)
+ * stream-out addresses
+ * } else if (compute_shader) {
+ * driver params (IR3_DP_*)
+ * }
+ * immediates
+ *
+ * Immediates go last mostly because they are inserted in the CP pass
+ * after the nir -> ir3 frontend.
+ *
+ * Note UBO size in bytes should be aligned to vec4
*/
-struct ir3_driver_const_layout {
+struct ir3_const_state {
+ /* number of uniforms (in vec4), not including built-in compiler
+ * constants, etc.
+ */
+ unsigned num_uniforms;
+
+ unsigned num_ubos;
+
+ struct {
+ /* user const start at zero */
+ unsigned ubo;
+ /* NOTE that a3xx might need a section for SSBO addresses too */
+ unsigned ssbo_sizes;
+ unsigned image_dims;
+ unsigned driver_param;
+ unsigned tfbo;
+ unsigned immediate;
+ } offsets;
+
struct {
uint32_t mask; /* bitmask of SSBOs that have get_buffer_size */
uint32_t count; /* number of consts allocated */
@@ -340,7 +386,7 @@ struct ir3_shader_variant {
bool binning_pass;
struct ir3_shader_variant *binning;
- struct ir3_driver_const_layout const_layout;
+ struct ir3_const_state const_state;
struct ir3_info info;
struct ir3 *ir;
@@ -361,13 +407,6 @@ struct ir3_shader_variant {
*/
unsigned constlen;
- /* number of uniforms (in vec4), not including built-in compiler
- * constants, etc.
- */
- unsigned num_uniforms;
-
- unsigned num_ubos;
-
/* About Linkage:
* + Let the frag shader determine the position/compmask for the
* varyings, since it is the place where we know if the varying
@@ -451,21 +490,6 @@ struct ir3_shader_variant {
bool per_samp;
- /* Layout of constant registers, each section (in vec4). Pointer size
- * is 32b (a3xx, a4xx), or 64b (a5xx+), which effects the size of the
- * UBO and stream-out consts.
- */
- struct {
- /* user const start at zero */
- unsigned ubo;
- /* NOTE that a3xx might need a section for SSBO addresses too */
- unsigned ssbo_sizes;
- unsigned image_dims;
- unsigned driver_param;
- unsigned tfbo;
- unsigned immediate;
- } constbase;
-
unsigned immediates_count;
unsigned immediates_size;
struct {