From b929565ea8d4846b1943f35b6e86d685a616ed54 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Tue, 18 Feb 2020 14:20:16 -0500 Subject: panfrost: Rewrite texture descriptor creation logic Rather than creating partially within the Gallium create function and monkeypatching on draw time with code split across N different files with tight Gallium dependencies, let's streamline everything into a series of maintainable routines in mesa/src/panfrost with no Gallium dependencies, doing the entire texture creation in one-shot and thus adding absolutely zero draw-time overhead (since we can allocate a BO for the descriptor and upload ahead-of-time, so switching textures is as cheap as switching pointers). Was this worth it? You know, I'm not sure :| Signed-off-by: Alyssa Rosenzweig Reviewed-by: Boris Brezillon Tested-by: Marge Bot Part-of: --- src/gallium/drivers/panfrost/pan_context.c | 188 +++++------------------ src/gallium/drivers/panfrost/pan_context.h | 4 +- src/gallium/drivers/panfrost/pan_resource.c | 21 +-- src/panfrost/encoder/pan_texture.c | 230 ++++++++++++++++++++++++++++ src/panfrost/encoder/pan_texture.h | 31 ++++ 5 files changed, 306 insertions(+), 168 deletions(-) diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c index 0d69036b9a2..ff00c2129bf 100644 --- a/src/gallium/drivers/panfrost/pan_context.c +++ b/src/gallium/drivers/panfrost/pan_context.c @@ -475,95 +475,19 @@ panfrost_upload_tex( struct pipe_sampler_view *pview = &view->base; struct panfrost_resource *rsrc = pan_resource(pview->texture); - mali_ptr descriptor_gpu; - void *descriptor; - - /* Do we interleave an explicit stride with every element? */ - - bool has_manual_stride = view->manual_stride; - - /* For easy access */ - - bool is_buffer = pview->target == PIPE_BUFFER; - unsigned first_level = is_buffer ? 0 : pview->u.tex.first_level; - unsigned last_level = is_buffer ? 0 : pview->u.tex.last_level; - unsigned first_layer = is_buffer ? 
0 : pview->u.tex.first_layer; - unsigned last_layer = is_buffer ? 0 : pview->u.tex.last_layer; - unsigned first_face = 0; - unsigned last_face = 0; - unsigned face_mult = 1; - - /* Cubemaps have 6 faces as layers in between each actual layer. - * There's a bit of an impedence mismatch between Gallium and the - * hardware, let's fixup for it */ - - if (pview->target == PIPE_TEXTURE_CUBE || pview->target == PIPE_TEXTURE_CUBE_ARRAY) { - /* TODO: logic wrong in the asserted out cases ... can they happen? */ - - first_face = first_layer % 6; - last_face = last_layer % 6; - first_layer /= 6; - last_layer /= 6; - - assert((first_layer == last_layer) || (first_face == 0 && last_face == 5)); - face_mult = 6; - } - - /* Lower-bit is set when sampling from colour AFBC */ - bool is_afbc = rsrc->layout == MALI_TEXTURE_AFBC; - bool is_zs = rsrc->base.bind & PIPE_BIND_DEPTH_STENCIL; - unsigned afbc_bit = (is_afbc && !is_zs) ? 1 : 0; /* Add the BO to the job so it's retained until the job is done. */ struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); + panfrost_batch_add_bo(batch, rsrc->bo, PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ | panfrost_bo_access_for_stage(st)); - /* Add the usage flags in, since they can change across the CSO - * lifetime due to layout switches */ - - view->hw.format.layout = rsrc->layout; - view->hw.format.manual_stride = has_manual_stride; - - /* Inject the addresses in, interleaving array indices, mip levels, - * cube faces, and strides in that order */ - - unsigned idx = 0; - unsigned levels = 1 + last_level - first_level; - unsigned layers = 1 + last_layer - first_layer; - unsigned faces = 1 + last_face - first_face; - unsigned num_elements = levels * layers * faces; - if (has_manual_stride) - num_elements *= 2; - - descriptor = malloc(sizeof(struct mali_texture_descriptor) + - sizeof(mali_ptr) * num_elements); - memcpy(descriptor, &view->hw, sizeof(struct mali_texture_descriptor)); - - mali_ptr *pointers_and_strides = descriptor + - 
sizeof(struct mali_texture_descriptor); - - for (unsigned w = first_layer; w <= last_layer; ++w) { - for (unsigned l = first_level; l <= last_level; ++l) { - for (unsigned f = first_face; f <= last_face; ++f) { - pointers_and_strides[idx++] = - panfrost_get_texture_address(rsrc, l, w * face_mult + f) - + afbc_bit + view->astc_stretch; - if (has_manual_stride) { - pointers_and_strides[idx++] = - rsrc->slices[l].stride; - } - } - } - } - - descriptor_gpu = panfrost_upload_transient(batch, descriptor, - sizeof(struct mali_texture_descriptor) + - num_elements * sizeof(*pointers_and_strides)); - free(descriptor); + panfrost_batch_add_bo(batch, view->bo, + PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ | + panfrost_bo_access_for_stage(st)); - return descriptor_gpu; + return view->bo->gpu; } static void @@ -2052,29 +1976,14 @@ panfrost_translate_texture_type(enum pipe_texture_target t) { } } -static uint8_t -panfrost_compute_astc_stretch( - const struct util_format_description *desc) -{ - unsigned width = desc->block.width; - unsigned height = desc->block.height; - assert(width >= 4 && width <= 12); - assert(height >= 4 && height <= 12); - if (width == 12) - width = 11; - if (height == 12) - height = 11; - return ((height - 4) * 8) + (width - 4); -} - static struct pipe_sampler_view * panfrost_create_sampler_view( struct pipe_context *pctx, struct pipe_resource *texture, const struct pipe_sampler_view *template) { + struct panfrost_screen *screen = pan_screen(pctx->screen); struct panfrost_sampler_view *so = rzalloc(pctx, struct panfrost_sampler_view); - int bytes_per_pixel = util_format_get_blocksize(texture->format); pipe_reference(NULL, &texture->reference); @@ -2086,12 +1995,6 @@ panfrost_create_sampler_view( so->base.reference.count = 1; so->base.context = pctx; - /* sampler_views correspond to texture descriptors, minus the texture - * (data) itself. So, we serialise the descriptor here and cache it for - * later. 
*/ - - const struct util_format_description *desc = util_format_description(prsrc->base.format); - unsigned char user_swizzle[4] = { template->swizzle_r, template->swizzle_g, @@ -2099,32 +2002,6 @@ panfrost_create_sampler_view( template->swizzle_a }; - enum mali_format format = panfrost_find_format(desc); - - if (format == MALI_ASTC_HDR_SUPP || format == MALI_ASTC_SRGB_SUPP) - so->astc_stretch = panfrost_compute_astc_stretch(desc); - - /* Check if we need to set a custom stride by computing the "expected" - * stride and comparing it to what the BO actually wants. Only applies - * to linear textures, since tiled/compressed textures have strict - * alignment requirements for their strides as it is */ - - unsigned first_level = template->u.tex.first_level; - unsigned last_level = template->u.tex.last_level; - - if (prsrc->layout == MALI_TEXTURE_LINEAR) { - for (unsigned l = first_level; l <= last_level; ++l) { - unsigned actual_stride = prsrc->slices[l].stride; - unsigned width = u_minify(texture->width0, l); - unsigned comp_stride = width * bytes_per_pixel; - - if (comp_stride != actual_stride) { - so->manual_stride = true; - break; - } - } - } - /* In the hardware, array_size refers specifically to array textures, * whereas in Gallium, it also covers cubemaps */ @@ -2136,26 +2013,32 @@ panfrost_create_sampler_view( array_size /= 6; } - struct mali_texture_descriptor texture_descriptor = { - .width = MALI_POSITIVE(u_minify(texture->width0, first_level)), - .height = MALI_POSITIVE(u_minify(texture->height0, first_level)), - .depth = MALI_POSITIVE(u_minify(texture->depth0, first_level)), - .array_size = MALI_POSITIVE(array_size), - - .format = { - .swizzle = panfrost_translate_swizzle_4(desc->swizzle), - .format = format, - .srgb = desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB, - .type = panfrost_translate_texture_type(template->target), - .unknown2 = 0x1, - }, - - .swizzle = panfrost_translate_swizzle_4(user_swizzle) - }; - - texture_descriptor.levels = last_level - 
first_level; - - so->hw = texture_descriptor; + enum mali_texture_type type = + panfrost_translate_texture_type(template->target); + + unsigned size = panfrost_estimate_texture_size( + template->u.tex.first_level, + template->u.tex.last_level, + template->u.tex.first_layer, + template->u.tex.last_layer, + type, prsrc->layout); + + so->bo = panfrost_bo_create(screen, size, 0); + + panfrost_new_texture( + so->bo->cpu, + texture->width0, texture->height0, + texture->depth0, array_size, + texture->format, + type, prsrc->layout, + template->u.tex.first_level, + template->u.tex.last_level, + template->u.tex.first_layer, + template->u.tex.last_layer, + prsrc->cubemap_stride, + panfrost_translate_swizzle_4(user_swizzle), + prsrc->bo->gpu, + prsrc->slices); return (struct pipe_sampler_view *) so; } @@ -2190,9 +2073,12 @@ panfrost_set_sampler_views( static void panfrost_sampler_view_destroy( struct pipe_context *pctx, - struct pipe_sampler_view *view) + struct pipe_sampler_view *pview) { - pipe_resource_reference(&view->texture, NULL); + struct panfrost_sampler_view *view = (struct panfrost_sampler_view *) pview; + + pipe_resource_reference(&pview->texture, NULL); + panfrost_bo_unreference(view->bo); ralloc_free(view); } diff --git a/src/gallium/drivers/panfrost/pan_context.h b/src/gallium/drivers/panfrost/pan_context.h index 1a570261363..375569d7c73 100644 --- a/src/gallium/drivers/panfrost/pan_context.h +++ b/src/gallium/drivers/panfrost/pan_context.h @@ -262,9 +262,7 @@ struct panfrost_sampler_state { struct panfrost_sampler_view { struct pipe_sampler_view base; - struct mali_texture_descriptor hw; - uint8_t astc_stretch; - bool manual_stride; + struct panfrost_bo *bo; }; static inline struct panfrost_context * diff --git a/src/gallium/drivers/panfrost/pan_resource.c b/src/gallium/drivers/panfrost/pan_resource.c index 5c90a5e9b4c..d4c134f6246 100644 --- a/src/gallium/drivers/panfrost/pan_resource.c +++ b/src/gallium/drivers/panfrost/pan_resource.c @@ -533,15 +533,6 @@ 
panfrost_resource_destroy(struct pipe_screen *screen, ralloc_free(rsrc); } -static unsigned -panfrost_get_layer_stride(struct panfrost_resource *rsrc, unsigned level) -{ - if (rsrc->base.target == PIPE_TEXTURE_3D) - return rsrc->slices[level].size0; - else - return rsrc->cubemap_stride; -} - static void * panfrost_transfer_map(struct pipe_context *pctx, struct pipe_resource *resource, @@ -645,7 +636,9 @@ panfrost_transfer_map(struct pipe_context *pctx, return transfer->map; } else { transfer->base.stride = rsrc->slices[level].stride; - transfer->base.layer_stride = panfrost_get_layer_stride(rsrc, level); + transfer->base.layer_stride = panfrost_get_layer_stride( + rsrc->slices, rsrc->base.target == PIPE_TEXTURE_3D, + rsrc->cubemap_stride, level); /* By mapping direct-write, we're implicitly already * initialized (maybe), so be conservative */ @@ -792,10 +785,8 @@ panfrost_get_texture_address( struct panfrost_resource *rsrc, unsigned level, unsigned face) { - unsigned level_offset = rsrc->slices[level].offset; - unsigned face_offset = face * panfrost_get_layer_stride(rsrc, level); - - return rsrc->bo->gpu + level_offset + face_offset; + bool is_3d = rsrc->base.target == PIPE_TEXTURE_3D; + return rsrc->bo->gpu + panfrost_texture_offset(rsrc->slices, is_3d, rsrc->cubemap_stride, level, face); } /* Given a resource that has already been allocated, hint that it should use a @@ -856,6 +847,8 @@ panfrost_resource_hint_layout( panfrost_bo_unreference(rsrc->bo); rsrc->bo = panfrost_bo_create(screen, new_size, PAN_BO_DELAY_MMAP); } + + /* TODO: If there are textures bound, regenerate their descriptors */ } static void diff --git a/src/panfrost/encoder/pan_texture.c b/src/panfrost/encoder/pan_texture.c index 8e4b00e9055..0c92464af08 100644 --- a/src/panfrost/encoder/pan_texture.c +++ b/src/panfrost/encoder/pan_texture.c @@ -26,8 +26,222 @@ */ #include "util/macros.h" +#include "util/u_math.h" #include "pan_texture.h" +/* Generates a texture descriptor. 
Ideally, descriptors are immutable after the + * texture is created, so we can keep these hanging around in GPU memory in a + * dedicated BO and not have to worry. In practice there are some minor gotchas + * with this (the driver sometimes will change the format of a texture on the + * fly for compression) but it's fast enough to just regenerate the descriptor + * in those cases, rather than monkeypatching at drawtime. + * + * A texture descriptor consists of a 32-byte mali_texture_descriptor structure + * followed by a variable number of pointers. Due to this variance and + * potentially large size, we actually upload directly rather than returning + * the descriptor. Whether the user does a copy themselves or not is irrelevant + * to us here. + */ + +/* Check if we need to set a custom stride by computing the "expected" + * stride and comparing it to what the user actually wants. Only applies + * to linear textures, since tiled/compressed textures have strict + * alignment requirements for their strides as it is */ + +static bool +panfrost_needs_explicit_stride( + struct panfrost_slice *slices, + uint16_t width, + unsigned first_level, unsigned last_level, + unsigned bytes_per_pixel) +{ + for (unsigned l = first_level; l <= last_level; ++l) { + unsigned actual = slices[l].stride; + unsigned expected = u_minify(width, l) * bytes_per_pixel; + + if (actual != expected) + return true; + } + + return false; +} + +/* Adaptive Scalable Texture Compression (ASTC) corresponds to just a few texture types + * in the hardware, but in fact can be parametrized to have various widths and + * heights for the so-called "stretch factor". It turns out these parameters + * are stuffed in the bottom bits of the payload pointers. This function + * computes these magic stuffing constants based on the ASTC format in use. The + * constant in a given dimension is 3-bits, and two are stored side-by-side for + * each active dimension. 
+ */ + +static unsigned +panfrost_astc_stretch(unsigned dim) +{ + assert(dim >= 4 && dim <= 12); + return MIN2(dim, 11) - 4; +} + +/* Texture addresses are tagged with information about AFBC (colour AFBC?) xor + * ASTC (stretch factor) if in use. */ + +static unsigned +panfrost_compression_tag( + const struct util_format_description *desc, + enum mali_format format, enum mali_texture_layout layout) +{ + if (layout == MALI_TEXTURE_AFBC) + return util_format_has_depth(desc) ? 0x0 : 0x1; + else if (format == MALI_ASTC_HDR_SUPP || format == MALI_ASTC_SRGB_SUPP) + return (panfrost_astc_stretch(desc->block.height) << 3) | + panfrost_astc_stretch(desc->block.width); + else + return 0; +} + + +/* Cubemaps have 6 faces as "layers" in between each actual layer. We + * need to fix this up. TODO: logic wrong in the asserted out cases ... + * can they happen, perhaps from cubemap arrays? */ + +static void +panfrost_adjust_cube_dimensions( + unsigned *first_face, unsigned *last_face, + unsigned *first_layer, unsigned *last_layer) +{ + *first_face = *first_layer % 6; + *last_face = *last_layer % 6; + *first_layer /= 6; + *last_layer /= 6; + + assert((*first_layer == *last_layer) || (*first_face == 0 && *last_face == 5)); +} + +/* Following the texture descriptor is a number of pointers. How many? */ + +static unsigned +panfrost_texture_num_elements( + unsigned first_level, unsigned last_level, + unsigned first_layer, unsigned last_layer, + bool is_cube, bool manual_stride) +{ + unsigned first_face = 0, last_face = 0; + + if (is_cube) { + panfrost_adjust_cube_dimensions(&first_face, &last_face, + &first_layer, &last_layer); + } + + unsigned levels = 1 + last_level - first_level; + unsigned layers = 1 + last_layer - first_layer; + unsigned faces = 1 + last_face - first_face; + unsigned num_elements = levels * layers * faces; + + if (manual_stride) + num_elements *= 2; + + return num_elements; +} + +/* Conservative estimate of the size of the texture descriptor a priori. 
+ * Average case, size equal to the actual size. Worst case, off by 2x (if + * a manual stride is not needed on a linear texture). Returned value + * must be greater than or equal to the actual size, so it's safe to use + * as an allocation amount */ + +unsigned +panfrost_estimate_texture_size( + unsigned first_level, unsigned last_level, + unsigned first_layer, unsigned last_layer, + enum mali_texture_type type, enum mali_texture_layout layout) +{ + /* Assume worst case */ + unsigned manual_stride = (layout == MALI_TEXTURE_LINEAR); + + unsigned elements = panfrost_texture_num_elements( + first_level, last_level, + first_layer, last_layer, + type == MALI_TEX_CUBE, manual_stride); + + return sizeof(struct mali_texture_descriptor) + + sizeof(mali_ptr) * elements; +} + +void +panfrost_new_texture( + void *out, + uint16_t width, uint16_t height, + uint16_t depth, uint16_t array_size, + enum pipe_format format, + enum mali_texture_type type, + enum mali_texture_layout layout, + unsigned first_level, unsigned last_level, + unsigned first_layer, unsigned last_layer, + unsigned cube_stride, + unsigned swizzle, + mali_ptr base, + struct panfrost_slice *slices) +{ + const struct util_format_description *desc = + util_format_description(format); + + unsigned bytes_per_pixel = util_format_get_blocksize(format); + + enum mali_format mali_format = panfrost_find_format(desc); + + bool manual_stride = (layout == MALI_TEXTURE_LINEAR) + && panfrost_needs_explicit_stride(slices, width, + first_level, last_level, bytes_per_pixel); + + struct mali_texture_descriptor descriptor = { + .width = MALI_POSITIVE(u_minify(width, first_level)), + .height = MALI_POSITIVE(u_minify(height, first_level)), + .depth = MALI_POSITIVE(u_minify(depth, first_level)), + .array_size = MALI_POSITIVE(array_size), + .format = { + .swizzle = panfrost_translate_swizzle_4(desc->swizzle), + .format = mali_format, + .srgb = (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB), + .type = type, + .layout = layout, + 
.manual_stride = manual_stride, + .unknown2 = 1, + }, + .levels = last_level - first_level, + .swizzle = swizzle + }; + + memcpy(out, &descriptor, sizeof(descriptor)); + + base |= panfrost_compression_tag(desc, mali_format, layout); + + /* Inject the addresses in, interleaving array indices, mip levels, + * cube faces, and strides in that order */ + + unsigned first_face = 0, last_face = 0, face_mult = 1; + + if (type == MALI_TEX_CUBE) { + face_mult = 6; + panfrost_adjust_cube_dimensions(&first_face, &last_face, &first_layer, &last_layer); + } + + mali_ptr *payload = (mali_ptr *) (out + sizeof(struct mali_texture_descriptor)); + unsigned idx = 0; + + for (unsigned w = first_layer; w <= last_layer; ++w) { + for (unsigned l = first_level; l <= last_level; ++l) { + for (unsigned f = first_face; f <= last_face; ++f) { + payload[idx++] = base + panfrost_texture_offset( + slices, type == MALI_TEX_3D, + cube_stride, l, w * face_mult + f); + + if (manual_stride) + payload[idx++] = slices[l].stride; + } + } + } +} + /* Computes sizes for checksumming, which is 8 bytes per 16x16 tile. * Checksumming is believed to be a CRC variant (CRC64 based on the size?). * This feature is also known as "transaction elimination". */ @@ -52,3 +266,19 @@ panfrost_compute_checksum_size( return slice->checksum_stride * tile_count_y; } + +unsigned +panfrost_get_layer_stride(struct panfrost_slice *slices, bool is_3d, unsigned cube_stride, unsigned level) +{ + return is_3d ? slices[level].size0 : cube_stride; +} + +/* Computes the offset into a texture at a particular level/face. 
Add to + * the base address of a texture to get the address to that level/face */ + +unsigned +panfrost_texture_offset(struct panfrost_slice *slices, bool is_3d, unsigned cube_stride, unsigned level, unsigned face) +{ + unsigned layer_stride = panfrost_get_layer_stride(slices, is_3d, cube_stride, level); + return slices[level].offset + (face * layer_stride); +} diff --git a/src/panfrost/encoder/pan_texture.h b/src/panfrost/encoder/pan_texture.h index 29be91cca83..241e1f8b4e6 100644 --- a/src/panfrost/encoder/pan_texture.h +++ b/src/panfrost/encoder/pan_texture.h @@ -30,6 +30,7 @@ #include #include "util/format/u_format.h" +#include "panfrost-job.h" struct panfrost_slice { unsigned offset; @@ -63,6 +64,36 @@ panfrost_format_supports_afbc(enum pipe_format format); unsigned panfrost_afbc_header_size(unsigned width, unsigned height); +/* mali_texture_descriptor */ + +unsigned +panfrost_estimate_texture_size( + unsigned first_level, unsigned last_level, + unsigned first_layer, unsigned last_layer, + enum mali_texture_type type, enum mali_texture_layout layout); + +void +panfrost_new_texture( + void *out, + uint16_t width, uint16_t height, + uint16_t depth, uint16_t array_size, + enum pipe_format format, + enum mali_texture_type type, + enum mali_texture_layout layout, + unsigned first_level, unsigned last_level, + unsigned first_layer, unsigned last_layer, + unsigned cube_stride, + unsigned swizzle, + mali_ptr base, + struct panfrost_slice *slices); + + +unsigned +panfrost_get_layer_stride(struct panfrost_slice *slices, bool is_3d, unsigned cube_stride, unsigned level); + +unsigned +panfrost_texture_offset(struct panfrost_slice *slices, bool is_3d, unsigned cube_stride, unsigned level, unsigned face); + /* Formats */ enum mali_format -- cgit v1.2.1