diff options
-rw-r--r-- | drivers/gpu/drm/vc4/Makefile | 1 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_drv.h | 28 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_gem.c | 59 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_render_cl.c | 447 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_validate.c | 307 | ||||
-rw-r--r-- | include/uapi/drm/vc4_drm.h | 40 |
6 files changed, 561 insertions, 321 deletions
diff --git a/drivers/gpu/drm/vc4/Makefile b/drivers/gpu/drm/vc4/Makefile index 0abf23d62f9e..cf3898da60f5 100644 --- a/drivers/gpu/drm/vc4/Makefile +++ b/drivers/gpu/drm/vc4/Makefile @@ -13,6 +13,7 @@ vc4-y := \ vc4_hvs.o \ vc4_irq.o \ vc4_plane.o \ + vc4_render_cl.o \ vc4_v3d.o \ vc4_validate.o \ vc4_validate_shaders.o \ diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h index bc4384e3d4cb..e4e2e081628c 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.h +++ b/drivers/gpu/drm/vc4/vc4_drv.h @@ -228,14 +228,9 @@ struct vc4_exec_info { uint32_t shader_state_count; bool found_tile_binning_mode_config_packet; - bool found_tile_rendering_mode_config_packet; bool found_start_tile_binning_packet; bool found_increment_semaphore_packet; - bool found_wait_on_semaphore_packet; uint8_t bin_tiles_x, bin_tiles_y; - uint32_t fb_width, fb_height; - uint32_t tile_alloc_init_block_mask; - uint32_t tile_alloc_init_block_last; struct drm_gem_cma_object *tile_alloc_bo; /** @@ -430,16 +425,25 @@ int vc4_v3d_set_power(struct vc4_dev *vc4, bool on); /* vc4_validate.c */ int -vc4_validate_cl(struct drm_device *dev, - void *validated, - void *unvalidated, - uint32_t len, - bool is_bin, - bool has_bin, - struct vc4_exec_info *exec); +vc4_validate_bin_cl(struct drm_device *dev, + void *validated, + void *unvalidated, + struct vc4_exec_info *exec); int vc4_validate_shader_recs(struct drm_device *dev, struct vc4_exec_info *exec); struct vc4_validated_shader_info * vc4_validate_shader(struct drm_gem_cma_object *shader_obj); + +bool vc4_use_bo(struct vc4_exec_info *exec, + uint32_t hindex, + enum vc4_bo_mode mode, + struct drm_gem_cma_object **obj); + +int vc4_get_rcl(struct drm_device *dev, struct vc4_exec_info *exec); + +bool vc4_check_tex_size(struct vc4_exec_info *exec, + struct drm_gem_cma_object *fbo, + uint32_t offset, uint8_t tiling_format, + uint32_t width, uint32_t height, uint8_t cpp); diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index 54fe0b83421b..28c6a9c80fa0 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -328,24 +328,22 @@ fail: } static int -vc4_cl_validate(struct drm_device *dev, struct vc4_exec_info *exec) +vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec) { struct drm_vc4_submit_cl *args = exec->args; void *temp = NULL; - void *bin, *render; + void *bin; int ret = 0; uint32_t bin_offset = 0; - uint32_t render_offset = bin_offset + args->bin_cl_size; - uint32_t shader_rec_offset = roundup(render_offset + - args->render_cl_size, 16); + uint32_t shader_rec_offset = roundup(bin_offset + args->bin_cl_size, + 16); uint32_t uniforms_offset = shader_rec_offset + args->shader_rec_size; uint32_t exec_size = uniforms_offset + args->uniforms_size; uint32_t temp_size = exec_size + (sizeof(struct vc4_shader_state) * args->shader_rec_count); struct vc4_bo *bo; - if (shader_rec_offset < render_offset || - uniforms_offset < shader_rec_offset || + if (uniforms_offset < shader_rec_offset || exec_size < uniforms_offset || args->shader_rec_count >= (UINT_MAX / sizeof(struct vc4_shader_state)) || @@ -369,7 +367,6 @@ vc4_cl_validate(struct drm_device *dev, struct vc4_exec_info *exec) goto fail; } bin = temp + bin_offset; - render = temp + render_offset; exec->shader_rec_u = temp + shader_rec_offset; exec->uniforms_u = temp + uniforms_offset; exec->shader_state = temp + exec_size; @@ -383,14 +380,6 @@ vc4_cl_validate(struct drm_device *dev, struct vc4_exec_info *exec) goto fail; } - ret = copy_from_user(render, - (void __user *)(uintptr_t)args->render_cl, - args->render_cl_size); - if (ret) { - DRM_ERROR("Failed to copy in render cl\n"); - goto fail; - } - ret = copy_from_user(exec->shader_rec_u, (void __user *)(uintptr_t)args->shader_rec, args->shader_rec_size); @@ -409,7 +398,7 @@ vc4_cl_validate(struct drm_device *dev, struct vc4_exec_info *exec) bo = vc4_bo_create(dev, exec_size); if (!bo) { - DRM_ERROR("Couldn't allocate BO for exec\n"); + DRM_ERROR("Couldn't allocate BO for binning\n"); ret = PTR_ERR(exec->exec_bo); goto fail; } @@ -419,7 +408,6 @@ vc4_cl_validate(struct drm_device *dev, struct vc4_exec_info *exec) &exec->unref_list); exec->ct0ca = exec->exec_bo->paddr + bin_offset; - exec->ct1ca = exec->exec_bo->paddr + render_offset; exec->shader_rec_v = exec->exec_bo->vaddr + shader_rec_offset; exec->shader_rec_p = exec->exec_bo->paddr + shader_rec_offset; @@ -429,23 +417,10 @@ vc4_cl_validate(struct drm_device *dev, struct vc4_exec_info *exec) exec->uniforms_p = exec->exec_bo->paddr + uniforms_offset; exec->uniforms_size = args->uniforms_size; - ret = vc4_validate_cl(dev, - exec->exec_bo->vaddr + bin_offset, - bin, - args->bin_cl_size, - true, - args->bin_cl_size != 0, - exec); - if (ret) - goto fail; - - ret = vc4_validate_cl(dev, - exec->exec_bo->vaddr + render_offset, - render, - args->render_cl_size, - false, - args->bin_cl_size != 0, - exec); + ret = vc4_validate_bin_cl(dev, + exec->exec_bo->vaddr + bin_offset, + bin, + exec); if (ret) goto fail; @@ -577,8 +552,10 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, struct vc4_exec_info *exec; int ret; - if (args->flags != 0) + if ((args->flags & ~VC4_SUBMIT_CL_USE_CLEAR_COLOR) != 0) { + DRM_ERROR("Unknown flags: 0x%02x\n", args->flags); return -EINVAL; + } exec = kcalloc(1, sizeof(*exec), GFP_KERNEL); if (!exec) { @@ -595,7 +572,15 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, if (ret) goto fail; - ret = vc4_cl_validate(dev, exec); + if (exec->args->bin_cl_size != 0) { + ret = vc4_get_bcl(dev, exec); + if (ret) + goto fail; + } else { + exec->ct0ca = exec->ct0ea = 0; + } + + ret = vc4_get_rcl(dev, exec); if (ret) goto fail; diff --git a/drivers/gpu/drm/vc4/vc4_render_cl.c b/drivers/gpu/drm/vc4/vc4_render_cl.c new file mode 100644 index 000000000000..241adbfa84ca --- /dev/null +++ b/drivers/gpu/drm/vc4/vc4_render_cl.c @@ -0,0 +1,447 @@ +/* + * Copyright © 2014-2015 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** + * DOC: Render command list generation + * + * In the VC4 driver, render command list generation is performed by the + * kernel instead of userspace. We do this because validating a + * user-submitted command list is hard to get right and has high CPU overhead, + * while the number of valid configurations for render command lists is + * actually fairly low. + */ + +#include "uapi/drm/vc4_drm.h" +#include "vc4_drv.h" +#include "vc4_packet.h" + +struct vc4_rcl_setup { + struct drm_gem_cma_object *color_read; + struct drm_gem_cma_object *color_ms_write; + struct drm_gem_cma_object *zs_read; + struct drm_gem_cma_object *zs_write; + + struct drm_gem_cma_object *rcl; + u32 next_offset; +}; + +static inline void rcl_u8(struct vc4_rcl_setup *setup, u8 val) +{ + *(u8 *)(setup->rcl->vaddr + setup->next_offset) = val; + setup->next_offset += 1; +} + +static inline void rcl_u16(struct vc4_rcl_setup *setup, u16 val) +{ + *(u16 *)(setup->rcl->vaddr + setup->next_offset) = val; + setup->next_offset += 2; +} + +static inline void rcl_u32(struct vc4_rcl_setup *setup, u32 val) +{ + *(u32 *)(setup->rcl->vaddr + setup->next_offset) = val; + setup->next_offset += 4; +} + + +/* + * Emits a no-op STORE_TILE_BUFFER_GENERAL. + * + * If we emit a PACKET_TILE_COORDINATES, it must be followed by a store of + * some sort before another load is triggered. + */ +static void vc4_store_before_load(struct vc4_rcl_setup *setup) +{ + rcl_u8(setup, VC4_PACKET_STORE_TILE_BUFFER_GENERAL); + rcl_u16(setup, + VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_NONE, + VC4_LOADSTORE_TILE_BUFFER_BUFFER) | + VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR | + VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR | + VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR); + rcl_u32(setup, 0); /* no address, since we're in None mode */ +} + +/* + * Emits a PACKET_TILE_COORDINATES if one isn't already pending. + * + * The tile coordinates packet triggers a pending load if there is one, are + * used for clipping during rendering, and determine where loads/stores happen + * relative to their base address. + */ +static void vc4_tile_coordinates(struct vc4_rcl_setup *setup, + uint32_t x, uint32_t y) +{ + rcl_u8(setup, VC4_PACKET_TILE_COORDINATES); + rcl_u8(setup, x); + rcl_u8(setup, y); +} + +static void emit_tile(struct vc4_exec_info *exec, + struct vc4_rcl_setup *setup, + uint8_t x, uint8_t y, bool first, bool last) +{ + bool has_bin = exec->args->bin_cl_size != 0; + + /* Note that the load doesn't actually occur until the + * tile coords packet is processed, and only one load + * may be outstanding at a time. + */ + if (setup->color_read) { + rcl_u8(setup, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL); + rcl_u16(setup, exec->args->color_read.bits); + rcl_u32(setup, + setup->color_read->paddr + + exec->args->color_read.offset); + } + + if (setup->zs_read) { + if (setup->color_read) { + /* Exec previous load. */ + vc4_tile_coordinates(setup, x, y); + vc4_store_before_load(setup); + } + + rcl_u8(setup, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL); + rcl_u16(setup, exec->args->zs_read.bits); + rcl_u32(setup, + setup->zs_read->paddr + exec->args->zs_read.offset); + } + + /* Clipping depends on tile coordinates having been + * emitted, so we always need one here. + */ + vc4_tile_coordinates(setup, x, y); + + /* Wait for the binner before jumping to the first + * tile's lists. + */ + if (first && has_bin) + rcl_u8(setup, VC4_PACKET_WAIT_ON_SEMAPHORE); + + if (has_bin) { + rcl_u8(setup, VC4_PACKET_BRANCH_TO_SUB_LIST); + rcl_u32(setup, (exec->tile_alloc_bo->paddr + + (y * exec->bin_tiles_x + x) * 32)); + } + + if (setup->zs_write) { + rcl_u8(setup, VC4_PACKET_STORE_TILE_BUFFER_GENERAL); + rcl_u16(setup, exec->args->zs_write.bits | + (setup->color_ms_write ? + VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR : 0)); + rcl_u32(setup, + (setup->zs_write->paddr + exec->args->zs_write.offset) | + ((last && !setup->color_ms_write) ? + VC4_LOADSTORE_TILE_BUFFER_EOF : 0)); + } + + if (setup->color_ms_write) { + if (setup->zs_write) { + /* Reset after previous store */ + vc4_tile_coordinates(setup, x, y); + } + + if (last) + rcl_u8(setup, VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF); + else + rcl_u8(setup, VC4_PACKET_STORE_MS_TILE_BUFFER); + } +} + +static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec, + struct vc4_rcl_setup *setup) +{ + bool has_bin = exec->args->bin_cl_size != 0; + uint8_t min_x_tile = exec->args->min_x_tile; + uint8_t min_y_tile = exec->args->min_y_tile; + uint8_t max_x_tile = exec->args->max_x_tile; + uint8_t max_y_tile = exec->args->max_y_tile; + uint8_t xtiles = max_x_tile - min_x_tile + 1; + uint8_t ytiles = max_y_tile - min_y_tile + 1; + uint8_t x, y; + uint32_t size, loop_body_size; + + size = VC4_PACKET_TILE_RENDERING_MODE_CONFIG_SIZE; + loop_body_size = VC4_PACKET_TILE_COORDINATES_SIZE; + + if (exec->args->flags & VC4_SUBMIT_CL_USE_CLEAR_COLOR) { + size += VC4_PACKET_CLEAR_COLORS_SIZE + + VC4_PACKET_TILE_COORDINATES_SIZE + + VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE; + } + + if (setup->color_read) { + loop_body_size += (VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE); + } + if (setup->zs_read) { + if (setup->color_read) { + loop_body_size += VC4_PACKET_TILE_COORDINATES_SIZE; + loop_body_size += VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE; + } + loop_body_size += VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE; + } + + if (has_bin) { + size += VC4_PACKET_WAIT_ON_SEMAPHORE_SIZE; + loop_body_size += VC4_PACKET_BRANCH_TO_SUB_LIST_SIZE; + } + + if (setup->zs_write) + loop_body_size += VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE; + if (setup->color_ms_write) { + if (setup->zs_write) + loop_body_size += VC4_PACKET_TILE_COORDINATES_SIZE; + loop_body_size += VC4_PACKET_STORE_MS_TILE_BUFFER_SIZE; + } + size += xtiles * ytiles * loop_body_size; + + setup->rcl = &vc4_bo_create(dev, size)->base; + if (!setup->rcl) + return -ENOMEM; + list_add_tail(&to_vc4_bo(&setup->rcl->base)->unref_head, + &exec->unref_list); + + rcl_u8(setup, VC4_PACKET_TILE_RENDERING_MODE_CONFIG); + rcl_u32(setup, + (setup->color_ms_write ? + (setup->color_ms_write->paddr + + exec->args->color_ms_write.offset) : + 0)); + rcl_u16(setup, exec->args->width); + rcl_u16(setup, exec->args->height); + rcl_u16(setup, exec->args->color_ms_write.bits); + + /* The tile buffer gets cleared when the previous tile is stored. If + * the clear values changed between frames, then the tile buffer has + * stale clear values in it, so we have to do a store in None mode (no + * writes) so that we trigger the tile buffer clear. + */ + if (exec->args->flags & VC4_SUBMIT_CL_USE_CLEAR_COLOR) { + rcl_u8(setup, VC4_PACKET_CLEAR_COLORS); + rcl_u32(setup, exec->args->clear_color[0]); + rcl_u32(setup, exec->args->clear_color[1]); + rcl_u32(setup, exec->args->clear_z); + rcl_u8(setup, exec->args->clear_s); + + vc4_tile_coordinates(setup, 0, 0); + + rcl_u8(setup, VC4_PACKET_STORE_TILE_BUFFER_GENERAL); + rcl_u16(setup, VC4_LOADSTORE_TILE_BUFFER_NONE); + rcl_u32(setup, 0); /* no address, since we're in None mode */ + } + + for (y = min_y_tile; y <= max_y_tile; y++) { + for (x = min_x_tile; x <= max_x_tile; x++) { + bool first = (x == min_x_tile && y == min_y_tile); + bool last = (x == max_x_tile && y == max_y_tile); + emit_tile(exec, setup, x, y, first, last); + } + } + + BUG_ON(setup->next_offset != size); + exec->ct1ca = setup->rcl->paddr; + exec->ct1ea = setup->rcl->paddr + setup->next_offset; + + return 0; +} + +static int vc4_rcl_surface_setup(struct vc4_exec_info *exec, + struct drm_gem_cma_object **obj, + struct drm_vc4_submit_rcl_surface *surf) +{ + uint8_t tiling = VC4_GET_FIELD(surf->bits, + VC4_LOADSTORE_TILE_BUFFER_TILING); + uint8_t buffer = VC4_GET_FIELD(surf->bits, + VC4_LOADSTORE_TILE_BUFFER_BUFFER); + uint8_t format = VC4_GET_FIELD(surf->bits, + VC4_LOADSTORE_TILE_BUFFER_FORMAT); + int cpp; + + if (surf->pad != 0) { + DRM_ERROR("Padding unset\n"); + return -EINVAL; + } + + if (surf->hindex == ~0) + return 0; + + if (!vc4_use_bo(exec, surf->hindex, VC4_MODE_RENDER, obj)) + return -EINVAL; + + if (surf->bits & ~(VC4_LOADSTORE_TILE_BUFFER_TILING_MASK | + VC4_LOADSTORE_TILE_BUFFER_BUFFER_MASK | + VC4_LOADSTORE_TILE_BUFFER_FORMAT_MASK)) { + DRM_ERROR("Unknown bits in load/store: 0x%04x\n", + surf->bits); + return -EINVAL; + } + + if (tiling > VC4_TILING_FORMAT_LT) { + DRM_ERROR("Bad tiling format\n"); + return -EINVAL; + } + + if (buffer == VC4_LOADSTORE_TILE_BUFFER_ZS) { + if (format != 0) { + DRM_ERROR("No color format should be set for ZS\n"); + return -EINVAL; + } + cpp = 4; + } else if (buffer == VC4_LOADSTORE_TILE_BUFFER_COLOR) { + switch (format) { + case VC4_LOADSTORE_TILE_BUFFER_BGR565: + case VC4_LOADSTORE_TILE_BUFFER_BGR565_DITHER: + cpp = 2; + break; + case VC4_LOADSTORE_TILE_BUFFER_RGBA8888: + cpp = 4; + break; + default: + DRM_ERROR("Bad tile buffer format\n"); + return -EINVAL; + } + } else { + DRM_ERROR("Bad load/store buffer %d.\n", buffer); + return -EINVAL; + } + + if (surf->offset & 0xf) { + DRM_ERROR("load/store buffer must be 16b aligned.\n"); + return -EINVAL; + } + + if (!vc4_check_tex_size(exec, *obj, surf->offset, tiling, + exec->args->width, exec->args->height, cpp)) { + return -EINVAL; + } + + return 0; +} + +static int +vc4_rcl_ms_surface_setup(struct vc4_exec_info *exec, + struct drm_gem_cma_object **obj, + struct drm_vc4_submit_rcl_surface *surf) +{ + uint8_t tiling = VC4_GET_FIELD(surf->bits, + VC4_RENDER_CONFIG_MEMORY_FORMAT); + uint8_t format = VC4_GET_FIELD(surf->bits, + VC4_RENDER_CONFIG_FORMAT); + int cpp; + + if (surf->pad != 0) { + DRM_ERROR("Padding unset\n"); + return -EINVAL; + } + + if (surf->bits & ~(VC4_RENDER_CONFIG_MEMORY_FORMAT_MASK | + VC4_RENDER_CONFIG_FORMAT_MASK)) { + DRM_ERROR("Unknown bits in render config: 0x%04x\n", + surf->bits); + return -EINVAL; + } + + if (surf->hindex == ~0) + return 0; + + if (!vc4_use_bo(exec, surf->hindex, VC4_MODE_RENDER, obj)) + return -EINVAL; + + if (tiling > VC4_TILING_FORMAT_LT) { + DRM_ERROR("Bad tiling format\n"); + return -EINVAL; + } + + switch (format) { + case VC4_RENDER_CONFIG_FORMAT_BGR565_DITHERED: + case VC4_RENDER_CONFIG_FORMAT_BGR565: + cpp = 2; + break; + case VC4_RENDER_CONFIG_FORMAT_RGBA8888: + cpp = 4; + break; + default: + DRM_ERROR("Bad tile buffer format\n"); + return -EINVAL; + } + + if (!vc4_check_tex_size(exec, *obj, surf->offset, tiling, + exec->args->width, exec->args->height, cpp)) { + return -EINVAL; + } + + return 0; +} + +int vc4_get_rcl(struct drm_device *dev, struct vc4_exec_info *exec) +{ + struct vc4_rcl_setup setup = {0}; + struct drm_vc4_submit_cl *args = exec->args; + bool has_bin = args->bin_cl_size != 0; + int ret; + + if (args->min_x_tile > args->max_x_tile || + args->min_y_tile > args->max_y_tile) { + DRM_ERROR("Bad render tile set (%d,%d)-(%d,%d)\n", + args->min_x_tile, args->min_y_tile, + args->max_x_tile, args->max_y_tile); + return -EINVAL; + } + + if (has_bin && + (args->max_x_tile > exec->bin_tiles_x || + args->max_y_tile > exec->bin_tiles_y)) { + DRM_ERROR("Render tiles (%d,%d) outside of bin config (%d,%d)\n", + args->max_x_tile, args->max_y_tile, + exec->bin_tiles_x, exec->bin_tiles_y); + return -EINVAL; + } + + ret = vc4_rcl_surface_setup(exec, &setup.color_read, &args->color_read); + if (ret) + return ret; + + ret = vc4_rcl_ms_surface_setup(exec, &setup.color_ms_write, + &args->color_ms_write); + if (ret) + return ret; + + ret = vc4_rcl_surface_setup(exec, &setup.zs_read, &args->zs_read); + if (ret) + return ret; + + ret = vc4_rcl_surface_setup(exec, &setup.zs_write, &args->zs_write); + if (ret) + return ret; + + /* We shouldn't even have the job submitted to us if there's no + * surface to write out. + */ + if (!setup.color_ms_write && !setup.zs_write) { + DRM_ERROR("RCL requires color or Z/S write\n"); + return -EINVAL; + } + + return vc4_create_rcl_bo(dev, exec, &setup); +} diff --git a/drivers/gpu/drm/vc4/vc4_validate.c b/drivers/gpu/drm/vc4/vc4_validate.c index abd37867e4d2..2a825f0d0aad 100644 --- a/drivers/gpu/drm/vc4/vc4_validate.c +++ b/drivers/gpu/drm/vc4/vc4_validate.c @@ -39,6 +39,7 @@ * is where GEM relocation processing happens. */ +#include "uapi/drm/vc4_drm.h" #include "vc4_drv.h" #include "vc4_packet.h" @@ -94,7 +95,7 @@ size_is_lt(uint32_t width, uint32_t height, int cpp) height <= 4 * utile_height(cpp)); } -static bool +bool vc4_use_bo(struct vc4_exec_info *exec, uint32_t hindex, enum vc4_bo_mode mode, @@ -147,10 +148,10 @@ gl_shader_rec_size(uint32_t pointer_bits) return 36 + attribute_count * 8; } -static bool -check_tex_size(struct vc4_exec_info *exec, struct drm_gem_cma_object *fbo, - uint32_t offset, uint8_t tiling_format, - uint32_t width, uint32_t height, uint8_t cpp) +bool +vc4_check_tex_size(struct vc4_exec_info *exec, struct drm_gem_cma_object *fbo, + uint32_t offset, uint8_t tiling_format, + uint32_t width, uint32_t height, uint8_t cpp) { uint32_t aligned_width, aligned_height, stride, size; uint32_t utile_w = utile_width(cpp); @@ -248,118 +249,6 @@ validate_increment_semaphore(VALIDATE_ARGS) } static int -validate_wait_on_semaphore(VALIDATE_ARGS) -{ - if (exec->found_wait_on_semaphore_packet) { - DRM_ERROR("Duplicate VC4_PACKET_WAIT_ON_SEMAPHORE\n"); - return -EINVAL; - } - exec->found_wait_on_semaphore_packet = true; - - if (!exec->found_increment_semaphore_packet) { - DRM_ERROR("VC4_PACKET_WAIT_ON_SEMAPHORE without " - "VC4_PACKET_INCREMENT_SEMAPHORE\n"); - return -EINVAL; - } - - return 0; -} - -static int -validate_branch_to_sublist(VALIDATE_ARGS) -{ - uint32_t offset; - - if (!exec->tile_alloc_bo) { - DRM_ERROR("VC4_PACKET_BRANCH_TO_SUB_LIST seen before " - "binner setup\n"); - return -EINVAL; - } - - if (!exec->found_wait_on_semaphore_packet) { - DRM_ERROR("Jumping to tile alloc before binning finished.\n"); - return -EINVAL; - } - - offset = *(uint32_t *)(untrusted + 0); - if (offset & exec->tile_alloc_init_block_mask || - offset > exec->tile_alloc_init_block_last) { - DRM_ERROR("VC4_PACKET_BRANCH_TO_SUB_LIST must jump to initial " - "tile allocation space.\n"); - return -EINVAL; - } - - *(uint32_t *)(validated + 0) = exec->tile_alloc_bo->paddr + offset; - - return 0; -} - -/** - * validate_loadstore_tile_buffer_general() - Validation for - * VC4_PACKET_LOAD_TILE_BUFFER_GENERAL and - * VC4_PACKET_STORE_TILE_BUFFER_GENERAL. - * - * The two packets are nearly the same, except for the TLB-clearing management - * bits not being present for loads. Additionally, while stores are executed - * immediately (using the current tile coordinates), loads are queued to be - * executed when the tile coordinates packet occurs. - * - * Note that coordinates packets are validated to be within the declared - * bin_x/y, which themselves are verified to match the rendering-configuration - * FB width and height (which the hardware uses to clip loads and stores). - */ -static int -validate_loadstore_tile_buffer_general(VALIDATE_ARGS) -{ - uint16_t packet_b01 = *(uint16_t *)(untrusted + 0); - struct drm_gem_cma_object *fbo; - uint32_t buffer_type = VC4_GET_FIELD(packet_b01, - VC4_LOADSTORE_TILE_BUFFER_BUFFER); - uint32_t untrusted_address, offset, cpp; - - switch (buffer_type) { - case VC4_LOADSTORE_TILE_BUFFER_NONE: - return 0; - case VC4_LOADSTORE_TILE_BUFFER_COLOR: - if (VC4_GET_FIELD(packet_b01, - VC4_LOADSTORE_TILE_BUFFER_FORMAT) == - VC4_LOADSTORE_TILE_BUFFER_RGBA8888) { - cpp = 4; - } else { - cpp = 2; - } - break; - - case VC4_LOADSTORE_TILE_BUFFER_Z: - case VC4_LOADSTORE_TILE_BUFFER_ZS: - cpp = 4; - break; - - default: - DRM_ERROR("Load/store type %d unsupported\n", buffer_type); - return -EINVAL; - } - - if (!vc4_use_handle(exec, 0, VC4_MODE_RENDER, &fbo)) - return -EINVAL; - - untrusted_address = *(uint32_t *)(untrusted + 2); - offset = untrusted_address & ~0xf; - - if (!check_tex_size(exec, fbo, offset, - VC4_GET_FIELD(packet_b01, - VC4_LOADSTORE_TILE_BUFFER_TILING), - exec->fb_width, exec->fb_height, cpp)) { - return -EINVAL; - } - - *(uint32_t *)(validated + 2) = (offset + fbo->paddr + - (untrusted_address & 0xf)); - - return 0; -} - -static int validate_indexed_prim_list(VALIDATE_ARGS) { struct drm_gem_cma_object *ib; @@ -552,9 +441,6 @@ validate_tile_binning_config(VALIDATE_ARGS) tile_allocation_size); return -EINVAL; } - exec->tile_alloc_init_block_mask = tile_alloc_init_block_size - 1; - exec->tile_alloc_init_block_last = tile_alloc_init_block_size * - (exec->bin_tiles_x * exec->bin_tiles_y - 1); if (*(uint32_t *)(untrusted + 8) != 0) { DRM_ERROR("TSDA offset != 0 unsupported\n"); @@ -572,141 +458,66 @@ validate_tile_binning_config(VALIDATE_ARGS) } static int -validate_tile_rendering_mode_config(VALIDATE_ARGS) -{ - struct drm_gem_cma_object *fbo; - uint32_t flags, offset, cpp; - - if (exec->found_tile_rendering_mode_config_packet) { - DRM_ERROR("Duplicate VC4_PACKET_TILE_RENDERING_MODE_CONFIG\n"); - return -EINVAL; - } - exec->found_tile_rendering_mode_config_packet = true; - - if (!vc4_use_handle(exec, 0, VC4_MODE_RENDER, &fbo)) - return -EINVAL; - - exec->fb_width = *(uint16_t *)(untrusted + 4); - exec->fb_height = *(uint16_t *)(untrusted + 6); - - flags = *(uint16_t *)(untrusted + 8); - if (VC4_GET_FIELD(flags, VC4_RENDER_CONFIG_FORMAT) == - VC4_RENDER_CONFIG_FORMAT_RGBA8888) { - cpp = 4; - } else { - cpp = 2; - } - - offset = *(uint32_t *)untrusted; - if (!check_tex_size(exec, fbo, offset, - VC4_GET_FIELD(flags, - VC4_RENDER_CONFIG_MEMORY_FORMAT), - exec->fb_width, exec->fb_height, cpp)) { - return -EINVAL; - } - - *(uint32_t *)validated = fbo->paddr + offset; - - return 0; -} - -static int -validate_tile_coordinates(VALIDATE_ARGS) -{ - uint8_t tile_x = *(uint8_t *)(untrusted + 0); - uint8_t tile_y = *(uint8_t *)(untrusted + 1); - - if (tile_x * 64 >= exec->fb_width || tile_y * 64 >= exec->fb_height) { - DRM_ERROR("Tile coordinates %d,%d > render config %dx%d\n", - tile_x, tile_y, exec->fb_width, exec->fb_height); - return -EINVAL; - } - - return 0; -} - -static int validate_gem_handles(VALIDATE_ARGS) { memcpy(exec->bo_index, untrusted, sizeof(exec->bo_index)); return 0; } -#define VC4_DEFINE_PACKET(packet, bin, render, name, func) \ - [packet] = { bin, render, packet ## _SIZE, name, func } +#define VC4_DEFINE_PACKET(packet, name, func) \ + [packet] = { packet ## _SIZE, name, func } static const struct cmd_info { - bool bin; - bool render; uint16_t len; const char *name; int (*func)(struct vc4_exec_info *exec, void *validated, void *untrusted); } cmd_info[] = { - VC4_DEFINE_PACKET(VC4_PACKET_HALT, 1, 1, "halt", NULL), - VC4_DEFINE_PACKET(VC4_PACKET_NOP, 1, 1, "nop", NULL), - VC4_DEFINE_PACKET(VC4_PACKET_FLUSH, 1, 1, "flush", NULL), - VC4_DEFINE_PACKET(VC4_PACKET_FLUSH_ALL, 1, 0, "flush all state", validate_flush_all), - VC4_DEFINE_PACKET(VC4_PACKET_START_TILE_BINNING, 1, 0, "start tile binning", validate_start_tile_binning), - VC4_DEFINE_PACKET(VC4_PACKET_INCREMENT_SEMAPHORE, 1, 0, "increment semaphore", validate_increment_semaphore), - VC4_DEFINE_PACKET(VC4_PACKET_WAIT_ON_SEMAPHORE, 0, 1, "wait on semaphore", validate_wait_on_semaphore), - /* BRANCH_TO_SUB_LIST is actually supported in the binner as well, but - * we only use it from the render CL in order to jump into the tile - * allocation BO. - */ - VC4_DEFINE_PACKET(VC4_PACKET_BRANCH_TO_SUB_LIST, 0, 1, "branch to sublist", validate_branch_to_sublist), - VC4_DEFINE_PACKET(VC4_PACKET_STORE_MS_TILE_BUFFER, 0, 1, "store MS resolved tile color buffer", NULL), - VC4_DEFINE_PACKET(VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF, 0, 1, "store MS resolved tile color buffer and EOF", NULL), - - VC4_DEFINE_PACKET(VC4_PACKET_STORE_TILE_BUFFER_GENERAL, 0, 1, "Store Tile Buffer General", validate_loadstore_tile_buffer_general), - VC4_DEFINE_PACKET(VC4_PACKET_LOAD_TILE_BUFFER_GENERAL, 0, 1, "Load Tile Buffer General", validate_loadstore_tile_buffer_general), + VC4_DEFINE_PACKET(VC4_PACKET_HALT, "halt", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_NOP, "nop", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_FLUSH, "flush", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_FLUSH_ALL, "flush all state", validate_flush_all), + VC4_DEFINE_PACKET(VC4_PACKET_START_TILE_BINNING, "start tile binning", validate_start_tile_binning), + VC4_DEFINE_PACKET(VC4_PACKET_INCREMENT_SEMAPHORE, "increment semaphore", validate_increment_semaphore), - VC4_DEFINE_PACKET(VC4_PACKET_GL_INDEXED_PRIMITIVE, 1, 1, "Indexed Primitive List", validate_indexed_prim_list), + VC4_DEFINE_PACKET(VC4_PACKET_GL_INDEXED_PRIMITIVE, "Indexed Primitive List", validate_indexed_prim_list), - VC4_DEFINE_PACKET(VC4_PACKET_GL_ARRAY_PRIMITIVE, 1, 1, "Vertex Array Primitives", validate_gl_array_primitive), + VC4_DEFINE_PACKET(VC4_PACKET_GL_ARRAY_PRIMITIVE, "Vertex Array Primitives", validate_gl_array_primitive), /* This is only used by clipped primitives (packets 48 and 49), which * we don't support parsing yet. */ - VC4_DEFINE_PACKET(VC4_PACKET_PRIMITIVE_LIST_FORMAT, 1, 1, "primitive list format", NULL), - - VC4_DEFINE_PACKET(VC4_PACKET_GL_SHADER_STATE, 1, 1, "GL Shader State", validate_gl_shader_state), - VC4_DEFINE_PACKET(VC4_PACKET_NV_SHADER_STATE, 1, 1, "NV Shader State", validate_nv_shader_state), - - VC4_DEFINE_PACKET(VC4_PACKET_CONFIGURATION_BITS, 1, 1, "configuration bits", NULL), - VC4_DEFINE_PACKET(VC4_PACKET_FLAT_SHADE_FLAGS, 1, 1, "flat shade flags", NULL), - VC4_DEFINE_PACKET(VC4_PACKET_POINT_SIZE, 1, 1, "point size", NULL), - VC4_DEFINE_PACKET(VC4_PACKET_LINE_WIDTH, 1, 1, "line width", NULL), - VC4_DEFINE_PACKET(VC4_PACKET_RHT_X_BOUNDARY, 1, 1, "RHT X boundary", NULL), - VC4_DEFINE_PACKET(VC4_PACKET_DEPTH_OFFSET, 1, 1, "Depth Offset", NULL), - VC4_DEFINE_PACKET(VC4_PACKET_CLIP_WINDOW, 1, 1, "Clip Window", NULL), - VC4_DEFINE_PACKET(VC4_PACKET_VIEWPORT_OFFSET, 1, 1, "Viewport Offset", NULL), - VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_XY_SCALING, 1, 1, "Clipper XY Scaling", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_PRIMITIVE_LIST_FORMAT, "primitive list format", NULL), + + VC4_DEFINE_PACKET(VC4_PACKET_GL_SHADER_STATE, "GL Shader State", validate_gl_shader_state), + VC4_DEFINE_PACKET(VC4_PACKET_NV_SHADER_STATE, "NV Shader State", validate_nv_shader_state), + + VC4_DEFINE_PACKET(VC4_PACKET_CONFIGURATION_BITS, "configuration bits", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_FLAT_SHADE_FLAGS, "flat shade flags", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_POINT_SIZE, "point size", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_LINE_WIDTH, "line width", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_RHT_X_BOUNDARY, "RHT X boundary", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_DEPTH_OFFSET, "Depth Offset", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_CLIP_WINDOW, "Clip Window", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_VIEWPORT_OFFSET, "Viewport Offset", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_XY_SCALING, "Clipper XY Scaling", NULL), /* Note: The docs say this was also 105, but it was 106 in the * initial userland code drop. */ - VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_Z_SCALING, 1, 1, "Clipper Z Scale and Offset", NULL), - - VC4_DEFINE_PACKET(VC4_PACKET_TILE_BINNING_MODE_CONFIG, 1, 0, "tile binning configuration", validate_tile_binning_config), + VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_Z_SCALING, "Clipper Z Scale and Offset", NULL), - VC4_DEFINE_PACKET(VC4_PACKET_TILE_RENDERING_MODE_CONFIG, 0, 1, "tile rendering mode configuration", validate_tile_rendering_mode_config), + VC4_DEFINE_PACKET(VC4_PACKET_TILE_BINNING_MODE_CONFIG, "tile binning configuration", validate_tile_binning_config), - VC4_DEFINE_PACKET(VC4_PACKET_CLEAR_COLORS, 0, 1, "Clear Colors", NULL), - - VC4_DEFINE_PACKET(VC4_PACKET_TILE_COORDINATES, 0, 1, "Tile Coordinates", validate_tile_coordinates), - - VC4_DEFINE_PACKET(VC4_PACKET_GEM_HANDLES, 1, 1, "GEM handles", validate_gem_handles), + VC4_DEFINE_PACKET(VC4_PACKET_GEM_HANDLES, "GEM handles", validate_gem_handles), }; int -vc4_validate_cl(struct drm_device *dev, - void *validated, - void *unvalidated, - uint32_t len, - bool is_bin, - bool has_bin, - struct vc4_exec_info *exec) +vc4_validate_bin_cl(struct drm_device *dev, + void *validated, + void *unvalidated, + struct vc4_exec_info *exec) { + uint32_t len = exec->args->bin_cl_size; uint32_t dst_offset = 0; uint32_t src_offset = 0; @@ -734,14 +545,6 @@ vc4_validate_cl(struct drm_device *dev, src_offset, cmd, info->name, info->len); #endif - if ((is_bin && !info->bin) || - (!is_bin && !info->render)) { - DRM_ERROR("0x%08x: packet %d (%s) invalid for %s\n", - src_offset, cmd, info->name, - is_bin ? "binner" : "render"); - return -EINVAL; - } - if (src_offset + info->len > len) { DRM_ERROR("0x%08x: packet %d (%s) length 0x%08x " "exceeds bounds (0x%08x)\n", @@ -772,30 +575,16 @@ vc4_validate_cl(struct drm_device *dev, break; } - if (is_bin) { - exec->ct0ea = exec->ct0ca + dst_offset; + exec->ct0ea = exec->ct0ca + dst_offset; - if (has_bin && !exec->found_start_tile_binning_packet) { - DRM_ERROR("Bin CL missing VC4_PACKET_START_TILE_BINNING\n"); - return -EINVAL; - } - } else { - if (!exec->found_tile_rendering_mode_config_packet) { - DRM_ERROR("Render CL missing VC4_PACKET_TILE_RENDERING_MODE_CONFIG\n"); - return -EINVAL; - } + if (!exec->found_start_tile_binning_packet) { + DRM_ERROR("Bin CL missing VC4_PACKET_START_TILE_BINNING\n"); + return -EINVAL; + } - /* Make sure that they actually consumed the semaphore - * increment from the bin CL. Otherwise a later submit would - * have render execute immediately. - */ - if (exec->found_wait_on_semaphore_packet != has_bin) { - DRM_ERROR("Render CL %s VC4_PACKET_WAIT_ON_SEMAPHORE\n", - exec->found_wait_on_semaphore_packet ? - "has" : "missing"); - return -EINVAL; - } - exec->ct1ea = exec->ct1ca + dst_offset; + if (!exec->found_increment_semaphore_packet) { + DRM_ERROR("Bin CL missing VC4_PACKET_INCREMENT_SEMAPHORE\n"); + return -EINVAL; } return 0; @@ -910,8 +699,8 @@ reloc_tex(struct vc4_exec_info *exec, tiling_format = VC4_TILING_FORMAT_T; } - if (!check_tex_size(exec, tex, offset + cube_map_stride * 5, - tiling_format, width, height, cpp)) { + if (!vc4_check_tex_size(exec, tex, offset + cube_map_stride * 5, + tiling_format, width, height, cpp)) { return false; } diff --git a/include/uapi/drm/vc4_drm.h b/include/uapi/drm/vc4_drm.h index 9dd54403a5d8..4d55307c5107 100644 --- a/include/uapi/drm/vc4_drm.h +++ b/include/uapi/drm/vc4_drm.h @@ -38,6 +38,15 @@ #define DRM_IOCTL_VC4_CREATE_BO DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_CREATE_BO, struct drm_vc4_create_bo) #define DRM_IOCTL_VC4_MMAP_BO DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_MMAP_BO, struct drm_vc4_mmap_bo) +struct drm_vc4_submit_rcl_surface { + uint32_t hindex; /* Handle index, or ~0 if not present. */ + uint32_t offset; /* Offset to start of buffer. */ + /* + * Bits for either render config (color_ms_write) or load/store packet. + */ + uint16_t bits; + uint16_t pad; +}; /** * struct drm_vc4_submit_cl - ioctl argument for submitting commands to the 3D @@ -62,16 +71,6 @@ struct drm_vc4_submit_cl { */ uint64_t bin_cl; - /* Pointer to the render command list. - * - * The render command list contains a set of packets to load the - * current tile's state (reading from memory, or just clearing it) - * into the GPU, then call into the tile allocation BO to run the - * stored rendering for that tile, then store the tile's state back to - * memory. - */ - uint64_t render_cl; - /* Pointer to the shader records. * * Shader records are the structures read by the hardware that contain @@ -102,8 +101,6 @@ struct drm_vc4_submit_cl { /* Size in bytes of the binner command list. */ uint32_t bin_cl_size; - /* Size in bytes of the render command list */ - uint32_t render_cl_size; /* Size in bytes of the set of shader records. */ uint32_t shader_rec_size; /* Number of shader records. @@ -119,8 +116,25 @@ struct drm_vc4_submit_cl { /* Number of BO handles passed in (size is that times 4). */ uint32_t bo_handle_count; + /* RCL setup: */ + uint16_t width; + uint16_t height; + uint8_t min_x_tile; + uint8_t min_y_tile; + uint8_t max_x_tile; + uint8_t max_y_tile; + struct drm_vc4_submit_rcl_surface color_read; + struct drm_vc4_submit_rcl_surface color_ms_write; + struct drm_vc4_submit_rcl_surface zs_read; + struct drm_vc4_submit_rcl_surface zs_write; + uint32_t clear_color[2]; + uint32_t clear_z; + uint8_t clear_s; + + uint32_t pad:24; + +#define VC4_SUBMIT_CL_USE_CLEAR_COLOR (1 << 0) uint32_t flags; - uint32_t pad; /* Returned value of the seqno of this render job (for the * wait ioctl). |