diff options
author | Tatsuyuki Ishi <ishitatsuyuki@gmail.com> | 2023-02-24 14:56:20 +0900 |
---|---|---|
committer | Marge Bot <emma+marge@anholt.net> | 2023-03-16 18:02:57 +0000 |
commit | a8c5fd3b1bbe279cbb7794486d817a6cab626846 (patch) | |
tree | 3de204991a3aa555d49cb939405321fb8364e8f0 /src/amd/vulkan/radv_device.c | |
parent | 3b258ae2d96b3cb7195561902193c174516b0b5f (diff) | |
download | mesa-a8c5fd3b1bbe279cbb7794486d817a6cab626846.tar.gz |
radv: Upload shaders to invisible VRAM on small BAR systems.
Following PAL's implementation, this patch avoids allocating shader code
buffers in BAR and instead uses SDMA to upload them directly to invisible
VRAM.
For some games like HZD, shaders can take as much as 400MB, which exceeds
the non-resizable BAR size (256MB) and causes inconsistent spilling
behavior. The kernel will normally move these to invisible VRAM on its own,
but there are a few cases where this does not reliably happen. This patch
does the moving explicitly in the driver to ensure predictable results.
In this patch, we upload the shaders synchronously, so the shaders will be
ready as soon as vkCreate*Pipeline returns. A following patch will make
this asynchronous and will not block until we see a use of the pipeline.
As a side effect, when SQTT is used we now store the shaders in a cacheable
buffer, which should speed up writing the trace to disk.
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16271>
Diffstat (limited to 'src/amd/vulkan/radv_device.c')
-rw-r--r-- | src/amd/vulkan/radv_device.c | 23 |
1 files changed, 20 insertions, 3 deletions
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index b392909c6a7..8895b89dc1d 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -50,6 +50,8 @@ #include "radv_private.h" #include "radv_shader.h" #include "vk_util.h" +#include "vk_common_entrypoints.h" +#include "vk_semaphore.h" #ifdef _WIN32 typedef void *drmDevicePtr; #include <io.h> @@ -805,7 +807,7 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr result = device->ws->ctx_create(device->ws, priority, &device->hw_ctx[priority]); if (result != VK_SUCCESS) - goto fail; + goto fail_queue; } for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) { @@ -819,7 +821,7 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); if (!device->queues[qfi]) { result = VK_ERROR_OUT_OF_HOST_MEMORY; - goto fail; + goto fail_queue; } memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue)); @@ -829,11 +831,19 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr for (unsigned q = 0; q < queue_create->queueCount; q++) { result = radv_queue_init(device, &device->queues[qfi][q], q, queue_create, global_priority); if (result != VK_SUCCESS) - goto fail; + goto fail_queue; } } device->private_sdma_queue = VK_NULL_HANDLE; + device->shader_use_invisible_vram = + (device->instance->perftest_flags & RADV_PERFTEST_DMA_SHADERS) && + /* SDMA buffer copy is only implemented for GFX7+. 
*/ + device->physical_device->rad_info.gfx_level >= GFX7; + result = radv_init_shader_upload_queue(device); + if (result != VK_SUCCESS) + goto fail; + device->pbb_allowed = device->physical_device->rad_info.gfx_level >= GFX9 && !(device->instance->debug_flags & RADV_DEBUG_NOBINNING); @@ -1081,6 +1091,9 @@ fail: radv_device_finish_ps_epilogs(device); radv_device_finish_border_color(device); + radv_destroy_shader_upload_queue(device); + +fail_queue: for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) { for (unsigned q = 0; q < device->queue_count[i]; q++) radv_queue_finish(&device->queues[i][q]); @@ -1093,6 +1106,8 @@ fail: device->ws->ctx_destroy(device->hw_ctx[i]); } + radv_destroy_shader_arenas(device); + _mesa_hash_table_destroy(device->rt_handles, NULL); simple_mtx_destroy(&device->pstate_mtx); @@ -1154,6 +1169,8 @@ radv_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator) VkPipelineCache pc = radv_pipeline_cache_to_handle(device->mem_cache); radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL); + radv_destroy_shader_upload_queue(device); + radv_trap_handler_finish(device); radv_finish_trace(device); |