summaryrefslogtreecommitdiff
path: root/src/amd/vulkan/radv_device.c
diff options
context:
space:
mode:
authorTatsuyuki Ishi <ishitatsuyuki@gmail.com>2023-02-24 14:56:20 +0900
committerMarge Bot <emma+marge@anholt.net>2023-03-16 18:02:57 +0000
commita8c5fd3b1bbe279cbb7794486d817a6cab626846 (patch)
tree3de204991a3aa555d49cb939405321fb8364e8f0 /src/amd/vulkan/radv_device.c
parent3b258ae2d96b3cb7195561902193c174516b0b5f (diff)
downloadmesa-a8c5fd3b1bbe279cbb7794486d817a6cab626846.tar.gz
radv: Upload shaders to invisible VRAM on small BAR systems.
Following PAL's implementation, this patch avoids allocating shader code buffers in BAR and uses SDMA to upload them to invisible VRAM directly. For some games like HZD, shaders can take as much as 400MB, which exceeds the non-resizable BAR size (256MB) and causes inconsistent spilling behavior. The kernel will normally move these to invisible VRAM on its own, but there are a few cases in which this does not reliably happen. This patch does the moving explicitly in the driver to ensure predictable results. In this patch, we upload the shaders synchronously, so the shader will be ready as soon as vkCreate*Pipeline returns. A following patch will make this asynchronous and will not block until we see a use of the pipeline. As a side effect, when SQTT is used we now store the shaders in a cacheable buffer, which speeds up writing the trace to disk. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16271>
Diffstat (limited to 'src/amd/vulkan/radv_device.c')
-rw-r--r--src/amd/vulkan/radv_device.c23
1 files changed, 20 insertions, 3 deletions
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index b392909c6a7..8895b89dc1d 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -50,6 +50,8 @@
#include "radv_private.h"
#include "radv_shader.h"
#include "vk_util.h"
+#include "vk_common_entrypoints.h"
+#include "vk_semaphore.h"
#ifdef _WIN32
typedef void *drmDevicePtr;
#include <io.h>
@@ -805,7 +807,7 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
result = device->ws->ctx_create(device->ws, priority, &device->hw_ctx[priority]);
if (result != VK_SUCCESS)
- goto fail;
+ goto fail_queue;
}
for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
@@ -819,7 +821,7 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
if (!device->queues[qfi]) {
result = VK_ERROR_OUT_OF_HOST_MEMORY;
- goto fail;
+ goto fail_queue;
}
memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));
@@ -829,11 +831,19 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
for (unsigned q = 0; q < queue_create->queueCount; q++) {
result = radv_queue_init(device, &device->queues[qfi][q], q, queue_create, global_priority);
if (result != VK_SUCCESS)
- goto fail;
+ goto fail_queue;
}
}
device->private_sdma_queue = VK_NULL_HANDLE;
+ device->shader_use_invisible_vram =
+ (device->instance->perftest_flags & RADV_PERFTEST_DMA_SHADERS) &&
+ /* SDMA buffer copy is only implemented for GFX7+. */
+ device->physical_device->rad_info.gfx_level >= GFX7;
+ result = radv_init_shader_upload_queue(device);
+ if (result != VK_SUCCESS)
+ goto fail;
+
device->pbb_allowed = device->physical_device->rad_info.gfx_level >= GFX9 &&
!(device->instance->debug_flags & RADV_DEBUG_NOBINNING);
@@ -1081,6 +1091,9 @@ fail:
radv_device_finish_ps_epilogs(device);
radv_device_finish_border_color(device);
+ radv_destroy_shader_upload_queue(device);
+
+fail_queue:
for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
for (unsigned q = 0; q < device->queue_count[i]; q++)
radv_queue_finish(&device->queues[i][q]);
@@ -1093,6 +1106,8 @@ fail:
device->ws->ctx_destroy(device->hw_ctx[i]);
}
+ radv_destroy_shader_arenas(device);
+
_mesa_hash_table_destroy(device->rt_handles, NULL);
simple_mtx_destroy(&device->pstate_mtx);
@@ -1154,6 +1169,8 @@ radv_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
VkPipelineCache pc = radv_pipeline_cache_to_handle(device->mem_cache);
radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);
+ radv_destroy_shader_upload_queue(device);
+
radv_trap_handler_finish(device);
radv_finish_trace(device);