diff options
author | Samuel Pitoiset <samuel.pitoiset@gmail.com> | 2019-07-25 15:38:51 +0200 |
---|---|---|
committer | Samuel Pitoiset <samuel.pitoiset@gmail.com> | 2019-07-25 15:48:21 +0200 |
commit | 6a504ab4733a97f2f1d5790c36ab380c962af5b3 (patch) | |
tree | 474b0ffc6984f3951d3ceb7cf5bf34d3dbcf32d9 /src/amd | |
parent | 9ce75826cb00a252c6012d74046fa15bf0998080 (diff) | |
download | mesa-6a504ab4733a97f2f1d5790c36ab380c962af5b3.tar.gz |
radv/gfx10: use L2 for DMA copy/fill operations
It's coherent and faster. GFX7-GFX9 should also support this but
for now only uses L2 for GFX10 because it's untested on previous gens.
This fixes dEQP-VK.memory.pipeline_barrier.transfer_*
This also fixes some missing geometry in Dawn Of War III because
VBOs weren't updated correctly.
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Diffstat (limited to 'src/amd')
-rw-r--r-- | src/amd/vulkan/si_cmd_buffer.c | 16 |
1 files changed, 16 insertions, 0 deletions
diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c index 21a90cb2514..94f759139ee 100644 --- a/src/amd/vulkan/si_cmd_buffer.c +++ b/src/amd/vulkan/si_cmd_buffer.c @@ -1501,6 +1501,14 @@ void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer, unsigned dma_flags = 0; unsigned byte_count = MIN2(size, cp_dma_max_byte_count(cmd_buffer)); + if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) { + /* DMA operations via L2 are coherent and faster. + * TODO: GFX7-GFX9 should also support this but it + * requires tests/benchmarks. + */ + dma_flags |= CP_DMA_USE_L2; + } + si_cp_dma_prepare(cmd_buffer, byte_count, size + skipped_size + realign_size, &dma_flags); @@ -1545,6 +1553,14 @@ void si_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va, unsigned byte_count = MIN2(size, cp_dma_max_byte_count(cmd_buffer)); unsigned dma_flags = CP_DMA_CLEAR; + if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) { + /* DMA operations via L2 are coherent and faster. + * TODO: GFX7-GFX9 should also support this but it + * requires tests/benchmarks. + */ + dma_flags |= CP_DMA_USE_L2; + } + si_cp_dma_prepare(cmd_buffer, byte_count, size, &dma_flags); /* Emit the clear packet. */ |