author | Rob Clark <robdclark@chromium.org> | 2022-12-02 18:42:42 -0800
---|---|---
committer | Marge Bot <emma+marge@anholt.net> | 2022-12-17 19:14:12 +0000
commit | 48610676897d989ff8ce604022e7d611cec9c131 (patch) |
tree | 83adfa63b37ecde0a98508c68b661302e8a1d338 |
parent | 7d0d82f25ffa88b817ecfbd974cd1eff26ac1e0d (diff) |
download | mesa-48610676897d989ff8ce604022e7d611cec9c131.tar.gz |
freedreno/drm: Add sub-allocator
Add a heap that we can use for allocations of small mappable buffers.
This avoids the churn of mmap/unmap, which is especially expensive in
a VM. It also allows packing multiple smaller allocations into a
page, which is useful for PIPE_BUFFERs (which are also mappable).

This avoids the jank caused by the overhead of setting up or tearing
down guest mappings when running in a VM, and also significantly
reduces the number of BOs referenced on a submit.
Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20263>
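
For orientation before the diff: the heap is transparent to `fd_bo_new()` callers. The following is a sketch of the routing implied by the `bo_new()` hunk below (assuming an a6xx+ device where both heaps are created; buffer names are arbitrary), not an exhaustive contract:

```c
struct fd_bo *ubo = fd_bo_new(dev, 4096, 0, "ubo");
/* -> dev->default_heap: no new GEM object, and no mmap() on first CPU
 *    access, since the 4MB backing block is already mapped */

struct fd_bo *big = fd_bo_new(dev, 8 * 1024 * 1024, 0, "big");
/* -> normal kernel allocation: size is not < FD_BO_HEAP_BLOCK_SIZE */

struct fd_bo *shared = fd_bo_new(dev, 4096, FD_BO_SHARED, "scanout");
/* -> normal kernel allocation: the flag checks are exact equality, and
 *    shared buffers cannot rely on userspace fencing anyway */
```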
-rw-r--r-- | src/freedreno/drm/freedreno_bo.c             |  38
-rw-r--r-- | src/freedreno/drm/freedreno_bo_heap.c        | 284
-rw-r--r-- | src/freedreno/drm/freedreno_device.c         |  28
-rw-r--r-- | src/freedreno/drm/freedreno_drmif.h          |   1
-rw-r--r-- | src/freedreno/drm/freedreno_pipe.c           |   6
-rw-r--r-- | src/freedreno/drm/freedreno_priv.h           |  83
-rw-r--r-- | src/freedreno/drm/freedreno_ringbuffer_sp.c  |  46
-rw-r--r-- | src/freedreno/drm/freedreno_ringbuffer_sp.h  |  10
-rw-r--r-- | src/freedreno/drm/meson.build                |   1
-rw-r--r-- | src/freedreno/drm/msm/msm_ringbuffer.c       |   8
-rw-r--r-- | src/freedreno/drm/msm/msm_ringbuffer_sp.c    |   6
-rw-r--r-- | src/freedreno/drm/virtio/virtio_ringbuffer.c |   6

12 files changed, 491 insertions(+), 26 deletions(-)
```diff
diff --git a/src/freedreno/drm/freedreno_bo.c b/src/freedreno/drm/freedreno_bo.c
index d45f30dfa3c..c2ee1ba3bf0 100644
--- a/src/freedreno/drm/freedreno_bo.c
+++ b/src/freedreno/drm/freedreno_bo.c
@@ -114,6 +114,13 @@ bo_new(struct fd_device *dev, uint32_t size, uint32_t flags,
 {
    struct fd_bo *bo = NULL;
 
+   if (size < FD_BO_HEAP_BLOCK_SIZE) {
+      if ((flags == 0) && dev->default_heap)
+         return fd_bo_heap_alloc(dev->default_heap, size);
+      if ((flags == RING_FLAGS) && dev->ring_heap)
+         return fd_bo_heap_alloc(dev->ring_heap, size);
+   }
+
    /* demote cached-coherent to WC if not supported: */
    if ((flags & FD_BO_CACHED_COHERENT) && !dev->has_cached_coherent)
       flags &= ~FD_BO_CACHED_COHERENT;
@@ -278,13 +285,16 @@ bo_del_or_recycle(struct fd_bo *bo)
 {
    struct fd_device *dev = bo->dev;
 
-   if ((bo->bo_reuse == BO_CACHE) &&
-       (fd_bo_cache_free(&dev->bo_cache, bo) == 0))
-      return 0;
+   /* No point in BO cache for suballocated buffers: */
+   if (!suballoc_bo(bo)) {
+      if ((bo->bo_reuse == BO_CACHE) &&
+          (fd_bo_cache_free(&dev->bo_cache, bo) == 0))
+         return 0;
 
-   if ((bo->bo_reuse == RING_CACHE) &&
-       (fd_bo_cache_free(&dev->ring_cache, bo) == 0))
-      return 0;
+      if ((bo->bo_reuse == RING_CACHE) &&
+          (fd_bo_cache_free(&dev->ring_cache, bo) == 0))
+         return 0;
+   }
 
    return bo_del(bo);
 }
@@ -355,6 +365,16 @@ fd_bo_del_list_nocache(struct list_head *list)
    close_handles(dev, handles, cnt);
 }
 
+void
+fd_bo_fini_fences(struct fd_bo *bo)
+{
+   for (int i = 0; i < bo->nr_fences; i++)
+      fd_fence_del(bo->fences[i]);
+
+   if (bo->fences != &bo->_inline_fence)
+      free(bo->fences);
+}
+
 /**
  * Helper called by backends bo->funcs->destroy()
  *
@@ -371,11 +391,7 @@ fd_bo_fini_common(struct fd_bo *bo)
 
    VG_BO_FREE(bo);
 
-   for (int i = 0; i < bo->nr_fences; i++)
-      fd_fence_del(bo->fences[i]);
-
-   if (bo->fences != &bo->_inline_fence)
-      free(bo->fences);
+   fd_bo_fini_fences(bo);
 
    if (bo->map)
       os_munmap(bo->map, bo->size);
```
```diff
diff --git a/src/freedreno/drm/freedreno_bo_heap.c b/src/freedreno/drm/freedreno_bo_heap.c
new file mode 100644
index 00000000000..7d87c933076
--- /dev/null
+++ b/src/freedreno/drm/freedreno_bo_heap.c
@@ -0,0 +1,284 @@
+/*
+ * Copyright © 2022 Google, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "freedreno_drmif.h"
+#include "freedreno_priv.h"
+
+struct sa_bo {
+   struct fd_bo base;
+
+   struct fd_bo_heap *heap;
+   unsigned offset;
+};
+FD_DEFINE_CAST(fd_bo, sa_bo);
+
+#define HEAP_DEBUG 0
+
+static void heap_clean(struct fd_bo_heap *heap, bool idle);
+static void heap_dump(struct fd_bo_heap *heap);
+
+struct fd_bo_heap *
+fd_bo_heap_new(struct fd_device *dev, uint32_t flags)
+{
+   struct fd_bo_heap *heap;
+
+   /* We cannot suballocate shared buffers!  Implicit sync is not supported! */
+   assert(!(flags & FD_BO_SHARED));
+
+   /* No internal buffers either, we need userspace fencing: */
+   assert(!(flags & _FD_BO_NOSYNC));
+
+   heap = calloc(1, sizeof(*heap));
+
+   heap->dev = dev;
+   heap->flags = flags;
+   simple_mtx_init(&heap->lock, mtx_plain);
+   list_inithead(&heap->freelist);
+
+   /* Note that util_vma_heap_init doesn't like offset==0, so we shift the
+    * entire range by one block size (see block_idx()):
+    */
+   util_vma_heap_init(&heap->heap, FD_BO_HEAP_BLOCK_SIZE,
+                      FD_BO_HEAP_BLOCK_SIZE * ARRAY_SIZE(heap->blocks));
+   heap->heap.alloc_high = false;
+   heap->heap.nospan_shift = ffs(FD_BO_HEAP_BLOCK_SIZE) - 1;
+
+   heap_dump(heap);
+
+   return heap;
+}
+
+void fd_bo_heap_destroy(struct fd_bo_heap *heap)
+{
+   /* drain the freelist: */
+   heap_clean(heap, false);
+
+   util_vma_heap_finish(&heap->heap);
+   for (unsigned i = 0; i < ARRAY_SIZE(heap->blocks); i++)
+      if (heap->blocks[i])
+         fd_bo_del(heap->blocks[i]);
+   free(heap);
+}
+
+static bool
+sa_idle(struct fd_bo *bo)
+{
+   enum fd_bo_state state = fd_bo_state(bo);
+   assert(state != FD_BO_STATE_UNKNOWN);
+   return state == FD_BO_STATE_IDLE;
+}
+
+/**
+ * The backing block is determined by the offset within the heap, since all
+ * the blocks are equal size
+ */
+static unsigned
+block_idx(struct sa_bo *s)
+{
+   /* The vma allocator doesn't like offset=0 so the range is shifted up
+    * by one block size:
+    */
+   return (s->offset / FD_BO_HEAP_BLOCK_SIZE) - 1;
+}
+
+static unsigned
+block_offset(struct sa_bo *s)
+{
+   return s->offset % FD_BO_HEAP_BLOCK_SIZE;
+}
+
+static void
+heap_dump(struct fd_bo_heap *heap)
+{
+   if (!HEAP_DEBUG)
+      return;
+   fprintf(stderr, "HEAP[%x]: freelist: %u\n", heap->flags,
+           list_length(&heap->freelist));
+   util_vma_heap_print(&heap->heap, stderr, "",
+                       FD_BO_HEAP_BLOCK_SIZE * ARRAY_SIZE(heap->blocks));
+}
+
+static void
+sa_release(struct fd_bo *bo)
+{
+   struct sa_bo *s = to_sa_bo(bo);
+
+   simple_mtx_assert_locked(&s->heap->lock);
+
+   VG_BO_FREE(bo);
+
+   fd_bo_fini_fences(bo);
+
+   if (HEAP_DEBUG)
+      mesa_logi("release: %08x-%x idx=%d", s->offset, bo->size, block_idx(s));
+
+   util_vma_heap_free(&s->heap->heap, s->offset, bo->size);
+
+   /* Drop our reference to the backing block object: */
+   fd_bo_del(s->heap->blocks[block_idx(s)]);
+
+   list_del(&bo->node);
+
+   if ((++s->heap->cnt % 256) == 0)
+      heap_dump(s->heap);
+
+   free(bo);
+}
+
+static int
+sa_cpu_prep(struct fd_bo *bo, struct fd_pipe *pipe, uint32_t op)
+{
+   simple_mtx_lock(&fence_lock);
+   unsigned nr = bo->nr_fences;
+   struct fd_fence *fences[nr];
+   for (unsigned i = 0; i < nr; i++)
+      fences[i] = fd_fence_ref_locked(bo->fences[i]);
+   simple_mtx_unlock(&fence_lock);
+
+   for (unsigned i = 0; i < nr; i++) {
+      fd_fence_wait(fences[i]);
+      fd_fence_del(fences[i]);
+   }
+
+   /* expire completed fences */
+   fd_bo_state(bo);
+
+   assert(fd_bo_state(bo) == FD_BO_STATE_IDLE);
+
+   return 0;
+}
+
+static int
+sa_madvise(struct fd_bo *bo, int willneed)
+{
+   return willneed;
+}
+
+static uint64_t
+sa_iova(struct fd_bo *bo)
+{
+   struct sa_bo *s = to_sa_bo(bo);
+
+   return s->heap->blocks[block_idx(s)]->iova + block_offset(s);
+}
+
+static void
+sa_set_name(struct fd_bo *bo, const char *fmt, va_list ap)
+{
+   /* No-op, kernel has a single name for the entire buffer we suballoc from */
+}
+
+static void
+sa_destroy(struct fd_bo *bo)
+{
+   struct fd_bo_heap *heap = to_sa_bo(bo)->heap;
+
+   simple_mtx_lock(&heap->lock);
+   list_addtail(&bo->node, &heap->freelist);
+   simple_mtx_unlock(&heap->lock);
+}
+
+static struct fd_bo_funcs heap_bo_funcs = {
+   .cpu_prep = sa_cpu_prep,
+   .madvise = sa_madvise,
+   .iova = sa_iova,
+   .set_name = sa_set_name,
+   .destroy = sa_destroy,
+};
+
+/**
+ * Get the backing heap block of a suballocated bo
+ */
+struct fd_bo *
+fd_bo_heap_block(struct fd_bo *bo)
+{
+   assert(suballoc_bo(bo));
+
+   struct sa_bo *s = to_sa_bo(bo);
+   return s->heap->blocks[block_idx(s)];
+}
+
+static void
+heap_clean(struct fd_bo_heap *heap, bool idle)
+{
+   simple_mtx_lock(&heap->lock);
+   foreach_bo_safe (bo, &heap->freelist) {
+      /* It might be nice if we could keep freelist sorted by fence # */
+      if (idle && !sa_idle(bo))
+         continue;
+      sa_release(bo);
+   }
+   simple_mtx_unlock(&heap->lock);
+}
+
+struct fd_bo *
+fd_bo_heap_alloc(struct fd_bo_heap *heap, uint32_t size)
+{
+   heap_clean(heap, true);
+
+   struct sa_bo *s = calloc(1, sizeof(*s));
+
+   s->heap = heap;
+
+   /* util_vma does not like zero byte allocations, which we get, for
+    * ex, with the initial query buffer allocation on pre-a5xx:
+    */
+   size = MAX2(size, SUBALLOC_ALIGNMENT);
+
+   size = ALIGN(size, SUBALLOC_ALIGNMENT);
+
+   simple_mtx_lock(&heap->lock);
+   /* Allocate larger buffers from the bottom, and smaller buffers from top
+    * to help limit fragmentation:
+    *
+    * (The 8k threshold is just a random guess, but seems to work ok)
+    */
+   heap->heap.alloc_high = (size <= 8 * 1024);
+   s->offset = util_vma_heap_alloc(&heap->heap, size, SUBALLOC_ALIGNMENT);
+   assert((s->offset / FD_BO_HEAP_BLOCK_SIZE) ==
+          ((s->offset + size - 1) / FD_BO_HEAP_BLOCK_SIZE));
+   unsigned idx = block_idx(s);
+   if (HEAP_DEBUG)
+      mesa_logi("alloc: %08x-%x idx=%d", s->offset, size, idx);
+   if (!heap->blocks[idx]) {
+      heap->blocks[idx] = fd_bo_new(
+         heap->dev, FD_BO_HEAP_BLOCK_SIZE, heap->flags,
+         "heap-%x-block-%u", heap->flags, idx);
+   }
+   /* Take a reference to the backing obj: */
+   fd_bo_ref(heap->blocks[idx]);
+   simple_mtx_unlock(&heap->lock);
+
+   struct fd_bo *bo = &s->base;
+
+   bo->size = size;
+   bo->funcs = &heap_bo_funcs;
+   bo->handle = 1;   /* dummy handle to make fd_bo_init_common() happy */
+   bo->alloc_flags = heap->flags;
+
+   fd_bo_init_common(bo, heap->dev);
+
+   bo->handle = FD_BO_SUBALLOC_HANDLE;
+
+   /* Pre-initialize mmap ptr, to avoid trying to os_mmap() */
+   bo->map = ((uint8_t *)fd_bo_map(heap->blocks[idx])) + block_offset(s);
+
+   return bo;
+}
```
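
To make `block_idx()`/`block_offset()` concrete: heap offsets start at one block size (keeping offset 0 away from the vma allocator), so the backing block and the offset within it fall out of simple division. A standalone sketch with illustrative values:

```c
#include <assert.h>

#define BLOCK_SIZE (4u * 1024 * 1024) /* mirrors FD_BO_HEAP_BLOCK_SIZE */

int main(void)
{
   /* First possible allocation, at heap offset BLOCK_SIZE (0x400000):
    * block 0, offset 0 within that block.
    */
   unsigned heap_offset = 0x400000;
   assert(heap_offset / BLOCK_SIZE - 1 == 0);
   assert(heap_offset % BLOCK_SIZE == 0);

   /* An allocation at heap offset 0x9ff000 lands in the second backing
    * BO (block 1), 0x1ff000 bytes into it.
    */
   heap_offset = 0x9ff000;
   assert(heap_offset / BLOCK_SIZE - 1 == 1);
   assert(heap_offset % BLOCK_SIZE == 0x1ff000);

   return 0;
}
```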
```diff
diff --git a/src/freedreno/drm/freedreno_device.c b/src/freedreno/drm/freedreno_device.c
index bbc7cfc8b13..a862c6f5d94 100644
--- a/src/freedreno/drm/freedreno_device.c
+++ b/src/freedreno/drm/freedreno_device.c
@@ -43,6 +43,7 @@ fd_device_new(int fd)
 {
    struct fd_device *dev = NULL;
    drmVersionPtr version;
+   bool use_heap = false;
 
    /* figure out if we are kgsl or msm drm driver: */
    version = drmGetVersion(fd);
@@ -64,6 +65,10 @@ fd_device_new(int fd)
    } else if (!strcmp(version->name, "virtio_gpu")) {
       DEBUG_MSG("virtio_gpu DRM device");
       dev = virtio_device_new(fd, version);
+      /* Only devices that support a hypervisor are a6xx+, so avoid the
+       * extra guest<->host round trips associated with pipe creation:
+       */
+      use_heap = true;
 #endif
 #if HAVE_FREEDRENO_KGSL
    } else if (!strcmp(version->name, "kgsl")) {
@@ -96,6 +101,23 @@ out:
    simple_mtx_init(&dev->submit_lock, mtx_plain);
    simple_mtx_init(&dev->suballoc_lock, mtx_plain);
 
+   if (!use_heap) {
+      struct fd_pipe *pipe = fd_pipe_new(dev, FD_PIPE_3D);
+
+      /* Userspace fences don't appear to be reliable enough (missing some
+       * cache flushes?) on older gens, so limit sub-alloc heaps to a6xx+
+       * for now:
+       */
+      use_heap = fd_dev_gen(&pipe->dev_id) >= 6;
+
+      fd_pipe_del(pipe);
+   }
+
+   if (use_heap) {
+      dev->ring_heap = fd_bo_heap_new(dev, RING_FLAGS);
+      dev->default_heap = fd_bo_heap_new(dev, 0);
+   }
+
    return dev;
 }
 
@@ -158,6 +180,12 @@ fd_device_del(struct fd_device *dev)
    if (dev->suballoc_bo)
       fd_bo_del(dev->suballoc_bo);
 
+   if (dev->ring_heap)
+      fd_bo_heap_destroy(dev->ring_heap);
+
+   if (dev->default_heap)
+      fd_bo_heap_destroy(dev->default_heap);
+
    fd_bo_cache_cleanup(&dev->bo_cache, 0);
    fd_bo_cache_cleanup(&dev->ring_cache, 0);
diff --git a/src/freedreno/drm/freedreno_drmif.h b/src/freedreno/drm/freedreno_drmif.h
index d88fd510318..373e0b2a654 100644
--- a/src/freedreno/drm/freedreno_drmif.h
+++ b/src/freedreno/drm/freedreno_drmif.h
@@ -130,6 +130,7 @@ struct fd_fence *fd_fence_ref_locked(struct fd_fence *f);
 void fd_fence_del(struct fd_fence *f);
 void fd_fence_del_locked(struct fd_fence *f);
 void fd_fence_flush(struct fd_fence *f);
+int fd_fence_wait(struct fd_fence *f);
 
 /*
  * bo flags:
diff --git a/src/freedreno/drm/freedreno_pipe.c b/src/freedreno/drm/freedreno_pipe.c
index 2e9f83c6329..4f655247ce0 100644
--- a/src/freedreno/drm/freedreno_pipe.c
+++ b/src/freedreno/drm/freedreno_pipe.c
@@ -286,3 +286,9 @@ fd_fence_flush(struct fd_fence *f)
    fd_pipe_flush(f->pipe, f->ufence);
    util_queue_fence_wait(&f->ready);
 }
+
+int
+fd_fence_wait(struct fd_fence *f)
+{
+   return fd_pipe_wait(f->pipe, f);
+}
```
```diff
diff --git a/src/freedreno/drm/freedreno_priv.h b/src/freedreno/drm/freedreno_priv.h
index b166b0b8735..7cc3d9b37a6 100644
--- a/src/freedreno/drm/freedreno_priv.h
+++ b/src/freedreno/drm/freedreno_priv.h
@@ -46,6 +46,7 @@
 #include "util/u_atomic.h"
 #include "util/u_debug.h"
 #include "util/u_math.h"
+#include "util/vma.h"
 
 #include "freedreno_dev_info.h"
 #include "freedreno_drmif.h"
@@ -126,6 +127,77 @@ struct fd_bo_cache {
    time_t time;
 };
 
+/* Probably good for the block size to be a multiple of an available
+ * large-page size.  For overlap of what both the MMU (with 4kb granule)
+ * and SMMU support, 2MB is that overlap.  (Well, 4kb is as well, but
+ * too small to be practical ;-))
+ */
+#define FD_BO_HEAP_BLOCK_SIZE (4 * 1024 * 1024)
+
+/* Zero is an invalid handle, use it to indicate buffers that have been
+ * sub-allocated from a larger backing heap block buffer.
+ */
+#define FD_BO_SUBALLOC_HANDLE 0
+
+static inline bool
+suballoc_bo(struct fd_bo *bo)
+{
+   return bo->handle == FD_BO_SUBALLOC_HANDLE;
+}
+
+/**
+ * A heap is a virtual range of memory that is backed by N physical buffers,
+ * from which buffers can be suballocated.  This requires kernel support for
+ * userspace allocated iova.
+ */
+struct fd_bo_heap {
+   struct fd_device *dev;
+
+   int cnt;
+
+   /**
+    * Buffer allocation flags for buffers allocated from this heap.
+    */
+   uint32_t flags;
+
+   simple_mtx_t lock;
+
+   /**
+    * Ranges of the backing buffer are allocated at a granularity of
+    * SUBALLOC_ALIGNMENT
+    */
+   struct util_vma_heap heap;
+
+   /**
+    * List of recently freed suballocated BOs from this allocator until they
+    * become idle.  Backend should periodically call fd_bo_suballoc_clean()
+    * to check for newly idle entries on the freelist, so that the memory can
+    * be returned to the free heap.
+    */
+   struct list_head freelist;
+
+   /**
+    * The backing buffers.  Maximum total heap size is:
+    * FD_BO_HEAP_BLOCK_SIZE * ARRAY_SIZE(heap->blocks)
+    */
+   struct fd_bo *blocks[256];
+};
+
+struct fd_bo_heap *fd_bo_heap_new(struct fd_device *dev, uint32_t flags);
+void fd_bo_heap_destroy(struct fd_bo_heap *heap);
+
+struct fd_bo *fd_bo_heap_block(struct fd_bo *bo);
+struct fd_bo *fd_bo_heap_alloc(struct fd_bo_heap *heap, uint32_t size);
+
+static inline uint32_t
+submit_offset(struct fd_bo *bo, uint32_t offset)
+{
+   if (suballoc_bo(bo)) {
+      offset += bo->iova - fd_bo_heap_block(bo)->iova;
+   }
+   return offset;
+}
+
 struct fd_device {
    int fd;
    enum fd_version version;
@@ -147,6 +219,16 @@ struct fd_device {
    struct fd_bo_cache bo_cache;
    struct fd_bo_cache ring_cache;
 
+   /**
+    * Heap for mappable + cached-coherent + gpu-readonly (ie. cmdstream)
+    */
+   struct fd_bo_heap *ring_heap;
+
+   /**
+    * Heap for mappable (ie. majority of small buffer allocations, etc)
+    */
+   struct fd_bo_heap *default_heap;
+
    bool has_cached_coherent;
 
    bool closefd; /* call close(fd) upon destruction */
@@ -352,6 +434,7 @@ enum fd_bo_state {
 enum fd_bo_state fd_bo_state(struct fd_bo *bo);
 
 void fd_bo_init_common(struct fd_bo *bo, struct fd_device *dev);
+void fd_bo_fini_fences(struct fd_bo *bo);
 void fd_bo_fini_common(struct fd_bo *bo);
 
 struct fd_bo *fd_bo_new_ring(struct fd_device *dev, uint32_t size);
```
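
A worked example of `submit_offset()` may help: the kernel only ever sees the backing block, so submit offsets for a suballocated BO must be rebased from the sub-BO onto the block. A standalone sketch of the same arithmetic, with invented iova values:

```c
#include <assert.h>
#include <stdint.h>

int main(void)
{
   uint64_t block_iova = 0x100000000ull; /* 4MB backing block */
   uint64_t bo_iova    = 0x100003000ull; /* sub-BO, 0x3000 into the block */

   /* A ring starting 0x10 bytes into the sub-BO is reported to the
    * kernel as 0x3010 bytes into the backing block:
    */
   uint32_t offset = 0x10 + (uint32_t)(bo_iova - block_iova);
   assert(offset == 0x3010);

   /* For a non-suballocated BO, submit_offset() is the identity. */
   return 0;
}
```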
```diff
diff --git a/src/freedreno/drm/freedreno_ringbuffer_sp.c b/src/freedreno/drm/freedreno_ringbuffer_sp.c
index 43def17a29f..311322fd6bd 100644
--- a/src/freedreno/drm/freedreno_ringbuffer_sp.c
+++ b/src/freedreno/drm/freedreno_ringbuffer_sp.c
@@ -52,17 +52,46 @@ static struct fd_ringbuffer *
 fd_ringbuffer_sp_init(struct fd_ringbuffer_sp *fd_ring, uint32_t size,
                       enum fd_ringbuffer_flags flags);
 
+static void
+append_suballoc_bo(struct fd_submit_sp *submit, struct fd_bo *bo)
+{
+   uint32_t idx = READ_ONCE(bo->idx);
+
+   if (unlikely((idx >= submit->nr_suballoc_bos) ||
+                (submit->suballoc_bos[idx] != bo))) {
+      uint32_t hash = _mesa_hash_pointer(bo);
+      struct hash_entry *entry;
+
+      entry = _mesa_hash_table_search_pre_hashed(
+         submit->suballoc_bo_table, hash, bo);
+      if (entry) {
+         /* found */
+         idx = (uint32_t)(uintptr_t)entry->data;
+      } else {
+         idx = APPEND(submit, suballoc_bos, fd_bo_ref(bo));
+
+         _mesa_hash_table_insert_pre_hashed(
+            submit->suballoc_bo_table, hash, bo, (void *)(uintptr_t)idx);
+      }
+      bo->idx = idx;
+   }
+}
+
 /* add (if needed) bo to submit and return index: */
 uint32_t
 fd_submit_append_bo(struct fd_submit_sp *submit, struct fd_bo *bo)
 {
-   uint32_t idx;
+   if (suballoc_bo(bo)) {
+      append_suballoc_bo(submit, bo);
+      bo = fd_bo_heap_block(bo);
+   }
 
    /* NOTE: it is legal to use the same bo on different threads for
     * different submits.  But it is not legal to use the same submit
     * from different threads.
     */
-   idx = READ_ONCE(bo->idx);
+   uint32_t idx = READ_ONCE(bo->idx);
 
    if (unlikely((idx >= submit->nr_bos) || (submit->bos[idx] != bo))) {
       uint32_t hash = _mesa_hash_pointer(bo);
@@ -187,6 +216,9 @@ fd_submit_sp_flush_prep(struct fd_submit *submit, int in_fence_fd,
       fd_bo_add_fence(fd_submit->bos[i], out_fence);
       has_shared |= fd_submit->bos[i]->alloc_flags & FD_BO_SHARED;
    }
+   for (unsigned i = 0; i < fd_submit->nr_suballoc_bos; i++) {
+      fd_bo_add_fence(fd_submit->suballoc_bos[i], out_fence);
+   }
    simple_mtx_unlock(&fence_lock);
 
    fd_submit->out_fence = fd_fence_ref(out_fence);
@@ -385,6 +417,7 @@ fd_submit_sp_destroy(struct fd_submit *submit)
       fd_ringbuffer_del(fd_submit->suballoc_ring);
 
    _mesa_hash_table_destroy(fd_submit->bo_table, NULL);
+   _mesa_hash_table_destroy(fd_submit->suballoc_bo_table, NULL);
 
    // TODO it would be nice to have a way to assert() if all
    // rb's haven't been free'd back to the slab, because that is
@@ -392,11 +425,14 @@ fd_submit_sp_destroy(struct fd_submit *submit)
    slab_destroy_child(&fd_submit->ring_pool);
 
    fd_bo_del_array(fd_submit->bos, fd_submit->nr_bos);
+   free(fd_submit->bos);
+
+   fd_bo_del_array(fd_submit->suballoc_bos, fd_submit->nr_suballoc_bos);
+   free(fd_submit->suballoc_bos);
 
    if (fd_submit->out_fence)
       fd_fence_del(fd_submit->out_fence);
 
-   free(fd_submit->bos);
    free(fd_submit);
 }
 
@@ -412,8 +448,8 @@ fd_submit_sp_new(struct fd_pipe *pipe, flush_submit_list_fn flush_submit_list)
    struct fd_submit_sp *fd_submit = calloc(1, sizeof(*fd_submit));
    struct fd_submit *submit;
 
-   fd_submit->bo_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
-                                                 _mesa_key_pointer_equal);
+   fd_submit->bo_table = _mesa_pointer_hash_table_create(NULL);
+   fd_submit->suballoc_bo_table = _mesa_pointer_hash_table_create(NULL);
 
    slab_create_child(&fd_submit->ring_pool, &pipe->ring_pool);
```
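
The `READ_ONCE(bo->idx)` check in `fd_submit_append_bo()`/`append_suballoc_bo()` deserves a note: `bo->idx` is only an unsynchronized hint (another submit may have overwritten it), so it is validated against this submit's own array before being trusted, with the hash table as the authoritative slow path. A self-contained distillation of the pattern (types invented for illustration; a linear scan stands in for the real hash table):

```c
#include <stdint.h>

struct obj {
   uint32_t idx; /* hint: the index this obj had in *some* submit's table */
};

struct table {
   struct obj *entries[64];
   uint32_t count;
};

static uint32_t
lookup_or_append(struct table *t, struct obj *o)
{
   uint32_t idx = o->idx; /* the real code uses READ_ONCE() here */

   /* Fast path: the hint is trusted only if this table really holds o
    * at that index, so a stale value from another submit is harmless.
    */
   if (idx < t->count && t->entries[idx] == o)
      return idx;

   /* Slow path: authoritative lookup, appending if not yet tracked. */
   for (idx = 0; idx < t->count; idx++)
      if (t->entries[idx] == o)
         break;
   if (idx == t->count)
      t->entries[t->count++] = o; /* like APPEND() in the patch */

   o->idx = idx; /* refresh the hint; racing writers are benign because
                  * the hint is always re-validated before use */
   return idx;
}
```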
```diff
diff --git a/src/freedreno/drm/freedreno_ringbuffer_sp.h b/src/freedreno/drm/freedreno_ringbuffer_sp.h
index f5e04ce0a34..ad0399d3660 100644
--- a/src/freedreno/drm/freedreno_ringbuffer_sp.h
+++ b/src/freedreno/drm/freedreno_ringbuffer_sp.h
@@ -50,9 +50,19 @@ struct fd_submit_sp {
 
    DECLARE_ARRAY(struct fd_bo *, bos);
 
+   /* Keep a separate table of sub-alloc BOs.. the backing objects are
+    * tracked in the main bos table (because this is what the kernel
+    * sees), but we need to attach userspace fences to the sub-alloc'd
+    * BOs so the driver knows when they are idle
+    */
+   DECLARE_ARRAY(struct fd_bo *, suballoc_bos);
+
    /* maps fd_bo to idx in bos table: */
    struct hash_table *bo_table;
 
+   /* maps fd_bo to idx in suballoc_bos table: */
+   struct hash_table *suballoc_bo_table;
+
    struct slab_child_pool ring_pool;
 
    /* Allow for sub-allocation of stateobj ring buffers (ie. sharing
```
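
Putting the two tables together, this is the implied lifecycle of a suballocated BO (a fragment inferred from this patch, not a complete program; `dev` and `data` are assumed from context):

```c
struct fd_bo *bo = fd_bo_new(dev, 4096, 0, "ubo"); /* fd_bo_heap_alloc() path */

/* No mmap() churn: bo->map already points into the backing block. */
memcpy(fd_bo_map(bo), data, 4096);

/* When referenced in a submit, fd_submit_append_bo() records the backing
 * block in bos (what the kernel sees) and bo itself in suballoc_bos; at
 * flush, the submit's out-fence is attached to bo.
 */

fd_bo_del(bo); /* sa_destroy(): only queues bo on the heap freelist; the
                * range is recycled by heap_clean() once the attached
                * fence signals */
```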
```diff
diff --git a/src/freedreno/drm/meson.build b/src/freedreno/drm/meson.build
index dcdbbdb4012..cbbfc75f01a 100644
--- a/src/freedreno/drm/meson.build
+++ b/src/freedreno/drm/meson.build
@@ -20,6 +20,7 @@
 libfreedreno_drm_files = files(
   'freedreno_bo.c',
+  'freedreno_bo_heap.c',
   'freedreno_bo_cache.c',
   'freedreno_device.c',
   'freedreno_drmif.h',
diff --git a/src/freedreno/drm/msm/msm_ringbuffer.c b/src/freedreno/drm/msm/msm_ringbuffer.c
index 3bfcce55dd7..08a47077045 100644
--- a/src/freedreno/drm/msm/msm_ringbuffer.c
+++ b/src/freedreno/drm/msm/msm_ringbuffer.c
@@ -314,7 +314,7 @@ msm_submit_flush(struct fd_submit *submit, int in_fence_fd, bool use_fence_fd)
 
       cmds[i].type = MSM_SUBMIT_CMD_IB_TARGET_BUF;
       cmds[i].submit_idx = append_bo(msm_submit, msm_ring->ring_bo);
-      cmds[i].submit_offset = msm_ring->offset;
+      cmds[i].submit_offset = submit_offset(msm_ring->ring_bo, msm_ring->offset);
       cmds[i].size = offset_bytes(ring->cur, ring->start);
       cmds[i].pad = 0;
       cmds[i].nr_relocs = msm_ring->cmd->nr_relocs;
@@ -328,9 +328,9 @@ msm_submit_flush(struct fd_submit *submit, int in_fence_fd, bool use_fence_fd)
          } else {
             cmds[i].type = MSM_SUBMIT_CMD_IB_TARGET_BUF;
          }
-         cmds[i].submit_idx =
-            append_bo(msm_submit, msm_ring->u.cmds[j]->ring_bo);
-         cmds[i].submit_offset = msm_ring->offset;
+         struct fd_bo *ring_bo = msm_ring->u.cmds[j]->ring_bo;
+         cmds[i].submit_idx = append_bo(msm_submit, ring_bo);
+         cmds[i].submit_offset = submit_offset(ring_bo, msm_ring->offset);
          cmds[i].size = msm_ring->u.cmds[j]->size;
          cmds[i].pad = 0;
          cmds[i].nr_relocs = msm_ring->u.cmds[j]->nr_relocs;
diff --git a/src/freedreno/drm/msm/msm_ringbuffer_sp.c b/src/freedreno/drm/msm/msm_ringbuffer_sp.c
index 8e46f3f1387..9491d45fa08 100644
--- a/src/freedreno/drm/msm/msm_ringbuffer_sp.c
+++ b/src/freedreno/drm/msm/msm_ringbuffer_sp.c
@@ -67,10 +67,10 @@ flush_submit_list(struct list_head *submit_list)
          to_fd_ringbuffer_sp(submit->primary);
 
       for (unsigned i = 0; i < deferred_primary->u.nr_cmds; i++) {
+         struct fd_bo *ring_bo = deferred_primary->u.cmds[i].ring_bo;
          cmds[cmd_idx].type = MSM_SUBMIT_CMD_BUF;
-         cmds[cmd_idx].submit_idx =
-            fd_submit_append_bo(fd_submit, deferred_primary->u.cmds[i].ring_bo);
-         cmds[cmd_idx].submit_offset = deferred_primary->offset;
+         cmds[cmd_idx].submit_idx = fd_submit_append_bo(fd_submit, ring_bo);
+         cmds[cmd_idx].submit_offset = submit_offset(ring_bo, deferred_primary->offset);
          cmds[cmd_idx].size = deferred_primary->u.cmds[i].size;
          cmds[cmd_idx].pad = 0;
          cmds[cmd_idx].nr_relocs = 0;
diff --git a/src/freedreno/drm/virtio/virtio_ringbuffer.c b/src/freedreno/drm/virtio/virtio_ringbuffer.c
index 5d691a93e10..0af58d476fe 100644
--- a/src/freedreno/drm/virtio/virtio_ringbuffer.c
+++ b/src/freedreno/drm/virtio/virtio_ringbuffer.c
@@ -85,10 +85,10 @@ flush_submit_list(struct list_head *submit_list)
          to_fd_ringbuffer_sp(submit->primary);
 
       for (unsigned i = 0; i < deferred_primary->u.nr_cmds; i++) {
+         struct fd_bo *ring_bo = deferred_primary->u.cmds[i].ring_bo;
          cmds[cmd_idx].type = MSM_SUBMIT_CMD_BUF;
-         cmds[cmd_idx].submit_idx =
-            fd_submit_append_bo(fd_submit, deferred_primary->u.cmds[i].ring_bo);
-         cmds[cmd_idx].submit_offset = deferred_primary->offset;
+         cmds[cmd_idx].submit_idx = fd_submit_append_bo(fd_submit, ring_bo);
+         cmds[cmd_idx].submit_offset = submit_offset(ring_bo, deferred_primary->offset);
          cmds[cmd_idx].size = deferred_primary->u.cmds[i].size;
          cmds[cmd_idx].pad = 0;
          cmds[cmd_idx].nr_relocs = 0;
```