author    Rob Clark <robdclark@chromium.org>  2022-12-02 18:42:42 -0800
committer Marge Bot <emma+marge@anholt.net>  2022-12-17 19:14:12 +0000
commit    48610676897d989ff8ce604022e7d611cec9c131 (patch)
tree      83adfa63b37ecde0a98508c68b661302e8a1d338
parent    7d0d82f25ffa88b817ecfbd974cd1eff26ac1e0d (diff)
download  mesa-48610676897d989ff8ce604022e7d611cec9c131.tar.gz
freedreno/drm: Add sub-allocator
Add a heap that we can use for allocations of small mappable buffers. This
avoids the churn of mmap/unmap, which is especially expensive in a VM. It
also allows packing more small allocations together in a page, which is
useful for PIPE_BUFFERs (which are also mappable).

This avoids jank caused by the overhead of setting up and tearing down
guest mappings when running in a VM, and also significantly reduces the
number of BOs referenced on a submit.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20263>
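From the caller's side the sub-allocator is invisible: fd_bo_new() takes the
heap fast-path automatically whenever the request is smaller than
FD_BO_HEAP_BLOCK_SIZE and the flags match a heap (0 for the default heap,
RING_FLAGS for the ring heap). A minimal usage sketch under those
assumptions (the buffer name string is illustrative):

    /* Small + flags==0 => transparently suballocated from dev->default_heap: */
    struct fd_bo *buf = fd_bo_new(dev, 4096, 0, "small-buf");

    /* No fresh mmap(): the pointer is carved out of the already-mapped
     * backing block, avoiding guest<->host round trips in a VM:
     */
    uint32_t *cpu = fd_bo_map(buf);
    cpu[0] = 0xdeadbeef;

    /* The actual release is deferred to the heap freelist until the GPU
     * is idle (tracked with userspace fences):
     */
    fd_bo_del(buf);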
-rw-r--r--  src/freedreno/drm/freedreno_bo.c             |  38
-rw-r--r--  src/freedreno/drm/freedreno_bo_heap.c        | 284
-rw-r--r--  src/freedreno/drm/freedreno_device.c         |  28
-rw-r--r--  src/freedreno/drm/freedreno_drmif.h          |   1
-rw-r--r--  src/freedreno/drm/freedreno_pipe.c           |   6
-rw-r--r--  src/freedreno/drm/freedreno_priv.h           |  83
-rw-r--r--  src/freedreno/drm/freedreno_ringbuffer_sp.c  |  46
-rw-r--r--  src/freedreno/drm/freedreno_ringbuffer_sp.h  |  10
-rw-r--r--  src/freedreno/drm/meson.build                |   1
-rw-r--r--  src/freedreno/drm/msm/msm_ringbuffer.c       |   8
-rw-r--r--  src/freedreno/drm/msm/msm_ringbuffer_sp.c    |   6
-rw-r--r--  src/freedreno/drm/virtio/virtio_ringbuffer.c |   6
12 files changed, 491 insertions(+), 26 deletions(-)
diff --git a/src/freedreno/drm/freedreno_bo.c b/src/freedreno/drm/freedreno_bo.c
index d45f30dfa3c..c2ee1ba3bf0 100644
--- a/src/freedreno/drm/freedreno_bo.c
+++ b/src/freedreno/drm/freedreno_bo.c
@@ -114,6 +114,13 @@ bo_new(struct fd_device *dev, uint32_t size, uint32_t flags,
{
struct fd_bo *bo = NULL;
+ if (size < FD_BO_HEAP_BLOCK_SIZE) {
+ if ((flags == 0) && dev->default_heap)
+ return fd_bo_heap_alloc(dev->default_heap, size);
+ if ((flags == RING_FLAGS) && dev->ring_heap)
+ return fd_bo_heap_alloc(dev->ring_heap, size);
+ }
+
/* demote cached-coherent to WC if not supported: */
if ((flags & FD_BO_CACHED_COHERENT) && !dev->has_cached_coherent)
flags &= ~FD_BO_CACHED_COHERENT;
@@ -278,13 +285,16 @@ bo_del_or_recycle(struct fd_bo *bo)
{
struct fd_device *dev = bo->dev;
- if ((bo->bo_reuse == BO_CACHE) &&
- (fd_bo_cache_free(&dev->bo_cache, bo) == 0))
- return 0;
+ /* No point in BO cache for suballocated buffers: */
+ if (!suballoc_bo(bo)) {
+ if ((bo->bo_reuse == BO_CACHE) &&
+ (fd_bo_cache_free(&dev->bo_cache, bo) == 0))
+ return 0;
- if ((bo->bo_reuse == RING_CACHE) &&
- (fd_bo_cache_free(&dev->ring_cache, bo) == 0))
- return 0;
+ if ((bo->bo_reuse == RING_CACHE) &&
+ (fd_bo_cache_free(&dev->ring_cache, bo) == 0))
+ return 0;
+ }
return bo_del(bo);
}
@@ -355,6 +365,16 @@ fd_bo_del_list_nocache(struct list_head *list)
close_handles(dev, handles, cnt);
}
+void
+fd_bo_fini_fences(struct fd_bo *bo)
+{
+ for (int i = 0; i < bo->nr_fences; i++)
+ fd_fence_del(bo->fences[i]);
+
+ if (bo->fences != &bo->_inline_fence)
+ free(bo->fences);
+}
+
/**
* Helper called by backends bo->funcs->destroy()
*
@@ -371,11 +391,7 @@ fd_bo_fini_common(struct fd_bo *bo)
VG_BO_FREE(bo);
- for (int i = 0; i < bo->nr_fences; i++)
- fd_fence_del(bo->fences[i]);
-
- if (bo->fences != &bo->_inline_fence)
- free(bo->fences);
+ fd_bo_fini_fences(bo);
if (bo->map)
os_munmap(bo->map, bo->size);
diff --git a/src/freedreno/drm/freedreno_bo_heap.c b/src/freedreno/drm/freedreno_bo_heap.c
new file mode 100644
index 00000000000..7d87c933076
--- /dev/null
+++ b/src/freedreno/drm/freedreno_bo_heap.c
@@ -0,0 +1,284 @@
+/*
+ * Copyright © 2022 Google, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "freedreno_drmif.h"
+#include "freedreno_priv.h"
+
+struct sa_bo {
+ struct fd_bo base;
+ struct fd_bo_heap *heap;
+ unsigned offset;
+};
+FD_DEFINE_CAST(fd_bo, sa_bo);
+
+#define HEAP_DEBUG 0
+
+static void heap_clean(struct fd_bo_heap *heap, bool idle);
+static void heap_dump(struct fd_bo_heap *heap);
+
+struct fd_bo_heap *
+fd_bo_heap_new(struct fd_device *dev, uint32_t flags)
+{
+ struct fd_bo_heap *heap;
+
+ /* We cannot suballocate shared buffers! Implicit sync is not supported! */
+ assert(!(flags & FD_BO_SHARED));
+
+ /* No internal buffers either, we need userspace fencing: */
+ assert(!(flags & _FD_BO_NOSYNC));
+
+ heap = calloc(1, sizeof(*heap));
+
+ heap->dev = dev;
+ heap->flags = flags;
+ simple_mtx_init(&heap->lock, mtx_plain);
+ list_inithead(&heap->freelist);
+
+ /* Note that util_vma_heap_init doesn't like offset==0, so we shift the
+ * entire range by one block size (see block_idx()):
+ */
+ util_vma_heap_init(&heap->heap, FD_BO_HEAP_BLOCK_SIZE,
+ FD_BO_HEAP_BLOCK_SIZE * ARRAY_SIZE(heap->blocks));
+ heap->heap.alloc_high = false;
+ heap->heap.nospan_shift = ffs(FD_BO_HEAP_BLOCK_SIZE) - 1;
+
+ heap_dump(heap);
+
+ return heap;
+}
+
+void fd_bo_heap_destroy(struct fd_bo_heap *heap)
+{
+ /* drain the freelist: */
+ heap_clean(heap, false);
+
+ util_vma_heap_finish(&heap->heap);
+ for (unsigned i = 0; i < ARRAY_SIZE(heap->blocks); i++)
+ if (heap->blocks[i])
+ fd_bo_del(heap->blocks[i]);
+ free(heap);
+}
+
+static bool
+sa_idle(struct fd_bo *bo)
+{
+ enum fd_bo_state state = fd_bo_state(bo);
+ assert(state != FD_BO_STATE_UNKNOWN);
+ return state == FD_BO_STATE_IDLE;
+}
+
+/**
+ * The backing block is determined by the offset within the heap, since all
+ * the blocks are equal size
+ */
+static unsigned
+block_idx(struct sa_bo *s)
+{
+ /* The vma allocator doesn't like offset=0 so the range is shifted up
+ * by one block size:
+ */
+ return (s->offset / FD_BO_HEAP_BLOCK_SIZE) - 1;
+}
+
+static unsigned
+block_offset(struct sa_bo *s)
+{
+ return s->offset % FD_BO_HEAP_BLOCK_SIZE;
+}
+
+static void
+heap_dump(struct fd_bo_heap *heap)
+{
+ if (!HEAP_DEBUG)
+ return;
+ fprintf(stderr, "HEAP[%x]: freelist: %u\n", heap->flags, list_length(&heap->freelist));
+ util_vma_heap_print(&heap->heap, stderr, "",
+ FD_BO_HEAP_BLOCK_SIZE * ARRAY_SIZE(heap->blocks));
+}
+
+static void
+sa_release(struct fd_bo *bo)
+{
+ struct sa_bo *s = to_sa_bo(bo);
+
+ simple_mtx_assert_locked(&s->heap->lock);
+
+ VG_BO_FREE(bo);
+
+ fd_bo_fini_fences(bo);
+
+ if (HEAP_DEBUG)
+ mesa_logi("release: %08x-%x idx=%d", s->offset, bo->size, block_idx(s));
+
+ util_vma_heap_free(&s->heap->heap, s->offset, bo->size);
+
+ /* Drop our reference to the backing block object: */
+ fd_bo_del(s->heap->blocks[block_idx(s)]);
+
+ list_del(&bo->node);
+
+ if ((++s->heap->cnt % 256) == 0)
+ heap_dump(s->heap);
+
+ free(bo);
+}
+
+static int
+sa_cpu_prep(struct fd_bo *bo, struct fd_pipe *pipe, uint32_t op)
+{
+ simple_mtx_lock(&fence_lock);
+ unsigned nr = bo->nr_fences;
+ struct fd_fence *fences[nr];
+ for (unsigned i = 0; i < nr; i++)
+ fences[i] = fd_fence_ref_locked(bo->fences[i]);
+ simple_mtx_unlock(&fence_lock);
+
+ for (unsigned i = 0; i < nr; i++) {
+ fd_fence_wait(fences[i]);
+ fd_fence_del(fences[i]);
+ }
+
+ /* expire completed fences */
+ fd_bo_state(bo);
+
+ assert(fd_bo_state(bo) == FD_BO_STATE_IDLE);
+
+ return 0;
+}
+
+static int
+sa_madvise(struct fd_bo *bo, int willneed)
+{
+ return willneed;
+}
+
+static uint64_t
+sa_iova(struct fd_bo *bo)
+{
+ struct sa_bo *s = to_sa_bo(bo);
+
+ return s->heap->blocks[block_idx(s)]->iova + block_offset(s);
+}
+
+static void
+sa_set_name(struct fd_bo *bo, const char *fmt, va_list ap)
+{
+ /* No-op, kernel has a single name for the entire buffer we suballoc from */
+}
+
+static void
+sa_destroy(struct fd_bo *bo)
+{
+ struct fd_bo_heap *heap = to_sa_bo(bo)->heap;
+
+ simple_mtx_lock(&heap->lock);
+ list_addtail(&bo->node, &heap->freelist);
+ simple_mtx_unlock(&heap->lock);
+}
+
+static struct fd_bo_funcs heap_bo_funcs = {
+ .cpu_prep = sa_cpu_prep,
+ .madvise = sa_madvise,
+ .iova = sa_iova,
+ .set_name = sa_set_name,
+ .destroy = sa_destroy,
+};
+
+/**
+ * Get the backing heap block of a suballocated bo
+ */
+struct fd_bo *
+fd_bo_heap_block(struct fd_bo *bo)
+{
+ assert(suballoc_bo(bo));
+
+ struct sa_bo *s = to_sa_bo(bo);
+ return s->heap->blocks[block_idx(s)];
+}
+
+static void
+heap_clean(struct fd_bo_heap *heap, bool idle)
+{
+ simple_mtx_lock(&heap->lock);
+ foreach_bo_safe (bo, &heap->freelist) {
+ /* It might be nice if we could keep freelist sorted by fence # */
+ if (idle && !sa_idle(bo))
+ continue;
+ sa_release(bo);
+ }
+ simple_mtx_unlock(&heap->lock);
+}
+
+struct fd_bo *
+fd_bo_heap_alloc(struct fd_bo_heap *heap, uint32_t size)
+{
+ heap_clean(heap, true);
+
+ struct sa_bo *s = calloc(1, sizeof(*s));
+
+ s->heap = heap;
+
+ /* util_vma does not like zero byte allocations, which we get, for
+ * ex, with the initial query buffer allocation on pre-a5xx:
+ */
+ size = MAX2(size, SUBALLOC_ALIGNMENT);
+
+ size = ALIGN(size, SUBALLOC_ALIGNMENT);
+
+ simple_mtx_lock(&heap->lock);
+ /* Allocate larger buffers from the bottom, and smaller buffers from top
+ * to help limit fragmentation:
+ *
+ * (The 8k threshold is just a random guess, but seems to work ok)
+ */
+ heap->heap.alloc_high = (size <= 8 * 1024);
+ s->offset = util_vma_heap_alloc(&heap->heap, size, SUBALLOC_ALIGNMENT);
+ assert((s->offset / FD_BO_HEAP_BLOCK_SIZE) == (s->offset + size - 1) / FD_BO_HEAP_BLOCK_SIZE);
+ unsigned idx = block_idx(s);
+ if (HEAP_DEBUG)
+ mesa_logi("alloc: %08x-%x idx=%d", s->offset, size, idx);
+ if (!heap->blocks[idx]) {
+ heap->blocks[idx] = fd_bo_new(
+ heap->dev, FD_BO_HEAP_BLOCK_SIZE, heap->flags,
+ "heap-%x-block-%u", heap->flags, idx);
+ }
+ /* Take a reference to the backing obj: */
+ fd_bo_ref(heap->blocks[idx]);
+ simple_mtx_unlock(&heap->lock);
+
+ struct fd_bo *bo = &s->base;
+
+ bo->size = size;
+ bo->funcs = &heap_bo_funcs;
+ bo->handle = 1; /* dummy handle to make fd_bo_init_common() happy */
+ bo->alloc_flags = heap->flags;
+
+ fd_bo_init_common(bo, heap->dev);
+
+ bo->handle = FD_BO_SUBALLOC_HANDLE;
+
+ /* Pre-initialize mmap ptr, to avoid trying to os_mmap() */
+ bo->map = ((uint8_t *)fd_bo_map(heap->blocks[idx])) + block_offset(s);
+
+ return bo;
+}
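To make the block math concrete: util_vma_heap_init() rejects offset==0, so
the whole range is shifted up by one block, and block_idx() undoes that
shift. With the 4 MiB block size, a suballocation at heap offset 0x620000
therefore lives in block 0 at byte 0x220000 of that block. A self-contained
sketch of just the arithmetic (the offset value is made up):

    #include <assert.h>
    #include <stdio.h>

    #define BLOCK_SIZE (4 * 1024 * 1024)   /* FD_BO_HEAP_BLOCK_SIZE */

    int main(void)
    {
       unsigned offset = 0x620000;   /* as handed back by util_vma_heap_alloc() */

       unsigned idx = (offset / BLOCK_SIZE) - 1;   /* undo the one-block shift */
       unsigned off = offset % BLOCK_SIZE;         /* byte offset inside the block */

       assert(idx == 0 && off == 0x220000);
       printf("block %u, offset 0x%x\n", idx, off);
       return 0;
    }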
diff --git a/src/freedreno/drm/freedreno_device.c b/src/freedreno/drm/freedreno_device.c
index bbc7cfc8b13..a862c6f5d94 100644
--- a/src/freedreno/drm/freedreno_device.c
+++ b/src/freedreno/drm/freedreno_device.c
@@ -43,6 +43,7 @@ fd_device_new(int fd)
{
struct fd_device *dev = NULL;
drmVersionPtr version;
+ bool use_heap = false;
/* figure out if we are kgsl or msm drm driver: */
version = drmGetVersion(fd);
@@ -64,6 +65,10 @@ fd_device_new(int fd)
} else if (!strcmp(version->name, "virtio_gpu")) {
DEBUG_MSG("virtio_gpu DRM device");
dev = virtio_device_new(fd, version);
+ /* Only devices that support a hypervisor are a6xx+, so avoid the
+ * extra guest<->host round trips associated with pipe creation:
+ */
+ use_heap = true;
#endif
#if HAVE_FREEDRENO_KGSL
} else if (!strcmp(version->name, "kgsl")) {
@@ -96,6 +101,23 @@ out:
simple_mtx_init(&dev->submit_lock, mtx_plain);
simple_mtx_init(&dev->suballoc_lock, mtx_plain);
+ if (!use_heap) {
+ struct fd_pipe *pipe = fd_pipe_new(dev, FD_PIPE_3D);
+
+ /* Userspace fences don't appear to be reliable enough (missing some
+ * cache flushes?) on older gens, so limit sub-alloc heaps to a6xx+
+ * for now:
+ */
+ use_heap = fd_dev_gen(&pipe->dev_id) >= 6;
+
+ fd_pipe_del(pipe);
+ }
+
+ if (use_heap) {
+ dev->ring_heap = fd_bo_heap_new(dev, RING_FLAGS);
+ dev->default_heap = fd_bo_heap_new(dev, 0);
+ }
+
return dev;
}
@@ -158,6 +180,12 @@ fd_device_del(struct fd_device *dev)
if (dev->suballoc_bo)
fd_bo_del(dev->suballoc_bo);
+ if (dev->ring_heap)
+ fd_bo_heap_destroy(dev->ring_heap);
+
+ if (dev->default_heap)
+ fd_bo_heap_destroy(dev->default_heap);
+
fd_bo_cache_cleanup(&dev->bo_cache, 0);
fd_bo_cache_cleanup(&dev->ring_cache, 0);
diff --git a/src/freedreno/drm/freedreno_drmif.h b/src/freedreno/drm/freedreno_drmif.h
index d88fd510318..373e0b2a654 100644
--- a/src/freedreno/drm/freedreno_drmif.h
+++ b/src/freedreno/drm/freedreno_drmif.h
@@ -130,6 +130,7 @@ struct fd_fence *fd_fence_ref_locked(struct fd_fence *f);
void fd_fence_del(struct fd_fence *f);
void fd_fence_del_locked(struct fd_fence *f);
void fd_fence_flush(struct fd_fence *f);
+int fd_fence_wait(struct fd_fence *f);
/*
* bo flags:
diff --git a/src/freedreno/drm/freedreno_pipe.c b/src/freedreno/drm/freedreno_pipe.c
index 2e9f83c6329..4f655247ce0 100644
--- a/src/freedreno/drm/freedreno_pipe.c
+++ b/src/freedreno/drm/freedreno_pipe.c
@@ -286,3 +286,9 @@ fd_fence_flush(struct fd_fence *f)
fd_pipe_flush(f->pipe, f->ufence);
util_queue_fence_wait(&f->ready);
}
+
+int
+fd_fence_wait(struct fd_fence *f)
+{
+ return fd_pipe_wait(f->pipe, f);
+}
diff --git a/src/freedreno/drm/freedreno_priv.h b/src/freedreno/drm/freedreno_priv.h
index b166b0b8735..7cc3d9b37a6 100644
--- a/src/freedreno/drm/freedreno_priv.h
+++ b/src/freedreno/drm/freedreno_priv.h
@@ -46,6 +46,7 @@
#include "util/u_atomic.h"
#include "util/u_debug.h"
#include "util/u_math.h"
+#include "util/vma.h"
#include "freedreno_dev_info.h"
#include "freedreno_drmif.h"
@@ -126,6 +127,77 @@ struct fd_bo_cache {
time_t time;
};
+/* Probably good for the block size to be a multiple of an available
+ * large-page size. For overlap of what both the MMU (with 4kb granule)
+ * and SMMU support, 2MB is that overlap. (Well, 4kb is as well, but
+ * too small to be practical ;-))
+ */
+#define FD_BO_HEAP_BLOCK_SIZE (4 * 1024 * 1024)
+
+/* Zero is an invalid handle, use it to indicate buffers that have been sub-
+ * allocated from a larger backing heap block buffer.
+ */
+#define FD_BO_SUBALLOC_HANDLE 0
+
+static inline bool
+suballoc_bo(struct fd_bo *bo)
+{
+ return bo->handle == FD_BO_SUBALLOC_HANDLE;
+}
+
+/**
+ * A heap is a virtual range of memory that is backed by N physical buffers,
+ * from which buffers can be suballocated. This requires kernel support for
+ * userspace allocated iova.
+ */
+struct fd_bo_heap {
+ struct fd_device *dev;
+
+ int cnt;
+
+ /**
+ * Buffer allocation flags for buffers allocated from this heap.
+ */
+ uint32_t flags;
+
+ simple_mtx_t lock;
+
+ /**
+ * Ranges of the backing buffer are allocated at a granularity of
+ * SUBALLOC_ALIGNMENT
+ */
+ struct util_vma_heap heap;
+
+ /**
+ * List of recently freed suballocated BOs from this allocator until they
+ * become idle. Backend should periodically call fd_bo_suballoc_clean()
+ * to check for newly idle entries on the freelist, so that the memory can
+ * be returned to the free heap.
+ */
+ struct list_head freelist;
+
+ /**
+ * The backing buffers. Maximum total heap size is:
+ * FD_BO_HEAP_BLOCK_SIZE * ARRAY_SIZE(heap->blocks)
+ */
+ struct fd_bo *blocks[256];
+};
+
+struct fd_bo_heap *fd_bo_heap_new(struct fd_device *dev, uint32_t flags);
+void fd_bo_heap_destroy(struct fd_bo_heap *heap);
+
+struct fd_bo *fd_bo_heap_block(struct fd_bo *bo);
+struct fd_bo *fd_bo_heap_alloc(struct fd_bo_heap *heap, uint32_t size);
+
+static inline uint32_t
+submit_offset(struct fd_bo *bo, uint32_t offset)
+{
+ if (suballoc_bo(bo)) {
+ offset += bo->iova - fd_bo_heap_block(bo)->iova;
+ }
+ return offset;
+}
+
struct fd_device {
int fd;
enum fd_version version;
@@ -147,6 +219,16 @@ struct fd_device {
struct fd_bo_cache bo_cache;
struct fd_bo_cache ring_cache;
+ /**
+ * Heap for mappable + cached-coherent + gpu-readonly (ie. cmdstream)
+ */
+ struct fd_bo_heap *ring_heap;
+
+ /**
+ * Heap for mappable (ie. majority of small buffer allocations, etc)
+ */
+ struct fd_bo_heap *default_heap;
+
bool has_cached_coherent;
bool closefd; /* call close(fd) upon destruction */
@@ -352,6 +434,7 @@ enum fd_bo_state {
enum fd_bo_state fd_bo_state(struct fd_bo *bo);
void fd_bo_init_common(struct fd_bo *bo, struct fd_device *dev);
+void fd_bo_fini_fences(struct fd_bo *bo);
void fd_bo_fini_common(struct fd_bo *bo);
struct fd_bo *fd_bo_new_ring(struct fd_device *dev, uint32_t size);
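Since the kernel only ever sees the backing block's handle, any cmdstream
offset recorded against a suballocated BO has to be rebased onto the block,
which is what submit_offset() above does. A worked sketch of the same
arithmetic (all addresses made up):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
       /* Backing block at GPU address 0x100000000, with the ring
        * suballocated 0x3000 bytes into it:
        */
       uint64_t block_iova = 0x100000000ull;
       uint64_t bo_iova = block_iova + 0x3000;   /* what sa_iova() returns */

       /* Commands starting 0x80 bytes into the suballocated BO get
        * submitted at block-relative offset 0x3080:
        */
       uint32_t offset = 0x80 + (uint32_t)(bo_iova - block_iova);
       assert(offset == 0x3080);
       return 0;
    }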
diff --git a/src/freedreno/drm/freedreno_ringbuffer_sp.c b/src/freedreno/drm/freedreno_ringbuffer_sp.c
index 43def17a29f..311322fd6bd 100644
--- a/src/freedreno/drm/freedreno_ringbuffer_sp.c
+++ b/src/freedreno/drm/freedreno_ringbuffer_sp.c
@@ -52,17 +52,46 @@ static struct fd_ringbuffer *
fd_ringbuffer_sp_init(struct fd_ringbuffer_sp *fd_ring, uint32_t size,
enum fd_ringbuffer_flags flags);
+
+static void
+append_suballoc_bo(struct fd_submit_sp *submit, struct fd_bo *bo)
+{
+ uint32_t idx = READ_ONCE(bo->idx);
+
+ if (unlikely((idx >= submit->nr_suballoc_bos) ||
+ (submit->suballoc_bos[idx] != bo))) {
+ uint32_t hash = _mesa_hash_pointer(bo);
+ struct hash_entry *entry;
+
+ entry = _mesa_hash_table_search_pre_hashed(
+ submit->suballoc_bo_table, hash, bo);
+ if (entry) {
+ /* found */
+ idx = (uint32_t)(uintptr_t)entry->data;
+ } else {
+ idx = APPEND(submit, suballoc_bos, fd_bo_ref(bo));
+
+ _mesa_hash_table_insert_pre_hashed(
+ submit->suballoc_bo_table, hash, bo, (void *)(uintptr_t)idx);
+ }
+ bo->idx = idx;
+ }
+}
+
/* add (if needed) bo to submit and return index: */
uint32_t
fd_submit_append_bo(struct fd_submit_sp *submit, struct fd_bo *bo)
{
- uint32_t idx;
+ if (suballoc_bo(bo)) {
+ append_suballoc_bo(submit, bo);
+ bo = fd_bo_heap_block(bo);
+ }
/* NOTE: it is legal to use the same bo on different threads for
* different submits. But it is not legal to use the same submit
* from different threads.
*/
- idx = READ_ONCE(bo->idx);
+ uint32_t idx = READ_ONCE(bo->idx);
if (unlikely((idx >= submit->nr_bos) || (submit->bos[idx] != bo))) {
uint32_t hash = _mesa_hash_pointer(bo);
@@ -187,6 +216,9 @@ fd_submit_sp_flush_prep(struct fd_submit *submit, int in_fence_fd,
fd_bo_add_fence(fd_submit->bos[i], out_fence);
has_shared |= fd_submit->bos[i]->alloc_flags & FD_BO_SHARED;
}
+ for (unsigned i = 0; i < fd_submit->nr_suballoc_bos; i++) {
+ fd_bo_add_fence(fd_submit->suballoc_bos[i], out_fence);
+ }
simple_mtx_unlock(&fence_lock);
fd_submit->out_fence = fd_fence_ref(out_fence);
@@ -385,6 +417,7 @@ fd_submit_sp_destroy(struct fd_submit *submit)
fd_ringbuffer_del(fd_submit->suballoc_ring);
_mesa_hash_table_destroy(fd_submit->bo_table, NULL);
+ _mesa_hash_table_destroy(fd_submit->suballoc_bo_table, NULL);
// TODO it would be nice to have a way to assert() if all
// rb's haven't been free'd back to the slab, because that is
@@ -392,11 +425,14 @@ fd_submit_sp_destroy(struct fd_submit *submit)
slab_destroy_child(&fd_submit->ring_pool);
fd_bo_del_array(fd_submit->bos, fd_submit->nr_bos);
+ free(fd_submit->bos);
+
+ fd_bo_del_array(fd_submit->suballoc_bos, fd_submit->nr_suballoc_bos);
+ free(fd_submit->suballoc_bos);
if (fd_submit->out_fence)
fd_fence_del(fd_submit->out_fence);
- free(fd_submit->bos);
free(fd_submit);
}
@@ -412,8 +448,8 @@ fd_submit_sp_new(struct fd_pipe *pipe, flush_submit_list_fn flush_submit_list)
struct fd_submit_sp *fd_submit = calloc(1, sizeof(*fd_submit));
struct fd_submit *submit;
- fd_submit->bo_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
- _mesa_key_pointer_equal);
+ fd_submit->bo_table = _mesa_pointer_hash_table_create(NULL);
+ fd_submit->suballoc_bo_table = _mesa_pointer_hash_table_create(NULL);
slab_create_child(&fd_submit->ring_pool, &pipe->ring_pool);
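append_suballoc_bo() reuses the same trick as the pre-existing bos table:
bo->idx caches the index from the last lookup, so the common case never
touches the hash table, and the slow path only runs when the cached index
is stale. A stripped-down, self-contained sketch of the pattern (a linear
search stands in for the real hash table, and capacity checks are elided):

    struct bo { unsigned idx; };

    struct bo_table {
       struct bo **entries;
       unsigned count;
    };

    static unsigned
    lookup_or_append(struct bo_table *t, struct bo *bo)
    {
       for (unsigned i = 0; i < t->count; i++)
          if (t->entries[i] == bo)
             return i;
       t->entries[t->count] = bo;   /* assumes room left in the array */
       return t->count++;
    }

    static unsigned
    append_bo(struct bo_table *t, struct bo *bo)
    {
       unsigned idx = bo->idx;   /* opportunistic read, possibly stale */

       /* Stale if out of range or pointing at a different BO (the cache
        * may have been primed by a different submit using the same BO):
        */
       if (idx >= t->count || t->entries[idx] != bo) {
          idx = lookup_or_append(t, bo);   /* slow path */
          bo->idx = idx;                   /* re-prime for the next call */
       }
       return idx;
    }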
diff --git a/src/freedreno/drm/freedreno_ringbuffer_sp.h b/src/freedreno/drm/freedreno_ringbuffer_sp.h
index f5e04ce0a34..ad0399d3660 100644
--- a/src/freedreno/drm/freedreno_ringbuffer_sp.h
+++ b/src/freedreno/drm/freedreno_ringbuffer_sp.h
@@ -50,9 +50,19 @@ struct fd_submit_sp {
DECLARE_ARRAY(struct fd_bo *, bos);
+ /* Keep a separate table of sub-alloc BOs.. the backing objects are
+ * tracked in the main bos table (because this is what the kernel
+ * sees), but we need to attach userspace fences to the sub-alloc'd
+ * BOs so the driver knows when they are idle
+ */
+ DECLARE_ARRAY(struct fd_bo *, suballoc_bos);
+
/* maps fd_bo to idx in bos table: */
struct hash_table *bo_table;
+ /* maps fd_bo to idx in suballoc_bos table: */
+ struct hash_table *suballoc_bo_table;
+
struct slab_child_pool ring_pool;
/* Allow for sub-allocation of stateobj ring buffers (ie. sharing
diff --git a/src/freedreno/drm/meson.build b/src/freedreno/drm/meson.build
index dcdbbdb4012..cbbfc75f01a 100644
--- a/src/freedreno/drm/meson.build
+++ b/src/freedreno/drm/meson.build
@@ -20,6 +20,7 @@
libfreedreno_drm_files = files(
'freedreno_bo.c',
+ 'freedreno_bo_heap.c',
'freedreno_bo_cache.c',
'freedreno_device.c',
'freedreno_drmif.h',
diff --git a/src/freedreno/drm/msm/msm_ringbuffer.c b/src/freedreno/drm/msm/msm_ringbuffer.c
index 3bfcce55dd7..08a47077045 100644
--- a/src/freedreno/drm/msm/msm_ringbuffer.c
+++ b/src/freedreno/drm/msm/msm_ringbuffer.c
@@ -314,7 +314,7 @@ msm_submit_flush(struct fd_submit *submit, int in_fence_fd, bool use_fence_fd)
cmds[i].type = MSM_SUBMIT_CMD_IB_TARGET_BUF;
cmds[i].submit_idx = append_bo(msm_submit, msm_ring->ring_bo);
- cmds[i].submit_offset = msm_ring->offset;
+ cmds[i].submit_offset = submit_offset(msm_ring->ring_bo, msm_ring->offset);
cmds[i].size = offset_bytes(ring->cur, ring->start);
cmds[i].pad = 0;
cmds[i].nr_relocs = msm_ring->cmd->nr_relocs;
@@ -328,9 +328,9 @@ msm_submit_flush(struct fd_submit *submit, int in_fence_fd, bool use_fence_fd)
} else {
cmds[i].type = MSM_SUBMIT_CMD_IB_TARGET_BUF;
}
- cmds[i].submit_idx =
- append_bo(msm_submit, msm_ring->u.cmds[j]->ring_bo);
- cmds[i].submit_offset = msm_ring->offset;
+ struct fd_bo *ring_bo = msm_ring->u.cmds[j]->ring_bo;
+ cmds[i].submit_idx = append_bo(msm_submit, ring_bo);
+ cmds[i].submit_offset = submit_offset(ring_bo, msm_ring->offset);
cmds[i].size = msm_ring->u.cmds[j]->size;
cmds[i].pad = 0;
cmds[i].nr_relocs = msm_ring->u.cmds[j]->nr_relocs;
diff --git a/src/freedreno/drm/msm/msm_ringbuffer_sp.c b/src/freedreno/drm/msm/msm_ringbuffer_sp.c
index 8e46f3f1387..9491d45fa08 100644
--- a/src/freedreno/drm/msm/msm_ringbuffer_sp.c
+++ b/src/freedreno/drm/msm/msm_ringbuffer_sp.c
@@ -67,10 +67,10 @@ flush_submit_list(struct list_head *submit_list)
to_fd_ringbuffer_sp(submit->primary);
for (unsigned i = 0; i < deferred_primary->u.nr_cmds; i++) {
+ struct fd_bo *ring_bo = deferred_primary->u.cmds[i].ring_bo;
cmds[cmd_idx].type = MSM_SUBMIT_CMD_BUF;
- cmds[cmd_idx].submit_idx =
- fd_submit_append_bo(fd_submit, deferred_primary->u.cmds[i].ring_bo);
- cmds[cmd_idx].submit_offset = deferred_primary->offset;
+ cmds[cmd_idx].submit_idx = fd_submit_append_bo(fd_submit, ring_bo);
+ cmds[cmd_idx].submit_offset = submit_offset(ring_bo, deferred_primary->offset);
cmds[cmd_idx].size = deferred_primary->u.cmds[i].size;
cmds[cmd_idx].pad = 0;
cmds[cmd_idx].nr_relocs = 0;
diff --git a/src/freedreno/drm/virtio/virtio_ringbuffer.c b/src/freedreno/drm/virtio/virtio_ringbuffer.c
index 5d691a93e10..0af58d476fe 100644
--- a/src/freedreno/drm/virtio/virtio_ringbuffer.c
+++ b/src/freedreno/drm/virtio/virtio_ringbuffer.c
@@ -85,10 +85,10 @@ flush_submit_list(struct list_head *submit_list)
to_fd_ringbuffer_sp(submit->primary);
for (unsigned i = 0; i < deferred_primary->u.nr_cmds; i++) {
+ struct fd_bo *ring_bo = deferred_primary->u.cmds[i].ring_bo;
cmds[cmd_idx].type = MSM_SUBMIT_CMD_BUF;
- cmds[cmd_idx].submit_idx =
- fd_submit_append_bo(fd_submit, deferred_primary->u.cmds[i].ring_bo);
- cmds[cmd_idx].submit_offset = deferred_primary->offset;
+ cmds[cmd_idx].submit_idx = fd_submit_append_bo(fd_submit, ring_bo);
+ cmds[cmd_idx].submit_offset = submit_offset(ring_bo, deferred_primary->offset);
cmds[cmd_idx].size = deferred_primary->u.cmds[i].size;
cmds[cmd_idx].pad = 0;
cmds[cmd_idx].nr_relocs = 0;