summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEric Anholt <eric@anholt.net>2015-06-30 11:15:44 -0700
committerEric Anholt <eric@anholt.net>2015-06-30 11:46:31 -0700
commite200d86a68a634d039985ef87ce5ae0aad52feb7 (patch)
tree5b3aeeb92d16ecd77c71916672f74e7fbfb3f23a
parent07009cab090ade3dd180e8a55d590b1a00072eed (diff)
downloadlinux-e200d86a68a634d039985ef87ce5ae0aad52feb7.tar.gz
drm/vc4: Add support for async pageflips.
This should be the fastest screen update path we have, but it doesn't seem to have really improved performance with vblank_mode=0 fullscreen. Signed-off-by: Eric Anholt <eric@anholt.net>
-rw-r--r--drivers/gpu/drm/vc4/vc4_crtc.c99
-rw-r--r--drivers/gpu/drm/vc4/vc4_drv.h1
-rw-r--r--drivers/gpu/drm/vc4/vc4_gem.c6
-rw-r--r--drivers/gpu/drm/vc4/vc4_kms.c1
-rw-r--r--drivers/gpu/drm/vc4/vc4_plane.c40
5 files changed, 140 insertions, 7 deletions
diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c
index f9960ac34fd7..446f8b3aa436 100644
--- a/drivers/gpu/drm/vc4/vc4_crtc.c
+++ b/drivers/gpu/drm/vc4/vc4_crtc.c
@@ -10,8 +10,10 @@
* Controls the timings of the hardware's pixel valve.
*/
+#include "drm_atomic.h"
#include "drm_atomic_helper.h"
#include "drm_crtc_helper.h"
+#include "drm_fb_cma_helper.h"
#include "linux/component.h"
#include "vc4_drv.h"
#include "vc4_regs.h"
@@ -329,10 +331,105 @@ static irqreturn_t vc4_crtc_irq_handler(int irq, void *data)
return ret;
}
+struct vc4_async_flip_state {
+ struct drm_crtc *crtc; /* CRTC whose primary plane is being flipped */
+ struct drm_framebuffer *fb; /* framebuffer being flipped to; a reference is held until completion */
+ struct drm_pending_vblank_event *event; /* may be NULL; sent from the completion callback otherwise */
+
+ struct vc4_seqno_cb cb; /* embedded so the callback can container_of() back to this state */
+};
+
+/* Seqno callback for an async page flip: runs once the V3D execution for
+ * the BO being flipped to is done, so that the plane's address can
+ * actually be updated to point to it.
+ *
+ * Consumes the flip state: sends the pending event (if any), drops the
+ * framebuffer reference, frees the state and finally releases the async
+ * modeset semaphore.
+ */
+static void
+vc4_async_page_flip_complete(struct vc4_seqno_cb *cb)
+{
+	struct vc4_async_flip_state *state =
+		container_of(cb, struct vc4_async_flip_state, cb);
+	struct drm_crtc *crtc = state->crtc;
+	struct drm_device *drm = crtc->dev;
+	struct vc4_dev *vc4 = to_vc4_dev(drm);
+	struct drm_framebuffer *new_fb = state->fb;
+	struct drm_pending_vblank_event *pending = state->event;
+
+	vc4_plane_async_set_fb(crtc->primary, new_fb);
+
+	/* An event is optional; only deliver one if it was requested. */
+	if (pending) {
+		unsigned long irqflags;
+
+		spin_lock_irqsave(&drm->event_lock, irqflags);
+		drm_crtc_send_vblank_event(crtc, pending);
+		spin_unlock_irqrestore(&drm->event_lock, irqflags);
+	}
+
+	drm_framebuffer_unreference(new_fb);
+	kfree(state);
+
+	/* Let the next async modeset proceed. */
+	up(&vc4->async_modeset);
+}
+
+/* Implements async (non-vblank-synced) page flips.
+ *
+ * The page flip ioctl needs to return immediately, so we grab the
+ * modeset semaphore on the pipe, and queue the address update for
+ * when V3D is done with the BO being flipped to.
+ *
+ * A reference on @fb is taken here and is dropped again by
+ * vc4_async_page_flip_complete() once the flip has been applied.
+ * @flags is not examined by this function.
+ *
+ * Returns 0 once the flip has been queued, -ENOMEM if the flip state
+ * cannot be allocated, or the error returned by down_interruptible()
+ * if waiting for the semaphore fails.  Nothing is left allocated or
+ * referenced on the error paths.
+ */
+static int vc4_async_page_flip(struct drm_crtc *crtc,
+			       struct drm_framebuffer *fb,
+			       struct drm_pending_vblank_event *event,
+			       uint32_t flags)
+{
+	struct drm_device *dev = crtc->dev;
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	struct drm_plane *plane = crtc->primary;
+	int ret;
+	struct vc4_async_flip_state *flip_state;
+	struct drm_gem_cma_object *cma_bo = drm_fb_cma_get_gem_obj(fb, 0);
+	struct vc4_bo *bo = to_vc4_bo(&cma_bo->base);
+
+	flip_state = kzalloc(sizeof(*flip_state), GFP_KERNEL);
+	if (!flip_state)
+		return -ENOMEM;
+
+	drm_framebuffer_reference(fb);
+	flip_state->fb = fb;
+	flip_state->crtc = crtc;
+	flip_state->event = event;
+
+	/* Make sure all other async modesets have landed. */
+	ret = down_interruptible(&vc4->async_modeset);
+	if (ret) {
+		/* Nothing has been queued, so the completion callback will
+		 * never run: drop the reference taken above ourselves.
+		 */
+		drm_framebuffer_unreference(fb);
+		kfree(flip_state);
+		return ret;
+	}
+
+	/* Immediately update the plane's legacy fb pointer, so that later
+	 * modeset prep sees the state that will be present when the semaphore
+	 * is released.
+	 */
+	drm_atomic_set_fb_for_plane(plane->state, fb);
+	plane->fb = fb;
+
+	vc4_queue_seqno_cb(dev, &flip_state->cb, bo->seqno,
+			   vc4_async_page_flip_complete);
+
+	/* Driver takes ownership of state on successful async commit. */
+	return 0;
+}
+
+/* Page flip entry point: requests carrying DRM_MODE_PAGE_FLIP_ASYNC take
+ * the driver's own async path, everything else goes through the atomic
+ * helper.
+ */
+static int vc4_page_flip(struct drm_crtc *crtc,
+			 struct drm_framebuffer *fb,
+			 struct drm_pending_vblank_event *event,
+			 uint32_t flags)
+{
+	if (!(flags & DRM_MODE_PAGE_FLIP_ASYNC))
+		return drm_atomic_helper_page_flip(crtc, fb, event, flags);
+
+	return vc4_async_page_flip(crtc, fb, event, flags);
+}
+
static const struct drm_crtc_funcs vc4_crtc_funcs = {
.set_config = drm_atomic_helper_set_config,
.destroy = vc4_crtc_destroy,
- .page_flip = drm_atomic_helper_page_flip,
+ .page_flip = vc4_page_flip,
.set_property = NULL,
.cursor_set = NULL, /* handled by drm_mode_cursor_universal */
.cursor_move = NULL, /* handled by drm_mode_cursor_universal */
diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h
index 13b475785ca0..c2db6bd57352 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.h
+++ b/drivers/gpu/drm/vc4/vc4_drv.h
@@ -431,6 +431,7 @@ struct drm_plane *vc4_plane_init(struct drm_device *dev,
enum drm_plane_type type);
u32 vc4_plane_write_dlist(struct drm_plane *plane, u32 __iomem *dlist);
u32 vc4_plane_dlist_size(struct drm_plane_state *state);
+void vc4_plane_async_set_fb(struct drm_plane *plane, struct drm_framebuffer *fb);
/* vc4_v3d.c */
void vc4_v3d_register(void);
diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
index 577cc6d3840f..ca3955c5ea79 100644
--- a/drivers/gpu/drm/vc4/vc4_gem.c
+++ b/drivers/gpu/drm/vc4/vc4_gem.c
@@ -636,12 +636,6 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
mutex_unlock(&dev->struct_mutex);
- /* To keep any client from getting too far ahead (particularly
- * a problem when BO caching is involved), we wait on the
- * previous rendering before returning to userspace.
- */
- vc4_wait_for_seqno(dev, args->seqno - 1, ~0ull, true);
-
return 0;
fail:
diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c
index acfd1b23ab4a..44836d51145d 100644
--- a/drivers/gpu/drm/vc4/vc4_kms.c
+++ b/drivers/gpu/drm/vc4/vc4_kms.c
@@ -207,6 +207,7 @@ vc4_kms_load(struct drm_device *dev)
dev->mode_config.max_height = 2048;
dev->mode_config.funcs = &vc4_mode_funcs;
dev->mode_config.preferred_depth = 24;
+ dev->mode_config.async_page_flip = true;
ret = vc4_init_modeset_objects(dev);
if (ret)
diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c
index 0c8afb17314a..0d703154b29d 100644
--- a/drivers/gpu/drm/vc4/vc4_plane.c
+++ b/drivers/gpu/drm/vc4/vc4_plane.c
@@ -21,6 +21,14 @@ struct vc4_plane_state {
u32 *dlist;
u32 dlist_size; /* Number of dwords in allocated for the display list */
u32 dlist_count; /* Number of used dwords in the display list. */
+
+ /* Offset in the dlist to pointer word 0. */
+ u32 pw0_offset;
+
+ /* Pointer to where the plane's dlist was last stored in the
+  * hardware at vc4_crtc_atomic_flush() time.
+  */
+ u32 *hw_dlist;
};
static inline struct vc4_plane_state *
@@ -191,6 +199,8 @@ vc4_plane_mode_set(struct drm_plane *plane, struct drm_plane_state *state)
/* Position Word 3: Context. Written by the HVS. */
vc4_dlist_write(vc4_state, 0xc0c0c0c0);
+ vc4_state->pw0_offset = vc4_state->dlist_count;
+
/* Pointer Word 0: RGB / Y Pointer */
vc4_dlist_write(vc4_state, bo->paddr + offset);
@@ -244,6 +254,8 @@ u32 vc4_plane_write_dlist(struct drm_plane *plane, u32 __iomem *dlist)
struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state);
int i;
+ vc4_state->hw_dlist = dlist;
+
/* Can't memcpy_toio() because it needs to be 32-bit writes. */
for (i = 0; i < vc4_state->dlist_count; i++)
writel(vc4_state->dlist[i], &dlist[i]);
@@ -258,6 +270,34 @@ u32 vc4_plane_dlist_size(struct drm_plane_state *state)
return vc4_state->dlist_count;
}
+/* Updates the plane to immediately (well, once the FIFO needs
+ * refilling) scan out from a new framebuffer, by rewriting only
+ * pointer word 0 of the plane's dlist.
+ */
+void vc4_plane_async_set_fb(struct drm_plane *plane, struct drm_framebuffer *fb)
+{
+	struct drm_plane_state *state = plane->state;
+	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
+	struct drm_gem_cma_object *cma_obj = drm_fb_cma_get_gem_obj(fb, 0);
+	u32 pw0 = vc4_state->pw0_offset;
+	uint32_t scanout_addr;
+
+	/* The address adjustment for a negative origin is skipped here,
+	 * because this is only called on the primary plane.
+	 */
+	WARN_ON_ONCE(state->crtc_x < 0 || state->crtc_y < 0);
+	scanout_addr = cma_obj->paddr + fb->offsets[0];
+
+	/* Hardware copy first: the scanout will start from this address
+	 * as soon as the FIFO needs to refill with pixels.
+	 */
+	writel(scanout_addr, &vc4_state->hw_dlist[pw0]);
+
+	/* Then the CPU-side dlist copy, so that any later atomic updates
+	 * that don't do a new modeset on our plane also use the updated
+	 * address.
+	 */
+	vc4_state->dlist[pw0] = scanout_addr;
+}
+
static const struct drm_plane_helper_funcs vc4_plane_helper_funcs = {
.prepare_fb = NULL,
.cleanup_fb = NULL,