Files
marfrit-packages/kernel/vb2-dma-resv-rfc/0001-media-videobuf2-add-dma_resv-release-fence-helper.patch
T
marfrit a7892bfabc kernel/vb2-dma-resv-rfc: 3-patch RFC series draft
Drafted but not yet compile-tested or runtime-validated. Draft
target: vb2 grows an opt-in dma_resv release-fence API; hantro and
rockchip-rga opt in as the demonstration drivers.

Series structure:
- 0000-cover-letter.patch  — context, motivation, validation results
- 0001-media-videobuf2-add-dma_resv-release-fence-helper.patch
    Adds vb2_buffer_attach_release_fence() that drivers call from
    their buf_queue callback. Stores the fence on vb->release_fence;
    vb2_buffer_done signals + puts. Per-queue fence context allocated
    at vb2_core_queue_init.
- 0002-media-hantro-attach-dma_resv-release-fence-at-buf_queue.patch
    Single call in hantro_buf_queue. ~5 lines.
- 0003-media-rockchip-rga-attach-dma_resv-release-fence-at-buf_queue.patch
    Same shape in rga_buf_queue. ~5 lines.

Pre-flight before sending to linux-media (per kernel/README.md):
1. Compile the touched files against the kernel tree the patches
   will land on (linux-next master as of 2026-04-28 was the source
   of truth used for context-line generation).
2. Boot-test on ohm, smoke-test hantro + rga buffer flows.
3. Validate the fence semantics: install patched kernel, uninstall
   kwin-fourier so KWin's watchDmaBuf is active, play 1080p30 H.264
   under KDE Plasma — should play through without the bypass
   because the fence is now real.
4. Capture before/after dma_buf_export_sync_file timings.
5. Send via git format-patch --cover-letter to linux-media@,
   CC dri-devel@ and the relevant maintainers.

This series is the kernel-correct fix for the architectural hole
that the chromium-fourier campaign's kwin-fourier package is
papering over. With this kernel side upstream, kwin-fourier
becomes either redundant (if KWin's existing wait works correctly)
or rewritten as a poll-fd-direct optimization.
2026-04-28 19:13:40 +00:00

241 lines
8.1 KiB
Diff

From: Markus Fritsche <mfritsche@reauktion.de>
Subject: [PATCH RFC 1/3] media: videobuf2: add dma_resv release-fence helper
Date: 2026-04-28
Add an opt-in API that lets vb2 producers populate a `dma_resv`
exclusive write fence on the dmabufs they export to userspace,
signalled when the buffer transitions to VB2_BUF_STATE_DONE.
Drivers that opt in call `vb2_buffer_attach_release_fence(vb)` from
their `buf_queue` callback after `v4l2_m2m_buf_queue` (or equivalent).
The helper:
- allocates a dma_fence on the queue's fence context (set up at
vb2_core_queue_init time),
- attaches it as DMA_RESV_USAGE_WRITE on each plane's dmabuf->resv,
- stashes the fence in `vb->release_fence`.
`vb2_buffer_done` then signals and puts the fence as part of its
existing buffer-state transition, so the userspace consumer that
imported the dmabuf and is poll(POLLIN)-ing it (or waiting on a
sync_file from `DMA_BUF_IOCTL_EXPORT_SYNC_FILE`) sees the fence
become readable synchronously with the DQBUF wakeup.
For drivers that don't opt in, the new field stays NULL and
`vb2_buffer_done` skips the signal path. No-op for every driver
that doesn't call the new helper.
Skips planes whose `vb2_plane.dbuf` is NULL — buffers that have
never been exported via VIDIOC_EXPBUF (or imported via
V4L2_MEMORY_DMABUF) have no dmabuf for userspace to wait on.
Signed-off-by: Markus Fritsche <mfritsche@reauktion.de>
---
drivers/media/common/videobuf2/videobuf2-core.c | 116 ++++++++++++++++
include/media/videobuf2-core.h | 19 +++
2 files changed, 135 insertions(+)
diff --git a/drivers/media/common/videobuf2/videobuf2-core.c b/drivers/media/common/videobuf2/videobuf2-core.c
--- a/drivers/media/common/videobuf2/videobuf2-core.c
+++ b/drivers/media/common/videobuf2/videobuf2-core.c
@@ -22,6 +22,9 @@
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/version.h>
+#include <linux/dma-buf.h>
+#include <linux/dma-fence.h>
+#include <linux/dma-resv.h>
#include <media/videobuf2-core.h>
#include <media/v4l2-mc.h>
@@ -1175,6 +1178,107 @@ static void __enqueue_in_driver(struct vb2_buffer *vb)
call_void_vb_qop(vb, buf_queue, vb);
}
+/*
+ * dma_resv release-fence integration.
+ *
+ * Background: V4L2 producers (vb2-using drivers) historically did not
+ * propagate buffer-state-done into the dmabuf's dma_resv exclusive
+ * fence. Userspace consumers that imported V4L2-produced dmabufs and
+ * tried to do implicit synchronization the spec-clean way
+ * (poll(POLLIN), DMA_BUF_IOCTL_EXPORT_SYNC_FILE) got either zero
+ * fences or a stub fence from dma_fence_get_stub(). This is correct
+ * by accident for the common case (clients call DQBUF before
+ * importing) but represents a contract gap.
+ *
+ * The opt-in API below lets a driver attach a real fence at QBUF
+ * time and have it signalled at vb2_buffer_done. Drivers opt in by
+ * calling vb2_buffer_attach_release_fence(vb) from their buf_queue
+ * callback. No behaviour change for drivers that don't opt in.
+ */
+
+/* dma_fence_ops callback: constant driver label for every vb2 release fence. */
+static const char *vb2_release_fence_driver_name(struct dma_fence *f)
+{
+	return "videobuf2";
+}
+
+/* dma_fence_ops callback: constant timeline label for vb2 release fences. */
+static const char *vb2_release_fence_timeline_name(struct dma_fence *f)
+{
+	return "vb2-release-fence";
+}
+
+/* Only the two name callbacks are set; every other op is left NULL. */
+static const struct dma_fence_ops vb2_dma_resv_fence_ops = {
+	.get_timeline_name = vb2_release_fence_timeline_name,
+	.get_driver_name = vb2_release_fence_driver_name,
+};
+
+/**
+ * vb2_buffer_attach_release_fence() - attach a dma_resv write fence to
+ * each of @vb's plane dmabufs, to be signalled by vb2_buffer_done().
+ *
+ * @vb: the buffer being handed to the driver through buf_queue.
+ *
+ * Drivers should call this from their buf_queue callback (after the
+ * driver-internal queueing — e.g. after v4l2_m2m_buf_queue() for
+ * M2M drivers). The caller must be able to sleep: the fence is
+ * allocated with GFP_KERNEL and each plane's dma_resv lock is taken.
+ * Planes whose dbuf is NULL are skipped silently.
+ *
+ * If @vb already carries a release fence, that fence is kept and 0 is
+ * returned. This happens when the buffer was given back with
+ * VB2_BUF_STATE_QUEUED (which does not signal the fence) and buf_queue
+ * is invoked for it again.
+ *
+ * NOTE(review): nothing here stops the driver from completing @vb
+ * between its internal queueing and this call; a vb2_buffer_done() that
+ * runs first would not see the fence — confirm the call order is safe.
+ * NOTE(review): planes[].dbuf looks to be populated only for
+ * V4L2_MEMORY_DMABUF imports, not by VIDIOC_EXPBUF — confirm that
+ * MMAP-exported buffers are actually covered.
+ * NOTE(review): the fence uses a spinlock embedded in the vb2_queue; a
+ * fence still referenced by a dma_resv may outlive the queue — confirm
+ * the lock's lifetime.
+ *
+ * Return: 0 on success, negative errno on allocation failure. On error
+ * no unsignalled fence is left on any plane and vb->release_fence
+ * remains NULL.
+ */
+int vb2_buffer_attach_release_fence(struct vb2_buffer *vb)
+{
+	struct vb2_queue *q = vb->vb2_queue;
+	struct dma_fence *fence;
+	unsigned int plane;
+	int ret = 0;
+
+	/* Re-queued buffer: the still-pending fence is already attached. */
+	if (vb->release_fence)
+		return 0;
+
+	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
+	if (!fence)
+		return -ENOMEM;
+
+	dma_fence_init(fence, &vb2_dma_resv_fence_ops, &q->dma_resv_fence_lock,
+		       q->dma_resv_fence_context,
+		       atomic64_inc_return(&q->dma_resv_fence_seqno));
+
+	for (plane = 0; plane < vb->num_planes; plane++) {
+		struct dma_buf *dbuf = vb->planes[plane].dbuf;
+
+		if (!dbuf)
+			continue;
+
+		dma_resv_lock(dbuf->resv, NULL);
+		/*
+		 * dma_resv_add_fence() needs a slot reserved beforehand,
+		 * within the same locked section.
+		 */
+		ret = dma_resv_reserve_fences(dbuf->resv, 1);
+		if (!ret)
+			dma_resv_add_fence(dbuf->resv, fence,
+					   DMA_RESV_USAGE_WRITE);
+		dma_resv_unlock(dbuf->resv);
+
+		if (ret)
+			break;
+	}
+
+	if (ret) {
+		/*
+		 * Earlier planes may already reference the fence. Signal it
+		 * so that no waiter blocks on a fence nobody will complete,
+		 * then drop the dma_fence_init() reference.
+		 */
+		dma_fence_signal(fence);
+		dma_fence_put(fence);
+		return ret;
+	}
+
+	/*
+	 * The dma_resv objects took their own references. The reference
+	 * from dma_fence_init() is handed to @vb and dropped when
+	 * vb2_buffer_done() signals the fence.
+	 */
+	vb->release_fence = fence;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(vb2_buffer_attach_release_fence);
+
+/*
+ * Complete the release fence attached to @vb, if any: record -EIO on it
+ * when @state is VB2_BUF_STATE_ERROR, signal it, drop the buffer's
+ * reference and clear vb->release_fence. Does nothing when no fence is
+ * attached.
+ */
+static void vb2_buffer_signal_release_fence(struct vb2_buffer *vb,
+					    enum vb2_buffer_state state)
+{
+	struct dma_fence *pending = vb->release_fence;
+
+	if (pending) {
+		/* The error has to be recorded before the fence is signalled. */
+		if (state == VB2_BUF_STATE_ERROR)
+			dma_fence_set_error(pending, -EIO);
+
+		dma_fence_signal(pending);
+		dma_fence_put(pending);
+		vb->release_fence = NULL;
+	}
+}
+
static int __enqueue_in_driver_with_request(struct vb2_buffer *vb)
{
if (vb->req_obj.req) {
@@ -1182,12 +1286,15 @@ void vb2_buffer_done(struct vb2_buffer *vb, enum vb2_buffer_state state)
dprintk(q, 4, "done processing on buffer %d, state: %s\n",
vb->index, vb2_state_name(state));
if (state != VB2_BUF_STATE_QUEUED)
__vb2_buf_mem_finish(vb);
+ if (state != VB2_BUF_STATE_QUEUED)
+ vb2_buffer_signal_release_fence(vb, state);
+
spin_lock_irqsave(&q->done_lock, flags);
if (state == VB2_BUF_STATE_QUEUED) {
vb->state = VB2_BUF_STATE_QUEUED;
} else {
@@ -2598,6 +2705,15 @@ int vb2_core_queue_init(struct vb2_queue *q)
mutex_init(&q->mmap_lock);
init_waitqueue_head(&q->done_wq);
+ /*
+ * Per-queue dma_resv fence context. Drivers that opt into
+ * vb2_buffer_attach_release_fence() use these to allocate
+ * fences in their own timeline; drivers that don't opt in
+ * pay only the cost of three small unused fields.
+ */
+ q->dma_resv_fence_context = dma_fence_context_alloc(1);
+ atomic64_set(&q->dma_resv_fence_seqno, 0);
+ spin_lock_init(&q->dma_resv_fence_lock);
+
q->memory = VB2_MEMORY_UNKNOWN;
if (q->buf_struct_size == 0)
diff --git a/include/media/videobuf2-core.h b/include/media/videobuf2-core.h
--- a/include/media/videobuf2-core.h
+++ b/include/media/videobuf2-core.h
@@ -19,6 +19,7 @@
#include <linux/dma-buf.h>
#include <linux/bitops.h>
#include <media/media-request.h>
#include <media/frame_vector.h>
+struct dma_fence;
@@ -286,6 +287,12 @@ struct vb2_buffer {
unsigned int skip_cache_sync_on_finish:1;
struct vb2_plane planes[VB2_MAX_PLANES];
+ /*
+ * dma_resv release fence — set by vb2_buffer_attach_release_fence()
+ * (driver opt-in from buf_queue), signalled by vb2_buffer_done.
+ * NULL for drivers that don't opt in.
+ */
+ struct dma_fence *release_fence;
struct list_head queued_entry;
struct list_head done_entry;
@@ -645,6 +652,11 @@ struct vb2_queue {
wait_queue_head_t done_wq;
+ /* dma_resv release-fence integration (opt-in per buffer). */
+ u64 dma_resv_fence_context;
+ atomic64_t dma_resv_fence_seqno;
+ spinlock_t dma_resv_fence_lock;
+
unsigned int streaming:1;
unsigned int start_streaming_called:1;
unsigned int error:1;
@@ -750,6 +762,13 @@ void vb2_buffer_done(struct vb2_buffer *vb, enum vb2_buffer_state state);
*/
void vb2_buffer_done(struct vb2_buffer *vb, enum vb2_buffer_state state);
+/**
+ * vb2_buffer_attach_release_fence() - opt-in dma_resv release fence.
+ * @vb: buffer that the driver's buf_queue callback was invoked for.
+ *
+ * Called from a driver's buf_queue callback after enqueueing the
+ * buffer in the driver's own queue. See videobuf2-core.c for
+ * rationale and call shape.
+ *
+ * Return: 0 on success, a negative errno otherwise.
+ */
+int vb2_buffer_attach_release_fence(struct vb2_buffer *vb);
+
/**
* vb2_discard_done() - discard all buffers marked as DONE.
* @q: pointer to &struct vb2_queue with videobuf2 queue.
--
2.44.0