From 713a856cdc64cf2c21b700fbe9e1c29cea0728c0 Mon Sep 17 00:00:00 2001 From: Markus Fritsche Date: Fri, 8 May 2026 22:02:21 +0000 Subject: [PATCH] =?UTF-8?q?mpv-fourier:=20iter1=20patch=20=E2=80=94=20expl?= =?UTF-8?q?icit=20DMA=5FBUF=5FIOCTL=5FSYNC=20on=20import=20fds?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Workaround for the dmabuf-wayland green-frames bug (marfrit/dmabuf-modifier-triage#1). iter1 phase 2 source-read of KWin 6.6.4 + Mesa 26.0.6 ruled out the original H1/H2 hypotheses (panfrost offset handling and KWin wl_dmabuf import are clean) and matched the green tone to BT.601 limited-range YUV(0,0,0) -> RGB(0, 135, 0). Conclusion: panfrost reads zero-fill memory despite hantro having written real data — a cache-coherency / synchronization gap. V4L2 doesn't attach implicit fences (dma_resv) to CAPTURE buffers on VIDIOC_DQBUF; this gap is the same one our vb2_dma_resv RFC v2 addresses upstream. The userspace workaround is to issue DMA_BUF_IOCTL_SYNC(SYNC_START|SYNC_RW) + SYNC_END(SYNC_RW) on each EXPBUF fd before submitting to the compositor — invokes the producer driver's begin_cpu_access / end_cpu_access path, which on most ARM SoCs flushes write buffers and synchronizes coherent memory. Patch covers BOTH vaapi_dmabuf_importer (the path our test exercises via `mpv --hwdec=vaapi`) and drmprime_dmabuf_importer (for symmetry when used via `--hwdec=drmprime`). If this works, ship it; if it doesn't, hypothesis space narrows further to GPU-side cache invalidation in panfrost's kernel-mode dma_buf import path (H7). pkgrel 8 -> 9. Patch sha256 6c929bea7636b8d81b63a1275ba1d8a471fe2f249fc23509043ace6cf9b076a7. 
--- ...and-explicit-cache-sync-on-import-fd.patch | 81 +++++++++++++++++++ arch/mpv-fourier/PKGBUILD | 18 ++--- 2 files changed, 90 insertions(+), 9 deletions(-) create mode 100644 arch/mpv-fourier/0001-vo_dmabuf_wayland-explicit-cache-sync-on-import-fd.patch diff --git a/arch/mpv-fourier/0001-vo_dmabuf_wayland-explicit-cache-sync-on-import-fd.patch b/arch/mpv-fourier/0001-vo_dmabuf_wayland-explicit-cache-sync-on-import-fd.patch new file mode 100644 index 0000000000..85e8205053 --- /dev/null +++ b/arch/mpv-fourier/0001-vo_dmabuf_wayland-explicit-cache-sync-on-import-fd.patch @@ -0,0 +1,81 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Markus Fritsche +Date: Fri, 8 May 2026 23:30:00 +0000 +Subject: [PATCH] vo_dmabuf_wayland: explicit DMA_BUF_IOCTL_SYNC on import fds + +V4L2 does not attach implicit fences (dma_resv) to CAPTURE buffers +on VIDIOC_DQBUF. When the buffer is forwarded to a Wayland compositor +that imports it via wl_dmabuf and samples in the GPU, the GPU may +read from physical memory before the producer's writes have flushed, +producing all-zero output (manifests as solid green for BT.601 +limited-range YUV(0,0,0) -> RGB(0, 135, 0) on the consumer side). + +Issue an explicit DMA_BUF_IOCTL_SYNC(SYNC_START|SYNC_RW) + +SYNC_END(SYNC_RW) round-trip on each unique dma_buf fd before +zwp_linux_buffer_params_v1_add(). This invokes the producer driver's +dma_buf_ops->begin_cpu_access / end_cpu_access, which on most ARM +SoCs flushes write buffers and synchronizes coherent memory before +the compositor's GPU import. + +This is a userspace workaround. Root cause is the missing implicit +fence on V4L2 CAPTURE DQBUF and is being addressed upstream via +the vb2_dma_resv RFC. + +Without this patch, on RK3566 (hantro VPU + Mali-G52 panfrost + +KDE Plasma 6 / KWin 6.6.4), `mpv --hwdec=vaapi --vo=dmabuf-wayland` +shows solid green frames for all hardware-decoded content. With +this patch, decoded frames are presented correctly. 
+ +Signed-off-by: Markus Fritsche +--- +diff --git a/video/out/vo_dmabuf_wayland.c b/video/out/vo_dmabuf_wayland.c +index 6b7c511..16e3d18 100644 +--- a/video/out/vo_dmabuf_wayland.c ++++ b/video/out/vo_dmabuf_wayland.c +@@ -27,6 +27,12 @@ + #include + #endif + ++/* fourier patch: explicit dma_buf cache sync workaround for missing ++ * implicit-fence on V4L2 stateless CAPTURE buffers. Applies to both ++ * VAAPI and DRMPrime import paths. */ ++#include <sys/ioctl.h> ++#include <linux/dma-buf.h> ++ + #include "gpu/hwdec.h" + #include "gpu/video.h" + #include "mpv_talloc.h" +@@ -205,6 +211,14 @@ static void vaapi_dmabuf_importer(struct buffer *buf, struct mp_image *src, + buf->drm_format = 0; + goto done; + } ++ /* fourier patch: explicit cache coherency sync on each dma_buf fd ++ * before submitting to the compositor. See top-of-file comment. */ ++ for (int obj_no = 0; obj_no < desc.num_objects; obj_no++) { ++ struct dma_buf_sync sync = { .flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_RW }; ++ (void)ioctl(desc.objects[obj_no].fd, DMA_BUF_IOCTL_SYNC, &sync); ++ sync.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_RW; ++ (void)ioctl(desc.objects[obj_no].fd, DMA_BUF_IOCTL_SYNC, &sync); ++ } + for (int plane_no = 0; plane_no < desc.layers[layer_no].num_planes; ++plane_no) { + int object = desc.layers[layer_no].object_index[plane_no]; + uint64_t modifier = desc.objects[object].drm_format_modifier; +@@ -258,6 +272,16 @@ static void drmprime_dmabuf_importer(struct buffer *buf, struct mp_image *src, + return; + + buf->id = drmprime_surface_id(src); ++ ++ /* fourier patch: explicit cache coherency sync on each dma_buf fd ++ * before submitting to the compositor. See top-of-file comment. 
*/ ++ for (int obj_no = 0; obj_no < desc->nb_objects; obj_no++) { ++ struct dma_buf_sync sync = { .flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_RW }; ++ (void)ioctl(desc->objects[obj_no].fd, DMA_BUF_IOCTL_SYNC, &sync); ++ sync.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_RW; ++ (void)ioctl(desc->objects[obj_no].fd, DMA_BUF_IOCTL_SYNC, &sync); ++ } ++ + for (layer_no = 0; layer_no < desc->nb_layers; layer_no++) { + AVDRMLayerDescriptor layer = desc->layers[layer_no]; + +-- +2.51.0 diff --git a/arch/mpv-fourier/PKGBUILD b/arch/mpv-fourier/PKGBUILD index 6c77a7929d..13250b871d 100644 --- a/arch/mpv-fourier/PKGBUILD +++ b/arch/mpv-fourier/PKGBUILD @@ -23,8 +23,8 @@ pkgname=mpv-fourier _upstreampkg=mpv epoch=1 pkgver=0.41.0 -pkgrel=8 -pkgdesc='mpv with fourier-umbrella patches (vo_dmabuf_wayland plane-semantics fix slot)' +pkgrel=9 +pkgdesc='mpv with fourier-umbrella patches (vo_dmabuf_wayland explicit cache-sync workaround)' arch=('aarch64') url='https://mpv.io/' license=('GPL-2.0-or-later AND LGPL-2.1-or-later') @@ -49,21 +49,21 @@ options=('!emptydirs') source=( "${_upstreampkg}-${pkgver}.tar.gz::https://github.com/mpv-player/${_upstreampkg}/archive/v${pkgver}/${_upstreampkg}-${pkgver}.tar.gz" + '0001-vo_dmabuf_wayland-explicit-cache-sync-on-import-fd.patch' ) sha256sums=( 'ee21092a5ee427353392360929dc64645c54479aefdb5babc5cfbb5fad626209' + '6c929bea7636b8d81b63a1275ba1d8a471fe2f249fc23509043ace6cf9b076a7' ) prepare() { cd "${_upstreampkg}-${pkgver}" - # Patch slot — iter1 of dmabuf-modifier-triage will produce the actual - # vo_dmabuf_wayland.c plane-semantics fix and add it as 0001-... here. - # When the patch lands, add it to source=() above + sha256sums=() and - # uncomment the patch -p1 below. - # - # patch -p1 < "${srcdir}/0001-vo_dmabuf_wayland-plane-semantics.patch" - : + # iter1 of dmabuf-modifier-triage — explicit DMA_BUF_IOCTL_SYNC on import + # fds in vaapi_dmabuf_importer + drmprime_dmabuf_importer. 
Workaround for + # missing implicit-fence on V4L2 stateless CAPTURE buffers; root cause is + # being addressed upstream via the vb2_dma_resv RFC. + patch -p1 < "${srcdir}/0001-vo_dmabuf_wayland-explicit-cache-sync-on-import-fd.patch" } build() {