45f4b5e56f
Mirror of arch/ffmpeg-v4l2-request-fourier into the Debian tree. Same source pin (Kwiboo v4l2-request-n8.1 @ b57fbbe), same 2 patches (libudev-bypass-fallback + nv15-to-p010-unpack), same configure flag policy (drop X11/AMF/CUDA/Bluray/Vulkan/SDL2/etc. per Fourier fleet focus). Output: single .deb at /usr/bin/ffmpeg + /usr/bin/ffprobe + /usr/lib/aarch64-linux-gnu/libav*.so.61. Conflicts/Replaces the stock Debian ffmpeg + per-lib packages; takes epoch 2 (matches Debian's existing ffmpeg epoch). Provides 'ffmpeg -hwaccel v4l2request' + '-hwaccel drm' routes that drive rkvdec / hantro / cedrus / rpi-hevc-dec / daedalus_v4l2 stateless decoders through libavcodec's hwdevice DRM path, bypassing libva. Required by mpv-fourier and firefox-fourier as their backing FFmpeg; also the kdirect bit-exact reference for libva-v4l2-request-fourier validation. NOT strictly required for the VAAPI-only path on daedalus-v4l2 hosts (stock Debian ffmpeg + libva-v4l2-request-fourier covers that). Install only when going firefox-fourier or kdirect. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
179 lines
7.3 KiB
Diff
179 lines
7.3 KiB
Diff
From 0cd6e669735e453ec8772f111065bbb2f70a5bc6 Mon Sep 17 00:00:00 2001
|
||
From: Markus Fritsche <mfritsche@reauktion.de>
|
||
Date: Mon, 18 May 2026 07:27:10 +0000
|
||
Subject: [PATCH] avutil/hwcontext_v4l2request: unpack NV15 to P010 in
|
||
transfer_data_from
|
||
MIME-Version: 1.0
|
||
Content-Type: text/plain; charset=UTF-8
|
||
Content-Transfer-Encoding: 8bit
|
||
|
||
V4L2_PIX_FMT_NV15 (RK3399/RK3588 rkvdec 10-bit 4:2:0 capture) is mapped to
|
||
sw_format = AV_PIX_FMT_YUV420P10 in v4l2request_capture_pixelformats[]. The
|
||
existing transfer_get_formats explicitly blanked the format list for that
|
||
sw_format, so 'ffmpeg -hwaccel v4l2request -vf hwdownload,format=p010le' on
|
||
a Hi10P / Main10 input failed at filter init with EINVAL before reaching
|
||
the actual decode (which itself succeeds — 2 frames decoded cleanly).
|
||
|
||
Expose AV_PIX_FMT_P010 as the transfer target for NV15-backed surfaces and
|
||
unpack the packed 10-bit samples into the standard high-bits-of-16 layout
|
||
inside transfer_data_from. Luma and chroma share the same packing format
|
||
(5 bytes per 4 samples, little endian); chroma plane is W × H/2 samples
|
||
for 4:2:0.
|
||
|
||
The other 'needs custom unpack' sw_formats (YUV420P / Allwinner NV12_32L32
|
||
tiled and YUV422P10 / rkvdec NV20) keep the original ENOSYS path because
|
||
they need different unpack code that isn't covered by this patch.
|
||
|
||
Closes marfrit/marfrit-packages#21.
|
||
---
|
||
libavutil/hwcontext_v4l2request.c | 111 +++++++++++++++++++++++++++++-
|
||
1 file changed, 110 insertions(+), 1 deletion(-)
|
||
|
||
diff --git a/libavutil/hwcontext_v4l2request.c b/libavutil/hwcontext_v4l2request.c
|
||
index b6633d9081..3842160dfb 100644
|
||
--- a/libavutil/hwcontext_v4l2request.c
|
||
+++ b/libavutil/hwcontext_v4l2request.c
|
||
@@ -1073,6 +1073,56 @@ fail:
|
||
return ret;
|
||
}
|
||
|
||
+/*
|
||
+ * Unpack one NV15-packed 10-bit plane (5 bytes per 4 samples, little endian)
|
||
+ * into a P010-style plane (10 bits in the high bits of a 16-bit container).
|
||
+ * `width`/`height` count samples; `dst_stride`/`src_stride` are in bytes.
|
||
+ */
|
||
+static void v4l2request_nv15_unpack_plane_to_p010(const uint8_t *src,
|
||
+ uint16_t *dst,
|
||
+ unsigned width,
|
||
+ unsigned height,
|
||
+ unsigned src_stride,
|
||
+ unsigned dst_stride)
|
||
+{
|
||
+ for (unsigned y = 0; y < height; y++) {
|
||
+ const uint8_t *s = src + y * src_stride;
|
||
+ uint16_t *d = (uint16_t *)((uint8_t *)dst + y * dst_stride);
|
||
+ unsigned x;
|
||
+
|
||
+ for (x = 0; x + 4 <= width; x += 4) {
|
||
+ uint16_t a = (uint16_t)s[0] | ((uint16_t)(s[1] & 0x03) << 8);
|
||
+ uint16_t b = ((uint16_t)s[1] >> 2) | ((uint16_t)(s[2] & 0x0F) << 6);
|
||
+ uint16_t c = ((uint16_t)s[2] >> 4) | ((uint16_t)(s[3] & 0x3F) << 4);
|
||
+ uint16_t e = ((uint16_t)s[3] >> 6) | ((uint16_t)s[4] << 2);
|
||
+
|
||
+ d[0] = (uint16_t)(a << 6);
|
||
+ d[1] = (uint16_t)(b << 6);
|
||
+ d[2] = (uint16_t)(c << 6);
|
||
+ d[3] = (uint16_t)(e << 6);
|
||
+
|
||
+ d += 4;
|
||
+ s += 5;
|
||
+ }
|
||
+
|
||
+ if (x < width) {
|
||
+ unsigned rem = width - x;
|
||
+ uint16_t pix[4] = { 0, 0, 0, 0 };
|
||
+
|
||
+ pix[0] = (uint16_t)s[0] | ((uint16_t)(s[1] & 0x03) << 8);
|
||
+ if (rem >= 2)
|
||
+ pix[1] = ((uint16_t)s[1] >> 2) | ((uint16_t)(s[2] & 0x0F) << 6);
|
||
+ if (rem >= 3)
|
||
+ pix[2] = ((uint16_t)s[2] >> 4) | ((uint16_t)(s[3] & 0x3F) << 4);
|
||
+ if (rem >= 4)
|
||
+ pix[3] = ((uint16_t)s[3] >> 6) | ((uint16_t)s[4] << 2);
|
||
+
|
||
+ for (unsigned j = 0; j < rem; j++)
|
||
+ d[j] = (uint16_t)(pix[j] << 6);
|
||
+ }
|
||
+ }
|
||
+}
|
||
+
|
||
static int v4l2request_transfer_get_formats(AVHWFramesContext *hwfc,
|
||
enum AVHWFrameTransferDirection dir,
|
||
enum AVPixelFormat **formats)
|
||
@@ -1082,6 +1132,22 @@ static int v4l2request_transfer_get_formats(AVHWFramesContext *hwfc,
|
||
if (dir == AV_HWFRAME_TRANSFER_DIRECTION_TO)
|
||
return AVERROR(ENOSYS);
|
||
|
||
+ /*
|
||
+ * NV15-backed surfaces (sw_format = YUV420P10) are exposed as P010 to
|
||
+ * downstream filters: the unpack below converts the packed 10-bit
|
||
+ * samples into the standard high-bits-of-16 layout. Hi10P / Main10
|
||
+ * VAAPI/v4l2-request decode reaches userspace through this path.
|
||
+ */
|
||
+ if (hwfc->sw_format == AV_PIX_FMT_YUV420P10) {
|
||
+ fmts = av_malloc_array(2, sizeof(*fmts));
|
||
+ if (!fmts)
|
||
+ return AVERROR(ENOMEM);
|
||
+ fmts[0] = AV_PIX_FMT_P010;
|
||
+ fmts[1] = AV_PIX_FMT_NONE;
|
||
+ *formats = fmts;
|
||
+ return 0;
|
||
+ }
|
||
+
|
||
fmts = av_malloc_array(2, sizeof(*fmts));
|
||
if (!fmts)
|
||
return AVERROR(ENOMEM);
|
||
@@ -1089,8 +1155,13 @@ static int v4l2request_transfer_get_formats(AVHWFramesContext *hwfc,
|
||
fmts[0] = hwfc->sw_format;
|
||
fmts[1] = AV_PIX_FMT_NONE;
|
||
|
||
+ /*
|
||
+ * Tiled-NV12-32L32 (Allwinner) and NV20 (rkvdec 4:2:2 10-bit) still need
|
||
+ * dedicated unpacks before hwdownload can consume them; leave them as
|
||
+ * "no transfer formats" so the filter graph reports the limitation
|
||
+ * rather than silently producing garbage.
|
||
+ */
|
||
if (hwfc->sw_format == AV_PIX_FMT_YUV420P ||
|
||
- hwfc->sw_format == AV_PIX_FMT_YUV420P10 ||
|
||
hwfc->sw_format == AV_PIX_FMT_YUV422P10)
|
||
fmts[0] = AV_PIX_FMT_NONE;
|
||
|
||
@@ -1110,6 +1181,44 @@ static int v4l2request_transfer_data_from(AVHWFramesContext *hwfc,
|
||
map = av_frame_alloc();
|
||
if (!map)
|
||
return AVERROR(ENOMEM);
|
||
+
|
||
+ /*
|
||
+ * For NV15→P010, map the raw NV15 bytes (sw_format) and unpack into
|
||
+ * dst's P010 storage. Otherwise fall through to the original byte-copy
|
||
+ * path used for 1:1 sw_format matches (NV12, NV16, AFBC handled by DRM).
|
||
+ */
|
||
+ if (hwfc->sw_format == AV_PIX_FMT_YUV420P10) {
|
||
+ /*
|
||
+ * Only P010 is advertised by transfer_get_formats for this sw_format;
|
||
+ * a caller that bypasses get_formats and asks for anything else would
|
||
+ * silently corrupt output via av_frame_copy on NV15-packed bytes.
|
||
+ * Reject explicitly.
|
||
+ */
|
||
+ if (dst->format != AV_PIX_FMT_P010) {
|
||
+ ret = AVERROR(ENOSYS);
|
||
+ goto fail;
|
||
+ }
|
||
+
|
||
+ map->format = hwfc->sw_format;
|
||
+ ret = v4l2request_map_frame(hwfc, map, src);
|
||
+ if (ret)
|
||
+ goto fail;
|
||
+
|
||
+ v4l2request_nv15_unpack_plane_to_p010(map->data[0],
|
||
+ (uint16_t *)dst->data[0],
|
||
+ dst->width, dst->height,
|
||
+ map->linesize[0],
|
||
+ dst->linesize[0]);
|
||
+ /* NV15 chroma plane is W × H/2 samples (4:2:0, UV interleaved). */
|
||
+ v4l2request_nv15_unpack_plane_to_p010(map->data[1],
|
||
+ (uint16_t *)dst->data[1],
|
||
+ dst->width, dst->height / 2,
|
||
+ map->linesize[1],
|
||
+ dst->linesize[1]);
|
||
+ ret = 0;
|
||
+ goto fail;
|
||
+ }
|
||
+
|
||
map->format = dst->format;
|
||
|
||
ret = v4l2request_map_frame(hwfc, map, src);
|
||
--
|
||
2.47.3
|
||
|