ffmpeg-v4l2-request-fourier: patch NV15→P010 unpack for Hi10P / Main10
The n8.1 pin's hwcontext_v4l2request.c deliberately blanks the transfer-formats list for the AV_PIX_FMT_YUV420P10 sw_format (the mapping target for V4L2_PIX_FMT_NV15), so `ffmpeg -hwaccel v4l2request -vf hwdownload,format=p010le` on a Hi10P / Main10 input fails at filter-init with -22 EINVAL — even though kernel-side decode succeeds.

0002-nv15-to-p010-unpack.patch adds an inline NV15→P010 unpack (5 bytes per 4 samples, little-endian → high-10-of-16) inside v4l2request_transfer_data_from, exposes AV_PIX_FMT_P010 in transfer_get_formats for that sw_format, and rejects non-P010 destinations explicitly with ENOSYS instead of silently corrupting output via av_frame_copy on NV15-packed bytes.

Verified on fresnel (RK3399, linux-fresnel-fourier 7.0-14):
- 5-frame smoke test from issue #21 → exit 0, 13.8MB output
- 20-frame mid-fixture decode → bit-exact HW==SW, sha256 7d9b66d48d8f17b2281da1881c663ecc31722bb218aba1ae23bf28d07aa66b08
- 8-bit baseline (bbb_60s_720p.h264.mp4) still bit-exact HW==SW (no regression in the existing NV12 path)
- Cross-device repro of the original EINVAL on unpatched ampere (RK3588) pkgrel=4, confirming the bug is upstream-FFmpeg-side, not RK3399-specific

The patch is upstream-able to Kwiboo's v4l2-request-n8.1 branch. Closes #21.
This commit is contained in:
@@ -0,0 +1,178 @@
|
||||
From 0cd6e669735e453ec8772f111065bbb2f70a5bc6 Mon Sep 17 00:00:00 2001
|
||||
From: Markus Fritsche <mfritsche@reauktion.de>
|
||||
Date: Mon, 18 May 2026 07:27:10 +0000
|
||||
Subject: [PATCH] avutil/hwcontext_v4l2request: unpack NV15 to P010 in
|
||||
transfer_data_from
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
V4L2_PIX_FMT_NV15 (RK3399/RK3588 rkvdec 10-bit 4:2:0 capture) is mapped to
|
||||
sw_format = AV_PIX_FMT_YUV420P10 in v4l2request_capture_pixelformats[]. The
|
||||
existing transfer_get_formats explicitly blanked the format list for that
|
||||
sw_format, so 'ffmpeg -hwaccel v4l2request -vf hwdownload,format=p010le' on
|
||||
a Hi10P / Main10 input failed at filter init with EINVAL before reaching
|
||||
the actual decode (which itself succeeds — 2 frames decoded cleanly).
|
||||
|
||||
Expose AV_PIX_FMT_P010 as the transfer target for NV15-backed surfaces and
|
||||
unpack the packed 10-bit samples into the standard high-bits-of-16 layout
|
||||
inside transfer_data_from. Luma and chroma share the same packing format
|
||||
(5 bytes per 4 samples, little endian); chroma plane is W × H/2 samples
|
||||
for 4:2:0.
|
||||
|
||||
The other 'needs custom unpack' sw_formats (YUV420P / Allwinner NV12_32L32
|
||||
tiled and YUV422P10 / rkvdec NV20) keep the original empty transfer-format list because
|
||||
they need different unpack code that isn't covered by this patch.
|
||||
|
||||
Closes marfrit/marfrit-packages#21.
|
||||
---
|
||||
libavutil/hwcontext_v4l2request.c | 111 +++++++++++++++++++++++++++++-
|
||||
1 file changed, 110 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/libavutil/hwcontext_v4l2request.c b/libavutil/hwcontext_v4l2request.c
|
||||
index b6633d9081..3842160dfb 100644
|
||||
--- a/libavutil/hwcontext_v4l2request.c
|
||||
+++ b/libavutil/hwcontext_v4l2request.c
|
||||
@@ -1073,6 +1073,56 @@ fail:
|
||||
return ret;
|
||||
}
|
||||
|
||||
+/*
|
||||
+ * Unpack one NV15-packed 10-bit plane (5 bytes per 4 samples, little endian)
|
||||
+ * into a P010-style plane (10 bits in the high bits of a 16-bit container).
|
||||
+ * `dst_stride` is in bytes; `src_stride` is bytes per row of NV15 data.
+ *
+ * `width` is the number of samples per row and `height` the number of rows.
+ * Each group of 5 source bytes holds 4 samples, sample 0 in the lowest 10
+ * bits; every sample is stored left-shifted by 6, leaving the low 6 bits of
+ * the 16-bit output word zero.
+ *
+ * A trailing group of 1-3 samples (width not a multiple of 4) is decoded
+ * from the first rem + 1 bytes of the last group.
+ *
+ * No argument is validated here: the caller has to provide planes and
+ * strides large enough for `width` x `height` samples.
|
||||
+ */
|
||||
+static void v4l2request_nv15_unpack_plane_to_p010(const uint8_t *src,
|
||||
+ uint16_t *dst,
|
||||
+ unsigned width,
|
||||
+ unsigned height,
|
||||
+ unsigned src_stride,
|
||||
+ unsigned dst_stride)
|
||||
+{
|
||||
+ for (unsigned y = 0; y < height; y++) {
|
||||
+ const uint8_t *s = src + y * src_stride; /* start of source row y */
|
||||
+ uint16_t *d = (uint16_t *)((uint8_t *)dst + y * dst_stride); /* byte stride, hence the uint8_t cast */
|
||||
+ unsigned x;
|
||||
+
|
||||
+ for (x = 0; x + 4 <= width; x += 4) { /* whole 4-sample groups */
|
||||
+ uint16_t a = (uint16_t)s[0] | ((uint16_t)(s[1] & 0x03) << 8); /* s[0] + low 2 bits of s[1] */
|
||||
+ uint16_t b = ((uint16_t)s[1] >> 2) | ((uint16_t)(s[2] & 0x0F) << 6); /* high 6 bits of s[1] + low 4 of s[2] */
|
||||
+ uint16_t c = ((uint16_t)s[2] >> 4) | ((uint16_t)(s[3] & 0x3F) << 4); /* high 4 bits of s[2] + low 6 of s[3] */
|
||||
+ uint16_t e = ((uint16_t)s[3] >> 6) | ((uint16_t)s[4] << 2); /* high 2 bits of s[3] + all of s[4] */
|
||||
+
|
||||
+ d[0] = (uint16_t)(a << 6); /* move the 10-bit value into bits 15..6 */
|
||||
+ d[1] = (uint16_t)(b << 6);
|
||||
+ d[2] = (uint16_t)(c << 6);
|
||||
+ d[3] = (uint16_t)(e << 6);
|
||||
+
|
||||
+ d += 4;
|
||||
+ s += 5;
|
||||
+ }
|
||||
+
|
||||
+ if (x < width) { /* 1-3 samples left over in this row */
|
||||
+ unsigned rem = width - x;
|
||||
+ uint16_t pix[4] = { 0, 0, 0, 0 }; /* undecoded slots stay 0 and are never stored */
|
||||
+
|
||||
+ pix[0] = (uint16_t)s[0] | ((uint16_t)(s[1] & 0x03) << 8);
|
||||
+ if (rem >= 2)
|
||||
+ pix[1] = ((uint16_t)s[1] >> 2) | ((uint16_t)(s[2] & 0x0F) << 6);
|
||||
+ if (rem >= 3)
|
||||
+ pix[2] = ((uint16_t)s[2] >> 4) | ((uint16_t)(s[3] & 0x3F) << 4);
|
||||
+ if (rem >= 4) /* NOTE(review): rem is at most 3 here, so this branch never runs */
|
||||
+ pix[3] = ((uint16_t)s[3] >> 6) | ((uint16_t)s[4] << 2);
|
||||
+
|
||||
+ for (unsigned j = 0; j < rem; j++) /* store only the rem valid samples */
|
||||
+ d[j] = (uint16_t)(pix[j] << 6);
|
||||
+ }
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
static int v4l2request_transfer_get_formats(AVHWFramesContext *hwfc,
|
||||
enum AVHWFrameTransferDirection dir,
|
||||
enum AVPixelFormat **formats)
|
||||
@@ -1082,6 +1132,22 @@ static int v4l2request_transfer_get_formats(AVHWFramesContext *hwfc,
|
||||
if (dir == AV_HWFRAME_TRANSFER_DIRECTION_TO)
|
||||
return AVERROR(ENOSYS);
|
||||
|
||||
+ /*
|
||||
+ * NV15-backed surfaces (sw_format = YUV420P10) are exposed as P010 to
|
||||
+ * downstream filters: the unpack below converts the packed 10-bit
|
||||
+ * samples into the standard high-bits-of-16 layout. Hi10P / Main10
|
||||
+ * v4l2-request decode reaches userspace through this path.
|
||||
+ */
|
||||
+ if (hwfc->sw_format == AV_PIX_FMT_YUV420P10) {
|
||||
+ fmts = av_malloc_array(2, sizeof(*fmts));
|
||||
+ if (!fmts)
|
||||
+ return AVERROR(ENOMEM);
|
||||
+ fmts[0] = AV_PIX_FMT_P010;
|
||||
+ fmts[1] = AV_PIX_FMT_NONE;
|
||||
+ *formats = fmts;
|
||||
+ return 0;
|
||||
+ }
|
||||
+
|
||||
fmts = av_malloc_array(2, sizeof(*fmts));
|
||||
if (!fmts)
|
||||
return AVERROR(ENOMEM);
|
||||
@@ -1089,8 +1155,13 @@ static int v4l2request_transfer_get_formats(AVHWFramesContext *hwfc,
|
||||
fmts[0] = hwfc->sw_format;
|
||||
fmts[1] = AV_PIX_FMT_NONE;
|
||||
|
||||
+ /*
|
||||
+ * Tiled-NV12-32L32 (Allwinner) and NV20 (rkvdec 4:2:2 10-bit) still need
|
||||
+ * dedicated unpacks before hwdownload can consume them; leave them as
|
||||
+ * "no transfer formats" so the filter graph reports the limitation
|
||||
+ * rather than silently producing garbage.
|
||||
+ */
|
||||
if (hwfc->sw_format == AV_PIX_FMT_YUV420P ||
|
||||
- hwfc->sw_format == AV_PIX_FMT_YUV420P10 ||
|
||||
hwfc->sw_format == AV_PIX_FMT_YUV422P10)
|
||||
fmts[0] = AV_PIX_FMT_NONE;
|
||||
|
||||
@@ -1110,6 +1181,44 @@ static int v4l2request_transfer_data_from(AVHWFramesContext *hwfc,
|
||||
map = av_frame_alloc();
|
||||
if (!map)
|
||||
return AVERROR(ENOMEM);
|
||||
+
|
||||
+ /*
|
||||
+ * For NV15→P010, map the raw NV15 bytes (sw_format) and unpack into
|
||||
+ * dst's P010 storage. Otherwise fall through to the original byte-copy
|
||||
+ * path used for 1:1 sw_format matches (NV12, NV16, AFBC handled by DRM).
|
||||
+ */
|
||||
+ if (hwfc->sw_format == AV_PIX_FMT_YUV420P10) {
|
||||
+ /*
|
||||
+ * Only P010 is advertised by transfer_get_formats for this sw_format;
|
||||
+ * a caller that bypasses get_formats and asks for anything else would
|
||||
+ * silently corrupt output via av_frame_copy on NV15-packed bytes.
|
||||
+ * Reject explicitly.
|
||||
+ */
|
||||
+ if (dst->format != AV_PIX_FMT_P010) {
|
||||
+ ret = AVERROR(ENOSYS);
|
||||
+ goto fail;
|
||||
+ }
|
||||
+
|
||||
+ map->format = hwfc->sw_format;
|
||||
+ ret = v4l2request_map_frame(hwfc, map, src);
|
||||
+ if (ret)
|
||||
+ goto fail;
|
||||
+
|
||||
+ v4l2request_nv15_unpack_plane_to_p010(map->data[0],
|
||||
+ (uint16_t *)dst->data[0],
|
||||
+ dst->width, dst->height,
|
||||
+ map->linesize[0],
|
||||
+ dst->linesize[0]);
|
||||
+ /* NV15 chroma plane is W × H/2 samples (4:2:0, UV interleaved). */
|
||||
+ v4l2request_nv15_unpack_plane_to_p010(map->data[1],
|
||||
+ (uint16_t *)dst->data[1],
|
||||
+ dst->width, dst->height / 2,
|
||||
+ map->linesize[1],
|
||||
+ dst->linesize[1]);
|
||||
+ ret = 0;
|
||||
+ goto fail;
|
||||
+ }
|
||||
+
|
||||
map->format = dst->format;
|
||||
|
||||
ret = v4l2request_map_frame(hwfc, map, src);
|
||||
--
|
||||
2.47.3
|
||||
|
||||
Reference in New Issue
Block a user