Files
marfrit-packages/arch/ffmpeg-v4l2-request-fourier/0002-nv15-to-p010-unpack.patch
T
test0r 9e9447502e ffmpeg-v4l2-request-fourier: patch NV15→P010 unpack for Hi10P / Main10
The n8.1 pin's hwcontext_v4l2request.c deliberately blanks the
transfer-formats list for AV_PIX_FMT_YUV420P10 sw_format (the mapping
target for V4L2_PIX_FMT_NV15), so `ffmpeg -hwaccel v4l2request
-vf hwdownload,format=p010le` on a Hi10P / Main10 input failed at
filter-init with -22 EINVAL — even though kernel-side decode succeeded.

0002-nv15-to-p010-unpack.patch adds an inline NV15→P010 unpack
(5 bytes per 4 samples, little-endian → high-10-of-16) inside
v4l2request_transfer_data_from, exposes AV_PIX_FMT_P010 in
transfer_get_formats for that sw_format, and rejects non-P010
destinations explicitly with ENOSYS instead of silently corrupting
output via av_frame_copy on NV15-packed bytes.

Verified on fresnel (RK3399, linux-fresnel-fourier 7.0-14):
- 5-frame smoke test from issue #21 → exit 0, 13.8MB output
- 20-frame mid-fixture decode → bit-exact HW==SW
  sha256 7d9b66d48d8f17b2281da1881c663ecc31722bb218aba1ae23bf28d07aa66b08
- 8-bit baseline (bbb_60s_720p.h264.mp4) still bit-exact HW==SW (no
  regression in the existing NV12 path)
- Cross-device repro of original EINVAL on unpatched ampere (RK3588)
  pkgrel=4, confirming the bug is upstream-FFmpeg-side, not RK3399-specific

Patch is upstream-able to Kwiboo's v4l2-request-n8.1 branch.

Closes #21.
2026-05-18 08:35:19 +00:00

179 lines
7.3 KiB
Diff
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
From 0cd6e669735e453ec8772f111065bbb2f70a5bc6 Mon Sep 17 00:00:00 2001
From: Markus Fritsche <mfritsche@reauktion.de>
Date: Mon, 18 May 2026 07:27:10 +0000
Subject: [PATCH] avutil/hwcontext_v4l2request: unpack NV15 to P010 in
transfer_data_from
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
V4L2_PIX_FMT_NV15 (RK3399/RK3588 rkvdec 10-bit 4:2:0 capture) is mapped to
sw_format = AV_PIX_FMT_YUV420P10 in v4l2request_capture_pixelformats[]. The
existing transfer_get_formats explicitly blanked the format list for that
sw_format, so 'ffmpeg -hwaccel v4l2request -vf hwdownload,format=p010le' on
a Hi10P / Main10 input failed at filter init with EINVAL before reaching
the actual decode (which itself succeeds — 2 frames decoded cleanly).
Expose AV_PIX_FMT_P010 as the transfer target for NV15-backed surfaces and
unpack the packed 10-bit samples into the standard high-bits-of-16 layout
inside transfer_data_from. Luma and chroma share the same packing format
(5 bytes per 4 samples, little endian); chroma plane is W × H/2 samples
for 4:2:0.
The other 'needs custom unpack' sw_formats (YUV420P / Allwinner NV12_32L32
tiled and YUV422P10 / rkvdec NV20) keep the original ENOSYS path because
they need different unpack code that isn't covered by this patch.
Closes marfrit/marfrit-packages#21.
---
libavutil/hwcontext_v4l2request.c | 111 +++++++++++++++++++++++++++++-
1 file changed, 110 insertions(+), 1 deletion(-)
diff --git a/libavutil/hwcontext_v4l2request.c b/libavutil/hwcontext_v4l2request.c
index b6633d9081..3842160dfb 100644
--- a/libavutil/hwcontext_v4l2request.c
+++ b/libavutil/hwcontext_v4l2request.c
@@ -1073,6 +1073,56 @@ fail:
return ret;
}
+/*
+ * Unpack one NV15 plane into a P010-style plane.
+ *
+ * NV15 packs four 10-bit samples into five bytes, little endian; P010 keeps
+ * each sample in the 10 most significant bits of a 16-bit word (low 6 bits
+ * are written as zero).
+ *
+ * src/src_stride: packed input rows and their pitch in bytes.
+ * dst/dst_stride: output rows and their pitch in bytes (not samples).
+ * width/height:   plane size in samples and rows.
+ */
+static void v4l2request_nv15_unpack_plane_to_p010(const uint8_t *src,
+                                                  uint16_t *dst,
+                                                  unsigned width,
+                                                  unsigned height,
+                                                  unsigned src_stride,
+                                                  unsigned dst_stride)
+{
+    for (unsigned y = 0; y < height; y++) {
+        /* Offsets in size_t so y * stride cannot wrap at 32 bits. */
+        const uint8_t *s = src + (size_t)y * src_stride;
+        uint16_t *d = (uint16_t *)((uint8_t *)dst + (size_t)y * dst_stride);
+        unsigned x;
+
+        /* Full groups: 5 input bytes -> 4 output samples. */
+        for (x = 0; width - x >= 4; x += 4, s += 5, d += 4) {
+            d[0] = (uint16_t)((s[0]      | (s[1] & 0x03) << 8) << 6);
+            d[1] = (uint16_t)((s[1] >> 2 | (s[2] & 0x0F) << 6) << 6);
+            d[2] = (uint16_t)((s[2] >> 4 | (s[3] & 0x3F) << 4) << 6);
+            d[3] = (uint16_t)((s[3] >> 6 |  s[4]         << 2) << 6);
+        }
+
+        /*
+         * Tail of 1..3 samples (never 4: the loop above took every full
+         * group). Touch only the bytes those samples occupy.
+         */
+        switch (width - x) {
+        case 3:
+            d[2] = (uint16_t)((s[2] >> 4 | (s[3] & 0x3F) << 4) << 6);
+            /* fallthrough */
+        case 2:
+            d[1] = (uint16_t)((s[1] >> 2 | (s[2] & 0x0F) << 6) << 6);
+            /* fallthrough */
+        case 1:
+            d[0] = (uint16_t)((s[0]      | (s[1] & 0x03) << 8) << 6);
+            break;
+        }
+    }
+}
+
static int v4l2request_transfer_get_formats(AVHWFramesContext *hwfc,
enum AVHWFrameTransferDirection dir,
enum AVPixelFormat **formats)
@@ -1082,6 +1132,22 @@ static int v4l2request_transfer_get_formats(AVHWFramesContext *hwfc,
if (dir == AV_HWFRAME_TRANSFER_DIRECTION_TO)
return AVERROR(ENOSYS);
+ /*
+ * NV15-backed surfaces (sw_format = YUV420P10) are exposed as P010 to
+ * downstream filters: the unpack below converts the packed 10-bit
+ * samples into the standard high-bits-of-16 layout. Hi10P / Main10
+ * v4l2-request decode reaches userspace through this path.
+ */
+ if (hwfc->sw_format == AV_PIX_FMT_YUV420P10) {
+ fmts = av_malloc_array(2, sizeof(*fmts));
+ if (!fmts)
+ return AVERROR(ENOMEM);
+ fmts[0] = AV_PIX_FMT_P010;
+ fmts[1] = AV_PIX_FMT_NONE;
+ *formats = fmts;
+ return 0;
+ }
+
fmts = av_malloc_array(2, sizeof(*fmts));
if (!fmts)
return AVERROR(ENOMEM);
@@ -1089,8 +1155,13 @@ static int v4l2request_transfer_get_formats(AVHWFramesContext *hwfc,
fmts[0] = hwfc->sw_format;
fmts[1] = AV_PIX_FMT_NONE;
+ /*
+ * Tiled-NV12-32L32 (Allwinner) and NV20 (rkvdec 4:2:2 10-bit) still need
+ * dedicated unpacks before hwdownload can consume them; leave them as
+ * "no transfer formats" so the filter graph reports the limitation
+ * rather than silently producing garbage.
+ */
if (hwfc->sw_format == AV_PIX_FMT_YUV420P ||
- hwfc->sw_format == AV_PIX_FMT_YUV420P10 ||
hwfc->sw_format == AV_PIX_FMT_YUV422P10)
fmts[0] = AV_PIX_FMT_NONE;
@@ -1110,6 +1181,44 @@ static int v4l2request_transfer_data_from(AVHWFramesContext *hwfc,
map = av_frame_alloc();
if (!map)
return AVERROR(ENOMEM);
+
+ /*
+ * For NV15→P010, map the raw NV15 bytes (sw_format) and unpack into
+ * dst's P010 storage. Otherwise fall through to the original byte-copy
+ * path used for 1:1 sw_format matches (NV12, NV16, AFBC handled by DRM).
+ */
+ if (hwfc->sw_format == AV_PIX_FMT_YUV420P10) {
+ /*
+ * Only P010 is advertised by transfer_get_formats for this sw_format;
+ * a caller that bypasses get_formats and asks for anything else would
+ * silently corrupt output via av_frame_copy on NV15-packed bytes.
+ * Reject explicitly.
+ */
+ if (dst->format != AV_PIX_FMT_P010) {
+ ret = AVERROR(ENOSYS);
+ goto fail;
+ }
+
+ map->format = hwfc->sw_format;
+ ret = v4l2request_map_frame(hwfc, map, src);
+ if (ret)
+ goto fail;
+
+ v4l2request_nv15_unpack_plane_to_p010(map->data[0],
+ (uint16_t *)dst->data[0],
+ dst->width, dst->height,
+ map->linesize[0],
+ dst->linesize[0]);
+ /* NV15 chroma plane is W × H/2 samples (4:2:0, UV interleaved). */
+ v4l2request_nv15_unpack_plane_to_p010(map->data[1],
+ (uint16_t *)dst->data[1],
+ dst->width, dst->height / 2,
+ map->linesize[1],
+ dst->linesize[1]);
+ ret = 0;
+ goto fail;
+ }
+
map->format = dst->format;
ret = v4l2request_map_frame(hwfc, map, src);
--
2.47.3