Merge PR #26: ffmpeg-v4l2-request-fourier NV15 to P010 unpack for Hi10P / Main10
build and publish packages / distcc-avahi-aarch64 (push) Successful in 1m4s
build and publish packages / lmcp-any (push) Successful in 10s
build and publish packages / lmcp-debian (push) Successful in 6s
build and publish packages / claude-his-any (push) Successful in 8s
build and publish packages / ffmpeg-v4l2-request-aarch64 (push) Successful in 15m0s
build and publish packages / claude-his-debian (push) Successful in 18s
build and publish packages / libva-v4l2-request-fourier-aarch64 (push) Successful in 15s
build and publish packages / mpv-fourier-aarch64 (push) Successful in 1m10s

This commit was merged in pull request #26.
This commit is contained in:
2026-05-18 08:52:50 +00:00
2 changed files with 183 additions and 3 deletions
@@ -0,0 +1,178 @@
From 0cd6e669735e453ec8772f111065bbb2f70a5bc6 Mon Sep 17 00:00:00 2001
From: Markus Fritsche <mfritsche@reauktion.de>
Date: Mon, 18 May 2026 07:27:10 +0000
Subject: [PATCH] avutil/hwcontext_v4l2request: unpack NV15 to P010 in
 transfer_data_from
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
V4L2_PIX_FMT_NV15 (RK3399/RK3588 rkvdec 10-bit 4:2:0 capture) is mapped to
sw_format = AV_PIX_FMT_YUV420P10 in v4l2request_capture_pixelformats[]. The
existing transfer_get_formats explicitly blanked the format list for that
sw_format, so 'ffmpeg -hwaccel v4l2request -vf hwdownload,format=p010le' on
a Hi10P / Main10 input failed at filter init with EINVAL before reaching
the actual decode (which itself succeeds — 2 frames decoded cleanly).
Expose AV_PIX_FMT_P010 as the transfer target for NV15-backed surfaces and
unpack the packed 10-bit samples into the standard high-bits-of-16 layout
inside transfer_data_from. Luma and chroma use the same packing (5 bytes
per 4 samples, little endian); for 4:2:0 the interleaved UV chroma plane is
W samples wide and H/2 rows high.
The other 'needs custom unpack' sw_formats (YUV420P / Allwinner NV12_32L32
tiled and YUV422P10 / rkvdec NV20) keep the original ENOSYS path because
they need different unpack code that isn't covered by this patch.
Closes marfrit/marfrit-packages#21.
---
libavutil/hwcontext_v4l2request.c | 111 +++++++++++++++++++++++++++++-
1 file changed, 110 insertions(+), 1 deletion(-)
diff --git a/libavutil/hwcontext_v4l2request.c b/libavutil/hwcontext_v4l2request.c
index b6633d9081..3842160dfb 100644
--- a/libavutil/hwcontext_v4l2request.c
+++ b/libavutil/hwcontext_v4l2request.c
@@ -1073,6 +1073,56 @@ fail:
return ret;
}
+/*
+ * Unpack one NV15-packed 10-bit plane (5 bytes per 4 samples, little endian)
+ * into a P010-style plane (10 bits in the high bits of a 16-bit container).
+ * Rows are processed one by one; a width that is not a multiple of 4 is fine.
+ */
+static void v4l2request_nv15_unpack_plane_to_p010(const uint8_t *src,
+ uint16_t *dst,
+ unsigned width, /* samples per row */
+ unsigned height, /* number of rows */
+ unsigned src_stride, /* bytes from one NV15 row to the next */
+ unsigned dst_stride) /* bytes (not samples) from one output row to the next */
+{
+ for (unsigned y = 0; y < height; y++) {
+ const uint8_t *s = src + y * src_stride;
+ uint16_t *d = (uint16_t *)((uint8_t *)dst + y * dst_stride); /* byte cast: dst_stride is a byte offset */
+ unsigned x;
+
+ for (x = 0; x + 4 <= width; x += 4) { /* whole groups: 5 bytes in, 4 samples out */
+ uint16_t a = (uint16_t)s[0] | ((uint16_t)(s[1] & 0x03) << 8); /* bits 0..9 of the 40-bit group */
+ uint16_t b = ((uint16_t)s[1] >> 2) | ((uint16_t)(s[2] & 0x0F) << 6); /* bits 10..19 */
+ uint16_t c = ((uint16_t)s[2] >> 4) | ((uint16_t)(s[3] & 0x3F) << 4); /* bits 20..29 */
+ uint16_t e = ((uint16_t)s[3] >> 6) | ((uint16_t)s[4] << 2); /* bits 30..39 */
+
+ d[0] = (uint16_t)(a << 6); /* << 6 moves the 10-bit value to bits 15..6, low 6 bits zero */
+ d[1] = (uint16_t)(b << 6);
+ d[2] = (uint16_t)(c << 6);
+ d[3] = (uint16_t)(e << 6);
+
+ d += 4; /* four samples written */
+ s += 5; /* five packed bytes consumed */
+ }
+
+ if (x < width) { /* partial trailing group: fewer than 4 samples remain */
+ unsigned rem = width - x;
+ uint16_t pix[4] = { 0, 0, 0, 0 };
+
+ pix[0] = (uint16_t)s[0] | ((uint16_t)(s[1] & 0x03) << 8); /* first sample always exists; reads s[0], s[1] */
+ if (rem >= 2) /* each further sample reads one more source byte */
+ pix[1] = ((uint16_t)s[1] >> 2) | ((uint16_t)(s[2] & 0x0F) << 6);
+ if (rem >= 3)
+ pix[2] = ((uint16_t)s[2] >> 4) | ((uint16_t)(s[3] & 0x3F) << 4);
+ if (rem >= 4) /* NOTE(review): rem looks limited to 1..3 after the loop above, so this seems unreachable */
+ pix[3] = ((uint16_t)s[3] >> 6) | ((uint16_t)s[4] << 2);
+
+ for (unsigned j = 0; j < rem; j++) /* store only the rem valid samples */
+ d[j] = (uint16_t)(pix[j] << 6);
+ }
+ }
+}
+
static int v4l2request_transfer_get_formats(AVHWFramesContext *hwfc,
enum AVHWFrameTransferDirection dir,
enum AVPixelFormat **formats)
@@ -1082,6 +1132,22 @@ static int v4l2request_transfer_get_formats(AVHWFramesContext *hwfc,
if (dir == AV_HWFRAME_TRANSFER_DIRECTION_TO)
return AVERROR(ENOSYS);
+ /*
+ * NV15-backed surfaces (sw_format = YUV420P10) are exposed as P010 to
+ * downstream filters: the unpack below converts the packed 10-bit
+ * samples into the standard high-bits-of-16 layout. Hi10P / Main10
+ * VAAPI/v4l2-request decode reaches userspace through this path.
+ */
+ if (hwfc->sw_format == AV_PIX_FMT_YUV420P10) {
+ fmts = av_malloc_array(2, sizeof(*fmts));
+ if (!fmts)
+ return AVERROR(ENOMEM);
+ fmts[0] = AV_PIX_FMT_P010;
+ fmts[1] = AV_PIX_FMT_NONE;
+ *formats = fmts;
+ return 0;
+ }
+
fmts = av_malloc_array(2, sizeof(*fmts));
if (!fmts)
return AVERROR(ENOMEM);
@@ -1089,8 +1155,13 @@ static int v4l2request_transfer_get_formats(AVHWFramesContext *hwfc,
fmts[0] = hwfc->sw_format;
fmts[1] = AV_PIX_FMT_NONE;
+ /*
+ * Tiled-NV12-32L32 (Allwinner) and NV20 (rkvdec 4:2:2 10-bit) still need
+ * dedicated unpacks before hwdownload can consume them; leave them as
+ * "no transfer formats" so the filter graph reports the limitation
+ * rather than silently producing garbage.
+ */
if (hwfc->sw_format == AV_PIX_FMT_YUV420P ||
- hwfc->sw_format == AV_PIX_FMT_YUV420P10 ||
hwfc->sw_format == AV_PIX_FMT_YUV422P10)
fmts[0] = AV_PIX_FMT_NONE;
@@ -1110,6 +1181,44 @@ static int v4l2request_transfer_data_from(AVHWFramesContext *hwfc,
map = av_frame_alloc();
if (!map)
return AVERROR(ENOMEM);
+
+ /*
+ * For NV15→P010, map the raw NV15 bytes (sw_format) and unpack into
+ * dst's P010 storage. Otherwise fall through to the original byte-copy
+ * path used for 1:1 sw_format matches (NV12, NV16, AFBC handled by DRM).
+ */
+ if (hwfc->sw_format == AV_PIX_FMT_YUV420P10) {
+ /*
+ * Only P010 is advertised by transfer_get_formats for this sw_format;
+ * a caller that bypasses get_formats and asks for anything else would
+ * silently corrupt output via av_frame_copy on NV15-packed bytes.
+ * Reject explicitly.
+ */
+ if (dst->format != AV_PIX_FMT_P010) {
+ ret = AVERROR(ENOSYS);
+ goto fail;
+ }
+
+ map->format = hwfc->sw_format;
+ ret = v4l2request_map_frame(hwfc, map, src);
+ if (ret)
+ goto fail;
+
+ v4l2request_nv15_unpack_plane_to_p010(map->data[0],
+ (uint16_t *)dst->data[0],
+ dst->width, dst->height,
+ map->linesize[0],
+ dst->linesize[0]);
+ /* NV15 chroma plane is W × H/2 samples (4:2:0, UV interleaved). */
+ v4l2request_nv15_unpack_plane_to_p010(map->data[1],
+ (uint16_t *)dst->data[1],
+ dst->width, dst->height / 2,
+ map->linesize[1],
+ dst->linesize[1]);
+ ret = 0;
+ goto fail;
+ }
+
map->format = dst->format;
ret = v4l2request_map_frame(hwfc, map, src);
--
2.47.3
+5 -3
View File
@@ -24,7 +24,7 @@ _srcname=FFmpeg
_version='8.1' _version='8.1'
_commit='b57fbbe50c9b2656fad86a1a7eeabfd2b2a50935' # v4l2-request-n8.1 tip 2026-04-24 _commit='b57fbbe50c9b2656fad86a1a7eeabfd2b2a50935' # v4l2-request-n8.1 tip 2026-04-24
pkgver=8.1.r123329.b57fbbe pkgver=8.1.r123329.b57fbbe
pkgrel=4 pkgrel=5
epoch=2 epoch=2
pkgdesc='FFmpeg with V4L2 Request API hwaccel (Rockchip / Allwinner stateless decode)' pkgdesc='FFmpeg with V4L2 Request API hwaccel (Rockchip / Allwinner stateless decode)'
arch=('aarch64') arch=('aarch64')
@@ -78,8 +78,9 @@ provides=(
conflicts=(ffmpeg) conflicts=(ffmpeg)
replaces=(ffmpeg ffmpeg-v4l2-request-git) replaces=(ffmpeg ffmpeg-v4l2-request-git)
source=("git+https://github.com/Kwiboo/FFmpeg.git#commit=${_commit}" source=("git+https://github.com/Kwiboo/FFmpeg.git#commit=${_commit}"
'0001-libudev-bypass-fallback.patch') '0001-libudev-bypass-fallback.patch'
sha256sums=('SKIP' 'SKIP') '0002-nv15-to-p010-unpack.patch')
sha256sums=('SKIP' 'SKIP' 'SKIP')
pkgver() { pkgver() {
cd "${_srcname}" cd "${_srcname}"
@@ -91,6 +92,7 @@ pkgver() {
prepare() { prepare() {
cd "${_srcname}" cd "${_srcname}"
patch -Np1 -i "${srcdir}/0001-libudev-bypass-fallback.patch" patch -Np1 -i "${srcdir}/0001-libudev-bypass-fallback.patch"
patch -Np1 -i "${srcdir}/0002-nv15-to-p010-unpack.patch"
} }
build() { build() {