diff --git a/arch/ffmpeg-v4l2-request-fourier/0002-nv15-to-p010-unpack.patch b/arch/ffmpeg-v4l2-request-fourier/0002-nv15-to-p010-unpack.patch new file mode 100644 index 000000000..8a2a2f04d --- /dev/null +++ b/arch/ffmpeg-v4l2-request-fourier/0002-nv15-to-p010-unpack.patch @@ -0,0 +1,178 @@ +From 0cd6e669735e453ec8772f111065bbb2f70a5bc6 Mon Sep 17 00:00:00 2001 +From: Markus Fritsche +Date: Mon, 18 May 2026 07:27:10 +0000 +Subject: [PATCH] avutil/hwcontext_v4l2request: unpack NV15 to P010 in + transfer_data_from +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +V4L2_PIX_FMT_NV15 (RK3399/RK3588 rkvdec 10-bit 4:2:0 capture) is mapped to +sw_format = AV_PIX_FMT_YUV420P10 in v4l2request_capture_pixelformats[]. The +existing transfer_get_formats explicitly blanked the format list for that +sw_format, so 'ffmpeg -hwaccel v4l2request -vf hwdownload,format=p010le' on +a Hi10P / Main10 input failed at filter init with EINVAL before reaching +the actual decode (which itself succeeds — 2 frames decoded cleanly). + +Expose AV_PIX_FMT_P010 as the transfer target for NV15-backed surfaces and +unpack the packed 10-bit samples into the standard high-bits-of-16 layout +inside transfer_data_from. Luma and chroma share the same packing format +(5 bytes per 4 samples, little endian); chroma plane is W × H/2 samples +for 4:2:0. + +The other 'needs custom unpack' sw_formats (YUV420P / Allwinner NV12_32L32 +tiled and YUV422P10 / rkvdec NV20) keep the original ENOSYS path because +they need different unpack code that isn't covered by this patch. + +Closes marfrit/marfrit-packages#21. 
/*
 * Unpack one NV15-packed 10-bit plane into a P010-style plane.
 *
 * Input layout: every 4 consecutive samples occupy 5 consecutive bytes,
 * little endian (sample 0 starts at bit 0 of byte 0, sample 1 at bit 2 of
 * byte 1, sample 2 at bit 4 of byte 2, sample 3 at bit 6 of byte 3).
 * Output layout: one 16-bit container per sample, with the 10 significant
 * bits in the high bits and the low 6 bits zero.
 *
 * src        first byte of the packed source plane
 * dst        first sample of the destination plane
 * width      samples per row (for an interleaved chroma plane: the total
 *            number of samples per row, not the number of pairs)
 * height     number of rows
 * src_stride bytes between the starts of consecutive source rows
 * dst_stride bytes between the starts of consecutive destination rows
 *
 * When width is not a multiple of 4, the trailing 1..3 samples of each row
 * are decoded from a partial group; only the source bytes that carry bits
 * of those samples are read, and exactly `width` samples are written per row.
 */
static void v4l2request_nv15_unpack_plane_to_p010(const uint8_t *src,
                                                  uint16_t *dst,
                                                  unsigned width,
                                                  unsigned height,
                                                  unsigned src_stride,
                                                  unsigned dst_stride)
{
    /* Split each row into whole 4-sample groups plus a 0..3 sample tail. */
    const unsigned full = width & ~3u;
    const unsigned rem  = width - full;

    for (unsigned y = 0; y < height; y++) {
        /* Widen before multiplying so the row offset cannot wrap at 32 bits. */
        const uint8_t *s = src + (size_t)y * src_stride;
        uint16_t *d = (uint16_t *)((uint8_t *)dst + (size_t)y * dst_stride);

        for (unsigned x = 0; x < full; x += 4) {
            unsigned s0 =  (unsigned)s[0]       | ((unsigned)(s[1] & 0x03) << 8);
            unsigned s1 = ((unsigned)s[1] >> 2) | ((unsigned)(s[2] & 0x0F) << 6);
            unsigned s2 = ((unsigned)s[2] >> 4) | ((unsigned)(s[3] & 0x3F) << 4);
            unsigned s3 = ((unsigned)s[3] >> 6) | ((unsigned)s[4] << 2);

            /* Move the 10 significant bits to the top of the container. */
            d[0] = (uint16_t)(s0 << 6);
            d[1] = (uint16_t)(s1 << 6);
            d[2] = (uint16_t)(s2 << 6);
            d[3] = (uint16_t)(s3 << 6);

            d += 4;
            s += 5;
        }

        /*
         * Partial trailing group: sample k needs bytes k and k + 1 only, so
         * each sample is decoded behind its own guard and no byte beyond the
         * last one carrying payload is touched.
         */
        if (rem >= 1)
            d[0] = (uint16_t)(( (unsigned)s[0]       | ((unsigned)(s[1] & 0x03) << 8)) << 6);
        if (rem >= 2)
            d[1] = (uint16_t)((((unsigned)s[1] >> 2) | ((unsigned)(s[2] & 0x0F) << 6)) << 6);
        if (rem >= 3)
            d[2] = (uint16_t)((((unsigned)s[2] >> 4) | ((unsigned)(s[3] & 0x3F) << 4)) << 6);
    }
}
v4l2request_transfer_get_formats(AVHWFramesContext *hwfc, + enum AVHWFrameTransferDirection dir, + enum AVPixelFormat **formats) +@@ -1082,6 +1132,22 @@ static int v4l2request_transfer_get_formats(AVHWFramesContext *hwfc, + if (dir == AV_HWFRAME_TRANSFER_DIRECTION_TO) + return AVERROR(ENOSYS); + ++ /* ++ * NV15-backed surfaces (sw_format = YUV420P10) are exposed as P010 to ++ * downstream filters: the unpack below converts the packed 10-bit ++ * samples into the standard high-bits-of-16 layout. Hi10P / Main10 ++ * VAAPI/v4l2-request decode reaches userspace through this path. ++ */ ++ if (hwfc->sw_format == AV_PIX_FMT_YUV420P10) { ++ fmts = av_malloc_array(2, sizeof(*fmts)); ++ if (!fmts) ++ return AVERROR(ENOMEM); ++ fmts[0] = AV_PIX_FMT_P010; ++ fmts[1] = AV_PIX_FMT_NONE; ++ *formats = fmts; ++ return 0; ++ } ++ + fmts = av_malloc_array(2, sizeof(*fmts)); + if (!fmts) + return AVERROR(ENOMEM); +@@ -1089,8 +1155,13 @@ static int v4l2request_transfer_get_formats(AVHWFramesContext *hwfc, + fmts[0] = hwfc->sw_format; + fmts[1] = AV_PIX_FMT_NONE; + ++ /* ++ * Tiled-NV12-32L32 (Allwinner) and NV20 (rkvdec 4:2:2 10-bit) still need ++ * dedicated unpacks before hwdownload can consume them; leave them as ++ * "no transfer formats" so the filter graph reports the limitation ++ * rather than silently producing garbage. ++ */ + if (hwfc->sw_format == AV_PIX_FMT_YUV420P || +- hwfc->sw_format == AV_PIX_FMT_YUV420P10 || + hwfc->sw_format == AV_PIX_FMT_YUV422P10) + fmts[0] = AV_PIX_FMT_NONE; + +@@ -1110,6 +1181,44 @@ static int v4l2request_transfer_data_from(AVHWFramesContext *hwfc, + map = av_frame_alloc(); + if (!map) + return AVERROR(ENOMEM); ++ ++ /* ++ * For NV15→P010, map the raw NV15 bytes (sw_format) and unpack into ++ * dst's P010 storage. Otherwise fall through to the original byte-copy ++ * path used for 1:1 sw_format matches (NV12, NV16, AFBC handled by DRM). 
++ */ ++ if (hwfc->sw_format == AV_PIX_FMT_YUV420P10) { ++ /* ++ * Only P010 is advertised by transfer_get_formats for this sw_format; ++ * a caller that bypasses get_formats and asks for anything else would ++ * silently corrupt output via av_frame_copy on NV15-packed bytes. ++ * Reject explicitly. ++ */ ++ if (dst->format != AV_PIX_FMT_P010) { ++ ret = AVERROR(ENOSYS); ++ goto fail; ++ } ++ ++ map->format = hwfc->sw_format; ++ ret = v4l2request_map_frame(hwfc, map, src); ++ if (ret) ++ goto fail; ++ ++ v4l2request_nv15_unpack_plane_to_p010(map->data[0], ++ (uint16_t *)dst->data[0], ++ dst->width, dst->height, ++ map->linesize[0], ++ dst->linesize[0]); ++ /* NV15 chroma plane is W × H/2 samples (4:2:0, UV interleaved). */ ++ v4l2request_nv15_unpack_plane_to_p010(map->data[1], ++ (uint16_t *)dst->data[1], ++ dst->width, dst->height / 2, ++ map->linesize[1], ++ dst->linesize[1]); ++ ret = 0; ++ goto fail; ++ } ++ + map->format = dst->format; + + ret = v4l2request_map_frame(hwfc, map, src); +-- +2.47.3 + diff --git a/arch/ffmpeg-v4l2-request-fourier/PKGBUILD b/arch/ffmpeg-v4l2-request-fourier/PKGBUILD index 0c6a9772a..833fca244 100644 --- a/arch/ffmpeg-v4l2-request-fourier/PKGBUILD +++ b/arch/ffmpeg-v4l2-request-fourier/PKGBUILD @@ -24,7 +24,7 @@ _srcname=FFmpeg _version='8.1' _commit='b57fbbe50c9b2656fad86a1a7eeabfd2b2a50935' # v4l2-request-n8.1 tip 2026-04-24 pkgver=8.1.r123329.b57fbbe -pkgrel=4 +pkgrel=5 epoch=2 pkgdesc='FFmpeg with V4L2 Request API hwaccel (Rockchip / Allwinner stateless decode)' arch=('aarch64') @@ -78,8 +78,9 @@ provides=( conflicts=(ffmpeg) replaces=(ffmpeg ffmpeg-v4l2-request-git) source=("git+https://github.com/Kwiboo/FFmpeg.git#commit=${_commit}" - '0001-libudev-bypass-fallback.patch') -sha256sums=('SKIP' 'SKIP') + '0001-libudev-bypass-fallback.patch' + '0002-nv15-to-p010-unpack.patch') +sha256sums=('SKIP' 'SKIP' 'SKIP') pkgver() { cd "${_srcname}" @@ -91,6 +92,7 @@ pkgver() { prepare() { cd "${_srcname}" patch -Np1 -i 
"${srcdir}/0001-libudev-bypass-fallback.patch" + patch -Np1 -i "${srcdir}/0002-nv15-to-p010-unpack.patch" } build() {