From 0cd6e669735e453ec8772f111065bbb2f70a5bc6 Mon Sep 17 00:00:00 2001 From: Markus Fritsche Date: Mon, 18 May 2026 07:27:10 +0000 Subject: [PATCH] avutil/hwcontext_v4l2request: unpack NV15 to P010 in transfer_data_from MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit V4L2_PIX_FMT_NV15 (RK3399/RK3588 rkvdec 10-bit 4:2:0 capture) is mapped to sw_format = AV_PIX_FMT_YUV420P10 in v4l2request_capture_pixelformats[]. The existing transfer_get_formats explicitly blanked the format list for that sw_format, so 'ffmpeg -hwaccel v4l2request -vf hwdownload,format=p010le' on a Hi10P / Main10 input failed at filter init with EINVAL before reaching the actual decode (which itself succeeds — 2 frames decoded cleanly). Expose AV_PIX_FMT_P010 as the transfer target for NV15-backed surfaces and unpack the packed 10-bit samples into the standard high-bits-of-16 layout inside transfer_data_from. Luma and chroma share the same packing format (5 bytes per 4 samples, little endian); chroma plane is W × H/2 samples for 4:2:0. The other 'needs custom unpack' sw_formats (YUV420P / Allwinner NV12_32L32 tiled and YUV422P10 / rkvdec NV20) keep the original ENOSYS path because they need different unpack code that isn't covered by this patch. Closes marfrit/marfrit-packages#21. --- libavutil/hwcontext_v4l2request.c | 111 +++++++++++++++++++++++++++++- 1 file changed, 110 insertions(+), 1 deletion(-) diff --git a/libavutil/hwcontext_v4l2request.c b/libavutil/hwcontext_v4l2request.c index b6633d9081..3842160dfb 100644 --- a/libavutil/hwcontext_v4l2request.c +++ b/libavutil/hwcontext_v4l2request.c @@ -1073,6 +1073,56 @@ fail: return ret; } +/* + * Unpack one NV15-packed 10-bit plane (5 bytes per 4 samples, little endian) + * into a P010-style plane (10 bits in the high bits of a 16-bit container). + * `dst_stride` is in bytes; `src_stride` is bytes per row of NV15 data. 
+ */ +static void v4l2request_nv15_unpack_plane_to_p010(const uint8_t *src, + uint16_t *dst, + unsigned width, + unsigned height, + unsigned src_stride, + unsigned dst_stride) +{ /* width is a sample count per row, height a row count; both strides are byte offsets between consecutive rows */ + for (unsigned y = 0; y < height; y++) { + const uint8_t *s = src + y * src_stride; + uint16_t *d = (uint16_t *)((uint8_t *)dst + y * dst_stride); + unsigned x; + + for (x = 0; x + 4 <= width; x += 4) { /* whole groups: 5 source bytes yield 4 output samples */ + uint16_t a = (uint16_t)s[0] | ((uint16_t)(s[1] & 0x03) << 8); /* sample 0: bits 0-9 of the 40-bit group */ + uint16_t b = ((uint16_t)s[1] >> 2) | ((uint16_t)(s[2] & 0x0F) << 6); /* sample 1: bits 10-19 */ + uint16_t c = ((uint16_t)s[2] >> 4) | ((uint16_t)(s[3] & 0x3F) << 4); /* sample 2: bits 20-29 */ + uint16_t e = ((uint16_t)s[3] >> 6) | ((uint16_t)s[4] << 2); /* sample 3: bits 30-39 */ + + d[0] = (uint16_t)(a << 6); /* shift by 6 so the 10-bit value occupies bits 15..6 and the low 6 bits are zero */ + d[1] = (uint16_t)(b << 6); + d[2] = (uint16_t)(c << 6); + d[3] = (uint16_t)(e << 6); + + d += 4; + s += 5; + } + + if (x < width) { /* tail: width is not a multiple of 4, so 1-3 samples remain in this row */ + unsigned rem = width - x; + uint16_t pix[4] = { 0, 0, 0, 0 }; + /* each source byte beyond s[1] is read only when a remaining sample needs it */ + pix[0] = (uint16_t)s[0] | ((uint16_t)(s[1] & 0x03) << 8); + if (rem >= 2) + pix[1] = ((uint16_t)s[1] >> 2) | ((uint16_t)(s[2] & 0x0F) << 6); + if (rem >= 3) + pix[2] = ((uint16_t)s[2] >> 4) | ((uint16_t)(s[3] & 0x3F) << 4); + if (rem >= 4) /* NOTE(review): rem is at most 3 here because the loop above consumed every full group of 4, so this branch looks unreachable - confirm */ + pix[3] = ((uint16_t)s[3] >> 6) | ((uint16_t)s[4] << 2); + + for (unsigned j = 0; j < rem; j++) + d[j] = (uint16_t)(pix[j] << 6); + } + } +} + static int v4l2request_transfer_get_formats(AVHWFramesContext *hwfc, enum AVHWFrameTransferDirection dir, enum AVPixelFormat **formats) @@ -1082,6 +1132,22 @@ static int v4l2request_transfer_get_formats(AVHWFramesContext *hwfc, if (dir == AV_HWFRAME_TRANSFER_DIRECTION_TO) return AVERROR(ENOSYS); + /* + * NV15-backed surfaces (sw_format = YUV420P10) are exposed as P010 to + * downstream filters: the unpack below converts the packed 10-bit + * samples into the standard high-bits-of-16 layout. Hi10P / Main10 + * VAAPI/v4l2-request decode reaches userspace through this path. 
+ */ + if (hwfc->sw_format == AV_PIX_FMT_YUV420P10) { + fmts = av_malloc_array(2, sizeof(*fmts)); + if (!fmts) + return AVERROR(ENOMEM); + fmts[0] = AV_PIX_FMT_P010; + fmts[1] = AV_PIX_FMT_NONE; + *formats = fmts; + return 0; + } + fmts = av_malloc_array(2, sizeof(*fmts)); if (!fmts) return AVERROR(ENOMEM); @@ -1089,8 +1155,13 @@ static int v4l2request_transfer_get_formats(AVHWFramesContext *hwfc, fmts[0] = hwfc->sw_format; fmts[1] = AV_PIX_FMT_NONE; + /* + * Tiled-NV12-32L32 (Allwinner) and NV20 (rkvdec 4:2:2 10-bit) still need + * dedicated unpacks before hwdownload can consume them; leave them as + * "no transfer formats" so the filter graph reports the limitation + * rather than silently producing garbage. + */ if (hwfc->sw_format == AV_PIX_FMT_YUV420P || - hwfc->sw_format == AV_PIX_FMT_YUV420P10 || hwfc->sw_format == AV_PIX_FMT_YUV422P10) fmts[0] = AV_PIX_FMT_NONE; @@ -1110,6 +1181,44 @@ static int v4l2request_transfer_data_from(AVHWFramesContext *hwfc, map = av_frame_alloc(); if (!map) return AVERROR(ENOMEM); + + /* + * For NV15→P010, map the raw NV15 bytes (sw_format) and unpack into + * dst's P010 storage. Otherwise fall through to the original byte-copy + * path used for 1:1 sw_format matches (NV12, NV16, AFBC handled by DRM). + */ + if (hwfc->sw_format == AV_PIX_FMT_YUV420P10) { + /* + * Only P010 is advertised by transfer_get_formats for this sw_format; + * a caller that bypasses get_formats and asks for anything else would + * silently corrupt output via av_frame_copy on NV15-packed bytes. + * Reject explicitly. + */ + if (dst->format != AV_PIX_FMT_P010) { + ret = AVERROR(ENOSYS); + goto fail; + } + + map->format = hwfc->sw_format; + ret = v4l2request_map_frame(hwfc, map, src); + if (ret) + goto fail; + + v4l2request_nv15_unpack_plane_to_p010(map->data[0], + (uint16_t *)dst->data[0], + dst->width, dst->height, + map->linesize[0], + dst->linesize[0]); + /* NV15 chroma plane is W × H/2 samples (4:2:0, UV interleaved). 
*/ + v4l2request_nv15_unpack_plane_to_p010(map->data[1], + (uint16_t *)dst->data[1], + dst->width, dst->height / 2, + map->linesize[1], + dst->linesize[1]); + ret = 0; + goto fail; + } + map->format = dst->format; ret = v4l2request_map_frame(hwfc, map, src); -- 2.47.3