Merge PR #26: ffmpeg-v4l2-request-fourier NV15 to P010 unpack for Hi10P / Main10

2026-05-18 08:52:50 +00:00
parent eb1782e86f 9e9447502e
commit 7542989f2b
2 changed files with 183 additions and 3 deletions
@@ -0,0 +1,178 @@
 From 0cd6e669735e453ec8772f111065bbb2f70a5bc6 Mon Sep 17 00:00:00 2001
 From: Markus Fritsche <mfritsche@reauktion.de>
 Date: Mon, 18 May 2026 07:27:10 +0000
 Subject: [PATCH] avutil/hwcontext_v4l2request: unpack NV15 to P010 in
 transfer_data_from
 MIME-Version: 1.0
 Content-Type: text/plain; charset=UTF-8
 Content-Transfer-Encoding: 8bit
 V4L2_PIX_FMT_NV15 (RK3399/RK3588 rkvdec 10-bit 4:2:0 capture) is mapped to
 sw_format = AV_PIX_FMT_YUV420P10 in v4l2request_capture_pixelformats[]. The
 existing transfer_get_formats explicitly blanked the format list for that
 sw_format, so 'ffmpeg -hwaccel v4l2request -vf hwdownload,format=p010le' on
 a Hi10P / Main10 input failed at filter init with EINVAL before reaching
 the actual decode (which itself succeeds — 2 frames decoded cleanly).
 Expose AV_PIX_FMT_P010 as the transfer target for NV15-backed surfaces and
 unpack the packed 10-bit samples into the standard high-bits-of-16 layout
 inside transfer_data_from. Luma and chroma share the same packing format
 (5 bytes per 4 samples, little endian); chroma plane is W × H/2 samples
 for 4:2:0.
 The other 'needs custom unpack' sw_formats (YUV420P / Allwinner NV12_32L32
 tiled and YUV422P10 / rkvdec NV20) still get an empty transfer format list,
 because they need different unpack code that isn't covered by this patch.
 Closes marfrit/marfrit-packages#21.
 ---
 libavutil/hwcontext_v4l2request.c | 111 +++++++++++++++++++++++++++++-
 1 file changed, 110 insertions(+), 1 deletion(-)
 diff --git a/libavutil/hwcontext_v4l2request.c b/libavutil/hwcontext_v4l2request.c
 index b6633d9081..3842160dfb 100644
 --- a/libavutil/hwcontext_v4l2request.c
 +++ b/libavutil/hwcontext_v4l2request.c
@@ -1073,6 +1073,56 @@ fail:
     return ret;
 }
 +/*
 + * Unpack one NV15-packed 10-bit plane (5 bytes per 4 samples, little endian)
 + * into a P010-style plane (10 bits in the high bits of a 16-bit container).
 + * `width` counts samples per row and `height` counts rows; `dst_stride` is
 + * in bytes and `src_stride` is bytes per row of NV15 data.
 + */
 +static void v4l2request_nv15_unpack_plane_to_p010(const uint8_t *src,
 +                                                  uint16_t *dst,
 +                                                  unsigned width,
 +                                                  unsigned height,
 +                                                  unsigned src_stride,
 +                                                  unsigned dst_stride)
 +{
 +    for (unsigned y = 0; y < height; y++) {
 +        /* Widen before multiplying so large planes cannot wrap the offset. */
 +        const uint8_t *s = src + (size_t)y * src_stride;
 +        uint16_t *d = (uint16_t *)((uint8_t *)dst + (size_t)y * dst_stride);
 +        unsigned x;
 +
 +        for (x = 0; x + 4 <= width; x += 4) {
 +            uint16_t a = (uint16_t)s[0] | ((uint16_t)(s[1] & 0x03) << 8);
 +            uint16_t b = ((uint16_t)s[1] >> 2) | ((uint16_t)(s[2] & 0x0F) << 6);
 +            uint16_t c = ((uint16_t)s[2] >> 4) | ((uint16_t)(s[3] & 0x3F) << 4);
 +            uint16_t e = ((uint16_t)s[3] >> 6) | ((uint16_t)s[4] << 2);
 +
 +            d[0] = (uint16_t)(a << 6);
 +            d[1] = (uint16_t)(b << 6);
 +            d[2] = (uint16_t)(c << 6);
 +            d[3] = (uint16_t)(e << 6);
 +
 +            d += 4;
 +            s += 5;
 +        }
 +
 +        /*
 +         * Tail of 1-3 samples (full groups were consumed above): decode each
 +         * sample only if it exists, reading just the bytes that hold it.
 +         */
 +        if (x < width) {
 +            unsigned rem = width - x;
 +
 +            d[0] = (uint16_t)((s[0] | ((s[1] & 0x03) << 8)) << 6);
 +            if (rem >= 2)
 +                d[1] = (uint16_t)(((s[1] >> 2) | ((s[2] & 0x0F) << 6)) << 6);
 +            if (rem >= 3)
 +                d[2] = (uint16_t)(((s[2] >> 4) | ((s[3] & 0x3F) << 4)) << 6);
 +        }
 +    }
 +}
 +
 static int v4l2request_transfer_get_formats(AVHWFramesContext *hwfc,
                                             enum AVHWFrameTransferDirection dir,
                                             enum AVPixelFormat **formats)
@@ -1082,6 +1132,22 @@ static int v4l2request_transfer_get_formats(AVHWFramesContext *hwfc,
     if (dir == AV_HWFRAME_TRANSFER_DIRECTION_TO)
         return AVERROR(ENOSYS);
 +    /*
 +     * NV15-backed surfaces (sw_format = YUV420P10) are exposed as P010 to
 +     * downstream filters: transfer_data_from unpacks the packed 10-bit
 +     * samples into the standard high-bits-of-16 layout. Hi10P / Main10
 +     * v4l2-request decode reaches userspace through this path.
 +     */
 +    if (hwfc->sw_format == AV_PIX_FMT_YUV420P10) {
 +        fmts = av_malloc_array(2, sizeof(*fmts));
 +        if (!fmts)
 +            return AVERROR(ENOMEM);
 +        fmts[0] = AV_PIX_FMT_P010;
 +        fmts[1] = AV_PIX_FMT_NONE;
 +        *formats = fmts;
 +        return 0;
 +    }
 +
     fmts = av_malloc_array(2, sizeof(*fmts));
     if (!fmts)
         return AVERROR(ENOMEM);
@@ -1089,8 +1155,13 @@ static int v4l2request_transfer_get_formats(AVHWFramesContext *hwfc,
     fmts[0] = hwfc->sw_format;
     fmts[1] = AV_PIX_FMT_NONE;
 +    /*
 +     * Tiled-NV12-32L32 (Allwinner) and NV20 (rkvdec 4:2:2 10-bit) still need
 +     * dedicated unpacks before hwdownload can consume them; leave them as
 +     * "no transfer formats" so the filter graph reports the limitation
 +     * rather than silently producing garbage.
 +     */
     if (hwfc->sw_format == AV_PIX_FMT_YUV420P ||
 -        hwfc->sw_format == AV_PIX_FMT_YUV420P10 ||
         hwfc->sw_format == AV_PIX_FMT_YUV422P10)
         fmts[0] = AV_PIX_FMT_NONE;
@@ -1110,6 +1181,44 @@ static int v4l2request_transfer_data_from(AVHWFramesContext *hwfc,
     map = av_frame_alloc();
     if (!map)
         return AVERROR(ENOMEM);
 +
 +    /*
 +     * For NV15→P010, map the raw NV15 bytes (sw_format) and unpack into
 +     * dst's P010 storage. Otherwise fall through to the original byte-copy
 +     * path used for 1:1 sw_format matches (NV12, NV16, AFBC handled by DRM).
 +     */
 +    if (hwfc->sw_format == AV_PIX_FMT_YUV420P10) {
 +        /*
 +         * Only P010 is advertised by transfer_get_formats for this sw_format;
 +         * a caller that bypasses get_formats and asks for anything else would
 +         * silently corrupt output via av_frame_copy on NV15-packed bytes.
 +         * Reject explicitly.
 +         */
 +        if (dst->format != AV_PIX_FMT_P010) {
 +            ret = AVERROR(ENOSYS);
 +            goto fail;
 +        }
 +
 +        map->format = hwfc->sw_format;
 +        ret = v4l2request_map_frame(hwfc, map, src);
 +        if (ret)
 +            goto fail;
 +
 +        v4l2request_nv15_unpack_plane_to_p010(map->data[0],
 +                                              (uint16_t *)dst->data[0],
 +                                              dst->width, dst->height,
 +                                              map->linesize[0],
 +                                              dst->linesize[0]);
 +        /* NV15 4:2:0 chroma: W × H/2 UV samples; assumes even W and H. */
 +        v4l2request_nv15_unpack_plane_to_p010(map->data[1],
 +                                              (uint16_t *)dst->data[1],
 +                                              dst->width, dst->height / 2,
 +                                              map->linesize[1],
 +                                              dst->linesize[1]);
 +        ret = 0;
 +        goto fail;
 +    }
 +
     map->format = dst->format;
     ret = v4l2request_map_frame(hwfc, map, src);
 -- 
 2.47.3
@@ -24,7 +24,7 @@ _srcname=FFmpeg
 _version='8.1'
 _commit='b57fbbe50c9b2656fad86a1a7eeabfd2b2a50935'  # v4l2-request-n8.1 tip 2026-04-24
 pkgver=8.1.r123329.b57fbbe
-pkgrel=4
+pkgrel=5
 epoch=2
 pkgdesc='FFmpeg with V4L2 Request API hwaccel (Rockchip / Allwinner stateless decode)'
 arch=('aarch64')
@@ -78,8 +78,9 @@ provides=(
 conflicts=(ffmpeg)
 replaces=(ffmpeg ffmpeg-v4l2-request-git)
 source=("git+https://github.com/Kwiboo/FFmpeg.git#commit=${_commit}"
-        '0001-libudev-bypass-fallback.patch')
+        '0001-libudev-bypass-fallback.patch'
-sha256sums=('SKIP' 'SKIP')
+        '0002-nv15-to-p010-unpack.patch')
 sha256sums=('SKIP' 'SKIP' 'SKIP')
 pkgver() {
  cd "${_srcname}"
@@ -91,6 +92,7 @@ pkgver() {
 prepare() {
  cd "${_srcname}"
  patch -Np1 -i "${srcdir}/0001-libudev-bypass-fallback.patch"
  patch -Np1 -i "${srcdir}/0002-nv15-to-p010-unpack.patch"
 }
 build() {