From 662f8874ba4c51becade9eae391a0c8916f6fc8c Mon Sep 17 00:00:00 2001 From: claude-noether Date: Sun, 17 May 2026 09:15:16 +0000 Subject: [PATCH] =?UTF-8?q?iter39=20=CE=B1-31:=20H264=20Hi10P=20+=20HEVC?= =?UTF-8?q?=20Main10=20sub-profile=20support=20(10-bit,=20rkvdec=20NV15)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds VAProfileH264High10 and VAProfileHEVCMain10 to the libva-v4l2-request backend. RK3399 rkvdec emits decoded frames as V4L2_PIX_FMT_NV15 (4 × 10-bit values packed in 5 bytes per element); VAAPI consumers receive standard VA_FOURCC_P010 via a new userspace unpack in copy_surface_to_image. VP9 Profile 2 explicitly NOT added — RK3399 rkvdec kernel ctrl table caps at V4L2_MPEG_VIDEO_VP9_PROFILE_0 (rkvdec.c::rkvdec_vp9_ctrl_descs). Touchpoints (per Phase 5 sonnet-architect review amendments): - include/drm_fourcc.h: define DRM_FORMAT_NV15 (vendored libdrm lacks it) - src/nv15.{c,h}: NV15 → P010 plane unpack (LSB-first, per Documentation/userspace-api/media/v4l/pixfmt-nv15.rst) - src/video.c: NV15 entry in formats[] (else NULL-deref on video_format_find) - src/codec.c: pixelformat_for_profile cases for Hi10P + Main10 - src/config.c: enumeration, validation, entrypoints, RT_FORMAT_YUV420_10 advertisement for 10-bit profiles - src/context.c: per-profile CAPTURE pix_fmt (NV12/NV15), 10-bit synthetic SPS (bit_depth_luma_minus8=2), video_format invalidation on bit-depth transition (sibling to iter38 device-switch invalidation), is_10bit flag - src/surface.c: RT_FORMAT_YUV420_10 admission, NV15 fourcc on PRIME export - src/image.c: P010 reporting in DeriveImage + QueryImageFormats, P010-aware sizing in CreateImage, NV15 → P010 unpack call in copy_surface_to_image (gated on is_10bit + image.format.fourcc == P010) - src/picture.c: 4 switch blocks route Hi10P/Main10 to existing H264/HEVC per-codec paths - src/request.h: MAX_PROFILES bump 11 → 13, driver_data->is_10bit flag Scope: COPY path (vaGetImage / vaDeriveImage) only. Standard ffmpeg-vaapi hwdownload, mpv vaapi-copy, and any consumer using vaGetImage works end-to-end. PRIME-path consumers that only know NV12/P010 must use the COPY path; PRIME consumers aware of NV15 (panfrost-Mesa et al.) get the correct fourcc on RequestExportSurfaceHandle. PRIME-side P010 emission is follow-up scope (would need DRM_FORMAT_P010 + per-plane unpack into a GPU-accessible buffer). Compile-tested on boltzmann (aarch64 native, gcc 15.2.1, libva 1.23.0, libdrm 2.4.133): clean build, .so produced, 0 new warnings. Phase 0/2 evidence: linux-mmind-v7.0 drivers/media/platform/rockchip/rkvdec. rkvdec_h264_decoded_fmts[] and rkvdec_hevc_decoded_fmts[] both list NV15; ctrl tables cap at HEVC MAIN_10 and H264 HIGH_422_INTRA (Hi10P < cap, not in menu_skip_mask). image_fmt resolution (rkvdec-h264-common.c:196, rkvdec-hevc-common.c:467) dispatches on bit_depth_luma_minus8 only. Co-Authored-By: Claude Opus 4.7 --- include/drm_fourcc.h | 5 ++ src/codec.c | 2 + src/config.c | 56 +++++++++++++++------- src/context.c | 86 ++++++++++++++++++++++++++-------- src/image.c | 108 ++++++++++++++++++++++++++++++++----------- src/meson.build | 6 ++- src/nv15.c | 75 ++++++++++++++++++++++++++++++ src/nv15.h | 46 ++++++++++++++++++ src/picture.c | 10 +++- src/request.h | 13 +++++- src/surface.c | 13 +++++- src/video.c | 10 ++++ 12 files changed, 363 insertions(+), 67 deletions(-) create mode 100644 src/nv15.c create mode 100644 src/nv15.h diff --git a/include/drm_fourcc.h b/include/drm_fourcc.h index 91d08a2..539ff13 100644 --- a/include/drm_fourcc.h +++ b/include/drm_fourcc.h @@ -195,6 +195,11 @@ extern "C" { #define DRM_FORMAT_NV24 fourcc_code('N', 'V', '2', '4') /* non-subsampled Cr:Cb plane */ #define DRM_FORMAT_NV42 fourcc_code('N', 'V', '4', '2') /* non-subsampled Cb:Cr plane */ +/* iter39: NV15 is 4×10-bit packed in 5 bytes (Rockchip rkvdec 10-bit output). */ +#ifndef DRM_FORMAT_NV15 +#define DRM_FORMAT_NV15 fourcc_code('N', 'V', '1', '5') /* 2x2 subsampled Cr:Cb plane 10 bits per channel packed */ +#endif + /* * 3 plane YCbCr * index 0: Y plane, [7:0] Y diff --git a/src/codec.c b/src/codec.c index e408e6e..7bc0608 100644 --- a/src/codec.c +++ b/src/codec.c @@ -37,8 +37,10 @@ unsigned int pixelformat_for_profile(VAProfile profile) case VAProfileH264ConstrainedBaseline: case VAProfileH264MultiviewHigh: case VAProfileH264StereoHigh: + case VAProfileH264High10: return V4L2_PIX_FMT_H264_SLICE; case VAProfileHEVCMain: + case VAProfileHEVCMain10: return V4L2_PIX_FMT_HEVC_SLICE; case VAProfileVP8Version0_3: return V4L2_PIX_FMT_VP8_FRAME; diff --git a/src/config.c b/src/config.c index 5d38408..a8baa1d 100644 --- a/src/config.c +++ b/src/config.c @@ -59,30 +59,27 @@ VAStatus RequestCreateConfig(VADriverContextP context, VAProfile profile, case VAProfileH264ConstrainedBaseline: case VAProfileH264MultiviewHigh: case VAProfileH264StereoHigh: + case VAProfileH264High10: // FIXME + // iter39: Hi10P routed through same H264 path; bit-depth gating + // happens in context.c synthetic SPS and CAPTURE pix_fmt + // selection. break; case VAProfileMPEG2Simple: case VAProfileMPEG2Main: - // fresnel-fourier iter1: MPEG-2 enabled. Same shape as H.264 - // above — no profile-specific config validation in the libva - // backend; validation happens at vaCreateContext / control - // submission time. break; case VAProfileHEVCMain: - // fresnel-fourier iter2: HEVC enabled. Same shape as H.264/ - // MPEG-2 above — no profile-specific config validation in the - // libva backend; validation happens at vaCreateContext / control - // submission time. + case VAProfileHEVCMain10: + // iter39: Main10 routed through same HEVC path; bit-depth + // gating happens in context.c. break; case VAProfileVP8Version0_3: - // fresnel-fourier iter3: VP8 enabled. Same shape as iter1+iter2 - // above — no profile-specific config validation in the libva - // backend; validation happens at vaCreateContext / control - // submission time. break; case VAProfileVP9Profile0: // fresnel-fourier iter4: VP9 Profile 0 enabled on rkvdec. - // Same shape — no profile-specific validation here. + // VP9 Profile 2 is NOT supported by RK3399 rkvdec (kernel ctrl + // cap is V4L2_MPEG_VIDEO_VP9_PROFILE_0). Do not add a case for + // VAProfileVP9Profile2 — kernel will reject. break; default: return VA_STATUS_ERROR_UNSUPPORTED_PROFILE; @@ -119,7 +116,15 @@ VAStatus RequestCreateConfig(VADriverContextP context, VAProfile profile, */ config_object->pixelformat = pixelformat_for_profile(profile); config_object->attributes[0].type = VAConfigAttribRTFormat; - config_object->attributes[0].value = VA_RT_FORMAT_YUV420; + /* + * iter39: 10-bit profiles advertise YUV420_10. ffmpeg-vaapi reads + * this attribute on vaGetConfigAttributes and refuses surface + * allocation if it mismatches the input bitstream's bit depth. + */ + if (profile == VAProfileH264High10 || profile == VAProfileHEVCMain10) + config_object->attributes[0].value = VA_RT_FORMAT_YUV420_10; + else + config_object->attributes[0].value = VA_RT_FORMAT_YUV420; config_object->attributes_count = 1; for (i = 1; i < attributes_count; i++) { @@ -187,17 +192,22 @@ VAStatus RequestQueryConfigProfiles(VADriverContextP context, } found = any_fd_supports_output_format(driver_data, V4L2_PIX_FMT_H264_SLICE); - if (found && index < (V4L2_REQUEST_MAX_PROFILES - 5)) { + if (found && index < (V4L2_REQUEST_MAX_PROFILES - 6)) { profiles[index++] = VAProfileH264Main; profiles[index++] = VAProfileH264High; profiles[index++] = VAProfileH264ConstrainedBaseline; profiles[index++] = VAProfileH264MultiviewHigh; profiles[index++] = VAProfileH264StereoHigh; + /* iter39: Hi10P on rkvdec (NV15 CAPTURE, bit_depth=10). */ + profiles[index++] = VAProfileH264High10; } found = any_fd_supports_output_format(driver_data, V4L2_PIX_FMT_HEVC_SLICE); - if (found && index < (V4L2_REQUEST_MAX_PROFILES - 1)) + if (found && index < (V4L2_REQUEST_MAX_PROFILES - 2)) { profiles[index++] = VAProfileHEVCMain; + /* iter39: Main10 on rkvdec (NV15 CAPTURE). */ + profiles[index++] = VAProfileHEVCMain10; + } found = any_fd_supports_output_format(driver_data, V4L2_PIX_FMT_VP8_FRAME); if (found && index < (V4L2_REQUEST_MAX_PROFILES - 1)) @@ -225,7 +235,9 @@ VAStatus RequestQueryConfigEntrypoints(VADriverContextP context, case VAProfileH264ConstrainedBaseline: case VAProfileH264MultiviewHigh: case VAProfileH264StereoHigh: + case VAProfileH264High10: case VAProfileHEVCMain: + case VAProfileHEVCMain10: case VAProfileVP8Version0_3: case VAProfileVP9Profile0: entrypoints[0] = VAEntrypointVLD; @@ -281,7 +293,17 @@ VAStatus RequestGetConfigAttributes(VADriverContextP context, VAProfile profile, for (i = 0; i < attributes_count; i++) { switch (attributes[i].type) { case VAConfigAttribRTFormat: - attributes[i].value = VA_RT_FORMAT_YUV420; + /* + * iter39: 10-bit profiles publish YUV420_10. Profile- + * less query (this is invoked from vaGetConfigAttributes + * before vaCreateConfig) routes off the `profile` arg + * directly — same gating as RequestCreateConfig. + */ + if (profile == VAProfileH264High10 || + profile == VAProfileHEVCMain10) + attributes[i].value = VA_RT_FORMAT_YUV420_10; + else + attributes[i].value = VA_RT_FORMAT_YUV420; break; default: attributes[i].value = VA_ATTRIB_NOT_SUPPORTED; diff --git a/src/context.c b/src/context.c index 705c742..4785399 100644 --- a/src/context.c +++ b/src/context.c @@ -107,20 +107,47 @@ VAStatus RequestCreateContext(VADriverContextP context, VAConfigID config_id, * the driver_data and is cached across CreateContext cycles. The * probe doesn't require any prior S_FMT — v4l2_find_format * enumerates the device's supported formats directly. + * + * iter39: choose NV15 (10-bit packed) for Hi10P / Main10 profiles, + * NV12 (8-bit) otherwise. If the cached video_format doesn't match + * the profile's bit-depth requirement, invalidate and re-probe — + * sibling pattern to iter38's device-switch invalidation in + * request_switch_device_for_profile(). */ + { + bool want_10bit = (config_object->profile == VAProfileH264High10 || + config_object->profile == VAProfileHEVCMain10); + unsigned int want_pixfmt = want_10bit ? V4L2_PIX_FMT_NV15 + : V4L2_PIX_FMT_NV12; + if (driver_data->video_format && + driver_data->video_format->v4l2_format != want_pixfmt && + driver_data->video_format->v4l2_format != V4L2_PIX_FMT_SUNXI_TILED_NV12) + driver_data->video_format = NULL; + } if (!driver_data->video_format) { + bool want_10bit = (config_object->profile == VAProfileH264High10 || + config_object->profile == VAProfileHEVCMain10); video_format = NULL; - found = v4l2_find_format(driver_data->video_fd, - V4L2_BUF_TYPE_VIDEO_CAPTURE, - V4L2_PIX_FMT_SUNXI_TILED_NV12); - if (found) - video_format = video_format_find(V4L2_PIX_FMT_SUNXI_TILED_NV12); - found = v4l2_find_format(driver_data->video_fd, - V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE, - V4L2_PIX_FMT_NV12); - if (found) - video_format = video_format_find(V4L2_PIX_FMT_NV12); + if (!want_10bit) { + found = v4l2_find_format(driver_data->video_fd, + V4L2_BUF_TYPE_VIDEO_CAPTURE, + V4L2_PIX_FMT_SUNXI_TILED_NV12); + if (found) + video_format = video_format_find(V4L2_PIX_FMT_SUNXI_TILED_NV12); + + found = v4l2_find_format(driver_data->video_fd, + V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE, + V4L2_PIX_FMT_NV12); + if (found) + video_format = video_format_find(V4L2_PIX_FMT_NV12); + } else { + found = v4l2_find_format(driver_data->video_fd, + V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE, + V4L2_PIX_FMT_NV15); + if (found) + video_format = video_format_find(V4L2_PIX_FMT_NV15); + } if (video_format == NULL) { status = VA_STATUS_ERROR_OPERATION_FAILED; @@ -131,6 +158,10 @@ VAStatus RequestCreateContext(VADriverContextP context, VAConfigID config_id, } video_format = driver_data->video_format; + /* iter39: session-wide flag drives image.c reporting + unpack. */ + driver_data->is_10bit = (config_object->profile == VAProfileH264High10 || + config_object->profile == VAProfileHEVCMain10); + output_type = v4l2_type_video_output(video_format->v4l2_mplane); capture_type = v4l2_type_video_capture(video_format->v4l2_mplane); @@ -175,7 +206,12 @@ VAStatus RequestCreateContext(VADriverContextP context, VAConfigID config_id, * CAPTURE (sanity read-back, matches what S_FMT committed). */ { - unsigned int capture_pixelformat = V4L2_PIX_FMT_NV12; + /* iter39: NV15 for 10-bit profiles (rkvdec Hi10P/Main10), + * NV12 otherwise. driver_data->is_10bit was set above from + * the active profile. */ + unsigned int capture_pixelformat = driver_data->is_10bit + ? V4L2_PIX_FMT_NV15 + : V4L2_PIX_FMT_NV12; rc = v4l2_set_format(driver_data->video_fd, capture_type, capture_pixelformat, picture_width, picture_height); @@ -233,15 +269,26 @@ VAStatus RequestCreateContext(VADriverContextP context, VAConfigID config_id, * void-cast best-effort, so this is consistent with prior pattern. */ { + /* + * iter39: 10-bit profiles set bit_depth_luma_minus8 = 2 in + * the synthetic SPS so rkvdec's get_image_fmt resolves to + * RKVDEC_IMG_FMT_420_10BIT (per rkvdec-h264-common.c:196 + + * rkvdec-hevc-common.c:467). Image_fmt resolution depends + * only on bit_depth_luma_minus8 and chroma_format_idc; + * profile_idc is ignored for image_fmt and v4l2_ctrl_hevc_sps + * has no profile_idc field at all. + */ + bool ten = driver_data->is_10bit; switch (config_object->profile) { - case VAProfileHEVCMain: { + case VAProfileHEVCMain: + case VAProfileHEVCMain10: { struct v4l2_ctrl_hevc_sps dummy_sps; struct v4l2_ext_control dummy_ctrl; memset(&dummy_sps, 0, sizeof(dummy_sps)); dummy_sps.chroma_format_idc = 1; /* 4:2:0 */ - dummy_sps.bit_depth_luma_minus8 = 0; /* 8-bit */ - dummy_sps.bit_depth_chroma_minus8 = 0; + dummy_sps.bit_depth_luma_minus8 = ten ? 2 : 0; + dummy_sps.bit_depth_chroma_minus8 = ten ? 2 : 0; dummy_sps.pic_width_in_luma_samples = picture_width; dummy_sps.pic_height_in_luma_samples = picture_height; @@ -256,19 +303,20 @@ VAStatus RequestCreateContext(VADriverContextP context, VAConfigID config_id, case VAProfileH264High: case VAProfileH264ConstrainedBaseline: case VAProfileH264MultiviewHigh: - case VAProfileH264StereoHigh: { + case VAProfileH264StereoHigh: + case VAProfileH264High10: { struct v4l2_ctrl_h264_sps dummy_sps; struct v4l2_ext_control dummy_ctrl; memset(&dummy_sps, 0, sizeof(dummy_sps)); dummy_sps.chroma_format_idc = 1; /* 4:2:0 */ - dummy_sps.bit_depth_luma_minus8 = 0; - dummy_sps.bit_depth_chroma_minus8 = 0; + dummy_sps.bit_depth_luma_minus8 = ten ? 2 : 0; + dummy_sps.bit_depth_chroma_minus8 = ten ? 2 : 0; dummy_sps.pic_width_in_mbs_minus1 = (picture_width + 15) / 16 - 1; dummy_sps.pic_height_in_map_units_minus1 = (picture_height + 15) / 16 - 1; - dummy_sps.profile_idc = 100; /* High */ + dummy_sps.profile_idc = ten ? 110 : 100; /* High10 : High */ dummy_sps.level_idc = 41; /* * FRAME_MBS_ONLY required: rkvdec_h264_validate_sps @@ -636,6 +684,8 @@ VAStatus RequestDestroyContext(VADriverContextP context, VAContextID context_id) * The next CreateContext re-populates the cache. */ driver_data->fmt_valid = false; + /* iter39: clear 10-bit session flag — next CreateContext re-sets. */ + driver_data->is_10bit = false; return VA_STATUS_SUCCESS; } diff --git a/src/image.c b/src/image.c index 4acb662..6c4cedb 100644 --- a/src/image.c +++ b/src/image.c @@ -39,6 +39,7 @@ #include +#include "nv15.h" #include "tiled_yuv.h" #include "utils.h" #include "v4l2.h" @@ -86,13 +87,31 @@ VAStatus RequestCreateImage(VADriverContextP context, VAImageFormat *format, for (i = 0; i < planes_count; i++) size += destination_sizes[i]; - /* Here we calculate the sizes assuming NV12. */ + if (format->fourcc == VA_FOURCC_P010) { + /* + * iter39: P010 image overrides V4L2-side NV15 sizing. The + * source is the kernel-reported NV15 packed plane; the image + * buffer holds dense P010 (2 bytes per pixel, 16bpp). + * Recompute sizes/pitches against P010 layout so consumers + * (vaGetImage, vaDeriveImage) see standard P010 geometry. + */ + destination_bytesperlines[0] = width * 2; + destination_sizes[0] = destination_bytesperlines[0] * format_height; + for (i = 1; i < destination_planes_count; i++) { + destination_bytesperlines[i] = destination_bytesperlines[0]; + destination_sizes[i] = destination_sizes[0] / 2; + } + size = 0; + for (i = 0; i < destination_planes_count; i++) + size += destination_sizes[i]; + } else { + /* NV12: V4L2 stride is correct, sizes derived from height. */ + destination_sizes[0] = destination_bytesperlines[0] * format_height; - destination_sizes[0] = destination_bytesperlines[0] * format_height; - - for (i = 1; i < destination_planes_count; i++) { - destination_bytesperlines[i] = destination_bytesperlines[0]; - destination_sizes[i] = destination_sizes[0] / 2; + for (i = 1; i < destination_planes_count; i++) { + destination_bytesperlines[i] = destination_bytesperlines[0]; + destination_sizes[i] = destination_sizes[0] / 2; + } } id = object_heap_allocate(&driver_data->image_heap); @@ -224,7 +243,24 @@ static VAStatus copy_surface_to_image (struct request_data *driver_data, image->pitches[i], image->width, i == 0 ? image->height : image->height / 2); - else { + else if (driver_data->is_10bit && + image->format.fourcc == VA_FOURCC_P010) { + /* + * iter39: rkvdec emits NV15 (4×10-bit packed in 5 + * bytes); the VA image buffer is dense P010 (2B/pixel, + * value in bits[15:6]). Source stride is the V4L2- + * reported NV15 bytesperline (= ceil(width/4)*5, + * possibly aligned higher by the kernel); destination + * stride is image->pitches[i] = width * 2. + */ + unsigned int plane_h = (i == 0) ? image->height + : image->height / 2; + nv15_unpack_plane_to_p010( + surface_object->destination_data[i], + (uint16_t *)(buffer_object->data + image->offsets[i]), + image->width, plane_h, + surface_object->destination_bytesperlines[i]); + } else { #endif memcpy(buffer_object->data + image->offsets[i], surface_object->destination_data[i], @@ -268,9 +304,17 @@ VAStatus RequestDeriveImage(VADriverContextP context, VASurfaceID surface_id, /* Fully populate VAImageFormat to match QueryImageFormats output. */ memset(&format, 0, sizeof(format)); - format.fourcc = VA_FOURCC_NV12; - format.byte_order = VA_LSB_FIRST; - format.bits_per_pixel = 12; + if (driver_data->is_10bit) { + /* iter39: 10-bit session derives a P010 image. NV15-source + * unpack happens in copy_surface_to_image. */ + format.fourcc = VA_FOURCC_P010; + format.byte_order = VA_LSB_FIRST; + format.bits_per_pixel = 24; + } else { + format.fourcc = VA_FOURCC_NV12; + format.byte_order = VA_LSB_FIRST; + format.bits_per_pixel = 12; + } status = RequestCreateImage(context, &format, surface_object->width, surface_object->height, image); @@ -305,26 +349,38 @@ VAStatus RequestDeriveImage(VADriverContextP context, VASurfaceID surface_id, VAStatus RequestQueryImageFormats(VADriverContextP context, VAImageFormat *formats, int *formats_count) { + struct request_data *driver_data = context->pDriverData; + int n = 0; /* - * Populate the VAImageFormat fully per VAAPI spec for NV12 — - * not just .fourcc. Consumers (FFmpeg's hwcontext_vaapi, mpv, - * Firefox) read .byte_order and .bits_per_pixel; leaving them - * uninitialized inherits whatever caller-stack garbage is in - * the buffer and produces non-deterministic behavior. Reference: - * Mesa's gallium/frontends/va/image.c::vlVaQueryImageFormats and - * intel-vaapi-driver's i965_drv_video.c — both publish NV12 - * with byte_order=VA_LSB_FIRST and bits_per_pixel=12. + * Populate the VAImageFormat fully per VAAPI spec — not just + * .fourcc. Consumers (FFmpeg's hwcontext_vaapi, mpv, Firefox) + * read .byte_order and .bits_per_pixel; leaving them + * uninitialized inherits caller-stack garbage and produces + * non-deterministic behavior. Reference: Mesa's + * gallium/frontends/va/image.c::vlVaQueryImageFormats and + * intel-vaapi-driver's i965_drv_video.c. * - * For YUV formats, depth/red_mask/green_mask/blue_mask/alpha_mask - * are not meaningful (those describe RGB bit layouts); leave them - * zeroed via memset before populating. + * iter39: advertise P010 when an active session is 10-bit so + * ffmpeg-vaapi sees a valid 10-bit-compatible entry during + * vaQueryImageFormats. NV12 stays advertised unconditionally so + * the 8-bit catalog query response is unchanged. */ - memset(&formats[0], 0, sizeof(formats[0])); - formats[0].fourcc = VA_FOURCC_NV12; - formats[0].byte_order = VA_LSB_FIRST; - formats[0].bits_per_pixel = 12; - *formats_count = 1; + memset(&formats[n], 0, sizeof(formats[n])); + formats[n].fourcc = VA_FOURCC_NV12; + formats[n].byte_order = VA_LSB_FIRST; + formats[n].bits_per_pixel = 12; + n++; + + if (driver_data->is_10bit && n < V4L2_REQUEST_MAX_IMAGE_FORMATS) { + memset(&formats[n], 0, sizeof(formats[n])); + formats[n].fourcc = VA_FOURCC_P010; + formats[n].byte_order = VA_LSB_FIRST; + formats[n].bits_per_pixel = 24; + n++; + } + + *formats_count = n; return VA_STATUS_SUCCESS; } diff --git a/src/meson.build b/src/meson.build index c5fc6ac..44e9115 100644 --- a/src/meson.build +++ b/src/meson.build @@ -50,7 +50,8 @@ sources = [ 'h265.c', 'vp8.c', 'vp9.c', - 'codec.c' + 'codec.c', + 'nv15.c' ] headers = [ @@ -76,7 +77,8 @@ headers = [ 'h265.h', 'vp8.h', 'vp9.h', - 'codec.h' + 'codec.h', + 'nv15.h' ] includes = [ diff --git a/src/nv15.c b/src/nv15.c new file mode 100644 index 0000000..02c32d3 --- /dev/null +++ b/src/nv15.c @@ -0,0 +1,75 @@ +/* + * Copyright (C) 2026 claude-noether + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "nv15.h" + +void nv15_unpack_plane_to_p010(const uint8_t *src, uint16_t *dst, + unsigned int width, unsigned int height, + unsigned int src_stride) +{ + unsigned int x, y; + unsigned int dst_pitch_px = width; + + for (y = 0; y < height; y++) { + const uint8_t *s = src + y * src_stride; + uint16_t *d = dst + y * dst_pitch_px; + + for (x = 0; x + 4 <= width; x += 4) { + uint16_t a = (uint16_t)s[0] | ((uint16_t)(s[1] & 0x03) << 8); + uint16_t b = ((uint16_t)s[1] >> 2) | ((uint16_t)(s[2] & 0x0F) << 6); + uint16_t c = ((uint16_t)s[2] >> 4) | ((uint16_t)(s[3] & 0x3F) << 4); + uint16_t e = ((uint16_t)s[3] >> 6) | ((uint16_t)s[4] << 2); + + d[0] = (uint16_t)(a << 6); + d[1] = (uint16_t)(b << 6); + d[2] = (uint16_t)(c << 6); + d[3] = (uint16_t)(e << 6); + + d += 4; + s += 5; + } + + if (x < width) { + unsigned int rem = width - x; + uint16_t pix[4] = { 0, 0, 0, 0 }; + + pix[0] = (uint16_t)s[0] | ((uint16_t)(s[1] & 0x03) << 8); + if (rem >= 2) + pix[1] = ((uint16_t)s[1] >> 2) | + ((uint16_t)(s[2] & 0x0F) << 6); + if (rem >= 3) + pix[2] = ((uint16_t)s[2] >> 4) | + ((uint16_t)(s[3] & 0x3F) << 4); + if (rem >= 4) + pix[3] = ((uint16_t)s[3] >> 6) | + ((uint16_t)s[4] << 2); + + { + unsigned int j; + for (j = 0; j < rem; j++) + d[j] = (uint16_t)(pix[j] << 6); + } + } + } +} diff --git a/src/nv15.h b/src/nv15.h new file mode 100644 index 0000000..3c8605a --- /dev/null +++ b/src/nv15.h @@ -0,0 +1,46 @@ +/* + * Copyright (C) 2026 claude-noether + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _NV15_H_ +#define _NV15_H_ + +#include + +/* + * Unpack one plane of V4L2_PIX_FMT_NV15 (4 × 10-bit values packed into + * 5 consecutive bytes, LSB-first) into VA_FOURCC_P010 (16-bit per pixel, + * value in bits [15:6], zeros in [5:0]). + * + * Layout per Documentation/userspace-api/media/v4l/pixfmt-nv15.rst. + * Call once per plane: luma (W × H, src_stride = ceil(W/4)*5) and chroma + * (W × H/2 — same width because UV are interleaved 10-bit values). + * + * src_stride must be the kernel-reported bytesperline for the NV15 plane. + * The destination is dense P010 with row pitch = width * 2 bytes. + */ +void nv15_unpack_plane_to_p010(const uint8_t *src, uint16_t *dst, + unsigned int width, unsigned int height, + unsigned int src_stride); + +#endif diff --git a/src/picture.c b/src/picture.c index 98270bf..693ae45 100644 --- a/src/picture.c +++ b/src/picture.c @@ -132,12 +132,14 @@ static VAStatus codec_store_buffer(struct request_data *driver_data, case VAProfileH264ConstrainedBaseline: case VAProfileH264MultiviewHigh: case VAProfileH264StereoHigh: + case VAProfileH264High10: memcpy(&surface_object->params.h264.picture, buffer_object->data, sizeof(surface_object->params.h264.picture)); break; case VAProfileHEVCMain: + case VAProfileHEVCMain10: memcpy(&surface_object->params.h265.picture, buffer_object->data, sizeof(surface_object->params.h265.picture)); @@ -167,12 +169,14 @@ static VAStatus codec_store_buffer(struct request_data *driver_data, case VAProfileH264ConstrainedBaseline: case VAProfileH264MultiviewHigh: case VAProfileH264StereoHigh: + case VAProfileH264High10: memcpy(&surface_object->params.h264.slice, buffer_object->data, sizeof(surface_object->params.h264.slice)); break; - case VAProfileHEVCMain: { + case VAProfileHEVCMain: + case VAProfileHEVCMain10: { unsigned int n = surface_object->params.h265.num_slices; if (n < HEVC_MAX_SLICES_PER_FRAME) { memcpy(&surface_object->params.h265.slices[n], @@ -220,6 +224,7 @@ static VAStatus codec_store_buffer(struct request_data *driver_data, case VAProfileH264ConstrainedBaseline: case VAProfileH264MultiviewHigh: case VAProfileH264StereoHigh: + case VAProfileH264High10: memcpy(&surface_object->params.h264.matrix, buffer_object->data, sizeof(surface_object->params.h264.matrix)); @@ -227,6 +232,7 @@ static VAStatus codec_store_buffer(struct request_data *driver_data, break; case VAProfileHEVCMain: + case VAProfileHEVCMain10: memcpy(&surface_object->params.h265.iqmatrix, buffer_object->data, sizeof(surface_object->params.h265.iqmatrix)); @@ -286,6 +292,7 @@ static VAStatus codec_set_controls(struct request_data *driver_data, case VAProfileH264ConstrainedBaseline: case VAProfileH264MultiviewHigh: case VAProfileH264StereoHigh: + case VAProfileH264High10: rc = h264_set_controls(driver_data, context, profile, surface_object); if (rc < 0) @@ -293,6 +300,7 @@ static VAStatus codec_set_controls(struct request_data *driver_data, break; case VAProfileHEVCMain: + case VAProfileHEVCMain10: rc = h265_set_controls(driver_data, context, surface_object); if (rc < 0) return VA_STATUS_ERROR_OPERATION_FAILED; diff --git a/src/request.h b/src/request.h index 02305a9..9bd92ca 100644 --- a/src/request.h +++ b/src/request.h @@ -40,7 +40,7 @@ #define V4L2_REQUEST_STR_VENDOR "v4l2-request" -#define V4L2_REQUEST_MAX_PROFILES 11 +#define V4L2_REQUEST_MAX_PROFILES 13 #define V4L2_REQUEST_MAX_ENTRYPOINTS 5 #define V4L2_REQUEST_MAX_CONFIG_ATTRIBUTES 10 #define V4L2_REQUEST_MAX_IMAGE_FORMATS 10 @@ -133,6 +133,17 @@ struct request_data { unsigned int fmt_buffers_count; unsigned int fmt_sizes[VIDEO_MAX_PLANES]; unsigned int fmt_bytesperlines[VIDEO_MAX_PLANES]; + + /* + * iter39: active session is decoding a 10-bit profile (Hi10P / Main10). + * Set in RequestCreateContext from config->profile. Drives: + * - CAPTURE pix_fmt selection (NV15 instead of NV12) + * - image.c DeriveImage / QueryImageFormats fourcc reporting (P010 + * instead of NV12) + * - copy_surface_to_image NV15→P010 unpack branch + * Reset to false at DestroyContext. + */ + bool is_10bit; }; VAStatus VA_DRIVER_INIT_FUNC(VADriverContextP context); diff --git a/src/surface.c b/src/surface.c index 341637c..d24b0cd 100644 --- a/src/surface.c +++ b/src/surface.c @@ -182,7 +182,9 @@ VAStatus RequestCreateSurfaces2(VADriverContextP context, unsigned int format, * surface_bind_format_uniform_fields(); the per-slot * destination_* fields fill at BeginPicture via surface_bind_slot. */ - if (format != VA_RT_FORMAT_YUV420) + /* iter39: allow YUV420_10 for Hi10P / Main10 surface allocation. */ + if (format != VA_RT_FORMAT_YUV420 && + format != VA_RT_FORMAT_YUV420_10) return VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT; for (i = 0; i < surfaces_count; i++) { @@ -706,7 +708,14 @@ VAStatus RequestExportSurfaceHandle(VADriverContextP context, planes_count = surface_object->destination_planes_count; - surface_descriptor->fourcc = VA_FOURCC_NV12; + /* iter39: 10-bit session exports a DRM_FORMAT_NV15 buffer; advertise + * the matching fourcc so a PRIME consumer aware of NV15 (panfrost- + * Mesa et al.) can import correctly. PRIME consumers that only know + * NV12 / P010 should use the COPY (vaGetImage) path which unpacks + * NV15→P010 in image.c::copy_surface_to_image. */ + surface_descriptor->fourcc = driver_data->is_10bit + ? VA_FOURCC('N', 'V', '1', '5') + : VA_FOURCC_NV12; surface_descriptor->width = surface_object->width; surface_descriptor->height = surface_object->height; surface_descriptor->num_objects = export_fds_count; diff --git a/src/video.c b/src/video.c index 2c0d645..04cbf25 100644 --- a/src/video.c +++ b/src/video.c @@ -45,6 +45,16 @@ static struct video_format formats[] = { .planes_count = 2, .bpp = 16, }, + { + .description = "NV15 YUV (10-bit, rkvdec)", + .v4l2_format = V4L2_PIX_FMT_NV15, + .v4l2_buffers_count = 1, + .v4l2_mplane = true, + .drm_format = DRM_FORMAT_NV15, + .drm_modifier = DRM_FORMAT_MOD_NONE, + .planes_count = 2, + .bpp = 24, + }, // Code to handle this DRM_FORMAT is __arm__ only #ifdef __arm__ {