iter39 α-31: H264 Hi10P + HEVC Main10 sub-profile support (10-bit, rkvdec NV15)

Adds VAProfileH264High10 and VAProfileHEVCMain10 to the libva-v4l2-request
backend. RK3399 rkvdec emits decoded frames as V4L2_PIX_FMT_NV15 (4 × 10-bit
values packed in 5 bytes per element); VAAPI consumers receive standard
VA_FOURCC_P010 via a new userspace unpack in copy_surface_to_image.

VP9 Profile 2 explicitly NOT added — RK3399 rkvdec kernel ctrl table
caps at V4L2_MPEG_VIDEO_VP9_PROFILE_0 (rkvdec.c::rkvdec_vp9_ctrl_descs).

Touchpoints (per Phase 5 sonnet-architect review amendments):
  - include/drm_fourcc.h: define DRM_FORMAT_NV15 (vendored libdrm lacks it)
  - src/nv15.{c,h}: NV15 → P010 plane unpack (LSB-first, per
    Documentation/userspace-api/media/v4l/pixfmt-nv15.rst)
  - src/video.c: NV15 entry in formats[] (else NULL-deref on video_format_find)
  - src/codec.c: pixelformat_for_profile cases for Hi10P + Main10
  - src/config.c: enumeration, validation, entrypoints, RT_FORMAT_YUV420_10
    advertisement for 10-bit profiles
  - src/context.c: per-profile CAPTURE pix_fmt (NV12/NV15), 10-bit synthetic
    SPS (bit_depth_luma_minus8=2), video_format invalidation on bit-depth
    transition (sibling to iter38 device-switch invalidation), is_10bit flag
  - src/surface.c: RT_FORMAT_YUV420_10 admission, NV15 fourcc on PRIME export
  - src/image.c: P010 reporting in DeriveImage + QueryImageFormats,
    P010-aware sizing in CreateImage, NV15 → P010 unpack call in
    copy_surface_to_image (gated on is_10bit + image.format.fourcc == P010)
  - src/picture.c: 4 switch blocks route Hi10P/Main10 to existing H264/HEVC
    per-codec paths
  - src/request.h: MAX_PROFILES bump 11 → 13, driver_data->is_10bit flag

Scope: COPY path (vaGetImage / vaDeriveImage) only. Standard ffmpeg-vaapi
hwdownload, mpv vaapi-copy, and any consumer using vaGetImage work
end-to-end. PRIME-path consumers that only know NV12/P010 must use the
COPY path; PRIME consumers aware of NV15 (panfrost-Mesa et al.) get the
correct fourcc on RequestExportSurfaceHandle. PRIME-side P010 emission is
follow-up scope (would need DRM_FORMAT_P010 + per-plane unpack into a
GPU-accessible buffer).

Compile-tested on boltzmann (aarch64 native, gcc 15.2.1, libva 1.23.0,
libdrm 2.4.133): clean build, .so produced, 0 new warnings.

Phase 0/2 evidence: linux-mmind-v7.0 drivers/media/platform/rockchip/rkvdec.
rkvdec_h264_decoded_fmts[] and rkvdec_hevc_decoded_fmts[] both list NV15;
ctrl tables cap at HEVC MAIN_10 and H264 HIGH_422_INTRA (Hi10P < cap, not
in menu_skip_mask). image_fmt resolution (rkvdec-h264-common.c:196,
rkvdec-hevc-common.c:467) dispatches on bit_depth_luma_minus8 only.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-17 09:15:16 +00:00
parent 7ac934e0c5
commit 662f8874ba
12 changed files with 363 additions and 67 deletions
+5
View File
@@ -195,6 +195,11 @@ extern "C" {
#define DRM_FORMAT_NV24 fourcc_code('N', 'V', '2', '4') /* non-subsampled Cr:Cb plane */ #define DRM_FORMAT_NV24 fourcc_code('N', 'V', '2', '4') /* non-subsampled Cr:Cb plane */
#define DRM_FORMAT_NV42 fourcc_code('N', 'V', '4', '2') /* non-subsampled Cb:Cr plane */ #define DRM_FORMAT_NV42 fourcc_code('N', 'V', '4', '2') /* non-subsampled Cb:Cr plane */
/* iter39: NV15 is 4×10-bit packed in 5 bytes (Rockchip rkvdec 10-bit output). */
#ifndef DRM_FORMAT_NV15
#define DRM_FORMAT_NV15 fourcc_code('N', 'V', '1', '5') /* 2x2 subsampled Cr:Cb plane 10 bits per channel packed */
#endif
/* /*
* 3 plane YCbCr * 3 plane YCbCr
* index 0: Y plane, [7:0] Y * index 0: Y plane, [7:0] Y
+2
View File
@@ -37,8 +37,10 @@ unsigned int pixelformat_for_profile(VAProfile profile)
case VAProfileH264ConstrainedBaseline: case VAProfileH264ConstrainedBaseline:
case VAProfileH264MultiviewHigh: case VAProfileH264MultiviewHigh:
case VAProfileH264StereoHigh: case VAProfileH264StereoHigh:
case VAProfileH264High10:
return V4L2_PIX_FMT_H264_SLICE; return V4L2_PIX_FMT_H264_SLICE;
case VAProfileHEVCMain: case VAProfileHEVCMain:
case VAProfileHEVCMain10:
return V4L2_PIX_FMT_HEVC_SLICE; return V4L2_PIX_FMT_HEVC_SLICE;
case VAProfileVP8Version0_3: case VAProfileVP8Version0_3:
return V4L2_PIX_FMT_VP8_FRAME; return V4L2_PIX_FMT_VP8_FRAME;
+37 -15
View File
@@ -59,30 +59,27 @@ VAStatus RequestCreateConfig(VADriverContextP context, VAProfile profile,
case VAProfileH264ConstrainedBaseline: case VAProfileH264ConstrainedBaseline:
case VAProfileH264MultiviewHigh: case VAProfileH264MultiviewHigh:
case VAProfileH264StereoHigh: case VAProfileH264StereoHigh:
case VAProfileH264High10:
// FIXME // FIXME
// iter39: Hi10P routed through same H264 path; bit-depth gating
// happens in context.c synthetic SPS and CAPTURE pix_fmt
// selection.
break; break;
case VAProfileMPEG2Simple: case VAProfileMPEG2Simple:
case VAProfileMPEG2Main: case VAProfileMPEG2Main:
// fresnel-fourier iter1: MPEG-2 enabled. Same shape as H.264
// above — no profile-specific config validation in the libva
// backend; validation happens at vaCreateContext / control
// submission time.
break; break;
case VAProfileHEVCMain: case VAProfileHEVCMain:
// fresnel-fourier iter2: HEVC enabled. Same shape as H.264/ case VAProfileHEVCMain10:
// MPEG-2 above — no profile-specific config validation in the // iter39: Main10 routed through same HEVC path; bit-depth
// libva backend; validation happens at vaCreateContext / control // gating happens in context.c.
// submission time.
break; break;
case VAProfileVP8Version0_3: case VAProfileVP8Version0_3:
// fresnel-fourier iter3: VP8 enabled. Same shape as iter1+iter2
// above — no profile-specific config validation in the libva
// backend; validation happens at vaCreateContext / control
// submission time.
break; break;
case VAProfileVP9Profile0: case VAProfileVP9Profile0:
// fresnel-fourier iter4: VP9 Profile 0 enabled on rkvdec. // fresnel-fourier iter4: VP9 Profile 0 enabled on rkvdec.
// Same shape — no profile-specific validation here. // VP9 Profile 2 is NOT supported by RK3399 rkvdec (kernel ctrl
// cap is V4L2_MPEG_VIDEO_VP9_PROFILE_0). Do not add a case for
// VAProfileVP9Profile2 — kernel will reject.
break; break;
default: default:
return VA_STATUS_ERROR_UNSUPPORTED_PROFILE; return VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
@@ -119,6 +116,14 @@ VAStatus RequestCreateConfig(VADriverContextP context, VAProfile profile,
*/ */
config_object->pixelformat = pixelformat_for_profile(profile); config_object->pixelformat = pixelformat_for_profile(profile);
config_object->attributes[0].type = VAConfigAttribRTFormat; config_object->attributes[0].type = VAConfigAttribRTFormat;
/*
* iter39: 10-bit profiles advertise YUV420_10. ffmpeg-vaapi reads
* this attribute on vaGetConfigAttributes and refuses surface
* allocation if it mismatches the input bitstream's bit depth.
*/
if (profile == VAProfileH264High10 || profile == VAProfileHEVCMain10)
config_object->attributes[0].value = VA_RT_FORMAT_YUV420_10;
else
config_object->attributes[0].value = VA_RT_FORMAT_YUV420; config_object->attributes[0].value = VA_RT_FORMAT_YUV420;
config_object->attributes_count = 1; config_object->attributes_count = 1;
@@ -187,17 +192,22 @@ VAStatus RequestQueryConfigProfiles(VADriverContextP context,
} }
found = any_fd_supports_output_format(driver_data, V4L2_PIX_FMT_H264_SLICE); found = any_fd_supports_output_format(driver_data, V4L2_PIX_FMT_H264_SLICE);
if (found && index < (V4L2_REQUEST_MAX_PROFILES - 5)) { if (found && index < (V4L2_REQUEST_MAX_PROFILES - 6)) {
profiles[index++] = VAProfileH264Main; profiles[index++] = VAProfileH264Main;
profiles[index++] = VAProfileH264High; profiles[index++] = VAProfileH264High;
profiles[index++] = VAProfileH264ConstrainedBaseline; profiles[index++] = VAProfileH264ConstrainedBaseline;
profiles[index++] = VAProfileH264MultiviewHigh; profiles[index++] = VAProfileH264MultiviewHigh;
profiles[index++] = VAProfileH264StereoHigh; profiles[index++] = VAProfileH264StereoHigh;
/* iter39: Hi10P on rkvdec (NV15 CAPTURE, bit_depth=10). */
profiles[index++] = VAProfileH264High10;
} }
found = any_fd_supports_output_format(driver_data, V4L2_PIX_FMT_HEVC_SLICE); found = any_fd_supports_output_format(driver_data, V4L2_PIX_FMT_HEVC_SLICE);
if (found && index < (V4L2_REQUEST_MAX_PROFILES - 1)) if (found && index < (V4L2_REQUEST_MAX_PROFILES - 2)) {
profiles[index++] = VAProfileHEVCMain; profiles[index++] = VAProfileHEVCMain;
/* iter39: Main10 on rkvdec (NV15 CAPTURE). */
profiles[index++] = VAProfileHEVCMain10;
}
found = any_fd_supports_output_format(driver_data, V4L2_PIX_FMT_VP8_FRAME); found = any_fd_supports_output_format(driver_data, V4L2_PIX_FMT_VP8_FRAME);
if (found && index < (V4L2_REQUEST_MAX_PROFILES - 1)) if (found && index < (V4L2_REQUEST_MAX_PROFILES - 1))
@@ -225,7 +235,9 @@ VAStatus RequestQueryConfigEntrypoints(VADriverContextP context,
case VAProfileH264ConstrainedBaseline: case VAProfileH264ConstrainedBaseline:
case VAProfileH264MultiviewHigh: case VAProfileH264MultiviewHigh:
case VAProfileH264StereoHigh: case VAProfileH264StereoHigh:
case VAProfileH264High10:
case VAProfileHEVCMain: case VAProfileHEVCMain:
case VAProfileHEVCMain10:
case VAProfileVP8Version0_3: case VAProfileVP8Version0_3:
case VAProfileVP9Profile0: case VAProfileVP9Profile0:
entrypoints[0] = VAEntrypointVLD; entrypoints[0] = VAEntrypointVLD;
@@ -281,6 +293,16 @@ VAStatus RequestGetConfigAttributes(VADriverContextP context, VAProfile profile,
for (i = 0; i < attributes_count; i++) { for (i = 0; i < attributes_count; i++) {
switch (attributes[i].type) { switch (attributes[i].type) {
case VAConfigAttribRTFormat: case VAConfigAttribRTFormat:
/*
* iter39: 10-bit profiles publish YUV420_10. This config-
* less query (it is invoked from vaGetConfigAttributes
* before vaCreateConfig) routes off the `profile` arg
* directly — same gating as RequestCreateConfig.
*/
if (profile == VAProfileH264High10 ||
profile == VAProfileHEVCMain10)
attributes[i].value = VA_RT_FORMAT_YUV420_10;
else
attributes[i].value = VA_RT_FORMAT_YUV420; attributes[i].value = VA_RT_FORMAT_YUV420;
break; break;
default: default:
+58 -8
View File
@@ -107,9 +107,29 @@ VAStatus RequestCreateContext(VADriverContextP context, VAConfigID config_id,
* the driver_data and is cached across CreateContext cycles. The * the driver_data and is cached across CreateContext cycles. The
* probe doesn't require any prior S_FMT — v4l2_find_format * probe doesn't require any prior S_FMT — v4l2_find_format
* enumerates the device's supported formats directly. * enumerates the device's supported formats directly.
*
* iter39: choose NV15 (10-bit packed) for Hi10P / Main10 profiles,
* NV12 (8-bit) otherwise. If the cached video_format doesn't match
* the profile's bit-depth requirement, invalidate and re-probe —
* sibling pattern to iter38's device-switch invalidation in
* request_switch_device_for_profile().
*/ */
{
bool want_10bit = (config_object->profile == VAProfileH264High10 ||
config_object->profile == VAProfileHEVCMain10);
unsigned int want_pixfmt = want_10bit ? V4L2_PIX_FMT_NV15
: V4L2_PIX_FMT_NV12;
if (driver_data->video_format &&
driver_data->video_format->v4l2_format != want_pixfmt &&
driver_data->video_format->v4l2_format != V4L2_PIX_FMT_SUNXI_TILED_NV12)
driver_data->video_format = NULL;
}
if (!driver_data->video_format) { if (!driver_data->video_format) {
bool want_10bit = (config_object->profile == VAProfileH264High10 ||
config_object->profile == VAProfileHEVCMain10);
video_format = NULL; video_format = NULL;
if (!want_10bit) {
found = v4l2_find_format(driver_data->video_fd, found = v4l2_find_format(driver_data->video_fd,
V4L2_BUF_TYPE_VIDEO_CAPTURE, V4L2_BUF_TYPE_VIDEO_CAPTURE,
V4L2_PIX_FMT_SUNXI_TILED_NV12); V4L2_PIX_FMT_SUNXI_TILED_NV12);
@@ -121,6 +141,13 @@ VAStatus RequestCreateContext(VADriverContextP context, VAConfigID config_id,
V4L2_PIX_FMT_NV12); V4L2_PIX_FMT_NV12);
if (found) if (found)
video_format = video_format_find(V4L2_PIX_FMT_NV12); video_format = video_format_find(V4L2_PIX_FMT_NV12);
} else {
found = v4l2_find_format(driver_data->video_fd,
V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE,
V4L2_PIX_FMT_NV15);
if (found)
video_format = video_format_find(V4L2_PIX_FMT_NV15);
}
if (video_format == NULL) { if (video_format == NULL) {
status = VA_STATUS_ERROR_OPERATION_FAILED; status = VA_STATUS_ERROR_OPERATION_FAILED;
@@ -131,6 +158,10 @@ VAStatus RequestCreateContext(VADriverContextP context, VAConfigID config_id,
} }
video_format = driver_data->video_format; video_format = driver_data->video_format;
/* iter39: session-wide flag drives image.c reporting + unpack. */
driver_data->is_10bit = (config_object->profile == VAProfileH264High10 ||
config_object->profile == VAProfileHEVCMain10);
output_type = v4l2_type_video_output(video_format->v4l2_mplane); output_type = v4l2_type_video_output(video_format->v4l2_mplane);
capture_type = v4l2_type_video_capture(video_format->v4l2_mplane); capture_type = v4l2_type_video_capture(video_format->v4l2_mplane);
@@ -175,7 +206,12 @@ VAStatus RequestCreateContext(VADriverContextP context, VAConfigID config_id,
* CAPTURE (sanity read-back, matches what S_FMT committed). * CAPTURE (sanity read-back, matches what S_FMT committed).
*/ */
{ {
unsigned int capture_pixelformat = V4L2_PIX_FMT_NV12; /* iter39: NV15 for 10-bit profiles (rkvdec Hi10P/Main10),
* NV12 otherwise. driver_data->is_10bit was set above from
* the active profile. */
unsigned int capture_pixelformat = driver_data->is_10bit
? V4L2_PIX_FMT_NV15
: V4L2_PIX_FMT_NV12;
rc = v4l2_set_format(driver_data->video_fd, capture_type, rc = v4l2_set_format(driver_data->video_fd, capture_type,
capture_pixelformat, picture_width, capture_pixelformat, picture_width,
picture_height); picture_height);
@@ -233,15 +269,26 @@ VAStatus RequestCreateContext(VADriverContextP context, VAConfigID config_id,
* void-cast best-effort, so this is consistent with prior pattern. * void-cast best-effort, so this is consistent with prior pattern.
*/ */
{ {
/*
* iter39: 10-bit profiles set bit_depth_luma_minus8 = 2 in
* the synthetic SPS so rkvdec's get_image_fmt resolves to
* RKVDEC_IMG_FMT_420_10BIT (per rkvdec-h264-common.c:196 +
* rkvdec-hevc-common.c:467). Image_fmt resolution depends
* only on bit_depth_luma_minus8 and chroma_format_idc;
* profile_idc is ignored for image_fmt and v4l2_ctrl_hevc_sps
* has no profile_idc field at all.
*/
bool ten = driver_data->is_10bit;
switch (config_object->profile) { switch (config_object->profile) {
case VAProfileHEVCMain: { case VAProfileHEVCMain:
case VAProfileHEVCMain10: {
struct v4l2_ctrl_hevc_sps dummy_sps; struct v4l2_ctrl_hevc_sps dummy_sps;
struct v4l2_ext_control dummy_ctrl; struct v4l2_ext_control dummy_ctrl;
memset(&dummy_sps, 0, sizeof(dummy_sps)); memset(&dummy_sps, 0, sizeof(dummy_sps));
dummy_sps.chroma_format_idc = 1; /* 4:2:0 */ dummy_sps.chroma_format_idc = 1; /* 4:2:0 */
dummy_sps.bit_depth_luma_minus8 = 0; /* 8-bit */ dummy_sps.bit_depth_luma_minus8 = ten ? 2 : 0;
dummy_sps.bit_depth_chroma_minus8 = 0; dummy_sps.bit_depth_chroma_minus8 = ten ? 2 : 0;
dummy_sps.pic_width_in_luma_samples = picture_width; dummy_sps.pic_width_in_luma_samples = picture_width;
dummy_sps.pic_height_in_luma_samples = picture_height; dummy_sps.pic_height_in_luma_samples = picture_height;
@@ -256,19 +303,20 @@ VAStatus RequestCreateContext(VADriverContextP context, VAConfigID config_id,
case VAProfileH264High: case VAProfileH264High:
case VAProfileH264ConstrainedBaseline: case VAProfileH264ConstrainedBaseline:
case VAProfileH264MultiviewHigh: case VAProfileH264MultiviewHigh:
case VAProfileH264StereoHigh: { case VAProfileH264StereoHigh:
case VAProfileH264High10: {
struct v4l2_ctrl_h264_sps dummy_sps; struct v4l2_ctrl_h264_sps dummy_sps;
struct v4l2_ext_control dummy_ctrl; struct v4l2_ext_control dummy_ctrl;
memset(&dummy_sps, 0, sizeof(dummy_sps)); memset(&dummy_sps, 0, sizeof(dummy_sps));
dummy_sps.chroma_format_idc = 1; /* 4:2:0 */ dummy_sps.chroma_format_idc = 1; /* 4:2:0 */
dummy_sps.bit_depth_luma_minus8 = 0; dummy_sps.bit_depth_luma_minus8 = ten ? 2 : 0;
dummy_sps.bit_depth_chroma_minus8 = 0; dummy_sps.bit_depth_chroma_minus8 = ten ? 2 : 0;
dummy_sps.pic_width_in_mbs_minus1 = dummy_sps.pic_width_in_mbs_minus1 =
(picture_width + 15) / 16 - 1; (picture_width + 15) / 16 - 1;
dummy_sps.pic_height_in_map_units_minus1 = dummy_sps.pic_height_in_map_units_minus1 =
(picture_height + 15) / 16 - 1; (picture_height + 15) / 16 - 1;
dummy_sps.profile_idc = 100; /* High */ dummy_sps.profile_idc = ten ? 110 : 100; /* High10 : High */
dummy_sps.level_idc = 41; dummy_sps.level_idc = 41;
/* /*
* FRAME_MBS_ONLY required: rkvdec_h264_validate_sps * FRAME_MBS_ONLY required: rkvdec_h264_validate_sps
@@ -636,6 +684,8 @@ VAStatus RequestDestroyContext(VADriverContextP context, VAContextID context_id)
* The next CreateContext re-populates the cache. * The next CreateContext re-populates the cache.
*/ */
driver_data->fmt_valid = false; driver_data->fmt_valid = false;
/* iter39: clear 10-bit session flag — next CreateContext re-sets. */
driver_data->is_10bit = false;
return VA_STATUS_SUCCESS; return VA_STATUS_SUCCESS;
} }
+75 -19
View File
@@ -39,6 +39,7 @@
#include <linux/dma-buf.h> #include <linux/dma-buf.h>
#include "nv15.h"
#include "tiled_yuv.h" #include "tiled_yuv.h"
#include "utils.h" #include "utils.h"
#include "v4l2.h" #include "v4l2.h"
@@ -86,14 +87,32 @@ VAStatus RequestCreateImage(VADriverContextP context, VAImageFormat *format,
for (i = 0; i < planes_count; i++) for (i = 0; i < planes_count; i++)
size += destination_sizes[i]; size += destination_sizes[i];
/* Here we calculate the sizes assuming NV12. */ if (format->fourcc == VA_FOURCC_P010) {
/*
* iter39: P010 image overrides V4L2-side NV15 sizing. The
* source is the kernel-reported NV15 packed plane; the image
* buffer holds dense P010 (2 bytes per pixel, 16bpp).
* Recompute sizes/pitches against P010 layout so consumers
* (vaGetImage, vaDeriveImage) see standard P010 geometry.
*/
destination_bytesperlines[0] = width * 2;
destination_sizes[0] = destination_bytesperlines[0] * format_height;
for (i = 1; i < destination_planes_count; i++) {
destination_bytesperlines[i] = destination_bytesperlines[0];
destination_sizes[i] = destination_sizes[0] / 2;
}
size = 0;
for (i = 0; i < destination_planes_count; i++)
size += destination_sizes[i];
} else {
/* NV12: V4L2 stride is correct, sizes derived from height. */
destination_sizes[0] = destination_bytesperlines[0] * format_height; destination_sizes[0] = destination_bytesperlines[0] * format_height;
for (i = 1; i < destination_planes_count; i++) { for (i = 1; i < destination_planes_count; i++) {
destination_bytesperlines[i] = destination_bytesperlines[0]; destination_bytesperlines[i] = destination_bytesperlines[0];
destination_sizes[i] = destination_sizes[0] / 2; destination_sizes[i] = destination_sizes[0] / 2;
} }
}
id = object_heap_allocate(&driver_data->image_heap); id = object_heap_allocate(&driver_data->image_heap);
image_object = IMAGE(driver_data, id); image_object = IMAGE(driver_data, id);
@@ -224,7 +243,24 @@ static VAStatus copy_surface_to_image (struct request_data *driver_data,
image->pitches[i], image->width, image->pitches[i], image->width,
i == 0 ? image->height : i == 0 ? image->height :
image->height / 2); image->height / 2);
else { else if (driver_data->is_10bit &&
image->format.fourcc == VA_FOURCC_P010) {
/*
* iter39: rkvdec emits NV15 (4×10-bit packed in 5
* bytes); the VA image buffer is dense P010 (2B/pixel,
* value in bits[15:6]). Source stride is the V4L2-
* reported NV15 bytesperline (= ceil(width/4)*5,
* possibly aligned higher by the kernel); destination
* stride is image->pitches[i] = width * 2.
*/
unsigned int plane_h = (i == 0) ? image->height
: image->height / 2;
nv15_unpack_plane_to_p010(
surface_object->destination_data[i],
(uint16_t *)(buffer_object->data + image->offsets[i]),
image->width, plane_h,
surface_object->destination_bytesperlines[i]);
} else {
#endif #endif
memcpy(buffer_object->data + image->offsets[i], memcpy(buffer_object->data + image->offsets[i],
surface_object->destination_data[i], surface_object->destination_data[i],
@@ -268,9 +304,17 @@ VAStatus RequestDeriveImage(VADriverContextP context, VASurfaceID surface_id,
/* Fully populate VAImageFormat to match QueryImageFormats output. */ /* Fully populate VAImageFormat to match QueryImageFormats output. */
memset(&format, 0, sizeof(format)); memset(&format, 0, sizeof(format));
if (driver_data->is_10bit) {
/* iter39: 10-bit session derives a P010 image. NV15-source
* unpack happens in copy_surface_to_image. */
format.fourcc = VA_FOURCC_P010;
format.byte_order = VA_LSB_FIRST;
format.bits_per_pixel = 24;
} else {
format.fourcc = VA_FOURCC_NV12; format.fourcc = VA_FOURCC_NV12;
format.byte_order = VA_LSB_FIRST; format.byte_order = VA_LSB_FIRST;
format.bits_per_pixel = 12; format.bits_per_pixel = 12;
}
status = RequestCreateImage(context, &format, surface_object->width, status = RequestCreateImage(context, &format, surface_object->width,
surface_object->height, image); surface_object->height, image);
@@ -305,26 +349,38 @@ VAStatus RequestDeriveImage(VADriverContextP context, VASurfaceID surface_id,
VAStatus RequestQueryImageFormats(VADriverContextP context, VAStatus RequestQueryImageFormats(VADriverContextP context,
VAImageFormat *formats, int *formats_count) VAImageFormat *formats, int *formats_count)
{ {
struct request_data *driver_data = context->pDriverData;
int n = 0;
/* /*
* Populate the VAImageFormat fully per VAAPI spec for NV12 — * Populate the VAImageFormat fully per VAAPI spec — not just
* not just .fourcc. Consumers (FFmpeg's hwcontext_vaapi, mpv, * .fourcc. Consumers (FFmpeg's hwcontext_vaapi, mpv, Firefox)
* Firefox) read .byte_order and .bits_per_pixel; leaving them * read .byte_order and .bits_per_pixel; leaving them
* uninitialized inherits whatever caller-stack garbage is in * uninitialized inherits caller-stack garbage and produces
* the buffer and produces non-deterministic behavior. Reference: * non-deterministic behavior. Reference: Mesa's
* Mesa's gallium/frontends/va/image.c::vlVaQueryImageFormats and * gallium/frontends/va/image.c::vlVaQueryImageFormats and
* intel-vaapi-driver's i965_drv_video.c — both publish NV12 * intel-vaapi-driver's i965_drv_video.c.
* with byte_order=VA_LSB_FIRST and bits_per_pixel=12.
* *
* For YUV formats, depth/red_mask/green_mask/blue_mask/alpha_mask * iter39: advertise P010 when an active session is 10-bit so
* are not meaningful (those describe RGB bit layouts); leave them * ffmpeg-vaapi sees a valid 10-bit-compatible entry during
* zeroed via memset before populating. * vaQueryImageFormats. NV12 stays advertised unconditionally so
* the 8-bit catalog query response is unchanged.
*/ */
memset(&formats[0], 0, sizeof(formats[0])); memset(&formats[n], 0, sizeof(formats[n]));
formats[0].fourcc = VA_FOURCC_NV12; formats[n].fourcc = VA_FOURCC_NV12;
formats[0].byte_order = VA_LSB_FIRST; formats[n].byte_order = VA_LSB_FIRST;
formats[0].bits_per_pixel = 12; formats[n].bits_per_pixel = 12;
*formats_count = 1; n++;
if (driver_data->is_10bit && n < V4L2_REQUEST_MAX_IMAGE_FORMATS) {
memset(&formats[n], 0, sizeof(formats[n]));
formats[n].fourcc = VA_FOURCC_P010;
formats[n].byte_order = VA_LSB_FIRST;
formats[n].bits_per_pixel = 24;
n++;
}
*formats_count = n;
return VA_STATUS_SUCCESS; return VA_STATUS_SUCCESS;
} }
+4 -2
View File
@@ -50,7 +50,8 @@ sources = [
'h265.c', 'h265.c',
'vp8.c', 'vp8.c',
'vp9.c', 'vp9.c',
'codec.c' 'codec.c',
'nv15.c'
] ]
headers = [ headers = [
@@ -76,7 +77,8 @@ headers = [
'h265.h', 'h265.h',
'vp8.h', 'vp8.h',
'vp9.h', 'vp9.h',
'codec.h' 'codec.h',
'nv15.h'
] ]
includes = [ includes = [
+75
View File
@@ -0,0 +1,75 @@
/*
* Copyright (C) 2026 claude-noether <claude-noether@reauktion.de>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "nv15.h"
/*
 * Unpack one V4L2_PIX_FMT_NV15 plane into dense P010 samples.
 *
 * NV15 stores four 10-bit samples LSB-first in five consecutive bytes. Each
 * output sample is the 10-bit value moved into bits [15:6] of a uint16_t,
 * with bits [5:0] zero.
 *
 * src:        first byte of the packed source plane
 * dst:        first output sample; output rows are written back to back,
 *             `width` samples (width * 2 bytes) apart
 * width:      samples per row; need not be a multiple of 4
 * height:     number of rows
 * src_stride: bytes between the starts of consecutive source rows
 *
 * When width is not a multiple of 4, only the bytes that carry the remaining
 * 1..3 samples of the last group are read, and only those samples are
 * written.
 */
void nv15_unpack_plane_to_p010(const uint8_t *src, uint16_t *dst,
			       unsigned int width, unsigned int height,
			       unsigned int src_stride)
{
	const unsigned int groups = width / 4;
	const unsigned int tail = width % 4;
	unsigned int row;

	for (row = 0; row < height; row++) {
		/*
		 * Widen before multiplying so the row offsets are computed in
		 * pointer-width arithmetic rather than 32-bit unsigned int.
		 */
		const uint8_t *in = src + (uintptr_t)row * src_stride;
		uint16_t *out = dst + (uintptr_t)row * width;
		unsigned int g;

		/* Full groups: 5 packed bytes -> 4 samples. */
		for (g = 0; g < groups; g++) {
			out[0] = (uint16_t)((in[0] | ((in[1] & 0x03) << 8)) << 6);
			out[1] = (uint16_t)(((in[1] >> 2) | ((in[2] & 0x0F) << 6)) << 6);
			out[2] = (uint16_t)(((in[2] >> 4) | ((in[3] & 0x3F) << 4)) << 6);
			out[3] = (uint16_t)(((in[3] >> 6) | (in[4] << 2)) << 6);
			in += 5;
			out += 4;
		}

		/*
		 * Partial trailing group (tail is 0..3 by construction, so a
		 * fourth sample can never occur here).
		 */
		if (tail >= 1)
			out[0] = (uint16_t)((in[0] | ((in[1] & 0x03) << 8)) << 6);
		if (tail >= 2)
			out[1] = (uint16_t)(((in[1] >> 2) | ((in[2] & 0x0F) << 6)) << 6);
		if (tail >= 3)
			out[2] = (uint16_t)(((in[2] >> 4) | ((in[3] & 0x3F) << 4)) << 6);
	}
}
+46
View File
@@ -0,0 +1,46 @@
/*
* Copyright (C) 2026 claude-noether <claude-noether@reauktion.de>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef _NV15_H_
#define _NV15_H_
#include <stdint.h>
/*
 * Unpack one plane of V4L2_PIX_FMT_NV15 (4 × 10-bit values packed into
 * 5 consecutive bytes, LSB-first) into VA_FOURCC_P010 (16-bit per pixel,
 * value in bits [15:6], zeros in [5:0]).
 *
 * Layout per Documentation/userspace-api/media/v4l/pixfmt-nv15.rst.
 * Call once per plane: luma (W × H) and chroma (W × H/2 — same width
 * because UV are interleaved 10-bit values).
 *
 * src:        start of the packed NV15 plane.
 * dst:        start of the P010 output plane. Rows are written densely,
 *             i.e. with a row pitch of exactly width * 2 bytes; the
 *             function takes no separate destination pitch.
 * width:      number of 10-bit values per row. It need not be a multiple
 *             of 4: for a partial trailing group only the remaining 1..3
 *             values are unpacked and written.
 * height:     number of rows in this plane.
 * src_stride: byte distance between consecutive source rows. Pass the
 *             kernel-reported bytesperline for the NV15 plane, which is at
 *             least ceil(W/4)*5 and may be aligned higher.
 */
void nv15_unpack_plane_to_p010(const uint8_t *src, uint16_t *dst,
unsigned int width, unsigned int height,
unsigned int src_stride);
#endif
+9 -1
View File
@@ -132,12 +132,14 @@ static VAStatus codec_store_buffer(struct request_data *driver_data,
case VAProfileH264ConstrainedBaseline: case VAProfileH264ConstrainedBaseline:
case VAProfileH264MultiviewHigh: case VAProfileH264MultiviewHigh:
case VAProfileH264StereoHigh: case VAProfileH264StereoHigh:
case VAProfileH264High10:
memcpy(&surface_object->params.h264.picture, memcpy(&surface_object->params.h264.picture,
buffer_object->data, buffer_object->data,
sizeof(surface_object->params.h264.picture)); sizeof(surface_object->params.h264.picture));
break; break;
case VAProfileHEVCMain: case VAProfileHEVCMain:
case VAProfileHEVCMain10:
memcpy(&surface_object->params.h265.picture, memcpy(&surface_object->params.h265.picture,
buffer_object->data, buffer_object->data,
sizeof(surface_object->params.h265.picture)); sizeof(surface_object->params.h265.picture));
@@ -167,12 +169,14 @@ static VAStatus codec_store_buffer(struct request_data *driver_data,
case VAProfileH264ConstrainedBaseline: case VAProfileH264ConstrainedBaseline:
case VAProfileH264MultiviewHigh: case VAProfileH264MultiviewHigh:
case VAProfileH264StereoHigh: case VAProfileH264StereoHigh:
case VAProfileH264High10:
memcpy(&surface_object->params.h264.slice, memcpy(&surface_object->params.h264.slice,
buffer_object->data, buffer_object->data,
sizeof(surface_object->params.h264.slice)); sizeof(surface_object->params.h264.slice));
break; break;
case VAProfileHEVCMain: { case VAProfileHEVCMain:
case VAProfileHEVCMain10: {
unsigned int n = surface_object->params.h265.num_slices; unsigned int n = surface_object->params.h265.num_slices;
if (n < HEVC_MAX_SLICES_PER_FRAME) { if (n < HEVC_MAX_SLICES_PER_FRAME) {
memcpy(&surface_object->params.h265.slices[n], memcpy(&surface_object->params.h265.slices[n],
@@ -220,6 +224,7 @@ static VAStatus codec_store_buffer(struct request_data *driver_data,
case VAProfileH264ConstrainedBaseline: case VAProfileH264ConstrainedBaseline:
case VAProfileH264MultiviewHigh: case VAProfileH264MultiviewHigh:
case VAProfileH264StereoHigh: case VAProfileH264StereoHigh:
case VAProfileH264High10:
memcpy(&surface_object->params.h264.matrix, memcpy(&surface_object->params.h264.matrix,
buffer_object->data, buffer_object->data,
sizeof(surface_object->params.h264.matrix)); sizeof(surface_object->params.h264.matrix));
@@ -227,6 +232,7 @@ static VAStatus codec_store_buffer(struct request_data *driver_data,
break; break;
case VAProfileHEVCMain: case VAProfileHEVCMain:
case VAProfileHEVCMain10:
memcpy(&surface_object->params.h265.iqmatrix, memcpy(&surface_object->params.h265.iqmatrix,
buffer_object->data, buffer_object->data,
sizeof(surface_object->params.h265.iqmatrix)); sizeof(surface_object->params.h265.iqmatrix));
@@ -286,6 +292,7 @@ static VAStatus codec_set_controls(struct request_data *driver_data,
case VAProfileH264ConstrainedBaseline: case VAProfileH264ConstrainedBaseline:
case VAProfileH264MultiviewHigh: case VAProfileH264MultiviewHigh:
case VAProfileH264StereoHigh: case VAProfileH264StereoHigh:
case VAProfileH264High10:
rc = h264_set_controls(driver_data, context, profile, rc = h264_set_controls(driver_data, context, profile,
surface_object); surface_object);
if (rc < 0) if (rc < 0)
@@ -293,6 +300,7 @@ static VAStatus codec_set_controls(struct request_data *driver_data,
break; break;
case VAProfileHEVCMain: case VAProfileHEVCMain:
case VAProfileHEVCMain10:
rc = h265_set_controls(driver_data, context, surface_object); rc = h265_set_controls(driver_data, context, surface_object);
if (rc < 0) if (rc < 0)
return VA_STATUS_ERROR_OPERATION_FAILED; return VA_STATUS_ERROR_OPERATION_FAILED;
+12 -1
View File
@@ -40,7 +40,7 @@
#define V4L2_REQUEST_STR_VENDOR "v4l2-request" #define V4L2_REQUEST_STR_VENDOR "v4l2-request"
#define V4L2_REQUEST_MAX_PROFILES 11 #define V4L2_REQUEST_MAX_PROFILES 13
#define V4L2_REQUEST_MAX_ENTRYPOINTS 5 #define V4L2_REQUEST_MAX_ENTRYPOINTS 5
#define V4L2_REQUEST_MAX_CONFIG_ATTRIBUTES 10 #define V4L2_REQUEST_MAX_CONFIG_ATTRIBUTES 10
#define V4L2_REQUEST_MAX_IMAGE_FORMATS 10 #define V4L2_REQUEST_MAX_IMAGE_FORMATS 10
@@ -133,6 +133,17 @@ struct request_data {
unsigned int fmt_buffers_count; unsigned int fmt_buffers_count;
unsigned int fmt_sizes[VIDEO_MAX_PLANES]; unsigned int fmt_sizes[VIDEO_MAX_PLANES];
unsigned int fmt_bytesperlines[VIDEO_MAX_PLANES]; unsigned int fmt_bytesperlines[VIDEO_MAX_PLANES];
/*
* iter39: active session is decoding a 10-bit profile (Hi10P / Main10).
* Set in RequestCreateContext from config->profile. Drives:
* - CAPTURE pix_fmt selection (NV15 instead of NV12)
* - image.c DeriveImage / QueryImageFormats fourcc reporting (P010
* instead of NV12)
* - copy_surface_to_image NV15→P010 unpack branch
* Reset to false at DestroyContext.
*/
bool is_10bit;
}; };
VAStatus VA_DRIVER_INIT_FUNC(VADriverContextP context); VAStatus VA_DRIVER_INIT_FUNC(VADriverContextP context);
+11 -2
View File
@@ -182,7 +182,9 @@ VAStatus RequestCreateSurfaces2(VADriverContextP context, unsigned int format,
* surface_bind_format_uniform_fields(); the per-slot * surface_bind_format_uniform_fields(); the per-slot
* destination_* fields fill at BeginPicture via surface_bind_slot. * destination_* fields fill at BeginPicture via surface_bind_slot.
*/ */
if (format != VA_RT_FORMAT_YUV420) /* iter39: allow YUV420_10 for Hi10P / Main10 surface allocation. */
if (format != VA_RT_FORMAT_YUV420 &&
format != VA_RT_FORMAT_YUV420_10)
return VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT; return VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT;
for (i = 0; i < surfaces_count; i++) { for (i = 0; i < surfaces_count; i++) {
@@ -706,7 +708,14 @@ VAStatus RequestExportSurfaceHandle(VADriverContextP context,
planes_count = surface_object->destination_planes_count; planes_count = surface_object->destination_planes_count;
surface_descriptor->fourcc = VA_FOURCC_NV12; /* iter39: 10-bit session exports a DRM_FORMAT_NV15 buffer; advertise
* the matching fourcc so a PRIME consumer aware of NV15 (panfrost-
* Mesa et al.) can import correctly. PRIME consumers that only know
* NV12 / P010 should use the COPY (vaGetImage) path which unpacks
* NV15→P010 in image.c::copy_surface_to_image. */
surface_descriptor->fourcc = driver_data->is_10bit
? VA_FOURCC('N', 'V', '1', '5')
: VA_FOURCC_NV12;
surface_descriptor->width = surface_object->width; surface_descriptor->width = surface_object->width;
surface_descriptor->height = surface_object->height; surface_descriptor->height = surface_object->height;
surface_descriptor->num_objects = export_fds_count; surface_descriptor->num_objects = export_fds_count;
+10
View File
@@ -45,6 +45,16 @@ static struct video_format formats[] = {
.planes_count = 2, .planes_count = 2,
.bpp = 16, .bpp = 16,
}, },
{
.description = "NV15 YUV (10-bit, rkvdec)",
.v4l2_format = V4L2_PIX_FMT_NV15,
.v4l2_buffers_count = 1,
.v4l2_mplane = true,
.drm_format = DRM_FORMAT_NV15,
.drm_modifier = DRM_FORMAT_MOD_NONE,
.planes_count = 2,
.bpp = 24,
},
// Code to handle this DRM_FORMAT is __arm__ only // Code to handle this DRM_FORMAT is __arm__ only
#ifdef __arm__ #ifdef __arm__
{ {