iter39 α-31: H264 Hi10P + HEVC Main10 sub-profile support (10-bit, rkvdec NV15)
Adds VAProfileH264High10 and VAProfileHEVCMain10 to the libva-v4l2-request
backend. RK3399 rkvdec emits decoded frames as V4L2_PIX_FMT_NV15 (4 × 10-bit
values packed in 5 bytes per element); VAAPI consumers receive standard
VA_FOURCC_P010 via a new userspace unpack in copy_surface_to_image.
VP9 Profile 2 explicitly NOT added — RK3399 rkvdec kernel ctrl table
caps at V4L2_MPEG_VIDEO_VP9_PROFILE_0 (rkvdec.c::rkvdec_vp9_ctrl_descs).
Touchpoints (per Phase 5 sonnet-architect review amendments):
- include/drm_fourcc.h: define DRM_FORMAT_NV15 (vendored libdrm lacks it)
- src/nv15.{c,h}: NV15 → P010 plane unpack (LSB-first, per
Documentation/userspace-api/media/v4l/pixfmt-nv15.rst)
- src/video.c: NV15 entry in formats[] (else NULL-deref on video_format_find)
- src/codec.c: pixelformat_for_profile cases for Hi10P + Main10
- src/config.c: enumeration, validation, entrypoints, RT_FORMAT_YUV420_10
advertisement for 10-bit profiles
- src/context.c: per-profile CAPTURE pix_fmt (NV12/NV15), 10-bit synthetic
SPS (bit_depth_luma_minus8=2), video_format invalidation on bit-depth
transition (sibling to iter38 device-switch invalidation), is_10bit flag
- src/surface.c: RT_FORMAT_YUV420_10 admission, NV15 fourcc on PRIME export
- src/image.c: P010 reporting in DeriveImage + QueryImageFormats,
P010-aware sizing in CreateImage, NV15 → P010 unpack call in
copy_surface_to_image (gated on is_10bit + image.format.fourcc == P010)
- src/picture.c: 4 switch blocks route Hi10P/Main10 to existing H264/HEVC
per-codec paths
- src/request.h: MAX_PROFILES bump 11 → 13, driver_data->is_10bit flag
Scope: COPY path (vaGetImage / vaDeriveImage) only. Standard ffmpeg-vaapi
hwdownload, mpv vaapi-copy, and any consumer using vaGetImage works
end-to-end. PRIME-path consumers that only know NV12/P010 must use the
COPY path; PRIME consumers aware of NV15 (panfrost-Mesa et al.) get the
correct fourcc on RequestExportSurfaceHandle. PRIME-side P010 emission is
follow-up scope (would need DRM_FORMAT_P010 + per-plane unpack into a
GPU-accessible buffer).
Compile-tested on boltzmann (aarch64 native, gcc 15.2.1, libva 1.23.0,
libdrm 2.4.133): clean build, .so produced, 0 new warnings.
Phase 0/2 evidence: linux-mmind-v7.0 drivers/media/platform/rockchip/rkvdec.
rkvdec_h264_decoded_fmts[] and rkvdec_hevc_decoded_fmts[] both list NV15;
ctrl tables cap at HEVC MAIN_10 and H264 HIGH_422_INTRA (Hi10P < cap, not
in menu_skip_mask). image_fmt resolution (rkvdec-h264-common.c:196,
rkvdec-hevc-common.c:467) dispatches on bit_depth_luma_minus8 only.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -195,6 +195,11 @@ extern "C" {
|
||||
#define DRM_FORMAT_NV24 fourcc_code('N', 'V', '2', '4') /* non-subsampled Cr:Cb plane */
|
||||
#define DRM_FORMAT_NV42 fourcc_code('N', 'V', '4', '2') /* non-subsampled Cb:Cr plane */
|
||||
|
||||
/* iter39: NV15 is 4×10-bit packed in 5 bytes (Rockchip rkvdec 10-bit output). */
|
||||
#ifndef DRM_FORMAT_NV15
|
||||
#define DRM_FORMAT_NV15 fourcc_code('N', 'V', '1', '5') /* 2x2 subsampled Cr:Cb plane 10 bits per channel packed */
|
||||
#endif
|
||||
|
||||
/*
|
||||
* 3 plane YCbCr
|
||||
* index 0: Y plane, [7:0] Y
|
||||
|
||||
@@ -37,8 +37,10 @@ unsigned int pixelformat_for_profile(VAProfile profile)
|
||||
case VAProfileH264ConstrainedBaseline:
|
||||
case VAProfileH264MultiviewHigh:
|
||||
case VAProfileH264StereoHigh:
|
||||
case VAProfileH264High10:
|
||||
return V4L2_PIX_FMT_H264_SLICE;
|
||||
case VAProfileHEVCMain:
|
||||
case VAProfileHEVCMain10:
|
||||
return V4L2_PIX_FMT_HEVC_SLICE;
|
||||
case VAProfileVP8Version0_3:
|
||||
return V4L2_PIX_FMT_VP8_FRAME;
|
||||
|
||||
+37
-15
@@ -59,30 +59,27 @@ VAStatus RequestCreateConfig(VADriverContextP context, VAProfile profile,
|
||||
case VAProfileH264ConstrainedBaseline:
|
||||
case VAProfileH264MultiviewHigh:
|
||||
case VAProfileH264StereoHigh:
|
||||
case VAProfileH264High10:
|
||||
// FIXME
|
||||
// iter39: Hi10P routed through same H264 path; bit-depth gating
|
||||
// happens in context.c synthetic SPS and CAPTURE pix_fmt
|
||||
// selection.
|
||||
break;
|
||||
case VAProfileMPEG2Simple:
|
||||
case VAProfileMPEG2Main:
|
||||
// fresnel-fourier iter1: MPEG-2 enabled. Same shape as H.264
|
||||
// above — no profile-specific config validation in the libva
|
||||
// backend; validation happens at vaCreateContext / control
|
||||
// submission time.
|
||||
break;
|
||||
case VAProfileHEVCMain:
|
||||
// fresnel-fourier iter2: HEVC enabled. Same shape as H.264/
|
||||
// MPEG-2 above — no profile-specific config validation in the
|
||||
// libva backend; validation happens at vaCreateContext / control
|
||||
// submission time.
|
||||
case VAProfileHEVCMain10:
|
||||
// iter39: Main10 routed through same HEVC path; bit-depth
|
||||
// gating happens in context.c.
|
||||
break;
|
||||
case VAProfileVP8Version0_3:
|
||||
// fresnel-fourier iter3: VP8 enabled. Same shape as iter1+iter2
|
||||
// above — no profile-specific config validation in the libva
|
||||
// backend; validation happens at vaCreateContext / control
|
||||
// submission time.
|
||||
break;
|
||||
case VAProfileVP9Profile0:
|
||||
// fresnel-fourier iter4: VP9 Profile 0 enabled on rkvdec.
|
||||
// Same shape — no profile-specific validation here.
|
||||
// VP9 Profile 2 is NOT supported by RK3399 rkvdec (kernel ctrl
|
||||
// cap is V4L2_MPEG_VIDEO_VP9_PROFILE_0). Do not add a case for
|
||||
// VAProfileVP9Profile2 — kernel will reject.
|
||||
break;
|
||||
default:
|
||||
return VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
|
||||
@@ -119,6 +116,14 @@ VAStatus RequestCreateConfig(VADriverContextP context, VAProfile profile,
|
||||
*/
|
||||
config_object->pixelformat = pixelformat_for_profile(profile);
|
||||
config_object->attributes[0].type = VAConfigAttribRTFormat;
|
||||
/*
|
||||
* iter39: 10-bit profiles advertise YUV420_10. ffmpeg-vaapi reads
|
||||
* this attribute on vaGetConfigAttributes and refuses surface
|
||||
* allocation if it mismatches the input bitstream's bit depth.
|
||||
*/
|
||||
if (profile == VAProfileH264High10 || profile == VAProfileHEVCMain10)
|
||||
config_object->attributes[0].value = VA_RT_FORMAT_YUV420_10;
|
||||
else
|
||||
config_object->attributes[0].value = VA_RT_FORMAT_YUV420;
|
||||
config_object->attributes_count = 1;
|
||||
|
||||
@@ -187,17 +192,22 @@ VAStatus RequestQueryConfigProfiles(VADriverContextP context,
|
||||
}
|
||||
|
||||
found = any_fd_supports_output_format(driver_data, V4L2_PIX_FMT_H264_SLICE);
|
||||
if (found && index < (V4L2_REQUEST_MAX_PROFILES - 5)) {
|
||||
if (found && index < (V4L2_REQUEST_MAX_PROFILES - 6)) {
|
||||
profiles[index++] = VAProfileH264Main;
|
||||
profiles[index++] = VAProfileH264High;
|
||||
profiles[index++] = VAProfileH264ConstrainedBaseline;
|
||||
profiles[index++] = VAProfileH264MultiviewHigh;
|
||||
profiles[index++] = VAProfileH264StereoHigh;
|
||||
/* iter39: Hi10P on rkvdec (NV15 CAPTURE, bit_depth=10). */
|
||||
profiles[index++] = VAProfileH264High10;
|
||||
}
|
||||
|
||||
found = any_fd_supports_output_format(driver_data, V4L2_PIX_FMT_HEVC_SLICE);
|
||||
if (found && index < (V4L2_REQUEST_MAX_PROFILES - 1))
|
||||
if (found && index < (V4L2_REQUEST_MAX_PROFILES - 2)) {
|
||||
profiles[index++] = VAProfileHEVCMain;
|
||||
/* iter39: Main10 on rkvdec (NV15 CAPTURE). */
|
||||
profiles[index++] = VAProfileHEVCMain10;
|
||||
}
|
||||
|
||||
found = any_fd_supports_output_format(driver_data, V4L2_PIX_FMT_VP8_FRAME);
|
||||
if (found && index < (V4L2_REQUEST_MAX_PROFILES - 1))
|
||||
@@ -225,7 +235,9 @@ VAStatus RequestQueryConfigEntrypoints(VADriverContextP context,
|
||||
case VAProfileH264ConstrainedBaseline:
|
||||
case VAProfileH264MultiviewHigh:
|
||||
case VAProfileH264StereoHigh:
|
||||
case VAProfileH264High10:
|
||||
case VAProfileHEVCMain:
|
||||
case VAProfileHEVCMain10:
|
||||
case VAProfileVP8Version0_3:
|
||||
case VAProfileVP9Profile0:
|
||||
entrypoints[0] = VAEntrypointVLD;
|
||||
@@ -281,6 +293,16 @@ VAStatus RequestGetConfigAttributes(VADriverContextP context, VAProfile profile,
|
||||
for (i = 0; i < attributes_count; i++) {
|
||||
switch (attributes[i].type) {
|
||||
case VAConfigAttribRTFormat:
|
||||
/*
|
||||
* iter39: 10-bit profiles publish YUV420_10. Profile-
|
||||
* less query (this is invoked from vaGetConfigAttributes
|
||||
* before vaCreateConfig) routes off the `profile` arg
|
||||
* directly — same gating as RequestCreateConfig.
|
||||
*/
|
||||
if (profile == VAProfileH264High10 ||
|
||||
profile == VAProfileHEVCMain10)
|
||||
attributes[i].value = VA_RT_FORMAT_YUV420_10;
|
||||
else
|
||||
attributes[i].value = VA_RT_FORMAT_YUV420;
|
||||
break;
|
||||
default:
|
||||
|
||||
+58
-8
@@ -107,9 +107,29 @@ VAStatus RequestCreateContext(VADriverContextP context, VAConfigID config_id,
|
||||
* the driver_data and is cached across CreateContext cycles. The
|
||||
* probe doesn't require any prior S_FMT — v4l2_find_format
|
||||
* enumerates the device's supported formats directly.
|
||||
*
|
||||
* iter39: choose NV15 (10-bit packed) for Hi10P / Main10 profiles,
|
||||
* NV12 (8-bit) otherwise. If the cached video_format doesn't match
|
||||
* the profile's bit-depth requirement, invalidate and re-probe —
|
||||
* sibling pattern to iter38's device-switch invalidation in
|
||||
* request_switch_device_for_profile().
|
||||
*/
|
||||
{
|
||||
bool want_10bit = (config_object->profile == VAProfileH264High10 ||
|
||||
config_object->profile == VAProfileHEVCMain10);
|
||||
unsigned int want_pixfmt = want_10bit ? V4L2_PIX_FMT_NV15
|
||||
: V4L2_PIX_FMT_NV12;
|
||||
if (driver_data->video_format &&
|
||||
driver_data->video_format->v4l2_format != want_pixfmt &&
|
||||
driver_data->video_format->v4l2_format != V4L2_PIX_FMT_SUNXI_TILED_NV12)
|
||||
driver_data->video_format = NULL;
|
||||
}
|
||||
if (!driver_data->video_format) {
|
||||
bool want_10bit = (config_object->profile == VAProfileH264High10 ||
|
||||
config_object->profile == VAProfileHEVCMain10);
|
||||
video_format = NULL;
|
||||
|
||||
if (!want_10bit) {
|
||||
found = v4l2_find_format(driver_data->video_fd,
|
||||
V4L2_BUF_TYPE_VIDEO_CAPTURE,
|
||||
V4L2_PIX_FMT_SUNXI_TILED_NV12);
|
||||
@@ -121,6 +141,13 @@ VAStatus RequestCreateContext(VADriverContextP context, VAConfigID config_id,
|
||||
V4L2_PIX_FMT_NV12);
|
||||
if (found)
|
||||
video_format = video_format_find(V4L2_PIX_FMT_NV12);
|
||||
} else {
|
||||
found = v4l2_find_format(driver_data->video_fd,
|
||||
V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE,
|
||||
V4L2_PIX_FMT_NV15);
|
||||
if (found)
|
||||
video_format = video_format_find(V4L2_PIX_FMT_NV15);
|
||||
}
|
||||
|
||||
if (video_format == NULL) {
|
||||
status = VA_STATUS_ERROR_OPERATION_FAILED;
|
||||
@@ -131,6 +158,10 @@ VAStatus RequestCreateContext(VADriverContextP context, VAConfigID config_id,
|
||||
}
|
||||
video_format = driver_data->video_format;
|
||||
|
||||
/* iter39: session-wide flag drives image.c reporting + unpack. */
|
||||
driver_data->is_10bit = (config_object->profile == VAProfileH264High10 ||
|
||||
config_object->profile == VAProfileHEVCMain10);
|
||||
|
||||
output_type = v4l2_type_video_output(video_format->v4l2_mplane);
|
||||
capture_type = v4l2_type_video_capture(video_format->v4l2_mplane);
|
||||
|
||||
@@ -175,7 +206,12 @@ VAStatus RequestCreateContext(VADriverContextP context, VAConfigID config_id,
|
||||
* CAPTURE (sanity read-back, matches what S_FMT committed).
|
||||
*/
|
||||
{
|
||||
unsigned int capture_pixelformat = V4L2_PIX_FMT_NV12;
|
||||
/* iter39: NV15 for 10-bit profiles (rkvdec Hi10P/Main10),
|
||||
* NV12 otherwise. driver_data->is_10bit was set above from
|
||||
* the active profile. */
|
||||
unsigned int capture_pixelformat = driver_data->is_10bit
|
||||
? V4L2_PIX_FMT_NV15
|
||||
: V4L2_PIX_FMT_NV12;
|
||||
rc = v4l2_set_format(driver_data->video_fd, capture_type,
|
||||
capture_pixelformat, picture_width,
|
||||
picture_height);
|
||||
@@ -233,15 +269,26 @@ VAStatus RequestCreateContext(VADriverContextP context, VAConfigID config_id,
|
||||
* void-cast best-effort, so this is consistent with prior pattern.
|
||||
*/
|
||||
{
|
||||
/*
|
||||
* iter39: 10-bit profiles set bit_depth_luma_minus8 = 2 in
|
||||
* the synthetic SPS so rkvdec's get_image_fmt resolves to
|
||||
* RKVDEC_IMG_FMT_420_10BIT (per rkvdec-h264-common.c:196 +
|
||||
* rkvdec-hevc-common.c:467). Image_fmt resolution depends
|
||||
* only on bit_depth_luma_minus8 and chroma_format_idc;
|
||||
* profile_idc is ignored for image_fmt and v4l2_ctrl_hevc_sps
|
||||
* has no profile_idc field at all.
|
||||
*/
|
||||
bool ten = driver_data->is_10bit;
|
||||
switch (config_object->profile) {
|
||||
case VAProfileHEVCMain: {
|
||||
case VAProfileHEVCMain:
|
||||
case VAProfileHEVCMain10: {
|
||||
struct v4l2_ctrl_hevc_sps dummy_sps;
|
||||
struct v4l2_ext_control dummy_ctrl;
|
||||
|
||||
memset(&dummy_sps, 0, sizeof(dummy_sps));
|
||||
dummy_sps.chroma_format_idc = 1; /* 4:2:0 */
|
||||
dummy_sps.bit_depth_luma_minus8 = 0; /* 8-bit */
|
||||
dummy_sps.bit_depth_chroma_minus8 = 0;
|
||||
dummy_sps.bit_depth_luma_minus8 = ten ? 2 : 0;
|
||||
dummy_sps.bit_depth_chroma_minus8 = ten ? 2 : 0;
|
||||
dummy_sps.pic_width_in_luma_samples = picture_width;
|
||||
dummy_sps.pic_height_in_luma_samples = picture_height;
|
||||
|
||||
@@ -256,19 +303,20 @@ VAStatus RequestCreateContext(VADriverContextP context, VAConfigID config_id,
|
||||
case VAProfileH264High:
|
||||
case VAProfileH264ConstrainedBaseline:
|
||||
case VAProfileH264MultiviewHigh:
|
||||
case VAProfileH264StereoHigh: {
|
||||
case VAProfileH264StereoHigh:
|
||||
case VAProfileH264High10: {
|
||||
struct v4l2_ctrl_h264_sps dummy_sps;
|
||||
struct v4l2_ext_control dummy_ctrl;
|
||||
|
||||
memset(&dummy_sps, 0, sizeof(dummy_sps));
|
||||
dummy_sps.chroma_format_idc = 1; /* 4:2:0 */
|
||||
dummy_sps.bit_depth_luma_minus8 = 0;
|
||||
dummy_sps.bit_depth_chroma_minus8 = 0;
|
||||
dummy_sps.bit_depth_luma_minus8 = ten ? 2 : 0;
|
||||
dummy_sps.bit_depth_chroma_minus8 = ten ? 2 : 0;
|
||||
dummy_sps.pic_width_in_mbs_minus1 =
|
||||
(picture_width + 15) / 16 - 1;
|
||||
dummy_sps.pic_height_in_map_units_minus1 =
|
||||
(picture_height + 15) / 16 - 1;
|
||||
dummy_sps.profile_idc = 100; /* High */
|
||||
dummy_sps.profile_idc = ten ? 110 : 100; /* High10 : High */
|
||||
dummy_sps.level_idc = 41;
|
||||
/*
|
||||
* FRAME_MBS_ONLY required: rkvdec_h264_validate_sps
|
||||
@@ -636,6 +684,8 @@ VAStatus RequestDestroyContext(VADriverContextP context, VAContextID context_id)
|
||||
* The next CreateContext re-populates the cache.
|
||||
*/
|
||||
driver_data->fmt_valid = false;
|
||||
/* iter39: clear 10-bit session flag — next CreateContext re-sets. */
|
||||
driver_data->is_10bit = false;
|
||||
|
||||
return VA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
+75
-19
@@ -39,6 +39,7 @@
|
||||
|
||||
#include <linux/dma-buf.h>
|
||||
|
||||
#include "nv15.h"
|
||||
#include "tiled_yuv.h"
|
||||
#include "utils.h"
|
||||
#include "v4l2.h"
|
||||
@@ -86,14 +87,32 @@ VAStatus RequestCreateImage(VADriverContextP context, VAImageFormat *format,
|
||||
for (i = 0; i < planes_count; i++)
|
||||
size += destination_sizes[i];
|
||||
|
||||
/* Here we calculate the sizes assuming NV12. */
|
||||
|
||||
if (format->fourcc == VA_FOURCC_P010) {
|
||||
/*
|
||||
* iter39: P010 image overrides V4L2-side NV15 sizing. The
|
||||
* source is the kernel-reported NV15 packed plane; the image
|
||||
* buffer holds dense P010 (2 bytes per sample; 24 bpp over both planes).
|
||||
* Recompute sizes/pitches against P010 layout so consumers
|
||||
* (vaGetImage, vaDeriveImage) see standard P010 geometry.
|
||||
*/
|
||||
destination_bytesperlines[0] = width * 2;
|
||||
destination_sizes[0] = destination_bytesperlines[0] * format_height;
|
||||
for (i = 1; i < destination_planes_count; i++) {
|
||||
destination_bytesperlines[i] = destination_bytesperlines[0];
|
||||
destination_sizes[i] = destination_sizes[0] / 2;
|
||||
}
|
||||
size = 0;
|
||||
for (i = 0; i < destination_planes_count; i++)
|
||||
size += destination_sizes[i];
|
||||
} else {
|
||||
/* NV12: V4L2 stride is correct, sizes derived from height. */
|
||||
destination_sizes[0] = destination_bytesperlines[0] * format_height;
|
||||
|
||||
for (i = 1; i < destination_planes_count; i++) {
|
||||
destination_bytesperlines[i] = destination_bytesperlines[0];
|
||||
destination_sizes[i] = destination_sizes[0] / 2;
|
||||
}
|
||||
}
|
||||
|
||||
id = object_heap_allocate(&driver_data->image_heap);
|
||||
image_object = IMAGE(driver_data, id);
|
||||
@@ -224,7 +243,24 @@ static VAStatus copy_surface_to_image (struct request_data *driver_data,
|
||||
image->pitches[i], image->width,
|
||||
i == 0 ? image->height :
|
||||
image->height / 2);
|
||||
else {
|
||||
else if (driver_data->is_10bit &&
|
||||
image->format.fourcc == VA_FOURCC_P010) {
|
||||
/*
|
||||
* iter39: rkvdec emits NV15 (4×10-bit packed in 5
|
||||
* bytes); the VA image buffer is dense P010 (2B/pixel,
|
||||
* value in bits[15:6]). Source stride is the V4L2-
|
||||
* reported NV15 bytesperline (= ceil(width/4)*5,
|
||||
* possibly aligned higher by the kernel); destination
|
||||
* stride is image->pitches[i] = width * 2.
|
||||
*/
|
||||
unsigned int plane_h = (i == 0) ? image->height
|
||||
: image->height / 2;
|
||||
nv15_unpack_plane_to_p010(
|
||||
surface_object->destination_data[i],
|
||||
(uint16_t *)(buffer_object->data + image->offsets[i]),
|
||||
image->width, plane_h,
|
||||
surface_object->destination_bytesperlines[i]);
|
||||
} else {
|
||||
#endif
|
||||
memcpy(buffer_object->data + image->offsets[i],
|
||||
surface_object->destination_data[i],
|
||||
@@ -268,9 +304,17 @@ VAStatus RequestDeriveImage(VADriverContextP context, VASurfaceID surface_id,
|
||||
|
||||
/* Fully populate VAImageFormat to match QueryImageFormats output. */
|
||||
memset(&format, 0, sizeof(format));
|
||||
if (driver_data->is_10bit) {
|
||||
/* iter39: 10-bit session derives a P010 image. NV15-source
|
||||
* unpack happens in copy_surface_to_image. */
|
||||
format.fourcc = VA_FOURCC_P010;
|
||||
format.byte_order = VA_LSB_FIRST;
|
||||
format.bits_per_pixel = 24;
|
||||
} else {
|
||||
format.fourcc = VA_FOURCC_NV12;
|
||||
format.byte_order = VA_LSB_FIRST;
|
||||
format.bits_per_pixel = 12;
|
||||
}
|
||||
|
||||
status = RequestCreateImage(context, &format, surface_object->width,
|
||||
surface_object->height, image);
|
||||
@@ -305,26 +349,38 @@ VAStatus RequestDeriveImage(VADriverContextP context, VASurfaceID surface_id,
|
||||
VAStatus RequestQueryImageFormats(VADriverContextP context,
|
||||
VAImageFormat *formats, int *formats_count)
|
||||
{
|
||||
struct request_data *driver_data = context->pDriverData;
|
||||
int n = 0;
|
||||
|
||||
/*
|
||||
* Populate the VAImageFormat fully per VAAPI spec for NV12 —
|
||||
* not just .fourcc. Consumers (FFmpeg's hwcontext_vaapi, mpv,
|
||||
* Firefox) read .byte_order and .bits_per_pixel; leaving them
|
||||
* uninitialized inherits whatever caller-stack garbage is in
|
||||
* the buffer and produces non-deterministic behavior. Reference:
|
||||
* Mesa's gallium/frontends/va/image.c::vlVaQueryImageFormats and
|
||||
* intel-vaapi-driver's i965_drv_video.c — both publish NV12
|
||||
* with byte_order=VA_LSB_FIRST and bits_per_pixel=12.
|
||||
* Populate the VAImageFormat fully per VAAPI spec — not just
|
||||
* .fourcc. Consumers (FFmpeg's hwcontext_vaapi, mpv, Firefox)
|
||||
* read .byte_order and .bits_per_pixel; leaving them
|
||||
* uninitialized inherits caller-stack garbage and produces
|
||||
* non-deterministic behavior. Reference: Mesa's
|
||||
* gallium/frontends/va/image.c::vlVaQueryImageFormats and
|
||||
* intel-vaapi-driver's i965_drv_video.c.
|
||||
*
|
||||
* For YUV formats, depth/red_mask/green_mask/blue_mask/alpha_mask
|
||||
* are not meaningful (those describe RGB bit layouts); leave them
|
||||
* zeroed via memset before populating.
|
||||
* iter39: advertise P010 when an active session is 10-bit so
|
||||
* ffmpeg-vaapi sees a valid 10-bit-compatible entry during
|
||||
* vaQueryImageFormats. NV12 stays advertised unconditionally so
|
||||
* the 8-bit catalog query response is unchanged.
|
||||
*/
|
||||
memset(&formats[0], 0, sizeof(formats[0]));
|
||||
formats[0].fourcc = VA_FOURCC_NV12;
|
||||
formats[0].byte_order = VA_LSB_FIRST;
|
||||
formats[0].bits_per_pixel = 12;
|
||||
*formats_count = 1;
|
||||
memset(&formats[n], 0, sizeof(formats[n]));
|
||||
formats[n].fourcc = VA_FOURCC_NV12;
|
||||
formats[n].byte_order = VA_LSB_FIRST;
|
||||
formats[n].bits_per_pixel = 12;
|
||||
n++;
|
||||
|
||||
if (driver_data->is_10bit && n < V4L2_REQUEST_MAX_IMAGE_FORMATS) {
|
||||
memset(&formats[n], 0, sizeof(formats[n]));
|
||||
formats[n].fourcc = VA_FOURCC_P010;
|
||||
formats[n].byte_order = VA_LSB_FIRST;
|
||||
formats[n].bits_per_pixel = 24;
|
||||
n++;
|
||||
}
|
||||
|
||||
*formats_count = n;
|
||||
|
||||
return VA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
+4
-2
@@ -50,7 +50,8 @@ sources = [
|
||||
'h265.c',
|
||||
'vp8.c',
|
||||
'vp9.c',
|
||||
'codec.c'
|
||||
'codec.c',
|
||||
'nv15.c'
|
||||
]
|
||||
|
||||
headers = [
|
||||
@@ -76,7 +77,8 @@ headers = [
|
||||
'h265.h',
|
||||
'vp8.h',
|
||||
'vp9.h',
|
||||
'codec.h'
|
||||
'codec.h',
|
||||
'nv15.h'
|
||||
]
|
||||
|
||||
includes = [
|
||||
|
||||
+75
@@ -0,0 +1,75 @@
|
||||
/*
|
||||
* Copyright (C) 2026 claude-noether <claude-noether@reauktion.de>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sub license, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
|
||||
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
|
||||
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "nv15.h"
|
||||
|
||||
/*
 * Unpack one plane of NV15 data into P010 samples.
 *
 * The source packs four 10-bit samples into five consecutive bytes,
 * least-significant bits first:
 *
 *   sample 0 = byte0[7:0] | byte1[1:0] << 8
 *   sample 1 = byte1[7:2] | byte2[3:0] << 6
 *   sample 2 = byte2[7:4] | byte3[5:0] << 4
 *   sample 3 = byte3[7:6] | byte4[7:0] << 2
 *
 * Each output sample is a 16-bit word with the 10-bit value stored in
 * bits [15:6] and zeros in bits [5:0].
 *
 * src:        first byte of the packed source plane
 * dst:        first sample of the destination plane; rows are written
 *             densely, `width` samples (width * 2 bytes) apart
 * width:      number of samples per row
 * height:     number of rows
 * src_stride: distance in bytes between the starts of two source rows
 *
 * When width is not a multiple of four, only the bytes that carry the
 * remaining one to three samples are read from the final group of a row,
 * and only those samples are written. The call is a no-op if either
 * plane pointer is null.
 */
void nv15_unpack_plane_to_p010(const uint8_t *src, uint16_t *dst,
			       unsigned int width, unsigned int height,
			       unsigned int src_stride)
{
	unsigned int y;

	/* Nothing can be unpacked without both a source and a destination. */
	if (!src || !dst)
		return;

	for (y = 0; y < height; y++) {
		const uint8_t *s = src;
		uint16_t *d = dst;
		unsigned int left = width;

		/* Whole groups: five packed bytes yield four samples. */
		for (; left >= 4; left -= 4) {
			d[0] = (uint16_t)((s[0] | ((s[1] & 0x03) << 8)) << 6);
			d[1] = (uint16_t)(((s[1] >> 2) | ((s[2] & 0x0F) << 6)) << 6);
			d[2] = (uint16_t)(((s[2] >> 4) | ((s[3] & 0x3F) << 4)) << 6);
			d[3] = (uint16_t)(((s[3] >> 6) | (s[4] << 2)) << 6);

			d += 4;
			s += 5;
		}

		/*
		 * Partial trailing group: `left` is 0..3 here, so a fourth
		 * sample can never be pending. Touch only the bytes that the
		 * remaining samples actually occupy.
		 */
		if (left >= 1)
			d[0] = (uint16_t)((s[0] | ((s[1] & 0x03) << 8)) << 6);
		if (left >= 2)
			d[1] = (uint16_t)(((s[1] >> 2) | ((s[2] & 0x0F) << 6)) << 6);
		if (left >= 3)
			d[2] = (uint16_t)(((s[2] >> 4) | ((s[3] & 0x3F) << 4)) << 6);

		/*
		 * Step row by row with pointer increments rather than
		 * y * stride, which would be evaluated in 32-bit unsigned
		 * arithmetic and could wrap.
		 */
		src += src_stride;
		dst += width;
	}
}
|
||||
+46
@@ -0,0 +1,46 @@
|
||||
/*
|
||||
* Copyright (C) 2026 claude-noether <claude-noether@reauktion.de>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sub license, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
|
||||
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
|
||||
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef _NV15_H_
|
||||
#define _NV15_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
/*
|
||||
* Unpack one plane of V4L2_PIX_FMT_NV15 (4 × 10-bit values packed into
|
||||
* 5 consecutive bytes, LSB-first) into VA_FOURCC_P010 (16-bit per pixel,
|
||||
* value in bits [15:6], zeros in [5:0]).
|
||||
*
|
||||
* Layout per Documentation/userspace-api/media/v4l/pixfmt-nv15.rst.
|
||||
* Call once per plane: luma (W × H, src_stride = ceil(W/4)*5) and chroma
|
||||
* (W × H/2 — same width because UV are interleaved 10-bit values).
|
||||
*
|
||||
* src_stride must be the kernel-reported bytesperline for the NV15 plane.
|
||||
* The destination is dense P010 with row pitch = width * 2 bytes.
|
||||
*/
|
||||
void nv15_unpack_plane_to_p010(const uint8_t *src, uint16_t *dst,
|
||||
unsigned int width, unsigned int height,
|
||||
unsigned int src_stride);
|
||||
|
||||
#endif
|
||||
+9
-1
@@ -132,12 +132,14 @@ static VAStatus codec_store_buffer(struct request_data *driver_data,
|
||||
case VAProfileH264ConstrainedBaseline:
|
||||
case VAProfileH264MultiviewHigh:
|
||||
case VAProfileH264StereoHigh:
|
||||
case VAProfileH264High10:
|
||||
memcpy(&surface_object->params.h264.picture,
|
||||
buffer_object->data,
|
||||
sizeof(surface_object->params.h264.picture));
|
||||
break;
|
||||
|
||||
case VAProfileHEVCMain:
|
||||
case VAProfileHEVCMain10:
|
||||
memcpy(&surface_object->params.h265.picture,
|
||||
buffer_object->data,
|
||||
sizeof(surface_object->params.h265.picture));
|
||||
@@ -167,12 +169,14 @@ static VAStatus codec_store_buffer(struct request_data *driver_data,
|
||||
case VAProfileH264ConstrainedBaseline:
|
||||
case VAProfileH264MultiviewHigh:
|
||||
case VAProfileH264StereoHigh:
|
||||
case VAProfileH264High10:
|
||||
memcpy(&surface_object->params.h264.slice,
|
||||
buffer_object->data,
|
||||
sizeof(surface_object->params.h264.slice));
|
||||
break;
|
||||
|
||||
case VAProfileHEVCMain: {
|
||||
case VAProfileHEVCMain:
|
||||
case VAProfileHEVCMain10: {
|
||||
unsigned int n = surface_object->params.h265.num_slices;
|
||||
if (n < HEVC_MAX_SLICES_PER_FRAME) {
|
||||
memcpy(&surface_object->params.h265.slices[n],
|
||||
@@ -220,6 +224,7 @@ static VAStatus codec_store_buffer(struct request_data *driver_data,
|
||||
case VAProfileH264ConstrainedBaseline:
|
||||
case VAProfileH264MultiviewHigh:
|
||||
case VAProfileH264StereoHigh:
|
||||
case VAProfileH264High10:
|
||||
memcpy(&surface_object->params.h264.matrix,
|
||||
buffer_object->data,
|
||||
sizeof(surface_object->params.h264.matrix));
|
||||
@@ -227,6 +232,7 @@ static VAStatus codec_store_buffer(struct request_data *driver_data,
|
||||
break;
|
||||
|
||||
case VAProfileHEVCMain:
|
||||
case VAProfileHEVCMain10:
|
||||
memcpy(&surface_object->params.h265.iqmatrix,
|
||||
buffer_object->data,
|
||||
sizeof(surface_object->params.h265.iqmatrix));
|
||||
@@ -286,6 +292,7 @@ static VAStatus codec_set_controls(struct request_data *driver_data,
|
||||
case VAProfileH264ConstrainedBaseline:
|
||||
case VAProfileH264MultiviewHigh:
|
||||
case VAProfileH264StereoHigh:
|
||||
case VAProfileH264High10:
|
||||
rc = h264_set_controls(driver_data, context, profile,
|
||||
surface_object);
|
||||
if (rc < 0)
|
||||
@@ -293,6 +300,7 @@ static VAStatus codec_set_controls(struct request_data *driver_data,
|
||||
break;
|
||||
|
||||
case VAProfileHEVCMain:
|
||||
case VAProfileHEVCMain10:
|
||||
rc = h265_set_controls(driver_data, context, surface_object);
|
||||
if (rc < 0)
|
||||
return VA_STATUS_ERROR_OPERATION_FAILED;
|
||||
|
||||
+12
-1
@@ -40,7 +40,7 @@
|
||||
|
||||
#define V4L2_REQUEST_STR_VENDOR "v4l2-request"
|
||||
|
||||
#define V4L2_REQUEST_MAX_PROFILES 11
|
||||
#define V4L2_REQUEST_MAX_PROFILES 13
|
||||
#define V4L2_REQUEST_MAX_ENTRYPOINTS 5
|
||||
#define V4L2_REQUEST_MAX_CONFIG_ATTRIBUTES 10
|
||||
#define V4L2_REQUEST_MAX_IMAGE_FORMATS 10
|
||||
@@ -133,6 +133,17 @@ struct request_data {
|
||||
unsigned int fmt_buffers_count;
|
||||
unsigned int fmt_sizes[VIDEO_MAX_PLANES];
|
||||
unsigned int fmt_bytesperlines[VIDEO_MAX_PLANES];
|
||||
|
||||
/*
|
||||
* iter39: active session is decoding a 10-bit profile (Hi10P / Main10).
|
||||
* Set in RequestCreateContext from config->profile. Drives:
|
||||
* - CAPTURE pix_fmt selection (NV15 instead of NV12)
|
||||
* - image.c DeriveImage / QueryImageFormats fourcc reporting (P010
|
||||
* instead of NV12)
|
||||
* - copy_surface_to_image NV15→P010 unpack branch
|
||||
* Reset to false at DestroyContext.
|
||||
*/
|
||||
bool is_10bit;
|
||||
};
|
||||
|
||||
VAStatus VA_DRIVER_INIT_FUNC(VADriverContextP context);
|
||||
|
||||
+11
-2
@@ -182,7 +182,9 @@ VAStatus RequestCreateSurfaces2(VADriverContextP context, unsigned int format,
|
||||
* surface_bind_format_uniform_fields(); the per-slot
|
||||
* destination_* fields fill at BeginPicture via surface_bind_slot.
|
||||
*/
|
||||
if (format != VA_RT_FORMAT_YUV420)
|
||||
/* iter39: allow YUV420_10 for Hi10P / Main10 surface allocation. */
|
||||
if (format != VA_RT_FORMAT_YUV420 &&
|
||||
format != VA_RT_FORMAT_YUV420_10)
|
||||
return VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT;
|
||||
|
||||
for (i = 0; i < surfaces_count; i++) {
|
||||
@@ -706,7 +708,14 @@ VAStatus RequestExportSurfaceHandle(VADriverContextP context,
|
||||
|
||||
planes_count = surface_object->destination_planes_count;
|
||||
|
||||
surface_descriptor->fourcc = VA_FOURCC_NV12;
|
||||
/* iter39: 10-bit session exports a DRM_FORMAT_NV15 buffer; advertise
|
||||
* the matching fourcc so a PRIME consumer aware of NV15 (panfrost-
|
||||
* Mesa et al.) can import correctly. PRIME consumers that only know
|
||||
* NV12 / P010 should use the COPY (vaGetImage) path which unpacks
|
||||
* NV15→P010 in image.c::copy_surface_to_image. */
|
||||
surface_descriptor->fourcc = driver_data->is_10bit
|
||||
? VA_FOURCC('N', 'V', '1', '5')
|
||||
: VA_FOURCC_NV12;
|
||||
surface_descriptor->width = surface_object->width;
|
||||
surface_descriptor->height = surface_object->height;
|
||||
surface_descriptor->num_objects = export_fds_count;
|
||||
|
||||
+10
@@ -45,6 +45,16 @@ static struct video_format formats[] = {
|
||||
.planes_count = 2,
|
||||
.bpp = 16,
|
||||
},
|
||||
{
|
||||
.description = "NV15 YUV (10-bit, rkvdec)",
|
||||
.v4l2_format = V4L2_PIX_FMT_NV15,
|
||||
.v4l2_buffers_count = 1,
|
||||
.v4l2_mplane = true,
|
||||
.drm_format = DRM_FORMAT_NV15,
|
||||
.drm_modifier = DRM_FORMAT_MOD_NONE,
|
||||
.planes_count = 2,
|
||||
.bpp = 24,
|
||||
},
|
||||
// Code to handle this DRM_FORMAT is __arm__ only
|
||||
#ifdef __arm__
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user