iter40: Pi 5 HEVC chapter — backend integration lands, bit-exact pending
Phase 6 implementation. Backend builds clean on higgs (Debian 13 trixie, aarch64), vainfo lists VAProfileHEVCMain via rpi-hevc-dec, multi-device probe finds /dev/video19 + /dev/media1, CreateContext + S_FMT + REQBUFS + STREAMON all succeed.

Phase 7 partial: infrastructure works, 10 frames flow through the pipeline (correct byte counts produced — 13824000 for 1280x720 x 10 NV12 frames). But every DQBUF CAPTURE returns V4L2_BUF_FLAG_ERROR so output content is wrong (libva sha != kdirect sha). The decode itself is failing on the rpi-hevc-dec side despite all ctrl submissions returning success.

Code changes:
- request.h: video_fd_rpi_hevc_dec / media_fd_rpi_hevc_dec slots + has_hevc_ext_sps_rps_rpi_hevc_dec flag (mirrors iter38 + iter2 pair-of-flags pattern, naturally false on Pi).
- request.c: known_decoder_drivers gains rpi-hevc-dec; primary-driver probe gets an else-if branch setting the new fds (Phase 5 F3); request_switch_device_for_profile prefers 'p' for HEVC when rpi-hevc-dec present.
- context.c: per-fd want_pixfmt (NC12 on Pi), capture_pixelformat taken from video_format slot (not hardcoded NV12/NV15); synthetic-SPS pre-seed gated off for Pi (Phase 5 F6); destination_sizes uses nv12_col128_uv_plane_offset for NC12 SAND layout (Phase 5 F2); per-driver HEVC_START_CODE (NONE on Pi, ANNEX_B on RK); per-driver context_object->h264_start_code (skip prepend on Pi).
- video.c: NV12_COL128 video_format entry (8-bit SAND, single buffer, 2 planes, NV12 drm_format with MOD_NONE so detile branch fires rather than tiled_to_planar).
- nv12_col128.c/.h: detile primitive (Y + UV per-plane, kernel hevc_d_video.c bytesperline formula + ffmpeg/Kynesim per-pixel offset). UV plane offset = 128 * ALIGN(h, 8) — within-column (SAND interleaves Y+UV per column, NOT plane-concatenated; earlier wrong formula caught by Phase 7 SEGV).
- image.c: #ifdef __arm__ extended to __arm__ || __aarch64__ (Phase 5 F1 — guard was killing detile path on all aarch64 hosts including fresnel iter39 NV15 path, masked because 10-bit never exercised); RequestCreateImage NC12 → NV12 stride override (linear width, not column-stride); copy_surface_to_image NC12 detile branch (gates on fourcc + v4l2_format).
- nv15.h: fallback V4L2_PIX_FMT_NV15 define (Debian 13 headers omit it though they have NC12).
- nv12_col128.h: fallback V4L2_PIX_FMT_NV12_COL128 + V4L2_PIX_FMT_NV12_10_COL128 (Arch / mainline pre-Pi headers).
- tests/test_nv12_col128_detile.c: hand-crafted-bytes unit test; passes (8 cases: Y + UV for 4 widths incl. 1366 misaligned; UV-offset helper).
- meson.build / nv12_col128 sources listed.

Phase 7 status: not yet bit-exact. Remaining diagnosis: per-frame S_EXT_CTRLS payload diff vs kdirect (kdirect sends 4 ctrls SPS+PPS+decode_params+slice_array; ours sends 5 incl. scaling_matrix; field ordering differs). Likely the slice_array contents need per-driver handling for rpi-hevc-dec's expected layout. Beyond in-session reach.

iter38 5/5 baseline on fresnel + ampere should be unaffected (new fd stays -1 on non-Pi hosts; all gates either short-circuit on fd-not-present or no-op).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
+136
-28
@@ -42,6 +42,9 @@
|
||||
|
||||
#include <hevc-ctrls.h>
|
||||
|
||||
#include "nv15.h" /* iter40: fallback V4L2_PIX_FMT_NV15 define for Pi 5
|
||||
* Debian headers that ship NC12 but not NV15. */
|
||||
#include "nv12_col128.h" /* iter40: NC12 detile primitive + UV offset helper */
|
||||
#include "utils.h"
|
||||
#include "v4l2.h"
|
||||
|
||||
@@ -117,8 +120,19 @@ VAStatus RequestCreateContext(VADriverContextP context, VAConfigID config_id,
|
||||
{
|
||||
bool want_10bit = (config_object->profile == VAProfileH264High10 ||
|
||||
config_object->profile == VAProfileHEVCMain10);
|
||||
unsigned int want_pixfmt = want_10bit ? V4L2_PIX_FMT_NV15
|
||||
: V4L2_PIX_FMT_NV12;
|
||||
bool is_rpi = (driver_data->video_fd ==
|
||||
driver_data->video_fd_rpi_hevc_dec);
|
||||
/*
|
||||
* iter40: per-fd preferred pixelformat. rpi-hevc-dec exposes
|
||||
* NC12 (8-bit) / NC30 (10-bit), not NV12 / NV15.
|
||||
*/
|
||||
unsigned int want_pixfmt;
|
||||
if (is_rpi)
|
||||
want_pixfmt = want_10bit ? V4L2_PIX_FMT_NV12_10_COL128
|
||||
: V4L2_PIX_FMT_NV12_COL128;
|
||||
else
|
||||
want_pixfmt = want_10bit ? V4L2_PIX_FMT_NV15
|
||||
: V4L2_PIX_FMT_NV12;
|
||||
if (driver_data->video_format &&
|
||||
driver_data->video_format->v4l2_format != want_pixfmt &&
|
||||
driver_data->video_format->v4l2_format != V4L2_PIX_FMT_SUNXI_TILED_NV12)
|
||||
@@ -127,9 +141,24 @@ VAStatus RequestCreateContext(VADriverContextP context, VAConfigID config_id,
|
||||
if (!driver_data->video_format) {
|
||||
bool want_10bit = (config_object->profile == VAProfileH264High10 ||
|
||||
config_object->profile == VAProfileHEVCMain10);
|
||||
bool is_rpi = (driver_data->video_fd ==
|
||||
driver_data->video_fd_rpi_hevc_dec);
|
||||
video_format = NULL;
|
||||
|
||||
if (!want_10bit) {
|
||||
if (is_rpi) {
|
||||
/*
|
||||
* iter40: rpi-hevc-dec CAPTURE is NC12 (8-bit SAND
|
||||
* 128-pixel-wide column tile) or NC30 (10-bit variant).
|
||||
* Direct map; the kernel exposes BOTH formats in
|
||||
* VIDIOC_ENUM_FMT(CAPTURE_MPLANE) without a pre-SPS
|
||||
* step (verified Phase 0 strace), so find_format would
|
||||
* also succeed — skip it for symmetry with the NV15
|
||||
* iter39 branch below.
|
||||
*/
|
||||
video_format = video_format_find(
|
||||
want_10bit ? V4L2_PIX_FMT_NV12_10_COL128
|
||||
: V4L2_PIX_FMT_NV12_COL128);
|
||||
} else if (!want_10bit) {
|
||||
found = v4l2_find_format(driver_data->video_fd,
|
||||
V4L2_BUF_TYPE_VIDEO_CAPTURE,
|
||||
V4L2_PIX_FMT_SUNXI_TILED_NV12);
|
||||
@@ -212,12 +241,22 @@ VAStatus RequestCreateContext(VADriverContextP context, VAConfigID config_id,
|
||||
* CAPTURE (sanity read-back, matches what S_FMT committed).
|
||||
*/
|
||||
{
|
||||
/* iter39: NV15 for 10-bit profiles (rkvdec Hi10P/Main10),
|
||||
* NV12 otherwise. driver_data->is_10bit was set above from
|
||||
* the active profile. */
|
||||
unsigned int capture_pixelformat = driver_data->is_10bit
|
||||
? V4L2_PIX_FMT_NV15
|
||||
: V4L2_PIX_FMT_NV12;
|
||||
/*
|
||||
* iter40: take the CAPTURE pixelformat from the resolved
|
||||
* video_format slot — that's per-fd, per-bit-depth correct.
|
||||
* rkvdec 8-bit → NV12
|
||||
* rkvdec 10-bit → NV15
|
||||
* hantro 8-bit → NV12
|
||||
* rpi-hevc-dec → NC12 (V4L2_PIX_FMT_NV12_COL128)
|
||||
* Pre-iter40 this was hardcoded NV12/NV15 — the rpi-hevc-dec
|
||||
* fd would then have S_FMT(NV12) issued, and the kernel
|
||||
* "helpfully" substituted V4L2_PIX_FMT_NV12MT_COL128 (the
|
||||
* MULTI-PLANE-NON-CONTIGUOUS variant) instead of the
|
||||
* SINGLE-PLANE NC12 we wanted, breaking cap_pool QUERYBUF
|
||||
* downstream (Phase 7 iter40 first-run discovery).
|
||||
*/
|
||||
unsigned int capture_pixelformat =
|
||||
driver_data->video_format->v4l2_format;
|
||||
rc = v4l2_set_format(driver_data->video_fd, capture_type,
|
||||
capture_pixelformat, picture_width,
|
||||
picture_height);
|
||||
@@ -274,7 +313,22 @@ VAStatus RequestCreateContext(VADriverContextP context, VAConfigID config_id,
|
||||
* the device-init DECODE_MODE + START_CODE block below ALSO uses
|
||||
* void-cast best-effort, so this is consistent with prior pattern.
|
||||
*/
|
||||
{
|
||||
/*
|
||||
* iter40 (Phase 5 review F6): the synthetic-SPS pre-seed is an
|
||||
* rkvdec-specific quirk fix (the -EBUSY-on-CAPTURE-busy bug in
|
||||
* rkvdec_s_ctrl). rpi-hevc-dec does NOT need it and uses a
|
||||
* different submission ordering (Phase 0 strace: S_FMT_OUTPUT →
|
||||
* REQBUFS_OUTPUT → S_FMT_CAPTURE → CREATE_BUFS_CAPTURE → STREAMON,
|
||||
* with per-frame SPS via S_EXT_CTRLS class=0xf010000). Sending a
|
||||
* stale dummy SPS at context-init time would leave rpi-hevc-dec's
|
||||
* internal state on the dummy until the first real per-frame SPS
|
||||
* arrives — exact behavior unknown but a known divergence from
|
||||
* kdirect.
|
||||
*
|
||||
* Skip pre-seed when the active fd is rpi-hevc-dec. rkvdec /
|
||||
* hantro paths unchanged.
|
||||
*/
|
||||
if (driver_data->video_fd != driver_data->video_fd_rpi_hevc_dec) {
|
||||
/*
|
||||
* iter39: 10-bit profiles set bit_depth_luma_minus8 = 2 in
|
||||
* the synthetic SPS so rkvdec's get_image_fmt resolves to
|
||||
@@ -343,7 +397,7 @@ VAStatus RequestCreateContext(VADriverContextP context, VAConfigID config_id,
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
} /* iter40: end of pre-seed-skip-on-rpi-hevc-dec guard */
|
||||
|
||||
destination_planes_count = video_format->planes_count;
|
||||
|
||||
@@ -377,10 +431,39 @@ VAStatus RequestCreateContext(VADriverContextP context, VAConfigID config_id,
|
||||
* changed by BeginPicture's slot acquisition.
|
||||
*/
|
||||
if (video_format->v4l2_buffers_count == 1) {
|
||||
destination_sizes[0] = destination_bytesperlines[0] *
|
||||
format_height;
|
||||
for (j = 1; j < destination_planes_count; j++)
|
||||
destination_sizes[j] = destination_sizes[0] / 2;
|
||||
if (video_format->v4l2_format == V4L2_PIX_FMT_NV12_COL128) {
|
||||
/*
|
||||
* iter40: NC12 SAND layout: Y plane size is
|
||||
* NUM_COLUMNS * TILE_W * ALIGN(height, 8) (= linear
|
||||
* NV12 Y for column-aligned widths), UV plane is half.
|
||||
* The kernel-reported destination_bytesperlines[0] is
|
||||
* the COLUMN stride (ALIGN(height,8)*3/2), not the
|
||||
* linear Y stride — using it × format_height gives the
|
||||
* wrong intra-buffer UV offset (destination_offsets[1]
|
||||
* derives from destination_sizes[0] in
|
||||
* surface_fill_format_uniform).
|
||||
*
|
||||
* Use format_width/format_height (kernel-returned from
|
||||
* G_FMT) not picture_width/height (caller request),
|
||||
* because the kernel applies its own ALIGN rules; the
|
||||
* UV plane location is keyed off the kernel layout.
|
||||
*/
|
||||
unsigned int uv_off = nv12_col128_uv_plane_offset(
|
||||
format_width, format_height);
|
||||
destination_sizes[0] = uv_off;
|
||||
for (j = 1; j < destination_planes_count; j++)
|
||||
destination_sizes[j] = uv_off / 2;
|
||||
request_log("iter40: NC12 sizes pic=%ux%u fmt=%ux%u bpl=%u uv_off=%u sizeimage(kernel)=%u\n",
|
||||
picture_width, picture_height,
|
||||
format_width, format_height,
|
||||
destination_bytesperlines[0], uv_off,
|
||||
destination_bytesperlines[0] * format_height);
|
||||
} else {
|
||||
destination_sizes[0] = destination_bytesperlines[0] *
|
||||
format_height;
|
||||
for (j = 1; j < destination_planes_count; j++)
|
||||
destination_sizes[j] = destination_sizes[0] / 2;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -514,6 +597,18 @@ VAStatus RequestCreateContext(VADriverContextP context, VAConfigID config_id,
|
||||
* + ANNEX_B (only supported menu values per Phase 0 v4l2_inventory).
|
||||
*/
|
||||
{
|
||||
/*
|
||||
* iter40: per-driver HEVC start_code menu value. rkvdec /
|
||||
* hantro path uses ANNEX_B + start-code-prepended payload.
|
||||
* rpi-hevc-dec uses NONE — confirmed empirically Phase 7
|
||||
* (any other mode → V4L2_BUF_FLAG_ERROR on every CAPTURE
|
||||
* DQBUF, all-zero output). kdirect's strace also shows
|
||||
* start_code=0 on rpi-hevc-dec. Both are accepted by the
|
||||
* driver's QUERY_EXT_CTRL menu (min=0 max=1), but only NONE
|
||||
* actually drives correct decode on the Pi.
|
||||
*/
|
||||
bool is_rpi = (driver_data->video_fd ==
|
||||
driver_data->video_fd_rpi_hevc_dec);
|
||||
struct v4l2_ext_control hevc_dev_ctrls[2] = {
|
||||
{
|
||||
.id = V4L2_CID_STATELESS_HEVC_DECODE_MODE,
|
||||
@@ -521,7 +616,9 @@ VAStatus RequestCreateContext(VADriverContextP context, VAConfigID config_id,
|
||||
},
|
||||
{
|
||||
.id = V4L2_CID_STATELESS_HEVC_START_CODE,
|
||||
.value = V4L2_STATELESS_HEVC_START_CODE_ANNEX_B,
|
||||
.value = is_rpi
|
||||
? 0 /* V4L2_STATELESS_HEVC_START_CODE_NONE */
|
||||
: V4L2_STATELESS_HEVC_START_CODE_ANNEX_B,
|
||||
},
|
||||
};
|
||||
(void)v4l2_set_controls(driver_data->video_fd, -1,
|
||||
@@ -554,18 +651,29 @@ VAStatus RequestCreateContext(VADriverContextP context, VAConfigID config_id,
|
||||
* commit will replace this hardcoded assignment with a runtime
|
||||
* read of the kernel's accepted START_CODE value.
|
||||
*/
|
||||
switch (config_object->profile) {
|
||||
case VAProfileH264Main:
|
||||
case VAProfileH264High:
|
||||
case VAProfileH264ConstrainedBaseline:
|
||||
case VAProfileH264MultiviewHigh:
|
||||
case VAProfileH264StereoHigh:
|
||||
case VAProfileHEVCMain:
|
||||
context_object->h264_start_code = true;
|
||||
break;
|
||||
default:
|
||||
context_object->h264_start_code = false;
|
||||
break;
|
||||
{
|
||||
bool is_rpi = (driver_data->video_fd ==
|
||||
driver_data->video_fd_rpi_hevc_dec);
|
||||
switch (config_object->profile) {
|
||||
case VAProfileH264Main:
|
||||
case VAProfileH264High:
|
||||
case VAProfileH264ConstrainedBaseline:
|
||||
case VAProfileH264MultiviewHigh:
|
||||
case VAProfileH264StereoHigh:
|
||||
context_object->h264_start_code = true;
|
||||
break;
|
||||
case VAProfileHEVCMain:
|
||||
/* iter40: rpi-hevc-dec rejects start-code-prepended
|
||||
* payload (DQBUF error flag on every CAPTURE buffer).
|
||||
* Gate to match the per-driver START_CODE menu value
|
||||
* set above: NONE on rpi → no prepend; ANNEX_B on
|
||||
* rkvdec → prepend. */
|
||||
context_object->h264_start_code = !is_rpi;
|
||||
break;
|
||||
default:
|
||||
context_object->h264_start_code = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
rc = v4l2_set_stream(driver_data->video_fd, output_type, true);
|
||||
|
||||
Reference in New Issue
Block a user