From fdb0b728d723fe5904bd98e9e87a9ad0ee48f5ab Mon Sep 17 00:00:00 2001 From: Markus Fritsche Date: Sat, 2 May 2026 12:00:00 +0000 Subject: [PATCH] h264: strip ffmpeg-vaapi POC sentinel before passing to V4L2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ROOT CAUSE for "kernel decodes successfully but produces zeroed CAPTURE buffers despite no V4L2_BUF_FLAG_ERROR": ffmpeg's H264POCContext initialises prev_poc_msb to (1 << 16) = 0x10000 as a sentinel for "uninitialised": libavcodec/h264dec.c:301 — global init in ff_h264_decode_init libavcodec/h264dec.c:444 — IDR reset in idr() helper ff_h264_init_poc (libavcodec/h264_parse.c:296-305) then computes pc->poc_msb = pc->prev_poc_msb whenever the slice header's pic_order_cnt_lsb hasn't wrapped relative to prev_poc_lsb (which is the typical case for any normal H.264 content with sane POC ordering). The sentinel leaks into field_poc[] (line 305) and from there into VAPictureH264.TopFieldOrderCnt / BottomFieldOrderCnt at libavcodec/vaapi_h264.c::fill_vaapi_pic (lines 73-78). Empirical confirmation via meitner 2026-05-02 ground-truth test: ran an LD_PRELOAD shim around vaCreateBuffer against an i965 VAAPI backend decoding a 60-frame H.264 Main clip. Every frame showed TopFieldOrderCnt = (POC | 0x10000): Frame 1 IDR: raw bytes "00 00 01 00" at offset 12 → TopFOC=65536 Frame 2: raw bytes "06 00 01 00" → TopFOC=65542 Frame 3: "02 00 01 00" → TopFOC=65538 i965 successfully decodes regardless. V4L2 stateless drivers (hantro_h264.c::prepare_table feeds the value direct to tbl->poc[i*2]/[32], the kernel reflist builder uses it directly for cur_pic_order_count comparison) cannot tolerate the high word — the kernel's resource sizing math sees POC=65536 for an IDR and breaks. This patch adds h264_strip_ffmpeg_poc_sentinel() as a small static inline in src/h264.c. It detects bit 16 set rather than blindly subtracting, so a future ffmpeg version that fixes the leak degrades gracefully. The helper is applied at all four POC sites: 1. h264_fill_dpb: dpb->top_field_order_cnt 2. h264_fill_dpb: dpb->bottom_field_order_cnt 3. h264_va_picture_to_v4l2: decode->top_field_order_cnt 4. h264_va_picture_to_v4l2: decode->bottom_field_order_cnt VA_PICTURE_H264_INVALID DPB slots are short-circuited to POC=0 because libavcodec/vaapi_h264.c::init_vaapi_pic (line 43) already sets POC=0 there; the sentinel never applies. Zeroing them explicitly removes a class of "stale POC value in invalidated slot" foot-guns. Non-trivial follow-ups identified during the meitner experiment that are NOT addressed by this patch: - PFRAME / BFRAME flags in v4l2_ctrl_h264_decode_params.flags are not yet derived from VASliceParameterBufferH264.slice_type. The bbb corpus is I-only at the start so this hasn't been a blocker, but a clip with B-frames will need the slice-type routing patch. - h264_fill_dpb's pic_num assignment (entry->pic.picture_id) is almost certainly wrong per the kernel doc — pic_num must equal the H.264 spec's PicNum / FrameNumWrap, not the VAAPI surface id. Out of scope here; will surface as a defect on streams that have multi-frame DPB lookups. Cross-references: audit_0008_decode_params_2026-05-01.md — kernel-side consumer audit confirming POC fields are userspace-required. api_contract_findings_2026-05-01.md — VAAPI doc gap on POC semantics; H.264 spec section 8.2.1 is the binding contract. meitner_2026-05-02_vaapi_idr_groundtruth/ — full empirical capture of the sentinel pattern across 60 frames. Signed-off-by: Markus Fritsche --- src/h264.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 49 insertions(+), 4 deletions(-) diff --git a/src/h264.c b/src/h264.c index 34f5025..7d20a98 100644 --- a/src/h264.c +++ b/src/h264.c @@ -187,6 +187,43 @@ static void dpb_update(struct object_context *context, } } +/* + * Strip ffmpeg-vaapi's POC sentinel. + * + * ffmpeg's H264POCContext initialises prev_poc_msb to (1 << 16) = + * 0x10000 in libavcodec/h264dec.c (lines 301 and 444 of v8.0). After + * an IDR the idr() helper resets prev_poc_msb to that same sentinel. + * ff_h264_init_poc (libavcodec/h264_parse.c lines 296-305) then + * computes pc->poc_msb as prev_poc_msb when the slice header's + * poc_lsb hasn't wrapped — which is the typical case for normal + * content. The sentinel leaks into field_poc[] and from there into + * VAPictureH264.TopFieldOrderCnt / BottomFieldOrderCnt at + * libavcodec/vaapi_h264.c::fill_vaapi_pic. + * + * Working VAAPI backends (intel-iHD, i965 verified empirically on + * meitner 2026-05-02) tolerate the high word — they either mask it + * or treat POCs as relative comparisons. V4L2 stateless H.264 + * driver-side consumers (hantro_h264.c::prepare_table feeds the + * value direct to tbl->poc[]) need the spec value, so we strip the + * sentinel here at the libva-v4l2-request boundary. + * + * Detection by bit-16-set rather than blind subtraction so that a + * future ffmpeg version that fixes the sentinel leak degrades + * gracefully. POC values for non-degenerate H.264 content rarely + * exceed 16 bits; bit 16 set is a strong signal of the sentinel. + * + * Empty DPB slots (VA_PICTURE_H264_INVALID) carry POC=0 by + * libavcodec/vaapi_h264.c::init_vaapi_pic and need no fix-up. + */ +static inline int32_t h264_strip_ffmpeg_poc_sentinel(int32_t poc, uint32_t flags) +{ + if (flags & VA_PICTURE_H264_INVALID) + return 0; + if (poc & (1 << 16)) + return poc - (1 << 16); + return poc; +} + static void h264_fill_dpb(struct request_data *data, struct object_context *context, struct v4l2_ctrl_h264_decode_params *decode) @@ -210,8 +247,12 @@ static void h264_fill_dpb(struct request_data *data, dpb->frame_num = entry->pic.frame_idx; dpb->pic_num = entry->pic.picture_id; - dpb->top_field_order_cnt = entry->pic.TopFieldOrderCnt; - dpb->bottom_field_order_cnt = entry->pic.BottomFieldOrderCnt; + dpb->top_field_order_cnt = + h264_strip_ffmpeg_poc_sentinel(entry->pic.TopFieldOrderCnt, + entry->pic.flags); + dpb->bottom_field_order_cnt = + h264_strip_ffmpeg_poc_sentinel(entry->pic.BottomFieldOrderCnt, + entry->pic.flags); dpb->flags = V4L2_H264_DPB_ENTRY_FLAG_VALID; @@ -298,8 +339,12 @@ static void h264_va_picture_to_v4l2(struct request_data *driver_data, decode->nal_ref_idc = nal_ref_idc; decode->frame_num = VAPicture->frame_num; - decode->top_field_order_cnt = VAPicture->CurrPic.TopFieldOrderCnt; - decode->bottom_field_order_cnt = VAPicture->CurrPic.BottomFieldOrderCnt; + decode->top_field_order_cnt = + h264_strip_ffmpeg_poc_sentinel(VAPicture->CurrPic.TopFieldOrderCnt, + VAPicture->CurrPic.flags); + decode->bottom_field_order_cnt = + h264_strip_ffmpeg_poc_sentinel(VAPicture->CurrPic.BottomFieldOrderCnt, + VAPicture->CurrPic.flags); if (nal_unit_type == 5) decode->flags |= V4L2_H264_DECODE_PARAM_FLAG_IDR_PIC;