/* libva-v4l2-request-fourier/src/h264.c */

/*
 * Copyright (C) 2007 Intel Corporation
 * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com>
 * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
 * Copyright (C) 2018 Bootlin
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include <assert.h>
#include <limits.h>
#include <string.h>
#include <stdio.h>

#include <sys/ioctl.h>
#include <sys/mman.h>

#include <linux/videodev2.h>

#include "request.h"
#include "utils.h"
#include "surface.h"
#include "v4l2.h"

/*
 * Slice types this file tells apart. Callers compare these against the
 * VA-API slice_type reduced modulo 5.
 */
enum h264_slice_type {
	/* P slice: this file fills reference list 0 for it. */
	H264_SLICE_P = 0,
	/* B slice: this file fills reference lists 0 and 1 for it. */
	H264_SLICE_B = 1,
};

/*
 * Report whether a VA picture descriptor is an empty slot, i.e. whether its
 * picture_id is VA_INVALID_SURFACE.
 */
static bool is_picture_null(VAPictureH264 *pic)
{
	const bool has_surface = pic->picture_id != VA_INVALID_SURFACE;

	return !has_surface;
}

static struct h264_dpb_entry *
dpb_find_invalid_entry(struct object_context *context)
{
	unsigned int i;

	for (i = 0; i < H264_DPB_SIZE; i++) {
		struct h264_dpb_entry *entry = &context->dpb.entries[i];

		if (!entry->valid && !entry->reserved)
			return entry;
	}

	return NULL;
}

/*
 * Among the DPB slots whose `used` flag is clear, return the one with the
 * smallest age (the first such slot on ties).
 *
 * Returns NULL when every slot is in use. A slot whose age equals UINT_MAX is
 * never selected because the comparison against the initial minimum is
 * strict. The `reserved` flag is not consulted here.
 */
static struct h264_dpb_entry *
dpb_find_oldest_unused_entry(struct object_context *context)
{
	struct h264_dpb_entry *oldest = NULL;
	unsigned int oldest_age = UINT_MAX;
	unsigned int slot;

	for (slot = 0; slot < H264_DPB_SIZE; slot++) {
		struct h264_dpb_entry *candidate = &context->dpb.entries[slot];

		if (candidate->used)
			continue;

		if (candidate->age >= oldest_age)
			continue;

		oldest = candidate;
		oldest_age = candidate->age;
	}

	return oldest;
}

/*
 * Pick a DPB slot for a new picture: prefer a slot that is neither valid nor
 * reserved, otherwise fall back to the oldest slot that is not in use.
 * Returns NULL when neither search finds a slot.
 */
static struct h264_dpb_entry *dpb_find_entry(struct object_context *context)
{
	struct h264_dpb_entry *free_slot = dpb_find_invalid_entry(context);

	if (free_slot)
		return free_slot;

	return dpb_find_oldest_unused_entry(context);
}

/*
 * Find the valid DPB slot holding the same picture_id as `pic`.
 *
 * On a match, the slot index is stored in *idx (when idx is non-NULL) and
 * *fields (when fields is non-NULL) is set to V4L2_H264_FRAME_REF. Both are
 * left untouched when nothing matches.
 *
 * Returns the matching entry, or NULL when no valid slot matches.
 */
static struct h264_dpb_entry *dpb_lookup(struct object_context *context,
					 VAPictureH264 *pic, unsigned int *idx,
					 unsigned char *fields)
{
	unsigned int slot;

	for (slot = 0; slot < H264_DPB_SIZE; slot++) {
		struct h264_dpb_entry *candidate = &context->dpb.entries[slot];

		if (!candidate->valid ||
		    candidate->pic.picture_id != pic->picture_id)
			continue;

		if (idx)
			*idx = slot;

		/*
		 * Every match is reported as a whole-frame reference. Choosing
		 * V4L2_H264_TOP_FIELD_REF / V4L2_H264_BOTTOM_FIELD_REF from the
		 * relative order of TopFieldOrderCnt and BottomFieldOrderCnt
		 * is currently disabled.
		 */
		if (fields)
			*fields = V4L2_H264_FRAME_REF;

		return candidate;
	}

	return NULL;
}

/*
 * Zero a DPB slot, then record whether it is reserved. With `reserved` set
 * the slot ends up invalid but reserved; otherwise it is fully cleared.
 */
static void dpb_clear_entry(struct h264_dpb_entry *entry, bool reserved)
{
	memset(entry, 0, sizeof *entry);
	entry->reserved = reserved;
}

/*
 * Store `pic` in the DPB unless it is a null picture or already present.
 *
 * @context: decoding context owning the DPB.
 * @pic:     picture to record; copied into the slot.
 * @entry:   slot to use, or NULL to let dpb_find_entry() choose one.
 *
 * The slot is stamped with the current DPB age, marked valid and no longer
 * reserved. It is additionally marked used unless the picture carries
 * VA_PICTURE_H264_INVALID.
 *
 * If no slot was supplied and none can be found (every slot is in use), the
 * picture is dropped instead of writing through a NULL pointer.
 */
static void dpb_insert(struct object_context *context, VAPictureH264 *pic,
		       struct h264_dpb_entry *entry)
{
	if (is_picture_null(pic))
		return;

	if (dpb_lookup(context, pic, NULL, NULL))
		return;

	if (!entry)
		entry = dpb_find_entry(context);

	/* dpb_find_entry() returns NULL when the DPB has no reusable slot. */
	if (!entry)
		return;

	memcpy(&entry->pic, pic, sizeof(entry->pic));
	entry->age = context->dpb.age;
	entry->valid = true;
	entry->reserved = false;

	if (!(pic->flags & VA_PICTURE_H264_INVALID))
		entry->used = true;
}

/*
 * Refresh the DPB from the reference frame list of a new picture.
 *
 * The DPB age is advanced and every slot is marked unused. Each non-null
 * picture among the first num_ref_frames entries of ReferenceFrames[] is then
 * either re-marked as used with the new age (when already in the DPB) or
 * inserted into a slot chosen by dpb_insert().
 */
static void dpb_update(struct object_context *context,
		       VAPictureParameterBufferH264 *parameters)
{
	unsigned int slot;
	unsigned int ref;

	context->dpb.age++;

	for (slot = 0; slot < H264_DPB_SIZE; slot++)
		context->dpb.entries[slot].used = false;

	for (ref = 0; ref < parameters->num_ref_frames; ref++) {
		VAPictureH264 *ref_pic = &parameters->ReferenceFrames[ref];
		struct h264_dpb_entry *known;

		if (is_picture_null(ref_pic))
			continue;

		known = dpb_lookup(context, ref_pic, NULL, NULL);
		if (!known) {
			dpb_insert(context, ref_pic, NULL);
			continue;
		}

		known->age = context->dpb.age;
		known->used = true;
	}
}

/*
 * Strip ffmpeg-vaapi's POC sentinel.
 *
 * ffmpeg's H264POCContext initialises prev_poc_msb to (1 << 16) =
 * 0x10000 in libavcodec/h264dec.c (lines 301 and 444 of v8.0). After
 * an IDR the idr() helper resets prev_poc_msb to that same sentinel.
 * ff_h264_init_poc (libavcodec/h264_parse.c lines 296-305) then
 * computes pc->poc_msb as prev_poc_msb when the slice header's
 * poc_lsb hasn't wrapped — which is the typical case for normal
 * content. The sentinel leaks into field_poc[] and from there into
 * VAPictureH264.TopFieldOrderCnt / BottomFieldOrderCnt at
 * libavcodec/vaapi_h264.c::fill_vaapi_pic.
 *
 * Working VAAPI backends (intel-iHD, i965 verified empirically on
 * meitner 2026-05-02) tolerate the high word — they either mask it
 * or treat POCs as relative comparisons. V4L2 stateless H.264
 * driver-side consumers (hantro_h264.c::prepare_table feeds the
 * value direct to tbl->poc[]) need the spec value, so we strip the
 * sentinel here at the libva-v4l2-request boundary.
 *
 * Detection by bit-16-set rather than blind subtraction so that a
 * future ffmpeg version that fixes the sentinel leak degrades
 * gracefully. POC values for non-degenerate H.264 content rarely
 * exceed 16 bits; bit 16 set is a strong signal of the sentinel.
 *
 * Empty DPB slots (VA_PICTURE_H264_INVALID) carry POC=0 by
 * libavcodec/vaapi_h264.c::init_vaapi_pic and need no fix-up.
 */
static inline int32_t h264_strip_ffmpeg_poc_sentinel(int32_t poc, uint32_t flags)
{
	const int32_t sentinel = 1 << 16;

	/* Pictures flagged invalid always report a POC of zero. */
	if (flags & VA_PICTURE_H264_INVALID)
		return 0;

	/* Remove the sentinel only when its bit is actually set. */
	return (poc & sentinel) ? poc - sentinel : poc;
}

/*
 * Translate the driver-side DPB bookkeeping into decode->dpb[].
 *
 * @data:      driver data, used to resolve surface ids to surfaces.
 * @context:   decoding context owning the DPB.
 * @VAPicture: picture parameters of the frame being decoded; supplies
 *             frame_num and log2_max_frame_num_minus4 for PicNum wrapping.
 * @decode:    V4L2 decode parameters whose dpb[] array is filled.
 *
 * Slot i of the context DPB maps to decode->dpb[i]. Slots that are not valid
 * are skipped and keep whatever the caller put in decode->dpb[i].
 */
static void h264_fill_dpb(struct request_data *data,
			  struct object_context *context,
			  VAPictureParameterBufferH264 *VAPicture,
			  struct v4l2_ctrl_h264_decode_params *decode)
{
	const int max_frame_num =
		1 << (VAPicture->seq_fields.bits.log2_max_frame_num_minus4 + 4);
	const int cur_frame_num = (int)VAPicture->frame_num;
	int i;

	for (i = 0; i < H264_DPB_SIZE; i++) {
		struct v4l2_h264_dpb_entry *dpb = &decode->dpb[i];
		struct h264_dpb_entry *entry = &context->dpb.entries[i];
		struct object_surface *surface;

		if (!entry->valid)
			continue;

		/*
		 * Resolve the surface only for valid slots. When the surface
		 * is unknown, reference_ts keeps the caller-provided value.
		 */
		surface = SURFACE(data, entry->pic.picture_id);
		if (surface)
			dpb->reference_ts =
				v4l2_timeval_to_ns(&surface->timestamp);

		dpb->frame_num = entry->pic.frame_idx;

		/*
		 * Per ext-ctrls-codec-stateless.rst, dpb[].pic_num must
		 * equal the H.264 spec's PicNum (8-28) for short-term refs
		 * or LongTermPicNum (8-29) for long-term refs.
		 *
		 * For frames (not field-coded), PicNum = FrameNumWrap.
		 * FrameNumWrap = (frame_num > cur_frame_num)
		 *                ? frame_num - max_frame_num
		 *                : frame_num
		 * (per spec section 8.2.4.1, frame_num wraparound).
		 *
		 * VAAPI convention (libavcodec/vaapi_h264.c::fill_vaapi_pic
		 * line 64): VAPictureH264.frame_idx holds long_term_frame_idx
		 * for long-term refs and frame_num for short-term refs. So
		 * for long-term entries we copy frame_idx straight through
		 * as LongTermPicNum.
		 *
		 * fourier's previous code set pic_num to picture_id (the
		 * VAAPI surface id) which is unrelated to H.264 PicNum;
		 * mediatek's vdec_h264_req_common.c::dst_entry->pic_num is
		 * one consumer that fails on that. Hantro doesn't read
		 * pic_num at all (uses reference_ts for ref resolution),
		 * which is why fourier's wrong value never surfaced on
		 * RK3568.
		 */
		if (entry->pic.flags & VA_PICTURE_H264_LONG_TERM_REFERENCE) {
			dpb->pic_num = entry->pic.frame_idx;
		} else {
			int frame_num = (int)entry->pic.frame_idx;

			dpb->pic_num = (frame_num > cur_frame_num)
				? frame_num - max_frame_num
				: frame_num;
		}

		/*
		 * Declare which fields of the entry are referenced. Leaving
		 * this at zero tells the kernel that neither field is usable
		 * as a reference. This matches dpb_lookup(), which reports
		 * every reference as a whole frame.
		 */
		dpb->fields = V4L2_H264_FRAME_REF;

		dpb->top_field_order_cnt =
			h264_strip_ffmpeg_poc_sentinel(entry->pic.TopFieldOrderCnt,
						       entry->pic.flags);
		dpb->bottom_field_order_cnt =
			h264_strip_ffmpeg_poc_sentinel(entry->pic.BottomFieldOrderCnt,
						       entry->pic.flags);

		dpb->flags = V4L2_H264_DPB_ENTRY_FLAG_VALID;

		if (entry->used)
			dpb->flags |= V4L2_H264_DPB_ENTRY_FLAG_ACTIVE;

		if (entry->pic.flags & VA_PICTURE_H264_LONG_TERM_REFERENCE)
			dpb->flags |= V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM;
	}
}

/*
 * Fill the V4L2 decode-params, PPS and SPS controls from the VA-API picture
 * parameter buffer.
 *
 * @driver_data: driver data, forwarded to h264_fill_dpb().
 * @context:     decoding context (DPB state, start-code mode).
 * @surface:     target surface; the first byte of its source data (after a
 *               3-byte start code when context->h264_start_code is set) is
 *               read as the NAL header.
 * @VAPicture:   VA-API picture parameters.
 * @decode:      output decode parameters (flags are OR-ed in).
 * @pps:         output PPS (flags are OR-ed in).
 * @sps:         output SPS (flags are OR-ed in).
 *
 * NOTE(review): V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT is never set here,
 * although h264_set_controls() may submit a scaling matrix control. Confirm
 * whether the targeted kernel drivers require the flag to honour the matrix.
 */
static void h264_va_picture_to_v4l2(struct request_data *driver_data,
				    struct object_context *context,
				    struct object_surface *surface,
				    VAPictureParameterBufferH264 *VAPicture,
				    struct v4l2_ctrl_h264_decode_params *decode,
				    struct v4l2_ctrl_h264_pps *pps,
				    struct v4l2_ctrl_h264_sps *sps)
{
	unsigned char *b;
	unsigned char nal_ref_idc;
	unsigned char nal_unit_type;
	unsigned int frame_height_in_mbs;

	/* Extract missing nal_ref_idc and nal_unit_type */
	b = surface->source_data;
	if (context->h264_start_code)
		b += 3;
	nal_ref_idc = (b[0] >> 5) & 0x3;
	nal_unit_type = b[0] & 0x1f;

	h264_fill_dpb(driver_data, context, VAPicture, decode);

	/*
	 * Populate every V4L2_CID_STATELESS_H264_DECODE_PARAMS field
	 * we can derive from VAAPI's pre-parsed VAPictureParameterBuffer
	 * + bitstream byte. Cross-reference: GStreamer
	 * gstv4l2codech264dec.c::gst_v4l2_codec_h264_dec_fill_decoder_params
	 * (lines 632-678).
	 *
	 * Fields not derivable from VAAPI (idr_pic_id, pic_order_cnt_lsb,
	 * delta_pic_order_cnt_*, dec_ref_pic_marking_bit_size,
	 * pic_order_cnt_bit_size, slice_group_change_cycle) require a
	 * full slice_header() bit-level parse, which libva-v4l2-request
	 * does not currently do. They are left at zero-init and the
	 * kernel-side hantro-vpu may compute them itself when scanning
	 * the OUTPUT bitstream — a hypothesis verified empirically by
	 * running this patch and inspecting the CAPTURE buffer.
	 */
	/*
	 * DEBUG INSTRUMENTATION (0014): dump the raw bytes of
	 * VAPicture->CurrPic plus sizeof(VAPictureH264) so we can
	 * tell whether the observed TopFieldOrderCnt=65536 anomaly is
	 * (a) at the documented byte-offset 12 (ffmpeg-side bug or
	 * intentional non-spec encoding) or
	 * (b) at a different offset (libva ABI / VA_PADDING_LOW
	 * mismatch between ffmpeg's writer and our reader).
	 *
	 * Documented VAPictureH264 layout (libva-2.x):
	 *   offset 0:  VASurfaceID picture_id  (uint32)
	 *   offset 4:  uint32 frame_idx
	 *   offset 8:  uint32 flags
	 *   offset 12: int32 TopFieldOrderCnt
	 *   offset 16: int32 BottomFieldOrderCnt
	 *   offset 20+: uint32 va_reserved[VA_PADDING_LOW]
	 */
	{
		const unsigned char *cp = (const unsigned char *)&VAPicture->CurrPic;
		char hex[32 * 3 + 1] = { 0 };
		unsigned int i;

		/* Each byte takes three characters (" xx") plus the NUL. */
		for (i = 0; i < 32; i++)
			snprintf(hex + i * 3, 4, " %02x", cp[i]);
		request_log("VAPictureH264 sizeof=%zu CurrPic[0..31]:%s\n",
			    sizeof(VAPictureH264), hex);
		request_log("VAPictureH264 CurrPic field reads: "
			    "picture_id=0x%08x frame_idx=%u flags=0x%x "
			    "TopFOC=%d BottomFOC=%d frame_num=%u\n",
			    (unsigned)VAPicture->CurrPic.picture_id,
			    (unsigned)VAPicture->CurrPic.frame_idx,
			    (unsigned)VAPicture->CurrPic.flags,
			    (int)VAPicture->CurrPic.TopFieldOrderCnt,
			    (int)VAPicture->CurrPic.BottomFieldOrderCnt,
			    (unsigned)VAPicture->frame_num);
	}

	decode->nal_ref_idc = nal_ref_idc;
	decode->frame_num = VAPicture->frame_num;
	decode->top_field_order_cnt =
		h264_strip_ffmpeg_poc_sentinel(VAPicture->CurrPic.TopFieldOrderCnt,
					       VAPicture->CurrPic.flags);
	decode->bottom_field_order_cnt =
		h264_strip_ffmpeg_poc_sentinel(VAPicture->CurrPic.BottomFieldOrderCnt,
					       VAPicture->CurrPic.flags);

	/* nal_unit_type 5 is a coded slice of an IDR picture. */
	if (nal_unit_type == 5)
		decode->flags |= V4L2_H264_DECODE_PARAM_FLAG_IDR_PIC;
	if (VAPicture->pic_fields.bits.field_pic_flag)
		decode->flags |= V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC;
	if (VAPicture->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
		decode->flags |= V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD;

	pps->weighted_bipred_idc =
		VAPicture->pic_fields.bits.weighted_bipred_idc;
	pps->pic_init_qs_minus26 = VAPicture->pic_init_qs_minus26;
	pps->pic_init_qp_minus26 = VAPicture->pic_init_qp_minus26;
	pps->chroma_qp_index_offset = VAPicture->chroma_qp_index_offset;
	pps->second_chroma_qp_index_offset =
		VAPicture->second_chroma_qp_index_offset;

	if (VAPicture->pic_fields.bits.entropy_coding_mode_flag)
		pps->flags |= V4L2_H264_PPS_FLAG_ENTROPY_CODING_MODE;

	if (VAPicture->pic_fields.bits.weighted_pred_flag)
		pps->flags |= V4L2_H264_PPS_FLAG_WEIGHTED_PRED;

	if (VAPicture->pic_fields.bits.transform_8x8_mode_flag)
		pps->flags |= V4L2_H264_PPS_FLAG_TRANSFORM_8X8_MODE;

	if (VAPicture->pic_fields.bits.constrained_intra_pred_flag)
		pps->flags |= V4L2_H264_PPS_FLAG_CONSTRAINED_INTRA_PRED;

	if (VAPicture->pic_fields.bits.pic_order_present_flag)
		pps->flags |=
			V4L2_H264_PPS_FLAG_BOTTOM_FIELD_PIC_ORDER_IN_FRAME_PRESENT;

	if (VAPicture->pic_fields.bits.deblocking_filter_control_present_flag)
		pps->flags |=
			V4L2_H264_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT;

	if (VAPicture->pic_fields.bits.redundant_pic_cnt_present_flag)
		pps->flags |= V4L2_H264_PPS_FLAG_REDUNDANT_PIC_CNT_PRESENT;

	sps->max_num_ref_frames = VAPicture->num_ref_frames;
	sps->chroma_format_idc = VAPicture->seq_fields.bits.chroma_format_idc;
	sps->bit_depth_luma_minus8 = VAPicture->bit_depth_luma_minus8;
	sps->bit_depth_chroma_minus8 = VAPicture->bit_depth_chroma_minus8;
	sps->log2_max_frame_num_minus4 =
		VAPicture->seq_fields.bits.log2_max_frame_num_minus4;
	sps->log2_max_pic_order_cnt_lsb_minus4 =
		VAPicture->seq_fields.bits.log2_max_pic_order_cnt_lsb_minus4;
	sps->pic_order_cnt_type = VAPicture->seq_fields.bits.pic_order_cnt_type;
	sps->pic_width_in_mbs_minus1 = VAPicture->picture_width_in_mbs_minus1;

	/*
	 * VA-API reports the frame height in macroblocks, whereas the SPS
	 * field counts map units. H.264 (7-18) defines
	 * FrameHeightInMbs = (2 - frame_mbs_only_flag) * PicHeightInMapUnits,
	 * so streams with frame_mbs_only_flag == 0 have half as many map
	 * units as frame macroblock rows. Round up so that an (invalid) odd
	 * frame height cannot under-report or wrap below zero.
	 */
	frame_height_in_mbs = VAPicture->picture_height_in_mbs_minus1 + 1;
	if (VAPicture->seq_fields.bits.frame_mbs_only_flag)
		sps->pic_height_in_map_units_minus1 = frame_height_in_mbs - 1;
	else
		sps->pic_height_in_map_units_minus1 =
			(frame_height_in_mbs + 1) / 2 - 1;

	if (VAPicture->seq_fields.bits.residual_colour_transform_flag)
		sps->flags |= V4L2_H264_SPS_FLAG_SEPARATE_COLOUR_PLANE;
	if (VAPicture->seq_fields.bits.gaps_in_frame_num_value_allowed_flag)
		sps->flags |=
			V4L2_H264_SPS_FLAG_GAPS_IN_FRAME_NUM_VALUE_ALLOWED;
	if (VAPicture->seq_fields.bits.frame_mbs_only_flag)
		sps->flags |= V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY;
	if (VAPicture->seq_fields.bits.mb_adaptive_frame_field_flag)
		sps->flags |= V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD;
	if (VAPicture->seq_fields.bits.direct_8x8_inference_flag)
		sps->flags |= V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE;
	if (VAPicture->seq_fields.bits.delta_pic_order_always_zero_flag)
		sps->flags |= V4L2_H264_SPS_FLAG_DELTA_PIC_ORDER_ALWAYS_ZERO;
}

/*
 * Copy the VA-API inverse-quantisation matrices into the V4L2 scaling matrix
 * control.
 *
 * @driver_data: unused.
 * @context:     unused.
 * @VAMatrix:    VA-API matrices (4x4 lists and two 8x8 lists).
 * @v4l2_matrix: destination control; 8x8 lists other than 0 and 1 are left
 *               as provided by the caller.
 */
static void h264_va_matrix_to_v4l2(struct request_data *driver_data,
				   struct object_context *context,
				   VAIQMatrixBufferH264 *VAMatrix,
				   struct v4l2_ctrl_h264_scaling_matrix *v4l2_matrix)
{
	memcpy(v4l2_matrix->scaling_list_4x4, &VAMatrix->ScalingList4x4,
	       sizeof(VAMatrix->ScalingList4x4));

	/*
	 * VA-API carries only the two luma 8x8 lists (Intra Y, Inter Y).
	 * The V4L2 stateless uAPI orders its six 8x8 lists as Intra Y,
	 * Inter Y, Intra Cb, Inter Cb, Intra Cr, Inter Cr, so the luma pair
	 * belongs at indices 0 and 1. The chroma lists, only relevant to
	 * 4:4:4 content, are not available from VA-API and stay untouched.
	 */
	memcpy(v4l2_matrix->scaling_list_8x8[0], &VAMatrix->ScalingList8x8[0],
	       sizeof(v4l2_matrix->scaling_list_8x8[0]));
	memcpy(v4l2_matrix->scaling_list_8x8[1], &VAMatrix->ScalingList8x8[1],
	       sizeof(v4l2_matrix->scaling_list_8x8[1]));
}

/*
 * Copy the first `num_refs` prediction-weight entries of one reference list
 * from the VA-API slice arrays into a V4L2 weight-factor table. Each entry
 * has one luma weight/offset and two chroma weight/offset pairs.
 */
static void h264_copy_pred_table(struct v4l2_h264_weight_factors *factors,
				 unsigned int num_refs,
				 int16_t luma_weight[32],
				 int16_t luma_offset[32],
				 int16_t chroma_weight[32][2],
				 int16_t chroma_offset[32][2])
{
	unsigned int ref = 0;

	while (ref < num_refs) {
		factors->luma_weight[ref] = luma_weight[ref];
		factors->luma_offset[ref] = luma_offset[ref];

		/* First chroma component. */
		factors->chroma_weight[ref][0] = chroma_weight[ref][0];
		factors->chroma_offset[ref][0] = chroma_offset[ref][0];

		/* Second chroma component. */
		factors->chroma_weight[ref][1] = chroma_weight[ref][1];
		factors->chroma_offset[ref][1] = chroma_offset[ref][1];

		ref++;
	}
}

/*
 * Fill the V4L2 slice parameters and prediction weights from the VA-API
 * slice parameter buffer.
 *
 * @driver_data: unused.
 * @context:     decoding context; its DPB resolves reference pictures to
 *               DPB indices.
 * @VASlice:     VA-API slice parameters.
 * @VAPicture:   unused.
 * @slice:       output slice parameters (flags are OR-ed in).
 * @weights:     output prediction weights.
 *
 * Reference list 0 is filled for P and B slices, list 1 for B slices only.
 * A list entry whose picture is not found in the DPB is left as provided by
 * the caller.
 */
static void h264_va_slice_to_v4l2(struct request_data *driver_data,
				  struct object_context *context,
				  VASliceParameterBufferH264 *VASlice,
				  VAPictureParameterBufferH264 *VAPicture,
				  struct v4l2_ctrl_h264_slice_params *slice,
				  struct v4l2_ctrl_h264_pred_weights *weights)
{
	/* slice_type values 5..9 alias 0..4, hence the modulo. */
	const unsigned int slice_type = VASlice->slice_type % 5;
	const bool uses_list0 = (slice_type == H264_SLICE_P) ||
				(slice_type == H264_SLICE_B);
	const bool uses_list1 = (slice_type == H264_SLICE_B);
	unsigned int i;

	/*
	 * The VA-API bit offset is passed through as is; adding the 3-byte
	 * start code length when context->h264_start_code is set is
	 * deliberately not done here.
	 */
	slice->header_bit_size = VASlice->slice_data_bit_offset;
	slice->first_mb_in_slice = VASlice->first_mb_in_slice;
	slice->slice_type = VASlice->slice_type;
	slice->cabac_init_idc = VASlice->cabac_init_idc;
	slice->slice_qp_delta = VASlice->slice_qp_delta;
	slice->disable_deblocking_filter_idc =
		VASlice->disable_deblocking_filter_idc;
	slice->slice_alpha_c0_offset_div2 = VASlice->slice_alpha_c0_offset_div2;
	slice->slice_beta_offset_div2 = VASlice->slice_beta_offset_div2;

	if (uses_list0) {
		slice->num_ref_idx_l0_active_minus1 =
			VASlice->num_ref_idx_l0_active_minus1;

		for (i = 0; i < VASlice->num_ref_idx_l0_active_minus1 + 1; i++) {
			VAPictureH264 *pic = &VASlice->RefPicList0[i];
			unsigned int idx;
			unsigned char fields;

			if (!dpb_lookup(context, pic, &idx, &fields))
				continue;

			slice->ref_pic_list0[i].index = idx;
			slice->ref_pic_list0[i].fields = fields;
		}
	}

	if (uses_list1) {
		slice->num_ref_idx_l1_active_minus1 =
			VASlice->num_ref_idx_l1_active_minus1;

		for (i = 0; i < VASlice->num_ref_idx_l1_active_minus1 + 1; i++) {
			VAPictureH264 *pic = &VASlice->RefPicList1[i];
			unsigned int idx;
			unsigned char fields;

			if (!dpb_lookup(context, pic, &idx, &fields))
				continue;

			/* Both members belong to list 1, not list 0. */
			slice->ref_pic_list1[i].index = idx;
			slice->ref_pic_list1[i].fields = fields;
		}
	}

	if (VASlice->direct_spatial_mv_pred_flag)
		slice->flags |= V4L2_H264_SLICE_FLAG_DIRECT_SPATIAL_MV_PRED;

	weights->chroma_log2_weight_denom =
		VASlice->chroma_log2_weight_denom;
	weights->luma_log2_weight_denom =
		VASlice->luma_log2_weight_denom;

	if (uses_list0)
		h264_copy_pred_table(&weights->weight_factors[0],
				     slice->num_ref_idx_l0_active_minus1 + 1,
				     VASlice->luma_weight_l0,
				     VASlice->luma_offset_l0,
				     VASlice->chroma_weight_l0,
				     VASlice->chroma_offset_l0);

	if (uses_list1)
		h264_copy_pred_table(&weights->weight_factors[1],
				     slice->num_ref_idx_l1_active_minus1 + 1,
				     VASlice->luma_weight_l1,
				     VASlice->luma_offset_l1,
				     VASlice->chroma_weight_l1,
				     VASlice->chroma_offset_l1);
}

int h264_get_controls(struct request_data *driver_data,
		      struct object_context *context)
{
	struct v4l2_ext_control controls[2] = {
		{
			.id = V4L2_CID_STATELESS_H264_DECODE_MODE,
		}, {
			.id = V4L2_CID_STATELESS_H264_START_CODE,
		}
	};
	int rc;

	rc = v4l2_get_controls(driver_data->video_fd, -1, controls, 2);
	if (rc < 0)
		return VA_STATUS_ERROR_OPERATION_FAILED;

	switch (controls[0].value) {
	case V4L2_STATELESS_H264_DECODE_MODE_SLICE_BASED:
		break;
	case V4L2_STATELESS_H264_DECODE_MODE_FRAME_BASED:
		break;
	default:
		request_log("Unsupported decode mode\n");
		return VA_STATUS_ERROR_OPERATION_FAILED;
	}

	switch (controls[1].value) {
	case V4L2_STATELESS_H264_START_CODE_NONE:
		context->h264_start_code = false;
		break;
	case V4L2_STATELESS_H264_START_CODE_ANNEX_B:
		context->h264_start_code = true;
		break;
	default:
		request_log("Unsupported start code\n");
		return VA_STATUS_ERROR_OPERATION_FAILED;
	}

	return VA_STATUS_SUCCESS;
}

/*
 * Map a VA-API H.264 profile to the profile_idc value placed in the SPS
 * control. Profiles not listed here map to 0.
 */
static inline __u8 h264_profile_to_idc(VAProfile profile)
{
	static const struct {
		VAProfile profile;
		__u8 idc;
	} known_profiles[] = {
		{ VAProfileH264ConstrainedBaseline, 66 },
		{ VAProfileH264Main, 77 },
		{ VAProfileH264High, 100 },
		{ VAProfileH264MultiviewHigh, 118 },
		{ VAProfileH264StereoHigh, 128 },
	};
	unsigned int i;

	for (i = 0; i < sizeof(known_profiles) / sizeof(known_profiles[0]); i++) {
		if (known_profiles[i].profile == profile)
			return known_profiles[i].idc;
	}

	return 0;
}

/*
 * Build and submit the per-request H.264 controls for one surface.
 *
 * @driver_data: driver data (video fd, surface heap).
 * @context:     decoding context owning the DPB.
 * @profile:     VA-API profile, used to derive sps.profile_idc.
 * @surface:     surface being decoded; supplies the VA-API parameter buffers
 *               and the request fd.
 *
 * A DPB slot is reserved for the current picture before the reference list
 * is refreshed, and the picture is inserted into that slot once the controls
 * have been accepted.
 *
 * Returns VA_STATUS_SUCCESS, or VA_STATUS_ERROR_OPERATION_FAILED when no DPB
 * slot is available for the current picture or the controls cannot be set.
 */
int h264_set_controls(struct request_data *driver_data,
		      struct object_context *context,
		      VAProfile profile,
		      struct object_surface *surface)
{
	struct v4l2_ctrl_h264_scaling_matrix matrix = { 0 };
	struct v4l2_ctrl_h264_decode_params decode = { 0 };
	struct v4l2_ctrl_h264_slice_params slice = { 0 };
	struct v4l2_ctrl_h264_pred_weights weights = { 0 };
	struct v4l2_ctrl_h264_pps pps = { 0 };
	struct v4l2_ctrl_h264_sps sps = { 0 };
	struct h264_dpb_entry *output;
	int rc;

	output = dpb_lookup(context, &surface->params.h264.picture.CurrPic,
			    NULL, NULL);
	if (!output)
		output = dpb_find_entry(context);

	/*
	 * dpb_find_entry() returns NULL when every DPB slot is in use. Fail
	 * the request rather than clearing through a NULL pointer.
	 */
	if (!output)
		return VA_STATUS_ERROR_OPERATION_FAILED;

	dpb_clear_entry(output, true);

	dpb_update(context, &surface->params.h264.picture);

	h264_va_picture_to_v4l2(driver_data, context, surface,
				&surface->params.h264.picture,
				&decode, &pps, &sps);
	h264_va_matrix_to_v4l2(driver_data, context,
			       &surface->params.h264.matrix, &matrix);
	h264_va_slice_to_v4l2(driver_data, context,
			      &surface->params.h264.slice,
			      &surface->params.h264.picture, &slice, &weights);

	/*
	 * Derive PFRAME / BFRAME flags in v4l2_ctrl_h264_decode_params.flags
	 * from VASliceParameterBufferH264.slice_type. VAAPI's slice_type
	 * matches the H.264 spec slice_type semantic: 0=P, 1=B, 2=I, 3=SP,
	 * 4=SI; values 5..9 mean "all slices in the picture have this
	 * slice_type" (mod 5 yields the underlying type). VAAPI consumers
	 * (ffmpeg, mpv) populate this for every slice; in FRAME_BASED mode
	 * we only see the most-recent slice's params, but slice_type is
	 * uniform across a single coded picture for our purposes.
	 *
	 * Kernel consumers that read these flags: tegra-vde
	 * (drivers/media/platform/nvidia/tegra-vde/h264.c lines 783-799 of
	 * 6.19.x) selects the inter-frame decode kernel. Hantro / rkvdec /
	 * cedrus / mediatek / qcom-iris-stateless do not consume them.
	 * Setting them keeps the libva-v4l2-request fork upstreamable
	 * across drivers without affecting hantro behaviour.
	 *
	 * Cross-reference: ext-ctrls-codec-stateless.rst Decode Parameters
	 * Flags — V4L2_H264_DECODE_PARAM_FLAG_PFRAME / _BFRAME.
	 */
	switch (surface->params.h264.slice.slice_type % 5) {
	case H264_SLICE_P:
		decode.flags |= V4L2_H264_DECODE_PARAM_FLAG_PFRAME;
		break;
	case H264_SLICE_B:
		decode.flags |= V4L2_H264_DECODE_PARAM_FLAG_BFRAME;
		break;
	default:
		/* I / SP / SI: no extra flag. */
		break;
	}

	sps.profile_idc = h264_profile_to_idc(profile);

	/*
	 * VAAPI's decode-side VAPictureParameterBufferH264 does not carry
	 * level_idc — see va.h, the field exists only in
	 * VAEncSequenceParameterBufferH264 on the encode path. The H.264
	 * SPS NAL is also not included in VASliceDataBuffer (ffmpeg-vaapi
	 * parses it client-side and forwards only slice data), so a
	 * SPS-NAL byte extractor is not viable from the bitstream we
	 * receive.
	 *
	 * Hantro and other stateless H.264 decoders use level_idc to
	 * pre-allocate decoder resources (DPB, motion-vector buffers); a
	 * zero-init level_idc=0 is invalid (lowest legal is 10 = Level
	 * 1.0) and causes hantro to silently skip the decode hardware
	 * dispatch.
	 *
	 * Hardcode level_idc = 51 (Level 5.1, max for 1080p/4K@30) as a
	 * known-incomplete intermediate. This INTENTIONALLY OVER-ALLOCATES
	 * decoder resources and is sufficient for any stream up to 4K@30.
	 * It is corpus-correct, not contract-correct.
	 *
	 * TODO: derive level_idc from (VAProfile, picture_width_in_mbs,
	 * picture_height_in_mbs) per H.264 Annex A.3 max-MB-per-second
	 * thresholds. That is a small lookup table but requires also
	 * mapping the consumer's framerate, which VAAPI doesn't provide
	 * directly. For now the over-allocation is the upstreamable
	 * compromise.
	 */
	sps.level_idc = 51;

	/*
	 * Build the per-request control list incrementally:
	 *   - SPS, PPS, DECODE_PARAMS: always required (in either decode
	 *     mode).
	 *   - SCALING_MATRIX: gated on surface->params.h264.matrix_set,
	 *     i.e. the consumer sent a VAIQMatrixBufferH264 this frame.
	 *     This matches the H.264 spec: explicit scaling lists are
	 *     present iff sps_scaling_matrix_present_flag ||
	 *     pps_scaling_matrix_present_flag, in which case VAAPI
	 *     consumers send the matrix; otherwise the kernel uses
	 *     spec-defined defaults. Independent of FRAME_BASED /
	 *     SLICE_BASED.
	 *   - SLICE_PARAMS: SLICE_BASED only. Kernel doc
	 *     ext-ctrls-codec-stateless.rst (FRAME_BASED entry):
	 *     "When this mode is selected, the
	 *     V4L2_CID_STATELESS_H264_SLICE_PARAMS control shall not be
	 *     set." Submitting it under FRAME_BASED triggers cluster-
	 *     validation EINVAL at error_idx=count.
	 *   - PRED_WEIGHTS: SLICE_BASED + V4L2_H264_CTRL_PRED_WEIGHTS_REQUIRED.
	 *
	 * Patch 0002 unconditionally sets the device to FRAME_BASED,
	 * so slice_based is hardcoded false here. When the planned
	 * probe-then-set commit lands, this becomes
	 *     context->decode_mode == V4L2_STATELESS_H264_DECODE_MODE_SLICE_BASED.
	 */
	struct v4l2_ext_control controls[6] = { 0 };
	unsigned int num_controls = 0;
	const bool slice_based = false; /* TODO: probe via context->decode_mode */

	controls[num_controls].id = V4L2_CID_STATELESS_H264_SPS;
	controls[num_controls].p_h264_sps = &sps;
	controls[num_controls].size = sizeof(sps);
	num_controls++;

	controls[num_controls].id = V4L2_CID_STATELESS_H264_PPS;
	controls[num_controls].p_h264_pps = &pps;
	controls[num_controls].size = sizeof(pps);
	num_controls++;

	controls[num_controls].id = V4L2_CID_STATELESS_H264_DECODE_PARAMS;
	controls[num_controls].p_h264_decode_params = &decode;
	controls[num_controls].size = sizeof(decode);
	num_controls++;

	if (surface->params.h264.matrix_set) {
		controls[num_controls].id = V4L2_CID_STATELESS_H264_SCALING_MATRIX;
		controls[num_controls].p_h264_scaling_matrix = &matrix;
		controls[num_controls].size = sizeof(matrix);
		num_controls++;
	}

	if (slice_based) {
		controls[num_controls].id = V4L2_CID_STATELESS_H264_SLICE_PARAMS;
		controls[num_controls].p_h264_slice_params = &slice;
		controls[num_controls].size = sizeof(slice);
		num_controls++;

		if (V4L2_H264_CTRL_PRED_WEIGHTS_REQUIRED(&pps, &slice)) {
			controls[num_controls].id = V4L2_CID_STATELESS_H264_PRED_WEIGHTS;
			controls[num_controls].ptr = &weights;
			controls[num_controls].size = sizeof(weights);
			num_controls++;
		}
	}

	rc = v4l2_set_controls(driver_data->video_fd, surface->request_fd,
			       controls, num_controls);
	if (rc < 0) {
		/*
		 * The picture will not be inserted, so drop the reservation
		 * taken above and let the slot be reused.
		 */
		output->reserved = false;
		return VA_STATUS_ERROR_OPERATION_FAILED;
	}

	dpb_insert(context, &surface->params.h264.picture.CurrPic, output);

	return VA_STATUS_SUCCESS;
}