Files
libva-v4l2-request-fourier/src/h265.c
T
claude-noether 719d813f4a iter27 α-27: populate slice_params.num_entry_point_offsets from VAAPI
BBB HEVC uses WPP (entropy_coding_sync_enabled_flag=1); slice header
contains entry_point_offset_minus1 syntax elements. libva was setting
num_entry_point_offsets=0 with the comment 'iter2 doesn't do tiles',
but WPP uses the same mechanism — rkvdec miscounted the slice header
skip distance and read slice data starting at wrong byte for P/B
frames → frame 2+ decoded with garbage reference data.

iter27 kernel printk diff:
  libva frame 2 sl[8..11]  = 00 00 00 00 (=0)
  kdirect frame 2 sl[8..11] = 16 00 00 00 (=22)

VAAPI exposes VASliceParameterBufferHEVC.num_entry_point_offsets.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-14 10:19:14 +00:00

662 lines
28 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/*
* Copyright (C) 2007 Intel Corporation
* Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com>
* Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/*
* fresnel-fourier iter2 Phase 6 commit B: rewrite h265.c against new
* V4L2_CID_STATELESS_HEVC_{SPS,PPS,SLICE_PARAMS,SCALING_MATRIX,
* DECODE_PARAMS,DECODE_MODE,START_CODE} stateless controls (mainline
* kernel <linux/v4l2-controls.h>:2090-2300).
*
* Replaces the staging-era V4L2_CID_MPEG_VIDEO_HEVC_{SPS,PPS,
* SLICE_PARAMS} CIDs that don't exist on modern kernels (verified via
* test-compile in iter2 Phase 2).
*
* Per-frame submission: one batched VIDIOC_S_EXT_CTRLS, count=5,
* ctrl_class=V4L2_CTRL_CLASS_CODEC_STATELESS:
* 0xa40a90 SPS (40 bytes)
* 0xa40a91 PPS (64 bytes)
* 0xa40a92 SLICE_PARAMS (variable; dynamic-array; one entry per slice)
* 0xa40a93 SCALING_MATRIX (1296 bytes; conditional on kernel availability)
* 0xa40a94 DECODE_PARAMS (328 bytes; per-frame DPB info)
*
* Plus device-wide menus set once at context init:
* 0xa40a95 DECODE_MODE (FRAME_BASED on rkvdec)
* 0xa40a96 START_CODE (ANNEX_B on rkvdec)
*
* Reference: FFmpeg libavcodec/v4l2_request_hevc.c:505-565
* (v4l2_request_hevc_queue_decode batched submission shape).
*
* Key Phase 5 review amendments incorporated:
* C1: data_byte_offset (NOT data_bit_offset); old bit-search dropped.
* C2: dpb_entry.flags only LONG_TERM_REFERENCE bit; pic_order_cnt_val
* (singular); poc_st_curr_*[] arrays are u8 DPB INDICES, not POC
* values (per FFmpeg get_ref_pic_index pattern).
* S1: PPS flags 19+20 (DEBLOCKING_FILTER_CONTROL_PRESENT, UNIFORM_SPACING)
* included.
* S2: PPS scalars pic_parameter_set_id, num_ref_idx_l0/l1_default_active_
* minus1 populated.
* Q2: slice_segment_addr populated from VAAPI slice->slice_segment_address.
* S3: SCALING_MATRIX content matches FFmpeg pattern — memset zero when
* iqmatrix_set==false (BBB has no scaling list in SPS flags).
*/
#include "h265.h"
#include "context.h"
#include "object_heap.h"
#include "request.h"
#include "surface.h"
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/videodev2.h>
#include <linux/v4l2-controls.h>
#include "utils.h"
#include "v4l2.h"
/*
 * Field positions inside the two-byte HEVC NAL unit header
 * (ISO/IEC 23008-2 / H.265).
 *
 * nal_unit_type is taken from the first header byte (shift right by 1,
 * keep 6 bits); nuh_temporal_id_plus1 is taken from the second header
 * byte (low 3 bits). Both are read from the slice bitstream right after
 * any ANNEX_B start code.
 */
#define H265_NAL_UNIT_TYPE_SHIFT 1
#define H265_NAL_UNIT_TYPE_MASK 0x3f
#define H265_NUH_TEMPORAL_ID_PLUS1_SHIFT 0
#define H265_NUH_TEMPORAL_ID_PLUS1_MASK 0x07
/* ===== Clause 2: SPS (40 bytes) ===== */
/*
 * Build the V4L2 stateless HEVC SPS control from the VAAPI picture
 * parameter buffer.
 *
 * @picture: VAAPI picture parameters of the frame being decoded (read only).
 * @sps:     control payload to fill; every byte is overwritten.
 *
 * Members that this function has no VAAPI source for
 * (video_parameter_set_id, seq_parameter_set_id,
 * sps_max_latency_increase_plus1, sps_max_sub_layers_minus1 and the
 * reserved bytes) keep the zero written by the memset below.
 */
static void h265_fill_sps(VAPictureParameterBufferHEVC *picture,
			  struct v4l2_ctrl_hevc_sps *sps)
{
	uint64_t enabled = 0;

	memset(sps, 0, sizeof(*sps));

	/* Picture geometry and sample format. */
	sps->pic_width_in_luma_samples = picture->pic_width_in_luma_samples;
	sps->pic_height_in_luma_samples = picture->pic_height_in_luma_samples;
	sps->chroma_format_idc = picture->pic_fields.bits.chroma_format_idc;
	sps->bit_depth_luma_minus8 = picture->bit_depth_luma_minus8;
	sps->bit_depth_chroma_minus8 = picture->bit_depth_chroma_minus8;

	/* Picture order count and reference bookkeeping. */
	sps->log2_max_pic_order_cnt_lsb_minus4 =
		picture->log2_max_pic_order_cnt_lsb_minus4;
	sps->sps_max_dec_pic_buffering_minus1 =
		picture->sps_max_dec_pic_buffering_minus1;
	/*
	 * VAAPI does not forward sps_max_num_reorder_pics. The DPB size is
	 * reused as a stand-in: per the original author's analysis it is a
	 * valid upper bound (reorder count never exceeds the DPB size) and
	 * rkvdec was observed to ignore the field, so this only keeps the
	 * payload closer to what a bitstream-parsing client would send.
	 */
	sps->sps_max_num_reorder_pics =
		picture->sps_max_dec_pic_buffering_minus1;
	sps->num_short_term_ref_pic_sets = picture->num_short_term_ref_pic_sets;
	sps->num_long_term_ref_pics_sps = picture->num_long_term_ref_pic_sps;

	/* Coding-block and transform-block size limits. */
	sps->log2_min_luma_coding_block_size_minus3 =
		picture->log2_min_luma_coding_block_size_minus3;
	sps->log2_diff_max_min_luma_coding_block_size =
		picture->log2_diff_max_min_luma_coding_block_size;
	sps->log2_min_luma_transform_block_size_minus2 =
		picture->log2_min_transform_block_size_minus2;
	sps->log2_diff_max_min_luma_transform_block_size =
		picture->log2_diff_max_min_transform_block_size;
	sps->max_transform_hierarchy_depth_inter =
		picture->max_transform_hierarchy_depth_inter;
	sps->max_transform_hierarchy_depth_intra =
		picture->max_transform_hierarchy_depth_intra;

	/* PCM sample parameters. */
	sps->pcm_sample_bit_depth_luma_minus1 =
		picture->pcm_sample_bit_depth_luma_minus1;
	sps->pcm_sample_bit_depth_chroma_minus1 =
		picture->pcm_sample_bit_depth_chroma_minus1;
	sps->log2_min_pcm_luma_coding_block_size_minus3 =
		picture->log2_min_pcm_luma_coding_block_size_minus3;
	sps->log2_diff_max_min_pcm_luma_coding_block_size =
		picture->log2_diff_max_min_pcm_luma_coding_block_size;

	/* Nine VAAPI booleans folded into the single V4L2 flags word. */
	enabled |= picture->pic_fields.bits.separate_colour_plane_flag ?
		V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE : 0;
	enabled |= picture->pic_fields.bits.scaling_list_enabled_flag ?
		V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED : 0;
	enabled |= picture->pic_fields.bits.amp_enabled_flag ?
		V4L2_HEVC_SPS_FLAG_AMP_ENABLED : 0;
	enabled |= picture->slice_parsing_fields.bits.sample_adaptive_offset_enabled_flag ?
		V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET : 0;
	enabled |= picture->pic_fields.bits.pcm_enabled_flag ?
		V4L2_HEVC_SPS_FLAG_PCM_ENABLED : 0;
	enabled |= picture->pic_fields.bits.pcm_loop_filter_disabled_flag ?
		V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED : 0;
	enabled |= picture->slice_parsing_fields.bits.long_term_ref_pics_present_flag ?
		V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT : 0;
	enabled |= picture->slice_parsing_fields.bits.sps_temporal_mvp_enabled_flag ?
		V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED : 0;
	enabled |= picture->pic_fields.bits.strong_intra_smoothing_enabled_flag ?
		V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED : 0;

	sps->flags = enabled;
}
/* ===== Clause 3: PPS (64 bytes; 21 flags + 3 newly-mapped scalars per S1+S2) ===== */
/*
 * Build the V4L2 stateless HEVC PPS control from the VAAPI picture
 * parameter buffer.
 *
 * @picture: VAAPI picture parameters of the frame being decoded (read only).
 * @slice:   first slice of the frame; kept for interface compatibility but
 *           no longer consulted (see the dependent-slice note below).
 * @pps:     control payload to fill; every byte is overwritten.
 *
 * pic_parameter_set_id has no VAAPI counterpart and stays 0, as do the
 * reserved bytes.
 */
static void h265_fill_pps(VAPictureParameterBufferHEVC *picture,
			  VASliceParameterBufferHEVC *slice,
			  struct v4l2_ctrl_hevc_pps *pps)
{
	unsigned int i, limit;

	(void)slice;

	memset(pps, 0, sizeof(*pps));

	pps->num_extra_slice_header_bits = picture->num_extra_slice_header_bits;
	pps->num_ref_idx_l0_default_active_minus1 =
		picture->num_ref_idx_l0_default_active_minus1;
	pps->num_ref_idx_l1_default_active_minus1 =
		picture->num_ref_idx_l1_default_active_minus1;
	pps->init_qp_minus26 = picture->init_qp_minus26;
	pps->diff_cu_qp_delta_depth = picture->diff_cu_qp_delta_depth;
	pps->pps_cb_qp_offset = picture->pps_cb_qp_offset;
	pps->pps_cr_qp_offset = picture->pps_cr_qp_offset;
	pps->num_tile_columns_minus1 = picture->num_tile_columns_minus1;
	pps->num_tile_rows_minus1 = picture->num_tile_rows_minus1;

	/*
	 * Tile geometry. Previously left at zero, which is only right for
	 * single-tile streams. Copy as many entries as the stream declares,
	 * bounded by both the VAAPI source array and the V4L2 destination
	 * array. The V4L2 members are narrower than the VAAPI ones, so the
	 * assignment truncates to the destination width.
	 */
	if (picture->pic_fields.bits.tiles_enabled_flag) {
		limit = sizeof(picture->column_width_minus1) /
			sizeof(picture->column_width_minus1[0]);
		if (limit > sizeof(pps->column_width_minus1) /
			    sizeof(pps->column_width_minus1[0]))
			limit = sizeof(pps->column_width_minus1) /
				sizeof(pps->column_width_minus1[0]);
		if (limit > picture->num_tile_columns_minus1 + 1U)
			limit = picture->num_tile_columns_minus1 + 1U;
		for (i = 0; i < limit; i++)
			pps->column_width_minus1[i] =
				picture->column_width_minus1[i];

		limit = sizeof(picture->row_height_minus1) /
			sizeof(picture->row_height_minus1[0]);
		if (limit > sizeof(pps->row_height_minus1) /
			    sizeof(pps->row_height_minus1[0]))
			limit = sizeof(pps->row_height_minus1) /
				sizeof(pps->row_height_minus1[0]);
		if (limit > picture->num_tile_rows_minus1 + 1U)
			limit = picture->num_tile_rows_minus1 + 1U;
		for (i = 0; i < limit; i++)
			pps->row_height_minus1[i] =
				picture->row_height_minus1[i];
	}

	pps->pps_beta_offset_div2 = picture->pps_beta_offset_div2;
	pps->pps_tc_offset_div2 = picture->pps_tc_offset_div2;
	pps->log2_parallel_merge_level_minus2 =
		picture->log2_parallel_merge_level_minus2;

	/*
	 * PPS-level enable for dependent slice segments. This must come from
	 * the picture parameters: the per-slice dependent_slice_segment_flag
	 * of the first slice segment says nothing about whether the PPS
	 * enables the feature (the first segment of a picture is never a
	 * dependent one), so deriving it from slice 0 left the bit clear.
	 */
	if (picture->slice_parsing_fields.bits.dependent_slice_segments_enabled_flag)
		pps->flags |= V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED;
	if (picture->slice_parsing_fields.bits.output_flag_present_flag)
		pps->flags |= V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT;
	if (picture->pic_fields.bits.sign_data_hiding_enabled_flag)
		pps->flags |= V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED;
	if (picture->slice_parsing_fields.bits.cabac_init_present_flag)
		pps->flags |= V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT;
	if (picture->pic_fields.bits.constrained_intra_pred_flag)
		pps->flags |= V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED;
	if (picture->pic_fields.bits.transform_skip_enabled_flag)
		pps->flags |= V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED;
	if (picture->pic_fields.bits.cu_qp_delta_enabled_flag)
		pps->flags |= V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED;
	if (picture->slice_parsing_fields.bits.pps_slice_chroma_qp_offsets_present_flag)
		pps->flags |= V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT;
	if (picture->pic_fields.bits.weighted_pred_flag)
		pps->flags |= V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED;
	if (picture->pic_fields.bits.weighted_bipred_flag)
		pps->flags |= V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED;
	if (picture->pic_fields.bits.transquant_bypass_enabled_flag)
		pps->flags |= V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED;
	if (picture->pic_fields.bits.tiles_enabled_flag)
		pps->flags |= V4L2_HEVC_PPS_FLAG_TILES_ENABLED;
	if (picture->pic_fields.bits.entropy_coding_sync_enabled_flag)
		pps->flags |= V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED;
	if (picture->pic_fields.bits.loop_filter_across_tiles_enabled_flag)
		pps->flags |= V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED;
	if (picture->pic_fields.bits.pps_loop_filter_across_slices_enabled_flag)
		pps->flags |= V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED;
	if (picture->slice_parsing_fields.bits.deblocking_filter_override_enabled_flag)
		pps->flags |= V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_OVERRIDE_ENABLED;
	if (picture->slice_parsing_fields.bits.pps_disable_deblocking_filter_flag)
		pps->flags |= V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER;
	if (picture->slice_parsing_fields.bits.lists_modification_present_flag)
		pps->flags |= V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT;
	/* Bit 18: VAAPI does carry this flag, so forward it. */
	if (picture->slice_parsing_fields.bits.slice_segment_header_extension_present_flag)
		pps->flags |= V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT;
	/*
	 * DEBLOCKING_FILTER_CONTROL_PRESENT (bit 19) and UNIFORM_SPACING
	 * (bit 20) have no field in VAPictureParameterBufferHEVC and are left
	 * clear. Tile sizes are supplied explicitly above, so UNIFORM_SPACING
	 * is not needed to describe the tile layout.
	 */
}
/* ===== Clause 6: DECODE_PARAMS (328 bytes) =====
*
* NEW in modern API. Houses DPB info that was inside slice_params in
* the staging-era. Per Phase 5 C2: dpb[].flags has only LONG_TERM_REFERENCE
* bit; dpb[].pic_order_cnt_val (singular); poc_st_curr_*[] arrays hold
* u8 DPB INDICES (not POC values).
*
* Pattern: classify each VAAPI ReferenceFrames[i] into ST_CURR_BEFORE /
* ST_CURR_AFTER / LT_CURR; populate dpb[] sequentially; record the DPB
* index in the matching classification array.
*/
/*
 * @driver_data:   driver state; its surface heap resolves VAAPI surface ids.
 * @picture:       VAAPI picture parameters of the frame being decoded.
 * @decode_params: control payload to fill; every byte is overwritten.
 *
 * decode_params->flags is left at 0 here; h265_set_controls() patches in
 * the IRAP/IDR bits once the first slice's NAL unit type is known.
 */
static void h265_fill_decode_params(struct request_data *driver_data,
				    VAPictureParameterBufferHEVC *picture,
				    struct v4l2_ctrl_hevc_decode_params *decode_params)
{
	const unsigned int num_refs = sizeof(picture->ReferenceFrames) /
				      sizeof(picture->ReferenceFrames[0]);
	struct object_surface *surface_object;
	struct v4l2_hevc_dpb_entry *entry;
	VAPictureHEVC *hevc_picture;
	unsigned int i;
	uint8_t n_active = 0;
	uint8_t n_st_before = 0, n_st_after = 0, n_lt = 0;

	memset(decode_params, 0, sizeof(*decode_params));

	decode_params->pic_order_cnt_val = picture->CurrPic.pic_order_cnt;

	for (i = 0; i < num_refs; i++) {
		hevc_picture = &picture->ReferenceFrames[i];

		/* Skip unused reference slots. */
		if (hevc_picture->picture_id == VA_INVALID_SURFACE ||
		    (hevc_picture->flags & VA_PICTURE_HEVC_INVALID))
			continue;

		/* Skip references whose surface is unknown to this driver. */
		surface_object = (struct object_surface *)
			object_heap_lookup(&driver_data->surface_heap,
					   hevc_picture->picture_id);
		if (surface_object == NULL)
			continue;

		if (n_active >= V4L2_HEVC_DPB_ENTRIES_NUM_MAX)
			break;

		entry = &decode_params->dpb[n_active];
		entry->timestamp =
			v4l2_timeval_to_ns(&surface_object->timestamp);
		entry->pic_order_cnt_val = hevc_picture->pic_order_cnt;
		entry->field_pic =
			!!(hevc_picture->flags & VA_PICTURE_HEVC_FIELD_PIC);

		/*
		 * A DPB entry is long-term when VAAPI marks it as such. The
		 * RPS_LT_CURR bit alone is not enough: a long-term picture
		 * that is kept in the DPB but not referenced by the current
		 * frame carries only VA_PICTURE_HEVC_LONG_TERM_REFERENCE.
		 */
		if (hevc_picture->flags & (VA_PICTURE_HEVC_LONG_TERM_REFERENCE |
					   VA_PICTURE_HEVC_RPS_LT_CURR))
			entry->flags = V4L2_HEVC_DPB_ENTRY_LONG_TERM_REFERENCE;

		/*
		 * Record the DPB index (not the POC) in whichever "current"
		 * list the picture belongs to; pictures in none of the three
		 * lists still occupy a DPB slot.
		 */
		if (hevc_picture->flags & VA_PICTURE_HEVC_RPS_ST_CURR_BEFORE) {
			if (n_st_before < V4L2_HEVC_DPB_ENTRIES_NUM_MAX)
				decode_params->poc_st_curr_before[n_st_before++] = n_active;
		} else if (hevc_picture->flags & VA_PICTURE_HEVC_RPS_ST_CURR_AFTER) {
			if (n_st_after < V4L2_HEVC_DPB_ENTRIES_NUM_MAX)
				decode_params->poc_st_curr_after[n_st_after++] = n_active;
		} else if (hevc_picture->flags & VA_PICTURE_HEVC_RPS_LT_CURR) {
			if (n_lt < V4L2_HEVC_DPB_ENTRIES_NUM_MAX)
				decode_params->poc_lt_curr[n_lt++] = n_active;
		}

		n_active++;
	}

	decode_params->num_active_dpb_entries = n_active;
	decode_params->num_poc_st_curr_before = n_st_before;
	decode_params->num_poc_st_curr_after = n_st_after;
	decode_params->num_poc_lt_curr = n_lt;

	/*
	 * VAAPI reports the bit length of the slice header's short-term RPS
	 * in st_rps_bits. The original author found that leaving this at 0
	 * made rkvdec mis-decode every frame after the first IDR.
	 * long_term_ref_pic_set_size and num_delta_pocs_of_ref_rps_idx have
	 * no VAAPI source and stay 0.
	 */
	decode_params->short_term_ref_pic_set_size = picture->st_rps_bits;

	/* IRAP/IDR bits are filled in by the caller (needs the NAL type). */
	decode_params->flags = 0;
}
/* ===== Clause 4: SLICE_PARAMS per slice =====
*
* Called per slice in a loop in h265_set_controls. Output is one entry
* in the dynamic-array of slice_params submitted to the kernel.
*
* source_offset is the byte offset within the surface_object->source_data
* buffer where this slice's bitstream begins (after any ANNEX_B start
* code prefix). data_byte_offset is the offset within the buffer to the
* first byte of slice header data.
*
* Per Phase 5 C1: data_byte_offset is a BYTE offset (not a bit offset).
* The old bit-search at h265.c:184-209 has been DROPPED.
*/
/*
 * Fill one V4L2 stateless HEVC slice_params entry from a VAAPI slice.
 *
 * @picture:       VAAPI picture parameters of the frame being decoded.
 * @slice:         VAAPI slice parameters for this slice.
 * @source_data:   start of the bitstream buffer holding all slices.
 * @source_offset: byte offset in @source_data of this slice's NAL unit
 *                 header (i.e. just past any start code prefix).
 * @slice_params:  destination entry; every byte is overwritten.
 *
 * The caller must guarantee that at least two bytes are readable at
 * @source_data + @source_offset; they are parsed as the NAL unit header.
 */
static void h265_fill_slice_params(VAPictureParameterBufferHEVC *picture,
				   VASliceParameterBufferHEVC *slice,
				   void *source_data,
				   unsigned int source_offset,
				   struct v4l2_ctrl_hevc_slice_params *slice_params)
{
	const uint8_t *nal_header;
	uint8_t nal_unit_type, nuh_temporal_id_plus1;
	uint8_t pic_struct;
	uint8_t slice_type;
	unsigned int ref_max, weight_max, count;
	unsigned int i, j;

	memset(slice_params, 0, sizeof(*slice_params));

	/*
	 * The VAAPI per-reference arrays are sized independently of
	 * V4L2_HEVC_DPB_ENTRIES_NUM_MAX (libva declares 15 entries), so every
	 * copy below is bounded by the smaller of the source and destination
	 * lengths. Iterating up to the V4L2 maximum read one element past the
	 * end of the VAAPI arrays.
	 */
	ref_max = sizeof(slice->RefPicList[0]) / sizeof(slice->RefPicList[0][0]);
	if (ref_max > V4L2_HEVC_DPB_ENTRIES_NUM_MAX)
		ref_max = V4L2_HEVC_DPB_ENTRIES_NUM_MAX;
	weight_max = sizeof(slice->delta_luma_weight_l0) /
		     sizeof(slice->delta_luma_weight_l0[0]);
	if (weight_max > V4L2_HEVC_DPB_ENTRIES_NUM_MAX)
		weight_max = V4L2_HEVC_DPB_ENTRIES_NUM_MAX;

	/* Parse the two-byte NAL unit header that starts the slice. */
	nal_header = (const uint8_t *)source_data + source_offset;
	nal_unit_type = (nal_header[0] >> H265_NAL_UNIT_TYPE_SHIFT) &
			H265_NAL_UNIT_TYPE_MASK;
	nuh_temporal_id_plus1 =
		(nal_header[1] >> H265_NUH_TEMPORAL_ID_PLUS1_SHIFT) &
		H265_NUH_TEMPORAL_ID_PLUS1_MASK;

	slice_params->bit_size = slice->slice_data_size * 8;
	/* Byte (not bit) offset inside the OUTPUT buffer. */
	slice_params->data_byte_offset =
		source_offset + slice->slice_data_byte_offset;

	/*
	 * Entry points are used by WPP as well as tiles; the hardware needs
	 * the count to skip the entry_point_offset syntax in the slice header.
	 */
	slice_params->num_entry_point_offsets = slice->num_entry_point_offsets;
	request_log("iter27diag: slice %p num_entry_point_offsets=%u\n",
		    (void *)slice, (unsigned)slice->num_entry_point_offsets);

	slice_params->nal_unit_type = nal_unit_type;
	slice_params->nuh_temporal_id_plus1 = nuh_temporal_id_plus1;

	slice_type = slice->LongSliceFlags.fields.slice_type;
	slice_params->slice_type = slice_type;
	slice_params->colour_plane_id =
		slice->LongSliceFlags.fields.color_plane_id;
	slice_params->slice_pic_order_cnt = picture->CurrPic.pic_order_cnt;
	slice_params->num_ref_idx_l0_active_minus1 =
		slice->num_ref_idx_l0_active_minus1;
	slice_params->num_ref_idx_l1_active_minus1 =
		slice->num_ref_idx_l1_active_minus1;
	slice_params->collocated_ref_idx = slice->collocated_ref_idx;
	slice_params->five_minus_max_num_merge_cand =
		slice->five_minus_max_num_merge_cand;
	slice_params->slice_qp_delta = slice->slice_qp_delta;
	slice_params->slice_cb_qp_offset = slice->slice_cb_qp_offset;
	slice_params->slice_cr_qp_offset = slice->slice_cr_qp_offset;
	/* slice_act_*_qp_offset: no VAAPI source, left 0 by the memset. */
	slice_params->slice_beta_offset_div2 = slice->slice_beta_offset_div2;
	slice_params->slice_tc_offset_div2 = slice->slice_tc_offset_div2;

	/* 0 = frame, 1 = top field, 2 = bottom field. */
	pic_struct = 0;
	if (picture->CurrPic.flags & VA_PICTURE_HEVC_FIELD_PIC)
		pic_struct = (picture->CurrPic.flags &
			      VA_PICTURE_HEVC_BOTTOM_FIELD) ? 2 : 1;
	slice_params->pic_struct = pic_struct;

	slice_params->slice_segment_addr = slice->slice_segment_address;

	/*
	 * Reference index lists; unused for I slices, L1 only for B slices.
	 * NOTE(review): the values are copied verbatim from VAAPI RefPicList,
	 * which presumably indexes ReferenceFrames[]; this equals the V4L2
	 * DPB index only while h265_fill_decode_params() skips no earlier
	 * ReferenceFrames[] slot — confirm against callers.
	 */
	if (slice_type != V4L2_HEVC_SLICE_TYPE_I) {
		count = slice->num_ref_idx_l0_active_minus1 + 1U;
		if (count > ref_max)
			count = ref_max;
		for (i = 0; i < count; i++)
			slice_params->ref_idx_l0[i] = slice->RefPicList[0][i];
	}
	if (slice_type == V4L2_HEVC_SLICE_TYPE_B) {
		count = slice->num_ref_idx_l1_active_minus1 + 1U;
		if (count > ref_max)
			count = ref_max;
		for (i = 0; i < count; i++)
			slice_params->ref_idx_l1[i] = slice->RefPicList[1][i];
	}

	/*
	 * Same quantity h265_fill_decode_params() forwards: the bit length
	 * of the short-term RPS coded in the slice header. VAAPI has no
	 * equivalent for the long-term size, which stays 0.
	 */
	slice_params->short_term_ref_pic_set_size = picture->st_rps_bits;
	slice_params->long_term_ref_pic_set_size = 0;

	/* Prediction weight table. */
	slice_params->pred_weight_table.luma_log2_weight_denom =
		slice->luma_log2_weight_denom;
	slice_params->pred_weight_table.delta_chroma_log2_weight_denom =
		slice->delta_chroma_log2_weight_denom;

	if (slice_type != V4L2_HEVC_SLICE_TYPE_I) {
		for (i = 0; i < weight_max; i++) {
			slice_params->pred_weight_table.delta_luma_weight_l0[i] =
				slice->delta_luma_weight_l0[i];
			slice_params->pred_weight_table.luma_offset_l0[i] =
				slice->luma_offset_l0[i];
			for (j = 0; j < 2; j++) {
				slice_params->pred_weight_table.delta_chroma_weight_l0[i][j] =
					slice->delta_chroma_weight_l0[i][j];
				slice_params->pred_weight_table.chroma_offset_l0[i][j] =
					slice->ChromaOffsetL0[i][j];
			}
		}
	}
	if (slice_type == V4L2_HEVC_SLICE_TYPE_B) {
		for (i = 0; i < weight_max; i++) {
			slice_params->pred_weight_table.delta_luma_weight_l1[i] =
				slice->delta_luma_weight_l1[i];
			slice_params->pred_weight_table.luma_offset_l1[i] =
				slice->luma_offset_l1[i];
			for (j = 0; j < 2; j++) {
				slice_params->pred_weight_table.delta_chroma_weight_l1[i][j] =
					slice->delta_chroma_weight_l1[i][j];
				slice_params->pred_weight_table.chroma_offset_l1[i][j] =
					slice->ChromaOffsetL1[i][j];
			}
		}
	}

	/* Per-slice flag bits. USE_INTEGER_MV has no VAAPI source. */
	if (slice->LongSliceFlags.fields.slice_sao_luma_flag)
		slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_LUMA;
	if (slice->LongSliceFlags.fields.slice_sao_chroma_flag)
		slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_CHROMA;
	if (slice->LongSliceFlags.fields.slice_temporal_mvp_enabled_flag)
		slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED;
	if (slice->LongSliceFlags.fields.mvd_l1_zero_flag)
		slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_MVD_L1_ZERO;
	if (slice->LongSliceFlags.fields.cabac_init_flag)
		slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_CABAC_INIT;
	if (slice->LongSliceFlags.fields.collocated_from_l0_flag)
		slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_COLLOCATED_FROM_L0;
	if (slice->LongSliceFlags.fields.slice_deblocking_filter_disabled_flag)
		slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED;
	if (slice->LongSliceFlags.fields.slice_loop_filter_across_slices_enabled_flag)
		slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED;
	if (slice->LongSliceFlags.fields.dependent_slice_segment_flag)
		slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT;
}
/* ===== Clause 5: SCALING_MATRIX (1296 bytes; conditional fill) =====
*
* Per Phase 5 S3: when iqmatrix_set==false (BBB has no scaling list
* in SPS flags), send memset-zero. Matches FFmpeg's pattern when the
* stream has no scaling list. When iqmatrix_set==true, copy from VAAPI
* VAIQMatrixBufferHEVC.
*/
/*
 * Fill the V4L2 scaling-matrix control.
 *
 * @iqmatrix:       VAAPI IQ matrix buffer; only read when @iqmatrix_set.
 * @iqmatrix_set:   whether the application supplied an IQ matrix.
 * @scaling_matrix: control payload; zeroed, then overwritten with the
 *                  VAAPI lists when one was supplied.
 */
static void h265_fill_scaling_matrix(VAIQMatrixBufferHEVC *iqmatrix,
				     bool iqmatrix_set,
				     struct v4l2_ctrl_hevc_scaling_matrix *scaling_matrix)
{
	/* An all-zero payload is what gets sent when no matrix was given. */
	memset(scaling_matrix, 0, sizeof(*scaling_matrix));

	if (iqmatrix_set) {
		/* Copy sizes are taken from the VAAPI (source) arrays. */
		memcpy(scaling_matrix->scaling_list_4x4,
		       iqmatrix->ScalingList4x4,
		       sizeof(iqmatrix->ScalingList4x4));
		memcpy(scaling_matrix->scaling_list_8x8,
		       iqmatrix->ScalingList8x8,
		       sizeof(iqmatrix->ScalingList8x8));
		memcpy(scaling_matrix->scaling_list_16x16,
		       iqmatrix->ScalingList16x16,
		       sizeof(iqmatrix->ScalingList16x16));
		memcpy(scaling_matrix->scaling_list_32x32,
		       iqmatrix->ScalingList32x32,
		       sizeof(iqmatrix->ScalingList32x32));
		memcpy(scaling_matrix->scaling_list_dc_coef_16x16,
		       iqmatrix->ScalingListDC16x16,
		       sizeof(iqmatrix->ScalingListDC16x16));
		memcpy(scaling_matrix->scaling_list_dc_coef_32x32,
		       iqmatrix->ScalingListDC32x32,
		       sizeof(iqmatrix->ScalingListDC32x32));
	}
}
/* ===== Clause 1: orchestrator — batched 5-control submission ===== */
int h265_set_controls(struct request_data *driver_data,
struct object_context *context_object,
struct object_surface *surface_object)
{
VAPictureParameterBufferHEVC *picture =
&surface_object->params.h265.picture;
VAIQMatrixBufferHEVC *iqmatrix =
&surface_object->params.h265.iqmatrix;
bool iqmatrix_set = surface_object->params.h265.iqmatrix_set;
unsigned int num_slices = surface_object->params.h265.num_slices;
struct v4l2_ctrl_hevc_sps sps;
struct v4l2_ctrl_hevc_pps pps;
struct v4l2_ctrl_hevc_decode_params decode_params;
struct v4l2_ctrl_hevc_scaling_matrix scaling_matrix;
struct v4l2_ctrl_hevc_slice_params *slice_params_array = NULL;
struct v4l2_ext_control controls[5];
unsigned int n = 0;
unsigned int i;
unsigned int prefix_bytes;
unsigned int cumulative_offset = 0;
int rc;
if (num_slices == 0)
return VA_STATUS_ERROR_OPERATION_FAILED;
slice_params_array = calloc(num_slices,
sizeof(struct v4l2_ctrl_hevc_slice_params));
if (slice_params_array == NULL)
return VA_STATUS_ERROR_ALLOCATION_FAILED;
/* Per-slice fill. ANNEX_B start code (3 bytes 0x00 0x00 0x01) is
* prepended per slice by codec_store_buffer:68-75 when
* context->h264_start_code is true. Track cumulative offset
* accordingly. */
prefix_bytes = context_object->h264_start_code ? 3 : 0;
for (i = 0; i < num_slices; i++) {
VASliceParameterBufferHEVC *slice =
&surface_object->params.h265.slices[i];
cumulative_offset += prefix_bytes; /* skip start code prefix for this slice */
h265_fill_slice_params(picture, slice,
surface_object->source_data,
cumulative_offset,
&slice_params_array[i]);
cumulative_offset += slice->slice_data_size;
}
h265_fill_sps(picture, &sps);
h265_fill_pps(picture, &surface_object->params.h265.slices[0], &pps);
h265_fill_decode_params(driver_data, picture, &decode_params);
h265_fill_scaling_matrix(iqmatrix, iqmatrix_set, &scaling_matrix);
/*
* iter11 α-14: derive IRAP_PIC / IDR_PIC flags from the first
* slice's nal_unit_type (already parsed by h265_fill_slice_params
* from the bitstream into slice_params_array[0].nal_unit_type).
*
* H.265 §7.4.2.2:
* nal_unit_type 16..23 are IRAP (random access).
* nal_unit_type 19 (IDR_W_RADL) and 20 (IDR_N_LP) are IDR.
*
* Without setting these, rkvdec doesn't recognise the keyframe
* boundary, treats the IDR as inter without references, and
* produces all-zero CAPTURE output. Phase 3 confirmed kdirect
* (ffmpeg-v4l2request) sets flags=0x03 (IRAP|IDR) on frame 1
* and decodes correctly through the same kernel.
*/
if (num_slices > 0) {
uint8_t nut = slice_params_array[0].nal_unit_type;
if (nut >= 16 && nut <= 23)
decode_params.flags |=
V4L2_HEVC_DECODE_PARAM_FLAG_IRAP_PIC;
if (nut == 19 || nut == 20)
decode_params.flags |=
V4L2_HEVC_DECODE_PARAM_FLAG_IDR_PIC;
}
controls[n++] = (struct v4l2_ext_control){
.id = V4L2_CID_STATELESS_HEVC_SPS,
.ptr = &sps,
.size = sizeof(sps),
};
controls[n++] = (struct v4l2_ext_control){
.id = V4L2_CID_STATELESS_HEVC_PPS,
.ptr = &pps,
.size = sizeof(pps),
};
controls[n++] = (struct v4l2_ext_control){
.id = V4L2_CID_STATELESS_HEVC_SLICE_PARAMS,
.ptr = slice_params_array,
.size = sizeof(struct v4l2_ctrl_hevc_slice_params) * num_slices,
};
controls[n++] = (struct v4l2_ext_control){
.id = V4L2_CID_STATELESS_HEVC_SCALING_MATRIX,
.ptr = &scaling_matrix,
.size = sizeof(scaling_matrix),
};
controls[n++] = (struct v4l2_ext_control){
.id = V4L2_CID_STATELESS_HEVC_DECODE_PARAMS,
.ptr = &decode_params,
.size = sizeof(decode_params),
};
rc = v4l2_set_controls(driver_data->video_fd,
surface_object->request_fd,
controls, n);
free(slice_params_array);
if (rc < 0)
return VA_STATUS_ERROR_OPERATION_FAILED;
return 0;
}