Files
libva-v4l2-request-fourier/src/h265.c
T
claude-noether 23eb1bd5ae iter31 α-29: slice_params.short_term_ref_pic_set_size = picture->st_rps_bits
ROOT CAUSE FIX for HEVC frame 2+ divergence (Bug 5 remainder).

rkvdec's assemble_sw_rps (rkvdec-hevc.c:386-389) uses
sl_params->short_term_ref_pic_set_size to compute the bit offset where
long-term RPS data starts in the slice header. When zero, it falls back
to fls(num_short_term_ref_pic_sets - 1) — wrong when num=1 (BBB's case).

α-26 misdirected: set decode_params->short_term_ref_pic_set_size = st_rps_bits
but rkvdec doesn't use that field. The correct consumer is slice_params per
V4L2 spec and rkvdec source.

VAAPI's picture->st_rps_bits is documented as: 'number of bits that structure
short_term_ref_pic_set(num_short_term_ref_pic_sets) takes in slice segment
header when short_term_ref_pic_set_sps_flag equals 0' — exactly what
sl_params->short_term_ref_pic_set_size means.

Frame 1 (IDR) is unaffected (the rkvdec V4L2 driver gates this path on the !IDR_PIC flag).
Frames 2+: bit offset for long-term RPS now correct, slice header parsing
no longer falls off the edge of the entropy bitstream.
2026-05-14 15:28:44 +00:00

724 lines
31 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/*
* Copyright (C) 2007 Intel Corporation
* Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com>
* Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/*
* fresnel-fourier iter2 Phase 6 commit B: rewrite h265.c against new
* V4L2_CID_STATELESS_HEVC_{SPS,PPS,SLICE_PARAMS,SCALING_MATRIX,
* DECODE_PARAMS,DECODE_MODE,START_CODE} stateless controls (mainline
* kernel <linux/v4l2-controls.h>:2090-2300).
*
* Replaces the staging-era V4L2_CID_MPEG_VIDEO_HEVC_{SPS,PPS,
* SLICE_PARAMS} CIDs that don't exist on modern kernels (verified via
* test-compile in iter2 Phase 2).
*
* Per-frame submission: one batched VIDIOC_S_EXT_CTRLS, count=5,
* ctrl_class=V4L2_CTRL_CLASS_CODEC_STATELESS:
* 0xa40a90 SPS (40 bytes)
* 0xa40a91 PPS (64 bytes)
* 0xa40a92 SLICE_PARAMS (variable; dynamic-array; one entry per slice)
* 0xa40a93 SCALING_MATRIX (1296 bytes; conditional on kernel availability)
* 0xa40a94 DECODE_PARAMS (328 bytes; per-frame DPB info)
*
* Plus device-wide menus set once at context init:
* 0xa40a95 DECODE_MODE (FRAME_BASED on rkvdec)
* 0xa40a96 START_CODE (ANNEX_B on rkvdec)
*
* Reference: FFmpeg libavcodec/v4l2_request_hevc.c:505-565
* (v4l2_request_hevc_queue_decode batched submission shape).
*
* Key Phase 5 review amendments incorporated:
* C1: data_byte_offset (NOT data_bit_offset); old bit-search dropped.
* C2: dpb_entry.flags only LONG_TERM_REFERENCE bit; pic_order_cnt_val
* (singular); poc_st_curr_*[] arrays are u8 DPB INDICES, not POC
* values (per FFmpeg get_ref_pic_index pattern).
* S1: PPS flags 19+20 (DEBLOCKING_FILTER_CONTROL_PRESENT, UNIFORM_SPACING)
* were reviewed; VAAPI exposes neither, so both bits are left clear.
* S2: PPS scalars pic_parameter_set_id, num_ref_idx_l0/l1_default_active_
* minus1 populated.
* Q2: slice_segment_addr populated from VAAPI slice->slice_segment_address.
* S3: SCALING_MATRIX content matches FFmpeg pattern — memset zero when
* iqmatrix_set==false (BBB has no scaling list in SPS flags).
*/
#include "h265.h"
#include "context.h"
#include "object_heap.h"
#include "request.h"
#include "surface.h"
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/videodev2.h>
#include <linux/v4l2-controls.h>
#include "utils.h"
#include "v4l2.h"
/*
 * Layout of the two-byte HEVC NAL unit header (ISO/IEC 23008-2 / H.265).
 *
 * Byte 0 carries nal_unit_type in the six bits above its least
 * significant bit; byte 1 carries nuh_temporal_id_plus1 in its three
 * least significant bits. Each field is recovered as
 * (byte >> SHIFT) & MASK from the first two bytes of the slice
 * bitstream (after any ANNEX_B start code).
 */
#define H265_NAL_UNIT_TYPE_SHIFT 1
#define H265_NAL_UNIT_TYPE_MASK 0x3f /* six bits */
#define H265_NUH_TEMPORAL_ID_PLUS1_SHIFT 0
#define H265_NUH_TEMPORAL_ID_PLUS1_MASK 0x07 /* three bits */
/* ===== Clause 2: SPS (40 bytes) ===== */
/*
 * Build the V4L2 stateless HEVC SPS control from a VAAPI picture
 * parameter buffer.
 *
 * @picture: VAAPI picture parameters of the frame being decoded (read only).
 * @sps:     destination control payload; completely overwritten.
 *
 * Fields with no VAAPI counterpart (video_parameter_set_id,
 * seq_parameter_set_id, sps_max_sub_layers_minus1,
 * sps_max_latency_increase_plus1 and the reserved bytes) keep the zero
 * written by the initial memset.
 */
static void h265_fill_sps(VAPictureParameterBufferHEVC *picture,
			  struct v4l2_ctrl_hevc_sps *sps)
{
	uint64_t sps_flags = 0;

	memset(sps, 0, sizeof(*sps));

	/* Picture geometry and sample format. */
	sps->pic_width_in_luma_samples = picture->pic_width_in_luma_samples;
	sps->pic_height_in_luma_samples = picture->pic_height_in_luma_samples;
	sps->chroma_format_idc = picture->pic_fields.bits.chroma_format_idc;
	sps->bit_depth_luma_minus8 = picture->bit_depth_luma_minus8;
	sps->bit_depth_chroma_minus8 = picture->bit_depth_chroma_minus8;

	/* POC and DPB sizing. */
	sps->log2_max_pic_order_cnt_lsb_minus4 =
		picture->log2_max_pic_order_cnt_lsb_minus4;
	sps->sps_max_dec_pic_buffering_minus1 =
		picture->sps_max_dec_pic_buffering_minus1;
	/*
	 * VAAPI does not forward sps_max_num_reorder_pics. The DPB size is
	 * used as an upper bound instead (the reorder count never exceeds
	 * sps_max_dec_pic_buffering_minus1 per H.265 A.4.2). Earlier testing
	 * noted in this file's history found rkvdec on RK3399 ignores the
	 * field, so this is payload hygiene rather than a behaviour knob.
	 */
	sps->sps_max_num_reorder_pics =
		picture->sps_max_dec_pic_buffering_minus1;

	/* Coding / transform block structure. */
	sps->log2_min_luma_coding_block_size_minus3 =
		picture->log2_min_luma_coding_block_size_minus3;
	sps->log2_diff_max_min_luma_coding_block_size =
		picture->log2_diff_max_min_luma_coding_block_size;
	sps->log2_min_luma_transform_block_size_minus2 =
		picture->log2_min_transform_block_size_minus2;
	sps->log2_diff_max_min_luma_transform_block_size =
		picture->log2_diff_max_min_transform_block_size;
	sps->max_transform_hierarchy_depth_intra =
		picture->max_transform_hierarchy_depth_intra;
	sps->max_transform_hierarchy_depth_inter =
		picture->max_transform_hierarchy_depth_inter;

	/* PCM parameters. */
	sps->pcm_sample_bit_depth_luma_minus1 =
		picture->pcm_sample_bit_depth_luma_minus1;
	sps->pcm_sample_bit_depth_chroma_minus1 =
		picture->pcm_sample_bit_depth_chroma_minus1;
	sps->log2_min_pcm_luma_coding_block_size_minus3 =
		picture->log2_min_pcm_luma_coding_block_size_minus3;
	sps->log2_diff_max_min_pcm_luma_coding_block_size =
		picture->log2_diff_max_min_pcm_luma_coding_block_size;

	/* Reference picture set counts. */
	sps->num_short_term_ref_pic_sets = picture->num_short_term_ref_pic_sets;
	sps->num_long_term_ref_pics_sps = picture->num_long_term_ref_pic_sps;

	/* Nine boolean syntax elements folded into the 64-bit flag word. */
	sps_flags |= picture->pic_fields.bits.separate_colour_plane_flag ?
		     V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE : 0;
	sps_flags |= picture->pic_fields.bits.scaling_list_enabled_flag ?
		     V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED : 0;
	sps_flags |= picture->pic_fields.bits.amp_enabled_flag ?
		     V4L2_HEVC_SPS_FLAG_AMP_ENABLED : 0;
	sps_flags |= picture->slice_parsing_fields.bits.sample_adaptive_offset_enabled_flag ?
		     V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET : 0;
	sps_flags |= picture->pic_fields.bits.pcm_enabled_flag ?
		     V4L2_HEVC_SPS_FLAG_PCM_ENABLED : 0;
	sps_flags |= picture->pic_fields.bits.pcm_loop_filter_disabled_flag ?
		     V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED : 0;
	sps_flags |= picture->slice_parsing_fields.bits.long_term_ref_pics_present_flag ?
		     V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT : 0;
	sps_flags |= picture->slice_parsing_fields.bits.sps_temporal_mvp_enabled_flag ?
		     V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED : 0;
	sps_flags |= picture->pic_fields.bits.strong_intra_smoothing_enabled_flag ?
		     V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED : 0;

	sps->flags = sps_flags;
}
/* ===== Clause 3: PPS (64 bytes) ===== */
/*
 * Build the V4L2 stateless HEVC PPS control from a VAAPI picture
 * parameter buffer.
 *
 * @picture: VAAPI picture parameters of the frame being decoded (read only).
 * @slice:   first slice of the frame. Kept for interface compatibility;
 *           no PPS field is derived from it any more (see below).
 * @pps:     destination control payload; completely overwritten.
 *
 * pic_parameter_set_id has no VAAPI counterpart and keeps the zero
 * written by the initial memset, as do the reserved bytes.
 */
static void h265_fill_pps(VAPictureParameterBufferHEVC *picture,
			  VASliceParameterBufferHEVC *slice,
			  struct v4l2_ctrl_hevc_pps *pps)
{
	unsigned int i;

	(void)slice;

	memset(pps, 0, sizeof(*pps));

	pps->num_extra_slice_header_bits = picture->num_extra_slice_header_bits;
	pps->num_ref_idx_l0_default_active_minus1 =
		picture->num_ref_idx_l0_default_active_minus1;
	pps->num_ref_idx_l1_default_active_minus1 =
		picture->num_ref_idx_l1_default_active_minus1;
	pps->init_qp_minus26 = picture->init_qp_minus26;
	pps->diff_cu_qp_delta_depth = picture->diff_cu_qp_delta_depth;
	pps->pps_cb_qp_offset = picture->pps_cb_qp_offset;
	pps->pps_cr_qp_offset = picture->pps_cr_qp_offset;
	pps->num_tile_columns_minus1 = picture->num_tile_columns_minus1;
	pps->num_tile_rows_minus1 = picture->num_tile_rows_minus1;
	pps->pps_beta_offset_div2 = picture->pps_beta_offset_div2;
	pps->pps_tc_offset_div2 = picture->pps_tc_offset_div2;
	pps->log2_parallel_merge_level_minus2 =
		picture->log2_parallel_merge_level_minus2;

	/*
	 * Tile geometry. VAAPI carries explicit per-tile sizes, so forward
	 * them whenever tiles are enabled instead of leaving the arrays
	 * zero. The copy is bounded by the shorter of the two arrays; the
	 * V4L2 entries are 8-bit, hence the narrowing cast.
	 */
	if (picture->pic_fields.bits.tiles_enabled_flag) {
		unsigned int max_cols = sizeof(picture->column_width_minus1) /
					sizeof(picture->column_width_minus1[0]);
		unsigned int max_rows = sizeof(picture->row_height_minus1) /
					sizeof(picture->row_height_minus1[0]);
		const unsigned int dst_cols = sizeof(pps->column_width_minus1) /
					      sizeof(pps->column_width_minus1[0]);
		const unsigned int dst_rows = sizeof(pps->row_height_minus1) /
					      sizeof(pps->row_height_minus1[0]);

		if (max_cols > dst_cols)
			max_cols = dst_cols;
		if (max_rows > dst_rows)
			max_rows = dst_rows;

		for (i = 0; i <= picture->num_tile_columns_minus1 &&
			    i < max_cols; i++)
			pps->column_width_minus1[i] =
				(uint8_t)picture->column_width_minus1[i];
		for (i = 0; i <= picture->num_tile_rows_minus1 &&
			    i < max_rows; i++)
			pps->row_height_minus1[i] =
				(uint8_t)picture->row_height_minus1[i];
	}

	/*
	 * dependent_slice_segments_enabled_flag is a PPS syntax element and
	 * VAAPI exposes it in slice_parsing_fields. It must not be derived
	 * from the first slice's own dependent_slice_segment_flag: the first
	 * segment of a picture is never a dependent one, so that test would
	 * leave the PPS flag permanently clear.
	 */
	if (picture->slice_parsing_fields.bits.dependent_slice_segments_enabled_flag)
		pps->flags |= V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED;
	if (picture->slice_parsing_fields.bits.output_flag_present_flag)
		pps->flags |= V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT;
	if (picture->pic_fields.bits.sign_data_hiding_enabled_flag)
		pps->flags |= V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED;
	if (picture->slice_parsing_fields.bits.cabac_init_present_flag)
		pps->flags |= V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT;
	if (picture->pic_fields.bits.constrained_intra_pred_flag)
		pps->flags |= V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED;
	if (picture->pic_fields.bits.transform_skip_enabled_flag)
		pps->flags |= V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED;
	if (picture->pic_fields.bits.cu_qp_delta_enabled_flag)
		pps->flags |= V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED;
	if (picture->slice_parsing_fields.bits.pps_slice_chroma_qp_offsets_present_flag)
		pps->flags |= V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT;
	if (picture->pic_fields.bits.weighted_pred_flag)
		pps->flags |= V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED;
	if (picture->pic_fields.bits.weighted_bipred_flag)
		pps->flags |= V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED;
	if (picture->pic_fields.bits.transquant_bypass_enabled_flag)
		pps->flags |= V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED;
	if (picture->pic_fields.bits.tiles_enabled_flag)
		pps->flags |= V4L2_HEVC_PPS_FLAG_TILES_ENABLED;
	if (picture->pic_fields.bits.entropy_coding_sync_enabled_flag)
		pps->flags |= V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED;
	if (picture->pic_fields.bits.loop_filter_across_tiles_enabled_flag)
		pps->flags |= V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED;
	if (picture->pic_fields.bits.pps_loop_filter_across_slices_enabled_flag)
		pps->flags |= V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED;
	if (picture->slice_parsing_fields.bits.deblocking_filter_override_enabled_flag)
		pps->flags |= V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_OVERRIDE_ENABLED;
	if (picture->slice_parsing_fields.bits.pps_disable_deblocking_filter_flag)
		pps->flags |= V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER;
	if (picture->slice_parsing_fields.bits.lists_modification_present_flag)
		pps->flags |= V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT;
	if (picture->slice_parsing_fields.bits.slice_segment_header_extension_present_flag)
		pps->flags |= V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT;

	/*
	 * DEBLOCKING_FILTER_CONTROL_PRESENT (bit 19) and UNIFORM_SPACING
	 * (bit 20) have no field in VAPictureParameterBufferHEVC and stay
	 * clear. Leaving UNIFORM_SPACING clear is consistent with passing
	 * the explicit tile sizes above.
	 */
}
/* ===== Clause 6: DECODE_PARAMS (328 bytes) =====
 *
 * Build the per-frame V4L2 stateless HEVC DECODE_PARAMS control: the
 * current picture's POC, the DPB description and the three "current"
 * reference lists.
 *
 * @driver_data:   driver state; its surface heap resolves VA surface ids.
 * @picture:       VAAPI picture parameters of the frame being decoded.
 * @decode_params: destination control payload; completely overwritten.
 *
 * Every valid entry of picture->ReferenceFrames[] whose surface can be
 * looked up is appended to dpb[] in order. poc_st_curr_before[],
 * poc_st_curr_after[] and poc_lt_curr[] hold the dpb[] INDEX (u8) of
 * the matching entry, not a POC value.
 *
 * flags is left at 0; the caller sets the IRAP/IDR bits once the slice
 * NAL unit type is known.
 */
static void h265_fill_decode_params(struct request_data *driver_data,
				    VAPictureParameterBufferHEVC *picture,
				    struct v4l2_ctrl_hevc_decode_params *decode_params)
{
	const unsigned int n_refs = sizeof(picture->ReferenceFrames) /
				    sizeof(picture->ReferenceFrames[0]);
	/*
	 * A picture is long-term if VAAPI marks it as such. RPS_LT_CURR is
	 * accepted too, so a client that only sets the RPS classification
	 * is still handled. Testing RPS_LT_CURR alone would drop the
	 * long-term marking of pictures that are retained but not
	 * referenced by the current picture.
	 */
	const uint32_t long_term_mask = VA_PICTURE_HEVC_LONG_TERM_REFERENCE |
					VA_PICTURE_HEVC_RPS_LT_CURR;
	struct object_surface *surface_object;
	VAPictureHEVC *hevc_picture;
	unsigned int i;
	uint8_t n_active = 0;
	uint8_t n_st_before = 0, n_st_after = 0, n_lt = 0;

	memset(decode_params, 0, sizeof(*decode_params));

	decode_params->pic_order_cnt_val = picture->CurrPic.pic_order_cnt;

	for (i = 0; i < n_refs; i++) {
		hevc_picture = &picture->ReferenceFrames[i];

		/* Skip unused slots and surfaces this driver does not know. */
		if (hevc_picture->picture_id == VA_INVALID_SURFACE ||
		    (hevc_picture->flags & VA_PICTURE_HEVC_INVALID))
			continue;

		surface_object = (struct object_surface *)
			object_heap_lookup(&driver_data->surface_heap,
					   hevc_picture->picture_id);
		if (surface_object == NULL)
			continue;

		/* Defensive: never write past the V4L2 dpb[] array. */
		if (n_active >= V4L2_HEVC_DPB_ENTRIES_NUM_MAX)
			break;

		decode_params->dpb[n_active].timestamp =
			v4l2_timeval_to_ns(&surface_object->timestamp);
		decode_params->dpb[n_active].pic_order_cnt_val =
			hevc_picture->pic_order_cnt;
		decode_params->dpb[n_active].field_pic =
			!!(hevc_picture->flags & VA_PICTURE_HEVC_FIELD_PIC);
		decode_params->dpb[n_active].flags =
			(hevc_picture->flags & long_term_mask) ?
			V4L2_HEVC_DPB_ENTRY_LONG_TERM_REFERENCE : 0;

		/*
		 * Record the DPB index in whichever "current" list the
		 * picture belongs to. Pictures in none of the three lists
		 * still occupy a dpb[] slot.
		 */
		if (hevc_picture->flags & VA_PICTURE_HEVC_RPS_ST_CURR_BEFORE) {
			if (n_st_before < V4L2_HEVC_DPB_ENTRIES_NUM_MAX)
				decode_params->poc_st_curr_before[n_st_before++] = n_active;
		} else if (hevc_picture->flags & VA_PICTURE_HEVC_RPS_ST_CURR_AFTER) {
			if (n_st_after < V4L2_HEVC_DPB_ENTRIES_NUM_MAX)
				decode_params->poc_st_curr_after[n_st_after++] = n_active;
		} else if (hevc_picture->flags & VA_PICTURE_HEVC_RPS_LT_CURR) {
			if (n_lt < V4L2_HEVC_DPB_ENTRIES_NUM_MAX)
				decode_params->poc_lt_curr[n_lt++] = n_active;
		}

		n_active++;
	}

	decode_params->num_active_dpb_entries = n_active;
	decode_params->num_poc_st_curr_before = n_st_before;
	decode_params->num_poc_st_curr_after = n_st_after;
	decode_params->num_poc_lt_curr = n_lt;

	/*
	 * Bit length of the slice header's short_term_ref_pic_set(), taken
	 * from VAAPI's st_rps_bits. This file's history records that rkvdec
	 * reads the value from slice_params rather than from here; it is
	 * still filled in so the control is complete.
	 * long_term_ref_pic_set_size and num_delta_pocs_of_ref_rps_idx have
	 * no VAAPI source and stay zero.
	 */
	decode_params->short_term_ref_pic_set_size = picture->st_rps_bits;

	/* IRAP/IDR bits are patched in by h265_set_controls(). */
	decode_params->flags = 0;
}
/* ===== Clause 4: SLICE_PARAMS per slice =====
 *
 * Build one entry of the V4L2 stateless HEVC SLICE_PARAMS dynamic array.
 * Called once per slice by h265_set_controls().
 *
 * @picture:       VAAPI picture parameters of the frame being decoded.
 * @slice:         VAAPI parameters of this slice.
 * @source_data:   start of the buffer holding the frame's bitstream.
 * @source_offset: byte offset in @source_data of this slice's NAL unit
 *                 header (i.e. just past any ANNEX_B start code).
 * @slice_params:  destination entry; completely overwritten.
 *
 * The two bytes at @source_data + @source_offset are read
 * unconditionally, so the caller must guarantee they exist.
 *
 * NOTE(review): ref_idx_l0/l1 are copied verbatim from VAAPI RefPicList,
 * while h265_fill_decode_params() builds dpb[] by skipping invalid
 * ReferenceFrames[] slots. The two index spaces presumably only agree
 * when the client fills ReferenceFrames[] without gaps; confirm against
 * the VAAPI clients in use.
 */
static void h265_fill_slice_params(VAPictureParameterBufferHEVC *picture,
				   VASliceParameterBufferHEVC *slice,
				   void *source_data,
				   unsigned int source_offset,
				   struct v4l2_ctrl_hevc_slice_params *slice_params)
{
	/*
	 * Number of per-reference slots that may be copied: the shorter of
	 * the VAAPI arrays and the V4L2 arrays. Iterating to the V4L2 limit
	 * alone would read one element past the end of every VAAPI array.
	 */
	unsigned int ref_slots = sizeof(slice->RefPicList[0]) /
				 sizeof(slice->RefPicList[0][0]);
	unsigned int n_l0, n_l1;
	uint32_t payload_bytes;
	uint8_t *b;
	uint8_t nal_unit_type, nuh_temporal_id_plus1;
	uint8_t pic_struct;
	uint8_t slice_type;
	unsigned int i, j;

	if (ref_slots > V4L2_HEVC_DPB_ENTRIES_NUM_MAX)
		ref_slots = V4L2_HEVC_DPB_ENTRIES_NUM_MAX;

	memset(slice_params, 0, sizeof(*slice_params));

	/* NAL unit header: first two bytes after the start code. */
	b = (uint8_t *)source_data + source_offset;
	nal_unit_type = (b[0] >> H265_NAL_UNIT_TYPE_SHIFT) &
			H265_NAL_UNIT_TYPE_MASK;
	nuh_temporal_id_plus1 = (b[1] >> H265_NUH_TEMPORAL_ID_PLUS1_SHIFT) &
				H265_NUH_TEMPORAL_ID_PLUS1_MASK;

	/*
	 * bit_size covers only the payload that starts at data_byte_offset,
	 * i.e. the slice size minus the NAL and slice headers. Using the
	 * full slice_data_size made rkvdec read past the payload (observed
	 * as corrupted frames from the second frame on, per this file's
	 * history). Clamp to zero instead of wrapping if a client ever
	 * reports a header offset beyond the slice size.
	 */
	payload_bytes = slice->slice_data_size > slice->slice_data_byte_offset ?
			slice->slice_data_size - slice->slice_data_byte_offset :
			0;
	slice_params->bit_size = payload_bytes * 8;

	/* Byte (not bit) offset of the slice payload within the buffer. */
	slice_params->data_byte_offset =
		source_offset + slice->slice_data_byte_offset;

	/*
	 * Forwarded as-is. This file's history notes that ffmpeg's VAAPI
	 * front-end reports 0 here and that rkvdec does not consume it.
	 */
	slice_params->num_entry_point_offsets = slice->num_entry_point_offsets;

	slice_params->nal_unit_type = nal_unit_type;
	slice_params->nuh_temporal_id_plus1 = nuh_temporal_id_plus1;

	slice_type = slice->LongSliceFlags.fields.slice_type;
	slice_params->slice_type = slice_type;
	slice_params->colour_plane_id =
		slice->LongSliceFlags.fields.color_plane_id;
	slice_params->slice_pic_order_cnt = picture->CurrPic.pic_order_cnt;
	slice_params->num_ref_idx_l0_active_minus1 =
		slice->num_ref_idx_l0_active_minus1;
	slice_params->num_ref_idx_l1_active_minus1 =
		slice->num_ref_idx_l1_active_minus1;
	slice_params->collocated_ref_idx = slice->collocated_ref_idx;
	slice_params->five_minus_max_num_merge_cand =
		slice->five_minus_max_num_merge_cand;
	slice_params->slice_qp_delta = slice->slice_qp_delta;
	slice_params->slice_cb_qp_offset = slice->slice_cb_qp_offset;
	slice_params->slice_cr_qp_offset = slice->slice_cr_qp_offset;
	/* slice_act_*_qp_offset: no VAAPI source; stay zero from memset. */
	slice_params->slice_beta_offset_div2 = slice->slice_beta_offset_div2;
	slice_params->slice_tc_offset_div2 = slice->slice_tc_offset_div2;

	/* 0 = frame, 1 = top field, 2 = bottom field. */
	if (picture->CurrPic.flags & VA_PICTURE_HEVC_FIELD_PIC) {
		if (picture->CurrPic.flags & VA_PICTURE_HEVC_BOTTOM_FIELD)
			pic_struct = 2;
		else
			pic_struct = 1;
	} else {
		pic_struct = 0;
	}
	slice_params->pic_struct = pic_struct;

	slice_params->slice_segment_addr = slice->slice_segment_address;

	/*
	 * Reference index lists: L0 for P and B slices, L1 for B slices
	 * only. The active count is clamped to the array length so an
	 * out-of-range num_ref_idx_*_active_minus1 cannot cause an overrun.
	 */
	if (slice_type != V4L2_HEVC_SLICE_TYPE_I) {
		n_l0 = slice->num_ref_idx_l0_active_minus1 + 1U;
		if (n_l0 > ref_slots)
			n_l0 = ref_slots;
		for (i = 0; i < n_l0; i++)
			slice_params->ref_idx_l0[i] = slice->RefPicList[0][i];
	}
	if (slice_type == V4L2_HEVC_SLICE_TYPE_B) {
		n_l1 = slice->num_ref_idx_l1_active_minus1 + 1U;
		if (n_l1 > ref_slots)
			n_l1 = ref_slots;
		for (i = 0; i < n_l1; i++)
			slice_params->ref_idx_l1[i] = slice->RefPicList[1][i];
	}

	/*
	 * Bit length of short_term_ref_pic_set() in the slice header, from
	 * VAAPI's st_rps_bits. Per this file's history, rkvdec uses it to
	 * locate the long-term RPS bits and falls back to a computation
	 * that is wrong for streams with a single SPS RPS when it is zero.
	 * long_term_ref_pic_set_size has no VAAPI source and stays zero.
	 */
	slice_params->short_term_ref_pic_set_size = picture->st_rps_bits;
	slice_params->long_term_ref_pic_set_size = 0;

	/* Prediction weight table. */
	slice_params->pred_weight_table.luma_log2_weight_denom =
		slice->luma_log2_weight_denom;
	slice_params->pred_weight_table.delta_chroma_log2_weight_denom =
		slice->delta_chroma_log2_weight_denom;

	if (slice_type != V4L2_HEVC_SLICE_TYPE_I) {
		for (i = 0; i < ref_slots; i++) {
			slice_params->pred_weight_table.delta_luma_weight_l0[i] =
				slice->delta_luma_weight_l0[i];
			slice_params->pred_weight_table.luma_offset_l0[i] =
				slice->luma_offset_l0[i];
			for (j = 0; j < 2; j++) {
				slice_params->pred_weight_table.delta_chroma_weight_l0[i][j] =
					slice->delta_chroma_weight_l0[i][j];
				slice_params->pred_weight_table.chroma_offset_l0[i][j] =
					slice->ChromaOffsetL0[i][j];
			}
		}
	}
	if (slice_type == V4L2_HEVC_SLICE_TYPE_B) {
		for (i = 0; i < ref_slots; i++) {
			slice_params->pred_weight_table.delta_luma_weight_l1[i] =
				slice->delta_luma_weight_l1[i];
			slice_params->pred_weight_table.luma_offset_l1[i] =
				slice->luma_offset_l1[i];
			for (j = 0; j < 2; j++) {
				slice_params->pred_weight_table.delta_chroma_weight_l1[i][j] =
					slice->delta_chroma_weight_l1[i][j];
				slice_params->pred_weight_table.chroma_offset_l1[i][j] =
					slice->ChromaOffsetL1[i][j];
			}
		}
	}

	/* Slice flag bits. USE_INTEGER_MV has no VAAPI source; left clear. */
	if (slice->LongSliceFlags.fields.slice_sao_luma_flag)
		slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_LUMA;
	if (slice->LongSliceFlags.fields.slice_sao_chroma_flag)
		slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_CHROMA;
	if (slice->LongSliceFlags.fields.slice_temporal_mvp_enabled_flag)
		slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED;
	if (slice->LongSliceFlags.fields.mvd_l1_zero_flag)
		slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_MVD_L1_ZERO;
	if (slice->LongSliceFlags.fields.cabac_init_flag)
		slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_CABAC_INIT;
	if (slice->LongSliceFlags.fields.collocated_from_l0_flag)
		slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_COLLOCATED_FROM_L0;
	if (slice->LongSliceFlags.fields.slice_deblocking_filter_disabled_flag)
		slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED;
	if (slice->LongSliceFlags.fields.slice_loop_filter_across_slices_enabled_flag)
		slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED;
	if (slice->LongSliceFlags.fields.dependent_slice_segment_flag)
		slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT;
}
/* ===== Clause 5: SCALING_MATRIX (1296 bytes; conditional fill) =====
 *
 * Build the V4L2 stateless HEVC SCALING_MATRIX control.
 *
 * @iqmatrix:       VAAPI scaling lists; only read when @iqmatrix_set.
 * @iqmatrix_set:   whether the client supplied @iqmatrix for this frame.
 * @scaling_matrix: destination control payload; completely overwritten.
 *
 * Without a client-supplied matrix the payload is all zeroes. Otherwise
 * each VAAPI list is copied byte-for-byte into the V4L2 list of the same
 * block size; the number of bytes copied is the size of the VAAPI array.
 *
 * NOTE(review): no coefficient reordering is performed here — confirm
 * that the VAAPI and V4L2 lists use the same scan order.
 */
static void h265_fill_scaling_matrix(VAIQMatrixBufferHEVC *iqmatrix,
				     bool iqmatrix_set,
				     struct v4l2_ctrl_hevc_scaling_matrix *scaling_matrix)
{
	memset(scaling_matrix, 0, sizeof(*scaling_matrix));

	if (iqmatrix_set) {
		/* DC coefficients of the 16x16 and 32x32 lists. */
		memcpy(scaling_matrix->scaling_list_dc_coef_16x16,
		       iqmatrix->ScalingListDC16x16,
		       sizeof(iqmatrix->ScalingListDC16x16));
		memcpy(scaling_matrix->scaling_list_dc_coef_32x32,
		       iqmatrix->ScalingListDC32x32,
		       sizeof(iqmatrix->ScalingListDC32x32));

		/* Per-size coefficient lists. */
		memcpy(scaling_matrix->scaling_list_4x4,
		       iqmatrix->ScalingList4x4,
		       sizeof(iqmatrix->ScalingList4x4));
		memcpy(scaling_matrix->scaling_list_8x8,
		       iqmatrix->ScalingList8x8,
		       sizeof(iqmatrix->ScalingList8x8));
		memcpy(scaling_matrix->scaling_list_16x16,
		       iqmatrix->ScalingList16x16,
		       sizeof(iqmatrix->ScalingList16x16));
		memcpy(scaling_matrix->scaling_list_32x32,
		       iqmatrix->ScalingList32x32,
		       sizeof(iqmatrix->ScalingList32x32));
	}
}
/* ===== Clause 1: orchestrator — batched 5-control submission ===== */

/*
 * Diagnostic helper: print up to the last 80 bytes of one slice to
 * stderr as a hex dump, 16 bytes per row, each row prefixed with the
 * offset of its first byte within the slice.
 *
 * @index:       slice number within the frame.
 * @nut:         NAL unit type parsed for the slice.
 * @slice:       VAAPI slice parameters (size and header offset are shown).
 * @slice_start: first byte of the slice (its NAL unit header).
 */
static void h265_dump_slice_tail(unsigned int index, unsigned int nut,
				 const VASliceParameterBufferHEVC *slice,
				 const uint8_t *slice_start)
{
	uint32_t size = slice->slice_data_size;
	uint32_t header_bytes = slice->slice_data_byte_offset;
	uint32_t tail_len = size < 80 ? size : 80;
	uint32_t tail_start = size - tail_len;
	uint32_t pos;

	fprintf(stderr, "iter29 slice[%u] nut=%u size=%u boff=%u start_in_slice=%u tail80:",
		index, nut, size, header_bytes, tail_start);
	for (pos = tail_start; pos < size; pos++) {
		if (((pos - tail_start) & 0xf) == 0)
			fprintf(stderr, "\n +%04x:", pos);
		fprintf(stderr, " %02x", slice_start[pos]);
	}
	fprintf(stderr, "\n");
}

/*
 * Translate the VAAPI HEVC parameters stored on a surface into the five
 * V4L2 stateless HEVC controls (SPS, PPS, SLICE_PARAMS, SCALING_MATRIX,
 * DECODE_PARAMS) and submit them in one v4l2_set_controls() call on the
 * surface's request.
 *
 * @driver_data:    driver state (video fd, surface heap).
 * @context_object: decoding context; tells whether a 3-byte start code
 *                  was stored in front of every slice.
 * @surface_object: surface carrying the frame's parameters, bitstream
 *                  and request fd.
 *
 * Returns VA_STATUS_SUCCESS on success,
 * VA_STATUS_ERROR_OPERATION_FAILED when the frame has no slices or the
 * control submission fails, and VA_STATUS_ERROR_ALLOCATION_FAILED when
 * the slice array cannot be allocated.
 *
 * Setting the LIBVA_HEVC_DUMP_SLICE_TAIL environment variable enables a
 * per-slice hex dump on stderr.
 */
int h265_set_controls(struct request_data *driver_data,
		      struct object_context *context_object,
		      struct object_surface *surface_object)
{
	VAPictureParameterBufferHEVC *picture =
		&surface_object->params.h265.picture;
	unsigned int num_slices = surface_object->params.h265.num_slices;
	/* Bytes of start code stored ahead of each slice, if any. */
	unsigned int start_code_len = context_object->h264_start_code ? 3 : 0;
	struct v4l2_ctrl_hevc_slice_params *slice_params_array;
	struct v4l2_ctrl_hevc_sps sps;
	struct v4l2_ctrl_hevc_pps pps;
	struct v4l2_ctrl_hevc_scaling_matrix scaling_matrix;
	struct v4l2_ctrl_hevc_decode_params decode_params;
	unsigned int offset = 0;
	unsigned int idx;
	uint8_t first_nut;
	int rc;

	if (num_slices == 0)
		return VA_STATUS_ERROR_OPERATION_FAILED;

	slice_params_array = calloc(num_slices, sizeof(*slice_params_array));
	if (slice_params_array == NULL)
		return VA_STATUS_ERROR_ALLOCATION_FAILED;

	/*
	 * Walk the slices in buffer order. `offset` is advanced past each
	 * start code before the slice is parsed and past the slice's data
	 * afterwards, so it always points at the next NAL unit header.
	 */
	for (idx = 0; idx < num_slices; idx++) {
		VASliceParameterBufferHEVC *slice =
			&surface_object->params.h265.slices[idx];

		offset += start_code_len;

		h265_fill_slice_params(picture, slice,
				       surface_object->source_data, offset,
				       &slice_params_array[idx]);

		if (getenv("LIBVA_HEVC_DUMP_SLICE_TAIL"))
			h265_dump_slice_tail(idx,
					     slice_params_array[idx].nal_unit_type,
					     slice,
					     (uint8_t *)surface_object->source_data + offset);

		offset += slice->slice_data_size;
	}

	h265_fill_sps(picture, &sps);
	h265_fill_pps(picture, &surface_object->params.h265.slices[0], &pps);
	h265_fill_decode_params(driver_data, picture, &decode_params);
	h265_fill_scaling_matrix(&surface_object->params.h265.iqmatrix,
				 surface_object->params.h265.iqmatrix_set,
				 &scaling_matrix);

	/*
	 * Derive the IRAP/IDR picture flags from the first slice's NAL unit
	 * type (H.265 7.4.2.2): types 16..23 are IRAP, and of those 19
	 * (IDR_W_RADL) and 20 (IDR_N_LP) are IDR. This file's history notes
	 * that rkvdec produces all-zero output for keyframes when these
	 * bits are missing.
	 */
	first_nut = slice_params_array[0].nal_unit_type;
	if (first_nut >= 16 && first_nut <= 23)
		decode_params.flags |= V4L2_HEVC_DECODE_PARAM_FLAG_IRAP_PIC;
	if (first_nut == 19 || first_nut == 20)
		decode_params.flags |= V4L2_HEVC_DECODE_PARAM_FLAG_IDR_PIC;

	{
		struct v4l2_ext_control controls[] = {
			{
				.id = V4L2_CID_STATELESS_HEVC_SPS,
				.ptr = &sps,
				.size = sizeof(sps),
			},
			{
				.id = V4L2_CID_STATELESS_HEVC_PPS,
				.ptr = &pps,
				.size = sizeof(pps),
			},
			{
				.id = V4L2_CID_STATELESS_HEVC_SLICE_PARAMS,
				.ptr = slice_params_array,
				.size = sizeof(*slice_params_array) * num_slices,
			},
			{
				.id = V4L2_CID_STATELESS_HEVC_SCALING_MATRIX,
				.ptr = &scaling_matrix,
				.size = sizeof(scaling_matrix),
			},
			{
				.id = V4L2_CID_STATELESS_HEVC_DECODE_PARAMS,
				.ptr = &decode_params,
				.size = sizeof(decode_params),
			},
		};

		rc = v4l2_set_controls(driver_data->video_fd,
				       surface_object->request_fd,
				       controls,
				       sizeof(controls) / sizeof(controls[0]));
	}

	free(slice_params_array);

	return rc < 0 ? VA_STATUS_ERROR_OPERATION_FAILED : VA_STATUS_SUCCESS;
}