719d813f4a
BBB HEVC uses WPP (entropy_coding_sync_enabled_flag=1); slice header contains entry_point_offset_minus1 syntax elements. libva was setting num_entry_point_offsets=0 with the comment 'iter2 doesn't do tiles', but WPP uses the same mechanism — rkvdec miscounted the slice header skip distance and read slice data starting at wrong byte for P/B frames → frame 2+ decoded with garbage reference data. iter27 kernel printk diff: libva frame 2 sl[8..11] = 00 00 00 00 (=0) kdirect frame 2 sl[8..11] = 16 00 00 00 (=22) VAAPI exposes VASliceParameterBufferHEVC.num_entry_point_offsets. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
662 lines
28 KiB
C
662 lines
28 KiB
C
/*
 * Copyright (C) 2007 Intel Corporation
 * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com>
 * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
|
||
|
||
/*
 * fresnel-fourier iter2 Phase 6 commit B: rewrite h265.c against the new
 * V4L2_CID_STATELESS_HEVC_{SPS,PPS,SLICE_PARAMS,SCALING_MATRIX,
 * DECODE_PARAMS,DECODE_MODE,START_CODE} stateless controls (mainline
 * kernel <linux/v4l2-controls.h>:2090-2300).
 *
 * Replaces the staging-era V4L2_CID_MPEG_VIDEO_HEVC_{SPS,PPS,
 * SLICE_PARAMS} CIDs, which do not exist on modern kernels (verified via
 * test-compile in iter2 Phase 2).
 *
 * Per-frame submission: one batched VIDIOC_S_EXT_CTRLS, count=5,
 * ctrl_class=V4L2_CTRL_CLASS_CODEC_STATELESS:
 *   0xa40a90 SPS            (40 bytes)
 *   0xa40a91 PPS            (64 bytes)
 *   0xa40a92 SLICE_PARAMS   (variable; dynamic array; one entry per slice)
 *   0xa40a93 SCALING_MATRIX (1296 bytes; always submitted, zero-filled when
 *                            no IQ matrix was provided for the frame)
 *   0xa40a94 DECODE_PARAMS  (328 bytes; per-frame DPB info)
 *
 * Plus device-wide menus set once at context init:
 *   0xa40a95 DECODE_MODE (FRAME_BASED on rkvdec)
 *   0xa40a96 START_CODE  (ANNEX_B on rkvdec)
 *
 * Reference: FFmpeg libavcodec/v4l2_request_hevc.c:505-565
 * (v4l2_request_hevc_queue_decode batched submission shape).
 *
 * Key Phase 5 review amendments incorporated:
 *   C1: data_byte_offset (NOT data_bit_offset); the old bit-search was dropped.
 *   C2: dpb_entry.flags carries only the LONG_TERM_REFERENCE bit; the POC
 *       field is pic_order_cnt_val (singular); the poc_st_curr_*[] arrays
 *       are u8 DPB INDICES, not POC values (per FFmpeg's get_ref_pic_index
 *       pattern).
 *   S1: PPS flags 19+20 (DEBLOCKING_FILTER_CONTROL_PRESENT, UNIFORM_SPACING)
 *       were reviewed; both are left clear because VAAPI does not expose
 *       them.
 *   S2: PPS scalars pic_parameter_set_id and
 *       num_ref_idx_l0/l1_default_active_minus1 are populated.
 *   Q2: slice_segment_addr is populated from VAAPI
 *       slice->slice_segment_address.
 *   S3: SCALING_MATRIX content matches the FFmpeg pattern: memset to zero
 *       when iqmatrix_set==false (BBB has no scaling list in SPS flags).
 */
|
||
|
||
#include "h265.h"
|
||
#include "context.h"
|
||
#include "object_heap.h"
|
||
#include "request.h"
|
||
#include "surface.h"
|
||
|
||
#include <assert.h>
|
||
#include <stdlib.h>
|
||
#include <string.h>
|
||
|
||
#include <sys/ioctl.h>
|
||
#include <sys/mman.h>
|
||
|
||
#include <linux/videodev2.h>
|
||
#include <linux/v4l2-controls.h>
|
||
|
||
#include "utils.h"
|
||
#include "v4l2.h"
|
||
|
||
/*
 * Shift/mask pairs for the two-byte HEVC NAL unit header
 * (ISO/IEC 23008-2 / ITU-T H.265).
 *
 * nal_unit_type is a 6-bit field read from header byte 0 after shifting
 * right by one bit; nuh_temporal_id_plus1 is the low 3 bits of header
 * byte 1. Both are extracted from the first two bytes of the slice
 * bitstream (after any ANNEX_B start code).
 */
#define H265_NAL_UNIT_TYPE_SHIFT		1
#define H265_NAL_UNIT_TYPE_MASK			0x3f
#define H265_NUH_TEMPORAL_ID_PLUS1_SHIFT	0
#define H265_NUH_TEMPORAL_ID_PLUS1_MASK		0x07
|
||
|
||
/* ===== Clause 2: SPS (40 bytes) ===== */
|
||
static void h265_fill_sps(VAPictureParameterBufferHEVC *picture,
|
||
struct v4l2_ctrl_hevc_sps *sps)
|
||
{
|
||
memset(sps, 0, sizeof(*sps));
|
||
|
||
sps->video_parameter_set_id = 0; /* not exposed by VAAPI */
|
||
sps->seq_parameter_set_id = 0; /* not exposed by VAAPI */
|
||
sps->pic_width_in_luma_samples = picture->pic_width_in_luma_samples;
|
||
sps->pic_height_in_luma_samples = picture->pic_height_in_luma_samples;
|
||
sps->bit_depth_luma_minus8 = picture->bit_depth_luma_minus8;
|
||
sps->bit_depth_chroma_minus8 = picture->bit_depth_chroma_minus8;
|
||
sps->log2_max_pic_order_cnt_lsb_minus4 =
|
||
picture->log2_max_pic_order_cnt_lsb_minus4;
|
||
sps->sps_max_dec_pic_buffering_minus1 =
|
||
picture->sps_max_dec_pic_buffering_minus1;
|
||
/*
|
||
* iter11 α-13: VAAPI doesn't forward sps_max_num_reorder_pics or
|
||
* sps_max_latency_increase_plus1. kdirect parses the SPS NAL and
|
||
* submits the bitstream's true values; libva used to hardcode 0
|
||
* (a structurally wrong "no reordering" hint, even though Phase 5b
|
||
* empirically confirmed rkvdec ignores both fields on RK3399, so
|
||
* this is wire-hygiene only — matches kdirect's payload more
|
||
* closely without behavior change). sps_max_dec_pic_buffering_minus1
|
||
* is a safe upper bound per H.265 §A.4.2 (sps_max_num_reorder_pics ≤
|
||
* sps_max_dec_pic_buffering_minus1 always holds). latency_increase_plus1
|
||
* stays at 0 = spec "unconstrained".
|
||
*/
|
||
sps->sps_max_num_reorder_pics = picture->sps_max_dec_pic_buffering_minus1;
|
||
sps->sps_max_latency_increase_plus1 = 0;
|
||
sps->log2_min_luma_coding_block_size_minus3 =
|
||
picture->log2_min_luma_coding_block_size_minus3;
|
||
sps->log2_diff_max_min_luma_coding_block_size =
|
||
picture->log2_diff_max_min_luma_coding_block_size;
|
||
sps->log2_min_luma_transform_block_size_minus2 =
|
||
picture->log2_min_transform_block_size_minus2;
|
||
sps->log2_diff_max_min_luma_transform_block_size =
|
||
picture->log2_diff_max_min_transform_block_size;
|
||
sps->max_transform_hierarchy_depth_inter =
|
||
picture->max_transform_hierarchy_depth_inter;
|
||
sps->max_transform_hierarchy_depth_intra =
|
||
picture->max_transform_hierarchy_depth_intra;
|
||
sps->pcm_sample_bit_depth_luma_minus1 =
|
||
picture->pcm_sample_bit_depth_luma_minus1;
|
||
sps->pcm_sample_bit_depth_chroma_minus1 =
|
||
picture->pcm_sample_bit_depth_chroma_minus1;
|
||
sps->log2_min_pcm_luma_coding_block_size_minus3 =
|
||
picture->log2_min_pcm_luma_coding_block_size_minus3;
|
||
sps->log2_diff_max_min_pcm_luma_coding_block_size =
|
||
picture->log2_diff_max_min_pcm_luma_coding_block_size;
|
||
sps->num_short_term_ref_pic_sets = picture->num_short_term_ref_pic_sets;
|
||
sps->num_long_term_ref_pics_sps = picture->num_long_term_ref_pic_sps;
|
||
sps->chroma_format_idc = picture->pic_fields.bits.chroma_format_idc;
|
||
sps->sps_max_sub_layers_minus1 = 0; /* not exposed */
|
||
/* reserved[6] zeroed by memset */
|
||
|
||
/* 9 boolean flags collapsed to u64 */
|
||
if (picture->pic_fields.bits.separate_colour_plane_flag)
|
||
sps->flags |= V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE;
|
||
if (picture->pic_fields.bits.scaling_list_enabled_flag)
|
||
sps->flags |= V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED;
|
||
if (picture->pic_fields.bits.amp_enabled_flag)
|
||
sps->flags |= V4L2_HEVC_SPS_FLAG_AMP_ENABLED;
|
||
if (picture->slice_parsing_fields.bits.sample_adaptive_offset_enabled_flag)
|
||
sps->flags |= V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET;
|
||
if (picture->pic_fields.bits.pcm_enabled_flag)
|
||
sps->flags |= V4L2_HEVC_SPS_FLAG_PCM_ENABLED;
|
||
if (picture->pic_fields.bits.pcm_loop_filter_disabled_flag)
|
||
sps->flags |= V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED;
|
||
if (picture->slice_parsing_fields.bits.long_term_ref_pics_present_flag)
|
||
sps->flags |= V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT;
|
||
if (picture->slice_parsing_fields.bits.sps_temporal_mvp_enabled_flag)
|
||
sps->flags |= V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED;
|
||
if (picture->pic_fields.bits.strong_intra_smoothing_enabled_flag)
|
||
sps->flags |= V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED;
|
||
}
|
||
|
||
/* ===== Clause 3: PPS (64 bytes; 21 flags + 3 newly-mapped scalars per S1+S2) ===== */
|
||
static void h265_fill_pps(VAPictureParameterBufferHEVC *picture,
|
||
VASliceParameterBufferHEVC *slice,
|
||
struct v4l2_ctrl_hevc_pps *pps)
|
||
{
|
||
memset(pps, 0, sizeof(*pps));
|
||
|
||
pps->pic_parameter_set_id = 0; /* S2: not exposed by VAAPI; default 0 */
|
||
pps->num_extra_slice_header_bits = picture->num_extra_slice_header_bits;
|
||
pps->num_ref_idx_l0_default_active_minus1 =
|
||
picture->num_ref_idx_l0_default_active_minus1; /* S2 */
|
||
pps->num_ref_idx_l1_default_active_minus1 =
|
||
picture->num_ref_idx_l1_default_active_minus1; /* S2 */
|
||
pps->init_qp_minus26 = picture->init_qp_minus26;
|
||
pps->diff_cu_qp_delta_depth = picture->diff_cu_qp_delta_depth;
|
||
pps->pps_cb_qp_offset = picture->pps_cb_qp_offset;
|
||
pps->pps_cr_qp_offset = picture->pps_cr_qp_offset;
|
||
pps->num_tile_columns_minus1 = picture->num_tile_columns_minus1;
|
||
pps->num_tile_rows_minus1 = picture->num_tile_rows_minus1;
|
||
/* column_width_minus1[20] + row_height_minus1[22] left zero — BBB single-tile */
|
||
pps->pps_beta_offset_div2 = picture->pps_beta_offset_div2;
|
||
pps->pps_tc_offset_div2 = picture->pps_tc_offset_div2;
|
||
pps->log2_parallel_merge_level_minus2 =
|
||
picture->log2_parallel_merge_level_minus2;
|
||
/* reserved zeroed by memset */
|
||
|
||
/* 21 boolean flags (bits 0-20) collapsed to u64 */
|
||
if (slice && slice->LongSliceFlags.fields.dependent_slice_segment_flag)
|
||
pps->flags |= V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED;
|
||
if (picture->slice_parsing_fields.bits.output_flag_present_flag)
|
||
pps->flags |= V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT;
|
||
if (picture->pic_fields.bits.sign_data_hiding_enabled_flag)
|
||
pps->flags |= V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED;
|
||
if (picture->slice_parsing_fields.bits.cabac_init_present_flag)
|
||
pps->flags |= V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT;
|
||
if (picture->pic_fields.bits.constrained_intra_pred_flag)
|
||
pps->flags |= V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED;
|
||
if (picture->pic_fields.bits.transform_skip_enabled_flag)
|
||
pps->flags |= V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED;
|
||
if (picture->pic_fields.bits.cu_qp_delta_enabled_flag)
|
||
pps->flags |= V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED;
|
||
if (picture->slice_parsing_fields.bits.pps_slice_chroma_qp_offsets_present_flag)
|
||
pps->flags |= V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT;
|
||
if (picture->pic_fields.bits.weighted_pred_flag)
|
||
pps->flags |= V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED;
|
||
if (picture->pic_fields.bits.weighted_bipred_flag)
|
||
pps->flags |= V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED;
|
||
if (picture->pic_fields.bits.transquant_bypass_enabled_flag)
|
||
pps->flags |= V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED;
|
||
if (picture->pic_fields.bits.tiles_enabled_flag)
|
||
pps->flags |= V4L2_HEVC_PPS_FLAG_TILES_ENABLED;
|
||
if (picture->pic_fields.bits.entropy_coding_sync_enabled_flag)
|
||
pps->flags |= V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED;
|
||
if (picture->pic_fields.bits.loop_filter_across_tiles_enabled_flag)
|
||
pps->flags |= V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED;
|
||
if (picture->pic_fields.bits.pps_loop_filter_across_slices_enabled_flag)
|
||
pps->flags |= V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED;
|
||
if (picture->slice_parsing_fields.bits.deblocking_filter_override_enabled_flag)
|
||
pps->flags |= V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_OVERRIDE_ENABLED;
|
||
if (picture->slice_parsing_fields.bits.pps_disable_deblocking_filter_flag)
|
||
pps->flags |= V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER;
|
||
if (picture->slice_parsing_fields.bits.lists_modification_present_flag)
|
||
pps->flags |= V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT;
|
||
/* SLICE_SEGMENT_HEADER_EXTENSION_PRESENT (bit 18) — not exposed; skip */
|
||
/* DEBLOCKING_FILTER_CONTROL_PRESENT (bit 19, S1) — not exposed by VAAPI */
|
||
/* DEBLOCKING_FILTER_CONTROL_PRESENT (bit 19) and UNIFORM_SPACING (bit 20):
|
||
* VAAPI does not expose either flag in VAPictureParameterBufferHEVC.
|
||
* BBB-720p10s_hevc.mp4 uses neither tiles nor explicit deblocking-
|
||
* control parameters; leaving these bits zero is correct for the
|
||
* iter2 binding-cell fixture. */
|
||
}
|
||
|
||
/* ===== Clause 6: DECODE_PARAMS (328 bytes) =====
|
||
*
|
||
* NEW in modern API. Houses DPB info that was inside slice_params in
|
||
* the staging-era. Per Phase 5 C2: dpb[].flags has only LONG_TERM_REFERENCE
|
||
* bit; dpb[].pic_order_cnt_val (singular); poc_st_curr_*[] arrays hold
|
||
* u8 DPB INDICES (not POC values).
|
||
*
|
||
* Pattern: classify each VAAPI ReferenceFrames[i] into ST_CURR_BEFORE /
|
||
* ST_CURR_AFTER / LT_CURR; populate dpb[] sequentially; record the DPB
|
||
* index in the matching classification array.
|
||
*/
|
||
static void h265_fill_decode_params(struct request_data *driver_data,
|
||
VAPictureParameterBufferHEVC *picture,
|
||
struct v4l2_ctrl_hevc_decode_params *decode_params)
|
||
{
|
||
struct object_surface *surface_object;
|
||
VAPictureHEVC *hevc_picture;
|
||
unsigned int i;
|
||
uint8_t n_active = 0;
|
||
uint8_t n_st_before = 0, n_st_after = 0, n_lt = 0;
|
||
|
||
memset(decode_params, 0, sizeof(*decode_params));
|
||
|
||
decode_params->pic_order_cnt_val = picture->CurrPic.pic_order_cnt;
|
||
|
||
for (i = 0; i < 15; i++) {
|
||
hevc_picture = &picture->ReferenceFrames[i];
|
||
|
||
if (hevc_picture->picture_id == VA_INVALID_SURFACE ||
|
||
(hevc_picture->flags & VA_PICTURE_HEVC_INVALID))
|
||
continue;
|
||
|
||
surface_object = (struct object_surface *)
|
||
object_heap_lookup(&driver_data->surface_heap,
|
||
hevc_picture->picture_id);
|
||
if (surface_object == NULL)
|
||
continue;
|
||
|
||
if (n_active >= V4L2_HEVC_DPB_ENTRIES_NUM_MAX)
|
||
break;
|
||
|
||
decode_params->dpb[n_active].timestamp =
|
||
v4l2_timeval_to_ns(&surface_object->timestamp);
|
||
decode_params->dpb[n_active].pic_order_cnt_val =
|
||
hevc_picture->pic_order_cnt;
|
||
decode_params->dpb[n_active].field_pic =
|
||
!!(hevc_picture->flags & VA_PICTURE_HEVC_FIELD_PIC);
|
||
decode_params->dpb[n_active].flags =
|
||
(hevc_picture->flags & VA_PICTURE_HEVC_RPS_LT_CURR) ?
|
||
V4L2_HEVC_DPB_ENTRY_LONG_TERM_REFERENCE : 0;
|
||
/* dpb[n_active].reserved zeroed by memset */
|
||
|
||
/* Classify into one of the three "current" lists.
|
||
* Each list holds the DPB INDEX (u8), not the POC value. */
|
||
if (hevc_picture->flags & VA_PICTURE_HEVC_RPS_ST_CURR_BEFORE) {
|
||
if (n_st_before < V4L2_HEVC_DPB_ENTRIES_NUM_MAX)
|
||
decode_params->poc_st_curr_before[n_st_before++] = n_active;
|
||
} else if (hevc_picture->flags & VA_PICTURE_HEVC_RPS_ST_CURR_AFTER) {
|
||
if (n_st_after < V4L2_HEVC_DPB_ENTRIES_NUM_MAX)
|
||
decode_params->poc_st_curr_after[n_st_after++] = n_active;
|
||
} else if (hevc_picture->flags & VA_PICTURE_HEVC_RPS_LT_CURR) {
|
||
if (n_lt < V4L2_HEVC_DPB_ENTRIES_NUM_MAX)
|
||
decode_params->poc_lt_curr[n_lt++] = n_active;
|
||
}
|
||
|
||
n_active++;
|
||
}
|
||
|
||
decode_params->num_active_dpb_entries = n_active;
|
||
decode_params->num_poc_st_curr_before = n_st_before;
|
||
decode_params->num_poc_st_curr_after = n_st_after;
|
||
decode_params->num_poc_lt_curr = n_lt;
|
||
/*
|
||
* iter26 α-26: VAAPI DOES expose short_term_ref_pic_set bit-count
|
||
* via picture->st_rps_bits. Without populating this, rkvdec's
|
||
* DPB reference resolution for P/B frames uses the wrong slice-
|
||
* header skip and reads the wrong reference; frame 1 (IDR) decodes
|
||
* correctly but frames 2+ diverge (iter25 evidence: cmp differs at
|
||
* byte 1382401 = frame 2 boundary, kdirect bytes 4-5 = 0x0a 0x00,
|
||
* libva = 0x00 0x00).
|
||
*
|
||
* long_term_ref_pic_set_size and num_delta_pocs_of_ref_rps_idx still
|
||
* left zero (VAAPI doesn't expose either).
|
||
*/
|
||
decode_params->short_term_ref_pic_set_size = picture->st_rps_bits;
|
||
|
||
/*
|
||
* iter11 α-14: IRAP/IDR/NO_OUTPUT_OF_PRIOR flags. VAAPI doesn't
|
||
* expose these in VAPictureParameterBufferHEVC. The iter2 binding
|
||
* cell hardcoded them to 0 with the comment "BBB B/P-frames don't
|
||
* need these set" — but IDR keyframes DO need IDR_PIC|IRAP_PIC.
|
||
* Without them rkvdec doesn't recognise the keyframe boundary,
|
||
* treats the IDR as inter without references, and produces all-zero
|
||
* CAPTURE output (Bug 5).
|
||
*
|
||
* The flags are derived at h265_set_controls level after slice_params
|
||
* have been parsed (slice_params[0].nal_unit_type carries the NAL
|
||
* type extracted from the bitstream). Initialise to 0 here; the caller
|
||
* patches the IRAP/IDR bits.
|
||
*/
|
||
decode_params->flags = 0;
|
||
}
|
||
|
||
/* ===== Clause 4: SLICE_PARAMS per slice =====
|
||
*
|
||
* Called per slice in a loop in h265_set_controls. Output is one entry
|
||
* in the dynamic-array of slice_params submitted to the kernel.
|
||
*
|
||
* source_offset is the byte offset within the surface_object->source_data
|
||
* buffer where this slice's bitstream begins (after any ANNEX_B start
|
||
* code prefix). data_byte_offset is the offset within the buffer to the
|
||
* first byte of slice header data.
|
||
*
|
||
* Per Phase 5 C1: data_byte_offset is a BYTE offset (not a bit offset).
|
||
* The old bit-search at h265.c:184-209 has been DROPPED.
|
||
*/
|
||
static void h265_fill_slice_params(VAPictureParameterBufferHEVC *picture,
|
||
VASliceParameterBufferHEVC *slice,
|
||
void *source_data,
|
||
unsigned int source_offset,
|
||
struct v4l2_ctrl_hevc_slice_params *slice_params)
|
||
{
|
||
uint8_t *b;
|
||
uint8_t nal_unit_type, nuh_temporal_id_plus1;
|
||
uint8_t pic_struct;
|
||
uint8_t slice_type;
|
||
unsigned int i, j;
|
||
|
||
memset(slice_params, 0, sizeof(*slice_params));
|
||
|
||
/* NAL header parse from slice bitstream (after ANNEX_B start code).
|
||
* source_offset points at the byte AFTER the start code (start code
|
||
* was prepended by codec_store_buffer:68-75 if context->h264_start_code
|
||
* is set). The first 2 bytes are the NAL unit header. */
|
||
b = (uint8_t *)source_data + source_offset;
|
||
nal_unit_type = (b[0] >> H265_NAL_UNIT_TYPE_SHIFT) & H265_NAL_UNIT_TYPE_MASK;
|
||
nuh_temporal_id_plus1 = (b[1] >> H265_NUH_TEMPORAL_ID_PLUS1_SHIFT) &
|
||
H265_NUH_TEMPORAL_ID_PLUS1_MASK;
|
||
|
||
slice_params->bit_size = slice->slice_data_size * 8;
|
||
|
||
/* C1: data_byte_offset, NOT data_bit_offset. Plain byte offset to
|
||
* the first byte of slice segment header data within the OUTPUT
|
||
* buffer. FFmpeg pattern at v4l2_request_hevc.c:190. */
|
||
slice_params->data_byte_offset = source_offset + slice->slice_data_byte_offset;
|
||
|
||
/*
|
||
* iter27 α-27: populate num_entry_point_offsets from VAAPI.
|
||
*
|
||
* BBB HEVC uses WPP (entropy_coding_sync_enabled_flag); each CTU row
|
||
* after the first creates an entry point. For 720p with 32-pixel
|
||
* CTUs that's 22 entry points per slice. Hardcoding 0 made rkvdec
|
||
* miscount the slice header skip distance → wrong slice data
|
||
* boundary → frame 2+ decoded with garbage reference data.
|
||
*
|
||
* Comment "iter2 doesn't do tiles" was inaccurate: WPP isn't tiles
|
||
* but uses the same entry_point_offsets mechanism.
|
||
*/
|
||
slice_params->num_entry_point_offsets = slice->num_entry_point_offsets;
|
||
request_log("iter27diag: slice %p num_entry_point_offsets=%u\n",
|
||
(void *)slice, (unsigned)slice->num_entry_point_offsets);
|
||
slice_params->nal_unit_type = nal_unit_type;
|
||
slice_params->nuh_temporal_id_plus1 = nuh_temporal_id_plus1;
|
||
|
||
slice_type = slice->LongSliceFlags.fields.slice_type;
|
||
slice_params->slice_type = slice_type;
|
||
slice_params->colour_plane_id = slice->LongSliceFlags.fields.color_plane_id;
|
||
slice_params->slice_pic_order_cnt = picture->CurrPic.pic_order_cnt;
|
||
slice_params->num_ref_idx_l0_active_minus1 = slice->num_ref_idx_l0_active_minus1;
|
||
slice_params->num_ref_idx_l1_active_minus1 = slice->num_ref_idx_l1_active_minus1;
|
||
slice_params->collocated_ref_idx = slice->collocated_ref_idx;
|
||
slice_params->five_minus_max_num_merge_cand = slice->five_minus_max_num_merge_cand;
|
||
slice_params->slice_qp_delta = slice->slice_qp_delta;
|
||
slice_params->slice_cb_qp_offset = slice->slice_cb_qp_offset;
|
||
slice_params->slice_cr_qp_offset = slice->slice_cr_qp_offset;
|
||
slice_params->slice_act_y_qp_offset = 0; /* VAAPI doesn't expose */
|
||
slice_params->slice_act_cb_qp_offset = 0;
|
||
slice_params->slice_act_cr_qp_offset = 0;
|
||
slice_params->slice_beta_offset_div2 = slice->slice_beta_offset_div2;
|
||
slice_params->slice_tc_offset_div2 = slice->slice_tc_offset_div2;
|
||
|
||
if (picture->CurrPic.flags & VA_PICTURE_HEVC_FIELD_PIC) {
|
||
if (picture->CurrPic.flags & VA_PICTURE_HEVC_BOTTOM_FIELD)
|
||
pic_struct = 2;
|
||
else
|
||
pic_struct = 1;
|
||
} else {
|
||
pic_struct = 0;
|
||
}
|
||
slice_params->pic_struct = pic_struct;
|
||
/* reserved0[3] zeroed by memset */
|
||
|
||
/* Q2: slice_segment_addr from VAAPI (was missing in old h265.c). */
|
||
slice_params->slice_segment_addr = slice->slice_segment_address;
|
||
|
||
/* Ref index arrays (DPB indices). For I-slices both are unused. */
|
||
for (i = 0; i < V4L2_HEVC_DPB_ENTRIES_NUM_MAX &&
|
||
slice_type != V4L2_HEVC_SLICE_TYPE_I; i++) {
|
||
if (i < (slice->num_ref_idx_l0_active_minus1 + 1U))
|
||
slice_params->ref_idx_l0[i] = slice->RefPicList[0][i];
|
||
}
|
||
for (i = 0; i < V4L2_HEVC_DPB_ENTRIES_NUM_MAX &&
|
||
slice_type == V4L2_HEVC_SLICE_TYPE_B; i++) {
|
||
if (i < (slice->num_ref_idx_l1_active_minus1 + 1U))
|
||
slice_params->ref_idx_l1[i] = slice->RefPicList[1][i];
|
||
}
|
||
|
||
slice_params->short_term_ref_pic_set_size = 0; /* VAAPI doesn't expose */
|
||
slice_params->long_term_ref_pic_set_size = 0;
|
||
|
||
/* Pred weight table */
|
||
slice_params->pred_weight_table.luma_log2_weight_denom =
|
||
slice->luma_log2_weight_denom;
|
||
slice_params->pred_weight_table.delta_chroma_log2_weight_denom =
|
||
slice->delta_chroma_log2_weight_denom;
|
||
|
||
for (i = 0; i < V4L2_HEVC_DPB_ENTRIES_NUM_MAX &&
|
||
slice_type != V4L2_HEVC_SLICE_TYPE_I; i++) {
|
||
slice_params->pred_weight_table.delta_luma_weight_l0[i] =
|
||
slice->delta_luma_weight_l0[i];
|
||
slice_params->pred_weight_table.luma_offset_l0[i] =
|
||
slice->luma_offset_l0[i];
|
||
for (j = 0; j < 2; j++) {
|
||
slice_params->pred_weight_table.delta_chroma_weight_l0[i][j] =
|
||
slice->delta_chroma_weight_l0[i][j];
|
||
slice_params->pred_weight_table.chroma_offset_l0[i][j] =
|
||
slice->ChromaOffsetL0[i][j];
|
||
}
|
||
}
|
||
for (i = 0; i < V4L2_HEVC_DPB_ENTRIES_NUM_MAX &&
|
||
slice_type == V4L2_HEVC_SLICE_TYPE_B; i++) {
|
||
slice_params->pred_weight_table.delta_luma_weight_l1[i] =
|
||
slice->delta_luma_weight_l1[i];
|
||
slice_params->pred_weight_table.luma_offset_l1[i] =
|
||
slice->luma_offset_l1[i];
|
||
for (j = 0; j < 2; j++) {
|
||
slice_params->pred_weight_table.delta_chroma_weight_l1[i][j] =
|
||
slice->delta_chroma_weight_l1[i][j];
|
||
slice_params->pred_weight_table.chroma_offset_l1[i][j] =
|
||
slice->ChromaOffsetL1[i][j];
|
||
}
|
||
}
|
||
/* reserved1[2] zeroed by memset */
|
||
|
||
/* 10 SLICE_PARAMS flag bits */
|
||
if (slice->LongSliceFlags.fields.slice_sao_luma_flag)
|
||
slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_LUMA;
|
||
if (slice->LongSliceFlags.fields.slice_sao_chroma_flag)
|
||
slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_CHROMA;
|
||
if (slice->LongSliceFlags.fields.slice_temporal_mvp_enabled_flag)
|
||
slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED;
|
||
if (slice->LongSliceFlags.fields.mvd_l1_zero_flag)
|
||
slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_MVD_L1_ZERO;
|
||
if (slice->LongSliceFlags.fields.cabac_init_flag)
|
||
slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_CABAC_INIT;
|
||
if (slice->LongSliceFlags.fields.collocated_from_l0_flag)
|
||
slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_COLLOCATED_FROM_L0;
|
||
/* USE_INTEGER_MV — VAAPI doesn't expose; leave 0 */
|
||
if (slice->LongSliceFlags.fields.slice_deblocking_filter_disabled_flag)
|
||
slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED;
|
||
if (slice->LongSliceFlags.fields.slice_loop_filter_across_slices_enabled_flag)
|
||
slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED;
|
||
if (slice->LongSliceFlags.fields.dependent_slice_segment_flag)
|
||
slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT;
|
||
}
|
||
|
||
/* ===== Clause 5: SCALING_MATRIX (1296 bytes; conditional fill) =====
|
||
*
|
||
* Per Phase 5 S3: when iqmatrix_set==false (BBB has no scaling list
|
||
* in SPS flags), send memset-zero. Matches FFmpeg's pattern when the
|
||
* stream has no scaling list. When iqmatrix_set==true, copy from VAAPI
|
||
* VAIQMatrixBufferHEVC.
|
||
*/
|
||
static void h265_fill_scaling_matrix(VAIQMatrixBufferHEVC *iqmatrix,
|
||
bool iqmatrix_set,
|
||
struct v4l2_ctrl_hevc_scaling_matrix *scaling_matrix)
|
||
{
|
||
memset(scaling_matrix, 0, sizeof(*scaling_matrix));
|
||
|
||
if (!iqmatrix_set)
|
||
return; /* memset zero matches FFmpeg sl=NULL path */
|
||
|
||
memcpy(scaling_matrix->scaling_list_4x4,
|
||
iqmatrix->ScalingList4x4, sizeof(iqmatrix->ScalingList4x4));
|
||
memcpy(scaling_matrix->scaling_list_8x8,
|
||
iqmatrix->ScalingList8x8, sizeof(iqmatrix->ScalingList8x8));
|
||
memcpy(scaling_matrix->scaling_list_16x16,
|
||
iqmatrix->ScalingList16x16, sizeof(iqmatrix->ScalingList16x16));
|
||
memcpy(scaling_matrix->scaling_list_32x32,
|
||
iqmatrix->ScalingList32x32, sizeof(iqmatrix->ScalingList32x32));
|
||
memcpy(scaling_matrix->scaling_list_dc_coef_16x16,
|
||
iqmatrix->ScalingListDC16x16,
|
||
sizeof(iqmatrix->ScalingListDC16x16));
|
||
memcpy(scaling_matrix->scaling_list_dc_coef_32x32,
|
||
iqmatrix->ScalingListDC32x32,
|
||
sizeof(iqmatrix->ScalingListDC32x32));
|
||
}
|
||
|
||
/* ===== Clause 1: orchestrator — batched 5-control submission ===== */
|
||
int h265_set_controls(struct request_data *driver_data,
|
||
struct object_context *context_object,
|
||
struct object_surface *surface_object)
|
||
{
|
||
VAPictureParameterBufferHEVC *picture =
|
||
&surface_object->params.h265.picture;
|
||
VAIQMatrixBufferHEVC *iqmatrix =
|
||
&surface_object->params.h265.iqmatrix;
|
||
bool iqmatrix_set = surface_object->params.h265.iqmatrix_set;
|
||
unsigned int num_slices = surface_object->params.h265.num_slices;
|
||
|
||
struct v4l2_ctrl_hevc_sps sps;
|
||
struct v4l2_ctrl_hevc_pps pps;
|
||
struct v4l2_ctrl_hevc_decode_params decode_params;
|
||
struct v4l2_ctrl_hevc_scaling_matrix scaling_matrix;
|
||
struct v4l2_ctrl_hevc_slice_params *slice_params_array = NULL;
|
||
|
||
struct v4l2_ext_control controls[5];
|
||
unsigned int n = 0;
|
||
unsigned int i;
|
||
unsigned int prefix_bytes;
|
||
unsigned int cumulative_offset = 0;
|
||
int rc;
|
||
|
||
if (num_slices == 0)
|
||
return VA_STATUS_ERROR_OPERATION_FAILED;
|
||
|
||
slice_params_array = calloc(num_slices,
|
||
sizeof(struct v4l2_ctrl_hevc_slice_params));
|
||
if (slice_params_array == NULL)
|
||
return VA_STATUS_ERROR_ALLOCATION_FAILED;
|
||
|
||
/* Per-slice fill. ANNEX_B start code (3 bytes 0x00 0x00 0x01) is
|
||
* prepended per slice by codec_store_buffer:68-75 when
|
||
* context->h264_start_code is true. Track cumulative offset
|
||
* accordingly. */
|
||
prefix_bytes = context_object->h264_start_code ? 3 : 0;
|
||
|
||
for (i = 0; i < num_slices; i++) {
|
||
VASliceParameterBufferHEVC *slice =
|
||
&surface_object->params.h265.slices[i];
|
||
|
||
cumulative_offset += prefix_bytes; /* skip start code prefix for this slice */
|
||
|
||
h265_fill_slice_params(picture, slice,
|
||
surface_object->source_data,
|
||
cumulative_offset,
|
||
&slice_params_array[i]);
|
||
|
||
cumulative_offset += slice->slice_data_size;
|
||
}
|
||
|
||
h265_fill_sps(picture, &sps);
|
||
h265_fill_pps(picture, &surface_object->params.h265.slices[0], &pps);
|
||
h265_fill_decode_params(driver_data, picture, &decode_params);
|
||
h265_fill_scaling_matrix(iqmatrix, iqmatrix_set, &scaling_matrix);
|
||
|
||
/*
|
||
* iter11 α-14: derive IRAP_PIC / IDR_PIC flags from the first
|
||
* slice's nal_unit_type (already parsed by h265_fill_slice_params
|
||
* from the bitstream into slice_params_array[0].nal_unit_type).
|
||
*
|
||
* H.265 §7.4.2.2:
|
||
* nal_unit_type 16..23 are IRAP (random access).
|
||
* nal_unit_type 19 (IDR_W_RADL) and 20 (IDR_N_LP) are IDR.
|
||
*
|
||
* Without setting these, rkvdec doesn't recognise the keyframe
|
||
* boundary, treats the IDR as inter without references, and
|
||
* produces all-zero CAPTURE output. Phase 3 confirmed kdirect
|
||
* (ffmpeg-v4l2request) sets flags=0x03 (IRAP|IDR) on frame 1
|
||
* and decodes correctly through the same kernel.
|
||
*/
|
||
if (num_slices > 0) {
|
||
uint8_t nut = slice_params_array[0].nal_unit_type;
|
||
if (nut >= 16 && nut <= 23)
|
||
decode_params.flags |=
|
||
V4L2_HEVC_DECODE_PARAM_FLAG_IRAP_PIC;
|
||
if (nut == 19 || nut == 20)
|
||
decode_params.flags |=
|
||
V4L2_HEVC_DECODE_PARAM_FLAG_IDR_PIC;
|
||
}
|
||
|
||
controls[n++] = (struct v4l2_ext_control){
|
||
.id = V4L2_CID_STATELESS_HEVC_SPS,
|
||
.ptr = &sps,
|
||
.size = sizeof(sps),
|
||
};
|
||
controls[n++] = (struct v4l2_ext_control){
|
||
.id = V4L2_CID_STATELESS_HEVC_PPS,
|
||
.ptr = &pps,
|
||
.size = sizeof(pps),
|
||
};
|
||
controls[n++] = (struct v4l2_ext_control){
|
||
.id = V4L2_CID_STATELESS_HEVC_SLICE_PARAMS,
|
||
.ptr = slice_params_array,
|
||
.size = sizeof(struct v4l2_ctrl_hevc_slice_params) * num_slices,
|
||
};
|
||
controls[n++] = (struct v4l2_ext_control){
|
||
.id = V4L2_CID_STATELESS_HEVC_SCALING_MATRIX,
|
||
.ptr = &scaling_matrix,
|
||
.size = sizeof(scaling_matrix),
|
||
};
|
||
controls[n++] = (struct v4l2_ext_control){
|
||
.id = V4L2_CID_STATELESS_HEVC_DECODE_PARAMS,
|
||
.ptr = &decode_params,
|
||
.size = sizeof(decode_params),
|
||
};
|
||
|
||
rc = v4l2_set_controls(driver_data->video_fd,
|
||
surface_object->request_fd,
|
||
controls, n);
|
||
|
||
free(slice_params_array);
|
||
|
||
if (rc < 0)
|
||
return VA_STATUS_ERROR_OPERATION_FAILED;
|
||
|
||
return 0;
|
||
}
|