Compare commits
138 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 902d6c17ba | |||
| c839b9456e | |||
| d7ef0f6cd9 | |||
| 5803cbcf6c | |||
| ab79ed5e4d | |||
| 5fb7e36955 | |||
| 85bcddb5ad | |||
| 9c30eccd52 | |||
| 78a9978b02 | |||
| 61db76ebcf | |||
| bed75c0cef | |||
| 1a2c958ab3 | |||
| 4f6ba6c0e3 | |||
| c5fbc5bf04 | |||
| f91c3f53c5 | |||
| 7ac934e0c5 | |||
| c56a77bd4c | |||
| 25d3e5f06f | |||
| 7db15a5685 | |||
| 48fd0288c3 | |||
| 7e0848d7d2 | |||
| bf3e3d8587 | |||
| 4b3c21b105 | |||
| 23eb1bd5ae | |||
| 68dbbdd4b7 | |||
| 0eca3ffc6b | |||
| 6646b1635e | |||
| c5557882aa | |||
| cd286d9bf0 | |||
| 754be1de7e | |||
| c9bfa21425 | |||
| 719d813f4a | |||
| 66ef848b34 | |||
| d062fec65d | |||
| db0b7f9892 | |||
| e109306fd4 | |||
| a9c897fa8b | |||
| 415688dab0 | |||
| aa82bffa35 | |||
| fc78ed4204 | |||
| afe632fe68 | |||
| 65722e74bd | |||
| 5a6eb4351d | |||
| 0dbe1732f6 | |||
| e63bfd4dde | |||
| 111f8bac8f | |||
| 7ae85c54fc | |||
| 3760a70006 | |||
| 522fb6daa5 | |||
| ca4dd88007 | |||
| 8e2c04f84b | |||
| e0be4e6992 | |||
| 02266841c6 | |||
| 6f4e5833f0 | |||
| 66ecbef5c6 | |||
| 7eae6eab46 | |||
| 6df2159dd3 | |||
| c106d95869 | |||
| 70196f8065 | |||
| 7055b14f5e | |||
| cc077a0c06 | |||
| 1c548b136a | |||
| 6bc29ec582 | |||
| 9a7f888f1b | |||
| 709ab34624 | |||
| 4b2288fa9a | |||
| f8256e6c2d | |||
| ce304ef5af | |||
| 692eaa0053 | |||
| beaa914680 | |||
| 406d08e122 | |||
| 16b397305d | |||
| 7f8fa93213 | |||
| e1aca9cc6b | |||
| 7f84bbb50f | |||
| 017e27f389 | |||
| 27d82e3cf4 | |||
| 8d71e20bf7 | |||
| cca539d5f9 | |||
| 229d6d11be | |||
| 3aab1879cb | |||
| 5fe873c144 | |||
| e7dad7abb5 | |||
| 65969da3ee | |||
| dcaa1f12e5 | |||
| 7bd0818792 | |||
| 988b848908 | |||
| a09c03c154 | |||
| c8b6edec3d | |||
| b993355507 | |||
| 843febc174 | |||
| d3a299b4cc | |||
| 951233a12e | |||
| 39498f0d8e | |||
| 848fc0c4c4 | |||
| b81ce6981f | |||
| f21bdf0d50 | |||
| 385dee1bbf | |||
| 4892656b3f | |||
| 74d8dd134a | |||
| a12d29937c | |||
| 086b7ce8cb | |||
| 4a7a07e0f4 | |||
| 19acc76da4 | |||
| e64bb0852d | |||
| 06beef6248 | |||
| c036a44f98 | |||
| ac891a01fa | |||
| fdfee2d661 | |||
| 21ae311077 | |||
| 92f5b254e6 | |||
| 7da2b27454 | |||
| 37c0e720fc | |||
| 2517a1206b | |||
| 6be3f3b120 | |||
| a047926dbc | |||
| 9de1be34ef | |||
| d41a4b96b3 | |||
| 74b3793e3c | |||
| 8594d74275 | |||
| b0a93e4683 | |||
| 05ffd02ff2 | |||
| fdb0b728d7 | |||
| affb4bd12a | |||
| c672f19f44 | |||
| 841f616e74 | |||
| 1690dfaa79 | |||
| 3609fbb425 | |||
| 597e896594 | |||
| 86a8545146 | |||
| 4078368104 | |||
| 4246d5d537 | |||
| e382c63e20 | |||
| 565f5c0de4 | |||
| 58a0e8baf9 | |||
| 50e0c2b996 | |||
| 10114f6781 | |||
| c45fea96e3 |
@@ -0,0 +1,9 @@
|
||||
# STUDY.md → moved
|
||||
|
||||
The Phase 0 / Phase 2 substrate that previously lived here has been transformed into the campaign-level Phase 0 document at:
|
||||
|
||||
- [`../phase0_findings.md`](../phase0_findings.md)
|
||||
|
||||
That document also points at the remaining open questions for Phase 1 lock and the verification gate at Phase 7. Read it together with the campaign README at [`../README.md`](../README.md).
|
||||
|
||||
The last commit in which STUDY.md still held the substrate content is `e0acc33`; run `git show e0acc33:STUDY.md` to recover that historical content if needed.
|
||||
@@ -1,197 +0,0 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
* These are the H.264 state controls for use with stateless H.264
|
||||
* codec drivers.
|
||||
*
|
||||
* It turns out that these structs are not stable yet and will undergo
|
||||
* more changes. So keep them private until they are stable and ready to
|
||||
* become part of the official public API.
|
||||
*/
|
||||
|
||||
#ifndef _H264_CTRLS_H_
|
||||
#define _H264_CTRLS_H_
|
||||
|
||||
#include <linux/videodev2.h>
|
||||
|
||||
/* Our pixel format isn't stable at the moment */
|
||||
#define V4L2_PIX_FMT_H264_SLICE_RAW v4l2_fourcc('S', '2', '6', '4') /* H264 parsed slices */
|
||||
|
||||
/*
|
||||
* This is put insanely high to avoid conflicting with controls that
|
||||
* would be added during the phase where those controls are not
|
||||
* stable. It should be fixed eventually.
|
||||
*/
|
||||
#define V4L2_CID_MPEG_VIDEO_H264_SPS (V4L2_CID_MPEG_BASE+1000)
|
||||
#define V4L2_CID_MPEG_VIDEO_H264_PPS (V4L2_CID_MPEG_BASE+1001)
|
||||
#define V4L2_CID_MPEG_VIDEO_H264_SCALING_MATRIX (V4L2_CID_MPEG_BASE+1002)
|
||||
#define V4L2_CID_MPEG_VIDEO_H264_SLICE_PARAMS (V4L2_CID_MPEG_BASE+1003)
|
||||
#define V4L2_CID_MPEG_VIDEO_H264_DECODE_PARAMS (V4L2_CID_MPEG_BASE+1004)
|
||||
|
||||
/* enum v4l2_ctrl_type type values */
|
||||
#define V4L2_CTRL_TYPE_H264_SPS 0x0110
|
||||
#define V4L2_CTRL_TYPE_H264_PPS 0x0111
|
||||
#define V4L2_CTRL_TYPE_H264_SCALING_MATRIX 0x0112
|
||||
#define V4L2_CTRL_TYPE_H264_SLICE_PARAMS 0x0113
|
||||
#define V4L2_CTRL_TYPE_H264_DECODE_PARAMS 0x0114
|
||||
|
||||
#define V4L2_H264_SPS_CONSTRAINT_SET0_FLAG 0x01
|
||||
#define V4L2_H264_SPS_CONSTRAINT_SET1_FLAG 0x02
|
||||
#define V4L2_H264_SPS_CONSTRAINT_SET2_FLAG 0x04
|
||||
#define V4L2_H264_SPS_CONSTRAINT_SET3_FLAG 0x08
|
||||
#define V4L2_H264_SPS_CONSTRAINT_SET4_FLAG 0x10
|
||||
#define V4L2_H264_SPS_CONSTRAINT_SET5_FLAG 0x20
|
||||
|
||||
#define V4L2_H264_SPS_FLAG_SEPARATE_COLOUR_PLANE 0x01
|
||||
#define V4L2_H264_SPS_FLAG_QPPRIME_Y_ZERO_TRANSFORM_BYPASS 0x02
|
||||
#define V4L2_H264_SPS_FLAG_DELTA_PIC_ORDER_ALWAYS_ZERO 0x04
|
||||
#define V4L2_H264_SPS_FLAG_GAPS_IN_FRAME_NUM_VALUE_ALLOWED 0x08
|
||||
#define V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY 0x10
|
||||
#define V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD 0x20
|
||||
#define V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE 0x40
|
||||
|
||||
struct v4l2_ctrl_h264_sps {
|
||||
__u8 profile_idc;
|
||||
__u8 constraint_set_flags;
|
||||
__u8 level_idc;
|
||||
__u8 seq_parameter_set_id;
|
||||
__u8 chroma_format_idc;
|
||||
__u8 bit_depth_luma_minus8;
|
||||
__u8 bit_depth_chroma_minus8;
|
||||
__u8 log2_max_frame_num_minus4;
|
||||
__u8 pic_order_cnt_type;
|
||||
__u8 log2_max_pic_order_cnt_lsb_minus4;
|
||||
__u8 max_num_ref_frames;
|
||||
__u8 num_ref_frames_in_pic_order_cnt_cycle;
|
||||
__s32 offset_for_ref_frame[255];
|
||||
__s32 offset_for_non_ref_pic;
|
||||
__s32 offset_for_top_to_bottom_field;
|
||||
__u16 pic_width_in_mbs_minus1;
|
||||
__u16 pic_height_in_map_units_minus1;
|
||||
__u32 flags;
|
||||
};
|
||||
|
||||
#define V4L2_H264_PPS_FLAG_ENTROPY_CODING_MODE 0x0001
|
||||
#define V4L2_H264_PPS_FLAG_BOTTOM_FIELD_PIC_ORDER_IN_FRAME_PRESENT 0x0002
|
||||
#define V4L2_H264_PPS_FLAG_WEIGHTED_PRED 0x0004
|
||||
#define V4L2_H264_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT 0x0008
|
||||
#define V4L2_H264_PPS_FLAG_CONSTRAINED_INTRA_PRED 0x0010
|
||||
#define V4L2_H264_PPS_FLAG_REDUNDANT_PIC_CNT_PRESENT 0x0020
|
||||
#define V4L2_H264_PPS_FLAG_TRANSFORM_8X8_MODE 0x0040
|
||||
#define V4L2_H264_PPS_FLAG_PIC_SCALING_MATRIX_PRESENT 0x0080
|
||||
|
||||
struct v4l2_ctrl_h264_pps {
|
||||
__u8 pic_parameter_set_id;
|
||||
__u8 seq_parameter_set_id;
|
||||
__u8 num_slice_groups_minus1;
|
||||
__u8 num_ref_idx_l0_default_active_minus1;
|
||||
__u8 num_ref_idx_l1_default_active_minus1;
|
||||
__u8 weighted_bipred_idc;
|
||||
__s8 pic_init_qp_minus26;
|
||||
__s8 pic_init_qs_minus26;
|
||||
__s8 chroma_qp_index_offset;
|
||||
__s8 second_chroma_qp_index_offset;
|
||||
__u16 flags;
|
||||
};
|
||||
|
||||
struct v4l2_ctrl_h264_scaling_matrix {
|
||||
__u8 scaling_list_4x4[6][16];
|
||||
__u8 scaling_list_8x8[6][64];
|
||||
};
|
||||
|
||||
struct v4l2_h264_weight_factors {
|
||||
__s16 luma_weight[32];
|
||||
__s16 luma_offset[32];
|
||||
__s16 chroma_weight[32][2];
|
||||
__s16 chroma_offset[32][2];
|
||||
};
|
||||
|
||||
struct v4l2_h264_pred_weight_table {
|
||||
__u16 luma_log2_weight_denom;
|
||||
__u16 chroma_log2_weight_denom;
|
||||
struct v4l2_h264_weight_factors weight_factors[2];
|
||||
};
|
||||
|
||||
#define V4L2_H264_SLICE_TYPE_P 0
|
||||
#define V4L2_H264_SLICE_TYPE_B 1
|
||||
#define V4L2_H264_SLICE_TYPE_I 2
|
||||
#define V4L2_H264_SLICE_TYPE_SP 3
|
||||
#define V4L2_H264_SLICE_TYPE_SI 4
|
||||
|
||||
#define V4L2_H264_SLICE_FLAG_FIELD_PIC 0x01
|
||||
#define V4L2_H264_SLICE_FLAG_BOTTOM_FIELD 0x02
|
||||
#define V4L2_H264_SLICE_FLAG_DIRECT_SPATIAL_MV_PRED 0x04
|
||||
#define V4L2_H264_SLICE_FLAG_SP_FOR_SWITCH 0x08
|
||||
|
||||
struct v4l2_ctrl_h264_slice_params {
|
||||
/* Size in bytes, including header */
|
||||
__u32 size;
|
||||
/* Offset in bits to slice_data() from the beginning of this slice. */
|
||||
__u32 header_bit_size;
|
||||
|
||||
__u16 first_mb_in_slice;
|
||||
__u8 slice_type;
|
||||
__u8 pic_parameter_set_id;
|
||||
__u8 colour_plane_id;
|
||||
__u8 redundant_pic_cnt;
|
||||
__u16 frame_num;
|
||||
__u16 idr_pic_id;
|
||||
__u16 pic_order_cnt_lsb;
|
||||
__s32 delta_pic_order_cnt_bottom;
|
||||
__s32 delta_pic_order_cnt0;
|
||||
__s32 delta_pic_order_cnt1;
|
||||
|
||||
struct v4l2_h264_pred_weight_table pred_weight_table;
|
||||
/* Size in bits of dec_ref_pic_marking() syntax element. */
|
||||
__u32 dec_ref_pic_marking_bit_size;
|
||||
/* Size in bits of pic order count syntax. */
|
||||
__u32 pic_order_cnt_bit_size;
|
||||
|
||||
__u8 cabac_init_idc;
|
||||
__s8 slice_qp_delta;
|
||||
__s8 slice_qs_delta;
|
||||
__u8 disable_deblocking_filter_idc;
|
||||
__s8 slice_alpha_c0_offset_div2;
|
||||
__s8 slice_beta_offset_div2;
|
||||
__u8 num_ref_idx_l0_active_minus1;
|
||||
__u8 num_ref_idx_l1_active_minus1;
|
||||
__u32 slice_group_change_cycle;
|
||||
|
||||
/*
|
||||
* Entries on each list are indices into
|
||||
* v4l2_ctrl_h264_decode_params.dpb[].
|
||||
*/
|
||||
__u8 ref_pic_list0[32];
|
||||
__u8 ref_pic_list1[32];
|
||||
|
||||
__u32 flags;
|
||||
};
|
||||
|
||||
#define V4L2_H264_DPB_ENTRY_FLAG_VALID 0x01
|
||||
#define V4L2_H264_DPB_ENTRY_FLAG_ACTIVE 0x02
|
||||
#define V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM 0x04
|
||||
|
||||
struct v4l2_h264_dpb_entry {
|
||||
__u64 reference_ts;
|
||||
__u16 frame_num;
|
||||
__u16 pic_num;
|
||||
/* Note that field is indicated by v4l2_buffer.field */
|
||||
__s32 top_field_order_cnt;
|
||||
__s32 bottom_field_order_cnt;
|
||||
__u32 flags; /* V4L2_H264_DPB_ENTRY_FLAG_* */
|
||||
};
|
||||
|
||||
#define V4L2_H264_DECODE_PARAM_FLAG_IDR_PIC 0x01
|
||||
|
||||
struct v4l2_ctrl_h264_decode_params {
|
||||
struct v4l2_h264_dpb_entry dpb[16];
|
||||
__u16 num_slices;
|
||||
__u16 nal_ref_idc;
|
||||
__u8 ref_pic_list_p0[32];
|
||||
__u8 ref_pic_list_b0[32];
|
||||
__u8 ref_pic_list_b1[32];
|
||||
__s32 top_field_order_cnt;
|
||||
__s32 bottom_field_order_cnt;
|
||||
__u32 flags; /* V4L2_H264_DECODE_PARAM_FLAG_* */
|
||||
};
|
||||
|
||||
#endif
|
||||
+7
-183
@@ -1,185 +1,9 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
* These are the HEVC state controls for use with stateless HEVC
|
||||
* codec drivers.
|
||||
*
|
||||
* It turns out that these structs are not stable yet and will undergo
|
||||
* more changes. So keep them private until they are stable and ready to
|
||||
* become part of the official public API.
|
||||
*/
|
||||
|
||||
#ifndef _HEVC_CTRLS_H_
|
||||
#define _HEVC_CTRLS_H_
|
||||
|
||||
/* The pixel format isn't stable at the moment and will likely be renamed. */
|
||||
#define V4L2_PIX_FMT_HEVC_SLICE v4l2_fourcc('S', '2', '6', '5') /* HEVC parsed slices */
|
||||
|
||||
#define V4L2_CID_MPEG_VIDEO_HEVC_SPS (V4L2_CID_MPEG_BASE + 1008)
|
||||
#define V4L2_CID_MPEG_VIDEO_HEVC_PPS (V4L2_CID_MPEG_BASE + 1009)
|
||||
#define V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS (V4L2_CID_MPEG_BASE + 1010)
|
||||
|
||||
/* enum v4l2_ctrl_type type values */
|
||||
#define V4L2_CTRL_TYPE_HEVC_SPS 0x0120
|
||||
#define V4L2_CTRL_TYPE_HEVC_PPS 0x0121
|
||||
#define V4L2_CTRL_TYPE_HEVC_SLICE_PARAMS 0x0122
|
||||
|
||||
#define V4L2_HEVC_SLICE_TYPE_B 0
|
||||
#define V4L2_HEVC_SLICE_TYPE_P 1
|
||||
#define V4L2_HEVC_SLICE_TYPE_I 2
|
||||
|
||||
/* The controls are not stable at the moment and will likely be reworked. */
|
||||
struct v4l2_ctrl_hevc_sps {
|
||||
/* ISO/IEC 23008-2, ITU-T Rec. H.265: Sequence parameter set */
|
||||
__u8 chroma_format_idc;
|
||||
__u8 separate_colour_plane_flag;
|
||||
__u16 pic_width_in_luma_samples;
|
||||
__u16 pic_height_in_luma_samples;
|
||||
__u8 bit_depth_luma_minus8;
|
||||
__u8 bit_depth_chroma_minus8;
|
||||
__u8 log2_max_pic_order_cnt_lsb_minus4;
|
||||
__u8 sps_max_dec_pic_buffering_minus1;
|
||||
__u8 sps_max_num_reorder_pics;
|
||||
__u8 sps_max_latency_increase_plus1;
|
||||
__u8 log2_min_luma_coding_block_size_minus3;
|
||||
__u8 log2_diff_max_min_luma_coding_block_size;
|
||||
__u8 log2_min_luma_transform_block_size_minus2;
|
||||
__u8 log2_diff_max_min_luma_transform_block_size;
|
||||
__u8 max_transform_hierarchy_depth_inter;
|
||||
__u8 max_transform_hierarchy_depth_intra;
|
||||
__u8 scaling_list_enabled_flag;
|
||||
__u8 amp_enabled_flag;
|
||||
__u8 sample_adaptive_offset_enabled_flag;
|
||||
__u8 pcm_enabled_flag;
|
||||
__u8 pcm_sample_bit_depth_luma_minus1;
|
||||
__u8 pcm_sample_bit_depth_chroma_minus1;
|
||||
__u8 log2_min_pcm_luma_coding_block_size_minus3;
|
||||
__u8 log2_diff_max_min_pcm_luma_coding_block_size;
|
||||
__u8 pcm_loop_filter_disabled_flag;
|
||||
__u8 num_short_term_ref_pic_sets;
|
||||
__u8 long_term_ref_pics_present_flag;
|
||||
__u8 num_long_term_ref_pics_sps;
|
||||
__u8 sps_temporal_mvp_enabled_flag;
|
||||
__u8 strong_intra_smoothing_enabled_flag;
|
||||
};
|
||||
|
||||
struct v4l2_ctrl_hevc_pps {
|
||||
/* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture parameter set */
|
||||
__u8 dependent_slice_segment_flag;
|
||||
__u8 output_flag_present_flag;
|
||||
__u8 num_extra_slice_header_bits;
|
||||
__u8 sign_data_hiding_enabled_flag;
|
||||
__u8 cabac_init_present_flag;
|
||||
__s8 init_qp_minus26;
|
||||
__u8 constrained_intra_pred_flag;
|
||||
__u8 transform_skip_enabled_flag;
|
||||
__u8 cu_qp_delta_enabled_flag;
|
||||
__u8 diff_cu_qp_delta_depth;
|
||||
__s8 pps_cb_qp_offset;
|
||||
__s8 pps_cr_qp_offset;
|
||||
__u8 pps_slice_chroma_qp_offsets_present_flag;
|
||||
__u8 weighted_pred_flag;
|
||||
__u8 weighted_bipred_flag;
|
||||
__u8 transquant_bypass_enabled_flag;
|
||||
__u8 tiles_enabled_flag;
|
||||
__u8 entropy_coding_sync_enabled_flag;
|
||||
__u8 num_tile_columns_minus1;
|
||||
__u8 num_tile_rows_minus1;
|
||||
__u8 column_width_minus1[20];
|
||||
__u8 row_height_minus1[22];
|
||||
__u8 loop_filter_across_tiles_enabled_flag;
|
||||
__u8 pps_loop_filter_across_slices_enabled_flag;
|
||||
__u8 deblocking_filter_override_enabled_flag;
|
||||
__u8 pps_disable_deblocking_filter_flag;
|
||||
__s8 pps_beta_offset_div2;
|
||||
__s8 pps_tc_offset_div2;
|
||||
__u8 lists_modification_present_flag;
|
||||
__u8 log2_parallel_merge_level_minus2;
|
||||
__u8 slice_segment_header_extension_present_flag;
|
||||
__u8 padding;
|
||||
};
|
||||
|
||||
#define V4L2_HEVC_DPB_ENTRY_RPS_ST_CURR_BEFORE 0x01
|
||||
#define V4L2_HEVC_DPB_ENTRY_RPS_ST_CURR_AFTER 0x02
|
||||
#define V4L2_HEVC_DPB_ENTRY_RPS_LT_CURR 0x03
|
||||
|
||||
#define V4L2_HEVC_DPB_ENTRIES_NUM_MAX 16
|
||||
|
||||
struct v4l2_hevc_dpb_entry {
|
||||
__u64 timestamp;
|
||||
__u8 rps;
|
||||
__u8 field_pic;
|
||||
__u16 pic_order_cnt[2];
|
||||
__u8 padding[2];
|
||||
};
|
||||
|
||||
struct v4l2_hevc_pred_weight_table {
|
||||
__u8 luma_log2_weight_denom;
|
||||
__s8 delta_chroma_log2_weight_denom;
|
||||
|
||||
__s8 delta_luma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
|
||||
__s8 luma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
|
||||
__s8 delta_chroma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2];
|
||||
__s8 chroma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2];
|
||||
|
||||
__s8 delta_luma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
|
||||
__s8 luma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
|
||||
__s8 delta_chroma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2];
|
||||
__s8 chroma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2];
|
||||
|
||||
__u8 padding[2];
|
||||
};
|
||||
|
||||
struct v4l2_ctrl_hevc_slice_params {
|
||||
__u32 bit_size;
|
||||
__u32 data_bit_offset;
|
||||
|
||||
/* ISO/IEC 23008-2, ITU-T Rec. H.265: NAL unit header */
|
||||
__u8 nal_unit_type;
|
||||
__u8 nuh_temporal_id_plus1;
|
||||
|
||||
/* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
|
||||
__u8 slice_type;
|
||||
__u8 colour_plane_id;
|
||||
__u16 slice_pic_order_cnt;
|
||||
__u8 slice_sao_luma_flag;
|
||||
__u8 slice_sao_chroma_flag;
|
||||
__u8 slice_temporal_mvp_enabled_flag;
|
||||
__u8 num_ref_idx_l0_active_minus1;
|
||||
__u8 num_ref_idx_l1_active_minus1;
|
||||
__u8 mvd_l1_zero_flag;
|
||||
__u8 cabac_init_flag;
|
||||
__u8 collocated_from_l0_flag;
|
||||
__u8 collocated_ref_idx;
|
||||
__u8 five_minus_max_num_merge_cand;
|
||||
__u8 use_integer_mv_flag;
|
||||
__s8 slice_qp_delta;
|
||||
__s8 slice_cb_qp_offset;
|
||||
__s8 slice_cr_qp_offset;
|
||||
__s8 slice_act_y_qp_offset;
|
||||
__s8 slice_act_cb_qp_offset;
|
||||
__s8 slice_act_cr_qp_offset;
|
||||
__u8 slice_deblocking_filter_disabled_flag;
|
||||
__s8 slice_beta_offset_div2;
|
||||
__s8 slice_tc_offset_div2;
|
||||
__u8 slice_loop_filter_across_slices_enabled_flag;
|
||||
|
||||
/* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture timing SEI message */
|
||||
__u8 pic_struct;
|
||||
|
||||
/* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
|
||||
struct v4l2_hevc_dpb_entry dpb[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
|
||||
__u8 num_active_dpb_entries;
|
||||
__u8 ref_idx_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
|
||||
__u8 ref_idx_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
|
||||
|
||||
__u8 num_rps_poc_st_curr_before;
|
||||
__u8 num_rps_poc_st_curr_after;
|
||||
__u8 num_rps_poc_lt_curr;
|
||||
|
||||
/* ISO/IEC 23008-2, ITU-T Rec. H.265: Weighted prediction parameter */
|
||||
struct v4l2_hevc_pred_weight_table pred_weight_table;
|
||||
|
||||
__u8 padding[2];
|
||||
};
|
||||
|
||||
/* Fourier-local override: HEVC controls are upstream since linux-media
|
||||
* 6.6+, so defer to the kernel's linux/v4l2-controls.h instead of
|
||||
* duplicating the struct definitions (duplication causes redefinition
|
||||
* errors on newer linux-api-headers). */
|
||||
#ifndef _LIBVA_V4L2_REQUEST_HEVC_CTRLS_H
|
||||
#define _LIBVA_V4L2_REQUEST_HEVC_CTRLS_H
|
||||
#include <linux/v4l2-controls.h>
|
||||
#endif
|
||||
|
||||
@@ -1,82 +0,0 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
* These are the MPEG2 state controls for use with stateless MPEG-2
|
||||
* codec drivers.
|
||||
*
|
||||
* It turns out that these structs are not stable yet and will undergo
|
||||
* more changes. So keep them private until they are stable and ready to
|
||||
* become part of the official public API.
|
||||
*/
|
||||
|
||||
#ifndef _MPEG2_CTRLS_H_
|
||||
#define _MPEG2_CTRLS_H_
|
||||
|
||||
#define V4L2_CID_MPEG_VIDEO_MPEG2_SLICE_PARAMS (V4L2_CID_MPEG_BASE+250)
|
||||
#define V4L2_CID_MPEG_VIDEO_MPEG2_QUANTIZATION (V4L2_CID_MPEG_BASE+251)
|
||||
|
||||
/* enum v4l2_ctrl_type type values */
|
||||
#define V4L2_CTRL_TYPE_MPEG2_SLICE_PARAMS 0x0103
|
||||
#define V4L2_CTRL_TYPE_MPEG2_QUANTIZATION 0x0104
|
||||
|
||||
#define V4L2_MPEG2_PICTURE_CODING_TYPE_I 1
|
||||
#define V4L2_MPEG2_PICTURE_CODING_TYPE_P 2
|
||||
#define V4L2_MPEG2_PICTURE_CODING_TYPE_B 3
|
||||
#define V4L2_MPEG2_PICTURE_CODING_TYPE_D 4
|
||||
|
||||
struct v4l2_mpeg2_sequence {
|
||||
/* ISO/IEC 13818-2, ITU-T Rec. H.262: Sequence header */
|
||||
__u16 horizontal_size;
|
||||
__u16 vertical_size;
|
||||
__u32 vbv_buffer_size;
|
||||
|
||||
/* ISO/IEC 13818-2, ITU-T Rec. H.262: Sequence extension */
|
||||
__u16 profile_and_level_indication;
|
||||
__u8 progressive_sequence;
|
||||
__u8 chroma_format;
|
||||
};
|
||||
|
||||
struct v4l2_mpeg2_picture {
|
||||
/* ISO/IEC 13818-2, ITU-T Rec. H.262: Picture header */
|
||||
__u8 picture_coding_type;
|
||||
|
||||
/* ISO/IEC 13818-2, ITU-T Rec. H.262: Picture coding extension */
|
||||
__u8 f_code[2][2];
|
||||
__u8 intra_dc_precision;
|
||||
__u8 picture_structure;
|
||||
__u8 top_field_first;
|
||||
__u8 frame_pred_frame_dct;
|
||||
__u8 concealment_motion_vectors;
|
||||
__u8 q_scale_type;
|
||||
__u8 intra_vlc_format;
|
||||
__u8 alternate_scan;
|
||||
__u8 repeat_first_field;
|
||||
__u16 progressive_frame;
|
||||
};
|
||||
|
||||
struct v4l2_ctrl_mpeg2_slice_params {
|
||||
__u32 bit_size;
|
||||
__u32 data_bit_offset;
|
||||
__u64 backward_ref_ts;
|
||||
__u64 forward_ref_ts;
|
||||
|
||||
struct v4l2_mpeg2_sequence sequence;
|
||||
struct v4l2_mpeg2_picture picture;
|
||||
|
||||
/* ISO/IEC 13818-2, ITU-T Rec. H.262: Slice */
|
||||
__u32 quantiser_scale_code;
|
||||
};
|
||||
|
||||
struct v4l2_ctrl_mpeg2_quantization {
|
||||
/* ISO/IEC 13818-2, ITU-T Rec. H.262: Quant matrix extension */
|
||||
__u8 load_intra_quantiser_matrix;
|
||||
__u8 load_non_intra_quantiser_matrix;
|
||||
__u8 load_chroma_intra_quantiser_matrix;
|
||||
__u8 load_chroma_non_intra_quantiser_matrix;
|
||||
|
||||
__u8 intra_quantiser_matrix[64];
|
||||
__u8 non_intra_quantiser_matrix[64];
|
||||
__u8 chroma_intra_quantiser_matrix[64];
|
||||
__u8 chroma_non_intra_quantiser_matrix[64];
|
||||
};
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,689 @@
|
||||
/*
|
||||
* Copyright (C) 2026 claude-noether <claude-noether@reauktion.de>
|
||||
*
|
||||
* ampere-av1-enablement Phase 2.1: AV1 codec dispatcher for libva-v4l2-
|
||||
* request-fourier. Translates VAAPI AV1 picture/slice parameter buffers
|
||||
* into V4L2 stateless AV1 controls (V4L2_CID_STATELESS_AV1_*) for the
|
||||
* Rockchip vpu981 hardware on RK3588.
|
||||
*
|
||||
* Reference: Kwiboo/FFmpeg v4l2-request-n8.1:libavcodec/v4l2_request_av1.c
|
||||
* (636 LoC; reads from FFmpeg's AV1RawSequenceHeader + AV1RawFrameHeader).
|
||||
* VAAPI exposes the same AV1 spec semantics through different struct
|
||||
* shapes: sequence-level fields are folded into VADecPictureParameterBufferAV1
|
||||
* (no separate sequence buffer); per-frame fields live in the same struct.
|
||||
*
|
||||
* F1/F2/F3 risk mitigations per phase1_plan_v2 §"General fill_frame
|
||||
* implementation risks":
|
||||
* F1 tile_info.mi_col/row_starts sentinel = 2 * ((frame_width + 7) >> 3)
|
||||
* mirrors Kwiboo lines 238/244 exactly.
|
||||
* F2 superres_denom: VAAPI exposes superres_scale_denominator directly
|
||||
* and per spec it's already 8 when use_superres=0. No offset math
|
||||
* needed (Kwiboo does it because FFmpeg stores raw coded_denom).
|
||||
* F3 loop_restoration_size[] gated on USES_LR flag mirrors Kwiboo
|
||||
* lines 281-287 exactly.
|
||||
*
|
||||
* V4L2 controls (4 per frame, batched in one VIDIOC_S_EXT_CTRLS):
|
||||
* 1. V4L2_CID_STATELESS_AV1_SEQUENCE
|
||||
* 2. V4L2_CID_STATELESS_AV1_FRAME
|
||||
* 3. V4L2_CID_STATELESS_AV1_TILE_GROUP_ENTRY[] (DYNAMIC_ARRAY)
|
||||
* 4. V4L2_CID_STATELESS_AV1_FILM_GRAIN (conditional on driver_data->
|
||||
* has_av1_film_grain probe)
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sub license, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY CLAIM,
|
||||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "av1.h"
|
||||
|
||||
#include "context.h"
|
||||
#include "object_heap.h"
|
||||
#include "request.h"
|
||||
#include "surface.h"
|
||||
#include "utils.h"
|
||||
#include "v4l2.h"
|
||||
|
||||
#include <va/va.h>
|
||||
|
||||
#include <linux/videodev2.h>
|
||||
#include <linux/v4l2-controls.h>
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
/* Sanity asserts to catch kernel uAPI drift. If these fire, the kernel
|
||||
* headers on the build machine are out of sync with what the running
|
||||
* driver expects — silent register-misalignment bugs result. Cross-compile
|
||||
* hazard per Janet v3 amendment: native-arm64 builds only (boltzmann +
|
||||
* ampere); no cross from x86 against ARM kernel headers. */
|
||||
_Static_assert(sizeof(struct v4l2_ctrl_av1_tile_group_entry) == 16,
|
||||
"v4l2_ctrl_av1_tile_group_entry size drift — recheck uAPI");
|
||||
|
||||
/* Per AV1 spec, when use_superres=0 the superres denominator is 8.
|
||||
* VAAPI's superres_scale_denominator already encodes this directly
|
||||
* (per va_dec_av1.h: "When use_superres=0, superres_scale_denominator
|
||||
* must be 8"). Kwiboo's AV1_SUPERRES_DENOM_MIN+coded_denom math is
|
||||
* not needed when reading from VAAPI. */
|
||||
#define AV1_SUPERRES_NUM 8
|
||||
|
||||
/* AV1 spec maxima used for V4L2 array sizing. */
|
||||
#define BACKEND_AV1_MAX_SEGMENTS 8
|
||||
#define BACKEND_AV1_SEG_LVL_MAX 8
|
||||
#define BACKEND_AV1_SEG_LVL_REF_FRAME 5
|
||||
#define BACKEND_AV1_NUM_REF_FRAMES 8
|
||||
#define BACKEND_AV1_TOTAL_REFS_PER_FRAME 8
|
||||
#define BACKEND_AV1_REFS_PER_FRAME 7
|
||||
|
||||
/* ===== fill_sequence ===== */
|
||||
/*
 * Build the V4L2 AV1 sequence control from a VAAPI AV1 picture parameter
 * buffer.
 *
 * @picture: VAAPI picture parameters; only read.
 * @ctrl:    destination control; fully zeroed first, then populated.
 *
 * VAAPI has no separate sequence buffer, so sequence-level values are taken
 * from (or approximated by) the per-picture fields, as noted inline.
 */
static void av1_fill_sequence(VADecPictureParameterBufferAV1 *picture,
			      struct v4l2_ctrl_av1_sequence *ctrl)
{
/* Shorthand for one bit of the VAAPI sequence-info bitfield. */
#define SEQ_BIT(name) (picture->seq_info_fields.fields.name)
	uint32_t seq_flags = 0;

	memset(ctrl, 0, sizeof(*ctrl));

	ctrl->seq_profile = picture->profile;

	/* bit_depth_idx 1 -> 10 bit, 2 -> 12 bit; 0 and any other value -> 8. */
	if (picture->bit_depth_idx == 1)
		ctrl->bit_depth = 10;
	else if (picture->bit_depth_idx == 2)
		ctrl->bit_depth = 12;
	else
		ctrl->bit_depth = 8;

	/* order_hint_bits is 0 (from the memset) when order hints are off. */
	if (SEQ_BIT(enable_order_hint))
		ctrl->order_hint_bits = picture->order_hint_bits_minus_1 + 1;

	/* VAAPI does NOT separately expose max_frame_{width,height}_minus_1
	 * (sequence-level). Use the current frame size as a proxy. Correct
	 * for fixed-size sequences (the 208/352/1080p test vectors). */
	ctrl->max_frame_width_minus_1 = picture->frame_width_minus1;
	ctrl->max_frame_height_minus_1 = picture->frame_height_minus1;

	/*
	 * Flags set unconditionally. VAAPI exposes none of these at sequence
	 * level:
	 *  - enable_warped_motion: the per-frame allow_warped_motion gates
	 *    it, so set true for the per-frame flag to be honored.
	 *  - enable_ref_frame_mvs / enable_restoration: conservative
	 *    set-true (kdirect also sets these for the test streams; gating
	 *    doesn't matter because per-frame flags govern actual use).
	 */
	seq_flags |= V4L2_AV1_SEQUENCE_FLAG_ENABLE_WARPED_MOTION |
		     V4L2_AV1_SEQUENCE_FLAG_ENABLE_REF_FRAME_MVS |
		     V4L2_AV1_SEQUENCE_FLAG_ENABLE_RESTORATION;

	/*
	 * enable_superres: gate on the current frame's use_superres so the
	 * SEQUENCE flag matches the bitstream-derived value. An strace diff
	 * vs kdirect showed kdirect clears this for streams that never use
	 * superres, whereas this code previously set it unconditionally.
	 */
	if (picture->pic_info_fields.bits.use_superres)
		seq_flags |= V4L2_AV1_SEQUENCE_FLAG_ENABLE_SUPERRES;

	/* Flags that map one-to-one from the VAAPI sequence-info bits. */
	if (SEQ_BIT(still_picture))
		seq_flags |= V4L2_AV1_SEQUENCE_FLAG_STILL_PICTURE;
	if (SEQ_BIT(use_128x128_superblock))
		seq_flags |= V4L2_AV1_SEQUENCE_FLAG_USE_128X128_SUPERBLOCK;
	if (SEQ_BIT(enable_filter_intra))
		seq_flags |= V4L2_AV1_SEQUENCE_FLAG_ENABLE_FILTER_INTRA;
	if (SEQ_BIT(enable_intra_edge_filter))
		seq_flags |= V4L2_AV1_SEQUENCE_FLAG_ENABLE_INTRA_EDGE_FILTER;
	if (SEQ_BIT(enable_interintra_compound))
		seq_flags |= V4L2_AV1_SEQUENCE_FLAG_ENABLE_INTERINTRA_COMPOUND;
	if (SEQ_BIT(enable_masked_compound))
		seq_flags |= V4L2_AV1_SEQUENCE_FLAG_ENABLE_MASKED_COMPOUND;
	if (SEQ_BIT(enable_dual_filter))
		seq_flags |= V4L2_AV1_SEQUENCE_FLAG_ENABLE_DUAL_FILTER;
	if (SEQ_BIT(enable_order_hint))
		seq_flags |= V4L2_AV1_SEQUENCE_FLAG_ENABLE_ORDER_HINT;
	if (SEQ_BIT(enable_jnt_comp))
		seq_flags |= V4L2_AV1_SEQUENCE_FLAG_ENABLE_JNT_COMP;
	if (SEQ_BIT(enable_cdef))
		seq_flags |= V4L2_AV1_SEQUENCE_FLAG_ENABLE_CDEF;
	if (SEQ_BIT(mono_chrome))
		seq_flags |= V4L2_AV1_SEQUENCE_FLAG_MONO_CHROME;
	if (SEQ_BIT(color_range))
		seq_flags |= V4L2_AV1_SEQUENCE_FLAG_COLOR_RANGE;
	if (SEQ_BIT(subsampling_x))
		seq_flags |= V4L2_AV1_SEQUENCE_FLAG_SUBSAMPLING_X;
	if (SEQ_BIT(subsampling_y))
		seq_flags |= V4L2_AV1_SEQUENCE_FLAG_SUBSAMPLING_Y;
	if (SEQ_BIT(film_grain_params_present))
		seq_flags |= V4L2_AV1_SEQUENCE_FLAG_FILM_GRAIN_PARAMS_PRESENT;

	ctrl->flags = seq_flags;
#undef SEQ_BIT
}
|
||||
|
||||
/* ===== fill_frame ===== */
|
||||
/*
 * Translate the VAAPI AV1 picture parameters into the V4L2 stateless
 * AV1 frame control.
 *
 * @picture: VAAPI picture parameter buffer for the frame being decoded.
 * @ctrl:    destination control; fully zeroed first, so every field not
 *           assigned below is 0.
 *
 * Not filled here: reference_frame_ts[] and order_hints[] need surface
 * lookups and are left at zero for the caller to populate.
 */
static void av1_fill_frame(VADecPictureParameterBufferAV1 *picture,
			   struct v4l2_ctrl_av1_frame *ctrl)
{
	/* VAAPI restoration type -> V4L2 restoration type (see below). */
	static const uint8_t lr_remap[4] = {
		V4L2_AV1_FRAME_RESTORE_NONE,
		V4L2_AV1_FRAME_RESTORE_WIENER,
		V4L2_AV1_FRAME_RESTORE_SGRPROJ,
		V4L2_AV1_FRAME_RESTORE_SWITCHABLE,
	};
	unsigned int i, j;

	memset(ctrl, 0, sizeof(*ctrl));

	/* ---- tile_info ---- */
	ctrl->tile_info.context_update_tile_id = picture->context_update_tile_id;
	ctrl->tile_info.tile_cols = picture->tile_cols;
	ctrl->tile_info.tile_rows = picture->tile_rows;
	/* tile_size_bytes stays 0 (from memset) for single-tile frames. */
	if (picture->tile_cols > 1 || picture->tile_rows > 1)
		ctrl->tile_info.tile_size_bytes = 4;

	if (picture->pic_info_fields.bits.uniform_tile_spacing_flag)
		ctrl->tile_info.flags |= V4L2_AV1_TILE_INFO_FLAG_UNIFORM_TILE_SPACING;

	/*
	 * mi_col_starts[] / mi_row_starts[]: VAAPI only exposes per-tile
	 * sizes, so the starts are rebuilt as a prefix sum of
	 * (size_in_sbs_minus_1 + 1), followed by one sentinel entry after
	 * the last tile.
	 *
	 * Every index is clamped to the capacity of the array it touches:
	 * tile_cols/tile_rows are application-supplied 8-bit values, and an
	 * unclamped sentinel index would write past the end of the control.
	 * A tile whose size VAAPI does not carry still gets its start
	 * recorded; only its size entry is left at zero.
	 *
	 * NOTE(review): the prefix sums are in superblock units while the
	 * sentinel is computed from the frame size in 4x4 units. The V4L2
	 * uAPI documents these arrays in 4x4-luma-sample units. Kept as-is
	 * because this is the combination that was validated on hardware --
	 * confirm against the kernel driver before changing.
	 */
	{
		const unsigned int va_cap =
			sizeof(picture->width_in_sbs_minus_1) /
			sizeof(picture->width_in_sbs_minus_1[0]);
		const unsigned int size_cap =
			sizeof(ctrl->tile_info.width_in_sbs_minus_1) /
			sizeof(ctrl->tile_info.width_in_sbs_minus_1[0]);
		const unsigned int starts_cap =
			sizeof(ctrl->tile_info.mi_col_starts) /
			sizeof(ctrl->tile_info.mi_col_starts[0]);
		unsigned int cols = picture->tile_cols;
		uint16_t cum = 0;

		/* Reserve the last slot for the sentinel. */
		if (cols > starts_cap - 1)
			cols = starts_cap - 1;

		for (i = 0; i < cols; i++) {
			ctrl->tile_info.mi_col_starts[i] = cum;
			if (i < va_cap && i < size_cap) {
				ctrl->tile_info.width_in_sbs_minus_1[i] =
					picture->width_in_sbs_minus_1[i];
				cum = (uint16_t)(cum +
					picture->width_in_sbs_minus_1[i] + 1);
			}
		}
		ctrl->tile_info.mi_col_starts[cols] =
			2 * ((picture->frame_width_minus1 + 1 + 7) >> 3);
	}
	{
		const unsigned int va_cap =
			sizeof(picture->height_in_sbs_minus_1) /
			sizeof(picture->height_in_sbs_minus_1[0]);
		const unsigned int size_cap =
			sizeof(ctrl->tile_info.height_in_sbs_minus_1) /
			sizeof(ctrl->tile_info.height_in_sbs_minus_1[0]);
		const unsigned int starts_cap =
			sizeof(ctrl->tile_info.mi_row_starts) /
			sizeof(ctrl->tile_info.mi_row_starts[0]);
		unsigned int rows = picture->tile_rows;
		uint16_t cum = 0;

		/* Reserve the last slot for the sentinel. */
		if (rows > starts_cap - 1)
			rows = starts_cap - 1;

		for (i = 0; i < rows; i++) {
			ctrl->tile_info.mi_row_starts[i] = cum;
			if (i < va_cap && i < size_cap) {
				ctrl->tile_info.height_in_sbs_minus_1[i] =
					picture->height_in_sbs_minus_1[i];
				cum = (uint16_t)(cum +
					picture->height_in_sbs_minus_1[i] + 1);
			}
		}
		ctrl->tile_info.mi_row_starts[rows] =
			2 * ((picture->frame_height_minus1 + 1 + 7) >> 3);
	}

	/* ---- quantization ---- */
	ctrl->quantization.base_q_idx = picture->base_qindex;
	ctrl->quantization.delta_q_y_dc = picture->y_dc_delta_q;
	ctrl->quantization.delta_q_u_dc = picture->u_dc_delta_q;
	ctrl->quantization.delta_q_u_ac = picture->u_ac_delta_q;
	ctrl->quantization.delta_q_v_dc = picture->v_dc_delta_q;
	ctrl->quantization.delta_q_v_ac = picture->v_ac_delta_q;
	ctrl->quantization.qm_y = picture->qmatrix_fields.bits.qm_y;
	ctrl->quantization.qm_u = picture->qmatrix_fields.bits.qm_u;
	ctrl->quantization.qm_v = picture->qmatrix_fields.bits.qm_v;
	ctrl->quantization.delta_q_res =
		picture->mode_control_fields.bits.log2_delta_q_res;

	/* VAAPI has no diff_uv_delta bit; derive it from the U/V deltas. */
	if (picture->u_dc_delta_q != picture->v_dc_delta_q ||
	    picture->u_ac_delta_q != picture->v_ac_delta_q)
		ctrl->quantization.flags |= V4L2_AV1_QUANTIZATION_FLAG_DIFF_UV_DELTA;
	if (picture->qmatrix_fields.bits.using_qmatrix)
		ctrl->quantization.flags |= V4L2_AV1_QUANTIZATION_FLAG_USING_QMATRIX;
	if (picture->mode_control_fields.bits.delta_q_present_flag)
		ctrl->quantization.flags |= V4L2_AV1_QUANTIZATION_FLAG_DELTA_Q_PRESENT;

	/* ---- segmentation ---- */
	if (picture->seg_info.segment_info_fields.bits.enabled)
		ctrl->segmentation.flags |= V4L2_AV1_SEGMENTATION_FLAG_ENABLED;
	if (picture->seg_info.segment_info_fields.bits.update_map)
		ctrl->segmentation.flags |= V4L2_AV1_SEGMENTATION_FLAG_UPDATE_MAP;
	if (picture->seg_info.segment_info_fields.bits.temporal_update)
		ctrl->segmentation.flags |= V4L2_AV1_SEGMENTATION_FLAG_TEMPORAL_UPDATE;
	if (picture->seg_info.segment_info_fields.bits.update_data)
		ctrl->segmentation.flags |= V4L2_AV1_SEGMENTATION_FLAG_UPDATE_DATA;

	for (i = 0; i < BACKEND_AV1_MAX_SEGMENTS; i++) {
		for (j = 0; j < BACKEND_AV1_SEG_LVL_MAX; j++) {
			if (picture->seg_info.feature_mask[i] & (1 << j)) {
				ctrl->segmentation.feature_enabled[i] |=
					V4L2_AV1_SEGMENT_FEATURE_ENABLED(j);
				/* Ascending i: ends up as the highest segment
				 * with any feature enabled. */
				ctrl->segmentation.last_active_seg_id = i;
				if (j >= BACKEND_AV1_SEG_LVL_REF_FRAME)
					ctrl->segmentation.flags |=
						V4L2_AV1_SEGMENTATION_FLAG_SEG_ID_PRE_SKIP;
			}
			/* Data is copied whether or not the feature is on. */
			ctrl->segmentation.feature_data[i][j] =
				picture->seg_info.feature_data[i][j];
		}
	}

	/* ---- loop_filter ---- */
	ctrl->loop_filter.level[0] = picture->filter_level[0];
	ctrl->loop_filter.level[1] = picture->filter_level[1];
	ctrl->loop_filter.level[2] = picture->filter_level_u;
	ctrl->loop_filter.level[3] = picture->filter_level_v;
	ctrl->loop_filter.sharpness =
		picture->loop_filter_info_fields.bits.sharpness_level;
	ctrl->loop_filter.mode_deltas[0] = picture->mode_deltas[0];
	ctrl->loop_filter.mode_deltas[1] = picture->mode_deltas[1];
	ctrl->loop_filter.delta_lf_res =
		picture->mode_control_fields.bits.log2_delta_lf_res;
	for (i = 0; i < BACKEND_AV1_NUM_REF_FRAMES; i++)
		ctrl->loop_filter.ref_deltas[i] = picture->ref_deltas[i];

	if (picture->loop_filter_info_fields.bits.mode_ref_delta_enabled)
		ctrl->loop_filter.flags |= V4L2_AV1_LOOP_FILTER_FLAG_DELTA_ENABLED;
	if (picture->loop_filter_info_fields.bits.mode_ref_delta_update)
		ctrl->loop_filter.flags |= V4L2_AV1_LOOP_FILTER_FLAG_DELTA_UPDATE;
	if (picture->mode_control_fields.bits.delta_lf_present_flag)
		ctrl->loop_filter.flags |= V4L2_AV1_LOOP_FILTER_FLAG_DELTA_LF_PRESENT;
	if (picture->mode_control_fields.bits.delta_lf_multi)
		ctrl->loop_filter.flags |= V4L2_AV1_LOOP_FILTER_FLAG_DELTA_LF_MULTI;

	/* ---- cdef ---- */
	ctrl->cdef.damping_minus_3 = picture->cdef_damping_minus_3;
	ctrl->cdef.bits = picture->cdef_bits;
	{
		/*
		 * 2^cdef_bits strength pairs, at most 8. The count is
		 * computed without shifting by the raw 8-bit field, which
		 * would be undefined for large values.
		 */
		const unsigned int strengths =
			picture->cdef_bits < 3 ? 1u << picture->cdef_bits : 8;

		for (i = 0; i < strengths; i++) {
			/* Packed byte: primary in bits 5..2, secondary in 1..0. */
			uint8_t y = picture->cdef_y_strengths[i];
			uint8_t uv = picture->cdef_uv_strengths[i];

			ctrl->cdef.y_pri_strength[i] = (y >> 2) & 0x0F;
			ctrl->cdef.y_sec_strength[i] = y & 0x03;
			ctrl->cdef.uv_pri_strength[i] = (uv >> 2) & 0x0F;
			ctrl->cdef.uv_sec_strength[i] = uv & 0x03;
		}
	}

	/*
	 * ---- loop_restoration ---- (F3)
	 *
	 * lr_remap is deliberately the identity mapping. A review once
	 * proposed permuting the values (NONE, SWITCHABLE, WIENER, SGRPROJ)
	 * on the theory that VAAPI carries the raw bitstream lr_type
	 * encoding; applying that regressed every test (allintra 10/10 ->
	 * 0/10) and was reverted.
	 *
	 * NOTE(review): the likely explanation is that the VAAPI client has
	 * already converted lr_type to restoration-type semantics before
	 * filling these fields, so they line up with the V4L2 enum --
	 * confirm against the client's VAAPI AV1 code. Revisit only if a
	 * restoration-using fixture shows a real decode bug.
	 */
	{
		uint8_t y_t = picture->loop_restoration_fields.bits.yframe_restoration_type & 3;
		uint8_t cb_t = picture->loop_restoration_fields.bits.cbframe_restoration_type & 3;
		uint8_t cr_t = picture->loop_restoration_fields.bits.crframe_restoration_type & 3;
		const bool chroma_lr = cb_t != 0 || cr_t != 0;
		const bool uses_lr = y_t != 0 || chroma_lr;

		ctrl->loop_restoration.frame_restoration_type[0] = lr_remap[y_t];
		ctrl->loop_restoration.frame_restoration_type[1] = lr_remap[cb_t];
		ctrl->loop_restoration.frame_restoration_type[2] = lr_remap[cr_t];
		if (chroma_lr)
			ctrl->loop_restoration.flags |=
				V4L2_AV1_LOOP_RESTORATION_FLAG_USES_CHROMA_LR;

		ctrl->loop_restoration.lr_unit_shift =
			picture->loop_restoration_fields.bits.lr_unit_shift;
		ctrl->loop_restoration.lr_uv_shift =
			picture->loop_restoration_fields.bits.lr_uv_shift;

		if (uses_lr) {
			uint8_t shift = picture->loop_restoration_fields.bits.lr_unit_shift;
			uint8_t uv_shift = picture->loop_restoration_fields.bits.lr_uv_shift;

			ctrl->loop_restoration.flags |=
				V4L2_AV1_LOOP_RESTORATION_FLAG_USES_LR;
			/* Luma unit is 64 << shift; chroma is reduced by
			 * uv_shift. */
			ctrl->loop_restoration.loop_restoration_size[0] =
				1 << (6 + shift);
			ctrl->loop_restoration.loop_restoration_size[1] =
				1 << (6 + shift - uv_shift);
			ctrl->loop_restoration.loop_restoration_size[2] =
				1 << (6 + shift - uv_shift);
		}
	}

	/*
	 * ---- global_motion ----
	 * Index 0 is the INTRA_FRAME slot and carries no warp, so it stays
	 * zeroed; V4L2 slot i (>= 1) is fed from VAAPI wm[i - 1].
	 */
	for (i = 1; i < BACKEND_AV1_TOTAL_REFS_PER_FRAME; i++) {
		ctrl->global_motion.type[i] = picture->wm[i - 1].wmtype;
		for (j = 0; j < 6; j++)
			ctrl->global_motion.params[i][j] = picture->wm[i - 1].wmmat[j];
		if (picture->wm[i - 1].invalid)
			ctrl->global_motion.invalid |=
				V4L2_AV1_GLOBAL_MOTION_IS_INVALID(i);

		/* Every non-zero type is "global"; types 1 and 2 additionally
		 * set the translation / rot-zoom flag. */
		switch (picture->wm[i - 1].wmtype) {
		case 1:
			ctrl->global_motion.flags[i] |=
				V4L2_AV1_GLOBAL_MOTION_FLAG_IS_TRANSLATION;
			ctrl->global_motion.flags[i] |=
				V4L2_AV1_GLOBAL_MOTION_FLAG_IS_GLOBAL;
			break;
		case 2:
			ctrl->global_motion.flags[i] |=
				V4L2_AV1_GLOBAL_MOTION_FLAG_IS_ROT_ZOOM;
			ctrl->global_motion.flags[i] |=
				V4L2_AV1_GLOBAL_MOTION_FLAG_IS_GLOBAL;
			break;
		case 3:
			ctrl->global_motion.flags[i] |=
				V4L2_AV1_GLOBAL_MOTION_FLAG_IS_GLOBAL;
			break;
		default:
			break;
		}
	}

	/*
	 * ---- reference frames + order hints ----
	 * reference_frame_ts[] and order_hints[] stay zero here (memset);
	 * the caller fills them because it has the driver data needed for
	 * surface lookups. ref_frame_idx[] is copied straight from VAAPI.
	 */
	for (i = 0; i < BACKEND_AV1_REFS_PER_FRAME; i++)
		ctrl->ref_frame_idx[i] = picture->ref_frame_idx[i];

	/* F2: superres_denom direct from VAAPI; fall back to
	 * AV1_SUPERRES_NUM if it is zero (defensive). */
	ctrl->superres_denom = picture->superres_scale_denominator
		? picture->superres_scale_denominator : AV1_SUPERRES_NUM;

	/* skip_mode_frame[] and current_frame_id stay 0 (memset). */
	ctrl->primary_ref_frame = picture->primary_ref_frame;
	ctrl->frame_type = picture->pic_info_fields.bits.frame_type;
	ctrl->order_hint = picture->order_hint;
	ctrl->upscaled_width = picture->frame_width_minus1 + 1;
	ctrl->interpolation_filter = picture->interp_filter;
	ctrl->tx_mode = picture->mode_control_fields.bits.tx_mode;
	ctrl->frame_width_minus_1 = picture->frame_width_minus1;
	ctrl->frame_height_minus_1 = picture->frame_height_minus1;
	/* No separate render size is read from VAAPI; reuse the frame size. */
	ctrl->render_width_minus_1 = picture->frame_width_minus1;
	ctrl->render_height_minus_1 = picture->frame_height_minus1;

	/*
	 * Phase 3: VAAPI doesn't expose refresh_frame_flags, so 0xff
	 * (refresh every slot) is sent for all frame types. Empirical diff
	 * vs kdirect shows kdirect always sends 0xff here. Streams that need
	 * precise per-slot control would need bitstream-side parsing.
	 */
	ctrl->refresh_frame_flags = 0xff;

	/* ---- frame flags ---- */
	if (picture->pic_info_fields.bits.show_frame)
		ctrl->flags |= V4L2_AV1_FRAME_FLAG_SHOW_FRAME;
	if (picture->pic_info_fields.bits.showable_frame)
		ctrl->flags |= V4L2_AV1_FRAME_FLAG_SHOWABLE_FRAME;
	if (picture->pic_info_fields.bits.error_resilient_mode)
		ctrl->flags |= V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE;
	if (picture->pic_info_fields.bits.disable_cdf_update)
		ctrl->flags |= V4L2_AV1_FRAME_FLAG_DISABLE_CDF_UPDATE;
	if (picture->pic_info_fields.bits.allow_screen_content_tools)
		ctrl->flags |= V4L2_AV1_FRAME_FLAG_ALLOW_SCREEN_CONTENT_TOOLS;
	if (picture->pic_info_fields.bits.force_integer_mv)
		ctrl->flags |= V4L2_AV1_FRAME_FLAG_FORCE_INTEGER_MV;
	if (picture->pic_info_fields.bits.allow_intrabc)
		ctrl->flags |= V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC;
	if (picture->pic_info_fields.bits.use_superres)
		ctrl->flags |= V4L2_AV1_FRAME_FLAG_USE_SUPERRES;
	if (picture->pic_info_fields.bits.allow_high_precision_mv)
		ctrl->flags |= V4L2_AV1_FRAME_FLAG_ALLOW_HIGH_PRECISION_MV;
	if (picture->pic_info_fields.bits.is_motion_mode_switchable)
		ctrl->flags |= V4L2_AV1_FRAME_FLAG_IS_MOTION_MODE_SWITCHABLE;
	if (picture->pic_info_fields.bits.use_ref_frame_mvs)
		ctrl->flags |= V4L2_AV1_FRAME_FLAG_USE_REF_FRAME_MVS;
	if (picture->pic_info_fields.bits.disable_frame_end_update_cdf)
		ctrl->flags |= V4L2_AV1_FRAME_FLAG_DISABLE_FRAME_END_UPDATE_CDF;
	if (picture->pic_info_fields.bits.allow_warped_motion)
		ctrl->flags |= V4L2_AV1_FRAME_FLAG_ALLOW_WARPED_MOTION;
	if (picture->mode_control_fields.bits.reference_select)
		ctrl->flags |= V4L2_AV1_FRAME_FLAG_REFERENCE_SELECT;
	if (picture->mode_control_fields.bits.reduced_tx_set_used)
		ctrl->flags |= V4L2_AV1_FRAME_FLAG_REDUCED_TX_SET;
	/* VAAPI has a single skip-mode bit; it drives both V4L2 flags. */
	if (picture->mode_control_fields.bits.skip_mode_present) {
		ctrl->flags |= V4L2_AV1_FRAME_FLAG_SKIP_MODE_ALLOWED;
		ctrl->flags |= V4L2_AV1_FRAME_FLAG_SKIP_MODE_PRESENT;
	}
}
|
||||
|
||||
/* ===== fill_film_grain ===== */
|
||||
/*
 * Translate the VAAPI film grain parameters embedded in the picture
 * parameter buffer into the V4L2 stateless AV1 film grain control.
 *
 * @picture: VAAPI picture parameters; only film_grain_info is read.
 * @ctrl:    destination control; zeroed first. Flags and scalar fields
 *           are always filled; the scaling points and AR coefficients
 *           are filled only when apply_grain is set.
 */
static void av1_fill_film_grain(VADecPictureParameterBufferAV1 *picture,
				struct v4l2_ctrl_av1_film_grain *ctrl)
{
	VAFilmGrainStructAV1 *grain = &picture->film_grain_info;
	unsigned int n_y, n_cb, n_cr;
	unsigned int k;

	memset(ctrl, 0, sizeof(*ctrl));

	/* ---- flags ---- */
	if (grain->film_grain_info_fields.bits.apply_grain) {
		/*
		 * VAFilmGrainStructAV1 has no separate update_grain bit.
		 * A kdirect strace diff showed UPDATE_GRAIN set together
		 * with APPLY_GRAIN, so both are raised here: the submitted
		 * parameters are used rather than reused from an earlier
		 * frame.
		 */
		ctrl->flags |= V4L2_AV1_FILM_GRAIN_FLAG_APPLY_GRAIN |
			       V4L2_AV1_FILM_GRAIN_FLAG_UPDATE_GRAIN;
	}
	if (grain->film_grain_info_fields.bits.chroma_scaling_from_luma)
		ctrl->flags |= V4L2_AV1_FILM_GRAIN_FLAG_CHROMA_SCALING_FROM_LUMA;
	if (grain->film_grain_info_fields.bits.overlap_flag)
		ctrl->flags |= V4L2_AV1_FILM_GRAIN_FLAG_OVERLAP;
	if (grain->film_grain_info_fields.bits.clip_to_restricted_range)
		ctrl->flags |= V4L2_AV1_FILM_GRAIN_FLAG_CLIP_TO_RESTRICTED_RANGE;

	/* ---- scalar parameters ---- */
	ctrl->grain_seed = grain->grain_seed;
	/* VAAPI has no film_grain_params_ref_idx; it is only relevant when
	 * grain parameters are reused, which is never signalled here. */
	ctrl->film_grain_params_ref_idx = 0;
	ctrl->num_y_points = grain->num_y_points;
	ctrl->num_cb_points = grain->num_cb_points;
	ctrl->num_cr_points = grain->num_cr_points;
	ctrl->grain_scaling_minus_8 =
		grain->film_grain_info_fields.bits.grain_scaling_minus_8;
	ctrl->ar_coeff_lag = grain->film_grain_info_fields.bits.ar_coeff_lag;
	ctrl->ar_coeff_shift_minus_6 =
		grain->film_grain_info_fields.bits.ar_coeff_shift_minus_6;
	ctrl->grain_scale_shift =
		grain->film_grain_info_fields.bits.grain_scale_shift;
	ctrl->cb_mult = grain->cb_mult;
	ctrl->cb_luma_mult = grain->cb_luma_mult;
	ctrl->cb_offset = grain->cb_offset;
	ctrl->cr_mult = grain->cr_mult;
	ctrl->cr_luma_mult = grain->cr_luma_mult;
	ctrl->cr_offset = grain->cr_offset;

	/* Without grain the tables below stay zeroed. */
	if (!grain->film_grain_info_fields.bits.apply_grain)
		return;

	/* ---- scaling points: at most 14 luma and 10 per chroma plane ---- */
	n_y = grain->num_y_points < 14 ? grain->num_y_points : 14;
	n_cb = grain->num_cb_points < 10 ? grain->num_cb_points : 10;
	n_cr = grain->num_cr_points < 10 ? grain->num_cr_points : 10;

	for (k = 0; k < n_y; k++) {
		ctrl->point_y_value[k] = grain->point_y_value[k];
		ctrl->point_y_scaling[k] = grain->point_y_scaling[k];
	}
	for (k = 0; k < n_cb; k++) {
		ctrl->point_cb_value[k] = grain->point_cb_value[k];
		ctrl->point_cb_scaling[k] = grain->point_cb_scaling[k];
	}
	for (k = 0; k < n_cr; k++) {
		ctrl->point_cr_value[k] = grain->point_cr_value[k];
		ctrl->point_cr_scaling[k] = grain->point_cr_scaling[k];
	}

	/* ---- AR coefficients: V4L2 stores them biased by +128 ---- */
	for (k = 0; k < 25; k++) {
		if (k < 24)
			ctrl->ar_coeffs_y_plus_128[k] =
				(uint8_t)(grain->ar_coeffs_y[k] + 128);
		ctrl->ar_coeffs_cb_plus_128[k] =
			(uint8_t)(grain->ar_coeffs_cb[k] + 128);
		ctrl->ar_coeffs_cr_plus_128[k] =
			(uint8_t)(grain->ar_coeffs_cr[k] + 128);
	}
}
|
||||
|
||||
/* ===== orchestrator ===== */
|
||||
int av1_set_controls(struct request_data *driver_data,
|
||||
struct object_context *context,
|
||||
struct object_surface *surface_object)
|
||||
{
|
||||
VADecPictureParameterBufferAV1 *picture =
|
||||
&surface_object->params.av1.picture;
|
||||
unsigned int num_tiles = surface_object->params.av1.num_tile_group_entries;
|
||||
struct v4l2_ctrl_av1_sequence sequence;
|
||||
struct v4l2_ctrl_av1_frame frame;
|
||||
struct v4l2_ctrl_av1_film_grain film_grain;
|
||||
struct v4l2_ctrl_av1_tile_group_entry *tile_entries = NULL;
|
||||
struct v4l2_ext_control controls[4];
|
||||
unsigned int n = 0;
|
||||
unsigned int i;
|
||||
unsigned int alloc_tiles;
|
||||
int rc;
|
||||
|
||||
(void)context;
|
||||
|
||||
/*
|
||||
* AV1 film_grain link: when apply_grain=1, ffmpeg-vaapi allocates a
|
||||
* separate display surface (current_display_picture) from the decode
|
||||
* surface (current_frame). vpu981 HW applies grain inline to the
|
||||
* decode CAPTURE buffer, so the consumable data is in current_frame's
|
||||
* slot. ffmpeg then calls vaGetImage on current_display_picture which
|
||||
* has no slot bound. Link the display surface back to the decode
|
||||
* surface so copy_surface_to_image can borrow destination_data[].
|
||||
*/
|
||||
if (picture->current_display_picture != VA_INVALID_SURFACE &&
|
||||
picture->current_display_picture != picture->current_frame) {
|
||||
struct object_surface *display_surface =
|
||||
SURFACE(driver_data, picture->current_display_picture);
|
||||
if (display_surface != NULL)
|
||||
display_surface->linked_decode_surface_id =
|
||||
picture->current_frame;
|
||||
}
|
||||
|
||||
if (num_tiles > AV1_MAX_TILES)
|
||||
num_tiles = AV1_MAX_TILES;
|
||||
|
||||
/* DYNAMIC_ARRAY size = MAX(num_tiles, 1) per Janet v2 Q1
|
||||
* amendment — kernel UB on size=0. */
|
||||
alloc_tiles = num_tiles > 0 ? num_tiles : 1;
|
||||
tile_entries = calloc(alloc_tiles, sizeof(*tile_entries));
|
||||
if (tile_entries == NULL)
|
||||
return -1;
|
||||
|
||||
for (i = 0; i < num_tiles; i++) {
|
||||
VASliceParameterBufferAV1 *slice =
|
||||
&surface_object->params.av1.tile_group_entries[i];
|
||||
tile_entries[i].tile_offset = slice->slice_data_offset;
|
||||
tile_entries[i].tile_size = slice->slice_data_size;
|
||||
tile_entries[i].tile_row = (uint8_t)slice->tile_row;
|
||||
tile_entries[i].tile_col = (uint8_t)slice->tile_column;
|
||||
}
|
||||
|
||||
av1_fill_sequence(picture, &sequence);
|
||||
av1_fill_frame(picture, &frame);
|
||||
|
||||
/*
|
||||
* Phase 2.1 + frame-2 divergence fix: wire reference_frame_ts[].
|
||||
* VAAPI exposes ref_frame_map[8] as VASurfaceIDs; the kernel needs
|
||||
* v4l2-style timestamps to cross-reference the corresponding
|
||||
* CAPTURE buffers (set on the OUTPUT buffer at QBUF time per
|
||||
* picture.c::EndPicture, via surface_object->timestamp). Mirrors
|
||||
* the vp9.c:614-628 pattern, scaled to AV1's 8 ref slots.
|
||||
*
|
||||
* VA_INVALID_SURFACE entries stay at the calloc'd zero timestamp
|
||||
* (kernel reads zero, doesn't try to dereference).
|
||||
*/
|
||||
/*
|
||||
* Empirical: DPB-slot iteration (i over ref_frame_map[i]) gives
|
||||
* better correctness than ref-name iteration via ref_frame_idx[].
|
||||
* Tried the ref-name reindex (Kwiboo convention via FFmpeg s->ref[i])
|
||||
* and lost frames that previously PASSed (3/10 → 1/10) — so the V4L2
|
||||
* uAPI semantic here may be DPB-slot-indexed despite the AV1 spec
|
||||
* lexicon. Phase 3 open question pending kernel-side disambiguation.
|
||||
*/
|
||||
for (i = 0; i < BACKEND_AV1_TOTAL_REFS_PER_FRAME; i++) {
|
||||
VASurfaceID ref_id = picture->ref_frame_map[i];
|
||||
struct object_surface *ref_surface;
|
||||
uint64_t ts;
|
||||
if (ref_id == VA_INVALID_SURFACE)
|
||||
continue;
|
||||
ref_surface = SURFACE(driver_data, ref_id);
|
||||
if (ref_surface == NULL)
|
||||
continue;
|
||||
ts = v4l2_timeval_to_ns(&ref_surface->timestamp);
|
||||
if (ts == 0 &&
|
||||
ref_surface->linked_decode_surface_id != VA_INVALID_SURFACE) {
|
||||
struct object_surface *dec =
|
||||
SURFACE(driver_data,
|
||||
ref_surface->linked_decode_surface_id);
|
||||
if (dec != NULL) {
|
||||
ts = v4l2_timeval_to_ns(&dec->timestamp);
|
||||
frame.order_hints[i] = dec->av1_order_hint;
|
||||
}
|
||||
} else {
|
||||
frame.order_hints[i] = ref_surface->av1_order_hint;
|
||||
}
|
||||
frame.reference_frame_ts[i] = ts;
|
||||
}
|
||||
|
||||
/* Phase 3: record this frame's order_hint on the surface so the
|
||||
* NEXT frame's ref-loop can populate order_hints[] for slots that
|
||||
* reference us. */
|
||||
surface_object->av1_order_hint = picture->order_hint;
|
||||
/* Also propagate to the linked display surface (if any), since
|
||||
* future frames' ref_frame_map[] may point at either. */
|
||||
if (picture->current_display_picture != VA_INVALID_SURFACE &&
|
||||
picture->current_display_picture != picture->current_frame) {
|
||||
struct object_surface *disp =
|
||||
SURFACE(driver_data, picture->current_display_picture);
|
||||
if (disp != NULL)
|
||||
disp->av1_order_hint = picture->order_hint;
|
||||
}
|
||||
|
||||
if (driver_data->has_av1_film_grain)
|
||||
av1_fill_film_grain(picture, &film_grain);
|
||||
|
||||
controls[n++] = (struct v4l2_ext_control){
|
||||
.id = V4L2_CID_STATELESS_AV1_SEQUENCE,
|
||||
.ptr = &sequence,
|
||||
.size = sizeof(sequence),
|
||||
};
|
||||
controls[n++] = (struct v4l2_ext_control){
|
||||
.id = V4L2_CID_STATELESS_AV1_FRAME,
|
||||
.ptr = &frame,
|
||||
.size = sizeof(frame),
|
||||
};
|
||||
controls[n++] = (struct v4l2_ext_control){
|
||||
.id = V4L2_CID_STATELESS_AV1_TILE_GROUP_ENTRY,
|
||||
.ptr = tile_entries,
|
||||
.size = sizeof(*tile_entries) * alloc_tiles,
|
||||
};
|
||||
if (driver_data->has_av1_film_grain) {
|
||||
controls[n++] = (struct v4l2_ext_control){
|
||||
.id = V4L2_CID_STATELESS_AV1_FILM_GRAIN,
|
||||
.ptr = &film_grain,
|
||||
.size = sizeof(film_grain),
|
||||
};
|
||||
}
|
||||
|
||||
rc = v4l2_set_controls(driver_data->video_fd,
|
||||
surface_object->request_fd,
|
||||
controls, n);
|
||||
|
||||
free(tile_entries);
|
||||
|
||||
if (rc < 0) {
|
||||
request_log("ampere-av1: VIDIOC_S_EXT_CTRLS failed rc=%d\n", rc);
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -0,0 +1,45 @@
|
||||
/*
|
||||
* Copyright (C) 2026 claude-noether <claude-noether@reauktion.de>
|
||||
*
|
||||
* ampere-av1-enablement Phase 2: AV1 codec dispatcher header for libva-
|
||||
* v4l2-request-fourier. Mirrors vp9.h shape — single set_controls entry
|
||||
* point that translates surface->params.av1.* VAAPI structures into a
|
||||
* batch of V4L2_CID_STATELESS_AV1_{SEQUENCE,FRAME,TILE_GROUP_ENTRY,
|
||||
* FILM_GRAIN} controls + the underlying request_fd / OUTPUT plane setup.
|
||||
*
|
||||
* V4L2 target: V4L2_PIX_FMT_AV1_FRAME on the vpu981 hantro instance
|
||||
* (RK3588's dedicated AV1 decoder).
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sub license, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY CLAIM,
|
||||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef _AV1_H_
|
||||
#define _AV1_H_
|
||||
|
||||
#include "context.h"
|
||||
#include "request.h"
|
||||
#include "surface.h"
|
||||
|
||||
int av1_set_controls(struct request_data *driver_data,
|
||||
struct object_context *context,
|
||||
struct object_surface *surface);
|
||||
|
||||
#endif /* _AV1_H_ */
|
||||
@@ -55,12 +55,14 @@ VAStatus RequestCreateBuffer(VADriverContextP context, VAContextID context_id,
|
||||
VAStatus status;
|
||||
VABufferID id;
|
||||
|
||||
|
||||
switch (type) {
|
||||
case VAPictureParameterBufferType:
|
||||
case VAIQMatrixBufferType:
|
||||
case VASliceParameterBufferType:
|
||||
case VASliceDataBufferType:
|
||||
case VAImageBufferType:
|
||||
case VAProbabilityBufferType:
|
||||
break;
|
||||
|
||||
default:
|
||||
@@ -109,6 +111,7 @@ complete:
|
||||
|
||||
VAStatus RequestDestroyBuffer(VADriverContextP context, VABufferID buffer_id)
|
||||
{
|
||||
|
||||
struct request_data *driver_data = context->pDriverData;
|
||||
struct object_buffer *buffer_object;
|
||||
|
||||
@@ -128,6 +131,7 @@ VAStatus RequestDestroyBuffer(VADriverContextP context, VABufferID buffer_id)
|
||||
VAStatus RequestMapBuffer(VADriverContextP context, VABufferID buffer_id,
|
||||
void **data_map)
|
||||
{
|
||||
|
||||
struct request_data *driver_data = context->pDriverData;
|
||||
struct object_buffer *buffer_object;
|
||||
|
||||
@@ -143,6 +147,7 @@ VAStatus RequestMapBuffer(VADriverContextP context, VABufferID buffer_id,
|
||||
|
||||
VAStatus RequestUnmapBuffer(VADriverContextP context, VABufferID buffer_id)
|
||||
{
|
||||
|
||||
struct request_data *driver_data = context->pDriverData;
|
||||
struct object_buffer *buffer_object;
|
||||
|
||||
@@ -245,6 +250,7 @@ VAStatus RequestAcquireBufferHandle(VADriverContextP context,
|
||||
VAStatus RequestReleaseBufferHandle(VADriverContextP context,
|
||||
VABufferID buffer_id)
|
||||
{
|
||||
|
||||
struct request_data *driver_data = context->pDriverData;
|
||||
struct object_buffer *buffer_object;
|
||||
int export_fd;
|
||||
|
||||
+303
@@ -0,0 +1,303 @@
|
||||
/*
|
||||
* Iteration 2 Fix 3: cap_pool implementation.
|
||||
*
|
||||
* Design rationale + limitations: see cap_pool.h docblock.
|
||||
*
|
||||
* Concurrency model:
|
||||
* - All public functions take pool->lock at entry, release at exit.
|
||||
* - cap_pool_acquire may sleep briefly while scanning slots; safe
|
||||
* under lock since the scan is bounded by pool->count (<= 24
|
||||
* typical).
|
||||
* - The slot pointer returned by acquire / mark_decoded /
|
||||
* mark_exported / release is stable across the call (lock is
|
||||
* dropped before return) but the slot's state may change between
|
||||
* calls. Callers MUST NOT cache slot pointers across sleep/I/O --
|
||||
* they should treat slot pointers as opaque references valid only
|
||||
* for the immediate operation.
|
||||
*
|
||||
* In practice, our caller pattern is:
|
||||
* surface_object->current_slot = cap_pool_acquire(...);
|
||||
* v4l2_queue_buffer(slot->v4l2_index, ...);
|
||||
* // later, in SyncSurface for the same surface:
|
||||
* v4l2_dequeue_buffer(surface_object->current_slot->v4l2_index, ...);
|
||||
* cap_pool_mark_decoded(surface_object->current_slot);
|
||||
*
|
||||
* surface_object->current_slot is the persistent reference; the
|
||||
* slot's V4L2 index is stable for the slot's lifetime. The state
|
||||
* field IS read by other threads (acquire scans for FREE) — those
|
||||
* reads are safe because:
|
||||
* - acquire holds the lock during the scan
|
||||
* - mark_decoded/mark_exported/release also hold the lock
|
||||
* So state transitions are serialized.
|
||||
*/
|
||||
|
||||
#include "cap_pool.h"
|
||||
#include "v4l2.h"
|
||||
#include "utils.h"
|
||||
|
||||
#include <errno.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/mman.h>
|
||||
|
||||
#include <linux/videodev2.h>
|
||||
|
||||
static uint64_t monotonic_ns(void)
|
||||
{
|
||||
struct timespec ts;
|
||||
if (clock_gettime(CLOCK_MONOTONIC, &ts) < 0)
|
||||
return 0;
|
||||
return (uint64_t)ts.tv_sec * 1000000000ull + (uint64_t)ts.tv_nsec;
|
||||
}
|
||||
|
||||
int cap_pool_init(struct cap_pool *pool, int video_fd, unsigned int capture_type,
|
||||
unsigned int count, unsigned int v4l2_buffers_count_per_slot)
|
||||
{
|
||||
unsigned int index_base;
|
||||
unsigned int i, j;
|
||||
int rc;
|
||||
|
||||
if (pool == NULL || count == 0)
|
||||
return -EINVAL;
|
||||
|
||||
memset(pool, 0, sizeof(*pool));
|
||||
|
||||
rc = pthread_mutex_init(&pool->lock, NULL);
|
||||
if (rc != 0)
|
||||
return -rc;
|
||||
|
||||
pool->slots = calloc(count, sizeof(*pool->slots));
|
||||
if (pool->slots == NULL) {
|
||||
pthread_mutex_destroy(&pool->lock);
|
||||
return -ENOMEM;
|
||||
}
|
||||
pool->count = count;
|
||||
|
||||
rc = v4l2_create_buffers(video_fd, capture_type, count, &index_base);
|
||||
if (rc < 0) {
|
||||
free(pool->slots);
|
||||
pthread_mutex_destroy(&pool->lock);
|
||||
return rc;
|
||||
}
|
||||
|
||||
for (i = 0; i < count; i++) {
|
||||
struct cap_pool_slot *slot = &pool->slots[i];
|
||||
|
||||
slot->v4l2_index = index_base + i;
|
||||
slot->buffers_count = v4l2_buffers_count_per_slot;
|
||||
slot->state = CAP_SLOT_FREE;
|
||||
slot->our_export_fd = -1;
|
||||
slot->last_used_at_ns = 0; /* never used → highest LRU priority */
|
||||
slot->bound_to_surface_id = -1;
|
||||
|
||||
rc = v4l2_query_buffer(video_fd, capture_type, slot->v4l2_index,
|
||||
slot->map_lengths, slot->map_offsets,
|
||||
v4l2_buffers_count_per_slot);
|
||||
if (rc < 0) {
|
||||
request_log("cap_pool_init: query_buffer failed for "
|
||||
"slot %u (v4l2_index=%u)\n",
|
||||
i, slot->v4l2_index);
|
||||
goto error_cleanup;
|
||||
}
|
||||
|
||||
for (j = 0; j < v4l2_buffers_count_per_slot; j++) {
|
||||
slot->map[j] = mmap(NULL, slot->map_lengths[j],
|
||||
PROT_READ | PROT_WRITE, MAP_SHARED,
|
||||
video_fd, slot->map_offsets[j]);
|
||||
if (slot->map[j] == MAP_FAILED) {
|
||||
request_log("cap_pool_init: mmap failed for "
|
||||
"slot %u plane %u\n", i, j);
|
||||
slot->map[j] = NULL;
|
||||
goto error_cleanup;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pool->initialized = true;
|
||||
request_log("cap_pool_init: %u slots ready (v4l2_index=%u..%u, "
|
||||
"%u plane(s) per slot)\n",
|
||||
count, index_base, index_base + count - 1,
|
||||
v4l2_buffers_count_per_slot);
|
||||
return 0;
|
||||
|
||||
error_cleanup:
|
||||
for (i = 0; i < count; i++) {
|
||||
struct cap_pool_slot *slot = &pool->slots[i];
|
||||
for (j = 0; j < v4l2_buffers_count_per_slot; j++) {
|
||||
if (slot->map[j] != NULL && slot->map[j] != MAP_FAILED)
|
||||
munmap(slot->map[j], slot->map_lengths[j]);
|
||||
}
|
||||
}
|
||||
(void)v4l2_request_buffers(video_fd, capture_type, 0);
|
||||
free(pool->slots);
|
||||
pthread_mutex_destroy(&pool->lock);
|
||||
memset(pool, 0, sizeof(*pool));
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
void cap_pool_destroy(struct cap_pool *pool, int video_fd, unsigned int capture_type)
|
||||
{
|
||||
unsigned int i, j;
|
||||
|
||||
if (pool == NULL || !pool->initialized)
|
||||
return;
|
||||
|
||||
pthread_mutex_lock(&pool->lock);
|
||||
|
||||
for (i = 0; i < pool->count; i++) {
|
||||
struct cap_pool_slot *slot = &pool->slots[i];
|
||||
|
||||
if (slot->our_export_fd >= 0) {
|
||||
close(slot->our_export_fd);
|
||||
slot->our_export_fd = -1;
|
||||
}
|
||||
|
||||
for (j = 0; j < slot->buffers_count; j++) {
|
||||
if (slot->map[j] != NULL && slot->map[j] != MAP_FAILED) {
|
||||
munmap(slot->map[j], slot->map_lengths[j]);
|
||||
slot->map[j] = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
(void)v4l2_request_buffers(video_fd, capture_type, 0);
|
||||
|
||||
pthread_mutex_unlock(&pool->lock);
|
||||
pthread_mutex_destroy(&pool->lock);
|
||||
|
||||
free(pool->slots);
|
||||
pool->slots = NULL;
|
||||
pool->count = 0;
|
||||
pool->initialized = false;
|
||||
}
|
||||
|
||||
struct cap_pool_slot *cap_pool_acquire(struct cap_pool *pool, int surface_id)
|
||||
{
|
||||
struct cap_pool_slot *best = NULL;
|
||||
uint64_t best_ts = UINT64_MAX;
|
||||
unsigned int i;
|
||||
|
||||
if (pool == NULL || !pool->initialized)
|
||||
return NULL;
|
||||
|
||||
pthread_mutex_lock(&pool->lock);
|
||||
|
||||
/* First pass: find the FREE slot with oldest last_used_at_ns. */
|
||||
for (i = 0; i < pool->count; i++) {
|
||||
struct cap_pool_slot *slot = &pool->slots[i];
|
||||
if (slot->state != CAP_SLOT_FREE)
|
||||
continue;
|
||||
if (slot->last_used_at_ns < best_ts) {
|
||||
best = slot;
|
||||
best_ts = slot->last_used_at_ns;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Second pass (fallback): if no FREE slot, force-recycle the
|
||||
* oldest EXPORTED slot. This is the documented Option A race
|
||||
* window — the consumer may still hold a dup'd fd to this
|
||||
* buffer's underlying physical memory, and the kernel will
|
||||
* happily DMA new content into it. For typical mpv 16-surface
|
||||
* playback with MIN_CAP_POOL=24, this fallback should never
|
||||
* fire. If it does, the visual artifact is bounded to a few
|
||||
* frames during recovery.
|
||||
*/
|
||||
if (best == NULL) {
|
||||
best_ts = UINT64_MAX;
|
||||
for (i = 0; i < pool->count; i++) {
|
||||
struct cap_pool_slot *slot = &pool->slots[i];
|
||||
if (slot->state != CAP_SLOT_EXPORTED)
|
||||
continue;
|
||||
if (slot->last_used_at_ns < best_ts) {
|
||||
best = slot;
|
||||
best_ts = slot->last_used_at_ns;
|
||||
}
|
||||
}
|
||||
if (best != NULL) {
|
||||
request_log("cap_pool_acquire: pool exhausted, "
|
||||
"force-recycling EXPORTED slot v4l2_index=%u "
|
||||
"(consumer race window may open)\n",
|
||||
best->v4l2_index);
|
||||
if (best->our_export_fd >= 0) {
|
||||
close(best->our_export_fd);
|
||||
best->our_export_fd = -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (best == NULL) {
|
||||
pthread_mutex_unlock(&pool->lock);
|
||||
request_log("cap_pool_acquire: no slot available "
|
||||
"(pool->count=%u, all slots IN_DECODE/DECODED?)\n",
|
||||
pool->count);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Don't transition DECODED slots — they hold valid pixel content
|
||||
* a consumer may still be reading via DeriveImage (vaapi-copy
|
||||
* path). We never recycle DECODED. If a surface holds a DECODED
|
||||
* slot for an extended period, it stays held; the surface's
|
||||
* destruction (vaDestroySurfaces) is the only path that releases
|
||||
* it. mpv typically progresses through DECODED → EXPORTED quickly
|
||||
* for vaapi DMA-BUF; for vaapi-copy, DECODED → consumer reads
|
||||
* via mmap → consumer is done after copy_surface_to_image returns.
|
||||
* The vaapi-copy consumer has no explicit "I'm done" signal, so
|
||||
* we rely on the next BeginPicture for the same surface to
|
||||
* release the prior DECODED slot.
|
||||
*/
|
||||
|
||||
best->state = CAP_SLOT_IN_DECODE;
|
||||
best->bound_to_surface_id = surface_id;
|
||||
best->last_used_at_ns = monotonic_ns();
|
||||
|
||||
pthread_mutex_unlock(&pool->lock);
|
||||
return best;
|
||||
}
|
||||
|
||||
void cap_pool_mark_decoded(struct cap_pool *pool, struct cap_pool_slot *slot)
|
||||
{
|
||||
if (pool == NULL || slot == NULL)
|
||||
return;
|
||||
pthread_mutex_lock(&pool->lock);
|
||||
slot->state = CAP_SLOT_DECODED;
|
||||
slot->last_used_at_ns = monotonic_ns();
|
||||
pthread_mutex_unlock(&pool->lock);
|
||||
}
|
||||
|
||||
void cap_pool_mark_exported(struct cap_pool *pool, struct cap_pool_slot *slot, int our_fd)
|
||||
{
|
||||
if (pool == NULL || slot == NULL)
|
||||
return;
|
||||
pthread_mutex_lock(&pool->lock);
|
||||
if (slot->our_export_fd >= 0 && slot->our_export_fd != our_fd) {
|
||||
/*
|
||||
* Double-Export: a previous EXPBUF'd fd existed. Close
|
||||
* the old one. Consumer's old fd remains valid via
|
||||
* dma_buf refcount. Documented in surface.c export path.
|
||||
*/
|
||||
close(slot->our_export_fd);
|
||||
}
|
||||
slot->our_export_fd = our_fd;
|
||||
slot->state = CAP_SLOT_EXPORTED;
|
||||
slot->last_used_at_ns = monotonic_ns();
|
||||
pthread_mutex_unlock(&pool->lock);
|
||||
}
|
||||
|
||||
void cap_pool_release(struct cap_pool *pool, struct cap_pool_slot *slot)
|
||||
{
|
||||
if (pool == NULL || slot == NULL)
|
||||
return;
|
||||
pthread_mutex_lock(&pool->lock);
|
||||
if (slot->our_export_fd >= 0) {
|
||||
close(slot->our_export_fd);
|
||||
slot->our_export_fd = -1;
|
||||
}
|
||||
slot->state = CAP_SLOT_FREE;
|
||||
slot->bound_to_surface_id = -1;
|
||||
slot->last_used_at_ns = monotonic_ns();
|
||||
pthread_mutex_unlock(&pool->lock);
|
||||
}
|
||||
+156
@@ -0,0 +1,156 @@
|
||||
/*
|
||||
* Iteration 2 Fix 3: decoupled CAPTURE buffer pool with LRU recycling.
|
||||
*
|
||||
* Background — the bug this fixes:
|
||||
*
|
||||
* Pre-iteration-2, each VAAPI surface was permanently 1:1 bound to a
|
||||
* V4L2 CAPTURE buffer index at vaCreateSurfaces2 time. Each decode
|
||||
* cycle re-QBUF'd that same physical buffer for the same surface ID.
|
||||
* When mpv reused a surface for a new decode while the compositor
|
||||
* still held an EXPBUF'd dma_buf fd to the prior frame's content,
|
||||
* the kernel wrote new decode output into the SAME physical memory
|
||||
* the compositor was reading from — visible as stutter / "back and
|
||||
* forth" frame swap during mpv --hwdec=vaapi --vo=gpu playback.
|
||||
*
|
||||
* V4L2 does not enforce the constraint (it lets QBUF re-queue a
|
||||
* buffer regardless of dma_buf refcount on EXPBUF'd fds). userspace
|
||||
* must coordinate.
|
||||
*
|
||||
* Architecture (Sonnet Phase 5 review for iter2):
|
||||
*
|
||||
* Pool of N CAPTURE buffers (N >= max(surfaces_count, MIN_CAP_POOL)).
|
||||
* Each slot has a state in {FREE, IN_DECODE, DECODED, EXPORTED}.
|
||||
* Surfaces are no longer permanently bound; each vaBeginPicture
|
||||
* acquires a FREE slot, binds it to the current decode, transitions
|
||||
* it through IN_DECODE → DECODED → optionally EXPORTED.
|
||||
*
|
||||
* The DECODED state captures the window between SyncSurface DQBUF
|
||||
* and either ExportSurfaceHandle (DMA-BUF path) or DeriveImage
|
||||
* (vaapi-copy path). Acquisition takes the LRU FREE slot and only
|
||||
* falls back to the oldest EXPORTED slot; DECODED and IN_DECODE slots
|
||||
* are never claimed, so a concurrent decode cannot overwrite them.
|
||||
*
|
||||
* Concurrency: a pthread_mutex_t protects pool state. VAAPI is
|
||||
* re-entrant for multi-threaded consumers (mpv may BeginPicture/
|
||||
* SyncSurface from one thread and ExportSurfaceHandle from
|
||||
* another).
|
||||
*
|
||||
* Limitations (deferred to iteration 3+):
|
||||
*
|
||||
* - Option-A statistical mitigation, not a correct fix. The race
|
||||
* window narrows from "constant" to "only when pool is exhausted
|
||||
* and force-recycle of oldest EXPORTED slot fires." For typical
|
||||
* mpv 16-surface playback with MIN_CAP_POOL=24, this never fires
|
||||
* in practice (Sonnet review iter2 question 3). For pathological
|
||||
* workloads (paused-with-video-still-visible, multi-stream),
|
||||
* race windows still possible. Iteration 3 may revisit with
|
||||
* V4L2_MEMORY_DMABUF + userspace allocation.
|
||||
*
|
||||
* - LRU "force-recycle" still has the race in the worst case.
|
||||
* Closing OUR EXPBUF fd does not close the consumer's dup — the
|
||||
* consumer's fd keeps the dma_buf alive but the V4L2 layer will
|
||||
* happily write new data into the underlying physical memory on
|
||||
* re-QBUF. There is no public V4L2 API to query dma_buf refcount.
|
||||
*
|
||||
* - Multi-context concurrent use (two libva contexts open
|
||||
* simultaneously, e.g. Firefox playing two videos in different
|
||||
* tabs through separate RDD instances): not addressed. Each
|
||||
* context gets its own pool, but there's only one V4L2 device.
|
||||
*/
|
||||
|
||||
#ifndef _CAP_POOL_H_
|
||||
#define _CAP_POOL_H_
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include <pthread.h>
|
||||
|
||||
#include <linux/videodev2.h> /* for VIDEO_MAX_PLANES */
|
||||
|
||||
#define MIN_CAP_POOL 24
|
||||
|
||||
enum cap_slot_state {
|
||||
CAP_SLOT_FREE = 0, /* available for a new decode acquisition */
|
||||
CAP_SLOT_IN_DECODE, /* QBUF'd to V4L2, kernel owns */
|
||||
CAP_SLOT_DECODED, /* DQBUF'd, valid pixel content; mapped by surface */
|
||||
CAP_SLOT_EXPORTED, /* EXPBUF'd; consumer holds a dma_buf fd */
|
||||
};
|
||||
|
||||
struct cap_pool_slot {
|
||||
unsigned int v4l2_index; /* V4L2 buffer index */
|
||||
void *map[VIDEO_MAX_PLANES]; /* mmap pointers */
|
||||
unsigned int map_lengths[VIDEO_MAX_PLANES];
|
||||
unsigned int map_offsets[VIDEO_MAX_PLANES];
|
||||
unsigned int buffers_count; /* V4L2 buffers per logical NV12 (1 for single-plane MPLANE) */
|
||||
enum cap_slot_state state;
|
||||
int our_export_fd; /* -1 if not exported; close on FREE transition */
|
||||
uint64_t last_used_at_ns; /* CLOCK_MONOTONIC when last touched (LRU) */
|
||||
int bound_to_surface_id; /* -1 if not bound; informational */
|
||||
};
|
||||
|
||||
struct cap_pool {
|
||||
struct cap_pool_slot *slots;
|
||||
unsigned int count; /* allocated slot count */
|
||||
pthread_mutex_t lock;
|
||||
bool initialized;
|
||||
};
|
||||
|
||||
/*
|
||||
* cap_pool_init — allocate a pool of `count` CAPTURE buffers via
|
||||
* v4l2_create_buffers, mmap each buffer's planes, init slot states
|
||||
* to FREE. `count` is min'd against any reasonable hardware cap.
|
||||
*
|
||||
* Returns 0 on success, negative errno on failure.
|
||||
*/
|
||||
int cap_pool_init(struct cap_pool *pool, int video_fd, unsigned int capture_type,
|
||||
unsigned int count, unsigned int v4l2_buffers_count_per_slot);
|
||||
|
||||
/*
|
||||
* cap_pool_destroy — close any outstanding our_export_fds, munmap all
|
||||
* planes, REQBUFS(0), free slots. Safe to call on a non-initialized
|
||||
* pool (no-op).
|
||||
*
|
||||
* Note: closing our_export_fd does not invalidate any consumer-held
|
||||
* dup'd fds — the kernel keeps the dma_buf alive while any fd refs
|
||||
* it. munmap on our side is independent of the consumer's mmap (each
|
||||
* mmap of a dma_buf is a distinct VMA).
|
||||
*/
|
||||
void cap_pool_destroy(struct cap_pool *pool, int video_fd, unsigned int capture_type);
|
||||
|
||||
/*
|
||||
* cap_pool_acquire — find a FREE slot with the oldest last_used_at_ns
|
||||
* (LRU). If no FREE slot is available, force-recycle the oldest
|
||||
* EXPORTED slot (close our_export_fd, demote to IN_DECODE for the
|
||||
* caller). Returns NULL only if no slots can be recycled at all
|
||||
* (catastrophic — pool too small).
|
||||
*
|
||||
* The returned slot is in IN_DECODE state. Caller QBUFs it and
|
||||
* transitions to DECODED via cap_pool_mark_decoded after DQBUF.
|
||||
*/
|
||||
struct cap_pool_slot *cap_pool_acquire(struct cap_pool *pool, int surface_id);
|
||||
|
||||
/*
|
||||
* cap_pool_mark_decoded — IN_DECODE → DECODED. Touches last_used_at_ns.
|
||||
* Called from RequestSyncSurface after successful DQBUF.
|
||||
*/
|
||||
void cap_pool_mark_decoded(struct cap_pool *pool, struct cap_pool_slot *slot);
|
||||
|
||||
/*
|
||||
* cap_pool_mark_exported — DECODED → EXPORTED. Stores `our_fd` so the
|
||||
* pool owns OUR copy of the EXPBUF'd fd; the consumer received a
|
||||
* dup'd / equivalent fd via the descriptor. last_used_at_ns is
|
||||
* touched again so EXPORTED slots are recycled in LRU order.
|
||||
*
|
||||
* Called from RequestExportSurfaceHandle after VIDIOC_EXPBUF.
|
||||
*/
|
||||
void cap_pool_mark_exported(struct cap_pool *pool, struct cap_pool_slot *slot, int our_fd);
|
||||
|
||||
/*
|
||||
* cap_pool_release — explicitly return a slot to FREE (close our
|
||||
* export fd if any). Called from RequestDestroySurfaces and from
|
||||
* RequestBeginPicture when re-acquiring (the surface's previous slot
|
||||
* is released first, then a new one acquired).
|
||||
*/
|
||||
void cap_pool_release(struct cap_pool *pool, struct cap_pool_slot *slot);
|
||||
|
||||
#endif /* _CAP_POOL_H_ */
|
||||
+52
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
* Copyright (C) 2026 claude-noether <claude-noether@reauktion.de>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sub license, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
|
||||
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
|
||||
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "codec.h"
|
||||
|
||||
#include <linux/videodev2.h>
|
||||
|
||||
/*
 * Translate a VA-API profile into the V4L2 pixel format FOURCC used for
 * that codec's bitstream. Profiles not listed below yield 0.
 */
unsigned int pixelformat_for_profile(VAProfile profile)
{
	unsigned int fourcc = 0;

	switch (profile) {
	/* MPEG-2 */
	case VAProfileMPEG2Simple:
	case VAProfileMPEG2Main:
		fourcc = V4L2_PIX_FMT_MPEG2_SLICE;
		break;

	/* H.264 */
	case VAProfileH264Main:
	case VAProfileH264High:
	case VAProfileH264ConstrainedBaseline:
	case VAProfileH264MultiviewHigh:
	case VAProfileH264StereoHigh:
		fourcc = V4L2_PIX_FMT_H264_SLICE;
		break;

	/* HEVC */
	case VAProfileHEVCMain:
		fourcc = V4L2_PIX_FMT_HEVC_SLICE;
		break;

	/* VP8 */
	case VAProfileVP8Version0_3:
		fourcc = V4L2_PIX_FMT_VP8_FRAME;
		break;

	/* VP9 */
	case VAProfileVP9Profile0:
		fourcc = V4L2_PIX_FMT_VP9_FRAME;
		break;

	/* AV1 */
	case VAProfileAV1Profile0:
		fourcc = V4L2_PIX_FMT_AV1_FRAME;
		break;

	default:
		/* Unhandled profile: keep the 0 sentinel. */
		break;
	}

	return fourcc;
}
|
||||
+48
@@ -0,0 +1,48 @@
|
||||
/*
|
||||
* Copyright (C) 2026 claude-noether <claude-noether@reauktion.de>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sub license, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
|
||||
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
|
||||
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef _CODEC_H_
|
||||
#define _CODEC_H_
|
||||
|
||||
#include <va/va.h>
|
||||
|
||||
/**
|
||||
* pixelformat_for_profile - map a VA-API VAProfile to its V4L2 OUTPUT-side
|
||||
* pixel format FOURCC.
|
||||
*
|
||||
* @profile: VAProfile enum value as passed to vaCreateConfig.
|
||||
*
|
||||
* Returns the V4L2_PIX_FMT_* constant that the V4L2 device's OUTPUT_MPLANE
|
||||
* (bitstream-input) queue should be set to in order for the kernel
|
||||
* stateless decoder to dispatch to the right codec_mode. Used at
|
||||
* RequestCreateConfig to populate object_config->pixelformat, and read
|
||||
* from there at RequestCreateContext when committing the OUTPUT format
|
||||
* to the V4L2 device.
|
||||
*
|
||||
* Returns 0 for an unhandled profile; caller is expected to either
|
||||
* fall back to a safe default or refuse to proceed.
|
||||
*/
|
||||
unsigned int pixelformat_for_profile(VAProfile profile);
|
||||
|
||||
#endif /* _CODEC_H_ */
|
||||
+103
-20
@@ -34,10 +34,9 @@
|
||||
|
||||
#include <linux/videodev2.h>
|
||||
|
||||
#include <mpeg2-ctrls.h>
|
||||
#include <h264-ctrls.h>
|
||||
#include <hevc-ctrls.h>
|
||||
|
||||
#include "codec.h"
|
||||
#include "utils.h"
|
||||
#include "v4l2.h"
|
||||
|
||||
@@ -54,22 +53,55 @@ VAStatus RequestCreateConfig(VADriverContextP context, VAProfile profile,
|
||||
int i, index;
|
||||
|
||||
switch (profile) {
|
||||
case VAProfileMPEG2Simple:
|
||||
case VAProfileMPEG2Main:
|
||||
|
||||
case VAProfileH264Main:
|
||||
case VAProfileH264High:
|
||||
case VAProfileH264ConstrainedBaseline:
|
||||
case VAProfileH264MultiviewHigh:
|
||||
case VAProfileH264StereoHigh:
|
||||
case VAProfileHEVCMain:
|
||||
if (entrypoint != VAEntrypointVLD)
|
||||
return VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT;
|
||||
// FIXME
|
||||
break;
|
||||
case VAProfileMPEG2Simple:
|
||||
case VAProfileMPEG2Main:
|
||||
// fresnel-fourier iter1: MPEG-2 enabled. Same shape as H.264
|
||||
// above — no profile-specific config validation in the libva
|
||||
// backend; validation happens at vaCreateContext / control
|
||||
// submission time.
|
||||
break;
|
||||
case VAProfileHEVCMain:
|
||||
// fresnel-fourier iter2: HEVC enabled. Same shape as H.264/
|
||||
// MPEG-2 above — no profile-specific config validation in the
|
||||
// libva backend; validation happens at vaCreateContext / control
|
||||
// submission time.
|
||||
break;
|
||||
case VAProfileVP8Version0_3:
|
||||
// fresnel-fourier iter3: VP8 enabled. Same shape as iter1+iter2
|
||||
// above — no profile-specific config validation in the libva
|
||||
// backend; validation happens at vaCreateContext / control
|
||||
// submission time.
|
||||
break;
|
||||
case VAProfileVP9Profile0:
|
||||
// fresnel-fourier iter4: VP9 Profile 0 enabled on rkvdec.
|
||||
// Same shape — no profile-specific validation here.
|
||||
break;
|
||||
case VAProfileAV1Profile0:
|
||||
// ampere-av1-enablement: AV1 Profile 0 enabled on vpu981.
|
||||
// Same shape — no profile-specific validation here.
|
||||
break;
|
||||
|
||||
default:
|
||||
return VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
|
||||
}
|
||||
|
||||
/*
|
||||
* iter38: retarget the active V4L2 device to whichever physical
|
||||
* decoder (rkvdec or hantro-vpu on RK3399) serves this codec profile.
|
||||
* Safe no-op when the right device is already active. When a switch
|
||||
* is needed, output/capture pools and the video_format cache are
|
||||
* torn down so the next RequestCreateContext rebuilds them on the
|
||||
* new device.
|
||||
*/
|
||||
(void)request_switch_device_for_profile(driver_data, profile);
|
||||
|
||||
if (attributes_count > V4L2_REQUEST_MAX_CONFIG_ATTRIBUTES)
|
||||
attributes_count = V4L2_REQUEST_MAX_CONFIG_ATTRIBUTES;
|
||||
|
||||
@@ -80,6 +112,16 @@ VAStatus RequestCreateConfig(VADriverContextP context, VAProfile profile,
|
||||
|
||||
config_object->profile = profile;
|
||||
config_object->entrypoint = entrypoint;
|
||||
/*
|
||||
* iter5b-β: cache the V4L2 OUTPUT-side FOURCC for this profile so
|
||||
* context.c::RequestCreateContext can read it without re-running
|
||||
* the profile→pixelformat mapping. Wires up the previously-dead
|
||||
* pixelformat field at config.h:46. The switch above already
|
||||
* rejected unsupported profiles via VA_STATUS_ERROR_UNSUPPORTED_PROFILE,
|
||||
* so pixelformat_for_profile here returns non-zero for every
|
||||
* profile that reaches this assignment.
|
||||
*/
|
||||
config_object->pixelformat = pixelformat_for_profile(profile);
|
||||
config_object->attributes[0].type = VAConfigAttribRTFormat;
|
||||
config_object->attributes[0].value = VA_RT_FORMAT_YUV420;
|
||||
config_object->attributes_count = 1;
|
||||
@@ -111,6 +153,31 @@ VAStatus RequestDestroyConfig(VADriverContextP context, VAConfigID config_id)
|
||||
return VA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* iter38: check whether `fmt` is supported on any of the open V4L2 device
|
||||
* fds (active + the two alt fds tracked since iter38 multi-device probe).
|
||||
* Tries both VIDEO_OUTPUT and VIDEO_OUTPUT_MPLANE.
|
||||
*/
|
||||
static bool any_fd_supports_output_format(struct request_data *driver_data,
|
||||
unsigned int fmt)
|
||||
{
|
||||
int fds[4] = {
|
||||
driver_data->video_fd,
|
||||
driver_data->video_fd_rkvdec,
|
||||
driver_data->video_fd_hantro,
|
||||
driver_data->video_fd_vpu981,
|
||||
};
|
||||
int i;
|
||||
for (i = 0; i < 4; i++) {
|
||||
if (fds[i] < 0) continue;
|
||||
if (v4l2_find_format(fds[i], V4L2_BUF_TYPE_VIDEO_OUTPUT, fmt))
|
||||
return true;
|
||||
if (v4l2_find_format(fds[i], V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE, fmt))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
VAStatus RequestQueryConfigProfiles(VADriverContextP context,
|
||||
VAProfile *profiles, int *profiles_count)
|
||||
{
|
||||
@@ -118,18 +185,14 @@ VAStatus RequestQueryConfigProfiles(VADriverContextP context,
|
||||
unsigned int index = 0;
|
||||
bool found;
|
||||
|
||||
found = v4l2_find_format(driver_data->video_fd,
|
||||
V4L2_BUF_TYPE_VIDEO_OUTPUT,
|
||||
V4L2_PIX_FMT_MPEG2_SLICE);
|
||||
if (found && index < (V4L2_REQUEST_MAX_CONFIG_ATTRIBUTES - 2)) {
|
||||
found = any_fd_supports_output_format(driver_data, V4L2_PIX_FMT_MPEG2_SLICE);
|
||||
if (found && index < (V4L2_REQUEST_MAX_PROFILES - 2)) {
|
||||
profiles[index++] = VAProfileMPEG2Simple;
|
||||
profiles[index++] = VAProfileMPEG2Main;
|
||||
}
|
||||
|
||||
found = v4l2_find_format(driver_data->video_fd,
|
||||
V4L2_BUF_TYPE_VIDEO_OUTPUT,
|
||||
V4L2_PIX_FMT_H264_SLICE_RAW);
|
||||
if (found && index < (V4L2_REQUEST_MAX_CONFIG_ATTRIBUTES - 5)) {
|
||||
found = any_fd_supports_output_format(driver_data, V4L2_PIX_FMT_H264_SLICE);
|
||||
if (found && index < (V4L2_REQUEST_MAX_PROFILES - 5)) {
|
||||
profiles[index++] = VAProfileH264Main;
|
||||
profiles[index++] = VAProfileH264High;
|
||||
profiles[index++] = VAProfileH264ConstrainedBaseline;
|
||||
@@ -137,12 +200,29 @@ VAStatus RequestQueryConfigProfiles(VADriverContextP context,
|
||||
profiles[index++] = VAProfileH264StereoHigh;
|
||||
}
|
||||
|
||||
found = v4l2_find_format(driver_data->video_fd,
|
||||
V4L2_BUF_TYPE_VIDEO_OUTPUT,
|
||||
V4L2_PIX_FMT_HEVC_SLICE);
|
||||
if (found && index < (V4L2_REQUEST_MAX_CONFIG_ATTRIBUTES - 1))
|
||||
found = any_fd_supports_output_format(driver_data, V4L2_PIX_FMT_HEVC_SLICE);
|
||||
if (found && index < (V4L2_REQUEST_MAX_PROFILES - 1))
|
||||
profiles[index++] = VAProfileHEVCMain;
|
||||
|
||||
found = any_fd_supports_output_format(driver_data, V4L2_PIX_FMT_VP8_FRAME);
|
||||
if (found && index < (V4L2_REQUEST_MAX_PROFILES - 1))
|
||||
profiles[index++] = VAProfileVP8Version0_3;
|
||||
|
||||
found = any_fd_supports_output_format(driver_data, V4L2_PIX_FMT_VP9_FRAME);
|
||||
if (found && index < (V4L2_REQUEST_MAX_PROFILES - 1))
|
||||
profiles[index++] = VAProfileVP9Profile0;
|
||||
|
||||
/*
|
||||
* ampere-av1-enablement: AV1 routes to vpu981 (advertised via the
|
||||
* new video_fd_vpu981 slot). V4L2_REQUEST_MAX_PROFILES=11 is now
|
||||
* EXACTLY full with this addition. Future profile additions
|
||||
* require bumping that constant + verifying libva consumers'
|
||||
* profiles[] sizing.
|
||||
*/
|
||||
found = any_fd_supports_output_format(driver_data, V4L2_PIX_FMT_AV1_FRAME);
|
||||
if (found && index < (V4L2_REQUEST_MAX_PROFILES - 1))
|
||||
profiles[index++] = VAProfileAV1Profile0;
|
||||
|
||||
*profiles_count = index;
|
||||
|
||||
return VA_STATUS_SUCCESS;
|
||||
@@ -162,6 +242,9 @@ VAStatus RequestQueryConfigEntrypoints(VADriverContextP context,
|
||||
case VAProfileH264MultiviewHigh:
|
||||
case VAProfileH264StereoHigh:
|
||||
case VAProfileHEVCMain:
|
||||
case VAProfileVP8Version0_3:
|
||||
case VAProfileVP9Profile0:
|
||||
case VAProfileAV1Profile0:
|
||||
entrypoints[0] = VAEntrypointVLD;
|
||||
*entrypoints_count = 1;
|
||||
break;
|
||||
|
||||
@@ -43,6 +43,7 @@ struct object_config {
|
||||
VAEntrypoint entrypoint;
|
||||
VAConfigAttrib attributes[V4L2_REQUEST_MAX_CONFIG_ATTRIBUTES];
|
||||
int attributes_count;
|
||||
unsigned int pixelformat;
|
||||
};
|
||||
|
||||
VAStatus RequestCreateConfig(VADriverContextP context, VAProfile profile,
|
||||
|
||||
+464
-86
@@ -29,6 +29,7 @@
|
||||
#include "request.h"
|
||||
#include "surface.h"
|
||||
|
||||
#include <errno.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
@@ -39,8 +40,6 @@
|
||||
|
||||
#include <linux/videodev2.h>
|
||||
|
||||
#include <mpeg2-ctrls.h>
|
||||
#include <h264-ctrls.h>
|
||||
#include <hevc-ctrls.h>
|
||||
|
||||
#include "utils.h"
|
||||
@@ -55,35 +54,323 @@ VAStatus RequestCreateContext(VADriverContextP context, VAConfigID config_id,
|
||||
{
|
||||
struct request_data *driver_data = context->pDriverData;
|
||||
struct object_config *config_object;
|
||||
struct object_surface *surface_object;
|
||||
struct object_context *context_object = NULL;
|
||||
struct video_format *video_format;
|
||||
unsigned int length;
|
||||
unsigned int offset;
|
||||
void *source_data = MAP_FAILED;
|
||||
unsigned int destination_sizes[VIDEO_MAX_PLANES];
|
||||
unsigned int destination_bytesperlines[VIDEO_MAX_PLANES];
|
||||
unsigned int destination_planes_count;
|
||||
unsigned int format_width, format_height;
|
||||
unsigned int pixelformat;
|
||||
VASurfaceID *ids = NULL;
|
||||
VAContextID id;
|
||||
VAStatus status;
|
||||
unsigned int output_type, capture_type;
|
||||
unsigned int pixelformat;
|
||||
unsigned int index_base;
|
||||
unsigned int index;
|
||||
unsigned int i;
|
||||
unsigned int j;
|
||||
bool found;
|
||||
int rc;
|
||||
|
||||
video_format = driver_data->video_format;
|
||||
if (video_format == NULL)
|
||||
return VA_STATUS_ERROR_OPERATION_FAILED;
|
||||
|
||||
output_type = v4l2_type_video_output(video_format->v4l2_mplane);
|
||||
capture_type = v4l2_type_video_capture(video_format->v4l2_mplane);
|
||||
|
||||
/*
|
||||
* iter5b-β: CreateContext owns the V4L2 OUTPUT-side device-format
|
||||
* lifecycle (S_FMT, CAPTURE-format probe, cap_pool_init, per-surface
|
||||
* destination_* fill). Pre-β these lived in CreateSurfaces2 with a
|
||||
* resolution-change gate; β moves them here because (a) config_id
|
||||
* is known so the right OUTPUT pixel format can be derived from
|
||||
* the bound profile, and (b) STREAMON happens at the end of this
|
||||
* function, so the queue is never streaming when we do S_FMT.
|
||||
*
|
||||
* DestroyContext is the only per-session teardown site under β
|
||||
* (no in-CreateSurfaces2 teardown branch). It STREAMOFFs both
|
||||
* queues, calls request_pool_destroy + cap_pool_destroy, and
|
||||
* REQBUFS(0) — leaving the V4L2 device in a clean slate for the
|
||||
* next CreateContext.
|
||||
*/
|
||||
config_object = CONFIG(driver_data, config_id);
|
||||
if (config_object == NULL) {
|
||||
status = VA_STATUS_ERROR_INVALID_CONFIG;
|
||||
goto error;
|
||||
}
|
||||
|
||||
pixelformat = config_object->pixelformat;
|
||||
if (pixelformat == 0) {
|
||||
/*
|
||||
* Defensive: CreateConfig rejects unhandled profiles, so
|
||||
* pixelformat is always non-zero by the time we get here.
|
||||
* Belt-and-suspenders.
|
||||
*/
|
||||
status = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
|
||||
goto error;
|
||||
}
|
||||
|
||||
/*
|
||||
* Probe the CAPTURE-side V4L2 format. video_format is a static
|
||||
* pointer into video.c's formats[]; it stays valid for the life of
|
||||
* the driver_data and is cached across CreateContext cycles. The
|
||||
* probe doesn't require any prior S_FMT — v4l2_find_format
|
||||
* enumerates the device's supported formats directly.
|
||||
*/
|
||||
if (!driver_data->video_format) {
|
||||
video_format = NULL;
|
||||
found = v4l2_find_format(driver_data->video_fd,
|
||||
V4L2_BUF_TYPE_VIDEO_CAPTURE,
|
||||
V4L2_PIX_FMT_SUNXI_TILED_NV12);
|
||||
if (found)
|
||||
video_format = video_format_find(V4L2_PIX_FMT_SUNXI_TILED_NV12);
|
||||
|
||||
found = v4l2_find_format(driver_data->video_fd,
|
||||
V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE,
|
||||
V4L2_PIX_FMT_NV12);
|
||||
if (found)
|
||||
video_format = video_format_find(V4L2_PIX_FMT_NV12);
|
||||
|
||||
if (video_format == NULL) {
|
||||
status = VA_STATUS_ERROR_OPERATION_FAILED;
|
||||
goto error;
|
||||
}
|
||||
|
||||
driver_data->video_format = video_format;
|
||||
}
|
||||
video_format = driver_data->video_format;
|
||||
|
||||
output_type = v4l2_type_video_output(video_format->v4l2_mplane);
|
||||
capture_type = v4l2_type_video_capture(video_format->v4l2_mplane);
|
||||
|
||||
/*
|
||||
* Commit the OUTPUT pixel format. picture_width/picture_height
|
||||
* are the kernel-facing dimensions for this decode session. With
|
||||
* profile-derived pixelformat, hantro's CAPTURE-format derivation
|
||||
* dispatches to the right codec_mode (pre-β hardcoded H264_SLICE
|
||||
* meant hantro silently substituted MPEG2_DECODER for HEVC/VP8/VP9
|
||||
* → all-zero CAPTURE; rkvdec silently dropped HEVC/VP9 → same
|
||||
* outcome).
|
||||
*/
|
||||
rc = v4l2_set_format(driver_data->video_fd, output_type, pixelformat,
|
||||
picture_width, picture_height);
|
||||
if (rc < 0) {
|
||||
status = VA_STATUS_ERROR_OPERATION_FAILED;
|
||||
goto error;
|
||||
}
|
||||
|
||||
/*
|
||||
* iter15 α-19: explicit S_FMT on CAPTURE for rkvdec.
|
||||
*
|
||||
* Original iter5b-β comment: "Do NOT VIDIOC_S_FMT on CAPTURE — hantro
|
||||
* reads the SPS from OUTPUT to set CAPTURE shape internally."
|
||||
*
|
||||
* Empirical finding at iter15 Phase 3 (2026-05-14): kdirect (ffmpeg-
|
||||
* v4l2request) does S_FMT on CAPTURE side after S_FMT(OUTPUT),
|
||||
* then CREATE_BUFS for CAPTURE. libva's old G_FMT-only path skipped
|
||||
* the S_FMT call. For hantro this was deliberate (works); for rkvdec
|
||||
* (HEVC + H.264 + VP9 on RK3399) the absence of explicit S_FMT puts
|
||||
* the driver into a state where it does NOT commit the chosen NV12
|
||||
* pixel format properly — and the resulting decode silently writes
|
||||
* garbage or zero for HEVC + H.264 (Bug 4 + Bug 5).
|
||||
*
|
||||
* Per [[feedback-per-driver-kludge-gating]]: this driver-specific
|
||||
* difference should be gated on driver_kind. For now use a single
|
||||
* always-on S_FMT call as the safe move: kdirect proves S_FMT
|
||||
* CAPTURE works on both hantro AND rkvdec (it's the reference path).
|
||||
* The iter5b-β comment is preserved-but-amended below.
|
||||
*
|
||||
* Sequence: S_FMT OUTPUT (above) → S_FMT CAPTURE (this) → G_FMT
|
||||
* CAPTURE (sanity read-back, matches what S_FMT committed).
|
||||
*/
|
||||
{
|
||||
unsigned int capture_pixelformat = V4L2_PIX_FMT_NV12;
|
||||
rc = v4l2_set_format(driver_data->video_fd, capture_type,
|
||||
capture_pixelformat, picture_width,
|
||||
picture_height);
|
||||
if (rc < 0) {
|
||||
/* Non-fatal: if the kernel rejects S_FMT CAPTURE (some
|
||||
* older hantro variants), fall through to G_FMT. */
|
||||
request_log("iter15 α-19: S_FMT CAPTURE failed (continuing): %s\n",
|
||||
strerror(errno));
|
||||
}
|
||||
}
|
||||
|
||||
rc = v4l2_get_format(driver_data->video_fd, capture_type, &format_width,
|
||||
&format_height, destination_bytesperlines,
|
||||
destination_sizes, NULL);
|
||||
if (rc < 0) {
|
||||
status = VA_STATUS_ERROR_OPERATION_FAILED;
|
||||
goto error;
|
||||
}
|
||||
|
||||
/*
|
||||
* iter25 α-25: synthetic-SPS injection to pre-seed ctx->image_fmt
|
||||
* before CAPTURE buffer allocation.
|
||||
*
|
||||
* Root cause (iter17→iter24 kernel-printk chain): rkvdec_s_ctrl for
|
||||
* HEVC_SPS / H264_SPS calls get_image_fmt() and, if the resolved
|
||||
* image_fmt differs from the cached ctx->image_fmt (default
|
||||
* RKVDEC_IMG_FMT_ANY), tries to reset the CAPTURE format. The reset
|
||||
* returns -EBUSY when vb2_is_busy(CAPTURE_queue) — i.e. any CAPTURE
|
||||
* buffer is allocated.
|
||||
*
|
||||
* libva (iter5b-β CAPTURE pool) pre-allocates 24 CAPTURE buffers
|
||||
* via cap_pool_init below — before any per-frame S_EXT_CTRLS
|
||||
* arrives. So the first real HEVC_SPS at decode time fails with
|
||||
* -EBUSY in try_or_set_cluster, breaks v4l2_ctrl_request_setup's
|
||||
* outer loop, and leaves ctx->ctrl_hdl[SPS..DECODE_PARAMS] at all-
|
||||
* zero contents. rkvdec_hevc_run reads zero, hardware sees w=0
|
||||
* h=0, decoded CAPTURE is all-zero (Bug 5 + Bug 4).
|
||||
*
|
||||
* Fix: while CAPTURE is still empty (before cap_pool_init), inject
|
||||
* a synthetic SPS containing the profile's chroma + bit_depth so
|
||||
* rkvdec_s_ctrl resolves image_fmt and updates ctx->image_fmt
|
||||
* before vb2_is_busy can return true. From then on, per-frame
|
||||
* SPS submissions with matching profile parameters see
|
||||
* image_fmt_changed=false → skip reset → commit succeeds.
|
||||
*
|
||||
* Gated by config->profile: only HEVC and H.264 paths set
|
||||
* get_image_fmt in their rkvdec coded_fmt_desc->ops; VP9 / MPEG-2 /
|
||||
* VP8 are unaffected (rkvdec_s_ctrl returns 0 immediately when
|
||||
* get_image_fmt is NULL, or those codecs are routed to hantro).
|
||||
*
|
||||
* Failure is best-effort: if the kernel returns -EBUSY/-EINVAL here
|
||||
* (e.g. driver doesn't expose the control on this DT path), we fall
|
||||
* through and may still hit the original bug for that codec — but
|
||||
* the device-init DECODE_MODE + START_CODE block below ALSO uses
|
||||
* void-cast best-effort, so this is consistent with prior pattern.
|
||||
*/
|
||||
{
|
||||
switch (config_object->profile) {
|
||||
case VAProfileHEVCMain: {
|
||||
struct v4l2_ctrl_hevc_sps dummy_sps;
|
||||
struct v4l2_ext_control dummy_ctrl;
|
||||
|
||||
memset(&dummy_sps, 0, sizeof(dummy_sps));
|
||||
dummy_sps.chroma_format_idc = 1; /* 4:2:0 */
|
||||
dummy_sps.bit_depth_luma_minus8 = 0; /* 8-bit */
|
||||
dummy_sps.bit_depth_chroma_minus8 = 0;
|
||||
dummy_sps.pic_width_in_luma_samples = picture_width;
|
||||
dummy_sps.pic_height_in_luma_samples = picture_height;
|
||||
|
||||
dummy_ctrl.id = V4L2_CID_STATELESS_HEVC_SPS;
|
||||
dummy_ctrl.ptr = &dummy_sps;
|
||||
dummy_ctrl.size = sizeof(dummy_sps);
|
||||
(void)v4l2_set_controls(driver_data->video_fd, -1,
|
||||
&dummy_ctrl, 1);
|
||||
break;
|
||||
}
|
||||
case VAProfileH264Main:
|
||||
case VAProfileH264High:
|
||||
case VAProfileH264ConstrainedBaseline:
|
||||
case VAProfileH264MultiviewHigh:
|
||||
case VAProfileH264StereoHigh: {
|
||||
struct v4l2_ctrl_h264_sps dummy_sps;
|
||||
struct v4l2_ext_control dummy_ctrl;
|
||||
|
||||
memset(&dummy_sps, 0, sizeof(dummy_sps));
|
||||
dummy_sps.chroma_format_idc = 1; /* 4:2:0 */
|
||||
dummy_sps.bit_depth_luma_minus8 = 0;
|
||||
dummy_sps.bit_depth_chroma_minus8 = 0;
|
||||
dummy_sps.pic_width_in_mbs_minus1 =
|
||||
(picture_width + 15) / 16 - 1;
|
||||
dummy_sps.pic_height_in_map_units_minus1 =
|
||||
(picture_height + 15) / 16 - 1;
|
||||
dummy_sps.profile_idc = 100; /* High */
|
||||
dummy_sps.level_idc = 41;
|
||||
/*
|
||||
* FRAME_MBS_ONLY required: rkvdec_h264_validate_sps
|
||||
* doubles height for non-frame-mbs-only streams to
|
||||
* compute frame-height from field-height. Without
|
||||
* this flag, dummy with (height_in_map_units+1)*16
|
||||
* = 1088 doubles to 2176 > coded_fmt 1080 → -EINVAL.
|
||||
*/
|
||||
dummy_sps.flags = V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY;
|
||||
|
||||
dummy_ctrl.id = V4L2_CID_STATELESS_H264_SPS;
|
||||
dummy_ctrl.ptr = &dummy_sps;
|
||||
dummy_ctrl.size = sizeof(dummy_sps);
|
||||
(void)v4l2_set_controls(driver_data->video_fd, -1,
|
||||
&dummy_ctrl, 1);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
destination_planes_count = video_format->planes_count;
|
||||
|
||||
/*
|
||||
* Initialize the CAPTURE buffer pool (cap_pool). Pool size =
|
||||
* max(surfaces_count, MIN_CAP_POOL). The headroom gives LRU
|
||||
* recycling enough margin to never reuse a buffer within the
|
||||
* consumer's compositor-hold window for typical playback
|
||||
* patterns. cap_pool_init does the V4L2 CREATE_BUFS + per-slot
|
||||
* mmap.
|
||||
*
|
||||
* `pool->initialized` is reset to false by cap_pool_destroy in
|
||||
* DestroyContext; subsequent CreateContext re-inits at the new
|
||||
* resolution.
|
||||
*/
|
||||
if (!driver_data->capture_pool.initialized) {
|
||||
unsigned int pool_count = surfaces_count > MIN_CAP_POOL ?
|
||||
surfaces_count : MIN_CAP_POOL;
|
||||
rc = cap_pool_init(&driver_data->capture_pool,
|
||||
driver_data->video_fd, capture_type,
|
||||
pool_count, video_format->v4l2_buffers_count);
|
||||
if (rc < 0) {
|
||||
status = VA_STATUS_ERROR_ALLOCATION_FAILED;
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Compute format-uniform destination_* values. Same for all
|
||||
* surfaces of this format; written once per surface, never
|
||||
* changed by BeginPicture's slot acquisition.
|
||||
*/
|
||||
if (video_format->v4l2_buffers_count == 1) {
|
||||
destination_sizes[0] = destination_bytesperlines[0] *
|
||||
format_height;
|
||||
for (j = 1; j < destination_planes_count; j++)
|
||||
destination_sizes[j] = destination_sizes[0] / 2;
|
||||
}
|
||||
|
||||
/*
|
||||
* iter5b-β Commit D: cache the format-uniform CAPTURE geometry
|
||||
* in driver_data. CreateSurfaces2 calls AFTER this CreateContext
|
||||
* (ffmpeg vaapi-copy late-surface-allocation case) will lazy-fill
|
||||
* via surface_fill_format_uniform(); the surface_heap walk below
|
||||
* fills surfaces that pre-existed when CreateContext fired.
|
||||
*/
|
||||
driver_data->fmt_planes_count = destination_planes_count;
|
||||
driver_data->fmt_buffers_count = video_format->v4l2_buffers_count;
|
||||
driver_data->fmt_format_height = format_height;
|
||||
for (j = 0; j < destination_planes_count; j++) {
|
||||
driver_data->fmt_sizes[j] = destination_sizes[j];
|
||||
driver_data->fmt_bytesperlines[j] =
|
||||
destination_bytesperlines[j];
|
||||
}
|
||||
driver_data->fmt_valid = true;
|
||||
|
||||
/*
|
||||
* Walk the surface_heap (not just surfaces_ids[]) to populate
|
||||
* destination_* on every existing surface. Pre-Commit-D we walked
|
||||
* surfaces_ids[], which is empty for ffmpeg vaapi-copy consumers
|
||||
* that call vaCreateContext with surfaces_count=0 — those surfaces
|
||||
* exist in the heap but aren't in the param array. Walking the
|
||||
* heap catches both flows. Late-created surfaces (after this
|
||||
* CreateContext) fill via surface_fill_format_uniform in
|
||||
* CreateSurfaces2's per-surface init.
|
||||
*/
|
||||
{
|
||||
struct object_surface *surface_iter;
|
||||
int heap_iter;
|
||||
|
||||
surface_iter = (struct object_surface *)
|
||||
object_heap_first(&driver_data->surface_heap,
|
||||
&heap_iter);
|
||||
while (surface_iter != NULL) {
|
||||
surface_fill_format_uniform(driver_data, surface_iter);
|
||||
surface_iter = (struct object_surface *)
|
||||
object_heap_next(&driver_data->surface_heap,
|
||||
&heap_iter);
|
||||
}
|
||||
}
|
||||
|
||||
id = object_heap_allocate(&driver_data->context_heap);
|
||||
context_object = CONTEXT(driver_data, id);
|
||||
if (context_object == NULL) {
|
||||
@@ -91,40 +378,29 @@ VAStatus RequestCreateContext(VADriverContextP context, VAConfigID config_id,
|
||||
goto error;
|
||||
}
|
||||
memset(&context_object->dpb, 0, sizeof(context_object->dpb));
|
||||
context_object->timestamp_counter = 0; /* iter9 α-7 */
|
||||
|
||||
switch (config_object->profile) {
|
||||
|
||||
case VAProfileMPEG2Simple:
|
||||
case VAProfileMPEG2Main:
|
||||
pixelformat = V4L2_PIX_FMT_MPEG2_SLICE;
|
||||
break;
|
||||
|
||||
case VAProfileH264Main:
|
||||
case VAProfileH264High:
|
||||
case VAProfileH264ConstrainedBaseline:
|
||||
case VAProfileH264MultiviewHigh:
|
||||
case VAProfileH264StereoHigh:
|
||||
pixelformat = V4L2_PIX_FMT_H264_SLICE_RAW;
|
||||
break;
|
||||
|
||||
case VAProfileHEVCMain:
|
||||
pixelformat = V4L2_PIX_FMT_HEVC_SLICE;
|
||||
break;
|
||||
|
||||
default:
|
||||
status = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
|
||||
goto error;
|
||||
}
|
||||
|
||||
rc = v4l2_set_format(driver_data->video_fd, output_type, pixelformat,
|
||||
picture_width, picture_height);
|
||||
if (rc < 0) {
|
||||
status = VA_STATUS_ERROR_OPERATION_FAILED;
|
||||
goto error;
|
||||
}
|
||||
|
||||
rc = v4l2_create_buffers(driver_data->video_fd, output_type,
|
||||
surfaces_count, &index_base);
|
||||
/*
|
||||
* Initialize the OUTPUT (bitstream-input) buffer pool. Sized by
|
||||
* codec pipeline depth (4 H.264 frames in flight is sufficient
|
||||
* for current hantro/rkvdec scheduling); independent of caller-
|
||||
* supplied surfaces_count. Pool is owned by driver_data so it
|
||||
* outlives any single context destroy/recreate cycle.
|
||||
*
|
||||
* This replaces the prior per-surface OUTPUT loop, which (a)
|
||||
* created an empty queue when surfaces_count==0 (ffmpeg vaapi-
|
||||
* copy path) and (b) only populated surface->source_* for
|
||||
* surfaces present at vaCreateContext time, NULL-derefing on
|
||||
* surfaces created later.
|
||||
*/
|
||||
/*
|
||||
* iter6: pool size 16 gives comfortable headroom over typical H.264
|
||||
* MaxDpbFrames (16) for any consumer that pipelines decode requests.
|
||||
* Each slot owns its own request_fd (REINIT'd per use).
|
||||
*/
|
||||
rc = request_pool_init(&driver_data->output_pool,
|
||||
driver_data->video_fd, driver_data->media_fd,
|
||||
output_type, 16);
|
||||
if (rc < 0) {
|
||||
status = VA_STATUS_ERROR_ALLOCATION_FAILED;
|
||||
goto error;
|
||||
@@ -135,40 +411,107 @@ VAStatus RequestCreateContext(VADriverContextP context, VAConfigID config_id,
|
||||
* we don't have any indication wrt its life time. Let's make sure
|
||||
* its life span is under our control.
|
||||
*/
|
||||
ids = malloc(surfaces_count * sizeof(VASurfaceID));
|
||||
if (ids == NULL) {
|
||||
status = VA_STATUS_ERROR_ALLOCATION_FAILED;
|
||||
goto error;
|
||||
if (surfaces_count > 0) {
|
||||
ids = malloc(surfaces_count * sizeof(VASurfaceID));
|
||||
if (ids == NULL) {
|
||||
status = VA_STATUS_ERROR_ALLOCATION_FAILED;
|
||||
goto error;
|
||||
}
|
||||
|
||||
memcpy(ids, surfaces_ids,
|
||||
surfaces_count * sizeof(VASurfaceID));
|
||||
}
|
||||
|
||||
memcpy(ids, surfaces_ids, surfaces_count * sizeof(VASurfaceID));
|
||||
/*
|
||||
* Stateless H.264 device-wide controls. The kernel V4L2 stateless
|
||||
* framework requires DECODE_MODE and START_CODE be set on the
|
||||
* device fd (request_fd=-1) before VIDIOC_STREAMON; per-request
|
||||
* controls (SPS/PPS/etc.) attached to a request_fd come later.
|
||||
*
|
||||
* hantro-vpu via rockchip,rk3568-vpu DT compatible (covers RK3568
|
||||
* and RK3566 — PineTab2 silicon — since they're close enough)
|
||||
* accepts only DECODE_MODE_FRAME_BASED.
|
||||
* START_CODE_ANNEX_B preserves leading 0x00000001 in the slice
|
||||
* payload that h264.c assembles. Errors here are not fatal: not
|
||||
* every backing driver supports both controls (e.g. cedrus may
|
||||
* default to SLICE_BASED without exposing DECODE_MODE).
|
||||
*/
|
||||
{
|
||||
struct v4l2_ext_control dev_ctrls[2] = {
|
||||
{
|
||||
.id = V4L2_CID_STATELESS_H264_DECODE_MODE,
|
||||
.value = V4L2_STATELESS_H264_DECODE_MODE_FRAME_BASED,
|
||||
},
|
||||
{
|
||||
.id = V4L2_CID_STATELESS_H264_START_CODE,
|
||||
.value = V4L2_STATELESS_H264_START_CODE_ANNEX_B,
|
||||
},
|
||||
};
|
||||
(void)v4l2_set_controls(driver_data->video_fd, -1,
|
||||
dev_ctrls, 2);
|
||||
}
|
||||
|
||||
for (i = 0; i < surfaces_count; i++) {
|
||||
index = index_base + i;
|
||||
/*
|
||||
* iter2: HEVC device-wide controls. Same best-effort pattern as
|
||||
* H.264 above — separate batched call so a kernel that does not
|
||||
* advertise HEVC controls (e.g. hantro-vpu-dec on RK3568/RK3399)
|
||||
* silently fails on this batch without invalidating the H.264
|
||||
* batch. rkvdec on RK3399 advertises HEVC and accepts FRAME_BASED
|
||||
* + ANNEX_B (only supported menu values per Phase 0 v4l2_inventory).
|
||||
*/
|
||||
{
|
||||
struct v4l2_ext_control hevc_dev_ctrls[2] = {
|
||||
{
|
||||
.id = V4L2_CID_STATELESS_HEVC_DECODE_MODE,
|
||||
.value = V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED,
|
||||
},
|
||||
{
|
||||
.id = V4L2_CID_STATELESS_HEVC_START_CODE,
|
||||
.value = V4L2_STATELESS_HEVC_START_CODE_ANNEX_B,
|
||||
},
|
||||
};
|
||||
(void)v4l2_set_controls(driver_data->video_fd, -1,
|
||||
hevc_dev_ctrls, 2);
|
||||
}
|
||||
|
||||
surface_object = SURFACE(driver_data, surfaces_ids[i]);
|
||||
if (surface_object == NULL) {
|
||||
status = VA_STATUS_ERROR_INVALID_SURFACE;
|
||||
goto error;
|
||||
}
|
||||
|
||||
rc = v4l2_query_buffer(driver_data->video_fd, output_type,
|
||||
index, &length, &offset, 1);
|
||||
if (rc < 0) {
|
||||
status = VA_STATUS_ERROR_ALLOCATION_FAILED;
|
||||
goto error;
|
||||
}
|
||||
|
||||
source_data = mmap(NULL, length, PROT_READ | PROT_WRITE,
|
||||
MAP_SHARED, driver_data->video_fd, offset);
|
||||
if (source_data == MAP_FAILED) {
|
||||
status = VA_STATUS_ERROR_ALLOCATION_FAILED;
|
||||
goto error;
|
||||
}
|
||||
|
||||
surface_object->source_index = index;
|
||||
surface_object->source_data = source_data;
|
||||
surface_object->source_size = length;
|
||||
/*
|
||||
* Mirror the ANNEX_B start-code mode set on the device above
|
||||
* into context_object->h264_start_code so picture.c::
|
||||
* codec_store_buffer prepends 0x00 0x00 0x01 to each slice
|
||||
* payload it copies into the OUTPUT buffer. Without this, the
|
||||
* kernel — which we just told to expect ANNEX_B — sees a raw
|
||||
* NAL stream with no start codes, fails to find slice
|
||||
* boundaries, and emits a zeroed CAPTURE buffer (visually a
|
||||
* flat dark-green frame).
|
||||
*
|
||||
* iter4 fix: this start-code prepend is ANNEX-B-specific and
|
||||
* applies to H.264 and HEVC ONLY. MPEG-2, VP8, and VP9 use raw
|
||||
* frame bitstreams without start codes — prepending 0x00 0x00 0x01
|
||||
* to a VP9 uncompressed header produces a frame_marker mismatch
|
||||
* (kernel reads 0x00 instead of 0x10), the rkvdec driver silently
|
||||
* fails to find a valid frame, and the CAPTURE slot stays at its
|
||||
* cap_pool init pattern (a dim 0x4c green). Phase 7 verification
|
||||
* caught this for VP9; iter1+iter3 transitive proof masked it for
|
||||
* MPEG-2/VP8 because those iters compared payload bytes, not
|
||||
* decoded pixels.
|
||||
*
|
||||
* h264_get_controls() exists for this purpose but is never
|
||||
* called in the current code path; the planned probe-then-set
|
||||
* commit will replace this hardcoded assignment with a runtime
|
||||
* read of the kernel's accepted START_CODE value.
|
||||
*/
|
||||
switch (config_object->profile) {
|
||||
case VAProfileH264Main:
|
||||
case VAProfileH264High:
|
||||
case VAProfileH264ConstrainedBaseline:
|
||||
case VAProfileH264MultiviewHigh:
|
||||
case VAProfileH264StereoHigh:
|
||||
case VAProfileHEVCMain:
|
||||
context_object->h264_start_code = true;
|
||||
break;
|
||||
default:
|
||||
context_object->h264_start_code = false;
|
||||
break;
|
||||
}
|
||||
|
||||
rc = v4l2_set_stream(driver_data->video_fd, output_type, true);
|
||||
@@ -197,9 +540,6 @@ VAStatus RequestCreateContext(VADriverContextP context, VAConfigID config_id,
|
||||
goto complete;
|
||||
|
||||
error:
|
||||
if (source_data != MAP_FAILED)
|
||||
munmap(source_data, length);
|
||||
|
||||
if (ids != NULL)
|
||||
free(ids);
|
||||
|
||||
@@ -251,13 +591,51 @@ VAStatus RequestDestroyContext(VADriverContextP context, VAContextID context_id)
|
||||
object_heap_free(&driver_data->context_heap,
|
||||
(struct object_base *)context_object);
|
||||
|
||||
/*
|
||||
* iter5b-β: tear down the OUTPUT pool (mmap unmaps) BEFORE
|
||||
* REQBUFS(0) frees the kernel-side buffers. Pre-β this was done
|
||||
* only by surface.c's resolution-change branch — which β removed.
|
||||
* Without this here, the next CreateContext's request_pool_init
|
||||
* sees pool->initialized=true with stale slot pointers, returns
|
||||
* 0 without re-CREATE_BUFS, and the next QBUF EINVALs because
|
||||
* the slots reference buffer indices that no longer exist
|
||||
* (Phase 5 v2 review CRIT-2).
|
||||
*/
|
||||
if (driver_data->output_pool.initialized)
|
||||
request_pool_destroy(&driver_data->output_pool);
|
||||
|
||||
rc = v4l2_request_buffers(driver_data->video_fd, output_type, 0);
|
||||
if (rc < 0)
|
||||
return VA_STATUS_ERROR_OPERATION_FAILED;
|
||||
|
||||
rc = v4l2_request_buffers(driver_data->video_fd, capture_type, 0);
|
||||
if (rc < 0)
|
||||
return VA_STATUS_ERROR_OPERATION_FAILED;
|
||||
/*
|
||||
* Iter2 Fix 3 (still relevant under β): cap_pool owns the
|
||||
* CAPTURE buffers' mmaps + any outstanding our_export_fds. Tear
|
||||
* it down (which also issues REQBUFS(0) on CAPTURE), so the next
|
||||
* CreateContext cycle sees a clean slate.
|
||||
*/
|
||||
cap_pool_destroy(&driver_data->capture_pool, driver_data->video_fd,
|
||||
capture_type);
|
||||
|
||||
/*
|
||||
* iter5b-β: driver_data->video_format is a static-ref pointer
|
||||
* into video.c's formats[]; it stays valid for the life of the
|
||||
* driver_data and intentionally survives DestroyContext cycles.
|
||||
* The next CreateContext's `if (!driver_data->video_format)`
|
||||
* guard skips the probe — correct, because the device's CAPTURE
|
||||
* format menu doesn't change.
|
||||
*
|
||||
* The pre-β surface_reset_format_cache() call here is removed:
|
||||
* β doesn't have a last_output_{width,height,pixelformat} cache
|
||||
* (those fields are deleted). Each CreateContext is a fresh
|
||||
* S_FMT(OUTPUT) cycle.
|
||||
*
|
||||
* Commit D: invalidate the format-uniform cache so a CreateSurfaces2
|
||||
* call between DestroyContext and the next CreateContext doesn't
|
||||
* lazy-fill with stale geometry from the now-torn-down session.
|
||||
* The next CreateContext re-populates the cache.
|
||||
*/
|
||||
driver_data->fmt_valid = false;
|
||||
|
||||
return VA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -27,6 +27,9 @@
|
||||
#ifndef _CONTEXT_H_
|
||||
#define _CONTEXT_H_
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include <va/va_backend.h>
|
||||
|
||||
#include "object_heap.h"
|
||||
@@ -50,6 +53,27 @@ struct object_context {
|
||||
|
||||
/* H264 only */
|
||||
struct h264_dpb dpb;
|
||||
bool h264_start_code;
|
||||
|
||||
/*
|
||||
* iter9 α-7: monotonic per-context timestamp counter (us). Replaces
|
||||
* gettimeofday in EndPicture so DPB.reference_ts / OUTPUT QBUF ts
|
||||
* are small values matching ffmpeg-v4l2request's pattern. Placed
|
||||
* here (object_context) not driver_data per Phase 5 IMP-1 to avoid
|
||||
* cross-context collisions.
|
||||
*/
|
||||
uint64_t timestamp_counter;
|
||||
|
||||
/* fresnel-fourier iter4: VP9 loop-filter delta state, persisted across
|
||||
* frames per kernel UAPI <linux/v4l2-controls.h>:2578 ("If this syntax
|
||||
* element is not present in the bitstream, users should pass its last
|
||||
* value.") and VP9 spec defaults from FFmpeg vp9.c:666-671. Reset on
|
||||
* keyframe / error-resilient / intra-only via vp9_lf.initialized=false. */
|
||||
struct {
|
||||
int8_t ref_deltas[4];
|
||||
int8_t mode_deltas[2];
|
||||
bool initialized;
|
||||
} vp9_lf;
|
||||
};
|
||||
|
||||
VAStatus RequestCreateContext(VADriverContextP context, VAConfigID config_id,
|
||||
|
||||
+581
-46
@@ -28,16 +28,18 @@
|
||||
#include <assert.h>
|
||||
#include <limits.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/mman.h>
|
||||
|
||||
#include <linux/videodev2.h>
|
||||
#include <h264-ctrls.h>
|
||||
|
||||
#include "request.h"
|
||||
#include "utils.h"
|
||||
#include "surface.h"
|
||||
#include "v4l2.h"
|
||||
#include "h264_slice_header.h"
|
||||
|
||||
enum h264_slice_type {
|
||||
H264_SLICE_P = 0,
|
||||
@@ -95,7 +97,8 @@ static struct h264_dpb_entry *dpb_find_entry(struct object_context *context)
|
||||
}
|
||||
|
||||
static struct h264_dpb_entry *dpb_lookup(struct object_context *context,
|
||||
VAPictureH264 *pic, unsigned int *idx)
|
||||
VAPictureH264 *pic, unsigned int *idx,
|
||||
unsigned char *fields)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
@@ -109,6 +112,16 @@ static struct h264_dpb_entry *dpb_lookup(struct object_context *context,
|
||||
if (idx)
|
||||
*idx = i;
|
||||
|
||||
if (fields) {
|
||||
//if (entry->pic.TopFieldOrderCnt < entry->pic.BottomFieldOrderCnt) {
|
||||
// *fields = V4L2_H264_TOP_FIELD_REF;
|
||||
//} else if (entry->pic.TopFieldOrderCnt > entry->pic.BottomFieldOrderCnt) {
|
||||
// *fields = V4L2_H264_BOTTOM_FIELD_REF;
|
||||
//} else {
|
||||
*fields = V4L2_H264_FRAME_REF;
|
||||
//}
|
||||
}
|
||||
|
||||
return entry;
|
||||
}
|
||||
}
|
||||
@@ -130,7 +143,7 @@ static void dpb_insert(struct object_context *context, VAPictureH264 *pic,
|
||||
if (is_picture_null(pic))
|
||||
return;
|
||||
|
||||
if (dpb_lookup(context, pic, NULL))
|
||||
if (dpb_lookup(context, pic, NULL, NULL))
|
||||
return;
|
||||
|
||||
if (!entry)
|
||||
@@ -165,7 +178,7 @@ static void dpb_update(struct object_context *context,
|
||||
if (is_picture_null(pic))
|
||||
continue;
|
||||
|
||||
entry = dpb_lookup(context, pic, NULL);
|
||||
entry = dpb_lookup(context, pic, NULL, NULL);
|
||||
if (entry) {
|
||||
entry->age = context->dpb.age;
|
||||
entry->used = true;
|
||||
@@ -175,10 +188,61 @@ static void dpb_update(struct object_context *context,
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
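* ffmpeg-vaapi's POC sentinel: background for the helper below. (The
* helper currently passes valid POC values through unchanged; the
* stripping rationale that follows is kept as historical context.)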
|
||||
*
|
||||
* ffmpeg's H264POCContext initialises prev_poc_msb to (1 << 16) =
|
||||
* 0x10000 in libavcodec/h264dec.c (lines 301 and 444 of v8.0). After
|
||||
* an IDR the idr() helper resets prev_poc_msb to that same sentinel.
|
||||
* ff_h264_init_poc (libavcodec/h264_parse.c lines 296-305) then
|
||||
* computes pc->poc_msb as prev_poc_msb when the slice header's
|
||||
* poc_lsb hasn't wrapped — which is the typical case for normal
|
||||
* content. The sentinel leaks into field_poc[] and from there into
|
||||
* VAPictureH264.TopFieldOrderCnt / BottomFieldOrderCnt at
|
||||
* libavcodec/vaapi_h264.c::fill_vaapi_pic.
|
||||
*
|
||||
* Working VAAPI backends (intel-iHD, i965 verified empirically on
|
||||
* meitner 2026-05-02) tolerate the high word — they either mask it
|
||||
* or treat POCs as relative comparisons. V4L2 stateless H.264
|
||||
* driver-side consumers (hantro_h264.c::prepare_table feeds the
|
||||
* value directly to tbl->poc[]) need the spec value, so we strip the
|
||||
* sentinel here at the libva-v4l2-request boundary.
|
||||
*
|
||||
* Detection by bit-16-set rather than blind subtraction so that a
|
||||
* future ffmpeg version that fixes the sentinel leak degrades
|
||||
* gracefully. POC values for non-degenerate H.264 content rarely
|
||||
* exceed 16 bits; bit 16 set is a strong signal of the sentinel.
|
||||
*
|
||||
* Empty DPB slots (VA_PICTURE_H264_INVALID) carry POC=0 by
|
||||
* libavcodec/vaapi_h264.c::init_vaapi_pic and need no fix-up.
|
||||
*/
|
||||
static inline int32_t h264_strip_ffmpeg_poc_sentinel(int32_t poc, uint32_t flags)
|
||||
{
|
||||
if (flags & VA_PICTURE_H264_INVALID)
|
||||
return 0;
|
||||
/*
|
||||
* iter8 α-2: pass POC values through unchanged for rkvdec. The
|
||||
* sentinel-subtract was added for hantro's tbl->poc[] prepare_table
|
||||
* which fed the value through unmasked. rkvdec writes POC to MMIO
|
||||
* via writel_relaxed (rkvdec-h264.c:975-978) and the macro
|
||||
* RKVDEC_CUR_POC is a 32-bit passthrough. kdirect (ffmpeg-v4l2request)
|
||||
* delivers the sentinel-encoded value directly and decodes
|
||||
* correctly; libva's strip was the cause of the 16x32 partial-fill
|
||||
* Bug 4 symptom. Hantro+H.264 isn't exercised on RK3399 (hantro-dec
|
||||
* doesn't advertise H.264 there) — restoring the strip per-driver
|
||||
* is iter9 work if it ever surfaces.
|
||||
*/
|
||||
return poc;
|
||||
}
|
||||
|
||||
static void h264_fill_dpb(struct request_data *data,
|
||||
struct object_context *context,
|
||||
VAPictureParameterBufferH264 *VAPicture,
|
||||
struct v4l2_ctrl_h264_decode_params *decode)
|
||||
{
|
||||
const int max_frame_num =
|
||||
1 << (VAPicture->seq_fields.bits.log2_max_frame_num_minus4 + 4);
|
||||
const int cur_frame_num = (int)VAPicture->frame_num;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < H264_DPB_SIZE; i++) {
|
||||
@@ -188,7 +252,26 @@ static void h264_fill_dpb(struct request_data *data,
|
||||
SURFACE(data, entry->pic.picture_id);
|
||||
uint64_t timestamp;
|
||||
|
||||
if (!entry->valid)
|
||||
/*
|
||||
* Skip entries no longer referenced by the consumer's
|
||||
* VAPictureParameterBufferH264.ReferenceFrames[]. dpb_update()
|
||||
* clears `used` for all entries then re-marks only those in the
|
||||
* current ReferenceFrames list; entries with valid=true but
|
||||
* used=false are stale (a frame the libva consumer has retired
|
||||
* from its DPB).
|
||||
*
|
||||
* Without this skip, our V4L2 dpb[] grows monotonically until
|
||||
* H264_DPB_SIZE; by frame_num=10 it carries 7+ entries while
|
||||
* SPS.max_num_ref_frames may be 4. The kernel reflist builder /
|
||||
* cluster validator rejects the request with EINVAL once the
|
||||
* count exceeds the SPS contract — which iter1+iter2+iter3
|
||||
* surfaced as the "frame-11 EINVAL" carryover. iter4 fix:
|
||||
* report only currently-used entries to match FFmpeg's
|
||||
* libavcodec/v4l2_request_h264.c::fill_dpb behaviour (which
|
||||
* iterates h->short_ref[] / h->long_ref[] — exactly the
|
||||
* currently-referenced set).
|
||||
*/
|
||||
if (!entry->valid || !entry->used)
|
||||
continue;
|
||||
|
||||
if (surface) {
|
||||
@@ -197,8 +280,47 @@ static void h264_fill_dpb(struct request_data *data,
|
||||
}
|
||||
|
||||
dpb->frame_num = entry->pic.frame_idx;
|
||||
dpb->top_field_order_cnt = entry->pic.TopFieldOrderCnt;
|
||||
dpb->bottom_field_order_cnt = entry->pic.BottomFieldOrderCnt;
|
||||
|
||||
/*
|
||||
* Per ext-ctrls-codec-stateless.rst, dpb[].pic_num must
|
||||
* equal the H.264 spec's PicNum (8-28) for short-term refs
|
||||
* or LongTermPicNum (8-29) for long-term refs.
|
||||
*
|
||||
* For frames (not field-coded), PicNum = FrameNumWrap.
|
||||
* FrameNumWrap = (frame_num > cur_frame_num)
|
||||
* ? frame_num - max_frame_num
|
||||
* : frame_num
|
||||
* (per spec section 8.2.4.1, frame_num wraparound).
|
||||
*
|
||||
* VAAPI convention (libavcodec/vaapi_h264.c::fill_vaapi_pic
|
||||
* line 64): VAPictureH264.frame_idx holds long_term_frame_idx
|
||||
* for long-term refs and frame_num for short-term refs. So
|
||||
* for long-term entries we copy frame_idx straight through
|
||||
* as LongTermPicNum.
|
||||
*
|
||||
* fourier's previous code set pic_num to picture_id (the
|
||||
* VAAPI surface id) which is unrelated to H.264 PicNum;
|
||||
* mediatek's vdec_h264_req_common.c::dst_entry->pic_num is
|
||||
* one consumer that fails on that. Hantro doesn't read
|
||||
* pic_num at all (uses reference_ts for ref resolution),
|
||||
* which is why fourier's wrong value never surfaced on
|
||||
* PineTab2 (RK3566 via hantro/rk3568-vpu).
|
||||
*/
|
||||
if (entry->pic.flags & VA_PICTURE_H264_LONG_TERM_REFERENCE) {
|
||||
dpb->pic_num = entry->pic.frame_idx;
|
||||
} else {
|
||||
int frame_num = (int)entry->pic.frame_idx;
|
||||
dpb->pic_num = (frame_num > cur_frame_num)
|
||||
? frame_num - max_frame_num
|
||||
: frame_num;
|
||||
}
|
||||
|
||||
dpb->top_field_order_cnt =
|
||||
h264_strip_ffmpeg_poc_sentinel(entry->pic.TopFieldOrderCnt,
|
||||
entry->pic.flags);
|
||||
dpb->bottom_field_order_cnt =
|
||||
h264_strip_ffmpeg_poc_sentinel(entry->pic.BottomFieldOrderCnt,
|
||||
entry->pic.flags);
|
||||
|
||||
dpb->flags = V4L2_H264_DPB_ENTRY_FLAG_VALID;
|
||||
|
||||
@@ -207,6 +329,27 @@ static void h264_fill_dpb(struct request_data *data,
|
||||
|
||||
if (entry->pic.flags & VA_PICTURE_H264_LONG_TERM_REFERENCE)
|
||||
dpb->flags |= V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM;
|
||||
|
||||
/*
|
||||
* Mark this DPB entry as a frame reference (both top + bottom
|
||||
* fields). The kernel's v4l2_h264_init_reflist_builder iterates
|
||||
* dpb[] and skips entries whose `fields` member is zero — they
|
||||
* count as "no valid field reference for this entry." For
|
||||
* frame-coded streams (BBB and most desktop H.264) every
|
||||
* reference is a frame reference; per UAPI doc
|
||||
* (ext-ctrls-codec-stateless.rst), fields must be set to
|
||||
* V4L2_H264_FRAME_REF (= TOP|BOTTOM) for frames.
|
||||
*
|
||||
* Cross-reference: FFmpeg libavcodec/v4l2_request_h264.c::
|
||||
* fill_dpb_entry sets entry->fields from pic->reference; for
|
||||
* frames pic->reference includes V4L2_H264_FRAME_REF. Without
|
||||
* this, P-slices that need to walk the reference list (the
|
||||
* first one in BBB is at frame 11) hit "no valid refs" inside
|
||||
* the kernel's reflist builder and S_EXT_CTRLS rejects the
|
||||
* whole request with EINVAL (error_idx == count, the kernel's
|
||||
* "application bug" sentinel).
|
||||
*/
|
||||
dpb->fields = V4L2_H264_FRAME_REF;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -218,11 +361,126 @@ static void h264_va_picture_to_v4l2(struct request_data *driver_data,
|
||||
struct v4l2_ctrl_h264_pps *pps,
|
||||
struct v4l2_ctrl_h264_sps *sps)
|
||||
{
|
||||
h264_fill_dpb(driver_data, context, decode);
|
||||
unsigned char *b;
|
||||
unsigned char nal_ref_idc;
|
||||
unsigned char nal_unit_type;
|
||||
|
||||
decode->num_slices = surface->slices_count;
|
||||
decode->top_field_order_cnt = VAPicture->CurrPic.TopFieldOrderCnt;
|
||||
decode->bottom_field_order_cnt = VAPicture->CurrPic.BottomFieldOrderCnt;
|
||||
/* Extract missing nal_ref_idc and nal_unit_type */
|
||||
b = surface->source_data;
|
||||
if (context->h264_start_code)
|
||||
b += 3;
|
||||
nal_ref_idc = (b[0] >> 5) & 0x3;
|
||||
nal_unit_type = b[0] & 0x1f;
|
||||
|
||||
/*
|
||||
* Bit-parse the slice_header() to recover fields VAAPI doesn't
|
||||
* forward and that hantro G1 hardware reads out of DECODE_PARAMS:
|
||||
*
|
||||
* - dec_ref_pic_marking_bit_size -> G1_REG_DEC_CTRL5_REFPIC_MK_LEN
|
||||
* - idr_pic_id -> G1_REG_DEC_CTRL5_IDR_PIC_ID
|
||||
* - pic_order_cnt_bit_size -> G1_REG_DEC_CTRL6_POC_LENGTH
|
||||
* - pic_order_cnt_lsb / delta_pic_order_cnt_* (used by hantro
|
||||
* reference-list builder for poc_type=0/1 inter prediction)
|
||||
*
|
||||
* Without these set correctly, hantro's hardware bitstream parser
|
||||
* walks past zero bits, lands on garbage, decodes zero pixels —
|
||||
* the all-zero CAPTURE output observed during 2026-05-04 Phase 0.
|
||||
*
|
||||
* Spec: ITU-T H.264 §7.3.3 slice_header. Cross-reference (proven
|
||||
* working): FFmpeg libavcodec/h264_slice.c populates
|
||||
* H264SliceContext::ref_pic_marking_bit_size and
|
||||
* pic_order_cnt_bit_size by the same bit-precise parse.
|
||||
*/
|
||||
{
|
||||
const struct h264_slice_header_context sh_ctx = {
|
||||
.separate_colour_plane_flag =
|
||||
(VAPicture->seq_fields.bits.residual_colour_transform_flag != 0),
|
||||
.log2_max_frame_num_minus4 =
|
||||
VAPicture->seq_fields.bits.log2_max_frame_num_minus4,
|
||||
.frame_mbs_only_flag =
|
||||
(VAPicture->seq_fields.bits.frame_mbs_only_flag != 0),
|
||||
.pic_order_cnt_type =
|
||||
VAPicture->seq_fields.bits.pic_order_cnt_type,
|
||||
.log2_max_pic_order_cnt_lsb_minus4 =
|
||||
VAPicture->seq_fields.bits.log2_max_pic_order_cnt_lsb_minus4,
|
||||
.delta_pic_order_always_zero_flag =
|
||||
(VAPicture->seq_fields.bits.delta_pic_order_always_zero_flag != 0),
|
||||
.bottom_field_pic_order_in_frame_present_flag =
|
||||
(VAPicture->pic_fields.bits.pic_order_present_flag != 0),
|
||||
.redundant_pic_cnt_present_flag =
|
||||
(VAPicture->pic_fields.bits.redundant_pic_cnt_present_flag != 0),
|
||||
.weighted_pred_flag =
|
||||
(VAPicture->pic_fields.bits.weighted_pred_flag != 0),
|
||||
.weighted_bipred_idc =
|
||||
VAPicture->pic_fields.bits.weighted_bipred_idc,
|
||||
.num_ref_idx_l0_default_active_minus1 =
|
||||
surface->params.h264.slice.num_ref_idx_l0_active_minus1,
|
||||
.num_ref_idx_l1_default_active_minus1 =
|
||||
surface->params.h264.slice.num_ref_idx_l1_active_minus1,
|
||||
.chroma_format_idc =
|
||||
VAPicture->seq_fields.bits.chroma_format_idc,
|
||||
.bit_depth_luma_minus8 =
|
||||
VAPicture->bit_depth_luma_minus8,
|
||||
.bit_depth_chroma_minus8 =
|
||||
VAPicture->bit_depth_chroma_minus8,
|
||||
.nal_unit_type = nal_unit_type,
|
||||
.nal_ref_idc = nal_ref_idc,
|
||||
};
|
||||
struct h264_slice_header_info sh = { 0 };
|
||||
unsigned char *nal_payload = b + 1; /* past NAL header byte */
|
||||
size_t nal_payload_len = surface->slices_size -
|
||||
(size_t)((nal_payload) - (unsigned char *)surface->source_data);
|
||||
int sh_rc = h264_parse_slice_header(nal_payload, nal_payload_len,
|
||||
&sh_ctx, &sh);
|
||||
if (sh_rc == 0) {
|
||||
decode->idr_pic_id = sh.idr_pic_id;
|
||||
decode->pic_order_cnt_lsb = sh.pic_order_cnt_lsb;
|
||||
decode->delta_pic_order_cnt_bottom = sh.delta_pic_order_cnt_bottom;
|
||||
decode->delta_pic_order_cnt0 = sh.delta_pic_order_cnt0;
|
||||
decode->delta_pic_order_cnt1 = sh.delta_pic_order_cnt1;
|
||||
decode->pic_order_cnt_bit_size = sh.pic_order_cnt_bit_size;
|
||||
decode->dec_ref_pic_marking_bit_size = sh.dec_ref_pic_marking_bit_size;
|
||||
} else {
|
||||
request_log("slice_header parse FAILED rc=%d "
|
||||
"(payload_len=%zu) — DECODE_PARAMS bit_size "
|
||||
"fields left zero, hantro will likely produce zeros\n",
|
||||
sh_rc, nal_payload_len);
|
||||
}
|
||||
}
|
||||
|
||||
h264_fill_dpb(driver_data, context, VAPicture, decode);
|
||||
|
||||
/*
|
||||
* Populate every V4L2_CID_STATELESS_H264_DECODE_PARAMS field
|
||||
* we can derive from VAAPI's pre-parsed VAPictureParameterBuffer
|
||||
* + bitstream byte. Cross-reference: GStreamer
|
||||
* gstv4l2codech264dec.c::gst_v4l2_codec_h264_dec_fill_decoder_params
|
||||
* (lines 632-678).
|
||||
*
|
||||
* Fields not derivable from VAAPI (idr_pic_id, pic_order_cnt_lsb,
|
||||
* delta_pic_order_cnt_*, dec_ref_pic_marking_bit_size,
|
||||
* pic_order_cnt_bit_size, slice_group_change_cycle) require a
|
||||
* full slice_header() bit-level parse, which libva-v4l2-request
|
||||
* does not currently do. They are left at zero-init and the
|
||||
* kernel-side hantro-vpu may compute them itself when scanning
|
||||
* the OUTPUT bitstream — a hypothesis verified empirically by
|
||||
* running this patch and inspecting the CAPTURE buffer.
|
||||
*/
|
||||
decode->nal_ref_idc = nal_ref_idc;
|
||||
decode->frame_num = VAPicture->frame_num;
|
||||
decode->top_field_order_cnt =
|
||||
h264_strip_ffmpeg_poc_sentinel(VAPicture->CurrPic.TopFieldOrderCnt,
|
||||
VAPicture->CurrPic.flags);
|
||||
decode->bottom_field_order_cnt =
|
||||
h264_strip_ffmpeg_poc_sentinel(VAPicture->CurrPic.BottomFieldOrderCnt,
|
||||
VAPicture->CurrPic.flags);
|
||||
|
||||
if (nal_unit_type == 5)
|
||||
decode->flags |= V4L2_H264_DECODE_PARAM_FLAG_IDR_PIC;
|
||||
if (VAPicture->pic_fields.bits.field_pic_flag)
|
||||
decode->flags |= V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC;
|
||||
if (VAPicture->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
|
||||
decode->flags |= V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD;
|
||||
|
||||
pps->weighted_bipred_idc =
|
||||
VAPicture->pic_fields.bits.weighted_bipred_idc;
|
||||
@@ -255,6 +513,7 @@ static void h264_va_picture_to_v4l2(struct request_data *driver_data,
|
||||
if (VAPicture->pic_fields.bits.redundant_pic_cnt_present_flag)
|
||||
pps->flags |= V4L2_H264_PPS_FLAG_REDUNDANT_PIC_CNT_PRESENT;
|
||||
|
||||
sps->max_num_ref_frames = VAPicture->num_ref_frames;
|
||||
sps->chroma_format_idc = VAPicture->seq_fields.bits.chroma_format_idc;
|
||||
sps->bit_depth_luma_minus8 = VAPicture->bit_depth_luma_minus8;
|
||||
sps->bit_depth_chroma_minus8 = VAPicture->bit_depth_chroma_minus8;
|
||||
@@ -301,6 +560,32 @@ static void h264_va_matrix_to_v4l2(struct request_data *driver_data,
|
||||
sizeof(v4l2_matrix->scaling_list_8x8[3]));
|
||||
}
|
||||
|
||||
/*
|
||||
* H.264 spec default scaling matrices: Flat_4x4_16 and Flat_8x8_16
|
||||
* (every entry = 16). When sps_scaling_matrix_present_flag and
|
||||
* pps_scaling_matrix_present_flag are both false, the bitstream
|
||||
* carries no explicit scaling lists and the decoder uses these
|
||||
* flat defaults — matching ITU-T H.264 (08/2024) §7.4.2.1.1.1
|
||||
* (sequence scaling) and §7.4.2.2 (picture scaling).
|
||||
*
|
||||
* Why we always provide the matrix: hantro G1's set_params reads
|
||||
* pps->flags & V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT to drive
|
||||
* the G1_REG_DEC_CTRL2_TYPE1_QUANT_E hardware bit. FFmpeg's
|
||||
* v4l2_request_h264.c always submits the SCALING_MATRIX control
|
||||
* with the spec default when the bitstream omits explicit lists,
|
||||
* and always sets the SCALING_MATRIX_PRESENT flag (commit
|
||||
* comment: "FFmpeg always provide a scaling matrix"). We mirror
|
||||
* that so the kernel sees a consistent control set across drivers.
|
||||
*/
|
||||
static void h264_default_flat_scaling_matrix(
|
||||
struct v4l2_ctrl_h264_scaling_matrix *v4l2_matrix)
|
||||
{
|
||||
memset(v4l2_matrix->scaling_list_4x4, 16,
|
||||
sizeof(v4l2_matrix->scaling_list_4x4));
|
||||
memset(v4l2_matrix->scaling_list_8x8, 16,
|
||||
sizeof(v4l2_matrix->scaling_list_8x8));
|
||||
}
|
||||
|
||||
static void h264_copy_pred_table(struct v4l2_h264_weight_factors *factors,
|
||||
unsigned int num_refs,
|
||||
int16_t luma_weight[32],
|
||||
@@ -327,10 +612,12 @@ static void h264_va_slice_to_v4l2(struct request_data *driver_data,
|
||||
struct object_context *context,
|
||||
VASliceParameterBufferH264 *VASlice,
|
||||
VAPictureParameterBufferH264 *VAPicture,
|
||||
struct v4l2_ctrl_h264_slice_params *slice)
|
||||
struct v4l2_ctrl_h264_slice_params *slice,
|
||||
struct v4l2_ctrl_h264_pred_weights *weights)
|
||||
{
|
||||
slice->size = VASlice->slice_data_size;
|
||||
slice->header_bit_size = VASlice->slice_data_bit_offset;
|
||||
//if (context->h264_start_code)
|
||||
// slice->header_bit_size += 3 * 8;
|
||||
slice->first_mb_in_slice = VASlice->first_mb_in_slice;
|
||||
slice->slice_type = VASlice->slice_type;
|
||||
slice->cabac_init_idc = VASlice->cabac_init_idc;
|
||||
@@ -351,12 +638,14 @@ static void h264_va_slice_to_v4l2(struct request_data *driver_data,
|
||||
VAPictureH264 *pic = &VASlice->RefPicList0[i];
|
||||
struct h264_dpb_entry *entry;
|
||||
unsigned int idx;
|
||||
unsigned char fields;
|
||||
|
||||
entry = dpb_lookup(context, pic, &idx);
|
||||
entry = dpb_lookup(context, pic, &idx, &fields);
|
||||
if (!entry)
|
||||
continue;
|
||||
|
||||
slice->ref_pic_list0[i] = idx;
|
||||
slice->ref_pic_list0[i].index = idx;
|
||||
slice->ref_pic_list0[i].fields = fields;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -370,26 +659,28 @@ static void h264_va_slice_to_v4l2(struct request_data *driver_data,
|
||||
VAPictureH264 *pic = &VASlice->RefPicList1[i];
|
||||
struct h264_dpb_entry *entry;
|
||||
unsigned int idx;
|
||||
unsigned char fields;
|
||||
|
||||
entry = dpb_lookup(context, pic, &idx);
|
||||
entry = dpb_lookup(context, pic, &idx, &fields);
|
||||
if (!entry)
|
||||
continue;
|
||||
|
||||
slice->ref_pic_list1[i] = idx;
|
||||
slice->ref_pic_list1[i].index = idx;
|
||||
slice->ref_pic_list1[i].fields = fields;
|
||||
}
|
||||
}
|
||||
|
||||
if (VASlice->direct_spatial_mv_pred_flag)
|
||||
slice->flags |= V4L2_H264_SLICE_FLAG_DIRECT_SPATIAL_MV_PRED;
|
||||
|
||||
slice->pred_weight_table.chroma_log2_weight_denom =
|
||||
weights->chroma_log2_weight_denom =
|
||||
VASlice->chroma_log2_weight_denom;
|
||||
slice->pred_weight_table.luma_log2_weight_denom =
|
||||
weights->luma_log2_weight_denom =
|
||||
VASlice->luma_log2_weight_denom;
|
||||
|
||||
if (((VASlice->slice_type % 5) == H264_SLICE_P) ||
|
||||
((VASlice->slice_type % 5) == H264_SLICE_B))
|
||||
h264_copy_pred_table(&slice->pred_weight_table.weight_factors[0],
|
||||
h264_copy_pred_table(&weights->weight_factors[0],
|
||||
slice->num_ref_idx_l0_active_minus1 + 1,
|
||||
VASlice->luma_weight_l0,
|
||||
VASlice->luma_offset_l0,
|
||||
@@ -397,7 +688,7 @@ static void h264_va_slice_to_v4l2(struct request_data *driver_data,
|
||||
VASlice->chroma_offset_l0);
|
||||
|
||||
if ((VASlice->slice_type % 5) == H264_SLICE_B)
|
||||
h264_copy_pred_table(&slice->pred_weight_table.weight_factors[1],
|
||||
h264_copy_pred_table(&weights->weight_factors[1],
|
||||
slice->num_ref_idx_l1_active_minus1 + 1,
|
||||
VASlice->luma_weight_l1,
|
||||
VASlice->luma_offset_l1,
|
||||
@@ -405,20 +696,130 @@ static void h264_va_slice_to_v4l2(struct request_data *driver_data,
|
||||
VASlice->chroma_offset_l1);
|
||||
}
|
||||
|
||||
int h264_get_controls(struct request_data *driver_data,
|
||||
struct object_context *context)
|
||||
{
|
||||
struct v4l2_ext_control controls[2] = {
|
||||
{
|
||||
.id = V4L2_CID_STATELESS_H264_DECODE_MODE,
|
||||
}, {
|
||||
.id = V4L2_CID_STATELESS_H264_START_CODE,
|
||||
}
|
||||
};
|
||||
int rc;
|
||||
|
||||
rc = v4l2_get_controls(driver_data->video_fd, -1, controls, 2);
|
||||
if (rc < 0)
|
||||
return VA_STATUS_ERROR_OPERATION_FAILED;
|
||||
|
||||
switch (controls[0].value) {
|
||||
case V4L2_STATELESS_H264_DECODE_MODE_SLICE_BASED:
|
||||
break;
|
||||
case V4L2_STATELESS_H264_DECODE_MODE_FRAME_BASED:
|
||||
break;
|
||||
default:
|
||||
request_log("Unsupported decode mode\n");
|
||||
return VA_STATUS_ERROR_OPERATION_FAILED;
|
||||
}
|
||||
|
||||
switch (controls[1].value) {
|
||||
case V4L2_STATELESS_H264_START_CODE_NONE:
|
||||
context->h264_start_code = false;
|
||||
break;
|
||||
case V4L2_STATELESS_H264_START_CODE_ANNEX_B:
|
||||
context->h264_start_code = true;
|
||||
break;
|
||||
default:
|
||||
request_log("Unsupported start code\n");
|
||||
return VA_STATUS_ERROR_OPERATION_FAILED;
|
||||
}
|
||||
|
||||
return VA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
/*
 * Translate a VA-API H.264 profile into the numeric profile_idc value
 * placed in the V4L2 SPS control. Profiles not listed here yield 0.
 */
static inline __u8 h264_profile_to_idc(VAProfile profile)
{
	__u8 idc = 0;	/* fallback for any profile not handled below */

	switch (profile) {
	case VAProfileH264ConstrainedBaseline:
		idc = 66;
		break;
	case VAProfileH264Main:
		idc = 77;
		break;
	case VAProfileH264High:
		idc = 100;
		break;
	case VAProfileH264MultiviewHigh:
		idc = 118;
		break;
	case VAProfileH264StereoHigh:
		idc = 128;
		break;
	default:
		break;
	}

	return idc;
}
|
||||
|
||||
/*
 * Derive sps.level_idc from the coded frame size in macroblocks, using
 * the MaxFS column of H.264 Annex A.3 (Table A-1).
 *
 * Several levels share the same MaxFS, so the frame size alone only
 * identifies a MaxFS tier. One representative level is returned per
 * tier: the lowest level of the tier, except for the 8192-MB tier
 * (Levels 4.0 and 4.1), where 4.1 is reported.
 *
 * Level encoding for the V4L2 control: level_idc = level * 10
 *   Level 1.0 → 10, Level 4.1 → 41, Level 5.1 → 51, Level 6.0 → 60.
 *
 * VAAPI does not expose the bitstream's actual level_idc on the decode
 * side (VAPictureParameterBufferH264 has no such field). The SPS NAL is
 * parsed client-side by ffmpeg-vaapi / mpv and only slice data is
 * forwarded in VASliceDataBuffer, so parsing the SPS here is not viable.
 *
 * Without a frame rate the MaxMBPS / MaxBR / MaxCPB limits cannot be
 * checked; selection is by frame size only.
 *
 * Picks for typical content:
 *   1080p (8160 MBs)    → Level 4.1 (level_idc = 41)
 *   4K    (32400 MBs)   → Level 5.1 (level_idc = 51)
 *   8K    (138240 MBs)  → Level 6.0 (level_idc = 60)
 *
 * Replaces the hardcoded level_idc=51 from patch 0013.
 *
 * @width_in_mbs:  picture width in macroblocks.
 * @height_in_mbs: picture height in macroblocks.
 *
 * Returns the level_idc of the matching tier, or 62 when the frame is
 * larger than the Level 6.x MaxFS.
 */
static inline __u8 h264_derive_level_idc(unsigned int width_in_mbs,
					 unsigned int height_in_mbs)
{
	static const struct {
		unsigned int max_fs;	/* MaxFS of the tier, in MBs */
		__u8 level_idc;
	} tiers[] = {
		{     99, 10 },	/* Level 1.0 */
		{    396, 11 },	/* Level 1.1 - 2.0 */
		{    792, 21 },	/* Level 2.1 */
		{   1620, 22 },	/* Level 2.2 - 3.0 */
		{   3600, 31 },	/* Level 3.1 */
		{   5120, 32 },	/* Level 3.2 */
		{   8192, 41 },	/* Level 4.0 - 4.1 */
		{   8704, 42 },	/* Level 4.2 */
		{  22080, 50 },	/* Level 5.0 */
		{  36864, 51 },	/* Level 5.1 - 5.2 */
		{ 139264, 60 },	/* Level 6.0 - 6.2 */
	};
	/*
	 * Multiply in a 64-bit type: with 32-bit unsigned int the product
	 * of two large dimensions (e.g. 65536 x 65536) wraps to a small
	 * number and would be reported as a low level.
	 */
	const unsigned long long frame_size_mbs =
		(unsigned long long)width_in_mbs * height_in_mbs;
	unsigned int i;

	for (i = 0; i < sizeof(tiers) / sizeof(tiers[0]); i++) {
		if (frame_size_mbs <= tiers[i].max_fs)
			return tiers[i].level_idc;
	}

	return 62;	/* > Level 6 ceiling */
}
|
||||
|
||||
int h264_set_controls(struct request_data *driver_data,
|
||||
struct object_context *context,
|
||||
VAProfile profile,
|
||||
struct object_surface *surface)
|
||||
{
|
||||
struct v4l2_ctrl_h264_scaling_matrix matrix = { 0 };
|
||||
struct v4l2_ctrl_h264_decode_params decode = { 0 };
|
||||
struct v4l2_ctrl_h264_slice_params slice = { 0 };
|
||||
struct v4l2_ctrl_h264_pred_weights weights = { 0 };
|
||||
struct v4l2_ctrl_h264_pps pps = { 0 };
|
||||
struct v4l2_ctrl_h264_sps sps = { 0 };
|
||||
struct h264_dpb_entry *output;
|
||||
int rc;
|
||||
|
||||
output = dpb_lookup(context, &surface->params.h264.picture.CurrPic,
|
||||
NULL);
|
||||
NULL, NULL);
|
||||
if (!output)
|
||||
output = dpb_find_entry(context);
|
||||
|
||||
@@ -429,37 +830,171 @@ int h264_set_controls(struct request_data *driver_data,
|
||||
h264_va_picture_to_v4l2(driver_data, context, surface,
|
||||
&surface->params.h264.picture,
|
||||
&decode, &pps, &sps);
|
||||
h264_va_matrix_to_v4l2(driver_data, context,
|
||||
&surface->params.h264.matrix, &matrix);
|
||||
|
||||
/*
|
||||
* Populate the scaling matrix unconditionally: from VAAPI's
|
||||
* VAIQMatrixBufferH264 when the consumer sent one this frame
|
||||
* (matrix_set), otherwise from the H.264 spec flat defaults.
|
||||
* Submitted to the kernel as V4L2_CID_STATELESS_H264_SCALING_MATRIX
|
||||
* for every request — required for FFmpeg/hantro contract parity
|
||||
* (see h264_default_flat_scaling_matrix() docblock).
|
||||
*/
|
||||
if (surface->params.h264.matrix_set)
|
||||
h264_va_matrix_to_v4l2(driver_data, context,
|
||||
&surface->params.h264.matrix, &matrix);
|
||||
else
|
||||
h264_default_flat_scaling_matrix(&matrix);
|
||||
|
||||
h264_va_slice_to_v4l2(driver_data, context,
|
||||
&surface->params.h264.slice,
|
||||
&surface->params.h264.picture, &slice);
|
||||
&surface->params.h264.picture, &slice, &weights);
|
||||
|
||||
rc = v4l2_set_control(driver_data->video_fd, surface->request_fd,
|
||||
V4L2_CID_MPEG_VIDEO_H264_DECODE_PARAMS, &decode,
|
||||
sizeof(decode));
|
||||
if (rc < 0)
|
||||
return VA_STATUS_ERROR_OPERATION_FAILED;
|
||||
/*
|
||||
* Mirror SCALING_MATRIX_PRESENT in PPS flags. Hantro G1 set_params
|
||||
* gates its G1_REG_DEC_CTRL2_TYPE1_QUANT_E register bit on this;
|
||||
* FFmpeg sets it unconditionally with the comment "FFmpeg always
|
||||
* provide a scaling matrix." We submit the matrix always (above),
|
||||
* so the flag must be set always to match.
|
||||
*/
|
||||
pps.flags |= V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT;
|
||||
|
||||
rc = v4l2_set_control(driver_data->video_fd, surface->request_fd,
|
||||
V4L2_CID_MPEG_VIDEO_H264_SLICE_PARAMS, &slice,
|
||||
sizeof(slice));
|
||||
if (rc < 0)
|
||||
return VA_STATUS_ERROR_OPERATION_FAILED;
|
||||
/*
|
||||
* Populate pps->num_ref_idx_l0/l1_default_active_minus1. Hantro G1
|
||||
* writes both into G1_REG_DEC_CTRL6_REFIDX0_ACTIVE / REFIDX1_ACTIVE
|
||||
* MMIO registers (via "(field) + 1", so an uninitialized 0 here
|
||||
* would advertise "1 active reference per list" to hardware, wrong
|
||||
* for I/IDR frames with 0 refs and wrong for B frames with >1).
|
||||
*
|
||||
* VAAPI's VAPictureParameterBufferH264 does not carry the parsed
|
||||
* PPS num_ref_idx_l*_default_active_minus1 fields — those are in
|
||||
* the bitstream's PPS NAL which VAAPI consumers parse client-side
|
||||
* but don't forward. The closest available source is VASlice's
|
||||
* num_ref_idx_l*_active_minus1, which is the per-slice override
|
||||
* defaulting to the PPS value (H.264 §7.4.3 num_ref_idx_active_
|
||||
* override_flag). For most streams these values match; mismatch
|
||||
* only on streams with explicit per-slice overrides.
|
||||
*
|
||||
* For IDR frames (no references), the values are not used by
|
||||
* hantro's reference list builder, so a wrong value here is
|
||||
* harmless. For inter frames it matters and slice-derived is
|
||||
* the best we can do without a full PPS-NAL parser.
|
||||
*/
|
||||
pps.num_ref_idx_l0_default_active_minus1 =
|
||||
surface->params.h264.slice.num_ref_idx_l0_active_minus1;
|
||||
pps.num_ref_idx_l1_default_active_minus1 =
|
||||
surface->params.h264.slice.num_ref_idx_l1_active_minus1;
|
||||
|
||||
rc = v4l2_set_control(driver_data->video_fd, surface->request_fd,
|
||||
V4L2_CID_MPEG_VIDEO_H264_PPS, &pps, sizeof(pps));
|
||||
if (rc < 0)
|
||||
return VA_STATUS_ERROR_OPERATION_FAILED;
|
||||
/*
|
||||
* Derive PFRAME / BFRAME flags in v4l2_ctrl_h264_decode_params.flags
|
||||
* from VASliceParameterBufferH264.slice_type. VAAPI's slice_type
|
||||
* matches the H.264 spec slice_type semantic: 0=P, 1=B, 2=I, 3=SP,
|
||||
* 4=SI; values 5..9 mean "all slices in the picture have this
|
||||
* slice_type" (mod 5 yields the underlying type). VAAPI consumers
|
||||
* (ffmpeg, mpv) populate this for every slice; in FRAME_BASED mode
|
||||
* we only see the most-recent slice's params, but slice_type is
|
||||
* uniform across a single coded picture for our purposes.
|
||||
*
|
||||
* Kernel consumers that read these flags: tegra-vde
|
||||
* (drivers/media/platform/nvidia/tegra-vde/h264.c lines 783-799 of
|
||||
* 6.19.x) selects the inter-frame decode kernel. Hantro / rkvdec /
|
||||
* cedrus / mediatek / qcom-iris-stateless do not consume them.
|
||||
* Setting them keeps the libva-v4l2-request fork upstreamable
|
||||
* across drivers without affecting hantro behaviour.
|
||||
*
|
||||
* Cross-reference: ext-ctrls-codec-stateless.rst Decode Parameters
|
||||
* Flags — V4L2_H264_DECODE_PARAM_FLAG_PFRAME / _BFRAME.
|
||||
*/
|
||||
switch (surface->params.h264.slice.slice_type % 5) {
|
||||
case H264_SLICE_P:
|
||||
decode.flags |= V4L2_H264_DECODE_PARAM_FLAG_PFRAME;
|
||||
break;
|
||||
case H264_SLICE_B:
|
||||
decode.flags |= V4L2_H264_DECODE_PARAM_FLAG_BFRAME;
|
||||
break;
|
||||
default:
|
||||
/* I / SP / SI: no extra flag. */
|
||||
break;
|
||||
}
|
||||
|
||||
rc = v4l2_set_control(driver_data->video_fd, surface->request_fd,
|
||||
V4L2_CID_MPEG_VIDEO_H264_SPS, &sps, sizeof(sps));
|
||||
if (rc < 0)
|
||||
return VA_STATUS_ERROR_OPERATION_FAILED;
|
||||
sps.profile_idc = h264_profile_to_idc(profile);
|
||||
|
||||
rc = v4l2_set_control(driver_data->video_fd, surface->request_fd,
|
||||
V4L2_CID_MPEG_VIDEO_H264_SCALING_MATRIX, &matrix,
|
||||
sizeof(matrix));
|
||||
/*
|
||||
* Derive level_idc from encoded frame size per H.264 Annex A.3.
|
||||
* VAAPI doesn't expose level_idc on the decode side (see
|
||||
* h264_derive_level_idc()'s docblock for the rationale); we pick
|
||||
* the smallest level whose MaxFS contains the picture dimensions.
|
||||
* Replaces patch 0013's intermediate hardcode of 51.
|
||||
*/
|
||||
sps.level_idc = h264_derive_level_idc(
|
||||
(unsigned int)surface->params.h264.picture.picture_width_in_mbs_minus1 + 1u,
|
||||
(unsigned int)surface->params.h264.picture.picture_height_in_mbs_minus1 + 1u);
|
||||
|
||||
/*
|
||||
* Build the per-request control list incrementally:
|
||||
* - SPS, PPS, DECODE_PARAMS, SCALING_MATRIX: always required.
|
||||
* Hantro G1 reads the SCALING_MATRIX_PRESENT flag from PPS to
|
||||
* gate hardware register G1_REG_DEC_CTRL2_TYPE1_QUANT_E and
|
||||
* reads the matrix entries directly into hardware tables when
|
||||
* decoding. FFmpeg always submits the matrix (with spec-default
|
||||
* flat values when no explicit lists are in the bitstream); we
|
||||
* match that — see h264_default_flat_scaling_matrix() docblock.
|
||||
* Earlier patch 0012 made SCALING_MATRIX submission conditional
|
||||
* on VAAPI's VAIQMatrixBuffer arrival; that was corpus-correct
|
||||
* (bbb has no explicit scaling lists) but inconsistent with the
|
||||
* hantro contract — replaced 2026-05-04.
|
||||
* - SLICE_PARAMS: SLICE_BASED only. Kernel doc
|
||||
* ext-ctrls-codec-stateless.rst (FRAME_BASED entry):
|
||||
* "When this mode is selected, the
|
||||
* V4L2_CID_STATELESS_H264_SLICE_PARAMS control shall not be
|
||||
* set." Submitting it under FRAME_BASED triggers cluster-
|
||||
* validation EINVAL at error_idx=count.
|
||||
* - PRED_WEIGHTS: SLICE_BASED + V4L2_H264_CTRL_PRED_WEIGHTS_REQUIRED.
|
||||
*
|
||||
* Patch 0002 unconditionally sets the device to FRAME_BASED,
|
||||
* so slice_based is hardcoded false here. When the planned
|
||||
* probe-then-set commit lands, this becomes
|
||||
* context->decode_mode == V4L2_STATELESS_H264_DECODE_MODE_SLICE_BASED.
|
||||
*/
|
||||
struct v4l2_ext_control controls[6] = { 0 };
|
||||
unsigned int num_controls = 0;
|
||||
const bool slice_based = false; /* TODO: probe via context->decode_mode */
|
||||
|
||||
controls[num_controls].id = V4L2_CID_STATELESS_H264_SPS;
|
||||
controls[num_controls].p_h264_sps = &sps;
|
||||
controls[num_controls].size = sizeof(sps);
|
||||
num_controls++;
|
||||
|
||||
controls[num_controls].id = V4L2_CID_STATELESS_H264_PPS;
|
||||
controls[num_controls].p_h264_pps = &pps;
|
||||
controls[num_controls].size = sizeof(pps);
|
||||
num_controls++;
|
||||
|
||||
controls[num_controls].id = V4L2_CID_STATELESS_H264_DECODE_PARAMS;
|
||||
controls[num_controls].p_h264_decode_params = &decode;
|
||||
controls[num_controls].size = sizeof(decode);
|
||||
num_controls++;
|
||||
|
||||
controls[num_controls].id = V4L2_CID_STATELESS_H264_SCALING_MATRIX;
|
||||
controls[num_controls].p_h264_scaling_matrix = &matrix;
|
||||
controls[num_controls].size = sizeof(matrix);
|
||||
num_controls++;
|
||||
|
||||
if (slice_based) {
|
||||
controls[num_controls].id = V4L2_CID_STATELESS_H264_SLICE_PARAMS;
|
||||
controls[num_controls].p_h264_slice_params = &slice;
|
||||
controls[num_controls].size = sizeof(slice);
|
||||
num_controls++;
|
||||
|
||||
if (V4L2_H264_CTRL_PRED_WEIGHTS_REQUIRED(&pps, &slice)) {
|
||||
controls[num_controls].id = V4L2_CID_STATELESS_H264_PRED_WEIGHTS;
|
||||
controls[num_controls].ptr = &weights;
|
||||
controls[num_controls].size = sizeof(weights);
|
||||
num_controls++;
|
||||
}
|
||||
}
|
||||
|
||||
rc = v4l2_set_controls(driver_data->video_fd, surface->request_fd,
|
||||
controls, num_controls);
|
||||
if (rc < 0)
|
||||
return VA_STATUS_ERROR_OPERATION_FAILED;
|
||||
|
||||
|
||||
@@ -51,8 +51,11 @@ struct h264_dpb {
|
||||
unsigned int age;
|
||||
};
|
||||
|
||||
int h264_get_controls(struct request_data *driver_data,
|
||||
struct object_context *context);
|
||||
int h264_set_controls(struct request_data *data,
|
||||
struct object_context *context,
|
||||
VAProfile profile,
|
||||
struct object_surface *surface);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -0,0 +1,361 @@
|
||||
/*
|
||||
* H.264 slice header bit-parser implementation.
|
||||
*
|
||||
* Implements just enough of ITU-T Rec. H.264 (08/2024) §7.3.3
|
||||
* slice_header to populate the V4L2 DECODE_PARAMS bit-position
|
||||
* fields (idr_pic_id, pic_order_cnt_lsb, delta_pic_order_cnt_*,
|
||||
* pic_order_cnt_bit_size, dec_ref_pic_marking_bit_size).
|
||||
*
|
||||
* Skips through ref_pic_list_modification() and pred_weight_table()
|
||||
* because dec_ref_pic_marking() (whose bit length we need) comes
|
||||
* after them. MVC extensions (nal_unit_type 20/21) are not handled
|
||||
* — this fork strips MVC alongside HEVC.
|
||||
*/
|
||||
|
||||
#include "h264_slice_header.h"
|
||||
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
 * Minimal RBSP bit reader. Bits are consumed MSB-first from `data`.
 *
 * `bit_pos` is the number of bits consumed so far; callers can take
 * the difference of two bit_pos samples to measure the size of a
 * syntax element. `error` is sticky: the readers set it on failure and
 * never clear it, so a caller can parse a run of elements and check
 * once at the end.
 */
struct br {
	const uint8_t *data;
	size_t length;		/* bytes */
	size_t bit_pos;
	bool error;
};

/*
 * Read an n-bit unsigned value, u(n). On running out of data the error
 * flag is set and 0 is returned; bits consumed before the failure stay
 * consumed.
 */
static uint32_t br_read_u(struct br *b, unsigned n)
{
	uint32_t v = 0;

	while (n--) {
		if (b->bit_pos >= b->length * 8) {
			b->error = true;
			return 0;
		}
		v = (v << 1) | ((b->data[b->bit_pos >> 3] >>
				 (7 - (b->bit_pos & 7))) & 1u);
		b->bit_pos++;
	}
	return v;
}

/*
 * Read an unsigned Exp-Golomb value, ue(v): count leading zero bits up
 * to the first 1 bit, then read that many suffix bits.
 *
 * Returns 0 with the error flag set when the data runs out or when the
 * zero prefix reaches 32 bits. Such a prefix cannot be represented in
 * 32 bits, so it is reported as an error rather than read as the valid
 * value 0.
 */
static uint32_t br_read_ue(struct br *b)
{
	unsigned zeros = 0;
	uint32_t suffix;

	while (br_read_u(b, 1) == 0) {
		if (b->error)
			return 0;
		if (++zeros >= 32) {
			b->error = true;
			return 0;
		}
	}
	if (zeros == 0)
		return 0;

	suffix = br_read_u(b, zeros);
	if (b->error)
		return 0;	/* same "0 on failure" contract as br_read_u() */

	return (1u << zeros) - 1u + suffix;
}

/*
 * Read a signed Exp-Golomb value, se(v). Odd code numbers map to the
 * positive value (k + 1) / 2, even code numbers to -(k / 2). Errors are
 * reported through the reader's error flag by br_read_ue().
 */
static int32_t br_read_se(struct br *b)
{
	uint32_t v = br_read_ue(b);

	if (v & 1u)
		return (int32_t)((v + 1u) >> 1);
	return -(int32_t)(v >> 1);
}
|
||||
|
||||
/*
 * RBSP unescape: drop emulation prevention bytes. In the encoded
 * stream a 0x03 is inserted after every 0x00 0x00 pair to prevent
 * byte-aligned start-code emulation; those 0x03 bytes are removed here
 * before bit-parsing.
 *
 * Only the first H264_SLICE_HEADER_SCAN_BYTES input bytes are examined,
 * so at most that many bytes are written to `out`; the output buffer
 * must hold min(in_len, H264_SLICE_HEADER_SCAN_BYTES) bytes.
 *
 * Slice headers are typically short (<100 bits), which is why a 64-byte
 * window is used. NOTE(review): a header longer than the window is
 * truncated here, and the bit reader will then flag an error when it
 * runs off the end — confirm this is acceptable for streams with large
 * explicit pred_weight_table() payloads.
 *
 * Returns the number of bytes written to `out`.
 */
#define H264_SLICE_HEADER_SCAN_BYTES 64

static size_t rbsp_unescape(uint8_t *out, const uint8_t *in,
			    size_t in_len)
{
	size_t written = 0;
	size_t pos = 0;
	int zeros_seen = 0;

	/* Clamp the scan window to the fixed header budget. */
	if (in_len > H264_SLICE_HEADER_SCAN_BYTES)
		in_len = H264_SLICE_HEADER_SCAN_BYTES;

	while (pos < in_len) {
		const uint8_t byte = in[pos++];

		/* 0x03 right after two or more zero bytes is an escape. */
		if (byte == 0x03 && zeros_seen >= 2) {
			zeros_seen = 0;
			continue;
		}

		out[written++] = byte;
		if (byte == 0x00)
			zeros_seen++;
		else
			zeros_seen = 0;
	}

	return written;
}
|
||||
|
||||
/*
|
||||
* §7.3.3.1 ref_pic_list_modification() — skip past it without
|
||||
* keeping any values. Length depends on slice_type and the loop
|
||||
* terminator modification_of_pic_nums_idc == 3.
|
||||
*/
|
||||
static void skip_ref_pic_list_modification(struct br *b,
|
||||
uint32_t slice_type)
|
||||
{
|
||||
uint32_t st_mod5 = slice_type % 5;
|
||||
|
||||
if (st_mod5 != 2 && st_mod5 != 4) {
|
||||
/* P, SP, B */
|
||||
uint32_t ref_pic_list_modification_flag_l0 = br_read_u(b, 1);
|
||||
if (ref_pic_list_modification_flag_l0) {
|
||||
uint32_t mod_idc;
|
||||
do {
|
||||
mod_idc = br_read_ue(b);
|
||||
if (mod_idc == 0 || mod_idc == 1)
|
||||
br_read_ue(b); /* abs_diff_pic_num_minus1 */
|
||||
else if (mod_idc == 2)
|
||||
br_read_ue(b); /* long_term_pic_num */
|
||||
if (b->error)
|
||||
return;
|
||||
} while (mod_idc != 3);
|
||||
}
|
||||
}
|
||||
if (st_mod5 == 1) {
|
||||
/* B */
|
||||
uint32_t ref_pic_list_modification_flag_l1 = br_read_u(b, 1);
|
||||
if (ref_pic_list_modification_flag_l1) {
|
||||
uint32_t mod_idc;
|
||||
do {
|
||||
mod_idc = br_read_ue(b);
|
||||
if (mod_idc == 0 || mod_idc == 1)
|
||||
br_read_ue(b);
|
||||
else if (mod_idc == 2)
|
||||
br_read_ue(b);
|
||||
if (b->error)
|
||||
return;
|
||||
} while (mod_idc != 3);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* §7.3.3.2 pred_weight_table() — skip past it. Length depends on
|
||||
* the active reference counts and chroma_format_idc.
|
||||
*/
|
||||
static void skip_pred_weight_table(struct br *b,
|
||||
uint32_t slice_type,
|
||||
uint8_t chroma_format_idc,
|
||||
uint8_t bit_depth_luma_minus8,
|
||||
uint8_t bit_depth_chroma_minus8,
|
||||
uint32_t num_ref_idx_l0_active_minus1,
|
||||
uint32_t num_ref_idx_l1_active_minus1)
|
||||
{
|
||||
uint32_t i, j;
|
||||
uint32_t st_mod5 = slice_type % 5;
|
||||
|
||||
(void)bit_depth_luma_minus8;
|
||||
(void)bit_depth_chroma_minus8;
|
||||
|
||||
br_read_ue(b); /* luma_log2_weight_denom */
|
||||
if (chroma_format_idc != 0)
|
||||
br_read_ue(b); /* chroma_log2_weight_denom */
|
||||
|
||||
for (i = 0; i <= num_ref_idx_l0_active_minus1 && !b->error; i++) {
|
||||
uint32_t luma_weight_l0_flag = br_read_u(b, 1);
|
||||
if (luma_weight_l0_flag) {
|
||||
br_read_se(b); /* luma_weight_l0 */
|
||||
br_read_se(b); /* luma_offset_l0 */
|
||||
}
|
||||
if (chroma_format_idc != 0) {
|
||||
uint32_t chroma_weight_l0_flag = br_read_u(b, 1);
|
||||
if (chroma_weight_l0_flag) {
|
||||
for (j = 0; j < 2; j++) {
|
||||
br_read_se(b);
|
||||
br_read_se(b);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (st_mod5 == 1) {
|
||||
for (i = 0; i <= num_ref_idx_l1_active_minus1 && !b->error; i++) {
|
||||
uint32_t luma_weight_l1_flag = br_read_u(b, 1);
|
||||
if (luma_weight_l1_flag) {
|
||||
br_read_se(b);
|
||||
br_read_se(b);
|
||||
}
|
||||
if (chroma_format_idc != 0) {
|
||||
uint32_t chroma_weight_l1_flag = br_read_u(b, 1);
|
||||
if (chroma_weight_l1_flag) {
|
||||
for (j = 0; j < 2; j++) {
|
||||
br_read_se(b);
|
||||
br_read_se(b);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int h264_parse_slice_header(const uint8_t *nal_payload,
|
||||
size_t nal_payload_length,
|
||||
const struct h264_slice_header_context *ctx,
|
||||
struct h264_slice_header_info *out)
|
||||
{
|
||||
uint8_t unescaped[H264_SLICE_HEADER_SCAN_BYTES];
|
||||
size_t unescaped_len;
|
||||
struct br b = { 0 };
|
||||
bool idr_pic_flag = (ctx->nal_unit_type == 5);
|
||||
uint32_t slice_type;
|
||||
uint32_t num_ref_idx_l0_active_minus1;
|
||||
uint32_t num_ref_idx_l1_active_minus1;
|
||||
size_t pic_order_cnt_start;
|
||||
size_t pic_order_cnt_end;
|
||||
size_t dec_ref_pic_marking_start;
|
||||
size_t dec_ref_pic_marking_end;
|
||||
bool field_pic_flag = false;
|
||||
|
||||
memset(out, 0, sizeof(*out));
|
||||
|
||||
if (!nal_payload || nal_payload_length == 0)
|
||||
return -EINVAL;
|
||||
|
||||
unescaped_len = rbsp_unescape(unescaped, nal_payload,
|
||||
nal_payload_length);
|
||||
if (unescaped_len < 2)
|
||||
return -EINVAL;
|
||||
|
||||
b.data = unescaped;
|
||||
b.length = unescaped_len;
|
||||
b.bit_pos = 0;
|
||||
b.error = false;
|
||||
|
||||
/* slice_header() per §7.3.3 */
|
||||
out->first_mb_in_slice = br_read_ue(&b);
|
||||
slice_type = br_read_ue(&b);
|
||||
out->slice_type = slice_type;
|
||||
out->pic_parameter_set_id = br_read_ue(&b);
|
||||
|
||||
if (ctx->separate_colour_plane_flag)
|
||||
(void)br_read_u(&b, 2); /* colour_plane_id */
|
||||
|
||||
out->frame_num = br_read_u(&b, ctx->log2_max_frame_num_minus4 + 4u);
|
||||
|
||||
if (!ctx->frame_mbs_only_flag) {
|
||||
field_pic_flag = (br_read_u(&b, 1) != 0);
|
||||
if (field_pic_flag)
|
||||
(void)br_read_u(&b, 1); /* bottom_field_flag */
|
||||
}
|
||||
|
||||
if (idr_pic_flag)
|
||||
out->idr_pic_id = (uint16_t)br_read_ue(&b);
|
||||
|
||||
/*
|
||||
* pic_order_cnt syntax — measure bit length from the start of
|
||||
* pic_order_cnt_lsb / delta_pic_order_cnt[0] to the end of
|
||||
* delta_pic_order_cnt_bottom / delta_pic_order_cnt[1]. This is
|
||||
* what V4L2 calls pic_order_cnt_bit_size and what hantro G1
|
||||
* writes into G1_REG_DEC_CTRL6_POC_LENGTH.
|
||||
*/
|
||||
pic_order_cnt_start = b.bit_pos;
|
||||
if (ctx->pic_order_cnt_type == 0) {
|
||||
out->pic_order_cnt_lsb = (uint16_t)br_read_u(
|
||||
&b, ctx->log2_max_pic_order_cnt_lsb_minus4 + 4u);
|
||||
if (ctx->bottom_field_pic_order_in_frame_present_flag &&
|
||||
!field_pic_flag)
|
||||
out->delta_pic_order_cnt_bottom = br_read_se(&b);
|
||||
} else if (ctx->pic_order_cnt_type == 1 &&
|
||||
!ctx->delta_pic_order_always_zero_flag) {
|
||||
out->delta_pic_order_cnt0 = br_read_se(&b);
|
||||
if (ctx->bottom_field_pic_order_in_frame_present_flag &&
|
||||
!field_pic_flag)
|
||||
out->delta_pic_order_cnt1 = br_read_se(&b);
|
||||
}
|
||||
pic_order_cnt_end = b.bit_pos;
|
||||
out->pic_order_cnt_bit_size = (uint32_t)(pic_order_cnt_end -
|
||||
pic_order_cnt_start);
|
||||
|
||||
if (ctx->redundant_pic_cnt_present_flag)
|
||||
(void)br_read_ue(&b); /* redundant_pic_cnt */
|
||||
|
||||
if (slice_type % 5 == 1) /* B */
|
||||
(void)br_read_u(&b, 1); /* direct_spatial_mv_pred_flag */
|
||||
|
||||
num_ref_idx_l0_active_minus1 = ctx->num_ref_idx_l0_default_active_minus1;
|
||||
num_ref_idx_l1_active_minus1 = ctx->num_ref_idx_l1_default_active_minus1;
|
||||
|
||||
{
|
||||
uint32_t st = slice_type % 5;
|
||||
if (st == 0 || st == 3 || st == 1) {
|
||||
/* P, SP, B */
|
||||
uint32_t override = br_read_u(&b, 1);
|
||||
if (override) {
|
||||
num_ref_idx_l0_active_minus1 = br_read_ue(&b);
|
||||
if (st == 1)
|
||||
num_ref_idx_l1_active_minus1 = br_read_ue(&b);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
skip_ref_pic_list_modification(&b, slice_type);
|
||||
if (b.error)
|
||||
return -EIO;
|
||||
|
||||
{
|
||||
uint32_t st = slice_type % 5;
|
||||
bool do_pwt =
|
||||
(ctx->weighted_pred_flag && (st == 0 || st == 3)) ||
|
||||
(ctx->weighted_bipred_idc == 1 && st == 1);
|
||||
if (do_pwt) {
|
||||
skip_pred_weight_table(&b, slice_type,
|
||||
ctx->chroma_format_idc,
|
||||
ctx->bit_depth_luma_minus8,
|
||||
ctx->bit_depth_chroma_minus8,
|
||||
num_ref_idx_l0_active_minus1,
|
||||
num_ref_idx_l1_active_minus1);
|
||||
if (b.error)
|
||||
return -EIO;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* dec_ref_pic_marking() per §7.3.3.3 — measure bit length;
|
||||
* hantro G1 writes this into G1_REG_DEC_CTRL5_REFPIC_MK_LEN.
|
||||
*/
|
||||
dec_ref_pic_marking_start = b.bit_pos;
|
||||
if (ctx->nal_ref_idc != 0) {
|
||||
if (idr_pic_flag) {
|
||||
(void)br_read_u(&b, 1); /* no_output_of_prior_pics_flag */
|
||||
(void)br_read_u(&b, 1); /* long_term_reference_flag */
|
||||
} else {
|
||||
uint32_t adaptive = br_read_u(&b, 1);
|
||||
if (adaptive) {
|
||||
uint32_t mmco;
|
||||
do {
|
||||
mmco = br_read_ue(&b);
|
||||
if (mmco == 1 || mmco == 3)
|
||||
br_read_ue(&b); /* difference_of_pic_nums_minus1 */
|
||||
if (mmco == 2)
|
||||
br_read_ue(&b); /* long_term_pic_num */
|
||||
if (mmco == 3 || mmco == 6)
|
||||
br_read_ue(&b); /* long_term_frame_idx */
|
||||
if (mmco == 4)
|
||||
br_read_ue(&b); /* max_long_term_frame_idx_plus1 */
|
||||
if (b.error)
|
||||
return -EIO;
|
||||
} while (mmco != 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
dec_ref_pic_marking_end = b.bit_pos;
|
||||
out->dec_ref_pic_marking_bit_size =
|
||||
(uint32_t)(dec_ref_pic_marking_end - dec_ref_pic_marking_start);
|
||||
|
||||
if (b.error)
|
||||
return -EIO;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -0,0 +1,95 @@
|
||||
/*
|
||||
* H.264 slice header bit-parser for libva-v4l2-request.
|
||||
*
|
||||
* Extracts the slice-header bit-position and value fields that
|
||||
* V4L2_CID_STATELESS_H264_DECODE_PARAMS requires (idr_pic_id,
|
||||
* pic_order_cnt_lsb, delta_pic_order_cnt_*, pic_order_cnt_bit_size,
|
||||
* dec_ref_pic_marking_bit_size). VAAPI's pre-parsed
|
||||
* VAPictureParameterBufferH264 / VASliceParameterBufferH264 do not
|
||||
* carry these — they live only in the bitstream's slice_header()
|
||||
* syntax. Hantro G1 (drivers/media/platform/verisilicon/
|
||||
* hantro_g1_h264_dec.c::set_params) writes the bit_size fields
|
||||
* directly into MMIO registers G1_REG_DEC_CTRL5_REFPIC_MK_LEN and
|
||||
* G1_REG_DEC_CTRL6_POC_LENGTH; with zeros the hardware bitstream
|
||||
* parser walks past zero bits, lands on garbage, decodes nothing.
|
||||
*
|
||||
* Spec reference: ITU-T Rec. H.264 (08/2024) §7.3.3 slice_header
|
||||
* and §7.3.3.1 ref_pic_list_modification, §7.3.3.2 pred_weight_table,
|
||||
* §7.3.3.3 dec_ref_pic_marking.
|
||||
*
|
||||
* Cross-reference (proven working on hantro): FFmpeg's
|
||||
* libavcodec/h264_slice.c populates H264SliceContext::ref_pic_marking_
|
||||
* bit_size and pic_order_cnt_bit_size from its bit-precise slice
|
||||
* header parse, then v4l2_request_h264.c forwards them.
|
||||
*/
|
||||
|
||||
#ifndef H264_SLICE_HEADER_H
|
||||
#define H264_SLICE_HEADER_H
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
/*
 * Inputs that steer h264_parse_slice_header(): which optional
 * slice-header syntax elements are present and how wide the
 * fixed-length ones are.  All values are supplied by the caller.
 */
struct h264_slice_header_context {
|
||||
/* From SPS (the active SPS at slice-time). */
|
||||
bool separate_colour_plane_flag; /* set: a 2-bit colour_plane_id is read and discarded */
|
||||
uint8_t log2_max_frame_num_minus4; /* frame_num is read as (this + 4) bits */
|
||||
bool frame_mbs_only_flag; /* clear: field_pic_flag (and maybe bottom_field_flag) is read */
|
||||
uint8_t pic_order_cnt_type; /* 0: pic_order_cnt_lsb path; 1: delta_pic_order_cnt path */
|
||||
uint8_t log2_max_pic_order_cnt_lsb_minus4; /* pic_order_cnt_lsb is read as (this + 4) bits */
|
||||
bool delta_pic_order_always_zero_flag; /* set: type-1 delta_pic_order_cnt values are not read */
|
||||
|
||||
/* From PPS (the active PPS at slice-time). */
|
||||
bool bottom_field_pic_order_in_frame_present_flag; /* gates the bottom/[1] POC delta when not a field picture */
|
||||
bool redundant_pic_cnt_present_flag; /* set: redundant_pic_cnt is read and discarded */
|
||||
bool weighted_pred_flag; /* set: P/SP slices carry a pred_weight_table() */
|
||||
uint8_t weighted_bipred_idc; /* == 1: B slices carry a pred_weight_table() */
|
||||
uint8_t num_ref_idx_l0_default_active_minus1; /* used unless the slice overrides it */
|
||||
|
||||
uint8_t num_ref_idx_l1_default_active_minus1; /* used unless a B slice overrides it */
|
||||
/* NOTE(review): the next three are SPS syntax elements in H.264, grouped here anyway. */
|
||||
uint8_t chroma_format_idc; /* nonzero: chroma weights are parsed in pred_weight_table() */
|
||||
uint8_t bit_depth_luma_minus8; /* forwarded to skip_pred_weight_table(), unused there */
|
||||
uint8_t bit_depth_chroma_minus8; /* forwarded to skip_pred_weight_table(), unused there */
|
||||
|
||||
/* From the NAL unit header (already extracted by the caller). */
|
||||
uint8_t nal_unit_type; /* 5 is treated as an IDR slice (idr_pic_id is read) */
|
||||
uint8_t nal_ref_idc; /* nonzero: dec_ref_pic_marking() is parsed */
|
||||
};
|
||||
|
||||
/*
 * Results of h264_parse_slice_header().  The parser zeroes the whole
 * struct first, so fields whose syntax element is absent stay 0.
 */
struct h264_slice_header_info {
|
||||
uint16_t idr_pic_id; /* read only when nal_unit_type == 5 */
|
||||
uint16_t pic_order_cnt_lsb; /* read only when pic_order_cnt_type == 0 */
|
||||
int32_t delta_pic_order_cnt_bottom; /* type 0, frame picture, PPS flag set */
|
||||
int32_t delta_pic_order_cnt0; /* type 1 and deltas not always zero */
|
||||
int32_t delta_pic_order_cnt1; /* type 1, frame picture, PPS flag set */
|
||||
uint32_t pic_order_cnt_bit_size; /* bits consumed by the POC syntax elements above */
|
||||
uint32_t dec_ref_pic_marking_bit_size; /* bits consumed by dec_ref_pic_marking() */
|
||||
|
||||
/* Diagnostic — useful for cross-checking VAAPI vs bitstream values. */
|
||||
uint32_t first_mb_in_slice;
|
||||
uint32_t slice_type; /* raw value as read; the parser classifies it via slice_type % 5 */
|
||||
uint32_t pic_parameter_set_id;
|
||||
uint32_t frame_num;
|
||||
};
|
||||
|
||||
/*
|
||||
* Parse slice_header() up to dec_ref_pic_marking() (inclusive) of
|
||||
* the H.264 RBSP slice_layer_without_partitioning_rbsp() syntax,
|
||||
* extracting the V4L2 DECODE_PARAMS fields. Returns 0 on success,
|
||||
* negative errno-shaped value on parse failure (insufficient data,
|
||||
* malformed exp-Golomb, etc.).
|
||||
*
|
||||
* @nal_payload: pointer to the byte AFTER the NAL header byte
|
||||
* (i.e. start of the RBSP proper; caller has already
|
||||
* skipped any ANNEX_B start code and the 1-byte
|
||||
* nal_unit_header). Will be RBSP-unescaped internally
|
||||
* before parsing.
|
||||
* @nal_payload_length: bytes available at @nal_payload.
|
||||
* @ctx: SPS/PPS/NAL context required to drive the parse.
|
||||
* @out: filled on success. All fields zero-initialized first.
|
||||
*/
|
||||
int h264_parse_slice_header(const uint8_t *nal_payload,
|
||||
size_t nal_payload_length,
|
||||
const struct h264_slice_header_context *ctx,
|
||||
struct h264_slice_header_info *out);
|
||||
|
||||
#endif /* H264_SLICE_HEADER_H */
|
||||
+742
-232
File diff suppressed because it is too large
Load Diff
@@ -27,6 +27,12 @@
|
||||
#ifndef _H265_H_
|
||||
#define _H265_H_
|
||||
|
||||
/* Maximum number of slices per frame the libva backend will accumulate
|
||||
* before submitting to the kernel (kernel HEVC slice_params dynamic-array
|
||||
* accepts up to 600 entries per Phase 0 V4L2 inventory; 64 is a
|
||||
* conservative cap for typical fixtures + safety bound). */
|
||||
#define HEVC_MAX_SLICES_PER_FRAME 64
|
||||
|
||||
struct object_context;
|
||||
struct object_surface;
|
||||
struct request_data;
|
||||
|
||||
@@ -0,0 +1,14 @@
|
||||
/* Stub for <gst/base/base-prelude.h> — GStreamer base-lib prelude.
|
||||
* In upstream GStreamer, this sets up the GstBaseExport macro + GObject
|
||||
* boilerplate. We bypass all of that and provide only what our four
|
||||
* vendored .c files actually need (gst_compat.h's typedefs).
|
||||
*
|
||||
* Crucially we also #define GST_BASE_API to nothing so the function
|
||||
* declarations in gstbitreader.h / gstbytereader.h drop the
|
||||
* dllimport / visibility attribute prefix.
|
||||
*/
|
||||
#ifndef LIBVA_V4L2_REQUEST_FOURIER_BASE_PRELUDE_STUB
|
||||
#define LIBVA_V4L2_REQUEST_FOURIER_BASE_PRELUDE_STUB
|
||||
#include "gst_compat.h"
|
||||
#define GST_BASE_API
|
||||
#endif
|
||||
@@ -0,0 +1,307 @@
|
||||
/* GStreamer
|
||||
*
|
||||
* Copyright (C) 2008 Sebastian Dröge <sebastian.droege@collabora.co.uk>.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Library General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Library General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Library General Public
|
||||
* License along with this library; if not, write to the
|
||||
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
|
||||
* Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#define GST_BIT_READER_DISABLE_INLINES
|
||||
#include "gstbitreader.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
/**
|
||||
* SECTION:gstbitreader
|
||||
* @title: GstBitReader
|
||||
* @short_description: Reads any number of bits from a memory buffer
|
||||
* @symbols:
|
||||
* - gst_bit_reader_skip_unchecked
|
||||
* - gst_bit_reader_skip_to_byte_unchecked
|
||||
* - gst_bit_reader_get_bits_uint8_unchecked
|
||||
* - gst_bit_reader_peek_bits_uint8_unchecked
|
||||
* - gst_bit_reader_get_bits_uint16_unchecked
|
||||
* - gst_bit_reader_peek_bits_uint16_unchecked
|
||||
* - gst_bit_reader_get_bits_uint32_unchecked
|
||||
* - gst_bit_reader_peek_bits_uint32_unchecked
|
||||
* - gst_bit_reader_get_bits_uint64_unchecked
|
||||
* - gst_bit_reader_peek_bits_uint64_unchecked
|
||||
*
|
||||
* #GstBitReader provides a bit reader that can read any number of bits
|
||||
* from a memory buffer. It provides functions for reading any number of bits
|
||||
* into 8, 16, 32 and 64 bit variables.
|
||||
*/
|
||||
|
||||
/**
|
||||
* gst_bit_reader_new: (skip)
|
||||
* @data: (array length=size): Data from which the #GstBitReader
|
||||
* should read
|
||||
* @size: Size of @data in bytes
|
||||
*
|
||||
* Create a new #GstBitReader instance, which will read from @data.
|
||||
*
|
||||
* Free-function: gst_bit_reader_free
|
||||
*
|
||||
* Returns: (transfer full): a new #GstBitReader instance
|
||||
*/
|
||||
GstBitReader *
|
||||
gst_bit_reader_new (const guint8 * data, guint size)
|
||||
{
|
||||
GstBitReader *ret = g_new0 (GstBitReader, 1);
|
||||
|
||||
ret->data = data;
|
||||
ret->size = size;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* gst_bit_reader_free:
|
||||
* @reader: (in) (transfer full): a #GstBitReader instance
|
||||
*
|
||||
* Frees a #GstBitReader instance, which was previously allocated by
|
||||
* gst_bit_reader_new().
|
||||
*/
|
||||
void
|
||||
gst_bit_reader_free (GstBitReader * reader)
|
||||
{
|
||||
g_return_if_fail (reader != NULL);
|
||||
|
||||
g_free (reader);
|
||||
}
|
||||
|
||||
/**
|
||||
* gst_bit_reader_init:
|
||||
* @reader: a #GstBitReader instance
|
||||
* @data: (in) (array length=size): data from which the bit reader should read
|
||||
* @size: Size of @data in bytes
|
||||
*
|
||||
* Initializes a #GstBitReader instance to read from @data. This function
|
||||
* can be called on already initialized instances.
|
||||
*/
|
||||
void
|
||||
gst_bit_reader_init (GstBitReader * reader, const guint8 * data, guint size)
|
||||
{
|
||||
g_return_if_fail (reader != NULL);
|
||||
|
||||
reader->data = data;
|
||||
reader->size = size;
|
||||
reader->byte = reader->bit = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* gst_bit_reader_set_pos:
|
||||
* @reader: a #GstBitReader instance
|
||||
* @pos: The new position in bits
|
||||
*
|
||||
* Sets the new position of a #GstBitReader instance to @pos in bits.
|
||||
*
|
||||
* Returns: %TRUE if the position could be set successfully, %FALSE
|
||||
* otherwise.
|
||||
*/
|
||||
gboolean
|
||||
gst_bit_reader_set_pos (GstBitReader * reader, guint pos)
|
||||
{
|
||||
g_return_val_if_fail (reader != NULL, FALSE);
|
||||
|
||||
if (pos > reader->size * 8)
|
||||
return FALSE;
|
||||
|
||||
reader->byte = pos / 8;
|
||||
reader->bit = pos % 8;
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/**
|
||||
* gst_bit_reader_get_pos:
|
||||
* @reader: a #GstBitReader instance
|
||||
*
|
||||
* Returns the current position of a #GstBitReader instance in bits.
|
||||
*
|
||||
* Returns: The current position of @reader in bits.
|
||||
*/
|
||||
guint
|
||||
gst_bit_reader_get_pos (const GstBitReader * reader)
|
||||
{
|
||||
return _gst_bit_reader_get_pos_inline (reader);
|
||||
}
|
||||
|
||||
/**
|
||||
* gst_bit_reader_get_remaining:
|
||||
* @reader: a #GstBitReader instance
|
||||
*
|
||||
* Returns the remaining number of bits of a #GstBitReader instance.
|
||||
*
|
||||
* Returns: The remaining number of bits of @reader instance.
|
||||
*/
|
||||
guint
|
||||
gst_bit_reader_get_remaining (const GstBitReader * reader)
|
||||
{
|
||||
return _gst_bit_reader_get_remaining_inline (reader);
|
||||
}
|
||||
|
||||
/**
|
||||
* gst_bit_reader_get_size:
|
||||
* @reader: a #GstBitReader instance
|
||||
*
|
||||
* Returns the total number of bits of a #GstBitReader instance.
|
||||
*
|
||||
* Returns: The total number of bits of @reader instance.
|
||||
*/
|
||||
guint
|
||||
gst_bit_reader_get_size (const GstBitReader * reader)
|
||||
{
|
||||
return _gst_bit_reader_get_size_inline (reader);
|
||||
}
|
||||
|
||||
/**
|
||||
* gst_bit_reader_skip:
|
||||
* @reader: a #GstBitReader instance
|
||||
* @nbits: the number of bits to skip
|
||||
*
|
||||
* Skips @nbits bits of the #GstBitReader instance.
|
||||
*
|
||||
* Returns: %TRUE if @nbits bits could be skipped, %FALSE otherwise.
|
||||
*/
|
||||
gboolean
|
||||
gst_bit_reader_skip (GstBitReader * reader, guint nbits)
|
||||
{
|
||||
return _gst_bit_reader_skip_inline (reader, nbits);
|
||||
}
|
||||
|
||||
/**
|
||||
* gst_bit_reader_skip_to_byte:
|
||||
* @reader: a #GstBitReader instance
|
||||
*
|
||||
* Skips until the next byte.
|
||||
*
|
||||
* Returns: %TRUE if successful, %FALSE otherwise.
|
||||
*/
|
||||
gboolean
|
||||
gst_bit_reader_skip_to_byte (GstBitReader * reader)
|
||||
{
|
||||
return _gst_bit_reader_skip_to_byte_inline (reader);
|
||||
}
|
||||
|
||||
/**
|
||||
* gst_bit_reader_get_bits_uint8:
|
||||
* @reader: a #GstBitReader instance
|
||||
* @val: (out): Pointer to a #guint8 to store the result
|
||||
* @nbits: number of bits to read
|
||||
*
|
||||
* Read @nbits bits into @val and update the current position.
|
||||
*
|
||||
* Returns: %TRUE if successful, %FALSE otherwise.
|
||||
*/
|
||||
|
||||
/**
|
||||
* gst_bit_reader_get_bits_uint16:
|
||||
* @reader: a #GstBitReader instance
|
||||
* @val: (out): Pointer to a #guint16 to store the result
|
||||
* @nbits: number of bits to read
|
||||
*
|
||||
* Read @nbits bits into @val and update the current position.
|
||||
*
|
||||
* Returns: %TRUE if successful, %FALSE otherwise.
|
||||
*/
|
||||
|
||||
/**
|
||||
* gst_bit_reader_get_bits_uint32:
|
||||
* @reader: a #GstBitReader instance
|
||||
* @val: (out): Pointer to a #guint32 to store the result
|
||||
* @nbits: number of bits to read
|
||||
*
|
||||
* Read @nbits bits into @val and update the current position.
|
||||
*
|
||||
* Returns: %TRUE if successful, %FALSE otherwise.
|
||||
*/
|
||||
|
||||
/**
|
||||
* gst_bit_reader_get_bits_uint64:
|
||||
* @reader: a #GstBitReader instance
|
||||
* @val: (out): Pointer to a #guint64 to store the result
|
||||
* @nbits: number of bits to read
|
||||
*
|
||||
* Read @nbits bits into @val and update the current position.
|
||||
*
|
||||
* Returns: %TRUE if successful, %FALSE otherwise.
|
||||
*/
|
||||
|
||||
/**
|
||||
* gst_bit_reader_peek_bits_uint8:
|
||||
* @reader: a #GstBitReader instance
|
||||
* @val: (out): Pointer to a #guint8 to store the result
|
||||
* @nbits: number of bits to read
|
||||
*
|
||||
* Read @nbits bits into @val but keep the current position.
|
||||
*
|
||||
* Returns: %TRUE if successful, %FALSE otherwise.
|
||||
*/
|
||||
|
||||
/**
|
||||
* gst_bit_reader_peek_bits_uint16:
|
||||
* @reader: a #GstBitReader instance
|
||||
* @val: (out): Pointer to a #guint16 to store the result
|
||||
* @nbits: number of bits to read
|
||||
*
|
||||
* Read @nbits bits into @val but keep the current position.
|
||||
*
|
||||
* Returns: %TRUE if successful, %FALSE otherwise.
|
||||
*/
|
||||
|
||||
/**
|
||||
* gst_bit_reader_peek_bits_uint32:
|
||||
* @reader: a #GstBitReader instance
|
||||
* @val: (out): Pointer to a #guint32 to store the result
|
||||
* @nbits: number of bits to read
|
||||
*
|
||||
* Read @nbits bits into @val but keep the current position.
|
||||
*
|
||||
* Returns: %TRUE if successful, %FALSE otherwise.
|
||||
*/
|
||||
|
||||
/**
|
||||
* gst_bit_reader_peek_bits_uint64:
|
||||
* @reader: a #GstBitReader instance
|
||||
* @val: (out): Pointer to a #guint64 to store the result
|
||||
* @nbits: number of bits to read
|
||||
*
|
||||
* Read @nbits bits into @val but keep the current position.
|
||||
*
|
||||
* Returns: %TRUE if successful, %FALSE otherwise.
|
||||
*/
|
||||
|
||||
/* Emits the out-of-line get/peek entry points for one result width; each
 * simply forwards to the matching bounds-checked inline helper. */
#define GST_BIT_READER_READ_BITS(bits) \
gboolean \
gst_bit_reader_get_bits_uint##bits (GstBitReader *reader, guint##bits *val, guint nbits) \
{ \
  const gboolean got = _gst_bit_reader_get_bits_uint##bits##_inline (reader, val, nbits); \
  return got; \
} \
\
gboolean \
gst_bit_reader_peek_bits_uint##bits (const GstBitReader *reader, guint##bits *val, guint nbits) \
{ \
  const gboolean peeked = _gst_bit_reader_peek_bits_uint##bits##_inline (reader, val, nbits); \
  return peeked; \
}
|
||||
|
||||
GST_BIT_READER_READ_BITS (8);
|
||||
GST_BIT_READER_READ_BITS (16);
|
||||
GST_BIT_READER_READ_BITS (32);
|
||||
GST_BIT_READER_READ_BITS (64);
|
||||
@@ -0,0 +1,328 @@
|
||||
/* GStreamer
|
||||
*
|
||||
* Copyright (C) 2008 Sebastian Dröge <sebastian.droege@collabora.co.uk>.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Library General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Library General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Library General Public
|
||||
* License along with this library; if not, write to the
|
||||
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
|
||||
* Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#ifndef __GST_BIT_READER_H__
|
||||
#define __GST_BIT_READER_H__
|
||||
|
||||
#include <gst/gst.h>
|
||||
#include <gst/base/base-prelude.h>
|
||||
|
||||
/* FIXME: inline functions */
|
||||
|
||||
G_BEGIN_DECLS
|
||||
|
||||
#define GST_BIT_READER(reader) ((GstBitReader *) (reader))
|
||||
|
||||
/**
|
||||
* GstBitReader:
|
||||
* @data: (array length=size): Data from which the bit reader will
|
||||
* read
|
||||
* @size: Size of @data in bytes
|
||||
* @byte: Current byte position
|
||||
* @bit: Bit position in the current byte
|
||||
*
|
||||
* A bit reader instance.
|
||||
*/
|
||||
/* Read cursor over a caller-provided byte buffer; the position is kept
 * as a (byte, bit) pair. */
typedef struct {
|
||||
const guint8 *data; /* Buffer being read; stored as given, never written through */
|
||||
guint size; /* Length of data in bytes */
|
||||
|
||||
guint byte; /* Byte position */
|
||||
guint bit; /* Bit position in the current byte; helpers keep it in 0..7 */
|
||||
|
||||
/* < private > */
|
||||
gpointer _gst_reserved[GST_PADDING];
|
||||
} GstBitReader;
|
||||
|
||||
GST_BASE_API
|
||||
GstBitReader * gst_bit_reader_new (const guint8 *data, guint size) G_GNUC_MALLOC;
|
||||
|
||||
GST_BASE_API
|
||||
void gst_bit_reader_free (GstBitReader *reader);
|
||||
|
||||
GST_BASE_API
|
||||
void gst_bit_reader_init (GstBitReader *reader, const guint8 *data, guint size);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_bit_reader_set_pos (GstBitReader *reader, guint pos);
|
||||
|
||||
GST_BASE_API
|
||||
guint gst_bit_reader_get_pos (const GstBitReader *reader);
|
||||
|
||||
GST_BASE_API
|
||||
guint gst_bit_reader_get_remaining (const GstBitReader *reader);
|
||||
|
||||
GST_BASE_API
|
||||
guint gst_bit_reader_get_size (const GstBitReader *reader);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_bit_reader_skip (GstBitReader *reader, guint nbits);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_bit_reader_skip_to_byte (GstBitReader *reader);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_bit_reader_get_bits_uint8 (GstBitReader *reader, guint8 *val, guint nbits);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_bit_reader_get_bits_uint16 (GstBitReader *reader, guint16 *val, guint nbits);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_bit_reader_get_bits_uint32 (GstBitReader *reader, guint32 *val, guint nbits);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_bit_reader_get_bits_uint64 (GstBitReader *reader, guint64 *val, guint nbits);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_bit_reader_peek_bits_uint8 (const GstBitReader *reader, guint8 *val, guint nbits);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_bit_reader_peek_bits_uint16 (const GstBitReader *reader, guint16 *val, guint nbits);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_bit_reader_peek_bits_uint32 (const GstBitReader *reader, guint32 *val, guint nbits);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_bit_reader_peek_bits_uint64 (const GstBitReader *reader, guint64 *val, guint nbits);
|
||||
|
||||
/**
|
||||
* GST_BIT_READER_INIT:
|
||||
* @data: Data from which the #GstBitReader should read
|
||||
* @size: Size of @data in bytes
|
||||
*
|
||||
* A #GstBitReader must be initialized with this macro, before it can be
|
||||
* used. This macro can be used to initialize a variable, but it cannot
|
||||
* be assigned to a variable. In that case you have to use
|
||||
* gst_bit_reader_init().
|
||||
*/
|
||||
#define GST_BIT_READER_INIT(data, size) {data, size, 0, 0}
|
||||
|
||||
/* Unchecked variants */
|
||||
|
||||
static inline void
|
||||
gst_bit_reader_skip_unchecked (GstBitReader * reader, guint nbits)
|
||||
{
|
||||
reader->bit += nbits;
|
||||
reader->byte += reader->bit / 8;
|
||||
reader->bit = reader->bit % 8;
|
||||
}
|
||||
|
||||
static inline void
|
||||
gst_bit_reader_skip_to_byte_unchecked (GstBitReader * reader)
|
||||
{
|
||||
if (reader->bit) {
|
||||
reader->bit = 0;
|
||||
reader->byte++;
|
||||
}
|
||||
}
|
||||
|
||||
/* Emits the unchecked peek/get pair for one result width.  Bits are
 * taken most-significant first; no bounds checking is performed. */
#define __GST_BIT_READER_READ_BITS_UNCHECKED(bits) \
static inline guint##bits \
gst_bit_reader_peek_bits_uint##bits##_unchecked (const GstBitReader *reader, guint nbits) \
{ \
  const guint8 *src = reader->data; \
  guint byte_idx = reader->byte; \
  guint bit_idx = reader->bit; \
  guint##bits acc = 0; \
  \
  while (nbits != 0) { \
    /* Take at most what is left of the current byte. */ \
    const guint chunk = MIN (nbits, 8 - bit_idx); \
    \
    acc <<= chunk; \
    acc |= (src[byte_idx] & (0xff >> bit_idx)) >> (8 - chunk - bit_idx); \
    \
    nbits -= chunk; \
    bit_idx += chunk; \
    if (bit_idx >= 8) { \
      bit_idx = 0; \
      byte_idx++; \
    } \
  } \
  \
  return acc; \
} \
\
static inline guint##bits \
gst_bit_reader_get_bits_uint##bits##_unchecked (GstBitReader *reader, guint nbits) \
{ \
  /* Peek first, then advance the cursor by the same amount. */ \
  const guint##bits value = gst_bit_reader_peek_bits_uint##bits##_unchecked (reader, nbits); \
  \
  gst_bit_reader_skip_unchecked (reader, nbits); \
  \
  return value; \
}
|
||||
|
||||
__GST_BIT_READER_READ_BITS_UNCHECKED (8)
|
||||
__GST_BIT_READER_READ_BITS_UNCHECKED (16)
|
||||
__GST_BIT_READER_READ_BITS_UNCHECKED (32)
|
||||
__GST_BIT_READER_READ_BITS_UNCHECKED (64)
|
||||
|
||||
#undef __GST_BIT_READER_READ_BITS_UNCHECKED
|
||||
|
||||
/* unchecked variants -- do not use */
|
||||
|
||||
static inline guint
|
||||
_gst_bit_reader_get_size_unchecked (const GstBitReader * reader)
|
||||
{
|
||||
return reader->size * 8;
|
||||
}
|
||||
|
||||
static inline guint
|
||||
_gst_bit_reader_get_pos_unchecked (const GstBitReader * reader)
|
||||
{
|
||||
return reader->byte * 8 + reader->bit;
|
||||
}
|
||||
|
||||
static inline guint
|
||||
_gst_bit_reader_get_remaining_unchecked (const GstBitReader * reader)
|
||||
{
|
||||
return reader->size * 8 - (reader->byte * 8 + reader->bit);
|
||||
}
|
||||
|
||||
/* inlined variants -- do not use directly */
|
||||
static inline guint
|
||||
_gst_bit_reader_get_size_inline (const GstBitReader * reader)
|
||||
{
|
||||
g_return_val_if_fail (reader != NULL, 0);
|
||||
|
||||
return _gst_bit_reader_get_size_unchecked (reader);
|
||||
}
|
||||
|
||||
static inline guint
|
||||
_gst_bit_reader_get_pos_inline (const GstBitReader * reader)
|
||||
{
|
||||
g_return_val_if_fail (reader != NULL, 0);
|
||||
|
||||
return _gst_bit_reader_get_pos_unchecked (reader);
|
||||
}
|
||||
|
||||
static inline guint
|
||||
_gst_bit_reader_get_remaining_inline (const GstBitReader * reader)
|
||||
{
|
||||
g_return_val_if_fail (reader != NULL, 0);
|
||||
|
||||
return _gst_bit_reader_get_remaining_unchecked (reader);
|
||||
}
|
||||
|
||||
static inline gboolean
|
||||
_gst_bit_reader_skip_inline (GstBitReader * reader, guint nbits)
|
||||
{
|
||||
g_return_val_if_fail (reader != NULL, FALSE);
|
||||
|
||||
if (_gst_bit_reader_get_remaining_unchecked (reader) < nbits)
|
||||
return FALSE;
|
||||
|
||||
gst_bit_reader_skip_unchecked (reader, nbits);
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static inline gboolean
|
||||
_gst_bit_reader_skip_to_byte_inline (GstBitReader * reader)
|
||||
{
|
||||
g_return_val_if_fail (reader != NULL, FALSE);
|
||||
|
||||
if (reader->byte > reader->size)
|
||||
return FALSE;
|
||||
|
||||
gst_bit_reader_skip_to_byte_unchecked (reader);
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/* Emits the bounds-checked get/peek pair for one result width: arguments
 * are validated, then the read happens only if enough bits remain. */
#define __GST_BIT_READER_READ_BITS_INLINE(bits) \
static inline gboolean \
_gst_bit_reader_get_bits_uint##bits##_inline (GstBitReader *reader, guint##bits *val, guint nbits) \
{ \
  g_return_val_if_fail (reader != NULL, FALSE); \
  g_return_val_if_fail (val != NULL, FALSE); \
  g_return_val_if_fail (nbits <= bits, FALSE); \
  \
  if (nbits <= _gst_bit_reader_get_remaining_unchecked (reader)) { \
    *val = gst_bit_reader_get_bits_uint##bits##_unchecked (reader, nbits); \
    return TRUE; \
  } \
  \
  return FALSE; \
} \
\
static inline gboolean \
_gst_bit_reader_peek_bits_uint##bits##_inline (const GstBitReader *reader, guint##bits *val, guint nbits) \
{ \
  g_return_val_if_fail (reader != NULL, FALSE); \
  g_return_val_if_fail (val != NULL, FALSE); \
  g_return_val_if_fail (nbits <= bits, FALSE); \
  \
  if (nbits <= _gst_bit_reader_get_remaining_unchecked (reader)) { \
    *val = gst_bit_reader_peek_bits_uint##bits##_unchecked (reader, nbits); \
    return TRUE; \
  } \
  \
  return FALSE; \
}
|
||||
|
||||
__GST_BIT_READER_READ_BITS_INLINE (8)
|
||||
__GST_BIT_READER_READ_BITS_INLINE (16)
|
||||
__GST_BIT_READER_READ_BITS_INLINE (32)
|
||||
__GST_BIT_READER_READ_BITS_INLINE (64)
|
||||
|
||||
#undef __GST_BIT_READER_READ_BITS_INLINE
|
||||
|
||||
#ifndef GST_BIT_READER_DISABLE_INLINES
|
||||
|
||||
#define gst_bit_reader_get_size(reader) \
|
||||
_gst_bit_reader_get_size_inline (reader)
|
||||
#define gst_bit_reader_get_pos(reader) \
|
||||
_gst_bit_reader_get_pos_inline (reader)
|
||||
#define gst_bit_reader_get_remaining(reader) \
|
||||
_gst_bit_reader_get_remaining_inline (reader)
|
||||
|
||||
/* we use defines here so we can add the G_LIKELY() */
|
||||
|
||||
#define gst_bit_reader_skip(reader, nbits)\
|
||||
G_LIKELY (_gst_bit_reader_skip_inline(reader, nbits))
|
||||
#define gst_bit_reader_skip_to_byte(reader)\
|
||||
G_LIKELY (_gst_bit_reader_skip_to_byte_inline(reader))
|
||||
|
||||
#define gst_bit_reader_get_bits_uint8(reader, val, nbits) \
|
||||
G_LIKELY (_gst_bit_reader_get_bits_uint8_inline (reader, val, nbits))
|
||||
#define gst_bit_reader_get_bits_uint16(reader, val, nbits) \
|
||||
G_LIKELY (_gst_bit_reader_get_bits_uint16_inline (reader, val, nbits))
|
||||
#define gst_bit_reader_get_bits_uint32(reader, val, nbits) \
|
||||
G_LIKELY (_gst_bit_reader_get_bits_uint32_inline (reader, val, nbits))
|
||||
#define gst_bit_reader_get_bits_uint64(reader, val, nbits) \
|
||||
G_LIKELY (_gst_bit_reader_get_bits_uint64_inline (reader, val, nbits))
|
||||
|
||||
#define gst_bit_reader_peek_bits_uint8(reader, val, nbits) \
|
||||
G_LIKELY (_gst_bit_reader_peek_bits_uint8_inline (reader, val, nbits))
|
||||
#define gst_bit_reader_peek_bits_uint16(reader, val, nbits) \
|
||||
G_LIKELY (_gst_bit_reader_peek_bits_uint16_inline (reader, val, nbits))
|
||||
#define gst_bit_reader_peek_bits_uint32(reader, val, nbits) \
|
||||
G_LIKELY (_gst_bit_reader_peek_bits_uint32_inline (reader, val, nbits))
|
||||
#define gst_bit_reader_peek_bits_uint64(reader, val, nbits) \
|
||||
G_LIKELY (_gst_bit_reader_peek_bits_uint64_inline (reader, val, nbits))
|
||||
#endif
|
||||
|
||||
G_END_DECLS
|
||||
|
||||
#endif /* __GST_BIT_READER_H__ */
|
||||
@@ -0,0 +1,67 @@
|
||||
/* Stub for <gst/base/gstbitwriter.h>.
|
||||
*
|
||||
* The vendored nalutils.c uses GstBitWriter for NAL emulation-prevention
|
||||
* byte INSERTION during write-side (encoder) operations. The libva
|
||||
* backend never invokes those paths — we only PARSE NAL units, never
|
||||
* write them. The functions must still compile + link though, so we
|
||||
* stub them with abort() runtime guards: if any future code path
|
||||
* accidentally invokes a writer function, we fail-fast instead of
|
||||
* silently corrupting.
|
||||
*
|
||||
* Header surface mirrors upstream gstbitwriter.h minimally — enough
|
||||
* for nalutils.c to compile.
|
||||
*/
|
||||
#ifndef LIBVA_V4L2_REQUEST_FOURIER_GSTBITWRITER_STUB
|
||||
#define LIBVA_V4L2_REQUEST_FOURIER_GSTBITWRITER_STUB
|
||||
|
||||
#include "gst_compat.h"
|
||||
|
||||
typedef struct {
|
||||
guint8 *data;
|
||||
guint bit_size;
|
||||
guint bit_capacity;
|
||||
gboolean auto_grow;
|
||||
gboolean owned;
|
||||
} GstBitWriter;
|
||||
|
||||
static inline void
|
||||
gst_bit_writer_init(GstBitWriter *bw) { (void)bw; abort(); }
|
||||
static inline void
|
||||
gst_bit_writer_init_with_size(GstBitWriter *bw, guint size, gboolean fixed) {
|
||||
(void)bw; (void)size; (void)fixed; abort();
|
||||
}
|
||||
static inline void
|
||||
gst_bit_writer_reset(GstBitWriter *bw) { (void)bw; abort(); }
|
||||
static inline gboolean
|
||||
gst_bit_writer_put_bits_uint8(GstBitWriter *bw, guint8 value, guint nbits) {
|
||||
(void)bw; (void)value; (void)nbits; abort();
|
||||
}
|
||||
static inline gboolean
|
||||
gst_bit_writer_align_bytes(GstBitWriter *bw, guint8 trailing_bit) {
|
||||
(void)bw; (void)trailing_bit; abort();
|
||||
}
|
||||
static inline guint8 *
|
||||
gst_bit_writer_get_data(GstBitWriter *bw) { (void)bw; abort(); }
|
||||
static inline guint
|
||||
gst_bit_writer_get_size(const GstBitWriter *bw) { (void)bw; abort(); }
|
||||
static inline guint
|
||||
gst_bit_writer_reset_and_get_size(GstBitWriter *bw) { (void)bw; abort(); }
|
||||
static inline guint8 *
|
||||
gst_bit_writer_reset_and_get_data(GstBitWriter *bw) { (void)bw; abort(); }
|
||||
static inline gboolean
|
||||
gst_bit_writer_put_bits_uint16(GstBitWriter *bw, guint16 value, guint nbits) {
|
||||
(void)bw; (void)value; (void)nbits; abort();
|
||||
}
|
||||
static inline gboolean
|
||||
gst_bit_writer_put_bits_uint32(GstBitWriter *bw, guint32 value, guint nbits) {
|
||||
(void)bw; (void)value; (void)nbits; abort();
|
||||
}
|
||||
static inline gboolean
|
||||
gst_bit_writer_put_bytes(GstBitWriter *bw, const guint8 *data, guint nbytes) {
|
||||
(void)bw; (void)data; (void)nbytes; abort();
|
||||
}
|
||||
|
||||
/* Plain field accessors: they expand to a member access and call nothing. */
#define GST_BIT_WRITER_BIT_SIZE(bw) ((bw)->bit_size)
#define GST_BIT_WRITER_DATA(bw) ((bw)->data)
|
||||
|
||||
#endif
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,684 @@
|
||||
/* GStreamer byte reader
|
||||
*
|
||||
* Copyright (C) 2008 Sebastian Dröge <sebastian.droege@collabora.co.uk>.
|
||||
* Copyright (C) 2009 Tim-Philipp Müller <tim centricular net>
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Library General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Library General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Library General Public
|
||||
* License along with this library; if not, write to the
|
||||
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
|
||||
* Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#ifndef __GST_BYTE_READER_H__
|
||||
#define __GST_BYTE_READER_H__
|
||||
|
||||
#include <gst/gst.h>
|
||||
#include <gst/base/base-prelude.h>
|
||||
|
||||
G_BEGIN_DECLS
|
||||
|
||||
#define GST_BYTE_READER(reader) ((GstByteReader *) (reader))
|
||||
|
||||
/**
|
||||
* GstByteReader:
|
||||
* @data: (array length=size): Data from which the byte reader will
|
||||
* read
|
||||
* @size: Size of @data in bytes
|
||||
* @byte: Current byte position
|
||||
*
|
||||
* A byte reader instance.
|
||||
*/
|
||||
typedef struct {
|
||||
const guint8 *data;
|
||||
guint size;
|
||||
|
||||
guint byte; /* Byte position */
|
||||
|
||||
/* < private > */
|
||||
gpointer _gst_reserved[GST_PADDING];
|
||||
} GstByteReader;
|
||||
|
||||
GST_BASE_API
|
||||
GstByteReader * gst_byte_reader_new (const guint8 *data, guint size) G_GNUC_MALLOC;
|
||||
|
||||
GST_BASE_API
|
||||
void gst_byte_reader_free (GstByteReader *reader);
|
||||
|
||||
GST_BASE_API
|
||||
void gst_byte_reader_init (GstByteReader *reader, const guint8 *data, guint size);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_byte_reader_peek_sub_reader (GstByteReader * reader,
|
||||
GstByteReader * sub_reader,
|
||||
guint size);
|
||||
GST_BASE_API
|
||||
gboolean gst_byte_reader_get_sub_reader (GstByteReader * reader,
|
||||
GstByteReader * sub_reader,
|
||||
guint size);
|
||||
GST_BASE_API
|
||||
gboolean gst_byte_reader_set_pos (GstByteReader *reader, guint pos);
|
||||
|
||||
GST_BASE_API
|
||||
guint gst_byte_reader_get_pos (const GstByteReader *reader);
|
||||
|
||||
GST_BASE_API
|
||||
guint gst_byte_reader_get_remaining (const GstByteReader *reader);
|
||||
|
||||
GST_BASE_API
|
||||
guint gst_byte_reader_get_size (const GstByteReader *reader);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_byte_reader_skip (GstByteReader *reader, guint nbytes);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_byte_reader_get_uint8 (GstByteReader *reader, guint8 *val);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_byte_reader_get_int8 (GstByteReader *reader, gint8 *val);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_byte_reader_get_uint16_le (GstByteReader *reader, guint16 *val);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_byte_reader_get_int16_le (GstByteReader *reader, gint16 *val);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_byte_reader_get_uint16_be (GstByteReader *reader, guint16 *val);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_byte_reader_get_int16_be (GstByteReader *reader, gint16 *val);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_byte_reader_get_uint24_le (GstByteReader *reader, guint32 *val);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_byte_reader_get_int24_le (GstByteReader *reader, gint32 *val);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_byte_reader_get_uint24_be (GstByteReader *reader, guint32 *val);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_byte_reader_get_int24_be (GstByteReader *reader, gint32 *val);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_byte_reader_get_uint32_le (GstByteReader *reader, guint32 *val);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_byte_reader_get_int32_le (GstByteReader *reader, gint32 *val);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_byte_reader_get_uint32_be (GstByteReader *reader, guint32 *val);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_byte_reader_get_int32_be (GstByteReader *reader, gint32 *val);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_byte_reader_get_uint64_le (GstByteReader *reader, guint64 *val);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_byte_reader_get_int64_le (GstByteReader *reader, gint64 *val);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_byte_reader_get_uint64_be (GstByteReader *reader, guint64 *val);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_byte_reader_get_int64_be (GstByteReader *reader, gint64 *val);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_byte_reader_peek_uint8 (const GstByteReader *reader, guint8 *val);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_byte_reader_peek_int8 (const GstByteReader *reader, gint8 *val);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_byte_reader_peek_uint16_le (const GstByteReader *reader, guint16 *val);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_byte_reader_peek_int16_le (const GstByteReader *reader, gint16 *val);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_byte_reader_peek_uint16_be (const GstByteReader *reader, guint16 *val);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_byte_reader_peek_int16_be (const GstByteReader *reader, gint16 *val);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_byte_reader_peek_uint24_le (const GstByteReader *reader, guint32 *val);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_byte_reader_peek_int24_le (const GstByteReader *reader, gint32 *val);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_byte_reader_peek_uint24_be (const GstByteReader *reader, guint32 *val);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_byte_reader_peek_int24_be (const GstByteReader *reader, gint32 *val);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_byte_reader_peek_uint32_le (const GstByteReader *reader, guint32 *val);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_byte_reader_peek_int32_le (const GstByteReader *reader, gint32 *val);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_byte_reader_peek_uint32_be (const GstByteReader *reader, guint32 *val);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_byte_reader_peek_int32_be (const GstByteReader *reader, gint32 *val);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_byte_reader_peek_uint64_le (const GstByteReader *reader, guint64 *val);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_byte_reader_peek_int64_le (const GstByteReader *reader, gint64 *val);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_byte_reader_peek_uint64_be (const GstByteReader *reader, guint64 *val);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_byte_reader_peek_int64_be (const GstByteReader *reader, gint64 *val);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_byte_reader_get_float32_le (GstByteReader *reader, gfloat *val);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_byte_reader_get_float32_be (GstByteReader *reader, gfloat *val);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_byte_reader_get_float64_le (GstByteReader *reader, gdouble *val);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_byte_reader_get_float64_be (GstByteReader *reader, gdouble *val);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_byte_reader_peek_float32_le (const GstByteReader *reader, gfloat *val);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_byte_reader_peek_float32_be (const GstByteReader *reader, gfloat *val);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_byte_reader_peek_float64_le (const GstByteReader *reader, gdouble *val);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_byte_reader_peek_float64_be (const GstByteReader *reader, gdouble *val);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_byte_reader_dup_data (GstByteReader * reader, guint size, guint8 ** val);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_byte_reader_get_data (GstByteReader * reader, guint size, const guint8 ** val);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_byte_reader_peek_data (const GstByteReader * reader, guint size, const guint8 ** val);
|
||||
|
||||
/* The unsuffixed name is an alias for the UTF-8 variant. */
#define gst_byte_reader_dup_string(reader,str) gst_byte_reader_dup_string_utf8(reader,str)
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_byte_reader_dup_string_utf8 (GstByteReader * reader, gchar ** str);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_byte_reader_dup_string_utf16 (GstByteReader * reader, guint16 ** str);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_byte_reader_dup_string_utf32 (GstByteReader * reader, guint32 ** str);
|
||||
|
||||
/* The unsuffixed name is an alias for the UTF-8 variant. */
#define gst_byte_reader_skip_string(reader) gst_byte_reader_skip_string_utf8(reader)
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_byte_reader_skip_string_utf8 (GstByteReader * reader);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_byte_reader_skip_string_utf16 (GstByteReader * reader);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_byte_reader_skip_string_utf32 (GstByteReader * reader);
|
||||
|
||||
/* The unsuffixed get/peek names are aliases for the UTF-8 variants. */
#define gst_byte_reader_get_string(reader,str) gst_byte_reader_get_string_utf8(reader,str)

#define gst_byte_reader_peek_string(reader,str) gst_byte_reader_peek_string_utf8(reader,str)
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_byte_reader_get_string_utf8 (GstByteReader * reader, const gchar ** str);
|
||||
|
||||
GST_BASE_API
|
||||
gboolean gst_byte_reader_peek_string_utf8 (const GstByteReader * reader, const gchar ** str);
|
||||
|
||||
GST_BASE_API
|
||||
guint gst_byte_reader_masked_scan_uint32 (const GstByteReader * reader,
|
||||
guint32 mask,
|
||||
guint32 pattern,
|
||||
guint offset,
|
||||
guint size);
|
||||
GST_BASE_API
|
||||
guint gst_byte_reader_masked_scan_uint32_peek (const GstByteReader * reader,
|
||||
guint32 mask,
|
||||
guint32 pattern,
|
||||
guint offset,
|
||||
guint size,
|
||||
guint32 * value);
|
||||
|
||||
/**
|
||||
* GST_BYTE_READER_INIT:
|
||||
* @data: Data from which the #GstByteReader should read
|
||||
* @size: Size of @data in bytes
|
||||
*
|
||||
* A #GstByteReader must be initialized with this macro, before it can be
|
||||
* used. This macro can be used to initialize a variable, but it cannot
|
||||
* be assigned to a variable. In that case you have to use
|
||||
* gst_byte_reader_init().
|
||||
*/
|
||||
#define GST_BYTE_READER_INIT(data, size) {data, size, 0}
|
||||
|
||||
/* unchecked variants */
|
||||
static inline void
|
||||
gst_byte_reader_skip_unchecked (GstByteReader * reader, guint nbytes)
|
||||
{
|
||||
reader->byte += nbytes;
|
||||
}
|
||||
|
||||
/* Generates gst_byte_reader_peek_<lower>_unchecked() and
 * gst_byte_reader_get_<lower>_unchecked().
 *
 *   bits   width of the stored value; the get variant advances by bits / 8
 *   type   C type returned to the caller
 *   lower  suffix used in the generated function names
 *   upper  suffix of the GST_READ_* macro used to load the value
 *   adj    optional statement(s) run on the local `val` before it is returned
 *
 * Neither generated function checks the remaining size. */
#define __GST_BYTE_READER_GET_PEEK_BITS_UNCHECKED(bits,type,lower,upper,adj) \
\
static inline type \
gst_byte_reader_peek_##lower##_unchecked (const GstByteReader * reader) \
{ \
  type val = (type) GST_READ_##upper (reader->data + reader->byte); \
  adj \
  return val; \
} \
\
static inline type \
gst_byte_reader_get_##lower##_unchecked (GstByteReader * reader) \
{ \
  type result = gst_byte_reader_peek_##lower##_unchecked (reader); \
  gst_byte_reader_skip_unchecked (reader, bits / 8); \
  return result; \
}
|
||||
|
||||
/* 8-bit */
__GST_BYTE_READER_GET_PEEK_BITS_UNCHECKED(8,guint8,uint8,UINT8,/* */)
__GST_BYTE_READER_GET_PEEK_BITS_UNCHECKED(8,gint8,int8,UINT8,/* */)

/* 16-bit */
__GST_BYTE_READER_GET_PEEK_BITS_UNCHECKED(16,guint16,uint16_le,UINT16_LE,/* */)
__GST_BYTE_READER_GET_PEEK_BITS_UNCHECKED(16,guint16,uint16_be,UINT16_BE,/* */)
__GST_BYTE_READER_GET_PEEK_BITS_UNCHECKED(16,gint16,int16_le,UINT16_LE,/* */)
__GST_BYTE_READER_GET_PEEK_BITS_UNCHECKED(16,gint16,int16_be,UINT16_BE,/* */)

/* 32-bit */
__GST_BYTE_READER_GET_PEEK_BITS_UNCHECKED(32,guint32,uint32_le,UINT32_LE,/* */)
__GST_BYTE_READER_GET_PEEK_BITS_UNCHECKED(32,guint32,uint32_be,UINT32_BE,/* */)
__GST_BYTE_READER_GET_PEEK_BITS_UNCHECKED(32,gint32,int32_le,UINT32_LE,/* */)
__GST_BYTE_READER_GET_PEEK_BITS_UNCHECKED(32,gint32,int32_be,UINT32_BE,/* */)

/* 24-bit, unsigned */
__GST_BYTE_READER_GET_PEEK_BITS_UNCHECKED(24,guint32,uint24_le,UINT24_LE,/* */)
__GST_BYTE_READER_GET_PEEK_BITS_UNCHECKED(24,guint32,uint24_be,UINT24_BE,/* */)

/* fix up the sign for 24-bit signed ints stored in 32-bit signed ints */
__GST_BYTE_READER_GET_PEEK_BITS_UNCHECKED(24,gint32,int24_le,UINT24_LE,
    if (val & 0x00800000) val |= 0xff000000;)
__GST_BYTE_READER_GET_PEEK_BITS_UNCHECKED(24,gint32,int24_be,UINT24_BE,
    if (val & 0x00800000) val |= 0xff000000;)

/* 64-bit */
__GST_BYTE_READER_GET_PEEK_BITS_UNCHECKED(64,guint64,uint64_le,UINT64_LE,/* */)
__GST_BYTE_READER_GET_PEEK_BITS_UNCHECKED(64,guint64,uint64_be,UINT64_BE,/* */)
__GST_BYTE_READER_GET_PEEK_BITS_UNCHECKED(64,gint64,int64_le,UINT64_LE,/* */)
__GST_BYTE_READER_GET_PEEK_BITS_UNCHECKED(64,gint64,int64_be,UINT64_BE,/* */)

/* floating point */
__GST_BYTE_READER_GET_PEEK_BITS_UNCHECKED(32,gfloat,float32_le,FLOAT_LE,/* */)
__GST_BYTE_READER_GET_PEEK_BITS_UNCHECKED(32,gfloat,float32_be,FLOAT_BE,/* */)
__GST_BYTE_READER_GET_PEEK_BITS_UNCHECKED(64,gdouble,float64_le,DOUBLE_LE,/* */)
__GST_BYTE_READER_GET_PEEK_BITS_UNCHECKED(64,gdouble,float64_be,DOUBLE_BE,/* */)
|
||||
|
||||
/* Retire the generator macro once all unchecked accessors are emitted, so it
 * does not leak into files that include this header. */
#undef __GST_BYTE_READER_GET_PEEK_BITS_UNCHECKED
|
||||
|
||||
static inline const guint8 *
|
||||
gst_byte_reader_peek_data_unchecked (const GstByteReader * reader)
|
||||
{
|
||||
return (const guint8 *) (reader->data + reader->byte);
|
||||
}
|
||||
|
||||
static inline const guint8 *
|
||||
gst_byte_reader_get_data_unchecked (GstByteReader * reader, guint size)
|
||||
{
|
||||
const guint8 *data;
|
||||
|
||||
data = gst_byte_reader_peek_data_unchecked (reader);
|
||||
gst_byte_reader_skip_unchecked (reader, size);
|
||||
return data;
|
||||
}
|
||||
|
||||
static inline guint8 *
|
||||
gst_byte_reader_dup_data_unchecked (GstByteReader * reader, guint size)
|
||||
{
|
||||
gconstpointer data = gst_byte_reader_get_data_unchecked (reader, size);
|
||||
guint8 *dup_data = (guint8 *) g_malloc (size);
|
||||
|
||||
memcpy (dup_data, data, size);
|
||||
return dup_data;
|
||||
}
|
||||
|
||||
/* Unchecked variants that should not be used */
|
||||
static inline guint
|
||||
_gst_byte_reader_get_pos_unchecked (const GstByteReader * reader)
|
||||
{
|
||||
return reader->byte;
|
||||
}
|
||||
|
||||
static inline guint
|
||||
_gst_byte_reader_get_remaining_unchecked (const GstByteReader * reader)
|
||||
{
|
||||
return reader->size - reader->byte;
|
||||
}
|
||||
|
||||
static inline guint
|
||||
_gst_byte_reader_get_size_unchecked (const GstByteReader * reader)
|
||||
{
|
||||
return reader->size;
|
||||
}
|
||||
|
||||
/* inlined variants (do not use directly) */
|
||||
|
||||
static inline guint
|
||||
_gst_byte_reader_get_remaining_inline (const GstByteReader * reader)
|
||||
{
|
||||
g_return_val_if_fail (reader != NULL, 0);
|
||||
|
||||
return _gst_byte_reader_get_remaining_unchecked (reader);
|
||||
}
|
||||
|
||||
static inline guint
|
||||
_gst_byte_reader_get_size_inline (const GstByteReader * reader)
|
||||
{
|
||||
g_return_val_if_fail (reader != NULL, 0);
|
||||
|
||||
return _gst_byte_reader_get_size_unchecked (reader);
|
||||
}
|
||||
|
||||
/* Generates the bounds-checked _gst_byte_reader_peek_<name>_inline() and
 * _gst_byte_reader_get_<name>_inline().
 *
 *   bits  width of the stored value; bits / 8 bytes must remain
 *   type  C type written through @val
 *   name  suffix used in the generated function names
 *
 * Each generated function returns FALSE when @reader or @val is NULL or when
 * fewer than bits / 8 bytes remain; otherwise it stores the value and returns
 * TRUE. */
#define __GST_BYTE_READER_GET_PEEK_BITS_INLINE(bits,type,name) \
\
static inline gboolean \
_gst_byte_reader_peek_##name##_inline (const GstByteReader * reader, type * val) \
{ \
  g_return_val_if_fail (reader != NULL, FALSE); \
  g_return_val_if_fail (val != NULL, FALSE); \
\
  if (_gst_byte_reader_get_remaining_unchecked (reader) >= (bits / 8)) { \
    *val = gst_byte_reader_peek_##name##_unchecked (reader); \
    return TRUE; \
  } \
  return FALSE; \
} \
\
static inline gboolean \
_gst_byte_reader_get_##name##_inline (GstByteReader * reader, type * val) \
{ \
  g_return_val_if_fail (reader != NULL, FALSE); \
  g_return_val_if_fail (val != NULL, FALSE); \
\
  if (_gst_byte_reader_get_remaining_unchecked (reader) >= (bits / 8)) { \
    *val = gst_byte_reader_get_##name##_unchecked (reader); \
    return TRUE; \
  } \
  return FALSE; \
}
|
||||
|
||||
/* 8-bit */
__GST_BYTE_READER_GET_PEEK_BITS_INLINE(8,guint8,uint8)
__GST_BYTE_READER_GET_PEEK_BITS_INLINE(8,gint8,int8)

/* 16-bit */
__GST_BYTE_READER_GET_PEEK_BITS_INLINE(16,guint16,uint16_le)
__GST_BYTE_READER_GET_PEEK_BITS_INLINE(16,guint16,uint16_be)
__GST_BYTE_READER_GET_PEEK_BITS_INLINE(16,gint16,int16_le)
__GST_BYTE_READER_GET_PEEK_BITS_INLINE(16,gint16,int16_be)

/* 32-bit */
__GST_BYTE_READER_GET_PEEK_BITS_INLINE(32,guint32,uint32_le)
__GST_BYTE_READER_GET_PEEK_BITS_INLINE(32,guint32,uint32_be)
__GST_BYTE_READER_GET_PEEK_BITS_INLINE(32,gint32,int32_le)
__GST_BYTE_READER_GET_PEEK_BITS_INLINE(32,gint32,int32_be)

/* 24-bit */
__GST_BYTE_READER_GET_PEEK_BITS_INLINE(24,guint32,uint24_le)
__GST_BYTE_READER_GET_PEEK_BITS_INLINE(24,guint32,uint24_be)
__GST_BYTE_READER_GET_PEEK_BITS_INLINE(24,gint32,int24_le)
__GST_BYTE_READER_GET_PEEK_BITS_INLINE(24,gint32,int24_be)

/* 64-bit */
__GST_BYTE_READER_GET_PEEK_BITS_INLINE(64,guint64,uint64_le)
__GST_BYTE_READER_GET_PEEK_BITS_INLINE(64,guint64,uint64_be)
__GST_BYTE_READER_GET_PEEK_BITS_INLINE(64,gint64,int64_le)
__GST_BYTE_READER_GET_PEEK_BITS_INLINE(64,gint64,int64_be)

/* floating point */
__GST_BYTE_READER_GET_PEEK_BITS_INLINE(32,gfloat,float32_le)
__GST_BYTE_READER_GET_PEEK_BITS_INLINE(32,gfloat,float32_be)
__GST_BYTE_READER_GET_PEEK_BITS_INLINE(64,gdouble,float64_le)
__GST_BYTE_READER_GET_PEEK_BITS_INLINE(64,gdouble,float64_be)

/* The generator is not needed past this point. */
#undef __GST_BYTE_READER_GET_PEEK_BITS_INLINE
|
||||
|
||||
#ifndef GST_BYTE_READER_DISABLE_INLINES

/* Map the public names onto their inline implementations. */
#define gst_byte_reader_init(reader,data,size) _gst_byte_reader_init_inline(reader,data,size)

#define gst_byte_reader_get_remaining(reader) _gst_byte_reader_get_remaining_inline(reader)

#define gst_byte_reader_get_size(reader) _gst_byte_reader_get_size_inline(reader)

#define gst_byte_reader_get_pos(reader) _gst_byte_reader_get_pos_inline(reader)

/* we use defines here so we can add the G_LIKELY() */
#define gst_byte_reader_get_uint8(reader,val) G_LIKELY(_gst_byte_reader_get_uint8_inline(reader,val))
#define gst_byte_reader_get_int8(reader,val) G_LIKELY(_gst_byte_reader_get_int8_inline(reader,val))
#define gst_byte_reader_get_uint16_le(reader,val) G_LIKELY(_gst_byte_reader_get_uint16_le_inline(reader,val))
#define gst_byte_reader_get_int16_le(reader,val) G_LIKELY(_gst_byte_reader_get_int16_le_inline(reader,val))
#define gst_byte_reader_get_uint16_be(reader,val) G_LIKELY(_gst_byte_reader_get_uint16_be_inline(reader,val))
#define gst_byte_reader_get_int16_be(reader,val) G_LIKELY(_gst_byte_reader_get_int16_be_inline(reader,val))
#define gst_byte_reader_get_uint24_le(reader,val) G_LIKELY(_gst_byte_reader_get_uint24_le_inline(reader,val))
#define gst_byte_reader_get_int24_le(reader,val) G_LIKELY(_gst_byte_reader_get_int24_le_inline(reader,val))
#define gst_byte_reader_get_uint24_be(reader,val) G_LIKELY(_gst_byte_reader_get_uint24_be_inline(reader,val))
#define gst_byte_reader_get_int24_be(reader,val) G_LIKELY(_gst_byte_reader_get_int24_be_inline(reader,val))
#define gst_byte_reader_get_uint32_le(reader,val) G_LIKELY(_gst_byte_reader_get_uint32_le_inline(reader,val))
#define gst_byte_reader_get_int32_le(reader,val) G_LIKELY(_gst_byte_reader_get_int32_le_inline(reader,val))
#define gst_byte_reader_get_uint32_be(reader,val) G_LIKELY(_gst_byte_reader_get_uint32_be_inline(reader,val))
#define gst_byte_reader_get_int32_be(reader,val) G_LIKELY(_gst_byte_reader_get_int32_be_inline(reader,val))
#define gst_byte_reader_get_uint64_le(reader,val) G_LIKELY(_gst_byte_reader_get_uint64_le_inline(reader,val))
#define gst_byte_reader_get_int64_le(reader,val) G_LIKELY(_gst_byte_reader_get_int64_le_inline(reader,val))
#define gst_byte_reader_get_uint64_be(reader,val) G_LIKELY(_gst_byte_reader_get_uint64_be_inline(reader,val))
#define gst_byte_reader_get_int64_be(reader,val) G_LIKELY(_gst_byte_reader_get_int64_be_inline(reader,val))

#define gst_byte_reader_peek_uint8(reader,val) G_LIKELY(_gst_byte_reader_peek_uint8_inline(reader,val))
#define gst_byte_reader_peek_int8(reader,val) G_LIKELY(_gst_byte_reader_peek_int8_inline(reader,val))
#define gst_byte_reader_peek_uint16_le(reader,val) G_LIKELY(_gst_byte_reader_peek_uint16_le_inline(reader,val))
#define gst_byte_reader_peek_int16_le(reader,val) G_LIKELY(_gst_byte_reader_peek_int16_le_inline(reader,val))
#define gst_byte_reader_peek_uint16_be(reader,val) G_LIKELY(_gst_byte_reader_peek_uint16_be_inline(reader,val))
#define gst_byte_reader_peek_int16_be(reader,val) G_LIKELY(_gst_byte_reader_peek_int16_be_inline(reader,val))
#define gst_byte_reader_peek_uint24_le(reader,val) G_LIKELY(_gst_byte_reader_peek_uint24_le_inline(reader,val))
#define gst_byte_reader_peek_int24_le(reader,val) G_LIKELY(_gst_byte_reader_peek_int24_le_inline(reader,val))
#define gst_byte_reader_peek_uint24_be(reader,val) G_LIKELY(_gst_byte_reader_peek_uint24_be_inline(reader,val))
#define gst_byte_reader_peek_int24_be(reader,val) G_LIKELY(_gst_byte_reader_peek_int24_be_inline(reader,val))
#define gst_byte_reader_peek_uint32_le(reader,val) G_LIKELY(_gst_byte_reader_peek_uint32_le_inline(reader,val))
#define gst_byte_reader_peek_int32_le(reader,val) G_LIKELY(_gst_byte_reader_peek_int32_le_inline(reader,val))
#define gst_byte_reader_peek_uint32_be(reader,val) G_LIKELY(_gst_byte_reader_peek_uint32_be_inline(reader,val))
#define gst_byte_reader_peek_int32_be(reader,val) G_LIKELY(_gst_byte_reader_peek_int32_be_inline(reader,val))
#define gst_byte_reader_peek_uint64_le(reader,val) G_LIKELY(_gst_byte_reader_peek_uint64_le_inline(reader,val))
#define gst_byte_reader_peek_int64_le(reader,val) G_LIKELY(_gst_byte_reader_peek_int64_le_inline(reader,val))
#define gst_byte_reader_peek_uint64_be(reader,val) G_LIKELY(_gst_byte_reader_peek_uint64_be_inline(reader,val))
#define gst_byte_reader_peek_int64_be(reader,val) G_LIKELY(_gst_byte_reader_peek_int64_be_inline(reader,val))

#define gst_byte_reader_get_float32_le(reader,val) G_LIKELY(_gst_byte_reader_get_float32_le_inline(reader,val))
#define gst_byte_reader_get_float32_be(reader,val) G_LIKELY(_gst_byte_reader_get_float32_be_inline(reader,val))
#define gst_byte_reader_get_float64_le(reader,val) G_LIKELY(_gst_byte_reader_get_float64_le_inline(reader,val))
#define gst_byte_reader_get_float64_be(reader,val) G_LIKELY(_gst_byte_reader_get_float64_be_inline(reader,val))
#define gst_byte_reader_peek_float32_le(reader,val) G_LIKELY(_gst_byte_reader_peek_float32_le_inline(reader,val))
#define gst_byte_reader_peek_float32_be(reader,val) G_LIKELY(_gst_byte_reader_peek_float32_be_inline(reader,val))
#define gst_byte_reader_peek_float64_le(reader,val) G_LIKELY(_gst_byte_reader_peek_float64_le_inline(reader,val))
#define gst_byte_reader_peek_float64_be(reader,val) G_LIKELY(_gst_byte_reader_peek_float64_be_inline(reader,val))

#endif /* GST_BYTE_READER_DISABLE_INLINES */
|
||||
|
||||
static inline void
|
||||
_gst_byte_reader_init_inline (GstByteReader * reader, const guint8 * data, guint size)
|
||||
{
|
||||
g_return_if_fail (reader != NULL);
|
||||
|
||||
reader->data = data;
|
||||
reader->size = size;
|
||||
reader->byte = 0;
|
||||
}
|
||||
|
||||
static inline gboolean
|
||||
_gst_byte_reader_peek_sub_reader_inline (GstByteReader * reader,
|
||||
GstByteReader * sub_reader, guint size)
|
||||
{
|
||||
g_return_val_if_fail (reader != NULL, FALSE);
|
||||
g_return_val_if_fail (sub_reader != NULL, FALSE);
|
||||
|
||||
if (_gst_byte_reader_get_remaining_unchecked (reader) < size)
|
||||
return FALSE;
|
||||
|
||||
sub_reader->data = reader->data + reader->byte;
|
||||
sub_reader->byte = 0;
|
||||
sub_reader->size = size;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static inline gboolean
|
||||
_gst_byte_reader_get_sub_reader_inline (GstByteReader * reader,
|
||||
GstByteReader * sub_reader, guint size)
|
||||
{
|
||||
if (!_gst_byte_reader_peek_sub_reader_inline (reader, sub_reader, size))
|
||||
return FALSE;
|
||||
gst_byte_reader_skip_unchecked (reader, size);
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static inline gboolean
|
||||
_gst_byte_reader_dup_data_inline (GstByteReader * reader, guint size, guint8 ** val)
|
||||
{
|
||||
g_return_val_if_fail (reader != NULL, FALSE);
|
||||
g_return_val_if_fail (val != NULL, FALSE);
|
||||
|
||||
if (G_UNLIKELY (size > reader->size || _gst_byte_reader_get_remaining_unchecked (reader) < size))
|
||||
return FALSE;
|
||||
|
||||
*val = gst_byte_reader_dup_data_unchecked (reader, size);
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static inline gboolean
|
||||
_gst_byte_reader_get_data_inline (GstByteReader * reader, guint size, const guint8 ** val)
|
||||
{
|
||||
g_return_val_if_fail (reader != NULL, FALSE);
|
||||
g_return_val_if_fail (val != NULL, FALSE);
|
||||
|
||||
if (G_UNLIKELY (size > reader->size || _gst_byte_reader_get_remaining_unchecked (reader) < size))
|
||||
return FALSE;
|
||||
|
||||
*val = gst_byte_reader_get_data_unchecked (reader, size);
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static inline gboolean
|
||||
_gst_byte_reader_peek_data_inline (const GstByteReader * reader, guint size, const guint8 ** val)
|
||||
{
|
||||
g_return_val_if_fail (reader != NULL, FALSE);
|
||||
g_return_val_if_fail (val != NULL, FALSE);
|
||||
|
||||
if (G_UNLIKELY (size > reader->size || _gst_byte_reader_get_remaining_unchecked (reader) < size))
|
||||
return FALSE;
|
||||
|
||||
*val = gst_byte_reader_peek_data_unchecked (reader);
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static inline guint
|
||||
_gst_byte_reader_get_pos_inline (const GstByteReader * reader)
|
||||
{
|
||||
g_return_val_if_fail (reader != NULL, 0);
|
||||
|
||||
return _gst_byte_reader_get_pos_unchecked (reader);
|
||||
}
|
||||
|
||||
static inline gboolean
|
||||
_gst_byte_reader_skip_inline (GstByteReader * reader, guint nbytes)
|
||||
{
|
||||
g_return_val_if_fail (reader != NULL, FALSE);
|
||||
|
||||
if (G_UNLIKELY (_gst_byte_reader_get_remaining_unchecked (reader) < nbytes))
|
||||
return FALSE;
|
||||
|
||||
reader->byte += nbytes;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
#ifndef GST_BYTE_READER_DISABLE_INLINES

/* Map the data and skip entry points onto their inline implementations,
 * wrapped in G_LIKELY(). */
#define gst_byte_reader_dup_data(reader,size,val) G_LIKELY(_gst_byte_reader_dup_data_inline(reader,size,val))
#define gst_byte_reader_get_data(reader,size,val) G_LIKELY(_gst_byte_reader_get_data_inline(reader,size,val))
#define gst_byte_reader_peek_data(reader,size,val) G_LIKELY(_gst_byte_reader_peek_data_inline(reader,size,val))
#define gst_byte_reader_skip(reader,nbytes) G_LIKELY(_gst_byte_reader_skip_inline(reader,nbytes))

#endif /* GST_BYTE_READER_DISABLE_INLINES */
|
||||
|
||||
G_END_DECLS
|
||||
|
||||
#endif /* __GST_BYTE_READER_H__ */
|
||||
@@ -0,0 +1,9 @@
|
||||
/* Stub for <gst/codecparsers/codecparsers-prelude.h>.
|
||||
* Same shape as base-prelude.h — drop the GObject boilerplate + define
|
||||
* the GST_CODEC_PARSERS_API macro to nothing.
|
||||
*/
|
||||
#ifndef LIBVA_V4L2_REQUEST_FOURIER_CODECPARSERS_PRELUDE_STUB
|
||||
#define LIBVA_V4L2_REQUEST_FOURIER_CODECPARSERS_PRELUDE_STUB
|
||||
#include "gst_compat.h"
|
||||
#define GST_CODEC_PARSERS_API
|
||||
#endif
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,545 @@
|
||||
/* Gstreamer
|
||||
* Copyright (C) <2011> Intel Corporation
|
||||
* Copyright (C) <2011> Collabora Ltd.
|
||||
* Copyright (C) <2011> Thibault Saunier <thibault.saunier@collabora.com>
|
||||
*
|
||||
* Some bits C-c,C-v'ed and s/4/3 from h264parse and videoparsers/h264parse.c:
|
||||
* Copyright (C) <2010> Mark Nauwelaerts <mark.nauwelaerts@collabora.co.uk>
|
||||
* Copyright (C) <2010> Collabora Multimedia
|
||||
* Copyright (C) <2010> Nokia Corporation
|
||||
*
|
||||
* (C) 2005 Michal Benes <michal.benes@itonis.tv>
|
||||
* (C) 2008 Wim Taymans <wim.taymans@gmail.com>
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Library General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Library General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Library General Public
|
||||
* License along with this library; if not, write to the
|
||||
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
|
||||
* Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Common code for NAL parsing from h264 and h265 parsers.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
# include "config.h"
|
||||
#endif
|
||||
|
||||
#include "nalutils.h"
|
||||
|
||||
/****** Nal parser ******/
|
||||
|
||||
void
|
||||
nal_reader_init (NalReader * nr, const guint8 * data, guint size)
|
||||
{
|
||||
nr->data = data;
|
||||
nr->size = size;
|
||||
nr->n_epb = 0;
|
||||
|
||||
nr->byte = 0;
|
||||
nr->bits_in_cache = 0;
|
||||
/* fill with something other than 0 to detect emulation prevention bytes */
|
||||
nr->first_byte = 0xff;
|
||||
nr->epb_cache = 0xff;
|
||||
nr->cache = 0xff;
|
||||
}
|
||||
|
||||
gboolean
|
||||
nal_reader_read (NalReader * nr, guint nbits)
|
||||
{
|
||||
if (G_UNLIKELY (nr->byte * 8 + (nbits - nr->bits_in_cache) > nr->size * 8)) {
|
||||
GST_DEBUG ("Can not read %u bits, bits in cache %u, Byte * 8 %u, size in "
|
||||
"bits %u", nbits, nr->bits_in_cache, nr->byte * 8, nr->size * 8);
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
while (nr->bits_in_cache < nbits) {
|
||||
guint8 byte;
|
||||
|
||||
next_byte:
|
||||
if (G_UNLIKELY (nr->byte >= nr->size))
|
||||
return FALSE;
|
||||
|
||||
byte = nr->data[nr->byte++];
|
||||
nr->epb_cache = (nr->epb_cache << 8) | byte;
|
||||
|
||||
/* check if the byte is a emulation_prevention_three_byte */
|
||||
if ((nr->epb_cache & 0xffffff) == 0x3) {
|
||||
nr->n_epb++;
|
||||
goto next_byte;
|
||||
}
|
||||
nr->cache = (nr->cache << 8) | nr->first_byte;
|
||||
nr->first_byte = byte;
|
||||
nr->bits_in_cache += 8;
|
||||
}
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/* Skips the specified amount of bits. This is only suitable to a
|
||||
cacheable number of bits */
|
||||
gboolean
|
||||
nal_reader_skip (NalReader * nr, guint nbits)
|
||||
{
|
||||
g_assert (nbits <= 8 * sizeof (nr->cache));
|
||||
|
||||
if (G_UNLIKELY (!nal_reader_read (nr, nbits)))
|
||||
return FALSE;
|
||||
|
||||
nr->bits_in_cache -= nbits;
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/* Generic version to skip any number of bits */
|
||||
gboolean
|
||||
nal_reader_skip_long (NalReader * nr, guint nbits)
|
||||
{
|
||||
/* Leave out enough bits in the cache once we are finished */
|
||||
const guint skip_size = 4 * sizeof (nr->cache);
|
||||
guint remaining = nbits;
|
||||
|
||||
nbits %= skip_size;
|
||||
while (remaining > 0) {
|
||||
if (!nal_reader_skip (nr, nbits))
|
||||
return FALSE;
|
||||
remaining -= nbits;
|
||||
nbits = skip_size;
|
||||
}
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
guint
|
||||
nal_reader_get_pos (const NalReader * nr)
|
||||
{
|
||||
return nr->byte * 8 - nr->bits_in_cache;
|
||||
}
|
||||
|
||||
guint
|
||||
nal_reader_get_remaining (const NalReader * nr)
|
||||
{
|
||||
return (nr->size - nr->byte) * 8 + nr->bits_in_cache;
|
||||
}
|
||||
|
||||
guint
|
||||
nal_reader_get_epb_count (const NalReader * nr)
|
||||
{
|
||||
return nr->n_epb;
|
||||
}
|
||||
|
||||
#define NAL_READER_READ_BITS(bits) \
|
||||
gboolean \
|
||||
nal_reader_get_bits_uint##bits (NalReader *nr, guint##bits *val, guint nbits) \
|
||||
{ \
|
||||
guint shift; \
|
||||
\
|
||||
if (!nal_reader_read (nr, nbits)) \
|
||||
return FALSE; \
|
||||
\
|
||||
/* bring the required bits down and truncate */ \
|
||||
shift = nr->bits_in_cache - nbits; \
|
||||
*val = nr->first_byte >> shift; \
|
||||
\
|
||||
*val |= nr->cache << (8 - shift); \
|
||||
/* mask out required bits */ \
|
||||
if (nbits < bits) \
|
||||
*val &= ((guint##bits)1 << nbits) - 1; \
|
||||
\
|
||||
nr->bits_in_cache = shift; \
|
||||
\
|
||||
return TRUE; \
|
||||
} \
|
||||
|
||||
NAL_READER_READ_BITS (8);
|
||||
NAL_READER_READ_BITS (16);
|
||||
NAL_READER_READ_BITS (32);
|
||||
|
||||
#define NAL_READER_PEEK_BITS(bits) \
|
||||
gboolean \
|
||||
nal_reader_peek_bits_uint##bits (const NalReader *nr, guint##bits *val, guint nbits) \
|
||||
{ \
|
||||
NalReader tmp; \
|
||||
\
|
||||
tmp = *nr; \
|
||||
return nal_reader_get_bits_uint##bits (&tmp, val, nbits); \
|
||||
}
|
||||
|
||||
NAL_READER_PEEK_BITS (8);
|
||||
|
||||
gboolean
|
||||
nal_reader_get_ue (NalReader * nr, guint32 * val)
|
||||
{
|
||||
guint i = 0;
|
||||
guint8 bit;
|
||||
guint32 value;
|
||||
|
||||
if (G_UNLIKELY (!nal_reader_get_bits_uint8 (nr, &bit, 1)))
|
||||
return FALSE;
|
||||
|
||||
while (bit == 0) {
|
||||
i++;
|
||||
if (G_UNLIKELY (!nal_reader_get_bits_uint8 (nr, &bit, 1)))
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
if (G_UNLIKELY (i > 31))
|
||||
return FALSE;
|
||||
|
||||
if (G_UNLIKELY (!nal_reader_get_bits_uint32 (nr, &value, i)))
|
||||
return FALSE;
|
||||
|
||||
*val = (1 << i) - 1 + value;
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
gboolean
|
||||
nal_reader_get_se (NalReader * nr, gint32 * val)
|
||||
{
|
||||
guint32 value;
|
||||
|
||||
if (G_UNLIKELY (!nal_reader_get_ue (nr, &value)))
|
||||
return FALSE;
|
||||
|
||||
if (value % 2)
|
||||
*val = (value / 2) + 1;
|
||||
else
|
||||
*val = -(value / 2);
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
gboolean
|
||||
nal_reader_is_byte_aligned (NalReader * nr)
|
||||
{
|
||||
if (nr->bits_in_cache != 0)
|
||||
return FALSE;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
gboolean
|
||||
nal_reader_has_more_data (NalReader * nr)
|
||||
{
|
||||
NalReader nr_tmp;
|
||||
guint remaining, nbits;
|
||||
guint8 rbsp_stop_one_bit, zero_bits;
|
||||
|
||||
remaining = nal_reader_get_remaining (nr);
|
||||
if (remaining == 0)
|
||||
return FALSE;
|
||||
|
||||
nr_tmp = *nr;
|
||||
nr = &nr_tmp;
|
||||
|
||||
/* The spec defines that more_rbsp_data() searches for the last bit
|
||||
equal to 1, and that it is the rbsp_stop_one_bit. Subsequent bits
|
||||
until byte boundary is reached shall be zero.
|
||||
|
||||
This means that more_rbsp_data() is FALSE if the next bit is 1
|
||||
and the remaining bits until byte boundary are zero. One way to
|
||||
be sure that this bit was the very last one, is that every other
|
||||
bit after we reached byte boundary are also set to zero.
|
||||
Otherwise, if the next bit is 0 or if there are non-zero bits
|
||||
afterwards, then then we have more_rbsp_data() */
|
||||
if (!nal_reader_get_bits_uint8 (nr, &rbsp_stop_one_bit, 1))
|
||||
return FALSE;
|
||||
if (!rbsp_stop_one_bit)
|
||||
return TRUE;
|
||||
|
||||
nbits = --remaining % 8;
|
||||
while (remaining > 0) {
|
||||
if (!nal_reader_get_bits_uint8 (nr, &zero_bits, nbits))
|
||||
return FALSE;
|
||||
if (zero_bits != 0)
|
||||
return TRUE;
|
||||
remaining -= nbits;
|
||||
nbits = 8;
|
||||
}
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/*********** end of nal parser ***************/
|
||||
|
||||
gint
|
||||
scan_for_start_codes (const guint8 * data, guint size)
|
||||
{
|
||||
GstByteReader br;
|
||||
gst_byte_reader_init (&br, data, size);
|
||||
|
||||
/* NALU not empty, so we can at least expect 1 (even 2) bytes following sc */
|
||||
return gst_byte_reader_masked_scan_uint32 (&br, 0xffffff00, 0x00000100,
|
||||
0, size);
|
||||
}
|
||||
|
||||
void
|
||||
nal_writer_init (NalWriter * nw, guint nal_prefix_size, gboolean packetized)
|
||||
{
|
||||
g_return_if_fail (nw != NULL);
|
||||
g_return_if_fail ((packetized && nal_prefix_size > 1 && nal_prefix_size < 5)
|
||||
|| (!packetized && (nal_prefix_size == 3 || nal_prefix_size == 4)));
|
||||
|
||||
gst_bit_writer_init (&nw->bw);
|
||||
nw->nal_prefix_size = nal_prefix_size;
|
||||
nw->packetized = packetized;
|
||||
}
|
||||
|
||||
void
|
||||
nal_writer_reset (NalWriter * nw)
|
||||
{
|
||||
g_return_if_fail (nw != NULL);
|
||||
|
||||
gst_bit_writer_reset (&nw->bw);
|
||||
memset (nw, 0, sizeof (NalWriter));
|
||||
}
|
||||
|
||||
gboolean
|
||||
nal_writer_do_rbsp_trailing_bits (NalWriter * nw)
|
||||
{
|
||||
g_return_val_if_fail (nw != NULL, FALSE);
|
||||
|
||||
if (!gst_bit_writer_put_bits_uint8 (&nw->bw, 1, 1)) {
|
||||
GST_WARNING ("Cannot put trailing bits");
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
if (!gst_bit_writer_align_bytes (&nw->bw, 0)) {
|
||||
GST_WARNING ("Cannot put align bits");
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static gpointer
|
||||
nal_writer_create_nal_data (NalWriter * nw, guint32 * ret_size)
|
||||
{
|
||||
GstBitWriter bw;
|
||||
gint i;
|
||||
guint8 *src, *dst;
|
||||
gsize size;
|
||||
gpointer data;
|
||||
|
||||
/* scan to put emulation_prevention_three_byte */
|
||||
size = GST_BIT_WRITER_BIT_SIZE (&nw->bw) >> 3;
|
||||
src = GST_BIT_WRITER_DATA (&nw->bw);
|
||||
|
||||
gst_bit_writer_init_with_size (&bw, size + nw->nal_prefix_size, FALSE);
|
||||
for (i = 0; i < nw->nal_prefix_size - 1; i++)
|
||||
gst_bit_writer_put_bits_uint8 (&bw, 0, 8);
|
||||
gst_bit_writer_put_bits_uint8 (&bw, 1, 8);
|
||||
|
||||
for (i = 0; i < size; i++) {
|
||||
guint pos = (GST_BIT_WRITER_BIT_SIZE (&bw) >> 3);
|
||||
dst = GST_BIT_WRITER_DATA (&bw);
|
||||
if (pos >= nw->nal_prefix_size + 2 &&
|
||||
dst[pos - 2] == 0 && dst[pos - 1] == 0 && src[i] <= 0x3) {
|
||||
gst_bit_writer_put_bits_uint8 (&bw, 0x3, 8);
|
||||
}
|
||||
|
||||
gst_bit_writer_put_bits_uint8 (&bw, src[i], 8);
|
||||
}
|
||||
|
||||
*ret_size = bw.bit_size >> 3;
|
||||
data = gst_bit_writer_reset_and_get_data (&bw);
|
||||
|
||||
if (nw->packetized) {
|
||||
size = *ret_size - nw->nal_prefix_size;
|
||||
|
||||
switch (nw->nal_prefix_size) {
|
||||
case 1:
|
||||
GST_WRITE_UINT8 (data, size);
|
||||
break;
|
||||
case 2:
|
||||
GST_WRITE_UINT16_BE (data, size);
|
||||
break;
|
||||
case 3:
|
||||
GST_WRITE_UINT24_BE (data, size);
|
||||
break;
|
||||
case 4:
|
||||
GST_WRITE_UINT32_BE (data, size);
|
||||
break;
|
||||
default:
|
||||
g_assert_not_reached ();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
GstMemory *
|
||||
nal_writer_reset_and_get_memory (NalWriter * nw)
|
||||
{
|
||||
guint32 size = 0;
|
||||
GstMemory *ret = NULL;
|
||||
gpointer data;
|
||||
|
||||
g_return_val_if_fail (nw != NULL, NULL);
|
||||
|
||||
if ((GST_BIT_WRITER_BIT_SIZE (&nw->bw) >> 3) == 0) {
|
||||
GST_WARNING ("No written byte");
|
||||
goto done;
|
||||
}
|
||||
|
||||
if ((GST_BIT_WRITER_BIT_SIZE (&nw->bw) & 0x7) != 0) {
|
||||
GST_WARNING ("Written stream is not byte aligned");
|
||||
if (!nal_writer_do_rbsp_trailing_bits (nw))
|
||||
goto done;
|
||||
}
|
||||
|
||||
data = nal_writer_create_nal_data (nw, &size);
|
||||
if (!data) {
|
||||
GST_WARNING ("Failed to create nal data");
|
||||
goto done;
|
||||
}
|
||||
|
||||
ret = gst_memory_new_wrapped (0, data, size, 0, size, data, g_free);
|
||||
|
||||
done:
|
||||
gst_bit_writer_reset (&nw->bw);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
guint8 *
|
||||
nal_writer_reset_and_get_data (NalWriter * nw, guint32 * ret_size)
|
||||
{
|
||||
guint32 size = 0;
|
||||
guint8 *data = NULL;
|
||||
|
||||
g_return_val_if_fail (nw != NULL, NULL);
|
||||
g_return_val_if_fail (ret_size != NULL, NULL);
|
||||
|
||||
*ret_size = 0;
|
||||
|
||||
if ((GST_BIT_WRITER_BIT_SIZE (&nw->bw) >> 3) == 0) {
|
||||
GST_WARNING ("No written byte");
|
||||
goto done;
|
||||
}
|
||||
|
||||
if ((GST_BIT_WRITER_BIT_SIZE (&nw->bw) & 0x7) != 0) {
|
||||
GST_WARNING ("Written stream is not byte aligned");
|
||||
if (!nal_writer_do_rbsp_trailing_bits (nw))
|
||||
goto done;
|
||||
}
|
||||
|
||||
data = nal_writer_create_nal_data (nw, &size);
|
||||
if (!data) {
|
||||
GST_WARNING ("Failed to create nal data");
|
||||
goto done;
|
||||
}
|
||||
|
||||
*ret_size = size;
|
||||
|
||||
done:
|
||||
gst_bit_writer_reset (&nw->bw);
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
gboolean
|
||||
nal_writer_put_bits_uint8 (NalWriter * nw, guint8 value, guint nbits)
|
||||
{
|
||||
g_return_val_if_fail (nw != NULL, FALSE);
|
||||
|
||||
if (!gst_bit_writer_put_bits_uint8 (&nw->bw, value, nbits))
|
||||
return FALSE;
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
gboolean
|
||||
nal_writer_put_bits_uint16 (NalWriter * nw, guint16 value, guint nbits)
|
||||
{
|
||||
g_return_val_if_fail (nw != NULL, FALSE);
|
||||
|
||||
if (!gst_bit_writer_put_bits_uint16 (&nw->bw, value, nbits))
|
||||
return FALSE;
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
gboolean
|
||||
nal_writer_put_bits_uint32 (NalWriter * nw, guint32 value, guint nbits)
|
||||
{
|
||||
g_return_val_if_fail (nw != NULL, FALSE);
|
||||
|
||||
if (!gst_bit_writer_put_bits_uint32 (&nw->bw, value, nbits))
|
||||
return FALSE;
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
gboolean
|
||||
nal_writer_put_bytes (NalWriter * nw, const guint8 * data, guint nbytes)
|
||||
{
|
||||
g_return_val_if_fail (nw != NULL, FALSE);
|
||||
g_return_val_if_fail (data != NULL, FALSE);
|
||||
g_return_val_if_fail (nbytes != 0, FALSE);
|
||||
|
||||
if (!gst_bit_writer_put_bytes (&nw->bw, data, nbytes))
|
||||
return FALSE;
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
gboolean
|
||||
nal_writer_put_ue (NalWriter * nw, guint32 value)
|
||||
{
|
||||
guint leading_zeros;
|
||||
guint rest;
|
||||
|
||||
g_return_val_if_fail (nw != NULL, FALSE);
|
||||
|
||||
count_exp_golomb_bits (value, &leading_zeros, &rest);
|
||||
|
||||
/* write leading zeros */
|
||||
if (leading_zeros) {
|
||||
if (!nal_writer_put_bits_uint32 (nw, 0, leading_zeros))
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* write the rest */
|
||||
if (!nal_writer_put_bits_uint32 (nw, value + 1, rest))
|
||||
return FALSE;
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
gboolean
|
||||
count_exp_golomb_bits (guint32 value, guint * leading_zeros, guint * rest)
|
||||
{
|
||||
guint32 x;
|
||||
guint count = 0;
|
||||
|
||||
/* https://en.wikipedia.org/wiki/Exponential-Golomb_coding */
|
||||
/* count bits of value + 1 */
|
||||
x = value + 1;
|
||||
while (x) {
|
||||
count++;
|
||||
x >>= 1;
|
||||
}
|
||||
|
||||
if (leading_zeros) {
|
||||
if (count > 1)
|
||||
*leading_zeros = count - 1;
|
||||
else
|
||||
*leading_zeros = 0;
|
||||
}
|
||||
|
||||
if (rest) {
|
||||
*rest = count;
|
||||
}
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
@@ -0,0 +1,269 @@
|
||||
/* Gstreamer
|
||||
* Copyright (C) <2011> Intel Corporation
|
||||
* Copyright (C) <2011> Collabora Ltd.
|
||||
* Copyright (C) <2011> Thibault Saunier <thibault.saunier@collabora.com>
|
||||
*
|
||||
* Some bits C-c,C-v'ed and s/4/3 from h264parse and videoparsers/h264parse.c:
|
||||
* Copyright (C) <2010> Mark Nauwelaerts <mark.nauwelaerts@collabora.co.uk>
|
||||
* Copyright (C) <2010> Collabora Multimedia
|
||||
* Copyright (C) <2010> Nokia Corporation
|
||||
*
|
||||
* (C) 2005 Michal Benes <michal.benes@itonis.tv>
|
||||
* (C) 2008 Wim Taymans <wim.taymans@gmail.com>
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Library General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Library General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Library General Public
|
||||
* License along with this library; if not, write to the
|
||||
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
|
||||
* Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Common code for NAL parsing from h264 and h265 parsers.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
# include "config.h"
|
||||
#endif
|
||||
|
||||
#include <gst/base/gstbytereader.h>
|
||||
#include <gst/base/gstbitreader.h>
|
||||
#include <gst/base/gstbitwriter.h>
|
||||
|
||||
typedef struct
|
||||
{
|
||||
const guint8 *data;
|
||||
guint size;
|
||||
|
||||
guint n_epb; /* Number of emulation prevention bytes */
|
||||
guint byte; /* Byte position */
|
||||
guint bits_in_cache; /* bitpos in the cache of next bit */
|
||||
guint8 first_byte;
|
||||
guint32 epb_cache; /* cache 3 bytes to check emulation prevention bytes */
|
||||
guint64 cache; /* cached bytes */
|
||||
} NalReader;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
GstBitWriter bw;
|
||||
|
||||
guint nal_prefix_size;
|
||||
gboolean packetized;
|
||||
} NalWriter;
|
||||
|
||||
G_GNUC_INTERNAL
|
||||
void nal_reader_init (NalReader * nr, const guint8 * data, guint size);
|
||||
|
||||
G_GNUC_INTERNAL
|
||||
gboolean nal_reader_read (NalReader * nr, guint nbits);
|
||||
|
||||
G_GNUC_INTERNAL
|
||||
gboolean nal_reader_skip (NalReader * nr, guint nbits);
|
||||
|
||||
G_GNUC_INTERNAL
|
||||
gboolean nal_reader_skip_long (NalReader * nr, guint nbits);
|
||||
|
||||
G_GNUC_INTERNAL
|
||||
guint nal_reader_get_pos (const NalReader * nr);
|
||||
|
||||
G_GNUC_INTERNAL
|
||||
guint nal_reader_get_remaining (const NalReader * nr);
|
||||
|
||||
G_GNUC_INTERNAL
|
||||
guint nal_reader_get_epb_count (const NalReader * nr);
|
||||
|
||||
G_GNUC_INTERNAL
|
||||
gboolean nal_reader_is_byte_aligned (NalReader * nr);
|
||||
|
||||
G_GNUC_INTERNAL
|
||||
gboolean nal_reader_has_more_data (NalReader * nr);
|
||||
|
||||
#define NAL_READER_READ_BITS_H(bits) \
|
||||
G_GNUC_INTERNAL \
|
||||
gboolean nal_reader_get_bits_uint##bits (NalReader *nr, guint##bits *val, guint nbits)
|
||||
|
||||
NAL_READER_READ_BITS_H (8);
|
||||
NAL_READER_READ_BITS_H (16);
|
||||
NAL_READER_READ_BITS_H (32);
|
||||
|
||||
#define NAL_READER_PEEK_BITS_H(bits) \
|
||||
G_GNUC_INTERNAL \
|
||||
gboolean nal_reader_peek_bits_uint##bits (const NalReader *nr, guint##bits *val, guint nbits)
|
||||
|
||||
NAL_READER_PEEK_BITS_H (8);
|
||||
|
||||
G_GNUC_INTERNAL
|
||||
gboolean nal_reader_get_ue (NalReader * nr, guint32 * val);
|
||||
|
||||
G_GNUC_INTERNAL
|
||||
gboolean nal_reader_get_se (NalReader * nr, gint32 * val);
|
||||
|
||||
/* Range checks used by the parsers.  Each macro logs a warning and jumps to
 * the enclosing function's `error` label when the value is out of range.
 * Arguments are parenthesized so that expressions such as `a + b` or
 * `c ? x : y` compare as a whole; note that they are evaluated more than
 * once, so they must be free of side effects. */
#define CHECK_ALLOWED_MAX_WITH_DEBUG(dbg, val, max) { \
  if ((val) > (max)) { \
    GST_WARNING ("value for '" dbg "' greater than max. value: %d, max %d", \
                     (val), (max)); \
    goto error; \
  } \
}
#define CHECK_ALLOWED_MAX(val, max) \
  CHECK_ALLOWED_MAX_WITH_DEBUG (G_STRINGIFY (val), val, max)

#define CHECK_ALLOWED_WITH_DEBUG(dbg, val, min, max) { \
  if ((val) < (min) || (val) > (max)) { \
    GST_WARNING ("value for '" dbg "' not in allowed range. value: %d, range %d-%d", \
                     (val), (min), (max)); \
    goto error; \
  } \
}
#define CHECK_ALLOWED(val, min, max) \
  CHECK_ALLOWED_WITH_DEBUG (G_STRINGIFY (val), val, min, max)
|
||||
|
||||
/* Fixed-width read helpers: read @nbits bits into @val, or log a warning
 * and jump to the enclosing function's `error` label on failure. */
#define READ_UINT8(nr, val, nbits) { \
  if (!nal_reader_get_bits_uint8 (nr, &val, nbits)) { \
    GST_WARNING ("failed to read uint8 for '" G_STRINGIFY (val) "', nbits: %d", nbits); \
    goto error; \
  } \
}

#define READ_UINT16(nr, val, nbits) { \
  if (!nal_reader_get_bits_uint16 (nr, &val, nbits)) { \
    GST_WARNING ("failed to read uint16 for '" G_STRINGIFY (val) "', nbits: %d", nbits); \
    goto error; \
  } \
}

#define READ_UINT32(nr, val, nbits) { \
  if (!nal_reader_get_bits_uint32 (nr, &val, nbits)) { \
    GST_WARNING ("failed to read uint32 for '" G_STRINGIFY (val) "', nbits: %d", nbits); \
    goto error; \
  } \
}
|
||||
|
||||
/* Read @nbits bits into the 64-bit @val, or log a warning and jump to the
 * enclosing function's `error` label on failure.
 * NOTE(review): relies on nal_reader_get_bits_uint64(), which is not among
 * the reader functions generated for 8/16/32 bits -- confirm it exists
 * before using this macro. */
#define READ_UINT64(nr, val, nbits) { \
  if (!nal_reader_get_bits_uint64 (nr, &val, nbits)) { \
    GST_WARNING ("failed to read uint64 for '" G_STRINGIFY (val) "', nbits: %d", nbits); \
    goto error; \
  } \
}
|
||||
|
||||
/* Exp-Golomb read helpers.  All of them log a warning and jump to the
 * enclosing function's `error` label on failure.  The *_ALLOWED and *_MAX
 * variants read into a temporary, range-check it, and only then assign it
 * to @val. */
#define READ_UE(nr, val) { \
  if (!nal_reader_get_ue (nr, &val)) { \
    GST_WARNING ("failed to read UE for '" G_STRINGIFY (val) "'"); \
    goto error; \
  } \
}

#define READ_UE_ALLOWED(nr, val, min, max) { \
  guint32 tmp; \
  READ_UE (nr, tmp); \
  CHECK_ALLOWED_WITH_DEBUG (G_STRINGIFY (val), tmp, min, max); \
  val = tmp; \
}

#define READ_UE_MAX(nr, val, max) { \
  guint32 tmp; \
  READ_UE (nr, tmp); \
  CHECK_ALLOWED_MAX_WITH_DEBUG (G_STRINGIFY (val), tmp, max); \
  val = tmp; \
}

#define READ_SE(nr, val) { \
  if (!nal_reader_get_se (nr, &val)) { \
    GST_WARNING ("failed to read SE for '" G_STRINGIFY (val) "'"); \
    goto error; \
  } \
}

#define READ_SE_ALLOWED(nr, val, min, max) { \
  gint32 tmp; \
  READ_SE (nr, tmp); \
  CHECK_ALLOWED_WITH_DEBUG (G_STRINGIFY (val), tmp, min, max); \
  val = tmp; \
}
|
||||
|
||||
G_GNUC_INTERNAL
|
||||
gint scan_for_start_codes (const guint8 * data, guint size);
|
||||
|
||||
G_GNUC_INTERNAL
|
||||
void nal_writer_init (NalWriter * nw, guint nal_prefix_size, gboolean packetized);
|
||||
|
||||
G_GNUC_INTERNAL
|
||||
void nal_writer_reset (NalWriter * nw);
|
||||
|
||||
G_GNUC_INTERNAL
|
||||
gboolean nal_writer_do_rbsp_trailing_bits (NalWriter * nw);
|
||||
|
||||
G_GNUC_INTERNAL
|
||||
GstMemory * nal_writer_reset_and_get_memory (NalWriter * nw);
|
||||
|
||||
G_GNUC_INTERNAL
|
||||
guint8 * nal_writer_reset_and_get_data (NalWriter * nw, guint32 * ret_size);
|
||||
|
||||
G_GNUC_INTERNAL
|
||||
gboolean nal_writer_put_bits_uint8 (NalWriter * nw, guint8 value, guint nbits);
|
||||
|
||||
G_GNUC_INTERNAL
|
||||
gboolean nal_writer_put_bits_uint16 (NalWriter * nw, guint16 value, guint nbits);
|
||||
|
||||
G_GNUC_INTERNAL
|
||||
gboolean nal_writer_put_bits_uint32 (NalWriter * nw, guint32 value, guint nbits);
|
||||
|
||||
G_GNUC_INTERNAL
|
||||
gboolean nal_writer_put_bytes (NalWriter * nw, const guint8 * data, guint nbytes);
|
||||
|
||||
G_GNUC_INTERNAL
|
||||
gboolean nal_writer_put_ue (NalWriter * nw, guint32 value);
|
||||
|
||||
G_GNUC_INTERNAL
|
||||
gboolean count_exp_golomb_bits (guint32 value, guint * leading_zeros, guint * rest);
|
||||
|
||||
/* Fixed-width write helpers: append @nbits bits of @val, or log a warning
 * and jump to the enclosing function's `error` label on failure. */
#define WRITE_UINT8(nw, val, nbits) { \
  if (!nal_writer_put_bits_uint8 (nw, val, nbits)) { \
    GST_WARNING ("failed to write uint8 for '" G_STRINGIFY (val) "', nbits: %d", nbits); \
    goto error; \
  } \
}

#define WRITE_UINT16(nw, val, nbits) { \
  if (!nal_writer_put_bits_uint16 (nw, val, nbits)) { \
    GST_WARNING ("failed to write uint16 for '" G_STRINGIFY (val) "', nbits: %d", nbits); \
    goto error; \
  } \
}

#define WRITE_UINT32(nw, val, nbits) { \
  if (!nal_writer_put_bits_uint32 (nw, val, nbits)) { \
    GST_WARNING ("failed to write uint32 for '" G_STRINGIFY (val) "', nbits: %d", nbits); \
    goto error; \
  } \
}
|
||||
|
||||
/* Append @nbytes bytes from @data, or log a warning and jump to the
 * enclosing function's `error` label on failure.  The message names the
 * @data argument and reports the count as bytes. */
#define WRITE_BYTES(nw, data, nbytes) { \
  if (!nal_writer_put_bytes (nw, data, nbytes)) { \
    GST_WARNING ("failed to write bytes for '" G_STRINGIFY (data) "', nbytes: %d", nbytes); \
    goto error; \
  } \
}
|
||||
|
||||
/* Write @val as an unsigned Exp-Golomb code, or log a warning and jump to
 * the enclosing function's `error` label on failure. */
#define WRITE_UE(nw, val) { \
  if (!nal_writer_put_ue (nw, val)) { \
    GST_WARNING ("failed to write ue for '" G_STRINGIFY (val) "'"); \
    goto error; \
  } \
}
|
||||
|
||||
/* Integer division of @a by @b, rounded up.  @b must be greater than 0. */
static inline guint32 div_ceil (guint32 a, guint32 b)
{
  guint32 quotient;

  /* http://blog.pkh.me/p/36-figuring-out-round%2C-floor-and-ceil-with-integer-division.html */
  g_assert (b > 0);

  quotient = a / b;
  if (a % b != 0)
    quotient++;

  return quotient;
}
|
||||
@@ -0,0 +1,10 @@
|
||||
/* Stub for <gst/glib-compat-private.h>.
|
||||
* In upstream GStreamer this provides backwards-compat shims for older
|
||||
* GLib versions (g_memdup2 polyfill being the load-bearing one).
|
||||
* Our gst_compat.h already defines g_memdup2 as a static inline, so
|
||||
* we just include the shim.
|
||||
*/
|
||||
#ifndef LIBVA_V4L2_REQUEST_FOURIER_GLIB_COMPAT_PRIVATE_STUB
|
||||
#define LIBVA_V4L2_REQUEST_FOURIER_GLIB_COMPAT_PRIVATE_STUB
|
||||
#include "gst_compat.h"
|
||||
#endif
|
||||
@@ -0,0 +1,10 @@
|
||||
/* Stub for <gst/gst.h> — redirects to the project's gst_compat shim.
|
||||
* The vendored GStreamer 1.28.2 H.265 parser was originally built against
|
||||
* full GStreamer; we only need the GLib type aliases + memory helpers +
|
||||
* macro stubs, all provided by gst_compat.h. Original gst.h would pull
|
||||
* in GObject + GstObject + the entire framework, which we don't link.
|
||||
*/
|
||||
#ifndef LIBVA_V4L2_REQUEST_FOURIER_GST_H_STUB
|
||||
#define LIBVA_V4L2_REQUEST_FOURIER_GST_H_STUB
|
||||
#include "gst_compat.h"
|
||||
#endif
|
||||
@@ -0,0 +1,145 @@
|
||||
/*
|
||||
* gst_compat.c — GArray implementation for the vendored GStreamer parser.
|
||||
*
|
||||
* Scope: minimal subset of GArray API exercised by gsth265parser.c
|
||||
* (g_array_new, g_array_sized_new, g_array_append_vals + the
|
||||
* g_array_append_val macro, g_array_index macro, g_array_set_size,
|
||||
* g_array_set_clear_func, g_array_free, g_array_unref).
|
||||
*
|
||||
* Non-thread-safe (matches GArray's documented semantics — GArray is
|
||||
* not thread-safe in upstream GLib either, callers must serialize).
|
||||
*
|
||||
* License: MIT (matches backend's COPYING.MIT).
|
||||
*/
|
||||
|
||||
#include "gst_compat.h"
|
||||
|
||||
/* ===== internal helpers ===== */
|
||||
|
||||
static gboolean
|
||||
garray_grow(GArray *array, guint new_capacity)
|
||||
{
|
||||
if (new_capacity <= array->capacity)
|
||||
return TRUE;
|
||||
|
||||
/* round up to next power of two for amortized O(1) growth */
|
||||
guint cap = array->capacity > 0 ? array->capacity : 4;
|
||||
while (cap < new_capacity)
|
||||
cap *= 2;
|
||||
|
||||
char *new_data = realloc(array->data, (size_t)cap * array->element_size);
|
||||
if (new_data == NULL)
|
||||
return FALSE;
|
||||
|
||||
if (array->clear) {
|
||||
memset(new_data + (size_t)array->capacity * array->element_size, 0,
|
||||
(size_t)(cap - array->capacity) * array->element_size);
|
||||
}
|
||||
|
||||
array->data = new_data;
|
||||
array->capacity = cap;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/* ===== public API ===== */
|
||||
|
||||
GArray *
|
||||
g_array_sized_new(gboolean zero_terminated, gboolean clear,
|
||||
guint element_size, guint reserved_size)
|
||||
{
|
||||
/* zero_terminated is GLib-specific (appends a zero-element sentinel
|
||||
* for trailing-NULL semantics). The vendored parser does not use it;
|
||||
* we ignore the flag. */
|
||||
(void)zero_terminated;
|
||||
|
||||
GArray *a = calloc(1, sizeof(GArray));
|
||||
if (a == NULL)
|
||||
return NULL;
|
||||
|
||||
a->element_size = element_size;
|
||||
a->clear = clear;
|
||||
|
||||
if (reserved_size > 0) {
|
||||
if (!garray_grow(a, reserved_size)) {
|
||||
free(a);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
return a;
|
||||
}
|
||||
|
||||
GArray *
|
||||
g_array_new(gboolean zero_terminated, gboolean clear, guint element_size)
|
||||
{
|
||||
return g_array_sized_new(zero_terminated, clear, element_size, 0);
|
||||
}
|
||||
|
||||
GArray *
|
||||
g_array_set_size(GArray *array, guint length)
|
||||
{
|
||||
if (length > array->capacity) {
|
||||
if (!garray_grow(array, length))
|
||||
return array;
|
||||
}
|
||||
|
||||
if (array->clear_func != NULL && length < array->len) {
|
||||
for (guint i = length; i < array->len; i++)
|
||||
array->clear_func(array->data + (size_t)i * array->element_size);
|
||||
}
|
||||
if (array->clear && length > array->len) {
|
||||
memset(array->data + (size_t)array->len * array->element_size, 0,
|
||||
(size_t)(length - array->len) * array->element_size);
|
||||
}
|
||||
array->len = length;
|
||||
return array;
|
||||
}
|
||||
|
||||
GArray *
|
||||
g_array_append_vals(GArray *array, gconstpointer data, guint len)
|
||||
{
|
||||
if (len == 0)
|
||||
return array;
|
||||
|
||||
if (!garray_grow(array, array->len + len))
|
||||
return array;
|
||||
|
||||
memcpy(array->data + (size_t)array->len * array->element_size,
|
||||
data, (size_t)len * array->element_size);
|
||||
array->len += len;
|
||||
return array;
|
||||
}
|
||||
|
||||
void
|
||||
g_array_set_clear_func(GArray *array, void (*clear_func)(gpointer))
|
||||
{
|
||||
array->clear_func = clear_func;
|
||||
}
|
||||
|
||||
gchar *
|
||||
g_array_free(GArray *array, gboolean free_segment)
|
||||
{
|
||||
if (array == NULL)
|
||||
return NULL;
|
||||
|
||||
if (array->clear_func != NULL) {
|
||||
for (guint i = 0; i < array->len; i++)
|
||||
array->clear_func(array->data + (size_t)i * array->element_size);
|
||||
}
|
||||
|
||||
gchar *data = NULL;
|
||||
if (free_segment) {
|
||||
free(array->data);
|
||||
} else {
|
||||
data = array->data;
|
||||
}
|
||||
free(array);
|
||||
return data;
|
||||
}
|
||||
|
||||
/* Release @array together with its element storage.  There is no reference
 * count in this shim, so this is an unconditional free.  Always returns NULL. */
GArray *
g_array_unref(GArray *array)
{
    /* simplified to free; the backend never sub-references shared GArrays */
    (void)g_array_free(array, TRUE);

    return NULL;
}
|
||||
@@ -0,0 +1,463 @@
|
||||
/*
|
||||
* gst_compat.h — minimal GLib/GStreamer compatibility shim for vendored
|
||||
* GStreamer 1.28.2 H.265 parser + bitreader + bytereader + nalutils.
|
||||
*
|
||||
* Strategy: provide #defines / typedefs for the GLib API surface those
|
||||
* 4 vendored files use, so they can compile against libc + libv4l2 only
|
||||
* (no glib2 / gst-base linkage). Vendored .c files are NOT modified
|
||||
* directly; instead this header is force-included via the Makefile's
|
||||
* `-include` flag on the vendored translation units.
|
||||
*
|
||||
* Coverage scoped to what gsth265parser.c + nalutils.c + gstbitreader.c
|
||||
* + gstbytereader.c actually call. Surveyed in
|
||||
* ampere-kernel-decoders phase4 step 2 prep — see
|
||||
* ~/src/ampere-kernel-decoders/phase4_plan_iter2.md and the survey
|
||||
* commit message for the empirical inventory.
|
||||
*
|
||||
* License: this shim is original work, MIT (matching the backend's
|
||||
* COPYING.MIT). The vendored .c files keep their LGPL v2.1+ headers
|
||||
* verbatim.
|
||||
*/
|
||||
|
||||
#ifndef LIBVA_V4L2_REQUEST_FOURIER_GST_COMPAT_H
|
||||
#define LIBVA_V4L2_REQUEST_FOURIER_GST_COMPAT_H
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
/* ===== GLib type aliases ===== */
|
||||
|
||||
typedef bool gboolean;
|
||||
typedef char gchar;
|
||||
typedef unsigned char guchar;
|
||||
typedef int gint;
|
||||
typedef int8_t gint8;
|
||||
typedef int16_t gint16;
|
||||
typedef int32_t gint32;
|
||||
typedef int64_t gint64;
|
||||
typedef unsigned int guint;
|
||||
typedef uint8_t guint8;
|
||||
typedef uint16_t guint16;
|
||||
typedef uint32_t guint32;
|
||||
typedef uint64_t guint64;
|
||||
typedef size_t gsize;
|
||||
typedef ptrdiff_t gssize;
|
||||
typedef void * gpointer;
|
||||
typedef const void * gconstpointer;
|
||||
typedef double gdouble;
|
||||
typedef float gfloat;
|
||||
|
||||
/* GLib's gint64 / guint64 formatting is platform-conditional; for our
|
||||
* aarch64 ALARM target we don't need the full G_*_FORMAT machinery, but
|
||||
* gstbytereader uses G_GSIZE_FORMAT in a debug-only printf. */
|
||||
#define G_GSIZE_FORMAT "zu"
|
||||
|
||||
#ifndef TRUE
|
||||
# define TRUE true
|
||||
#endif
|
||||
#ifndef FALSE
|
||||
# define FALSE false
|
||||
#endif
|
||||
|
||||
/* ===== memory ===== */
|
||||
|
||||
#define g_malloc(n) malloc((size_t)(n))
|
||||
#define g_malloc0(n) calloc(1, (size_t)(n))
|
||||
#define g_realloc(p, n) realloc((p), (size_t)(n))
|
||||
/* g_free needs to be addressable (passed as a function-pointer arg by
|
||||
* nalutils.c::gst_memory_new_wrapped — even though that call site is
|
||||
* dead code we don't invoke, it must compile). Plain `free` is
|
||||
* compatible: signature is `void (void *)` either way. */
|
||||
#define g_free free
|
||||
#define g_new(type, n) ((type *)malloc(sizeof(type) * (size_t)(n)))
|
||||
#define g_new0(type, n) ((type *)calloc((size_t)(n), sizeof(type)))
|
||||
#define g_slice_new(type) ((type *)malloc(sizeof(type)))
|
||||
#define g_slice_new0(type) ((type *)calloc(1, sizeof(type)))
|
||||
#define g_slice_free(type, p) free(p)
|
||||
#define g_slice_free1(size, p) free(p)
|
||||
#define g_clear_pointer(pp, freefn) \
|
||||
do { freefn(*(pp)); *(pp) = NULL; } while (0)
|
||||
|
||||
/* g_memdup2 — GLib's 64-bit-safe memdup, used by gstbytereader. */
|
||||
static inline gpointer
|
||||
g_memdup2(gconstpointer mem, gsize byte_size)
|
||||
{
|
||||
if (mem == NULL || byte_size == 0)
|
||||
return NULL;
|
||||
void *copy = malloc(byte_size);
|
||||
if (copy != NULL)
|
||||
memcpy(copy, mem, byte_size);
|
||||
return copy;
|
||||
}
|
||||
|
||||
/* g_strcmp0 — strcmp that tolerates NULL on either side. Used by
 * gsth265parser in profile-name lookup.
 *
 * Ordering: NULL sorts before any non-NULL string; two NULLs (or the very
 * same pointer) compare equal; otherwise the result is strcmp(a, b). */
static inline int
g_strcmp0(const char *a, const char *b)
{
    if (a != NULL && b != NULL)
        return (a == b) ? 0 : strcmp(a, b);

    /* At least one side is NULL from here on. */
    if (a == b)
        return 0;
    return (a == NULL) ? -1 : 1;
}
|
||||
|
||||
/* ===== asserts / return-guards =====
|
||||
*
|
||||
* Per ampere-kernel-decoders iter2 Phase 2 §"new failure modes" #5:
|
||||
* g_assert must NOT abort the process. It becomes a no-op here;
|
||||
* malformed bitstream is caught by the explicit parse-result returns
|
||||
* the parser already implements.
|
||||
*
|
||||
* g_return_if_fail / g_return_val_if_fail propagate as the original
|
||||
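* GLib semantics (early return with optional value).
*
* NOTE(review): g_assert_not_reached() below is NOT a no-op like g_assert.
* It expands to __builtin_unreachable(), which is undefined behaviour if
* control ever reaches it, so the compiler may prune the surrounding
* branch. Confirm every call site in the vendored files really is
* unreachable for malformed input. */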
|
||||
|
||||
#define g_assert(cond) ((void)0)
|
||||
#define g_assert_not_reached() __builtin_unreachable()
|
||||
#define g_return_if_fail(cond) do { if (!(cond)) return; } while (0)
|
||||
#define g_return_val_if_fail(cond, v) do { if (!(cond)) return (v); } while (0)
|
||||
|
||||
/* ===== GStreamer logging — no-ops =====
|
||||
*
|
||||
* The parser is heavy on debug logging. We compile all of it out;
|
||||
* the backend's own logging (request_log/error_log) wraps the parser
|
||||
* calls and reports parse-failure return codes from there. */
|
||||
|
||||
#define GST_DISABLE_GST_DEBUG 1
|
||||
|
||||
#define GST_DEBUG_CATEGORY_STATIC(name)
|
||||
#define GST_DEBUG_CATEGORY_INIT(...) ((void)0)
|
||||
#define GST_DEBUG_CATEGORY_GET(...) ((void)0)
|
||||
#define GST_DEBUG(...) ((void)0)
|
||||
#define GST_INFO(...) ((void)0)
|
||||
#define GST_WARNING(...) ((void)0)
|
||||
#define GST_ERROR(...) ((void)0)
|
||||
#define GST_LOG(...) ((void)0)
|
||||
#define GST_FIXME(...) ((void)0)
|
||||
#define GST_MEMDUMP(...) ((void)0)
|
||||
#define GST_CAT_DEFAULT (NULL)
|
||||
|
||||
/* ===== compiler / language helpers ===== */
|
||||
|
||||
#define G_LIKELY(x) __builtin_expect(!!(x), 1)
|
||||
#define G_UNLIKELY(x) __builtin_expect(!!(x), 0)
|
||||
#define G_GNUC_UNUSED __attribute__((unused))
|
||||
#define G_GNUC_INTERNAL
|
||||
#define G_GNUC_MALLOC __attribute__((malloc))
|
||||
#define G_GNUC_NORETURN __attribute__((noreturn))
|
||||
#define G_GNUC_DEPRECATED
|
||||
#define G_GNUC_DEPRECATED_FOR(x)
|
||||
#define G_GNUC_PURE __attribute__((pure))
|
||||
#define G_GNUC_CONST __attribute__((const))
|
||||
#define G_GNUC_PRINTF(a, b) __attribute__((format(printf, a, b)))
|
||||
#define G_BEGIN_DECLS
|
||||
#define G_END_DECLS
|
||||
#define G_N_ELEMENTS(arr) (sizeof(arr) / sizeof((arr)[0]))
|
||||
#define G_STMT_START do
|
||||
#define G_STMT_END while (0)
|
||||
#define G_STRINGIFY(x) G_STRINGIFY_(x)
|
||||
#define G_STRINGIFY_(x) #x
|
||||
|
||||
/* GStreamer ABI-padding slot count; upstream uses 4 reserved gpointers
|
||||
* at the end of public structs for future ABI extension. We replicate
|
||||
* the size so struct layout matches what gst_byte_reader_init / friends
|
||||
* write into. */
|
||||
#define GST_PADDING 4
|
||||
#define GST_PADDING_LARGE 20
|
||||
|
||||
/* Public-symbol visibility — backend's shared module uses
|
||||
* -fvisibility=hidden, so we don't need to mark anything public from
|
||||
* within the vendored parser. The original GST_*_API macros expand to
|
||||
* extern + dllimport on Windows; on Linux ELF builds where
|
||||
* fvisibility=hidden is active, they would mark public symbols. The
|
||||
* vendored functions are never called from outside h265_parser/, so
|
||||
* leaving these empty hides them automatically. */
|
||||
#define GST_API
|
||||
#define GST_API_EXPORT extern
|
||||
#define GST_API_IMPORT extern
|
||||
|
||||
/* ===== Opaque GStreamer pipeline types =====
|
||||
*
|
||||
* GstBuffer + GstMemory are referenced by encoder-side dead-code
|
||||
* functions in gsth265parser.c (gst_h265_parser_insert_sei_hevc).
|
||||
* We never call those; declaring them as opaque structs lets the
|
||||
* function pointers / declarations compile, and the linker keeps the
|
||||
* dead-code .text section even though it's unreachable.
|
||||
*
|
||||
* If you ever need to actually USE GstBuffer in this tree, replace
|
||||
* these opaque decls with the project's own buffer abstraction; do not
|
||||
* try to vendor in libgst itself. */
|
||||
|
||||
typedef struct _GstBuffer GstBuffer;
|
||||
typedef struct _GstMemory GstMemory;
|
||||
typedef struct _GstMapInfo GstMapInfo; /* opaque — dead-code in gsth265parser SEI insert */
|
||||
|
||||
/* GLib min/max constants — dead-code unsigned-overflow guards in
 * gsth265parser.c.
 *
 * The signed minimums are written as (-MAX - 1), the same form GLib uses.
 * The hexadecimal constant 0x80000000 does not fit in `int`, so it has an
 * unsigned type; negating it and casting back to gint32 only yields the
 * intended value through an implementation-defined conversion. Computing
 * the minimum from the signed maximum stays within signed arithmetic. */
#define G_MAXUINT8   ((guint8)0xFF)
#define G_MAXUINT16  ((guint16)0xFFFF)
#define G_MAXUINT32  ((guint32)0xFFFFFFFFU)
#define G_MAXUINT64  ((guint64)0xFFFFFFFFFFFFFFFFULL)
#define G_MAXINT8    ((gint8)0x7F)
#define G_MAXINT16   ((gint16)0x7FFF)
#define G_MAXINT32   ((gint32)0x7FFFFFFF)
#define G_MAXINT64   ((gint64)0x7FFFFFFFFFFFFFFFLL)
#define G_MININT8    ((gint8)(-G_MAXINT8 - 1))
#define G_MININT16   ((gint16)(-G_MAXINT16 - 1))
#define G_MININT32   ((gint32)(-G_MAXINT32 - 1))
#define G_MAXSIZE    ((gsize)-1)
|
||||
|
||||
/* GLib function-pointer typedefs used by g_list_* APIs (which our
|
||||
* gst_compat declares as abort-stubs). They show up in code paths
|
||||
* we never invoke but must compile. */
|
||||
typedef void (*GDestroyNotify)(gpointer data);
|
||||
typedef int (*GCompareFunc)(gconstpointer a, gconstpointer b);
|
||||
typedef int (*GCompareDataFunc)(gconstpointer a, gconstpointer b, gpointer user_data);
|
||||
|
||||
/* GstMapFlags — passed to gst_memory_map / gst_buffer_map. Dead-code. */
|
||||
#define GST_MAP_READ (1 << 0)
|
||||
#define GST_MAP_WRITE (1 << 1)
|
||||
#define GST_MAP_READWRITE (GST_MAP_READ | GST_MAP_WRITE)
|
||||
|
||||
/* Dead-code stubs for buffer / memory mapping (only referenced by
|
||||
* gst_h265_parser_insert_sei_hevc which we never call). The compile
|
||||
* needs declarations + addressable functions; abort on call. */
|
||||
static inline gboolean
|
||||
gst_memory_map(GstMemory *mem G_GNUC_UNUSED, GstMapInfo *info G_GNUC_UNUSED,
|
||||
int flags G_GNUC_UNUSED) { abort(); }
|
||||
static inline void
|
||||
gst_memory_unmap(GstMemory *mem G_GNUC_UNUSED, GstMapInfo *info G_GNUC_UNUSED) { abort(); }
|
||||
static inline gboolean
|
||||
gst_buffer_map(GstBuffer *buf G_GNUC_UNUSED, GstMapInfo *info G_GNUC_UNUSED,
|
||||
int flags G_GNUC_UNUSED) { abort(); }
|
||||
static inline void
|
||||
gst_buffer_unmap(GstBuffer *buf G_GNUC_UNUSED, GstMapInfo *info G_GNUC_UNUSED) { abort(); }
|
||||
static inline GstBuffer *
|
||||
gst_buffer_new(void) { abort(); }
|
||||
static inline gboolean
|
||||
gst_buffer_copy_into(GstBuffer *dst G_GNUC_UNUSED, GstBuffer *src G_GNUC_UNUSED,
|
||||
int flags G_GNUC_UNUSED, gsize offset G_GNUC_UNUSED,
|
||||
gssize size G_GNUC_UNUSED) { abort(); }
|
||||
static inline void
|
||||
gst_buffer_append_memory(GstBuffer *buf G_GNUC_UNUSED, GstMemory *mem G_GNUC_UNUSED) { abort(); }
|
||||
static inline GstMemory *
|
||||
gst_memory_ref(GstMemory *mem G_GNUC_UNUSED) { abort(); }
|
||||
static inline void
|
||||
gst_memory_unref(GstMemory *mem G_GNUC_UNUSED) { abort(); }
|
||||
static inline GstMemory *
|
||||
gst_memory_copy(GstMemory *mem G_GNUC_UNUSED, gssize offset G_GNUC_UNUSED, gssize size G_GNUC_UNUSED) { abort(); }
|
||||
static inline void
|
||||
gst_clear_buffer(GstBuffer **buf) { *buf = NULL; }
|
||||
#define GST_IS_BUFFER(b) (false)
|
||||
|
||||
/* GstBufferCopyFlags — used only by gst_buffer_copy_into in dead code. */
|
||||
#define GST_BUFFER_COPY_METADATA (1 << 0)
|
||||
#define GST_BUFFER_COPY_MEMORY (1 << 1)
|
||||
#define GST_BUFFER_COPY_DEEP (1 << 2)
|
||||
|
||||
/* gst_util_ceil_log2(n) — ceil(log2(n)) for non-zero unsigned n.
|
||||
* Used by gsth265parser.c::gst_h265_slice_parse_ref_pic_list_modification.
|
||||
* That function is in the slice-header parser which the libva backend
|
||||
* does NOT invoke (we only call parse_sps) — but the linker still
|
||||
* needs a definition. Provide a real impl: cheaper to compute than to
|
||||
* justify a dead-code stub at every call site. */
|
||||
static inline guint
|
||||
gst_util_ceil_log2(guint32 n)
|
||||
{
|
||||
if (n <= 1) return 0;
|
||||
/* __builtin_clz returns leading zeros for a 32-bit value;
|
||||
* 32 - clz(n-1) = bits needed = ceil(log2(n)). */
|
||||
return 32 - (guint)__builtin_clz(n - 1);
|
||||
}
|
||||
|
||||
/* GstMapInfo's real definition is in <gst/gstmemory.h>; we need at
|
||||
* least enough to make `info->data` / `info->size` compile. */
|
||||
struct _GstMapInfo {
|
||||
GstMemory *memory;
|
||||
int flags;
|
||||
guint8 *data;
|
||||
gsize size;
|
||||
gsize maxsize;
|
||||
gpointer user_data[4];
|
||||
gpointer _gst_reserved[GST_PADDING];
|
||||
};
|
||||
|
||||
/* gst_memory_new_wrapped — dead-code stub (nalutils.c calls it from
|
||||
* the SEI-insertion path the libva backend never invokes). */
|
||||
static inline GstMemory *
|
||||
gst_memory_new_wrapped(int flags, gpointer data, gsize maxsize,
|
||||
gsize offset, gsize size, gpointer user_data,
|
||||
void (*notify)(gpointer))
|
||||
{
|
||||
(void)flags; (void)data; (void)maxsize; (void)offset; (void)size;
|
||||
(void)user_data; (void)notify;
|
||||
abort();
|
||||
}
|
||||
|
||||
/* ===== byte-order read / write macros =====
|
||||
*
|
||||
* GStreamer provides these as static-inline functions in
|
||||
* <gst/gstutils.h>. We re-implement for aarch64 little-endian; the
|
||||
* parser is byte-stream input, so endian-conversion is mechanical.
|
||||
* The float / double variants are present in upstream but the parser
|
||||
* never invokes them — provide stubs so the address-taking sites in
|
||||
* gstbytereader.h's function table compile. */
|
||||
|
||||
#define GST_READ_UINT8(data) \
|
||||
(*((const guint8 *)(data)))
|
||||
|
||||
#define GST_READ_UINT16_LE(data) ( \
|
||||
((guint16)((const guint8 *)(data))[0]) | \
|
||||
((guint16)((const guint8 *)(data))[1] << 8))
|
||||
|
||||
#define GST_READ_UINT16_BE(data) ( \
|
||||
((guint16)((const guint8 *)(data))[0] << 8) | \
|
||||
((guint16)((const guint8 *)(data))[1]))
|
||||
|
||||
#define GST_READ_UINT24_LE(data) ( \
|
||||
((guint32)((const guint8 *)(data))[0]) | \
|
||||
((guint32)((const guint8 *)(data))[1] << 8) | \
|
||||
((guint32)((const guint8 *)(data))[2] << 16))
|
||||
|
||||
#define GST_READ_UINT24_BE(data) ( \
|
||||
((guint32)((const guint8 *)(data))[0] << 16) | \
|
||||
((guint32)((const guint8 *)(data))[1] << 8) | \
|
||||
((guint32)((const guint8 *)(data))[2]))
|
||||
|
||||
#define GST_READ_UINT32_LE(data) ( \
|
||||
((guint32)((const guint8 *)(data))[0]) | \
|
||||
((guint32)((const guint8 *)(data))[1] << 8) | \
|
||||
((guint32)((const guint8 *)(data))[2] << 16) | \
|
||||
((guint32)((const guint8 *)(data))[3] << 24))
|
||||
|
||||
#define GST_READ_UINT32_BE(data) ( \
|
||||
((guint32)((const guint8 *)(data))[0] << 24) | \
|
||||
((guint32)((const guint8 *)(data))[1] << 16) | \
|
||||
((guint32)((const guint8 *)(data))[2] << 8) | \
|
||||
((guint32)((const guint8 *)(data))[3]))
|
||||
|
||||
#define GST_READ_UINT64_LE(data) ( \
|
||||
((guint64)((const guint8 *)(data))[0]) | \
|
||||
((guint64)((const guint8 *)(data))[1] << 8) | \
|
||||
((guint64)((const guint8 *)(data))[2] << 16) | \
|
||||
((guint64)((const guint8 *)(data))[3] << 24) | \
|
||||
((guint64)((const guint8 *)(data))[4] << 32) | \
|
||||
((guint64)((const guint8 *)(data))[5] << 40) | \
|
||||
((guint64)((const guint8 *)(data))[6] << 48) | \
|
||||
((guint64)((const guint8 *)(data))[7] << 56))
|
||||
|
||||
#define GST_READ_UINT64_BE(data) ( \
|
||||
((guint64)((const guint8 *)(data))[0] << 56) | \
|
||||
((guint64)((const guint8 *)(data))[1] << 48) | \
|
||||
((guint64)((const guint8 *)(data))[2] << 40) | \
|
||||
((guint64)((const guint8 *)(data))[3] << 32) | \
|
||||
((guint64)((const guint8 *)(data))[4] << 24) | \
|
||||
((guint64)((const guint8 *)(data))[5] << 16) | \
|
||||
((guint64)((const guint8 *)(data))[6] << 8) | \
|
||||
((guint64)((const guint8 *)(data))[7]))
|
||||
|
||||
/* Float / double readers — dead-code, abort if called. The function
|
||||
* table in gstbytereader.h takes the address of the underlying inline
|
||||
* which we don't need to be functional, only addressable. */
|
||||
static inline gfloat
|
||||
GST_READ_FLOAT_LE(const guint8 *data) { (void)data; abort(); }
|
||||
static inline gfloat
|
||||
GST_READ_FLOAT_BE(const guint8 *data) { (void)data; abort(); }
|
||||
static inline gdouble
|
||||
GST_READ_DOUBLE_LE(const guint8 *data) { (void)data; abort(); }
|
||||
static inline gdouble
|
||||
GST_READ_DOUBLE_BE(const guint8 *data) { (void)data; abort(); }
|
||||
|
||||
/* Write side — nalutils.c writes out SEI bytes (dead path for us but
 * must compile).
 *
 * Each macro stores `val` big-endian at `data`, one byte at a time, so the
 * result does not depend on host endianness or on the alignment of `data`.
 * Both arguments are captured in locals first, so each is evaluated exactly
 * once and expressions with side effects (e.g. `p++`) behave as they would
 * with a function call. uint8_t/uint16_t/uint32_t are the <stdint.h> types
 * the guint8/guint16/guint32 aliases in this header map to. */
#define GST_WRITE_UINT8(data, val) do { \
    uint8_t *gst_wr_p_ = (uint8_t *)(data); \
    gst_wr_p_[0] = (uint8_t)(val); \
} while (0)

#define GST_WRITE_UINT16_BE(data, val) do { \
    uint8_t *gst_wr_p_ = (uint8_t *)(data); \
    const uint16_t gst_wr_v_ = (uint16_t)(val); \
    gst_wr_p_[0] = (uint8_t)(gst_wr_v_ >> 8); \
    gst_wr_p_[1] = (uint8_t)(gst_wr_v_); \
} while (0)

#define GST_WRITE_UINT24_BE(data, val) do { \
    uint8_t *gst_wr_p_ = (uint8_t *)(data); \
    const uint32_t gst_wr_v_ = (uint32_t)(val); \
    gst_wr_p_[0] = (uint8_t)(gst_wr_v_ >> 16); \
    gst_wr_p_[1] = (uint8_t)(gst_wr_v_ >> 8); \
    gst_wr_p_[2] = (uint8_t)(gst_wr_v_); \
} while (0)

#define GST_WRITE_UINT32_BE(data, val) do { \
    uint8_t *gst_wr_p_ = (uint8_t *)(data); \
    const uint32_t gst_wr_v_ = (uint32_t)(val); \
    gst_wr_p_[0] = (uint8_t)(gst_wr_v_ >> 24); \
    gst_wr_p_[1] = (uint8_t)(gst_wr_v_ >> 16); \
    gst_wr_p_[2] = (uint8_t)(gst_wr_v_ >> 8); \
    gst_wr_p_[3] = (uint8_t)(gst_wr_v_); \
} while (0)
|
||||
|
||||
#ifndef MIN
|
||||
# define MIN(a, b) ((a) < (b) ? (a) : (b))
|
||||
#endif
|
||||
#ifndef MAX
|
||||
# define MAX(a, b) ((a) > (b) ? (a) : (b))
|
||||
#endif
|
||||
|
||||
/* ===== GArray ===== */
|
||||
|
||||
typedef struct {
|
||||
char *data; /* exposed via g_array_index / GArray->data */
|
||||
guint len; /* element count */
|
||||
guint capacity; /* allocated element slots */
|
||||
guint element_size;
|
||||
gboolean clear; /* zero-fill on grow */
|
||||
void (*clear_func)(gpointer);
|
||||
} GArray;
|
||||
|
||||
GArray *g_array_new(gboolean zero_terminated, gboolean clear, guint element_size);
|
||||
GArray *g_array_sized_new(gboolean zero_terminated, gboolean clear,
|
||||
guint element_size, guint reserved_size);
|
||||
GArray *g_array_set_size(GArray *array, guint length);
|
||||
GArray *g_array_append_vals(GArray *array, gconstpointer data, guint len);
|
||||
void g_array_set_clear_func(GArray *array, void (*clear_func)(gpointer));
|
||||
gchar *g_array_free(GArray *array, gboolean free_segment);
|
||||
GArray *g_array_unref(GArray *array);
|
||||
|
||||
#define g_array_append_val(a, v) g_array_append_vals((a), &(v), 1)
|
||||
#define g_array_index(a, t, i) (((t *)(void *)(a)->data)[i])
|
||||
|
||||
/* ===== GList — stubs that abort if reached =====
|
||||
*
|
||||
* Surveyed call sites: gsth265parser.c uses g_list_prepend / g_list_sort /
|
||||
* g_list_free_full in code paths the libva backend does not invoke for
|
||||
* basic SPS parsing (likely SEI message accumulation). Stub to abort so
|
||||
* any future call surfaces immediately rather than silently corrupting. */
|
||||
|
||||
/* GList — full struct (not opaque) so callers can do `list->data`.
|
||||
* The functions still abort because we never construct a GList. */
|
||||
typedef struct _GList GList;
|
||||
struct _GList {
|
||||
gpointer data;
|
||||
GList *next;
|
||||
GList *prev;
|
||||
};
|
||||
|
||||
static inline GList *g_list_prepend(GList *list G_GNUC_UNUSED, gpointer data G_GNUC_UNUSED) { abort(); }
|
||||
static inline GList *g_list_sort(GList *list G_GNUC_UNUSED, int (*cmp)(gconstpointer, gconstpointer) G_GNUC_UNUSED) { abort(); }
|
||||
static inline void g_list_free_full(GList *list G_GNUC_UNUSED, void (*free_func)(gpointer) G_GNUC_UNUSED) { abort(); }
|
||||
|
||||
/* ===== g_once_init_enter / g_once_init_leave =====
|
||||
*
|
||||
* GLib's lazy-init guards. The parser uses these for one-shot static
|
||||
* initialization (e.g. profile-name table). Our backend is single-
|
||||
* threaded at the parser-init site (driver_init), so we can simplify
|
||||
* to a plain run-once gate. */
|
||||
|
||||
#define g_once_init_enter(loc) (*(loc) == 0)
|
||||
#define g_once_init_leave(loc, val) (*(loc) = (val))
|
||||
|
||||
/* ===== conversions ===== */
|
||||
|
||||
#define GINT_TO_POINTER(i) ((gpointer)(uintptr_t)(gint)(i))
|
||||
#define GPOINTER_TO_INT(p) ((gint)(uintptr_t)(p))
|
||||
|
||||
#endif /* LIBVA_V4L2_REQUEST_FOURIER_GST_COMPAT_H */
|
||||
@@ -0,0 +1,90 @@
|
||||
/*
|
||||
* v4l2-hevc-ext-controls.h — verbatim mirror of Linux 7.0+ V4L2 stateless
|
||||
* HEVC extended-SPS RPS control definitions, shipped as an internal
|
||||
* header so this libva backend can be built against pre-7.0
|
||||
* linux-api-headers packages (currently ampere ships 6.19-1).
|
||||
*
|
||||
* Upstream source: linux kernel, include/uapi/linux/v4l2-controls.h
|
||||
* As-of: Linux 7.0-rc3 (Detlev Casanova / Collabora "VDPU381/VDPU383"
|
||||
* series, see lkml.org/lkml/2026/1/9/1334). The two CIDs + two structs
|
||||
* + two flag macros below are byte-for-byte the kernel UAPI definitions.
|
||||
*
|
||||
* Once linux-api-headers >= 7.0 is the floor across the fleet, this
|
||||
* shim becomes redundant — `<linux/v4l2-controls.h>` will provide the
|
||||
* same symbols. The include order in h265.c is: this header BEFORE
|
||||
* <linux/v4l2-controls.h>, so when the system catches up, the macro
|
||||
* guards below silently no-op and we use the system definitions.
|
||||
*
|
||||
* License: MIT (matches backend's COPYING.MIT). Per the Linux-syscall-note exception carried by the UAPI headers, the
|
||||
* kernel UAPI struct definitions themselves are excepted from the
|
||||
* kernel's overall GPL and may be copied verbatim into userspace
|
||||
* binaries without inheriting GPL.
|
||||
*
|
||||
* Rationale + iter2 plan: see
|
||||
* ~/src/ampere-kernel-decoders/phase4_plan_iter2.md (§Step 3)
|
||||
* ~/src/ampere-kernel-decoders/phase0_findings_iter2.md
|
||||
*/
|
||||
|
||||
#ifndef LIBVA_V4L2_REQUEST_FOURIER_V4L2_HEVC_EXT_CONTROLS_H
|
||||
#define LIBVA_V4L2_REQUEST_FOURIER_V4L2_HEVC_EXT_CONTROLS_H
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <linux/v4l2-controls.h>
|
||||
|
||||
/*
 * Fallback definitions for the HEVC extended-SPS RPS controls.
 *
 * <linux/v4l2-controls.h> has already been included at this point. The
 * upstream header provides each control ID together with its struct, so a
 * control ID that is still undefined here means the installed header
 * predates the control and the matching struct is missing too. Each struct
 * is therefore defined only inside the "CID not provided" branch; defining
 * it unconditionally would redefine the struct (a compile error) as soon as
 * the system header ships it.
 *
 * The V4L2_HEVC_EXT_SPS_*_RPS_DEFINED markers are private to this backend.
 * They are always defined once this header has been processed, whichever
 * side supplied the struct, and pre-defining one still suppresses the local
 * struct definition.
 *
 * NOTE(review): this assumes nothing other than the system header defines
 * the CID macros before this point — confirm no other project header does.
 */

/* ---- short-term RPS ---- */

#ifndef V4L2_CID_STATELESS_HEVC_EXT_SPS_ST_RPS
# define V4L2_CID_STATELESS_HEVC_EXT_SPS_ST_RPS \
	(V4L2_CID_CODEC_STATELESS_BASE + 408)

# ifndef V4L2_HEVC_EXT_SPS_ST_RPS_DEFINED
/*
 * struct v4l2_ctrl_hevc_ext_sps_st_rps — HEVC short-term RPS parameters.
 *
 * Dynamic-size 1-dimension array. Number of elements is
 * v4l2_ctrl_hevc_sps::num_short_term_ref_pic_sets
 * Can contain up to 65 elements (the H.265 spec § 7.4.3.2.1 maximum).
 */
struct v4l2_ctrl_hevc_ext_sps_st_rps {
	__u8	delta_idx_minus1;
	__u8	delta_rps_sign;
	__u8	num_negative_pics;
	__u8	num_positive_pics;
	__u32	used_by_curr_pic;
	__u32	use_delta_flag;
	__u16	abs_delta_rps_minus1;
	__u16	delta_poc_s0_minus1[16];
	__u16	delta_poc_s1_minus1[16];
	__u16	flags;
};
# endif
#endif

#ifndef V4L2_HEVC_EXT_SPS_ST_RPS_DEFINED
# define V4L2_HEVC_EXT_SPS_ST_RPS_DEFINED 1
#endif

#ifndef V4L2_HEVC_EXT_SPS_ST_RPS_FLAG_INTER_REF_PIC_SET_PRED
# define V4L2_HEVC_EXT_SPS_ST_RPS_FLAG_INTER_REF_PIC_SET_PRED	0x1
#endif

/* ---- long-term RPS ---- */

#ifndef V4L2_CID_STATELESS_HEVC_EXT_SPS_LT_RPS
# define V4L2_CID_STATELESS_HEVC_EXT_SPS_LT_RPS \
	(V4L2_CID_CODEC_STATELESS_BASE + 409)

# ifndef V4L2_HEVC_EXT_SPS_LT_RPS_DEFINED
/*
 * struct v4l2_ctrl_hevc_ext_sps_lt_rps — HEVC long-term RPS parameters.
 *
 * Dynamic-size 1-dimension array. Number of elements is
 * v4l2_ctrl_hevc_sps::num_long_term_ref_pics_sps
 * Can contain up to 33 elements (the H.265 spec § 7.4.3.2.1 maximum).
 */
struct v4l2_ctrl_hevc_ext_sps_lt_rps {
	__u16	lt_ref_pic_poc_lsb_sps;
	__u16	flags;
};
# endif
#endif

#ifndef V4L2_HEVC_EXT_SPS_LT_RPS_DEFINED
# define V4L2_HEVC_EXT_SPS_LT_RPS_DEFINED 1
#endif

#ifndef V4L2_HEVC_EXT_SPS_LT_RPS_FLAG_USED_LT
# define V4L2_HEVC_EXT_SPS_LT_RPS_FLAG_USED_LT	0x1
#endif
|
||||
|
||||
#endif /* LIBVA_V4L2_REQUEST_FOURIER_V4L2_HEVC_EXT_CONTROLS_H */
|
||||
+158
-3
@@ -31,7 +31,13 @@
|
||||
#include "video.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <fcntl.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <sys/ioctl.h>
|
||||
|
||||
#include <linux/dma-buf.h>
|
||||
|
||||
#include "tiled_yuv.h"
|
||||
#include "utils.h"
|
||||
@@ -125,6 +131,7 @@ VAStatus RequestCreateImage(VADriverContextP context, VAImageFormat *format,
|
||||
|
||||
VAStatus RequestDestroyImage(VADriverContextP context, VAImageID image_id)
|
||||
{
|
||||
|
||||
struct request_data *driver_data = context->pDriverData;
|
||||
struct object_image *image_object;
|
||||
VAStatus status;
|
||||
@@ -149,12 +156,111 @@ static VAStatus copy_surface_to_image (struct request_data *driver_data,
|
||||
{
|
||||
struct object_buffer *buffer_object;
|
||||
unsigned int i;
|
||||
int sync_fds[VIDEO_MAX_PLANES];
|
||||
unsigned int n_sync_fds = 0;
|
||||
|
||||
buffer_object = BUFFER(driver_data, image->buf);
|
||||
if (buffer_object == NULL)
|
||||
return VA_STATUS_ERROR_INVALID_BUFFER;
|
||||
|
||||
for (i = 0; i < VIDEO_MAX_PLANES; i++)
|
||||
sync_fds[i] = -1;
|
||||
|
||||
/*
|
||||
* iter13 α-17: explicit cache sync around the CAPTURE buffer read.
|
||||
*
|
||||
* The CAPTURE buffer is V4L2_MEMORY_MMAP and was mapped at
|
||||
* cap_pool_init time with cached attributes. Kernel decode writes to
|
||||
* the buffer via DMA, which doesn't propagate to the CPU's cache
|
||||
* observer for that virtual mapping. Reading from
|
||||
* surface_object->destination_data[] without an explicit cache
|
||||
* invalidation returns stale data — observed empirically as Bug 4
|
||||
* (H.264 partial-fill) and Bug 5 (HEVC all-zero) when libva went
|
||||
* through the SAME readback path that kdirect ffmpeg-v4l2request +
|
||||
* DRM_PRIME-mmap successfully reads (kdirect's drm-prime mmap
|
||||
* implicitly handles sync).
|
||||
*
|
||||
* DMA_BUF_IOCTL_SYNC(START | READ) makes the CPU mapping coherent
|
||||
* with the producing engine's writes; END releases the sync.
|
||||
* Per V4L2 + dma-buf spec, this is the userspace contract for
|
||||
* cached-mmap'd buffers (Tomasz Figa, linaro-mm-sig 2024-07-11).
|
||||
*
|
||||
* Requires a dma-buf fd: get one via VIDIOC_EXPBUF, sync, close.
|
||||
* Per-call cost is one ioctl pair + one fd open/close per plane.
|
||||
* Could be optimised by caching the EXPBUF fd on the cap_pool slot,
|
||||
* but doing it just-in-time keeps the lifecycle uncomplicated. The
|
||||
* EXPBUF fd's dup count doesn't affect the V4L2 buffer's underlying
|
||||
* pages; closing the fd is a no-op on memory.
|
||||
*
|
||||
* If EXPBUF fails (e.g., consumer-held EXPBUF prevents a second one
|
||||
* — only true for hantro G1 oddity), we skip the sync silently. The
|
||||
* existing pre-iter13 behavior is preserved on the error path.
|
||||
*/
|
||||
if (surface_object->current_slot != NULL &&
|
||||
driver_data->video_format != NULL) {
|
||||
unsigned int capture_type =
|
||||
v4l2_type_video_capture(driver_data->video_format->v4l2_mplane);
|
||||
if (v4l2_export_buffer(driver_data->video_fd, capture_type,
|
||||
surface_object->destination_index,
|
||||
O_RDONLY, sync_fds,
|
||||
surface_object->destination_buffers_count) >= 0) {
|
||||
n_sync_fds = surface_object->destination_buffers_count;
|
||||
for (i = 0; i < n_sync_fds; i++) {
|
||||
struct dma_buf_sync s = {
|
||||
.flags = DMA_BUF_SYNC_START |
|
||||
DMA_BUF_SYNC_READ,
|
||||
};
|
||||
/* failure is non-fatal: we continue with the read */
|
||||
(void)ioctl(sync_fds[i], DMA_BUF_IOCTL_SYNC, &s);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* AV1 film_grain: when this surface is the display surface of a
|
||||
* decode (current_display_picture != current_frame with apply_grain=1),
|
||||
* its slot is NULL because BeginPicture only fired on the decode
|
||||
* surface. Follow the back-link set in av1_set_controls and borrow
|
||||
* the decode surface's destination_data + sizes for the copy.
|
||||
*/
|
||||
if (surface_object->current_slot == NULL &&
|
||||
surface_object->linked_decode_surface_id != VA_INVALID_SURFACE) {
|
||||
struct object_surface *decode_surface =
|
||||
SURFACE(driver_data,
|
||||
surface_object->linked_decode_surface_id);
|
||||
if (decode_surface != NULL &&
|
||||
decode_surface->current_slot != NULL) {
|
||||
/* Mirror the fields we read below. The surface heap
|
||||
* pointer is stable for the surface's lifetime; we
|
||||
* only need destination_data + destination_sizes +
|
||||
* destination_planes_count from it. */
|
||||
surface_object->destination_planes_count =
|
||||
decode_surface->destination_planes_count;
|
||||
for (i = 0; i < decode_surface->destination_planes_count; i++) {
|
||||
surface_object->destination_data[i] =
|
||||
decode_surface->destination_data[i];
|
||||
surface_object->destination_sizes[i] =
|
||||
decode_surface->destination_sizes[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < surface_object->destination_planes_count; i++) {
|
||||
/* AV1 Phase 3 diag: surface NULL-deref hunt. */
|
||||
if (buffer_object->data == NULL ||
|
||||
surface_object->destination_data[i] == NULL) {
|
||||
request_log("copy_surface_to_image NULL i=%u "
|
||||
"buf_data=%p dest_data=%p dest_size=%u "
|
||||
"planes=%u slot=%p linked=0x%x\n",
|
||||
i, (void *)buffer_object->data,
|
||||
(void *)surface_object->destination_data[i],
|
||||
surface_object->destination_sizes[i],
|
||||
surface_object->destination_planes_count,
|
||||
(void *)surface_object->current_slot,
|
||||
surface_object->linked_decode_surface_id);
|
||||
return VA_STATUS_ERROR_OPERATION_FAILED;
|
||||
}
|
||||
#ifdef __arm__
|
||||
if (!video_format_is_linear(driver_data->video_format))
|
||||
tiled_to_planar(surface_object->destination_data[i],
|
||||
buffer_object->data + image->offsets[i],
|
||||
@@ -162,10 +268,22 @@ static VAStatus copy_surface_to_image (struct request_data *driver_data,
|
||||
i == 0 ? image->height :
|
||||
image->height / 2);
|
||||
else {
|
||||
#endif
|
||||
memcpy(buffer_object->data + image->offsets[i],
|
||||
surface_object->destination_data[i],
|
||||
surface_object->destination_sizes[i]);
|
||||
#ifdef __arm__
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/* iter13 α-17: release cache sync. END pairs with each START. */
|
||||
for (i = 0; i < n_sync_fds; i++) {
|
||||
struct dma_buf_sync s = {
|
||||
.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_READ,
|
||||
};
|
||||
(void)ioctl(sync_fds[i], DMA_BUF_IOCTL_SYNC, &s);
|
||||
close(sync_fds[i]);
|
||||
}
|
||||
|
||||
return VA_STATUS_SUCCESS;
|
||||
@@ -180,6 +298,7 @@ VAStatus RequestDeriveImage(VADriverContextP context, VASurfaceID surface_id,
|
||||
VAImageFormat format;
|
||||
VAStatus status;
|
||||
|
||||
|
||||
surface_object = SURFACE(driver_data, surface_id);
|
||||
if (surface_object == NULL)
|
||||
return VA_STATUS_ERROR_INVALID_SURFACE;
|
||||
@@ -190,16 +309,33 @@ VAStatus RequestDeriveImage(VADriverContextP context, VASurfaceID surface_id,
|
||||
return status;
|
||||
}
|
||||
|
||||
/* Fully populate VAImageFormat to match QueryImageFormats output. */
|
||||
memset(&format, 0, sizeof(format));
|
||||
format.fourcc = VA_FOURCC_NV12;
|
||||
format.byte_order = VA_LSB_FIRST;
|
||||
format.bits_per_pixel = 12;
|
||||
|
||||
status = RequestCreateImage(context, &format, surface_object->width,
|
||||
surface_object->height, image);
|
||||
if (status != VA_STATUS_SUCCESS)
|
||||
return status;
|
||||
|
||||
status = copy_surface_to_image (driver_data, surface_object, image);
|
||||
if (status != VA_STATUS_SUCCESS)
|
||||
return status;
|
||||
/*
|
||||
* Iter2 Fix 3: skip the surface→image copy when no CAPTURE slot is
|
||||
* bound. ffmpeg's av_hwframe_ctx_init probes vaDeriveImage on a
|
||||
* never-decoded surface to learn the format; it doesn't read the
|
||||
* data. With the cap_pool decoupling, destination_data[] is NULL
|
||||
* until BeginPicture binds a slot — copying from a NULL source
|
||||
* crashed in memcpy. The image's buffer remains zero-initialized;
|
||||
* subsequent post-decode DeriveImage on the same surface (after
|
||||
* BeginPicture has bound a slot) does the real copy.
|
||||
*/
|
||||
if (surface_object->current_slot != NULL) {
|
||||
status = copy_surface_to_image (driver_data, surface_object,
|
||||
image);
|
||||
if (status != VA_STATUS_SUCCESS)
|
||||
return status;
|
||||
}
|
||||
|
||||
surface_object->status = VASurfaceReady;
|
||||
|
||||
@@ -212,7 +348,25 @@ VAStatus RequestDeriveImage(VADriverContextP context, VASurfaceID surface_id,
|
||||
VAStatus RequestQueryImageFormats(VADriverContextP context,
|
||||
VAImageFormat *formats, int *formats_count)
|
||||
{
|
||||
|
||||
/*
|
||||
* Populate the VAImageFormat fully per VAAPI spec for NV12 —
|
||||
* not just .fourcc. Consumers (FFmpeg's hwcontext_vaapi, mpv,
|
||||
* Firefox) read .byte_order and .bits_per_pixel; leaving them
|
||||
* uninitialized inherits whatever caller-stack garbage is in
|
||||
* the buffer and produces non-deterministic behavior. Reference:
|
||||
* Mesa's gallium/frontends/va/image.c::vlVaQueryImageFormats and
|
||||
* intel-vaapi-driver's i965_drv_video.c — both publish NV12
|
||||
* with byte_order=VA_LSB_FIRST and bits_per_pixel=12.
|
||||
*
|
||||
* For YUV formats, depth/red_mask/green_mask/blue_mask/alpha_mask
|
||||
* are not meaningful (those describe RGB bit layouts); leave them
|
||||
* zeroed via memset before populating.
|
||||
*/
|
||||
memset(&formats[0], 0, sizeof(formats[0]));
|
||||
formats[0].fourcc = VA_FOURCC_NV12;
|
||||
formats[0].byte_order = VA_LSB_FIRST;
|
||||
formats[0].bits_per_pixel = 12;
|
||||
*formats_count = 1;
|
||||
|
||||
return VA_STATUS_SUCCESS;
|
||||
@@ -233,6 +387,7 @@ VAStatus RequestGetImage(VADriverContextP context, VASurfaceID surface_id,
|
||||
struct object_image *image_object;
|
||||
VAImage *image;
|
||||
|
||||
|
||||
surface_object = SURFACE(driver_data, surface_id);
|
||||
if (surface_object == NULL)
|
||||
return VA_STATUS_ERROR_INVALID_SURFACE;
|
||||
|
||||
+8
-8
@@ -26,7 +26,7 @@
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/select.h>
|
||||
#include <poll.h>
|
||||
|
||||
#include <linux/media.h>
|
||||
|
||||
@@ -78,19 +78,19 @@ int media_request_queue(int request_fd)
|
||||
|
||||
int media_request_wait_completion(int request_fd)
|
||||
{
|
||||
struct timeval tv = { 0, 300000 };
|
||||
fd_set except_fds;
|
||||
/* poll() instead of select(): Firefox's RDD seccomp policy admits
|
||||
* poll/ppoll but not select/pselect6 (as of FF150). Functionally
|
||||
* equivalent here — the media request fd signals completion via
|
||||
* exceptional condition, mapped to POLLPRI for poll(). */
|
||||
struct pollfd pfd = { .fd = request_fd, .events = POLLPRI };
|
||||
int rc;
|
||||
|
||||
FD_ZERO(&except_fds);
|
||||
FD_SET(request_fd, &except_fds);
|
||||
|
||||
rc = select(request_fd + 1, NULL, NULL, &except_fds, &tv);
|
||||
rc = poll(&pfd, 1, 300 /* ms */);
|
||||
if (rc == 0) {
|
||||
request_log("Timeout when waiting for media request\n");
|
||||
return -1;
|
||||
} else if (rc < 0) {
|
||||
request_log("Unable to select media request: %s\n",
|
||||
request_log("Unable to poll media request: %s\n",
|
||||
strerror(errno));
|
||||
return -1;
|
||||
}
|
||||
|
||||
+47
-3
@@ -44,7 +44,23 @@ sources = [
|
||||
'v4l2.c',
|
||||
'mpeg2.c',
|
||||
'h264.c',
|
||||
'h265.c'
|
||||
'h264_slice_header.c',
|
||||
'request_pool.c',
|
||||
'cap_pool.c',
|
||||
'h265.c',
|
||||
'vp8.c',
|
||||
'vp9.c',
|
||||
'av1.c',
|
||||
'codec.c',
|
||||
|
||||
# Vendored GStreamer 1.28.2 H.265 parser + utilities (LGPL v2.1+,
|
||||
# see src/h265_parser/gst_compat.h for sourcing notes + per-iter2
|
||||
# adaptation strategy).
|
||||
'h265_parser/gst_compat.c',
|
||||
'h265_parser/gst/base/gstbitreader.c',
|
||||
'h265_parser/gst/base/gstbytereader.c',
|
||||
'h265_parser/gst/codecparsers/nalutils.c',
|
||||
'h265_parser/gst/codecparsers/gsth265parser.c'
|
||||
]
|
||||
|
||||
headers = [
|
||||
@@ -64,11 +80,39 @@ headers = [
|
||||
'v4l2.h',
|
||||
'mpeg2.h',
|
||||
'h264.h',
|
||||
'h265.h'
|
||||
'h264_slice_header.h',
|
||||
'request_pool.h',
|
||||
'cap_pool.h',
|
||||
'h265.h',
|
||||
'vp8.h',
|
||||
'vp9.h',
|
||||
'av1.h',
|
||||
'codec.h',
|
||||
|
||||
# Internal mirror of Linux 7.0 V4L2 HEVC EXT_SPS_*_RPS UAPI defs
|
||||
# (allows building against pre-7.0 linux-api-headers; redundant
|
||||
# once the host headers are 7.0+).
|
||||
'hevc-ctrls/v4l2-hevc-ext-controls.h',
|
||||
|
||||
# Vendored GStreamer + project shim headers (see sources above).
|
||||
'h265_parser/gst_compat.h',
|
||||
'h265_parser/gst/gst.h',
|
||||
'h265_parser/gst/glib-compat-private.h',
|
||||
'h265_parser/gst/base/base-prelude.h',
|
||||
'h265_parser/gst/base/gstbitreader.h',
|
||||
'h265_parser/gst/base/gstbytereader.h',
|
||||
'h265_parser/gst/base/gstbitwriter.h',
|
||||
'h265_parser/gst/codecparsers/codecparsers-prelude.h',
|
||||
'h265_parser/gst/codecparsers/gsth265parser.h',
|
||||
'h265_parser/gst/codecparsers/nalutils.h'
|
||||
]
|
||||
|
||||
includes = [
|
||||
include_directories('../include')
|
||||
include_directories('../include'),
|
||||
# Vendored GStreamer parser tree — the parser's #include <gst/base/...>
|
||||
# style references resolve here via stub headers that redirect to
|
||||
# gst_compat.h.
|
||||
include_directories('h265_parser')
|
||||
]
|
||||
|
||||
cflags = [
|
||||
|
||||
+182
-87
@@ -23,6 +23,34 @@
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* fresnel-fourier iter1 Phase 6 commit B: rewrite against new split
|
||||
* V4L2_CID_STATELESS_MPEG2_{SEQUENCE,PICTURE,QUANTISATION} stateless
|
||||
* controls (mainline kernel <linux/v4l2-controls.h>:1985-2105).
|
||||
*
|
||||
* Replaces the staging-era V4L2_CID_MPEG_VIDEO_MPEG2_{SLICE_PARAMS,
|
||||
* QUANTIZATION} combined-struct API that the fork previously used
|
||||
* via include/mpeg2-ctrls.h (deleted in commit C).
|
||||
*
|
||||
* Per-frame submission: one batched VIDIOC_S_EXT_CTRLS with three
|
||||
* controls (12-byte SEQUENCE + 32-byte PICTURE + 256-byte QUANTISATION),
|
||||
* matching FFmpeg libavcodec/v4l2_request_mpeg2.c:130-155 reference
|
||||
* implementation. Verified empirically in fresnel-fourier Phase 0
|
||||
* cross-validator sweep and Phase 3 Baseline C verbatim payload.
|
||||
*
|
||||
* Quantisation matrix order: zigzag scanning order per kernel doc
|
||||
* v4l2-controls.h:2076. VAAPI VAIQMatrixBufferMPEG2 also stores in
|
||||
* zigzag scanning order (per VAAPI spec). Direct memcpy works; no
|
||||
* permutation in the libva backend. Kernel hantro_mpeg2.c::
|
||||
* hantro_mpeg2_dec_copy_qtable applies the zigzag-to-raster
|
||||
* permutation when copying to the hardware quantisation table.
|
||||
*
|
||||
* Default matrices (when iqmatrix_set==false): MPEG-2 spec defaults
|
||||
* per ISO/IEC 13818-2 Table 7-3, transcribed from Phase 3 Baseline C
|
||||
* QUANTISATION verbatim payload (256 bytes captured from
|
||||
* ffmpeg-v4l2request decode of bbb_720p10s_mpeg2.ts).
|
||||
*/
|
||||
|
||||
#include "mpeg2.h"
|
||||
#include "context.h"
|
||||
#include "request.h"
|
||||
@@ -35,120 +63,187 @@
|
||||
#include <sys/mman.h>
|
||||
|
||||
#include <linux/videodev2.h>
|
||||
#include <mpeg2-ctrls.h>
|
||||
#include <linux/v4l2-controls.h>
|
||||
|
||||
#include "v4l2.h"
|
||||
|
||||
/*
|
||||
* MPEG-2 default intra quantisation matrix in zigzag scanning order
|
||||
* (ISO/IEC 13818-2 Table 7-3, verified empirically against
|
||||
* fresnel-fourier Phase 3 Baseline C QUANTISATION payload bytes 0..63
|
||||
* from a ffmpeg-v4l2request decode of the BBB 720p10s MPEG-2 fixture).
|
||||
*/
|
||||
static const __u8 mpeg2_default_intra_matrix[64] = {
|
||||
8, 16, 16, 19, 16, 19, 22, 22,
|
||||
22, 22, 22, 22, 26, 24, 26, 27,
|
||||
27, 27, 26, 26, 26, 26, 27, 27,
|
||||
27, 29, 29, 29, 34, 34, 34, 29,
|
||||
29, 29, 27, 27, 29, 29, 32, 32,
|
||||
34, 34, 37, 38, 37, 35, 35, 34,
|
||||
35, 38, 38, 40, 40, 40, 48, 48,
|
||||
46, 46, 56, 56, 58, 69, 69, 83,
|
||||
};
|
||||
|
||||
/*
|
||||
* The MPEG-2 default non-intra quantisation matrix is uniformly 16 in the spec.
|
||||
* Verified against Phase 3 Baseline C QUANTISATION payload bytes
|
||||
* 64..127 (all 0x10 = 16). Same applies to chroma_non_intra
|
||||
* (bytes 192..255). Filled at runtime via memset rather than a
|
||||
* separate const array to keep the binary smaller.
|
||||
*/
|
||||
|
||||
int mpeg2_set_controls(struct request_data *driver_data,
|
||||
struct object_context *context_object,
|
||||
struct object_surface *surface_object)
|
||||
{
|
||||
VAPictureParameterBufferMPEG2 *picture =
|
||||
&surface_object->params.mpeg2.picture;
|
||||
VASliceParameterBufferMPEG2 *slice =
|
||||
&surface_object->params.mpeg2.slice;
|
||||
VAIQMatrixBufferMPEG2 *iqmatrix =
|
||||
&surface_object->params.mpeg2.iqmatrix;
|
||||
bool iqmatrix_set = surface_object->params.mpeg2.iqmatrix_set;
|
||||
struct v4l2_ctrl_mpeg2_slice_params slice_params;
|
||||
struct v4l2_ctrl_mpeg2_quantization quantization;
|
||||
|
||||
/* Clause 2: v4l2_ctrl_mpeg2_sequence (12 bytes) */
|
||||
struct v4l2_ctrl_mpeg2_sequence sequence;
|
||||
/* Clause 3: v4l2_ctrl_mpeg2_picture (32 bytes; reserved[5] must be zero) */
|
||||
struct v4l2_ctrl_mpeg2_picture pic;
|
||||
/* Clause 4: v4l2_ctrl_mpeg2_quantisation (256 bytes) */
|
||||
struct v4l2_ctrl_mpeg2_quantisation quant;
|
||||
|
||||
struct object_surface *forward_reference_surface;
|
||||
struct object_surface *backward_reference_surface;
|
||||
uint64_t timestamp;
|
||||
unsigned int i;
|
||||
int rc;
|
||||
|
||||
memset(&slice_params, 0, sizeof(slice_params));
|
||||
memset(&sequence, 0, sizeof sequence);
|
||||
memset(&pic, 0, sizeof pic); /* zeros pic.reserved[5] per Clause 3 */
|
||||
memset(&quant, 0, sizeof quant);
|
||||
|
||||
slice_params.bit_size = surface_object->slices_size * 8;
|
||||
slice_params.data_bit_offset = 0;
|
||||
|
||||
slice_params.sequence.horizontal_size = picture->horizontal_size;
|
||||
slice_params.sequence.vertical_size = picture->vertical_size;
|
||||
slice_params.sequence.vbv_buffer_size = SOURCE_SIZE_MAX;
|
||||
|
||||
slice_params.sequence.profile_and_level_indication = 0;
|
||||
slice_params.sequence.progressive_sequence = 0;
|
||||
slice_params.sequence.chroma_format = 1; // 4:2:0
|
||||
|
||||
slice_params.picture.picture_coding_type = picture->picture_coding_type;
|
||||
slice_params.picture.f_code[0][0] = (picture->f_code >> 12) & 0x0f;
|
||||
slice_params.picture.f_code[0][1] = (picture->f_code >> 8) & 0x0f;
|
||||
slice_params.picture.f_code[1][0] = (picture->f_code >> 4) & 0x0f;
|
||||
slice_params.picture.f_code[1][1] = (picture->f_code >> 0) & 0x0f;
|
||||
|
||||
slice_params.picture.intra_dc_precision =
|
||||
picture->picture_coding_extension.bits.intra_dc_precision;
|
||||
slice_params.picture.picture_structure =
|
||||
picture->picture_coding_extension.bits.picture_structure;
|
||||
slice_params.picture.top_field_first =
|
||||
picture->picture_coding_extension.bits.top_field_first;
|
||||
slice_params.picture.frame_pred_frame_dct =
|
||||
picture->picture_coding_extension.bits.frame_pred_frame_dct;
|
||||
slice_params.picture.concealment_motion_vectors =
|
||||
picture->picture_coding_extension.bits
|
||||
.concealment_motion_vectors;
|
||||
slice_params.picture.q_scale_type =
|
||||
picture->picture_coding_extension.bits.q_scale_type;
|
||||
slice_params.picture.intra_vlc_format =
|
||||
picture->picture_coding_extension.bits.intra_vlc_format;
|
||||
slice_params.picture.alternate_scan =
|
||||
picture->picture_coding_extension.bits.alternate_scan;
|
||||
slice_params.picture.repeat_first_field =
|
||||
picture->picture_coding_extension.bits.repeat_first_field;
|
||||
slice_params.picture.progressive_frame =
|
||||
picture->picture_coding_extension.bits.progressive_frame;
|
||||
|
||||
slice_params.quantiser_scale_code = slice->quantiser_scale_code;
|
||||
/* === Clause 2: SEQUENCE ===
|
||||
*
|
||||
* VAAPI's VAPictureParameterBufferMPEG2 doesn't expose the
|
||||
* sequence-extension's progressive_sequence flag separately;
|
||||
* use progressive_frame from the picture-coding extension as a
|
||||
* proxy. They're identical for typical streams (BBB is
|
||||
* progressive throughout).
|
||||
*/
|
||||
sequence.horizontal_size = picture->horizontal_size;
|
||||
sequence.vertical_size = picture->vertical_size;
|
||||
sequence.vbv_buffer_size = surface_object->source_size;
|
||||
sequence.profile_and_level_indication = 0; /* not exposed by VAAPI */
|
||||
sequence.chroma_format = 1; /* 4:2:0 — campaign codec scope */
|
||||
if (picture->picture_coding_extension.bits.progressive_frame)
|
||||
sequence.flags |= V4L2_MPEG2_SEQ_FLAG_PROGRESSIVE;
|
||||
|
||||
/* === Clause 3: PICTURE ===
|
||||
*
|
||||
* Behavioral correction in iter1 relative to the previous mpeg2.c:
|
||||
* old code self-referenced surface_object->timestamp when the
|
||||
* VAAPI ref picture was VA_INVALID_ID. New code sets ts = 0 for
|
||||
* missing refs, matching kernel doc's 0-as-sentinel convention
|
||||
* (verified against Phase 3 Baseline C frame 1: I-frame has both
|
||||
* forward_ref_ts and backward_ref_ts == 0; FFmpeg
|
||||
* libavcodec/v4l2_request_mpeg2.c:98-108 uses same convention).
|
||||
*/
|
||||
forward_reference_surface =
|
||||
SURFACE(driver_data, picture->forward_reference_picture);
|
||||
if (forward_reference_surface == NULL)
|
||||
forward_reference_surface = surface_object;
|
||||
|
||||
timestamp = v4l2_timeval_to_ns(&forward_reference_surface->timestamp);
|
||||
slice_params.forward_ref_ts = timestamp;
|
||||
if (forward_reference_surface != NULL)
|
||||
pic.forward_ref_ts =
|
||||
v4l2_timeval_to_ns(&forward_reference_surface->timestamp);
|
||||
|
||||
backward_reference_surface =
|
||||
SURFACE(driver_data, picture->backward_reference_picture);
|
||||
if (backward_reference_surface == NULL)
|
||||
backward_reference_surface = surface_object;
|
||||
if (backward_reference_surface != NULL)
|
||||
pic.backward_ref_ts =
|
||||
v4l2_timeval_to_ns(&backward_reference_surface->timestamp);
|
||||
|
||||
timestamp = v4l2_timeval_to_ns(&backward_reference_surface->timestamp);
|
||||
slice_params.backward_ref_ts = timestamp;
|
||||
if (picture->picture_coding_extension.bits.top_field_first)
|
||||
pic.flags |= V4L2_MPEG2_PIC_FLAG_TOP_FIELD_FIRST;
|
||||
if (picture->picture_coding_extension.bits.frame_pred_frame_dct)
|
||||
pic.flags |= V4L2_MPEG2_PIC_FLAG_FRAME_PRED_DCT;
|
||||
if (picture->picture_coding_extension.bits.concealment_motion_vectors)
|
||||
pic.flags |= V4L2_MPEG2_PIC_FLAG_CONCEALMENT_MV;
|
||||
if (picture->picture_coding_extension.bits.q_scale_type)
|
||||
pic.flags |= V4L2_MPEG2_PIC_FLAG_Q_SCALE_TYPE;
|
||||
if (picture->picture_coding_extension.bits.intra_vlc_format)
|
||||
pic.flags |= V4L2_MPEG2_PIC_FLAG_INTRA_VLC;
|
||||
if (picture->picture_coding_extension.bits.alternate_scan)
|
||||
pic.flags |= V4L2_MPEG2_PIC_FLAG_ALT_SCAN;
|
||||
if (picture->picture_coding_extension.bits.repeat_first_field)
|
||||
pic.flags |= V4L2_MPEG2_PIC_FLAG_REPEAT_FIRST;
|
||||
if (picture->picture_coding_extension.bits.progressive_frame)
|
||||
pic.flags |= V4L2_MPEG2_PIC_FLAG_PROGRESSIVE;
|
||||
|
||||
rc = v4l2_set_control(driver_data->video_fd, surface_object->request_fd,
|
||||
V4L2_CID_MPEG_VIDEO_MPEG2_SLICE_PARAMS,
|
||||
&slice_params, sizeof(slice_params));
|
||||
pic.f_code[0][0] = (picture->f_code >> 12) & 0x0f;
|
||||
pic.f_code[0][1] = (picture->f_code >> 8) & 0x0f;
|
||||
pic.f_code[1][0] = (picture->f_code >> 4) & 0x0f;
|
||||
pic.f_code[1][1] = (picture->f_code >> 0) & 0x0f;
|
||||
pic.picture_coding_type = picture->picture_coding_type;
|
||||
pic.picture_structure =
|
||||
picture->picture_coding_extension.bits.picture_structure;
|
||||
pic.intra_dc_precision =
|
||||
picture->picture_coding_extension.bits.intra_dc_precision;
|
||||
/* pic.reserved[5] zeroed by memset above */
|
||||
|
||||
/* === Clause 4: QUANTISATION ===
|
||||
*
|
||||
* Kernel always reads all four matrices unconditionally
|
||||
* (no load_* flags in the new API; kernel hantro_mpeg2.c
|
||||
* doesn't synthesize defaults). When VAAPI's consumer didn't
|
||||
* send VAIQMatrixBufferType (iqmatrix_set==false), populate
|
||||
* with MPEG-2 spec default matrices.
|
||||
*
|
||||
* VAAPI VAIQMatrixBufferMPEG2 stores matrices in zigzag scanning
|
||||
* order (per VAAPI spec). Kernel expects zigzag scanning order
|
||||
* (per v4l2-controls.h:2076). Direct memcpy.
|
||||
*/
|
||||
if (iqmatrix_set) {
|
||||
memcpy(quant.intra_quantiser_matrix,
|
||||
iqmatrix->intra_quantiser_matrix, 64);
|
||||
memcpy(quant.non_intra_quantiser_matrix,
|
||||
iqmatrix->non_intra_quantiser_matrix, 64);
|
||||
memcpy(quant.chroma_intra_quantiser_matrix,
|
||||
iqmatrix->chroma_intra_quantiser_matrix, 64);
|
||||
memcpy(quant.chroma_non_intra_quantiser_matrix,
|
||||
iqmatrix->chroma_non_intra_quantiser_matrix, 64);
|
||||
} else {
|
||||
memcpy(quant.intra_quantiser_matrix,
|
||||
mpeg2_default_intra_matrix, 64);
|
||||
memset(quant.non_intra_quantiser_matrix, 16, 64);
|
||||
memcpy(quant.chroma_intra_quantiser_matrix,
|
||||
mpeg2_default_intra_matrix, 64);
|
||||
memset(quant.chroma_non_intra_quantiser_matrix, 16, 64);
|
||||
}
|
||||
|
||||
/* === Clause 1+5: batched submission ===
|
||||
*
|
||||
* One VIDIOC_S_EXT_CTRLS with all three controls. Matches
|
||||
* src/h264.c:986 pattern (single v4l2_set_controls call) and
|
||||
* FFmpeg ff_v4l2_request_decode_frame contract. Bound to the
|
||||
* surface's permanent request_fd (iter6 per-OUTPUT-slot binding;
|
||||
* picture.c:284 sets surface_object->request_fd at BeginPicture).
|
||||
*/
|
||||
struct v4l2_ext_control ctrls[3] = {
|
||||
{
|
||||
.id = V4L2_CID_STATELESS_MPEG2_SEQUENCE,
|
||||
.ptr = &sequence,
|
||||
.size = sizeof sequence,
|
||||
},
|
||||
{
|
||||
.id = V4L2_CID_STATELESS_MPEG2_PICTURE,
|
||||
.ptr = &pic,
|
||||
.size = sizeof pic,
|
||||
},
|
||||
{
|
||||
.id = V4L2_CID_STATELESS_MPEG2_QUANTISATION,
|
||||
.ptr = &quant,
|
||||
.size = sizeof quant,
|
||||
},
|
||||
};
|
||||
|
||||
rc = v4l2_set_controls(driver_data->video_fd,
|
||||
surface_object->request_fd,
|
||||
ctrls, 3);
|
||||
if (rc < 0)
|
||||
return VA_STATUS_ERROR_OPERATION_FAILED;
|
||||
|
||||
if (iqmatrix_set) {
|
||||
quantization.load_intra_quantiser_matrix =
|
||||
iqmatrix->load_intra_quantiser_matrix;
|
||||
quantization.load_non_intra_quantiser_matrix =
|
||||
iqmatrix->load_non_intra_quantiser_matrix;
|
||||
quantization.load_chroma_intra_quantiser_matrix =
|
||||
iqmatrix->load_chroma_intra_quantiser_matrix;
|
||||
quantization.load_chroma_non_intra_quantiser_matrix =
|
||||
iqmatrix->load_chroma_non_intra_quantiser_matrix;
|
||||
|
||||
for (i = 0; i < 64; i++) {
|
||||
quantization.intra_quantiser_matrix[i] =
|
||||
iqmatrix->intra_quantiser_matrix[i];
|
||||
quantization.non_intra_quantiser_matrix[i] =
|
||||
iqmatrix->non_intra_quantiser_matrix[i];
|
||||
quantization.chroma_intra_quantiser_matrix[i] =
|
||||
iqmatrix->chroma_intra_quantiser_matrix[i];
|
||||
quantization.chroma_non_intra_quantiser_matrix[i] =
|
||||
iqmatrix->chroma_non_intra_quantiser_matrix[i];
|
||||
}
|
||||
|
||||
rc = v4l2_set_control(driver_data->video_fd,
|
||||
surface_object->request_fd,
|
||||
V4L2_CID_MPEG_VIDEO_MPEG2_QUANTIZATION,
|
||||
&quantization, sizeof(quantization));
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
+315
-11
@@ -34,8 +34,13 @@
|
||||
#include "h264.h"
|
||||
#include "h265.h"
|
||||
#include "mpeg2.h"
|
||||
#include "vp8.h"
|
||||
#include "vp9.h"
|
||||
#include "av1.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <errno.h>
|
||||
@@ -51,6 +56,7 @@
|
||||
#include "autoconfig.h"
|
||||
|
||||
static VAStatus codec_store_buffer(struct request_data *driver_data,
|
||||
struct object_context *context,
|
||||
VAProfile profile,
|
||||
struct object_surface *surface_object,
|
||||
struct object_buffer *buffer_object)
|
||||
@@ -63,6 +69,47 @@ static VAStatus codec_store_buffer(struct request_data *driver_data,
|
||||
* RenderPicture), we can't use a V4L2 buffer directly
|
||||
* and have to copy from a regular buffer.
|
||||
*/
|
||||
if (context->h264_start_code) {
|
||||
static const char start_code[3] = { 0x00, 0x00, 0x01 };
|
||||
|
||||
memcpy(surface_object->source_data +
|
||||
surface_object->slices_size,
|
||||
start_code, sizeof(start_code));
|
||||
surface_object->slices_size += sizeof(start_code);
|
||||
}
|
||||
/*
|
||||
* iter33 α-30: VP8 OUTPUT buffer needs the uncompressed
|
||||
* frame header that ffmpeg-vaapi stripped before submitting
|
||||
* VASliceData. Hantro's vp8_dec_run reads OUTPUT[0..N] with
|
||||
* an assumed offset of 10 bytes (keyframe) or 3 bytes
|
||||
* (interframe) before the first_partition data — see
|
||||
* rockchip_vpu2_hw_vp8_dec.c:349.
|
||||
*
|
||||
* ffmpeg-vaapi (vaapi_vp8.c:191-192) strips
|
||||
* header_size = 3 + 7 * s->keyframe
|
||||
* before submitting the slice data, so libva needs to
|
||||
* pre-pad the OUTPUT with that many bytes. Hantro only
|
||||
* uses these bytes for offset arithmetic, not parsing,
|
||||
* so a zero-filled placeholder is sufficient.
|
||||
*
|
||||
* ffmpeg-v4l2request (kdirect path) does NOT strip the
|
||||
* header, hence its OUTPUT is byte-equal to SW reference
|
||||
* and decode works correctly. This is the only material
|
||||
* difference between the two front-ends for VP8.
|
||||
*
|
||||
* key_frame in VAAPI's pic_fields.bits is INVERTED:
|
||||
* 0 → keyframe, 1 → interframe.
|
||||
*/
|
||||
if (profile == VAProfileVP8Version0_3 &&
|
||||
surface_object->params.vp8.iqmatrix_set /* picture parsed by now */) {
|
||||
unsigned int header_size =
|
||||
surface_object->params.vp8.picture.pic_fields.bits.key_frame == 0 ?
|
||||
10 : 3;
|
||||
memset(surface_object->source_data +
|
||||
surface_object->slices_size,
|
||||
0, header_size);
|
||||
surface_object->slices_size += header_size;
|
||||
}
|
||||
memcpy(surface_object->source_data +
|
||||
surface_object->slices_size,
|
||||
buffer_object->data,
|
||||
@@ -97,6 +144,27 @@ static VAStatus codec_store_buffer(struct request_data *driver_data,
|
||||
sizeof(surface_object->params.h265.picture));
|
||||
break;
|
||||
|
||||
case VAProfileVP8Version0_3:
|
||||
memcpy(&surface_object->params.vp8.picture,
|
||||
buffer_object->data,
|
||||
sizeof(surface_object->params.vp8.picture));
|
||||
break;
|
||||
|
||||
case VAProfileVP9Profile0:
|
||||
memcpy(&surface_object->params.vp9.picture,
|
||||
buffer_object->data,
|
||||
sizeof(surface_object->params.vp9.picture));
|
||||
break;
|
||||
|
||||
case VAProfileAV1Profile0:
|
||||
memcpy(&surface_object->params.av1.picture,
|
||||
buffer_object->data,
|
||||
sizeof(surface_object->params.av1.picture));
|
||||
/* Reset per-frame tile group entry array on each new
|
||||
* picture parameter buffer (start of a new frame). */
|
||||
surface_object->params.av1.num_tile_group_entries = 0;
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@@ -114,11 +182,44 @@ static VAStatus codec_store_buffer(struct request_data *driver_data,
|
||||
sizeof(surface_object->params.h264.slice));
|
||||
break;
|
||||
|
||||
case VAProfileHEVCMain:
|
||||
case VAProfileHEVCMain: {
|
||||
unsigned int n = surface_object->params.h265.num_slices;
|
||||
if (n < HEVC_MAX_SLICES_PER_FRAME) {
|
||||
memcpy(&surface_object->params.h265.slices[n],
|
||||
buffer_object->data,
|
||||
sizeof(VASliceParameterBufferHEVC));
|
||||
surface_object->params.h265.num_slices = n + 1;
|
||||
}
|
||||
/* Keep .slice mirror populated as last-slice ref for
|
||||
* h265_fill_pps which reads dependent_slice_segment_flag */
|
||||
memcpy(&surface_object->params.h265.slice,
|
||||
buffer_object->data,
|
||||
sizeof(surface_object->params.h265.slice));
|
||||
break;
|
||||
}
|
||||
|
||||
case VAProfileVP8Version0_3:
|
||||
memcpy(&surface_object->params.vp8.slice,
|
||||
buffer_object->data,
|
||||
sizeof(surface_object->params.vp8.slice));
|
||||
break;
|
||||
|
||||
case VAProfileVP9Profile0:
|
||||
memcpy(&surface_object->params.vp9.slice,
|
||||
buffer_object->data,
|
||||
sizeof(surface_object->params.vp9.slice));
|
||||
break;
|
||||
|
||||
case VAProfileAV1Profile0: {
|
||||
unsigned int n = surface_object->params.av1.num_tile_group_entries;
|
||||
if (n < AV1_MAX_TILES) {
|
||||
memcpy(&surface_object->params.av1.tile_group_entries[n],
|
||||
buffer_object->data,
|
||||
sizeof(VASliceParameterBufferAV1));
|
||||
surface_object->params.av1.num_tile_group_entries = n + 1;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
break;
|
||||
@@ -143,6 +244,7 @@ static VAStatus codec_store_buffer(struct request_data *driver_data,
|
||||
memcpy(&surface_object->params.h264.matrix,
|
||||
buffer_object->data,
|
||||
sizeof(surface_object->params.h264.matrix));
|
||||
surface_object->params.h264.matrix_set = true;
|
||||
break;
|
||||
|
||||
case VAProfileHEVCMain:
|
||||
@@ -152,6 +254,27 @@ static VAStatus codec_store_buffer(struct request_data *driver_data,
|
||||
surface_object->params.h265.iqmatrix_set = true;
|
||||
break;
|
||||
|
||||
case VAProfileVP8Version0_3:
|
||||
memcpy(&surface_object->params.vp8.iqmatrix,
|
||||
buffer_object->data,
|
||||
sizeof(surface_object->params.vp8.iqmatrix));
|
||||
surface_object->params.vp8.iqmatrix_set = true;
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case VAProbabilityBufferType:
|
||||
switch (profile) {
|
||||
case VAProfileVP8Version0_3:
|
||||
memcpy(&surface_object->params.vp8.probability,
|
||||
buffer_object->data,
|
||||
sizeof(surface_object->params.vp8.probability));
|
||||
surface_object->params.vp8.probability_set = true;
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@@ -184,7 +307,8 @@ static VAStatus codec_set_controls(struct request_data *driver_data,
|
||||
case VAProfileH264ConstrainedBaseline:
|
||||
case VAProfileH264MultiviewHigh:
|
||||
case VAProfileH264StereoHigh:
|
||||
rc = h264_set_controls(driver_data, context, surface_object);
|
||||
rc = h264_set_controls(driver_data, context, profile,
|
||||
surface_object);
|
||||
if (rc < 0)
|
||||
return VA_STATUS_ERROR_OPERATION_FAILED;
|
||||
break;
|
||||
@@ -195,6 +319,23 @@ static VAStatus codec_set_controls(struct request_data *driver_data,
|
||||
return VA_STATUS_ERROR_OPERATION_FAILED;
|
||||
break;
|
||||
|
||||
case VAProfileVP8Version0_3:
|
||||
rc = vp8_set_controls(driver_data, context, surface_object);
|
||||
if (rc < 0)
|
||||
return VA_STATUS_ERROR_OPERATION_FAILED;
|
||||
break;
|
||||
|
||||
case VAProfileVP9Profile0:
|
||||
rc = vp9_set_controls(driver_data, context, surface_object);
|
||||
if (rc < 0)
|
||||
return VA_STATUS_ERROR_OPERATION_FAILED;
|
||||
break;
|
||||
case VAProfileAV1Profile0:
|
||||
rc = av1_set_controls(driver_data, context, surface_object);
|
||||
if (rc < 0)
|
||||
return VA_STATUS_ERROR_OPERATION_FAILED;
|
||||
break;
|
||||
|
||||
default:
|
||||
return VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
|
||||
}
|
||||
@@ -208,6 +349,9 @@ VAStatus RequestBeginPicture(VADriverContextP context, VAContextID context_id,
|
||||
struct request_data *driver_data = context->pDriverData;
|
||||
struct object_context *context_object;
|
||||
struct object_surface *surface_object;
|
||||
struct request_pool_slot *slot;
|
||||
int slot_index;
|
||||
|
||||
|
||||
context_object = CONTEXT(driver_data, context_id);
|
||||
if (context_object == NULL)
|
||||
@@ -217,9 +361,115 @@ VAStatus RequestBeginPicture(VADriverContextP context, VAContextID context_id,
|
||||
if (surface_object == NULL)
|
||||
return VA_STATUS_ERROR_INVALID_SURFACE;
|
||||
|
||||
/* AV1 Phase 3 diag */
|
||||
request_log("BeginPicture id=0x%x prev_slot=%p status=%d\n",
|
||||
surface_object->base.id,
|
||||
(void *)surface_object->current_slot,
|
||||
surface_object->status);
|
||||
|
||||
if (surface_object->status == VASurfaceRendering)
|
||||
RequestSyncSurface(context, surface_id);
|
||||
|
||||
/*
|
||||
* Iter2 Fix 3: acquire a CAPTURE-pool slot for this decode cycle.
|
||||
* If the surface still holds a slot from a prior decode (DECODED
|
||||
* or EXPORTED — the consumer is done with it by definition since
|
||||
* we got back to BeginPicture for the same surface), release it
|
||||
* first. The new slot is bound and its V4L2 index + mmap pointers
|
||||
* are mirrored into surface_object->destination_* so the existing
|
||||
* QBUF/DQBUF/EXPBUF code paths see no behavioral change.
|
||||
*
|
||||
* AV1 Phase 3 finding: LIBVA_SKIP_REBIND=1 experiment (do NOT
|
||||
* unbind on rebind) did not improve PASS count for the av1_larger
|
||||
* film_grain stress vector — proving the iter2 Fix 3 release is
|
||||
* NOT the source of the inter-frame divergence. The issue is
|
||||
* deeper in ffmpeg-vaapi's AV1 hwaccel: per byte-equal OUTPUT
|
||||
* comparison with the patched-ffmpeg-v4l2request reference run
|
||||
* (LD_LIBRARY_PATH override on a debug libavcodec.so), 7/7 first
|
||||
* EndPicture submissions are byte-identical, but libva makes 2 EXTRA submissions.
|
||||
*/
|
||||
if (surface_object->current_slot != NULL)
|
||||
surface_unbind_slot(driver_data, surface_object);
|
||||
|
||||
/*
|
||||
* AV1 Phase 5 review Amendment 4: clear any stale
|
||||
* linked_decode_surface_id from a prior film_grain display→decode
|
||||
* link. If ffmpeg-vaapi recycles a former display surface as a
|
||||
* decode target, BeginPicture binds a fresh slot — but without
|
||||
* this reset, copy_surface_to_image's link-follow would still
|
||||
* borrow from the now-stale linked surface and serve wrong data.
|
||||
* Cleared unconditionally (cheap) so the next AV1 grain frame
|
||||
* re-establishes the link if needed.
|
||||
*/
|
||||
surface_object->linked_decode_surface_id = VA_INVALID_SURFACE;
|
||||
{
|
||||
struct cap_pool_slot *cap_slot =
|
||||
cap_pool_acquire(&driver_data->capture_pool, surface_id);
|
||||
if (cap_slot == NULL)
|
||||
return VA_STATUS_ERROR_ALLOCATION_FAILED;
|
||||
surface_bind_slot(surface_object, cap_slot);
|
||||
|
||||
/*
|
||||
* iter8 Phase 7 IMP-1 experiment: env-gated CAPTURE buffer
|
||||
* pre-zero. LIBVA_V4L2_ZERO_CAPTURE=1 wipes the slot's mmap'd
|
||||
* region before kernel decode. Discriminates "kernel writes
|
||||
* partial then aborts" from "kernel writes nothing and we
|
||||
* see stale residue."
|
||||
*/
|
||||
{
|
||||
static const char *zero_env = NULL;
|
||||
static bool zero_env_checked = false;
|
||||
if (!zero_env_checked) {
|
||||
zero_env = getenv("LIBVA_V4L2_ZERO_CAPTURE");
|
||||
zero_env_checked = true;
|
||||
}
|
||||
if (zero_env != NULL && zero_env[0] == '1') {
|
||||
unsigned int b;
|
||||
for (b = 0; b < cap_slot->buffers_count; b++)
|
||||
if (cap_slot->map[b] != NULL)
|
||||
memset(cap_slot->map[b], 0,
|
||||
cap_slot->map_lengths[b]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Borrow an OUTPUT (bitstream-input) slot from the driver-wide
|
||||
* pool for the duration of this Begin/Render/End cycle. The
|
||||
* surface's source_* fields hold the borrow's mmap pointer/size/
|
||||
* V4L2 buffer index until RequestSyncSurface releases it after
|
||||
* VIDIOC_DQBUF.
|
||||
*/
|
||||
slot_index = request_pool_acquire(&driver_data->output_pool);
|
||||
if (slot_index < 0)
|
||||
return VA_STATUS_ERROR_ALLOCATION_FAILED;
|
||||
|
||||
slot = request_pool_slot(&driver_data->output_pool,
|
||||
(unsigned int)slot_index);
|
||||
if (slot == NULL) {
|
||||
request_pool_release(&driver_data->output_pool,
|
||||
(unsigned int)slot_index);
|
||||
return VA_STATUS_ERROR_ALLOCATION_FAILED;
|
||||
}
|
||||
|
||||
surface_object->source_index = slot->index;
|
||||
surface_object->source_data = slot->data;
|
||||
surface_object->source_size = slot->size;
|
||||
/*
|
||||
* iter6: bind the slot's permanent request_fd to this surface for the
|
||||
* duration of the decode cycle. Replaces the iter4 close+alloc-per-
|
||||
* frame model. The fd is REINIT'd (not closed) at RequestSyncSurface,
|
||||
* so the kernel-side request object is reset in place — no fd-reuse
|
||||
* race with another slot's pending decode.
|
||||
*/
|
||||
surface_object->request_fd = slot->request_fd;
|
||||
surface_object->slices_size = 0;
|
||||
surface_object->slices_count = 0;
|
||||
surface_object->params.h264.matrix_set = false;
|
||||
surface_object->params.h265.num_slices = 0;
|
||||
surface_object->params.vp8.iqmatrix_set = false;
|
||||
surface_object->params.vp8.probability_set = false;
|
||||
|
||||
surface_object->status = VASurfaceRendering;
|
||||
context_object->render_surface_id = surface_id;
|
||||
|
||||
@@ -255,7 +505,8 @@ VAStatus RequestRenderPicture(VADriverContextP context, VAContextID context_id,
|
||||
if (buffer_object == NULL)
|
||||
return VA_STATUS_ERROR_INVALID_BUFFER;
|
||||
|
||||
rc = codec_store_buffer(driver_data, config_object->profile,
|
||||
rc = codec_store_buffer(driver_data, context_object,
|
||||
config_object->profile,
|
||||
surface_object, buffer_object);
|
||||
if (rc != VA_STATUS_SUCCESS)
|
||||
return rc;
|
||||
@@ -296,22 +547,75 @@ VAStatus RequestEndPicture(VADriverContextP context, VAContextID context_id)
|
||||
if (surface_object == NULL)
|
||||
return VA_STATUS_ERROR_INVALID_SURFACE;
|
||||
|
||||
gettimeofday(&surface_object->timestamp, NULL);
|
||||
/*
|
||||
* iter9 α-7: monotonic per-context counter instead of gettimeofday,
|
||||
* so DPB.reference_ts / OUTPUT QBUF ts stay small (matches
|
||||
* ffmpeg-v4l2request's pattern). Confirmed in iter30 sweep
|
||||
* (1×, 1000×, 1000000× multipliers all produce identical output);
|
||||
* the counter scheme works on both rkvdec and hantro vb2_find_buffer.
|
||||
*/
|
||||
context_object->timestamp_counter++;
|
||||
surface_object->timestamp.tv_sec =
|
||||
(time_t)(context_object->timestamp_counter / 1000000);
|
||||
surface_object->timestamp.tv_usec =
|
||||
(suseconds_t)(context_object->timestamp_counter % 1000000);
|
||||
|
||||
/*
|
||||
* iter6: request_fd was bound to the surface in BeginPicture from
|
||||
* the OUTPUT pool slot's permanent fd. Per-frame allocation is gone.
|
||||
*/
|
||||
request_fd = surface_object->request_fd;
|
||||
if (request_fd < 0) {
|
||||
request_fd = media_request_alloc(driver_data->media_fd);
|
||||
if (request_fd < 0)
|
||||
return VA_STATUS_ERROR_OPERATION_FAILED;
|
||||
|
||||
surface_object->request_fd = request_fd;
|
||||
}
|
||||
if (request_fd < 0)
|
||||
return VA_STATUS_ERROR_OPERATION_FAILED;
|
||||
|
||||
rc = codec_set_controls(driver_data, context_object,
|
||||
config_object->profile, surface_object);
|
||||
if (rc != VA_STATUS_SUCCESS)
|
||||
return rc;
|
||||
|
||||
/*
|
||||
* iter14 α-16: env-gated dump of OUTPUT bitstream bytes immediately
|
||||
* before QBUF. LIBVA_V4L2_DUMP_OUTPUT=<dir> writes source_data[0..
|
||||
* slices_size] to <dir>/output_<profile>_<surface>_<frame>.bin.
|
||||
* Discriminates whether libva writes the same H.264/HEVC slice bytes
|
||||
* as kdirect — if YES, Bug 4/5 are not in the OUTPUT-side; if NO,
|
||||
* narrow to which slice-write path produces the divergence.
|
||||
*
|
||||
* Off by default; no behavior change when env unset.
|
||||
*/
|
||||
{
|
||||
static const char *dump_env = NULL;
|
||||
static bool dump_env_checked = false;
|
||||
if (!dump_env_checked) {
|
||||
dump_env = getenv("LIBVA_V4L2_DUMP_OUTPUT");
|
||||
dump_env_checked = true;
|
||||
}
|
||||
if (dump_env != NULL && dump_env[0] != '\0' &&
|
||||
surface_object->source_data != NULL &&
|
||||
surface_object->slices_size > 0) {
|
||||
char path[256];
|
||||
snprintf(path, sizeof(path),
|
||||
"%s/output_p%d_s%u_t%llu.bin",
|
||||
dump_env, (int)config_object->profile,
|
||||
(unsigned int)surface_object->base.id,
|
||||
(unsigned long long)context_object->timestamp_counter);
|
||||
FILE *fp = fopen(path, "wb");
|
||||
if (fp != NULL) {
|
||||
size_t w = fwrite(surface_object->source_data,
|
||||
1, surface_object->slices_size,
|
||||
fp);
|
||||
request_log("α-16: dumped %zu bytes to %s "
|
||||
"(slices_count=%u)\n",
|
||||
w, path,
|
||||
surface_object->slices_count);
|
||||
fclose(fp);
|
||||
} else {
|
||||
request_log("α-16: fopen(%s) failed: %s\n",
|
||||
path, strerror(errno));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
rc = v4l2_queue_buffer(driver_data->video_fd, -1, capture_type, NULL,
|
||||
surface_object->destination_index, 0,
|
||||
surface_object->destination_buffers_count);
|
||||
|
||||
+707
-6
@@ -25,10 +25,12 @@
|
||||
*/
|
||||
|
||||
#include "buffer.h"
|
||||
#include "cap_pool.h"
|
||||
#include "config.h"
|
||||
#include "context.h"
|
||||
#include "image.h"
|
||||
#include "picture.h"
|
||||
#include "request_pool.h"
|
||||
#include "subpicture.h"
|
||||
#include "surface.h"
|
||||
|
||||
@@ -41,6 +43,7 @@
|
||||
#include "v4l2.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
@@ -51,8 +54,524 @@
|
||||
|
||||
#include <sys/ioctl.h>
|
||||
|
||||
#include <linux/media.h>
|
||||
#include <linux/videodev2.h>
|
||||
|
||||
#include "hevc-ctrls/v4l2-hevc-ext-controls.h"
|
||||
|
||||
/*
|
||||
* fresnel-fourier iter4 Phase 6 commit Z + iter7 Phase 6 (B1a): device-path
|
||||
* auto-detect via media controller topology with decoder-entity discrimination.
|
||||
*
|
||||
* Pre-iter4 the backend hardcoded /dev/video0 + /dev/media0. On Linux 7.0 the
|
||||
* udev/probe order changed and rockchip-rga (an RGB color converter, no codec
|
||||
* support) now claims /dev/video0 — the legacy default returns an empty
|
||||
* profile list. iter4 commit Z replaced enumeration-order discovery with
|
||||
* media-topology discovery.
|
||||
*
|
||||
* iter7 (B1a): the iter4 walk treated the hantro-vpu driver name as a single
|
||||
* unit, but hantro-vpu registers BOTH encoder and decoder entities under one
|
||||
* /dev/mediaN on RK3399. iter4's "pick the first V4L_VIDEO interface" could
|
||||
* land on the encoder. iter7 walks ENTITIES looking for
|
||||
* MEDIA_ENT_F_PROC_VIDEO_DECODER, then follows the kernel's link graph
|
||||
* (data link from proc to IO entity, interface link from IO entity to V4L
|
||||
* interface) to the correct /dev/videoN.
|
||||
*
|
||||
* Two-pass to prefer rkvdec: pass 1 accepts only "rkvdec" (multi-codec
|
||||
* decoder, 3 of 5 codecs); pass 2 accepts any known decoder driver. On
|
||||
* RK3399 this makes auto-detect always pick rkvdec when available.
|
||||
*
|
||||
* iter4-B1b (multi-decoder routing — open BOTH rkvdec AND hantro from one
* backend instance, dispatch per codec) was still deferred at iter7. It was
* implemented later in iter38: the init function probes both drivers and
* request_switch_device_for_profile() retargets the active fds per profile.
* An explicit LIBVA_V4L2_REQUEST_VIDEO_PATH / MEDIA_PATH override still pins
* the backend to that single device and skips the multi-device probe.
|
||||
*
|
||||
* Escape hatch: LIBVA_V4L2_REQUEST_NO_AUTODETECT=1 reverts to legacy
|
||||
* hardcoded /dev/video0 + /dev/media0 for callers that relied on it.
|
||||
*/
|
||||
/*
 * Media-controller driver names accepted when no specific driver is
 * requested. NULL-terminated; order is the order in which names are compared.
 */
static const char * const known_decoder_drivers[] = {
	"rkvdec",	/* multi-codec decoder, preferred by auto-detect */
	"hantro-vpu",	/* registers both encoder and decoder entities */
	"cedrus",	/* NOTE(review): presumably the Allwinner decoder — confirm */
	"sun4i_csi",	/* NOTE(review): looks like a capture driver — confirm it belongs here */
	NULL,
};
|
||||
|
||||
/*
 * Translate a character-device major:minor pair into a /dev node path.
 *
 * Reads the /sys/dev/char/<major>:<minor> symlink and writes
 * "/dev/<last path component of the link target>" into out.
 *
 * Returns 0 on success. Returns -1 when the symlink cannot be read, when the
 * link target may have been truncated by readlink(), when the target has no
 * final component, or when the result does not fit in out_sz bytes. On a
 * formatting failure out is left as an empty string (if out_sz > 0) so a
 * clipped path can never be mistaken for a real device node.
 */
static int resolve_dev_node(uint32_t major, uint32_t minor, char *out, size_t out_sz)
{
	char sysfs_path[64], target[256];
	const char *base;
	ssize_t n;
	int len;

	snprintf(sysfs_path, sizeof sysfs_path, "/sys/dev/char/%u:%u",
		 (unsigned int)major, (unsigned int)minor);

	n = readlink(sysfs_path, target, sizeof target - 1);
	if (n < 0)
		return -1;
	/*
	 * readlink() truncates silently. A completely filled buffer means the
	 * tail of the target — the device name we need — may be missing, so
	 * treat it conservatively as a failure.
	 */
	if ((size_t)n >= sizeof target - 1)
		return -1;
	target[n] = '\0';

	base = strrchr(target, '/');
	base = base ? base + 1 : target;
	if (*base == '\0')
		return -1;

	len = snprintf(out, out_sz, "/dev/%s", base);
	if (len < 0 || (size_t)len >= out_sz) {
		if (out_sz > 0)
			out[0] = '\0';
		return -1;
	}
	return 0;
}
|
||||
|
||||
/*
|
||||
* iter7 B1a: walk topology graph from decoder-proc entity to its V4L_VIDEO
|
||||
* interface. Returns 0 + sets video_out on success, -1 if this media device
|
||||
* has no decoder entity (e.g. encoder-only device).
|
||||
*
|
||||
* Algorithm (per Phase 5 review, empirically validated against
|
||||
* boltzmann:~/src/linux-rockchip):
|
||||
* 1. For each entity E with function == MEDIA_ENT_F_PROC_VIDEO_DECODER:
|
||||
* 2. Find IO entity neighbors via DATA links (entity↔entity).
|
||||
* 3. Find the V4L_VIDEO interface via INTERFACE links from those IO
|
||||
* neighbors.
|
||||
* 4. Resolve interface.devnode.major:minor to /dev/videoN.
|
||||
*
|
||||
* Two-call MEDIA_IOC_G_TOPOLOGY pattern (Phase 5 IMP-3): first call gets
|
||||
* counts; second call fills the three arrays after we allocate them.
|
||||
*
|
||||
* Link discrimination via MEDIA_LNK_FL_INTERFACE_LINK (1U<<28):
|
||||
* data links have flags & MEDIA_LNK_FL_INTERFACE_LINK == 0; interface
|
||||
* links have it set. source_id/sink_id ordering is not guaranteed —
|
||||
* check both endpoints.
|
||||
*/
|
||||
static int find_decoder_video_node_via_topology(int media_fd,
						char *video_out,
						size_t video_out_sz)
{
	/* Upper bound on tracked decoder pads and on tracked IO neighbors;
	 * anything beyond it is silently ignored. */
	enum { TRACK_CAP = 16 };
	struct media_v2_topology topology;
	struct media_v2_entity *ent_tab = NULL;
	struct media_v2_interface *intf_tab = NULL;
	struct media_v2_link *link_tab = NULL;
	struct media_v2_pad *pad_tab = NULL;
	int status = -1;
	unsigned int e;

	/* First query: no array pointers set, only the counts are used. */
	memset(&topology, 0, sizeof topology);
	if (ioctl(media_fd, MEDIA_IOC_G_TOPOLOGY, &topology) < 0)
		return -1;
	if (!topology.num_entities || !topology.num_interfaces ||
	    !topology.num_links || !topology.num_pads)
		return -1;

	ent_tab = calloc(topology.num_entities, sizeof *ent_tab);
	intf_tab = calloc(topology.num_interfaces, sizeof *intf_tab);
	link_tab = calloc(topology.num_links, sizeof *link_tab);
	pad_tab = calloc(topology.num_pads, sizeof *pad_tab);
	if (ent_tab == NULL || intf_tab == NULL ||
	    link_tab == NULL || pad_tab == NULL)
		goto cleanup;

	/* Second query: hand the kernel the arrays to fill. */
	topology.ptr_entities = (uintptr_t)ent_tab;
	topology.ptr_interfaces = (uintptr_t)intf_tab;
	topology.ptr_links = (uintptr_t)link_tab;
	topology.ptr_pads = (uintptr_t)pad_tab;
	if (ioctl(media_fd, MEDIA_IOC_G_TOPOLOGY, &topology) < 0)
		goto cleanup;

	for (e = 0; e < topology.num_entities; e++) {
		uint32_t decoder_pads[TRACK_CAP];
		uint32_t neighbor_ents[TRACK_CAP];
		unsigned int n_pads = 0;
		unsigned int n_neighbors = 0;
		unsigned int p, l;
		uint32_t decoder_id;

		if (ent_tab[e].function != MEDIA_ENT_F_PROC_VIDEO_DECODER)
			continue;
		decoder_id = ent_tab[e].id;

		/* Pads owned by the decoder entity: data links are expressed
		 * between pads, not between entities. */
		for (p = 0; p < topology.num_pads; p++) {
			if (pad_tab[p].entity_id == decoder_id &&
			    n_pads < TRACK_CAP)
				decoder_pads[n_pads++] = pad_tab[p].id;
		}

		/* Data links touching a decoder pad: the pad on the other end
		 * belongs to an IO neighbor; record that neighbor's entity. */
		for (l = 0; l < topology.num_links; l++) {
			const struct media_v2_link *lk = &link_tab[l];
			uint32_t peer_pad = 0;
			unsigned int k;

			if (lk->flags & MEDIA_LNK_FL_INTERFACE_LINK)
				continue;

			for (k = 0; k < n_pads && peer_pad == 0; k++) {
				if (lk->source_id == decoder_pads[k])
					peer_pad = lk->sink_id;
				else if (lk->sink_id == decoder_pads[k])
					peer_pad = lk->source_id;
			}
			if (peer_pad == 0)
				continue;

			for (k = 0; k < topology.num_pads; k++) {
				if (pad_tab[k].id == peer_pad) {
					if (n_neighbors < TRACK_CAP)
						neighbor_ents[n_neighbors++] =
							pad_tab[k].entity_id;
					break;
				}
			}
		}

		/* Interface links touching a recorded neighbor: one end is the
		 * entity id, the other the interface id. The first V4L video
		 * interface whose devnode resolves wins. */
		for (l = 0; l < topology.num_links; l++) {
			const struct media_v2_link *lk = &link_tab[l];
			uint32_t wanted_intf = 0;
			unsigned int k;

			if ((lk->flags & MEDIA_LNK_FL_INTERFACE_LINK) == 0)
				continue;

			for (k = 0; k < n_neighbors && wanted_intf == 0; k++) {
				if (lk->source_id == neighbor_ents[k])
					wanted_intf = lk->sink_id;
				else if (lk->sink_id == neighbor_ents[k])
					wanted_intf = lk->source_id;
			}
			if (wanted_intf == 0)
				continue;

			for (k = 0; k < topology.num_interfaces; k++) {
				if (intf_tab[k].id != wanted_intf)
					continue;
				if (intf_tab[k].intf_type == MEDIA_INTF_T_V4L_VIDEO &&
				    resolve_dev_node(intf_tab[k].devnode.major,
						     intf_tab[k].devnode.minor,
						     video_out, video_out_sz) == 0)
					status = 0;
				break;
			}
			if (status == 0)
				goto cleanup;
		}
	}

cleanup:
	free(ent_tab);
	free(intf_tab);
	free(link_tab);
	free(pad_tab);
	return status;
}
|
||||
|
||||
/*
|
||||
* iter7 B1a: two-pass walk of /dev/media0..N. Pass 1 accepts only "rkvdec"
|
||||
* (multi-codec decoder serving 3 of 5 codecs). Pass 2 accepts any
|
||||
* known_decoder_drivers entry. Within each pass, the chosen media device
|
||||
* must ALSO contain at least one MEDIA_ENT_F_PROC_VIDEO_DECODER entity —
|
||||
* guards against encoder-only devices that happen to share the same driver
|
||||
* name (e.g. hantro-vpu encoder vs decoder inside one /dev/mediaN).
|
||||
*/
|
||||
/*
|
||||
* iter38: locate a /dev/mediaN whose driver name matches `want_driver`
|
||||
* AND exposes at least one MEDIA_ENT_F_PROC_VIDEO_DECODER entity (rules
|
||||
* out encoder-only devices sharing the same driver name). Resolves the
|
||||
* matching /dev/videoM via topology graph walk.
|
||||
*
|
||||
* `want_driver`:
|
||||
* - non-NULL → match only that exact driver name
|
||||
* - NULL → match any name in known_decoder_drivers[]
|
||||
*/
|
||||
/*
|
||||
* iter2 (ampere-kernel-decoders campaign) — runtime probe for the
|
||||
* V4L2 stateless HEVC EXT_SPS_{ST,LT}_RPS controls added in
|
||||
* Linux 7.0 (Casanova VDPU381/VDPU383 series). Returns true iff BOTH
|
||||
* controls are registered on the given fd. Stored per-fd on
|
||||
* driver_data so the multi-device-probe model (iter38) doesn't
|
||||
* silently misbehave when codec routing switches devices.
|
||||
*
|
||||
* The two CIDs together are the gate — neither alone is meaningful
|
||||
* without the other (st-RPS + lt-RPS arrays both need to be set to
|
||||
* match the SPS num_short_term_ref_pic_sets / num_long_term_ref_pics_sps
|
||||
* counts). Old kernels (RK3399 rkvdec on linux 6.x) register neither;
|
||||
* RK3588 rkvdec (VDPU381/383 path) registers both.
|
||||
*
|
||||
* Reference: phase4_plan_iter2.md §Step 3 in
|
||||
* ~/src/ampere-kernel-decoders/.
|
||||
*/
|
||||
static bool probe_hevc_ext_sps_rps_controls(int video_fd)
|
||||
{
|
||||
struct v4l2_queryctrl q;
|
||||
|
||||
if (video_fd < 0)
|
||||
return false;
|
||||
|
||||
memset(&q, 0, sizeof(q));
|
||||
q.id = V4L2_CID_STATELESS_HEVC_EXT_SPS_ST_RPS;
|
||||
if (ioctl(video_fd, VIDIOC_QUERYCTRL, &q) < 0)
|
||||
return false;
|
||||
|
||||
memset(&q, 0, sizeof(q));
|
||||
q.id = V4L2_CID_STATELESS_HEVC_EXT_SPS_LT_RPS;
|
||||
if (ioctl(video_fd, VIDIOC_QUERYCTRL, &q) < 0)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Inspect a /dev/videoN's OUTPUT formats for `want_pixfmt`. Returns true
|
||||
* iff at least one OUTPUT/OUTPUT_MPLANE format matches.
|
||||
*
|
||||
* Used to discriminate between multiple devices sharing a driver name —
|
||||
* RK3588 has 3 hantro-vpu instances and only one of them is vpu981 (the
|
||||
* dedicated AV1 decoder advertising V4L2_PIX_FMT_AV1_FRAME).
|
||||
*/
|
||||
static bool video_node_supports_output_fmt(int video_fd, uint32_t want_pixfmt)
{
	/* Queue types to enumerate, multi-planar first. */
	static const enum v4l2_buf_type queue_types[] = {
		V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE,
		V4L2_BUF_TYPE_VIDEO_OUTPUT,
	};
	/* Hard cap on format indices tried per queue type. */
	enum { FMT_INDEX_LIMIT = 64 };
	size_t q;

	for (q = 0; q < sizeof queue_types / sizeof queue_types[0]; q++) {
		unsigned int idx = 0;

		while (idx < FMT_INDEX_LIMIT) {
			struct v4l2_fmtdesc fmt;

			memset(&fmt, 0, sizeof fmt);
			fmt.index = idx;
			fmt.type = queue_types[q];
			/* Any ioctl failure ends enumeration for this type. */
			if (ioctl(video_fd, VIDIOC_ENUM_FMT, &fmt) < 0)
				break;
			if (fmt.pixelformat == want_pixfmt)
				return true;
			idx++;
		}
	}

	return false;
}
|
||||
|
||||
static int find_decoder_device_by_driver(const char *want_driver,
|
||||
char *video_out, size_t video_out_sz,
|
||||
char *media_out, size_t media_out_sz)
|
||||
{
|
||||
struct media_device_info info;
|
||||
char path[32];
|
||||
const char * const *kd;
|
||||
int fd, i;
|
||||
|
||||
for (i = 0; i < 16; i++) {
|
||||
bool match;
|
||||
|
||||
snprintf(path, sizeof path, "/dev/media%d", i);
|
||||
fd = open(path, O_RDWR | O_NONBLOCK);
|
||||
if (fd < 0)
|
||||
continue;
|
||||
memset(&info, 0, sizeof info);
|
||||
if (ioctl(fd, MEDIA_IOC_DEVICE_INFO, &info) != 0) {
|
||||
close(fd);
|
||||
continue;
|
||||
}
|
||||
if (want_driver != NULL) {
|
||||
match = (strcmp(info.driver, want_driver) == 0);
|
||||
} else {
|
||||
match = false;
|
||||
for (kd = known_decoder_drivers; *kd; kd++) {
|
||||
if (strcmp(info.driver, *kd) == 0) {
|
||||
match = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!match) {
|
||||
close(fd);
|
||||
continue;
|
||||
}
|
||||
if (find_decoder_video_node_via_topology(
|
||||
fd, video_out, video_out_sz) == 0) {
|
||||
snprintf(media_out, media_out_sz, "%s", path);
|
||||
close(fd);
|
||||
return 0;
|
||||
}
|
||||
close(fd);
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
|
||||
* ampere-av1-enablement Phase 2 — like find_decoder_device_by_driver but
|
||||
* additionally verifies the resolved /dev/videoN advertises `want_pixfmt`
|
||||
* as an OUTPUT format. Required for RK3588 where 3 hantro-vpu instances
|
||||
* share the driver name but only one is vpu981 (AV1 decoder).
|
||||
*
|
||||
* Walks all /dev/media* with matching driver name; takes the first hit
|
||||
* whose OUTPUT formats include `want_pixfmt`. Non-matching candidates
|
||||
* (encoder-only nodes, legacy hantro for MPEG2/VP8) are skipped.
|
||||
*/
|
||||
static int find_decoder_device_by_driver_with_fmt(const char *want_driver,
						  uint32_t want_pixfmt,
						  char *video_out,
						  size_t video_out_sz,
						  char *media_out,
						  size_t media_out_sz)
{
	int index;

	/* Scan /dev/media0 .. /dev/media15 in order; first device passing
	 * every check wins. want_driver must not be NULL here. */
	for (index = 0; index < 16; index++) {
		struct media_device_info dev_info;
		char media_path[32];
		char video_path[32];
		bool candidate;
		bool has_fmt;
		int media_fd;
		int video_fd;

		snprintf(media_path, sizeof media_path, "/dev/media%d", index);
		media_fd = open(media_path, O_RDWR | O_NONBLOCK);
		if (media_fd < 0)
			continue;

		/* Candidate = device info readable, driver name matches, and
		 * the topology walk resolves a decoder video node. */
		memset(&dev_info, 0, sizeof dev_info);
		candidate = ioctl(media_fd, MEDIA_IOC_DEVICE_INFO, &dev_info) == 0 &&
			    strcmp(dev_info.driver, want_driver) == 0 &&
			    find_decoder_video_node_via_topology(media_fd, video_path,
								 sizeof video_path) == 0;
		close(media_fd);
		if (!candidate)
			continue;

		/* Capability check on the resolved video node: it must list
		 * want_pixfmt among its OUTPUT formats. */
		video_fd = open(video_path, O_RDWR | O_NONBLOCK);
		if (video_fd < 0)
			continue;
		has_fmt = video_node_supports_output_fmt(video_fd, want_pixfmt);
		close(video_fd);
		if (!has_fmt)
			continue;

		snprintf(video_out, video_out_sz, "%s", video_path);
		snprintf(media_out, media_out_sz, "%s", media_path);
		return 0;
	}

	return -1;
}
|
||||
|
||||
/*
 * Auto-detect the primary decoder device: try "rkvdec" first, then fall back
 * to any driver in known_decoder_drivers[]. Returns the result of the first
 * successful lookup (0), otherwise the result of the fallback lookup.
 */
static int find_codec_device(char *video_out, size_t video_out_sz,
			     char *media_out, size_t media_out_sz)
{
	/* Lookup order; NULL selects "any known decoder driver". */
	const char * const preference[] = { "rkvdec", NULL };
	size_t pass;
	int rc = -1;

	for (pass = 0; pass < sizeof preference / sizeof preference[0]; pass++) {
		rc = find_decoder_device_by_driver(preference[pass],
						   video_out, video_out_sz,
						   media_out, media_out_sz);
		if (rc == 0)
			break;
	}

	return rc;
}
|
||||
|
||||
/*
|
||||
* iter38: profile → which physical decoder device should serve it on
|
||||
* RK3399. Returns 'r' for rkvdec, 'h' for hantro, 'a' for the dedicated
* vpu981 AV1 decoder, '?' for unknown.
|
||||
*
|
||||
* This is RK3399-shaped knowledge — a more general impl would interrogate
|
||||
* each open device's supported OUTPUT formats. For the campaign-scope
|
||||
* five codecs, the mapping is stable and explicit.
|
||||
*/
|
||||
char request_device_kind_for_profile(VAProfile profile);

char request_device_kind_for_profile(VAProfile profile)
{
	/* Profile → device-kind routing table. Kinds: 'r' rkvdec,
	 * 'h' hantro, 'a' vpu981 dedicated AV1 (ampere-av1-enablement). */
	static const struct {
		VAProfile profile;
		char kind;
	} routing[] = {
		{ VAProfileH264Main,                'r' },
		{ VAProfileH264High,                'r' },
		{ VAProfileH264ConstrainedBaseline, 'r' },
		{ VAProfileH264MultiviewHigh,       'r' },
		{ VAProfileH264StereoHigh,          'r' },
		{ VAProfileHEVCMain,                'r' },
		{ VAProfileVP9Profile0,             'r' },
		{ VAProfileMPEG2Simple,             'h' },
		{ VAProfileMPEG2Main,               'h' },
		{ VAProfileVP8Version0_3,           'h' },
		{ VAProfileAV1Profile0,             'a' },
	};
	size_t row;

	for (row = 0; row < sizeof routing / sizeof routing[0]; row++) {
		if (routing[row].profile == profile)
			return routing[row].kind;
	}

	/* Profile not in the table. */
	return '?';
}
|
||||
|
||||
/*
|
||||
* iter38: retarget driver_data->{video,media}_fd to the device kind
|
||||
* required by `profile`. If a switch is needed, tear down any per-device
|
||||
* pool state so the next RequestCreateContext rebuilds it against the
|
||||
* new device. Returns 0 on success, -1 if the required device wasn't
|
||||
* probed (e.g. trying VP8 on a system without hantro).
|
||||
*
|
||||
* Safe to call repeatedly with the same profile: if the active fd
|
||||
* already matches, the function is a no-op.
|
||||
*/
|
||||
int request_switch_device_for_profile(struct request_data *driver_data,
|
||||
VAProfile profile);
|
||||
int request_switch_device_for_profile(struct request_data *driver_data,
|
||||
VAProfile profile)
|
||||
{
|
||||
char kind = request_device_kind_for_profile(profile);
|
||||
int target_video, target_media;
|
||||
|
||||
if (kind == 'r') {
|
||||
target_video = driver_data->video_fd_rkvdec;
|
||||
target_media = driver_data->media_fd_rkvdec;
|
||||
} else if (kind == 'h') {
|
||||
target_video = driver_data->video_fd_hantro;
|
||||
target_media = driver_data->media_fd_hantro;
|
||||
} else if (kind == 'a') {
|
||||
target_video = driver_data->video_fd_vpu981;
|
||||
target_media = driver_data->media_fd_vpu981;
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Either side never probed (e.g. env-override single-device init,
|
||||
* or this kind isn't present on the running kernel) → tolerate by
|
||||
* staying on whatever's already active. RequestCreateConfig still
|
||||
* accepted the profile via the format probe, so the active fd
|
||||
* supports it. */
|
||||
if (target_video < 0 || target_media < 0)
|
||||
return 0;
|
||||
|
||||
if (driver_data->video_fd == target_video &&
|
||||
driver_data->media_fd == target_media)
|
||||
return 0; /* already active, nothing to do */
|
||||
|
||||
/*
|
||||
* Tear down any per-device pool state. cap_pool needs capture_type,
|
||||
* which comes from video_format. Both rkvdec and hantro use
|
||||
* V4L2_PIX_FMT_NV12 MPLANE on RK3399 (verified Phase 0 inventory)
|
||||
* so the MPLANE form is always right here.
|
||||
*/
|
||||
if (driver_data->capture_pool.initialized) {
|
||||
cap_pool_destroy(&driver_data->capture_pool,
|
||||
driver_data->video_fd,
|
||||
V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE);
|
||||
}
|
||||
if (driver_data->output_pool.initialized)
|
||||
request_pool_destroy(&driver_data->output_pool);
|
||||
|
||||
/* video_format is a static-ref pointer; re-probe on next
|
||||
* CreateContext since the new device's format menu may differ. */
|
||||
driver_data->video_format = NULL;
|
||||
driver_data->fmt_valid = false;
|
||||
|
||||
driver_data->video_fd = target_video;
|
||||
driver_data->media_fd = target_media;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Set default visibility for the init function only. */
|
||||
VAStatus __attribute__((visibility("default")))
|
||||
VA_DRIVER_INIT_FUNC(VADriverContextP context);
|
||||
@@ -146,9 +665,23 @@ VAStatus VA_DRIVER_INIT_FUNC(VADriverContextP context)
|
||||
object_heap_init(&driver_data->image_heap, sizeof(struct object_image),
|
||||
IMAGE_ID_OFFSET);
|
||||
|
||||
static char auto_video[32], auto_media[32];
|
||||
bool auto_media_set = false;
|
||||
|
||||
video_path = getenv("LIBVA_V4L2_REQUEST_VIDEO_PATH");
|
||||
if (video_path == NULL)
|
||||
video_path = "/dev/video0";
|
||||
if (video_path == NULL) {
|
||||
if (getenv("LIBVA_V4L2_REQUEST_NO_AUTODETECT")) {
|
||||
video_path = "/dev/video0";
|
||||
} else if (find_codec_device(auto_video, sizeof auto_video,
|
||||
auto_media, sizeof auto_media) == 0) {
|
||||
video_path = auto_video;
|
||||
auto_media_set = true;
|
||||
request_log("auto-selected codec device: %s + %s\n",
|
||||
auto_video, auto_media);
|
||||
} else {
|
||||
video_path = "/dev/video0";
|
||||
}
|
||||
}
|
||||
|
||||
video_fd = open(video_path, O_RDWR | O_NONBLOCK);
|
||||
if (video_fd < 0)
|
||||
@@ -169,8 +702,12 @@ VAStatus VA_DRIVER_INIT_FUNC(VADriverContextP context)
|
||||
}
|
||||
|
||||
media_path = getenv("LIBVA_V4L2_REQUEST_MEDIA_PATH");
|
||||
if (media_path == NULL)
|
||||
media_path = "/dev/media0";
|
||||
if (media_path == NULL) {
|
||||
if (auto_media_set)
|
||||
media_path = auto_media;
|
||||
else
|
||||
media_path = "/dev/media0";
|
||||
}
|
||||
|
||||
media_fd = open(media_path, O_RDWR | O_NONBLOCK);
|
||||
if (media_fd < 0)
|
||||
@@ -178,6 +715,138 @@ VAStatus VA_DRIVER_INIT_FUNC(VADriverContextP context)
|
||||
|
||||
driver_data->video_fd = video_fd;
|
||||
driver_data->media_fd = media_fd;
|
||||
driver_data->video_fd_rkvdec = -1;
|
||||
driver_data->media_fd_rkvdec = -1;
|
||||
driver_data->video_fd_hantro = -1;
|
||||
driver_data->media_fd_hantro = -1;
|
||||
driver_data->video_fd_vpu981 = -1;
|
||||
driver_data->media_fd_vpu981 = -1;
|
||||
|
||||
/*
|
||||
* iter38: probe BOTH rkvdec and hantro-vpu so a single libva session
|
||||
* can serve all 5 codecs. Tag the primary fd (already opened above)
|
||||
* by inspecting which driver the media_fd is on, then probe the OTHER
|
||||
* driver and open its fds if present. RequestCreateConfig retargets
|
||||
* driver_data->{video,media}_fd to the right pair per profile.
|
||||
*
|
||||
* Skip the alt-probe when the user provided explicit
|
||||
* LIBVA_V4L2_REQUEST_VIDEO_PATH / MEDIA_PATH overrides — they signal
|
||||
* a specific single device intent.
|
||||
*/
|
||||
if (!getenv("LIBVA_V4L2_REQUEST_VIDEO_PATH") &&
|
||||
!getenv("LIBVA_V4L2_REQUEST_MEDIA_PATH")) {
|
||||
struct media_device_info info;
|
||||
const char *primary_driver = NULL;
|
||||
const char *alt_driver = NULL;
|
||||
|
||||
memset(&info, 0, sizeof info);
|
||||
if (ioctl(media_fd, MEDIA_IOC_DEVICE_INFO, &info) == 0) {
|
||||
if (strcmp(info.driver, "rkvdec") == 0) {
|
||||
primary_driver = "rkvdec";
|
||||
alt_driver = "hantro-vpu";
|
||||
driver_data->video_fd_rkvdec = video_fd;
|
||||
driver_data->media_fd_rkvdec = media_fd;
|
||||
} else if (strcmp(info.driver, "hantro-vpu") == 0) {
|
||||
primary_driver = "hantro-vpu";
|
||||
alt_driver = "rkvdec";
|
||||
driver_data->video_fd_hantro = video_fd;
|
||||
driver_data->media_fd_hantro = media_fd;
|
||||
}
|
||||
}
|
||||
|
||||
if (alt_driver != NULL) {
|
||||
static char alt_video[32], alt_media[32];
|
||||
if (find_decoder_device_by_driver(alt_driver,
|
||||
alt_video, sizeof alt_video,
|
||||
alt_media, sizeof alt_media) == 0) {
|
||||
int alt_v = open(alt_video, O_RDWR | O_NONBLOCK);
|
||||
int alt_m = (alt_v >= 0) ? open(alt_media, O_RDWR | O_NONBLOCK) : -1;
|
||||
if (alt_v >= 0 && alt_m >= 0) {
|
||||
if (strcmp(alt_driver, "rkvdec") == 0) {
|
||||
driver_data->video_fd_rkvdec = alt_v;
|
||||
driver_data->media_fd_rkvdec = alt_m;
|
||||
} else {
|
||||
driver_data->video_fd_hantro = alt_v;
|
||||
driver_data->media_fd_hantro = alt_m;
|
||||
}
|
||||
request_log("iter38: also opened %s decoder at %s + %s\n",
|
||||
alt_driver, alt_video, alt_media);
|
||||
} else {
|
||||
if (alt_v >= 0) close(alt_v);
|
||||
if (alt_m >= 0) close(alt_m);
|
||||
}
|
||||
}
|
||||
}
|
||||
(void)primary_driver;
|
||||
|
||||
/*
|
||||
* ampere-av1-enablement Phase 2 — additionally probe for
|
||||
* vpu981 (RK3588's dedicated AV1 decoder). Driver name
|
||||
* "hantro-vpu" alone is ambiguous on RK3588 (3 instances:
|
||||
* legacy MPEG2/VP8, encoder, vpu981 AV1). Discriminate by
|
||||
* V4L2_PIX_FMT_AV1_FRAME capability. If the primary or alt
|
||||
* hantro happens to BE vpu981 (unlikely but possible on
|
||||
* non-RK3588 boards), this probe finds it again and we just
|
||||
* dedupe via the fd value.
|
||||
*/
|
||||
{
|
||||
static char av1_video[32], av1_media[32];
|
||||
if (find_decoder_device_by_driver_with_fmt(
|
||||
"hantro-vpu", V4L2_PIX_FMT_AV1_FRAME,
|
||||
av1_video, sizeof av1_video,
|
||||
av1_media, sizeof av1_media) == 0) {
|
||||
int av1_v = open(av1_video, O_RDWR | O_NONBLOCK);
|
||||
int av1_m = (av1_v >= 0)
|
||||
? open(av1_media, O_RDWR | O_NONBLOCK)
|
||||
: -1;
|
||||
if (av1_v >= 0 && av1_m >= 0) {
|
||||
driver_data->video_fd_vpu981 = av1_v;
|
||||
driver_data->media_fd_vpu981 = av1_m;
|
||||
request_log(
|
||||
"ampere-av1: vpu981 AV1 decoder "
|
||||
"at %s + %s\n",
|
||||
av1_video, av1_media);
|
||||
} else {
|
||||
if (av1_v >= 0) close(av1_v);
|
||||
if (av1_m >= 0) close(av1_m);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* iter2 (ampere-kernel-decoders): probe the new HEVC EXT_SPS_RPS
|
||||
* controls on each rkvdec/hantro fd. Result is consumed by
|
||||
* h265_set_controls per-codec gate. Per-fd storage matches the
|
||||
* iter38 multi-device-probe pattern (Phase 5 review item).
|
||||
*/
|
||||
driver_data->has_hevc_ext_sps_rps_rkvdec =
|
||||
probe_hevc_ext_sps_rps_controls(driver_data->video_fd_rkvdec);
|
||||
driver_data->has_hevc_ext_sps_rps_hantro =
|
||||
probe_hevc_ext_sps_rps_controls(driver_data->video_fd_hantro);
|
||||
if (driver_data->has_hevc_ext_sps_rps_rkvdec) {
|
||||
request_log("iter2: kernel registers HEVC EXT_SPS_{ST,LT}_RPS "
|
||||
"controls on rkvdec fd (will route through "
|
||||
"vendored GStreamer parser)\n");
|
||||
}
|
||||
|
||||
/*
|
||||
* ampere-av1 Phase 2.1: probe V4L2_CID_STATELESS_AV1_FILM_GRAIN
|
||||
* on the vpu981 fd. Per Janet v3 amendment, this runs at backend
|
||||
* init (not lazily) so any race window with concurrent device
|
||||
* switching can't observe an inconsistent flag.
|
||||
*/
|
||||
driver_data->has_av1_film_grain = false;
|
||||
if (driver_data->video_fd_vpu981 >= 0) {
|
||||
struct v4l2_query_ext_ctrl qec;
|
||||
if (v4l2_query_ext_ctrl(driver_data->video_fd_vpu981,
|
||||
V4L2_CID_STATELESS_AV1_FILM_GRAIN,
|
||||
&qec) == 0) {
|
||||
driver_data->has_av1_film_grain = true;
|
||||
request_log("ampere-av1: vpu981 advertises FILM_GRAIN "
|
||||
"control (will include in per-frame batch)\n");
|
||||
}
|
||||
}
|
||||
|
||||
status = VA_STATUS_SUCCESS;
|
||||
goto complete;
|
||||
@@ -205,8 +874,40 @@ VAStatus RequestTerminate(VADriverContextP context)
|
||||
struct object_config *config_object;
|
||||
int iterator;
|
||||
|
||||
close(driver_data->video_fd);
|
||||
close(driver_data->media_fd);
|
||||
/*
|
||||
* Tear down the OUTPUT buffer pool before closing video_fd so
|
||||
* the munmap calls in request_pool_destroy() can still touch the
|
||||
* mmap regions (which are tied to that fd's lifetime).
|
||||
*/
|
||||
request_pool_destroy(&driver_data->output_pool);
|
||||
|
||||
/*
|
||||
* iter38: close both probed device pairs. video_fd / media_fd above
|
||||
* are ACTIVE pointers into one of these pairs; close the underlying
|
||||
* fds explicitly. Each may be -1 if its device wasn't found.
|
||||
*/
|
||||
if (driver_data->video_fd_rkvdec >= 0)
|
||||
close(driver_data->video_fd_rkvdec);
|
||||
if (driver_data->media_fd_rkvdec >= 0)
|
||||
close(driver_data->media_fd_rkvdec);
|
||||
if (driver_data->video_fd_hantro >= 0)
|
||||
close(driver_data->video_fd_hantro);
|
||||
if (driver_data->media_fd_hantro >= 0)
|
||||
close(driver_data->media_fd_hantro);
|
||||
if (driver_data->video_fd_vpu981 >= 0)
|
||||
close(driver_data->video_fd_vpu981);
|
||||
if (driver_data->media_fd_vpu981 >= 0)
|
||||
close(driver_data->media_fd_vpu981);
|
||||
/* Fall back to direct close if neither alt fd captured the active
|
||||
* pair (env-override path). */
|
||||
if (driver_data->video_fd_rkvdec < 0 &&
|
||||
driver_data->video_fd_hantro < 0 &&
|
||||
driver_data->video_fd_vpu981 < 0) {
|
||||
if (driver_data->video_fd >= 0)
|
||||
close(driver_data->video_fd);
|
||||
if (driver_data->media_fd >= 0)
|
||||
close(driver_data->media_fd);
|
||||
}
|
||||
|
||||
/* Cleanup leftover buffers. */
|
||||
|
||||
|
||||
+157
@@ -31,11 +31,15 @@
|
||||
|
||||
#include "context.h"
|
||||
#include "object_heap.h"
|
||||
#include "request_pool.h"
|
||||
#include "cap_pool.h"
|
||||
#include "video.h"
|
||||
#include <va/va.h>
|
||||
|
||||
#include <linux/videodev2.h>
|
||||
|
||||
#include "hevc-ctrls/v4l2-hevc-ext-controls.h"
|
||||
|
||||
#define V4L2_REQUEST_STR_VENDOR "v4l2-request"
|
||||
|
||||
#define V4L2_REQUEST_MAX_PROFILES 11
|
||||
@@ -54,10 +58,163 @@ struct request_data {
|
||||
int video_fd;
|
||||
int media_fd;
|
||||
|
||||
/*
|
||||
* iter38: multi-device probe. RK3399 has two V4L2 stateless decoders:
|
||||
* - rkvdec → H264 / HEVC / VP9
|
||||
* - hantro-vpu (rk3399-vpu-dec) → MPEG-2 / VP8
|
||||
* At VA_DRIVER_INIT we probe both, open their fds, and store them
|
||||
* here. driver_data->video_fd / media_fd above are the "active" fds
|
||||
* (point at one of the pairs below). RequestCreateConfig retargets
|
||||
* them based on the profile's required device. Pools and video_format
|
||||
* are torn down at retarget time so the next CreateContext rebuilds
|
||||
* them against the right device.
|
||||
*
|
||||
* -1 means that device kind isn't present on this kernel boot.
|
||||
* Honours LIBVA_V4L2_REQUEST_VIDEO_PATH / MEDIA_PATH explicit
|
||||
* overrides — when those are set, only the single requested device
|
||||
* is opened and the alt fds stay -1.
|
||||
*/
|
||||
int video_fd_rkvdec;
|
||||
int media_fd_rkvdec;
|
||||
int video_fd_hantro;
|
||||
int media_fd_hantro;
|
||||
|
||||
/*
|
||||
* ampere-av1-enablement Phase 2 — vpu981 is a THIRD physical
|
||||
* hantro-vpu instance on RK3588 (separate from the legacy MPEG2/VP8
|
||||
* hantro at /dev/video2). It's the dedicated AV1 decoder at
|
||||
* /dev/video4 with card name "rockchip,rk3588-av1-vpu-dec".
|
||||
*
|
||||
* Driver-name alone ("hantro-vpu") is ambiguous on RK3588 — three
|
||||
* instances share the name. The probe discriminates by capability:
|
||||
* which OUTPUT format does the device advertise? Only vpu981
|
||||
* exposes V4L2_PIX_FMT_AV1_FRAME.
|
||||
*/
|
||||
int video_fd_vpu981;
|
||||
int media_fd_vpu981;
|
||||
|
||||
/*
|
||||
* iter2 (ampere-kernel-decoders campaign) — per-fd probe result
|
||||
* for the V4L2_CID_STATELESS_HEVC_EXT_SPS_{ST,LT}_RPS controls
|
||||
* introduced in Linux 7.0 (Casanova VDPU381/VDPU383 series).
|
||||
* RK3399 rkvdec doesn't have them and the probe returns false;
|
||||
* RK3588 rkvdec (VDPU381/383) registers them and the probe is
|
||||
* true. h265_set_controls consults only the rkvdec entry because
|
||||
* HEVC routes through rkvdec only — hantro's entry stays false
|
||||
* naturally (it doesn't have rkvdec-specific controls).
|
||||
*
|
||||
* The pair-of-flags layout mirrors video_fd_rkvdec /
|
||||
* video_fd_hantro above (iter38 multi-device-probe pattern,
|
||||
* memory feedback_multi_device_probe_design). Phase 5 review
|
||||
* surfaced this as a correctness item: a single scalar on
|
||||
* driver_data would silently misbehave across device-switch
|
||||
* boundaries; per-fd storage is the safe shape.
|
||||
*/
|
||||
bool has_hevc_ext_sps_rps_rkvdec;
|
||||
bool has_hevc_ext_sps_rps_hantro;
|
||||
|
||||
/*
|
||||
* ampere-av1 Phase 2.1: probe result for the optional
|
||||
* V4L2_CID_STATELESS_AV1_FILM_GRAIN control on the vpu981 fd.
|
||||
* Probed at VA_DRIVER_INIT (per Janet v3 amendment — init-time
|
||||
* not lazy). Consumed by av1_set_controls to conditionally include
|
||||
* the 4th control in the per-frame batch.
|
||||
*
|
||||
* True iff vpu981 advertises the control via VIDIOC_QUERY_EXT_CTRL.
|
||||
* False for non-RK3588 hosts (no vpu981 fd) or older kernels.
|
||||
*/
|
||||
bool has_av1_film_grain;
|
||||
|
||||
/*
|
||||
* iter2 — cached SPS-derived RPS arrays. SPS NALs only appear in
|
||||
* source_data on IDR frames; non-IDR frames' h265_set_controls
|
||||
* reuse the cached arrays so we don't submit zero-filled RPS to
|
||||
* the kernel (which would re-trigger the OOPS the iter2 fix is
|
||||
* designed to prevent). Single-slot cache (sps_id 0 only) —
|
||||
* adequate for the BBB / typical-stream case; multi-SPS streams
|
||||
* would need expanding to a [16] cache keyed by sps_id.
|
||||
*
|
||||
* The cache stores the post-mapped V4L2 control struct arrays
|
||||
* (not the intermediate GstH265SPS) so request.h doesn't need
|
||||
* to know about the vendored GStreamer parser types — only the
|
||||
* V4L2 UAPI structs from hevc-ctrls/v4l2-hevc-ext-controls.h
|
||||
* included above.
|
||||
*
|
||||
* Owned by h265.c; freed at RequestTerminate.
|
||||
*/
|
||||
struct v4l2_ctrl_hevc_ext_sps_st_rps *hevc_rps_cache_st;
|
||||
unsigned int hevc_rps_cache_st_count;
|
||||
struct v4l2_ctrl_hevc_ext_sps_lt_rps *hevc_rps_cache_lt;
|
||||
unsigned int hevc_rps_cache_lt_count;
|
||||
bool hevc_rps_cache_valid;
|
||||
|
||||
struct video_format *video_format;
|
||||
|
||||
/*
|
||||
* OUTPUT (bitstream-input) buffer pool, decoupled from VA
|
||||
* surfaces. Sized by codec pipeline depth, populated on first
|
||||
* RequestCreateContext, torn down at driver Terminate.
|
||||
*/
|
||||
struct request_pool output_pool;
|
||||
|
||||
/*
|
||||
* CAPTURE (decoded-frame) buffer pool, decoupled from VA
|
||||
* surfaces (iter2 Fix 3). Each surface acquires a slot at
|
||||
* vaBeginPicture time and releases it on the next acquisition
|
||||
* or vaDestroySurfaces. Pool sized to max(surfaces_count,
|
||||
* MIN_CAP_POOL) at first vaCreateSurfaces2; torn down at
|
||||
* vaDestroyContext.
|
||||
*
|
||||
* Background: pre-iter2 each surface was 1:1 bound to one
|
||||
* CAPTURE buffer index; mpv re-using a surface for a new decode
|
||||
* caused V4L2 to re-QBUF the same physical buffer while a
|
||||
* compositor still held an EXPBUF'd dma_buf fd, producing
|
||||
* visible stutter on mpv vaapi --vo=gpu.
|
||||
*/
|
||||
struct cap_pool capture_pool;
|
||||
|
||||
/*
|
||||
* iter5b-β: the pre-β last_output_{width,height} cache fields
|
||||
* and surface_reset_format_cache() helper are deleted. They
|
||||
* existed because CreateSurfaces2 owned the OUTPUT-side V4L2
|
||||
* device-format lifecycle and needed to gate re-S_FMT on
|
||||
* resolution change. β moves that lifecycle to CreateContext,
|
||||
* which is naturally one-shot per context cycle; no caching is
|
||||
* required. DestroyContext + next CreateContext rebuild from
|
||||
* scratch.
|
||||
*
|
||||
* iter5b-β Commit D: cache the format-uniform CAPTURE-side
|
||||
* geometry from v4l2_get_format so CreateSurfaces2 can populate
|
||||
* a newly-created surface's destination_* fields without
|
||||
* re-querying the device. Set by CreateContext after the
|
||||
* v4l2_get_format(CAPTURE) call; consumed by both:
|
||||
* 1. CreateContext's surface_heap walk (fills surfaces that
|
||||
* pre-exist when CreateContext fires);
|
||||
* 2. CreateSurfaces2's per-surface init (fills surfaces
|
||||
* created AFTER CreateContext, e.g. ffmpeg vaapi-copy
|
||||
* pool dynamics where the consumer passes surfaces_count=0
|
||||
* to vaCreateContext and creates surfaces lazily).
|
||||
*
|
||||
* fmt_valid is true once CreateContext has populated the cache;
|
||||
* CreateSurfaces2 only lazy-fills when fmt_valid is true.
|
||||
*/
|
||||
bool fmt_valid;
|
||||
unsigned int fmt_format_height;
|
||||
unsigned int fmt_planes_count;
|
||||
unsigned int fmt_buffers_count;
|
||||
unsigned int fmt_sizes[VIDEO_MAX_PLANES];
|
||||
unsigned int fmt_bytesperlines[VIDEO_MAX_PLANES];
|
||||
};
|
||||
|
||||
VAStatus VA_DRIVER_INIT_FUNC(VADriverContextP context);
|
||||
VAStatus RequestTerminate(VADriverContextP context);
|
||||
|
||||
/*
|
||||
* iter38: retarget driver_data->{video,media}_fd to the device required by
|
||||
* `profile`. Returns 0 on success, -1 on profile not mappable to any kind.
|
||||
* Defined in request.c.
|
||||
*/
|
||||
int request_switch_device_for_profile(struct request_data *driver_data,
|
||||
VAProfile profile);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -0,0 +1,226 @@
|
||||
/*
|
||||
* Copyright (C) 2026 Markus Fritsche <fritsche.markus@gmail.com>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND.
|
||||
*/
|
||||
|
||||
#include "request_pool.h"
|
||||
|
||||
#include <errno.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/mman.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "media.h"
|
||||
#include "utils.h"
|
||||
#include "v4l2.h"
|
||||
|
||||
int request_pool_init(struct request_pool *pool, int video_fd, int media_fd,
|
||||
unsigned int output_type, unsigned int count)
|
||||
{
|
||||
unsigned int index_base;
|
||||
unsigned int length;
|
||||
unsigned int offset;
|
||||
unsigned int i;
|
||||
int rc;
|
||||
|
||||
if (pool == NULL || count == 0)
|
||||
return -1;
|
||||
|
||||
if (pool->initialized)
|
||||
return 0;
|
||||
|
||||
pool->slots = calloc(count, sizeof(*pool->slots));
|
||||
if (pool->slots == NULL)
|
||||
return -1;
|
||||
|
||||
pool->count = count;
|
||||
pool->next = 0;
|
||||
pool->media_fd = media_fd; /* iter7: kept for force_release re-alloc */
|
||||
|
||||
for (i = 0; i < count; i++)
|
||||
pool->slots[i].request_fd = -1;
|
||||
|
||||
rc = v4l2_create_buffers(video_fd, output_type, count, &index_base);
|
||||
if (rc < 0)
|
||||
goto error;
|
||||
|
||||
for (i = 0; i < count; i++) {
|
||||
pool->slots[i].index = index_base + i;
|
||||
pool->slots[i].busy = false;
|
||||
|
||||
rc = v4l2_query_buffer(video_fd, output_type,
|
||||
pool->slots[i].index,
|
||||
&length, &offset, 1);
|
||||
if (rc < 0)
|
||||
goto error;
|
||||
|
||||
pool->slots[i].data = mmap(NULL, length,
|
||||
PROT_READ | PROT_WRITE,
|
||||
MAP_SHARED, video_fd, offset);
|
||||
if (pool->slots[i].data == MAP_FAILED) {
|
||||
pool->slots[i].data = NULL;
|
||||
goto error;
|
||||
}
|
||||
|
||||
pool->slots[i].size = length;
|
||||
|
||||
/*
|
||||
* iter6: each pool slot owns a permanent media-request fd,
|
||||
* allocated once here and REINIT'd between uses in
|
||||
* RequestSyncSurface. Replaces the iter4 close+alloc-per-
|
||||
* frame model, whose lowest-free fd reuse was racing with
|
||||
* the kernel's per-buffer state-machine teardown when the
|
||||
* consumer rotated through multiple OUTPUT pool slots
|
||||
* faster than the kernel cleanup drained (Firefox's
|
||||
* MediaSource pattern). 1:1 slot-to-fd binding eliminates
|
||||
* cross-slot fd reuse.
|
||||
*/
|
||||
pool->slots[i].request_fd = media_request_alloc(media_fd);
|
||||
if (pool->slots[i].request_fd < 0)
|
||||
goto error;
|
||||
}
|
||||
|
||||
pool->initialized = true;
|
||||
return 0;
|
||||
|
||||
error:
|
||||
request_pool_destroy(pool);
|
||||
return -1;
|
||||
}
|
||||
|
||||
void request_pool_destroy(struct request_pool *pool)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
if (pool == NULL || pool->slots == NULL)
|
||||
return;
|
||||
|
||||
for (i = 0; i < pool->count; i++) {
|
||||
if (pool->slots[i].request_fd >= 0)
|
||||
close(pool->slots[i].request_fd);
|
||||
if (pool->slots[i].data != NULL && pool->slots[i].size > 0)
|
||||
munmap(pool->slots[i].data, pool->slots[i].size);
|
||||
}
|
||||
|
||||
free(pool->slots);
|
||||
pool->slots = NULL;
|
||||
pool->count = 0;
|
||||
pool->next = 0;
|
||||
pool->initialized = false;
|
||||
}
|
||||
|
||||
int request_pool_acquire(struct request_pool *pool)
|
||||
{
|
||||
unsigned int start;
|
||||
unsigned int i;
|
||||
|
||||
if (pool == NULL || !pool->initialized || pool->count == 0)
|
||||
return -1;
|
||||
|
||||
start = pool->next;
|
||||
for (i = 0; i < pool->count; i++) {
|
||||
unsigned int slot = (start + i) % pool->count;
|
||||
|
||||
if (!pool->slots[slot].busy) {
|
||||
pool->slots[slot].busy = true;
|
||||
pool->next = (slot + 1) % pool->count;
|
||||
return (int)pool->slots[slot].index;
|
||||
}
|
||||
}
|
||||
|
||||
/* All slots busy; caller must wait for an in-flight DQBUF. */
|
||||
return -1;
|
||||
}
|
||||
|
||||
void request_pool_release(struct request_pool *pool, unsigned int index)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
if (pool == NULL || pool->slots == NULL)
|
||||
return;
|
||||
|
||||
for (i = 0; i < pool->count; i++) {
|
||||
if (pool->slots[i].index == index) {
|
||||
pool->slots[i].busy = false;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void request_pool_force_release(struct request_pool *pool, unsigned int index)
|
||||
{
|
||||
struct request_pool_slot *slot;
|
||||
unsigned int i;
|
||||
|
||||
if (pool == NULL || pool->slots == NULL)
|
||||
return;
|
||||
|
||||
slot = NULL;
|
||||
for (i = 0; i < pool->count; i++) {
|
||||
if (pool->slots[i].index == index) {
|
||||
slot = &pool->slots[i];
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (slot == NULL)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Try to recover the kernel-side request object via REINIT first.
|
||||
* REINIT is the cheap path: kernel resets the request in place,
|
||||
* fd stays valid, slot can be reused immediately.
|
||||
*/
|
||||
if (slot->request_fd >= 0 && media_request_reinit(slot->request_fd) == 0) {
|
||||
slot->busy = false;
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* REINIT failed (or slot's fd was already invalid). Close the fd
|
||||
* and try to allocate a fresh one. This costs an extra ioctl pair
|
||||
* relative to the REINIT happy path but keeps the slot usable.
|
||||
*
|
||||
* NOTE: alloc may return the same lowest-free fd number that was
|
||||
* just closed. That's fine here because (a) this is a rare error-
|
||||
* recovery path, not the per-frame happy path, and (b) the slot's
|
||||
* V4L2 buffer has already been DQBUF'd by this point (or is in an
|
||||
* indeterminate state we can't recover from regardless), so the
|
||||
* iter6 race condition (cross-slot fd-reuse against a kernel
|
||||
* buffer in mid-cleanup) does not apply.
|
||||
*/
|
||||
if (slot->request_fd >= 0)
|
||||
close(slot->request_fd);
|
||||
slot->request_fd = media_request_alloc(pool->media_fd);
|
||||
if (slot->request_fd < 0) {
|
||||
/*
|
||||
* Realloc failed. Slot is now permanently dead — leave
|
||||
* busy=true so acquire skips it. Pool capacity is
|
||||
* effectively reduced by 1 until pool destroy.
|
||||
*/
|
||||
return;
|
||||
}
|
||||
|
||||
slot->busy = false;
|
||||
}
|
||||
|
||||
struct request_pool_slot *request_pool_slot(struct request_pool *pool,
|
||||
unsigned int index)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
if (pool == NULL || pool->slots == NULL)
|
||||
return NULL;
|
||||
|
||||
for (i = 0; i < pool->count; i++) {
|
||||
if (pool->slots[i].index == index)
|
||||
return &pool->slots[i];
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
@@ -0,0 +1,107 @@
|
||||
/*
|
||||
* Copyright (C) 2026 Markus Fritsche <fritsche.markus@gmail.com>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND.
|
||||
*/
|
||||
|
||||
#ifndef _REQUEST_POOL_H_
|
||||
#define _REQUEST_POOL_H_
|
||||
|
||||
#include <stdbool.h>
|
||||
|
||||
/*
|
||||
* OUTPUT (bitstream-input) buffer pool, decoupled from caller-allocated
|
||||
* VA surfaces. Sizing is driven by codec pipeline depth (typically 4
|
||||
* for H.264), not by the consumer's surface count.
|
||||
*
|
||||
* The pool owns the V4L2 buffer indices and their mmap pointers. A
|
||||
* decode request "borrows" a slot at vaBeginPicture, fills it across
|
||||
* vaRenderPicture calls, queues it at vaEndPicture, and releases it
|
||||
* after VIDIOC_DQBUF returns.
|
||||
*
|
||||
* This replaces the per-surface OUTPUT-buffer ownership model in the
|
||||
* pre-refactor code, where object_surface.source_* fields permanently
|
||||
* held a single OUTPUT buffer per surface — incorrect because OUTPUT
|
||||
* buffers are request-time resources, not picture-time resources, and
|
||||
* because the per-surface loop in RequestCreateContext only ran when
|
||||
* surfaces_count > 0 (breaking ffmpeg's vaapi-copy num_render_targets=0
|
||||
* convention).
|
||||
*/
|
||||
|
||||
/* One OUTPUT-queue buffer together with its mapping and request fd. */
struct request_pool_slot {
	/* V4L2 buffer index of this slot in the OUTPUT queue. */
	unsigned int index;
	/* CPU mapping (mmap) of the buffer. */
	void *data;
	/* Length of the mapping, in bytes. */
	unsigned int size;
	/* Set while the slot is borrowed for a request. */
	bool busy;
	/*
	 * Media-request fd bound to this slot: allocated once at pool
	 * init and REINIT'd between uses. iter6 introduced this in place
	 * of the iter4 close+alloc-per-frame scheme, to remove the
	 * cross-slot fd-reuse race that broke Firefox MediaSource's
	 * multi-surface decode.
	 */
	int request_fd;
};
|
||||
|
||||
/* Pool of OUTPUT-queue slots handed out round-robin. */
struct request_pool {
	/* Array of `count` slots; NULL while the pool is not set up. */
	struct request_pool_slot *slots;
	/* Number of entries in `slots`. */
	unsigned int count;
	/* Round-robin cursor: position where the next acquire starts. */
	unsigned int next;
	/* iter7: media fd, retained so force_release can re-alloc. */
	int media_fd;
	/* True once request_pool_init() has completed successfully. */
	bool initialized;
};
|
||||
|
||||
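/*
* Allocate count OUTPUT buffers via VIDIOC_CREATE_BUFS, query and mmap
* each, allocate one media-request fd per slot on media_fd, and
* populate pool->slots[]. Caller must have already done VIDIOC_S_FMT
* on the OUTPUT queue. Returns 0 on success (also when the pool is
* already initialized, in which case it is left untouched), -1 on
* failure; on failure the partially built pool is torn down with
* request_pool_destroy().
*/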
|
||||
int request_pool_init(struct request_pool *pool, int video_fd, int media_fd,
|
||||
unsigned int output_type, unsigned int count);
|
||||
|
||||
/*
* Close every slot's media-request fd, munmap all slots and free the
* slots array. Idempotent.
*/
|
||||
void request_pool_destroy(struct request_pool *pool);
|
||||
|
||||
/*
* Claim the next free slot (round-robin). Returns the slot's V4L2
* buffer index on success (use request_pool_slot() to look up the
* owning slot from the returned index), or -1 if the pool is not
* initialized or all slots are busy.
*/
|
||||
int request_pool_acquire(struct request_pool *pool);
|
||||
|
||||
/*
* Mark the slot owning V4L2 buffer `index` free for reuse. Caller must
* pass the V4L2 buffer index returned earlier from
* request_pool_acquire(); an unknown index is ignored.
*/
|
||||
void request_pool_release(struct request_pool *pool, unsigned int index);
|
||||
|
||||
/*
* iter7: error-recovery release. Called from RequestSyncSurface error
* paths when media_request_reinit or VIDIOC_DQBUF failed mid-cycle and
* the slot's request_fd is now in an undefined state. REINITs the fd;
* if REINIT fails (kernel-side request object too far gone), closes
* the fd and allocates a fresh one. If that allocation also fails, the
* slot is left busy=true (effectively dead: pool->count is unchanged,
* but acquire skips the slot) — the pool survives with its capacity
* reduced by one until request_pool_destroy(). Other slots are
* unaffected.
*
* Caller passes the V4L2 buffer index from request_pool_acquire().
*/
|
||||
void request_pool_force_release(struct request_pool *pool,
|
||||
unsigned int index);
|
||||
|
||||
/*
* Look up the pool slot owning a given V4L2 buffer index. Returns a
* pointer to the slot on success, NULL if no slot in the pool owns
* that index. The returned pointer is valid until pool destruction;
* do not free.
*/
|
||||
struct request_pool_slot *request_pool_slot(struct request_pool *pool,
|
||||
unsigned int index);
|
||||
|
||||
#endif
|
||||
+444
-149
@@ -29,6 +29,7 @@
|
||||
|
||||
#include <assert.h>
|
||||
#include <errno.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
@@ -46,6 +47,119 @@
|
||||
#include "v4l2.h"
|
||||
#include "video.h"
|
||||
|
||||
/*
|
||||
* iter5b-β: the OUTPUT-side V4L2 device-format lifecycle moved out
|
||||
* of this file. Pre-β CreateSurfaces2 owned the S_FMT(OUTPUT) +
|
||||
* CAPTURE-format probe + cap_pool_init + per-surface destination_*
|
||||
* fill; now that responsibility lives in context.c::RequestCreateContext
|
||||
* where the bound config (and therefore the active VAProfile) is
|
||||
* known via config_id. CreateSurfaces2 retains only surface object
|
||||
* ID allocation and per-surface bookkeeping. The previous
|
||||
* `surface_reset_format_cache` helper and `last_output_width/height`
|
||||
* fields are deleted (β doesn't gate re-S_FMT on
|
||||
* resolution — the lifecycle is CreateContext-centric and natural
|
||||
* setup/teardown happens at each context cycle).
|
||||
*/
|
||||
|
||||
/*
|
||||
* Iter2 Fix 3 helpers — bind / unbind a cap_pool_slot to an
|
||||
* object_surface. Called from BeginPicture (acquire+bind) and
|
||||
* DestroySurfaces (unbind). Populates surface_object->destination_*
|
||||
* fields from the slot so existing code paths (the QBUF in
|
||||
* picture.c::EndPicture, the EXPBUF in ExportSurfaceHandle, the
|
||||
* mmap-read in copy_surface_to_image) continue to work unchanged.
|
||||
*
|
||||
* surface_bind_slot is called only from BeginPicture; the surface's
* format-uniform fields (destination_planes_count, destination_sizes,
* destination_offsets, destination_bytesperlines) are already set
* by surface_fill_format_uniform() (at CreateContext time, or from
* CreateSurfaces2 for surfaces created later) and stay constant.
|
||||
*/
|
||||
void surface_bind_slot(struct object_surface *surface_object,
|
||||
struct cap_pool_slot *slot)
|
||||
{
|
||||
unsigned int j;
|
||||
|
||||
surface_object->current_slot = slot;
|
||||
surface_object->destination_index = slot->v4l2_index;
|
||||
surface_object->destination_buffers_count = slot->buffers_count;
|
||||
|
||||
for (j = 0; j < slot->buffers_count; j++) {
|
||||
surface_object->destination_map[j] = slot->map[j];
|
||||
surface_object->destination_map_lengths[j] = slot->map_lengths[j];
|
||||
surface_object->destination_map_offsets[j] = slot->map_offsets[j];
|
||||
}
|
||||
|
||||
/*
|
||||
* destination_data[j] is the per-plane CPU pointer used by
|
||||
* copy_surface_to_image. For single-buffer MPLANE NV12 (our
|
||||
* common case), all planes live in slot->map[0] at varying
|
||||
* offsets recorded in destination_offsets[].
|
||||
*/
|
||||
if (slot->buffers_count == 1) {
|
||||
for (j = 0; j < surface_object->destination_planes_count; j++)
|
||||
surface_object->destination_data[j] =
|
||||
(unsigned char *)slot->map[0] +
|
||||
surface_object->destination_offsets[j];
|
||||
} else {
|
||||
for (j = 0; j < surface_object->destination_planes_count; j++)
|
||||
surface_object->destination_data[j] = slot->map[j];
|
||||
}
|
||||
}
|
||||
|
||||
void surface_unbind_slot(struct request_data *driver_data,
|
||||
struct object_surface *surface_object)
|
||||
{
|
||||
if (surface_object->current_slot == NULL)
|
||||
return;
|
||||
/* AV1 Phase 3 diag: log every unbind with surface id + slot idx
|
||||
* + status — confirms whether BeginPicture rebind is racing the
|
||||
* consumer's vaGetImage on the previous frame. */
|
||||
request_log("surface_unbind_slot id=0x%x status=%d slot_idx=%u\n",
|
||||
surface_object->base.id,
|
||||
surface_object->status,
|
||||
surface_object->current_slot->v4l2_index);
|
||||
cap_pool_release(&driver_data->capture_pool, surface_object->current_slot);
|
||||
surface_object->current_slot = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* iter5b-β Commit D: fill format-uniform destination_* on a surface
|
||||
* from driver_data's CAPTURE-format cache. Idempotent: no-op if
|
||||
* destination_planes_count is non-zero already.
|
||||
*/
|
||||
void surface_fill_format_uniform(struct request_data *driver_data,
|
||||
struct object_surface *surface_object)
|
||||
{
|
||||
unsigned int j;
|
||||
|
||||
if (!driver_data->fmt_valid)
|
||||
return;
|
||||
if (surface_object->destination_planes_count != 0)
|
||||
return;
|
||||
|
||||
surface_object->destination_planes_count = driver_data->fmt_planes_count;
|
||||
surface_object->destination_buffers_count = driver_data->fmt_buffers_count;
|
||||
|
||||
if (driver_data->fmt_buffers_count == 1) {
|
||||
for (j = 0; j < driver_data->fmt_planes_count; j++) {
|
||||
surface_object->destination_offsets[j] =
|
||||
j > 0 ? driver_data->fmt_sizes[j - 1] : 0;
|
||||
surface_object->destination_sizes[j] =
|
||||
driver_data->fmt_sizes[j];
|
||||
surface_object->destination_bytesperlines[j] =
|
||||
driver_data->fmt_bytesperlines[0];
|
||||
}
|
||||
} else if (driver_data->fmt_buffers_count == driver_data->fmt_planes_count) {
|
||||
for (j = 0; j < driver_data->fmt_planes_count; j++) {
|
||||
surface_object->destination_offsets[j] = 0;
|
||||
surface_object->destination_sizes[j] =
|
||||
driver_data->fmt_sizes[j];
|
||||
surface_object->destination_bytesperlines[j] =
|
||||
driver_data->fmt_bytesperlines[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
VAStatus RequestCreateSurfaces2(VADriverContextP context, unsigned int format,
|
||||
unsigned int width, unsigned int height,
|
||||
VASurfaceID *surfaces_ids,
|
||||
@@ -55,130 +169,41 @@ VAStatus RequestCreateSurfaces2(VADriverContextP context, unsigned int format,
|
||||
{
|
||||
struct request_data *driver_data = context->pDriverData;
|
||||
struct object_surface *surface_object;
|
||||
struct video_format *video_format = NULL;
|
||||
unsigned int destination_sizes[VIDEO_MAX_PLANES];
|
||||
unsigned int destination_bytesperlines[VIDEO_MAX_PLANES];
|
||||
unsigned int destination_planes_count;
|
||||
unsigned int format_width, format_height;
|
||||
unsigned int capture_type;
|
||||
unsigned int index_base;
|
||||
unsigned int index;
|
||||
unsigned int i, j;
|
||||
unsigned int i;
|
||||
VASurfaceID id;
|
||||
bool found;
|
||||
int rc;
|
||||
|
||||
/*
|
||||
* iter5b-β: only RT-format-level validation here. All V4L2
|
||||
* device state (OUTPUT format, CAPTURE format probe,
|
||||
* cap_pool_init, per-surface destination_* fill) is deferred
|
||||
* to RequestCreateContext where the bound VAConfigID
|
||||
* (and therefore the active VAProfile) is known. CreateSurfaces2
|
||||
* has no config_id parameter; the VA-API contract is
|
||||
* CreateConfig → CreateSurfaces → CreateContext, and we
|
||||
* can't know the OUTPUT pixel format until CreateContext binds.
|
||||
*
|
||||
* Surface objects allocated here hold only the requested
|
||||
* width/height and per-surface lifecycle bookkeeping
|
||||
* (current_slot, status, params, etc). The format-uniform
|
||||
* destination_* fields are filled by CreateContext via
|
||||
* surface_fill_format_uniform(); the per-slot
|
||||
* destination_* fields fill at BeginPicture via surface_bind_slot.
|
||||
*/
|
||||
if (format != VA_RT_FORMAT_YUV420)
|
||||
return VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT;
|
||||
|
||||
|
||||
if (!driver_data->video_format) {
|
||||
found = v4l2_find_format(driver_data->video_fd,
|
||||
V4L2_BUF_TYPE_VIDEO_CAPTURE,
|
||||
V4L2_PIX_FMT_SUNXI_TILED_NV12);
|
||||
if (found)
|
||||
video_format = video_format_find(V4L2_PIX_FMT_SUNXI_TILED_NV12);
|
||||
|
||||
found = v4l2_find_format(driver_data->video_fd,
|
||||
V4L2_BUF_TYPE_VIDEO_CAPTURE,
|
||||
V4L2_PIX_FMT_NV12);
|
||||
if (found)
|
||||
video_format = video_format_find(V4L2_PIX_FMT_NV12);
|
||||
|
||||
if (video_format == NULL)
|
||||
return VA_STATUS_ERROR_OPERATION_FAILED;
|
||||
|
||||
driver_data->video_format = video_format;
|
||||
|
||||
capture_type = v4l2_type_video_capture(video_format->v4l2_mplane);
|
||||
|
||||
rc = v4l2_set_format(driver_data->video_fd, capture_type,
|
||||
video_format->v4l2_format, width, height);
|
||||
if (rc < 0)
|
||||
return VA_STATUS_ERROR_OPERATION_FAILED;
|
||||
} else {
|
||||
video_format = driver_data->video_format;
|
||||
capture_type = v4l2_type_video_capture(video_format->v4l2_mplane);
|
||||
}
|
||||
|
||||
rc = v4l2_get_format(driver_data->video_fd, capture_type, &format_width,
|
||||
&format_height, destination_bytesperlines,
|
||||
destination_sizes, NULL);
|
||||
if (rc < 0)
|
||||
return VA_STATUS_ERROR_OPERATION_FAILED;
|
||||
|
||||
destination_planes_count = video_format->planes_count;
|
||||
|
||||
rc = v4l2_create_buffers(driver_data->video_fd, capture_type,
|
||||
surfaces_count, &index_base);
|
||||
if (rc < 0)
|
||||
return VA_STATUS_ERROR_ALLOCATION_FAILED;
|
||||
|
||||
for (i = 0; i < surfaces_count; i++) {
|
||||
index = index_base + i;
|
||||
|
||||
id = object_heap_allocate(&driver_data->surface_heap);
|
||||
surface_object = SURFACE(driver_data, id);
|
||||
if (surface_object == NULL)
|
||||
return VA_STATUS_ERROR_ALLOCATION_FAILED;
|
||||
|
||||
rc = v4l2_query_buffer(driver_data->video_fd, capture_type,
|
||||
index,
|
||||
surface_object->destination_map_lengths,
|
||||
surface_object->destination_map_offsets,
|
||||
video_format->v4l2_buffers_count);
|
||||
if (rc < 0)
|
||||
return VA_STATUS_ERROR_ALLOCATION_FAILED;
|
||||
|
||||
for (j = 0; j < video_format->v4l2_buffers_count; j++) {
|
||||
surface_object->destination_map[j] =
|
||||
mmap(NULL,
|
||||
surface_object->destination_map_lengths[j],
|
||||
PROT_READ | PROT_WRITE, MAP_SHARED,
|
||||
driver_data->video_fd,
|
||||
surface_object->destination_map_offsets[j]);
|
||||
|
||||
if (surface_object->destination_map[j] == MAP_FAILED)
|
||||
return VA_STATUS_ERROR_ALLOCATION_FAILED;
|
||||
}
|
||||
|
||||
/*
|
||||
* FIXME: Handle this per-pixelformat, trying to generalize it
|
||||
* is not a reasonable approach. The final description should be
|
||||
* in terms of (logical) planes.
|
||||
*/
|
||||
|
||||
if (video_format->v4l2_buffers_count == 1) {
|
||||
destination_sizes[0] = destination_bytesperlines[0] *
|
||||
format_height;
|
||||
|
||||
for (j = 1; j < destination_planes_count; j++)
|
||||
destination_sizes[j] = destination_sizes[0] / 2;
|
||||
|
||||
for (j = 0; j < destination_planes_count; j++) {
|
||||
surface_object->destination_offsets[j] =
|
||||
j > 0 ? destination_sizes[j - 1] : 0;
|
||||
surface_object->destination_data[j] =
|
||||
((unsigned char *)surface_object->destination_map[0] +
|
||||
surface_object->destination_offsets[j]);
|
||||
surface_object->destination_sizes[j] =
|
||||
destination_sizes[j];
|
||||
surface_object->destination_bytesperlines[j] =
|
||||
destination_bytesperlines[0];
|
||||
}
|
||||
} else if (video_format->v4l2_buffers_count == destination_planes_count) {
|
||||
for (j = 0; j < destination_planes_count; j++) {
|
||||
surface_object->destination_offsets[j] = 0;
|
||||
surface_object->destination_data[j] =
|
||||
surface_object->destination_map[j];
|
||||
surface_object->destination_sizes[j] =
|
||||
destination_sizes[j];
|
||||
surface_object->destination_bytesperlines[j] =
|
||||
destination_bytesperlines[j];
|
||||
}
|
||||
} else {
|
||||
return VA_STATUS_ERROR_ALLOCATION_FAILED;
|
||||
}
|
||||
surface_object->current_slot = NULL; /* iter2 Fix 3 */
|
||||
surface_object->linked_decode_surface_id = VA_INVALID_SURFACE;
|
||||
surface_object->av1_order_hint = 0;
|
||||
surface_object->destination_index = 0; /* set on bind */
|
||||
surface_object->destination_planes_count = 0; /* set at CreateContext */
|
||||
surface_object->destination_buffers_count = 0; /* set at CreateContext */
|
||||
|
||||
surface_object->status = VASurfaceReady;
|
||||
surface_object->width = width;
|
||||
@@ -188,13 +213,6 @@ VAStatus RequestCreateSurfaces2(VADriverContextP context, unsigned int format,
|
||||
surface_object->source_data = NULL;
|
||||
surface_object->source_size = 0;
|
||||
|
||||
surface_object->destination_index = index;
|
||||
|
||||
surface_object->destination_planes_count =
|
||||
destination_planes_count;
|
||||
surface_object->destination_buffers_count =
|
||||
video_format->v4l2_buffers_count;
|
||||
|
||||
memset(&surface_object->params, 0,
|
||||
sizeof(surface_object->params));
|
||||
surface_object->slices_count = 0;
|
||||
@@ -202,6 +220,17 @@ VAStatus RequestCreateSurfaces2(VADriverContextP context, unsigned int format,
|
||||
|
||||
surface_object->request_fd = -1;
|
||||
|
||||
/*
|
||||
* iter5b-β Commit D: if CreateContext has already populated
|
||||
* the format-uniform cache (driver_data->fmt_valid), fill
|
||||
* the new surface's destination_* immediately. This covers
|
||||
* the case where a consumer creates more surfaces AFTER
|
||||
* CreateContext. The first batch of surfaces (created before
|
||||
* CreateContext) gets filled by CreateContext's surface_heap
|
||||
* walk; this lazy-fill handles late arrivals.
|
||||
*/
|
||||
surface_fill_format_uniform(driver_data, surface_object);
|
||||
|
||||
surfaces_ids[i] = id;
|
||||
}
|
||||
|
||||
@@ -221,26 +250,32 @@ VAStatus RequestDestroySurfaces(VADriverContextP context,
|
||||
{
|
||||
struct request_data *driver_data = context->pDriverData;
|
||||
struct object_surface *surface_object;
|
||||
unsigned int i, j;
|
||||
unsigned int i;
|
||||
|
||||
for (i = 0; i < surfaces_count; i++) {
|
||||
surface_object = SURFACE(driver_data, surfaces_ids[i]);
|
||||
if (surface_object == NULL)
|
||||
return VA_STATUS_ERROR_INVALID_SURFACE;
|
||||
|
||||
if (surface_object->source_data != NULL &&
|
||||
surface_object->source_size > 0)
|
||||
munmap(surface_object->source_data,
|
||||
surface_object->source_size);
|
||||
/*
|
||||
* source_* are now transient borrows from request_pool, not
|
||||
* surface-owned mappings; the pool owns the underlying mmap.
|
||||
* Nothing to free here.
|
||||
*
|
||||
* Iter2 Fix 3: destination_* mappings are owned by cap_pool;
|
||||
* surface_unbind_slot returns the slot to FREE (closing OUR
|
||||
* EXPBUF fd if any). Pool-owned mmaps are freed at
|
||||
* cap_pool_destroy time (RequestDestroyContext).
|
||||
*/
|
||||
surface_unbind_slot(driver_data, surface_object);
|
||||
|
||||
for (j = 0; j < surface_object->destination_buffers_count; j++)
|
||||
if (surface_object->destination_map[j] != NULL &&
|
||||
surface_object->destination_map_lengths[j] > 0)
|
||||
munmap(surface_object->destination_map[j],
|
||||
surface_object->destination_map_lengths[j]);
|
||||
|
||||
if (surface_object->request_fd > 0)
|
||||
close(surface_object->request_fd);
|
||||
/*
|
||||
* iter6: request_fd is owned by the OUTPUT pool slot, not by
|
||||
* the surface. Do not close here. The pool closes all slot
|
||||
* fds at request_pool_destroy time, which fires from
|
||||
* RequestTerminate (driver unload) — the pool is driver-wide
|
||||
* and survives context destroy/recreate cycles.
|
||||
*/
|
||||
|
||||
object_heap_free(&driver_data->surface_heap,
|
||||
(struct object_base *)surface_object);
|
||||
@@ -251,8 +286,9 @@ VAStatus RequestDestroySurfaces(VADriverContextP context,
|
||||
|
||||
VAStatus RequestSyncSurface(VADriverContextP context, VASurfaceID surface_id)
|
||||
{
|
||||
|
||||
struct request_data *driver_data = context->pDriverData;
|
||||
struct object_surface *surface_object;
|
||||
struct object_surface *surface_object = NULL;
|
||||
VAStatus status;
|
||||
struct video_format *video_format;
|
||||
unsigned int output_type, capture_type;
|
||||
@@ -297,19 +333,51 @@ VAStatus RequestSyncSurface(VADriverContextP context, VASurfaceID surface_id)
|
||||
goto error;
|
||||
}
|
||||
|
||||
/*
|
||||
* iter6: the request_fd belongs to the OUTPUT pool slot, not to the
|
||||
* surface. REINIT to reset its state in place — close+alloc would
|
||||
* reuse the lowest-free fd number against a kernel object whose
|
||||
* teardown hasn't fully drained, racing with QBUF on a slot that
|
||||
* was just released. The pool's 1:1 slot-to-fd binding eliminates
|
||||
* cross-slot fd reuse, and REINIT here resets the request object
|
||||
* for the next decode cycle on the same slot.
|
||||
*
|
||||
* Iter4's frame-11 EINVAL (which prompted the iter4 close+alloc
|
||||
* model) was a control-payload bug — DPB carry-over with FFmpeg's
|
||||
* V4L2_H264_FRAME_REF semantics not yet matched. That's been fixed
|
||||
* since iter4 (`74d8dd1`), so REINIT is no longer compromised by
|
||||
* the cluster-validation EINVAL pattern.
|
||||
*/
|
||||
rc = media_request_reinit(request_fd);
|
||||
if (rc < 0) {
|
||||
status = VA_STATUS_ERROR_OPERATION_FAILED;
|
||||
goto error;
|
||||
}
|
||||
surface_object->request_fd = -1;
|
||||
|
||||
rc = v4l2_dequeue_buffer(driver_data->video_fd, -1, output_type,
|
||||
surface_object->source_index, 1);
|
||||
if (rc < 0) {
|
||||
status = VA_STATUS_ERROR_OPERATION_FAILED;
|
||||
goto error;
|
||||
/*
|
||||
* iter7: OUTPUT DQBUF failed. The V4L2 buffer is in an
|
||||
* indeterminate kernel state — it may still be QUEUED. Do
|
||||
* NOT return the slot to acquire-rotation: the next QBUF
|
||||
* on it would EINVAL. Leave source_data set so the error
|
||||
* handler skips force_release and the slot stays dead-busy.
|
||||
*/
|
||||
goto error_buffer_indeterminate;
|
||||
}
|
||||
|
||||
/*
|
||||
* OUTPUT buffer is back from the kernel: return its pool slot
|
||||
* for reuse and clear the surface's transient borrow handle.
|
||||
*/
|
||||
request_pool_release(&driver_data->output_pool,
|
||||
surface_object->source_index);
|
||||
surface_object->source_data = NULL;
|
||||
surface_object->source_size = 0;
|
||||
|
||||
rc = v4l2_dequeue_buffer(driver_data->video_fd, -1, capture_type,
|
||||
surface_object->destination_index,
|
||||
surface_object->destination_buffers_count);
|
||||
@@ -318,14 +386,152 @@ VAStatus RequestSyncSurface(VADriverContextP context, VASurfaceID surface_id)
|
||||
goto error;
|
||||
}
|
||||
|
||||
/*
|
||||
* Iter2 Fix 3: CAPTURE buffer is back from the kernel with valid
|
||||
* pixel content. Transition the slot IN_DECODE → DECODED. The slot
|
||||
* stays bound to this surface until either ExportSurfaceHandle
|
||||
* (→ EXPORTED), the next BeginPicture for this surface (slot is
|
||||
* released first), or DestroySurfaces (release).
|
||||
*/
|
||||
if (surface_object->current_slot != NULL) {
|
||||
cap_pool_mark_decoded(&driver_data->capture_pool,
|
||||
surface_object->current_slot);
|
||||
|
||||
/*
|
||||
* iter8 Phase 6 (γ): env-gated diagnostic dump of the CAPTURE
|
||||
* buffer immediately after DQBUF + mark_decoded. Distinguishes
|
||||
* "kernel didn't write" from "libva mis-reads" for Bug 4
|
||||
* (H.264 partial-fill). Off by default; enable with
|
||||
* LIBVA_V4L2_DUMP_CAPTURE=1. destination_data[] is valid here
|
||||
* (surface_bind_slot populated it at BeginPicture).
|
||||
*/
|
||||
static const char *dump_env = NULL;
|
||||
static bool dump_env_checked = false;
|
||||
if (!dump_env_checked) {
|
||||
dump_env = getenv("LIBVA_V4L2_DUMP_CAPTURE");
|
||||
dump_env_checked = true;
|
||||
}
|
||||
if (dump_env != NULL && dump_env[0] == '1') {
|
||||
unsigned int p;
|
||||
char hexbuf[128];
|
||||
request_log("γ-dump: surface_id=%u v4l2_index=%u planes=%u\n",
|
||||
(unsigned int)surface_id,
|
||||
surface_object->destination_index,
|
||||
surface_object->destination_planes_count);
|
||||
for (p = 0; p < surface_object->destination_planes_count; p++) {
|
||||
const unsigned char *d = surface_object->destination_data[p];
|
||||
size_t sz = surface_object->destination_sizes[p];
|
||||
size_t scan_lim;
|
||||
unsigned int nz = 0;
|
||||
size_t i;
|
||||
int pos;
|
||||
|
||||
if (d == NULL) {
|
||||
request_log("γ-dump: plane[%u] NULL ptr (size=%zu)\n",
|
||||
p, sz);
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* Phase 5 MIN-2: scan at least one Y-MB row
|
||||
* (16 lines * bytesperline) for plane 0, else
|
||||
* 1024 bytes for chroma plane.
|
||||
*/
|
||||
if (p == 0) {
|
||||
size_t mbrow =
|
||||
surface_object->destination_bytesperlines[0] * 16;
|
||||
scan_lim = sz < mbrow ? sz : mbrow;
|
||||
} else {
|
||||
scan_lim = sz < 1024 ? sz : 1024;
|
||||
}
|
||||
for (i = 0; i < scan_lim; i++)
|
||||
if (d[i] != 0)
|
||||
nz++;
|
||||
|
||||
request_log("γ-dump: plane[%u] sz=%zu bpl=%u "
|
||||
"scan=%zu non_zero=%u\n",
|
||||
p, sz,
|
||||
surface_object->destination_bytesperlines[p],
|
||||
scan_lim, nz);
|
||||
|
||||
pos = 0;
|
||||
for (i = 0; i < 32 && i < sz; i++)
|
||||
pos += snprintf(hexbuf + pos,
|
||||
sizeof(hexbuf) - pos,
|
||||
"%02x ", d[i]);
|
||||
request_log("γ-dump: plane[%u] head[0..32]: %s\n",
|
||||
p, hexbuf);
|
||||
|
||||
if (sz >= 32) {
|
||||
pos = 0;
|
||||
for (i = 0; i < 32; i++)
|
||||
pos += snprintf(hexbuf + pos,
|
||||
sizeof(hexbuf) - pos,
|
||||
"%02x ", d[sz - 32 + i]);
|
||||
request_log("γ-dump: plane[%u] tail[%zu..%zu]: %s\n",
|
||||
p, sz - 32, sz - 1, hexbuf);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
surface_object->status = VASurfaceDisplaying;
|
||||
|
||||
status = VA_STATUS_SUCCESS;
|
||||
goto complete;
|
||||
|
||||
error:
|
||||
if (request_fd >= 0) {
|
||||
close(request_fd);
|
||||
/*
|
||||
* iter7: error recovery for the OUTPUT pool slot. If the surface
|
||||
* acquired a slot in BeginPicture (source_data != NULL indicates
|
||||
* an active borrow), reset the slot's request_fd via
|
||||
* request_pool_force_release so the slot returns to the
|
||||
* acquire-rotation. force_release tries REINIT first; falls back
|
||||
* to close+alloc if REINIT fails; leaves the slot dead-busy if
|
||||
* even alloc fails (other slots unaffected). Replaces iter6's
|
||||
* accepted bounded leak.
|
||||
*
|
||||
* Reachable from: media_request_queue / wait_completion / REINIT
|
||||
* failures. NOT reachable for OUTPUT-DQBUF failure (separate label
|
||||
* `error_buffer_indeterminate` below) because in that case the
|
||||
* V4L2 buffer is in an indeterminate kernel state and reusing the
|
||||
* slot would EINVAL on the next QBUF.
|
||||
*
|
||||
* If the surface never acquired a slot (source_data == NULL),
|
||||
* there is no slot to release; nothing to do.
|
||||
*/
|
||||
if (surface_object != NULL) {
|
||||
if (surface_object->source_data != NULL) {
|
||||
request_pool_force_release(&driver_data->output_pool,
|
||||
surface_object->source_index);
|
||||
surface_object->source_data = NULL;
|
||||
surface_object->source_size = 0;
|
||||
}
|
||||
surface_object->request_fd = -1;
|
||||
}
|
||||
goto complete;
|
||||
|
||||
error_buffer_indeterminate:
|
||||
/*
|
||||
* iter7: OUTPUT DQBUF failed after a successful REINIT. The kernel
|
||||
* V4L2 buffer is in an unknown state (possibly still QUEUED with
|
||||
* pending decode result, possibly half-dequeued, possibly stuck
|
||||
* in driver internals). The slot's request_fd has already been
|
||||
* REINIT'd to a clean state, but reusing the slot for a new
|
||||
* decode would QBUF on a buffer the kernel may still hold —
|
||||
* triggering exactly the iter6 race we eliminated for the happy
|
||||
* path.
|
||||
*
|
||||
* Leave the slot dead-busy: don't release, don't force_release.
|
||||
* Other slots are unaffected. If this fires repeatedly, the pool
|
||||
* leaks slots until starvation, at which point acquire returns -1
|
||||
* and BeginPicture cleanly propagates ALLOCATION_FAILED. This is
|
||||
* a strictly safer failure mode than reusing an indeterminate
|
||||
* V4L2 buffer.
|
||||
*/
|
||||
if (surface_object != NULL) {
|
||||
surface_object->source_data = NULL;
|
||||
surface_object->source_size = 0;
|
||||
surface_object->request_fd = -1;
|
||||
}
|
||||
|
||||
@@ -338,6 +544,7 @@ VAStatus RequestQuerySurfaceAttributes(VADriverContextP context,
|
||||
VASurfaceAttrib *attributes,
|
||||
unsigned int *attributes_count)
|
||||
{
|
||||
|
||||
struct request_data *driver_data = context->pDriverData;
|
||||
VASurfaceAttrib *attributes_list;
|
||||
unsigned int attributes_list_size = V4L2_REQUEST_MAX_CONFIG_ATTRIBUTES *
|
||||
@@ -416,6 +623,7 @@ VAStatus RequestQuerySurfaceStatus(VADriverContextP context,
|
||||
struct request_data *driver_data = context->pDriverData;
|
||||
struct object_surface *surface_object;
|
||||
|
||||
|
||||
surface_object = SURFACE(driver_data, surface_id);
|
||||
if (surface_object == NULL)
|
||||
return VA_STATUS_ERROR_INVALID_SURFACE;
|
||||
@@ -493,6 +701,18 @@ VAStatus RequestExportSurfaceHandle(VADriverContextP context,
|
||||
goto error;
|
||||
}
|
||||
|
||||
/*
|
||||
* Iter2 Fix 3: pool now owns OUR copy of the EXPBUF'd fd. The
|
||||
* consumer receives a dup'd / equivalent fd via the descriptor.
|
||||
* Slot transitions DECODED → EXPORTED; it will be force-recyclable
|
||||
* by LRU when the pool is exhausted, but FREE slots are always
|
||||
* preferred.
|
||||
*/
|
||||
if (surface_object->current_slot != NULL && export_fds_count > 0)
|
||||
cap_pool_mark_exported(&driver_data->capture_pool,
|
||||
surface_object->current_slot,
|
||||
export_fds[0]);
|
||||
|
||||
planes_count = surface_object->destination_planes_count;
|
||||
|
||||
surface_descriptor->fourcc = VA_FOURCC_NV12;
|
||||
@@ -506,27 +726,102 @@ VAStatus RequestExportSurfaceHandle(VADriverContextP context,
|
||||
for (i = 0; i < planes_count; i++)
|
||||
size += surface_object->destination_sizes[i];
|
||||
|
||||
/*
|
||||
* Iteration 2 Fix 2: choose drm_format_modifier conditionally on
|
||||
* pitch alignment. Mesa's WSI / Panfrost compositor path rejects
|
||||
* DRM_FORMAT_MOD_NONE (= LINEAR explicit) buffers whose pitch isn't
|
||||
* GPU-aligned (typically 64+ bytes for Mali). For 1920-wide content
|
||||
* the pitch is 1920 (64-aligned, fine); for 864-wide content the
|
||||
* pitch is 864 (only 16-aligned), Mesa rejects with "WSI pitch not
|
||||
* properly aligned" and Firefox falls back to SW.
|
||||
*
|
||||
* Setting DRM_FORMAT_MOD_INVALID tells the importer "modifier
|
||||
* unknown, treat as implicit / texture-only" — Firefox's
|
||||
* DMABufSurface.cpp:1920 explicitly omits modifier attribs from
|
||||
* eglCreateImage when the value is MOD_INVALID, bypassing Mesa's
|
||||
* scanout-alignment check. The buffer is then texture-imported
|
||||
* (small perf cost) instead of WSI scanout-imported, which is
|
||||
* the correct behavior for a buffer that doesn't meet scanout
|
||||
* alignment requirements.
|
||||
*
|
||||
* We branch on pitch alignment to preserve LINEAR semantics for
|
||||
* already-aligned content (avoids unnecessary perf cost on the
|
||||
* common 1920-wide case).
|
||||
*
|
||||
* Sonnet Phase 5 review (iter2 question 4) endorsed this
|
||||
* conditional approach over a universal MOD_INVALID change.
|
||||
*/
|
||||
for (i = 0; i < export_fds_count; i++) {
|
||||
surface_descriptor->objects[i].drm_format_modifier =
|
||||
video_format->drm_modifier;
|
||||
uint64_t modifier = video_format->drm_modifier;
|
||||
unsigned int bytesperline =
|
||||
surface_object->destination_bytesperlines[0];
|
||||
if (bytesperline & 63) /* not 64-byte aligned */
|
||||
modifier = DRM_FORMAT_MOD_INVALID;
|
||||
surface_descriptor->objects[i].drm_format_modifier = modifier;
|
||||
surface_descriptor->objects[i].fd = export_fds[i];
|
||||
surface_descriptor->objects[i].size = export_fds_count == 1 ?
|
||||
size :
|
||||
surface_object->destination_sizes[i];
|
||||
}
|
||||
|
||||
surface_descriptor->num_layers = 1;
|
||||
/*
|
||||
* Layer construction depends on the consumer's request flags
|
||||
* (VA_EXPORT_SURFACE_*_LAYERS):
|
||||
*
|
||||
* COMPOSED_LAYERS (default, mpv): one layer carrying both
|
||||
* Y and UV planes (drm_format=NV12, num_planes=2). Mesa
|
||||
* imports as a single NV12 EGLImage.
|
||||
*
|
||||
* SEPARATE_LAYERS (Firefox 150 RDD): two layers, Y as a
|
||||
* single-plane R8 layer, UV as a single-plane GR88 layer.
|
||||
* Firefox's GetVAAPISurfaceDescriptor passes
|
||||
* VA_EXPORT_SURFACE_SEPARATE_LAYERS so its DMABufSurfaceYUV
|
||||
* import code can address Y and UV planes independently.
|
||||
* Without this branch, Firefox parsed our COMPOSED layout
|
||||
* as if it were SEPARATE, found bogus layer-1 data, and
|
||||
* silently fell back to FFmpeg(FFVPX) software decode.
|
||||
*
|
||||
* The earlier path 0001 mplane port assumed a single COMPOSED
|
||||
* shape — fine for mpv but breaks any consumer requesting
|
||||
* SEPARATE. Honor the flag.
|
||||
*/
|
||||
if ((flags & VA_EXPORT_SURFACE_SEPARATE_LAYERS) && planes_count == 2) {
|
||||
surface_descriptor->num_layers = 2;
|
||||
|
||||
surface_descriptor->layers[0].drm_format = video_format->drm_format;
|
||||
surface_descriptor->layers[0].num_planes = planes_count;
|
||||
/* Layer 0: Y plane as DRM_FORMAT_R8 (1 byte/pixel luma). */
|
||||
surface_descriptor->layers[0].drm_format = DRM_FORMAT_R8;
|
||||
surface_descriptor->layers[0].num_planes = 1;
|
||||
surface_descriptor->layers[0].object_index[0] =
|
||||
export_fds_count == 1 ? 0 : 0;
|
||||
surface_descriptor->layers[0].offset[0] =
|
||||
surface_object->destination_offsets[0];
|
||||
surface_descriptor->layers[0].pitch[0] =
|
||||
surface_object->destination_bytesperlines[0];
|
||||
|
||||
for (i = 0; i < planes_count; i++) {
|
||||
surface_descriptor->layers[0].object_index[i] =
|
||||
export_fds_count == 1 ? 0 : i;
|
||||
surface_descriptor->layers[0].offset[i] =
|
||||
surface_object->destination_offsets[i];
|
||||
surface_descriptor->layers[0].pitch[i] =
|
||||
surface_object->destination_bytesperlines[i];
|
||||
/* Layer 1: UV plane as DRM_FORMAT_GR88 (interleaved
|
||||
* U+V, 2 bytes/pixel chroma at half resolution). */
|
||||
surface_descriptor->layers[1].drm_format = DRM_FORMAT_GR88;
|
||||
surface_descriptor->layers[1].num_planes = 1;
|
||||
surface_descriptor->layers[1].object_index[0] =
|
||||
export_fds_count == 1 ? 0 : 1;
|
||||
surface_descriptor->layers[1].offset[0] =
|
||||
surface_object->destination_offsets[1];
|
||||
surface_descriptor->layers[1].pitch[0] =
|
||||
surface_object->destination_bytesperlines[1];
|
||||
} else {
|
||||
/* COMPOSED_LAYERS / default: one layer with all planes. */
|
||||
surface_descriptor->num_layers = 1;
|
||||
surface_descriptor->layers[0].drm_format = video_format->drm_format;
|
||||
surface_descriptor->layers[0].num_planes = planes_count;
|
||||
|
||||
for (i = 0; i < planes_count; i++) {
|
||||
surface_descriptor->layers[0].object_index[i] =
|
||||
export_fds_count == 1 ? 0 : i;
|
||||
surface_descriptor->layers[0].offset[i] =
|
||||
surface_object->destination_offsets[i];
|
||||
surface_descriptor->layers[0].pitch[i] =
|
||||
surface_object->destination_bytesperlines[i];
|
||||
}
|
||||
}
|
||||
|
||||
status = VA_STATUS_SUCCESS;
|
||||
|
||||
+114
-1
@@ -32,6 +32,11 @@
|
||||
#include <va/va_backend.h>
|
||||
|
||||
#include "object_heap.h"
|
||||
#include "cap_pool.h"
|
||||
|
||||
#include "h265.h"
|
||||
|
||||
struct request_data;
|
||||
|
||||
#define SURFACE(data, id) \
|
||||
((struct object_surface *)object_heap_lookup(&(data)->surface_heap, id))
|
||||
@@ -40,7 +45,7 @@
|
||||
struct object_surface {
|
||||
struct object_base base;
|
||||
|
||||
VAStatus status;
|
||||
VASurfaceStatus status;
|
||||
int width;
|
||||
int height;
|
||||
|
||||
@@ -48,6 +53,26 @@ struct object_surface {
|
||||
void *source_data;
|
||||
unsigned int source_size;
|
||||
|
||||
/*
|
||||
* Iter2 Fix 3: destination_* fields below are now per-decode-cycle.
|
||||
* They are populated from current_slot in RequestBeginPicture and
|
||||
* remain valid through SyncSurface, ExportSurfaceHandle, and
|
||||
* DeriveImage/copy_surface_to_image (vaapi-copy path). Subsequent
|
||||
* BeginPicture for this surface releases the prior slot and
|
||||
* acquires a new one.
|
||||
*
|
||||
* destination_planes_count, destination_sizes, destination_offsets,
|
||||
* destination_bytesperlines are FORMAT-uniform across all CAPTURE
|
||||
* buffers, so they're set once at CreateSurfaces2 time and stay.
|
||||
*
|
||||
* destination_index, destination_map[], destination_map_lengths,
|
||||
* destination_map_offsets, destination_data[] are SLOT-specific
|
||||
* and re-populated each BeginPicture from current_slot.
|
||||
*
|
||||
* destination_buffers_count is also format-uniform (V4L2 planes
|
||||
* per buffer = 1 for single-plane MPLANE NV12).
|
||||
*/
|
||||
struct cap_pool_slot *current_slot; /* iter2 Fix 3 */
|
||||
unsigned int destination_index;
|
||||
void *destination_map[VIDEO_MAX_PLANES];
|
||||
unsigned int destination_map_lengths[VIDEO_MAX_PLANES];
|
||||
@@ -64,6 +89,33 @@ struct object_surface {
|
||||
|
||||
struct timeval timestamp;
|
||||
|
||||
/*
|
||||
* AV1 Phase 3: for streams with apply_grain=1, VAAPI's
|
||||
* VADecPictureParameterBufferAV1 carries current_display_picture
|
||||
* (display-time surface) separate from current_frame (decode
|
||||
* target). vpu981 HW applies grain inline to the decode CAPTURE
|
||||
* buffer, so the decoded data lives in current_frame's slot — but
|
||||
* ffmpeg calls vaGetImage on current_display_picture which has no
|
||||
* slot bound. linked_decode_surface_id, set in av1_set_controls
|
||||
* on the display surface, points to the decode surface so
|
||||
* copy_surface_to_image can borrow its destination_data[].
|
||||
*
|
||||
* VA_INVALID_SURFACE = no link (the common case: 8-bit codecs,
|
||||
* AV1 with apply_grain=0, AV1 frames where cur_frame ==
|
||||
* cur_display).
|
||||
*/
|
||||
VASurfaceID linked_decode_surface_id;
|
||||
|
||||
/*
|
||||
* AV1 Phase 3: AV1 order_hint of the frame currently decoded into
|
||||
* this surface. VAAPI's VADecPictureParameterBufferAV1.order_hint
|
||||
* is per-frame; kernel's v4l2_ctrl_av1_frame.order_hints[8] is
|
||||
* per-reference. We track each decoded frame's order_hint here so
|
||||
* the next frame's av1_set_controls can populate order_hints[i]
|
||||
* from ref_frame_map[i] → SURFACE → av1_order_hint.
|
||||
*/
|
||||
uint8_t av1_order_hint;
|
||||
|
||||
union {
|
||||
struct {
|
||||
VAPictureParameterBufferMPEG2 picture;
|
||||
@@ -73,15 +125,43 @@ struct object_surface {
|
||||
} mpeg2;
|
||||
struct {
|
||||
VAIQMatrixBufferH264 matrix;
|
||||
bool matrix_set;
|
||||
VAPictureParameterBufferH264 picture;
|
||||
VASliceParameterBufferH264 slice;
|
||||
} h264;
|
||||
struct {
|
||||
VAPictureParameterBufferHEVC picture;
|
||||
VASliceParameterBufferHEVC slice;
|
||||
VASliceParameterBufferHEVC slices[HEVC_MAX_SLICES_PER_FRAME];
|
||||
unsigned int num_slices;
|
||||
VAIQMatrixBufferHEVC iqmatrix;
|
||||
bool iqmatrix_set;
|
||||
} h265;
|
||||
struct {
|
||||
VAPictureParameterBufferVP8 picture;
|
||||
VASliceParameterBufferVP8 slice;
|
||||
VAIQMatrixBufferVP8 iqmatrix;
|
||||
bool iqmatrix_set;
|
||||
VAProbabilityDataBufferVP8 probability;
|
||||
bool probability_set;
|
||||
} vp8;
|
||||
struct {
|
||||
VADecPictureParameterBufferVP9 picture;
|
||||
VASliceParameterBufferVP9 slice;
|
||||
} vp9;
|
||||
/*
|
||||
* ampere-av1-enablement: AV1 needs picture-header +
|
||||
* variable number of slice/tile params (one per tile).
|
||||
* tile_group_entries[] holds parsed VASliceParameterBufferAV1
|
||||
* entries up to MAX_TILES; av1.c builds the matching
|
||||
* v4l2_ctrl_av1_tile_group_entry[] at set_controls time.
|
||||
*/
|
||||
struct {
|
||||
#define AV1_MAX_TILES 128
|
||||
VADecPictureParameterBufferAV1 picture;
|
||||
VASliceParameterBufferAV1 tile_group_entries[AV1_MAX_TILES];
|
||||
unsigned int num_tile_group_entries;
|
||||
} av1;
|
||||
} params;
|
||||
|
||||
int request_fd;
|
||||
@@ -125,4 +205,37 @@ VAStatus RequestExportSurfaceHandle(VADriverContextP context,
|
||||
VASurfaceID surface_id, uint32_t mem_type,
|
||||
uint32_t flags, void *descriptor);
|
||||
|
||||
/*
|
||||
* iter5b-β Commit D: populate a surface's format-uniform destination_*
|
||||
* fields (planes_count, buffers_count, offsets, sizes, bytesperlines)
|
||||
* from driver_data's cached CAPTURE-side geometry. Idempotent: skip
|
||||
* if already filled (destination_planes_count != 0). Caller must
|
||||
* ensure driver_data->fmt_valid is true (CreateContext has run).
|
||||
*
|
||||
* Called by:
|
||||
* - context.c::RequestCreateContext after v4l2_get_format(CAPTURE)
|
||||
* populates the cache; walks the surface_heap and fills every
|
||||
* existing surface (covers surfaces created before CreateContext,
|
||||
* including the ffmpeg vaapi-copy case where surfaces_count=0 is
|
||||
* passed but surfaces exist in the heap from earlier
|
||||
* CreateSurfaces2 calls).
|
||||
* - surface.c::RequestCreateSurfaces2 after surface allocation,
|
||||
* covering the case where CreateContext fired before this
|
||||
* CreateSurfaces2 call (fmt cache is valid, fill immediately).
|
||||
*/
|
||||
void surface_fill_format_uniform(struct request_data *driver_data,
|
||||
struct object_surface *surface_object);
|
||||
|
||||
/*
|
||||
* Iter2 Fix 3: bind / unbind a CAPTURE-pool slot to an object_surface.
|
||||
* Called from picture.c::RequestBeginPicture (acquire+bind) and
|
||||
* surface.c::RequestDestroySurfaces (unbind). Mirrors slot's V4L2 index
|
||||
* and mmap pointers into surface_object->destination_* so existing
|
||||
* QBUF/DQBUF/EXPBUF code paths see no behavioral change.
|
||||
*/
|
||||
void surface_bind_slot(struct object_surface *surface_object,
|
||||
struct cap_pool_slot *slot);
|
||||
void surface_unbind_slot(struct request_data *driver_data,
|
||||
struct object_surface *surface_object);
|
||||
|
||||
#endif
|
||||
|
||||
+1
-1
@@ -27,7 +27,7 @@
|
||||
.section .note.GNU-stack,"",%progbits /* mark stack as non-executable */
|
||||
#endif
|
||||
|
||||
#ifndef __aarch64__
|
||||
#ifdef __arm__
|
||||
|
||||
.text
|
||||
.syntax unified
|
||||
|
||||
+137
-12
@@ -428,37 +428,102 @@ int v4l2_export_buffer(int video_fd, unsigned int type, unsigned int index,
|
||||
return 0;
|
||||
}
|
||||
|
||||
int v4l2_set_control(int video_fd, int request_fd, unsigned int id, void *data,
|
||||
unsigned int size)
|
||||
static int v4l2_ioctl_controls(int video_fd, int request_fd, unsigned long ioc,
|
||||
struct v4l2_ext_control *control_array,
|
||||
unsigned int num_controls)
|
||||
{
|
||||
struct v4l2_ext_control control;
|
||||
struct v4l2_ext_controls controls;
|
||||
int rc;
|
||||
|
||||
memset(&control, 0, sizeof(control));
|
||||
memset(&controls, 0, sizeof(controls));
|
||||
|
||||
control.id = id;
|
||||
control.ptr = data;
|
||||
control.size = size;
|
||||
|
||||
controls.controls = &control;
|
||||
controls.count = 1;
|
||||
controls.controls = control_array;
|
||||
controls.count = num_controls;
|
||||
|
||||
if (request_fd >= 0) {
|
||||
controls.which = V4L2_CTRL_WHICH_REQUEST_VAL;
|
||||
controls.request_fd = request_fd;
|
||||
}
|
||||
|
||||
rc = ioctl(video_fd, VIDIOC_S_EXT_CTRLS, &controls);
|
||||
rc = ioctl(video_fd, ioc, &controls);
|
||||
if (rc < 0) {
|
||||
request_log("Unable to set control: %s\n", strerror(errno));
|
||||
/* ampere-av1 Phase 2.1 diag: surface error_idx so the caller's
|
||||
* error path knows which CID failed validation. error_idx >=
|
||||
* count means the failure was pre-validation (e.g., bad
|
||||
* request_fd). errno carries the syscall-level reason. */
|
||||
const char *failed_cid_label = "<pre-validation>";
|
||||
unsigned int failed_size = 0;
|
||||
if (controls.error_idx < num_controls) {
|
||||
failed_size = control_array[controls.error_idx].size;
|
||||
(void)failed_cid_label; /* keep symbol if logger truncates */
|
||||
}
|
||||
request_log("v4l2_ioctl_controls: rc=%d errno=%d (%s) "
|
||||
"ioc=0x%lx error_idx=%u count=%u "
|
||||
"failed_cid=0x%x failed_size=%u\n",
|
||||
rc, errno, strerror(errno), ioc,
|
||||
controls.error_idx, num_controls,
|
||||
controls.error_idx < num_controls
|
||||
? control_array[controls.error_idx].id : 0,
|
||||
failed_size);
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
int v4l2_get_controls(int video_fd, int request_fd,
|
||||
struct v4l2_ext_control *control_array,
|
||||
unsigned int num_controls)
|
||||
{
|
||||
int rc;
|
||||
|
||||
rc = v4l2_ioctl_controls(video_fd, request_fd, VIDIOC_G_EXT_CTRLS,
|
||||
control_array, num_controls);
|
||||
if (rc < 0) {
|
||||
/*
|
||||
* EACCES on G_EXT_CTRLS for request fds is the normal case on
|
||||
* this hantro rig — the kernel doesn't allow readback through
|
||||
* the request_fd. Caller (h264.c) tracks this with a one-time
|
||||
* "V4L2 readback unavailable" announcement. Suppress per-call
|
||||
* noise to keep the log signal-to-noise high.
|
||||
*/
|
||||
if (errno != EACCES)
|
||||
request_log("Unable to get control(s): %s\n",
|
||||
strerror(errno));
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int v4l2_set_controls(int video_fd, int request_fd,
|
||||
struct v4l2_ext_control *control_array,
|
||||
unsigned int num_controls)
|
||||
{
|
||||
int rc;
|
||||
|
||||
rc = v4l2_ioctl_controls(video_fd, request_fd, VIDIOC_S_EXT_CTRLS,
|
||||
control_array, num_controls);
|
||||
if (rc < 0) {
|
||||
request_log("Unable to set control(s): %s\n", strerror(errno));
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int v4l2_set_control(int video_fd, int request_fd, unsigned int id, void *data,
|
||||
unsigned int size)
|
||||
{
|
||||
struct v4l2_ext_control control;
|
||||
|
||||
memset(&control, 0, sizeof(control));
|
||||
|
||||
control.id = id;
|
||||
control.ptr = data;
|
||||
control.size = size;
|
||||
|
||||
return v4l2_set_controls(video_fd, request_fd, &control, 1);
|
||||
}
|
||||
|
||||
int v4l2_set_stream(int video_fd, unsigned int type, bool enable)
|
||||
{
|
||||
enum v4l2_buf_type buf_type = type;
|
||||
@@ -474,3 +539,63 @@ int v4l2_set_stream(int video_fd, unsigned int type, bool enable)
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int v4l2_query_ext_ctrl(int video_fd, unsigned int id,
|
||||
struct v4l2_query_ext_ctrl *qec)
|
||||
{
|
||||
struct v4l2_query_ext_ctrl local;
|
||||
struct v4l2_query_ext_ctrl *target = qec ? qec : &local;
|
||||
int rc;
|
||||
|
||||
memset(target, 0, sizeof(*target));
|
||||
target->id = id;
|
||||
|
||||
rc = ioctl(video_fd, VIDIOC_QUERY_EXT_CTRL, target);
|
||||
if (rc < 0)
|
||||
return -1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int v4l2_query_menu(int video_fd, unsigned int id, unsigned int index,
|
||||
struct v4l2_querymenu *qm)
|
||||
{
|
||||
int rc;
|
||||
|
||||
if (qm == NULL)
|
||||
return -1;
|
||||
|
||||
memset(qm, 0, sizeof(*qm));
|
||||
qm->id = id;
|
||||
qm->index = index;
|
||||
|
||||
rc = ioctl(video_fd, VIDIOC_QUERYMENU, qm);
|
||||
if (rc < 0)
|
||||
return -1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool v4l2_ctrl_menu_has_value(int video_fd, unsigned int id,
|
||||
unsigned int value)
|
||||
{
|
||||
struct v4l2_query_ext_ctrl qec;
|
||||
struct v4l2_querymenu qm;
|
||||
long long i;
|
||||
|
||||
if (v4l2_query_ext_ctrl(video_fd, id, &qec) < 0)
|
||||
return false;
|
||||
|
||||
if (qec.type != V4L2_CTRL_TYPE_MENU &&
|
||||
qec.type != V4L2_CTRL_TYPE_INTEGER_MENU)
|
||||
return false;
|
||||
|
||||
for (i = qec.minimum; i <= qec.maximum; i += qec.step ? qec.step : 1) {
|
||||
if (v4l2_query_menu(video_fd, id, (unsigned int)i, &qm) < 0)
|
||||
continue;
|
||||
if ((unsigned int)i == value)
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
+39
@@ -54,8 +54,47 @@ int v4l2_dequeue_buffer(int video_fd, int request_fd, unsigned int type,
|
||||
int v4l2_export_buffer(int video_fd, unsigned int type, unsigned int index,
|
||||
unsigned int flags, int *export_fds,
|
||||
unsigned int export_fds_count);
|
||||
int v4l2_get_controls(int video_fd, int request_fd,
|
||||
struct v4l2_ext_control *controls,
|
||||
unsigned int num_controls);
|
||||
int v4l2_set_controls(int video_fd, int request_fd,
|
||||
struct v4l2_ext_control *controls,
|
||||
unsigned int num_controls);
|
||||
int v4l2_set_control(int video_fd, int request_fd, unsigned int id, void *data,
|
||||
unsigned int size);
|
||||
int v4l2_set_stream(int video_fd, unsigned int type, bool enable);
|
||||
|
||||
/*
|
||||
* Capability-probe helpers. These let calling code discover what the
|
||||
* backing kernel driver supports rather than hardcoding assumptions
|
||||
* about specific decoder hardware.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Query the metadata of an extended control by CID. Fills *qec on
|
||||
* success. Returns 0 if the control exists, -1 (errno=EINVAL) if the
|
||||
* driver does not expose this CID. Pass qec=NULL to test existence
|
||||
* only.
|
||||
*/
|
||||
struct v4l2_query_ext_ctrl;
|
||||
int v4l2_query_ext_ctrl(int video_fd, unsigned int id,
|
||||
struct v4l2_query_ext_ctrl *qec);
|
||||
|
||||
/*
|
||||
* Query a single menu item of a menu/intmenu control at the given
|
||||
* index. Fills *qm on success. Returns 0 if the menu item exists at
|
||||
* this index, -1 otherwise.
|
||||
*/
|
||||
struct v4l2_querymenu;
|
||||
int v4l2_query_menu(int video_fd, unsigned int id, unsigned int index,
|
||||
struct v4l2_querymenu *qm);
|
||||
|
||||
/*
|
||||
* Convenience: for a menu-type control, return true iff `value` is a
|
||||
* valid menu entry (i.e. the driver accepts it). Walks all menu items
|
||||
* up to the control's maximum to check.
|
||||
*/
|
||||
bool v4l2_ctrl_menu_has_value(int video_fd, unsigned int id,
|
||||
unsigned int value);
|
||||
|
||||
#endif
|
||||
|
||||
+4
-1
@@ -39,12 +39,14 @@ static struct video_format formats[] = {
|
||||
.description = "NV12 YUV",
|
||||
.v4l2_format = V4L2_PIX_FMT_NV12,
|
||||
.v4l2_buffers_count = 1,
|
||||
.v4l2_mplane = false,
|
||||
.v4l2_mplane = true,
|
||||
.drm_format = DRM_FORMAT_NV12,
|
||||
.drm_modifier = DRM_FORMAT_MOD_NONE,
|
||||
.planes_count = 2,
|
||||
.bpp = 16,
|
||||
},
|
||||
// Code to handle this DRM_FORMAT is __arm__ only
|
||||
#ifdef __arm__
|
||||
{
|
||||
.description = "Sunxi tiled NV12 YUV",
|
||||
.v4l2_format = V4L2_PIX_FMT_SUNXI_TILED_NV12,
|
||||
@@ -55,6 +57,7 @@ static struct video_format formats[] = {
|
||||
.planes_count = 2,
|
||||
.bpp = 16
|
||||
},
|
||||
#endif
|
||||
};
|
||||
|
||||
static unsigned int formats_count = sizeof(formats) / sizeof(formats[0]);
|
||||
|
||||
@@ -0,0 +1,263 @@
|
||||
/*
|
||||
* Copyright (C) 2026 Markus Fritsche <fritsche.markus@gmail.com>
|
||||
*
|
||||
* fresnel-fourier iter3 Phase 6 commit B: VP8 codec dispatcher
|
||||
* implemented against V4L2_CID_STATELESS_VP8_FRAME (kernel UAPI
|
||||
* <linux/v4l2-controls.h>:1900-1958). Single batched control per
|
||||
* frame, no init-time device-wide menus (VP8 has no DECODE_MODE/
|
||||
* START_CODE — confirmed by Phase 0 V4L2 inventory + Phase 3
|
||||
* cross-validator strace).
|
||||
*
|
||||
* Reference: FFmpeg libavcodec/v4l2_request_vp8.c (kwiboo branch);
|
||||
* FFmpeg libavcodec/vaapi_vp8.c (VAAPI source-side
|
||||
* verification of the field semantics);
|
||||
* kernel drivers/media/platform/verisilicon/
|
||||
* hantro_g1_vp8_dec.c (RK3399 hardware reads
|
||||
* first_part_header_bits + first_part_size to compute
|
||||
* MB-data DMA offset).
|
||||
*
|
||||
* Phase 5 review amendments incorporated (see phase5_iter3_review.md):
|
||||
* C1 first_part_header_bits = slice->macroblock_offset
|
||||
* (NOT 0; kernel reads it unconditionally; same formula as
|
||||
* v4l2_request_vp8.c uses internally)
|
||||
* C2 first_part_size = slice->partition_size[0] +
|
||||
* ((macroblock_offset + 7) / 8)
|
||||
* (recover total partition size from VAAPI's post-parse
|
||||
* remainder)
|
||||
* C3 VAProbabilityBufferType (not VAProbabilityDataBufferType)
|
||||
* C4 (int8_t) cast (not (s8); kernel-internal typedef not in
|
||||
* userspace UAPI)
|
||||
* S3 assert(probability_set) runtime guard (kernel has NO
|
||||
* coeff_probs default fallback; consumer MUST send
|
||||
* VAProbabilityBufferType per frame)
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sub license, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "vp8.h"
|
||||
#include "context.h"
|
||||
#include "request.h"
|
||||
#include "surface.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <sys/ioctl.h>
|
||||
|
||||
#include <linux/videodev2.h>
|
||||
#include <linux/v4l2-controls.h>
|
||||
|
||||
#include "v4l2.h"
|
||||
|
||||
int vp8_set_controls(struct request_data *driver_data,
|
||||
struct object_context *context_object,
|
||||
struct object_surface *surface_object)
|
||||
{
|
||||
VAPictureParameterBufferVP8 *picture =
|
||||
&surface_object->params.vp8.picture;
|
||||
VASliceParameterBufferVP8 *slice =
|
||||
&surface_object->params.vp8.slice;
|
||||
VAIQMatrixBufferVP8 *iqmatrix =
|
||||
&surface_object->params.vp8.iqmatrix;
|
||||
VAProbabilityDataBufferVP8 *probability =
|
||||
&surface_object->params.vp8.probability;
|
||||
bool iqmatrix_set = surface_object->params.vp8.iqmatrix_set;
|
||||
bool probability_set = surface_object->params.vp8.probability_set;
|
||||
|
||||
struct v4l2_ctrl_vp8_frame frame;
|
||||
struct object_surface *last_ref;
|
||||
struct object_surface *golden_ref;
|
||||
struct object_surface *alt_ref;
|
||||
int rc;
|
||||
int i, j;
|
||||
|
||||
memset(&frame, 0, sizeof frame);
|
||||
|
||||
/* Phase 5 S3: kernel has no coeff_probs default fallback. The
|
||||
* VAAPI consumer chain (FFmpeg's vaapi_vp8.c:146-148, used by
|
||||
* mpv and ffmpeg-vaapi) always sends VAProbabilityBufferType
|
||||
* per frame. Surface immediately if a future consumer doesn't. */
|
||||
assert(probability_set);
|
||||
|
||||
/* Clause 3: frame geometry + per-frame scalars */
|
||||
frame.width = picture->frame_width;
|
||||
frame.height = picture->frame_height;
|
||||
frame.horizontal_scale = 0; /* not exposed by VAAPI */
|
||||
frame.vertical_scale = 0;
|
||||
frame.version = picture->pic_fields.bits.version;
|
||||
frame.prob_skip_false = picture->prob_skip_false;
|
||||
frame.prob_intra = picture->prob_intra;
|
||||
frame.prob_last = picture->prob_last;
|
||||
frame.prob_gf = picture->prob_gf;
|
||||
/* Phase 3 Q2: VAAPI counts include control partition;
|
||||
* kernel counts DCT only — off-by-one. */
|
||||
frame.num_dct_parts = slice->num_of_partitions - 1;
|
||||
|
||||
/* Clause 4: DPB timestamp resolution (mirrors mpeg2.c pattern;
|
||||
* NULL surface → timestamp stays 0 from memset). */
|
||||
last_ref = SURFACE(driver_data, picture->last_ref_frame);
|
||||
golden_ref = SURFACE(driver_data, picture->golden_ref_frame);
|
||||
alt_ref = SURFACE(driver_data, picture->alt_ref_frame);
|
||||
if (last_ref != NULL)
|
||||
frame.last_frame_ts =
|
||||
v4l2_timeval_to_ns(&last_ref->timestamp);
|
||||
if (golden_ref != NULL)
|
||||
frame.golden_frame_ts =
|
||||
v4l2_timeval_to_ns(&golden_ref->timestamp);
|
||||
if (alt_ref != NULL)
|
||||
frame.alt_frame_ts =
|
||||
v4l2_timeval_to_ns(&alt_ref->timestamp);
|
||||
|
||||
/* Clause 5: loop filter mapping */
|
||||
for (i = 0; i < 4; i++) {
|
||||
frame.lf.ref_frm_delta[i] =
|
||||
picture->loop_filter_deltas_ref_frame[i];
|
||||
frame.lf.mb_mode_delta[i] =
|
||||
picture->loop_filter_deltas_mode[i];
|
||||
}
|
||||
frame.lf.sharpness_level = picture->pic_fields.bits.sharpness_level;
|
||||
frame.lf.level = picture->loop_filter_level[0];
|
||||
if (picture->pic_fields.bits.loop_filter_adj_enable)
|
||||
frame.lf.flags |= V4L2_VP8_LF_ADJ_ENABLE;
|
||||
if (picture->pic_fields.bits.mode_ref_lf_delta_update)
|
||||
frame.lf.flags |= V4L2_VP8_LF_DELTA_UPDATE;
|
||||
if (picture->pic_fields.bits.filter_type)
|
||||
frame.lf.flags |= V4L2_VP8_LF_FILTER_TYPE_SIMPLE;
|
||||
|
||||
/* Clause 6: quantization base + delta derivation */
|
||||
if (iqmatrix_set) {
|
||||
frame.quant.y_ac_qi =
|
||||
iqmatrix->quantization_index[0][0];
|
||||
frame.quant.y_dc_delta = (int8_t)
|
||||
(iqmatrix->quantization_index[0][1] -
|
||||
iqmatrix->quantization_index[0][0]);
|
||||
frame.quant.y2_dc_delta = (int8_t)
|
||||
(iqmatrix->quantization_index[0][2] -
|
||||
iqmatrix->quantization_index[0][0]);
|
||||
frame.quant.y2_ac_delta = (int8_t)
|
||||
(iqmatrix->quantization_index[0][3] -
|
||||
iqmatrix->quantization_index[0][0]);
|
||||
frame.quant.uv_dc_delta = (int8_t)
|
||||
(iqmatrix->quantization_index[0][4] -
|
||||
iqmatrix->quantization_index[0][0]);
|
||||
frame.quant.uv_ac_delta = (int8_t)
|
||||
(iqmatrix->quantization_index[0][5] -
|
||||
iqmatrix->quantization_index[0][0]);
|
||||
}
|
||||
|
||||
if (picture->pic_fields.bits.segmentation_enabled && iqmatrix_set) {
|
||||
for (i = 1; i < 4; i++)
|
||||
frame.segment.quant_update[i] = (int8_t)
|
||||
(iqmatrix->quantization_index[i][0] -
|
||||
iqmatrix->quantization_index[0][0]);
|
||||
}
|
||||
|
||||
/* Clause 7: segment fields */
|
||||
for (i = 0; i < 3; i++)
|
||||
frame.segment.segment_probs[i] =
|
||||
picture->mb_segment_tree_probs[i];
|
||||
if (picture->pic_fields.bits.segmentation_enabled)
|
||||
frame.segment.flags |= V4L2_VP8_SEGMENT_FLAG_ENABLED;
|
||||
if (picture->pic_fields.bits.update_mb_segmentation_map)
|
||||
frame.segment.flags |= V4L2_VP8_SEGMENT_FLAG_UPDATE_MAP;
|
||||
if (picture->pic_fields.bits.update_segment_feature_data)
|
||||
frame.segment.flags |=
|
||||
V4L2_VP8_SEGMENT_FLAG_UPDATE_FEATURE_DATA;
|
||||
/* DELTA_VALUE_MODE: VAAPI doesn't expose abs_delta. FFmpeg sets
|
||||
* unconditionally per !s->segmentation.absolute_vals (default).
|
||||
* Kernel ignores when ENABLED bit clear (BBB case). */
|
||||
frame.segment.flags |= V4L2_VP8_SEGMENT_FLAG_DELTA_VALUE_MODE;
|
||||
|
||||
if (picture->pic_fields.bits.segmentation_enabled) {
|
||||
for (i = 0; i < 4; i++)
|
||||
frame.segment.lf_update[i] = (int8_t)
|
||||
(picture->loop_filter_level[i] -
|
||||
picture->loop_filter_level[0]);
|
||||
}
|
||||
|
||||
/* Clause 8: entropy table mapping (3 VAAPI sources merged) */
|
||||
for (i = 0; i < 4; i++)
|
||||
frame.entropy.y_mode_probs[i] = picture->y_mode_probs[i];
|
||||
for (i = 0; i < 3; i++)
|
||||
frame.entropy.uv_mode_probs[i] = picture->uv_mode_probs[i];
|
||||
for (i = 0; i < 2; i++)
|
||||
for (j = 0; j < 19; j++)
|
||||
frame.entropy.mv_probs[i][j] =
|
||||
picture->mv_probs[i][j];
|
||||
/* coeff_probs[4][8][3][11]: VAAPI layout matches kernel exactly;
|
||||
* direct memcpy. Both vaapi_vp8.c:133-143 and v4l2_request_vp8.c:
|
||||
* 141-153 apply identical coeff_bands_inverse reordering before
|
||||
* writing — VAAPI consumer has done the reordering for us. */
|
||||
memcpy(frame.entropy.coeff_probs,
|
||||
probability->dct_coeff_probs,
|
||||
sizeof frame.entropy.coeff_probs);
|
||||
|
||||
/* Clause 9: coder state + first-partition fields */
|
||||
frame.coder_state.range = picture->bool_coder_ctx.range;
|
||||
frame.coder_state.value = picture->bool_coder_ctx.value;
|
||||
frame.coder_state.bit_count = picture->bool_coder_ctx.count;
|
||||
|
||||
/* Phase 5 C1+C2: macroblock_offset IS first_part_header_bits by
|
||||
* source identity; kernel hantro_g1_vp8_dec.c:260 reads it
|
||||
* unconditionally to compute MB-data DMA offset. partition_size[0]
|
||||
* is the post-parse REMAINDER; recover total via
|
||||
* + ceil(macroblock_offset/8). */
|
||||
frame.first_part_header_bits = slice->macroblock_offset;
|
||||
frame.first_part_size =
|
||||
slice->partition_size[0] +
|
||||
((uint32_t)slice->macroblock_offset + 7) / 8;
|
||||
|
||||
for (i = 0; i < 8; i++)
|
||||
frame.dct_part_sizes[i] = slice->partition_size[i + 1];
|
||||
|
||||
/* Clause 9: flags assembly (6 mainline-documented bits only;
|
||||
* EXPERIMENTAL + bit 0x40 NOT replicated despite ffmpeg-v4l2-
|
||||
* request-git setting them — kernel hantro_vp8.c only inspects
|
||||
* KEY_FRAME bit). VAAPI inverts: key_frame=0 means it IS a
|
||||
* keyframe per VP8 spec. */
|
||||
if (!picture->pic_fields.bits.key_frame)
|
||||
frame.flags |= V4L2_VP8_FRAME_FLAG_KEY_FRAME;
|
||||
frame.flags |= V4L2_VP8_FRAME_FLAG_SHOW_FRAME;
|
||||
if (picture->pic_fields.bits.mb_no_coeff_skip)
|
||||
frame.flags |= V4L2_VP8_FRAME_FLAG_MB_NO_SKIP_COEFF;
|
||||
if (picture->pic_fields.bits.sign_bias_golden)
|
||||
frame.flags |= V4L2_VP8_FRAME_FLAG_SIGN_BIAS_GOLDEN;
|
||||
if (picture->pic_fields.bits.sign_bias_alternate)
|
||||
frame.flags |= V4L2_VP8_FRAME_FLAG_SIGN_BIAS_ALT;
|
||||
|
||||
/* Clause 1+10: single-control batched submission */
|
||||
struct v4l2_ext_control ctrls[1] = {
|
||||
{
|
||||
.id = V4L2_CID_STATELESS_VP8_FRAME,
|
||||
.ptr = &frame,
|
||||
.size = sizeof frame,
|
||||
},
|
||||
};
|
||||
|
||||
rc = v4l2_set_controls(driver_data->video_fd,
|
||||
surface_object->request_fd,
|
||||
ctrls, 1);
|
||||
if (rc < 0)
|
||||
return VA_STATUS_ERROR_OPERATION_FAILED;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -0,0 +1,38 @@
|
||||
/*
|
||||
* Copyright (C) 2026 Markus Fritsche <fritsche.markus@gmail.com>
|
||||
*
|
||||
* fresnel-fourier iter3: VP8 codec dispatcher header.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sub license, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef _VP8_H_
|
||||
#define _VP8_H_
|
||||
|
||||
struct object_context;
|
||||
struct object_surface;
|
||||
struct request_data;
|
||||
|
||||
int vp8_set_controls(struct request_data *driver_data,
|
||||
struct object_context *context,
|
||||
struct object_surface *surface_object);
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,754 @@
|
||||
/*
|
||||
* Copyright (C) 2026 Markus Fritsche <fritsche.markus@gmail.com>
|
||||
*
|
||||
* fresnel-fourier iter4 Phase 6 commit B: VP9 codec dispatcher
|
||||
* implemented against V4L2_CID_STATELESS_VP9_FRAME (0xa40a2c) +
|
||||
* V4L2_CID_STATELESS_VP9_COMPRESSED_HDR (0xa40a2d). rkvdec on
|
||||
* RK3399 mandatorily requires both controls per
|
||||
* drivers/staging/media/rkvdec/rkvdec-vp9.c::rkvdec_vp9_run_preamble:752.
|
||||
*
|
||||
* Reference: FFmpeg libavcodec/v4l2_request_vp9.c (kwiboo branch);
|
||||
* FFmpeg libavcodec/vaapi_vp9.c (VAAPI source-side
|
||||
* verification of field semantics);
|
||||
* kernel drivers/media/v4l2-core/v4l2-vp9.c +
|
||||
* drivers/staging/media/rkvdec/rkvdec-vp9.c.
|
||||
*
|
||||
* Phase 5 review amendments incorporated (see phase5_iter4_review.md):
|
||||
* C1 frame.interpolation_filter = picture->mcomp_filter_type
|
||||
* (NO XOR; vaapi_vp9.c:62 already applied the XOR before storing
|
||||
* into VAAPI's mcomp_filter_type; double-XOR would swap
|
||||
* EIGHTTAP and EIGHTTAP_SMOOTH for inter frames)
|
||||
* C2 LF deltas persisted across frames in object_context.vp9_lf,
|
||||
* init to VP9 spec defaults {1,0,-1,-1,0,0} on
|
||||
* keyframe/intra_only/error_resilient, updated only when parsed
|
||||
* lf_delta.update=1, ALWAYS copied to kernel control
|
||||
* C3 vp9_fill_compressed_hdr takes out_reference_mode pointer
|
||||
* (reference_mode lives in v4l2_ctrl_vp9_frame, NOT in
|
||||
* _compressed_hdr; threaded via parameter)
|
||||
*
|
||||
* Suggested findings incorporated:
|
||||
* S4 uv_mode memcpy from FFmpeg's fill_compressed_hdr is omitted —
|
||||
* rkvdec reads uv_mode from kernel's persistent
|
||||
* probability_tables, NOT from prob_updates ctrl
|
||||
* S5 lossless_flag semantics align with FFmpeg's s->s.h.lossless
|
||||
* (LosslessFlag = base_qindex==0 && y_dc_delta_q==0 &&
|
||||
* uv_dc_delta_q==0 && uv_ac_delta_q==0)
|
||||
*/
|
||||
|
||||
#include "vp9.h"
|
||||
|
||||
#include "v4l2.h"
|
||||
#include "utils.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <linux/v4l2-controls.h>
|
||||
#include <linux/videodev2.h>
|
||||
|
||||
/* Clause 3: compile-time size assertions. UAPI shifts must fail loudly. */
|
||||
_Static_assert(sizeof(struct v4l2_ctrl_vp9_frame) == 168,
|
||||
"v4l2_ctrl_vp9_frame size mismatch — kernel UAPI changed");
|
||||
_Static_assert(sizeof(struct v4l2_ctrl_vp9_compressed_hdr) == 2040,
|
||||
"v4l2_ctrl_vp9_compressed_hdr size mismatch — kernel UAPI changed");
|
||||
|
||||
/*
|
||||
* VPX range coder — minimal port of FFmpeg vpx_rac.[ch] + vp89_rac.h.
|
||||
* Stateless static helpers; bitstream-only readers. ~80 LOC.
|
||||
*/
|
||||
|
||||
/*
 * Renormalisation shift for the range coder, indexed by the current
 * `high` value: entry x is the left shift that moves x back into the
 * 128..255 window (the number of leading zero bits of x as an 8-bit
 * quantity; 8 for x == 0).
 */
static const uint8_t vpx_norm_shift[256] = {
	/*   0.. 15 */ 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
	/*  16.. 31 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	/*  32.. 63 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
		       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/*  64..127 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
		       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
		       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
		       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 128..255 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
		       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
		       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
		       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
		       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
		       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
		       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
		       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
|
||||
|
||||
/*
 * Range-decoder state.
 *
 * high:      current range (kept in 128..255 after renormalisation).
 * bits:      refill counter; a refill is attempted once it reaches >= 0.
 * buffer:    next unread input byte.
 * end:       one past the last input byte.
 * code_word: bit window the decoded symbols are compared against.
 */
struct vp9_rac {
	int high;
	int bits;
	const uint8_t *buffer;
	const uint8_t *end;
	unsigned int code_word;
};

/*
 * Start range decoding over buf[0..size).
 *
 * The first three bytes are preloaded, most significant first, into the
 * code word. Buffers shorter than three bytes are treated as if they were
 * followed by zero bytes, so nothing outside buf[0..size) is ever read and
 * `buffer` never moves past `end`.
 *
 * Returns 0 on success, -1 (leaving *c untouched) when size < 1.
 */
static int vp9_rac_init(struct vp9_rac *c, const uint8_t *buf, int size)
{
	unsigned int word = 0;
	int preload;
	int i;

	if (size < 1)
		return -1;

	preload = size < 3 ? size : 3;
	for (i = 0; i < 3; i++) {
		word <<= 8;
		if (i < preload)
			word |= buf[i];	/* missing bytes stay zero */
	}

	c->high = 255;
	c->bits = -16;
	c->buffer = buf + preload;
	c->end = buf + size;
	c->code_word = word;
	return 0;
}
|
||||
|
||||
static unsigned vp9_rac_renorm(struct vp9_rac *c)
|
||||
{
|
||||
int shift = vpx_norm_shift[c->high];
|
||||
int bits = c->bits;
|
||||
unsigned code_word = c->code_word;
|
||||
|
||||
c->high <<= shift;
|
||||
code_word <<= shift;
|
||||
bits += shift;
|
||||
if (bits >= 0 && c->buffer + 1 < c->end) {
|
||||
code_word |= (((unsigned)c->buffer[0] << 8) | c->buffer[1]) << bits;
|
||||
c->buffer += 2;
|
||||
bits -= 16;
|
||||
}
|
||||
c->bits = bits;
|
||||
return code_word;
|
||||
}
|
||||
|
||||
static int vp9_rac_get_prob(struct vp9_rac *c, uint8_t prob)
|
||||
{
|
||||
unsigned code_word = vp9_rac_renorm(c);
|
||||
unsigned low = 1 + (((c->high - 1) * prob) >> 8);
|
||||
unsigned low_shift = low << 16;
|
||||
int bit = code_word >= low_shift;
|
||||
|
||||
c->high = bit ? c->high - low : low;
|
||||
c->code_word = bit ? code_word - low_shift : code_word;
|
||||
return bit;
|
||||
}
|
||||
|
||||
/*
 * Decode one symbol with an int-typed probability; the value is narrowed
 * to 8 bits and handed to vp9_rac_get_prob().
 */
static int vp9_rac_get_branchy(struct vp9_rac *c, int prob)
{
	const uint8_t narrowed = (uint8_t)prob;

	return vp9_rac_get_prob(c, narrowed);
}
|
||||
|
||||
/* Decode one equiprobable bit (probability 128 out of 256). */
static int vp9_rac_bit(struct vp9_rac *c)
{
	const uint8_t even_odds = 128;

	return vp9_rac_get_prob(c, even_odds);
}
|
||||
|
||||
/*
 * Decode a `bits`-wide unsigned literal, most significant bit first, from
 * equiprobable bits. Returns 0 when `bits` is 0.
 */
static int vp9_rac_uint(struct vp9_rac *c, int bits)
{
	int acc = 0;

	for (; bits != 0; bits--) {
		const int next = vp9_rac_bit(c);

		acc = (acc << 1) | next;
	}
	return acc;
}
|
||||
|
||||
/* inv_map_table: VP9 differential probability update table.
|
||||
* Verbatim copy from FFmpeg v4l2_request_vp9.c:44-64. */
|
||||
/*
 * Maps a decoded delta index to a probability value. The first 20 entries
 * are the coarse values 7, 20, ..., 254 (spaced 13 apart); the remaining
 * entries list every other value from 1 to 253 in ascending order, and the
 * final entry repeats 253.
 */
static const uint8_t inv_map_table[255] = {
	/* coarse values */
	7, 20, 33, 46, 59, 72, 85, 98, 111, 124,
	137, 150, 163, 176, 189, 202, 215, 228, 241, 254,
	/* fine values, one row per gap between coarse values */
	1, 2, 3, 4, 5, 6,
	8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
	21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
	34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
	47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
	60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71,
	73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
	86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97,
	99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110,
	112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123,
	125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136,
	138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
	151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162,
	164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
	177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188,
	190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201,
	203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214,
	216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227,
	229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240,
	242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253,
	/* last slot repeats the largest fine value */
	253,
};
|
||||
|
||||
static int read_prob_delta(struct vp9_rac *c)
|
||||
{
|
||||
int d;
|
||||
|
||||
if (!vp9_rac_bit(c)) {
|
||||
d = vp9_rac_uint(c, 4);
|
||||
} else if (!vp9_rac_bit(c)) {
|
||||
d = vp9_rac_uint(c, 4) + 16;
|
||||
} else if (!vp9_rac_bit(c)) {
|
||||
d = vp9_rac_uint(c, 5) + 32;
|
||||
} else {
|
||||
d = vp9_rac_uint(c, 7);
|
||||
if (d >= 65)
|
||||
d = (d << 1) - 65 + vp9_rac_bit(c);
|
||||
d += 64;
|
||||
}
|
||||
return inv_map_table[d];
|
||||
}
|
||||
|
||||
/*
|
||||
* Clause 6: minimal big-endian bit reader over the uncompressed header
|
||||
* for the fields VAAPI doesn't expose: lf_delta_enabled / lf_delta_update /
|
||||
* lf_ref_deltas / lf_mode_deltas / base_q_idx / delta_q_y_dc / delta_q_uv_dc /
|
||||
* delta_q_uv_ac.
|
||||
*
|
||||
* Walks: frame_marker(2) + profile(2 or 3) + show_existing_frame(1) +
|
||||
* frame_type(1) + show_frame(1) + error_resilient(1) +
|
||||
* if keyframe: sync_code(24) + color_config + frame_size + render_size
|
||||
* else: intra_only(1 if !show_frame) + reset(2) +
|
||||
* if intra_only: sync_code(24) + (if profile>0: color_config) +
|
||||
* refresh_flags(8) + frame_size + render_size
|
||||
* else: refresh_flags(8) + 3*(ref_idx(3)+sign_bias(1)) +
|
||||
* frame_size_with_refs + allow_hpmv(1) + interp_filter(2 or 3)
|
||||
* loop_filter_params + quantization_params
|
||||
*
|
||||
* Only profile-0 paths are exercised for BBB; non-profile-0 fields read
|
||||
* their bits but do not write them back. Keep targeted, not general.
|
||||
*/
|
||||
|
||||
/*
 * Cursor over a byte buffer that is read bit by bit, most significant bit
 * of each byte first.
 *
 * buf:     start of the data.
 * size:    number of valid bytes in buf.
 * bit_pos: index of the next bit to read, counted from the start of buf.
 */
struct uh_reader {
	const uint8_t *buf;
	size_t size;
	size_t bit_pos;
};

/*
 * Read `n` bits and return them as an unsigned value, first bit read in the
 * most significant position. If the data runs out part-way, the bits read
 * so far are discarded and 0 is returned; the cursor stays at the end of
 * the data. A non-positive `n` reads nothing and returns 0.
 */
static unsigned uh_read_bits(struct uh_reader *r, int n)
{
	unsigned acc = 0;

	while (n-- > 0) {
		const size_t idx = r->bit_pos / 8;
		const int shift = 7 - (int)(r->bit_pos % 8);

		if (idx >= r->size)
			return 0;

		acc = (acc << 1) | ((unsigned)(r->buf[idx] >> shift) & 1u);
		r->bit_pos += 1;
	}

	return acc;
}
|
||||
|
||||
/* Phase 7 fix: VP9 spec s(N) is N magnitude bits + 1 sign bit (total N+1).
|
||||
* Previous uh_read_signed_6 read 4+1=5 bits instead of 6+1=7; bit drift of
|
||||
* 2 bits per ref_delta accumulated across the lf_delta updates and shifted
|
||||
* base_q_idx by 8 bits, producing 0x41 (frame 1 keyframe) instead of 0x2e.
|
||||
* Phase 3 anchor cross-check confirmed the corrected 7-bit read places
|
||||
* base_q_idx at bit 111 with value 0x2e=46. */
|
||||
/*
 * Read a sign-magnitude value: `n` magnitude bits followed by one sign bit
 * (n + 1 bits in total). A set sign bit negates the magnitude.
 */
static int uh_read_sbits(struct uh_reader *r, int n)
{
	const int magnitude = (int)uh_read_bits(r, n);

	if (uh_read_bits(r, 1))
		return -magnitude;

	return magnitude;
}
|
||||
|
||||
/*
 * Read an optional quantiser delta: one presence bit, followed (only when
 * that bit is set) by a 4-bit magnitude and a sign bit. Returns 0 when the
 * delta is absent.
 */
static int uh_read_delta_q(struct uh_reader *r)
{
	const unsigned delta_coded = uh_read_bits(r, 1);

	return delta_coded ? uh_read_sbits(r, 4) : 0;
}
|
||||
|
||||
static void vp9_parse_uncompressed_header_lf_quant(
|
||||
const uint8_t *data, uint32_t size,
|
||||
struct v4l2_ctrl_vp9_frame *frame,
|
||||
int8_t persistent_ref_deltas[4],
|
||||
int8_t persistent_mode_deltas[2],
|
||||
bool *out_keyframe_or_intraonly,
|
||||
bool *out_lf_delta_updated)
|
||||
{
|
||||
struct uh_reader r = { .buf = data, .size = size, .bit_pos = 0 };
|
||||
bool keyframe, intra_only = false, show_frame, error_resilient;
|
||||
int profile;
|
||||
int i;
|
||||
|
||||
*out_lf_delta_updated = false;
|
||||
|
||||
uh_read_bits(&r, 2); /* frame_marker */
|
||||
{
|
||||
int p_lo = uh_read_bits(&r, 1);
|
||||
int p_hi = uh_read_bits(&r, 1);
|
||||
profile = p_lo + (p_hi << 1);
|
||||
if (profile == 3)
|
||||
uh_read_bits(&r, 1);
|
||||
}
|
||||
|
||||
if (uh_read_bits(&r, 1)) /* show_existing_frame */
|
||||
return; /* no LF/quant in the bitstream */
|
||||
|
||||
keyframe = !uh_read_bits(&r, 1);
|
||||
show_frame = uh_read_bits(&r, 1);
|
||||
error_resilient = uh_read_bits(&r, 1);
|
||||
|
||||
if (keyframe) {
|
||||
uh_read_bits(&r, 24); /* sync_code */
|
||||
/* color_config (profile=0): just bt709 + range bit */
|
||||
if (profile >= 2)
|
||||
uh_read_bits(&r, 1); /* ten_or_twelve_bit */
|
||||
uh_read_bits(&r, 3); /* color_space */
|
||||
if (1) { /* color_space != CS_RGB */
|
||||
uh_read_bits(&r, 1); /* color_range */
|
||||
if (profile == 1 || profile == 3) {
|
||||
uh_read_bits(&r, 2); /* subsampling */
|
||||
uh_read_bits(&r, 1); /* reserved */
|
||||
}
|
||||
} else if (profile == 1 || profile == 3) {
|
||||
uh_read_bits(&r, 1); /* reserved */
|
||||
}
|
||||
uh_read_bits(&r, 16); /* frame_width_minus_1 */
|
||||
uh_read_bits(&r, 16); /* frame_height_minus_1 */
|
||||
if (uh_read_bits(&r, 1)) {
|
||||
uh_read_bits(&r, 16);
|
||||
uh_read_bits(&r, 16);
|
||||
}
|
||||
} else {
|
||||
intra_only = show_frame ? 0 : uh_read_bits(&r, 1);
|
||||
if (!error_resilient)
|
||||
uh_read_bits(&r, 2); /* reset_frame_context */
|
||||
if (intra_only) {
|
||||
uh_read_bits(&r, 24); /* sync_code */
|
||||
if (profile > 0) {
|
||||
if (profile >= 2)
|
||||
uh_read_bits(&r, 1);
|
||||
uh_read_bits(&r, 3); /* color_space */
|
||||
uh_read_bits(&r, 1); /* color_range */
|
||||
if (profile == 1 || profile == 3) {
|
||||
uh_read_bits(&r, 2);
|
||||
uh_read_bits(&r, 1);
|
||||
}
|
||||
}
|
||||
uh_read_bits(&r, 8); /* refresh_frame_flags */
|
||||
uh_read_bits(&r, 16);
|
||||
uh_read_bits(&r, 16);
|
||||
if (uh_read_bits(&r, 1)) {
|
||||
uh_read_bits(&r, 16);
|
||||
uh_read_bits(&r, 16);
|
||||
}
|
||||
} else {
|
||||
uh_read_bits(&r, 8); /* refresh_frame_flags */
|
||||
for (i = 0; i < 3; i++) {
|
||||
uh_read_bits(&r, 3);
|
||||
uh_read_bits(&r, 1);
|
||||
}
|
||||
/* frame_size_with_refs: up to 3 found_ref bits, then
|
||||
* if no found_ref: explicit width+height; else ref-pick.
|
||||
* Then render_size. Just walk it. */
|
||||
{
|
||||
bool found = false;
|
||||
for (i = 0; i < 3; i++) {
|
||||
if (uh_read_bits(&r, 1))
|
||||
found = true;
|
||||
}
|
||||
if (!found) {
|
||||
uh_read_bits(&r, 16);
|
||||
uh_read_bits(&r, 16);
|
||||
}
|
||||
if (uh_read_bits(&r, 1)) {
|
||||
uh_read_bits(&r, 16);
|
||||
uh_read_bits(&r, 16);
|
||||
}
|
||||
}
|
||||
uh_read_bits(&r, 1); /* allow_hpmv */
|
||||
if (uh_read_bits(&r, 1)) /* interp_filter switchable */
|
||||
;
|
||||
else
|
||||
uh_read_bits(&r, 2); /* interp_filter literal */
|
||||
}
|
||||
}
|
||||
|
||||
*out_keyframe_or_intraonly = keyframe || intra_only;
|
||||
|
||||
uh_read_bits(&r, 1); /* refresh_frame_context */
|
||||
uh_read_bits(&r, 1); /* frame_parallel_decoding_mode */
|
||||
if (!error_resilient || keyframe || intra_only)
|
||||
uh_read_bits(&r, 2); /* frame_context_idx + reset_frame_context */
|
||||
|
||||
/* loop_filter_params */
|
||||
uh_read_bits(&r, 6); /* filter_level (already in VAAPI) */
|
||||
uh_read_bits(&r, 3); /* sharpness (already in VAAPI) */
|
||||
if (uh_read_bits(&r, 1)) { /* lf_delta.enabled */
|
||||
frame->lf.flags |= V4L2_VP9_LOOP_FILTER_FLAG_DELTA_ENABLED;
|
||||
if (uh_read_bits(&r, 1)) { /* lf_delta.updated */
|
||||
frame->lf.flags |= V4L2_VP9_LOOP_FILTER_FLAG_DELTA_UPDATE;
|
||||
*out_lf_delta_updated = true;
|
||||
for (i = 0; i < 4; i++) {
|
||||
if (uh_read_bits(&r, 1))
|
||||
persistent_ref_deltas[i] =
|
||||
(int8_t)uh_read_sbits(&r, 6);
|
||||
}
|
||||
for (i = 0; i < 2; i++) {
|
||||
if (uh_read_bits(&r, 1))
|
||||
persistent_mode_deltas[i] =
|
||||
(int8_t)uh_read_sbits(&r, 6);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* quantization_params */
|
||||
frame->quant.base_q_idx = (uint8_t)uh_read_bits(&r, 8);
|
||||
frame->quant.delta_q_y_dc = (int8_t)uh_read_delta_q(&r);
|
||||
frame->quant.delta_q_uv_dc = (int8_t)uh_read_delta_q(&r);
|
||||
frame->quant.delta_q_uv_ac = (int8_t)uh_read_delta_q(&r);
|
||||
}
|
||||
|
||||
/*
|
||||
* Clause 9: compressed-header parser — port of FFmpeg
|
||||
* v4l2_request_vp9.c:99-261::fill_compressed_hdr.
|
||||
*
|
||||
* Phase 5 C3: out_reference_mode threaded via out-param. Callers
|
||||
* derive `allowcompinter` from VAAPI sign-bias bits and pass it.
|
||||
*/
|
||||
#define V4L2_VP9_TX_MODE_ONLY_4X4_LOCAL 0
|
||||
#define V4L2_VP9_TX_MODE_ALLOW_32X32_LOCAL 3
|
||||
#define V4L2_VP9_TX_MODE_SELECT_LOCAL 4
|
||||
|
||||
static void vp9_fill_compressed_hdr(
|
||||
struct v4l2_ctrl_vp9_compressed_hdr *ctrl,
|
||||
const uint8_t *buffer, uint32_t size,
|
||||
uint8_t lossless_flag,
|
||||
bool keyframe_or_intraonly,
|
||||
bool allowcompinter,
|
||||
bool highprecision_mvs,
|
||||
int interp_filter_switchable,
|
||||
uint8_t *out_reference_mode)
|
||||
{
|
||||
struct vp9_rac c;
|
||||
int comppredmode = 0;
|
||||
int i, j, k, l, m, n;
|
||||
|
||||
*out_reference_mode = 0;
|
||||
|
||||
if (vp9_rac_init(&c, buffer, size) < 0)
|
||||
return;
|
||||
|
||||
if (vp9_rac_get_branchy(&c, 128)) /* marker bit */
|
||||
return;
|
||||
|
||||
if (lossless_flag) {
|
||||
ctrl->tx_mode = V4L2_VP9_TX_MODE_ONLY_4X4_LOCAL;
|
||||
} else {
|
||||
ctrl->tx_mode = (uint8_t)vp9_rac_uint(&c, 2);
|
||||
if (ctrl->tx_mode == V4L2_VP9_TX_MODE_ALLOW_32X32_LOCAL)
|
||||
ctrl->tx_mode = (uint8_t)(ctrl->tx_mode + vp9_rac_bit(&c));
|
||||
if (ctrl->tx_mode == V4L2_VP9_TX_MODE_SELECT_LOCAL) {
|
||||
for (i = 0; i < 2; i++)
|
||||
if (vp9_rac_get_branchy(&c, 252))
|
||||
ctrl->tx8[i][0] = (uint8_t)read_prob_delta(&c);
|
||||
for (i = 0; i < 2; i++)
|
||||
for (j = 0; j < 2; j++)
|
||||
if (vp9_rac_get_branchy(&c, 252))
|
||||
ctrl->tx16[i][j] = (uint8_t)read_prob_delta(&c);
|
||||
for (i = 0; i < 2; i++)
|
||||
for (j = 0; j < 3; j++)
|
||||
if (vp9_rac_get_branchy(&c, 252))
|
||||
ctrl->tx32[i][j] = (uint8_t)read_prob_delta(&c);
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < 4; i++) {
|
||||
if (vp9_rac_bit(&c)) {
|
||||
for (j = 0; j < 2; j++)
|
||||
for (k = 0; k < 2; k++)
|
||||
for (l = 0; l < 6; l++)
|
||||
for (m = 0; m < 6; m++) {
|
||||
if (m >= 3 && l == 0)
|
||||
break;
|
||||
for (n = 0; n < 3; n++)
|
||||
if (vp9_rac_get_branchy(&c, 252))
|
||||
ctrl->coef[i][j][k][l][m][n] =
|
||||
(uint8_t)read_prob_delta(&c);
|
||||
}
|
||||
}
|
||||
if (ctrl->tx_mode == i)
|
||||
break;
|
||||
}
|
||||
|
||||
for (i = 0; i < 3; i++)
|
||||
if (vp9_rac_get_branchy(&c, 252))
|
||||
ctrl->skip[i] = (uint8_t)read_prob_delta(&c);
|
||||
|
||||
if (!keyframe_or_intraonly) {
|
||||
for (i = 0; i < 7; i++)
|
||||
for (j = 0; j < 3; j++)
|
||||
if (vp9_rac_get_branchy(&c, 252))
|
||||
ctrl->inter_mode[i][j] = (uint8_t)read_prob_delta(&c);
|
||||
|
||||
if (interp_filter_switchable)
|
||||
for (i = 0; i < 4; i++)
|
||||
for (j = 0; j < 2; j++)
|
||||
if (vp9_rac_get_branchy(&c, 252))
|
||||
ctrl->interp_filter[i][j] =
|
||||
(uint8_t)read_prob_delta(&c);
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
if (vp9_rac_get_branchy(&c, 252))
|
||||
ctrl->is_inter[i] = (uint8_t)read_prob_delta(&c);
|
||||
|
||||
if (allowcompinter) {
|
||||
comppredmode = vp9_rac_bit(&c);
|
||||
if (comppredmode)
|
||||
comppredmode += vp9_rac_bit(&c);
|
||||
if (comppredmode == 2) /* PRED_SWITCHABLE */
|
||||
for (i = 0; i < 5; i++)
|
||||
if (vp9_rac_get_branchy(&c, 252))
|
||||
ctrl->comp_mode[i] = (uint8_t)read_prob_delta(&c);
|
||||
} else {
|
||||
comppredmode = 0; /* PRED_SINGLEREF */
|
||||
}
|
||||
|
||||
if (comppredmode != 1) { /* != PRED_COMPREF */
|
||||
for (i = 0; i < 5; i++) {
|
||||
if (vp9_rac_get_branchy(&c, 252))
|
||||
ctrl->single_ref[i][0] = (uint8_t)read_prob_delta(&c);
|
||||
if (vp9_rac_get_branchy(&c, 252))
|
||||
ctrl->single_ref[i][1] = (uint8_t)read_prob_delta(&c);
|
||||
}
|
||||
}
|
||||
if (comppredmode != 0) { /* != PRED_SINGLEREF */
|
||||
for (i = 0; i < 5; i++)
|
||||
if (vp9_rac_get_branchy(&c, 252))
|
||||
ctrl->comp_ref[i] = (uint8_t)read_prob_delta(&c);
|
||||
}
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
for (j = 0; j < 9; j++)
|
||||
if (vp9_rac_get_branchy(&c, 252))
|
||||
ctrl->y_mode[i][j] = (uint8_t)read_prob_delta(&c);
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
for (j = 0; j < 4; j++)
|
||||
for (k = 0; k < 3; k++)
|
||||
if (vp9_rac_get_branchy(&c, 252))
|
||||
ctrl->partition[(i * 4) + j][k] =
|
||||
(uint8_t)read_prob_delta(&c);
|
||||
|
||||
for (i = 0; i < 3; i++)
|
||||
if (vp9_rac_get_branchy(&c, 252))
|
||||
ctrl->mv.joint[i] = (uint8_t)((vp9_rac_uint(&c, 7) << 1) | 1);
|
||||
for (i = 0; i < 2; i++) {
|
||||
if (vp9_rac_get_branchy(&c, 252))
|
||||
ctrl->mv.sign[i] = (uint8_t)((vp9_rac_uint(&c, 7) << 1) | 1);
|
||||
for (j = 0; j < 10; j++)
|
||||
if (vp9_rac_get_branchy(&c, 252))
|
||||
ctrl->mv.classes[i][j] = (uint8_t)((vp9_rac_uint(&c, 7) << 1) | 1);
|
||||
if (vp9_rac_get_branchy(&c, 252))
|
||||
ctrl->mv.class0_bit[i] = (uint8_t)((vp9_rac_uint(&c, 7) << 1) | 1);
|
||||
for (j = 0; j < 10; j++)
|
||||
if (vp9_rac_get_branchy(&c, 252))
|
||||
ctrl->mv.bits[i][j] = (uint8_t)((vp9_rac_uint(&c, 7) << 1) | 1);
|
||||
}
|
||||
for (i = 0; i < 2; i++) {
|
||||
for (j = 0; j < 2; j++)
|
||||
for (k = 0; k < 3; k++)
|
||||
if (vp9_rac_get_branchy(&c, 252))
|
||||
ctrl->mv.class0_fr[i][j][k] =
|
||||
(uint8_t)((vp9_rac_uint(&c, 7) << 1) | 1);
|
||||
for (j = 0; j < 3; j++)
|
||||
if (vp9_rac_get_branchy(&c, 252))
|
||||
ctrl->mv.fr[i][j] = (uint8_t)((vp9_rac_uint(&c, 7) << 1) | 1);
|
||||
}
|
||||
if (highprecision_mvs) {
|
||||
for (i = 0; i < 2; i++) {
|
||||
if (vp9_rac_get_branchy(&c, 252))
|
||||
ctrl->mv.class0_hp[i] = (uint8_t)((vp9_rac_uint(&c, 7) << 1) | 1);
|
||||
if (vp9_rac_get_branchy(&c, 252))
|
||||
ctrl->mv.hp[i] = (uint8_t)((vp9_rac_uint(&c, 7) << 1) | 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
*out_reference_mode = (uint8_t)comppredmode;
|
||||
}
|
||||
|
||||
/*
|
||||
* Clause 1+2+4+5+7+10+11+12: orchestrate VP9 control submission.
|
||||
* 2 batched controls per frame: VP9_FRAME + VP9_COMPRESSED_HDR.
|
||||
*/
|
||||
int vp9_set_controls(struct request_data *driver_data,
|
||||
struct object_context *context,
|
||||
struct object_surface *surface_object)
|
||||
{
|
||||
VADecPictureParameterBufferVP9 *picture =
|
||||
&surface_object->params.vp9.picture;
|
||||
VASliceParameterBufferVP9 *slice =
|
||||
&surface_object->params.vp9.slice;
|
||||
|
||||
struct v4l2_ctrl_vp9_frame frame;
|
||||
struct v4l2_ctrl_vp9_compressed_hdr compressed_hdr;
|
||||
struct v4l2_ext_control ctrls[2];
|
||||
int rc, i;
|
||||
bool keyframe = !picture->pic_fields.bits.frame_type;
|
||||
bool intra_only = picture->pic_fields.bits.intra_only;
|
||||
bool error_resilient = picture->pic_fields.bits.error_resilient_mode;
|
||||
bool allowcompinter;
|
||||
bool keyframe_or_intraonly_parsed = false;
|
||||
bool lf_delta_updated = false;
|
||||
uint8_t parsed_reference_mode = 0;
|
||||
|
||||
memset(&frame, 0, sizeof frame);
|
||||
memset(&compressed_hdr, 0, sizeof compressed_hdr);
|
||||
|
||||
/* Clause 4: frame geometry + per-frame scalars */
|
||||
frame.frame_width_minus_1 = (uint16_t)(picture->frame_width - 1);
|
||||
frame.frame_height_minus_1 = (uint16_t)(picture->frame_height - 1);
|
||||
frame.render_width_minus_1 = frame.frame_width_minus_1;
|
||||
frame.render_height_minus_1 = frame.frame_height_minus_1;
|
||||
|
||||
frame.profile = picture->profile;
|
||||
frame.bit_depth = picture->bit_depth;
|
||||
frame.tile_cols_log2 = picture->log2_tile_columns;
|
||||
frame.tile_rows_log2 = picture->log2_tile_rows;
|
||||
frame.frame_context_idx = picture->pic_fields.bits.frame_context_idx;
|
||||
|
||||
frame.lf.level = picture->filter_level;
|
||||
frame.lf.sharpness = picture->sharpness_level;
|
||||
|
||||
frame.uncompressed_header_size = picture->frame_header_length_in_bytes;
|
||||
frame.compressed_header_size = picture->first_partition_size;
|
||||
|
||||
/* Clause 5: DPB timestamp resolution */
|
||||
{
|
||||
VASurfaceID last_id = picture->reference_frames[picture->pic_fields.bits.last_ref_frame];
|
||||
VASurfaceID gold_id = picture->reference_frames[picture->pic_fields.bits.golden_ref_frame];
|
||||
VASurfaceID alt_id = picture->reference_frames[picture->pic_fields.bits.alt_ref_frame];
|
||||
struct object_surface *last_ref =
|
||||
(last_id != VA_INVALID_SURFACE) ? SURFACE(driver_data, last_id) : NULL;
|
||||
struct object_surface *gold_ref =
|
||||
(gold_id != VA_INVALID_SURFACE) ? SURFACE(driver_data, gold_id) : NULL;
|
||||
struct object_surface *alt_ref =
|
||||
(alt_id != VA_INVALID_SURFACE) ? SURFACE(driver_data, alt_id) : NULL;
|
||||
|
||||
if (last_ref) frame.last_frame_ts = v4l2_timeval_to_ns(&last_ref->timestamp);
|
||||
if (gold_ref) frame.golden_frame_ts = v4l2_timeval_to_ns(&gold_ref->timestamp);
|
||||
if (alt_ref) frame.alt_frame_ts = v4l2_timeval_to_ns(&alt_ref->timestamp);
|
||||
}
|
||||
|
||||
if (picture->pic_fields.bits.last_ref_frame_sign_bias)
|
||||
frame.ref_frame_sign_bias |= V4L2_VP9_SIGN_BIAS_LAST;
|
||||
if (picture->pic_fields.bits.golden_ref_frame_sign_bias)
|
||||
frame.ref_frame_sign_bias |= V4L2_VP9_SIGN_BIAS_GOLDEN;
|
||||
if (picture->pic_fields.bits.alt_ref_frame_sign_bias)
|
||||
frame.ref_frame_sign_bias |= V4L2_VP9_SIGN_BIAS_ALT;
|
||||
|
||||
allowcompinter = !(
|
||||
picture->pic_fields.bits.last_ref_frame_sign_bias ==
|
||||
picture->pic_fields.bits.golden_ref_frame_sign_bias &&
|
||||
picture->pic_fields.bits.golden_ref_frame_sign_bias ==
|
||||
picture->pic_fields.bits.alt_ref_frame_sign_bias);
|
||||
|
||||
/* Clause 6: persistent LF delta state — Phase 5 C2 */
|
||||
if (!context->vp9_lf.initialized || keyframe || intra_only || error_resilient) {
|
||||
context->vp9_lf.ref_deltas[0] = 1;
|
||||
context->vp9_lf.ref_deltas[1] = 0;
|
||||
context->vp9_lf.ref_deltas[2] = -1;
|
||||
context->vp9_lf.ref_deltas[3] = -1;
|
||||
context->vp9_lf.mode_deltas[0] = 0;
|
||||
context->vp9_lf.mode_deltas[1] = 0;
|
||||
context->vp9_lf.initialized = true;
|
||||
}
|
||||
|
||||
vp9_parse_uncompressed_header_lf_quant(
|
||||
surface_object->source_data,
|
||||
surface_object->source_size,
|
||||
&frame,
|
||||
context->vp9_lf.ref_deltas,
|
||||
context->vp9_lf.mode_deltas,
|
||||
&keyframe_or_intraonly_parsed,
|
||||
&lf_delta_updated);
|
||||
(void)lf_delta_updated;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
frame.lf.ref_deltas[i] = context->vp9_lf.ref_deltas[i];
|
||||
for (i = 0; i < 2; i++)
|
||||
frame.lf.mode_deltas[i] = context->vp9_lf.mode_deltas[i];
|
||||
|
||||
/* Clause 7: segmentation mapping */
|
||||
for (i = 0; i < 7; i++)
|
||||
frame.seg.tree_probs[i] = picture->mb_segment_tree_probs[i];
|
||||
for (i = 0; i < 3; i++)
|
||||
frame.seg.pred_probs[i] = picture->segment_pred_probs[i];
|
||||
|
||||
if (picture->pic_fields.bits.segmentation_enabled)
|
||||
frame.seg.flags |= V4L2_VP9_SEGMENTATION_FLAG_ENABLED;
|
||||
if (picture->pic_fields.bits.segmentation_update_map)
|
||||
frame.seg.flags |= V4L2_VP9_SEGMENTATION_FLAG_UPDATE_MAP;
|
||||
if (picture->pic_fields.bits.segmentation_temporal_update)
|
||||
frame.seg.flags |= V4L2_VP9_SEGMENTATION_FLAG_TEMPORAL_UPDATE;
|
||||
|
||||
for (i = 0; i < 8; i++) {
|
||||
if (slice->seg_param[i].segment_flags.fields.segment_reference_enabled) {
|
||||
frame.seg.feature_enabled[i] |= 1 << V4L2_VP9_SEG_LVL_REF_FRAME;
|
||||
frame.seg.feature_data[i][V4L2_VP9_SEG_LVL_REF_FRAME] =
|
||||
(int16_t)slice->seg_param[i].segment_flags.fields.segment_reference;
|
||||
}
|
||||
if (slice->seg_param[i].segment_flags.fields.segment_reference_skipped)
|
||||
frame.seg.feature_enabled[i] |= 1 << V4L2_VP9_SEG_LVL_SKIP;
|
||||
}
|
||||
|
||||
/* Clause 10: frame flags + reference_mode + interpolation_filter */
|
||||
if (keyframe)
|
||||
frame.flags |= V4L2_VP9_FRAME_FLAG_KEY_FRAME;
|
||||
if (picture->pic_fields.bits.show_frame)
|
||||
frame.flags |= V4L2_VP9_FRAME_FLAG_SHOW_FRAME;
|
||||
if (error_resilient)
|
||||
frame.flags |= V4L2_VP9_FRAME_FLAG_ERROR_RESILIENT;
|
||||
if (intra_only)
|
||||
frame.flags |= V4L2_VP9_FRAME_FLAG_INTRA_ONLY;
|
||||
if (picture->pic_fields.bits.allow_high_precision_mv)
|
||||
frame.flags |= V4L2_VP9_FRAME_FLAG_ALLOW_HIGH_PREC_MV;
|
||||
if (picture->pic_fields.bits.refresh_frame_context)
|
||||
frame.flags |= V4L2_VP9_FRAME_FLAG_REFRESH_FRAME_CTX;
|
||||
if (picture->pic_fields.bits.frame_parallel_decoding_mode)
|
||||
frame.flags |= V4L2_VP9_FRAME_FLAG_PARALLEL_DEC_MODE;
|
||||
if (picture->pic_fields.bits.subsampling_x)
|
||||
frame.flags |= V4L2_VP9_FRAME_FLAG_X_SUBSAMPLING;
|
||||
if (picture->pic_fields.bits.subsampling_y)
|
||||
frame.flags |= V4L2_VP9_FRAME_FLAG_Y_SUBSAMPLING;
|
||||
|
||||
/* Phase 5 C1: NO XOR. VAAPI's mcomp_filter_type is already post-XOR. */
|
||||
frame.interpolation_filter = picture->pic_fields.bits.mcomp_filter_type;
|
||||
|
||||
/* reset_frame_context: FFmpeg's (resetctx > 0 ? resetctx - 1 : 0) */
|
||||
frame.reset_frame_context =
|
||||
picture->pic_fields.bits.reset_frame_context > 0
|
||||
? (uint8_t)(picture->pic_fields.bits.reset_frame_context - 1)
|
||||
: 0;
|
||||
|
||||
/* Clause 9: compressed-header parser fills both compressed_hdr and
|
||||
* out_reference_mode. allowcompinter derived from sign biases above. */
|
||||
{
|
||||
int interp_switchable = (frame.interpolation_filter == V4L2_VP9_INTERP_FILTER_SWITCHABLE);
|
||||
|
||||
vp9_fill_compressed_hdr(
|
||||
&compressed_hdr,
|
||||
surface_object->source_data + frame.uncompressed_header_size,
|
||||
frame.compressed_header_size,
|
||||
picture->pic_fields.bits.lossless_flag,
|
||||
keyframe || intra_only,
|
||||
allowcompinter,
|
||||
picture->pic_fields.bits.allow_high_precision_mv,
|
||||
interp_switchable,
|
||||
&parsed_reference_mode);
|
||||
}
|
||||
frame.reference_mode = parsed_reference_mode;
|
||||
|
||||
/* Clause 11: 2-control batched submission */
|
||||
memset(ctrls, 0, sizeof ctrls);
|
||||
ctrls[0].id = V4L2_CID_STATELESS_VP9_FRAME;
|
||||
ctrls[0].ptr = &frame;
|
||||
ctrls[0].size = sizeof frame;
|
||||
ctrls[1].id = V4L2_CID_STATELESS_VP9_COMPRESSED_HDR;
|
||||
ctrls[1].ptr = &compressed_hdr;
|
||||
ctrls[1].size = sizeof compressed_hdr;
|
||||
|
||||
rc = v4l2_set_controls(driver_data->video_fd,
|
||||
surface_object->request_fd,
|
||||
ctrls, 2);
|
||||
if (rc < 0)
|
||||
return VA_STATUS_ERROR_OPERATION_FAILED;
|
||||
|
||||
return VA_STATUS_SUCCESS;
|
||||
}
|
||||
@@ -0,0 +1,38 @@
|
||||
/*
|
||||
* Copyright (C) 2026 Markus Fritsche <fritsche.markus@gmail.com>
|
||||
*
|
||||
* fresnel-fourier iter4 Phase 6 commit B: VP9 codec dispatcher header.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sub license, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY CLAIM,
|
||||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef _VP9_H_
|
||||
#define _VP9_H_
|
||||
|
||||
#include "context.h"
|
||||
#include "request.h"
|
||||
#include "surface.h"
|
||||
|
||||
int vp9_set_controls(struct request_data *driver_data,
|
||||
struct object_context *context,
|
||||
struct object_surface *surface);
|
||||
|
||||
#endif /* _VP9_H_ */
|
||||
@@ -0,0 +1,167 @@
|
||||
/*
|
||||
* cap_pool_probe_pattern.c — synthetic regression test for the
|
||||
* iter5 sonnet C4 / iter6 candidate A "cap_pool resolution-change race."
|
||||
*
|
||||
* Exercises the surface-allocation pattern that originally tripped
|
||||
* REQBUFS-EBUSY on the iter5-end driver: vaCreateSurfaces at one
|
||||
* resolution, then vaDestroySurfaces, then vaCreateSurfaces at a
|
||||
* different resolution. iter6's REINIT discipline + cap_pool's
|
||||
* REQBUFS(0)-on-CAPTURE-and-OUTPUT during S_FMT-on-resolution-change
|
||||
* (CreateSurfaces2 in surface.c) closes this race; this test anchors
|
||||
* that fact with a deterministic repro.
|
||||
*
|
||||
* Build:
|
||||
* gcc -O2 -Wall -Wextra -o cap_pool_probe_pattern \
|
||||
* cap_pool_probe_pattern.c \
|
||||
* $(pkg-config --cflags --libs libva libva-drm)
|
||||
*
|
||||
* Run:
|
||||
* LIBVA_DRIVER_NAME=v4l2_request \
|
||||
* LIBVA_V4L2_REQUEST_VIDEO_PATH=/dev/video1 \
|
||||
* LIBVA_V4L2_REQUEST_MEDIA_PATH=/dev/media0 \
|
||||
* ./cap_pool_probe_pattern
|
||||
*
|
||||
* Pass criterion (on iter6 driver and later):
|
||||
* - Exit code 0
|
||||
* - No "REQBUFS" / "EBUSY" / "Unable to request buffers" /
|
||||
* "Unable to set format" lines on the v4l2-request driver's stderr
|
||||
* - vainfo or visual inspection confirms the test program reached
|
||||
* the "PASS" line on stdout
|
||||
*
|
||||
* Fail behavior pre-iter5: vaCreateSurfaces at the second resolution
|
||||
* would emit REQBUFS-EBUSY because OUTPUT/CAPTURE buffers from the
|
||||
* first allocation hadn't been torn down before S_FMT was attempted
|
||||
* on the new resolution. iter5's CreateSurfaces2 added the dual
|
||||
* REQBUFS(0) drain; iter6's REINIT keeps the OUTPUT pool's request_fd
|
||||
* lifecycle clean across the destroy-recreate cycle.
|
||||
*/
|
||||
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <va/va.h>
|
||||
#include <va/va_drm.h>
|
||||
|
||||
#define DRM_RENDER_NODE "/dev/dri/renderD128"
|
||||
|
||||
/* Map a VAStatus code to the error text libva provides for it. */
static const char *va_status_str(VAStatus s)
{
	const char *text = vaErrorStr(s);

	return text;
}
|
||||
|
||||
#define VA_OK_OR_FAIL(call, msg) do { \
|
||||
VAStatus _vs = (call); \
|
||||
if (_vs != VA_STATUS_SUCCESS) { \
|
||||
fprintf(stderr, "FAIL: %s: %s (0x%x)\n", \
|
||||
(msg), va_status_str(_vs), _vs); \
|
||||
return 10; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
int main(void)
|
||||
{
|
||||
int drm_fd;
|
||||
VADisplay dpy;
|
||||
int va_major = 0, va_minor = 0;
|
||||
VAConfigID config = VA_INVALID_ID;
|
||||
VAContextID context = VA_INVALID_ID;
|
||||
VASurfaceID small_surfaces[4];
|
||||
VASurfaceID big_surfaces[4];
|
||||
const unsigned int small_w = 128, small_h = 128;
|
||||
const unsigned int big_w = 1920, big_h = 1080;
|
||||
|
||||
/* Open render node + libva display. */
|
||||
drm_fd = open(DRM_RENDER_NODE, O_RDWR);
|
||||
if (drm_fd < 0) {
|
||||
fprintf(stderr, "FAIL: open(%s): %s\n",
|
||||
DRM_RENDER_NODE, strerror(errno));
|
||||
return 1;
|
||||
}
|
||||
|
||||
dpy = vaGetDisplayDRM(drm_fd);
|
||||
if (dpy == NULL) {
|
||||
fprintf(stderr, "FAIL: vaGetDisplayDRM returned NULL\n");
|
||||
close(drm_fd);
|
||||
return 2;
|
||||
}
|
||||
|
||||
VA_OK_OR_FAIL(vaInitialize(dpy, &va_major, &va_minor),
|
||||
"vaInitialize");
|
||||
printf("libva %d.%d initialized via %s\n", va_major, va_minor,
|
||||
DRM_RENDER_NODE);
|
||||
|
||||
/*
|
||||
* vaCreateConfig with H.264 Main + VLD entrypoint forces our
|
||||
* driver's RequestCreateConfig to set up the H.264 decode path,
|
||||
* which is the path that reaches CreateSurfaces2 (and the
|
||||
* resolution-change handling there).
|
||||
*/
|
||||
VA_OK_OR_FAIL(vaCreateConfig(dpy, VAProfileH264Main, VAEntrypointVLD,
|
||||
NULL, 0, &config),
|
||||
"vaCreateConfig(H264Main, VLD)");
|
||||
|
||||
/* Phase 1: allocate small probe-pattern surfaces.
|
||||
*
|
||||
* iter5 sonnet C4 specified the race as vaCreateSurfaces(small)
|
||||
* then vaCreateSurfaces(big), allocation-only — matching mpv's
|
||||
* libplacebo probe pattern that surfaced the original failure.
|
||||
* No context creation needed for the C4 race; the cap_pool's
|
||||
* resolution-change handling lives in CreateSurfaces2 itself
|
||||
* (REQBUFS(0)+S_FMT pair on the OUTPUT queue, cap_pool_destroy
|
||||
* + cap_pool_init on the CAPTURE queue).
|
||||
*
|
||||
* (vaCreateContext + recreate at a new resolution surfaced an
|
||||
* additional STREAMON-on-recreate failure during iter7 Phase 7
|
||||
* verification. That's iter8 candidate; out of scope for the C4
|
||||
* regression test.)
|
||||
*/
|
||||
printf("Phase 1: vaCreateSurfaces %ux%u, count=4\n", small_w, small_h);
|
||||
VA_OK_OR_FAIL(vaCreateSurfaces(dpy, VA_RT_FORMAT_YUV420,
|
||||
small_w, small_h, small_surfaces, 4,
|
||||
NULL, 0),
|
||||
"vaCreateSurfaces (small)");
|
||||
|
||||
/* Phase 2: dispose small surfaces. Our driver's CreateSurfaces2
|
||||
* keeps the cap_pool initialized at the small resolution; the
|
||||
* pool will be torn down + rebuilt by Phase 3's resolution-change
|
||||
* branch in CreateSurfaces2.
|
||||
*/
|
||||
printf("Phase 2: vaDestroySurfaces (small)\n");
|
||||
VA_OK_OR_FAIL(vaDestroySurfaces(dpy, small_surfaces, 4),
|
||||
"vaDestroySurfaces (small)");
|
||||
|
||||
/* Phase 3: allocate at the new (much larger) resolution. This is
|
||||
* the C4 race-hitting path: pre-iter5 hit REQBUFS-EBUSY because
|
||||
* CAPTURE/OUTPUT buffers from the small allocation hadn't been
|
||||
* torn down before S_FMT on the new size. iter5's CreateSurfaces2
|
||||
* added the dual REQBUFS(0) drain; iter7 also adds OUTPUT pool
|
||||
* teardown for the case where a context-bound resolution change
|
||||
* leaves the request_pool stale (defensive — not exercised in
|
||||
* this no-context test path).
|
||||
*/
|
||||
printf("Phase 3: vaCreateSurfaces %ux%u, count=4 (resolution change)\n",
|
||||
big_w, big_h);
|
||||
VA_OK_OR_FAIL(vaCreateSurfaces(dpy, VA_RT_FORMAT_YUV420,
|
||||
big_w, big_h, big_surfaces, 4,
|
||||
NULL, 0),
|
||||
"vaCreateSurfaces (big)");
|
||||
|
||||
/* Phase 4: clean up. */
|
||||
printf("Phase 4: cleanup\n");
|
||||
VA_OK_OR_FAIL(vaDestroySurfaces(dpy, big_surfaces, 4),
|
||||
"vaDestroySurfaces (big)");
|
||||
VA_OK_OR_FAIL(vaDestroyConfig(dpy, config),
|
||||
"vaDestroyConfig");
|
||||
VA_OK_OR_FAIL(vaTerminate(dpy),
|
||||
"vaTerminate");
|
||||
close(drm_fd);
|
||||
(void)context; /* unused in the C4-faithful no-context test path */
|
||||
|
||||
printf("PASS: cap_pool probe-pattern resolution-change handled cleanly.\n");
|
||||
printf("Inspect driver stderr for absence of REQBUFS/EBUSY/Unable lines.\n");
|
||||
return 0;
|
||||
}
|
||||
Executable
+53
@@ -0,0 +1,53 @@
|
||||
#!/bin/bash
# run_cap_pool_probe.sh — orchestrate the cap_pool probe-pattern regression test.
#
# Runs the cap_pool_probe_pattern test program with the v4l2_request driver
# and grep-checks driver stderr for race indicators. Exits 0 on PASS, 1 on FAIL,
# 2 when the test binary is missing or not executable.
#
# Usage: ./run_cap_pool_probe.sh [path_to_test_binary]
# If no argument, looks for ./cap_pool_probe_pattern in the same directory.

set -eu

BIN="${1:-$(dirname "$0")/cap_pool_probe_pattern}"

if [[ ! -x "$BIN" ]]; then
    echo "FAIL: test binary not found or not executable: $BIN" >&2
    echo "Build it first:" >&2
    echo " gcc -O2 -Wall -Wextra -o $BIN $(dirname "$0")/cap_pool_probe_pattern.c \\" >&2
    echo " \$(pkg-config --cflags --libs libva libva-drm)" >&2
    exit 2
fi

LOG=$(mktemp -t cap_pool_probe.XXXXXX.log)
trap 'rm -f "$LOG"' EXIT

# Capture the exit status through `||`: under `set -e` a bare failing command
# would abort the script right here, before the log is shown and before the
# documented exit code 1 can be produced.
rc=0
env LIBVA_DRIVER_NAME=v4l2_request \
    LIBVA_V4L2_REQUEST_VIDEO_PATH=/dev/video1 \
    LIBVA_V4L2_REQUEST_MEDIA_PATH=/dev/media0 \
    "$BIN" >"$LOG" 2>&1 || rc=$?

echo "--- test program output ---"
cat "$LOG"
echo "--- end output ---"

if [[ "$rc" -ne 0 ]]; then
    echo "FAIL: test binary exited with rc=$rc" >&2
    exit 1
fi

# Race indicators on driver-prefixed lines only (avoids matching the
# test program's own informational output). Driver log lines start with
# "v4l2-request:".
race_lines=$(grep -E '^v4l2-request:' "$LOG" \
    | grep -iE 'REQBUFS|EBUSY|Unable to request buffers|Unable to set format' \
    || true)
if [[ -n "$race_lines" ]]; then
    echo "FAIL: driver stderr contains race indicators:" >&2
    echo "$race_lines" >&2
    exit 1
fi

echo "PASS: cap_pool probe-pattern test clean (no race indicators)."
exit 0
|
||||
Executable
+139
@@ -0,0 +1,139 @@
|
||||
#!/bin/bash
# run_msync_pixel_verify.sh — verify decoded pixel correctness post-msync-removal.
#
# iter5 sweep commit d3a299b removed msync(MS_SYNC|MS_INVALIDATE) from the
# CAPTURE buffer DQBUF path alongside the iter1 patch-0010 hex-dump diagnostic.
# iter5 Phase 5 sonnet caveat C3 flagged: no formal pixel-correctness check
# was done. This script is that check.
#
# Approach:
# 1. SW reference: ffmpeg libavcodec H.264 decode of bbb_1080p30_h264.mp4,
#    first 100 frames, NV12 raw output -> sw_ref.yuv.
# 2. HW subject: same input through our v4l2_request driver via
#    ffmpeg -hwaccel vaapi -hwaccel_output_format vaapi
#    -i ... -vf hwdownload,format=nv12 -f rawvideo -pix_fmt nv12
#    Captures the post-DQBUF buffer through libva readback, exercising
#    the same code path we removed msync from.
# 3. Compare: byte-for-byte cmp + per-frame sha256.
#
# Pass: byte-for-byte identical (or per-frame sha matches) -> msync
#       verifiably unnecessary on this hardware/kernel; iter5 sonnet C3 closes.
# Fail: divergence; restore msync in surface.c, re-run, document outcome.
#
# Exit codes: 0 PASS, 1 pixel/size mismatch, 2 setup problem (fixture,
# ffprobe, empty reference), 3 size mismatch explained by MB padding.
#
# Usage: ./run_msync_pixel_verify.sh [fixture_path]
# If no argument, defaults to /home/mfritsche/fourier-test/bbb_1080p30_h264.mp4

set -eu

FIXTURE="${1:-/home/mfritsche/fourier-test/bbb_1080p30_h264.mp4}"
N_FRAMES=100
WORKDIR=$(mktemp -d -t msync_verify.XXXXXX)
trap 'rm -rf "$WORKDIR"' EXIT

if [[ ! -f "$FIXTURE" ]]; then
    echo "FAIL: fixture not found: $FIXTURE" >&2
    exit 2
fi

# Probe fixture dimensions for crop alignment of the HW path.
# Hantro pads height to MB boundaries (16-line align); FFmpeg SW decode
# returns crop-aligned (visible) frame size. Without explicit cropping
# on the HW side, hwdownload + format=nv12 emits MB-padded frames, which
# would diverge in size from SW even if pixels are correct.
#
# `read` returns non-zero when ffprobe prints nothing; `|| true` keeps
# `set -e` from aborting silently so the diagnostic below can fire.
read -r FIXTURE_W FIXTURE_H < <(ffprobe -v error -select_streams v:0 \
    -show_entries stream=width,height -of csv=p=0 "$FIXTURE" \
    | tr ',' ' ') || true
if [[ -z "${FIXTURE_W:-}" || -z "${FIXTURE_H:-}" ]]; then
    echo "FAIL: ffprobe could not read width/height from $FIXTURE" >&2
    exit 2
fi

echo "Fixture: $FIXTURE ($FIXTURE_W x $FIXTURE_H)"
echo "Frames: $N_FRAMES"
echo "Workdir: $WORKDIR"
echo

# 1. SW reference
echo "[1/3] FFmpeg SW decode -> sw_ref.yuv"
ffmpeg -hide_banner -loglevel error -y \
    -i "$FIXTURE" \
    -frames:v "$N_FRAMES" \
    -f rawvideo -pix_fmt nv12 \
    "$WORKDIR/sw_ref.yuv"
SW_BYTES=$(stat -c %s "$WORKDIR/sw_ref.yuv")
SW_SHA=$(sha256sum "$WORKDIR/sw_ref.yuv" | cut -d' ' -f1)
echo " sw_ref.yuv: $SW_BYTES bytes, sha256=$SW_SHA"

# An empty reference would compare equal to an empty HW capture and be
# reported as PASS; it would also make the per-frame block size zero.
if [[ "$SW_BYTES" -eq 0 ]]; then
    echo "FAIL: SW reference decode produced no data; nothing to compare." >&2
    exit 2
fi

# 2. HW subject via libva v4l2_request
# Explicit crop=$FIXTURE_W:$FIXTURE_H after hwdownload normalizes any
# MB-padding the HW driver applies (hantro pads height to multiples of
# 16). Without this crop, an iter6+ correct decode could falsely
# diverge in total byte count from the SW reference.
echo "[2/3] FFmpeg HW decode via v4l2_request driver -> hw_capture.yuv"
env LIBVA_DRIVER_NAME=v4l2_request \
    LIBVA_V4L2_REQUEST_VIDEO_PATH=/dev/video1 \
    LIBVA_V4L2_REQUEST_MEDIA_PATH=/dev/media0 \
    ffmpeg -hide_banner -loglevel error -y \
    -hwaccel vaapi -hwaccel_output_format vaapi \
    -i "$FIXTURE" \
    -vf "hwdownload,format=nv12,crop=$FIXTURE_W:$FIXTURE_H:0:0" \
    -frames:v "$N_FRAMES" \
    -f rawvideo -pix_fmt nv12 \
    "$WORKDIR/hw_capture.yuv"
HW_BYTES=$(stat -c %s "$WORKDIR/hw_capture.yuv")
HW_SHA=$(sha256sum "$WORKDIR/hw_capture.yuv" | cut -d' ' -f1)
echo " hw_capture.yuv: $HW_BYTES bytes, sha256=$HW_SHA"
echo

# 3. Compare
echo "[3/3] Compare"
if [[ "$SW_BYTES" -ne "$HW_BYTES" ]]; then
    # Diagnose stride/padding artifacts before declaring pixel
    # corruption. With explicit crop in step 2 this should not
    # happen, but if a future kernel change shifts the alignment
    # we want a clear diagnostic, not a false pixel-corruption
    # accusation.
    EXPECTED_SW=$(( FIXTURE_W * FIXTURE_H * 3 / 2 * N_FRAMES ))
    for PAD in 16 32; do
        PADDED_H=$(( (FIXTURE_H + PAD - 1) / PAD * PAD ))
        EXPECTED_PADDED=$(( FIXTURE_W * PADDED_H * 3 / 2 * N_FRAMES ))
        if [[ "$HW_BYTES" -eq "$EXPECTED_PADDED" ]]; then
            echo "DIAGNOSTIC: HW size $HW_BYTES matches MB-padded layout" >&2
            echo " ($FIXTURE_W x $PADDED_H, $PAD-line align). The crop=$FIXTURE_W:$FIXTURE_H" >&2
            echo " filter step did not normalize. Check FFmpeg version / hwdownload behavior." >&2
            echo " This is a stride artifact, not pixel corruption." >&2
            exit 3
        fi
    done
    echo "FAIL: size mismatch (SW=$SW_BYTES vs HW=$HW_BYTES, expected $EXPECTED_SW)" >&2
    echo " Different frame count or NV12 packing — investigate." >&2
    exit 1
fi

if [[ "$SW_SHA" == "$HW_SHA" ]]; then
    echo "PASS: byte-for-byte identical."
    echo " msync removal verified safe on this hardware/kernel."
    exit 0
fi

# Per-frame divergence analysis on full-buffer mismatch.
# dd reads frame i as block number i of size FRAME_SIZE.
echo "Buffer-level sha differs. Computing per-frame divergence..."
FRAME_SIZE=$(( SW_BYTES / N_FRAMES ))
DIVERGENT=0
for ((i = 0; i < N_FRAMES; i++)); do
    SW_FRAME_SHA=$(dd if="$WORKDIR/sw_ref.yuv" bs="$FRAME_SIZE" \
        count=1 skip="$i" 2>/dev/null | sha256sum | cut -d' ' -f1)
    HW_FRAME_SHA=$(dd if="$WORKDIR/hw_capture.yuv" bs="$FRAME_SIZE" \
        count=1 skip="$i" 2>/dev/null | sha256sum | cut -d' ' -f1)
    if [[ "$SW_FRAME_SHA" != "$HW_FRAME_SHA" ]]; then
        DIVERGENT=$(( DIVERGENT + 1 ))
        # Only the first five divergent frames are listed.
        [[ "$DIVERGENT" -le 5 ]] && \
            echo " frame $i: SW=$SW_FRAME_SHA HW=$HW_FRAME_SHA"
    fi
done

echo "FAIL: $DIVERGENT / $N_FRAMES frames diverge from SW reference."
echo " Action: restore msync(MS_SYNC|MS_INVALIDATE) in surface.c"
echo " RequestSyncSurface DQBUF path; re-run this script."
exit 1
|
||||
Executable
+299
@@ -0,0 +1,299 @@
|
||||
#!/bin/bash
# run_perf_binding_cell.sh — iter8 perf binding cell.
#
# Anchors campaign-wide claims with measured numbers. Runs four consumer
# configurations for $DURATION seconds each on $FIXTURE and emits a
# markdown table comparing:
#   1. mpv --hwdec=vaapi      (DMA-BUF zero-copy through libva)
#   2. mpv --hwdec=vaapi-copy (HW decode + VAImage readback)
#   3. firefox                (iter5-amend, sandbox enabled, file:// URL)
#   4. mpv --hwdec=no         (SW decode baseline / control)
#
# For each consumer: CPU% (median + p90), GPU freq (median MHz), drops in
# measurement window, p50 frame interval (ms), VmRSS delta (MiB).
#
# Usage:
#   ./run_perf_binding_cell.sh [fixture_path]
#
# If no argument, defaults to /home/mfritsche/fourier-test/bbb_1080p30_h264.mp4
# Override DURATION via env: DURATION=60 ./run_perf_binding_cell.sh
#
# Reproducibility: results depend on (a) the iter7-end driver being installed
# at /usr/lib/dri/v4l2_request_drv_video.so, (b) ohm idle (no other compute
# load), (c) fixture present at the expected path. Run on a stable thermal
# state (after a few minutes of cool-down).

set -eu

FIXTURE="${1:-/home/mfritsche/fourier-test/bbb_1080p30_h264.mp4}"

# Tunables: an empty or unset environment value falls back to the default.
: "${DURATION:=30}"
: "${GPU_DEVFREQ_PATH:=/sys/class/devfreq/fde60000.gpu/cur_freq}"
if [[ -z "${WORKDIR:-}" ]]; then
    WORKDIR=$(mktemp -d -t perf_binding.XXXXXX)
fi

# DISPLAY/Wayland env for the operator's session, needed for Firefox under sudo.
: "${XDG_RUNTIME_DIR:=/run/user/1001}"
: "${WAYLAND_DISPLAY:=wayland-0}"
: "${DISPLAY:=:0}"
: "${XAUTHORITY:=/run/user/1001/xauth_pxiMur}"
export XDG_RUNTIME_DIR WAYLAND_DISPLAY DISPLAY XAUTHORITY

# libva env vars for the v4l2_request driver path.
LIBVA_DRIVER_NAME=v4l2_request
LIBVA_V4L2_REQUEST_VIDEO_PATH=/dev/video1
LIBVA_V4L2_REQUEST_MEDIA_PATH=/dev/media0
export LIBVA_DRIVER_NAME LIBVA_V4L2_REQUEST_VIDEO_PATH LIBVA_V4L2_REQUEST_MEDIA_PATH

[[ -f "$FIXTURE" ]] || {
    echo "FAIL: fixture not found: $FIXTURE" >&2
    exit 2
}

mkdir -p "$WORKDIR"

# Run banner: records the inputs the measurements depend on.
printf '%s\n' "Fixture: $FIXTURE ($(stat -c %s "$FIXTURE") bytes)"
printf '%s\n' "Duration: ${DURATION}s per configuration"
printf '%s\n' "Workdir: $WORKDIR"
printf '%s\n' "GPU freq: $GPU_DEVFREQ_PATH"
printf '%s\n' "Driver sha: $(sha256sum /usr/lib/dri/v4l2_request_drv_video.so | cut -d' ' -f1)"
printf '\n'
|
||||
|
||||
# percentile_from_stream FILE PCT -> echo the PCT-th percentile of FILE
#
# FILE holds one numeric value per line (no header); values may be floats and
# do NOT need to be pre-sorted. PCT is a percentile such as 50 or 90.
# The rank is int(PCT/100 * N + 0.5), clamped to [1, N]. An empty input
# prints "0".
#
# Sorting is delegated to sort(1) in general-numeric mode instead of a
# quadratic exchange sort inside awk; LC_ALL=C keeps "." as the decimal
# separator regardless of the operator's locale.
percentile_from_stream() {
    local file="$1" pct="$2"
    LC_ALL=C sort -g -- "$file" | awk -v pct="$pct" '
        { a[NR] = $1 }
        END {
            if (NR == 0) { print "0"; exit }
            idx = int((pct/100.0) * NR + 0.5)
            if (idx < 1) idx = 1
            if (idx > NR) idx = NR
            print a[idx]
        }'
}
|
||||
|
||||
# poll_gpu_freq OUT
#
# Sampler intended to run in the background while a consumer is active.
# Truncates OUT, then every 0.1 s appends the current contents of
# $GPU_DEVFREQ_PATH (one reading per line) for as long as /proc/$BG_PARENT_PID
# exists. BG_PARENT_PID is a global the caller must set before starting the
# poller. Unreadable sysfs nodes and failed reads are skipped silently.
poll_gpu_freq() {
    local sink="$1"

    # Start from an empty sample file.
    : >"$sink"

    until [[ ! -e "/proc/$BG_PARENT_PID" ]]; do
        [[ -r "$GPU_DEVFREQ_PATH" ]] \
            && { cat "$GPU_DEVFREQ_PATH" 2>/dev/null >>"$sink" || true; }
        sleep 0.1
    done
}
|
||||
|
||||
# Run a single consumer configuration and append one row to results.tsv.
# Args:
#   $1  label (used for the per-consumer log directory name, no spaces)
#   $2  launcher cmd (eval'd in a background subshell; should be a single line)
#   $3  'mpv' or 'firefox' — affects how we find the PID to track
#
# Globals read:    WORKDIR, DURATION
# Globals written: BG_PARENT_PID (consumed by poll_gpu_freq)
#
# Returns 1 for an unknown kind (checked before anything is launched).
# Returns 0 otherwise, including when the process to track cannot be found;
# in that case the consumer still runs for DURATION seconds so its log gets
# data, but no row is written.
run_consumer() {
    local label="$1"
    local launcher="$2"
    local kind="$3"
    local logdir="$WORKDIR/$label"

    # Reject an unknown kind before launching, so a typo cannot leave a
    # consumer running in the background.
    case "$kind" in
        mpv|firefox) ;;
        *)
            echo " bad kind: $kind" >&2
            return 1
            ;;
    esac

    mkdir -p "$logdir"

    echo "=== Running: $label ==="

    # Kill any running firefox/mpv first to clean state.
    pkill -f firefox 2>/dev/null || true
    pkill -x mpv 2>/dev/null || true
    sleep 1

    # Launch consumer in background, capture stdout+stderr to a log.
    (
        eval "$launcher" >"$logdir/consumer.log" 2>&1
    ) &
    local launcher_pid=$!

    # Wait briefly for the process tree to spawn the actual decode worker.
    sleep 4

    local target_pid=""
    case "$kind" in
        mpv)
            target_pid=$(pgrep -x mpv | head -1)
            ;;
        firefox)
            # Firefox's RDD process holds /dev/video1; that's the one with
            # the libva decoder context. Wait an extra few seconds for it
            # to spawn and bind the device.
            sleep 6
            target_pid=$(pgrep -af 'contentproc.*\brdd\b' | awk '{print $1}' | head -1)
            if [[ -z "${target_pid:-}" ]]; then
                # Fallback: find whichever process holds /dev/video1.
                target_pid=$(sudo lsof -t /dev/video1 2>/dev/null | head -1 || true)
            fi
            ;;
    esac

    if [[ -z "${target_pid:-}" ]]; then
        echo " WARN: could not locate $kind process; skipping pidstat" >&2
        # Let the consumer run for the duration anyway so the log gets data.
        sleep "$DURATION"
        kill -TERM "$launcher_pid" 2>/dev/null || true
        pkill -f firefox 2>/dev/null || true
        pkill -x mpv 2>/dev/null || true
        return 0
    fi

    echo " Tracking PID $target_pid"

    # VmRSS at start. The extra default covers a status file without a VmRSS
    # line, where awk succeeds but prints nothing.
    local rss_start
    rss_start=$(awk '/^VmRSS:/{print $2}' "/proc/$target_pid/status" 2>/dev/null || echo 0)
    rss_start="${rss_start:-0}"
    echo " VmRSS start: ${rss_start} kB"

    # Poll GPU freq in background (keyed off launcher_pid).
    BG_PARENT_PID=$launcher_pid
    poll_gpu_freq "$logdir/gpu_freq.log" &
    local poll_pid=$!

    # Run pidstat for $DURATION seconds (one sample per second).
    pidstat -u -p "$target_pid" 1 "$DURATION" >"$logdir/pidstat.log" 2>&1 || true

    # VmRSS at end (before killing). Falls back to the start value when the
    # process is already gone, which yields a delta of 0.
    local rss_end
    rss_end=$(awk '/^VmRSS:/{print $2}' "/proc/$target_pid/status" 2>/dev/null || echo "$rss_start")
    rss_end="${rss_end:-$rss_start}"

    # Stop everything. Reap the poller so it cannot outlive this call.
    kill "$poll_pid" 2>/dev/null || true
    wait "$poll_pid" 2>/dev/null || true
    kill -TERM "$launcher_pid" 2>/dev/null || true
    pkill -f firefox 2>/dev/null || true
    pkill -x mpv 2>/dev/null || true
    sleep 1

    # Parse pidstat by header: locate the %CPU column index from the
    # column-name row (where any field equals "%CPU"), then apply it to data
    # rows. pidstat default output has no '#' header marker — the header is
    # the first row containing "%CPU" as a field.
    awk '
        # The trailing "Average:" summary has a numeric %CPU in the same
        # column as per-second rows when timestamps are a single field, so a
        # numeric test alone does not exclude it. Drop it explicitly.
        $1 ~ /^Average/ { next }
        # Header row: any line where some field equals "%CPU".
        !col {
            for (i = 1; i <= NF; i++) if ($i == "%CPU") { col = i; next }
        }
        # Data row: lines whose value at $col is numeric.
        col && NF >= col && $col ~ /^[0-9]+(\.[0-9]+)?$/ {
            print $col
        }
    ' "$logdir/pidstat.log" >"$logdir/cpu_pct.log" || true

    local cpu_p50 cpu_p90
    if [[ -s "$logdir/cpu_pct.log" ]]; then
        cpu_p50=$(percentile_from_stream "$logdir/cpu_pct.log" 50)
        cpu_p90=$(percentile_from_stream "$logdir/cpu_pct.log" 90)
    else
        cpu_p50="ERR"
        cpu_p90="ERR"
    fi

    # GPU freq median. Values are Hz; convert to MHz via temp file (avoids
    # unreliable /dev/stdin in a nested subshell-over-pipe).
    local gpu_med_mhz
    if [[ -s "$logdir/gpu_freq.log" ]]; then
        awk '{print $1/1000000}' "$logdir/gpu_freq.log" >"$logdir/gpu_freq_mhz.log"
        gpu_med_mhz=$(percentile_from_stream "$logdir/gpu_freq_mhz.log" 50)
    else
        gpu_med_mhz="—"
    fi

    # RSS delta MiB (VmRSS is reported in kB).
    local rss_delta_mib
    rss_delta_mib=$(awk -v s="$rss_start" -v e="$rss_end" 'BEGIN{printf "%.1f", (e-s)/1024.0}')

    # Drops + p50 frame interval — only available for mpv.
    local drops="—"
    local p50_frame_ms="—"
    if [[ "$kind" == "mpv" ]]; then
        # Takes the last "frame-drop-count ... = N" occurrence in the log.
        # When nothing matches, the value stays "—" rather than 0, so that
        # "not measured" is never reported as "no drops".
        # NOTE(review): a status message that prints only the bare number
        # will not match this pattern — confirm the launcher's format.
        drops=$(grep -oE 'frame-drop-count[^\t ]*\s*=\s*[0-9]+' "$logdir/consumer.log" \
            | awk -F= '{print $2}' | tr -d ' ' | tail -1)
        drops="${drops:-—}"
        # p50 frame interval from mpv vsync-jitter or frame timing — leave
        # as "—" unless mpv emitted detailed timing.
    fi

    # Emit row. Fields are tab-separated because the markdown generator
    # splits results.tsv with -F'\t'.
    printf '%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
        "$label" "$cpu_p50" "$cpu_p90" "$drops" "$p50_frame_ms" "$gpu_med_mhz" "$rss_delta_mib" \
        >>"$WORKDIR/results.tsv"

    echo " CPU% p50=$cpu_p50 p90=$cpu_p90 drops=$drops gpu_med=$gpu_med_mhz MHz rss_delta=$rss_delta_mib MiB"
    echo
}
|
||||
|
||||
# Header for results. Tab-separated, like the -F'\t' split applied when the
# markdown table is generated from this file.
printf '%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
    consumer cpu_p50 cpu_p90 "drops_${DURATION}s" p50_frame_ms gpu_med_mhz rss_delta_mib \
    >"$WORKDIR/results.tsv"

# === Configurations ===

# Launcher strings are eval'd by run_consumer, so every value spliced into
# them is shell-quoted once here. This keeps fixture paths containing spaces
# or quotes intact.
fixture_q=$(printf '%q' "$FIXTURE")

# A file:// URL needs an absolute path; the fixture may have been given
# relative to the current directory.
fixture_url_q=$(printf '%q' "file://$(realpath -- "$FIXTURE")")

# mpv expands ${frame-drop-count} itself. The literal "frame-drop-count="
# prefix gives the log the key=value form that run_consumer greps for; the
# bare property alone prints only a number and never matches.
status_msg_q="'frame-drop-count=\${frame-drop-count}'"

# libva settings for the v4l2_request driver, passed through `env` because
# sudo does not necessarily preserve the caller's environment.
libva_env="LIBVA_DRIVER_NAME=v4l2_request LIBVA_V4L2_REQUEST_VIDEO_PATH=/dev/video1 LIBVA_V4L2_REQUEST_MEDIA_PATH=/dev/media0"

# 1. mpv DMA-BUF zero-copy
run_consumer "mpv-vaapi-dmabuf" \
    "sudo -u mfritsche env $libva_env \
        mpv --no-config --hwdec=vaapi --vo=null --no-audio \
        --term-status-msg=$status_msg_q \
        --length=$DURATION $fixture_q" \
    mpv

# 2. mpv vaapi-copy
run_consumer "mpv-vaapi-copy" \
    "sudo -u mfritsche env $libva_env \
        mpv --no-config --hwdec=vaapi-copy --vo=null --no-audio \
        --term-status-msg=$status_msg_q \
        --length=$DURATION $fixture_q" \
    mpv

# 3. Firefox-fourier (iter5-amend, sandbox enabled)
run_consumer "firefox-fourier-hw" \
    "sudo -u mfritsche env XDG_RUNTIME_DIR=$XDG_RUNTIME_DIR \
        WAYLAND_DISPLAY=$WAYLAND_DISPLAY DISPLAY=$DISPLAY \
        XAUTHORITY=$XAUTHORITY \
        $libva_env \
        firefox --new-window $fixture_url_q" \
    firefox

# 4. SW baseline
run_consumer "mpv-sw-baseline" \
    "sudo -u mfritsche mpv --no-config --hwdec=no --vo=null --no-audio \
        --term-status-msg=$status_msg_q \
        --length=$DURATION $fixture_q" \
    mpv
|
||||
|
||||
# === Generate markdown table ===
#
# Writes $WORKDIR/perf_binding_cell.md: a provenance header (date, host,
# kernel, driver hash, fixture, duration) followed by one table row per data
# line of results.tsv (its first line is the column header and is skipped).
# The finished report is then echoed to the terminal.
{
    printf '%s\n' "# Performance binding cell — iter8 (libva-multiplanar campaign)"
    printf '\n'
    printf '%s\n' \
        "Run date: $(date -Iseconds)" \
        "Host: $(uname -n) ($(uname -m))" \
        "Kernel: $(uname -r)" \
        "Driver sha256: \`$(sha256sum /usr/lib/dri/v4l2_request_drv_video.so | cut -d' ' -f1)\`" \
        "Fixture: \`$FIXTURE\` ($(stat -c %s "$FIXTURE") bytes)" \
        "Duration per consumer: ${DURATION}s"
    printf '\n'
    printf '%s\n' \
        "| Consumer | CPU% p50 | CPU% p90 | Drops in window | p50 frame ms | GPU MHz median | VmRSS Δ MiB |" \
        "|---|---|---|---|---|---|---|"
    # Always emit exactly seven cells; missing fields render as empty cells.
    tail -n +2 "$WORKDIR/results.tsv" | awk -F'\t' '{
        row = "|"
        for (i = 1; i <= 7; i++) row = row " " $i " |"
        print row
    }'
} >"$WORKDIR/perf_binding_cell.md"

printf '%s\n' \
    "=== Done ===" \
    "Results: $WORKDIR/perf_binding_cell.md" \
    "Per-consumer logs: $WORKDIR/{mpv-vaapi-dmabuf,mpv-vaapi-copy,firefox-fourier-hw,mpv-sw-baseline}/"
printf '\n'
cat "$WORKDIR/perf_binding_cell.md"
|
||||
Reference in New Issue
Block a user