diff --git a/src/h264.c b/src/h264.c index 33872d2..7cc72a0 100644 --- a/src/h264.c +++ b/src/h264.c @@ -469,6 +469,32 @@ static void h264_va_matrix_to_v4l2(struct request_data *driver_data, sizeof(v4l2_matrix->scaling_list_8x8[3])); } +/* + * H.264 spec default scaling matrices: Flat_4x4_16 and Flat_8x8_16 + * (every entry = 16). When seq_scaling_matrix_present_flag and + * pic_scaling_matrix_present_flag are both false, the bitstream + * carries no explicit scaling lists and the decoder uses these + * flat defaults — matching ITU-T H.264 (08/2024) §7.4.2.1.1.1 + * (sequence scaling) and §7.4.2.2 (picture scaling). + * + * Why we always provide the matrix: hantro G1's set_params reads + * pps->flags & V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT to drive + * the G1_REG_DEC_CTRL2_TYPE1_QUANT_E hardware bit. FFmpeg's + * v4l2_request_h264.c always submits the SCALING_MATRIX control + * with the spec default when the bitstream omits explicit lists, + * and always sets the SCALING_MATRIX_PRESENT flag (commit + * comment: "FFmpeg always provide a scaling matrix"). We mirror + * that so the kernel sees a consistent control set across drivers. 
+ */ +static void h264_default_flat_scaling_matrix( + struct v4l2_ctrl_h264_scaling_matrix *v4l2_matrix) +{ + memset(v4l2_matrix->scaling_list_4x4, 16, + sizeof(v4l2_matrix->scaling_list_4x4)); + memset(v4l2_matrix->scaling_list_8x8, 16, + sizeof(v4l2_matrix->scaling_list_8x8)); +} + static void h264_copy_pred_table(struct v4l2_h264_weight_factors *factors, unsigned int num_refs, int16_t luma_weight[32], @@ -713,12 +739,60 @@ int h264_set_controls(struct request_data *driver_data, h264_va_picture_to_v4l2(driver_data, context, surface, &surface->params.h264.picture, &decode, &pps, &sps); - h264_va_matrix_to_v4l2(driver_data, context, - &surface->params.h264.matrix, &matrix); + + /* + * Populate the scaling matrix unconditionally: from VAAPI's + * VAIQMatrixBufferH264 when the consumer sent one this frame + * (matrix_set), otherwise from the H.264 spec flat defaults. + * Submitted to the kernel as V4L2_CID_STATELESS_H264_SCALING_MATRIX + * for every request — required for FFmpeg/hantro contract parity + * (see h264_default_flat_scaling_matrix() docblock). + */ + if (surface->params.h264.matrix_set) + h264_va_matrix_to_v4l2(driver_data, context, + &surface->params.h264.matrix, &matrix); + else + h264_default_flat_scaling_matrix(&matrix); + h264_va_slice_to_v4l2(driver_data, context, &surface->params.h264.slice, &surface->params.h264.picture, &slice, &weights); + /* + * Mirror SCALING_MATRIX_PRESENT in PPS flags. Hantro G1 set_params + * gates its G1_REG_DEC_CTRL2_TYPE1_QUANT_E register bit on this; + * FFmpeg sets it unconditionally with the comment "FFmpeg always + * provide a scaling matrix." We submit the matrix always (above), + * so the flag must be set always to match. + */ + pps.flags |= V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT; + + /* + * Populate pps->num_ref_idx_l0/l1_default_active_minus1. 
Hantro G1 + * writes both into G1_REG_DEC_CTRL6_REFIDX0_ACTIVE / REFIDX1_ACTIVE + * MMIO registers (via "(field) + 1", so an uninitialized 0 here + * would advertise "1 active reference per list" to hardware, wrong + * for I/IDR frames with 0 refs and wrong for B frames with >1). + * + * VAAPI's VAPictureParameterBufferH264 does not carry the parsed + * PPS num_ref_idx_l*_default_active_minus1 fields — those are in + * the bitstream's PPS NAL which VAAPI consumers parse client-side + * but don't forward. The closest available source is VASlice's + * num_ref_idx_l*_active_minus1, which is the per-slice override + * defaulting to the PPS value (H.264 §7.4.3 num_ref_idx_active_ + * override_flag). For most streams these values match; mismatch + * only on streams with explicit per-slice overrides. + * + * For IDR frames (no references), the values are not used by + * hantro's reference list builder, so a wrong value here is + * harmless. For inter frames it matters and slice-derived is + * the best we can do without a full PPS-NAL parser. + */ + pps.num_ref_idx_l0_default_active_minus1 = + surface->params.h264.slice.num_ref_idx_l0_active_minus1; + pps.num_ref_idx_l1_default_active_minus1 = + surface->params.h264.slice.num_ref_idx_l1_active_minus1; + /* * Derive PFRAME / BFRAME flags in v4l2_ctrl_h264_decode_params.flags * from VASliceParameterBufferH264.slice_type. VAAPI's slice_type @@ -766,16 +840,17 @@ int h264_set_controls(struct request_data *driver_data, /* * Build the per-request control list incrementally: - * - SPS, PPS, DECODE_PARAMS: always required (in either decode - * mode). - * - SCALING_MATRIX: gated on surface->params.h264.matrix_set, - * i.e. the consumer sent a VAIQMatrixBufferH264 this frame. - * This matches the H.264 spec: explicit scaling lists are - * present iff sps_scaling_matrix_present_flag || - * pps_scaling_matrix_present_flag, in which case VAAPI - * consumers send the matrix; otherwise the kernel uses - * spec-defined defaults. 
Independent of FRAME_BASED / - * SLICE_BASED. + * - SPS, PPS, DECODE_PARAMS, SCALING_MATRIX: always required. + * Hantro G1 reads the SCALING_MATRIX_PRESENT flag from PPS to + * gate hardware register G1_REG_DEC_CTRL2_TYPE1_QUANT_E and + * reads the matrix entries directly into hardware tables when + * decoding. FFmpeg always submits the matrix (with spec-default + * flat values when no explicit lists are in the bitstream); we + * match that — see h264_default_flat_scaling_matrix() docblock. + * Earlier patch 0012 made SCALING_MATRIX submission conditional + * on VAAPI's VAIQMatrixBuffer arrival; that was corpus-correct + * (bbb has no explicit scaling lists) but inconsistent with the + * hantro contract — replaced 2026-05-04. * - SLICE_PARAMS: SLICE_BASED only. Kernel doc * ext-ctrls-codec-stateless.rst (FRAME_BASED entry): * "When this mode is selected, the @@ -808,12 +883,10 @@ int h264_set_controls(struct request_data *driver_data, controls[num_controls].size = sizeof(decode); num_controls++; - if (surface->params.h264.matrix_set) { - controls[num_controls].id = V4L2_CID_STATELESS_H264_SCALING_MATRIX; - controls[num_controls].p_h264_scaling_matrix = &matrix; - controls[num_controls].size = sizeof(matrix); - num_controls++; - } + controls[num_controls].id = V4L2_CID_STATELESS_H264_SCALING_MATRIX; + controls[num_controls].p_h264_scaling_matrix = &matrix; + controls[num_controls].size = sizeof(matrix); + num_controls++; if (slice_based) { controls[num_controls].id = V4L2_CID_STATELESS_H264_SLICE_PARAMS;