From 841f616e74a323abd576e99798a168eac00afc3e Mon Sep 17 00:00:00 2001 From: Markus Fritsche Date: Sat, 2 May 2026 12:00:00 +0000 Subject: [PATCH] h264: gate SCALING_MATRIX submission on VAIQMatrixBuffer presence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit VAAPI signals "explicit scaling lists are present in the bitstream" implicitly: the consumer (ffmpeg-vaapi, mpv, etc.) sends a VAIQMatrixBufferH264 alongside RenderPicture iff sps_scaling_matrix_present_flag || pps_scaling_matrix_present_flag. When the bitstream uses default (flat) scaling, no IQMatrixBuffer arrives and the in-tree h264.matrix struct stays zero-initialised. fourier's existing codec_store_buffer for MPEG2 and HEVC tracks this via a per-surface iqmatrix_set boolean (surface.h::mpeg2.iqmatrix_set, h265.iqmatrix_set) — the H.264 path was missing the equivalent flag, so set_controls always submitted the scaling matrix, including the zero-initialised case. Symptom on hantro-vpu RK3568: when TRANSFORM_8X8_MODE is enabled in PPS, the kernel multiplies all 8x8 DCT coefficients by the zeroed scaling_list_8x8, producing a zeroed CAPTURE buffer despite a successful decode round-trip (no V4L2_BUF_FLAG_ERROR, bytesused=3655712 reported). Earlier draft of this patch unconditionally omitted SCALING_MATRIX in FRAME_BASED. That's corpus-correct (bbb has no explicit scaling lists) but the wrong predicate: the kernel-side gating is by "matrix-supplied vs. not," not by decode mode. Streams that signal explicit scaling lists must submit SCALING_MATRIX in either mode. Contract verification (audit_0008_decode_params_2026-05-01.md + hantro_h264.c::assemble_scaling_list): the kernel uses the supplied matrix when SCALING_MATRIX is in the control batch and falls back to spec-defined defaults when absent. Mode-independent. This patch: - surface.h: adds bool matrix_set to params.h264, mirroring mpeg2.iqmatrix_set / h265.iqmatrix_set. - picture.c codec_store_buffer (H.264 VAIQMatrixBufferType case): sets matrix_set = true when the buffer arrives. - picture.c RequestBeginPicture: resets matrix_set = false at the start of each Begin/Render/End cycle. - h264.c h264_set_controls: builds the controls[] array incrementally; SPS/PPS/DECODE_PARAMS always; SCALING_MATRIX iff matrix_set; SLICE_PARAMS only in SLICE_BASED; PRED_WEIGHTS only when both SLICE_BASED and V4L2_H264_CTRL_PRED_WEIGHTS_REQUIRED. The pre-existing FRAME_BASED-omits-SLICE_PARAMS rule is preserved — kernel doc ext-ctrls-codec-stateless.rst:752: "When this mode is selected, the V4L2_CID_STATELESS_H264_SLICE_PARAMS control shall not be set." Cross-reference: kernel UAPI section ext-ctrls-codec-stateless.rst V4L2_CID_STATELESS_H264_SCALING_MATRIX (matrix supplied iff explicit scaling lists in bitstream) and hantro_h264.c::assemble_scaling_list (consumes supplied matrix or falls back to defaults). Signed-off-by: Markus Fritsche --- src/h264.c | 114 +++++++++++++++++++++++++------------------------- src/picture.c | 2 + src/surface.h | 1 + 3 files changed, 61 insertions(+), 56 deletions(-) diff --git a/src/h264.c b/src/h264.c index b5c712d..b22beb4 100644 --- a/src/h264.c +++ b/src/h264.c @@ -553,66 +553,68 @@ int h264_set_controls(struct request_data *driver_data, sps.profile_idc = h264_profile_to_idc(profile); /* - * Per-request control batch, ordered so the controls REQUIRED in - * V4L2_STATELESS_H264_DECODE_MODE_FRAME_BASED come first - * (indices 0..3) and the SLICE_BASED-only controls come last - * (indices 4..5). + * Build the per-request control list incrementally: + * - SPS, PPS, DECODE_PARAMS: always required (in either decode + * mode). + * - SCALING_MATRIX: gated on surface->params.h264.matrix_set, + * i.e. the consumer sent a VAIQMatrixBufferH264 this frame. + * This matches the H.264 spec: explicit scaling lists are + * present iff sps_scaling_matrix_present_flag || + * pps_scaling_matrix_present_flag, in which case VAAPI + * consumers send the matrix; otherwise the kernel uses + * spec-defined defaults. Independent of FRAME_BASED / + * SLICE_BASED. + * - SLICE_PARAMS: SLICE_BASED only. Kernel doc + * ext-ctrls-codec-stateless.rst (FRAME_BASED entry): + * "When this mode is selected, the + * V4L2_CID_STATELESS_H264_SLICE_PARAMS control shall not be + * set." Submitting it under FRAME_BASED triggers cluster- + * validation EINVAL at error_idx=count. + * - PRED_WEIGHTS: SLICE_BASED + V4L2_H264_CTRL_PRED_WEIGHTS_REQUIRED. * - * Cross-reference: GStreamer gst-plugins-bad - * sys/v4l2codecs/gstv4l2codech264dec.c (commit 9e3e775, - * lines 1263-1304) gates SLICE_PARAMS and PRED_WEIGHTS on - * is_slice_based(self); under FRAME_BASED only SPS/PPS/ - * SCALING_MATRIX/DECODE_PARAMS are submitted. The kernel - * parses the bitstream itself in FRAME_BASED mode; submitting - * per-slice controls in that mode triggers cluster-validation - * EINVAL at error_idx=count. - */ - struct v4l2_ext_control controls[6] = { - { - .id = V4L2_CID_STATELESS_H264_SPS, - .p_h264_sps = &sps, - .size = sizeof(sps), - }, { - .id = V4L2_CID_STATELESS_H264_PPS, - .p_h264_pps = &pps, - .size = sizeof(pps), - }, { - .id = V4L2_CID_STATELESS_H264_SCALING_MATRIX, - .p_h264_scaling_matrix = &matrix, - .size = sizeof(matrix), - }, { - .id = V4L2_CID_STATELESS_H264_DECODE_PARAMS, - .p_h264_decode_params = &decode, - .size = sizeof(decode), - }, { - .id = V4L2_CID_STATELESS_H264_SLICE_PARAMS, - .p_h264_slice_params = &slice, - .size = sizeof(slice), - }, { - .id = V4L2_CID_STATELESS_H264_PRED_WEIGHTS, - .ptr = &weights, - .size = sizeof(weights), - } - }; - - /* - * Decode-mode dispatch. Patch 0002 unconditionally sets the - * device to FRAME_BASED, so we hardcode that here. When the - * planned probe-then-set commit lands, slice_based becomes - * context->decode_mode == V4L2_STATELESS_H264_DECODE_MODE_SLICE_BASED - * with context->decode_mode populated at CreateContext via - * VIDIOC_QUERYCTRL/G_EXT_CTRLS. - * - * FRAME_BASED: 4 controls (SPS, PPS, SCALING_MATRIX, DECODE_PARAMS). - * SLICE_BASED: +SLICE_PARAMS (always), +PRED_WEIGHTS (when - * V4L2_H264_CTRL_PRED_WEIGHTS_REQUIRED). + * Patch 0002 unconditionally sets the device to FRAME_BASED, + * so slice_based is hardcoded false here. When the planned + * probe-then-set commit lands, this becomes + * context->decode_mode == V4L2_STATELESS_H264_DECODE_MODE_SLICE_BASED. */ + struct v4l2_ext_control controls[6] = { 0 }; + unsigned int num_controls = 0; const bool slice_based = false; /* TODO: probe via context->decode_mode */ - unsigned int num_controls = 4; + + controls[num_controls].id = V4L2_CID_STATELESS_H264_SPS; + controls[num_controls].p_h264_sps = &sps; + controls[num_controls].size = sizeof(sps); + num_controls++; + + controls[num_controls].id = V4L2_CID_STATELESS_H264_PPS; + controls[num_controls].p_h264_pps = &pps; + controls[num_controls].size = sizeof(pps); + num_controls++; + + controls[num_controls].id = V4L2_CID_STATELESS_H264_DECODE_PARAMS; + controls[num_controls].p_h264_decode_params = &decode; + controls[num_controls].size = sizeof(decode); + num_controls++; + + if (surface->params.h264.matrix_set) { + controls[num_controls].id = V4L2_CID_STATELESS_H264_SCALING_MATRIX; + controls[num_controls].p_h264_scaling_matrix = &matrix; + controls[num_controls].size = sizeof(matrix); + num_controls++; + } + if (slice_based) { - num_controls = 5; - if (V4L2_H264_CTRL_PRED_WEIGHTS_REQUIRED(&pps, &slice)) - num_controls = 6; + controls[num_controls].id = V4L2_CID_STATELESS_H264_SLICE_PARAMS; + controls[num_controls].p_h264_slice_params = &slice; + controls[num_controls].size = sizeof(slice); + num_controls++; + + if (V4L2_H264_CTRL_PRED_WEIGHTS_REQUIRED(&pps, &slice)) { + controls[num_controls].id = V4L2_CID_STATELESS_H264_PRED_WEIGHTS; + controls[num_controls].ptr = &weights; + controls[num_controls].size = sizeof(weights); + num_controls++; + } } rc = v4l2_set_controls(driver_data->video_fd, surface->request_fd, diff --git a/src/picture.c b/src/picture.c index bab34f3..1001028 100644 --- a/src/picture.c +++ b/src/picture.c @@ -153,6 +153,7 @@ static VAStatus codec_store_buffer(struct request_data *driver_data, memcpy(&surface_object->params.h264.matrix, buffer_object->data, sizeof(surface_object->params.h264.matrix)); + surface_object->params.h264.matrix_set = true; break; case VAProfileHEVCMain: @@ -255,6 +256,7 @@ VAStatus RequestBeginPicture(VADriverContextP context, VAContextID context_id, surface_object->source_size = slot->size; surface_object->slices_size = 0; surface_object->slices_count = 0; + surface_object->params.h264.matrix_set = false; surface_object->status = VASurfaceRendering; context_object->render_surface_id = surface_id; diff --git a/src/surface.h b/src/surface.h index a9be915..e673fc7 100644 --- a/src/surface.h +++ b/src/surface.h @@ -73,6 +73,7 @@ struct object_surface { } mpeg2; struct { VAIQMatrixBufferH264 matrix; + bool matrix_set; VAPictureParameterBufferH264 picture; VASliceParameterBufferH264 slice; } h264;