b47938e0bc
build and publish packages / distcc-avahi-aarch64 (push) Successful in 1m3s
build and publish packages / lmcp-any (push) Successful in 9s
build and publish packages / lmcp-debian (push) Successful in 4s
build and publish packages / claude-his-any (push) Failing after 4s
build and publish packages / ffmpeg-v4l2-request-aarch64 (push) Has been skipped
build and publish packages / claude-his-debian (push) Has been skipped
Mirrors phase6/step1/ from the ohm_gl_fix campaign. Contract-correct
hantro multi-planar / chromium-149-era stateless H.264 port of
bootlin's libva-v4l2-request, patches 0001..0018 + fourier-local.
Honest characterisation in README:
- Builds cleanly on chromium-builder LXC (boltzmann)
- vainfo enumerates H.264 profiles cleanly with LIBVA_DRIVER_NAME=v4l2_request
- NOT on Brave's decode path on ohm_gl_fix stack — Brave uses
Chromium's own V4L2VideoDecoder in media/gpu/v4l2/.
- Most likely useful for a future Firefox-via-libavcodec-vaapi
campaign, modulo a separate Mesa-panfrost WSI pitch issue.
- DEBUG patches (0010, 0011, 0014) intentionally kept in series
for development; remove for cleaner production runs.
Audit trail in the source repo at ohm_gl_fix:
phase6/step1/audit_0008_decode_params_2026-05-01.md
phase6/step1/api_contract_findings_2026-05-01.md
phase3_remeasure_2026-05-02/B3_decoder_discovery.md (why this
isn't on Brave's path)
215 lines
8.4 KiB
Diff
215 lines
8.4 KiB
Diff
From: Markus Fritsche <fritsche.markus@gmail.com>
|
|
Date: 2026-05-02
|
|
Subject: [PATCH] h264: gate SCALING_MATRIX submission on VAIQMatrixBuffer presence
|
|
|
|
VAAPI signals "explicit scaling lists are present in the bitstream"
|
|
implicitly: the consumer (ffmpeg-vaapi, mpv, etc.) sends a
|
|
VAIQMatrixBufferH264 alongside RenderPicture iff
|
|
sps_scaling_matrix_present_flag || pps_scaling_matrix_present_flag.
|
|
When the bitstream uses default (flat) scaling, no IQMatrixBuffer
|
|
arrives and the in-tree h264.matrix struct stays zero-initialised.
|
|
|
|
fourier's existing codec_store_buffer for MPEG2 and HEVC tracks this
|
|
via a per-surface iqmatrix_set boolean (surface.h::mpeg2.iqmatrix_set,
|
|
h265.iqmatrix_set) — the H.264 path was missing the equivalent flag,
|
|
so set_controls always submitted the scaling matrix, including the
|
|
zero-initialised case.
|
|
|
|
Symptom on hantro-vpu RK3568: when TRANSFORM_8X8_MODE is enabled in
|
|
PPS, the kernel multiplies all 8x8 DCT coefficients by the zeroed
|
|
scaling_list_8x8, producing a zeroed CAPTURE buffer despite a
|
|
successful decode round-trip (no V4L2_BUF_FLAG_ERROR,
|
|
bytesused=3655712 reported).
|
|
|
|
Earlier draft of this patch unconditionally omitted SCALING_MATRIX in
|
|
FRAME_BASED. That's corpus-correct (bbb has no explicit scaling
|
|
lists) but the wrong predicate: the kernel-side gating is by
|
|
"matrix-supplied vs. not," not by decode mode. Streams that signal
|
|
explicit scaling lists must submit SCALING_MATRIX in either mode.
|
|
|
|
Contract verification (audit_0008_decode_params_2026-05-01.md +
|
|
hantro_h264.c::assemble_scaling_list): the kernel uses the supplied
|
|
matrix when SCALING_MATRIX is in the control batch and falls back
|
|
to spec-defined defaults when absent. Mode-independent.
|
|
|
|
This patch:
|
|
- surface.h: adds bool matrix_set to params.h264, mirroring
|
|
mpeg2.iqmatrix_set / h265.iqmatrix_set.
|
|
- picture.c codec_store_buffer (H.264 VAIQMatrixBufferType case):
|
|
sets matrix_set = true when the buffer arrives.
|
|
- picture.c RequestBeginPicture: resets matrix_set = false at the
|
|
start of each Begin/Render/End cycle.
|
|
- h264.c h264_set_controls: builds the controls[] array
|
|
incrementally; SPS/PPS/DECODE_PARAMS always; SCALING_MATRIX iff
|
|
matrix_set; SLICE_PARAMS only in SLICE_BASED; PRED_WEIGHTS only
|
|
when both SLICE_BASED and V4L2_H264_CTRL_PRED_WEIGHTS_REQUIRED.
|
|
|
|
The pre-existing FRAME_BASED-omits-SLICE_PARAMS rule is preserved —
|
|
kernel doc ext-ctrls-codec-stateless.rst:752: "When this mode is
|
|
selected, the V4L2_CID_STATELESS_H264_SLICE_PARAMS control shall
|
|
not be set."
|
|
|
|
Cross-reference: kernel UAPI section
|
|
ext-ctrls-codec-stateless.rst V4L2_CID_STATELESS_H264_SCALING_MATRIX
|
|
(matrix supplied iff explicit scaling lists in bitstream) and
|
|
hantro_h264.c::assemble_scaling_list (consumes supplied matrix or
|
|
falls back to defaults).
|
|
|
|
Signed-off-by: Markus Fritsche <fritsche.markus@gmail.com>
|
|
---
|
|
--- a/src/surface.h
|
|
+++ b/src/surface.h
|
|
@@ -73,6 +73,7 @@
|
|
} mpeg2;
|
|
struct {
|
|
VAIQMatrixBufferH264 matrix;
|
|
+ bool matrix_set;
|
|
VAPictureParameterBufferH264 picture;
|
|
VASliceParameterBufferH264 slice;
|
|
} h264;
|
|
--- a/src/picture.c
|
|
+++ b/src/picture.c
|
|
@@ -153,6 +153,7 @@
|
|
memcpy(&surface_object->params.h264.matrix,
|
|
buffer_object->data,
|
|
sizeof(surface_object->params.h264.matrix));
|
|
+ surface_object->params.h264.matrix_set = true;
|
|
break;
|
|
|
|
case VAProfileHEVCMain:
|
|
@@ -255,6 +256,7 @@
|
|
surface_object->source_size = slot->size;
|
|
surface_object->slices_size = 0;
|
|
surface_object->slices_count = 0;
|
|
+ surface_object->params.h264.matrix_set = false;
|
|
|
|
surface_object->status = VASurfaceRendering;
|
|
context_object->render_surface_id = surface_id;
|
|
--- a/src/h264.c
|
|
+++ b/src/h264.c
|
|
@@ -553,66 +553,68 @@
|
|
sps.profile_idc = h264_profile_to_idc(profile);
|
|
|
|
/*
|
|
- * Per-request control batch, ordered so the controls REQUIRED in
|
|
- * V4L2_STATELESS_H264_DECODE_MODE_FRAME_BASED come first
|
|
- * (indices 0..3) and the SLICE_BASED-only controls come last
|
|
- * (indices 4..5).
|
|
+ * Build the per-request control list incrementally:
|
|
+ * - SPS, PPS, DECODE_PARAMS: always required (in either decode
|
|
+ * mode).
|
|
+ * - SCALING_MATRIX: gated on surface->params.h264.matrix_set,
|
|
+ * i.e. the consumer sent a VAIQMatrixBufferH264 this frame.
|
|
+ * This matches the H.264 spec: explicit scaling lists are
|
|
+ * present iff sps_scaling_matrix_present_flag ||
|
|
+ * pps_scaling_matrix_present_flag, in which case VAAPI
|
|
+ * consumers send the matrix; otherwise the kernel uses
|
|
+ * spec-defined defaults. Independent of FRAME_BASED /
|
|
+ * SLICE_BASED.
|
|
+ * - SLICE_PARAMS: SLICE_BASED only. Kernel doc
|
|
+ * ext-ctrls-codec-stateless.rst (FRAME_BASED entry):
|
|
+ * "When this mode is selected, the
|
|
+ * V4L2_CID_STATELESS_H264_SLICE_PARAMS control shall not be
|
|
+ * set." Submitting it under FRAME_BASED triggers cluster-
|
|
+ * validation EINVAL at error_idx=count.
|
|
+ * - PRED_WEIGHTS: SLICE_BASED + V4L2_H264_CTRL_PRED_WEIGHTS_REQUIRED.
|
|
*
|
|
- * Cross-reference: GStreamer gst-plugins-bad
|
|
- * sys/v4l2codecs/gstv4l2codech264dec.c (commit 9e3e775,
|
|
- * lines 1263-1304) gates SLICE_PARAMS and PRED_WEIGHTS on
|
|
- * is_slice_based(self); under FRAME_BASED only SPS/PPS/
|
|
- * SCALING_MATRIX/DECODE_PARAMS are submitted. The kernel
|
|
- * parses the bitstream itself in FRAME_BASED mode; submitting
|
|
- * per-slice controls in that mode triggers cluster-validation
|
|
- * EINVAL at error_idx=count.
|
|
- */
|
|
- struct v4l2_ext_control controls[6] = {
|
|
- {
|
|
- .id = V4L2_CID_STATELESS_H264_SPS,
|
|
- .p_h264_sps = &sps,
|
|
- .size = sizeof(sps),
|
|
- }, {
|
|
- .id = V4L2_CID_STATELESS_H264_PPS,
|
|
- .p_h264_pps = &pps,
|
|
- .size = sizeof(pps),
|
|
- }, {
|
|
- .id = V4L2_CID_STATELESS_H264_SCALING_MATRIX,
|
|
- .p_h264_scaling_matrix = &matrix,
|
|
- .size = sizeof(matrix),
|
|
- }, {
|
|
- .id = V4L2_CID_STATELESS_H264_DECODE_PARAMS,
|
|
- .p_h264_decode_params = &decode,
|
|
- .size = sizeof(decode),
|
|
- }, {
|
|
- .id = V4L2_CID_STATELESS_H264_SLICE_PARAMS,
|
|
- .p_h264_slice_params = &slice,
|
|
- .size = sizeof(slice),
|
|
- }, {
|
|
- .id = V4L2_CID_STATELESS_H264_PRED_WEIGHTS,
|
|
- .ptr = &weights,
|
|
- .size = sizeof(weights),
|
|
- }
|
|
- };
|
|
-
|
|
- /*
|
|
- * Decode-mode dispatch. Patch 0002 unconditionally sets the
|
|
- * device to FRAME_BASED, so we hardcode that here. When the
|
|
- * planned probe-then-set commit lands, slice_based becomes
|
|
- * context->decode_mode == V4L2_STATELESS_H264_DECODE_MODE_SLICE_BASED
|
|
- * with context->decode_mode populated at CreateContext via
|
|
- * VIDIOC_QUERYCTRL/G_EXT_CTRLS.
|
|
- *
|
|
- * FRAME_BASED: 4 controls (SPS, PPS, SCALING_MATRIX, DECODE_PARAMS).
|
|
- * SLICE_BASED: +SLICE_PARAMS (always), +PRED_WEIGHTS (when
|
|
- * V4L2_H264_CTRL_PRED_WEIGHTS_REQUIRED).
|
|
+ * Patch 0002 unconditionally sets the device to FRAME_BASED,
|
|
+ * so slice_based is hardcoded false here. When the planned
|
|
+ * probe-then-set commit lands, this becomes
|
|
+ * context->decode_mode == V4L2_STATELESS_H264_DECODE_MODE_SLICE_BASED.
|
|
*/
|
|
+ struct v4l2_ext_control controls[6] = { 0 };
|
|
+ unsigned int num_controls = 0;
|
|
const bool slice_based = false; /* TODO: probe via context->decode_mode */
|
|
- unsigned int num_controls = 4;
|
|
+
|
|
+ controls[num_controls].id = V4L2_CID_STATELESS_H264_SPS;
|
|
+ controls[num_controls].p_h264_sps = &sps;
|
|
+ controls[num_controls].size = sizeof(sps);
|
|
+ num_controls++;
|
|
+
|
|
+ controls[num_controls].id = V4L2_CID_STATELESS_H264_PPS;
|
|
+ controls[num_controls].p_h264_pps = &pps;
|
|
+ controls[num_controls].size = sizeof(pps);
|
|
+ num_controls++;
|
|
+
|
|
+ controls[num_controls].id = V4L2_CID_STATELESS_H264_DECODE_PARAMS;
|
|
+ controls[num_controls].p_h264_decode_params = &decode;
|
|
+ controls[num_controls].size = sizeof(decode);
|
|
+ num_controls++;
|
|
+
|
|
+ if (surface->params.h264.matrix_set) {
|
|
+ controls[num_controls].id = V4L2_CID_STATELESS_H264_SCALING_MATRIX;
|
|
+ controls[num_controls].p_h264_scaling_matrix = &matrix;
|
|
+ controls[num_controls].size = sizeof(matrix);
|
|
+ num_controls++;
|
|
+ }
|
|
+
|
|
if (slice_based) {
|
|
- num_controls = 5;
|
|
- if (V4L2_H264_CTRL_PRED_WEIGHTS_REQUIRED(&pps, &slice))
|
|
- num_controls = 6;
|
|
+ controls[num_controls].id = V4L2_CID_STATELESS_H264_SLICE_PARAMS;
|
|
+ controls[num_controls].p_h264_slice_params = &slice;
|
|
+ controls[num_controls].size = sizeof(slice);
|
|
+ num_controls++;
|
|
+
|
|
+ if (V4L2_H264_CTRL_PRED_WEIGHTS_REQUIRED(&pps, &slice)) {
|
|
+ controls[num_controls].id = V4L2_CID_STATELESS_H264_PRED_WEIGHTS;
|
|
+ controls[num_controls].ptr = &weights;
|
|
+ controls[num_controls].size = sizeof(weights);
|
|
+ num_controls++;
|
|
+ }
|
|
}
|
|
|
|
rc = v4l2_set_controls(driver_data->video_fd, surface->request_fd,
|