forked from marfrit/libva-v4l2-request-fourier
DEBUG: cache-fix CAPTURE dump + VIDIOC_G_EXT_CTRLS readback
Tier 3E + 3F observability hardening from the libva-multiplanar
campaign Phase 6 follow-up. Improves diagnostic reliability for
future probes; no functional decode path change.
Tier 3E (cache-fix): patch-0010's CAPTURE Y-plane dump now calls
msync(p, 32, MS_SYNC|MS_INVALIDATE) before the read so userspace
sees what the kernel actually DMA-wrote, not a stale CPU cache
line. Without this, the previous version of the dump consistently
showed the patch-0011 sentinel (0xab) even when the kernel had
overwritten it — caused half a day of mistaken "kernel never wrote
the buffer" diagnosis. Also computes a luma min/max and a spread
signal (logged as var = ymax - ymin over the 32 dumped bytes; this
is a range, not a statistical variance) so a uniform fill (var=0)
is visually obvious vs real pixel data (var > 0).
Tier 3F (VIDIOC_G_EXT_CTRLS readback): after v4l2_set_controls
in h264_set_controls, reads back DECODE_PARAMS + PPS via
v4l2_get_controls (added by patch 0003) on the request fd.
Logs key fields:
- dec.idr_pic_id, poc_lsb, refmark_bits, poc_bits — confirms the
  slice-header parser outputs landed in the V4L2 control batch.
- dec.top_foc / bot_foc — confirms the patch-0015 POC sentinel strip
  actually applied (should NOT show 65536 unless the strip mis-fired).
- dec.frame_num — cross-checks against VAAPI's pre-parsed frame_num
  (also already logged by patch 0014).
- pps.flags + (SMP=...) — confirms the SCALING_MATRIX_PRESENT bit is
  set in this build.
- pps.refidx_l0/l1 — confirms the Tier 1B num_ref_idx writes landed.
The readback discriminates "we wrote X but kernel saw Y" from "we
wrote zero all along" — the failure mode the original patch series
didn't catch when slice-header bit_size fields were left zero.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
+53
@@ -992,6 +992,59 @@ int h264_set_controls(struct request_data *driver_data,
|
||||
if (rc < 0)
|
||||
return VA_STATUS_ERROR_OPERATION_FAILED;
|
||||
|
||||
/*
|
||||
* DEBUG INSTRUMENTATION (3F observability — added 2026-05-04):
|
||||
* VIDIOC_G_EXT_CTRLS readback on the request fd to confirm the
|
||||
* V4L2 layer accepted our writes verbatim. Cross-validates the
|
||||
* patch-0015 POC sentinel strip (top_field_order_cnt should be
|
||||
* the stripped value, not 65536), the slice-header parser
|
||||
* outputs (idr_pic_id, dec_ref_pic_marking_bit_size,
|
||||
* pic_order_cnt_bit_size), and the SCALING_MATRIX_PRESENT flag.
|
||||
*
|
||||
* Read into separate stack-allocated targets to avoid any
|
||||
* aliasing with the SET-side controls; the ioctl will overwrite
|
||||
* the targets with what's currently stored in the request.
|
||||
*/
|
||||
{
|
||||
struct v4l2_ctrl_h264_decode_params dec_rb = { 0 };
|
||||
struct v4l2_ctrl_h264_pps pps_rb = { 0 };
|
||||
struct v4l2_ext_control rb_controls[2] = { 0 };
|
||||
int rb_rc;
|
||||
|
||||
rb_controls[0].id = V4L2_CID_STATELESS_H264_DECODE_PARAMS;
|
||||
rb_controls[0].p_h264_decode_params = &dec_rb;
|
||||
rb_controls[0].size = sizeof(dec_rb);
|
||||
|
||||
rb_controls[1].id = V4L2_CID_STATELESS_H264_PPS;
|
||||
rb_controls[1].p_h264_pps = &pps_rb;
|
||||
rb_controls[1].size = sizeof(pps_rb);
|
||||
|
||||
rb_rc = v4l2_get_controls(driver_data->video_fd,
|
||||
surface->request_fd,
|
||||
rb_controls, 2);
|
||||
if (rb_rc == 0) {
|
||||
request_log("V4L2 readback: dec.idr_pic_id=%u "
|
||||
"dec.poc_lsb=%u dec.refmark_bits=%u "
|
||||
"dec.poc_bits=%u dec.top_foc=%d "
|
||||
"dec.bot_foc=%d dec.frame_num=%u "
|
||||
"pps.flags=0x%llx (SMP=%d) "
|
||||
"pps.refidx_l0=%u pps.refidx_l1=%u\n",
|
||||
dec_rb.idr_pic_id, dec_rb.pic_order_cnt_lsb,
|
||||
dec_rb.dec_ref_pic_marking_bit_size,
|
||||
dec_rb.pic_order_cnt_bit_size,
|
||||
dec_rb.top_field_order_cnt,
|
||||
dec_rb.bottom_field_order_cnt,
|
||||
dec_rb.frame_num,
|
||||
(unsigned long long)pps_rb.flags,
|
||||
!!(pps_rb.flags & V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT),
|
||||
pps_rb.num_ref_idx_l0_default_active_minus1,
|
||||
pps_rb.num_ref_idx_l1_default_active_minus1);
|
||||
} else {
|
||||
request_log("V4L2 readback FAILED — controls written "
|
||||
"but readback ioctl errored\n");
|
||||
}
|
||||
}
|
||||
|
||||
dpb_insert(context, &surface->params.h264.picture.CurrPic, output);
|
||||
|
||||
return VA_STATUS_SUCCESS;
|
||||
|
||||
+28
-9
@@ -367,13 +367,24 @@ VAStatus RequestSyncSurface(VADriverContextP context, VASurfaceID surface_id)
|
||||
|
||||
/*
|
||||
* DEBUG INSTRUMENTATION (0010): hex-dump first 32 bytes of the
|
||||
* decoded CAPTURE Y-plane after DQBUF. If the kernel actually
|
||||
* decoded the frame, these should reflect a real Y-luma pattern
|
||||
* (varied bytes). All-zero or all-identical means no decode
|
||||
* landed pixels in the buffer. Removed once Step 1 is verified.
|
||||
* decoded CAPTURE Y-plane after DQBUF, plus a 32-byte luma
|
||||
* variance signal (max - min) so we can tell at a glance whether
|
||||
* the bytes are real decoded pixels or a uniform fill.
|
||||
*
|
||||
* Cache hygiene fix (2026-05-04): without msync(MS_SYNC|
|
||||
* MS_INVALIDATE) the previous version of this dump consistently
|
||||
* showed the patch-0011 sentinel (0xab) even when the kernel had
|
||||
* DMA-overwritten the buffer with real pixels — userspace's
|
||||
* cached mmap line for the first 32 bytes contained the sentinel
|
||||
* write and was never invalidated by DQBUF alone on this hantro
|
||||
* CMA-backed config. Caused half a day of mistaken "kernel never
|
||||
* wrote the buffer" diagnosis (see libva-multiplanar campaign
|
||||
* phase0_evidence/2026-05-04-kernel-trace/findings.md). The
|
||||
* msync() forces the kernel to invalidate userspace cache for
|
||||
* this region so the read goes to physical memory.
|
||||
*/
|
||||
{
|
||||
const unsigned char *p =
|
||||
unsigned char *p =
|
||||
(unsigned char *)surface_object->destination_map[0];
|
||||
char hex[32 * 3 + 1] = { 0 };
|
||||
unsigned int i;
|
||||
@@ -381,11 +392,19 @@ VAStatus RequestSyncSurface(VADriverContextP context, VASurfaceID surface_id)
|
||||
request_log("CAPTURE[idx=%u, plane0]: (NULL)\n",
|
||||
surface_object->destination_index);
|
||||
} else {
|
||||
for (i = 0; i < 32; i++)
|
||||
snprintf(hex + i * 3, 4, " %02x", p[i]);
|
||||
request_log("CAPTURE[idx=%u, plane0]:%s\n",
|
||||
unsigned char ymin = 0xff, ymax = 0;
|
||||
(void)msync(p, 32, MS_SYNC | MS_INVALIDATE);
|
||||
for (i = 0; i < 32; i++) {
|
||||
unsigned char y = p[i];
|
||||
snprintf(hex + i * 3, 4, " %02x", y);
|
||||
if (y < ymin) ymin = y;
|
||||
if (y > ymax) ymax = y;
|
||||
}
|
||||
request_log("CAPTURE[idx=%u, plane0]:%s "
|
||||
"(ymin=0x%02x ymax=0x%02x var=%u)\n",
|
||||
surface_object->destination_index,
|
||||
hex);
|
||||
hex, ymin, ymax,
|
||||
(unsigned)(ymax - ymin));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user