diff --git a/src/h264.c b/src/h264.c index 996766c..a36de8a 100644 --- a/src/h264.c +++ b/src/h264.c @@ -992,6 +992,59 @@ int h264_set_controls(struct request_data *driver_data, if (rc < 0) return VA_STATUS_ERROR_OPERATION_FAILED; + /* + * DEBUG INSTRUMENTATION (3F observability — added 2026-05-04): + * VIDIOC_G_EXT_CTRLS readback on the request fd to confirm the + * V4L2 layer accepted our writes verbatim. Cross-validates the + * patch-0015 POC sentinel strip (top_field_order_cnt should be + * the stripped value, not 65536), the slice-header parser + * outputs (idr_pic_id, dec_ref_pic_marking_bit_size, + * pic_order_cnt_bit_size), and the SCALING_MATRIX_PRESENT flag. + * + * Read into separate zeroed stack targets (no aliasing with the + * SET-side controls); the ioctl overwrites them with what the + * request holds. Log-only: failure never changes the return status. + */ + { + struct v4l2_ctrl_h264_decode_params dec_rb = { 0 }; + struct v4l2_ctrl_h264_pps pps_rb = { 0 }; + struct v4l2_ext_control rb_controls[2] = { 0 }; + int rb_rc; + + rb_controls[0].id = V4L2_CID_STATELESS_H264_DECODE_PARAMS; + rb_controls[0].p_h264_decode_params = &dec_rb; + rb_controls[0].size = sizeof(dec_rb); + + rb_controls[1].id = V4L2_CID_STATELESS_H264_PPS; + rb_controls[1].p_h264_pps = &pps_rb; + rb_controls[1].size = sizeof(pps_rb); + + rb_rc = v4l2_get_controls(driver_data->video_fd, + surface->request_fd, + rb_controls, 2); + if (rb_rc == 0) { + request_log("V4L2 readback: dec.idr_pic_id=%u " + "dec.poc_lsb=%u dec.refmark_bits=%u " + "dec.poc_bits=%u dec.top_foc=%d " + "dec.bot_foc=%d dec.frame_num=%u " + "pps.flags=0x%llx (SMP=%d) " + "pps.refidx_l0=%u pps.refidx_l1=%u\n", + dec_rb.idr_pic_id, dec_rb.pic_order_cnt_lsb, + dec_rb.dec_ref_pic_marking_bit_size, + dec_rb.pic_order_cnt_bit_size, + dec_rb.top_field_order_cnt, + dec_rb.bottom_field_order_cnt, + dec_rb.frame_num, + (unsigned long long)pps_rb.flags, + !!(pps_rb.flags & V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT), + 
pps_rb.num_ref_idx_l0_default_active_minus1, + pps_rb.num_ref_idx_l1_default_active_minus1); + } else { + request_log("V4L2 readback FAILED — controls written " + "but readback ioctl errored\n"); + } + } + dpb_insert(context, &surface->params.h264.picture.CurrPic, output); return VA_STATUS_SUCCESS; diff --git a/src/surface.c b/src/surface.c index 56d03bd..2a55b08 100644 --- a/src/surface.c +++ b/src/surface.c @@ -367,13 +367,24 @@ VAStatus RequestSyncSurface(VADriverContextP context, VASurfaceID surface_id) /* * DEBUG INSTRUMENTATION (0010): hex-dump first 32 bytes of the - * decoded CAPTURE Y-plane after DQBUF. If the kernel actually - * decoded the frame, these should reflect a real Y-luma pattern - * (varied bytes). All-zero or all-identical means no decode - * landed pixels in the buffer. Removed once Step 1 is verified. + * decoded CAPTURE Y-plane after DQBUF, plus the luma range + * (ymax - ymin, logged as "var") over those same 32 bytes, so we + * can tell at a glance real decoded pixels from a uniform fill. + * + * Cache hygiene fix (2026-05-04): without msync(MS_SYNC | + * MS_INVALIDATE) the previous version of this dump consistently + * showed the patch-0011 sentinel (0xab) even when the kernel had + * DMA-overwritten the buffer with real pixels. Working theory: a + * stale cached copy of the sentinel write was never invalidated + * by DQBUF alone on this hantro CMA-backed config. That caused + * half a day of mistaken "kernel never wrote the buffer" + * diagnosis (see libva-multiplanar campaign + * phase0_evidence/2026-05-04-kernel-trace/findings.md). msync() + * is meant to make the read below see physical memory; its + * result is ignored, so NOTE(review): confirm it succeeds here. 
*/ { - const unsigned char *p = + unsigned char *p = (unsigned char *)surface_object->destination_map[0]; char hex[32 * 3 + 1] = { 0 }; unsigned int i; @@ -381,11 +392,19 @@ VAStatus RequestSyncSurface(VADriverContextP context, VASurfaceID surface_id) request_log("CAPTURE[idx=%u, plane0]: (NULL)\n", surface_object->destination_index); } else { - for (i = 0; i < 32; i++) - snprintf(hex + i * 3, 4, " %02x", p[i]); - request_log("CAPTURE[idx=%u, plane0]:%s\n", + unsigned char ymin = 0xff, ymax = 0; + (void)msync(p, 32, MS_SYNC | MS_INVALIDATE); + for (i = 0; i < 32; i++) { + unsigned char y = p[i]; + snprintf(hex + i * 3, 4, " %02x", y); + if (y < ymin) ymin = y; + if (y > ymax) ymax = y; + } + request_log("CAPTURE[idx=%u, plane0]:%s " + "(ymin=0x%02x ymax=0x%02x var=%u)\n", surface_object->destination_index, - hex); + hex, ymin, ymax, + (unsigned)(ymax - ymin)); } }