iter5 sweep: remove iter1 patch-0010 hex-dumps + patch-0011 sentinel

picture.c: remove the 0xab sentinel write into the first 32 bytes of
the CAPTURE buffer before QBUF, and the hex-dump of the OUTPUT buffer
before QBUF. Both were iter1 diagnostics for the "where does the
buffer write go?" investigation.

surface.c: remove the post-DQBUF CAPTURE Y-plane hex-dump + luma
variance signal. The msync(MS_SYNC|MS_INVALIDATE) was added as a
companion fix for the cached-mmap issue surfaced by the dump itself —
removing the dump removes the need for the msync.

With the iter1+iter2+iter3+iter4 fixes landed, these diagnostics are no
longer needed: the dumps fire on every single frame and produce
hundreds of MB of log noise during sustained decode. They are now gone.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-05 14:48:31 +00:00
parent 951233a12e
commit d3a299b4cc
2 changed files with 0 additions and 81 deletions
-39
View File
@@ -371,51 +371,12 @@ VAStatus RequestEndPicture(VADriverContextP context, VAContextID context_id)
if (rc != VA_STATUS_SUCCESS) if (rc != VA_STATUS_SUCCESS)
return rc; return rc;
/*
* DEBUG INSTRUMENTATION (0011): write a sentinel pattern into
* the CAPTURE buffer's first 32 bytes BEFORE QBUF. If after
* DQBUF the sentinel survives (per surface.c hex dump), the
* kernel never wrote to this buffer. If the sentinel is gone
* (replaced by zeros), the kernel did write but our CPU read
* sees stale-cached data — cache-coherency issue.
*/
{
unsigned char *p = (unsigned char *)
surface_object->destination_map[0];
if (p != NULL) {
unsigned int i;
for (i = 0; i < 32; i++)
p[i] = 0xab;
}
}
rc = v4l2_queue_buffer(driver_data->video_fd, -1, capture_type, NULL, rc = v4l2_queue_buffer(driver_data->video_fd, -1, capture_type, NULL,
surface_object->destination_index, 0, surface_object->destination_index, 0,
surface_object->destination_buffers_count); surface_object->destination_buffers_count);
if (rc < 0) if (rc < 0)
return VA_STATUS_ERROR_OPERATION_FAILED; return VA_STATUS_ERROR_OPERATION_FAILED;
/*
* DEBUG INSTRUMENTATION (0010): hex-dump first 32 bytes of the
* OUTPUT buffer at the moment we hand it to the kernel. Helps
* pin down whether our bitstream prepend logic is correct.
* For a valid ANNEX_B IDR slice the dump should start
* 00 00 01 65 ... (00 00 01 = start code; 0x65 = nal_ref_idc=3,
* nal_unit_type=5 = IDR slice). Removed once Step 1 decode is
* verified working.
*/
{
const unsigned char *p = surface_object->source_data;
char hex[32 * 3 + 1] = { 0 };
unsigned int i, n = surface_object->slices_size < 32 ?
surface_object->slices_size : 32;
for (i = 0; i < n; i++)
snprintf(hex + i * 3, 4, " %02x", p[i]);
request_log("OUTPUT[idx=%u, len=%u]:%s\n",
surface_object->source_index,
surface_object->slices_size, hex);
}
rc = v4l2_queue_buffer(driver_data->video_fd, request_fd, output_type, rc = v4l2_queue_buffer(driver_data->video_fd, request_fd, output_type,
&surface_object->timestamp, &surface_object->timestamp,
surface_object->source_index, surface_object->source_index,
-42
View File
@@ -509,48 +509,6 @@ VAStatus RequestSyncSurface(VADriverContextP context, VASurfaceID surface_id)
cap_pool_mark_decoded(&driver_data->capture_pool, cap_pool_mark_decoded(&driver_data->capture_pool,
surface_object->current_slot); surface_object->current_slot);
/*
* DEBUG INSTRUMENTATION (0010): hex-dump first 32 bytes of the
* decoded CAPTURE Y-plane after DQBUF, plus a 32-byte luma
* variance signal (max - min) so we can tell at a glance whether
* the bytes are real decoded pixels or a uniform fill.
*
* Cache hygiene fix (2026-05-04): without msync(MS_SYNC|
* MS_INVALIDATE) the previous version of this dump consistently
* showed the patch-0011 sentinel (0xab) even when the kernel had
* DMA-overwritten the buffer with real pixels — userspace's
* cached mmap line for the first 32 bytes contained the sentinel
* write and was never invalidated by DQBUF alone on this hantro
* CMA-backed config. Caused half a day of mistaken "kernel never
* wrote the buffer" diagnosis (see libva-multiplanar campaign
* phase0_evidence/2026-05-04-kernel-trace/findings.md). The
* msync() forces the kernel to invalidate userspace cache for
* this region so the read goes to physical memory.
*/
{
unsigned char *p =
(unsigned char *)surface_object->destination_map[0];
char hex[32 * 3 + 1] = { 0 };
unsigned int i;
if (p == NULL) {
request_log("CAPTURE[idx=%u, plane0]: (NULL)\n",
surface_object->destination_index);
} else {
unsigned char ymin = 0xff, ymax = 0;
(void)msync(p, 32, MS_SYNC | MS_INVALIDATE);
for (i = 0; i < 32; i++) {
unsigned char y = p[i];
snprintf(hex + i * 3, 4, " %02x", y);
if (y < ymin) ymin = y;
if (y > ymax) ymax = y;
}
request_log("CAPTURE[idx=%u, plane0]:%s "
"(ymin=0x%02x ymax=0x%02x var=%u)\n",
surface_object->destination_index,
hex, ymin, ymax,
(unsigned)(ymax - ymin));
}
}
surface_object->status = VASurfaceDisplaying; surface_object->status = VASurfaceDisplaying;