diff --git a/src/picture.c b/src/picture.c
index f72828b..9739f6f 100644
--- a/src/picture.c
+++ b/src/picture.c
@@ -488,6 +488,49 @@ VAStatus RequestEndPicture(VADriverContextP context, VAContextID context_id)
if (rc != VA_STATUS_SUCCESS)
return rc;
+ /*
+ * iter14 α-16: env-gated dump of OUTPUT bitstream bytes immediately
+ * before QBUF. LIBVA_V4L2_DUMP_OUTPUT=<dir> writes source_data[0..
+ * slices_size] to <dir>/output_p<profile>_s<surface_id>_t<timestamp>.bin.
+ * Discriminates whether libva writes the same H.264/HEVC slice bytes
+ * as kdirect — if YES, Bug 4/5 are not in the OUTPUT-side; if NO,
+ * narrow to which slice-write path produces the divergence.
+ *
+ * Off by default; no behavior change when env unset.
+ */
+ {
+ static const char *dump_env = NULL;
+ static bool dump_env_checked = false;
+ if (!dump_env_checked) {
+ dump_env = getenv("LIBVA_V4L2_DUMP_OUTPUT");
+ dump_env_checked = true;
+ }
+ if (dump_env != NULL && dump_env[0] != '\0' &&
+ surface_object->source_data != NULL &&
+ surface_object->slices_size > 0) {
+ char path[256];
+ snprintf(path, sizeof(path),
+ "%s/output_p%d_s%u_t%llu.bin",
+ dump_env, (int)config_object->profile,
+ (unsigned int)surface_object->base.id,
+ (unsigned long long)context_object->timestamp_counter);
+ FILE *fp = fopen(path, "wb");
+ if (fp != NULL) {
+ size_t w = fwrite(surface_object->source_data,
+ 1, surface_object->slices_size,
+ fp);
+ request_log("α-16: dumped %zu bytes to %s "
+ "(slices_count=%u)\n",
+ w, path,
+ surface_object->slices_count);
+ fclose(fp);
+ } else {
+ request_log("α-16: fopen(%s) failed: %s\n",
+ path, strerror(errno));
+ }
+ }
+ }
+
rc = v4l2_queue_buffer(driver_data->video_fd, -1, capture_type, NULL,
surface_object->destination_index, 0,
surface_object->destination_buffers_count);