From 94be8c3d03b678ac40e3b7814e032c4099d7ecc2 Mon Sep 17 00:00:00 2001
From: claude-noether <claude-noether@noreply.localhost>
Date: Sat, 23 May 2026 17:06:06 +0200
Subject: [PATCH] kernel: drain in-flight m2m jobs on daemon disconnect
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes issue #146 — a daemon crash (SIGKILL, SEGV, anything that
triggers chardev release) leaves V4L2 consumers stuck in unkillable
TASK_UNINTERRUPTIBLE sleep when they close /dev/video0.

## Root cause

device_run() adds an entry to dev->inflight when it sends a
REQ_DECODE to the daemon, marking the m2m job as "running".
The job is only cleared via v4l2_m2m_buf_done_and_job_finish()
in daedalus_complete_resp_frame(), which only fires on RESP_FRAME.

If the daemon dies (SIGKILL, SEGV, exit) BEFORE writing the
matching RESP_FRAME:
  - the inflight entry is never popped
  - v4l2_m2m_buf_done_and_job_finish is never called
  - the m2m scheduler still thinks a job is running

Later, when the V4L2 consumer calls close() (or is signalled to
exit, which closes the fd for it), v4l2_m2m_ctx_release() →
v4l2_m2m_cancel_job() waits indefinitely for job_running to
clear.  The consumer enters D-state and survives SIGKILL until
reboot.

Reproduced on hertz 2026-05-23, kernel 6.12.75+rpt-rpi-2712:

  $ sudo kill -STOP $DAEMON_PID            # block daemon I/O
  $ ./test_m2m_decode keyframe.bin out.nv12 1920 1080 vp9 &
  $ sudo kill -9 $DAEMON_PID               # chardev_release fires
  $ kill -9 $CLIENT_PID                    # ignored — D-state
  # client stack:
  v4l2_m2m_cancel_job+0x14c [v4l2_mem2mem]
  v4l2_m2m_ctx_release+0x20 [v4l2_mem2mem]
  daedalus_release+0x2c [daedalus_v4l2]
  v4l2_release+0x7c [videodev]
  __fput → do_exit → SIGKILL never delivered

## Fix

New API daedalus_drain_inflight_on_disconnect() in
daedalus_v4l2_main.{c,h}: walks the in-flight list and, for each
entry, completes the bound media_request's control state (if
any), marks both src+dst buffers VB2_BUF_STATE_ERROR via
v4l2_m2m_buf_done_and_job_finish(), and drops the media_request
reference.  This is the same completion sequence that
daedalus_complete_resp_frame() follows on the success path, just
with state = ERROR for every in-flight entry.

chardev_release calls the drain after flushing dev->req_queue
(messages still in req_queue weren't released to the daemon yet,
so they don't need the m2m-job-finish dance — freeing them is
sufficient).  The order matters: queue first (cheap), then m2m
drain (heavier, takes the inflight list).

Locking: list_splice_init under inflight_lock to take the entire
list atomically; lock dropped before iterating because
v4l2_m2m_buf_done_and_job_finish can sleep via vb2's buffer-done
dispatch and can re-enter device_run via the scheduler (which
would need inflight_lock again on the next REQ_DECODE).

## Verification path

Cannot rmmod the running module on hertz right now — the D-state
corpse from the repro session pins the refcount.  Verification
of the fixed module needs a reboot or fresh test host:

  $ sudo reboot                            # clears hung client
  $ sudo make modules_install              # install new .ko
  $ sudo modprobe daedalus_v4l2
  $ # rerun the repro script — client should die cleanly with
  $ # an -EIO / similar return from poll/DQBUF instead of hanging.

Build: clean on Linux 6.12.75 + rpt-rpi-2712, no new warnings.
The pre-existing "frame size 2128 > 2048" warning on
daedalus_device_run is unchanged by this commit.

## Followup not in scope

If a new V4L2 consumer races a REQ_DECODE through device_run
AFTER the drain has spliced the list (but before the daemon
chardev is reopened), the new entry lands in the freshly emptied
inflight list.  The same hang can then recur for that consumer
if the systemd auto-restart of the daemon fails or takes longer
than the consumer is willing to wait.  A secondary safeguard
would be to fail fast in device_run while the chardev is not
open (dev->opened == 0) — to be proposed as a separate ticket if
this race materialises in practice.

Closes #146.
---
 kernel/daedalus_v4l2_chardev.c | 20 ++++++++++++
 kernel/daedalus_v4l2_main.c    | 59 ++++++++++++++++++++++++++++++++++
 kernel/daedalus_v4l2_main.h    | 23 +++++++++++++
 3 files changed, 102 insertions(+)

diff --git a/kernel/daedalus_v4l2_chardev.c b/kernel/daedalus_v4l2_chardev.c
index 9d6ab18..bb7ed00 100644
--- a/kernel/daedalus_v4l2_chardev.c
+++ b/kernel/daedalus_v4l2_chardev.c
@@ -167,6 +167,26 @@ static int daedalus_chardev_release(struct inode *inode, struct file *file)
 	}
 	mutex_unlock(&dev->req_lock);
 
+	/*
+	 * Drain the V4L2-side in-flight list before the daemon goes
+	 * away.  Any REQ_DECODE we already sent to the daemon won't
+	 * get a matching RESP_FRAME — without this drain,
+	 * v4l2_m2m_cancel_job() in the V4L2 consumer's close() path
+	 * (or in vb2's STREAMOFF path) blocks forever waiting for a
+	 * job_finish that will never arrive, and the consumer becomes
+	 * unkillable D-state.  Issue #146.
+	 *
+	 * Done AFTER draining the request queue: any REQ_DECODE still
+	 * sitting in dev->req_queue is by definition not yet "in
+	 * flight" (the kernel never released it to the daemon), so it
+	 * doesn't need the m2m-job-finish dance — freeing the message
+	 * is sufficient.  The inflight list holds entries the kernel
+	 * already committed to (added in device_run after the message
+	 * was queued or written), which is exactly what needs to be
+	 * failed back to vb2 here.
+	 */
+	daedalus_drain_inflight_on_disconnect();
+
 	mutex_lock(&dev->open_lock);
 	dev->opened = 0;
 	mutex_unlock(&dev->open_lock);
diff --git a/kernel/daedalus_v4l2_main.c b/kernel/daedalus_v4l2_main.c
index 61fc312..fd65951 100644
--- a/kernel/daedalus_v4l2_main.c
+++ b/kernel/daedalus_v4l2_main.c
@@ -1005,6 +1005,65 @@ void daedalus_complete_resp_frame(u32 cookie,
 	kfree(inf);
 }
 
+/* -- daemon disconnect drain ----------------------------------------- */
+
+/*
+ * Fail every in-flight m2m job back to vb2 on daemon disconnect.
+ */
+void daedalus_drain_inflight_on_disconnect(void)
+{
+	struct daedalus_dev *dev = g_daedalus_dev;
+	struct daedalus_inflight *inf, *tmp;
+	LIST_HEAD(local);
+
+	if (!dev)
+		return;
+
+	/*
+	 * Move the whole in-flight list onto a local head under
+	 * inflight_lock, then complete the entries with the lock
+	 * dropped.  Finishing a job lets the m2m core schedule the
+	 * next one, and device_run needs inflight_lock to record its
+	 * REQ_DECODE, so the lock must not be held across completion.
+	 * NOTE(review): device_run's locking is outside this hunk.
+	 */
+	mutex_lock(&dev->inflight_lock);
+	list_splice_init(&dev->inflight, &local);
+	mutex_unlock(&dev->inflight_lock);
+
+	list_for_each_entry_safe(inf, tmp, &local, list) {
+		list_del(&inf->list);
+
+		v4l2_warn(&dev->v4l2_dev,
+			  "draining inflight cookie=%u (daemon disconnect)\n",
+			  inf->cookie);
+
+		/*
+		 * Complete the request's control state before the
+		 * buffers are returned, mirroring the success path in
+		 * daedalus_complete_resp_frame().  Skipped for
+		 * non-request flows, where inf->req is NULL.
+		 */
+		if (inf->req)
+			v4l2_ctrl_request_complete(inf->req, &inf->ctx->hdl);
+
+		/*
+		 * Return both buffers in the ERROR state and finish the
+		 * m2m job.  Finishing the job is what lets
+		 * v4l2_m2m_cancel_job() in the consumer's close() path
+		 * return instead of waiting forever (issue #146).
+		 */
+		v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev,
+						 inf->ctx->m2m_ctx,
+						 VB2_BUF_STATE_ERROR);
+
+		if (inf->req)
+			media_request_put(inf->req);
+
+		kfree(inf);
+	}
+}
+
 /* -- v4l2_ioctl_ops -------------------------------------------------- */
 
 static int daedalus_querycap(struct file *file, void *priv,
diff --git a/kernel/daedalus_v4l2_main.h b/kernel/daedalus_v4l2_main.h
index e6496b4..e229bf2 100644
--- a/kernel/daedalus_v4l2_main.h
+++ b/kernel/daedalus_v4l2_main.h
@@ -103,4 +103,27 @@ void daedalus_complete_resp_frame(u32 cookie,
 int  daedalus_export_capture_dmabuf(u32 cookie, u32 plane, u32 flags,
 				    int *out_fd);
 
+/**
+ * daedalus_drain_inflight_on_disconnect() - fail all in-flight m2m jobs
+ *
+ * Called from daedalus_chardev_release() when the daemon disconnects
+ * (graceful close, SIGKILL, daemon crash — anything that triggers
+ * chardev release).  Detaches the in-flight list and, for every entry,
+ * completes the bound request's controls (if any), returns the src and
+ * dst buffers as VB2_BUF_STATE_ERROR through
+ * v4l2_m2m_buf_done_and_job_finish(), which also finishes the m2m job,
+ * then drops the request reference and frees the entry.
+ *
+ * Without this, v4l2_m2m_cancel_job() (called from
+ * v4l2_m2m_ctx_release() during the consumer's close() / task exit)
+ * blocks forever waiting for a job_finish that the dead daemon will
+ * never trigger — the consumer enters TASK_UNINTERRUPTIBLE and
+ * survives SIGKILL until reboot.  See issue #146 for the full trace.
+ *
+ * A no-op when the in-flight list is empty or g_daedalus_dev is NULL.
+ *
+ * Context: process context only; takes the inflight_lock mutex.
+ */
+void daedalus_drain_inflight_on_disconnect(void);
+
 #endif /* DAEDALUS_V4L2_MAIN_H */
-- 
2.47.3