diff --git a/kernel/daedalus_v4l2_chardev.c b/kernel/daedalus_v4l2_chardev.c index 9d6ab18..bb7ed00 100644 --- a/kernel/daedalus_v4l2_chardev.c +++ b/kernel/daedalus_v4l2_chardev.c @@ -167,6 +167,26 @@ static int daedalus_chardev_release(struct inode *inode, struct file *file) } mutex_unlock(&dev->req_lock); + /* + * Drain the V4L2-side in-flight list before the daemon goes + * away. Any REQ_DECODE we already sent to the daemon won't + * get a matching RESP_FRAME — without this drain, + * v4l2_m2m_cancel_job() in the V4L2 consumer's close() path + * (or in vb2's STREAMOFF path) blocks forever waiting for a + * job_finish that will never arrive, and the consumer is left + * in unkillable D-state. Issue #146. + * + * Done AFTER draining the request queue: any REQ_DECODE still + * sitting in dev->req_queue is by definition not yet "in + * flight" (the kernel never released it to the daemon), so it + * doesn't need the m2m-job-finish dance — freeing the message + * is sufficient. The inflight list holds entries the kernel + * already committed to (added in device_run after the message + * was queued or written), which is exactly what needs to be + * failed back to vb2 here. 
+ */ + daedalus_drain_inflight_on_disconnect(); + mutex_lock(&dev->open_lock); dev->opened = 0; mutex_unlock(&dev->open_lock); diff --git a/kernel/daedalus_v4l2_main.c b/kernel/daedalus_v4l2_main.c index 61fc312..fd65951 100644 --- a/kernel/daedalus_v4l2_main.c +++ b/kernel/daedalus_v4l2_main.c @@ -1005,6 +1005,65 @@ void daedalus_complete_resp_frame(u32 cookie, kfree(inf); } +/* -- daemon disconnect drain ----------------------------------------- */ + +void daedalus_drain_inflight_on_disconnect(void) +{ + struct daedalus_dev *dev = g_daedalus_dev; + struct daedalus_inflight *inf, *tmp; + LIST_HEAD(local); + + if (!dev) + return; + + /* + * Splice the in-flight list onto a local list under the lock, + * then process each entry with the lock dropped — finishing a + * job may let the m2m scheduler invoke device_run again, which + * touches dev->inflight. NOTE(review): entries added after the + * splice are not drained by this call, and whether + * v4l2_m2m_buf_done can sleep is unverified — confirm both + * against device_run and the vb2/m2m documentation. + */ + mutex_lock(&dev->inflight_lock); + list_splice_init(&dev->inflight, &local); + mutex_unlock(&dev->inflight_lock); + + list_for_each_entry_safe(inf, tmp, &local, list) { + list_del(&inf->list); + + v4l2_warn(&dev->v4l2_dev, + "draining inflight cookie=%u (daemon disconnect)\n", + inf->cookie); + + /* + * Complete the per-request control state before + * buf_done_and_job_finish, same ordering as the success + * path in daedalus_complete_resp_frame(). For non-request + * flows inf->req is NULL, so the guard below skips the + * call entirely. + */ + if (inf->req) + v4l2_ctrl_request_complete(inf->req, &inf->ctx->hdl); + + /* + * Mark both buffers ERROR and clear the m2m scheduler's + * job_running flag. This is what unsticks + * v4l2_m2m_cancel_job() inside the consumer's close() + * path; without it, the consumer hangs in TASK_UNINTERRUPTIBLE + * forever (issue #146). 
+ */ + v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, + inf->ctx->m2m_ctx, + VB2_BUF_STATE_ERROR); + + if (inf->req) + media_request_put(inf->req); + + kfree(inf); + } +} + /* -- v4l2_ioctl_ops -------------------------------------------------- */ static int daedalus_querycap(struct file *file, void *priv, diff --git a/kernel/daedalus_v4l2_main.h b/kernel/daedalus_v4l2_main.h index e6496b4..e229bf2 100644 --- a/kernel/daedalus_v4l2_main.h +++ b/kernel/daedalus_v4l2_main.h @@ -103,4 +103,27 @@ void daedalus_complete_resp_frame(u32 cookie, int daedalus_export_capture_dmabuf(u32 cookie, u32 plane, u32 flags, int *out_fd); +/** + * daedalus_drain_inflight_on_disconnect() - fail all in-flight m2m jobs + * + * Called from daedalus_chardev_release() when the daemon disconnects + * (graceful close, SIGKILL, daemon crash — anything that triggers + * chardev release). Detaches the in-flight list and, for every entry, + * completes its control request (if any), finishes the m2m job with + * VB2_BUF_STATE_ERROR via v4l2_m2m_buf_done_and_job_finish(), drops + * the media request reference (if any) and frees the entry. + * + * Without this, v4l2_m2m_cancel_job() (called from + * v4l2_m2m_ctx_release() during the consumer's close() / task exit) + * blocks forever waiting for a job_finish that the dead daemon will + * never send — the consumer enters TASK_UNINTERRUPTIBLE and survives + * SIGKILL until reboot. See issue #146 for the full trace. + * + * No-op when g_daedalus_dev is NULL or the in-flight list is empty. + * Must NOT be called from atomic context — takes inflight_lock, + * a sleeping mutex. NOTE(review): confirm whether + * v4l2_m2m_buf_done_and_job_finish() can sleep as well. + */ +void daedalus_drain_inflight_on_disconnect(void); + #endif /* DAEDALUS_V4L2_MAIN_H */