kernel: drain in-flight m2m jobs on daemon disconnect (fixes #146 D-state) #23
@@ -167,6 +167,26 @@ static int daedalus_chardev_release(struct inode *inode, struct file *file)
|
|||||||
}
|
}
|
||||||
mutex_unlock(&dev->req_lock);
|
mutex_unlock(&dev->req_lock);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Drain the V4L2-side in-flight list before the daemon goes
|
||||||
|
* away. Any REQ_DECODE we already sent to the daemon won't
|
||||||
|
* get a matching RESP_FRAME — without this drain,
|
||||||
|
* v4l2_m2m_cancel_job() in the V4L2 consumer's close() path
|
||||||
|
* (or in vb2's STREAMOFF path) blocks forever waiting for a
|
||||||
|
* job_finish that will never arrive, and the consumer becomes
|
||||||
|
* unkillable D-state. Issue #146.
|
||||||
|
*
|
||||||
|
* Done AFTER draining the request queue: any REQ_DECODE still
|
||||||
|
* sitting in dev->req_queue is by definition not yet "in
|
||||||
|
* flight" (the kernel never released it to the daemon), so it
|
||||||
|
* doesn't need the m2m-job-finish dance — freeing the message
|
||||||
|
* is sufficient. The inflight list holds entries the kernel
|
||||||
|
* already committed to (added in device_run after the message
|
||||||
|
* was queued or written), which is exactly what needs to be
|
||||||
|
* failed back to vb2 here.
|
||||||
|
*/
|
||||||
|
daedalus_drain_inflight_on_disconnect();
|
||||||
|
|
||||||
mutex_lock(&dev->open_lock);
|
mutex_lock(&dev->open_lock);
|
||||||
dev->opened = 0;
|
dev->opened = 0;
|
||||||
mutex_unlock(&dev->open_lock);
|
mutex_unlock(&dev->open_lock);
|
||||||
|
|||||||
@@ -1005,6 +1005,65 @@ void daedalus_complete_resp_frame(u32 cookie,
|
|||||||
kfree(inf);
|
kfree(inf);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* -- daemon disconnect drain ----------------------------------------- */
|
||||||
|
|
||||||
|
void daedalus_drain_inflight_on_disconnect(void)
|
||||||
|
{
|
||||||
|
struct daedalus_dev *dev = g_daedalus_dev;
|
||||||
|
struct daedalus_inflight *inf, *tmp;
|
||||||
|
LIST_HEAD(local);
|
||||||
|
|
||||||
|
if (!dev)
|
||||||
|
return;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Splice the in-flight list onto a local list under the lock,
|
||||||
|
* then process each entry with the lock dropped — every
|
||||||
|
* v4l2_m2m_buf_done_and_job_finish call may itself try to
|
||||||
|
* re-enter device_run via the scheduler (which would need to
|
||||||
|
* walk dev->inflight again on a future REQ_DECODE), and
|
||||||
|
* v4l2_m2m_buf_done can sleep via vb2's buffer-done dispatch.
|
||||||
|
* Holding inflight_lock across either is a deadlock invitation.
|
||||||
|
*/
|
||||||
|
mutex_lock(&dev->inflight_lock);
|
||||||
|
list_splice_init(&dev->inflight, &local);
|
||||||
|
mutex_unlock(&dev->inflight_lock);
|
||||||
|
|
||||||
|
list_for_each_entry_safe(inf, tmp, &local, list) {
|
||||||
|
list_del(&inf->list);
|
||||||
|
|
||||||
|
v4l2_warn(&dev->v4l2_dev,
|
||||||
|
"draining inflight cookie=%u (daemon disconnect)\n",
|
||||||
|
inf->cookie);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Complete the per-request control state before
|
||||||
|
* buf_done_and_job_finish, same ordering as the success
|
||||||
|
* path in daedalus_complete_resp_frame(). For non-request
|
||||||
|
* flows inf->req is NULL and v4l2_ctrl_request_complete
|
||||||
|
* no-ops.
|
||||||
|
*/
|
||||||
|
if (inf->req)
|
||||||
|
v4l2_ctrl_request_complete(inf->req, &inf->ctx->hdl);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Mark both buffers ERROR and clear the m2m scheduler's
|
||||||
|
* job_running flag. This is what unsticks
|
||||||
|
* v4l2_m2m_cancel_job() inside the consumer's close()
|
||||||
|
* path; without it, the consumer hangs in TASK_UNINTERRUPTIBLE
|
||||||
|
* forever (issue #146).
|
||||||
|
*/
|
||||||
|
v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev,
|
||||||
|
inf->ctx->m2m_ctx,
|
||||||
|
VB2_BUF_STATE_ERROR);
|
||||||
|
|
||||||
|
if (inf->req)
|
||||||
|
media_request_put(inf->req);
|
||||||
|
|
||||||
|
kfree(inf);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* -- v4l2_ioctl_ops -------------------------------------------------- */
|
/* -- v4l2_ioctl_ops -------------------------------------------------- */
|
||||||
|
|
||||||
static int daedalus_querycap(struct file *file, void *priv,
|
static int daedalus_querycap(struct file *file, void *priv,
|
||||||
|
|||||||
@@ -103,4 +103,27 @@ void daedalus_complete_resp_frame(u32 cookie,
|
|||||||
int daedalus_export_capture_dmabuf(u32 cookie, u32 plane, u32 flags,
|
int daedalus_export_capture_dmabuf(u32 cookie, u32 plane, u32 flags,
|
||||||
int *out_fd);
|
int *out_fd);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* daedalus_drain_inflight_on_disconnect() - fail all in-flight m2m jobs
|
||||||
|
*
|
||||||
|
* Called from daedalus_chardev_release() when the daemon disconnects
|
||||||
|
* (graceful close, SIGKILL, daemon crash — anything that triggers
|
||||||
|
* chardev release). Walks the in-flight list and, for every entry,
|
||||||
|
* marks both src+dst buffers VB2_BUF_STATE_ERROR and calls
|
||||||
|
* v4l2_m2m_buf_done_and_job_finish() to clear the m2m scheduler's
|
||||||
|
* "job_running" flag.
|
||||||
|
*
|
||||||
|
* Without this, v4l2_m2m_cancel_job() (called from
|
||||||
|
* v4l2_m2m_ctx_release() during the consumer's close() / task exit)
|
||||||
|
* blocks forever waiting for a job_finish that the dead daemon will
|
||||||
|
* never send — the consumer enters TASK_UNINTERRUPTIBLE and survives
|
||||||
|
* SIGKILL until reboot. See issue #146 for the full trace.
|
||||||
|
*
|
||||||
|
* Safe to call with an empty in-flight list; no-op in that case.
|
||||||
|
* Must NOT be called from atomic context — uses inflight_lock
|
||||||
|
* (sleeping mutex) and v4l2_m2m_buf_done_and_job_finish (which can
|
||||||
|
* sleep via vb2 buffer-done dispatch).
|
||||||
|
*/
|
||||||
|
void daedalus_drain_inflight_on_disconnect(void);
|
||||||
|
|
||||||
#endif /* DAEDALUS_V4L2_MAIN_H */
|
#endif /* DAEDALUS_V4L2_MAIN_H */
|
||||||
|
|||||||
Reference in New Issue
Block a user