Revert "Merge pull request 'kernel + daemon: H.264 B-frame display reorder fix (closes #6)' (#7) from noether/kernel-daemon-h264-reorder-fix into main"

This reverts commit 79256dc7ef, reversing
changes made to 7ff2d897ea.
This commit is contained in:
2026-05-21 14:40:59 +02:00
parent 49e60c9bba
commit 714d781d22
6 changed files with 276 additions and 713 deletions
+89 -186
View File
@@ -611,28 +611,8 @@ struct daedalus_inflight {
struct list_head list;
u32 cookie;
struct daedalus_ctx *ctx;
/*
* src_buf / dst_buf decouple in the daedalus-v4l2#6 reorder fix.
* src_buf is cleared (NULL'd) when DAEDALUS_RESP_FLAG_SRC_CONSUMED
* arrives — that signals libavcodec has accepted the bitstream
* even if no display-order frame is ready yet. dst_buf is cleared
* when DAEDALUS_RESP_FLAG_HAS_PIXELS arrives — the daemon has
* written pixels into this CAPTURE buffer. When both are NULL
* the inflight entry is removed and freed.
*/
struct vb2_v4l2_buffer *src_buf;
struct vb2_v4l2_buffer *dst_buf;
/*
* src_buf->vb2_buf.timestamp captured at device_run time.
* Mirrored into REQ_DECODE.src_pts so the daemon can set
* pkt->pts = src_pts on avcodec_send_packet, and read back
* frame->pts to identify which OUTPUT bitstream produced the
* current display-order frame. Kept here so the kernel can
* stamp dst_buf.timestamp explicitly at HAS_PIXELS time even
* though V4L2_BUF_FLAG_TIMESTAMP_COPY's automatic src->dst
* pairing no longer applies (src/dst lifecycles decoupled).
*/
u64 src_pts;
/*
* Captured media_request the src_buf was bound to (if any).
* Set by device_run from src_buf->vb2_buf.req_obj.req;
@@ -643,22 +623,16 @@ struct daedalus_inflight {
struct media_request *req;
};
/*
* Peek (don't remove). The split-completion path may receive
* multiple RESP_FRAME messages on a single inflight item (one for
* SRC_CONSUMED, one for HAS_PIXELS — possibly separated in time if
* libavcodec held the picture for display reorder). Caller removes
* the entry only when both src_buf and dst_buf have been cleared
* from inside the inflight lock.
*/
static struct daedalus_inflight *
daedalus_inflight_peek_locked(struct daedalus_dev *dev, u32 cookie)
daedalus_inflight_pop_locked(struct daedalus_dev *dev, u32 cookie)
{
struct daedalus_inflight *e;
list_for_each_entry(e, &dev->inflight, list) {
if (e->cookie == cookie)
if (e->cookie == cookie) {
list_del(&e->list);
return e;
}
}
return NULL;
}
@@ -821,17 +795,6 @@ static void daedalus_device_run(void *priv)
req->codec_id = cid;
req->bitstream_len = (u32) blen;
/*
* Ferry the OUTPUT buffer's vb2 timestamp through to the
* daemon for the H.264 B-frame display-reorder fix
* (daedalus-v4l2#6). Daemon sets pkt->pts = src_pts before
* avcodec_send_packet; libavcodec stamps frame->pts with
* the same value when it eventually outputs the frame in
* display order, letting the daemon route HAS_PIXELS RESPs
* to the correct cookie even when libavcodec's display
* order disagrees with V4L2's decode submission order.
*/
req->src_pts = (u64) src_buf->vb2_buf.timestamp;
req->capture_width = ctx->dst_fmt.width;
req->capture_height = ctx->dst_fmt.height;
req->capture_pix_fmt = ctx->dst_fmt.pixelformat;
@@ -861,7 +824,6 @@ static void daedalus_device_run(void *priv)
inf->ctx = ctx;
inf->src_buf = src_buf;
inf->dst_buf = dst_buf;
inf->src_pts = req->src_pts;
/*
* Capture the bound media_request (if any) so the
* completion path can call v4l2_ctrl_request_complete +
@@ -927,179 +889,120 @@ static const struct v4l2_m2m_ops daedalus_m2m_ops = {
/* -- chardev RESP_FRAME → buf_done bridge ---------------------------- */
/*
* Pack the daemon's pixel delivery into the inflight item's CAPTURE
* buffer. Called from daedalus_complete_resp_frame on the
* HAS_PIXELS branch, after the lock has been dropped (vb2 ops may
* sleep / take their own locks). The dst_buf reference was
* snapshotted under the inflight lock and cleared from the entry,
* so no other RESP can race for this buffer.
*
* pixels_len == 0 → dmabuf path (Phase 8.6+); the daemon mmap'd the
* CAPTURE plane via GET_DMABUF and wrote pixels in place; we just
* set the plane payloads. pixels_len > 0 → legacy Phase 8.5 inline
* NV12 path; we memcpy from the chardev payload.
*/
static void daedalus_pack_pixels_into_dst(struct vb2_v4l2_buffer *dst_buf,
const struct daedalus_resp_frame *fr,
const u8 *pixels, size_t pixels_len)
{
struct vb2_buffer *vb = &dst_buf->vb2_buf;
void *dst_y, *dst_uv;
u32 y_size, uv_size;
unsigned int p;
if (pixels_len) {
y_size = min_t(u32, fr->luma_len,
(u32) vb2_plane_size(vb, 0));
uv_size = vb->num_planes > 1 ?
min_t(u32, fr->chroma_len,
(u32) vb2_plane_size(vb, 1)) : 0;
dst_y = vb2_plane_vaddr(vb, 0);
dst_uv = vb->num_planes > 1 ?
vb2_plane_vaddr(vb, 1) : NULL;
if (dst_y && y_size && pixels_len >= y_size)
memcpy(dst_y, pixels, y_size);
else
y_size = 0;
if (dst_uv && uv_size &&
pixels_len >= y_size + uv_size)
memcpy(dst_uv, pixels + y_size, uv_size);
else
uv_size = 0;
vb2_set_plane_payload(vb, 0, y_size);
if (vb->num_planes > 1)
vb2_set_plane_payload(vb, 1, uv_size);
} else {
for (p = 0; p < vb->num_planes; p++)
vb2_set_plane_payload(vb, p,
vb2_plane_size(vb, p));
}
}
void daedalus_complete_resp_frame(u32 cookie,
const struct daedalus_resp_frame *fr,
const u8 *pixels, size_t pixels_len)
{
struct daedalus_dev *dev = g_daedalus_dev;
struct daedalus_inflight *inf;
struct daedalus_ctx *ctx = NULL;
struct vb2_v4l2_buffer *src_to_complete = NULL;
struct vb2_v4l2_buffer *dst_to_complete = NULL;
struct media_request *req_to_complete = NULL;
enum vb2_buffer_state state;
u64 dst_timestamp = 0;
bool entry_freed = false;
bool has_pixels, src_consumed;
void *dst_y, *dst_uv;
u32 y_size, uv_size;
if (!dev)
return;
state = (fr->status == DAEDALUS_DECODE_OK)
? VB2_BUF_STATE_DONE : VB2_BUF_STATE_ERROR;
has_pixels = !!(fr->flags & DAEDALUS_RESP_FLAG_HAS_PIXELS);
src_consumed = !!(fr->flags & DAEDALUS_RESP_FLAG_SRC_CONSUMED);
if (!has_pixels && !src_consumed) {
pr_warn_ratelimited(
"daedalus_v4l2: RESP_FRAME cookie=%u with neither HAS_PIXELS nor SRC_CONSUMED — ignoring\n",
cookie);
return;
}
mutex_lock(&dev->inflight_lock);
inf = daedalus_inflight_peek_locked(dev, cookie);
inf = daedalus_inflight_pop_locked(dev, cookie);
mutex_unlock(&dev->inflight_lock);
if (!inf) {
mutex_unlock(&dev->inflight_lock);
pr_warn_ratelimited(
"daedalus_v4l2: RESP_FRAME for unknown cookie=%u\n",
cookie);
return;
}
ctx = inf->ctx;
state = (fr->status == DAEDALUS_DECODE_OK)
? VB2_BUF_STATE_DONE : VB2_BUF_STATE_ERROR;
/*
* Snapshot what this RESP completes and clear the matching
* fields on the inflight item, so concurrent RESPs (e.g. a
* later HAS_PIXELS arriving on the same cookie after this
* SRC_CONSUMED clears src_buf) see the correct residual
* state. Actual vb2 buf_done calls happen below the lock.
* Two routes the daemon can take, both supported:
*
* Sanity check on output_src_pts only when HAS_PIXELS is
* set — the daemon's output_src_pts should equal this
* inflight's stored src_pts, since the daemon routes pixels
* to the cookie of the OUTPUT bitstream that contained the
* frame's slices (which is what we stored at device_run time).
* Surface a mismatch loudly — indicates daemon-side pts→cookie
* mapping bug, not silent data corruption.
* (a) dmabuf path (Phase 8.6+) — daemon called
* DAEDALUS_IOC_GET_DMABUF, mmap'd the CAPTURE buffer,
* wrote pixels in place. RESP_FRAME carries metadata
* only (pixels_len == 0). Each plane's payload is
* the full plane size (the daemon wrote everything
* the format requires).
*
* (b) Phase 8.5 inline path — daemon shipped raw NV12 in
* the chardev payload (≤ 64 KiB cap). We memcpy
* into the vb2 buffer. Plane payloads come from
* the daemon's NV12 luma/chroma counts.
*/
if (has_pixels) {
if (fr->output_src_pts != inf->src_pts)
pr_warn_ratelimited(
"daedalus_v4l2: RESP HAS_PIXELS cookie=%u output_src_pts=%llu but inflight.src_pts=%llu — daemon dispatch bug?\n",
cookie,
(unsigned long long) fr->output_src_pts,
(unsigned long long) inf->src_pts);
if (state == VB2_BUF_STATE_DONE) {
struct vb2_buffer *vb = &inf->dst_buf->vb2_buf;
unsigned int p;
dst_to_complete = inf->dst_buf;
dst_timestamp = inf->src_pts;
inf->dst_buf = NULL;
}
if (src_consumed) {
src_to_complete = inf->src_buf;
req_to_complete = inf->req;
inf->src_buf = NULL;
inf->req = NULL;
}
if (!inf->src_buf && !inf->dst_buf) {
list_del(&inf->list);
entry_freed = true;
}
mutex_unlock(&dev->inflight_lock);
/*
* Complete the CAPTURE side first (when applicable). vb2-core's
* V4L2_BUF_FLAG_TIMESTAMP_COPY semantics no longer auto-copy
* src→dst timestamps because src and dst are no longer paired
* 1:1 in m2m's view — stamp dst explicitly from the inflight's
* stored src_pts (= the OUTPUT vb2_buf.timestamp captured at
* device_run). The V4L2 client gets the same display-PTS it
* originally set on the OUTPUT side.
*/
if (dst_to_complete) {
if (state == VB2_BUF_STATE_DONE)
daedalus_pack_pixels_into_dst(dst_to_complete, fr,
pixels, pixels_len);
dst_to_complete->vb2_buf.timestamp = dst_timestamp;
v4l2_m2m_dst_buf_remove_by_buf(ctx->m2m_ctx, dst_to_complete);
v4l2_m2m_buf_done(dst_to_complete, state);
if (pixels_len) {
/* (b) inline NV12 copy — legacy 2-plane only */
y_size = min_t(u32, fr->luma_len,
(u32) vb2_plane_size(vb, 0));
uv_size = vb->num_planes > 1 ?
min_t(u32, fr->chroma_len,
(u32) vb2_plane_size(vb, 1)) : 0;
dst_y = vb2_plane_vaddr(vb, 0);
dst_uv = vb->num_planes > 1 ?
vb2_plane_vaddr(vb, 1) : NULL;
if (dst_y && y_size && pixels_len >= y_size)
memcpy(dst_y, pixels, y_size);
else
y_size = 0;
if (dst_uv && uv_size &&
pixels_len >= y_size + uv_size)
memcpy(dst_uv, pixels + y_size, uv_size);
else
uv_size = 0;
vb2_set_plane_payload(vb, 0, y_size);
if (vb->num_planes > 1)
vb2_set_plane_payload(vb, 1, uv_size);
} else {
/* (a) dmabuf path: plane is fully populated by
* the daemon, so payload == sizeimage. */
for (p = 0; p < vb->num_planes; p++)
vb2_set_plane_payload(vb, p,
vb2_plane_size(vb, p));
}
}
/*
* Complete the OUTPUT side: release the bound media_request's
* controls (libva-driven path), drop our request reference taken
* in device_run, mark src done, then job_finish so the m2m
* scheduler can dispatch the next pending REQ on this ctx. The
* dst_buf for this cookie may still be parked (HAS_PIXELS hasn't
* arrived yet — libavcodec is holding the frame for display-
* order release). That's fine: the next device_run picks a
* different next_dst_buf out of the CAPTURE queue and proceeds.
* Phase 8.14: if the src_buf was bound to a media_request
* (libva-driven decode path), complete the per-request
* control state BEFORE buf_done_and_job_finish. vb2-core's
* buf_done unbinds the buffer's req_obj on its own, but the
* control object stays bound until v4l2_ctrl_request_complete
* runs — only after BOTH objects unbind does the request
* transition to MEDIA_REQUEST_STATE_COMPLETE and wake any
* userspace poll on the request fd.
*
* For non-request flows (test_m2m_stream direct QBUF) inf->req
* is NULL and v4l2_ctrl_request_complete just no-ops.
*/
if (src_to_complete) {
if (req_to_complete)
v4l2_ctrl_request_complete(req_to_complete, &ctx->hdl);
v4l2_m2m_src_buf_remove_by_buf(ctx->m2m_ctx, src_to_complete);
v4l2_m2m_buf_done(src_to_complete, state);
if (req_to_complete)
media_request_put(req_to_complete);
v4l2_m2m_job_finish(dev->m2m_dev, ctx->m2m_ctx);
}
if (inf->req)
v4l2_ctrl_request_complete(inf->req, &inf->ctx->hdl);
if (entry_freed)
kfree(inf);
/*
* Use the buf_done_and_job_finish helper rather than plain
* buf_done + job_finish: the helper pops the buffers off
* the m2m queue before marking them done, otherwise the
* scheduler immediately re-runs device_run on the same
* still-queued src buffer. Caught during Phase 8.5 first
* run — second REQ_DECODE with identical bitstream + oops
* in stop_streaming when the test client tore down.
*/
v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, inf->ctx->m2m_ctx,
state);
/*
* Release our reference taken in device_run; safe to do
* AFTER buf_done_and_job_finish (which dropped the vb2
* reference) because we still hold this one. If the
* refcount hits zero here, media-core releases the request.
*/
if (inf->req)
media_request_put(inf->req);
kfree(inf);
}
/* -- v4l2_ioctl_ops -------------------------------------------------- */