From 714d781d227fcaff0b8d31cae41dc07cc0ac19c5 Mon Sep 17 00:00:00 2001 From: Markus Fritsche Date: Thu, 21 May 2026 14:40:59 +0200 Subject: [PATCH] Revert "Merge pull request 'kernel + daemon: H.264 B-frame display reorder fix (closes #6)' (#7) from noether/kernel-daemon-h264-reorder-fix into main" This reverts commit 79256dc7ef41f83873ca9c23db20f5888858e65d, reversing changes made to 7ff2d897ea82228fbd9d14fa1d8b851ad9043f75. --- daemon/src/chardev_client.c | 275 +++++---------------------------- daemon/src/chardev_client.h | 26 ---- daemon/src/decoder.c | 280 +++++++++++++++------------------- daemon/src/decoder.h | 79 +++------- include/daedalus_v4l2_proto.h | 54 +------ kernel/daedalus_v4l2_main.c | 275 +++++++++++---------------------- 6 files changed, 276 insertions(+), 713 deletions(-) diff --git a/daemon/src/chardev_client.c b/daemon/src/chardev_client.c index c03c872..10621cc 100644 --- a/daemon/src/chardev_client.c +++ b/daemon/src/chardev_client.c @@ -133,288 +133,87 @@ static int send_response(struct chardev_client *cli, uint32_t type, return rc; } -/* - * Register a new (src_pts → cookie) mapping in the pending table. - * Reuses an existing slot for src_pts if one exists (defensive — the - * kernel should never re-use the same src_pts for two live cookies, - * but libva running against a test client without timestamps might - * send all-zero src_pts; collapse them onto the latest cookie so the - * 1:1-per-stream case keeps working). Returns 0 on success, -ENOSPC - * if the table is full. - */ -static int pending_register(struct chardev_client *cli, uint64_t src_pts, - uint32_t cookie, - const struct daedalus_req_decode *req) -{ - int free_slot = -1; - int i; - - for (i = 0; i < DAEDALUS_MAX_PENDING_COOKIES; i++) { - if (cli->pending[i].used && cli->pending[i].src_pts == src_pts) { - cli->pending[i].cookie = cookie; - cli->pending[i].cached_req = *req; - return 0; - } - if (!cli->pending[i].used && free_slot < 0) - free_slot = i; - } - - if (free_slot < 0) { - log_err("pending: table full registering cookie=%u src_pts=%llu", - cookie, (unsigned long long) src_pts); - return -ENOSPC; - } - - cli->pending[free_slot].used = 1; - cli->pending[free_slot].src_pts = src_pts; - cli->pending[free_slot].cookie = cookie; - cli->pending[free_slot].cached_req = *req; - return 0; -} - -/* - * Look up the cookie + cached REQ_DECODE that originally introduced - * @src_pts. Returns 0 + populates @cookie_out / @req_out, or -ENOENT - * if no match (likely a daemon bug or codec output we can't route). - */ -static int pending_lookup(const struct chardev_client *cli, - uint64_t src_pts, - uint32_t *cookie_out, - struct daedalus_req_decode *req_out) -{ - int i; - - for (i = 0; i < DAEDALUS_MAX_PENDING_COOKIES; i++) { - if (cli->pending[i].used && - cli->pending[i].src_pts == src_pts) { - *cookie_out = cli->pending[i].cookie; - *req_out = cli->pending[i].cached_req; - return 0; - } - } - return -ENOENT; -} - -static void pending_release(struct chardev_client *cli, uint64_t src_pts) -{ - int i; - - for (i = 0; i < DAEDALUS_MAX_PENDING_COOKIES; i++) { - if (cli->pending[i].used && - cli->pending[i].src_pts == src_pts) { - cli->pending[i].used = 0; - cli->pending[i].src_pts = 0; - cli->pending[i].cookie = 0; - return; - } - } -} - -/* - * Pack the daemon's current AVFrame into the CAPTURE buffer owned by - * @owner_cookie, then ship RESP_FRAME with the flags caller asked for. - * Returns 0 on success; -errno on GET_DMABUF / mmap failure (RESP is - * still emitted so the kernel doesn't park the dst buffer forever). - */ -static int deliver_frame_to_cookie(struct chardev_client *cli, - uint32_t owner_cookie, - const struct daedalus_req_decode *owner_req, - struct daedalus_resp_frame *resp, - uint32_t resp_flags) -{ - struct daedalus_capture_planes planes; - int orc; - - orc = daedalus_capture_planes_open(cli->fd, owner_cookie, owner_req, - &planes); - if (orc < 0) { - log_warn("drain: GET_DMABUF cookie=%u failed (%d); RESP metadata-only", - owner_cookie, orc); - } else { - (void) daedalus_decoder_pack_current(cli->decoder, &planes, - owner_req->capture_pix_fmt); - daedalus_capture_planes_close(&planes); - } - - resp->flags |= resp_flags; - return send_response(cli, DAEDALUS_MSG_RESP_FRAME, owner_cookie, - resp, sizeof(*resp)); -} - static int handle_req_decode(struct chardev_client *cli, const struct daedalus_msg_hdr *hdr, const uint8_t *payload) { struct daedalus_req_decode req; + struct daedalus_resp_frame resp; + struct daedalus_capture_planes planes; const struct daedalus_h264_meta *h264_meta = NULL; size_t meta_off, meta_len = 0; - int submit_status; - int src_consumed_emitted = 0; int rc; + int decoded = 0; if (hdr->payload_len < sizeof(req)) { - struct daedalus_resp_frame err = { 0 }; - log_err("REQ_DECODE cookie=%u: payload too short %u < %zu", hdr->cookie, hdr->payload_len, sizeof(req)); - err.status = DAEDALUS_DECODE_ERR_RECV; - err.flags = DAEDALUS_RESP_FLAG_HAS_PIXELS | - DAEDALUS_RESP_FLAG_SRC_CONSUMED; + memset(&resp, 0, sizeof(resp)); + resp.status = DAEDALUS_DECODE_ERR_RECV; return send_response(cli, DAEDALUS_MSG_RESP_FRAME, - hdr->cookie, &err, sizeof(err)); + hdr->cookie, &resp, sizeof(resp)); } memcpy(&req, payload, sizeof(req)); + /* Optional H.264 meta block follows req when the flag is set; + * bitstream comes after meta. */ if (req.flags & DAEDALUS_REQ_FLAG_H264_META) meta_len = sizeof(struct daedalus_h264_meta); meta_off = sizeof(req); if ((size_t) req.bitstream_len + sizeof(req) + meta_len != hdr->payload_len) { - struct daedalus_resp_frame err = { 0 }; - log_err("REQ_DECODE cookie=%u: bitstream_len %u + meta %zu inconsistent with payload_len %u", hdr->cookie, req.bitstream_len, meta_len, hdr->payload_len); - err.status = DAEDALUS_DECODE_ERR_RECV; - err.flags = DAEDALUS_RESP_FLAG_HAS_PIXELS | - DAEDALUS_RESP_FLAG_SRC_CONSUMED; + memset(&resp, 0, sizeof(resp)); + resp.status = DAEDALUS_DECODE_ERR_RECV; return send_response(cli, DAEDALUS_MSG_RESP_FRAME, - hdr->cookie, &err, sizeof(err)); + hdr->cookie, &resp, sizeof(resp)); } if (meta_len) h264_meta = (const struct daedalus_h264_meta *) (payload + meta_off); - log_info("REQ_DECODE cookie=%u codec=%u bitstream=%u bytes meta=%s capture=%ux%u %u planes src_pts=%llu", + log_info("REQ_DECODE cookie=%u codec=%u bitstream=%u bytes meta=%s capture=%ux%u %u planes", hdr->cookie, req.codec_id, req.bitstream_len, h264_meta ? "h264" : "none", req.capture_width, req.capture_height, - req.capture_num_planes, - (unsigned long long) req.src_pts); + req.capture_num_planes); /* - * Register (src_pts → cookie) mapping BEFORE submit, so any drained - * frame whose pts matches this REQ's src_pts (the steady-state - * 1:1 path) can find its owner via pending_lookup below. Out of - * space here is fatal — we'd lose the routing identity for this - * cookie's eventual frame. Send an error RESP that releases both - * src and dst so the V4L2 client moves on. + * Open dmabuf-fds for every CAPTURE plane and mmap them. + * If this fails we still attempt the decode (so the kernel + * gets a structured error response) — but we pass NULL + * planes so pixels aren't written anywhere. */ - rc = pending_register(cli, req.src_pts, hdr->cookie, &req); + rc = daedalus_capture_planes_open(cli->fd, hdr->cookie, &req, + &planes); if (rc < 0) { - struct daedalus_resp_frame err = { 0 }; - - err.status = DAEDALUS_DECODE_ERR_SEND; - err.flags = DAEDALUS_RESP_FLAG_HAS_PIXELS | - DAEDALUS_RESP_FLAG_SRC_CONSUMED; - return send_response(cli, DAEDALUS_MSG_RESP_FRAME, - hdr->cookie, &err, sizeof(err)); + log_warn("REQ_DECODE cookie=%u: GET_DMABUF/mmap failed (%d); decode metadata-only", + hdr->cookie, rc); + /* planes is already zeroed by capture_planes_open */ } - submit_status = daedalus_decoder_submit(cli->decoder, &req, - payload + meta_off + meta_len, - h264_meta); - if (submit_status != 0) { - /* - * avcodec_send_packet failed before any frame could have - * been queued for this src_pts. Drop the pending entry - * (no future drain will find a matching pts), and emit a - * combined HAS_PIXELS|SRC_CONSUMED error RESP for this - * cookie so the V4L2 client unblocks. - */ - struct daedalus_resp_frame err = { 0 }; + rc = daedalus_decoder_run_request(cli->decoder, &req, + payload + meta_off + meta_len, + h264_meta, + &resp, + planes.nr ? &planes : NULL); + decoded = (rc >= 0); - pending_release(cli, req.src_pts); - err.status = (uint32_t) submit_status; - err.codec_id = req.codec_id; - err.flags = DAEDALUS_RESP_FLAG_HAS_PIXELS | - DAEDALUS_RESP_FLAG_SRC_CONSUMED; - err.output_src_pts = req.src_pts; - return send_response(cli, DAEDALUS_MSG_RESP_FRAME, - hdr->cookie, &err, sizeof(err)); - } + daedalus_capture_planes_close(&planes); + + if (!decoded) + return rc; /* - * Drain libavcodec for as many display-ordered frames as it can - * emit right now. Each frame's pts identifies which cookie's - * CAPTURE buffer the pixels go in (see [[daedalus-v4l2#6]]). In - * steady state for VP9/AV1 (no reorder) the loop runs exactly - * once, draining the just-submitted packet's own frame. For - * H.264 with B-frames the first drained frame may belong to an - * EARLIER cookie's bitstream — that's the entire point. + * RESP_FRAME is metadata-only in Phase 8.6 — pixels already + * live in the V4L2 client's CAPTURE buffer via the dmabuf + * the daemon wrote to in pack_nv12_to_planes. */ - for (;;) { - struct daedalus_resp_frame resp; - uint32_t owner_cookie = 0; - struct daedalus_req_decode owner_req; - uint32_t flags; - - rc = daedalus_decoder_drain_one(cli->decoder, req.codec_id, - &resp); - if (rc == -EAGAIN) - break; - if (rc != 0) { - /* - * Hard codec error during drain. resp->status is set. - * Pin it to THIS REQ's cookie (we can't know whose - * pts the failed frame would have had); set both - * flags so the V4L2 client moves on. - */ - pending_release(cli, req.src_pts); - resp.flags = DAEDALUS_RESP_FLAG_HAS_PIXELS | - DAEDALUS_RESP_FLAG_SRC_CONSUMED; - resp.output_src_pts = req.src_pts; - (void) send_response(cli, DAEDALUS_MSG_RESP_FRAME, - hdr->cookie, &resp, sizeof(resp)); - src_consumed_emitted = 1; - break; - } - - if (pending_lookup(cli, resp.output_src_pts, - &owner_cookie, &owner_req) != 0) { - /* - * Frame's pts has no registered owner — implies a - * daemon-side tracking bug or a codec output for a - * packet we never registered (e.g. a B-frame that - * was queued before the daemon caught up). Drop the - * frame; can't safely route it. - */ - log_warn("drain: no pending entry for output_src_pts=%llu (codec dropped a frame?)", - (unsigned long long) resp.output_src_pts); - continue; - } - - flags = DAEDALUS_RESP_FLAG_HAS_PIXELS; - if (owner_cookie == hdr->cookie) { - flags |= DAEDALUS_RESP_FLAG_SRC_CONSUMED; - src_consumed_emitted = 1; - } - - (void) deliver_frame_to_cookie(cli, owner_cookie, &owner_req, - &resp, flags); - pending_release(cli, resp.output_src_pts); - } - - /* - * If the drain loop didn't already SRC_CONSUMED this REQ's cookie - * (libavcodec held the frame for display-order reorder — the - * pixels will arrive in a future drain), emit a standalone - * SRC_CONSUMED RESP now. Kernel releases src_buf + runs - * job_finish; dst_buf parked until the matching HAS_PIXELS - * shows up later. - */ - if (!src_consumed_emitted) { - struct daedalus_resp_frame resp = { 0 }; - - resp.status = DAEDALUS_DECODE_OK; - resp.codec_id = req.codec_id; - resp.flags = DAEDALUS_RESP_FLAG_SRC_CONSUMED; - (void) send_response(cli, DAEDALUS_MSG_RESP_FRAME, - hdr->cookie, &resp, sizeof(resp)); - } - - return 0; + return send_response(cli, DAEDALUS_MSG_RESP_FRAME, hdr->cookie, + &resp, sizeof(resp)); } static int handle_ping(struct chardev_client *cli, diff --git a/daemon/src/chardev_client.h b/daemon/src/chardev_client.h index dad9289..f0b22fe 100644 --- a/daemon/src/chardev_client.h +++ b/daemon/src/chardev_client.h @@ -18,44 +18,18 @@ struct ffmpeg_loader; struct daedalus_decoder; -/* - * Per-inflight (cookie, src_pts) tracking for the H.264 B-frame - * display-reorder fix (daedalus-v4l2#6). When the daemon drains a - * frame from libavcodec, frame->pts (= src_pts of the OUTPUT bitstream - * that contained the frame's slices) identifies which cookie's CAPTURE - * buffer the pixels belong in — distinct from the cookie of the REQ - * that triggered the receive_frame call. Mapping is small (bounded - * by the V4L2 client's buffer pool depth, typically ≤24) so a linear - * array beats a hashtable for cache-locality. - * - * cached_req carries the capture geometry (num_planes, plane sizes, - * strides, pix_fmt) so a later drain — which may target this cookie - * from a DIFFERENT REQ's drain loop — can call GET_DMABUF + open - * planes with the original REQ's parameters. - */ -#define DAEDALUS_MAX_PENDING_COOKIES 64 - -struct chardev_pending_cookie { - int used; - uint64_t src_pts; - uint32_t cookie; - struct daedalus_req_decode cached_req; -}; - /** * struct chardev_client - daemon-side chardev state * @fd: open /dev/daedalus-v4l2 descriptor (-1 if not open) * @loader: dlopen'd FFmpeg loader (borrowed; not owned) * @decoder: per-codec AVCodecContext cache (owned) * @stop_flag: set non-zero from a signal handler to break the loop - * @pending: pts → cookie lookup table for split SRC/DST RESPs */ struct chardev_client { int fd; struct ffmpeg_loader *loader; struct daedalus_decoder *decoder; volatile sig_atomic_t *stop_flag; - struct chardev_pending_cookie pending[DAEDALUS_MAX_PENDING_COOKIES]; }; /** diff --git a/daemon/src/decoder.c b/daemon/src/decoder.c index 815c4f5..e91eb35 100644 --- a/daemon/src/decoder.c +++ b/daemon/src/decoder.c @@ -348,30 +348,31 @@ static int pack_nv12_to_planes(struct AVFrame *fr, return 0; } -/* - * Per-codec assemble + send_packet. Returns 0 on success, or one - * of DAEDALUS_DECODE_ERR_* on failure (errors here propagate via - * the caller's RESP_FRAME status field — they are NOT logged as a - * silent skip). pkt->pts is stamped from req->src_pts so the - * resulting frame->pts comes back identifiable on the drain side. - */ -int daedalus_decoder_submit(struct daedalus_decoder *dec, - const struct daedalus_req_decode *req, - const uint8_t *bitstream, - const struct daedalus_h264_meta *h264_meta) +int daedalus_decoder_run_request(struct daedalus_decoder *dec, + const struct daedalus_req_decode *req, + const uint8_t *bitstream, + const struct daedalus_h264_meta *h264_meta, + struct daedalus_resp_frame *resp, + const struct daedalus_capture_planes *planes) { struct ffmpeg_loader *fm = dec->loader; struct AVCodecContext *ctx = NULL; uint8_t *assembled = NULL; size_t assembled_len = 0; int rc; - int status = 0; + + memset(resp, 0, sizeof(*resp)); + resp->codec_id = req->codec_id; rc = decoder_open_codec(dec, req->codec_id, &ctx); - if (rc == -ENOSYS) - return DAEDALUS_DECODE_ERR_CODEC; - if (rc < 0) - return DAEDALUS_DECODE_ERR_OPEN; + if (rc == -ENOSYS) { + resp->status = DAEDALUS_DECODE_ERR_CODEC; + goto out; + } + if (rc < 0) { + resp->status = DAEDALUS_DECODE_ERR_OPEN; + goto out; + } fm->av_packet_unref(dec->pkt); @@ -396,14 +397,14 @@ int daedalus_decoder_submit(struct daedalus_decoder *dec, if (sps_len == 0 || pps_len == 0) { log_err("decoder: SPS/PPS NAL synth failed (sps=%zu pps=%zu)", sps_len, pps_len); - status = DAEDALUS_DECODE_ERR_SEND; + resp->status = DAEDALUS_DECODE_ERR_SEND; goto out; } assembled_len = sps_len + pps_len + req->bitstream_len; assembled = malloc(assembled_len + AV_INPUT_BUFFER_PADDING_SIZE); if (!assembled) { - status = DAEDALUS_DECODE_ERR_SEND; + resp->status = DAEDALUS_DECODE_ERR_SEND; goto out; } memcpy(assembled, sps_nal, sps_len); @@ -440,162 +441,133 @@ int daedalus_decoder_submit(struct daedalus_decoder *dec, dec->pkt->size = (int) req->bitstream_len; } - /* - * Stamp pkt->pts from REQ_DECODE's src_pts (the V4L2 OUTPUT - * buffer's vb2 timestamp captured by the kernel at device_run - * time). libavcodec carries pkt->pts forward to frame->pts on - * the receive_frame side — even after display-order reordering - * inside the H.264 DPB — which lets the chardev_client identify - * which cookie's CAPTURE buffer a drained frame's pixels belong - * in. Without this stamp, every drained frame would look like - * it came from the current REQ; pairs of B/P would swap places - * in the visible output (daedalus-v4l2#6). - */ - dec->pkt->pts = (int64_t) req->src_pts; - rc = fm->avcodec_send_packet(ctx, dec->pkt); if (rc < 0) { log_err("decoder: avcodec_send_packet failed: %d", rc); - status = DAEDALUS_DECODE_ERR_SEND; + resp->status = DAEDALUS_DECODE_ERR_SEND; goto out; } -out: - free(assembled); - (void) assembled_len; - return status; -} - -/* - * Pull the next display-ordered frame out of libavcodec's DPB. - * Returns 0 if a frame was returned (dec->frame holds it and resp - * is populated with metadata + output_src_pts == frame->pts), - * -EAGAIN if libavcodec needs more input, or DAEDALUS_DECODE_ERR_* - * on a hard codec error. Caller may immediately invoke - * daedalus_decoder_pack_current() to copy this frame's pixels into - * a CAPTURE buffer's mapped planes, then call drain_one again for - * any further frames in the DPB. - */ -int daedalus_decoder_drain_one(struct daedalus_decoder *dec, - uint32_t codec_id, - struct daedalus_resp_frame *resp) -{ - struct ffmpeg_loader *fm = dec->loader; - struct AVCodecContext *ctx = NULL; - struct AVFrame *fr; - const AVPixFmtDescriptor *desc; - uint32_t h, luma_len = 0, chroma_len = 0; - int rc; - - memset(resp, 0, sizeof(*resp)); - resp->codec_id = codec_id; - - rc = decoder_open_codec(dec, codec_id, &ctx); - if (rc == -ENOSYS) { - resp->status = DAEDALUS_DECODE_ERR_CODEC; - return DAEDALUS_DECODE_ERR_CODEC; - } - if (rc < 0) { - resp->status = DAEDALUS_DECODE_ERR_OPEN; - return DAEDALUS_DECODE_ERR_OPEN; - } - fm->av_frame_unref(dec->frame); rc = fm->avcodec_receive_frame(ctx, dec->frame); - if (rc == AVERROR(EAGAIN) || rc == AVERROR_EOF) - return -EAGAIN; + if (rc == AVERROR(EAGAIN) || rc == AVERROR_EOF) { + log_debug("decoder: no frame ready yet (rc=%d)", rc); + resp->status = DAEDALUS_DECODE_NO_FRAME; + goto out; + } if (rc < 0) { log_err("decoder: avcodec_receive_frame failed: %d", rc); resp->status = DAEDALUS_DECODE_ERR_RECV; - return DAEDALUS_DECODE_ERR_RECV; + goto out; } - fr = dec->frame; - desc = fm->av_pix_fmt_desc_get(fr->format); - h = fnv1a32_init(); + { + struct AVFrame *fr = dec->frame; + const AVPixFmtDescriptor *desc = + fm->av_pix_fmt_desc_get(fr->format); + uint32_t h = fnv1a32_init(); + uint32_t luma_len = 0, chroma_len = 0; - resp->status = DAEDALUS_DECODE_OK; - resp->width = (uint32_t) fr->width; - resp->height = (uint32_t) fr->height; - resp->pix_fmt = fr->format; - resp->output_src_pts = (uint64_t) fr->pts; + resp->status = DAEDALUS_DECODE_OK; + resp->width = (uint32_t) fr->width; + resp->height = (uint32_t) fr->height; + resp->pix_fmt = fr->format; - if (!desc) { - log_warn("decoder: no descriptor for pix_fmt %d", fr->format); - } else { - int p, max_plane = 0; - int i; + /* + * Walk every plane reported by the AVPixFmtDescriptor. + * For each component, byte width = ((plane_w * + * step_minus1) >> 0) — but the descriptor only tells + * us which plane each component sits in, not the + * plane's byte stride per pixel. In practice for the + * formats we care about (YUV420P, YUV422P, YUV444P, + * GBRP, NV12), each plane has exactly one component + * at 1 byte/sample. Hash each plane at + * (width >> log2_chroma_w) × (height >> log2_chroma_h) + * for chroma planes, full-size for plane 0. + * + * This generalises cleanly to anything 8-bit-per- + * sample-per-plane; 10/12-bit (P010, YUV420P10LE) will + * need depth handling when Phase 8.6 brings HDR + * content into play. + */ + if (!desc) { + log_warn("decoder: no descriptor for pix_fmt %d", + fr->format); + } else { + int p, max_plane = 0; + int i; - for (i = 0; i < desc->nb_components; i++) { - if (desc->comp[i].plane > max_plane) - max_plane = desc->comp[i].plane; - } - - for (p = 0; p <= max_plane; p++) { - int pw, ph; - if (!fr->data[p] || !fr->linesize[p]) - continue; - if (p == 0) { - pw = fr->width; - ph = fr->height; - luma_len += (uint32_t) pw * (uint32_t) ph; - } else { - pw = AV_CEIL_RSHIFT(fr->width, - desc->log2_chroma_w); - ph = AV_CEIL_RSHIFT(fr->height, - desc->log2_chroma_h); - chroma_len += (uint32_t) pw * (uint32_t) ph; + for (i = 0; i < desc->nb_components; i++) { + if (desc->comp[i].plane > max_plane) + max_plane = desc->comp[i].plane; + } + + for (p = 0; p <= max_plane; p++) { + int pw, ph; + if (!fr->data[p] || !fr->linesize[p]) + continue; + if (p == 0) { + pw = fr->width; + ph = fr->height; + luma_len += (uint32_t) pw * + (uint32_t) ph; + } else { + pw = AV_CEIL_RSHIFT(fr->width, + desc->log2_chroma_w); + ph = AV_CEIL_RSHIFT(fr->height, + desc->log2_chroma_h); + chroma_len += (uint32_t) pw * + (uint32_t) ph; + } + h = fnv1a32_plane(h, fr->data[p], pw, ph, + fr->linesize[p]); } - h = fnv1a32_plane(h, fr->data[p], pw, ph, - fr->linesize[p]); } + + resp->luma_len = luma_len; + resp->chroma_len = chroma_len; + resp->fnv1a_yuv = h; + + /* + * Pack pixels directly into the mapped CAPTURE dmabuf + * planes. Dispatch on the V4L2 fourcc the kernel + * negotiated: + * V4L2_PIX_FMT_NV12M (default, 8-bit, 2 planes) + * V4L2_PIX_FMT_P010 (10-bit HDR, 1 plane) + */ + if (planes && planes->nr >= 1) { + int prc = 0; + switch (req->capture_pix_fmt) { + case V4L2_PIX_FMT_NV12M: + prc = pack_nv12_to_planes(fr, desc, planes); + break; + case V4L2_PIX_FMT_NV12: + prc = pack_nv12_single_to_plane(fr, desc, planes); + break; + case V4L2_PIX_FMT_P010: + prc = pack_p010_to_plane(fr, desc, planes); + break; + default: + log_warn("decoder: unsupported capture fourcc 0x%08x", + req->capture_pix_fmt); + prc = -EINVAL; + break; + } + if (prc < 0) + log_warn("decoder: pack failed (pix_fmt=%d cap_fourcc=0x%08x) — kernel will see metadata only", + fr->format, req->capture_pix_fmt); + } + + log_info("decoder: OK %dx%d fmt=%d (%s) fnv1a=0x%08x luma=%u chroma=%u", + fr->width, fr->height, fr->format, + desc ? desc->name : "?", + h, luma_len, chroma_len); } - resp->luma_len = luma_len; - resp->chroma_len = chroma_len; - resp->fnv1a_yuv = h; - - log_info("decoder: OK %dx%d fmt=%d (%s) fnv1a=0x%08x luma=%u chroma=%u src_pts=%llu", - fr->width, fr->height, fr->format, - desc ? desc->name : "?", - h, luma_len, chroma_len, - (unsigned long long) fr->pts); + fm->av_frame_unref(dec->frame); +out: + free(assembled); + (void) assembled_len; return 0; } - -int daedalus_decoder_pack_current(struct daedalus_decoder *dec, - const struct daedalus_capture_planes *planes, - uint32_t capture_pix_fmt) -{ - struct ffmpeg_loader *fm = dec->loader; - struct AVFrame *fr = dec->frame; - const AVPixFmtDescriptor *desc; - int prc; - - if (!planes || planes->nr < 1 || !fr || !fr->width || !fr->height) - return -EINVAL; - - desc = fm->av_pix_fmt_desc_get(fr->format); - - switch (capture_pix_fmt) { - case V4L2_PIX_FMT_NV12M: - prc = pack_nv12_to_planes(fr, desc, planes); - break; - case V4L2_PIX_FMT_NV12: - prc = pack_nv12_single_to_plane(fr, desc, planes); - break; - case V4L2_PIX_FMT_P010: - prc = pack_p010_to_plane(fr, desc, planes); - break; - default: - log_warn("decoder: unsupported capture fourcc 0x%08x", - capture_pix_fmt); - prc = -EINVAL; - break; - } - if (prc < 0) - log_warn("decoder: pack failed (pix_fmt=%d cap_fourcc=0x%08x)", - fr->format, capture_pix_fmt); - return prc; -} diff --git a/daemon/src/decoder.h b/daemon/src/decoder.h index 9aad3c6..f6eba56 100644 --- a/daemon/src/decoder.h +++ b/daemon/src/decoder.h @@ -56,68 +56,33 @@ int daedalus_decoder_init(struct daedalus_decoder *dec, void daedalus_decoder_cleanup(struct daedalus_decoder *dec); /** - * daedalus_decoder_submit - send one REQ_DECODE's bitstream into libavcodec + * daedalus_decoder_run_request - decode one REQ_DECODE payload * @dec: initialised decoder - * @req: REQ_DECODE prefix (from the wire); src_pts is stamped on - * the AVPacket so libavcodec returns frame->pts == src_pts - * when it eventually outputs the matching frame in display - * order (daedalus-v4l2#6). + * @req: REQ_DECODE prefix (from the wire) * @bitstream: bitstream blob (req->bitstream_len bytes) * @h264_meta: optional H.264 SPS/PPS metadata; non-NULL only when * codec_id == H264 and the kernel set DAEDALUS_REQ_FLAG_ - * H264_META. See decoder.c for the AnnexB synthesis. + * H264_META. Used to synthesise the AnnexB SPS+PPS NALs + * libavcodec needs before any slice (libva-v4l2-request + * passes only the slice in @bitstream per the V4L2 + * stateless API contract). NULL for VP9/AV1 paths. + * @resp: caller-allocated RESP_FRAME output (zeroed by callee) + * @planes: mapped CAPTURE planes (Phase 8.6 dmabuf path). If + * NULL or planes->nr == 0, the decoder runs but + * writes no pixels — caller still gets dims + digest. * - * Calls avcodec_send_packet on the codec's per-codec AVCodecContext. - * Returns 0 on success; one of DAEDALUS_DECODE_ERR_* on failure - * (which the caller should propagate as the RESP_FRAME status for - * the cookie of this REQ). Does NOT call avcodec_receive_frame — - * use daedalus_decoder_drain_one for that. + * Populates @resp with the decode outcome and writes decoded + * pixels (NV12 layout: Y to plane 0, interleaved CbCr to plane + * 1) directly into the mapped dmabuf planes. Always returns + * 0; decode-level failures are reported via @resp->status so + * the kernel sees a structured response rather than a dropped + * request. */ -int daedalus_decoder_submit(struct daedalus_decoder *dec, - const struct daedalus_req_decode *req, - const uint8_t *bitstream, - const struct daedalus_h264_meta *h264_meta); - -/** - * daedalus_decoder_drain_one - pop the next display-ordered frame, if any - * @dec: initialised decoder - * @codec_id: which codec context to drain (matches the REQ that just - * called submit). VP9/AV1/H264 use independent contexts. - * @resp: caller-allocated RESP_FRAME output (zeroed by callee). - * On a successful drain (return 0), resp's status / width / - * height / pix_fmt / luma_len / chroma_len / fnv1a_yuv / - * output_src_pts are populated; flags is left at 0 (caller - * adds HAS_PIXELS / SRC_CONSUMED). On EAGAIN, resp is - * zeroed. - * - * Return: 0 on a frame returned, -EAGAIN if libavcodec needs more - * input (display-order frame held inside DPB), <0 on a hard codec - * error (resp->status set). - * - * After a successful drain, the dec's internal AVFrame holds the - * decoded picture. Caller may immediately call - * daedalus_decoder_pack_current(planes) to write that picture into - * a CAPTURE buffer's dmabuf-mapped planes. Subsequent calls to - * drain_one (without another submit) try to pull additional frames - * from libavcodec's DPB. - */ -int daedalus_decoder_drain_one(struct daedalus_decoder *dec, - uint32_t codec_id, - struct daedalus_resp_frame *resp); - -/** - * daedalus_decoder_pack_current - pack the last drained frame into planes - * @dec: initialised decoder; must have a frame from drain_one - * @planes: mapped CAPTURE planes (open via GET_DMABUF using the - * cookie that owns the frame's output_src_pts). - * @capture_pix_fmt: V4L2 fourcc on the CAPTURE side (NV12M, NV12, - * P010). - * - * Return: 0 on success, <0 on a pack failure (kernel sees only the - * metadata, not pixels — typical when a format isn't wired yet). - */ -int daedalus_decoder_pack_current(struct daedalus_decoder *dec, - const struct daedalus_capture_planes *planes, - uint32_t capture_pix_fmt); +int daedalus_decoder_run_request(struct daedalus_decoder *dec, + const struct daedalus_req_decode *req, + const uint8_t *bitstream, + const struct daedalus_h264_meta *h264_meta, + struct daedalus_resp_frame *resp, + const struct daedalus_capture_planes *planes); #endif /* DAEDALUS_V4L2_DECODER_H */ diff --git a/include/daedalus_v4l2_proto.h b/include/daedalus_v4l2_proto.h index 39e705a..04e2dc2 100644 --- a/include/daedalus_v4l2_proto.h +++ b/include/daedalus_v4l2_proto.h @@ -28,12 +28,7 @@ #include #define DAEDALUS_PROTO_MAGIC 0x44303456u /* 'D04V' */ -#define DAEDALUS_PROTO_VERSION 1u /* pre-1.0; bumped for - * REQ_DECODE.src_pts + - * RESP_FRAME.flags + - * RESP_FRAME.output_src_pts - * (H.264 B-frame reorder fix, - * daedalus-v4l2#6). */ +#define DAEDALUS_PROTO_VERSION 0u /* pre-1.0 */ /* * Wire-protocol message types. @@ -147,17 +142,6 @@ struct daedalus_req_decode { __u32 capture_plane_size[3]; __u32 capture_plane_stride[3]; __u32 flags; - __u32 reserved0; /* explicit pad to 8-byte align src_pts */ - /* - * The V4L2 OUTPUT (bitstream) buffer's vb2 timestamp at submission - * time. The daemon sets pkt->pts = src_pts before - * avcodec_send_packet so libavcodec's display-ordered - * receive_frame can return frame->pts == src_pts of the bitstream - * the frame's slices belong to. Decouples kernel cookie (decode - * order, in-kernel identity) from display order — required for - * H.264 B-frame correctness (daedalus-v4l2#6). - */ - __u64 src_pts; }; /** @@ -224,31 +208,6 @@ enum daedalus_decode_status { * Fixed size — keeps wire parsing simple. No variable-length * pixel data in Phase 8.4; dmabuf in Phase 8.5 carries that. */ -/** - * DAEDALUS_RESP_FLAG_HAS_PIXELS - this RESP delivers a decoded frame's - * pixels. The owning CAPTURE buffer is identified by output_src_pts - * (matched against an in-flight item's src_pts on the kernel side), - * NOT by the chardev message header's cookie. Required since - * libavcodec's H.264 decoder reorders to display order — the cookie - * the daemon just received the REQ on may not be the cookie whose - * bitstream produced the frame just popped from receive_frame. - * - * DAEDALUS_RESP_FLAG_SRC_CONSUMED - the chardev header's cookie's - * OUTPUT bitstream buffer is done from the daemon's perspective - * (libavcodec has accepted the slice data via avcodec_send_packet). - * Kernel releases src_buf for the cookie and runs job_finish so the - * m2m scheduler can dispatch the next REQ. Independent of any - * pixel delivery — the dst_buf paired with this cookie may still - * be parked, awaiting a future RESP with HAS_PIXELS + matching - * output_src_pts. - * - * Both flags may be set in a single message (steady-state path with - * no codec reorder lag — the just-sent packet immediately yielded a - * frame whose pts == this REQ's src_pts). - */ -#define DAEDALUS_RESP_FLAG_HAS_PIXELS 0x00000001u -#define DAEDALUS_RESP_FLAG_SRC_CONSUMED 0x00000002u - struct daedalus_resp_frame { __u32 status; __u32 codec_id; @@ -258,16 +217,7 @@ struct daedalus_resp_frame { __u32 luma_len; __u32 chroma_len; __u32 fnv1a_yuv; - __u32 flags; /* bitmask of DAEDALUS_RESP_FLAG_* */ - __u32 reserved0; /* explicit pad to 8-byte align output_src_pts */ - /* - * Set when DAEDALUS_RESP_FLAG_HAS_PIXELS is in flags. Identifies - * which OUTPUT bitstream's slices produced the pixels in this - * RESP — kernel completes the CAPTURE buffer whose inflight item - * has src_pts == output_src_pts. Ignored when HAS_PIXELS is - * clear. - */ - __u64 output_src_pts; + __u32 reserved; }; /* -- chardev ioctl ABI ----------------------------------------------- */ diff --git a/kernel/daedalus_v4l2_main.c b/kernel/daedalus_v4l2_main.c index 899600a..61fc312 100644 --- a/kernel/daedalus_v4l2_main.c +++ b/kernel/daedalus_v4l2_main.c @@ -611,28 +611,8 @@ struct daedalus_inflight { struct list_head list; u32 cookie; struct daedalus_ctx *ctx; - /* - * src_buf / dst_buf decouple in the daedalus-v4l2#6 reorder fix. - * src_buf is cleared (NULL'd) when DAEDALUS_RESP_FLAG_SRC_CONSUMED - * arrives — that signals libavcodec has accepted the bitstream - * even if no display-order frame is ready yet. dst_buf is cleared - * when DAEDALUS_RESP_FLAG_HAS_PIXELS arrives — the daemon has - * written pixels into this CAPTURE buffer. When both are NULL - * the inflight entry is removed and freed. - */ struct vb2_v4l2_buffer *src_buf; struct vb2_v4l2_buffer *dst_buf; - /* - * src_buf->vb2_buf.timestamp captured at device_run time. - * Mirrored into REQ_DECODE.src_pts so the daemon can set - * pkt->pts = src_pts on avcodec_send_packet, and read back - * frame->pts to identify which OUTPUT bitstream produced the - * current display-order frame. Kept here so the kernel can - * stamp dst_buf.timestamp explicitly at HAS_PIXELS time even - * though V4L2_BUF_FLAG_TIMESTAMP_COPY's automatic src->dst - * pairing no longer applies (src/dst lifecycles decoupled). - */ - u64 src_pts; /* * Captured media_request the src_buf was bound to (if any). * Set by device_run from src_buf->vb2_buf.req_obj.req; @@ -643,22 +623,16 @@ struct daedalus_inflight { struct media_request *req; }; -/* - * Peek (don't remove). The split-completion path may receive - * multiple RESP_FRAME messages on a single inflight item (one for - * SRC_CONSUMED, one for HAS_PIXELS — possibly separated in time if - * libavcodec held the picture for display reorder). Caller removes - * the entry only when both src_buf and dst_buf have been cleared - * from inside the inflight lock. - */ static struct daedalus_inflight * -daedalus_inflight_peek_locked(struct daedalus_dev *dev, u32 cookie) +daedalus_inflight_pop_locked(struct daedalus_dev *dev, u32 cookie) { struct daedalus_inflight *e; list_for_each_entry(e, &dev->inflight, list) { - if (e->cookie == cookie) + if (e->cookie == cookie) { + list_del(&e->list); return e; + } } return NULL; } @@ -821,17 +795,6 @@ static void daedalus_device_run(void *priv) req->codec_id = cid; req->bitstream_len = (u32) blen; - /* - * Ferry the OUTPUT buffer's vb2 timestamp through to the - * daemon for the H.264 B-frame display-reorder fix - * (daedalus-v4l2#6). Daemon sets pkt->pts = src_pts before - * avcodec_send_packet; libavcodec stamps frame->pts with - * the same value when it eventually outputs the frame in - * display order, letting the daemon route HAS_PIXELS RESPs - * to the correct cookie even when libavcodec's display - * order disagrees with V4L2's decode submission order. - */ - req->src_pts = (u64) src_buf->vb2_buf.timestamp; req->capture_width = ctx->dst_fmt.width; req->capture_height = ctx->dst_fmt.height; req->capture_pix_fmt = ctx->dst_fmt.pixelformat; @@ -861,7 +824,6 @@ static void daedalus_device_run(void *priv) inf->ctx = ctx; inf->src_buf = src_buf; inf->dst_buf = dst_buf; - inf->src_pts = req->src_pts; /* * Capture the bound media_request (if any) so the * completion path can call v4l2_ctrl_request_complete + @@ -927,179 +889,120 @@ static const struct v4l2_m2m_ops daedalus_m2m_ops = { /* -- chardev RESP_FRAME → buf_done bridge ---------------------------- */ -/* - * Pack the daemon's pixel delivery into the inflight item's CAPTURE - * buffer. Called from daedalus_complete_resp_frame on the - * HAS_PIXELS branch, after the lock has been dropped (vb2 ops may - * sleep / take their own locks). The dst_buf reference was - * snapshotted under the inflight lock and cleared from the entry, - * so no other RESP can race for this buffer. - * - * pixels_len == 0 → dmabuf path (Phase 8.6+); the daemon mmap'd the - * CAPTURE plane via GET_DMABUF and wrote pixels in place; we just - * set the plane payloads. pixels_len > 0 → legacy Phase 8.5 inline - * NV12 path; we memcpy from the chardev payload. - */ -static void daedalus_pack_pixels_into_dst(struct vb2_v4l2_buffer *dst_buf, - const struct daedalus_resp_frame *fr, - const u8 *pixels, size_t pixels_len) -{ - struct vb2_buffer *vb = &dst_buf->vb2_buf; - void *dst_y, *dst_uv; - u32 y_size, uv_size; - unsigned int p; - - if (pixels_len) { - y_size = min_t(u32, fr->luma_len, - (u32) vb2_plane_size(vb, 0)); - uv_size = vb->num_planes > 1 ? - min_t(u32, fr->chroma_len, - (u32) vb2_plane_size(vb, 1)) : 0; - dst_y = vb2_plane_vaddr(vb, 0); - dst_uv = vb->num_planes > 1 ? - vb2_plane_vaddr(vb, 1) : NULL; - if (dst_y && y_size && pixels_len >= y_size) - memcpy(dst_y, pixels, y_size); - else - y_size = 0; - if (dst_uv && uv_size && - pixels_len >= y_size + uv_size) - memcpy(dst_uv, pixels + y_size, uv_size); - else - uv_size = 0; - vb2_set_plane_payload(vb, 0, y_size); - if (vb->num_planes > 1) - vb2_set_plane_payload(vb, 1, uv_size); - } else { - for (p = 0; p < vb->num_planes; p++) - vb2_set_plane_payload(vb, p, - vb2_plane_size(vb, p)); - } -} - void daedalus_complete_resp_frame(u32 cookie, const struct daedalus_resp_frame *fr, const u8 *pixels, size_t pixels_len) { struct daedalus_dev *dev = g_daedalus_dev; struct daedalus_inflight *inf; - struct daedalus_ctx *ctx = NULL; - struct vb2_v4l2_buffer *src_to_complete = NULL; - struct vb2_v4l2_buffer *dst_to_complete = NULL; - struct media_request *req_to_complete = NULL; enum vb2_buffer_state state; - u64 dst_timestamp = 0; - bool entry_freed = false; - bool has_pixels, src_consumed; + void *dst_y, *dst_uv; + u32 y_size, uv_size; if (!dev) return; - state = (fr->status == DAEDALUS_DECODE_OK) - ? VB2_BUF_STATE_DONE : VB2_BUF_STATE_ERROR; - has_pixels = !!(fr->flags & DAEDALUS_RESP_FLAG_HAS_PIXELS); - src_consumed = !!(fr->flags & DAEDALUS_RESP_FLAG_SRC_CONSUMED); - - if (!has_pixels && !src_consumed) { - pr_warn_ratelimited( - "daedalus_v4l2: RESP_FRAME cookie=%u with neither HAS_PIXELS nor SRC_CONSUMED — ignoring\n", - cookie); - return; - } - mutex_lock(&dev->inflight_lock); - inf = daedalus_inflight_peek_locked(dev, cookie); + inf = daedalus_inflight_pop_locked(dev, cookie); + mutex_unlock(&dev->inflight_lock); if (!inf) { - mutex_unlock(&dev->inflight_lock); pr_warn_ratelimited( "daedalus_v4l2: RESP_FRAME for unknown cookie=%u\n", cookie); return; } - ctx = inf->ctx; + state = (fr->status == DAEDALUS_DECODE_OK) + ? VB2_BUF_STATE_DONE : VB2_BUF_STATE_ERROR; /* - * Snapshot what this RESP completes and clear the matching - * fields on the inflight item, so concurrent RESPs (e.g. a - * later HAS_PIXELS arriving on the same cookie after this - * SRC_CONSUMED clears src_buf) see the correct residual - * state. Actual vb2 buf_done calls happen below the lock. + * Two routes the daemon can take, both supported: * - * Sanity check on output_src_pts only when HAS_PIXELS is - * set — the daemon's output_src_pts should equal this - * inflight's stored src_pts, since the daemon routes pixels - * to the cookie of the OUTPUT bitstream that contained the - * frame's slices (which is what we stored at device_run time). - * Surface a mismatch loudly — indicates daemon-side pts→cookie - * mapping bug, not silent data corruption. + * (a) dmabuf path (Phase 8.6+) — daemon called + * DAEDALUS_IOC_GET_DMABUF, mmap'd the CAPTURE buffer, + * wrote pixels in place. RESP_FRAME carries metadata + * only (pixels_len == 0). Each plane's payload is + * the full plane size (the daemon wrote everything + * the format requires). + * + * (b) Phase 8.5 inline path — daemon shipped raw NV12 in + * the chardev payload (≤ 64 KiB cap). We memcpy + * into the vb2 buffer. Plane payloads come from + * the daemon's NV12 luma/chroma counts. */ - if (has_pixels) { - if (fr->output_src_pts != inf->src_pts) - pr_warn_ratelimited( - "daedalus_v4l2: RESP HAS_PIXELS cookie=%u output_src_pts=%llu but inflight.src_pts=%llu — daemon dispatch bug?\n", - cookie, - (unsigned long long) fr->output_src_pts, - (unsigned long long) inf->src_pts); + if (state == VB2_BUF_STATE_DONE) { + struct vb2_buffer *vb = &inf->dst_buf->vb2_buf; + unsigned int p; - dst_to_complete = inf->dst_buf; - dst_timestamp = inf->src_pts; - inf->dst_buf = NULL; - } - - if (src_consumed) { - src_to_complete = inf->src_buf; - req_to_complete = inf->req; - inf->src_buf = NULL; - inf->req = NULL; - } - - if (!inf->src_buf && !inf->dst_buf) { - list_del(&inf->list); - entry_freed = true; - } - mutex_unlock(&dev->inflight_lock); - - /* - * Complete the CAPTURE side first (when applicable). vb2-core's - * V4L2_BUF_FLAG_TIMESTAMP_COPY semantics no longer auto-copy - * src→dst timestamps because src and dst are no longer paired - * 1:1 in m2m's view — stamp dst explicitly from the inflight's - * stored src_pts (= the OUTPUT vb2_buf.timestamp captured at - * device_run). The V4L2 client gets the same display-PTS it - * originally set on the OUTPUT side. - */ - if (dst_to_complete) { - if (state == VB2_BUF_STATE_DONE) - daedalus_pack_pixels_into_dst(dst_to_complete, fr, - pixels, pixels_len); - dst_to_complete->vb2_buf.timestamp = dst_timestamp; - v4l2_m2m_dst_buf_remove_by_buf(ctx->m2m_ctx, dst_to_complete); - v4l2_m2m_buf_done(dst_to_complete, state); + if (pixels_len) { + /* (b) inline NV12 copy — legacy 2-plane only */ + y_size = min_t(u32, fr->luma_len, + (u32) vb2_plane_size(vb, 0)); + uv_size = vb->num_planes > 1 ? + min_t(u32, fr->chroma_len, + (u32) vb2_plane_size(vb, 1)) : 0; + dst_y = vb2_plane_vaddr(vb, 0); + dst_uv = vb->num_planes > 1 ? + vb2_plane_vaddr(vb, 1) : NULL; + if (dst_y && y_size && pixels_len >= y_size) + memcpy(dst_y, pixels, y_size); + else + y_size = 0; + if (dst_uv && uv_size && + pixels_len >= y_size + uv_size) + memcpy(dst_uv, pixels + y_size, uv_size); + else + uv_size = 0; + vb2_set_plane_payload(vb, 0, y_size); + if (vb->num_planes > 1) + vb2_set_plane_payload(vb, 1, uv_size); + } else { + /* (a) dmabuf path: plane is fully populated by + * the daemon, so payload == sizeimage. */ + for (p = 0; p < vb->num_planes; p++) + vb2_set_plane_payload(vb, p, + vb2_plane_size(vb, p)); + } } /* - * Complete the OUTPUT side: release the bound media_request's - * controls (libva-driven path), drop our request reference taken - * in device_run, mark src done, then job_finish so the m2m - * scheduler can dispatch the next pending REQ on this ctx. The - * dst_buf for this cookie may still be parked (HAS_PIXELS hasn't - * arrived yet — libavcodec is holding the frame for display- - * order release). That's fine: the next device_run picks a - * different next_dst_buf out of the CAPTURE queue and proceeds. + * Phase 8.14: if the src_buf was bound to a media_request + * (libva-driven decode path), complete the per-request + * control state BEFORE buf_done_and_job_finish. vb2-core's + * buf_done unbinds the buffer's req_obj on its own, but the + * control object stays bound until v4l2_ctrl_request_complete + * runs — only after BOTH objects unbind does the request + * transition to MEDIA_REQUEST_STATE_COMPLETE and wake any + * userspace poll on the request fd. + * + * For non-request flows (test_m2m_stream direct QBUF) inf->req + * is NULL and v4l2_ctrl_request_complete just no-ops. */ - if (src_to_complete) { - if (req_to_complete) - v4l2_ctrl_request_complete(req_to_complete, &ctx->hdl); - v4l2_m2m_src_buf_remove_by_buf(ctx->m2m_ctx, src_to_complete); - v4l2_m2m_buf_done(src_to_complete, state); - if (req_to_complete) - media_request_put(req_to_complete); - v4l2_m2m_job_finish(dev->m2m_dev, ctx->m2m_ctx); - } + if (inf->req) + v4l2_ctrl_request_complete(inf->req, &inf->ctx->hdl); - if (entry_freed) - kfree(inf); + /* + * Use the buf_done_and_job_finish helper rather than plain + * buf_done + job_finish: the helper pops the buffers off + * the m2m queue before marking them done, otherwise the + * scheduler immediately re-runs device_run on the same + * still-queued src buffer. Caught during Phase 8.5 first + * run — second REQ_DECODE with identical bitstream + oops + * in stop_streaming when the test client tore down. + */ + v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, inf->ctx->m2m_ctx, + state); + + /* + * Release our reference taken in device_run; safe to do + * AFTER buf_done_and_job_finish (which dropped the vb2 + * reference) because we still hold this one. If the + * refcount hits zero here, media-core releases the request. + */ + if (inf->req) + media_request_put(inf->req); + + kfree(inf); } /* -- v4l2_ioctl_ops -------------------------------------------------- */