Revert "Merge pull request 'kernel + daemon: H.264 B-frame display reorder fix (closes #6)' (#7) from noether/kernel-daemon-h264-reorder-fix into main"

This reverts commit 79256dc7ef, reversing
changes made to 7ff2d897ea.
This commit is contained in:
2026-05-21 14:40:59 +02:00
parent 49e60c9bba
commit 714d781d22
6 changed files with 276 additions and 713 deletions
+37 -238
View File
@@ -133,288 +133,87 @@ static int send_response(struct chardev_client *cli, uint32_t type,
return rc;
}
/*
 * pending_register - record that @cookie owns the frame stamped @src_pts.
 *
 * An in-use slot that already carries @src_pts is refreshed in place
 * with the new @cookie and a copy of @req.  The kernel is not expected
 * to reuse one src_pts for two live cookies, but a test client without
 * timestamps may send all-zero src_pts; letting the latest cookie win
 * keeps the one-request-per-stream case working.  When no slot matches,
 * the lowest-indexed unused slot is claimed.
 *
 * Returns 0 on success, -ENOSPC when no slot matches and none is free.
 */
static int pending_register(struct chardev_client *cli, uint64_t src_pts,
			    uint32_t cookie,
			    const struct daedalus_req_decode *req)
{
	int slot;

	/* Pass 1: an existing mapping for this src_pts is overwritten. */
	for (slot = 0; slot < DAEDALUS_MAX_PENDING_COOKIES; slot++) {
		if (!cli->pending[slot].used)
			continue;
		if (cli->pending[slot].src_pts != src_pts)
			continue;

		cli->pending[slot].cookie = cookie;
		cli->pending[slot].cached_req = *req;
		return 0;
	}

	/* Pass 2: otherwise take the first unused slot. */
	for (slot = 0; slot < DAEDALUS_MAX_PENDING_COOKIES; slot++) {
		if (!cli->pending[slot].used)
			break;
	}

	if (slot == DAEDALUS_MAX_PENDING_COOKIES) {
		log_err("pending: table full registering cookie=%u src_pts=%llu",
			cookie, (unsigned long long) src_pts);
		return -ENOSPC;
	}

	cli->pending[slot].used = 1;
	cli->pending[slot].src_pts = src_pts;
	cli->pending[slot].cookie = cookie;
	cli->pending[slot].cached_req = *req;
	return 0;
}
/*
 * pending_lookup - find the owner of a drained frame by its @src_pts.
 *
 * Scans the pending table for the first in-use slot keyed by @src_pts.
 * On a hit, copies the slot's cookie into @cookie_out and its cached
 * REQ_DECODE into @req_out and returns 0.  Returns -ENOENT when no
 * in-use slot matches (likely a daemon bug or codec output we can't
 * route); the output parameters are left untouched in that case.
 */
static int pending_lookup(const struct chardev_client *cli,
			  uint64_t src_pts,
			  uint32_t *cookie_out,
			  struct daedalus_req_decode *req_out)
{
	int slot = 0;

	while (slot < DAEDALUS_MAX_PENDING_COOKIES) {
		int hit = cli->pending[slot].used &&
			  cli->pending[slot].src_pts == src_pts;

		if (hit) {
			*cookie_out = cli->pending[slot].cookie;
			*req_out = cli->pending[slot].cached_req;
			return 0;
		}
		slot++;
	}

	return -ENOENT;
}
/*
 * pending_release - drop the pending-table mapping keyed by @src_pts.
 *
 * Frees the first in-use slot whose src_pts matches: used, src_pts and
 * cookie are reset to 0 (cached_req is left as-is; it is overwritten on
 * the next registration of the slot).  No-op when nothing matches.
 */
static void pending_release(struct chardev_client *cli, uint64_t src_pts)
{
	int slot = 0;

	while (slot < DAEDALUS_MAX_PENDING_COOKIES) {
		if (!cli->pending[slot].used ||
		    cli->pending[slot].src_pts != src_pts) {
			slot++;
			continue;
		}

		cli->pending[slot].used = 0;
		cli->pending[slot].src_pts = 0;
		cli->pending[slot].cookie = 0;
		return;
	}
}
/*
 * deliver_frame_to_cookie - write the current frame into its owner's
 * CAPTURE buffer and send the matching RESP_FRAME.
 *
 * @cli:          chardev client (fd + decoder)
 * @owner_cookie: cookie whose CAPTURE buffer receives the pixels; also
 *                used as the cookie of the RESP_FRAME message
 * @owner_req:    REQ_DECODE that registered @owner_cookie; supplies the
 *                capture geometry and capture_pix_fmt
 * @resp:         response already filled in by the drain step; @resp_flags
 *                is OR-ed into resp->flags before sending
 * @resp_flags:   DAEDALUS_RESP_FLAG_* bits to add
 *
 * If the CAPTURE planes cannot be opened, the packing step is skipped
 * and a warning is logged, but RESP_FRAME is still sent so the kernel
 * does not park the dst buffer forever.  The result of
 * daedalus_decoder_pack_current() is deliberately ignored.
 *
 * Returns the send_response() error if sending failed; otherwise the
 * negative code from daedalus_capture_planes_open() if opening the
 * planes failed; otherwise the send_response() result.
 */
static int deliver_frame_to_cookie(struct chardev_client *cli,
				   uint32_t owner_cookie,
				   const struct daedalus_req_decode *owner_req,
				   struct daedalus_resp_frame *resp,
				   uint32_t resp_flags)
{
	struct daedalus_capture_planes planes;
	int open_rc;
	int send_rc;

	open_rc = daedalus_capture_planes_open(cli->fd, owner_cookie,
					       owner_req, &planes);
	if (open_rc < 0) {
		log_warn("drain: GET_DMABUF cookie=%u failed (%d); RESP metadata-only",
			 owner_cookie, open_rc);
	} else {
		(void) daedalus_decoder_pack_current(cli->decoder, &planes,
						     owner_req->capture_pix_fmt);
		daedalus_capture_planes_close(&planes);
	}

	resp->flags |= resp_flags;
	send_rc = send_response(cli, DAEDALUS_MSG_RESP_FRAME, owner_cookie,
				resp, sizeof(*resp));
	if (send_rc < 0)
		return send_rc;

	/*
	 * The RESP went out, but the caller must still be able to tell
	 * that no pixels were written for this cookie.
	 */
	return open_rc < 0 ? open_rc : send_rc;
}
static int handle_req_decode(struct chardev_client *cli,
const struct daedalus_msg_hdr *hdr,
const uint8_t *payload)
{
struct daedalus_req_decode req;
struct daedalus_resp_frame resp;
struct daedalus_capture_planes planes;
const struct daedalus_h264_meta *h264_meta = NULL;
size_t meta_off, meta_len = 0;
int submit_status;
int src_consumed_emitted = 0;
int rc;
int decoded = 0;
if (hdr->payload_len < sizeof(req)) {
struct daedalus_resp_frame err = { 0 };
log_err("REQ_DECODE cookie=%u: payload too short %u < %zu",
hdr->cookie, hdr->payload_len, sizeof(req));
err.status = DAEDALUS_DECODE_ERR_RECV;
err.flags = DAEDALUS_RESP_FLAG_HAS_PIXELS |
DAEDALUS_RESP_FLAG_SRC_CONSUMED;
memset(&resp, 0, sizeof(resp));
resp.status = DAEDALUS_DECODE_ERR_RECV;
return send_response(cli, DAEDALUS_MSG_RESP_FRAME,
hdr->cookie, &err, sizeof(err));
hdr->cookie, &resp, sizeof(resp));
}
memcpy(&req, payload, sizeof(req));
/* Optional H.264 meta block follows req when the flag is set;
* bitstream comes after meta. */
if (req.flags & DAEDALUS_REQ_FLAG_H264_META)
meta_len = sizeof(struct daedalus_h264_meta);
meta_off = sizeof(req);
if ((size_t) req.bitstream_len + sizeof(req) + meta_len !=
hdr->payload_len) {
struct daedalus_resp_frame err = { 0 };
log_err("REQ_DECODE cookie=%u: bitstream_len %u + meta %zu inconsistent with payload_len %u",
hdr->cookie, req.bitstream_len, meta_len,
hdr->payload_len);
err.status = DAEDALUS_DECODE_ERR_RECV;
err.flags = DAEDALUS_RESP_FLAG_HAS_PIXELS |
DAEDALUS_RESP_FLAG_SRC_CONSUMED;
memset(&resp, 0, sizeof(resp));
resp.status = DAEDALUS_DECODE_ERR_RECV;
return send_response(cli, DAEDALUS_MSG_RESP_FRAME,
hdr->cookie, &err, sizeof(err));
hdr->cookie, &resp, sizeof(resp));
}
if (meta_len)
h264_meta = (const struct daedalus_h264_meta *)
(payload + meta_off);
log_info("REQ_DECODE cookie=%u codec=%u bitstream=%u bytes meta=%s capture=%ux%u %u planes src_pts=%llu",
log_info("REQ_DECODE cookie=%u codec=%u bitstream=%u bytes meta=%s capture=%ux%u %u planes",
hdr->cookie, req.codec_id, req.bitstream_len,
h264_meta ? "h264" : "none",
req.capture_width, req.capture_height,
req.capture_num_planes,
(unsigned long long) req.src_pts);
req.capture_num_planes);
/*
* Register (src_pts → cookie) mapping BEFORE submit, so any drained
* frame whose pts matches this REQ's src_pts (the steady-state
* 1:1 path) can find its owner via pending_lookup below. Out of
* space here is fatal — we'd lose the routing identity for this
* cookie's eventual frame. Send an error RESP that releases both
* src and dst so the V4L2 client moves on.
* Open dmabuf-fds for every CAPTURE plane and mmap them.
* If this fails we still attempt the decode (so the kernel
* gets a structured error response) — but we pass NULL
* planes so pixels aren't written anywhere.
*/
rc = pending_register(cli, req.src_pts, hdr->cookie, &req);
rc = daedalus_capture_planes_open(cli->fd, hdr->cookie, &req,
&planes);
if (rc < 0) {
struct daedalus_resp_frame err = { 0 };
err.status = DAEDALUS_DECODE_ERR_SEND;
err.flags = DAEDALUS_RESP_FLAG_HAS_PIXELS |
DAEDALUS_RESP_FLAG_SRC_CONSUMED;
return send_response(cli, DAEDALUS_MSG_RESP_FRAME,
hdr->cookie, &err, sizeof(err));
log_warn("REQ_DECODE cookie=%u: GET_DMABUF/mmap failed (%d); decode metadata-only",
hdr->cookie, rc);
/* planes is already zeroed by capture_planes_open */
}
submit_status = daedalus_decoder_submit(cli->decoder, &req,
payload + meta_off + meta_len,
h264_meta);
if (submit_status != 0) {
/*
* avcodec_send_packet failed before any frame could have
* been queued for this src_pts. Drop the pending entry
* (no future drain will find a matching pts), and emit a
* combined HAS_PIXELS|SRC_CONSUMED error RESP for this
* cookie so the V4L2 client unblocks.
*/
struct daedalus_resp_frame err = { 0 };
rc = daedalus_decoder_run_request(cli->decoder, &req,
payload + meta_off + meta_len,
h264_meta,
&resp,
planes.nr ? &planes : NULL);
decoded = (rc >= 0);
pending_release(cli, req.src_pts);
err.status = (uint32_t) submit_status;
err.codec_id = req.codec_id;
err.flags = DAEDALUS_RESP_FLAG_HAS_PIXELS |
DAEDALUS_RESP_FLAG_SRC_CONSUMED;
err.output_src_pts = req.src_pts;
return send_response(cli, DAEDALUS_MSG_RESP_FRAME,
hdr->cookie, &err, sizeof(err));
}
daedalus_capture_planes_close(&planes);
if (!decoded)
return rc;
/*
* Drain libavcodec for as many display-ordered frames as it can
* emit right now. Each frame's pts identifies which cookie's
* CAPTURE buffer the pixels go in (see [[daedalus-v4l2#6]]). In
* steady state for VP9/AV1 (no reorder) the loop runs exactly
* once, draining the just-submitted packet's own frame. For
* H.264 with B-frames the first drained frame may belong to an
* EARLIER cookie's bitstream — that's the entire point.
* RESP_FRAME is metadata-only in Phase 8.6 — pixels already
* live in the V4L2 client's CAPTURE buffer via the dmabuf
* the daemon wrote to in pack_nv12_to_planes.
*/
for (;;) {
struct daedalus_resp_frame resp;
uint32_t owner_cookie = 0;
struct daedalus_req_decode owner_req;
uint32_t flags;
rc = daedalus_decoder_drain_one(cli->decoder, req.codec_id,
&resp);
if (rc == -EAGAIN)
break;
if (rc != 0) {
/*
* Hard codec error during drain. resp->status is set.
* Pin it to THIS REQ's cookie (we can't know whose
* pts the failed frame would have had); set both
* flags so the V4L2 client moves on.
*/
pending_release(cli, req.src_pts);
resp.flags = DAEDALUS_RESP_FLAG_HAS_PIXELS |
DAEDALUS_RESP_FLAG_SRC_CONSUMED;
resp.output_src_pts = req.src_pts;
(void) send_response(cli, DAEDALUS_MSG_RESP_FRAME,
hdr->cookie, &resp, sizeof(resp));
src_consumed_emitted = 1;
break;
}
if (pending_lookup(cli, resp.output_src_pts,
&owner_cookie, &owner_req) != 0) {
/*
* Frame's pts has no registered owner — implies a
* daemon-side tracking bug or a codec output for a
* packet we never registered (e.g. a B-frame that
* was queued before the daemon caught up). Drop the
* frame; can't safely route it.
*/
log_warn("drain: no pending entry for output_src_pts=%llu (codec dropped a frame?)",
(unsigned long long) resp.output_src_pts);
continue;
}
flags = DAEDALUS_RESP_FLAG_HAS_PIXELS;
if (owner_cookie == hdr->cookie) {
flags |= DAEDALUS_RESP_FLAG_SRC_CONSUMED;
src_consumed_emitted = 1;
}
(void) deliver_frame_to_cookie(cli, owner_cookie, &owner_req,
&resp, flags);
pending_release(cli, resp.output_src_pts);
}
/*
* If the drain loop didn't already SRC_CONSUMED this REQ's cookie
* (libavcodec held the frame for display-order reorder — the
* pixels will arrive in a future drain), emit a standalone
* SRC_CONSUMED RESP now. Kernel releases src_buf + runs
* job_finish; dst_buf parked until the matching HAS_PIXELS
* shows up later.
*/
if (!src_consumed_emitted) {
struct daedalus_resp_frame resp = { 0 };
resp.status = DAEDALUS_DECODE_OK;
resp.codec_id = req.codec_id;
resp.flags = DAEDALUS_RESP_FLAG_SRC_CONSUMED;
(void) send_response(cli, DAEDALUS_MSG_RESP_FRAME,
hdr->cookie, &resp, sizeof(resp));
}
return 0;
return send_response(cli, DAEDALUS_MSG_RESP_FRAME, hdr->cookie,
&resp, sizeof(resp));
}
static int handle_ping(struct chardev_client *cli,
-26
View File
@@ -18,44 +18,18 @@
struct ffmpeg_loader;
struct daedalus_decoder;
/*
* Per-inflight (cookie, src_pts) tracking for the H.264 B-frame
* display-reorder fix (daedalus-v4l2#6). When the daemon drains a
* frame from libavcodec, frame->pts (= src_pts of the OUTPUT bitstream
* that contained the frame's slices) identifies which cookie's CAPTURE
* buffer the pixels belong in — distinct from the cookie of the REQ
* that triggered the receive_frame call. Mapping is small (bounded
* by the V4L2 client's buffer pool depth, typically ≤24) so a linear
* array beats a hashtable for cache-locality.
*
* cached_req carries the capture geometry (num_planes, plane sizes,
* strides, pix_fmt) so a later drain — which may target this cookie
* from a DIFFERENT REQ's drain loop — can call GET_DMABUF + open
* planes with the original REQ's parameters.
*/
/* Number of slots in the per-client pending[] table. */
#define DAEDALUS_MAX_PENDING_COOKIES 64
/* One slot of the src_pts -> cookie routing table. */
struct chardev_pending_cookie {
/* non-zero while this slot holds a live mapping */
int used;
/* lookup key: src_pts of the REQ_DECODE that registered the slot */
uint64_t src_pts;
/* chardev cookie that owns the frame carrying this src_pts */
uint32_t cookie;
/* copy of the registering REQ_DECODE (capture geometry for a later drain) */
struct daedalus_req_decode cached_req;
};
/**
* struct chardev_client - daemon-side chardev state
* @fd: open /dev/daedalus-v4l2 descriptor (-1 if not open)
* @loader: dlopen'd FFmpeg loader (borrowed; not owned)
* @decoder: per-codec AVCodecContext cache (owned)
* @stop_flag: set non-zero from a signal handler to break the loop
* @pending: pts → cookie lookup table for split SRC/DST RESPs;
*           DAEDALUS_MAX_PENDING_COOKIES slots, each keyed by src_pts
*/
struct chardev_client {
int fd;
struct ffmpeg_loader *loader;
struct daedalus_decoder *decoder;
volatile sig_atomic_t *stop_flag;
struct chardev_pending_cookie pending[DAEDALUS_MAX_PENDING_COOKIES];
};
/**
+126 -154
View File
@@ -348,30 +348,31 @@ static int pack_nv12_to_planes(struct AVFrame *fr,
return 0;
}
/*
* Per-codec assemble + send_packet. Returns 0 on success, or one
* of DAEDALUS_DECODE_ERR_* on failure (errors here propagate via
* the caller's RESP_FRAME status field — they are NOT logged as a
* silent skip). pkt->pts is stamped from req->src_pts so the
* resulting frame->pts comes back identifiable on the drain side.
*/
int daedalus_decoder_submit(struct daedalus_decoder *dec,
const struct daedalus_req_decode *req,
const uint8_t *bitstream,
const struct daedalus_h264_meta *h264_meta)
int daedalus_decoder_run_request(struct daedalus_decoder *dec,
const struct daedalus_req_decode *req,
const uint8_t *bitstream,
const struct daedalus_h264_meta *h264_meta,
struct daedalus_resp_frame *resp,
const struct daedalus_capture_planes *planes)
{
struct ffmpeg_loader *fm = dec->loader;
struct AVCodecContext *ctx = NULL;
uint8_t *assembled = NULL;
size_t assembled_len = 0;
int rc;
int status = 0;
memset(resp, 0, sizeof(*resp));
resp->codec_id = req->codec_id;
rc = decoder_open_codec(dec, req->codec_id, &ctx);
if (rc == -ENOSYS)
return DAEDALUS_DECODE_ERR_CODEC;
if (rc < 0)
return DAEDALUS_DECODE_ERR_OPEN;
if (rc == -ENOSYS) {
resp->status = DAEDALUS_DECODE_ERR_CODEC;
goto out;
}
if (rc < 0) {
resp->status = DAEDALUS_DECODE_ERR_OPEN;
goto out;
}
fm->av_packet_unref(dec->pkt);
@@ -396,14 +397,14 @@ int daedalus_decoder_submit(struct daedalus_decoder *dec,
if (sps_len == 0 || pps_len == 0) {
log_err("decoder: SPS/PPS NAL synth failed (sps=%zu pps=%zu)",
sps_len, pps_len);
status = DAEDALUS_DECODE_ERR_SEND;
resp->status = DAEDALUS_DECODE_ERR_SEND;
goto out;
}
assembled_len = sps_len + pps_len + req->bitstream_len;
assembled = malloc(assembled_len + AV_INPUT_BUFFER_PADDING_SIZE);
if (!assembled) {
status = DAEDALUS_DECODE_ERR_SEND;
resp->status = DAEDALUS_DECODE_ERR_SEND;
goto out;
}
memcpy(assembled, sps_nal, sps_len);
@@ -440,162 +441,133 @@ int daedalus_decoder_submit(struct daedalus_decoder *dec,
dec->pkt->size = (int) req->bitstream_len;
}
/*
* Stamp pkt->pts from REQ_DECODE's src_pts (the V4L2 OUTPUT
* buffer's vb2 timestamp captured by the kernel at device_run
* time). libavcodec carries pkt->pts forward to frame->pts on
* the receive_frame side — even after display-order reordering
* inside the H.264 DPB — which lets the chardev_client identify
* which cookie's CAPTURE buffer a drained frame's pixels belong
* in. Without this stamp, every drained frame would look like
* it came from the current REQ; pairs of B/P would swap places
* in the visible output (daedalus-v4l2#6).
*/
dec->pkt->pts = (int64_t) req->src_pts;
rc = fm->avcodec_send_packet(ctx, dec->pkt);
if (rc < 0) {
log_err("decoder: avcodec_send_packet failed: %d", rc);
status = DAEDALUS_DECODE_ERR_SEND;
resp->status = DAEDALUS_DECODE_ERR_SEND;
goto out;
}
out:
free(assembled);
(void) assembled_len;
return status;
}
/*
* Pull the next display-ordered frame out of libavcodec's DPB.
* Returns 0 if a frame was returned (dec->frame holds it and resp
* is populated with metadata + output_src_pts == frame->pts),
* -EAGAIN if libavcodec needs more input, or DAEDALUS_DECODE_ERR_*
* on a hard codec error. Caller may immediately invoke
* daedalus_decoder_pack_current() to copy this frame's pixels into
* a CAPTURE buffer's mapped planes, then call drain_one again for
* any further frames in the DPB.
*/
int daedalus_decoder_drain_one(struct daedalus_decoder *dec,
uint32_t codec_id,
struct daedalus_resp_frame *resp)
{
struct ffmpeg_loader *fm = dec->loader;
struct AVCodecContext *ctx = NULL;
struct AVFrame *fr;
const AVPixFmtDescriptor *desc;
uint32_t h, luma_len = 0, chroma_len = 0;
int rc;
memset(resp, 0, sizeof(*resp));
resp->codec_id = codec_id;
rc = decoder_open_codec(dec, codec_id, &ctx);
if (rc == -ENOSYS) {
resp->status = DAEDALUS_DECODE_ERR_CODEC;
return DAEDALUS_DECODE_ERR_CODEC;
}
if (rc < 0) {
resp->status = DAEDALUS_DECODE_ERR_OPEN;
return DAEDALUS_DECODE_ERR_OPEN;
}
fm->av_frame_unref(dec->frame);
rc = fm->avcodec_receive_frame(ctx, dec->frame);
if (rc == AVERROR(EAGAIN) || rc == AVERROR_EOF)
return -EAGAIN;
if (rc == AVERROR(EAGAIN) || rc == AVERROR_EOF) {
log_debug("decoder: no frame ready yet (rc=%d)", rc);
resp->status = DAEDALUS_DECODE_NO_FRAME;
goto out;
}
if (rc < 0) {
log_err("decoder: avcodec_receive_frame failed: %d", rc);
resp->status = DAEDALUS_DECODE_ERR_RECV;
return DAEDALUS_DECODE_ERR_RECV;
goto out;
}
fr = dec->frame;
desc = fm->av_pix_fmt_desc_get(fr->format);
h = fnv1a32_init();
{
struct AVFrame *fr = dec->frame;
const AVPixFmtDescriptor *desc =
fm->av_pix_fmt_desc_get(fr->format);
uint32_t h = fnv1a32_init();
uint32_t luma_len = 0, chroma_len = 0;
resp->status = DAEDALUS_DECODE_OK;
resp->width = (uint32_t) fr->width;
resp->height = (uint32_t) fr->height;
resp->pix_fmt = fr->format;
resp->output_src_pts = (uint64_t) fr->pts;
resp->status = DAEDALUS_DECODE_OK;
resp->width = (uint32_t) fr->width;
resp->height = (uint32_t) fr->height;
resp->pix_fmt = fr->format;
if (!desc) {
log_warn("decoder: no descriptor for pix_fmt %d", fr->format);
} else {
int p, max_plane = 0;
int i;
/*
* Walk every plane reported by the AVPixFmtDescriptor.
* For each component, byte width = ((plane_w *
* step_minus1) >> 0) — but the descriptor only tells
* us which plane each component sits in, not the
* plane's byte stride per pixel. In practice for the
* formats we care about (YUV420P, YUV422P, YUV444P,
* GBRP, NV12), each plane has exactly one component
* at 1 byte/sample. Hash each plane at
* (width >> log2_chroma_w) × (height >> log2_chroma_h)
* for chroma planes, full-size for plane 0.
*
* This generalises cleanly to anything 8-bit-per-
* sample-per-plane; 10/12-bit (P010, YUV420P10LE) will
* need depth handling when Phase 8.6 brings HDR
* content into play.
*/
if (!desc) {
log_warn("decoder: no descriptor for pix_fmt %d",
fr->format);
} else {
int p, max_plane = 0;
int i;
for (i = 0; i < desc->nb_components; i++) {
if (desc->comp[i].plane > max_plane)
max_plane = desc->comp[i].plane;
}
for (p = 0; p <= max_plane; p++) {
int pw, ph;
if (!fr->data[p] || !fr->linesize[p])
continue;
if (p == 0) {
pw = fr->width;
ph = fr->height;
luma_len += (uint32_t) pw * (uint32_t) ph;
} else {
pw = AV_CEIL_RSHIFT(fr->width,
desc->log2_chroma_w);
ph = AV_CEIL_RSHIFT(fr->height,
desc->log2_chroma_h);
chroma_len += (uint32_t) pw * (uint32_t) ph;
for (i = 0; i < desc->nb_components; i++) {
if (desc->comp[i].plane > max_plane)
max_plane = desc->comp[i].plane;
}
for (p = 0; p <= max_plane; p++) {
int pw, ph;
if (!fr->data[p] || !fr->linesize[p])
continue;
if (p == 0) {
pw = fr->width;
ph = fr->height;
luma_len += (uint32_t) pw *
(uint32_t) ph;
} else {
pw = AV_CEIL_RSHIFT(fr->width,
desc->log2_chroma_w);
ph = AV_CEIL_RSHIFT(fr->height,
desc->log2_chroma_h);
chroma_len += (uint32_t) pw *
(uint32_t) ph;
}
h = fnv1a32_plane(h, fr->data[p], pw, ph,
fr->linesize[p]);
}
h = fnv1a32_plane(h, fr->data[p], pw, ph,
fr->linesize[p]);
}
resp->luma_len = luma_len;
resp->chroma_len = chroma_len;
resp->fnv1a_yuv = h;
/*
* Pack pixels directly into the mapped CAPTURE dmabuf
* planes. Dispatch on the V4L2 fourcc the kernel
* negotiated:
* V4L2_PIX_FMT_NV12M (default, 8-bit, 2 planes)
* V4L2_PIX_FMT_P010 (10-bit HDR, 1 plane)
*/
if (planes && planes->nr >= 1) {
int prc = 0;
switch (req->capture_pix_fmt) {
case V4L2_PIX_FMT_NV12M:
prc = pack_nv12_to_planes(fr, desc, planes);
break;
case V4L2_PIX_FMT_NV12:
prc = pack_nv12_single_to_plane(fr, desc, planes);
break;
case V4L2_PIX_FMT_P010:
prc = pack_p010_to_plane(fr, desc, planes);
break;
default:
log_warn("decoder: unsupported capture fourcc 0x%08x",
req->capture_pix_fmt);
prc = -EINVAL;
break;
}
if (prc < 0)
log_warn("decoder: pack failed (pix_fmt=%d cap_fourcc=0x%08x) — kernel will see metadata only",
fr->format, req->capture_pix_fmt);
}
log_info("decoder: OK %dx%d fmt=%d (%s) fnv1a=0x%08x luma=%u chroma=%u",
fr->width, fr->height, fr->format,
desc ? desc->name : "?",
h, luma_len, chroma_len);
}
resp->luma_len = luma_len;
resp->chroma_len = chroma_len;
resp->fnv1a_yuv = h;
log_info("decoder: OK %dx%d fmt=%d (%s) fnv1a=0x%08x luma=%u chroma=%u src_pts=%llu",
fr->width, fr->height, fr->format,
desc ? desc->name : "?",
h, luma_len, chroma_len,
(unsigned long long) fr->pts);
fm->av_frame_unref(dec->frame);
out:
free(assembled);
(void) assembled_len;
return 0;
}
/*
 * daedalus_decoder_pack_current - copy the decoder's current frame
 * (dec->frame) into mapped CAPTURE planes.
 *
 * The packer is selected by @capture_pix_fmt: NV12M, NV12 or P010.
 * Returns -EINVAL when @planes is NULL or has no planes, when there is
 * no frame or the frame has a zero dimension, or when the fourcc is not
 * one of the three handled here; otherwise returns the selected
 * packer's result.  Any negative result is logged as a pack failure.
 */
int daedalus_decoder_pack_current(struct daedalus_decoder *dec,
				  const struct daedalus_capture_planes *planes,
				  uint32_t capture_pix_fmt)
{
	struct ffmpeg_loader *fm = dec->loader;
	struct AVFrame *frame = dec->frame;
	const AVPixFmtDescriptor *fmt_desc;
	int ret;

	if (!planes || planes->nr < 1)
		return -EINVAL;
	if (!frame || !frame->width || !frame->height)
		return -EINVAL;

	fmt_desc = fm->av_pix_fmt_desc_get(frame->format);

	if (capture_pix_fmt == V4L2_PIX_FMT_NV12M) {
		ret = pack_nv12_to_planes(frame, fmt_desc, planes);
	} else if (capture_pix_fmt == V4L2_PIX_FMT_NV12) {
		ret = pack_nv12_single_to_plane(frame, fmt_desc, planes);
	} else if (capture_pix_fmt == V4L2_PIX_FMT_P010) {
		ret = pack_p010_to_plane(frame, fmt_desc, planes);
	} else {
		log_warn("decoder: unsupported capture fourcc 0x%08x",
			 capture_pix_fmt);
		ret = -EINVAL;
	}

	/* An unsupported fourcc is reported twice: once above, once here. */
	if (ret < 0)
		log_warn("decoder: pack failed (pix_fmt=%d cap_fourcc=0x%08x)",
			 frame->format, capture_pix_fmt);

	return ret;
}
+22 -57
View File
@@ -56,68 +56,33 @@ int daedalus_decoder_init(struct daedalus_decoder *dec,
void daedalus_decoder_cleanup(struct daedalus_decoder *dec);
/**
* daedalus_decoder_submit - send one REQ_DECODE's bitstream into libavcodec
* daedalus_decoder_run_request - decode one REQ_DECODE payload
* @dec: initialised decoder
* @req: REQ_DECODE prefix (from the wire); src_pts is stamped on
* the AVPacket so libavcodec returns frame->pts == src_pts
* when it eventually outputs the matching frame in display
* order (daedalus-v4l2#6).
* @req: REQ_DECODE prefix (from the wire)
* @bitstream: bitstream blob (req->bitstream_len bytes)
* @h264_meta: optional H.264 SPS/PPS metadata; non-NULL only when
* codec_id == H264 and the kernel set DAEDALUS_REQ_FLAG_
* H264_META. See decoder.c for the AnnexB synthesis.
* H264_META. Used to synthesise the AnnexB SPS+PPS NALs
* libavcodec needs before any slice (libva-v4l2-request
* passes only the slice in @bitstream per the V4L2
* stateless API contract). NULL for VP9/AV1 paths.
* @resp: caller-allocated RESP_FRAME output (zeroed by callee)
* @planes: mapped CAPTURE planes (Phase 8.6 dmabuf path). If
* NULL or planes->nr == 0, the decoder runs but
* writes no pixels — caller still gets dims + digest.
*
* Calls avcodec_send_packet on the codec's per-codec AVCodecContext.
* Returns 0 on success; one of DAEDALUS_DECODE_ERR_* on failure
* (which the caller should propagate as the RESP_FRAME status for
* the cookie of this REQ). Does NOT call avcodec_receive_frame —
* use daedalus_decoder_drain_one for that.
* Populates @resp with the decode outcome and writes decoded
* pixels (NV12 layout: Y to plane 0, interleaved CbCr to plane
* 1) directly into the mapped dmabuf planes. Always returns
* 0; decode-level failures are reported via @resp->status so
* the kernel sees a structured response rather than a dropped
* request.
*/
int daedalus_decoder_submit(struct daedalus_decoder *dec,
const struct daedalus_req_decode *req,
const uint8_t *bitstream,
const struct daedalus_h264_meta *h264_meta);
/**
* daedalus_decoder_drain_one - pop the next display-ordered frame, if any
* @dec: initialised decoder
* @codec_id: which codec context to drain (matches the REQ that just
* called submit). VP9/AV1/H264 use independent contexts.
* @resp: caller-allocated RESP_FRAME output (zeroed by callee).
* On a successful drain (return 0), resp's status / width /
* height / pix_fmt / luma_len / chroma_len / fnv1a_yuv /
* output_src_pts are populated; flags is left at 0 (caller
* adds HAS_PIXELS / SRC_CONSUMED). On EAGAIN, resp is
* zeroed.
*
* Return: 0 on a frame returned, -EAGAIN if libavcodec needs more
* input (display-order frame held inside DPB), <0 on a hard codec
* error (resp->status set).
*
* After a successful drain, the dec's internal AVFrame holds the
* decoded picture. Caller may immediately call
* daedalus_decoder_pack_current(planes) to write that picture into
* a CAPTURE buffer's dmabuf-mapped planes. Subsequent calls to
* drain_one (without another submit) try to pull additional frames
* from libavcodec's DPB.
*/
int daedalus_decoder_drain_one(struct daedalus_decoder *dec,
uint32_t codec_id,
struct daedalus_resp_frame *resp);
/**
* daedalus_decoder_pack_current - pack the last drained frame into planes
* @dec: initialised decoder; must have a frame from drain_one
* @planes: mapped CAPTURE planes (open via GET_DMABUF using the
* cookie that owns the frame's output_src_pts).
* @capture_pix_fmt: V4L2 fourcc on the CAPTURE side (NV12M, NV12,
* P010).
*
* Return: 0 on success, <0 on a pack failure (kernel sees only the
* metadata, not pixels — typical when a format isn't wired yet).
*/
int daedalus_decoder_pack_current(struct daedalus_decoder *dec,
const struct daedalus_capture_planes *planes,
uint32_t capture_pix_fmt);
int daedalus_decoder_run_request(struct daedalus_decoder *dec,
const struct daedalus_req_decode *req,
const uint8_t *bitstream,
const struct daedalus_h264_meta *h264_meta,
struct daedalus_resp_frame *resp,
const struct daedalus_capture_planes *planes);
#endif /* DAEDALUS_V4L2_DECODER_H */
+2 -52
View File
@@ -28,12 +28,7 @@
#include <linux/v4l2-controls.h>
#define DAEDALUS_PROTO_MAGIC 0x44303456u /* 'D04V' */
#define DAEDALUS_PROTO_VERSION 1u /* pre-1.0; bumped for
* REQ_DECODE.src_pts +
* RESP_FRAME.flags +
* RESP_FRAME.output_src_pts
* (H.264 B-frame reorder fix,
* daedalus-v4l2#6). */
#define DAEDALUS_PROTO_VERSION 0u /* pre-1.0 */
/*
* Wire-protocol message types.
@@ -147,17 +142,6 @@ struct daedalus_req_decode {
__u32 capture_plane_size[3];
__u32 capture_plane_stride[3];
__u32 flags;
__u32 reserved0; /* explicit pad to 8-byte align src_pts */
/*
* The V4L2 OUTPUT (bitstream) buffer's vb2 timestamp at submission
* time. The daemon sets pkt->pts = src_pts before
* avcodec_send_packet so libavcodec's display-ordered
* receive_frame can return frame->pts == src_pts of the bitstream
* the frame's slices belong to. Decouples kernel cookie (decode
* order, in-kernel identity) from display order required for
* H.264 B-frame correctness (daedalus-v4l2#6).
*/
__u64 src_pts;
};
/**
@@ -224,31 +208,6 @@ enum daedalus_decode_status {
* Fixed size keeps wire parsing simple. No variable-length
* pixel data in Phase 8.4; dmabuf in Phase 8.5 carries that.
*/
/**
* DAEDALUS_RESP_FLAG_HAS_PIXELS - this RESP delivers a decoded frame's
* pixels. The owning CAPTURE buffer is identified by output_src_pts
* (matched against an in-flight item's src_pts on the kernel side),
* NOT by the chardev message header's cookie. Required since
* libavcodec's H.264 decoder reorders to display order — the cookie
* the daemon just received the REQ on may not be the cookie whose
* bitstream produced the frame just popped from receive_frame.
*
* DAEDALUS_RESP_FLAG_SRC_CONSUMED - the chardev header's cookie's
* OUTPUT bitstream buffer is done from the daemon's perspective
* (libavcodec has accepted the slice data via avcodec_send_packet).
* Kernel releases src_buf for the cookie and runs job_finish so the
* m2m scheduler can dispatch the next REQ. Independent of any
* pixel delivery — the dst_buf paired with this cookie may still
* be parked, awaiting a future RESP with HAS_PIXELS + matching
* output_src_pts.
*
* Both flags may be set in a single message (steady-state path with
* no codec reorder lag — the just-sent packet immediately yielded a
* frame whose pts == this REQ's src_pts).
*/
#define DAEDALUS_RESP_FLAG_HAS_PIXELS 0x00000001u
#define DAEDALUS_RESP_FLAG_SRC_CONSUMED 0x00000002u
struct daedalus_resp_frame {
__u32 status;
__u32 codec_id;
@@ -258,16 +217,7 @@ struct daedalus_resp_frame {
__u32 luma_len;
__u32 chroma_len;
__u32 fnv1a_yuv;
__u32 flags; /* bitmask of DAEDALUS_RESP_FLAG_* */
__u32 reserved0; /* explicit pad to 8-byte align output_src_pts */
/*
* Set when DAEDALUS_RESP_FLAG_HAS_PIXELS is in flags. Identifies
* which OUTPUT bitstream's slices produced the pixels in this
* RESP — kernel completes the CAPTURE buffer whose inflight item
* has src_pts == output_src_pts. Ignored when HAS_PIXELS is
* clear.
*/
__u64 output_src_pts;
__u32 reserved;
};
/* -- chardev ioctl ABI ----------------------------------------------- */
+89 -186
View File
@@ -611,28 +611,8 @@ struct daedalus_inflight {
struct list_head list;
u32 cookie;
struct daedalus_ctx *ctx;
/*
* src_buf / dst_buf decouple in the daedalus-v4l2#6 reorder fix.
* src_buf is cleared (NULL'd) when DAEDALUS_RESP_FLAG_SRC_CONSUMED
* arrives — that signals libavcodec has accepted the bitstream
* even if no display-order frame is ready yet. dst_buf is cleared
* when DAEDALUS_RESP_FLAG_HAS_PIXELS arrives — the daemon has
* written pixels into this CAPTURE buffer. When both are NULL
* the inflight entry is removed and freed.
*/
struct vb2_v4l2_buffer *src_buf;
struct vb2_v4l2_buffer *dst_buf;
/*
* src_buf->vb2_buf.timestamp captured at device_run time.
* Mirrored into REQ_DECODE.src_pts so the daemon can set
* pkt->pts = src_pts on avcodec_send_packet, and read back
* frame->pts to identify which OUTPUT bitstream produced the
* current display-order frame. Kept here so the kernel can
* stamp dst_buf.timestamp explicitly at HAS_PIXELS time even
* though V4L2_BUF_FLAG_TIMESTAMP_COPY's automatic src->dst
* pairing no longer applies (src/dst lifecycles decoupled).
*/
u64 src_pts;
/*
* Captured media_request the src_buf was bound to (if any).
* Set by device_run from src_buf->vb2_buf.req_obj.req;
@@ -643,22 +623,16 @@ struct daedalus_inflight {
struct media_request *req;
};
/*
* Peek (don't remove). The split-completion path may receive
* multiple RESP_FRAME messages on a single inflight item (one for
* SRC_CONSUMED, one for HAS_PIXELS — possibly separated in time if
* libavcodec held the picture for display reorder). Caller removes
* the entry only when both src_buf and dst_buf have been cleared
* from inside the inflight lock.
*/
static struct daedalus_inflight *
daedalus_inflight_peek_locked(struct daedalus_dev *dev, u32 cookie)
daedalus_inflight_pop_locked(struct daedalus_dev *dev, u32 cookie)
{
struct daedalus_inflight *e;
list_for_each_entry(e, &dev->inflight, list) {
if (e->cookie == cookie)
if (e->cookie == cookie) {
list_del(&e->list);
return e;
}
}
return NULL;
}
@@ -821,17 +795,6 @@ static void daedalus_device_run(void *priv)
req->codec_id = cid;
req->bitstream_len = (u32) blen;
/*
* Ferry the OUTPUT buffer's vb2 timestamp through to the
* daemon for the H.264 B-frame display-reorder fix
* (daedalus-v4l2#6). Daemon sets pkt->pts = src_pts before
* avcodec_send_packet; libavcodec stamps frame->pts with
* the same value when it eventually outputs the frame in
* display order, letting the daemon route HAS_PIXELS RESPs
* to the correct cookie even when libavcodec's display
* order disagrees with V4L2's decode submission order.
*/
req->src_pts = (u64) src_buf->vb2_buf.timestamp;
req->capture_width = ctx->dst_fmt.width;
req->capture_height = ctx->dst_fmt.height;
req->capture_pix_fmt = ctx->dst_fmt.pixelformat;
@@ -861,7 +824,6 @@ static void daedalus_device_run(void *priv)
inf->ctx = ctx;
inf->src_buf = src_buf;
inf->dst_buf = dst_buf;
inf->src_pts = req->src_pts;
/*
* Capture the bound media_request (if any) so the
* completion path can call v4l2_ctrl_request_complete +
@@ -927,179 +889,120 @@ static const struct v4l2_m2m_ops daedalus_m2m_ops = {
/* -- chardev RESP_FRAME → buf_done bridge ---------------------------- */
/*
 * daedalus_pack_pixels_into_dst - set the CAPTURE buffer's plane payloads
 * for a pixel delivery, copying inline pixels when the daemon sent any.
 *
 * Per the original note, this is called from daedalus_complete_resp_frame
 * on the HAS_PIXELS branch after the inflight lock has been dropped (vb2
 * ops may sleep / take their own locks); the dst_buf reference was
 * snapshotted under that lock and cleared from the entry, so no other
 * RESP can race for this buffer.
 *
 * pixels_len == 0 — dmabuf path (Phase 8.6+): the daemon mmap'd the
 * CAPTURE plane via GET_DMABUF and wrote pixels in place, so every plane
 * is simply marked full.
 * pixels_len > 0 — legacy Phase 8.5 inline NV12 path: luma, then chroma,
 * are copied out of @pixels.  Each copy is clamped to the plane size and
 * skipped (payload 0) when the destination is unmapped, the length is 0,
 * or @pixels is too short.
 */
static void daedalus_pack_pixels_into_dst(struct vb2_v4l2_buffer *dst_buf,
					  const struct daedalus_resp_frame *fr,
					  const u8 *pixels, size_t pixels_len)
{
	struct vb2_buffer *vb = &dst_buf->vb2_buf;
	void *luma_dst, *chroma_dst;
	u32 luma_bytes, chroma_bytes;
	unsigned int plane;

	if (!pixels_len) {
		/* Pixels are already in place; report each plane as full. */
		for (plane = 0; plane < vb->num_planes; plane++)
			vb2_set_plane_payload(vb, plane,
					      vb2_plane_size(vb, plane));
		return;
	}

	luma_bytes = min_t(u32, fr->luma_len, (u32) vb2_plane_size(vb, 0));
	if (vb->num_planes > 1)
		chroma_bytes = min_t(u32, fr->chroma_len,
				     (u32) vb2_plane_size(vb, 1));
	else
		chroma_bytes = 0;

	luma_dst = vb2_plane_vaddr(vb, 0);
	if (vb->num_planes > 1)
		chroma_dst = vb2_plane_vaddr(vb, 1);
	else
		chroma_dst = NULL;

	if (!luma_dst || !luma_bytes || pixels_len < luma_bytes)
		luma_bytes = 0;
	else
		memcpy(luma_dst, pixels, luma_bytes);

	/* Chroma follows whatever luma was actually copied. */
	if (!chroma_dst || !chroma_bytes ||
	    pixels_len < luma_bytes + chroma_bytes)
		chroma_bytes = 0;
	else
		memcpy(chroma_dst, pixels + luma_bytes, chroma_bytes);

	vb2_set_plane_payload(vb, 0, luma_bytes);
	if (vb->num_planes > 1)
		vb2_set_plane_payload(vb, 1, chroma_bytes);
}
/*
 * Complete the in-flight decode identified by @cookie from the daemon's
 * RESP_FRAME.
 *
 * @cookie:     identifies the inflight entry queued at device_run time.
 * @fr:         daemon response; fr->status selects DONE vs ERROR.
 * @pixels:     inline pixel payload (may be unused, see below).
 * @pixels_len: byte count at @pixels; 0 means the daemon wrote the
 *              CAPTURE buffer in place.
 *
 * The entry is popped (unlinked) from dev->inflight under
 * dev->inflight_lock, so every later step runs on an entry no other
 * RESP can find; the entry is looked up once, unlocked once, completed
 * once and freed once.  A cookie with no matching entry is logged
 * (rate-limited) and ignored.
 *
 * Two routes the daemon can take, both supported:
 *
 *   (a) dmabuf path (Phase 8.6+): the daemon called
 *       DAEDALUS_IOC_GET_DMABUF, mmap'd the CAPTURE buffer and wrote
 *       pixels in place.  RESP_FRAME carries metadata only
 *       (pixels_len == 0).
 *
 *   (b) Phase 8.5 inline path: the daemon shipped raw NV12 in the
 *       chardev payload (<= 64 KiB cap) and we copy it into the vb2
 *       buffer.
 *
 * Both are handled by daedalus_pack_pixels_into_dst().
 */
void daedalus_complete_resp_frame(u32 cookie,
				  const struct daedalus_resp_frame *fr,
				  const u8 *pixels, size_t pixels_len)
{
	struct daedalus_dev *dev = g_daedalus_dev;
	struct daedalus_inflight *inf;
	enum vb2_buffer_state state;

	if (!dev)
		return;

	mutex_lock(&dev->inflight_lock);
	inf = daedalus_inflight_pop_locked(dev, cookie);
	mutex_unlock(&dev->inflight_lock);
	if (!inf) {
		pr_warn_ratelimited(
			"daedalus_v4l2: RESP_FRAME for unknown cookie=%u\n",
			cookie);
		return;
	}

	state = (fr->status == DAEDALUS_DECODE_OK)
		? VB2_BUF_STATE_DONE : VB2_BUF_STATE_ERROR;

	/*
	 * Only a successful decode has pixels worth publishing.  The
	 * dst_buf check avoids dereferencing an entry that has no CAPTURE
	 * buffer attached.
	 */
	if (state == VB2_BUF_STATE_DONE && inf->dst_buf)
		daedalus_pack_pixels_into_dst(inf->dst_buf, fr,
					      pixels, pixels_len);

	/*
	 * Phase 8.14: if the src_buf was bound to a media_request
	 * (libva-driven decode path), complete the per-request control
	 * state BEFORE buf_done_and_job_finish.  vb2-core's buf_done
	 * unbinds the buffer's req_obj on its own, but the control object
	 * stays bound until v4l2_ctrl_request_complete runs; only after
	 * BOTH objects unbind does the request transition to
	 * MEDIA_REQUEST_STATE_COMPLETE and wake any userspace poll on the
	 * request fd.
	 *
	 * For non-request flows (test_m2m_stream direct QBUF) inf->req is
	 * NULL and this step is skipped.
	 */
	if (inf->req)
		v4l2_ctrl_request_complete(inf->req, &inf->ctx->hdl);

	/*
	 * Use the buf_done_and_job_finish helper rather than plain
	 * buf_done + job_finish: the helper pops the buffers off the m2m
	 * queue before marking them done, otherwise the scheduler
	 * immediately re-runs device_run on the same still-queued src
	 * buffer.  Caught during the Phase 8.5 first run: a second
	 * REQ_DECODE with identical bitstream, plus an oops in
	 * stop_streaming when the test client tore down.
	 */
	v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, inf->ctx->m2m_ctx,
					 state);

	/*
	 * Release our reference taken in device_run; safe to do AFTER
	 * buf_done_and_job_finish (which dropped the vb2 reference)
	 * because we still hold this one.  If the refcount hits zero
	 * here, media-core releases the request.
	 */
	if (inf->req)
		media_request_put(inf->req);

	kfree(inf);
}
/* -- v4l2_ioctl_ops -------------------------------------------------- */