diff --git a/daemon/CMakeLists.txt b/daemon/CMakeLists.txt index ae92695..1ffa6b8 100644 --- a/daemon/CMakeLists.txt +++ b/daemon/CMakeLists.txt @@ -33,11 +33,14 @@ add_executable(daedalus_v4l2_daemon src/ffmpeg_loader.c src/log.c src/parser.c + src/decoder.c + src/chardev_client.c ) target_include_directories(daedalus_v4l2_daemon PRIVATE src + ${CMAKE_CURRENT_SOURCE_DIR}/../include ${FFMPEG_INCLUDE_DIRS} ) diff --git a/daemon/src/chardev_client.c b/daemon/src/chardev_client.c new file mode 100644 index 0000000..f8e8340 --- /dev/null +++ b/daemon/src/chardev_client.c @@ -0,0 +1,262 @@ +/* SPDX-License-Identifier: BSD-2-Clause */ +/* + * chardev_client.c — kernel-bridge client for the daedalus-v4l2 daemon. + */ +#include "chardev_client.h" +#include "decoder.h" +#include "ffmpeg_loader.h" +#include "log.h" + +#include +#include +#include +#include +#include +#include + +#define CHARDEV_PATH "/dev/daedalus-v4l2" +#define CHARDEV_READ_BUFSZ (sizeof(struct daedalus_msg_hdr) + \ + DAEDALUS_PROTO_MAX_PAYLOAD) + +int chardev_client_open(struct chardev_client *cli, + struct ffmpeg_loader *loader, + volatile sig_atomic_t *stop_flag) +{ + int fd, rc; + + memset(cli, 0, sizeof(*cli)); + cli->fd = -1; + cli->loader = loader; + cli->stop_flag = stop_flag; + + fd = open(CHARDEV_PATH, O_RDWR | O_CLOEXEC); + if (fd < 0) { + rc = -errno; + log_err("open(%s): %s", CHARDEV_PATH, strerror(errno)); + return rc; + } + + cli->decoder = calloc(1, sizeof(*cli->decoder)); + if (!cli->decoder) { + close(fd); + return -ENOMEM; + } + rc = daedalus_decoder_init(cli->decoder, loader); + if (rc < 0) { + free(cli->decoder); + cli->decoder = NULL; + close(fd); + log_err("daedalus_decoder_init: %d", rc); + return rc; + } + + cli->fd = fd; + log_info("chardev: opened %s (fd %d)", CHARDEV_PATH, fd); + return 0; +} + +void chardev_client_close(struct chardev_client *cli) +{ + if (!cli) + return; + if (cli->decoder) { + daedalus_decoder_cleanup(cli->decoder); + free(cli->decoder); + cli->decoder = NULL; 
+ } + if (cli->fd >= 0) { + close(cli->fd); + cli->fd = -1; + } +} + +static int send_response(struct chardev_client *cli, uint32_t type, + uint32_t cookie, const void *payload, + size_t payload_len) +{ + struct daedalus_msg_hdr hdr; + int rc; + + if (payload_len > DAEDALUS_PROTO_MAX_PAYLOAD) + return -EMSGSIZE; + + hdr.magic = DAEDALUS_PROTO_MAGIC; + hdr.version = DAEDALUS_PROTO_VERSION; + hdr.type = type; + hdr.cookie = cookie; + hdr.payload_len = (uint32_t) payload_len; + hdr.reserved = 0; + + /* + * The kernel's write() path validates count == sizeof(hdr) + * + hdr.payload_len in a single call, and only implements + * .write (not .write_iter), so a writev() lands as -EINVAL. + * Marshal the message into a single buffer and write() it. + * + * Response payloads are small (struct daedalus_resp_frame = + * 36 bytes; PONG echoes <= 64 KiB). A short-lived heap + * allocation per response is fine; per-loop reuse can come + * later if profiling demands it. + */ + { + size_t total = sizeof(hdr) + payload_len; + uint8_t *out = malloc(total); + ssize_t n; + + if (!out) + return -ENOMEM; + memcpy(out, &hdr, sizeof(hdr)); + if (payload_len) + memcpy(out + sizeof(hdr), payload, payload_len); + + for (;;) { + n = write(cli->fd, out, total); + if (n >= 0) { + if ((size_t) n != total) { + log_err("chardev: short write %zd != %zu", + n, total); + rc = -EIO; + } else { + rc = 0; + } + break; + } + if (errno == EINTR) + continue; + rc = -errno; + log_err("chardev: write: %s", strerror(errno)); + break; + } + free(out); + } + return rc; +} + +static int handle_req_decode(struct chardev_client *cli, + const struct daedalus_msg_hdr *hdr, + const uint8_t *payload) +{ + struct daedalus_req_decode req; + struct daedalus_resp_frame resp; + int rc; + + if (hdr->payload_len < sizeof(req)) { + log_err("REQ_DECODE cookie=%u: payload too short %u < %zu", + hdr->cookie, hdr->payload_len, sizeof(req)); + memset(&resp, 0, sizeof(resp)); + resp.status = DAEDALUS_DECODE_ERR_RECV; + return 
send_response(cli, DAEDALUS_MSG_RESP_FRAME, + hdr->cookie, &resp, sizeof(resp)); + } + memcpy(&req, payload, sizeof(req)); + if ((size_t) req.bitstream_len + sizeof(req) != hdr->payload_len) { + log_err("REQ_DECODE cookie=%u: bitstream_len %u inconsistent with payload_len %u", + hdr->cookie, req.bitstream_len, hdr->payload_len); + memset(&resp, 0, sizeof(resp)); + resp.status = DAEDALUS_DECODE_ERR_RECV; + return send_response(cli, DAEDALUS_MSG_RESP_FRAME, + hdr->cookie, &resp, sizeof(resp)); + } + + log_info("REQ_DECODE cookie=%u codec=%u bitstream=%u bytes", + hdr->cookie, req.codec_id, req.bitstream_len); + + rc = daedalus_decoder_run_request(cli->decoder, &req, + payload + sizeof(req), &resp); + if (rc < 0) + return rc; + + return send_response(cli, DAEDALUS_MSG_RESP_FRAME, hdr->cookie, + &resp, sizeof(resp)); +} + +static int handle_ping(struct chardev_client *cli, + const struct daedalus_msg_hdr *hdr, + const uint8_t *payload) +{ + log_info("PING cookie=%u plen=%u — echoing PONG", + hdr->cookie, hdr->payload_len); + return send_response(cli, DAEDALUS_MSG_PONG, hdr->cookie, + payload, hdr->payload_len); +} + +static int handle_one_message(struct chardev_client *cli, uint8_t *buf) +{ + struct daedalus_msg_hdr hdr; + ssize_t n; + + /* + * The kernel chardev delivers exactly one message per + * read(). Pass a buffer that can hold any legal message. 
+ */ + for (;;) { + n = read(cli->fd, buf, CHARDEV_READ_BUFSZ); + if (n >= 0) + break; + n = -errno; /* snapshot: log_err() may clobber errno */ + if (n != -EINTR) { + log_err("chardev: read: %s", strerror((int) -n)); + return (int) n; + } + if (*cli->stop_flag) + return 0; + } + if (n == 0) + return -EIO; /* EOF / device unplugged */ + if ((size_t) n < sizeof(hdr)) { + log_err("chardev: short read %zd < hdr", n); + return -EBADMSG; + } + + memcpy(&hdr, buf, sizeof(hdr)); + if (hdr.magic != DAEDALUS_PROTO_MAGIC) { + log_err("chardev: bad magic 0x%08x", hdr.magic); + return -EBADMSG; + } + if (hdr.version != DAEDALUS_PROTO_VERSION) { + log_err("chardev: unsupported version %u", hdr.version); + return -EPROTO; + } + if ((size_t) n != sizeof(hdr) + hdr.payload_len) { + log_err("chardev: framing mismatch n=%zd expected %zu", + n, sizeof(hdr) + hdr.payload_len); + return -EBADMSG; + } + + switch (hdr.type) { + case DAEDALUS_MSG_PING: + return handle_ping(cli, &hdr, buf + sizeof(hdr)); + case DAEDALUS_MSG_REQ_DECODE: + return handle_req_decode(cli, &hdr, buf + sizeof(hdr)); + default: + log_warn("chardev: unknown request type 0x%08x cookie=%u", + hdr.type, hdr.cookie); + return 0; /* skip, don't bail the loop */ + } +} + +int chardev_client_run(struct chardev_client *cli) +{ + uint8_t *buf; + int rc = 0; + + buf = malloc(CHARDEV_READ_BUFSZ); + if (!buf) + return -ENOMEM; + + log_info("daemon loop started; waiting for kernel requests"); + + while (!*cli->stop_flag) { + rc = handle_one_message(cli, buf); + if (rc < 0) { + if (rc == -EINTR) + continue; + log_err("chardev: handle_one_message: %d", rc); + break; + } + } + + log_info("daemon loop exiting (stop=%d rc=%d)", *cli->stop_flag, rc); + free(buf); + return rc; +} diff --git a/daemon/src/chardev_client.h b/daemon/src/chardev_client.h new file mode 100644 index 0000000..f0b22fe --- /dev/null +++ b/daemon/src/chardev_client.h @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: BSD-2-Clause */ +/* + * chardev_client.h — kernel-bridge client for the daedalus-v4l2 daemon. 
+ * + * Opens /dev/daedalus-v4l2, runs a blocking read() / write() loop, + * and invokes the daemon's REQ_* handlers. Phase 8.4 understands + * REQ_DECODE; future phases extend the dispatch table. + */ +#ifndef DAEDALUS_V4L2_CHARDEV_CLIENT_H +#define DAEDALUS_V4L2_CHARDEV_CLIENT_H + +#include +#include +#include + +#include "daedalus_v4l2_proto.h" + +struct ffmpeg_loader; +struct daedalus_decoder; + +/** + * struct chardev_client - daemon-side chardev state + * @fd: open /dev/daedalus-v4l2 descriptor (-1 if not open) + * @loader: dlopen'd FFmpeg loader (borrowed; not owned) + * @decoder: per-codec AVCodecContext cache (owned) + * @stop_flag: set non-zero from a signal handler to break the loop + */ +struct chardev_client { + int fd; + struct ffmpeg_loader *loader; + struct daedalus_decoder *decoder; + volatile sig_atomic_t *stop_flag; +}; + +/** + * chardev_client_open - open /dev/daedalus-v4l2 O_RDWR + * + * @cli: caller-allocated; cleared on entry + * @loader: borrowed FFmpeg loader (must outlive the client) + * @stop_flag: pointer the signal handler sets to ask the loop to stop + * + * Return: 0 on success; negative errno on failure. + */ +int chardev_client_open(struct chardev_client *cli, + struct ffmpeg_loader *loader, + volatile sig_atomic_t *stop_flag); + +/** + * chardev_client_run - blocking event loop + * + * Reads one message at a time, dispatches to the matching + * handler, writes the corresponding response. Returns when + * *@stop_flag is set, on chardev EOF, or on an unrecoverable + * error. + * + * Return: 0 on clean shutdown; negative errno on a fatal error. 
+ */ +int chardev_client_run(struct chardev_client *cli); + +/** + * chardev_client_close - close the chardev and free decoder state + */ +void chardev_client_close(struct chardev_client *cli); + +#endif /* DAEDALUS_V4L2_CHARDEV_CLIENT_H */ diff --git a/daemon/src/decoder.c b/daemon/src/decoder.c new file mode 100644 index 0000000..b78b627 --- /dev/null +++ b/daemon/src/decoder.c @@ -0,0 +1,266 @@ +/* SPDX-License-Identifier: BSD-2-Clause */ +/* + * decoder.c — FFmpeg-driven decode helper for daedalus-v4l2 daemon. + */ +#include "decoder.h" +#include "ffmpeg_loader.h" +#include "log.h" + +#include +#include +#include + +#include +#include + +/* + * FNV-1a 32-bit hash. Used as a compact digest of the decoded + * frame's YUV planes so the kernel can verify "the daemon produced + * the expected pixels" without shipping the full frame through the + * 64-KiB-capped chardev wire protocol. Phase 8.5's dmabuf path + * carries actual pixel data; this digest stays useful as a + * cross-host sanity check. + */ +static uint32_t fnv1a32_init(void) +{ + return 0x811c9dc5u; +} + +static uint32_t fnv1a32_update(uint32_t h, const uint8_t *data, size_t len) +{ + size_t i; + for (i = 0; i < len; i++) + h = (h ^ data[i]) * 0x01000193u; + return h; +} + +/* + * Hash plane @p (width @w bytes per line, @h lines, stride @stride + * bytes between lines). We strip libav's row alignment padding so + * the hash matches the layout used by `ffmpeg -f rawvideo` reference + * output (which is tightly packed). 
+ */ +static uint32_t fnv1a32_plane(uint32_t h, const uint8_t *p, + int w, int height, int stride) +{ + int y; + for (y = 0; y < height; y++) + h = fnv1a32_update(h, p + (size_t) y * (size_t) stride, + (size_t) w); + return h; +} + +int daedalus_decoder_init(struct daedalus_decoder *dec, + struct ffmpeg_loader *loader) +{ + memset(dec, 0, sizeof(*dec)); + dec->loader = loader; + + dec->pkt = loader->av_packet_alloc(); + if (!dec->pkt) + return -ENOMEM; + dec->frame = loader->av_frame_alloc(); + if (!dec->frame) { + loader->av_packet_free(&dec->pkt); + return -ENOMEM; + } + return 0; +} + +void daedalus_decoder_cleanup(struct daedalus_decoder *dec) +{ + if (!dec || !dec->loader) + return; + if (dec->ctx_vp9) + dec->loader->avcodec_free_context(&dec->ctx_vp9); + if (dec->frame) + dec->loader->av_frame_free(&dec->frame); + if (dec->pkt) + dec->loader->av_packet_free(&dec->pkt); + memset(dec, 0, sizeof(*dec)); +} + +/* + * Lazily open the AVCodecContext for codec_id. Returns 0 on + * success, -ENOSYS on unknown codec, -EIO on FFmpeg failure. + */ +static int decoder_open_codec(struct daedalus_decoder *dec, uint32_t codec_id, + struct AVCodecContext **out) +{ + struct ffmpeg_loader *fm = dec->loader; + const struct AVCodec *codec; + struct AVCodecContext *ctx; + enum AVCodecID av_id; + int rc; + + switch (codec_id) { + case DAEDALUS_CODEC_VP9: + av_id = AV_CODEC_ID_VP9; + if (dec->ctx_vp9) { + *out = dec->ctx_vp9; + return 0; + } + break; + case DAEDALUS_CODEC_AV1: + case DAEDALUS_CODEC_H264: + /* Phase 8.6 wires AV1 and H.264 properly. 
*/ + log_warn("decoder: codec_id %u not yet supported", codec_id); + return -ENOSYS; + default: + log_warn("decoder: unknown codec_id %u", codec_id); + return -ENOSYS; + } + + codec = fm->avcodec_find_decoder(av_id); + if (!codec) { + log_err("decoder: avcodec_find_decoder(%d) returned NULL", av_id); + return -EIO; + } + ctx = fm->avcodec_alloc_context3(codec); + if (!ctx) + return -ENOMEM; + rc = fm->avcodec_open2(ctx, codec, NULL); + if (rc < 0) { + log_err("decoder: avcodec_open2 failed: %d", rc); + fm->avcodec_free_context(&ctx); + return -EIO; + } + + dec->ctx_vp9 = ctx; + *out = ctx; + log_info("decoder: opened %s context", codec->name); + return 0; +} + +int daedalus_decoder_run_request(struct daedalus_decoder *dec, + const struct daedalus_req_decode *req, + const uint8_t *bitstream, + struct daedalus_resp_frame *resp) +{ + struct ffmpeg_loader *fm = dec->loader; + struct AVCodecContext *ctx = NULL; + int rc; + + memset(resp, 0, sizeof(*resp)); + resp->codec_id = req->codec_id; + + rc = decoder_open_codec(dec, req->codec_id, &ctx); + if (rc == -ENOSYS) { + resp->status = DAEDALUS_DECODE_ERR_CODEC; + return 0; + } + if (rc < 0) { + resp->status = DAEDALUS_DECODE_ERR_OPEN; + return 0; + } + + fm->av_packet_unref(dec->pkt); + /* + * The kernel's REQ_DECODE payload is borrowed memory we'll + * free as soon as this function returns. Pointing the + * AVPacket at it directly is safe because avcodec_send_packet + * either fully consumes the input or copies it internally — + * by the time we return we no longer reference @bitstream. + * + * We cast away const because AVPacket->data is non-const in + * the FFmpeg API; we promise not to mutate the buffer. 
+ */ + dec->pkt->data = (uint8_t *) (uintptr_t) bitstream; + dec->pkt->size = (int) req->bitstream_len; + + rc = fm->avcodec_send_packet(ctx, dec->pkt); + if (rc < 0) { + log_err("decoder: avcodec_send_packet failed: %d", rc); + resp->status = DAEDALUS_DECODE_ERR_SEND; + return 0; + } + + fm->av_frame_unref(dec->frame); + rc = fm->avcodec_receive_frame(ctx, dec->frame); + if (rc == AVERROR(EAGAIN) || rc == AVERROR_EOF) { + log_debug("decoder: no frame ready yet (rc=%d)", rc); + resp->status = DAEDALUS_DECODE_NO_FRAME; + return 0; + } + if (rc < 0) { + log_err("decoder: avcodec_receive_frame failed: %d", rc); + resp->status = DAEDALUS_DECODE_ERR_RECV; + return 0; + } + + { + struct AVFrame *fr = dec->frame; + const AVPixFmtDescriptor *desc = + fm->av_pix_fmt_desc_get(fr->format); + uint32_t h = fnv1a32_init(); + uint32_t luma_len = 0, chroma_len = 0; + + resp->status = DAEDALUS_DECODE_OK; + resp->width = (uint32_t) fr->width; + resp->height = (uint32_t) fr->height; + resp->pix_fmt = fr->format; + + /* + * Walk every plane reported by the AVPixFmtDescriptor + * and hash one byte per sample position: plane 0 at + * full size, every other plane at the chroma size + * (width / height shifted right by log2_chroma_w / + * log2_chroma_h, rounding up). That is exact for + * planar 8-bit layouts with one component per plane + * (YUV420P, YUV422P, YUV444P, GBRP). + * + * NOTE(review): semi-planar NV12 interleaves U and V + * in plane 1, so only half of each chroma row would + * be hashed here — confirm before relying on it. + * + * 10/12-bit formats (P010, YUV420P10LE) will also + * need depth handling when Phase 8.6 brings HDR + * content into play. 
+ */ + if (!desc) { + log_warn("decoder: no descriptor for pix_fmt %d", + fr->format); + } else { + int p, max_plane = 0; + int i; + + for (i = 0; i < desc->nb_components; i++) { + if (desc->comp[i].plane > max_plane) + max_plane = desc->comp[i].plane; + } + + for (p = 0; p <= max_plane; p++) { + int pw, ph; + if (!fr->data[p] || !fr->linesize[p]) + continue; + if (p == 0) { + pw = fr->width; + ph = fr->height; + luma_len += (uint32_t) pw * + (uint32_t) ph; + } else { + pw = AV_CEIL_RSHIFT(fr->width, + desc->log2_chroma_w); + ph = AV_CEIL_RSHIFT(fr->height, + desc->log2_chroma_h); + chroma_len += (uint32_t) pw * + (uint32_t) ph; + } + h = fnv1a32_plane(h, fr->data[p], pw, ph, + fr->linesize[p]); + } + } + + resp->luma_len = luma_len; + resp->chroma_len = chroma_len; + resp->fnv1a_yuv = h; + + log_info("decoder: OK %dx%d fmt=%d (%s) fnv1a=0x%08x luma=%u chroma=%u", + fr->width, fr->height, fr->format, + desc ? desc->name : "?", + h, luma_len, chroma_len); + } + + fm->av_frame_unref(dec->frame); + return 0; +} diff --git a/daemon/src/decoder.h b/daemon/src/decoder.h new file mode 100644 index 0000000..9d0776f --- /dev/null +++ b/daemon/src/decoder.h @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: BSD-2-Clause */ +/* + * decoder.h — FFmpeg-driven decode helper for daedalus-v4l2 daemon. + * + * Encapsulates the AVCodecContext per supported codec, plus a + * single shared AVPacket / AVFrame pair (we serialise decode + * inside the chardev event loop; no concurrency). + * + * Phase 8.4 implements VP9. Phase 8.6 extends to AV1 / H.264. 
+ */ +#ifndef DAEDALUS_V4L2_DECODER_H +#define DAEDALUS_V4L2_DECODER_H + +#include +#include + +#include "daedalus_v4l2_proto.h" + +struct ffmpeg_loader; +struct AVCodecContext; +struct AVPacket; +struct AVFrame; + +/** + * struct daedalus_decoder - per-daemon decoder state + * @loader: borrowed FFmpeg loader (must outlive the decoder) + * @ctx_vp9: lazily-opened VP9 AVCodecContext (NULL until first + * VP9 REQ_DECODE) + * @pkt: shared AVPacket reused across requests + * @frame: shared AVFrame reused across requests + */ +struct daedalus_decoder { + struct ffmpeg_loader *loader; + struct AVCodecContext *ctx_vp9; + struct AVPacket *pkt; + struct AVFrame *frame; +}; + +/** + * daedalus_decoder_init - allocate the shared packet/frame pair + * + * Return: 0 on success, -ENOMEM if FFmpeg refused to allocate. + * Codec contexts are opened lazily on first use. + */ +int daedalus_decoder_init(struct daedalus_decoder *dec, + struct ffmpeg_loader *loader); + +/** + * daedalus_decoder_cleanup - free codec contexts, packet, frame + */ +void daedalus_decoder_cleanup(struct daedalus_decoder *dec); + +/** + * daedalus_decoder_run_request - decode one REQ_DECODE payload + * @dec: initialised decoder + * @req: REQ_DECODE prefix (from the wire) + * @bitstream: bitstream blob (req->bitstream_len bytes) + * @resp: caller-allocated RESP_FRAME output (zeroed by callee) + * + * Populates @resp with the decode outcome. Always returns 0; + * decode-level failures are reported via @resp->status so the + * kernel sees a structured response rather than a dropped + * request. 
+ */ +int daedalus_decoder_run_request(struct daedalus_decoder *dec, + const struct daedalus_req_decode *req, + const uint8_t *bitstream, + struct daedalus_resp_frame *resp); + +#endif /* DAEDALUS_V4L2_DECODER_H */ diff --git a/daemon/src/ffmpeg_loader.c b/daemon/src/ffmpeg_loader.c index 76e5fb6..d32ba91 100644 --- a/daemon/src/ffmpeg_loader.c +++ b/daemon/src/ffmpeg_loader.c @@ -90,6 +90,7 @@ int ffmpeg_loader_init(struct ffmpeg_loader *loader) RESOLVE(libavutil, LIBAVUTIL_SONAME, av_log_set_level); RESOLVE(libavutil, LIBAVUTIL_SONAME, av_get_media_type_string); RESOLVE(libavutil, LIBAVUTIL_SONAME, av_version_info); + RESOLVE(libavutil, LIBAVUTIL_SONAME, av_pix_fmt_desc_get); { unsigned int v = loader->avformat_version(); diff --git a/daemon/src/ffmpeg_loader.h b/daemon/src/ffmpeg_loader.h index 5484f51..c43ee41 100644 --- a/daemon/src/ffmpeg_loader.h +++ b/daemon/src/ffmpeg_loader.h @@ -33,6 +33,7 @@ #include #include #include +#include /** * struct ffmpeg_loader - resolved FFmpeg API entry points @@ -86,6 +87,7 @@ struct ffmpeg_loader { void (*av_log_set_level)(int); const char *(*av_get_media_type_string)(enum AVMediaType); const char *(*av_version_info)(void); + const AVPixFmtDescriptor *(*av_pix_fmt_desc_get)(enum AVPixelFormat); }; /** diff --git a/daemon/src/main.c b/daemon/src/main.c index b658f8e..eb12eb3 100644 --- a/daemon/src/main.c +++ b/daemon/src/main.c @@ -10,6 +10,7 @@ */ #include "ffmpeg_loader.h" #include "parser.h" +#include "chardev_client.h" #include "log.h" #include @@ -52,12 +53,32 @@ static int cmd_parse(struct ffmpeg_loader *fm, int argc, char **argv) return rc < 0 ? 1 : 0; } +static int cmd_daemon(struct ffmpeg_loader *fm, int argc, char **argv) +{ + struct chardev_client cli; + int rc; + + (void) argc; + (void) argv; + + rc = chardev_client_open(&cli, fm, &g_terminate); + if (rc < 0) { + log_err("chardev_client_open: %d", rc); + return 1; + } + rc = chardev_client_run(&cli); + chardev_client_close(&cli); + return rc < 0 ? 
1 : 0; +} + static void usage(const char *progname) { fprintf(stderr, "usage: %s [args]\n\n" "commands:\n" " parse Phase 8.3: demux+enumerate frames\n" + " daemon Phase 8.4: open /dev/daedalus-v4l2 and\n" + " service REQ_DECODE from the kernel\n" "\n" "options:\n" " -v, --verbose enable debug logging\n" @@ -103,6 +124,8 @@ int main(int argc, char **argv) const char *cmd = argv[i++]; if (strcmp(cmd, "parse") == 0) { rc = cmd_parse(&fm, argc - i, argv + i); + } else if (strcmp(cmd, "daemon") == 0) { + rc = cmd_daemon(&fm, argc - i, argv + i); } else { fprintf(stderr, "unknown command: %s\n", cmd); usage(argv[0]); diff --git a/docs/phase_8_4_closure.md b/docs/phase_8_4_closure.md new file mode 100644 index 0000000..c1421d2 --- /dev/null +++ b/docs/phase_8_4_closure.md @@ -0,0 +1,214 @@ +# Phase 8.4 closure — daemon ↔ kernel decode round-trip (VP9) + +**Status:** closed 2026-05-18. + +Wires the Phase 8.3 FFmpeg loader through the Phase 8.2 chardev +bridge: kernel injects `REQ_DECODE` carrying a raw VP9 access +unit, daemon hands the bitstream to libavcodec via dlopen, sends +`RESP_FRAME` back with a content-dependent FNV-1a digest of the +decoded YUV planes. Pure CPU decode for now — Phase 8.5 swaps +in dmabuf + QPU dispatch. + +## What lands + +### Protocol (`include/daedalus_v4l2_proto.h`) +- New message types: `REQ_DECODE` (kernel→daemon) and + `RESP_FRAME` (daemon→kernel). Converted the prior `enum + daedalus_msg_type` to `#define`s — high-bit values exceed + INT_MAX and tripped -Wpedantic on userspace builds; kernel uABI + headers use the same idiom. +- New payload structs `daedalus_req_decode`, + `daedalus_resp_frame`. +- New codec id enum (`DAEDALUS_CODEC_VP9 = 1`); wire-stable so + Phase 8.6's AV1/H.264 additions don't move the existing + values. +- New status enum (`DAEDALUS_DECODE_OK`, `..._NO_FRAME`, + `..._ERR_OPEN`, `..._ERR_SEND`, `..._ERR_RECV`, + `..._ERR_CODEC`). 
+ +### Kernel (`kernel/daedalus_v4l2_chardev.c`) +- New debugfs entry `/sys/kernel/debug/daedalus_v4l2/test_decode` + — writing raw bitstream bytes wraps them in a `REQ_DECODE` + (codec hard-wired to VP9 for Phase 8.4) and enqueues for the + daemon. Auto-incrementing cookie per request. +- `daedalus_chardev_write` learned `RESP_FRAME`: parses the + fixed-size payload and emits a single `pr_info` line with the + decode metadata. Keeps the existing PONG path on the default + arm. + +### Daemon (`daemon/src/...`) +- `chardev_client.{c,h}` — opens `/dev/daedalus-v4l2`, blocking + read loop dispatching on message type, writes responses via + single contiguous `write()` (kernel chardev has only `.write`, + no `.write_iter`, so `writev` lands as -EINVAL — discovered + the hard way during first end-to-end run). +- `decoder.{c,h}` — encapsulates the AVCodecContext (lazily + opened on first request per codec), shared AVPacket/AVFrame + pair, and an FNV-1a digest of the decoded planes. Plane walk + is descriptor-driven (`av_pix_fmt_desc_get`) so the same code + path covers YUV420P, YUV422P, YUV444P, GBRP and other 8-bit + planar layouts. +- `daemon` command in `main.c` opens the chardev and runs the + loop until SIGINT / SIGTERM. +- `ffmpeg_loader` gained `av_pix_fmt_desc_get` (23 resolved + symbols total). + +### Build +- CMakeLists adds `chardev_client.c` and `decoder.c` to the + executable; explicit `-I../include` for the shared protocol + header. +- Still `-Wall -Wextra -Wpedantic` clean. 
+ +## Verification + +Kernel module built clean against the in-tree headers +(`linux-headers-6.12.75+rpt-rpi-2712`): + +``` +$ cd /home/mfritsche/src/daedalus-v4l2/kernel && make + CC [M] daedalus_v4l2_chardev.o + LD [M] daedalus_v4l2.ko +``` + +Daemon built clean: + +``` +$ cmake --build build/ +[100%] Built target daedalus_v4l2_daemon +``` + +End-to-end: + +``` +$ ffmpeg -hide_banner -loglevel warning -f lavfi \ + -i 'testsrc=duration=0.04:size=320x240:rate=25' \ + -pix_fmt yuv420p -c:v libvpx-vp9 -frames:v 1 -y /tmp/vp9_test.ivf +$ python3 -c "..." # strip IVF framing → /tmp/vp9_keyframe.bin +extracted 3268 bytes raw VP9 + +$ sudo insmod kernel/daedalus_v4l2.ko +$ /tmp/start_daemon.sh # daemon mode, blocks on read +$ sudo dd if=/tmp/vp9_keyframe.bin \ + of=/sys/kernel/debug/daedalus_v4l2/test_decode bs=8192 count=1 + +daemon log: + [INFO] REQ_DECODE cookie=2 codec=1 bitstream=3268 bytes + [INFO] decoder: opened vp9 context + [INFO] decoder: OK 320x240 fmt=0 (yuv420p) fnv1a=0x6ef10d71 luma=76800 chroma=38400 + +kernel log: + [16199.734667] daedalus_v4l2: REQ_DECODE enqueued cookie=2 codec=VP9 bitstream=3268 + [16199.735951] daedalus_v4l2: RESP_FRAME cookie=2 status=0 codec=1 320x240 + pixfmt=0 luma=76800 chroma=38400 fnv1a=0x6ef10d71 + ^^^^^^^^^^ + matches the daemon's +``` + +### Hash properties (sanity) + +| Trigger | Bitstream | Hash | Notes | +|---|---|---|---| +| testsrc 320×240 | 3268 B | `0x6ef10d71` | first decode (codec open) | +| color=red 320×240 | 44 B | `0x7f6e5dc5` | hash changes with content ✓ | +| testsrc again | 3268 B | `0x6ef10d71` | deterministic ✓ | +| 64 B `/dev/urandom` | 64 B | n/a | structured error, status=101 | + +The garbage-input case is the interesting one: FFmpeg's +`avcodec_send_packet` returned -1094995529 ("Invalid sync code"), +the daemon stayed alive, wrapped that into +`DAEDALUS_DECODE_ERR_SEND`, sent `RESP_FRAME` with status=101 and +zeroed metadata. Kernel logged the response. 
No daemon crash, +no kernel oops, no stuck request in the FIFO. + +### Cleanup + +``` +$ pkill -TERM -f daedalus_v4l2_daemon # daemon exits cleanly +$ sudo rmmod daedalus_v4l2 # ok, all queued requests drained +``` + +## Design decisions + +### Why FNV-1a, why no pixel data on the wire? + +The chardev's wire protocol caps single messages at 64 KiB +(`DAEDALUS_PROTO_MAX_PAYLOAD`). A single 1080p YUV420P frame is +3.1 MB — roughly 50× larger. Forcing pixel data through +the chardev would require either: + +1. Fragmentation across multiple messages (re-assembly state in + kernel, complexity tax for a temporary path). +2. Bumping the limit, which lifts the per-message kmalloc out of + GFP_KERNEL territory. + +Neither is the right answer. Phase 8.5 wires dmabuf for actual +frame transfer; the FNV-1a digest is just enough to prove "the +right bytes came out of the decoder" without paying that cost +yet. The digest also stays useful as a cross-host sanity check +(reference vs target). + +### Why `write()` not `writev()` in the daemon? + +The kernel chardev implements only `.write` in its fops — not +`.write_iter`. The VFS therefore feeds a `writev` to `.write` one +iovec at a time, and the header-only segment fails the driver's +single-call length check with `-EINVAL`. Options were: + +1. Implement `.write_iter` in the kernel (slightly more code, + buys nothing functionally for a one-or-two-iovec write). +2. Marshal into a single buffer in the daemon (one + short-lived malloc per response, dead simple). + +Picked (2). Response payloads are ≤ 36 B (struct +daedalus_resp_frame) for decode and ≤ 64 KiB for PONG; a +malloc/free per response is invisible at the scale we're +working. + +### Why plane walk via `av_pix_fmt_desc_get`? + +First end-to-end run decoded `testsrc` (an RGB-native source) as +`AV_PIX_FMT_GBRP` (71), not `YUV420P`. 
The original hand-rolled +hash hard-coded YUV420P plane geometry and fell back to "plane 0 +only" otherwise — fine for a one-time test, but it would silently +miss two-thirds of the pixels on the very first real-world +content variation. + +Using `AVPixFmtDescriptor` directly gives us a generic plane +walker: how many planes, which components live in each, and +the chroma subsampling shifts. Now the same hash path correctly +covers planar YUV (any subsampling), GBRP, and similar +8-bit-per-sample layouts. 10/12-bit (P010, YUV420P10LE) needs a +depth-aware variant — that lands when Phase 8.6 starts looking at +HDR. + +## What's NOT here (deferred) + +- **dmabuf / DRM PRIME**: Phase 8.5. RESP_FRAME today carries + metadata + digest only; actual pixel data goes out-of-band via + dmabuf in the next phase. +- **V4L2 buffer-queue wiring**: REQ_DECODE today is debugfs- + triggered. Phase 8.5+ has the V4L2 m2m queue submit + requests from `vidioc_qbuf`. +- **QPU dispatch**: the daemon decodes on CPU via FFmpeg. + Substituting per-block dispatch into the sibling + daedalus-fourier kernels (cycles 1, 2, 4, 9) lands once the + daemon-side parser can extract block-level metadata — that's a + Phase 8.5/8.6 follow-up. +- **AV1 / H.264**: decoder rejects them with + `DAEDALUS_DECODE_ERR_CODEC` today. Phase 8.6 adds the codec + contexts. +- **10-bit pixel formats**: hash path is 8-bit/sample/plane only. + +## Phase 8.5 plan + +1. Replace debugfs `test_decode` with V4L2 m2m queue submission: + `vidioc_qbuf` on the OUTPUT queue extracts the bitstream from + the userspace plane and calls `daedalus_chardev_enqueue_req`. +2. dmabuf import on the CAPTURE queue: daemon writes decoded + pixels into a kernel-allocated dmabuf and `RESP_FRAME` + references the buffer index, not raw bytes. +3. Drive a userspace V4L2 client (start with `v4l2-compliance + --stream-options` then a tiny custom test) end-to-end. +4. 
Begin substituting `daedalus_dispatch_*` calls into the + daemon's decode path for kernels where the QPU implementation + matches the FFmpeg block format. diff --git a/include/daedalus_v4l2_proto.h b/include/daedalus_v4l2_proto.h index a15df62..70a81c4 100644 --- a/include/daedalus_v4l2_proto.h +++ b/include/daedalus_v4l2_proto.h @@ -18,9 +18,8 @@ * Each message is a `struct daedalus_msg_hdr` followed by an * optional variable-length payload of `hdr.payload_len` bytes. * - * Phase 8.2 (chardev bridge): only PING/PONG implemented. - * Phase 8.4 (VP9 end-to-end): adds DECODE_FRAME request, - * FRAME_READY response. + * Phase 8.2 (chardev bridge): PING / PONG. + * Phase 8.4 (decode end-to-end): REQ_DECODE / RESP_FRAME. */ #ifndef DAEDALUS_V4L2_PROTO_H #define DAEDALUS_V4L2_PROTO_H @@ -30,23 +29,25 @@ #define DAEDALUS_PROTO_MAGIC 0x44303456u /* 'D04V' */ #define DAEDALUS_PROTO_VERSION 0u /* pre-1.0 */ -/** - * enum daedalus_msg_type - wire-protocol message types - * @DAEDALUS_MSG_PING: request: payload is opaque echo data - * @DAEDALUS_MSG_PONG: response: payload echoes the matching ping - * @DAEDALUS_MSG_HELLO: response: daemon announces itself on connect - * - * Phase 8.2 implements PING / PONG / HELLO. Later phases add - * REQ_DECODE / RESP_FRAME / etc. +/* + * Wire-protocol message types. * * Request types (kernel → daemon) live in 0x0000_0000..0x7fff_ffff. * Response types (daemon → kernel) live in 0x8000_0000..0xffff_ffff. + * The high bit is what distinguishes "kernel produced this" from + * "daemon produced this" on the wire. + * + * These are #defines rather than an enum because the high-bit + * values (>= 0x80000000) exceed INT_MAX, and pre-C23 enums can't + * portably hold them — kernel uABI headers follow the same + * convention. 
*/ -enum daedalus_msg_type { - DAEDALUS_MSG_PING = 0x00000001u, - DAEDALUS_MSG_HELLO = 0x80000001u, - DAEDALUS_MSG_PONG = 0x80000002u, -}; +#define DAEDALUS_MSG_PING 0x00000001u +#define DAEDALUS_MSG_REQ_DECODE 0x00000002u + +#define DAEDALUS_MSG_HELLO 0x80000001u +#define DAEDALUS_MSG_PONG 0x80000002u +#define DAEDALUS_MSG_RESP_FRAME 0x80000003u /** * struct daedalus_msg_hdr - on-the-wire message header @@ -54,7 +55,7 @@ enum daedalus_msg_type { * @version: protocol version (DAEDALUS_PROTO_VERSION) * @type: one of enum daedalus_msg_type * @cookie: caller-supplied identifier; copied verbatim into - * the matching response so the daemon can pair + * the matching response so the kernel can pair * response with request * @payload_len: number of bytes immediately following this * struct (max DAEDALUS_PROTO_MAX_PAYLOAD) @@ -71,4 +72,87 @@ struct daedalus_msg_hdr { #define DAEDALUS_PROTO_MAX_PAYLOAD (64u * 1024u) /* 64 KiB */ +/* -- REQ_DECODE / RESP_FRAME payload structures ---------------------- */ + +/** + * enum daedalus_codec_id - codec selector for REQ_DECODE + * @DAEDALUS_CODEC_VP9: libavcodec AV_CODEC_ID_VP9 + * @DAEDALUS_CODEC_AV1: libavcodec AV_CODEC_ID_AV1 (Phase 8.6) + * @DAEDALUS_CODEC_H264: libavcodec AV_CODEC_ID_H264 (Phase 8.6) + * + * Wire-stable across phases. The daemon maps these to the + * libavcodec AV_CODEC_ID_* values internally so we don't leak + * FFmpeg's enum into the kernel ABI. + */ +enum daedalus_codec_id { + DAEDALUS_CODEC_VP9 = 1, + DAEDALUS_CODEC_AV1 = 2, + DAEDALUS_CODEC_H264 = 3, +}; + +/** + * struct daedalus_req_decode - REQ_DECODE payload prefix + * @codec_id: enum daedalus_codec_id + * @bitstream_len: bytes of bitstream following this struct + * @flags: reserved, must be zero + * + * Total payload_len for a REQ_DECODE = sizeof(struct + * daedalus_req_decode) + bitstream_len. 
+ */ +struct daedalus_req_decode { + __u32 codec_id; + __u32 bitstream_len; + __u32 flags; +}; + +/** + * enum daedalus_decode_status - RESP_FRAME outcome codes + * @DAEDALUS_DECODE_OK: frame produced; fields below populated + * @DAEDALUS_DECODE_NO_FRAME: codec consumed input but no frame + * ready yet (e.g. lacks reference) + * @DAEDALUS_DECODE_ERR_OPEN: avcodec_open2 failed + * @DAEDALUS_DECODE_ERR_SEND: avcodec_send_packet failed + * @DAEDALUS_DECODE_ERR_RECV: avcodec_receive_frame failed + * @DAEDALUS_DECODE_ERR_CODEC: unknown codec_id + */ +enum daedalus_decode_status { + DAEDALUS_DECODE_OK = 0, + DAEDALUS_DECODE_NO_FRAME = 1, + DAEDALUS_DECODE_ERR_OPEN = 100, + DAEDALUS_DECODE_ERR_SEND = 101, + DAEDALUS_DECODE_ERR_RECV = 102, + DAEDALUS_DECODE_ERR_CODEC = 103, +}; + +/** + * struct daedalus_resp_frame - RESP_FRAME payload + * @status: enum daedalus_decode_status + * @codec_id: echoes the request's codec_id + * @width: decoded frame width in pixels (0 if !OK) + * @height: decoded frame height in pixels (0 if !OK) + * @pix_fmt: libavutil AVPixelFormat as int (informational) + * @luma_len: Y-plane byte count actually hashed + * @chroma_len: U+V byte count actually hashed (planar combined) + * @fnv1a_yuv: FNV-1a 32-bit hash of Y,U,V planes concatenated + * (line-by-line, stripping any libav alignment + * stride padding). Lets the kernel side compare + * against an offline reference without shipping + * full pixel data through the chardev. + * @reserved: must be zero + * + * Fixed size — keeps wire parsing simple. No variable-length + * pixel data in Phase 8.4; dmabuf in Phase 8.5 carries that.
+ */ +struct daedalus_resp_frame { + __u32 status; + __u32 codec_id; + __u32 width; + __u32 height; + __s32 pix_fmt; + __u32 luma_len; + __u32 chroma_len; + __u32 fnv1a_yuv; + __u32 reserved; +}; + #endif /* DAEDALUS_V4L2_PROTO_H */ diff --git a/kernel/daedalus_v4l2_chardev.c b/kernel/daedalus_v4l2_chardev.c index 0f09e2a..d0aae57 100644 --- a/kernel/daedalus_v4l2_chardev.c +++ b/kernel/daedalus_v4l2_chardev.c @@ -259,11 +259,34 @@ static ssize_t daedalus_chardev_write(struct file *file, } /* - * Phase 8.2 handling: log the response type. Phase 8.4 - * will wire RESP_FRAME etc. to the V4L2 buffer queue. + * Response dispatch. Phase 8.4 understands PONG (echoes + * back at debug level) and RESP_FRAME (logs decode result + * at info so the test harness can see it without enabling + * dyndbg). Phase 8.5+ will wire RESP_FRAME to the V4L2 + * buffer-done path. */ - pr_debug("daedalus_v4l2: chardev got response type=0x%08x cookie=%u plen=%u\n", - hdr.type, hdr.cookie, hdr.payload_len); + switch (hdr.type) { + case DAEDALUS_MSG_RESP_FRAME: { + struct daedalus_resp_frame fr; + + if (hdr.payload_len < sizeof(fr)) { + pr_warn("daedalus_v4l2: RESP_FRAME payload too short (%u < %zu)\n", + hdr.payload_len, sizeof(fr)); + kfree(payload); + return -EBADMSG; + } + memcpy(&fr, payload, sizeof(fr)); + pr_info("daedalus_v4l2: RESP_FRAME cookie=%u status=%u codec=%u %ux%u pixfmt=%d luma=%u chroma=%u fnv1a=0x%08x\n", + hdr.cookie, fr.status, fr.codec_id, + fr.width, fr.height, fr.pix_fmt, + fr.luma_len, fr.chroma_len, fr.fnv1a_yuv); + break; + } + default: + pr_debug("daedalus_v4l2: chardev got response type=0x%08x cookie=%u plen=%u\n", + hdr.type, hdr.cookie, hdr.payload_len); + break; + } kfree(payload); return expected; @@ -328,6 +351,66 @@ static const struct file_operations daedalus_test_ping_fops = { .write = daedalus_test_ping_write, }; +/* + * Writing bitstream bytes to + * /sys/kernel/debug/daedalus_v4l2/test_decode enqueues a REQ_DECODE + * carrying those bytes as a VP9 
access unit (Phase 8.4 fixed + * codec). The wire payload prepends a struct daedalus_req_decode + * header so the daemon knows the codec id and bitstream length. + * + * Phase 8.6 generalises codec_id (via a sysfs / debugfs control); + * for Phase 8.4 VP9 is hard-wired since that's what the cycle-9 + * stack targets first. + */ +static atomic_t daedalus_decode_cookie = ATOMIC_INIT(0); + +static ssize_t daedalus_test_decode_write(struct file *file, + const char __user *buf, + size_t count, loff_t *ppos) +{ + struct daedalus_req_decode req; + u8 *blob; + size_t total; + u32 cookie; + int ret; + + if (count == 0) + return -EINVAL; + if (count + sizeof(req) > DAEDALUS_PROTO_MAX_PAYLOAD) + return -EMSGSIZE; + + total = sizeof(req) + count; + blob = kmalloc(total, GFP_KERNEL); + if (!blob) + return -ENOMEM; + + req.codec_id = DAEDALUS_CODEC_VP9; + req.bitstream_len = (u32) count; + req.flags = 0; + memcpy(blob, &req, sizeof(req)); + + if (copy_from_user(blob + sizeof(req), buf, count)) { + kfree(blob); + return -EFAULT; + } + + cookie = (u32) atomic_inc_return(&daedalus_decode_cookie); + ret = daedalus_chardev_enqueue_req(DAEDALUS_MSG_REQ_DECODE, cookie, + blob, total); + kfree(blob); + if (ret) + return ret; + + pr_info("daedalus_v4l2: REQ_DECODE enqueued cookie=%u codec=VP9 bitstream=%zu\n", + cookie, count); + return count; +} + +static const struct file_operations daedalus_test_decode_fops = { + .owner = THIS_MODULE, + .write = daedalus_test_decode_write, +}; + /* -- registration ---------------------------------------------------- */ int daedalus_chardev_init(void) @@ -355,9 +438,12 @@ int daedalus_chardev_init(void) } dev->debugfs_dir = debugfs_create_dir("daedalus_v4l2", NULL); - if (!IS_ERR(dev->debugfs_dir)) + if (!IS_ERR(dev->debugfs_dir)) { debugfs_create_file("test_ping", 0200, dev->debugfs_dir, NULL, &daedalus_test_ping_fops); + debugfs_create_file("test_decode", 0200, dev->debugfs_dir, + NULL, &daedalus_test_decode_fops); + } g_chardev = dev; 
pr_info("daedalus_v4l2: /dev/%s registered\n", DAEDALUS_CHARDEV_NAME);