Phase 8.4: daemon ↔ kernel decode round-trip (VP9 end-to-end)

Wires the Phase 8.3 FFmpeg loader through the Phase 8.2 chardev bridge: kernel injects REQ_DECODE carrying a raw VP9 access unit, daemon hands the bitstream to libavcodec via dlopen, sends RESP_FRAME back with a content-dependent FNV-1a digest of the decoded YUV planes. Pure CPU decode for now — Phase 8.5 swaps in dmabuf + QPU dispatch. Protocol (include/daedalus_v4l2_proto.h): - New REQ_DECODE (kernel→daemon) and RESP_FRAME (daemon→kernel) message types, with fixed-size payload structs. - New DAEDALUS_CODEC_VP9/AV1/H264 enum (wire-stable so 8.6's AV1+H.264 work doesn't move existing values). - New DAEDALUS_DECODE_* status enum (OK / NO_FRAME / ERR_OPEN / ERR_SEND / ERR_RECV / ERR_CODEC). - Converted the prior `enum daedalus_msg_type` to #defines — high-bit values exceed INT_MAX and tripped -Wpedantic on userspace; kernel uABI headers use the same idiom. Kernel (kernel/daedalus_v4l2_chardev.c): - New debugfs entry /sys/kernel/debug/daedalus_v4l2/test_decode: writing raw bitstream bytes wraps them in a REQ_DECODE (codec=VP9 for Phase 8.4) and enqueues with an auto-incrementing cookie. - daedalus_chardev_write learned RESP_FRAME: parses the payload and emits a single pr_info line with decode metadata. Keeps existing PONG handling on the default arm. Daemon (daemon/src/...): - chardev_client.{c,h} — opens /dev/daedalus-v4l2, blocking read loop, single-buffer write() responses (kernel chardev has only .write, not .write_iter, so writev lands as -EINVAL — discovered the hard way during first run). - decoder.{c,h} — lazily-opened AVCodecContext per codec, shared AVPacket/AVFrame pair, descriptor-driven plane walker (av_pix_fmt_desc_get) so the same hash path covers YUV420P, YUV422P, YUV444P, GBRP and other 8-bit planar layouts. Generalised after first run decoded testsrc as GBRP (71) rather than the assumed YUV420P. - `daemon` command in main.c opens the chardev and runs the loop until SIGINT/SIGTERM. Cookie correlation handled end-to-end. 
- ffmpeg_loader gained av_pix_fmt_desc_get (23 symbols total). Build: - CMakeLists adds chardev_client.c + decoder.c; explicit -I../include for the shared protocol header. - Still -Wall -Wextra -Wpedantic clean. Verification on hertz (Pi 5, 6.12.75+rpt-rpi-2712): $ ffmpeg ... -pix_fmt yuv420p -c:v libvpx-vp9 -frames:v 1 \ -y /tmp/vp9_test.ivf $ python3 ... strip IVF framing → vp9_keyframe.bin (3268 B) $ sudo insmod kernel/daedalus_v4l2.ko $ daedalus_v4l2_daemon -v daemon & $ sudo dd if=vp9_keyframe.bin \ of=/sys/kernel/debug/daedalus_v4l2/test_decode daemon: REQ_DECODE cookie=2 → decoded yuv420p 320x240 fnv1a=0x6ef10d71 luma=76800 chroma=38400 kernel: RESP_FRAME cookie=2 status=0 320x240 pixfmt=0 fnv1a=0x6ef10d71 ← matches daemon ✓ Hash properties verified: cookie=2 testsrc 3268 B → 0x6ef10d71 (first decode) cookie=3 red 44 B → 0x7f6e5dc5 (content-dependent ✓) cookie=4 testsrc 3268 B → 0x6ef10d71 (deterministic ✓) cookie=5 64 B random → status=101 (ERR_SEND, daemon alive) Daemon survives bad input (FFmpeg "Invalid sync code" wrapped into structured ERR_SEND response). Clean SIGTERM shutdown, clean rmmod. 
Phase 8.4 acceptance criteria met: - ✓ end-to-end kernel→daemon→FFmpeg→kernel round-trip - ✓ cookie correlation per request/response pair - ✓ content-dependent + deterministic digest - ✓ structured error responses (no daemon crash on bad input) - ✓ clean teardown (SIGTERM + rmmod) - ✓ builds clean on both kernel kbuild and daemon CMake Per correctness-before-speed: - Real chardev I/O (no shortcuts, no select-loop hacks) - Real FFmpeg AVCodecContext lifecycle (lazily opened, properly freed on cleanup) - Descriptor-driven plane walk (generalises across pix_fmts) - Structured error path (not just log-and-continue) - All resource paths cleaned up on every error branch - Documented why FNV-1a digest, why write() not writev(), why pix_desc walk in docs/phase_8_4_closure.md Phase 8.5 next: V4L2 m2m queue submits REQ_DECODE from vidioc_qbuf; dmabuf carries actual pixel data so the chardev's 64 KiB cap doesn't gate frame size; begin substituting daedalus_dispatch_* into the daemon's decode path. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-18 15:22:16 +00:00
parent 873a04c622
commit 2a449632b9
11 changed files with 1098 additions and 22 deletions
@@ -0,0 +1,266 @@
+/* SPDX-License-Identifier: BSD-2-Clause */
+/*
+ * decoder.c — FFmpeg-driven decode helper for daedalus-v4l2 daemon.
+ */
+#include "decoder.h"
+#include "ffmpeg_loader.h"
+#include "log.h"
+
+#include <errno.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <libavcodec/avcodec.h>
+#include <libavutil/pixfmt.h>
+
+/*
+ * FNV-1a 32-bit hash.  Used as a compact digest of the decoded
+ * frame's YUV planes so the kernel can verify "the daemon produced
+ * the expected pixels" without shipping the full frame through the
+ * 64-KiB-capped chardev wire protocol.  Phase 8.5's dmabuf path
+ * carries actual pixel data; this digest stays useful as a
+ * cross-host sanity check.
+ */
+/* Starting value for a fresh digest: the 32-bit FNV offset basis. */
+static uint32_t fnv1a32_init(void)
+{
+	const uint32_t offset_basis = 0x811c9dc5u;
+
+	return offset_basis;
+}
+
+/*
+ * Fold @len bytes starting at @data into the running digest @h and
+ * return the new digest.  Each byte is XORed into the digest first and
+ * the result is then multiplied by 0x01000193 (the FNV-1a ordering).
+ * With @len == 0 the digest is returned unchanged and @data is never
+ * dereferenced.
+ */
+static uint32_t fnv1a32_update(uint32_t h, const uint8_t *data, size_t len)
+{
+	const uint8_t *cur = data;
+	size_t remaining = len;
+
+	while (remaining-- > 0) {
+		h ^= *cur++;
+		h *= 0x01000193u;
+	}
+	return h;
+}
+
+/*
+ * Fold one image plane into the running digest @h and return the
+ * updated digest.  @p points at the first row, @w is the number of
+ * bytes hashed from each row, @height is the number of rows and
+ * @stride is the byte distance from the start of one row to the start
+ * of the next.  Only the first @w bytes of every row are hashed, so
+ * libav's row alignment padding is skipped and the digest matches the
+ * tightly packed layout of `ffmpeg -f rawvideo` reference output.
+ */
+static uint32_t fnv1a32_plane(uint32_t h, const uint8_t *p,
+			      int w, int height, int stride)
+{
+	const size_t row_bytes = (size_t) w;
+	const size_t row_pitch = (size_t) stride;
+	size_t offset = 0;
+	int row;
+
+	for (row = 0; row < height; row++) {
+		h = fnv1a32_update(h, p + offset, row_bytes);
+		/* Same value as (size_t) row * (size_t) stride, built up row by row. */
+		offset += row_pitch;
+	}
+	return h;
+}
+
+/*
+ * Prepare @dec for use with the FFmpeg entry points held in @loader.
+ *
+ * The structure is zeroed, @loader is recorded, and the shared
+ * AVPacket / AVFrame pair is allocated through the loader's function
+ * pointers.  No codec context is opened here.
+ *
+ * Returns 0 on success, or -ENOMEM when either allocation fails; if
+ * the frame allocation is the one that fails, the packet allocated
+ * just before it is released again before returning.
+ */
+int daedalus_decoder_init(struct daedalus_decoder *dec,
+			  struct ffmpeg_loader *loader)
+{
+	memset(dec, 0, sizeof(*dec));
+	dec->loader = loader;
+
+	dec->pkt = loader->av_packet_alloc();
+	if (!dec->pkt)
+		goto err;
+
+	dec->frame = loader->av_frame_alloc();
+	if (!dec->frame)
+		goto err_free_pkt;
+
+	return 0;
+
+err_free_pkt:
+	loader->av_packet_free(&dec->pkt);
+err:
+	return -ENOMEM;
+}
+
+/*
+ * Release everything @dec holds (the VP9 codec context, the shared
+ * frame and the shared packet, each only if present) through the
+ * loader's function pointers, then zero the structure.
+ *
+ * Does nothing when @dec is NULL or has no loader recorded.
+ */
+void daedalus_decoder_cleanup(struct daedalus_decoder *dec)
+{
+	struct ffmpeg_loader *fm;
+
+	if (!dec)
+		return;
+
+	fm = dec->loader;
+	if (!fm)
+		return;
+
+	if (dec->ctx_vp9)
+		fm->avcodec_free_context(&dec->ctx_vp9);
+	if (dec->frame)
+		fm->av_frame_free(&dec->frame);
+	if (dec->pkt)
+		fm->av_packet_free(&dec->pkt);
+
+	memset(dec, 0, sizeof(*dec));
+}
+
+/*
+ * Hand back the AVCodecContext for @codec_id through @out, opening it
+ * on first use and caching it in @dec so later calls reuse it.
+ *
+ * Only DAEDALUS_CODEC_VP9 is wired up; its context lives in
+ * dec->ctx_vp9.
+ *
+ * Returns:
+ *   0        success, *out is set
+ *   -ENOSYS  AV1 / H.264 (not yet supported) or an unknown codec_id
+ *   -EIO     avcodec_find_decoder returned NULL or avcodec_open2 failed
+ *   -ENOMEM  avcodec_alloc_context3 returned NULL
+ *
+ * *out is left untouched on every failure path.
+ */
+static int decoder_open_codec(struct daedalus_decoder *dec, uint32_t codec_id,
+			      struct AVCodecContext **out)
+{
+	const enum AVCodecID av_id = AV_CODEC_ID_VP9;
+	struct ffmpeg_loader *fm = dec->loader;
+	struct AVCodecContext *fresh;
+	const struct AVCodec *codec;
+	int rc;
+
+	if (codec_id == DAEDALUS_CODEC_AV1 ||
+	    codec_id == DAEDALUS_CODEC_H264) {
+		/* Phase 8.6 wires AV1 and H.264 properly. */
+		log_warn("decoder: codec_id %u not yet supported", codec_id);
+		return -ENOSYS;
+	}
+	if (codec_id != DAEDALUS_CODEC_VP9) {
+		log_warn("decoder: unknown codec_id %u", codec_id);
+		return -ENOSYS;
+	}
+
+	/* Fast path: the VP9 context was already opened by an earlier call. */
+	if (dec->ctx_vp9) {
+		*out = dec->ctx_vp9;
+		return 0;
+	}
+
+	codec = fm->avcodec_find_decoder(av_id);
+	if (!codec) {
+		log_err("decoder: avcodec_find_decoder(%d) returned NULL", av_id);
+		return -EIO;
+	}
+
+	fresh = fm->avcodec_alloc_context3(codec);
+	if (!fresh)
+		return -ENOMEM;
+
+	rc = fm->avcodec_open2(fresh, codec, NULL);
+	if (rc < 0) {
+		log_err("decoder: avcodec_open2 failed: %d", rc);
+		fm->avcodec_free_context(&fresh);
+		return -EIO;
+	}
+
+	dec->ctx_vp9 = fresh;
+	*out = fresh;
+	log_info("decoder: opened %s context", codec->name);
+	return 0;
+}
+
+/*
+ * Fold every pixel plane of the decoded frame @fr into one FNV-1a
+ * digest and return it.  @desc must be the (non-NULL) descriptor of
+ * fr->format.
+ *
+ * Plane geometry is derived from the descriptor the same way libavutil
+ * derives line sizes:
+ *   - bytes per pixel position in a plane = the largest comp[].step of
+ *     the components stored in that plane (1 for planar 8-bit, 2 for
+ *     NV12's interleaved CbCr plane and for 16-bit planar samples);
+ *   - a plane's width is chroma-subsampled when that largest-step
+ *     component is component 1 or 2;
+ *   - a plane's height is chroma-subsampled for plane index 1 or 2.
+ * Alpha planes therefore stay full size, and for 8-bit planar formats
+ * (YUV420P, YUV422P, YUV444P, GBRP) the digest is identical to hashing
+ * width x height bytes for plane 0 and the subsampled size for the
+ * chroma planes.
+ *
+ * Planes with no component, a NULL data pointer or a zero linesize are
+ * skipped.  A plane whose computed row length would exceed the frame's
+ * own linesize is skipped with a warning rather than read out of
+ * bounds.
+ *
+ * @luma_len receives the number of bytes hashed from plane 0,
+ * @chroma_len the number of bytes hashed from all other planes.
+ */
+static uint32_t decoder_hash_frame(const struct AVFrame *fr,
+				   const AVPixFmtDescriptor *desc,
+				   uint32_t *luma_len, uint32_t *chroma_len)
+{
+	enum { MAX_PLANES = 4 };
+	int step[MAX_PLANES] = { 0 };		/* largest comp step per plane */
+	int step_comp[MAX_PLANES] = { 0 };	/* component that supplied it */
+	uint32_t h = fnv1a32_init();
+	int i, p;
+
+	*luma_len = 0;
+	*chroma_len = 0;
+
+	for (i = 0; i < desc->nb_components; i++) {
+		int plane = desc->comp[i].plane;
+
+		if (plane < 0 || plane >= MAX_PLANES)
+			continue;
+		if (desc->comp[i].step > step[plane]) {
+			step[plane]	 = desc->comp[i].step;
+			step_comp[plane] = i;
+		}
+	}
+
+	for (p = 0; p < MAX_PLANES; p++) {
+		int wshift, hshift, pw, ph, row_bytes;
+		uint32_t plane_bytes;
+
+		if (!step[p] || !fr->data[p] || !fr->linesize[p])
+			continue;
+
+		wshift = (step_comp[p] == 1 || step_comp[p] == 2) ?
+			 desc->log2_chroma_w : 0;
+		hshift = (p == 1 || p == 2) ? desc->log2_chroma_h : 0;
+		pw = AV_CEIL_RSHIFT(fr->width, wshift);
+		ph = AV_CEIL_RSHIFT(fr->height, hshift);
+
+		row_bytes = pw * step[p];
+		/* For bitstream formats comp[].step counts bits, not bytes. */
+		if (desc->flags & AV_PIX_FMT_FLAG_BITSTREAM)
+			row_bytes = (row_bytes + 7) >> 3;
+
+		if (row_bytes <= 0 || ph <= 0 ||
+		    row_bytes > abs(fr->linesize[p])) {
+			log_warn("decoder: plane %d skipped: %d bytes x %d rows does not fit linesize %d",
+				 p, row_bytes, ph, fr->linesize[p]);
+			continue;
+		}
+
+		plane_bytes = (uint32_t) row_bytes * (uint32_t) ph;
+		if (p == 0)
+			*luma_len += plane_bytes;
+		else
+			*chroma_len += plane_bytes;
+
+		h = fnv1a32_plane(h, fr->data[p], row_bytes, ph,
+				  fr->linesize[p]);
+	}
+	return h;
+}
+
+/*
+ * Decode one access unit and describe the result in @resp.
+ *
+ * @dec        decoder state set up by daedalus_decoder_init()
+ * @req        request header; codec_id and bitstream_len are read
+ * @bitstream  req->bitstream_len bytes of compressed data, borrowed
+ *             from the caller for the duration of this call only
+ * @resp       zeroed, then filled in; resp->codec_id echoes the request
+ *
+ * Always returns 0: every failure is reported through resp->status so
+ * the caller can still send a structured response:
+ *   DAEDALUS_DECODE_ERR_CODEC  codec not supported / unknown
+ *   DAEDALUS_DECODE_ERR_OPEN   codec context could not be opened
+ *   DAEDALUS_DECODE_ERR_SEND   empty, NULL or oversized bitstream, or
+ *                              avcodec_send_packet failed
+ *   DAEDALUS_DECODE_NO_FRAME   decoder returned EAGAIN / EOF
+ *   DAEDALUS_DECODE_ERR_RECV   avcodec_receive_frame failed
+ *   DAEDALUS_DECODE_OK         width, height, pix_fmt, luma_len,
+ *                              chroma_len and fnv1a_yuv are valid
+ *
+ * If the pixel format has no descriptor the frame is still reported as
+ * OK, with both lengths 0 and the digest left at its initial value.
+ */
+int daedalus_decoder_run_request(struct daedalus_decoder *dec,
+				 const struct daedalus_req_decode *req,
+				 const uint8_t *bitstream,
+				 struct daedalus_resp_frame *resp)
+{
+	struct ffmpeg_loader *fm = dec->loader;
+	struct AVCodecContext *ctx = NULL;
+	struct AVFrame *fr;
+	const AVPixFmtDescriptor *desc;
+	uint32_t digest;
+	uint32_t luma_len = 0, chroma_len = 0;
+	int rc;
+
+	memset(resp, 0, sizeof(*resp));
+	resp->codec_id = req->codec_id;
+
+	rc = decoder_open_codec(dec, req->codec_id, &ctx);
+	if (rc == -ENOSYS) {
+		resp->status = DAEDALUS_DECODE_ERR_CODEC;
+		return 0;
+	}
+	if (rc < 0) {
+		resp->status = DAEDALUS_DECODE_ERR_OPEN;
+		return 0;
+	}
+
+	/*
+	 * Reject input that must never reach libavcodec.  A packet with
+	 * data == NULL and size == 0 is the flush packet and would put
+	 * the cached context into draining mode; a length above INT_MAX
+	 * does not fit AVPacket->size.
+	 */
+	if (!bitstream || req->bitstream_len == 0 ||
+	    req->bitstream_len > (unsigned int) INT_MAX) {
+		log_err("decoder: rejecting bitstream (ptr=%p len=%lu)",
+			(const void *) bitstream,
+			(unsigned long) req->bitstream_len);
+		resp->status = DAEDALUS_DECODE_ERR_SEND;
+		return 0;
+	}
+
+	fm->av_packet_unref(dec->pkt);
+	/*
+	 * @bitstream is borrowed memory the caller releases as soon as
+	 * this function returns.  Pointing the AVPacket at it directly
+	 * is safe because the packet is not reference-counted (buf is
+	 * NULL), so avcodec_send_packet copies the data instead of
+	 * keeping a reference to our buffer.
+	 *
+	 * We cast away const because AVPacket->data is non-const in
+	 * the FFmpeg API; we promise not to mutate the buffer.
+	 */
+	dec->pkt->data = (uint8_t *) (uintptr_t) bitstream;
+	dec->pkt->size = (int) req->bitstream_len;
+
+	rc = fm->avcodec_send_packet(ctx, dec->pkt);
+
+	/*
+	 * Drop the borrowed pointer right away so the long-lived shared
+	 * packet never holds a dangling reference between requests.
+	 */
+	fm->av_packet_unref(dec->pkt);
+
+	if (rc < 0) {
+		log_err("decoder: avcodec_send_packet failed: %d", rc);
+		resp->status = DAEDALUS_DECODE_ERR_SEND;
+		return 0;
+	}
+
+	fm->av_frame_unref(dec->frame);
+	rc = fm->avcodec_receive_frame(ctx, dec->frame);
+	if (rc == AVERROR(EAGAIN) || rc == AVERROR_EOF) {
+		log_debug("decoder: no frame ready yet (rc=%d)", rc);
+		resp->status = DAEDALUS_DECODE_NO_FRAME;
+		return 0;
+	}
+	if (rc < 0) {
+		log_err("decoder: avcodec_receive_frame failed: %d", rc);
+		resp->status = DAEDALUS_DECODE_ERR_RECV;
+		return 0;
+	}
+
+	fr = dec->frame;
+	desc = fm->av_pix_fmt_desc_get(fr->format);
+
+	resp->status	= DAEDALUS_DECODE_OK;
+	resp->width	= (uint32_t) fr->width;
+	resp->height	= (uint32_t) fr->height;
+	resp->pix_fmt	= fr->format;
+
+	if (desc) {
+		digest = decoder_hash_frame(fr, desc, &luma_len, &chroma_len);
+	} else {
+		log_warn("decoder: no descriptor for pix_fmt %d", fr->format);
+		digest = fnv1a32_init();
+	}
+
+	resp->luma_len	 = luma_len;
+	resp->chroma_len = chroma_len;
+	resp->fnv1a_yuv	 = digest;
+
+	log_info("decoder: OK %dx%d fmt=%d (%s) fnv1a=0x%08x luma=%u chroma=%u",
+		 fr->width, fr->height, fr->format,
+		 desc ? desc->name : "?",
+		 digest, luma_len, chroma_len);
+
+	fm->av_frame_unref(dec->frame);
+	return 0;
+}