// SPDX-License-Identifier: GPL-2.0-or-later /* * daedalus-v4l2 — V4L2 stateless decoder shim. * * Out-of-tree Linux kernel module that exposes a /dev/videoNN * V4L2 m2m (mem2mem) device for the daedalus-fourier kernel * library. Real decoding happens in a userspace daemon; this * module ferries bitstream buffers to the daemon via the * /dev/daedalus-v4l2 chardev bridge and ferries decoded pixels * back into the V4L2 client's CAPTURE buffer. * * Phase 8.5 (this revision): full V4L2 m2m driver with vb2 * queues, real v4l2_ioctl_ops table, device_run wired to REQ_DECODE * over the chardev, RESP_FRAME completion path back into * v4l2_m2m_buf_done. Bitstream + decoded pixel data travel * inline through the 64 KiB chardev payload — enough for small * frames and proof-of-pipe; Phase 8.6 adds dmabuf-export so * larger CAPTURE buffers don't have to round-trip through the * chardev. * * Phase 8.5 does NOT implement the V4L2 stateless control set * (V4L2_CID_STATELESS_VP9_FRAME etc.). The daemon parses VP9 * headers itself via dlopen'd FFmpeg, so per-buffer controls are * not needed for the proof-of-pipe. Phase 8.6 adds the proper * stateless controls when AV1/H.264 land. * * Project: https://git.reauktion.de/reauktion/daedalus-v4l2 * Sibling kernel library: https://git.reauktion.de/marfrit/daedalus-fourier */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "daedalus_v4l2_chardev.h" #include "daedalus_v4l2_proto.h" #include "daedalus_v4l2_main.h" #define DAEDALUS_DRV_NAME "daedalus_v4l2" #define DAEDALUS_VIDEO_NAME "daedalus" /* * OUTPUT side advertises VP9 + AV1 + H.264 stateless formats * (the daemon decodes all three via FFmpeg dlopen). CAPTURE * advertises NV12M (8-bit, two-plane) + P010 (10-bit, * single-plane interleaved CbCr) added in Phase 8.8 for HDR * content. */ static const u32 daedalus_output_formats[] = { V4L2_PIX_FMT_VP9_FRAME, V4L2_PIX_FMT_AV1_FRAME, V4L2_PIX_FMT_H264_SLICE, }; #define DAEDALUS_NUM_OUTPUT_FMTS ARRAY_SIZE(daedalus_output_formats) #define DAEDALUS_DEFAULT_OUTPUT_FOURCC V4L2_PIX_FMT_VP9_FRAME /* * NV12 (single-plane Y+CbCr contiguous) listed alongside NV12M * (two-plane Y / CbCr separate) so legacy MPLANE clients that * expect single-plane buffer geometry (e.g. libva-v4l2-request- * fourier's NV12 video_format entry, used by VAAPI consumers via * ffmpeg vaapi) can negotiate the format successfully. The two * fourccs differ only in plane layout — bit-exact pixel content * is identical. */ static const u32 daedalus_capture_formats[] = { V4L2_PIX_FMT_NV12M, V4L2_PIX_FMT_NV12, V4L2_PIX_FMT_P010, }; #define DAEDALUS_NUM_CAPTURE_FMTS ARRAY_SIZE(daedalus_capture_formats) #define DAEDALUS_DEFAULT_CAPTURE_FOURCC V4L2_PIX_FMT_NV12M static bool daedalus_is_supported_capture(u32 fourcc) { size_t i; for (i = 0; i < DAEDALUS_NUM_CAPTURE_FMTS; i++) if (daedalus_capture_formats[i] == fourcc) return true; return false; } static u32 daedalus_fourcc_to_codec_id(u32 fourcc) { switch (fourcc) { case V4L2_PIX_FMT_VP9_FRAME: return DAEDALUS_CODEC_VP9; case V4L2_PIX_FMT_AV1_FRAME: return DAEDALUS_CODEC_AV1; case V4L2_PIX_FMT_H264_SLICE: return DAEDALUS_CODEC_H264; default: return 0; } } static bool daedalus_is_supported_output(u32 fourcc) { return daedalus_fourcc_to_codec_id(fourcc) != 0; } /* Conservative defaults; userspace S_FMT overrides. */ #define DAEDALUS_DEFAULT_W 320 #define DAEDALUS_DEFAULT_H 240 /* Bound bitstream buffer size to the chardev payload cap. */ #define DAEDALUS_MAX_BITSTREAM (DAEDALUS_PROTO_MAX_PAYLOAD - \ sizeof(struct daedalus_req_decode)) /* -- module-wide state ----------------------------------------------- */ static struct daedalus_dev *g_daedalus_dev; struct daedalus_dev *daedalus_get_dev(void) { return g_daedalus_dev; } /* -- per-open context ------------------------------------------------ */ /** * struct daedalus_ctx - per-open instance state * @fh: V4L2 file handle (must be first to satisfy v4l2-core) * @dev: parent daedalus_dev * @m2m_ctx: v4l2 mem2mem context (one job queue per open) * @hdl: v4l2_ctrl_handler (no controls yet; placeholder for 8.6) * @src_fmt: current OUTPUT (bitstream) format * @dst_fmt: current CAPTURE (decoded) format * * One context per open() of /dev/videoNN. v4l2-core's m2m * scheduler picks one context at a time to call device_run on. */ struct daedalus_ctx { struct v4l2_fh fh; struct daedalus_dev *dev; struct v4l2_m2m_ctx *m2m_ctx; struct v4l2_ctrl_handler hdl; struct v4l2_pix_format_mplane src_fmt; struct v4l2_pix_format_mplane dst_fmt; }; static inline struct daedalus_ctx *file_to_ctx(struct file *file) { return container_of(file->private_data, struct daedalus_ctx, fh); } /* -- V4L2 stateless control registration (skeleton) ----------------- */ /* * Register the per-codec stateless controls so userspace * (libva-v4l2-request and v4l2-compliance) recognises us as a * proper stateless decoder. We don't act on the values — the * daemon parses VP9/H.264/AV1 headers itself via FFmpeg — but * we accept stores so libva can drive standard decode flows. * * Per-codec control IDs that ship in v6.12 headers: * VP9: V4L2_CID_STATELESS_VP9_FRAME, V4L2_CID_STATELESS_VP9_COMPRESSED_HDR * H264: V4L2_CID_STATELESS_H264_{SPS,PPS,SCALING_MATRIX,PRED_WEIGHTS, * SLICE_PARAMS,DECODE_PARAMS} * AV1: V4L2_CID_STATELESS_AV1_FRAME (+ TILE_GROUP_ENTRY, SEQUENCE, * FILM_GRAIN where available) */ static const u32 daedalus_stateless_ctrls[] = { V4L2_CID_STATELESS_VP9_FRAME, V4L2_CID_STATELESS_VP9_COMPRESSED_HDR, V4L2_CID_STATELESS_H264_SPS, V4L2_CID_STATELESS_H264_PPS, V4L2_CID_STATELESS_H264_SCALING_MATRIX, V4L2_CID_STATELESS_H264_PRED_WEIGHTS, V4L2_CID_STATELESS_H264_SLICE_PARAMS, V4L2_CID_STATELESS_H264_DECODE_PARAMS, V4L2_CID_STATELESS_AV1_FRAME, V4L2_CID_STATELESS_AV1_SEQUENCE, V4L2_CID_STATELESS_AV1_TILE_GROUP_ENTRY, V4L2_CID_STATELESS_AV1_FILM_GRAIN, }; /* * No-op control op set: daemon ignores all stateless control * values (FFmpeg re-parses the bitstream). But v4l2-core requires * ops to be present on a ctrl_handler that processes SET requests * — without it, S_EXT_CTRLS rejects with EINVAL on validate. * Always-success s_ctrl is the right shape for "we accept whatever * you tell us but actually act on the OUTPUT buffer payload alone." */ static int daedalus_s_ctrl_noop(struct v4l2_ctrl *ctrl) { (void) ctrl; return 0; } static const struct v4l2_ctrl_ops daedalus_ctrl_ops = { .s_ctrl = daedalus_s_ctrl_noop, }; static int daedalus_register_stateless_ctrls(struct v4l2_ctrl_handler *hdl) { size_t i; /* * Use v4l2_ctrl_new_custom (the pattern rkvdec / cedrus / * hantro use) rather than v4l2_ctrl_new_std_compound. * v4l2-core auto-detects the type from each known * V4L2_CID_STATELESS_* id and allocates the right payload * size internally; S_EXT_CTRLS then validates user input * against that allocated payload. v4l2_ctrl_new_std_compound * with NULL p_def was rejecting writes (libva-v4l2-request- * fourier got EINVAL on every stateless ctrl SET). */ for (i = 0; i < ARRAY_SIZE(daedalus_stateless_ctrls); i++) { struct v4l2_ctrl_config cfg = { .ops = &daedalus_ctrl_ops, .id = daedalus_stateless_ctrls[i], }; struct v4l2_ctrl *ctrl; ctrl = v4l2_ctrl_new_custom(hdl, &cfg, NULL); if (hdl->error) { pr_debug("daedalus_v4l2: skipping unsupported CID 0x%x (err=%d)\n", daedalus_stateless_ctrls[i], hdl->error); hdl->error = 0; } (void) ctrl; } return 0; } /* -- format helpers -------------------------------------------------- */ /* * CAPTURE format fill. Three layouts supported: * NV12M (default, 8-bit) — 2 planes: Y (W*H bytes) + interleaved * CbCr at half-res (W*H/2 bytes). * NV12 (8-bit, 1 plane) — 1 plane: Y (W*H) followed by * interleaved CbCr (W*H/2); total * W*H*3/2 bytes. For legacy MPLANE * clients that don't speak multi- * plane (libva-v4l2-request). * P010 (10-bit HDR) — 1 plane: Y first (W*H*2 bytes) then * interleaved CbCr at half-res * (W*H bytes); 16-bit samples, * MSB-aligned 10-bit data (low 6 * bits zero per V4L2 ABI). */ static void daedalus_fill_capture_fmt(struct v4l2_pix_format_mplane *f, u32 fourcc, u32 w, u32 h) { if (!daedalus_is_supported_capture(fourcc)) fourcc = DAEDALUS_DEFAULT_CAPTURE_FOURCC; f->width = w; f->height = h; f->pixelformat = fourcc; f->field = V4L2_FIELD_NONE; f->colorspace = V4L2_COLORSPACE_REC709; if (fourcc == V4L2_PIX_FMT_P010) { f->num_planes = 1; f->plane_fmt[0].bytesperline = w * 2; f->plane_fmt[0].sizeimage = w * h * 2 + w * h; f->plane_fmt[1].bytesperline = 0; f->plane_fmt[1].sizeimage = 0; } else if (fourcc == V4L2_PIX_FMT_NV12) { f->num_planes = 1; f->plane_fmt[0].bytesperline = w; f->plane_fmt[0].sizeimage = w * h + w * h / 2; f->plane_fmt[1].bytesperline = 0; f->plane_fmt[1].sizeimage = 0; } else { f->num_planes = 2; f->plane_fmt[0].bytesperline = w; f->plane_fmt[0].sizeimage = w * h; f->plane_fmt[1].bytesperline = w; f->plane_fmt[1].sizeimage = w * h / 2; } } /* * OUTPUT is a parsed access unit (VP9 frame / AV1 frame / H.264 * slice). V4L2 convention for compressed bitstream formats: * single plane, sizeimage = worst-case bitstream size we're * willing to accept. fourcc carries the codec selector. */ static void daedalus_fill_output_fmt(struct v4l2_pix_format_mplane *f, u32 fourcc, u32 w, u32 h) { if (!daedalus_is_supported_output(fourcc)) fourcc = DAEDALUS_DEFAULT_OUTPUT_FOURCC; f->width = w; f->height = h; f->pixelformat = fourcc; f->field = V4L2_FIELD_NONE; f->colorspace = V4L2_COLORSPACE_REC709; f->num_planes = 1; f->plane_fmt[0].bytesperline = 0; /* compressed */ f->plane_fmt[0].sizeimage = DAEDALUS_MAX_BITSTREAM; } /* -- vb2 queue ops --------------------------------------------------- */ static int daedalus_queue_setup(struct vb2_queue *vq, unsigned int *nbuffers, unsigned int *nplanes, unsigned int sizes[], struct device *alloc_devs[]) { struct daedalus_ctx *ctx = vb2_get_drv_priv(vq); const struct v4l2_pix_format_mplane *fmt; unsigned int p; fmt = V4L2_TYPE_IS_OUTPUT(vq->type) ? &ctx->src_fmt : &ctx->dst_fmt; if (*nplanes) { if (*nplanes != fmt->num_planes) return -EINVAL; for (p = 0; p < *nplanes; p++) if (sizes[p] < fmt->plane_fmt[p].sizeimage) return -EINVAL; } else { *nplanes = fmt->num_planes; for (p = 0; p < *nplanes; p++) sizes[p] = fmt->plane_fmt[p].sizeimage; if (*nbuffers < 2) *nbuffers = 2; } /* * Both queues use vb2_dma_contig now (OUTPUT switched in * Phase 8.6 to satisfy v4l2-compliance's non-coherent * REQBUFS test). Point both at the platform device as * the CMA-backed allocation parent. */ for (p = 0; p < *nplanes; p++) alloc_devs[p] = &ctx->dev->pdev->dev; return 0; } static int daedalus_buf_prepare(struct vb2_buffer *vb) { struct daedalus_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue); const struct v4l2_pix_format_mplane *fmt; unsigned int p; fmt = V4L2_TYPE_IS_OUTPUT(vb->vb2_queue->type) ? &ctx->src_fmt : &ctx->dst_fmt; for (p = 0; p < vb->num_planes; p++) { unsigned long need = fmt->plane_fmt[p].sizeimage; if (vb2_plane_size(vb, p) < need) { v4l2_err(&ctx->dev->v4l2_dev, "buf_prepare: plane %u size %lu < %lu\n", p, vb2_plane_size(vb, p), need); return -EINVAL; } /* * For OUTPUT (bitstream), payload is set by userspace * via VIDIOC_QBUF (bytesused). For CAPTURE we set the * full plane size; device_run / buf_done updates it on * completion if needed. */ if (!V4L2_TYPE_IS_OUTPUT(vb->vb2_queue->type)) vb2_set_plane_payload(vb, p, need); } return 0; } static void daedalus_buf_queue(struct vb2_buffer *vb) { struct daedalus_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue); struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); v4l2_m2m_buf_queue(ctx->m2m_ctx, vbuf); } static int daedalus_start_streaming(struct vb2_queue *vq, unsigned int count) { return 0; } static void daedalus_stop_streaming(struct vb2_queue *vq) { struct daedalus_ctx *ctx = vb2_get_drv_priv(vq); struct vb2_v4l2_buffer *vbuf; while ((vbuf = V4L2_TYPE_IS_OUTPUT(vq->type) ? v4l2_m2m_src_buf_remove(ctx->m2m_ctx) : v4l2_m2m_dst_buf_remove(ctx->m2m_ctx)) != NULL) v4l2_m2m_buf_done(vbuf, VB2_BUF_STATE_ERROR); } /* * Phase 8.12: request API vb2 hooks. * * buf_out_validate: called on QBUF of an OUTPUT buf with a bound * request fd. We only accept progressive (FIELD_NONE) frames, so * normalise + accept. Without this op v4l2-core WARNs at * vb2_queue_or_prepare_buf and rejects with EINVAL. * * buf_request_complete: called when a request completes or is * cancelled; v4l2_ctrl_request_complete is the canonical helper * (releases the per-request control state cloned off ctx->hdl). */ static int daedalus_buf_out_validate(struct vb2_buffer *vb) { struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); vbuf->field = V4L2_FIELD_NONE; return 0; } static void daedalus_buf_request_complete(struct vb2_buffer *vb) { struct daedalus_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue); v4l2_ctrl_request_complete(vb->req_obj.req, &ctx->hdl); } static const struct vb2_ops daedalus_qops = { .queue_setup = daedalus_queue_setup, .buf_prepare = daedalus_buf_prepare, .buf_queue = daedalus_buf_queue, .start_streaming = daedalus_start_streaming, .stop_streaming = daedalus_stop_streaming, .wait_prepare = vb2_ops_wait_prepare, .wait_finish = vb2_ops_wait_finish, .buf_out_validate = daedalus_buf_out_validate, .buf_request_complete = daedalus_buf_request_complete, }; /* -- m2m queue init -------------------------------------------------- */ static int daedalus_queue_init(void *priv, struct vb2_queue *src_vq, struct vb2_queue *dst_vq) { struct daedalus_ctx *ctx = priv; int ret; src_vq->type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE; src_vq->io_modes = VB2_MMAP | VB2_USERPTR | VB2_DMABUF; src_vq->drv_priv = ctx; /* * Phase 8.12: Request API support on OUTPUT queue. vb2 * binds the per-request control state (the v4l2_ctrl_handler * clone used by VIDIOC_S_EXT_CTRLS(which=REQUEST_VAL)) to * the OUTPUT queue. Without this flag v4l2-core rejects * REQUEST_VAL writes before our s_ctrl is ever reached. */ src_vq->supports_requests = true; /* * requires_requests would reject any QBUF without a bound * media_request — useful when the daemon truly needs the * per-frame stateless controls to decode. Our daemon * re-parses the bitstream so it doesn't actually need the * controls; leaving requires_requests off lets non-request * clients (test_m2m_stream etc.) keep working AND lets the * libva path proceed even if its S_EXT_CTRLS bind didn't * fully take. */ src_vq->buf_struct_size = sizeof(struct v4l2_m2m_buffer); src_vq->ops = &daedalus_qops; /* * Phase 8.6: OUTPUT switched from vb2_vmalloc to * vb2_dma_contig so v4l2-compliance's REQBUFS test passes * V4L2_MEMORY_FLAG_NON_COHERENT (vmalloc memops don't * honour the flag; dma_contig does). We still use * vb2_plane_vaddr in device_run to read the bitstream — * dma_contig provides a kernel virtual address just like * vmalloc did. */ src_vq->mem_ops = &vb2_dma_contig_memops; src_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY; src_vq->lock = &ctx->dev->m2m_lock; src_vq->dev = &ctx->dev->pdev->dev; src_vq->allow_cache_hints = 1; ret = vb2_queue_init(src_vq); if (ret) return ret; dst_vq->type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE; dst_vq->io_modes = VB2_MMAP | VB2_DMABUF; dst_vq->drv_priv = ctx; dst_vq->buf_struct_size = sizeof(struct v4l2_m2m_buffer); dst_vq->ops = &daedalus_qops; dst_vq->mem_ops = &vb2_dma_contig_memops; dst_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY; dst_vq->lock = &ctx->dev->m2m_lock; dst_vq->dev = &ctx->dev->pdev->dev; dst_vq->allow_cache_hints = 1; return vb2_queue_init(dst_vq); } /* -- in-flight tracking (cookie → ctx + bufs) ------------------------ */ /* * The chardev RESP_FRAME path needs to find the per-request * context + source/destination buffer pair so it can complete the * V4L2 m2m job. Track in-flight requests in a small list keyed * by cookie. Cookies are monotonically increasing (see * device_run); collisions on wrap-around are astronomically * unlikely in normal use and would self-clear once the older * cookie's response arrives. */ struct daedalus_inflight { struct list_head list; u32 cookie; struct daedalus_ctx *ctx; struct vb2_v4l2_buffer *src_buf; struct vb2_v4l2_buffer *dst_buf; }; static struct daedalus_inflight * daedalus_inflight_pop_locked(struct daedalus_dev *dev, u32 cookie) { struct daedalus_inflight *e; list_for_each_entry(e, &dev->inflight, list) { if (e->cookie == cookie) { list_del(&e->list); return e; } } return NULL; } /* -- chardev GET_DMABUF backend (called in daemon task context) ----- */ int daedalus_export_capture_dmabuf(u32 cookie, u32 plane, u32 flags, int *out_fd) { struct daedalus_dev *dev = g_daedalus_dev; struct daedalus_inflight *e, *match = NULL; struct vb2_queue *vq; struct vb2_buffer *vb; int rc; if (!dev || !out_fd) return -EINVAL; /* * Walk the inflight list under the lock to look up the * V4L2 request. Hold a transient reference via the lock * — once we drop the lock the entry could be popped by a * concurrent RESP_FRAME, but we only need the dst_buf + * its vb2_queue, both of which are stable for the * lifetime of the in-flight request (RESP_FRAME is what * pops the entry, so daemon completing the export then * sending RESP_FRAME is the canonical ordering). */ mutex_lock(&dev->inflight_lock); list_for_each_entry(e, &dev->inflight, list) { if (e->cookie == cookie) { match = e; break; } } if (!match) { mutex_unlock(&dev->inflight_lock); return -EINVAL; } vb = &match->dst_buf->vb2_buf; vq = vb->vb2_queue; mutex_unlock(&dev->inflight_lock); if (plane >= vb->num_planes) return -EINVAL; rc = vb2_core_expbuf(vq, out_fd, vq->type, vb, plane, flags); if (rc) return rc; return 0; } /* -- v4l2_m2m_ops.device_run ----------------------------------------- */ static atomic_t daedalus_cookie_seq = ATOMIC_INIT(0); u32 daedalus_next_cookie(void) { return (u32) atomic_inc_return(&daedalus_cookie_seq); } static void daedalus_device_run(void *priv) { struct daedalus_ctx *ctx = priv; struct daedalus_dev *dev = ctx->dev; struct vb2_v4l2_buffer *src_buf, *dst_buf; struct daedalus_inflight *inf = NULL; struct daedalus_req_decode *req = NULL; void *bitstream; size_t blen, payload_len; u32 cookie; int ret; src_buf = v4l2_m2m_next_src_buf(ctx->m2m_ctx); dst_buf = v4l2_m2m_next_dst_buf(ctx->m2m_ctx); if (!src_buf || !dst_buf) { v4l2_warn(&dev->v4l2_dev, "device_run with no src/dst buf — scheduler bug?\n"); goto fail_job_finish; } blen = vb2_get_plane_payload(&src_buf->vb2_buf, 0); if (!blen || blen > DAEDALUS_MAX_BITSTREAM) { v4l2_err(&dev->v4l2_dev, "device_run: bitstream length %zu out of range [1, %lu]\n", blen, (unsigned long) DAEDALUS_MAX_BITSTREAM); goto fail_buf_error; } bitstream = vb2_plane_vaddr(&src_buf->vb2_buf, 0); if (!bitstream) { v4l2_err(&dev->v4l2_dev, "device_run: vaddr NULL\n"); goto fail_buf_error; } payload_len = sizeof(*req) + blen; req = kmalloc(payload_len, GFP_KERNEL); if (!req) goto fail_buf_error; memset(req, 0, sizeof(*req)); req->codec_id = daedalus_fourcc_to_codec_id(ctx->src_fmt.pixelformat); if (!req->codec_id) { v4l2_err(&dev->v4l2_dev, "device_run: unsupported OUTPUT pixelformat 0x%08x\n", ctx->src_fmt.pixelformat); kfree(req); req = NULL; goto fail_buf_error; } req->bitstream_len = (u32) blen; req->capture_width = ctx->dst_fmt.width; req->capture_height = ctx->dst_fmt.height; req->capture_pix_fmt = ctx->dst_fmt.pixelformat; req->capture_num_planes = ctx->dst_fmt.num_planes; { unsigned int p; for (p = 0; p < ctx->dst_fmt.num_planes && p < 3; p++) { req->capture_plane_size[p] = ctx->dst_fmt.plane_fmt[p].sizeimage; req->capture_plane_stride[p] = ctx->dst_fmt.plane_fmt[p].bytesperline; } } memcpy((u8 *) req + sizeof(*req), bitstream, blen); inf = kzalloc(sizeof(*inf), GFP_KERNEL); if (!inf) goto fail_buf_error; cookie = daedalus_next_cookie(); inf->cookie = cookie; inf->ctx = ctx; inf->src_buf = src_buf; inf->dst_buf = dst_buf; mutex_lock(&dev->inflight_lock); list_add_tail(&inf->list, &dev->inflight); mutex_unlock(&dev->inflight_lock); ret = daedalus_chardev_enqueue_req(DAEDALUS_MSG_REQ_DECODE, cookie, req, payload_len); kfree(req); req = NULL; if (ret) { v4l2_err(&dev->v4l2_dev, "device_run: enqueue_req failed: %d\n", ret); mutex_lock(&dev->inflight_lock); list_del(&inf->list); mutex_unlock(&dev->inflight_lock); kfree(inf); goto fail_buf_error; } v4l2_dbg(1, 0, &dev->v4l2_dev, "device_run: REQ_DECODE cookie=%u blen=%zu\n", cookie, blen); /* * Job stays open until RESP_FRAME comes back; chardev path * calls v4l2_m2m_buf_done_and_job_finish then. */ return; fail_buf_error: if (src_buf) { v4l2_m2m_src_buf_remove(ctx->m2m_ctx); v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_ERROR); } if (dst_buf) { v4l2_m2m_dst_buf_remove(ctx->m2m_ctx); v4l2_m2m_buf_done(dst_buf, VB2_BUF_STATE_ERROR); } kfree(req); fail_job_finish: v4l2_m2m_job_finish(dev->m2m_dev, ctx->m2m_ctx); } static const struct v4l2_m2m_ops daedalus_m2m_ops = { .device_run = daedalus_device_run, }; /* -- chardev RESP_FRAME → buf_done bridge ---------------------------- */ void daedalus_complete_resp_frame(u32 cookie, const struct daedalus_resp_frame *fr, const u8 *pixels, size_t pixels_len) { struct daedalus_dev *dev = g_daedalus_dev; struct daedalus_inflight *inf; enum vb2_buffer_state state; void *dst_y, *dst_uv; u32 y_size, uv_size; if (!dev) return; mutex_lock(&dev->inflight_lock); inf = daedalus_inflight_pop_locked(dev, cookie); mutex_unlock(&dev->inflight_lock); if (!inf) { pr_warn_ratelimited( "daedalus_v4l2: RESP_FRAME for unknown cookie=%u\n", cookie); return; } state = (fr->status == DAEDALUS_DECODE_OK) ? VB2_BUF_STATE_DONE : VB2_BUF_STATE_ERROR; /* * Two routes the daemon can take, both supported: * * (a) dmabuf path (Phase 8.6+) — daemon called * DAEDALUS_IOC_GET_DMABUF, mmap'd the CAPTURE buffer, * wrote pixels in place. RESP_FRAME carries metadata * only (pixels_len == 0). Each plane's payload is * the full plane size (the daemon wrote everything * the format requires). * * (b) Phase 8.5 inline path — daemon shipped raw NV12 in * the chardev payload (≤ 64 KiB cap). We memcpy * into the vb2 buffer. Plane payloads come from * the daemon's NV12 luma/chroma counts. */ if (state == VB2_BUF_STATE_DONE) { struct vb2_buffer *vb = &inf->dst_buf->vb2_buf; unsigned int p; if (pixels_len) { /* (b) inline NV12 copy — legacy 2-plane only */ y_size = min_t(u32, fr->luma_len, (u32) vb2_plane_size(vb, 0)); uv_size = vb->num_planes > 1 ? min_t(u32, fr->chroma_len, (u32) vb2_plane_size(vb, 1)) : 0; dst_y = vb2_plane_vaddr(vb, 0); dst_uv = vb->num_planes > 1 ? vb2_plane_vaddr(vb, 1) : NULL; if (dst_y && y_size && pixels_len >= y_size) memcpy(dst_y, pixels, y_size); else y_size = 0; if (dst_uv && uv_size && pixels_len >= y_size + uv_size) memcpy(dst_uv, pixels + y_size, uv_size); else uv_size = 0; vb2_set_plane_payload(vb, 0, y_size); if (vb->num_planes > 1) vb2_set_plane_payload(vb, 1, uv_size); } else { /* (a) dmabuf path: plane is fully populated by * the daemon, so payload == sizeimage. */ for (p = 0; p < vb->num_planes; p++) vb2_set_plane_payload(vb, p, vb2_plane_size(vb, p)); } } /* * Use the buf_done_and_job_finish helper rather than plain * buf_done + job_finish: the helper pops the buffers off * the m2m queue before marking them done, otherwise the * scheduler immediately re-runs device_run on the same * still-queued src buffer. Caught during Phase 8.5 first * run — second REQ_DECODE with identical bitstream + oops * in stop_streaming when the test client tore down. */ v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, inf->ctx->m2m_ctx, state); kfree(inf); } /* -- v4l2_ioctl_ops -------------------------------------------------- */ static int daedalus_querycap(struct file *file, void *priv, struct v4l2_capability *cap) { strscpy(cap->driver, DAEDALUS_DRV_NAME, sizeof(cap->driver)); strscpy(cap->card, "daedalus-fourier V3D7+NEON", sizeof(cap->card)); snprintf(cap->bus_info, sizeof(cap->bus_info), "platform:%s", DAEDALUS_DRV_NAME); return 0; } static int daedalus_enum_fmt(struct file *file, void *priv, struct v4l2_fmtdesc *f) { if (f->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) { if (f->index >= DAEDALUS_NUM_OUTPUT_FMTS) return -EINVAL; f->pixelformat = daedalus_output_formats[f->index]; f->flags |= V4L2_FMT_FLAG_COMPRESSED; return 0; } if (f->index >= DAEDALUS_NUM_CAPTURE_FMTS) return -EINVAL; f->pixelformat = daedalus_capture_formats[f->index]; return 0; } static int daedalus_g_fmt(struct file *file, void *priv, struct v4l2_format *f) { struct daedalus_ctx *ctx = file_to_ctx(file); if (f->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) f->fmt.pix_mp = ctx->src_fmt; else if (f->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) f->fmt.pix_mp = ctx->dst_fmt; else return -EINVAL; return 0; } static int daedalus_try_fmt(struct file *file, void *priv, struct v4l2_format *f) { struct v4l2_pix_format_mplane *p = &f->fmt.pix_mp; u32 w = clamp_t(u32, p->width, 16, 1920); u32 h = clamp_t(u32, p->height, 16, 1088); u32 cs, xfer, ycbcr, quant; /* * Preserve userspace-supplied colorspace fields verbatim * (fixes the Phase 8.5 v4l2-compliance S_FMT colorspace * round-trip failure) — fill_*_fmt overwrites these to * REC709 defaults, but TRY_FMT must echo what the caller * asked for if it's at all sensible. */ cs = p->colorspace; xfer = p->xfer_func; ycbcr = p->ycbcr_enc; quant = p->quantization; if (f->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) { u32 fourcc = p->pixelformat; if (!daedalus_is_supported_output(fourcc)) fourcc = DAEDALUS_DEFAULT_OUTPUT_FOURCC; daedalus_fill_output_fmt(p, fourcc, w, h); } else if (f->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) { u32 fourcc = p->pixelformat; if (!daedalus_is_supported_capture(fourcc)) fourcc = DAEDALUS_DEFAULT_CAPTURE_FOURCC; daedalus_fill_capture_fmt(p, fourcc, w, h); } else { return -EINVAL; } if (cs) p->colorspace = cs; if (xfer) p->xfer_func = xfer; if (ycbcr) p->ycbcr_enc = ycbcr; if (quant) p->quantization = quant; return 0; } static int daedalus_s_fmt(struct file *file, void *priv, struct v4l2_format *f) { struct daedalus_ctx *ctx = file_to_ctx(file); struct vb2_queue *vq; int ret; vq = v4l2_m2m_get_vq(ctx->m2m_ctx, f->type); if (!vq) return -EINVAL; if (vb2_is_busy(vq)) return -EBUSY; ret = daedalus_try_fmt(file, priv, f); if (ret) return ret; if (f->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) { ctx->src_fmt = f->fmt.pix_mp; /* * Stateless decoder convention: colorspace metadata * on the OUTPUT (bitstream) side describes what the * codec will produce. Propagate to CAPTURE so a * follow-up G_FMT on CAPTURE returns matching values * (v4l2-compliance v4l2-test-formats.cpp:958 * round-trip test). */ ctx->dst_fmt.colorspace = ctx->src_fmt.colorspace; ctx->dst_fmt.xfer_func = ctx->src_fmt.xfer_func; ctx->dst_fmt.ycbcr_enc = ctx->src_fmt.ycbcr_enc; ctx->dst_fmt.quantization = ctx->src_fmt.quantization; } else { ctx->dst_fmt = f->fmt.pix_mp; } return 0; } static const struct v4l2_ioctl_ops daedalus_ioctl_ops = { .vidioc_querycap = daedalus_querycap, .vidioc_enum_fmt_vid_out = daedalus_enum_fmt, .vidioc_enum_fmt_vid_cap = daedalus_enum_fmt, .vidioc_g_fmt_vid_out_mplane = daedalus_g_fmt, .vidioc_g_fmt_vid_cap_mplane = daedalus_g_fmt, .vidioc_s_fmt_vid_out_mplane = daedalus_s_fmt, .vidioc_s_fmt_vid_cap_mplane = daedalus_s_fmt, .vidioc_try_fmt_vid_out_mplane = daedalus_try_fmt, .vidioc_try_fmt_vid_cap_mplane = daedalus_try_fmt, .vidioc_reqbufs = v4l2_m2m_ioctl_reqbufs, .vidioc_querybuf = v4l2_m2m_ioctl_querybuf, .vidioc_qbuf = v4l2_m2m_ioctl_qbuf, .vidioc_dqbuf = v4l2_m2m_ioctl_dqbuf, .vidioc_create_bufs = v4l2_m2m_ioctl_create_bufs, .vidioc_prepare_buf = v4l2_m2m_ioctl_prepare_buf, .vidioc_expbuf = v4l2_m2m_ioctl_expbuf, .vidioc_streamon = v4l2_m2m_ioctl_streamon, .vidioc_streamoff = v4l2_m2m_ioctl_streamoff, .vidioc_subscribe_event = v4l2_ctrl_subscribe_event, .vidioc_unsubscribe_event = v4l2_event_unsubscribe, }; /* -- file operations ------------------------------------------------- */ static int daedalus_open(struct file *file) { struct daedalus_dev *dev = video_drvdata(file); struct daedalus_ctx *ctx; int ret; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) return -ENOMEM; ctx->dev = dev; v4l2_fh_init(&ctx->fh, &dev->vdev); file->private_data = &ctx->fh; v4l2_ctrl_handler_init(&ctx->hdl, ARRAY_SIZE(daedalus_stateless_ctrls)); daedalus_register_stateless_ctrls(&ctx->hdl); ctx->fh.ctrl_handler = &ctx->hdl; daedalus_fill_output_fmt(&ctx->src_fmt, DAEDALUS_DEFAULT_OUTPUT_FOURCC, DAEDALUS_DEFAULT_W, DAEDALUS_DEFAULT_H); daedalus_fill_capture_fmt(&ctx->dst_fmt, DAEDALUS_DEFAULT_CAPTURE_FOURCC, DAEDALUS_DEFAULT_W, DAEDALUS_DEFAULT_H); ctx->m2m_ctx = v4l2_m2m_ctx_init(dev->m2m_dev, ctx, daedalus_queue_init); if (IS_ERR(ctx->m2m_ctx)) { ret = PTR_ERR(ctx->m2m_ctx); v4l2_err(&dev->v4l2_dev, "m2m_ctx_init: %d\n", ret); goto err_ctrl; } ctx->fh.m2m_ctx = ctx->m2m_ctx; v4l2_fh_add(&ctx->fh); return 0; err_ctrl: v4l2_ctrl_handler_free(&ctx->hdl); v4l2_fh_exit(&ctx->fh); kfree(ctx); return ret; } static int daedalus_release(struct file *file) { struct daedalus_ctx *ctx = file_to_ctx(file); v4l2_fh_del(&ctx->fh); v4l2_m2m_ctx_release(ctx->m2m_ctx); v4l2_ctrl_handler_free(&ctx->hdl); v4l2_fh_exit(&ctx->fh); kfree(ctx); return 0; } static const struct v4l2_file_operations daedalus_fops = { .owner = THIS_MODULE, .open = daedalus_open, .release = daedalus_release, .poll = v4l2_m2m_fop_poll, .unlocked_ioctl = video_ioctl2, .mmap = v4l2_m2m_fop_mmap, }; static void daedalus_vdev_release(struct video_device *vdev) { /* embedded in daedalus_dev (devm) — nothing to free here */ } /* -- media controller request-API ops (Phase 8.11) ------------------ */ /* * V4L2 Request API plumbing: lets a client allocate a media_request * (MEDIA_IOC_REQUEST_ALLOC), stage per-buffer controls into it via * VIDIOC_S_EXT_CTRLS with which=V4L2_CTRL_WHICH_REQUEST_VAL, then * queue the OUTPUT buffer with the request fd bound — all controls * + the buffer apply atomically at decode submission. * * vb2_request_validate / v4l2_m2m_request_queue are the canonical * helpers; the daemon doesn't actually use the staged controls * (FFmpeg re-parses the bitstream) but the wire-level support is * what libva-v4l2-request-fourier requires to call MEDIA_IOC_ * REQUEST_ALLOC successfully. */ static const struct media_device_ops daedalus_media_ops = { .req_validate = vb2_request_validate, .req_queue = v4l2_m2m_request_queue, }; /* -- platform driver bind -------------------------------------------- */ static int daedalus_probe(struct platform_device *pdev) { struct daedalus_dev *dev; int ret; dev = devm_kzalloc(&pdev->dev, sizeof(*dev), GFP_KERNEL); if (!dev) return -ENOMEM; dev->pdev = pdev; platform_set_drvdata(pdev, dev); mutex_init(&dev->m2m_lock); mutex_init(&dev->inflight_lock); INIT_LIST_HEAD(&dev->inflight); /* * vb2_dma_contig (used by the CAPTURE queue for dmabuf * export) needs the parent device's DMA mask configured. * Pi 5 CMA supports 32-bit DMA; that's sufficient for * NV12/NV12M up to 4K (4K NV12 = ~12.4 MiB, well under * the 4 GiB ceiling). */ ret = dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); if (ret) { dev_err(&pdev->dev, "dma_coerce_mask_and_coherent: %d\n", ret); return ret; } /* * Set up the media controller BEFORE v4l2_device_register * binds the mdev so v4l2-core publishes the link between * the v4l2_device and the media_device. Stateless decoders * are required by spec to expose a media controller (the * request API rides on it) — v4l2-compliance's DECODER_CMD * test rejects drivers without it. * * Phase 8.11: wire the V4L2 request API media ops so libva- * v4l2-request-fourier can MEDIA_IOC_REQUEST_ALLOC against * us. vb2_request_validate + v4l2_m2m_request_queue are the * canonical helpers — they bundle per-buffer controls with * the matching qbuf so the decode submission is atomic * (required for stateless decoders feeding hardware that * needs all params present before kickoff). */ dev->mdev.dev = &pdev->dev; strscpy(dev->mdev.model, "daedalus-v4l2", sizeof(dev->mdev.model)); dev->mdev.ops = &daedalus_media_ops; media_device_init(&dev->mdev); dev->v4l2_dev.mdev = &dev->mdev; ret = v4l2_device_register(&pdev->dev, &dev->v4l2_dev); if (ret) { dev_err(&pdev->dev, "v4l2_device_register: %d\n", ret); media_device_cleanup(&dev->mdev); return ret; } dev->m2m_dev = v4l2_m2m_init(&daedalus_m2m_ops); if (IS_ERR(dev->m2m_dev)) { ret = PTR_ERR(dev->m2m_dev); v4l2_err(&dev->v4l2_dev, "v4l2_m2m_init: %d\n", ret); goto err_v4l2_dev; } strscpy(dev->vdev.name, DAEDALUS_VIDEO_NAME, sizeof(dev->vdev.name)); dev->vdev.fops = &daedalus_fops; dev->vdev.ioctl_ops = &daedalus_ioctl_ops; dev->vdev.release = daedalus_vdev_release; dev->vdev.v4l2_dev = &dev->v4l2_dev; dev->vdev.vfl_dir = VFL_DIR_M2M; dev->vdev.device_caps = V4L2_CAP_VIDEO_M2M_MPLANE | V4L2_CAP_STREAMING; dev->vdev.lock = &dev->m2m_lock; video_set_drvdata(&dev->vdev, dev); ret = video_register_device(&dev->vdev, VFL_TYPE_VIDEO, -1); if (ret) { v4l2_err(&dev->v4l2_dev, "video_register_device: %d\n", ret); goto err_m2m; } /* * Register the m2m entities with the media controller * AFTER video_register_device so vdev->num is set. * MEDIA_ENT_F_PROC_VIDEO_DECODER tags us as a decoder * entity in the graph — what libva-v4l2-request scans for. */ ret = v4l2_m2m_register_media_controller(dev->m2m_dev, &dev->vdev, MEDIA_ENT_F_PROC_VIDEO_DECODER); if (ret) { v4l2_err(&dev->v4l2_dev, "v4l2_m2m_register_media_controller: %d\n", ret); goto err_vdev; } ret = media_device_register(&dev->mdev); if (ret) { v4l2_err(&dev->v4l2_dev, "media_device_register: %d\n", ret); goto err_m2m_mc; } g_daedalus_dev = dev; v4l2_info(&dev->v4l2_dev, "daedalus-v4l2 m2m registered as /dev/video%d (Phase 8.7)\n", dev->vdev.num); return 0; err_m2m_mc: v4l2_m2m_unregister_media_controller(dev->m2m_dev); err_vdev: video_unregister_device(&dev->vdev); err_m2m: v4l2_m2m_release(dev->m2m_dev); err_v4l2_dev: v4l2_device_unregister(&dev->v4l2_dev); media_device_cleanup(&dev->mdev); return ret; } static void daedalus_remove(struct platform_device *pdev) { struct daedalus_dev *dev = platform_get_drvdata(pdev); g_daedalus_dev = NULL; media_device_unregister(&dev->mdev); v4l2_m2m_unregister_media_controller(dev->m2m_dev); video_unregister_device(&dev->vdev); v4l2_m2m_release(dev->m2m_dev); v4l2_device_unregister(&dev->v4l2_dev); media_device_cleanup(&dev->mdev); } static struct platform_driver daedalus_platform_driver = { .probe = daedalus_probe, .remove = daedalus_remove, .driver = { .name = DAEDALUS_DRV_NAME, }, }; static struct platform_device *daedalus_platform_device; static int __init daedalus_init(void) { int ret; ret = daedalus_chardev_init(); if (ret) return ret; daedalus_platform_device = platform_device_alloc(DAEDALUS_DRV_NAME, -1); if (!daedalus_platform_device) { ret = -ENOMEM; goto err_chardev; } ret = platform_device_add(daedalus_platform_device); if (ret) { platform_device_put(daedalus_platform_device); goto err_chardev; } ret = platform_driver_register(&daedalus_platform_driver); if (ret) { platform_device_unregister(daedalus_platform_device); goto err_chardev; } return 0; err_chardev: daedalus_chardev_exit(); return ret; } static void __exit daedalus_exit(void) { platform_driver_unregister(&daedalus_platform_driver); platform_device_unregister(daedalus_platform_device); daedalus_chardev_exit(); } module_init(daedalus_init); module_exit(daedalus_exit); MODULE_AUTHOR("Markus Fritsche "); MODULE_DESCRIPTION("V4L2 stateless decoder shim for daedalus-fourier (Pi 5 / VC7)"); MODULE_LICENSE("GPL v2"); MODULE_VERSION("0.0.2");