// SPDX-License-Identifier: GPL-2.0-or-later /* * daedalus-v4l2 — V4L2 stateless decoder shim. * * Out-of-tree Linux kernel module that exposes a /dev/videoNN * V4L2 m2m (mem2mem) device for the daedalus-fourier kernel * library. Real decoding happens in a userspace daemon; this * module ferries bitstream buffers to the daemon via the * /dev/daedalus-v4l2 chardev bridge and ferries decoded pixels * back into the V4L2 client's CAPTURE buffer. * * Phase 8.5 (this revision): full V4L2 m2m driver with vb2 * queues, real v4l2_ioctl_ops table, device_run wired to REQ_DECODE * over the chardev, RESP_FRAME completion path back into * v4l2_m2m_buf_done. Bitstream + decoded pixel data travel * inline through the 64 KiB chardev payload — enough for small * frames and proof-of-pipe; Phase 8.6 adds dmabuf-export so * larger CAPTURE buffers don't have to round-trip through the * chardev. * * Phase 8.5 does NOT implement the V4L2 stateless control set * (V4L2_CID_STATELESS_VP9_FRAME etc.). The daemon parses VP9 * headers itself via dlopen'd FFmpeg, so per-buffer controls are * not needed for the proof-of-pipe. Phase 8.6 adds the proper * stateless controls when AV1/H.264 land. * * Project: https://git.reauktion.de/reauktion/daedalus-v4l2 * Sibling kernel library: https://git.reauktion.de/marfrit/daedalus-fourier */ #include #include #include /* LINUX_VERSION_CODE / KERNEL_VERSION */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "daedalus_v4l2_chardev.h" #include "daedalus_v4l2_proto.h" #include "daedalus_v4l2_main.h" #define DAEDALUS_DRV_NAME "daedalus_v4l2" #define DAEDALUS_VIDEO_NAME "daedalus" /* * OUTPUT side advertises VP9 + AV1 + H.264 stateless formats * (the daemon decodes all three via FFmpeg dlopen). CAPTURE * advertises NV12M (8-bit, two-plane) + P010 (10-bit, * single-plane interleaved CbCr) added in Phase 8.8 for HDR * content. 
*/ static const u32 daedalus_output_formats[] = { V4L2_PIX_FMT_VP9_FRAME, V4L2_PIX_FMT_AV1_FRAME, V4L2_PIX_FMT_H264_SLICE, }; #define DAEDALUS_NUM_OUTPUT_FMTS ARRAY_SIZE(daedalus_output_formats) #define DAEDALUS_DEFAULT_OUTPUT_FOURCC V4L2_PIX_FMT_VP9_FRAME /* * NV12 (single-plane Y+CbCr contiguous) listed alongside NV12M * (two-plane Y / CbCr separate) so legacy MPLANE clients that * expect single-plane buffer geometry (e.g. libva-v4l2-request- * fourier's NV12 video_format entry, used by VAAPI consumers via * ffmpeg vaapi) can negotiate the format successfully. The two * fourccs differ only in plane layout — bit-exact pixel content * is identical. */ static const u32 daedalus_capture_formats[] = { V4L2_PIX_FMT_NV12M, V4L2_PIX_FMT_NV12, V4L2_PIX_FMT_P010, }; #define DAEDALUS_NUM_CAPTURE_FMTS ARRAY_SIZE(daedalus_capture_formats) #define DAEDALUS_DEFAULT_CAPTURE_FOURCC V4L2_PIX_FMT_NV12M static bool daedalus_is_supported_capture(u32 fourcc) { size_t i; for (i = 0; i < DAEDALUS_NUM_CAPTURE_FMTS; i++) if (daedalus_capture_formats[i] == fourcc) return true; return false; } static u32 daedalus_fourcc_to_codec_id(u32 fourcc) { switch (fourcc) { case V4L2_PIX_FMT_VP9_FRAME: return DAEDALUS_CODEC_VP9; case V4L2_PIX_FMT_AV1_FRAME: return DAEDALUS_CODEC_AV1; case V4L2_PIX_FMT_H264_SLICE: return DAEDALUS_CODEC_H264; default: return 0; } } static bool daedalus_is_supported_output(u32 fourcc) { return daedalus_fourcc_to_codec_id(fourcc) != 0; } /* Conservative defaults; userspace S_FMT overrides. */ #define DAEDALUS_DEFAULT_W 320 #define DAEDALUS_DEFAULT_H 240 /* Bound bitstream buffer size to the chardev payload cap. 
*/ #define DAEDALUS_MAX_BITSTREAM (DAEDALUS_PROTO_MAX_PAYLOAD - \ sizeof(struct daedalus_req_decode)) /* -- module-wide state ----------------------------------------------- */ static struct daedalus_dev *g_daedalus_dev; struct daedalus_dev *daedalus_get_dev(void) { return g_daedalus_dev; } /* -- per-open context ------------------------------------------------ */ /** * struct daedalus_ctx - per-open instance state * @fh: V4L2 file handle (must be first to satisfy v4l2-core) * @dev: parent daedalus_dev * @m2m_ctx: v4l2 mem2mem context (one job queue per open) * @hdl: v4l2_ctrl_handler (no controls yet; placeholder for 8.6) * @src_fmt: current OUTPUT (bitstream) format * @dst_fmt: current CAPTURE (decoded) format * * One context per open() of /dev/videoNN. v4l2-core's m2m * scheduler picks one context at a time to call device_run on. */ struct daedalus_ctx { struct v4l2_fh fh; struct daedalus_dev *dev; struct v4l2_m2m_ctx *m2m_ctx; struct v4l2_ctrl_handler hdl; /* * Per-context vb2 queue lock. Was originally pointed at the * device-wide dev->m2m_lock, which serialised vb2 ioctls across * every concurrent client — Firefox spawns multiple content/RDD * processes that each open /dev/video0, and a device-wide lock * made S_FMT / REQBUFS / QBUF on one client block (and sometimes * EBUSY-fail) against another client mid-stream. cedrus / rkvdec * / hantro all use per-ctx vb mutexes for exactly this reason. */ struct mutex vb_mutex; struct v4l2_pix_format_mplane src_fmt; struct v4l2_pix_format_mplane dst_fmt; }; static inline struct daedalus_ctx *file_to_ctx(struct file *file) { return container_of(file->private_data, struct daedalus_ctx, fh); } /* -- V4L2 stateless control registration (skeleton) ----------------- */ /* * Register the per-codec stateless controls so userspace * (libva-v4l2-request and v4l2-compliance) recognises us as a * proper stateless decoder. 
We don't act on the values — the * daemon parses VP9/H.264/AV1 headers itself via FFmpeg — but * we accept stores so libva can drive standard decode flows. * * Per-codec control IDs that ship in v6.12 headers: * VP9: V4L2_CID_STATELESS_VP9_FRAME, V4L2_CID_STATELESS_VP9_COMPRESSED_HDR * H264: V4L2_CID_STATELESS_H264_{SPS,PPS,SCALING_MATRIX,PRED_WEIGHTS, * SLICE_PARAMS,DECODE_PARAMS} * AV1: V4L2_CID_STATELESS_AV1_FRAME (+ TILE_GROUP_ENTRY, SEQUENCE, * FILM_GRAIN where available) */ static const u32 daedalus_stateless_ctrls[] = { V4L2_CID_STATELESS_VP9_FRAME, V4L2_CID_STATELESS_VP9_COMPRESSED_HDR, V4L2_CID_STATELESS_H264_SPS, V4L2_CID_STATELESS_H264_PPS, V4L2_CID_STATELESS_H264_SCALING_MATRIX, V4L2_CID_STATELESS_H264_PRED_WEIGHTS, V4L2_CID_STATELESS_H264_SLICE_PARAMS, V4L2_CID_STATELESS_H264_DECODE_PARAMS, V4L2_CID_STATELESS_AV1_FRAME, V4L2_CID_STATELESS_AV1_SEQUENCE, V4L2_CID_STATELESS_AV1_TILE_GROUP_ENTRY, V4L2_CID_STATELESS_AV1_FILM_GRAIN, }; /* * Always-success s_ctrl op. v4l2-core requires ops to be present on * a ctrl_handler that processes SET requests — without it, * S_EXT_CTRLS rejects with EINVAL on validate. We don't act on * values here at SET time; for H.264, device_run later reads the * (request-bound) values from p_cur and ships them to the daemon * via struct daedalus_h264_meta. For VP9/AV1 we still rely on * FFmpeg re-parsing the bitstream — those formats are self- * describing per frame. */ static int daedalus_s_ctrl_noop(struct v4l2_ctrl *ctrl) { (void) ctrl; return 0; } static const struct v4l2_ctrl_ops daedalus_ctrl_ops = { .s_ctrl = daedalus_s_ctrl_noop, }; /* * Copy the current H.264 stateless control values into a * daedalus_h264_meta scratch buffer. Returns true if all four * required controls (SPS, PPS, scaling matrix, decode params) had * data on the ctrl handler — caller then ships the meta block in * REQ_DECODE. 
Returns false if any control was missing (caller * skips the meta block; daemon will likely fail the decode, but * with a clear "no SPS" error from libavcodec rather than a * confusing protocol mismatch). * * The ctrl_handler's p_cur values are bound to the in-flight * media_request by v4l2_ctrl_request_setup, which v4l2-m2m calls * before device_run for stateless decoders. */ static bool daedalus_collect_h264_meta(struct daedalus_ctx *ctx, struct daedalus_h264_meta *meta) { struct v4l2_ctrl *c_sps, *c_pps, *c_sm, *c_dp; c_sps = v4l2_ctrl_find(&ctx->hdl, V4L2_CID_STATELESS_H264_SPS); c_pps = v4l2_ctrl_find(&ctx->hdl, V4L2_CID_STATELESS_H264_PPS); c_sm = v4l2_ctrl_find(&ctx->hdl, V4L2_CID_STATELESS_H264_SCALING_MATRIX); c_dp = v4l2_ctrl_find(&ctx->hdl, V4L2_CID_STATELESS_H264_DECODE_PARAMS); if (!c_sps || !c_pps || !c_sm || !c_dp) return false; if (!c_sps->p_cur.p_h264_sps || !c_pps->p_cur.p_h264_pps || !c_sm->p_cur.p_h264_scaling_matrix || !c_dp->p_cur.p_h264_decode_params) return false; meta->sps = *c_sps->p_cur.p_h264_sps; meta->pps = *c_pps->p_cur.p_h264_pps; meta->scaling_matrix = *c_sm->p_cur.p_h264_scaling_matrix; meta->decode_params = *c_dp->p_cur.p_h264_decode_params; return true; } static int daedalus_register_stateless_ctrls(struct v4l2_ctrl_handler *hdl) { size_t i; /* * Use v4l2_ctrl_new_custom (the pattern rkvdec / cedrus / * hantro use) rather than v4l2_ctrl_new_std_compound. * v4l2-core auto-detects the type from each known * V4L2_CID_STATELESS_* id and allocates the right payload * size internally; S_EXT_CTRLS then validates user input * against that allocated payload. v4l2_ctrl_new_std_compound * with NULL p_def was rejecting writes (libva-v4l2-request- * fourier got EINVAL on every stateless ctrl SET). 
*/ for (i = 0; i < ARRAY_SIZE(daedalus_stateless_ctrls); i++) { struct v4l2_ctrl_config cfg = { .ops = &daedalus_ctrl_ops, .id = daedalus_stateless_ctrls[i], }; struct v4l2_ctrl *ctrl; ctrl = v4l2_ctrl_new_custom(hdl, &cfg, NULL); if (hdl->error) { pr_debug("daedalus_v4l2: skipping unsupported CID 0x%x (err=%d)\n", daedalus_stateless_ctrls[i], hdl->error); hdl->error = 0; } (void) ctrl; } /* * Device-wide H.264 mode controls. libva-v4l2-request sets these * on the device fd (request_fd=-1) at context init via * VIDIOC_S_EXT_CTRLS before any per-request controls are bound; * without entries on our ctrl_handler v4l2-core returns EINVAL * and userspace logs a noisy "Unable to set control(s)" warning * (cosmetic — libva already treats this as best-effort). Expose * the single value each that the daemon actually accepts: * * DECODE_MODE: FRAME_BASED only — the daemon receives a full * frame's worth of slice data per REQ_DECODE and calls * avcodec_send_packet / avcodec_receive_frame once per * submission; partial-slice (SLICE_BASED) decode is not * wired into the daemon pipeline. * START_CODE: ANNEX_B only — the daemon's H.264 SPS/PPS * synthesiser prepends 0x00000001-delimited NAL units * (Annex B); a NONE start-code variant would need a * separate emit path. * * Pattern matches rkvdec / hantro (skip_mask = BIT(unsupported)). 
*/ v4l2_ctrl_new_std_menu(hdl, &daedalus_ctrl_ops, V4L2_CID_STATELESS_H264_DECODE_MODE, V4L2_STATELESS_H264_DECODE_MODE_FRAME_BASED, BIT(V4L2_STATELESS_H264_DECODE_MODE_SLICE_BASED), V4L2_STATELESS_H264_DECODE_MODE_FRAME_BASED); v4l2_ctrl_new_std_menu(hdl, &daedalus_ctrl_ops, V4L2_CID_STATELESS_H264_START_CODE, V4L2_STATELESS_H264_START_CODE_ANNEX_B, BIT(V4L2_STATELESS_H264_START_CODE_NONE), V4L2_STATELESS_H264_START_CODE_ANNEX_B); if (hdl->error) { pr_debug("daedalus_v4l2: H.264 menu ctrls registration err=%d\n", hdl->error); hdl->error = 0; } return 0; } /* -- format helpers -------------------------------------------------- */ /* * CAPTURE format fill. Three layouts supported: * NV12M (default, 8-bit) — 2 planes: Y (W*H bytes) + interleaved * CbCr at half-res (W*H/2 bytes). * NV12 (8-bit, 1 plane) — 1 plane: Y (W*H) followed by * interleaved CbCr (W*H/2); total * W*H*3/2 bytes. For legacy MPLANE * clients that don't speak multi- * plane (libva-v4l2-request). * P010 (10-bit HDR) — 1 plane: Y first (W*H*2 bytes) then * interleaved CbCr at half-res * (W*H bytes); 16-bit samples, * MSB-aligned 10-bit data (low 6 * bits zero per V4L2 ABI). 
*/ static void daedalus_fill_capture_fmt(struct v4l2_pix_format_mplane *f, u32 fourcc, u32 w, u32 h) { if (!daedalus_is_supported_capture(fourcc)) fourcc = DAEDALUS_DEFAULT_CAPTURE_FOURCC; f->width = w; f->height = h; f->pixelformat = fourcc; f->field = V4L2_FIELD_NONE; f->colorspace = V4L2_COLORSPACE_REC709; if (fourcc == V4L2_PIX_FMT_P010) { f->num_planes = 1; f->plane_fmt[0].bytesperline = w * 2; f->plane_fmt[0].sizeimage = w * h * 2 + w * h; f->plane_fmt[1].bytesperline = 0; f->plane_fmt[1].sizeimage = 0; } else if (fourcc == V4L2_PIX_FMT_NV12) { f->num_planes = 1; f->plane_fmt[0].bytesperline = w; f->plane_fmt[0].sizeimage = w * h + w * h / 2; f->plane_fmt[1].bytesperline = 0; f->plane_fmt[1].sizeimage = 0; } else { f->num_planes = 2; f->plane_fmt[0].bytesperline = w; f->plane_fmt[0].sizeimage = w * h; f->plane_fmt[1].bytesperline = w; f->plane_fmt[1].sizeimage = w * h / 2; } } /* * OUTPUT is a parsed access unit (VP9 frame / AV1 frame / H.264 * slice). V4L2 convention for compressed bitstream formats: * single plane, sizeimage = worst-case bitstream size we're * willing to accept. fourcc carries the codec selector. */ static void daedalus_fill_output_fmt(struct v4l2_pix_format_mplane *f, u32 fourcc, u32 w, u32 h) { if (!daedalus_is_supported_output(fourcc)) fourcc = DAEDALUS_DEFAULT_OUTPUT_FOURCC; f->width = w; f->height = h; f->pixelformat = fourcc; f->field = V4L2_FIELD_NONE; f->colorspace = V4L2_COLORSPACE_REC709; f->num_planes = 1; f->plane_fmt[0].bytesperline = 0; /* compressed */ f->plane_fmt[0].sizeimage = DAEDALUS_MAX_BITSTREAM; } /* -- vb2 queue ops --------------------------------------------------- */ static int daedalus_queue_setup(struct vb2_queue *vq, unsigned int *nbuffers, unsigned int *nplanes, unsigned int sizes[], struct device *alloc_devs[]) { struct daedalus_ctx *ctx = vb2_get_drv_priv(vq); const struct v4l2_pix_format_mplane *fmt; unsigned int p; fmt = V4L2_TYPE_IS_OUTPUT(vq->type) ? 
&ctx->src_fmt : &ctx->dst_fmt; if (*nplanes) { if (*nplanes != fmt->num_planes) return -EINVAL; for (p = 0; p < *nplanes; p++) if (sizes[p] < fmt->plane_fmt[p].sizeimage) return -EINVAL; } else { *nplanes = fmt->num_planes; for (p = 0; p < *nplanes; p++) sizes[p] = fmt->plane_fmt[p].sizeimage; if (*nbuffers < 2) *nbuffers = 2; } /* * Both queues use vb2_dma_contig now (OUTPUT switched in * Phase 8.6 to satisfy v4l2-compliance's non-coherent * REQBUFS test). Point both at the platform device as * the CMA-backed allocation parent. */ for (p = 0; p < *nplanes; p++) alloc_devs[p] = &ctx->dev->pdev->dev; return 0; } static int daedalus_buf_prepare(struct vb2_buffer *vb) { struct daedalus_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue); const struct v4l2_pix_format_mplane *fmt; unsigned int p; fmt = V4L2_TYPE_IS_OUTPUT(vb->vb2_queue->type) ? &ctx->src_fmt : &ctx->dst_fmt; for (p = 0; p < vb->num_planes; p++) { unsigned long need = fmt->plane_fmt[p].sizeimage; if (vb2_plane_size(vb, p) < need) { v4l2_err(&ctx->dev->v4l2_dev, "buf_prepare: plane %u size %lu < %lu\n", p, vb2_plane_size(vb, p), need); return -EINVAL; } /* * For OUTPUT (bitstream), payload is set by userspace * via VIDIOC_QBUF (bytesused). For CAPTURE we set the * full plane size; device_run / buf_done updates it on * completion if needed. */ if (!V4L2_TYPE_IS_OUTPUT(vb->vb2_queue->type)) vb2_set_plane_payload(vb, p, need); } return 0; } static void daedalus_buf_queue(struct vb2_buffer *vb) { struct daedalus_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue); struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); v4l2_m2m_buf_queue(ctx->m2m_ctx, vbuf); } static int daedalus_start_streaming(struct vb2_queue *vq, unsigned int count) { return 0; } static void daedalus_stop_streaming(struct vb2_queue *vq) { struct daedalus_ctx *ctx = vb2_get_drv_priv(vq); struct vb2_v4l2_buffer *vbuf; while ((vbuf = V4L2_TYPE_IS_OUTPUT(vq->type) ? 
v4l2_m2m_src_buf_remove(ctx->m2m_ctx) : v4l2_m2m_dst_buf_remove(ctx->m2m_ctx)) != NULL) v4l2_m2m_buf_done(vbuf, VB2_BUF_STATE_ERROR); } /* * Phase 8.12: request API vb2 hooks. * * buf_out_validate: called on QBUF of an OUTPUT buf with a bound * request fd. We only accept progressive (FIELD_NONE) frames, so * normalise + accept. Without this op v4l2-core WARNs at * vb2_queue_or_prepare_buf and rejects with EINVAL. * * buf_request_complete: called when a request completes or is * cancelled; v4l2_ctrl_request_complete is the canonical helper * (releases the per-request control state cloned off ctx->hdl). */ static int daedalus_buf_out_validate(struct vb2_buffer *vb) { struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); vbuf->field = V4L2_FIELD_NONE; return 0; } static void daedalus_buf_request_complete(struct vb2_buffer *vb) { struct daedalus_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue); v4l2_ctrl_request_complete(vb->req_obj.req, &ctx->hdl); } static const struct vb2_ops daedalus_qops = { .queue_setup = daedalus_queue_setup, .buf_prepare = daedalus_buf_prepare, .buf_queue = daedalus_buf_queue, .start_streaming = daedalus_start_streaming, .stop_streaming = daedalus_stop_streaming, .wait_prepare = vb2_ops_wait_prepare, .wait_finish = vb2_ops_wait_finish, .buf_out_validate = daedalus_buf_out_validate, .buf_request_complete = daedalus_buf_request_complete, }; /* -- m2m queue init -------------------------------------------------- */ static int daedalus_queue_init(void *priv, struct vb2_queue *src_vq, struct vb2_queue *dst_vq) { struct daedalus_ctx *ctx = priv; int ret; src_vq->type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE; src_vq->io_modes = VB2_MMAP | VB2_USERPTR | VB2_DMABUF; src_vq->drv_priv = ctx; /* * Phase 8.12: Request API support on OUTPUT queue. vb2 * binds the per-request control state (the v4l2_ctrl_handler * clone used by VIDIOC_S_EXT_CTRLS(which=REQUEST_VAL)) to * the OUTPUT queue. 
* Without this flag v4l2-core rejects
	 * REQUEST_VAL writes before our s_ctrl is ever reached.
	 */
	src_vq->supports_requests = true;
	/*
	 * requires_requests would reject any QBUF without a bound
	 * media_request — useful when the daemon truly needs the
	 * per-frame stateless controls to decode.  Our daemon
	 * re-parses the bitstream so it doesn't actually need the
	 * controls; leaving requires_requests off lets non-request
	 * clients (test_m2m_stream etc.) keep working AND lets the
	 * libva path proceed even if its S_EXT_CTRLS bind didn't
	 * fully take.
	 */
	src_vq->buf_struct_size = sizeof(struct v4l2_m2m_buffer);
	src_vq->ops = &daedalus_qops;
	/*
	 * Phase 8.6: OUTPUT switched from vb2_vmalloc to
	 * vb2_dma_contig so v4l2-compliance's REQBUFS test passes
	 * V4L2_MEMORY_FLAG_NON_COHERENT (vmalloc memops don't
	 * honour the flag; dma_contig does).  We still use
	 * vb2_plane_vaddr in device_run to read the bitstream —
	 * dma_contig provides a kernel virtual address just like
	 * vmalloc did.
	 */
	src_vq->mem_ops = &vb2_dma_contig_memops;
	src_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
	/* Per-ctx lock so concurrent clients don't serialise on a
	 * device-wide mutex.  See struct daedalus_ctx.vb_mutex comment.
	 */
	src_vq->lock = &ctx->vb_mutex;
	src_vq->dev = &ctx->dev->pdev->dev;
	src_vq->allow_cache_hints = 1;

	/* Bail out before touching the CAPTURE queue if OUTPUT init fails. */
	ret = vb2_queue_init(src_vq);
	if (ret)
		return ret;

	/*
	 * CAPTURE queue: same ops, memops, lock and parent device as
	 * OUTPUT, but no USERPTR io mode and no request support.
	 */
	dst_vq->type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE;
	dst_vq->io_modes = VB2_MMAP | VB2_DMABUF;
	dst_vq->drv_priv = ctx;
	dst_vq->buf_struct_size = sizeof(struct v4l2_m2m_buffer);
	dst_vq->ops = &daedalus_qops;
	dst_vq->mem_ops = &vb2_dma_contig_memops;
	dst_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
	dst_vq->lock = &ctx->vb_mutex;
	dst_vq->dev = &ctx->dev->pdev->dev;
	dst_vq->allow_cache_hints = 1;

	return vb2_queue_init(dst_vq);
}

/* -- in-flight tracking (cookie → ctx + bufs) ------------------------ */

/*
 * The chardev RESP_FRAME path needs to find the per-request
 * context + source/destination buffer pair so it can complete the
 * V4L2 m2m job.  Track in-flight requests in a small list keyed
 * by cookie.  Cookies are monotonically increasing (see
 * device_run); collisions on wrap-around are astronomically
 * unlikely in normal use and would self-clear once the older
 * cookie's response arrives.
 */
struct daedalus_inflight {
	struct list_head	list;		/* link in dev->inflight */
	u32			cookie;		/* key matched against RESP_FRAME */
	struct daedalus_ctx	*ctx;		/* context that submitted the job */
	struct vb2_v4l2_buffer	*src_buf;	/* OUTPUT (bitstream) buffer */
	struct vb2_v4l2_buffer	*dst_buf;	/* CAPTURE (decoded) buffer */
	/*
	 * Captured media_request the src_buf was bound to (if any).
	 * Set by device_run from src_buf->vb2_buf.req_obj.req;
	 * consumed by the completion path to call
	 * v4l2_ctrl_request_complete + signal request fd.  NULL for
	 * non-request flows (e.g. test_m2m_stream direct QBUF).
*/ struct media_request *req; }; static struct daedalus_inflight * daedalus_inflight_pop_locked(struct daedalus_dev *dev, u32 cookie) { struct daedalus_inflight *e; list_for_each_entry(e, &dev->inflight, list) { if (e->cookie == cookie) { list_del(&e->list); return e; } } return NULL; } /* -- chardev GET_DMABUF backend (called in daemon task context) ----- */ int daedalus_export_capture_dmabuf(u32 cookie, u32 plane, u32 flags, int *out_fd) { struct daedalus_dev *dev = g_daedalus_dev; struct daedalus_inflight *e, *match = NULL; struct vb2_queue *vq; struct vb2_buffer *vb; int rc; if (!dev || !out_fd) return -EINVAL; /* * Walk the inflight list under the lock to look up the * V4L2 request. Hold a transient reference via the lock * — once we drop the lock the entry could be popped by a * concurrent RESP_FRAME, but we only need the dst_buf + * its vb2_queue, both of which are stable for the * lifetime of the in-flight request (RESP_FRAME is what * pops the entry, so daemon completing the export then * sending RESP_FRAME is the canonical ordering). 
*/ mutex_lock(&dev->inflight_lock); list_for_each_entry(e, &dev->inflight, list) { if (e->cookie == cookie) { match = e; break; } } if (!match) { mutex_unlock(&dev->inflight_lock); return -EINVAL; } vb = &match->dst_buf->vb2_buf; vq = vb->vb2_queue; mutex_unlock(&dev->inflight_lock); if (plane >= vb->num_planes) return -EINVAL; rc = vb2_core_expbuf(vq, out_fd, vq->type, vb, plane, flags); if (rc) return rc; return 0; } /* -- v4l2_m2m_ops.device_run ----------------------------------------- */ static atomic_t daedalus_cookie_seq = ATOMIC_INIT(0); u32 daedalus_next_cookie(void) { return (u32) atomic_inc_return(&daedalus_cookie_seq); } static void daedalus_device_run(void *priv) { struct daedalus_ctx *ctx = priv; struct daedalus_dev *dev = ctx->dev; struct vb2_v4l2_buffer *src_buf, *dst_buf; struct daedalus_inflight *inf = NULL; struct daedalus_req_decode *req = NULL; void *bitstream; size_t blen, payload_len; u32 cookie; int ret; src_buf = v4l2_m2m_next_src_buf(ctx->m2m_ctx); dst_buf = v4l2_m2m_next_dst_buf(ctx->m2m_ctx); if (!src_buf || !dst_buf) { v4l2_warn(&dev->v4l2_dev, "device_run with no src/dst buf — scheduler bug?\n"); goto fail_job_finish; } blen = vb2_get_plane_payload(&src_buf->vb2_buf, 0); if (!blen || blen > DAEDALUS_MAX_BITSTREAM) { v4l2_err(&dev->v4l2_dev, "device_run: bitstream length %zu out of range [1, %lu]\n", blen, (unsigned long) DAEDALUS_MAX_BITSTREAM); goto fail_buf_error; } bitstream = vb2_plane_vaddr(&src_buf->vb2_buf, 0); if (!bitstream) { v4l2_err(&dev->v4l2_dev, "device_run: vaddr NULL\n"); goto fail_buf_error; } /* * Bind the in-flight media_request's stateless control values to * the ctrl_handler's p_cur slots so daedalus_collect_h264_meta() * sees this request's SPS/PPS/scaling_matrix/decode_params — not * the previous request's stale values (or driver defaults from * v4l2_ctrl_new_custom when no prior request has run yet). 
* * Without this, p_cur reads back zero/default for everything the * userspace driver set via S_EXT_CTRLS with * V4L2_CTRL_WHICH_REQUEST_VAL — caught by libva-v4l2-request- * fourier's libva-boundary instrumentation (issue #8) showing * num_ref_frames=1 sent vs. ref_frames=0 read. Pair with the * v4l2_ctrl_request_complete call already present in the * completion path (daedalus_complete_resp_frame). * * cedrus / rkvdec / hantro all call this from device_run; the * m2m core does NOT do it automatically. */ if (src_buf->vb2_buf.req_obj.req) v4l2_ctrl_request_setup(src_buf->vb2_buf.req_obj.req, &ctx->hdl); { u32 cid = daedalus_fourcc_to_codec_id(ctx->src_fmt.pixelformat); size_t meta_len = 0; struct daedalus_h264_meta meta_local; bool have_h264_meta = false; if (!cid) { v4l2_err(&dev->v4l2_dev, "device_run: unsupported OUTPUT pixelformat 0x%08x\n", ctx->src_fmt.pixelformat); goto fail_buf_error; } /* * H.264 needs SPS/PPS/scaling-matrix/decode-params shipped * to the daemon alongside the slice bitstream — libavcodec * can't decode slices without them. VP9/AV1 are self- * describing so we skip the meta block for those. 
*/ if (cid == DAEDALUS_CODEC_H264) { memset(&meta_local, 0, sizeof(meta_local)); have_h264_meta = daedalus_collect_h264_meta(ctx, &meta_local); if (have_h264_meta) meta_len = sizeof(meta_local); else v4l2_warn(&dev->v4l2_dev, "device_run: H.264 frame without SPS/PPS controls — daemon will fail decode\n"); } payload_len = sizeof(*req) + meta_len + blen; if (payload_len > DAEDALUS_PROTO_MAX_PAYLOAD) { v4l2_err(&dev->v4l2_dev, "device_run: payload %zu exceeds chardev cap %u\n", payload_len, (unsigned int) DAEDALUS_PROTO_MAX_PAYLOAD); goto fail_buf_error; } req = kmalloc(payload_len, GFP_KERNEL); if (!req) goto fail_buf_error; memset(req, 0, sizeof(*req)); req->codec_id = cid; req->bitstream_len = (u32) blen; req->capture_width = ctx->dst_fmt.width; req->capture_height = ctx->dst_fmt.height; req->capture_pix_fmt = ctx->dst_fmt.pixelformat; req->capture_num_planes = ctx->dst_fmt.num_planes; { unsigned int p; for (p = 0; p < ctx->dst_fmt.num_planes && p < 3; p++) { req->capture_plane_size[p] = ctx->dst_fmt.plane_fmt[p].sizeimage; req->capture_plane_stride[p] = ctx->dst_fmt.plane_fmt[p].bytesperline; } } if (have_h264_meta) { req->flags |= DAEDALUS_REQ_FLAG_H264_META; memcpy((u8 *) req + sizeof(*req), &meta_local, sizeof(meta_local)); } memcpy((u8 *) req + sizeof(*req) + meta_len, bitstream, blen); } inf = kzalloc(sizeof(*inf), GFP_KERNEL); if (!inf) goto fail_buf_error; cookie = daedalus_next_cookie(); inf->cookie = cookie; inf->ctx = ctx; inf->src_buf = src_buf; inf->dst_buf = dst_buf; /* * Capture the bound media_request (if any) so the * completion path can call v4l2_ctrl_request_complete + * trigger MEDIA_REQUEST_STATE_COMPLETE. vb2-core's normal * buf_done path unbinds the buffer's req_obj but leaves the * control object bound — the driver has to complete it. * * Take our own reference via media_request_get so the * pointer stays valid even if vb2 releases its reference * concurrently (e.g. 
via MEDIA_IOC_REQUEST_REINIT or a * process kill triggering buf_request_complete from the * cancel path). Released by media_request_put in * daedalus_complete_resp_frame. Matches the cedrus / * rkvdec refcount pattern. */ inf->req = src_buf->vb2_buf.req_obj.req; if (inf->req) media_request_get(inf->req); mutex_lock(&dev->inflight_lock); list_add_tail(&inf->list, &dev->inflight); mutex_unlock(&dev->inflight_lock); ret = daedalus_chardev_enqueue_req(DAEDALUS_MSG_REQ_DECODE, cookie, req, payload_len); kfree(req); req = NULL; if (ret) { v4l2_err(&dev->v4l2_dev, "device_run: enqueue_req failed: %d\n", ret); mutex_lock(&dev->inflight_lock); list_del(&inf->list); mutex_unlock(&dev->inflight_lock); kfree(inf); goto fail_buf_error; } v4l2_dbg(1, 0, &dev->v4l2_dev, "device_run: REQ_DECODE cookie=%u blen=%zu\n", cookie, blen); /* * Job stays open until RESP_FRAME comes back; chardev path * calls v4l2_m2m_buf_done_and_job_finish then. */ return; fail_buf_error: if (src_buf) { v4l2_m2m_src_buf_remove(ctx->m2m_ctx); v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_ERROR); } if (dst_buf) { v4l2_m2m_dst_buf_remove(ctx->m2m_ctx); v4l2_m2m_buf_done(dst_buf, VB2_BUF_STATE_ERROR); } kfree(req); fail_job_finish: v4l2_m2m_job_finish(dev->m2m_dev, ctx->m2m_ctx); } static const struct v4l2_m2m_ops daedalus_m2m_ops = { .device_run = daedalus_device_run, }; /* -- chardev RESP_FRAME → buf_done bridge ---------------------------- */ void daedalus_complete_resp_frame(u32 cookie, const struct daedalus_resp_frame *fr, const u8 *pixels, size_t pixels_len) { struct daedalus_dev *dev = g_daedalus_dev; struct daedalus_inflight *inf; enum vb2_buffer_state state; void *dst_y, *dst_uv; u32 y_size, uv_size; if (!dev) return; mutex_lock(&dev->inflight_lock); inf = daedalus_inflight_pop_locked(dev, cookie); mutex_unlock(&dev->inflight_lock); if (!inf) { pr_warn_ratelimited( "daedalus_v4l2: RESP_FRAME for unknown cookie=%u\n", cookie); return; } state = (fr->status == DAEDALUS_DECODE_OK) ? 
VB2_BUF_STATE_DONE : VB2_BUF_STATE_ERROR; /* * Two routes the daemon can take, both supported: * * (a) dmabuf path (Phase 8.6+) — daemon called * DAEDALUS_IOC_GET_DMABUF, mmap'd the CAPTURE buffer, * wrote pixels in place. RESP_FRAME carries metadata * only (pixels_len == 0). Each plane's payload is * the full plane size (the daemon wrote everything * the format requires). * * (b) Phase 8.5 inline path — daemon shipped raw NV12 in * the chardev payload (≤ 64 KiB cap). We memcpy * into the vb2 buffer. Plane payloads come from * the daemon's NV12 luma/chroma counts. */ if (state == VB2_BUF_STATE_DONE) { struct vb2_buffer *vb = &inf->dst_buf->vb2_buf; unsigned int p; if (pixels_len) { /* (b) inline NV12 copy — legacy 2-plane only */ y_size = min_t(u32, fr->luma_len, (u32) vb2_plane_size(vb, 0)); uv_size = vb->num_planes > 1 ? min_t(u32, fr->chroma_len, (u32) vb2_plane_size(vb, 1)) : 0; dst_y = vb2_plane_vaddr(vb, 0); dst_uv = vb->num_planes > 1 ? vb2_plane_vaddr(vb, 1) : NULL; if (dst_y && y_size && pixels_len >= y_size) memcpy(dst_y, pixels, y_size); else y_size = 0; if (dst_uv && uv_size && pixels_len >= y_size + uv_size) memcpy(dst_uv, pixels + y_size, uv_size); else uv_size = 0; vb2_set_plane_payload(vb, 0, y_size); if (vb->num_planes > 1) vb2_set_plane_payload(vb, 1, uv_size); } else { /* (a) dmabuf path: plane is fully populated by * the daemon, so payload == sizeimage. */ for (p = 0; p < vb->num_planes; p++) vb2_set_plane_payload(vb, p, vb2_plane_size(vb, p)); } } /* * Phase 8.14: if the src_buf was bound to a media_request * (libva-driven decode path), complete the per-request * control state BEFORE buf_done_and_job_finish. vb2-core's * buf_done unbinds the buffer's req_obj on its own, but the * control object stays bound until v4l2_ctrl_request_complete * runs — only after BOTH objects unbind does the request * transition to MEDIA_REQUEST_STATE_COMPLETE and wake any * userspace poll on the request fd. 
*
	 * For non-request flows (test_m2m_stream direct QBUF) inf->req
	 * is NULL and the v4l2_ctrl_request_complete call is skipped.
	 */
	if (inf->req)
		v4l2_ctrl_request_complete(inf->req, &inf->ctx->hdl);

	/*
	 * Use the buf_done_and_job_finish helper rather than plain
	 * buf_done + job_finish: the helper pops the buffers off
	 * the m2m queue before marking them done, otherwise the
	 * scheduler immediately re-runs device_run on the same
	 * still-queued src buffer.  Caught during Phase 8.5 first
	 * run — second REQ_DECODE with identical bitstream + oops
	 * in stop_streaming when the test client tore down.
	 */
	v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, inf->ctx->m2m_ctx,
					 state);

	/*
	 * Release our reference taken in device_run; safe to do
	 * AFTER buf_done_and_job_finish (which dropped the vb2
	 * reference) because we still hold this one.  If the
	 * refcount hits zero here, media-core releases the request.
	 */
	if (inf->req)
		media_request_put(inf->req);

	/* The entry was unlinked from dev->inflight earlier; free it. */
	kfree(inf);
}

/* -- daemon disconnect drain ----------------------------------------- */

/*
 * Fail every job still recorded in dev->inflight.  Does nothing when
 * the module-wide device pointer is NULL.
 */
void daedalus_drain_inflight_on_disconnect(void)
{
	struct daedalus_dev *dev = g_daedalus_dev;
	struct daedalus_inflight *inf, *tmp;
	LIST_HEAD(local);

	if (!dev)
		return;

	/*
	 * Splice the in-flight list onto a local list under the lock,
	 * then process each entry with the lock dropped — every
	 * v4l2_m2m_buf_done_and_job_finish call may itself try to
	 * re-enter device_run via the scheduler (which would need to
	 * walk dev->inflight again on a future REQ_DECODE), and
	 * v4l2_m2m_buf_done can sleep via vb2's buffer-done dispatch.
	 * Holding inflight_lock across either is a deadlock invitation.
*/ mutex_lock(&dev->inflight_lock); list_splice_init(&dev->inflight, &local); mutex_unlock(&dev->inflight_lock); list_for_each_entry_safe(inf, tmp, &local, list) { list_del(&inf->list); v4l2_warn(&dev->v4l2_dev, "draining inflight cookie=%u (daemon disconnect)\n", inf->cookie); /* * Complete the per-request control state before * buf_done_and_job_finish, same ordering as the success * path in daedalus_complete_resp_frame(). For non-request * flows inf->req is NULL and v4l2_ctrl_request_complete * no-ops. */ if (inf->req) v4l2_ctrl_request_complete(inf->req, &inf->ctx->hdl); /* * Mark both buffers ERROR and clear the m2m scheduler's * job_running flag. This is what unsticks * v4l2_m2m_cancel_job() inside the consumer's close() * path; without it, the consumer hangs in TASK_UNINTERRUPTIBLE * forever (issue #146). */ v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, inf->ctx->m2m_ctx, VB2_BUF_STATE_ERROR); if (inf->req) media_request_put(inf->req); kfree(inf); } } /* -- v4l2_ioctl_ops -------------------------------------------------- */ static int daedalus_querycap(struct file *file, void *priv, struct v4l2_capability *cap) { strscpy(cap->driver, DAEDALUS_DRV_NAME, sizeof(cap->driver)); strscpy(cap->card, "daedalus-fourier V3D7+NEON", sizeof(cap->card)); snprintf(cap->bus_info, sizeof(cap->bus_info), "platform:%s", DAEDALUS_DRV_NAME); return 0; } static int daedalus_enum_fmt(struct file *file, void *priv, struct v4l2_fmtdesc *f) { if (f->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) { if (f->index >= DAEDALUS_NUM_OUTPUT_FMTS) return -EINVAL; f->pixelformat = daedalus_output_formats[f->index]; f->flags |= V4L2_FMT_FLAG_COMPRESSED; return 0; } if (f->index >= DAEDALUS_NUM_CAPTURE_FMTS) return -EINVAL; f->pixelformat = daedalus_capture_formats[f->index]; return 0; } static int daedalus_g_fmt(struct file *file, void *priv, struct v4l2_format *f) { struct daedalus_ctx *ctx = file_to_ctx(file); if (f->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) f->fmt.pix_mp = ctx->src_fmt; else 
if (f->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) f->fmt.pix_mp = ctx->dst_fmt; else return -EINVAL; return 0; } static int daedalus_try_fmt(struct file *file, void *priv, struct v4l2_format *f) { struct v4l2_pix_format_mplane *p = &f->fmt.pix_mp; u32 w = clamp_t(u32, p->width, 16, 1920); u32 h = clamp_t(u32, p->height, 16, 1088); u32 cs, xfer, ycbcr, quant; /* * Preserve userspace-supplied colorspace fields verbatim * (fixes the Phase 8.5 v4l2-compliance S_FMT colorspace * round-trip failure) — fill_*_fmt overwrites these to * REC709 defaults, but TRY_FMT must echo what the caller * asked for if it's at all sensible. */ cs = p->colorspace; xfer = p->xfer_func; ycbcr = p->ycbcr_enc; quant = p->quantization; if (f->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) { u32 fourcc = p->pixelformat; if (!daedalus_is_supported_output(fourcc)) fourcc = DAEDALUS_DEFAULT_OUTPUT_FOURCC; daedalus_fill_output_fmt(p, fourcc, w, h); } else if (f->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) { u32 fourcc = p->pixelformat; if (!daedalus_is_supported_capture(fourcc)) fourcc = DAEDALUS_DEFAULT_CAPTURE_FOURCC; daedalus_fill_capture_fmt(p, fourcc, w, h); } else { return -EINVAL; } if (cs) p->colorspace = cs; if (xfer) p->xfer_func = xfer; if (ycbcr) p->ycbcr_enc = ycbcr; if (quant) p->quantization = quant; return 0; } static int daedalus_s_fmt(struct file *file, void *priv, struct v4l2_format *f) { struct daedalus_ctx *ctx = file_to_ctx(file); struct vb2_queue *vq; int ret; vq = v4l2_m2m_get_vq(ctx->m2m_ctx, f->type); if (!vq) return -EINVAL; if (vb2_is_busy(vq)) return -EBUSY; ret = daedalus_try_fmt(file, priv, f); if (ret) return ret; if (f->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) { ctx->src_fmt = f->fmt.pix_mp; /* * Stateless decoder convention: colorspace metadata * on the OUTPUT (bitstream) side describes what the * codec will produce. Propagate to CAPTURE so a * follow-up G_FMT on CAPTURE returns matching values * (v4l2-compliance v4l2-test-formats.cpp:958 * round-trip test). 
*/ ctx->dst_fmt.colorspace = ctx->src_fmt.colorspace; ctx->dst_fmt.xfer_func = ctx->src_fmt.xfer_func; ctx->dst_fmt.ycbcr_enc = ctx->src_fmt.ycbcr_enc; ctx->dst_fmt.quantization = ctx->src_fmt.quantization; } else { ctx->dst_fmt = f->fmt.pix_mp; } return 0; } static const struct v4l2_ioctl_ops daedalus_ioctl_ops = { .vidioc_querycap = daedalus_querycap, .vidioc_enum_fmt_vid_out = daedalus_enum_fmt, .vidioc_enum_fmt_vid_cap = daedalus_enum_fmt, .vidioc_g_fmt_vid_out_mplane = daedalus_g_fmt, .vidioc_g_fmt_vid_cap_mplane = daedalus_g_fmt, .vidioc_s_fmt_vid_out_mplane = daedalus_s_fmt, .vidioc_s_fmt_vid_cap_mplane = daedalus_s_fmt, .vidioc_try_fmt_vid_out_mplane = daedalus_try_fmt, .vidioc_try_fmt_vid_cap_mplane = daedalus_try_fmt, .vidioc_reqbufs = v4l2_m2m_ioctl_reqbufs, .vidioc_querybuf = v4l2_m2m_ioctl_querybuf, .vidioc_qbuf = v4l2_m2m_ioctl_qbuf, .vidioc_dqbuf = v4l2_m2m_ioctl_dqbuf, .vidioc_create_bufs = v4l2_m2m_ioctl_create_bufs, .vidioc_prepare_buf = v4l2_m2m_ioctl_prepare_buf, .vidioc_expbuf = v4l2_m2m_ioctl_expbuf, .vidioc_streamon = v4l2_m2m_ioctl_streamon, .vidioc_streamoff = v4l2_m2m_ioctl_streamoff, .vidioc_subscribe_event = v4l2_ctrl_subscribe_event, .vidioc_unsubscribe_event = v4l2_event_unsubscribe, }; /* -- file operations ------------------------------------------------- */ static int daedalus_open(struct file *file) { struct daedalus_dev *dev = video_drvdata(file); struct daedalus_ctx *ctx; int ret; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) return -ENOMEM; ctx->dev = dev; mutex_init(&ctx->vb_mutex); v4l2_fh_init(&ctx->fh, &dev->vdev); file->private_data = &ctx->fh; /* +2 covers the H.264 DECODE_MODE + START_CODE menu controls * registered alongside daedalus_stateless_ctrls[]. 
*/ v4l2_ctrl_handler_init(&ctx->hdl, ARRAY_SIZE(daedalus_stateless_ctrls) + 2); daedalus_register_stateless_ctrls(&ctx->hdl); /* * v4l2_ctrl_handler_setup runs s_ctrl for every registered * control with its default value — required to bring each * control out of "uninitialised" state. Without this the * per-request handler clone path returns EINVAL on * VIDIOC_S_EXT_CTRLS(which=REQUEST_VAL). rkvdec/cedrus/ * hantro all call this after registration. */ v4l2_ctrl_handler_setup(&ctx->hdl); ctx->fh.ctrl_handler = &ctx->hdl; daedalus_fill_output_fmt(&ctx->src_fmt, DAEDALUS_DEFAULT_OUTPUT_FOURCC, DAEDALUS_DEFAULT_W, DAEDALUS_DEFAULT_H); daedalus_fill_capture_fmt(&ctx->dst_fmt, DAEDALUS_DEFAULT_CAPTURE_FOURCC, DAEDALUS_DEFAULT_W, DAEDALUS_DEFAULT_H); ctx->m2m_ctx = v4l2_m2m_ctx_init(dev->m2m_dev, ctx, daedalus_queue_init); if (IS_ERR(ctx->m2m_ctx)) { ret = PTR_ERR(ctx->m2m_ctx); v4l2_err(&dev->v4l2_dev, "m2m_ctx_init: %d\n", ret); goto err_ctrl; } ctx->fh.m2m_ctx = ctx->m2m_ctx; /* * v4l2_fh_add/del gained a `struct file *filp` second arg in * Linux 6.18 (commit landing between v6.17 and v6.18 tags). * Keep building against both 6.12 LTS (no filp arg) and 6.18+. 
*/ #if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 18, 0) v4l2_fh_add(&ctx->fh, file); #else v4l2_fh_add(&ctx->fh); #endif return 0; err_ctrl: v4l2_ctrl_handler_free(&ctx->hdl); v4l2_fh_exit(&ctx->fh); mutex_destroy(&ctx->vb_mutex); kfree(ctx); return ret; } static int daedalus_release(struct file *file) { struct daedalus_ctx *ctx = file_to_ctx(file); #if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 18, 0) v4l2_fh_del(&ctx->fh, file); #else v4l2_fh_del(&ctx->fh); #endif v4l2_m2m_ctx_release(ctx->m2m_ctx); v4l2_ctrl_handler_free(&ctx->hdl); v4l2_fh_exit(&ctx->fh); mutex_destroy(&ctx->vb_mutex); kfree(ctx); return 0; } static const struct v4l2_file_operations daedalus_fops = { .owner = THIS_MODULE, .open = daedalus_open, .release = daedalus_release, .poll = v4l2_m2m_fop_poll, .unlocked_ioctl = video_ioctl2, .mmap = v4l2_m2m_fop_mmap, }; static void daedalus_vdev_release(struct video_device *vdev) { /* embedded in daedalus_dev (devm) — nothing to free here */ } /* -- media controller request-API ops (Phase 8.11) ------------------ */ /* * V4L2 Request API plumbing: lets a client allocate a media_request * (MEDIA_IOC_REQUEST_ALLOC), stage per-buffer controls into it via * VIDIOC_S_EXT_CTRLS with which=V4L2_CTRL_WHICH_REQUEST_VAL, then * queue the OUTPUT buffer with the request fd bound — all controls * + the buffer apply atomically at decode submission. * * vb2_request_validate / v4l2_m2m_request_queue are the canonical * helpers; the daemon doesn't actually use the staged controls * (FFmpeg re-parses the bitstream) but the wire-level support is * what libva-v4l2-request-fourier requires to call MEDIA_IOC_ * REQUEST_ALLOC successfully. 
*/ static const struct media_device_ops daedalus_media_ops = { .req_validate = vb2_request_validate, .req_queue = v4l2_m2m_request_queue, }; /* -- platform driver bind -------------------------------------------- */ static int daedalus_probe(struct platform_device *pdev) { struct daedalus_dev *dev; int ret; dev = devm_kzalloc(&pdev->dev, sizeof(*dev), GFP_KERNEL); if (!dev) return -ENOMEM; dev->pdev = pdev; platform_set_drvdata(pdev, dev); mutex_init(&dev->m2m_lock); mutex_init(&dev->inflight_lock); INIT_LIST_HEAD(&dev->inflight); /* * vb2_dma_contig (used by the CAPTURE queue for dmabuf * export) needs the parent device's DMA mask configured. * Pi 5 CMA supports 32-bit DMA; that's sufficient for * NV12/NV12M up to 4K (4K NV12 = ~12.4 MiB, well under * the 4 GiB ceiling). */ ret = dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); if (ret) { dev_err(&pdev->dev, "dma_coerce_mask_and_coherent: %d\n", ret); return ret; } /* * Set up the media controller BEFORE v4l2_device_register * binds the mdev so v4l2-core publishes the link between * the v4l2_device and the media_device. Stateless decoders * are required by spec to expose a media controller (the * request API rides on it) — v4l2-compliance's DECODER_CMD * test rejects drivers without it. * * Phase 8.11: wire the V4L2 request API media ops so libva- * v4l2-request-fourier can MEDIA_IOC_REQUEST_ALLOC against * us. vb2_request_validate + v4l2_m2m_request_queue are the * canonical helpers — they bundle per-buffer controls with * the matching qbuf so the decode submission is atomic * (required for stateless decoders feeding hardware that * needs all params present before kickoff). 
*/ dev->mdev.dev = &pdev->dev; strscpy(dev->mdev.model, "daedalus-v4l2", sizeof(dev->mdev.model)); dev->mdev.ops = &daedalus_media_ops; media_device_init(&dev->mdev); dev->v4l2_dev.mdev = &dev->mdev; ret = v4l2_device_register(&pdev->dev, &dev->v4l2_dev); if (ret) { dev_err(&pdev->dev, "v4l2_device_register: %d\n", ret); media_device_cleanup(&dev->mdev); return ret; } dev->m2m_dev = v4l2_m2m_init(&daedalus_m2m_ops); if (IS_ERR(dev->m2m_dev)) { ret = PTR_ERR(dev->m2m_dev); v4l2_err(&dev->v4l2_dev, "v4l2_m2m_init: %d\n", ret); goto err_v4l2_dev; } strscpy(dev->vdev.name, DAEDALUS_VIDEO_NAME, sizeof(dev->vdev.name)); dev->vdev.fops = &daedalus_fops; dev->vdev.ioctl_ops = &daedalus_ioctl_ops; dev->vdev.release = daedalus_vdev_release; dev->vdev.v4l2_dev = &dev->v4l2_dev; dev->vdev.vfl_dir = VFL_DIR_M2M; dev->vdev.device_caps = V4L2_CAP_VIDEO_M2M_MPLANE | V4L2_CAP_STREAMING; dev->vdev.lock = &dev->m2m_lock; video_set_drvdata(&dev->vdev, dev); ret = video_register_device(&dev->vdev, VFL_TYPE_VIDEO, -1); if (ret) { v4l2_err(&dev->v4l2_dev, "video_register_device: %d\n", ret); goto err_m2m; } /* * Register the m2m entities with the media controller * AFTER video_register_device so vdev->num is set. * MEDIA_ENT_F_PROC_VIDEO_DECODER tags us as a decoder * entity in the graph — what libva-v4l2-request scans for. 
*/ ret = v4l2_m2m_register_media_controller(dev->m2m_dev, &dev->vdev, MEDIA_ENT_F_PROC_VIDEO_DECODER); if (ret) { v4l2_err(&dev->v4l2_dev, "v4l2_m2m_register_media_controller: %d\n", ret); goto err_vdev; } ret = media_device_register(&dev->mdev); if (ret) { v4l2_err(&dev->v4l2_dev, "media_device_register: %d\n", ret); goto err_m2m_mc; } g_daedalus_dev = dev; v4l2_info(&dev->v4l2_dev, "daedalus-v4l2 m2m registered as /dev/video%d (Phase 8.7)\n", dev->vdev.num); return 0; err_m2m_mc: v4l2_m2m_unregister_media_controller(dev->m2m_dev); err_vdev: video_unregister_device(&dev->vdev); err_m2m: v4l2_m2m_release(dev->m2m_dev); err_v4l2_dev: v4l2_device_unregister(&dev->v4l2_dev); media_device_cleanup(&dev->mdev); return ret; } static void daedalus_remove(struct platform_device *pdev) { struct daedalus_dev *dev = platform_get_drvdata(pdev); g_daedalus_dev = NULL; media_device_unregister(&dev->mdev); v4l2_m2m_unregister_media_controller(dev->m2m_dev); video_unregister_device(&dev->vdev); v4l2_m2m_release(dev->m2m_dev); v4l2_device_unregister(&dev->v4l2_dev); media_device_cleanup(&dev->mdev); } static struct platform_driver daedalus_platform_driver = { .probe = daedalus_probe, .remove = daedalus_remove, .driver = { .name = DAEDALUS_DRV_NAME, }, }; static struct platform_device *daedalus_platform_device; static int __init daedalus_init(void) { int ret; ret = daedalus_chardev_init(); if (ret) return ret; daedalus_platform_device = platform_device_alloc(DAEDALUS_DRV_NAME, -1); if (!daedalus_platform_device) { ret = -ENOMEM; goto err_chardev; } ret = platform_device_add(daedalus_platform_device); if (ret) { platform_device_put(daedalus_platform_device); goto err_chardev; } ret = platform_driver_register(&daedalus_platform_driver); if (ret) { platform_device_unregister(daedalus_platform_device); goto err_chardev; } return 0; err_chardev: daedalus_chardev_exit(); return ret; } static void __exit daedalus_exit(void) { platform_driver_unregister(&daedalus_platform_driver); 
platform_device_unregister(daedalus_platform_device); daedalus_chardev_exit(); } module_init(daedalus_init); module_exit(daedalus_exit); MODULE_AUTHOR("Markus Fritsche "); MODULE_DESCRIPTION("V4L2 stateless decoder shim for daedalus-fourier (Pi 5 / VC7)"); MODULE_LICENSE("GPL v2"); MODULE_VERSION("0.0.2");