/* SPDX-License-Identifier: BSD-2-Clause */
/*
 * decoder.c — FFmpeg-driven decode helper for daedalus-v4l2 daemon.
 */

#include "decoder.h"
#include "ffmpeg_loader.h"
#include "h264_nal_synth.h"
#include "log.h"

/*
 * NOTE(review): the names of the six system headers were lost from the
 * copy this file was restored from (only bare "#include" tokens
 * survived). The list below is reconstructed from the standard
 * identifiers this file uses (errno codes, INT_MAX, size_t, uintN_t,
 * malloc/free, memset/memcpy). The libav and V4L2 names are presumably
 * supplied by the project headers above — confirm against the original.
 */
#include <errno.h>
#include <limits.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

/* Scratch sizes for the synthesised AnnexB parameter-set NAL units. */
enum {
	H264_SPS_NAL_MAX = 256,
	H264_PPS_NAL_MAX = 128
};

/*
 * FNV-1a 32-bit hash.  Used as a compact digest of the decoded
 * frame's YUV planes so the kernel can verify "the daemon produced
 * the expected pixels" without shipping the full frame through the
 * 64-KiB-capped chardev wire protocol.  Phase 8.5's dmabuf path
 * carries actual pixel data; this digest stays useful as a
 * cross-host sanity check.
 */
static uint32_t fnv1a32_init(void)
{
	return 0x811c9dc5u;	/* FNV-1a 32-bit offset basis */
}

/* Fold @len bytes at @data into running hash @h and return the result. */
static uint32_t fnv1a32_update(uint32_t h, const uint8_t *data, size_t len)
{
	size_t i;

	for (i = 0; i < len; i++)
		h = (h ^ data[i]) * 0x01000193u;	/* FNV 32-bit prime */
	return h;
}

/*
 * Hash plane @p (width @w bytes per line, @height lines, stride
 * @stride bytes between lines).  We strip libav's row alignment
 * padding so the hash matches the layout used by
 * `ffmpeg -f rawvideo` reference output (which is tightly packed).
 */
static uint32_t fnv1a32_plane(uint32_t h, const uint8_t *p, int w, int height, int stride)
{
	int y;

	for (y = 0; y < height; y++)
		h = fnv1a32_update(h, p + (size_t) y * (size_t) stride, (size_t) w);
	return h;
}

/*
 * Initialise @dec: zero it, remember @loader and allocate the reusable
 * AVPacket / AVFrame.  Codec contexts are opened lazily on first use.
 *
 * Returns 0 on success, -EINVAL if @dec or @loader is NULL, -ENOMEM if
 * either allocation fails (nothing is left allocated in that case).
 */
int daedalus_decoder_init(struct daedalus_decoder *dec, struct ffmpeg_loader *loader)
{
	if (!dec || !loader)
		return -EINVAL;

	memset(dec, 0, sizeof(*dec));
	dec->loader = loader;

	dec->pkt = loader->av_packet_alloc();
	if (!dec->pkt)
		return -ENOMEM;

	dec->frame = loader->av_frame_alloc();
	if (!dec->frame) {
		loader->av_packet_free(&dec->pkt);
		return -ENOMEM;
	}
	return 0;
}

/*
 * Release every codec context, the frame and the packet owned by @dec,
 * then zero the struct.  A NULL @dec, or one with no loader set, is a
 * no-op.
 */
void daedalus_decoder_cleanup(struct daedalus_decoder *dec)
{
	if (!dec || !dec->loader)
		return;

	if (dec->ctx_vp9)
		dec->loader->avcodec_free_context(&dec->ctx_vp9);
	if (dec->ctx_av1)
		dec->loader->avcodec_free_context(&dec->ctx_av1);
	if (dec->ctx_h264)
		dec->loader->avcodec_free_context(&dec->ctx_h264);
	if (dec->frame)
		dec->loader->av_frame_free(&dec->frame);
	if (dec->pkt)
		dec->loader->av_packet_free(&dec->pkt);

	memset(dec, 0, sizeof(*dec));
}

/*
 * Lazily open the AVCodecContext for @codec_id and hand it back through
 * @out.  An already-open context is returned from the per-codec cache
 * in @dec.
 *
 * Returns 0 on success, -ENOSYS on unknown codec, -ENOMEM if the
 * context cannot be allocated, -EIO on any other FFmpeg failure.
 */
static int decoder_open_codec(struct daedalus_decoder *dec, uint32_t codec_id, struct AVCodecContext **out)
{
	struct ffmpeg_loader *fm = dec->loader;
	const struct AVCodec *codec;
	struct AVCodecContext *ctx;
	enum AVCodecID av_id;
	struct AVCodecContext **cache;
	int rc;

	switch (codec_id) {
	case DAEDALUS_CODEC_VP9:
		av_id = AV_CODEC_ID_VP9;
		cache = &dec->ctx_vp9;
		break;
	case DAEDALUS_CODEC_AV1:
		av_id = AV_CODEC_ID_AV1;
		cache = &dec->ctx_av1;
		break;
	case DAEDALUS_CODEC_H264:
		av_id = AV_CODEC_ID_H264;
		cache = &dec->ctx_h264;
		break;
	default:
		log_warn("decoder: unknown codec_id %u", codec_id);
		return -ENOSYS;
	}

	if (*cache) {
		*out = *cache;
		return 0;
	}

	codec = fm->avcodec_find_decoder(av_id);
	if (!codec) {
		log_err("decoder: avcodec_find_decoder(%d) returned NULL", av_id);
		return -EIO;
	}

	ctx = fm->avcodec_alloc_context3(codec);
	if (!ctx)
		return -ENOMEM;

	rc = fm->avcodec_open2(ctx, codec, NULL);
	if (rc < 0) {
		log_err("decoder: avcodec_open2 failed: %d", rc);
		fm->avcodec_free_context(&ctx);
		return -EIO;
	}

	*cache = ctx;
	*out = ctx;
	log_info("decoder: opened %s context", codec->name);
	return 0;
}

/*
 * Shared precondition check for the pack_* helpers: the source must be
 * a three-plane 4:2:0 frame with @depth bits per luma sample, and the
 * destination must expose at least @min_planes planes.
 *
 * The data[] check keeps semi-planar sources (which have no third
 * plane) from being dereferenced by the per-plane chroma readers.
 *
 * Returns 0 if the frame can be packed, -EINVAL otherwise.
 */
static int pack_check_420(const struct AVFrame *fr, const AVPixFmtDescriptor *desc,
			  const struct daedalus_capture_planes *planes,
			  int min_planes, int depth)
{
	if (!desc || !planes || planes->nr < min_planes)
		return -EINVAL;
	if (desc->nb_components < 3)
		return -EINVAL;
	if (desc->log2_chroma_w != 1 || desc->log2_chroma_h != 1)
		return -EINVAL;	/* not 4:2:0 — would need a different pack */
	if (desc->comp[0].depth != depth)
		return -EINVAL;
	if (!fr->data[0] || !fr->data[1] || !fr->data[2])
		return -EINVAL;
	return 0;
}

/*
 * Pack 10-bit planar YUV420P10LE into V4L2_PIX_FMT_P010 single
 * plane: Y plane (width × 2 bytes per pixel, height rows) +
 * interleaved CbCr plane at half-res (cw*2 samples per row, ch
 * rows).  P010 stores 10-bit samples in 16-bit words,
 * MSB-aligned (low 6 bits zero).  libav's YUV420P10LE delivers
 * 10-bit samples in the LOW 10 bits, so we shift left by 6.
 *
 * The single-plane layout means Y and CbCr are concatenated in
 * planes->base[0]; planes->stride[0] is the Y stride (which we
 * also use for the CbCr rows since both have the same
 * per-line byte count for 4:2:0 with interleaved chroma).  A zero
 * stride falls back to a tightly packed w*2 bytes.
 *
 * NOTE(review): samples are read and written as native uint16_t, so
 * this presumably relies on a little-endian host — confirm.
 *
 * Returns 0 on success, -EINVAL if the source is not 10-bit planar
 * 4:2:0 or the destination plane is missing.
 */
static int pack_p010_to_plane(struct AVFrame *fr, const AVPixFmtDescriptor *desc, const struct daedalus_capture_planes *planes)
{
	int h = fr->height;
	int w = fr->width;
	int cw, ch, y, x;
	uint8_t *base;
	uint32_t stride;
	uint8_t *dst_y, *dst_uv;
	size_t y_size;
	int rc;

	/* Only 10-bit-per-sample sources packed into 16 bits per
	 * libav convention.  Anything else needs its own path. */
	rc = pack_check_420(fr, desc, planes, 1, 10);
	if (rc < 0)
		return rc;

	cw = AV_CEIL_RSHIFT(w, desc->log2_chroma_w);
	ch = AV_CEIL_RSHIFT(h, desc->log2_chroma_h);

	base = planes->base[0];
	stride = planes->stride[0] ? planes->stride[0] : (uint32_t) (w * 2);
	if (!base)
		return -EINVAL;

	dst_y = base;
	y_size = (size_t) stride * (size_t) h;
	dst_uv = base + y_size;	/* chroma starts right after the luma rows */

	/* Y plane: shift 10-bit → MSB-aligned 16-bit. */
	for (y = 0; y < h; y++) {
		const uint16_t *src = (const uint16_t *) (fr->data[0] + (size_t) y * fr->linesize[0]);
		uint16_t *dst = (uint16_t *) (dst_y + (size_t) y * stride);

		for (x = 0; x < w; x++)
			dst[x] = (uint16_t) (src[x] << 6);
	}

	/* Interleave Cb/Cr at half-res, also MSB-aligned. */
	for (y = 0; y < ch; y++) {
		const uint16_t *u = (const uint16_t *) (fr->data[1] + (size_t) y * fr->linesize[1]);
		const uint16_t *v = (const uint16_t *) (fr->data[2] + (size_t) y * fr->linesize[2]);
		uint16_t *dst = (uint16_t *) (dst_uv + (size_t) y * stride);

		for (x = 0; x < cw; x++) {
			dst[x * 2 + 0] = (uint16_t) (u[x] << 6);
			dst[x * 2 + 1] = (uint16_t) (v[x] << 6);
		}
	}
	return 0;
}

/*
 * Pack 8-bit planar YUV420P into V4L2_PIX_FMT_NV12 single plane:
 * Y plane (W*H bytes) followed by interleaved CbCr at half-res
 * (W*H/2 bytes) all in planes->base[0].  Same layout as P010
 * sans the depth shift.  For libva-v4l2-request-style clients
 * that expect num_planes=1 NV12.
 *
 * planes->stride[0] is used for both the Y and the CbCr rows; a zero
 * stride falls back to a tightly packed w bytes.
 *
 * Returns 0 on success, -EINVAL if the source is not 8-bit planar
 * 4:2:0 or the destination plane is missing.
 */
static int pack_nv12_single_to_plane(struct AVFrame *fr, const AVPixFmtDescriptor *desc, const struct daedalus_capture_planes *planes)
{
	int h = fr->height;
	int w = fr->width;
	int cw, ch, y, x;
	uint8_t *base;
	uint32_t stride;
	uint8_t *dst_y, *dst_uv;
	size_t y_size;
	int rc;

	rc = pack_check_420(fr, desc, planes, 1, 8);
	if (rc < 0)
		return rc;

	cw = AV_CEIL_RSHIFT(w, desc->log2_chroma_w);
	ch = AV_CEIL_RSHIFT(h, desc->log2_chroma_h);

	base = planes->base[0];
	stride = planes->stride[0] ? planes->stride[0] : (uint32_t) w;
	if (!base)
		return -EINVAL;

	dst_y = base;
	y_size = (size_t) stride * (size_t) h;
	dst_uv = base + y_size;	/* chroma starts right after the luma rows */

	/* Y plane copy — strip source stride padding. */
	for (y = 0; y < h; y++)
		memcpy(dst_y + (size_t) y * stride,
		       fr->data[0] + (size_t) y * fr->linesize[0],
		       (size_t) w);

	/* Interleave Cb and Cr into the NV12 chroma rows. */
	for (y = 0; y < ch; y++) {
		const uint8_t *u = fr->data[1] + (size_t) y * fr->linesize[1];
		const uint8_t *v = fr->data[2] + (size_t) y * fr->linesize[2];
		uint8_t *row = dst_uv + (size_t) y * stride;

		for (x = 0; x < cw; x++) {
			row[x * 2 + 0] = u[x];
			row[x * 2 + 1] = v[x];
		}
	}
	return 0;
}

/*
 * Pack the decoded YUV planes into NV12M layout across two
 * mapped CAPTURE planes:
 *   planes[0] = Y, written w bytes per row with stride dst_y_stride
 *   planes[1] = interleaved CbCr at half-res, two bytes per chroma
 *               sample, written cw*2 bytes per row with stride
 *               dst_uv_stride
 *
 * Source stride padding (fr->linesize[*]) is stripped; destination
 * stride padding (dst_stride - row_bytes) is left as-is — the V4L2
 * client knows the format's bytesperline and walks accordingly.
 * A zero destination stride falls back to the tightly packed row size.
 *
 * Returns 0 on success, -EINVAL if the source is not 8-bit planar
 * 4:2:0 (Phase 8.6 still expects yuv420p-class outputs; 8.7 widens)
 * or a destination plane is missing.
 */
static int pack_nv12_to_planes(struct AVFrame *fr, const AVPixFmtDescriptor *desc, const struct daedalus_capture_planes *planes)
{
	int h = fr->height;
	int w = fr->width;
	int cw, ch;
	int y, x;
	uint8_t *dst_y, *dst_uv;
	uint32_t dst_y_stride, dst_uv_stride;
	int rc;

	/*
	 * The copy below moves one byte per sample, so deeper sources
	 * (stored as 16-bit words) must be rejected rather than packed
	 * as garbage — same rule as the single-plane NV12 path.
	 */
	rc = pack_check_420(fr, desc, planes, 2, 8);
	if (rc < 0)
		return rc;

	cw = AV_CEIL_RSHIFT(w, desc->log2_chroma_w);
	ch = AV_CEIL_RSHIFT(h, desc->log2_chroma_h);

	dst_y = planes->base[0];
	dst_uv = planes->base[1];
	dst_y_stride = planes->stride[0] ? planes->stride[0] : (uint32_t) w;
	dst_uv_stride = planes->stride[1] ? planes->stride[1] : (uint32_t) (cw * 2);
	if (!dst_y || !dst_uv)
		return -EINVAL;

	/* Y plane copy — strip source stride padding. */
	for (y = 0; y < h; y++)
		memcpy(dst_y + (size_t) y * dst_y_stride,
		       fr->data[0] + (size_t) y * fr->linesize[0],
		       (size_t) w);

	/* Interleave Cb and Cr into NV12 chroma plane. */
	for (y = 0; y < ch; y++) {
		const uint8_t *u = fr->data[1] + (size_t) y * fr->linesize[1];
		const uint8_t *v = fr->data[2] + (size_t) y * fr->linesize[2];
		uint8_t *row = dst_uv + (size_t) y * dst_uv_stride;

		for (x = 0; x < cw; x++) {
			row[x * 2 + 0] = u[x];
			row[x * 2 + 1] = v[x];
		}
	}
	return 0;
}

/*
 * Digest every populated plane of @fr and report the per-class sample
 * counts.
 *
 * Planes 0..max(desc->comp[i].plane) are visited; a plane with a NULL
 * data pointer or zero linesize is skipped.  Plane 0 is hashed at
 * width × height; every other plane at the chroma-subsampled size
 * (rounded up).  Each row contributes exactly that many BYTES to the
 * hash, so the digest only covers whole rows for planes holding one
 * byte per pixel position; planes with multi-byte samples or
 * interleaved components are hashed over the leading part of each row
 * only.
 *
 * @luma_len receives width*height of plane 0, @chroma_len the summed
 * sizes of the remaining planes.  Returns the FNV-1a digest.
 */
static uint32_t hash_frame_planes(const struct AVFrame *fr, const AVPixFmtDescriptor *desc,
				  uint32_t *luma_len, uint32_t *chroma_len)
{
	uint32_t h = fnv1a32_init();
	int max_plane = 0;
	int p, i;

	*luma_len = 0;
	*chroma_len = 0;

	for (i = 0; i < desc->nb_components; i++) {
		if (desc->comp[i].plane > max_plane)
			max_plane = desc->comp[i].plane;
	}

	for (p = 0; p <= max_plane; p++) {
		int pw, ph;

		if (!fr->data[p] || !fr->linesize[p])
			continue;

		if (p == 0) {
			pw = fr->width;
			ph = fr->height;
			*luma_len += (uint32_t) pw * (uint32_t) ph;
		} else {
			pw = AV_CEIL_RSHIFT(fr->width, desc->log2_chroma_w);
			ph = AV_CEIL_RSHIFT(fr->height, desc->log2_chroma_h);
			*chroma_len += (uint32_t) pw * (uint32_t) ph;
		}
		h = fnv1a32_plane(h, fr->data[p], pw, ph, fr->linesize[p]);
	}
	return h;
}

/*
 * Decode one request.
 *
 * Opens (or reuses) the codec context for req->codec_id, feeds
 * @bitstream (req->bitstream_len bytes) to libavcodec, and on success
 * fills @resp with the frame geometry, pixel format and an FNV-1a
 * digest of the planes.  If @planes is non-NULL with at least one
 * plane, the pixels are also packed into it according to
 * req->capture_pix_fmt; a pack failure is logged but does not change
 * the OK status.
 *
 * @h264_meta may be NULL; when it is non-NULL and the codec is H.264,
 * SPS/PPS NAL units synthesised from it are prepended to the slice.
 *
 * The outcome is reported through resp->status; the function itself
 * always returns 0.
 */
int daedalus_decoder_run_request(struct daedalus_decoder *dec, const struct daedalus_req_decode *req, const uint8_t *bitstream, const struct daedalus_h264_meta *h264_meta, struct daedalus_resp_frame *resp, const struct daedalus_capture_planes *planes)
{
	struct ffmpeg_loader *fm = dec->loader;
	struct AVCodecContext *ctx = NULL;
	uint8_t *assembled = NULL;
	/* Largest slice that still fits AVPacket's int size once the
	 * synthesised headers and the input padding are added. */
	const size_t max_slice_len = (size_t) INT_MAX - H264_SPS_NAL_MAX -
				     H264_PPS_NAL_MAX - AV_INPUT_BUFFER_PADDING_SIZE;
	int rc;

	memset(resp, 0, sizeof(*resp));
	resp->codec_id = req->codec_id;

	rc = decoder_open_codec(dec, req->codec_id, &ctx);
	if (rc == -ENOSYS) {
		resp->status = DAEDALUS_DECODE_ERR_CODEC;
		goto out;
	}
	if (rc < 0) {
		resp->status = DAEDALUS_DECODE_ERR_OPEN;
		goto out;
	}

	/*
	 * libavcodec treats a packet with data == NULL and size == 0 as
	 * a flush request, which would leave the cached context draining.
	 * Reject empty input here, along with lengths that would not
	 * survive the cast to AVPacket's int size.
	 */
	if (!bitstream || req->bitstream_len == 0 ||
	    (size_t) req->bitstream_len > max_slice_len) {
		log_err("decoder: rejecting bitstream (ptr=%p len=%zu)",
			(const void *) bitstream, (size_t) req->bitstream_len);
		resp->status = DAEDALUS_DECODE_ERR_SEND;
		goto out;
	}

	fm->av_packet_unref(dec->pkt);

	/*
	 * H.264 path: libavcodec needs SPS+PPS NAL units BEFORE the
	 * slice can be decoded.  libva-v4l2-request passes those as
	 * separate V4L2 controls (per the stateless API), so the
	 * daedalus kernel module forwards them to us as struct
	 * daedalus_h264_meta.  Synthesise AnnexB SPS+PPS NALs from
	 * the structs and prepend them to @bitstream before feeding
	 * libavcodec.
	 */
	if (req->codec_id == DAEDALUS_CODEC_H264 && h264_meta) {
		uint8_t sps_nal[H264_SPS_NAL_MAX];
		uint8_t pps_nal[H264_PPS_NAL_MAX];
		size_t sps_len, pps_len, assembled_len;

		sps_len = h264_synth_sps(&h264_meta->sps, sps_nal, sizeof(sps_nal));
		pps_len = h264_synth_pps(&h264_meta->pps, pps_nal, sizeof(pps_nal));
		if (sps_len == 0 || pps_len == 0) {
			log_err("decoder: SPS/PPS NAL synth failed (sps=%zu pps=%zu)",
				sps_len, pps_len);
			resp->status = DAEDALUS_DECODE_ERR_SEND;
			goto out;
		}

		assembled_len = sps_len + pps_len + req->bitstream_len;
		assembled = malloc(assembled_len + AV_INPUT_BUFFER_PADDING_SIZE);
		if (!assembled) {
			resp->status = DAEDALUS_DECODE_ERR_SEND;
			goto out;
		}

		memcpy(assembled, sps_nal, sps_len);
		memcpy(assembled + sps_len, pps_nal, pps_len);
		memcpy(assembled + sps_len + pps_len, bitstream, req->bitstream_len);
		/* libavcodec requires the trailing padding to be zeroed. */
		memset(assembled + assembled_len, 0, AV_INPUT_BUFFER_PADDING_SIZE);

		dec->pkt->data = assembled;
		dec->pkt->size = (int) assembled_len;
		log_debug("decoder: h264 prepended SPS=%zuB PPS=%zuB slice=%uB",
			  sps_len, pps_len, req->bitstream_len);
	} else {
		/*
		 * VP9/AV1: bitstream is self-contained per frame, point the
		 * AVPacket at it directly.  Cast away const — AVPacket->data
		 * is non-const but avcodec_send_packet doesn't mutate it.
		 */
		dec->pkt->data = (uint8_t *) (uintptr_t) bitstream;
		dec->pkt->size = (int) req->bitstream_len;
	}

	rc = fm->avcodec_send_packet(ctx, dec->pkt);
	if (rc < 0) {
		log_err("decoder: avcodec_send_packet failed: %d", rc);
		resp->status = DAEDALUS_DECODE_ERR_SEND;
		goto out;
	}

	fm->av_frame_unref(dec->frame);
	rc = fm->avcodec_receive_frame(ctx, dec->frame);
	if (rc == AVERROR(EAGAIN) || rc == AVERROR_EOF) {
		log_debug("decoder: no frame ready yet (rc=%d)", rc);
		resp->status = DAEDALUS_DECODE_NO_FRAME;
		goto out;
	}
	if (rc < 0) {
		log_err("decoder: avcodec_receive_frame failed: %d", rc);
		resp->status = DAEDALUS_DECODE_ERR_RECV;
		goto out;
	}

	{
		struct AVFrame *fr = dec->frame;
		const AVPixFmtDescriptor *desc = fm->av_pix_fmt_desc_get(fr->format);
		uint32_t digest = fnv1a32_init();
		uint32_t luma_len = 0, chroma_len = 0;

		resp->status = DAEDALUS_DECODE_OK;
		resp->width = (uint32_t) fr->width;
		resp->height = (uint32_t) fr->height;
		resp->pix_fmt = fr->format;

		/*
		 * Digest the planes the descriptor reports.  Without a
		 * descriptor the plane geometry is unknown, so the digest
		 * stays at the FNV offset basis and both lengths stay 0.
		 *
		 * The per-row byte count equals the plane width in
		 * pixels, which is exact for 8-bit one-component-per-plane
		 * formats (YUV420P, YUV422P, YUV444P, GBRP); 10/12-bit
		 * formats will need depth handling when HDR content comes
		 * into play.
		 */
		if (!desc)
			log_warn("decoder: no descriptor for pix_fmt %d", fr->format);
		else
			digest = hash_frame_planes(fr, desc, &luma_len, &chroma_len);

		resp->luma_len = luma_len;
		resp->chroma_len = chroma_len;
		resp->fnv1a_yuv = digest;

		/*
		 * Pack pixels directly into the mapped CAPTURE dmabuf
		 * planes.  Dispatch on the V4L2 fourcc the kernel
		 * negotiated:
		 *   V4L2_PIX_FMT_NV12M (default, 8-bit, 2 planes)
		 *   V4L2_PIX_FMT_NV12  (8-bit, 1 plane)
		 *   V4L2_PIX_FMT_P010  (10-bit HDR, 1 plane)
		 */
		if (planes && planes->nr >= 1) {
			int prc = 0;

			switch (req->capture_pix_fmt) {
			case V4L2_PIX_FMT_NV12M:
				prc = pack_nv12_to_planes(fr, desc, planes);
				break;
			case V4L2_PIX_FMT_NV12:
				prc = pack_nv12_single_to_plane(fr, desc, planes);
				break;
			case V4L2_PIX_FMT_P010:
				prc = pack_p010_to_plane(fr, desc, planes);
				break;
			default:
				log_warn("decoder: unsupported capture fourcc 0x%08x",
					 req->capture_pix_fmt);
				prc = -EINVAL;
				break;
			}
			/* Best effort: the response still carries the metadata. */
			if (prc < 0)
				log_warn("decoder: pack failed (pix_fmt=%d cap_fourcc=0x%08x) — kernel will see metadata only",
					 fr->format, req->capture_pix_fmt);
		}

		log_info("decoder: OK %dx%d fmt=%d (%s) fnv1a=0x%08x luma=%u chroma=%u",
			 fr->width, fr->height, fr->format,
			 desc ? desc->name : "?", digest, luma_len, chroma_len);
	}

	fm->av_frame_unref(dec->frame);

out:
	/*
	 * The packet only borrowed its payload (the caller's buffer or
	 * @assembled); drop the pointer so the long-lived AVPacket never
	 * refers to memory that is about to be freed or reused.
	 */
	if (dec->pkt) {
		dec->pkt->data = NULL;
		dec->pkt->size = 0;
	}
	free(assembled);
	return 0;
}