8c1d9960c4
libva-v4l2-request-fourier (and any V4L2-stateless-API consumer)
passes H.264 SPS/PPS as separate V4L2_CID_STATELESS_H264_{SPS,PPS}
controls; only the slice NAL goes into the OUTPUT buffer. This is
correct per the V4L2 stateless contract. But libavcodec — which
the daedalus daemon uses for actual decode (Option γ) — wants a
self-contained AnnexB stream including SPS+PPS before any slice.
Result on higgs: "non-existing PPS 0 referenced" + decode_slice_
header errors on every H.264 frame, even after LIBVA-1 and -2
routing correctly delivered the request to the daemon.
Fix splits across kernel + daemon, keeping the kernel module as a
thin transport and putting the actual NAL encoding in userspace:
include/daedalus_v4l2_proto.h:
Add struct daedalus_h264_meta (the four v4l2_ctrl_h264_*
structs the kernel collects) and DAEDALUS_REQ_FLAG_H264_META
(set in req.flags when the meta block is present between the
daedalus_req_decode prefix and the slice bitstream).
kernel/daedalus_v4l2_main.c:
Add daedalus_collect_h264_meta() — reads the H.264 ctrl values
from the bound media_request via v4l2_ctrl_find +
ctrl->p_cur.p_h264_*. device_run() calls it on H.264 codec_id,
copies the structs into the REQ_DECODE payload between the
prefix and bitstream, and sets the flag. Payload size is
bounds-checked against DAEDALUS_PROTO_MAX_PAYLOAD so an over-
sized slice + meta fails loud instead of truncating.
daemon/src/bitstream_writer.{c,h}:
New module — MSB-first bit packer with H.264 Exp-Golomb ue(v)
and se(v) coding + rbsp_trailing_bits alignment. Sticky
overflow flag so callers can verify the output buffer wasn't
truncated.
daemon/src/h264_nal_synth.{c,h}:
New module — turns v4l2_ctrl_h264_sps / v4l2_ctrl_h264_pps
into AnnexB-framed NAL units per ITU-T H.264 7.3.2.1 / 7.3.2.2.
Emits emulation prevention bytes (0x03 after every 00 00 in the
EBSP) and the 4-byte start code (0x00000001). Coverage matches
what V4L2 stateless surface gives us: VUI parameters and full
scaling matrices are NOT emitted (V4L2 doesn't carry them — the
seq_scaling_matrix_present_flag is set to 0 and libavcodec uses
flat defaults, which matches the de-facto behaviour of most
H.264 streams libva-v4l2-request drives).
daemon/src/decoder.c:
daedalus_decoder_run_request() now takes an optional
h264_meta parameter. For codec_id == H264 with meta != NULL,
synthesises SPS+PPS NAL units, allocates a combined
[SPS][PPS][slice] buffer (+ AV_INPUT_BUFFER_PADDING_SIZE), and
feeds that to avcodec_send_packet instead of the raw slice.
VP9/AV1 path unchanged (frames are self-contained). Cleanup
now goes through a unified `out:` label so the assembled
buffer is always freed on every exit (including the existing
decoder_open_codec / no-frame / receive_frame failure paths).
daemon/src/chardev_client.c:
handle_req_decode() peels off the optional meta block when the
flag is set, passes it through to the decoder, and updates
the payload-length consistency check (now allows for an extra
sizeof(daedalus_h264_meta) when the flag is on).
Build (boltzmann aarch64): clean compile of all daemon sources,
including bitstream_writer + h264_nal_synth + the refactored
decoder.c. Kernel module compile to be verified via DKMS rebuild
on higgs in the marfrit-packages bump that follows.
Test plan: with this commit + a marfrit-packages daedalus pin
bump, higgs's ffmpeg -hwaccel vaapi -i h264_test.mp4 should
produce a successful decode (vs. the previous "non-existing PPS 0
referenced" failure). The daemon log should show:
decoder: opened h264 context
decoder: h264 prepended SPS=NB PPS=MB slice=KB
decoder: OK 320x240 fmt=0 (yuv420p) fnv1a=0x...
VP9 / AV1 behaviour unchanged — they don't carry meta and the
existing per-frame self-describing path still applies.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
561 lines
16 KiB
C
561 lines
16 KiB
C
/* SPDX-License-Identifier: BSD-2-Clause */
|
||
/*
|
||
* decoder.c — FFmpeg-driven decode helper for daedalus-v4l2 daemon.
|
||
*/
|
||
#include "decoder.h"
#include "ffmpeg_loader.h"
#include "h264_nal_synth.h"
#include "log.h"

#include <errno.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>

#include <linux/videodev2.h>

#include <libavcodec/avcodec.h>
#include <libavutil/pixfmt.h>
|
||
|
||
/*
 * FNV-1a, 32-bit variant.  The daemon uses it as a compact digest of
 * the decoded frame's YUV planes so the kernel can verify "the daemon
 * produced the expected pixels" without shipping the full frame
 * through the 64-KiB-capped chardev wire protocol.  Phase 8.5's dmabuf
 * path carries actual pixel data; this digest stays useful as a
 * cross-host sanity check.
 *
 * fnv1a32_init() returns the starting state (the 32-bit FNV offset
 * basis) that fnv1a32_update() is then fed with.
 */
static uint32_t fnv1a32_init(void)
{
	const uint32_t offset_basis = 0x811c9dc5u;

	return offset_basis;
}
|
||
|
||
/*
 * Fold @len bytes starting at @data into the running FNV-1a state @h
 * and return the new state.  Each byte is XORed into the state, which
 * is then multiplied by the 32-bit FNV prime (0x01000193) with normal
 * unsigned wrap-around.  @len == 0 returns @h unchanged and never
 * touches @data.
 */
static uint32_t fnv1a32_update(uint32_t h, const uint8_t *data, size_t len)
{
	while (len-- > 0) {
		h ^= *data++;
		h *= 0x01000193u;
	}
	return h;
}
|
||
|
||
/*
 * Fold one image plane into the running hash @h and return the new
 * state.  The plane starts at @p and consists of @height rows; each
 * row contributes its first @w bytes, and consecutive rows start
 * @stride bytes apart.  Bytes between @w and @stride (libav's row
 * alignment padding) are skipped so the digest matches the tightly
 * packed layout of `ffmpeg -f rawvideo` reference output.
 */
static uint32_t fnv1a32_plane(uint32_t h, const uint8_t *p,
			      int w, int height, int stride)
{
	const size_t row_bytes = (size_t) w;
	const size_t pitch = (size_t) stride;
	int row = 0;

	while (row < height) {
		h = fnv1a32_update(h, p + (size_t) row * pitch, row_bytes);
		row++;
	}
	return h;
}
|
||
|
||
int daedalus_decoder_init(struct daedalus_decoder *dec,
|
||
struct ffmpeg_loader *loader)
|
||
{
|
||
memset(dec, 0, sizeof(*dec));
|
||
dec->loader = loader;
|
||
|
||
dec->pkt = loader->av_packet_alloc();
|
||
if (!dec->pkt)
|
||
return -ENOMEM;
|
||
dec->frame = loader->av_frame_alloc();
|
||
if (!dec->frame) {
|
||
loader->av_packet_free(&dec->pkt);
|
||
return -ENOMEM;
|
||
}
|
||
return 0;
|
||
}
|
||
|
||
void daedalus_decoder_cleanup(struct daedalus_decoder *dec)
|
||
{
|
||
if (!dec || !dec->loader)
|
||
return;
|
||
if (dec->ctx_vp9)
|
||
dec->loader->avcodec_free_context(&dec->ctx_vp9);
|
||
if (dec->ctx_av1)
|
||
dec->loader->avcodec_free_context(&dec->ctx_av1);
|
||
if (dec->ctx_h264)
|
||
dec->loader->avcodec_free_context(&dec->ctx_h264);
|
||
if (dec->frame)
|
||
dec->loader->av_frame_free(&dec->frame);
|
||
if (dec->pkt)
|
||
dec->loader->av_packet_free(&dec->pkt);
|
||
memset(dec, 0, sizeof(*dec));
|
||
}
|
||
|
||
/*
|
||
* Lazily open the AVCodecContext for codec_id. Returns 0 on
|
||
* success, -ENOSYS on unknown codec, -EIO on FFmpeg failure.
|
||
*/
|
||
static int decoder_open_codec(struct daedalus_decoder *dec, uint32_t codec_id,
|
||
struct AVCodecContext **out)
|
||
{
|
||
struct ffmpeg_loader *fm = dec->loader;
|
||
const struct AVCodec *codec;
|
||
struct AVCodecContext *ctx;
|
||
enum AVCodecID av_id;
|
||
struct AVCodecContext **cache;
|
||
int rc;
|
||
|
||
switch (codec_id) {
|
||
case DAEDALUS_CODEC_VP9:
|
||
av_id = AV_CODEC_ID_VP9;
|
||
cache = &dec->ctx_vp9;
|
||
break;
|
||
case DAEDALUS_CODEC_AV1:
|
||
av_id = AV_CODEC_ID_AV1;
|
||
cache = &dec->ctx_av1;
|
||
break;
|
||
case DAEDALUS_CODEC_H264:
|
||
av_id = AV_CODEC_ID_H264;
|
||
cache = &dec->ctx_h264;
|
||
break;
|
||
default:
|
||
log_warn("decoder: unknown codec_id %u", codec_id);
|
||
return -ENOSYS;
|
||
}
|
||
|
||
if (*cache) {
|
||
*out = *cache;
|
||
return 0;
|
||
}
|
||
|
||
codec = fm->avcodec_find_decoder(av_id);
|
||
if (!codec) {
|
||
log_err("decoder: avcodec_find_decoder(%d) returned NULL", av_id);
|
||
return -EIO;
|
||
}
|
||
ctx = fm->avcodec_alloc_context3(codec);
|
||
if (!ctx)
|
||
return -ENOMEM;
|
||
rc = fm->avcodec_open2(ctx, codec, NULL);
|
||
if (rc < 0) {
|
||
log_err("decoder: avcodec_open2 failed: %d", rc);
|
||
fm->avcodec_free_context(&ctx);
|
||
return -EIO;
|
||
}
|
||
|
||
*cache = ctx;
|
||
*out = ctx;
|
||
log_info("decoder: opened %s context", codec->name);
|
||
return 0;
|
||
}
|
||
|
||
/*
 * NOTE: this comment documents pack_nv12_to_planes(), the two-plane
 * NV12M packer defined further down in this file (after the
 * single-plane P010 and NV12 packers).  It is not the header of the
 * function that immediately follows it.
 *
 * Pack the decoded YUV planes into NV12M layout across two
 * mapped CAPTURE planes:
 *   planes[0] = Y, written w bytes per row with stride dst_y_stride
 *   planes[1] = interleaved CbCr at half-res, two bytes per chroma
 *               sample, written cw*2 bytes per row with stride
 *               dst_uv_stride
 *
 * Source stride padding (fr->linesize[*]) is stripped; destination
 * stride padding (dst_stride - row_bytes) is left as-is — the V4L2
 * client knows the format's bytesperline and walks accordingly.
 *
 * Returns 0 on success, -EINVAL if the source is not planar 4:2:0
 * (Phase 8.6 still expects yuv420p-class outputs; 8.7 widens).
 */
|
||
/*
|
||
* Pack 10-bit planar YUV420P10LE into V4L2_PIX_FMT_P010 single
|
||
* plane: Y plane (width × 2 bytes per pixel, height rows) +
|
||
* interleaved CbCr plane at half-res (cw*2 bytes per row, ch
|
||
* rows). P010 stores 10-bit samples in 16-bit words,
|
||
* MSB-aligned (low 6 bits zero). libav's YUV420P10LE delivers
|
||
* 10-bit samples in the LOW 10 bits, so we shift left by 6.
|
||
*
|
||
* The single-plane layout means Y and CbCr are concatenated in
|
||
* planes->base[0]; planes->stride[0] is the Y stride (which we
|
||
* also use for the CbCr rows since both have the same
|
||
* per-line byte count for 4:2:0 with interleaved chroma).
|
||
*/
|
||
static int pack_p010_to_plane(struct AVFrame *fr,
|
||
const AVPixFmtDescriptor *desc,
|
||
const struct daedalus_capture_planes *planes)
|
||
{
|
||
int h = fr->height;
|
||
int w = fr->width;
|
||
int cw, ch, y, x;
|
||
uint8_t *base;
|
||
uint32_t stride;
|
||
uint8_t *dst_y, *dst_uv;
|
||
size_t y_size;
|
||
|
||
if (!desc || !planes || planes->nr < 1)
|
||
return -EINVAL;
|
||
if (desc->nb_components < 3)
|
||
return -EINVAL;
|
||
if (desc->log2_chroma_w != 1 || desc->log2_chroma_h != 1)
|
||
return -EINVAL;
|
||
/* Only 10-bit-per-sample sources packed into 16 bits per
|
||
* libav convention. Anything else needs its own path. */
|
||
if (desc->comp[0].depth != 10)
|
||
return -EINVAL;
|
||
|
||
cw = AV_CEIL_RSHIFT(w, desc->log2_chroma_w);
|
||
ch = AV_CEIL_RSHIFT(h, desc->log2_chroma_h);
|
||
|
||
base = planes->base[0];
|
||
stride = planes->stride[0] ? planes->stride[0] : (uint32_t) (w * 2);
|
||
if (!base)
|
||
return -EINVAL;
|
||
|
||
dst_y = base;
|
||
y_size = (size_t) stride * (size_t) h;
|
||
dst_uv = base + y_size;
|
||
|
||
/* Y plane: shift 10-bit → MSB-aligned 16-bit. */
|
||
for (y = 0; y < h; y++) {
|
||
const uint16_t *src = (const uint16_t *) (fr->data[0] +
|
||
(size_t) y * fr->linesize[0]);
|
||
uint16_t *dst = (uint16_t *) (dst_y +
|
||
(size_t) y * stride);
|
||
for (x = 0; x < w; x++)
|
||
dst[x] = (uint16_t) (src[x] << 6);
|
||
}
|
||
|
||
/* Interleave Cb/Cr at half-res, also MSB-aligned. */
|
||
for (y = 0; y < ch; y++) {
|
||
const uint16_t *u = (const uint16_t *) (fr->data[1] +
|
||
(size_t) y * fr->linesize[1]);
|
||
const uint16_t *v = (const uint16_t *) (fr->data[2] +
|
||
(size_t) y * fr->linesize[2]);
|
||
uint16_t *dst = (uint16_t *) (dst_uv +
|
||
(size_t) y * stride);
|
||
for (x = 0; x < cw; x++) {
|
||
dst[x * 2 + 0] = (uint16_t) (u[x] << 6);
|
||
dst[x * 2 + 1] = (uint16_t) (v[x] << 6);
|
||
}
|
||
}
|
||
return 0;
|
||
}
|
||
|
||
/*
|
||
* Pack 8-bit planar YUV420P into V4L2_PIX_FMT_NV12 single plane:
|
||
* Y plane (W*H bytes) followed by interleaved CbCr at half-res
|
||
* (W*H/2 bytes) all in planes->base[0]. Same layout as P010
|
||
* sans the depth shift. For libva-v4l2-request-style clients
|
||
* that expect num_planes=1 NV12.
|
||
*/
|
||
static int pack_nv12_single_to_plane(struct AVFrame *fr,
|
||
const AVPixFmtDescriptor *desc,
|
||
const struct daedalus_capture_planes *planes)
|
||
{
|
||
int h = fr->height;
|
||
int w = fr->width;
|
||
int cw, ch, y, x;
|
||
uint8_t *base;
|
||
uint32_t stride;
|
||
uint8_t *dst_y, *dst_uv;
|
||
size_t y_size;
|
||
|
||
if (!desc || !planes || planes->nr < 1)
|
||
return -EINVAL;
|
||
if (desc->nb_components < 3)
|
||
return -EINVAL;
|
||
if (desc->log2_chroma_w != 1 || desc->log2_chroma_h != 1)
|
||
return -EINVAL;
|
||
if (desc->comp[0].depth != 8)
|
||
return -EINVAL;
|
||
|
||
cw = AV_CEIL_RSHIFT(w, desc->log2_chroma_w);
|
||
ch = AV_CEIL_RSHIFT(h, desc->log2_chroma_h);
|
||
|
||
base = planes->base[0];
|
||
stride = planes->stride[0] ? planes->stride[0] : (uint32_t) w;
|
||
if (!base)
|
||
return -EINVAL;
|
||
|
||
dst_y = base;
|
||
y_size = (size_t) stride * (size_t) h;
|
||
dst_uv = base + y_size;
|
||
|
||
for (y = 0; y < h; y++)
|
||
memcpy(dst_y + (size_t) y * stride,
|
||
fr->data[0] + (size_t) y * fr->linesize[0],
|
||
(size_t) w);
|
||
|
||
for (y = 0; y < ch; y++) {
|
||
const uint8_t *u = fr->data[1] +
|
||
(size_t) y * fr->linesize[1];
|
||
const uint8_t *v = fr->data[2] +
|
||
(size_t) y * fr->linesize[2];
|
||
uint8_t *row = dst_uv + (size_t) y * stride;
|
||
for (x = 0; x < cw; x++) {
|
||
row[x * 2 + 0] = u[x];
|
||
row[x * 2 + 1] = v[x];
|
||
}
|
||
}
|
||
return 0;
|
||
}
|
||
|
||
static int pack_nv12_to_planes(struct AVFrame *fr,
|
||
const AVPixFmtDescriptor *desc,
|
||
const struct daedalus_capture_planes *planes)
|
||
{
|
||
int h = fr->height;
|
||
int w = fr->width;
|
||
int cw, ch;
|
||
size_t row_y, row_uv;
|
||
int y, x;
|
||
uint8_t *dst_y, *dst_uv;
|
||
uint32_t dst_y_stride, dst_uv_stride;
|
||
|
||
if (!desc || !planes || planes->nr < 2)
|
||
return -EINVAL;
|
||
if (desc->nb_components < 3)
|
||
return -EINVAL;
|
||
if (desc->log2_chroma_w != 1 || desc->log2_chroma_h != 1)
|
||
return -EINVAL; /* not 4:2:0 — would need a different pack */
|
||
|
||
cw = AV_CEIL_RSHIFT(w, desc->log2_chroma_w);
|
||
ch = AV_CEIL_RSHIFT(h, desc->log2_chroma_h);
|
||
|
||
dst_y = planes->base[0];
|
||
dst_uv = planes->base[1];
|
||
dst_y_stride = planes->stride[0] ? planes->stride[0] : (uint32_t) w;
|
||
dst_uv_stride = planes->stride[1] ? planes->stride[1] : (uint32_t) (cw * 2);
|
||
|
||
row_y = (size_t) w;
|
||
row_uv = (size_t) cw * 2u;
|
||
|
||
if (!dst_y || !dst_uv)
|
||
return -EINVAL;
|
||
|
||
/* Y plane copy — strip source stride padding. */
|
||
for (y = 0; y < h; y++)
|
||
memcpy(dst_y + (size_t) y * dst_y_stride,
|
||
fr->data[0] + (size_t) y * fr->linesize[0],
|
||
row_y);
|
||
|
||
/* Interleave Cb and Cr into NV12 chroma plane. */
|
||
for (y = 0; y < ch; y++) {
|
||
const uint8_t *u = fr->data[1] +
|
||
(size_t) y * fr->linesize[1];
|
||
const uint8_t *v = fr->data[2] +
|
||
(size_t) y * fr->linesize[2];
|
||
uint8_t *row = dst_uv + (size_t) y * dst_uv_stride;
|
||
for (x = 0; x < cw; x++) {
|
||
row[x * 2 + 0] = u[x];
|
||
row[x * 2 + 1] = v[x];
|
||
}
|
||
}
|
||
(void) row_uv;
|
||
return 0;
|
||
}
|
||
|
||
int daedalus_decoder_run_request(struct daedalus_decoder *dec,
|
||
const struct daedalus_req_decode *req,
|
||
const uint8_t *bitstream,
|
||
const struct daedalus_h264_meta *h264_meta,
|
||
struct daedalus_resp_frame *resp,
|
||
const struct daedalus_capture_planes *planes)
|
||
{
|
||
struct ffmpeg_loader *fm = dec->loader;
|
||
struct AVCodecContext *ctx = NULL;
|
||
uint8_t *assembled = NULL;
|
||
size_t assembled_len = 0;
|
||
int rc;
|
||
|
||
memset(resp, 0, sizeof(*resp));
|
||
resp->codec_id = req->codec_id;
|
||
|
||
rc = decoder_open_codec(dec, req->codec_id, &ctx);
|
||
if (rc == -ENOSYS) {
|
||
resp->status = DAEDALUS_DECODE_ERR_CODEC;
|
||
goto out;
|
||
}
|
||
if (rc < 0) {
|
||
resp->status = DAEDALUS_DECODE_ERR_OPEN;
|
||
goto out;
|
||
}
|
||
|
||
fm->av_packet_unref(dec->pkt);
|
||
|
||
/*
|
||
* H.264 path: libavcodec needs SPS+PPS NAL units BEFORE the
|
||
* slice can be decoded. libva-v4l2-request passes those as
|
||
* separate V4L2 controls (per the stateless API), so the
|
||
* daedalus kernel module forwards them to us as struct
|
||
* daedalus_h264_meta. Synthesise AnnexB SPS+PPS NALs from
|
||
* the structs and prepend them to @bitstream before feeding
|
||
* libavcodec.
|
||
*/
|
||
if (req->codec_id == DAEDALUS_CODEC_H264 && h264_meta) {
|
||
uint8_t sps_nal[256];
|
||
uint8_t pps_nal[128];
|
||
size_t sps_len, pps_len;
|
||
|
||
sps_len = h264_synth_sps(&h264_meta->sps,
|
||
sps_nal, sizeof(sps_nal));
|
||
pps_len = h264_synth_pps(&h264_meta->pps,
|
||
pps_nal, sizeof(pps_nal));
|
||
if (sps_len == 0 || pps_len == 0) {
|
||
log_err("decoder: SPS/PPS NAL synth failed (sps=%zu pps=%zu)",
|
||
sps_len, pps_len);
|
||
resp->status = DAEDALUS_DECODE_ERR_SEND;
|
||
goto out;
|
||
}
|
||
|
||
assembled_len = sps_len + pps_len + req->bitstream_len;
|
||
assembled = malloc(assembled_len + AV_INPUT_BUFFER_PADDING_SIZE);
|
||
if (!assembled) {
|
||
resp->status = DAEDALUS_DECODE_ERR_SEND;
|
||
goto out;
|
||
}
|
||
memcpy(assembled, sps_nal, sps_len);
|
||
memcpy(assembled + sps_len, pps_nal, pps_len);
|
||
memcpy(assembled + sps_len + pps_len,
|
||
bitstream, req->bitstream_len);
|
||
memset(assembled + assembled_len, 0,
|
||
AV_INPUT_BUFFER_PADDING_SIZE);
|
||
|
||
dec->pkt->data = assembled;
|
||
dec->pkt->size = (int) assembled_len;
|
||
log_debug("decoder: h264 prepended SPS=%zuB PPS=%zuB slice=%uB",
|
||
sps_len, pps_len, req->bitstream_len);
|
||
} else {
|
||
/*
|
||
* VP9/AV1: bitstream is self-contained per frame, point the
|
||
* AVPacket at it directly. Cast away const — AVPacket->data
|
||
* is non-const but avcodec_send_packet doesn't mutate it.
|
||
*/
|
||
dec->pkt->data = (uint8_t *) (uintptr_t) bitstream;
|
||
dec->pkt->size = (int) req->bitstream_len;
|
||
}
|
||
|
||
rc = fm->avcodec_send_packet(ctx, dec->pkt);
|
||
if (rc < 0) {
|
||
log_err("decoder: avcodec_send_packet failed: %d", rc);
|
||
resp->status = DAEDALUS_DECODE_ERR_SEND;
|
||
goto out;
|
||
}
|
||
|
||
fm->av_frame_unref(dec->frame);
|
||
rc = fm->avcodec_receive_frame(ctx, dec->frame);
|
||
if (rc == AVERROR(EAGAIN) || rc == AVERROR_EOF) {
|
||
log_debug("decoder: no frame ready yet (rc=%d)", rc);
|
||
resp->status = DAEDALUS_DECODE_NO_FRAME;
|
||
goto out;
|
||
}
|
||
if (rc < 0) {
|
||
log_err("decoder: avcodec_receive_frame failed: %d", rc);
|
||
resp->status = DAEDALUS_DECODE_ERR_RECV;
|
||
goto out;
|
||
}
|
||
|
||
{
|
||
struct AVFrame *fr = dec->frame;
|
||
const AVPixFmtDescriptor *desc =
|
||
fm->av_pix_fmt_desc_get(fr->format);
|
||
uint32_t h = fnv1a32_init();
|
||
uint32_t luma_len = 0, chroma_len = 0;
|
||
|
||
resp->status = DAEDALUS_DECODE_OK;
|
||
resp->width = (uint32_t) fr->width;
|
||
resp->height = (uint32_t) fr->height;
|
||
resp->pix_fmt = fr->format;
|
||
|
||
/*
|
||
* Walk every plane reported by the AVPixFmtDescriptor.
|
||
* For each component, byte width = ((plane_w *
|
||
* step_minus1) >> 0) — but the descriptor only tells
|
||
* us which plane each component sits in, not the
|
||
* plane's byte stride per pixel. In practice for the
|
||
* formats we care about (YUV420P, YUV422P, YUV444P,
|
||
* GBRP, NV12), each plane has exactly one component
|
||
* at 1 byte/sample. Hash each plane at
|
||
* (width >> log2_chroma_w) × (height >> log2_chroma_h)
|
||
* for chroma planes, full-size for plane 0.
|
||
*
|
||
* This generalises cleanly to anything 8-bit-per-
|
||
* sample-per-plane; 10/12-bit (P010, YUV420P10LE) will
|
||
* need depth handling when Phase 8.6 brings HDR
|
||
* content into play.
|
||
*/
|
||
if (!desc) {
|
||
log_warn("decoder: no descriptor for pix_fmt %d",
|
||
fr->format);
|
||
} else {
|
||
int p, max_plane = 0;
|
||
int i;
|
||
|
||
for (i = 0; i < desc->nb_components; i++) {
|
||
if (desc->comp[i].plane > max_plane)
|
||
max_plane = desc->comp[i].plane;
|
||
}
|
||
|
||
for (p = 0; p <= max_plane; p++) {
|
||
int pw, ph;
|
||
if (!fr->data[p] || !fr->linesize[p])
|
||
continue;
|
||
if (p == 0) {
|
||
pw = fr->width;
|
||
ph = fr->height;
|
||
luma_len += (uint32_t) pw *
|
||
(uint32_t) ph;
|
||
} else {
|
||
pw = AV_CEIL_RSHIFT(fr->width,
|
||
desc->log2_chroma_w);
|
||
ph = AV_CEIL_RSHIFT(fr->height,
|
||
desc->log2_chroma_h);
|
||
chroma_len += (uint32_t) pw *
|
||
(uint32_t) ph;
|
||
}
|
||
h = fnv1a32_plane(h, fr->data[p], pw, ph,
|
||
fr->linesize[p]);
|
||
}
|
||
}
|
||
|
||
resp->luma_len = luma_len;
|
||
resp->chroma_len = chroma_len;
|
||
resp->fnv1a_yuv = h;
|
||
|
||
/*
|
||
* Pack pixels directly into the mapped CAPTURE dmabuf
|
||
* planes. Dispatch on the V4L2 fourcc the kernel
|
||
* negotiated:
|
||
* V4L2_PIX_FMT_NV12M (default, 8-bit, 2 planes)
|
||
* V4L2_PIX_FMT_P010 (10-bit HDR, 1 plane)
|
||
*/
|
||
if (planes && planes->nr >= 1) {
|
||
int prc = 0;
|
||
switch (req->capture_pix_fmt) {
|
||
case V4L2_PIX_FMT_NV12M:
|
||
prc = pack_nv12_to_planes(fr, desc, planes);
|
||
break;
|
||
case V4L2_PIX_FMT_NV12:
|
||
prc = pack_nv12_single_to_plane(fr, desc, planes);
|
||
break;
|
||
case V4L2_PIX_FMT_P010:
|
||
prc = pack_p010_to_plane(fr, desc, planes);
|
||
break;
|
||
default:
|
||
log_warn("decoder: unsupported capture fourcc 0x%08x",
|
||
req->capture_pix_fmt);
|
||
prc = -EINVAL;
|
||
break;
|
||
}
|
||
if (prc < 0)
|
||
log_warn("decoder: pack failed (pix_fmt=%d cap_fourcc=0x%08x) — kernel will see metadata only",
|
||
fr->format, req->capture_pix_fmt);
|
||
}
|
||
|
||
log_info("decoder: OK %dx%d fmt=%d (%s) fnv1a=0x%08x luma=%u chroma=%u",
|
||
fr->width, fr->height, fr->format,
|
||
desc ? desc->name : "?",
|
||
h, luma_len, chroma_len);
|
||
}
|
||
|
||
fm->av_frame_unref(dec->frame);
|
||
|
||
out:
|
||
free(assembled);
|
||
(void) assembled_len;
|
||
return 0;
|
||
}
|