diff --git a/src/av1.c b/src/av1.c new file mode 100644 index 0000000..fe4a82f --- /dev/null +++ b/src/av1.c @@ -0,0 +1,155 @@ +/* + * Copyright (C) 2026 Markus Fritsche + * + * AV1 codec dispatcher. Populates V4L2_CID_STATELESS_AV1_SEQUENCE + * (struct v4l2_ctrl_av1_sequence) from VAAPI's VADecPictureParameterBufferAV1. + * + * Why a single SEQUENCE control and not the full V4L2_CID_STATELESS_AV1_* + * family (FRAME, TILE_GROUP_ENTRY, FILM_GRAIN): + * + * - The daedalus_v4l2 daemon path consumes the OUTPUT bitstream + * directly via libavcodec/libdav1d. libdav1d needs a complete OBU + * stream that includes the sequence header — ffmpeg-vaapi strips the + * sequence header on the client side (its parser is split across + * VAPictureParameterBufferAV1 + slice payload, with OBU_SEQUENCE_HEADER + * consumed and not re-emitted), so the daemon side has to synthesise + * it from the SEQUENCE ctrl. The other AV1 ctrls (FRAME / TILE / + * FILM_GRAIN) are not needed for that synthesis — the OBU_FRAME_HEADER + * + OBU_TILE_GROUP that libdav1d also needs are still in the slice + * bitstream. + * + * - The vpu981 (RK3588 dedicated AV1 hantro) hardware path doesn't + * consult these controls either — vpu981's driver parses the AV1 + * bitstream directly. So setting only SEQUENCE is correct for both + * destination decoders. + * + * Reference: marfrit/libva-v4l2-request-fourier issue #11 + * (DAEMON-PPS-style sequence-header re-synthesis on the daemon + * side, paralleling the H.264 SPS/PPS work in DAEMON-PPS). + * kernel uAPI: @ 2891-2919. + * VAAPI: typedef + * VADecPictureParameterBufferAV1. + */ + +#include "av1.h" + +#include "v4l2.h" +#include "utils.h" + +#include +#include + +#include +#include + +/* + * VADecPictureParameterBufferAV1 reaches us transitively via surface.h → + * va_backend.h → va.h → va_dec_av1.h (va_dec_av1.h alone won't compile + * standalone — it needs va.h's VA_PADDING_LOW / va_deprecated machinery). 
+ */
+
+/* Compile-time UAPI shift guard, sibling to vp9.c's pattern. */
+_Static_assert(sizeof(struct v4l2_ctrl_av1_sequence) == 12,
+	       "v4l2_ctrl_av1_sequence size mismatch — kernel UAPI changed");
+
+/*
+ * Map VAAPI bit_depth_idx (0/1/2 → 8/10/12) to the kernel ctrl's plain
+ * uint8_t bit_depth field. ffmpeg-vaapi sets idx from the bitstream
+ * BitDepth value, so this is an exact inverse of AV1 spec 5.5.2.
+ */
+static uint8_t av1_bit_depth_from_idx(uint8_t idx)
+{
+	switch (idx) {
+	case 1: return 10;
+	case 2: return 12;
+	default: return 8; /* idx 0, plus the spec-illegal fallback */
+	}
+}
+
+/*
+ * Build V4L2_CID_STATELESS_AV1_SEQUENCE from the AV1 picture parameters
+ * stored in surface_object->params.av1.picture and submit it through
+ * v4l2_set_controls() on driver_data->video_fd and the surface's request_fd.
+ *
+ * context is unused. Returns VA_STATUS_SUCCESS (0) on success, or the
+ * negative v4l2_set_controls() result on failure: the caller tests
+ * "rc < 0", which a positive VA_STATUS_ERROR_* value would never trip.
+ */
+int av1_set_controls(struct request_data *driver_data,
+		     struct object_context *context,
+		     struct object_surface *surface_object)
+{
+	const VADecPictureParameterBufferAV1 *picture =
+		&surface_object->params.av1.picture;
+	struct v4l2_ctrl_av1_sequence sequence;
+	struct v4l2_ext_control ctrls[1];
+	int rc;
+
+	(void)context;
+
+	memset(&sequence, 0, sizeof sequence);
+
+	/*
+	 * Scalar mapping. order_hint_bits is OrderHintBits, which the AV1 spec
+	 * defines as 0 unless enable_order_hint is set; memset left it at 0.
+	 */
+	sequence.seq_profile = picture->profile;
+	if (picture->seq_info_fields.fields.enable_order_hint)
+		sequence.order_hint_bits =
+			(uint8_t)(picture->order_hint_bits_minus_1 + 1u);
+	sequence.bit_depth = av1_bit_depth_from_idx(picture->bit_depth_idx);
+	sequence.max_frame_width_minus_1 = picture->frame_width_minus1;
+	sequence.max_frame_height_minus_1 = picture->frame_height_minus1;
+
+	/*
+	 * Sequence-header flags. Only bits mirrored 1:1 in seq_info_fields are
+	 * set. ENABLE_WARPED_MOTION, ENABLE_REF_FRAME_MVS, ENABLE_SUPERRES,
+	 * ENABLE_RESTORATION and SEPARATE_UV_DELTA_Q live in VAAPI's per-frame
+	 * pic_info_fields rather than the sequence struct, so they stay clear;
+	 * per the issue #11 daemon track, per-frame consumers (libdav1d) still
+	 * see those toggles via the OBU_FRAME stream in the slice buffer.
+	 */
+	if (picture->seq_info_fields.fields.still_picture)
+		sequence.flags |= V4L2_AV1_SEQUENCE_FLAG_STILL_PICTURE;
+	if (picture->seq_info_fields.fields.use_128x128_superblock)
+		sequence.flags |= V4L2_AV1_SEQUENCE_FLAG_USE_128X128_SUPERBLOCK;
+	if (picture->seq_info_fields.fields.enable_filter_intra)
+		sequence.flags |= V4L2_AV1_SEQUENCE_FLAG_ENABLE_FILTER_INTRA;
+	if (picture->seq_info_fields.fields.enable_intra_edge_filter)
+		sequence.flags |= V4L2_AV1_SEQUENCE_FLAG_ENABLE_INTRA_EDGE_FILTER;
+	if (picture->seq_info_fields.fields.enable_interintra_compound)
+		sequence.flags |= V4L2_AV1_SEQUENCE_FLAG_ENABLE_INTERINTRA_COMPOUND;
+	if (picture->seq_info_fields.fields.enable_masked_compound)
+		sequence.flags |= V4L2_AV1_SEQUENCE_FLAG_ENABLE_MASKED_COMPOUND;
+	if (picture->seq_info_fields.fields.enable_dual_filter)
+		sequence.flags |= V4L2_AV1_SEQUENCE_FLAG_ENABLE_DUAL_FILTER;
+	if (picture->seq_info_fields.fields.enable_order_hint)
+		sequence.flags |= V4L2_AV1_SEQUENCE_FLAG_ENABLE_ORDER_HINT;
+	if (picture->seq_info_fields.fields.enable_jnt_comp)
+		sequence.flags |= V4L2_AV1_SEQUENCE_FLAG_ENABLE_JNT_COMP;
+	if (picture->seq_info_fields.fields.enable_cdef)
+		sequence.flags |= V4L2_AV1_SEQUENCE_FLAG_ENABLE_CDEF;
+	if (picture->seq_info_fields.fields.mono_chrome)
+		sequence.flags |= V4L2_AV1_SEQUENCE_FLAG_MONO_CHROME;
+	if (picture->seq_info_fields.fields.color_range)
+		sequence.flags |= V4L2_AV1_SEQUENCE_FLAG_COLOR_RANGE;
+	if (picture->seq_info_fields.fields.subsampling_x)
+		sequence.flags |= V4L2_AV1_SEQUENCE_FLAG_SUBSAMPLING_X;
+	if (picture->seq_info_fields.fields.subsampling_y)
+		sequence.flags |= V4L2_AV1_SEQUENCE_FLAG_SUBSAMPLING_Y;
+	if (picture->seq_info_fields.fields.film_grain_params_present)
+		sequence.flags |= V4L2_AV1_SEQUENCE_FLAG_FILM_GRAIN_PARAMS_PRESENT;
+
+	/* Single-control batched submission. */
+	memset(ctrls, 0, sizeof ctrls);
+	ctrls[0].id = V4L2_CID_STATELESS_AV1_SEQUENCE;
+	ctrls[0].ptr = &sequence;
+	ctrls[0].size = sizeof sequence;
+
+	rc = v4l2_set_controls(driver_data->video_fd,
+			       surface_object->request_fd, ctrls, 1);
+	if (rc < 0)
+		return rc;
+
+	return VA_STATUS_SUCCESS;
+}
diff --git a/src/av1.h b/src/av1.h
new file mode 100644
index 0000000..a9b79c9
--- /dev/null
+++ b/src/av1.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2026 Markus Fritsche
+ *
+ * AV1 codec dispatcher — populates V4L2_CID_STATELESS_AV1_SEQUENCE
+ * (struct v4l2_ctrl_av1_sequence) from VAAPI's VADecPictureParameterBufferAV1.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _AV1_H_ +#define _AV1_H_ + +#include "context.h" +#include "request.h" +#include "surface.h" + +int av1_set_controls(struct request_data *driver_data, + struct object_context *context, + struct object_surface *surface); + +#endif /* _AV1_H_ */ diff --git a/src/meson.build b/src/meson.build index e34c350..762cdf5 100644 --- a/src/meson.build +++ b/src/meson.build @@ -53,6 +53,7 @@ sources = [ 'h265.c', 'vp8.c', 'vp9.c', + 'av1.c', 'codec.c', 'nv15.c', 'nv12_col128.c', diff --git a/src/picture.c b/src/picture.c index 31d9e57..97e3563 100644 --- a/src/picture.c +++ b/src/picture.c @@ -36,6 +36,7 @@ #include "mpeg2.h" #include "vp8.h" #include "vp9.h" +#include "av1.h" #include #include @@ -157,6 +158,12 @@ static VAStatus codec_store_buffer(struct request_data *driver_data, sizeof(surface_object->params.vp9.picture)); break; + case VAProfileAV1Profile0: + memcpy(&surface_object->params.av1.picture, + buffer_object->data, + sizeof(surface_object->params.av1.picture)); + break; + default: break; } @@ -320,26 +327,22 @@ static VAStatus codec_set_controls(struct request_data *driver_data, case VAProfileAV1Profile0: /* - * AV1 has no codec-specific V4L2 control dispatch wired up - * yet on this branch (see config.c VAProfileAV1Profile0 - * comment). For the daedalus_v4l2 daemon path that's fine: - * AV1 frames are self-describing per-frame (OBU sequence + - * frame headers carry everything libavcodec needs), so the - * bitstream in the V4L2 OUTPUT buffer is sufficient — no - * V4L2_CID_STATELESS_AV1_* controls have to be populated. + * Populates V4L2_CID_STATELESS_AV1_SEQUENCE from + * VAPictureParameterBufferAV1. 
The daedalus_v4l2 daemon + * (issue #11 daemon track) synthesises an OBU_SEQUENCE_HEADER + * from this ctrl and prepends it to the slice bitstream + * before handing it to libavcodec/libdav1d, which otherwise + * cannot parse the (sequence-header-stripped) OUTPUT buffer + * that ffmpeg-vaapi delivers. * - * Per-codec dispatch in request_switch_device_for_profile - * has already retargeted (video_fd, media_fd) to - * video_fd_daedalus (or video_fd_vpu981 on RK3588 if - * present) by the time we get here; the OUTPUT buffer will - * be queued via that fd and the kernel forwards bytes to - * the daemon as a regular REQ_DECODE. No-op is the - * correct shape. - * - * When the vpu981-targeted V4L2_CID_STATELESS_AV1_* dispatch - * lands from the av1-iter1 operator branch, replace this - * with av1_set_controls(...). + * On the RK3588 vpu981 hardware path the same SEQUENCE ctrl + * is harmless: vpu981's driver parses the OBU stream + * directly and ignores the ctrl payload, so no per-decoder + * gating is required here. */ + rc = av1_set_controls(driver_data, context, surface_object); + if (rc < 0) + return VA_STATUS_ERROR_OPERATION_FAILED; break; default: diff --git a/src/surface.h b/src/surface.h index ada7994..3f7a611 100644 --- a/src/surface.h +++ b/src/surface.h @@ -122,6 +122,18 @@ struct object_surface { VADecPictureParameterBufferVP9 picture; VASliceParameterBufferVP9 slice; } vp9; + struct { + /* + * AV1 picture parameter buffer. Slice params are + * intentionally absent — the daedalus daemon track + * (issue #11) consumes the slice OBU bytes directly + * from the OUTPUT bitstream and synthesises only the + * sequence-header OBU from V4L2_CID_STATELESS_AV1_ + * SEQUENCE. No per-tile-group struct→OBU re-synthesis + * required from libva today. + */ + VADecPictureParameterBufferAV1 picture; + } av1; } params; int request_fd;