Compare commits
24 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 5d1ff51178 | |||
| 9797a0daa6 | |||
| 3a8f5405d4 | |||
| 4cfe0b470f | |||
| b958ef8166 | |||
| 94be8c3d03 | |||
| 1e9619afe8 | |||
| a43296c1ed | |||
| 872eec505e | |||
| ee42419479 | |||
| 1d8f5af164 | |||
| 3e4e6e8eae | |||
| 6e6dfa144d | |||
| 514da29a73 | |||
| 3bc0da168c | |||
| 814b74d0bb | |||
| 77e14e5a19 | |||
| 88b2ebfaa9 | |||
| 64b9599e47 | |||
| 234a103084 | |||
| 5d8b4369e5 | |||
| 714d781d22 | |||
| 49e60c9bba | |||
| 6ffe92bcac |
@@ -28,6 +28,20 @@ find_package(PkgConfig REQUIRED)
|
||||
pkg_check_modules(FFMPEG REQUIRED IMPORTED_TARGET
|
||||
libavformat libavcodec libavutil)
|
||||
|
||||
# daedalus-fourier — VC VII (V3D) + ARM NEON back-end kernel library.
|
||||
# Linked statically. Today only the no-QPU smoke-test path is wired
|
||||
# (a ctx_create_no_qpu at daemon startup, log-and-destroy at exit);
|
||||
# follow-up patches (per daedalus-v4l2#11) substitute the
|
||||
# `daedalus_recipe_dispatch_h264_*` family for libavcodec's per-MB
|
||||
# pixel primitives, one cycle at a time.
|
||||
#
|
||||
# We bypass IMPORTED_TARGET and consume pkg-config's static variables
|
||||
# (--static --libs path) directly so we control the link order:
|
||||
# libdaedalus_core.a must precede -lvulkan because the static archive
|
||||
# references vulkan symbols and the linker resolves left-to-right.
|
||||
pkg_check_modules(DAEDALUS_FOURIER REQUIRED daedalus-fourier)
|
||||
find_package(Vulkan REQUIRED)
|
||||
|
||||
add_executable(daedalus_v4l2_daemon
|
||||
src/main.c
|
||||
src/ffmpeg_loader.c
|
||||
@@ -38,6 +52,7 @@ add_executable(daedalus_v4l2_daemon
|
||||
src/dmabuf_capture.c
|
||||
src/bitstream_writer.c
|
||||
src/h264_nal_synth.c
|
||||
src/av1_obu_synth.c
|
||||
)
|
||||
|
||||
target_include_directories(daedalus_v4l2_daemon
|
||||
@@ -45,14 +60,46 @@ target_include_directories(daedalus_v4l2_daemon
|
||||
src
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../include
|
||||
${FFMPEG_INCLUDE_DIRS}
|
||||
${DAEDALUS_FOURIER_INCLUDE_DIRS}
|
||||
)
|
||||
|
||||
# dl for dlopen, pthread for future threading work.
|
||||
target_link_directories(daedalus_v4l2_daemon
|
||||
PRIVATE
|
||||
${DAEDALUS_FOURIER_LIBRARY_DIRS}
|
||||
)
|
||||
|
||||
target_link_libraries(daedalus_v4l2_daemon
|
||||
PRIVATE
|
||||
dl
|
||||
pthread
|
||||
# Order matters: libdaedalus_core.a first (so its undefined
|
||||
# vulkan symbols register), then -lvulkan to satisfy them.
|
||||
${DAEDALUS_FOURIER_LIBRARIES}
|
||||
Vulkan::Vulkan
|
||||
)
|
||||
|
||||
install(TARGETS daedalus_v4l2_daemon
|
||||
RUNTIME DESTINATION /usr/local/bin)
|
||||
|
||||
# --- Unit tests (opt-in) -------------------------------------------------
|
||||
#
|
||||
# DAEDALUS_BUILD_TESTS=ON enables standalone test executables that run on
|
||||
# the build host (no V4L2 / FFmpeg / Vulkan dependency). Used by CI to
|
||||
# gate bitstream synthesis modules against regressions.
|
||||
|
||||
option(DAEDALUS_BUILD_TESTS "Build daemon unit tests" OFF)
|
||||
|
||||
if (DAEDALUS_BUILD_TESTS)
|
||||
add_executable(test_av1_obu_synth
|
||||
src/test_av1_obu_synth.c
|
||||
src/av1_obu_synth.c
|
||||
src/bitstream_writer.c
|
||||
)
|
||||
target_include_directories(test_av1_obu_synth PRIVATE src)
|
||||
# Test binary does not link FFmpeg / Vulkan / dl — it exercises
|
||||
# pure-C encoders against in-memory inputs.
|
||||
|
||||
enable_testing()
|
||||
add_test(NAME av1_obu_synth COMMAND test_av1_obu_synth)
|
||||
endif()
|
||||
|
||||
@@ -0,0 +1,897 @@
|
||||
/* SPDX-License-Identifier: BSD-2-Clause */
|
||||
/*
|
||||
* av1_obu_synth.c — encode AV1 OBU bytes from V4L2 stateless controls.
|
||||
*
|
||||
* Spec references throughout are to the AOM AV1 Bitstream and Decoding
|
||||
* Process Specification rev 1.0.0 with errata. See §5.3.2 OBU header
|
||||
* syntax, §5.5.1 sequence_header_obu syntax, §5.3.4 trailing_bits().
|
||||
*
|
||||
* Synthesis defaults — fields the V4L2 control surface doesn't carry
|
||||
* are set to values that match the "common-case profile-0 4:2:0 8-bit"
|
||||
* path the V4L2 stateless AV1 contract is overwhelmingly used for.
|
||||
* Specifically:
|
||||
*
|
||||
* - reduced_still_picture_header = 0 (full sequence-header form)
|
||||
* - timing_info_present_flag = 0
|
||||
* - decoder_model_info_present_flag = 0
|
||||
* - initial_display_delay_present_flag = 0
|
||||
* - operating_points_cnt_minus_1 = 0 (single operating point)
|
||||
* - operating_point_idc[0] = 0 (all temporal/spatial layers)
|
||||
* - seq_level_idx[0] = 13 (level 5.1 — supports up to 4K, well past
|
||||
* anything libva-v4l2-request is likely to drive; libavcodec is
|
||||
* lenient on level mismatches that don't constrain the frame size)
|
||||
* - seq_tier[0] = 0
|
||||
* - frame_id_numbers_present_flag = 0
|
||||
* - seq_choose_screen_content_tools = 1 (SELECT) so
|
||||
* seq_force_screen_content_tools = 2 (SELECT)
|
||||
* - seq_choose_integer_mv = 1 (SELECT) so seq_force_integer_mv = 2
|
||||
* - color_description_present_flag = 0 (V4L2 ctrl doesn't carry CICP)
|
||||
* - chroma_sample_position = 0 (CSP_UNKNOWN)
|
||||
*
|
||||
* If a V4L2 sequence control arrives with bit_depth / seq_profile /
|
||||
* subsampling combinations the AV1 spec doesn't allow (e.g. profile 1
|
||||
* with bit_depth 12), we return 0 to surface the mismatch loudly rather
|
||||
* than silently encoding nonsense the libavcodec parser would reject.
|
||||
*/
|
||||
|
||||
#include "av1_obu_synth.h"
|
||||
#include "bitstream_writer.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#define OBU_SEQUENCE_HEADER 1
|
||||
#define OBU_TEMPORAL_DELIMITER 2
|
||||
#define OBU_FRAME_HEADER 3
|
||||
#define OBU_TILE_GROUP 4
|
||||
#define OBU_FRAME 6
|
||||
|
||||
/* AV1 §3 ref-frame symbolic constants — values per the spec table.
|
||||
* INTRA_FRAME is index 0 (used for intra-only); LAST_FRAME..ALTREF_FRAME
|
||||
* are 1..7. TOTAL_REFS_PER_FRAME = 8 (V4L2 mirrors this). */
|
||||
#define AV1_INTRA_FRAME 0
|
||||
#define AV1_LAST_FRAME 1
|
||||
#define AV1_NUM_REF_FRAMES 8 /* the DPB size */
|
||||
#define AV1_REFS_PER_FRAME 7 /* refs available to an inter frame */
|
||||
#define AV1_PRIMARY_REF_NONE 7
|
||||
|
||||
/* Default operating-point level: 5.1 — supports any frame size up to
|
||||
* 4K@60fps. Well past anything the V4L2 path is realistically driven
|
||||
* with on Pi 5; libavcodec doesn't enforce level against actual frame
|
||||
* dims at decode time, it just uses the field to size some bitstream
|
||||
* limits (max tile cols, etc.) that aren't load-bearing for stream
|
||||
* conformance. */
|
||||
#define DEFAULT_SEQ_LEVEL_IDX 13
|
||||
|
||||
/*
 * leb128_put() - store @v in unsigned LEB128 form (AV1 spec §4.10.5).
 *
 * The value is split into 7-bit groups, least-significant group first.
 * Every byte except the last has its top bit set to say "more follows".
 *
 * @v:   value to encode
 * @out: destination, written from index 0
 * @cap: number of bytes available at @out
 *
 * Returns the number of bytes stored (1..5 for a 32-bit input), or 0 when
 * @cap is too small.  On failure the groups that did fit have already been
 * written to @out.
 */
static size_t leb128_put(uint32_t v, uint8_t *out, size_t cap)
{
    size_t n;

    for (n = 0; ; n++) {
        const uint32_t rest = v >> 7;
        uint8_t group = (uint8_t) (v & 0x7fu);

        if (rest != 0)
            group |= 0x80u;     /* continuation marker */
        if (n >= cap)
            return 0;
        out[n] = group;
        if (rest == 0)
            return n + 1;
        v = rest;
    }
}
|
||||
|
||||
/*
 * min_bits_for() - width in bits of the shortest unsigned field that can
 * hold @x, with a floor of one bit.
 *
 * That is the smallest n >= 1 such that (1 << n) > x: 0 and 1 both give 1,
 * 2 and 3 give 2, and so on up to 32 for values with the top bit set.
 *
 * The sequence header uses this to derive frame_width_bits_minus_1 /
 * frame_height_bits_minus_1 from max_frame_{width,height}_minus_1.  Those
 * fields are coded as (n - 1) in f(4), so only results in [1, 16] are
 * encodable.  This helper does NOT clamp; callers must reject results
 * above 16 themselves.
 */
static int min_bits_for(uint32_t x)
{
    int bits;

    for (bits = 0; x != 0; x >>= 1)
        bits++;

    return bits > 0 ? bits : 1;
}
|
||||
|
||||
/*
|
||||
* Resolve subsampling per §5.5.2. V4L2 carries SUBSAMPLING_X and
|
||||
* SUBSAMPLING_Y as flags but the AV1 spec forces them based on
|
||||
* seq_profile + bit_depth in some branches. Returns 1 on success and
|
||||
* 0 on illegal combination (e.g. profile 1 + bit_depth 12, which the
|
||||
* spec doesn't allow). Output via the two int pointers.
|
||||
*
|
||||
* Note: we intentionally don't honour the V4L2 flags in the forced
|
||||
* branches. Producers that set them inconsistently with seq_profile
|
||||
* are bug; we trust the profile.
|
||||
*/
|
||||
static int resolve_subsampling(uint8_t seq_profile, uint8_t bit_depth,
|
||||
uint32_t flags, bool monochrome,
|
||||
int *out_x, int *out_y)
|
||||
{
|
||||
if (monochrome) {
|
||||
*out_x = 1;
|
||||
*out_y = 1;
|
||||
return 1;
|
||||
}
|
||||
|
||||
switch (seq_profile) {
|
||||
case 0: /* 4:2:0 8/10-bit */
|
||||
if (bit_depth != 8 && bit_depth != 10)
|
||||
return 0;
|
||||
*out_x = 1;
|
||||
*out_y = 1;
|
||||
return 1;
|
||||
case 1: /* 4:4:4 8/10-bit */
|
||||
if (bit_depth != 8 && bit_depth != 10)
|
||||
return 0;
|
||||
*out_x = 0;
|
||||
*out_y = 0;
|
||||
return 1;
|
||||
case 2: /* 4:2:2 or 4:2:0/4:4:4 12-bit */
|
||||
if (bit_depth == 12) {
|
||||
*out_x = (flags & V4L2_AV1_SEQUENCE_FLAG_SUBSAMPLING_X) ? 1 : 0;
|
||||
if (*out_x)
|
||||
*out_y = (flags & V4L2_AV1_SEQUENCE_FLAG_SUBSAMPLING_Y) ? 1 : 0;
|
||||
else
|
||||
*out_y = 0;
|
||||
} else if (bit_depth == 8 || bit_depth == 10) {
|
||||
*out_x = 1; /* forced 4:2:2 */
|
||||
*out_y = 0;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
size_t av1_synth_sequence_header_obu(const struct v4l2_ctrl_av1_sequence *seq,
|
||||
uint8_t *out, size_t out_cap)
|
||||
{
|
||||
uint8_t rbsp[64];
|
||||
struct bs_writer bs;
|
||||
uint32_t flags;
|
||||
uint8_t bit_depth;
|
||||
uint8_t seq_profile;
|
||||
bool still_picture, monochrome, enable_order_hint;
|
||||
int high_bitdepth, twelve_bit;
|
||||
int subsampling_x, subsampling_y;
|
||||
int width_bits, height_bits;
|
||||
size_t payload_len;
|
||||
size_t w;
|
||||
|
||||
if (!seq || !out || out_cap < 8)
|
||||
return 0;
|
||||
|
||||
flags = seq->flags;
|
||||
bit_depth = seq->bit_depth;
|
||||
seq_profile = seq->seq_profile;
|
||||
still_picture = !!(flags & V4L2_AV1_SEQUENCE_FLAG_STILL_PICTURE);
|
||||
monochrome = !!(flags & V4L2_AV1_SEQUENCE_FLAG_MONO_CHROME);
|
||||
enable_order_hint = !!(flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_ORDER_HINT);
|
||||
|
||||
/* Sanity checks against the spec's allowed combinations. */
|
||||
if (seq_profile > 2)
|
||||
return 0;
|
||||
if (bit_depth != 8 && bit_depth != 10 && bit_depth != 12)
|
||||
return 0;
|
||||
if (seq_profile == 1 && monochrome)
|
||||
return 0; /* profile 1 must be 4:4:4 colour */
|
||||
|
||||
high_bitdepth = (bit_depth > 8) ? 1 : 0;
|
||||
twelve_bit = (seq_profile == 2 && bit_depth == 12) ? 1 : 0;
|
||||
|
||||
if (!resolve_subsampling(seq_profile, bit_depth, flags, monochrome,
|
||||
&subsampling_x, &subsampling_y))
|
||||
return 0;
|
||||
|
||||
width_bits = min_bits_for((uint32_t) seq->max_frame_width_minus_1);
|
||||
height_bits = min_bits_for((uint32_t) seq->max_frame_height_minus_1);
|
||||
if (width_bits > 16 || height_bits > 16)
|
||||
return 0; /* spec encodes (n - 1) in f(4): n in [1, 16] */
|
||||
|
||||
bsw_init(&bs, rbsp, sizeof(rbsp));
|
||||
|
||||
/* --- sequence_header_obu --- §5.5.1 --- */
|
||||
|
||||
bsw_put_u(&bs, seq_profile, 3);
|
||||
bsw_put_u(&bs, still_picture ? 1u : 0u, 1);
|
||||
bsw_put_u(&bs, 0u, 1); /* reduced_still_picture_header */
|
||||
|
||||
/* full-form path (reduced_still_picture_header == 0) */
|
||||
bsw_put_u(&bs, 0u, 1); /* timing_info_present_flag */
|
||||
bsw_put_u(&bs, 0u, 1); /* initial_display_delay_present_flag */
|
||||
bsw_put_u(&bs, 0u, 5); /* operating_points_cnt_minus_1 */
|
||||
bsw_put_u(&bs, 0u, 12); /* operating_point_idc[0] */
|
||||
bsw_put_u(&bs, DEFAULT_SEQ_LEVEL_IDX, 5); /* seq_level_idx[0] */
|
||||
if (DEFAULT_SEQ_LEVEL_IDX > 7)
|
||||
bsw_put_u(&bs, 0u, 1); /* seq_tier[0] */
|
||||
|
||||
bsw_put_u(&bs, (uint32_t)(width_bits - 1), 4); /* frame_width_bits_minus_1 */
|
||||
bsw_put_u(&bs, (uint32_t)(height_bits - 1), 4); /* frame_height_bits_minus_1 */
|
||||
bsw_put_u(&bs, (uint32_t) seq->max_frame_width_minus_1, width_bits);
|
||||
bsw_put_u(&bs, (uint32_t) seq->max_frame_height_minus_1, height_bits);
|
||||
|
||||
bsw_put_u(&bs, 0u, 1); /* frame_id_numbers_present_flag */
|
||||
|
||||
bsw_put_u(&bs, (flags & V4L2_AV1_SEQUENCE_FLAG_USE_128X128_SUPERBLOCK) ? 1u : 0u, 1);
|
||||
bsw_put_u(&bs, (flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_FILTER_INTRA) ? 1u : 0u, 1);
|
||||
bsw_put_u(&bs, (flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_INTRA_EDGE_FILTER) ? 1u : 0u, 1);
|
||||
|
||||
/* non-still-picture block — V4L2 controls fill these in */
|
||||
bsw_put_u(&bs, (flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_INTERINTRA_COMPOUND) ? 1u : 0u, 1);
|
||||
bsw_put_u(&bs, (flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_MASKED_COMPOUND) ? 1u : 0u, 1);
|
||||
bsw_put_u(&bs, (flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_WARPED_MOTION) ? 1u : 0u, 1);
|
||||
bsw_put_u(&bs, (flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_DUAL_FILTER) ? 1u : 0u, 1);
|
||||
bsw_put_u(&bs, enable_order_hint ? 1u : 0u, 1);
|
||||
if (enable_order_hint) {
|
||||
bsw_put_u(&bs, (flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_JNT_COMP) ? 1u : 0u, 1);
|
||||
bsw_put_u(&bs, (flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_REF_FRAME_MVS) ? 1u : 0u, 1);
|
||||
}
|
||||
bsw_put_u(&bs, 1u, 1); /* seq_choose_screen_detection_tools */
|
||||
/* seq_force_screen_content_tools = SELECT (2), so no further bits */
|
||||
/* seq_choose_integer_mv path:
|
||||
* seq_force_screen_content_tools > 0, so we emit seq_choose_integer_mv = 1
|
||||
* (SELECT) — which leaves seq_force_integer_mv = SELECT (2) without
|
||||
* further bits. */
|
||||
bsw_put_u(&bs, 1u, 1); /* seq_choose_integer_mv */
|
||||
if (enable_order_hint) {
|
||||
uint8_t ohb = seq->order_hint_bits;
|
||||
if (ohb < 1) ohb = 1;
|
||||
if (ohb > 8) ohb = 8;
|
||||
bsw_put_u(&bs, (uint32_t)(ohb - 1), 3); /* order_hint_bits_minus_1 */
|
||||
}
|
||||
|
||||
bsw_put_u(&bs, (flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_SUPERRES) ? 1u : 0u, 1);
|
||||
bsw_put_u(&bs, (flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_CDEF) ? 1u : 0u, 1);
|
||||
bsw_put_u(&bs, (flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_RESTORATION) ? 1u : 0u, 1);
|
||||
|
||||
/* --- color_config() --- §5.5.2 --- */
|
||||
|
||||
bsw_put_u(&bs, high_bitdepth ? 1u : 0u, 1);
|
||||
if (seq_profile == 2 && high_bitdepth)
|
||||
bsw_put_u(&bs, twelve_bit ? 1u : 0u, 1);
|
||||
if (seq_profile != 1)
|
||||
bsw_put_u(&bs, monochrome ? 1u : 0u, 1);
|
||||
bsw_put_u(&bs, 0u, 1); /* color_description_present_flag */
|
||||
|
||||
if (monochrome) {
|
||||
bsw_put_u(&bs,
|
||||
(flags & V4L2_AV1_SEQUENCE_FLAG_COLOR_RANGE) ? 1u : 0u, 1);
|
||||
/* monochrome path: subsampling/chroma_sample_position/separate_uv_delta_q
|
||||
* are forced by the spec — no further bits emitted. */
|
||||
} else {
|
||||
bsw_put_u(&bs,
|
||||
(flags & V4L2_AV1_SEQUENCE_FLAG_COLOR_RANGE) ? 1u : 0u, 1);
|
||||
/* subsampling encoding depends on seq_profile */
|
||||
if (seq_profile == 2 && bit_depth == 12) {
|
||||
bsw_put_u(&bs, subsampling_x ? 1u : 0u, 1);
|
||||
if (subsampling_x)
|
||||
bsw_put_u(&bs, subsampling_y ? 1u : 0u, 1);
|
||||
}
|
||||
/* profile 0 / profile 1 / profile 2 non-12-bit: subsampling is
|
||||
* forced by the spec, no bits emitted. */
|
||||
if (subsampling_x && subsampling_y) {
|
||||
/* chroma_sample_position f(2) — V4L2 ctrl doesn't carry
|
||||
* this; default CSP_UNKNOWN (0). */
|
||||
bsw_put_u(&bs, 0u, 2);
|
||||
}
|
||||
bsw_put_u(&bs,
|
||||
(flags & V4L2_AV1_SEQUENCE_FLAG_SEPARATE_UV_DELTA_Q) ? 1u : 0u, 1);
|
||||
}
|
||||
|
||||
/* film_grain_params_present + trailing_bits */
|
||||
bsw_put_u(&bs,
|
||||
(flags & V4L2_AV1_SEQUENCE_FLAG_FILM_GRAIN_PARAMS_PRESENT) ? 1u : 0u, 1);
|
||||
|
||||
bsw_align_rbsp(&bs);
|
||||
if (bsw_overflowed(&bs))
|
||||
return 0;
|
||||
|
||||
payload_len = bsw_bytes(&bs);
|
||||
|
||||
/* --- assemble OBU: header byte | leb128(payload_len) | payload --- */
|
||||
|
||||
/* OBU header byte (§5.3.2):
|
||||
* obu_forbidden_bit = 0 [bit 7]
|
||||
* obu_type = 1 [bits 6..3] (OBU_SEQUENCE_HEADER)
|
||||
* obu_extension_flag = 0 [bit 2]
|
||||
* obu_has_size_field = 1 [bit 1]
|
||||
* obu_reserved_1bit = 0 [bit 0]
|
||||
* → 0_0001_0_1_0 = 0x0A
|
||||
*/
|
||||
w = 0;
|
||||
if (w >= out_cap)
|
||||
return 0;
|
||||
out[w++] = (uint8_t)
|
||||
((0u << 7) |
|
||||
((OBU_SEQUENCE_HEADER & 0xfu) << 3) |
|
||||
(0u << 2) |
|
||||
(1u << 1) |
|
||||
(0u << 0));
|
||||
|
||||
{
|
||||
size_t leb_n = leb128_put((uint32_t) payload_len,
|
||||
out + w, out_cap - w);
|
||||
if (leb_n == 0)
|
||||
return 0;
|
||||
w += leb_n;
|
||||
}
|
||||
|
||||
if (out_cap - w < payload_len)
|
||||
return 0;
|
||||
memcpy(out + w, rbsp, payload_len);
|
||||
w += payload_len;
|
||||
|
||||
return w;
|
||||
}
|
||||
|
||||
/* -------------------------------------------------------------------
 * wrap_obu() - shared OBU framing: header byte + leb128 size + payload.
 *
 * Used by the frame-header and temporal-delimiter builders.  The
 * sequence-header builder keeps its own inline copy of this logic.
 *
 * @obu_type:    obu_type value; only the low 4 bits are used
 * @payload:     OBU payload bytes; may be NULL only when @payload_len is 0
 * @payload_len: number of payload bytes
 * @out:         destination buffer
 * @out_cap:     bytes available at @out
 *
 * The header byte always has obu_extension_flag = 0 and
 * obu_has_size_field = 1.
 *
 * Returns the total number of bytes written, or 0 when an argument is
 * invalid (@out NULL, @payload NULL with a non-zero length, length not
 * representable in 32 bits) or the OBU does not fit in @out_cap.
 * ----------------------------------------------------------------- */
static size_t wrap_obu(uint8_t obu_type, const uint8_t *payload,
                       size_t payload_len, uint8_t *out, size_t out_cap)
{
    size_t w = 0;
    size_t leb_n;

    /* Need at least the header byte plus one size byte. */
    if (!out || out_cap < 2)
        return 0;
    if (payload_len != 0 && !payload)
        return 0;
    /* leb128_put() takes a 32-bit value; refuse lengths that would be
     * silently truncated by the cast below. */
    if ((size_t) (uint32_t) payload_len != payload_len)
        return 0;

    out[w++] = (uint8_t)(
        (0u << 7) |                     /* obu_forbidden_bit */
        ((obu_type & 0xfu) << 3) |      /* obu_type */
        (0u << 2) |                     /* obu_extension_flag */
        (1u << 1) |                     /* obu_has_size_field */
        (0u << 0));                     /* obu_reserved_1bit */

    leb_n = leb128_put((uint32_t) payload_len, out + w, out_cap - w);
    if (leb_n == 0)
        return 0;
    w += leb_n;

    if (out_cap - w < payload_len)
        return 0;
    if (payload_len)
        memcpy(out + w, payload, payload_len);
    w += payload_len;

    return w;
}
|
||||
|
||||
size_t av1_synth_temporal_delimiter_obu(uint8_t *out, size_t out_cap)
|
||||
{
|
||||
return wrap_obu(OBU_TEMPORAL_DELIMITER, NULL, 0, out, out_cap);
|
||||
}
|
||||
|
||||
/* -------------------------------------------------------------------
|
||||
* Frame Header OBU — §5.9
|
||||
*
|
||||
* The encoder is sectioned to mirror the spec. Each subsection
|
||||
* helper writes into the shared bs_writer and signals "out of
|
||||
* scope" by setting a sticky `*unsupported` flag that the top-level
|
||||
* checks before returning. This keeps the spec-mirror linear and
|
||||
* the failure modes diagnosable.
|
||||
* ----------------------------------------------------------------- */
|
||||
|
||||
/*
 * MiCols / MiRows helpers.
 *
 * The frame dimension is rounded up to a whole number of 8-sample units
 * and then expressed in 4x4 "mode info" units, i.e. 2 * ((dim + 7) >> 3).
 * The result is therefore always even.
 */

/* MiCols for a frame @frame_width luma samples wide. */
static uint32_t mi_cols_for(uint32_t frame_width)
{
    const uint32_t units_8x8 = (frame_width + 7u) / 8u;

    return units_8x8 * 2u;
}
|
||||
/* MiRows for a frame @frame_height luma samples tall: the height rounded
 * up to whole 8-sample units, expressed in 4x4 units (always even). */
static uint32_t mi_rows_for(uint32_t frame_height)
{
    const uint32_t units_8x8 = (frame_height + 7u) / 8u;

    return units_8x8 * 2u;
}
|
||||
|
||||
/*
 * tile_log2() from AV1 §5.9.15: the smallest shift k for which
 * (blk << k) >= target.  Returns 0 when @blk already covers @target.
 *
 * Callers must pass a positive @blk; with blk <= 0 and target > 0 the
 * search never terminates.
 */
static int tile_log2_ge(int blk, int target)
{
    int shift;

    for (shift = 0; (blk << shift) < target; shift++)
        ;

    return shift;
}
|
||||
|
||||
/* §5.9.12 quantization_params */
|
||||
static void write_quantization_params(struct bs_writer *bs,
|
||||
const struct v4l2_av1_quantization *q,
|
||||
bool num_planes_gt_1,
|
||||
bool separate_uv_delta_q)
|
||||
{
|
||||
bsw_put_u(bs, q->base_q_idx, 8);
|
||||
|
||||
/* read_delta_q: 1 bit "delta_coded" + (s(7)?) — we always emit
|
||||
* the full delta if non-zero, zero-encoded as delta_coded=0
|
||||
* (single bit). */
|
||||
#define EMIT_DELTA_Q(val) do { \
|
||||
int _v = (int8_t)(val); \
|
||||
if (_v != 0) { \
|
||||
bsw_put_u(bs, 1u, 1); \
|
||||
/* su(1+6): sign + 6-bit magnitude */ \
|
||||
if (_v < 0) { \
|
||||
bsw_put_u(bs, (uint32_t)(_v + 128) & 0x7fu, 7); \
|
||||
} else { \
|
||||
bsw_put_u(bs, (uint32_t)_v & 0x7fu, 7); \
|
||||
} \
|
||||
} else { \
|
||||
bsw_put_u(bs, 0u, 1); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
EMIT_DELTA_Q(q->delta_q_y_dc);
|
||||
|
||||
if (num_planes_gt_1) {
|
||||
if (separate_uv_delta_q)
|
||||
bsw_put_u(bs,
|
||||
(q->flags & V4L2_AV1_QUANTIZATION_FLAG_DIFF_UV_DELTA) ? 1u : 0u,
|
||||
1);
|
||||
EMIT_DELTA_Q(q->delta_q_u_dc);
|
||||
EMIT_DELTA_Q(q->delta_q_u_ac);
|
||||
if (separate_uv_delta_q &&
|
||||
(q->flags & V4L2_AV1_QUANTIZATION_FLAG_DIFF_UV_DELTA)) {
|
||||
EMIT_DELTA_Q(q->delta_q_v_dc);
|
||||
EMIT_DELTA_Q(q->delta_q_v_ac);
|
||||
}
|
||||
}
|
||||
#undef EMIT_DELTA_Q
|
||||
|
||||
bsw_put_u(bs,
|
||||
(q->flags & V4L2_AV1_QUANTIZATION_FLAG_USING_QMATRIX) ? 1u : 0u,
|
||||
1);
|
||||
if (q->flags & V4L2_AV1_QUANTIZATION_FLAG_USING_QMATRIX) {
|
||||
bsw_put_u(bs, q->qm_y, 4);
|
||||
bsw_put_u(bs, q->qm_u, 4);
|
||||
if (num_planes_gt_1 && separate_uv_delta_q)
|
||||
bsw_put_u(bs, q->qm_v, 4);
|
||||
}
|
||||
}
|
||||
|
||||
/* §5.9.11 loop_filter_params */
|
||||
static void write_loop_filter_params(struct bs_writer *bs,
|
||||
const struct v4l2_av1_loop_filter *lf,
|
||||
bool num_planes_gt_1,
|
||||
bool coded_lossless_or_allow_intrabc)
|
||||
{
|
||||
if (coded_lossless_or_allow_intrabc) {
|
||||
/* spec §6.8.10: when CodedLossless or allow_intrabc is set,
|
||||
* loop filter levels are inferred and not coded. */
|
||||
return;
|
||||
}
|
||||
bsw_put_u(bs, lf->level[0], 6);
|
||||
bsw_put_u(bs, lf->level[1], 6);
|
||||
if (num_planes_gt_1) {
|
||||
if (lf->level[0] || lf->level[1]) {
|
||||
bsw_put_u(bs, lf->level[2], 6);
|
||||
bsw_put_u(bs, lf->level[3], 6);
|
||||
}
|
||||
}
|
||||
bsw_put_u(bs, lf->sharpness, 3);
|
||||
|
||||
/* loop_filter_delta_enabled */
|
||||
bool delta_en = !!(lf->flags & V4L2_AV1_LOOP_FILTER_FLAG_DELTA_ENABLED);
|
||||
bsw_put_u(bs, delta_en ? 1u : 0u, 1);
|
||||
if (delta_en) {
|
||||
bool delta_upd = !!(lf->flags & V4L2_AV1_LOOP_FILTER_FLAG_DELTA_UPDATE);
|
||||
bsw_put_u(bs, delta_upd ? 1u : 0u, 1);
|
||||
if (delta_upd) {
|
||||
int i;
|
||||
for (i = 0; i < 8; i++) {
|
||||
/* update_ref_delta: emit 0 (no update) — V4L2 carries the
|
||||
* cumulative state; trying to differentially encode here
|
||||
* would need previous-frame state we don't track. */
|
||||
bsw_put_u(bs, 0u, 1);
|
||||
}
|
||||
for (i = 0; i < 2; i++)
|
||||
bsw_put_u(bs, 0u, 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* §5.9.19 cdef_params */
|
||||
static void write_cdef_params(struct bs_writer *bs,
|
||||
const struct v4l2_av1_cdef *cdef,
|
||||
bool num_planes_gt_1,
|
||||
bool enable_cdef,
|
||||
bool coded_lossless_or_intrabc)
|
||||
{
|
||||
int i, n;
|
||||
if (!enable_cdef || coded_lossless_or_intrabc)
|
||||
return;
|
||||
bsw_put_u(bs, cdef->damping_minus_3, 2);
|
||||
bsw_put_u(bs, cdef->bits, 2);
|
||||
n = 1 << cdef->bits;
|
||||
for (i = 0; i < n; i++) {
|
||||
bsw_put_u(bs, cdef->y_pri_strength[i] & 0xfu, 4);
|
||||
bsw_put_u(bs, cdef->y_sec_strength[i] & 0x3u, 2);
|
||||
if (num_planes_gt_1) {
|
||||
bsw_put_u(bs, cdef->uv_pri_strength[i] & 0xfu, 4);
|
||||
bsw_put_u(bs, cdef->uv_sec_strength[i] & 0x3u, 2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* §5.9.20 lr_params — only RESTORE_NONE supported here */
|
||||
static int write_lr_params(struct bs_writer *bs,
|
||||
const struct v4l2_av1_loop_restoration *lr,
|
||||
int num_planes,
|
||||
bool enable_restoration,
|
||||
bool coded_lossless_or_intrabc)
|
||||
{
|
||||
int p;
|
||||
if (!enable_restoration || coded_lossless_or_intrabc)
|
||||
return 1;
|
||||
|
||||
/* Out-of-scope if ANY plane uses restoration */
|
||||
if (lr->frame_restoration_type[0] != V4L2_AV1_FRAME_RESTORE_NONE)
|
||||
return 0;
|
||||
if (num_planes > 1) {
|
||||
if (lr->frame_restoration_type[1] != V4L2_AV1_FRAME_RESTORE_NONE)
|
||||
return 0;
|
||||
if (lr->frame_restoration_type[2] != V4L2_AV1_FRAME_RESTORE_NONE)
|
||||
return 0;
|
||||
}
|
||||
/* Emit 2-bit RESTORE_NONE per plane */
|
||||
for (p = 0; p < num_planes; p++)
|
||||
bsw_put_u(bs, 0u, 2);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
 * write_tile_info_single_tile() - emit tile_info() (§5.9.15) for a frame
 * coded as exactly one tile, using uniform tile spacing.
 *
 * @bs:           bit writer to append to
 * @frame_width:  frame width in luma samples (1..65536)
 * @frame_height: frame height in luma samples (1..65536)
 * @use_128_sb:   sequence-level use_128x128_superblock
 *
 * With uniform_tile_spacing_flag = 1 the decoder starts TileColsLog2 at
 * minLog2TileCols and reads increment_tile_cols_log2 bits only while
 * TileColsLog2 < maxLog2TileCols; rows work the same way.  So:
 *
 *   - A single tile is only legal when minLog2TileCols and minLog2Tiles are
 *     both 0.  Frames wider than MAX_TILE_WIDTH or larger than
 *     MAX_TILE_AREA must be split and are reported as unsupported.
 *   - The terminating 0 bit for a loop exists only when that loop can run
 *     at all (max log2 > 0).  A frame one superblock wide or tall has no
 *     such bit, and writing one would shift every later header field.
 *
 * context_update_tile_id and tile_size_bytes_minus_1 are only coded when
 * there is more than one tile, so nothing follows the increment bits.
 *
 * Returns 1 on success, or 0 without writing anything when the frame
 * cannot be expressed as a single tile or the dimensions are out of range.
 */
static int write_tile_info_single_tile(struct bs_writer *bs,
                                       uint32_t frame_width,
                                       uint32_t frame_height,
                                       bool use_128_sb)
{
    /* Limits from the AV1 spec constants table. */
    const int max_tile_width = 4096;            /* MAX_TILE_WIDTH, samples */
    const int max_tile_area = 4096 * 2304;      /* MAX_TILE_AREA, samples */
    const uint32_t max_tile_cols = 64;          /* MAX_TILE_COLS */
    const uint32_t max_tile_rows = 64;          /* MAX_TILE_ROWS */

    const int sb_shift = use_128_sb ? 5 : 4;    /* log2(SB size in MI units) */
    const int sb_size_log2 = sb_shift + 2;      /* log2(SB size in samples) */
    const uint32_t sb_round = (1u << sb_shift) - 1u;
    uint32_t sb_cols, sb_rows;
    int max_tile_width_sb, max_tile_area_sb;
    int min_log2_cols, max_log2_cols, max_log2_rows, min_log2_tiles;

    /* Frame dimensions are coded with at most 16 bits as (dim - 1); this
     * also keeps the int arithmetic below free of overflow.  The unsigned
     * subtraction rejects 0 as well. */
    if (frame_width - 1u > 0xffffu || frame_height - 1u > 0xffffu)
        return 0;

    sb_cols = (mi_cols_for(frame_width) + sb_round) >> sb_shift;
    sb_rows = (mi_rows_for(frame_height) + sb_round) >> sb_shift;

    max_tile_width_sb = max_tile_width >> sb_size_log2;
    max_tile_area_sb = max_tile_area >> (2 * sb_size_log2);

    min_log2_cols = tile_log2_ge(max_tile_width_sb, (int) sb_cols);
    max_log2_cols = tile_log2_ge(1, (int) (sb_cols < max_tile_cols ?
                                           sb_cols : max_tile_cols));
    max_log2_rows = tile_log2_ge(1, (int) (sb_rows < max_tile_rows ?
                                           sb_rows : max_tile_rows));
    min_log2_tiles = tile_log2_ge(max_tile_area_sb, (int) (sb_rows * sb_cols));
    if (min_log2_tiles < min_log2_cols)
        min_log2_tiles = min_log2_cols;

    /* min_log2_tiles >= min_log2_cols, so this covers both limits. */
    if (min_log2_tiles > 0)
        return 0;

    bsw_put_u(bs, 1u, 1);       /* uniform_tile_spacing_flag */

    /* TileColsLog2 starts at 0; stop the increment loop if it exists. */
    if (max_log2_cols > 0)
        bsw_put_u(bs, 0u, 1);   /* increment_tile_cols_log2 = 0 */

    /* minLog2TileRows = Max(minLog2Tiles - TileColsLog2, 0) = 0 here. */
    if (max_log2_rows > 0)
        bsw_put_u(bs, 0u, 1);   /* increment_tile_rows_log2 = 0 */

    return 1;
}
|
||||
|
||||
size_t av1_synth_frame_header_obu(const struct v4l2_ctrl_av1_sequence *seq,
|
||||
const struct v4l2_ctrl_av1_frame *fr,
|
||||
uint8_t *out, size_t out_cap)
|
||||
{
|
||||
uint8_t rbsp[256];
|
||||
struct bs_writer bs;
|
||||
uint32_t sf, ff;
|
||||
bool show_existing_frame = false;
|
||||
bool reduced_still_picture_header;
|
||||
bool show_frame, showable_frame, error_resilient_mode;
|
||||
bool disable_cdf_update, allow_screen_content_tools;
|
||||
bool force_integer_mv, allow_intrabc, frame_size_override;
|
||||
bool allow_high_precision_mv, is_motion_mode_switchable;
|
||||
bool use_ref_frame_mvs, disable_frame_end_update_cdf;
|
||||
bool reference_select, allow_warped_motion, reduced_tx_set;
|
||||
bool skip_mode_present, monochrome;
|
||||
uint8_t frame_type, primary_ref_frame;
|
||||
uint32_t frame_width, frame_height;
|
||||
int num_planes;
|
||||
int width_bits, height_bits;
|
||||
uint8_t order_hint_bits;
|
||||
bool enable_order_hint, enable_ref_frame_mvs, enable_warped_motion_seq;
|
||||
bool enable_cdef_seq, enable_restoration_seq;
|
||||
int i;
|
||||
|
||||
if (!seq || !fr || !out || out_cap < 16)
|
||||
return 0;
|
||||
|
||||
sf = seq->flags;
|
||||
ff = fr->flags;
|
||||
|
||||
/* sanity */
|
||||
monochrome = !!(sf & V4L2_AV1_SEQUENCE_FLAG_MONO_CHROME);
|
||||
num_planes = monochrome ? 1 : 3;
|
||||
enable_order_hint = !!(sf & V4L2_AV1_SEQUENCE_FLAG_ENABLE_ORDER_HINT);
|
||||
enable_ref_frame_mvs = !!(sf & V4L2_AV1_SEQUENCE_FLAG_ENABLE_REF_FRAME_MVS);
|
||||
enable_warped_motion_seq = !!(sf & V4L2_AV1_SEQUENCE_FLAG_ENABLE_WARPED_MOTION);
|
||||
enable_cdef_seq = !!(sf & V4L2_AV1_SEQUENCE_FLAG_ENABLE_CDEF);
|
||||
enable_restoration_seq = !!(sf & V4L2_AV1_SEQUENCE_FLAG_ENABLE_RESTORATION);
|
||||
order_hint_bits = enable_order_hint ? seq->order_hint_bits : 0;
|
||||
if (order_hint_bits > 8) order_hint_bits = 8;
|
||||
reduced_still_picture_header = false; /* matches sequence-header default */
|
||||
|
||||
frame_type = fr->frame_type;
|
||||
if (frame_type == V4L2_AV1_SWITCH_FRAME)
|
||||
return 0; /* out of scope */
|
||||
|
||||
show_frame = !!(ff & V4L2_AV1_FRAME_FLAG_SHOW_FRAME);
|
||||
showable_frame = !!(ff & V4L2_AV1_FRAME_FLAG_SHOWABLE_FRAME);
|
||||
error_resilient_mode = !!(ff & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE);
|
||||
disable_cdf_update = !!(ff & V4L2_AV1_FRAME_FLAG_DISABLE_CDF_UPDATE);
|
||||
allow_screen_content_tools = !!(ff & V4L2_AV1_FRAME_FLAG_ALLOW_SCREEN_CONTENT_TOOLS);
|
||||
force_integer_mv = !!(ff & V4L2_AV1_FRAME_FLAG_FORCE_INTEGER_MV);
|
||||
allow_intrabc = !!(ff & V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC);
|
||||
frame_size_override = !!(ff & V4L2_AV1_FRAME_FLAG_FRAME_SIZE_OVERRIDE);
|
||||
allow_high_precision_mv = !!(ff & V4L2_AV1_FRAME_FLAG_ALLOW_HIGH_PRECISION_MV);
|
||||
is_motion_mode_switchable = !!(ff & V4L2_AV1_FRAME_FLAG_IS_MOTION_MODE_SWITCHABLE);
|
||||
use_ref_frame_mvs = !!(ff & V4L2_AV1_FRAME_FLAG_USE_REF_FRAME_MVS);
|
||||
disable_frame_end_update_cdf = !!(ff & V4L2_AV1_FRAME_FLAG_DISABLE_FRAME_END_UPDATE_CDF);
|
||||
reference_select = !!(ff & V4L2_AV1_FRAME_FLAG_REFERENCE_SELECT);
|
||||
allow_warped_motion = !!(ff & V4L2_AV1_FRAME_FLAG_ALLOW_WARPED_MOTION);
|
||||
reduced_tx_set = !!(ff & V4L2_AV1_FRAME_FLAG_REDUCED_TX_SET);
|
||||
skip_mode_present = !!(ff & V4L2_AV1_FRAME_FLAG_SKIP_MODE_PRESENT);
|
||||
primary_ref_frame = fr->primary_ref_frame;
|
||||
|
||||
frame_width = fr->frame_width_minus_1 + 1;
|
||||
frame_height = fr->frame_height_minus_1 + 1;
|
||||
|
||||
width_bits = min_bits_for((uint32_t) seq->max_frame_width_minus_1);
|
||||
height_bits = min_bits_for((uint32_t) seq->max_frame_height_minus_1);
|
||||
|
||||
bsw_init(&bs, rbsp, sizeof(rbsp));
|
||||
|
||||
/* show_existing_frame: 0 (V4L2 doesn't surface the show-only path
|
||||
* — every fr ctrl describes a real decoded frame). */
|
||||
bsw_put_u(&bs, show_existing_frame ? 1u : 0u, 1);
|
||||
|
||||
bsw_put_u(&bs, (uint32_t) frame_type, 2);
|
||||
bsw_put_u(&bs, show_frame ? 1u : 0u, 1);
|
||||
if (show_frame) {
|
||||
/* No decoder_model_info_present_flag emitted in seq header,
|
||||
* so no buffer-removal-time bits here either. */
|
||||
} else {
|
||||
bsw_put_u(&bs, showable_frame ? 1u : 0u, 1);
|
||||
}
|
||||
if (frame_type == V4L2_AV1_SWITCH_FRAME ||
|
||||
(frame_type == V4L2_AV1_KEY_FRAME && show_frame)) {
|
||||
/* error_resilient_mode = 1 inferred — not coded */
|
||||
} else {
|
||||
bsw_put_u(&bs, error_resilient_mode ? 1u : 0u, 1);
|
||||
}
|
||||
|
||||
bsw_put_u(&bs, disable_cdf_update ? 1u : 0u, 1);
|
||||
/* allow_screen_content_tools coded as 1 bit when sequence
|
||||
* forces NOT-SELECT; SELECT mode means we always emit a 1 bit
|
||||
* for the SELECT_SCREEN_CONTENT_TOOLS path. Our sequence
|
||||
* header always emits SELECT, so emit a single bit equal to
|
||||
* the V4L2 flag. */
|
||||
bsw_put_u(&bs, allow_screen_content_tools ? 1u : 0u, 1);
|
||||
if (allow_screen_content_tools) {
|
||||
/* seq_force_integer_mv = SELECT (2) so:
|
||||
* force_integer_mv coded as 1 bit */
|
||||
bsw_put_u(&bs, force_integer_mv ? 1u : 0u, 1);
|
||||
}
|
||||
|
||||
/* frame_id_numbers_present_flag = 0 in seq → no current_frame_id */
|
||||
|
||||
if (frame_type != V4L2_AV1_SWITCH_FRAME && !reduced_still_picture_header)
|
||||
bsw_put_u(&bs, frame_size_override ? 1u : 0u, 1);
|
||||
|
||||
if (enable_order_hint)
|
||||
bsw_put_u(&bs, fr->order_hint, order_hint_bits);
|
||||
|
||||
if (frame_type != V4L2_AV1_KEY_FRAME && frame_type != V4L2_AV1_INTRA_ONLY_FRAME &&
|
||||
!error_resilient_mode)
|
||||
bsw_put_u(&bs, primary_ref_frame, 3);
|
||||
|
||||
/* frame_size + render_size (§5.9.5, §5.9.6) */
|
||||
if (frame_size_override) {
|
||||
bsw_put_u(&bs, fr->frame_width_minus_1, width_bits);
|
||||
bsw_put_u(&bs, fr->frame_height_minus_1, height_bits);
|
||||
}
|
||||
/* superres_params: §5.9.8 */
|
||||
{
|
||||
bool use_superres = !!(ff & V4L2_AV1_FRAME_FLAG_USE_SUPERRES);
|
||||
if (sf & V4L2_AV1_SEQUENCE_FLAG_ENABLE_SUPERRES)
|
||||
bsw_put_u(&bs, use_superres ? 1u : 0u, 1);
|
||||
if (use_superres) {
|
||||
/* coded_denom = superres_denom - SUPERRES_DENOM_MIN(9) */
|
||||
int denom = fr->superres_denom;
|
||||
if (denom < 9) denom = 9;
|
||||
bsw_put_u(&bs, (uint32_t)(denom - 9) & 0x7u, 3);
|
||||
}
|
||||
}
|
||||
/* render_size present flag: 1 if render dims given */
|
||||
{
|
||||
bool render_and_frame_match =
|
||||
(fr->render_width_minus_1 == fr->frame_width_minus_1) &&
|
||||
(fr->render_height_minus_1 == fr->frame_height_minus_1);
|
||||
bsw_put_u(&bs, render_and_frame_match ? 0u : 1u, 1);
|
||||
if (!render_and_frame_match) {
|
||||
bsw_put_u(&bs, fr->render_width_minus_1, 16);
|
||||
bsw_put_u(&bs, fr->render_height_minus_1, 16);
|
||||
}
|
||||
}
|
||||
|
||||
if (frame_type != V4L2_AV1_KEY_FRAME && frame_type != V4L2_AV1_INTRA_ONLY_FRAME) {
|
||||
/* allow_intrabc only on key/intra-only — skip for inter */
|
||||
(void) allow_intrabc;
|
||||
if (!error_resilient_mode && enable_order_hint)
|
||||
bsw_put_u(&bs, 0u, 1); /* frame_refs_short_signaling */
|
||||
/* read ref_frame_idx for each of REFS_PER_FRAME */
|
||||
for (i = 0; i < AV1_REFS_PER_FRAME; i++) {
|
||||
int8_t idx = fr->ref_frame_idx[i];
|
||||
if (idx < 0) idx = 0;
|
||||
bsw_put_u(&bs, (uint32_t)(idx & 0x7), 3);
|
||||
}
|
||||
if (frame_size_override && !error_resilient_mode) {
|
||||
/* found_ref loop — emit "no" for each, so frame_size
|
||||
* fields above already populated. */
|
||||
for (i = 0; i < AV1_REFS_PER_FRAME; i++)
|
||||
bsw_put_u(&bs, 0u, 1);
|
||||
}
|
||||
bsw_put_u(&bs, allow_high_precision_mv ? 1u : 0u, 1);
|
||||
/* read_interpolation_filter: is_filter_switchable + value */
|
||||
{
|
||||
int interp = fr->interpolation_filter;
|
||||
bool switchable = (interp == V4L2_AV1_INTERPOLATION_FILTER_SWITCHABLE);
|
||||
bsw_put_u(&bs, switchable ? 1u : 0u, 1);
|
||||
if (!switchable)
|
||||
bsw_put_u(&bs, (uint32_t)interp & 0x3u, 2);
|
||||
}
|
||||
bsw_put_u(&bs, is_motion_mode_switchable ? 1u : 0u, 1);
|
||||
if (!error_resilient_mode && enable_ref_frame_mvs)
|
||||
bsw_put_u(&bs, use_ref_frame_mvs ? 1u : 0u, 1);
|
||||
} else {
|
||||
if (frame_type == V4L2_AV1_INTRA_ONLY_FRAME && allow_screen_content_tools)
|
||||
bsw_put_u(&bs, allow_intrabc ? 1u : 0u, 1);
|
||||
else if (frame_type == V4L2_AV1_KEY_FRAME && allow_screen_content_tools)
|
||||
bsw_put_u(&bs, allow_intrabc ? 1u : 0u, 1);
|
||||
}
|
||||
|
||||
/* disable_frame_end_update_cdf */
|
||||
if (!disable_cdf_update)
|
||||
bsw_put_u(&bs, disable_frame_end_update_cdf ? 1u : 0u, 1);
|
||||
|
||||
/* tile_info: single-tile path */
|
||||
{
|
||||
bool use_128 = !!(sf & V4L2_AV1_SEQUENCE_FLAG_USE_128X128_SUPERBLOCK);
|
||||
if (!write_tile_info_single_tile(&bs, frame_width, frame_height,
|
||||
use_128))
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* quantization_params */
|
||||
write_quantization_params(&bs, &fr->quantization,
|
||||
num_planes > 1,
|
||||
!!(sf & V4L2_AV1_SEQUENCE_FLAG_SEPARATE_UV_DELTA_Q));
|
||||
|
||||
/* segmentation_params: only enabled=0 supported */
|
||||
{
|
||||
bool seg_en = !!(fr->segmentation.flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED);
|
||||
if (seg_en)
|
||||
return 0;
|
||||
bsw_put_u(&bs, 0u, 1); /* segmentation_enabled */
|
||||
}
|
||||
|
||||
/* delta_q_params + delta_lf_params */
|
||||
{
|
||||
bool delta_q_present = !!(fr->quantization.flags &
|
||||
V4L2_AV1_QUANTIZATION_FLAG_DELTA_Q_PRESENT);
|
||||
if (fr->quantization.base_q_idx > 0) {
|
||||
bsw_put_u(&bs, delta_q_present ? 1u : 0u, 1);
|
||||
if (delta_q_present)
|
||||
bsw_put_u(&bs, fr->quantization.delta_q_res & 0x3u, 2);
|
||||
}
|
||||
if (delta_q_present && !allow_intrabc) {
|
||||
bool delta_lf_present =
|
||||
!!(fr->loop_filter.flags & V4L2_AV1_LOOP_FILTER_FLAG_DELTA_LF_PRESENT);
|
||||
bsw_put_u(&bs, delta_lf_present ? 1u : 0u, 1);
|
||||
if (delta_lf_present) {
|
||||
bsw_put_u(&bs, fr->loop_filter.delta_lf_res & 0x3u, 2);
|
||||
bsw_put_u(&bs,
|
||||
(fr->loop_filter.flags & V4L2_AV1_LOOP_FILTER_FLAG_DELTA_LF_MULTI)
|
||||
? 1u : 0u, 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* coded_lossless heuristic: when base_q_idx==0 and all deltas==0
|
||||
* and qm not in use, AV1 treats the frame as lossless. We
|
||||
* approximate with the base_q_idx check; the lf/cdef writers
|
||||
* gate on the same value. */
|
||||
{
|
||||
bool coded_lossless = (fr->quantization.base_q_idx == 0);
|
||||
|
||||
write_loop_filter_params(&bs, &fr->loop_filter,
|
||||
num_planes > 1,
|
||||
coded_lossless || allow_intrabc);
|
||||
write_cdef_params(&bs, &fr->cdef, num_planes > 1,
|
||||
enable_cdef_seq,
|
||||
coded_lossless || allow_intrabc);
|
||||
if (!write_lr_params(&bs, &fr->loop_restoration, num_planes,
|
||||
enable_restoration_seq,
|
||||
coded_lossless || allow_intrabc))
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* read_tx_mode (§5.9.21) */
|
||||
{
|
||||
bool coded_lossless = (fr->quantization.base_q_idx == 0);
|
||||
if (coded_lossless) {
|
||||
/* tx_mode = ONLY_4X4 (inferred) */
|
||||
} else {
|
||||
int tx_mode = fr->tx_mode;
|
||||
bsw_put_u(&bs, (tx_mode == V4L2_AV1_TX_MODE_SELECT) ? 1u : 0u, 1);
|
||||
if (tx_mode != V4L2_AV1_TX_MODE_SELECT)
|
||||
bsw_put_u(&bs, (tx_mode == V4L2_AV1_TX_MODE_LARGEST) ? 1u : 0u, 1);
|
||||
}
|
||||
}
|
||||
|
||||
/* frame_reference_mode (§5.9.23) */
|
||||
if (frame_type != V4L2_AV1_KEY_FRAME && frame_type != V4L2_AV1_INTRA_ONLY_FRAME)
|
||||
bsw_put_u(&bs, reference_select ? 1u : 0u, 1);
|
||||
|
||||
/* skip_mode_params (§5.9.22) */
|
||||
{
|
||||
bool skip_allowed = !!(ff & V4L2_AV1_FRAME_FLAG_SKIP_MODE_ALLOWED);
|
||||
if (skip_allowed)
|
||||
bsw_put_u(&bs, skip_mode_present ? 1u : 0u, 1);
|
||||
}
|
||||
|
||||
/* reduced_tx_set */
|
||||
bsw_put_u(&bs, reduced_tx_set ? 1u : 0u, 1);
|
||||
|
||||
/* global_motion_params: §5.9.24 — emit IDENTITY for each ref */
|
||||
if (frame_type != V4L2_AV1_KEY_FRAME && frame_type != V4L2_AV1_INTRA_ONLY_FRAME) {
|
||||
int r;
|
||||
(void) enable_warped_motion_seq;
|
||||
(void) allow_warped_motion;
|
||||
for (r = 1; r < AV1_NUM_REF_FRAMES; r++) {
|
||||
uint8_t wm_type = fr->global_motion.type[r];
|
||||
if (wm_type != V4L2_AV1_WARP_MODEL_IDENTITY)
|
||||
return 0; /* out of scope */
|
||||
bsw_put_u(&bs, 0u, 1); /* is_global = 0 → identity */
|
||||
}
|
||||
}
|
||||
|
||||
/* film_grain_params: §6.8.20 — only "not present" path supported */
|
||||
if (sf & V4L2_AV1_SEQUENCE_FLAG_FILM_GRAIN_PARAMS_PRESENT)
|
||||
return 0; /* out of scope: film grain coding deferred */
|
||||
|
||||
bsw_align_rbsp(&bs);
|
||||
if (bsw_overflowed(&bs))
|
||||
return 0;
|
||||
|
||||
return wrap_obu(OBU_FRAME_HEADER, rbsp, bsw_bytes(&bs), out, out_cap);
|
||||
}
|
||||
@@ -0,0 +1,119 @@
|
||||
/* SPDX-License-Identifier: BSD-2-Clause */
|
||||
/*
|
||||
* av1_obu_synth.h — synthesise AV1 OBU bytes from the V4L2 stateless
|
||||
* AV1 controls.
|
||||
*
|
||||
* V4L2 stateless AV1 (per drivers/media/v4l2-core/v4l2-h264.c-style
|
||||
* contract) passes the OUTPUT buffer as bare tile-group bitstream and
|
||||
* the sequence / frame-header information as structured controls
|
||||
* (V4L2_CID_STATELESS_AV1_SEQUENCE, V4L2_CID_STATELESS_AV1_FRAME, ...).
|
||||
* libavcodec's AV1 decoder is full-bitstream, so the daemon has to
|
||||
* reconstruct the OBUs that the consumer parsed out and prepend them
|
||||
* to the tile-group bytes before handing the assembled stream to
|
||||
* libavcodec.
|
||||
*
|
||||
* This header covers Sequence Header (§5.5.1), Temporal Delimiter
|
||||
* (§5.6), and Frame Header (§5.9) OBUs. All share the same wire
|
||||
* conventions:
|
||||
* - No emulation prevention (AV1 uses leb128 sized fields instead).
|
||||
* - obu_has_size_field = 1 in the OBU header byte.
|
||||
* - obu_extension_flag = 0 (no temporal_id / spatial_id encoding).
|
||||
* - trailing_bits() finalises the payload to a byte boundary the same
|
||||
* way H.264's rbsp_trailing_bits does — bsw_align_rbsp covers it.
|
||||
*
|
||||
* Synthesis decisions for fields V4L2 doesn't carry are documented in
|
||||
* the .c file (search for "synthesis default").
|
||||
*/
|
||||
#ifndef DAEDALUS_AV1_OBU_SYNTH_H
|
||||
#define DAEDALUS_AV1_OBU_SYNTH_H
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include <linux/v4l2-controls.h>
|
||||
|
||||
/*
|
||||
* Encode an AV1 Sequence Header OBU (header byte + leb128 size + RBSP)
|
||||
* into @out. Returns total bytes written, or 0 on overflow / malformed
|
||||
* input (e.g. inconsistent bit_depth vs seq_profile). @out_cap must
|
||||
* be at least 32 bytes for any reasonable sequence header; 64 bytes
|
||||
* is a generous upper bound.
|
||||
*
|
||||
* The caller is expected to precede the resulting bytes with a
* Temporal Delimiter OBU (2 bytes: 0x12 0x00) before any Frame OBU so
* that libavcodec's AV1 parser sees a well-formed access unit; the
* temporal delimiter is not produced by this function — use
* av1_synth_temporal_delimiter_obu() for it.
|
||||
*/
|
||||
size_t av1_synth_sequence_header_obu(const struct v4l2_ctrl_av1_sequence *seq,
|
||||
uint8_t *out, size_t out_cap);
|
||||
|
||||
/*
|
||||
* Encode an AV1 Temporal Delimiter OBU into @out. Always exactly 2
|
||||
* bytes: 0x12 (obu_type=TEMPORAL_DELIMITER, has_size_field=1) followed
|
||||
* by 0x00 (leb128(0) — zero-payload). Returns 2 on success, 0 if
|
||||
* @out_cap < 2.
|
||||
*
|
||||
* Per AV1 spec §5.6 every temporal unit MUST start with a Temporal
|
||||
* Delimiter OBU when temporal_delimiter_obus_present is implied — the
|
||||
* libavcodec AV1 parser uses TD OBUs as access-unit boundaries when
|
||||
* fed full-bitstream input.
|
||||
*/
|
||||
size_t av1_synth_temporal_delimiter_obu(uint8_t *out, size_t out_cap);
|
||||
|
||||
/*
|
||||
* Integration status (2026-05-23):
|
||||
*
|
||||
* The Sequence Header / Frame Header / Temporal Delimiter encoders in this header are
|
||||
* standalone primitives. They are NOT yet called from decoder.c — the
|
||||
* AV1 decode hot path still passes the OUTPUT buffer straight to
|
||||
* libavcodec, which only works if the V4L2 consumer happens to be
|
||||
* sending a fully-OBU'd access unit (i.e. is not strictly following
|
||||
* the V4L2 stateless AV1 "tile-group bytes only" contract).
|
||||
*
|
||||
* Wiring these primitives in requires a separate kernel-side change:
|
||||
*
|
||||
* - extend daedalus_v4l2_proto.h with a `struct daedalus_av1_meta`
|
||||
* mirroring v4l2_ctrl_av1_sequence + v4l2_ctrl_av1_frame
|
||||
* - update kernel/daedalus_v4l2_main.c to capture
|
||||
* V4L2_CID_STATELESS_AV1_{SEQUENCE,FRAME} at device_run time and
|
||||
* ship the meta alongside the bitstream over the chardev
|
||||
* - update daemon/src/chardev_client.c to receive the meta
|
||||
* - update daemon/src/decoder.c to: synth TD + SH + FH OBUs, wrap
|
||||
* the OUTPUT bytes as an OBU_TILE_GROUP, concat in that order,
|
||||
* hand the assembled bitstream to libavcodec
|
||||
*
|
||||
* Tracked as a follow-on; see daedalus-v4l2 task notes.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Encode an AV1 Frame Header OBU from the V4L2 stateless frame control
|
||||
* (and the matching sequence control, which provides fields the
|
||||
* frame-header encoder branches on per §5.9.1).
|
||||
*
|
||||
* Scope (this revision — libva-v4l2-request common-case path):
|
||||
* - Frame types KEY / INTER / INTRA_ONLY. SWITCH frames return 0
|
||||
* (caller should fall back to libavcodec native parsing).
|
||||
* - segmentation_params() emits the "segmentation disabled" path
|
||||
* when V4L2_AV1_SEGMENTATION_FLAG_ENABLED is 0. Enabled
|
||||
* segmentation returns 0.
|
||||
* - loop_restoration_params(): only RESTORE_NONE on all planes
|
||||
* supported. Other restoration types return 0.
|
||||
* - global_motion: only IDENTITY warp model emitted. Non-IDENTITY
|
||||
* entries return 0.
|
||||
* - film_grain_params(): treated as "not present" — only valid when
|
||||
* the sequence header has film_grain_params_present = 0. If the
|
||||
* sequence claims film grain is present this revision returns 0
|
||||
* (the per-frame film-grain coding is a separate follow-on).
|
||||
*
|
||||
* Out-of-scope branches return 0 so the caller can surface a coverage
|
||||
* warning and fall back to direct libavcodec parsing of the original
|
||||
* bitstream where possible.
|
||||
*
|
||||
* @out_cap must be at least 128 bytes for any reasonable frame header;
|
||||
* 256 bytes is a safe upper bound for the supported subset.
|
||||
*/
|
||||
size_t av1_synth_frame_header_obu(const struct v4l2_ctrl_av1_sequence *seq,
|
||||
const struct v4l2_ctrl_av1_frame *fr,
|
||||
uint8_t *out, size_t out_cap);
|
||||
|
||||
#endif /* DAEDALUS_AV1_OBU_SYNTH_H */
|
||||
+63
-241
@@ -133,288 +133,110 @@ static int send_response(struct chardev_client *cli, uint32_t type,
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
* Register a new (src_pts → cookie) mapping in the pending table.
|
||||
* Reuses an existing slot for src_pts if one exists (defensive — the
|
||||
* kernel should never re-use the same src_pts for two live cookies,
|
||||
* but libva running against a test client without timestamps might
|
||||
* send all-zero src_pts; collapse them onto the latest cookie so the
|
||||
* 1:1-per-stream case keeps working). Returns 0 on success, -ENOSPC
|
||||
* if the table is full.
|
||||
*/
|
||||
static int pending_register(struct chardev_client *cli, uint64_t src_pts,
|
||||
uint32_t cookie,
|
||||
const struct daedalus_req_decode *req)
|
||||
{
|
||||
int free_slot = -1;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < DAEDALUS_MAX_PENDING_COOKIES; i++) {
|
||||
if (cli->pending[i].used && cli->pending[i].src_pts == src_pts) {
|
||||
cli->pending[i].cookie = cookie;
|
||||
cli->pending[i].cached_req = *req;
|
||||
return 0;
|
||||
}
|
||||
if (!cli->pending[i].used && free_slot < 0)
|
||||
free_slot = i;
|
||||
}
|
||||
|
||||
if (free_slot < 0) {
|
||||
log_err("pending: table full registering cookie=%u src_pts=%llu",
|
||||
cookie, (unsigned long long) src_pts);
|
||||
return -ENOSPC;
|
||||
}
|
||||
|
||||
cli->pending[free_slot].used = 1;
|
||||
cli->pending[free_slot].src_pts = src_pts;
|
||||
cli->pending[free_slot].cookie = cookie;
|
||||
cli->pending[free_slot].cached_req = *req;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Look up the cookie + cached REQ_DECODE that originally introduced
|
||||
* @src_pts. Returns 0 + populates @cookie_out / @req_out, or -ENOENT
|
||||
* if no match (likely a daemon bug or codec output we can't route).
|
||||
*/
|
||||
static int pending_lookup(const struct chardev_client *cli,
|
||||
uint64_t src_pts,
|
||||
uint32_t *cookie_out,
|
||||
struct daedalus_req_decode *req_out)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < DAEDALUS_MAX_PENDING_COOKIES; i++) {
|
||||
if (cli->pending[i].used &&
|
||||
cli->pending[i].src_pts == src_pts) {
|
||||
*cookie_out = cli->pending[i].cookie;
|
||||
*req_out = cli->pending[i].cached_req;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
static void pending_release(struct chardev_client *cli, uint64_t src_pts)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < DAEDALUS_MAX_PENDING_COOKIES; i++) {
|
||||
if (cli->pending[i].used &&
|
||||
cli->pending[i].src_pts == src_pts) {
|
||||
cli->pending[i].used = 0;
|
||||
cli->pending[i].src_pts = 0;
|
||||
cli->pending[i].cookie = 0;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Pack the daemon's current AVFrame into the CAPTURE buffer owned by
|
||||
* @owner_cookie, then ship RESP_FRAME with the flags caller asked for.
|
||||
* Returns 0 on success; -errno on GET_DMABUF / mmap failure (RESP is
|
||||
* still emitted so the kernel doesn't park the dst buffer forever).
|
||||
*/
|
||||
static int deliver_frame_to_cookie(struct chardev_client *cli,
|
||||
uint32_t owner_cookie,
|
||||
const struct daedalus_req_decode *owner_req,
|
||||
struct daedalus_resp_frame *resp,
|
||||
uint32_t resp_flags)
|
||||
{
|
||||
struct daedalus_capture_planes planes;
|
||||
int orc;
|
||||
|
||||
orc = daedalus_capture_planes_open(cli->fd, owner_cookie, owner_req,
|
||||
&planes);
|
||||
if (orc < 0) {
|
||||
log_warn("drain: GET_DMABUF cookie=%u failed (%d); RESP metadata-only",
|
||||
owner_cookie, orc);
|
||||
} else {
|
||||
(void) daedalus_decoder_pack_current(cli->decoder, &planes,
|
||||
owner_req->capture_pix_fmt);
|
||||
daedalus_capture_planes_close(&planes);
|
||||
}
|
||||
|
||||
resp->flags |= resp_flags;
|
||||
return send_response(cli, DAEDALUS_MSG_RESP_FRAME, owner_cookie,
|
||||
resp, sizeof(*resp));
|
||||
}
|
||||
|
||||
static int handle_req_decode(struct chardev_client *cli,
|
||||
const struct daedalus_msg_hdr *hdr,
|
||||
const uint8_t *payload)
|
||||
{
|
||||
struct daedalus_req_decode req;
|
||||
struct daedalus_resp_frame resp;
|
||||
struct daedalus_capture_planes planes;
|
||||
const struct daedalus_h264_meta *h264_meta = NULL;
|
||||
size_t meta_off, meta_len = 0;
|
||||
int submit_status;
|
||||
int src_consumed_emitted = 0;
|
||||
int rc;
|
||||
int decoded = 0;
|
||||
|
||||
if (hdr->payload_len < sizeof(req)) {
|
||||
struct daedalus_resp_frame err = { 0 };
|
||||
|
||||
log_err("REQ_DECODE cookie=%u: payload too short %u < %zu",
|
||||
hdr->cookie, hdr->payload_len, sizeof(req));
|
||||
err.status = DAEDALUS_DECODE_ERR_RECV;
|
||||
err.flags = DAEDALUS_RESP_FLAG_HAS_PIXELS |
|
||||
DAEDALUS_RESP_FLAG_SRC_CONSUMED;
|
||||
memset(&resp, 0, sizeof(resp));
|
||||
resp.status = DAEDALUS_DECODE_ERR_RECV;
|
||||
return send_response(cli, DAEDALUS_MSG_RESP_FRAME,
|
||||
hdr->cookie, &err, sizeof(err));
|
||||
hdr->cookie, &resp, sizeof(resp));
|
||||
}
|
||||
memcpy(&req, payload, sizeof(req));
|
||||
|
||||
/* Optional H.264 meta block follows req when the flag is set;
|
||||
* bitstream comes after meta. */
|
||||
if (req.flags & DAEDALUS_REQ_FLAG_H264_META)
|
||||
meta_len = sizeof(struct daedalus_h264_meta);
|
||||
meta_off = sizeof(req);
|
||||
|
||||
if ((size_t) req.bitstream_len + sizeof(req) + meta_len !=
|
||||
hdr->payload_len) {
|
||||
struct daedalus_resp_frame err = { 0 };
|
||||
|
||||
log_err("REQ_DECODE cookie=%u: bitstream_len %u + meta %zu inconsistent with payload_len %u",
|
||||
hdr->cookie, req.bitstream_len, meta_len,
|
||||
hdr->payload_len);
|
||||
err.status = DAEDALUS_DECODE_ERR_RECV;
|
||||
err.flags = DAEDALUS_RESP_FLAG_HAS_PIXELS |
|
||||
DAEDALUS_RESP_FLAG_SRC_CONSUMED;
|
||||
memset(&resp, 0, sizeof(resp));
|
||||
resp.status = DAEDALUS_DECODE_ERR_RECV;
|
||||
return send_response(cli, DAEDALUS_MSG_RESP_FRAME,
|
||||
hdr->cookie, &err, sizeof(err));
|
||||
hdr->cookie, &resp, sizeof(resp));
|
||||
}
|
||||
if (meta_len)
|
||||
h264_meta = (const struct daedalus_h264_meta *)
|
||||
(payload + meta_off);
|
||||
|
||||
log_info("REQ_DECODE cookie=%u codec=%u bitstream=%u bytes meta=%s capture=%ux%u %u planes src_pts=%llu",
|
||||
log_info("REQ_DECODE cookie=%u codec=%u bitstream=%u bytes meta=%s capture=%ux%u %u planes",
|
||||
hdr->cookie, req.codec_id, req.bitstream_len,
|
||||
h264_meta ? "h264" : "none",
|
||||
req.capture_width, req.capture_height,
|
||||
req.capture_num_planes,
|
||||
(unsigned long long) req.src_pts);
|
||||
req.capture_num_planes);
|
||||
|
||||
/*
|
||||
* Register (src_pts → cookie) mapping BEFORE submit, so any drained
|
||||
* frame whose pts matches this REQ's src_pts (the steady-state
|
||||
* 1:1 path) can find its owner via pending_lookup below. Out of
|
||||
* space here is fatal — we'd lose the routing identity for this
|
||||
* cookie's eventual frame. Send an error RESP that releases both
|
||||
* src and dst so the V4L2 client moves on.
|
||||
* Degenerate-bitstream filter (issue #17): libva-v4l2-request-
|
||||
* fourier flushes a stub packet into the OUTPUT_MPLANE queue at
|
||||
* playback-pause boundaries. The payload is shorter than any
|
||||
* parseable H.264 NAL (3-byte start code + 1-byte NAL header =
|
||||
* 4 bytes minimum); avcodec_send_packet returns
|
||||
* AVERROR_INVALIDDATA, which we used to propagate to the kernel
|
||||
* as a decode failure. Firefox then marks H.264-via-VAAPI as
|
||||
* broken for the session and routes every subsequent frame to
|
||||
* libmozavcodec SW — pause never recovers to HW.
|
||||
*
|
||||
* Drop the request as a no-op decode and reply RESP_FRAME OK so
|
||||
* libva's V4L2 state machine keeps its surface pool alive.
|
||||
*/
|
||||
rc = pending_register(cli, req.src_pts, hdr->cookie, &req);
|
||||
if (rc < 0) {
|
||||
struct daedalus_resp_frame err = { 0 };
|
||||
|
||||
err.status = DAEDALUS_DECODE_ERR_SEND;
|
||||
err.flags = DAEDALUS_RESP_FLAG_HAS_PIXELS |
|
||||
DAEDALUS_RESP_FLAG_SRC_CONSUMED;
|
||||
if (req.bitstream_len < 4) {
|
||||
log_info("REQ_DECODE cookie=%u: tiny bitstream %u bytes — dropping as no-op (pause-time sentinel)",
|
||||
hdr->cookie, req.bitstream_len);
|
||||
memset(&resp, 0, sizeof(resp));
|
||||
resp.status = DAEDALUS_DECODE_NO_FRAME;
|
||||
return send_response(cli, DAEDALUS_MSG_RESP_FRAME,
|
||||
hdr->cookie, &err, sizeof(err));
|
||||
}
|
||||
|
||||
submit_status = daedalus_decoder_submit(cli->decoder, &req,
|
||||
payload + meta_off + meta_len,
|
||||
h264_meta);
|
||||
if (submit_status != 0) {
|
||||
/*
|
||||
* avcodec_send_packet failed before any frame could have
|
||||
* been queued for this src_pts. Drop the pending entry
|
||||
* (no future drain will find a matching pts), and emit a
|
||||
* combined HAS_PIXELS|SRC_CONSUMED error RESP for this
|
||||
* cookie so the V4L2 client unblocks.
|
||||
*/
|
||||
struct daedalus_resp_frame err = { 0 };
|
||||
|
||||
pending_release(cli, req.src_pts);
|
||||
err.status = (uint32_t) submit_status;
|
||||
err.codec_id = req.codec_id;
|
||||
err.flags = DAEDALUS_RESP_FLAG_HAS_PIXELS |
|
||||
DAEDALUS_RESP_FLAG_SRC_CONSUMED;
|
||||
err.output_src_pts = req.src_pts;
|
||||
return send_response(cli, DAEDALUS_MSG_RESP_FRAME,
|
||||
hdr->cookie, &err, sizeof(err));
|
||||
}
|
||||
|
||||
/*
|
||||
* Drain libavcodec for as many display-ordered frames as it can
|
||||
* emit right now. Each frame's pts identifies which cookie's
|
||||
* CAPTURE buffer the pixels go in (see [[daedalus-v4l2#6]]). In
|
||||
* steady state for VP9/AV1 (no reorder) the loop runs exactly
|
||||
* once, draining the just-submitted packet's own frame. For
|
||||
* H.264 with B-frames the first drained frame may belong to an
|
||||
* EARLIER cookie's bitstream — that's the entire point.
|
||||
*/
|
||||
for (;;) {
|
||||
struct daedalus_resp_frame resp;
|
||||
uint32_t owner_cookie = 0;
|
||||
struct daedalus_req_decode owner_req;
|
||||
uint32_t flags;
|
||||
|
||||
rc = daedalus_decoder_drain_one(cli->decoder, req.codec_id,
|
||||
&resp);
|
||||
if (rc == -EAGAIN)
|
||||
break;
|
||||
if (rc != 0) {
|
||||
/*
|
||||
* Hard codec error during drain. resp->status is set.
|
||||
* Pin it to THIS REQ's cookie (we can't know whose
|
||||
* pts the failed frame would have had); set both
|
||||
* flags so the V4L2 client moves on.
|
||||
*/
|
||||
pending_release(cli, req.src_pts);
|
||||
resp.flags = DAEDALUS_RESP_FLAG_HAS_PIXELS |
|
||||
DAEDALUS_RESP_FLAG_SRC_CONSUMED;
|
||||
resp.output_src_pts = req.src_pts;
|
||||
(void) send_response(cli, DAEDALUS_MSG_RESP_FRAME,
|
||||
hdr->cookie, &resp, sizeof(resp));
|
||||
src_consumed_emitted = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
if (pending_lookup(cli, resp.output_src_pts,
|
||||
&owner_cookie, &owner_req) != 0) {
|
||||
/*
|
||||
* Frame's pts has no registered owner — implies a
|
||||
* daemon-side tracking bug or a codec output for a
|
||||
* packet we never registered (e.g. a B-frame that
|
||||
* was queued before the daemon caught up). Drop the
|
||||
* frame; can't safely route it.
|
||||
*/
|
||||
log_warn("drain: no pending entry for output_src_pts=%llu (codec dropped a frame?)",
|
||||
(unsigned long long) resp.output_src_pts);
|
||||
continue;
|
||||
}
|
||||
|
||||
flags = DAEDALUS_RESP_FLAG_HAS_PIXELS;
|
||||
if (owner_cookie == hdr->cookie) {
|
||||
flags |= DAEDALUS_RESP_FLAG_SRC_CONSUMED;
|
||||
src_consumed_emitted = 1;
|
||||
}
|
||||
|
||||
(void) deliver_frame_to_cookie(cli, owner_cookie, &owner_req,
|
||||
&resp, flags);
|
||||
pending_release(cli, resp.output_src_pts);
|
||||
}
|
||||
|
||||
/*
|
||||
* If the drain loop didn't already SRC_CONSUMED this REQ's cookie
|
||||
* (libavcodec held the frame for display-order reorder — the
|
||||
* pixels will arrive in a future drain), emit a standalone
|
||||
* SRC_CONSUMED RESP now. Kernel releases src_buf + runs
|
||||
* job_finish; dst_buf parked until the matching HAS_PIXELS
|
||||
* shows up later.
|
||||
*/
|
||||
if (!src_consumed_emitted) {
|
||||
struct daedalus_resp_frame resp = { 0 };
|
||||
|
||||
resp.status = DAEDALUS_DECODE_OK;
|
||||
resp.codec_id = req.codec_id;
|
||||
resp.flags = DAEDALUS_RESP_FLAG_SRC_CONSUMED;
|
||||
(void) send_response(cli, DAEDALUS_MSG_RESP_FRAME,
|
||||
hdr->cookie, &resp, sizeof(resp));
|
||||
}
|
||||
|
||||
return 0;
|
||||
/*
|
||||
* Open dmabuf-fds for every CAPTURE plane and mmap them.
|
||||
* If this fails we still attempt the decode (so the kernel
|
||||
* gets a structured error response) — but we pass NULL
|
||||
* planes so pixels aren't written anywhere.
|
||||
*/
|
||||
rc = daedalus_capture_planes_open(cli->fd, hdr->cookie, &req,
|
||||
&planes);
|
||||
if (rc < 0) {
|
||||
log_warn("REQ_DECODE cookie=%u: GET_DMABUF/mmap failed (%d); decode metadata-only",
|
||||
hdr->cookie, rc);
|
||||
/* planes is already zeroed by capture_planes_open */
|
||||
}
|
||||
|
||||
rc = daedalus_decoder_run_request(cli->decoder, &req,
|
||||
payload + meta_off + meta_len,
|
||||
h264_meta,
|
||||
&resp,
|
||||
planes.nr ? &planes : NULL);
|
||||
decoded = (rc >= 0);
|
||||
|
||||
daedalus_capture_planes_close(&planes);
|
||||
|
||||
if (!decoded)
|
||||
return rc;
|
||||
|
||||
/*
|
||||
* RESP_FRAME is metadata-only in Phase 8.6 — pixels already
|
||||
* live in the V4L2 client's CAPTURE buffer via the dmabuf
|
||||
* the daemon wrote to in pack_nv12_to_planes.
|
||||
*/
|
||||
return send_response(cli, DAEDALUS_MSG_RESP_FRAME, hdr->cookie,
|
||||
&resp, sizeof(resp));
|
||||
}
|
||||
|
||||
static int handle_ping(struct chardev_client *cli,
|
||||
|
||||
@@ -18,44 +18,18 @@
|
||||
struct ffmpeg_loader;
|
||||
struct daedalus_decoder;
|
||||
|
||||
/*
|
||||
* Per-inflight (cookie, src_pts) tracking for the H.264 B-frame
|
||||
* display-reorder fix (daedalus-v4l2#6). When the daemon drains a
|
||||
* frame from libavcodec, frame->pts (= src_pts of the OUTPUT bitstream
|
||||
* that contained the frame's slices) identifies which cookie's CAPTURE
|
||||
* buffer the pixels belong in — distinct from the cookie of the REQ
|
||||
* that triggered the receive_frame call. Mapping is small (bounded
|
||||
* by the V4L2 client's buffer pool depth, typically ≤24) so a linear
|
||||
* array beats a hashtable for cache-locality.
|
||||
*
|
||||
* cached_req carries the capture geometry (num_planes, plane sizes,
|
||||
* strides, pix_fmt) so a later drain — which may target this cookie
|
||||
* from a DIFFERENT REQ's drain loop — can call GET_DMABUF + open
|
||||
* planes with the original REQ's parameters.
|
||||
*/
|
||||
#define DAEDALUS_MAX_PENDING_COOKIES 64
|
||||
|
||||
struct chardev_pending_cookie {
|
||||
int used;
|
||||
uint64_t src_pts;
|
||||
uint32_t cookie;
|
||||
struct daedalus_req_decode cached_req;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct chardev_client - daemon-side chardev state
|
||||
* @fd: open /dev/daedalus-v4l2 descriptor (-1 if not open)
|
||||
* @loader: dlopen'd FFmpeg loader (borrowed; not owned)
|
||||
* @decoder: per-codec AVCodecContext cache (owned)
|
||||
* @stop_flag: set non-zero from a signal handler to break the loop
|
||||
* @pending: pts → cookie lookup table for split SRC/DST RESPs
|
||||
*/
|
||||
struct chardev_client {
|
||||
int fd;
|
||||
struct ffmpeg_loader *loader;
|
||||
struct daedalus_decoder *decoder;
|
||||
volatile sig_atomic_t *stop_flag;
|
||||
struct chardev_pending_cookie pending[DAEDALUS_MAX_PENDING_COOKIES];
|
||||
};
|
||||
|
||||
/**
|
||||
|
||||
+310
-153
@@ -10,12 +10,55 @@
|
||||
#include <errno.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
|
||||
#include <linux/videodev2.h>
|
||||
|
||||
#include <libavcodec/avcodec.h>
|
||||
#include <libavutil/pixfmt.h>
|
||||
|
||||
/*
|
||||
* Per-codec running stats — daedalus-v4l2#11 step 1. Establishes
|
||||
* baseline observability before any daedalus-fourier kernel
|
||||
* substitution lands, so we can see what each substitution actually
|
||||
* shifted. Per-frame `decoder: OK` line now carries decode_us; a
|
||||
* "decoder stats" summary line lands every DAEDALUS_STATS_EVERY OK
|
||||
* frames with throughput + per-frame budget aggregates.
|
||||
*
|
||||
* Counters are static (process-local) and unsynchronised — the
|
||||
* daemon's chardev event loop is single-threaded, so no atomics or
|
||||
* locking needed. Reset when codec_id changes (different stream).
|
||||
*/
|
||||
#define DAEDALUS_STATS_EVERY 60u
|
||||
|
||||
/* Running per-codec decode counters (one static instance, g_stats). */
struct daedalus_decode_stats {
	/* Codec the counters below currently describe. */
	uint32_t	codec_id;
	/* Frames accumulated so far. */
	uint64_t	frames;
	/* Sum of per-frame decode time, in nanoseconds. */
	uint64_t	total_decode_ns;
	/* Sum of submitted bitstream sizes, in bytes. */
	uint64_t	total_bitstream_bytes;
	/* Macroblock count derived from frame WxH; H.264-style 16x16. */
	uint64_t	total_mbs;
	/* NOTE(review): presumably the start of the current summary window — confirm at the use site. */
	struct timespec	window_start;
};
|
||||
|
||||
static struct daedalus_decode_stats g_stats;
|
||||
|
||||
/*
 * Nanoseconds elapsed from @a to @b, computed in unsigned 64-bit
 * arithmetic.
 *
 * The nanosecond fields may differ by a negative amount; converting
 * that to uint64_t wraps, and the wrap cancels in the modular sum, so
 * the result is correct whenever @b is not earlier than @a.
 */
static inline uint64_t timespec_delta_ns(const struct timespec *a,
					 const struct timespec *b)
{
	uint64_t whole_sec_ns;
	uint64_t frac_ns;

	whole_sec_ns = (uint64_t)(b->tv_sec - a->tv_sec) * 1000000000ull;
	frac_ns = (uint64_t)(b->tv_nsec - a->tv_nsec);

	return whole_sec_ns + frac_ns;
}
|
||||
|
||||
static const char *codec_id_name(uint32_t cid)
|
||||
{
|
||||
switch (cid) {
|
||||
case DAEDALUS_CODEC_VP9: return "vp9";
|
||||
case DAEDALUS_CODEC_AV1: return "av1";
|
||||
case DAEDALUS_CODEC_H264: return "h264";
|
||||
default: return "?";
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* FNV-1a 32-bit hash. Used as a compact digest of the decoded
|
||||
* frame's YUV planes so the kernel can verify "the daemon produced
|
||||
@@ -132,6 +175,32 @@ static int decoder_open_codec(struct daedalus_decoder *dec, uint32_t codec_id,
|
||||
ctx = fm->avcodec_alloc_context3(codec);
|
||||
if (!ctx)
|
||||
return -ENOMEM;
|
||||
|
||||
/*
|
||||
* H.264-only: force libavcodec to emit frames in DECODE order
|
||||
* (one frame per send_packet, no internal display-order reorder
|
||||
* queue). V4L2 stateless decoder protocol expects each OUTPUT
|
||||
* bitstream packet to produce one CAPTURE buffer with that
|
||||
* packet's slice-decoded pixels — regardless of display order.
|
||||
* ffmpeg-vaapi's H.264 decoder (which is what consumes our
|
||||
* CAPTURE buffers via libva-v4l2-request-fourier) does its own
|
||||
* POC-based display reorder upstream, so producing decode-order
|
||||
* output is correct.
|
||||
*
|
||||
* AV_CODEC_FLAG_LOW_DELAY forces `low_delay = 1` inside
|
||||
* libavcodec's H.264 decoder — `h264_select_output_frame` emits
|
||||
* the just-decoded picture immediately instead of holding it
|
||||
* for the display-order DPB output queue. DPB management for
|
||||
* reference frames (short_ref / long_ref) is unaffected; B-frame
|
||||
* decoding correctness is preserved.
|
||||
*
|
||||
* Closes daedalus-v4l2#11 part (2). Skipped for VP9 / AV1 —
|
||||
* those formats don't internally reorder, so the flag would be
|
||||
* a no-op but adds no value.
|
||||
*/
|
||||
if (codec_id == DAEDALUS_CODEC_H264)
|
||||
ctx->flags |= AV_CODEC_FLAG_LOW_DELAY;
|
||||
|
||||
rc = fm->avcodec_open2(ctx, codec, NULL);
|
||||
if (rc < 0) {
|
||||
log_err("decoder: avcodec_open2 failed: %d", rc);
|
||||
@@ -204,6 +273,20 @@ static int pack_p010_to_plane(struct AVFrame *fr,
|
||||
if (!base)
|
||||
return -EINVAL;
|
||||
|
||||
/* Bounds-check (see pack_nv12_single comment). P010 stores 16
|
||||
* bits per sample on both Y and CbCr planes; stride is in bytes
|
||||
* and already accounts for the 2× expansion. */
|
||||
{
|
||||
size_t y_size_chk = (size_t) stride * (size_t) h;
|
||||
size_t required = y_size_chk + (size_t) stride * (size_t) ch;
|
||||
if (planes->size[0] < required) {
|
||||
log_warn("pack_p010: frame %dx%d (stride=%u required=%zu) "
|
||||
"exceeds CAPTURE plane[0] size %zu — skipping pack",
|
||||
w, h, stride, required, planes->size[0]);
|
||||
return -EOVERFLOW;
|
||||
}
|
||||
}
|
||||
|
||||
dst_y = base;
|
||||
y_size = (size_t) stride * (size_t) h;
|
||||
dst_uv = base + y_size;
|
||||
@@ -251,7 +334,7 @@ static int pack_nv12_single_to_plane(struct AVFrame *fr,
|
||||
uint8_t *base;
|
||||
uint32_t stride;
|
||||
uint8_t *dst_y, *dst_uv;
|
||||
size_t y_size;
|
||||
size_t y_size, required;
|
||||
|
||||
if (!desc || !planes || planes->nr < 1)
|
||||
return -EINVAL;
|
||||
@@ -270,8 +353,27 @@ static int pack_nv12_single_to_plane(struct AVFrame *fr,
|
||||
if (!base)
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* Bounds-check before any write — the V4L2 client's CAPTURE
|
||||
* dmabuf may have been sized for a smaller frame than what
|
||||
* libavcodec just decoded (e.g. YouTube DASH stepping
|
||||
* resolution mid-stream — libva is supposed to handle the
|
||||
* SOURCE_CHANGE event with STREAMOFF + S_FMT + REQBUFS but
|
||||
* sometimes a stale request slips through carrying the old
|
||||
* buffer size). Writing the chroma interleave loop into an
|
||||
* undersized mapping faults the daemon with SIGSEGV mid-frame.
|
||||
* Bail loudly with a warn instead.
|
||||
*/
|
||||
y_size = (size_t) stride * (size_t) h;
|
||||
required = y_size + (size_t) stride * (size_t) ch;
|
||||
if (planes->size[0] < required) {
|
||||
log_warn("pack_nv12_single: frame %dx%d (stride=%u required=%zu) "
|
||||
"exceeds CAPTURE plane[0] size %zu — skipping pack",
|
||||
w, h, stride, required, planes->size[0]);
|
||||
return -EOVERFLOW;
|
||||
}
|
||||
|
||||
dst_y = base;
|
||||
y_size = (size_t) stride * (size_t) h;
|
||||
dst_uv = base + y_size;
|
||||
|
||||
for (y = 0; y < h; y++)
|
||||
@@ -326,6 +428,24 @@ static int pack_nv12_to_planes(struct AVFrame *fr,
|
||||
if (!dst_y || !dst_uv)
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* Bounds-check both planes against the mapped dmabuf size. See
|
||||
* pack_nv12_single_to_plane comment for the resolution-change-
|
||||
* mid-stream crash story this protects against.
|
||||
*/
|
||||
{
|
||||
size_t y_required = (size_t) dst_y_stride * (size_t) h;
|
||||
size_t uv_required = (size_t) dst_uv_stride * (size_t) ch;
|
||||
if (planes->size[0] < y_required ||
|
||||
planes->size[1] < uv_required) {
|
||||
log_warn("pack_nv12_2plane: frame %dx%d "
|
||||
"(y=%zu/%zu uv=%zu/%zu) exceeds CAPTURE — skipping pack",
|
||||
w, h, y_required, planes->size[0],
|
||||
uv_required, planes->size[1]);
|
||||
return -EOVERFLOW;
|
||||
}
|
||||
}
|
||||
|
||||
/* Y plane copy — strip source stride padding. */
|
||||
for (y = 0; y < h; y++)
|
||||
memcpy(dst_y + (size_t) y * dst_y_stride,
|
||||
@@ -348,30 +468,31 @@ static int pack_nv12_to_planes(struct AVFrame *fr,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Per-codec assemble + send_packet. Returns 0 on success, or one
|
||||
* of DAEDALUS_DECODE_ERR_* on failure (errors here propagate via
|
||||
* the caller's RESP_FRAME status field — they are NOT logged as a
|
||||
* silent skip). pkt->pts is stamped from req->src_pts so the
|
||||
* resulting frame->pts comes back identifiable on the drain side.
|
||||
*/
|
||||
int daedalus_decoder_submit(struct daedalus_decoder *dec,
|
||||
const struct daedalus_req_decode *req,
|
||||
const uint8_t *bitstream,
|
||||
const struct daedalus_h264_meta *h264_meta)
|
||||
int daedalus_decoder_run_request(struct daedalus_decoder *dec,
|
||||
const struct daedalus_req_decode *req,
|
||||
const uint8_t *bitstream,
|
||||
const struct daedalus_h264_meta *h264_meta,
|
||||
struct daedalus_resp_frame *resp,
|
||||
const struct daedalus_capture_planes *planes)
|
||||
{
|
||||
struct ffmpeg_loader *fm = dec->loader;
|
||||
struct AVCodecContext *ctx = NULL;
|
||||
uint8_t *assembled = NULL;
|
||||
size_t assembled_len = 0;
|
||||
int rc;
|
||||
int status = 0;
|
||||
|
||||
memset(resp, 0, sizeof(*resp));
|
||||
resp->codec_id = req->codec_id;
|
||||
|
||||
rc = decoder_open_codec(dec, req->codec_id, &ctx);
|
||||
if (rc == -ENOSYS)
|
||||
return DAEDALUS_DECODE_ERR_CODEC;
|
||||
if (rc < 0)
|
||||
return DAEDALUS_DECODE_ERR_OPEN;
|
||||
if (rc == -ENOSYS) {
|
||||
resp->status = DAEDALUS_DECODE_ERR_CODEC;
|
||||
goto out;
|
||||
}
|
||||
if (rc < 0) {
|
||||
resp->status = DAEDALUS_DECODE_ERR_OPEN;
|
||||
goto out;
|
||||
}
|
||||
|
||||
fm->av_packet_unref(dec->pkt);
|
||||
|
||||
@@ -396,14 +517,14 @@ int daedalus_decoder_submit(struct daedalus_decoder *dec,
|
||||
if (sps_len == 0 || pps_len == 0) {
|
||||
log_err("decoder: SPS/PPS NAL synth failed (sps=%zu pps=%zu)",
|
||||
sps_len, pps_len);
|
||||
status = DAEDALUS_DECODE_ERR_SEND;
|
||||
resp->status = DAEDALUS_DECODE_ERR_SEND;
|
||||
goto out;
|
||||
}
|
||||
|
||||
assembled_len = sps_len + pps_len + req->bitstream_len;
|
||||
assembled = malloc(assembled_len + AV_INPUT_BUFFER_PADDING_SIZE);
|
||||
if (!assembled) {
|
||||
status = DAEDALUS_DECODE_ERR_SEND;
|
||||
resp->status = DAEDALUS_DECODE_ERR_SEND;
|
||||
goto out;
|
||||
}
|
||||
memcpy(assembled, sps_nal, sps_len);
|
||||
@@ -441,161 +562,197 @@ int daedalus_decoder_submit(struct daedalus_decoder *dec,
|
||||
}
|
||||
|
||||
/*
|
||||
* Stamp pkt->pts from REQ_DECODE's src_pts (the V4L2 OUTPUT
|
||||
* buffer's vb2 timestamp captured by the kernel at device_run
|
||||
* time). libavcodec carries pkt->pts forward to frame->pts on
|
||||
* the receive_frame side — even after display-order reordering
|
||||
* inside the H.264 DPB — which lets the chardev_client identify
|
||||
* which cookie's CAPTURE buffer a drained frame's pixels belong
|
||||
* in. Without this stamp, every drained frame would look like
|
||||
* it came from the current REQ; pairs of B/P would swap places
|
||||
* in the visible output (daedalus-v4l2#6).
|
||||
* Time send_packet+receive_frame for the per-frame `decoder: OK`
|
||||
* line + the periodic stats summary. Includes only the
|
||||
* libavcodec round-trip — not the bitstream packing, SPS/PPS
|
||||
* synth, or pack-to-planes work (those are accounted for
|
||||
* separately in the request's overall handle time).
|
||||
*/
|
||||
dec->pkt->pts = (int64_t) req->src_pts;
|
||||
struct timespec t_decode_start, t_decode_end;
|
||||
uint64_t decode_ns = 0;
|
||||
clock_gettime(CLOCK_MONOTONIC, &t_decode_start);
|
||||
|
||||
rc = fm->avcodec_send_packet(ctx, dec->pkt);
|
||||
if (rc < 0) {
|
||||
log_err("decoder: avcodec_send_packet failed: %d", rc);
|
||||
status = DAEDALUS_DECODE_ERR_SEND;
|
||||
resp->status = DAEDALUS_DECODE_ERR_SEND;
|
||||
goto out;
|
||||
}
|
||||
|
||||
out:
|
||||
free(assembled);
|
||||
(void) assembled_len;
|
||||
return status;
|
||||
}
|
||||
|
||||
/*
|
||||
* Pull the next display-ordered frame out of libavcodec's DPB.
|
||||
* Returns 0 if a frame was returned (dec->frame holds it and resp
|
||||
* is populated with metadata + output_src_pts == frame->pts),
|
||||
* -EAGAIN if libavcodec needs more input, or DAEDALUS_DECODE_ERR_*
|
||||
* on a hard codec error. Caller may immediately invoke
|
||||
* daedalus_decoder_pack_current() to copy this frame's pixels into
|
||||
* a CAPTURE buffer's mapped planes, then call drain_one again for
|
||||
* any further frames in the DPB.
|
||||
*/
|
||||
int daedalus_decoder_drain_one(struct daedalus_decoder *dec,
|
||||
uint32_t codec_id,
|
||||
struct daedalus_resp_frame *resp)
|
||||
{
|
||||
struct ffmpeg_loader *fm = dec->loader;
|
||||
struct AVCodecContext *ctx = NULL;
|
||||
struct AVFrame *fr;
|
||||
const AVPixFmtDescriptor *desc;
|
||||
uint32_t h, luma_len = 0, chroma_len = 0;
|
||||
int rc;
|
||||
|
||||
memset(resp, 0, sizeof(*resp));
|
||||
resp->codec_id = codec_id;
|
||||
|
||||
rc = decoder_open_codec(dec, codec_id, &ctx);
|
||||
if (rc == -ENOSYS) {
|
||||
resp->status = DAEDALUS_DECODE_ERR_CODEC;
|
||||
return DAEDALUS_DECODE_ERR_CODEC;
|
||||
}
|
||||
if (rc < 0) {
|
||||
resp->status = DAEDALUS_DECODE_ERR_OPEN;
|
||||
return DAEDALUS_DECODE_ERR_OPEN;
|
||||
}
|
||||
|
||||
fm->av_frame_unref(dec->frame);
|
||||
rc = fm->avcodec_receive_frame(ctx, dec->frame);
|
||||
if (rc == AVERROR(EAGAIN) || rc == AVERROR_EOF)
|
||||
return -EAGAIN;
|
||||
clock_gettime(CLOCK_MONOTONIC, &t_decode_end);
|
||||
decode_ns = timespec_delta_ns(&t_decode_start, &t_decode_end);
|
||||
if (rc == AVERROR(EAGAIN) || rc == AVERROR_EOF) {
|
||||
log_debug("decoder: no frame ready yet (rc=%d, %lu us)",
|
||||
rc, (unsigned long)(decode_ns / 1000));
|
||||
resp->status = DAEDALUS_DECODE_NO_FRAME;
|
||||
goto out;
|
||||
}
|
||||
if (rc < 0) {
|
||||
log_err("decoder: avcodec_receive_frame failed: %d", rc);
|
||||
resp->status = DAEDALUS_DECODE_ERR_RECV;
|
||||
return DAEDALUS_DECODE_ERR_RECV;
|
||||
goto out;
|
||||
}
|
||||
|
||||
fr = dec->frame;
|
||||
desc = fm->av_pix_fmt_desc_get(fr->format);
|
||||
h = fnv1a32_init();
|
||||
{
|
||||
struct AVFrame *fr = dec->frame;
|
||||
const AVPixFmtDescriptor *desc =
|
||||
fm->av_pix_fmt_desc_get(fr->format);
|
||||
uint32_t h = fnv1a32_init();
|
||||
uint32_t luma_len = 0, chroma_len = 0;
|
||||
|
||||
resp->status = DAEDALUS_DECODE_OK;
|
||||
resp->width = (uint32_t) fr->width;
|
||||
resp->height = (uint32_t) fr->height;
|
||||
resp->pix_fmt = fr->format;
|
||||
resp->output_src_pts = (uint64_t) fr->pts;
|
||||
resp->status = DAEDALUS_DECODE_OK;
|
||||
resp->width = (uint32_t) fr->width;
|
||||
resp->height = (uint32_t) fr->height;
|
||||
resp->pix_fmt = fr->format;
|
||||
|
||||
if (!desc) {
|
||||
log_warn("decoder: no descriptor for pix_fmt %d", fr->format);
|
||||
} else {
|
||||
int p, max_plane = 0;
|
||||
int i;
|
||||
/*
|
||||
* Walk every plane reported by the AVPixFmtDescriptor.
|
||||
* For each component, byte width = ((plane_w *
|
||||
* step_minus1) >> 0) — but the descriptor only tells
|
||||
* us which plane each component sits in, not the
|
||||
* plane's byte stride per pixel. In practice for the
|
||||
* formats we care about (YUV420P, YUV422P, YUV444P,
|
||||
* GBRP, NV12), each plane has exactly one component
|
||||
* at 1 byte/sample. Hash each plane at
|
||||
* (width >> log2_chroma_w) × (height >> log2_chroma_h)
|
||||
* for chroma planes, full-size for plane 0.
|
||||
*
|
||||
* This generalises cleanly to anything 8-bit-per-
|
||||
* sample-per-plane; 10/12-bit (P010, YUV420P10LE) will
|
||||
* need depth handling when Phase 8.6 brings HDR
|
||||
* content into play.
|
||||
*/
|
||||
if (!desc) {
|
||||
log_warn("decoder: no descriptor for pix_fmt %d",
|
||||
fr->format);
|
||||
} else {
|
||||
int p, max_plane = 0;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < desc->nb_components; i++) {
|
||||
if (desc->comp[i].plane > max_plane)
|
||||
max_plane = desc->comp[i].plane;
|
||||
}
|
||||
|
||||
for (p = 0; p <= max_plane; p++) {
|
||||
int pw, ph;
|
||||
if (!fr->data[p] || !fr->linesize[p])
|
||||
continue;
|
||||
if (p == 0) {
|
||||
pw = fr->width;
|
||||
ph = fr->height;
|
||||
luma_len += (uint32_t) pw * (uint32_t) ph;
|
||||
} else {
|
||||
pw = AV_CEIL_RSHIFT(fr->width,
|
||||
desc->log2_chroma_w);
|
||||
ph = AV_CEIL_RSHIFT(fr->height,
|
||||
desc->log2_chroma_h);
|
||||
chroma_len += (uint32_t) pw * (uint32_t) ph;
|
||||
for (i = 0; i < desc->nb_components; i++) {
|
||||
if (desc->comp[i].plane > max_plane)
|
||||
max_plane = desc->comp[i].plane;
|
||||
}
|
||||
h = fnv1a32_plane(h, fr->data[p], pw, ph,
|
||||
fr->linesize[p]);
|
||||
|
||||
for (p = 0; p <= max_plane; p++) {
|
||||
int pw, ph;
|
||||
if (!fr->data[p] || !fr->linesize[p])
|
||||
continue;
|
||||
if (p == 0) {
|
||||
pw = fr->width;
|
||||
ph = fr->height;
|
||||
luma_len += (uint32_t) pw *
|
||||
(uint32_t) ph;
|
||||
} else {
|
||||
pw = AV_CEIL_RSHIFT(fr->width,
|
||||
desc->log2_chroma_w);
|
||||
ph = AV_CEIL_RSHIFT(fr->height,
|
||||
desc->log2_chroma_h);
|
||||
chroma_len += (uint32_t) pw *
|
||||
(uint32_t) ph;
|
||||
}
|
||||
h = fnv1a32_plane(h, fr->data[p], pw, ph,
|
||||
fr->linesize[p]);
|
||||
}
|
||||
}
|
||||
|
||||
resp->luma_len = luma_len;
|
||||
resp->chroma_len = chroma_len;
|
||||
resp->fnv1a_yuv = h;
|
||||
|
||||
/*
|
||||
* Pack pixels directly into the mapped CAPTURE dmabuf
|
||||
* planes. Dispatch on the V4L2 fourcc the kernel
|
||||
* negotiated:
|
||||
* V4L2_PIX_FMT_NV12M (default, 8-bit, 2 planes)
|
||||
* V4L2_PIX_FMT_P010 (10-bit HDR, 1 plane)
|
||||
*/
|
||||
if (planes && planes->nr >= 1) {
|
||||
int prc = 0;
|
||||
switch (req->capture_pix_fmt) {
|
||||
case V4L2_PIX_FMT_NV12M:
|
||||
prc = pack_nv12_to_planes(fr, desc, planes);
|
||||
break;
|
||||
case V4L2_PIX_FMT_NV12:
|
||||
prc = pack_nv12_single_to_plane(fr, desc, planes);
|
||||
break;
|
||||
case V4L2_PIX_FMT_P010:
|
||||
prc = pack_p010_to_plane(fr, desc, planes);
|
||||
break;
|
||||
default:
|
||||
log_warn("decoder: unsupported capture fourcc 0x%08x",
|
||||
req->capture_pix_fmt);
|
||||
prc = -EINVAL;
|
||||
break;
|
||||
}
|
||||
if (prc < 0)
|
||||
log_warn("decoder: pack failed (pix_fmt=%d cap_fourcc=0x%08x) — kernel will see metadata only",
|
||||
fr->format, req->capture_pix_fmt);
|
||||
}
|
||||
|
||||
log_info("decoder: OK %dx%d fmt=%d (%s) fnv1a=0x%08x luma=%u chroma=%u decode_us=%lu",
|
||||
fr->width, fr->height, fr->format,
|
||||
desc ? desc->name : "?",
|
||||
h, luma_len, chroma_len,
|
||||
(unsigned long)(decode_ns / 1000));
|
||||
|
||||
/*
|
||||
* Periodic stats summary (every DAEDALUS_STATS_EVERY frames).
|
||||
* Reset window on codec change. Gives observable baseline
|
||||
* for the daedalus-v4l2#11 substitution arc: fps, average
|
||||
* decode_us, MB/s throughput, bitstream B/MB. Compare
|
||||
* against daedalus-fourier README's per-kernel NEON
|
||||
* baselines (e.g. H.264 IDCT 4x4 = 175 Mblock/s) to gauge
|
||||
* which substitutions are worth pursuing.
|
||||
*/
|
||||
if (g_stats.codec_id != req->codec_id) {
|
||||
g_stats.codec_id = req->codec_id;
|
||||
g_stats.frames = 0;
|
||||
g_stats.total_decode_ns = 0;
|
||||
g_stats.total_bitstream_bytes = 0;
|
||||
g_stats.total_mbs = 0;
|
||||
clock_gettime(CLOCK_MONOTONIC, &g_stats.window_start);
|
||||
}
|
||||
g_stats.frames++;
|
||||
g_stats.total_decode_ns += decode_ns;
|
||||
g_stats.total_bitstream_bytes += req->bitstream_len;
|
||||
g_stats.total_mbs += (uint64_t)((fr->width + 15) / 16) *
|
||||
(uint64_t)((fr->height + 15) / 16);
|
||||
|
||||
if (g_stats.frames % DAEDALUS_STATS_EVERY == 0) {
|
||||
struct timespec t_now;
|
||||
clock_gettime(CLOCK_MONOTONIC, &t_now);
|
||||
uint64_t window_ns =
|
||||
timespec_delta_ns(&g_stats.window_start, &t_now);
|
||||
double window_s = (double)window_ns / 1e9;
|
||||
double fps = window_s > 0 ?
|
||||
(double)g_stats.frames / window_s : 0.0;
|
||||
double avg_decode_us = g_stats.frames > 0 ?
|
||||
(double)g_stats.total_decode_ns /
|
||||
(double)g_stats.frames / 1000.0 : 0.0;
|
||||
double mb_per_s = window_s > 0 ?
|
||||
(double)g_stats.total_mbs / window_s : 0.0;
|
||||
double bs_b_per_mb = g_stats.total_mbs > 0 ?
|
||||
(double)g_stats.total_bitstream_bytes /
|
||||
(double)g_stats.total_mbs : 0.0;
|
||||
|
||||
log_info("decoder stats: codec=%s "
|
||||
"frames=%llu window=%.2fs fps=%.2f "
|
||||
"avg_decode_us=%.1f mbs_per_s=%.0f "
|
||||
"bs_b_per_mb=%.2f",
|
||||
codec_id_name(g_stats.codec_id),
|
||||
(unsigned long long)g_stats.frames,
|
||||
window_s, fps, avg_decode_us,
|
||||
mb_per_s, bs_b_per_mb);
|
||||
}
|
||||
}
|
||||
|
||||
resp->luma_len = luma_len;
|
||||
resp->chroma_len = chroma_len;
|
||||
resp->fnv1a_yuv = h;
|
||||
|
||||
log_info("decoder: OK %dx%d fmt=%d (%s) fnv1a=0x%08x luma=%u chroma=%u src_pts=%llu",
|
||||
fr->width, fr->height, fr->format,
|
||||
desc ? desc->name : "?",
|
||||
h, luma_len, chroma_len,
|
||||
(unsigned long long) fr->pts);
|
||||
fm->av_frame_unref(dec->frame);
|
||||
|
||||
out:
|
||||
free(assembled);
|
||||
(void) assembled_len;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int daedalus_decoder_pack_current(struct daedalus_decoder *dec,
|
||||
const struct daedalus_capture_planes *planes,
|
||||
uint32_t capture_pix_fmt)
|
||||
{
|
||||
struct ffmpeg_loader *fm = dec->loader;
|
||||
struct AVFrame *fr = dec->frame;
|
||||
const AVPixFmtDescriptor *desc;
|
||||
int prc;
|
||||
|
||||
if (!planes || planes->nr < 1 || !fr || !fr->width || !fr->height)
|
||||
return -EINVAL;
|
||||
|
||||
desc = fm->av_pix_fmt_desc_get(fr->format);
|
||||
|
||||
switch (capture_pix_fmt) {
|
||||
case V4L2_PIX_FMT_NV12M:
|
||||
prc = pack_nv12_to_planes(fr, desc, planes);
|
||||
break;
|
||||
case V4L2_PIX_FMT_NV12:
|
||||
prc = pack_nv12_single_to_plane(fr, desc, planes);
|
||||
break;
|
||||
case V4L2_PIX_FMT_P010:
|
||||
prc = pack_p010_to_plane(fr, desc, planes);
|
||||
break;
|
||||
default:
|
||||
log_warn("decoder: unsupported capture fourcc 0x%08x",
|
||||
capture_pix_fmt);
|
||||
prc = -EINVAL;
|
||||
break;
|
||||
}
|
||||
if (prc < 0)
|
||||
log_warn("decoder: pack failed (pix_fmt=%d cap_fourcc=0x%08x)",
|
||||
fr->format, capture_pix_fmt);
|
||||
return prc;
|
||||
}
|
||||
|
||||
+22
-57
@@ -56,68 +56,33 @@ int daedalus_decoder_init(struct daedalus_decoder *dec,
|
||||
void daedalus_decoder_cleanup(struct daedalus_decoder *dec);
|
||||
|
||||
/**
|
||||
* daedalus_decoder_submit - send one REQ_DECODE's bitstream into libavcodec
|
||||
* daedalus_decoder_run_request - decode one REQ_DECODE payload
|
||||
* @dec: initialised decoder
|
||||
* @req: REQ_DECODE prefix (from the wire); src_pts is stamped on
|
||||
* the AVPacket so libavcodec returns frame->pts == src_pts
|
||||
* when it eventually outputs the matching frame in display
|
||||
* order (daedalus-v4l2#6).
|
||||
* @req: REQ_DECODE prefix (from the wire)
|
||||
* @bitstream: bitstream blob (req->bitstream_len bytes)
|
||||
* @h264_meta: optional H.264 SPS/PPS metadata; non-NULL only when
|
||||
* codec_id == H264 and the kernel set DAEDALUS_REQ_FLAG_
|
||||
* H264_META. See decoder.c for the AnnexB synthesis.
|
||||
* H264_META. Used to synthesise the AnnexB SPS+PPS NALs
|
||||
* libavcodec needs before any slice (libva-v4l2-request
|
||||
* passes only the slice in @bitstream per the V4L2
|
||||
* stateless API contract). NULL for VP9/AV1 paths.
|
||||
* @resp: caller-allocated RESP_FRAME output (zeroed by callee)
|
||||
* @planes: mapped CAPTURE planes (Phase 8.6 dmabuf path). If
|
||||
* NULL or planes->nr == 0, the decoder runs but
|
||||
* writes no pixels — caller still gets dims + digest.
|
||||
*
|
||||
* Calls avcodec_send_packet on the codec's per-codec AVCodecContext.
|
||||
* Returns 0 on success; one of DAEDALUS_DECODE_ERR_* on failure
|
||||
* (which the caller should propagate as the RESP_FRAME status for
|
||||
* the cookie of this REQ). Does NOT call avcodec_receive_frame —
|
||||
* use daedalus_decoder_drain_one for that.
|
||||
* Populates @resp with the decode outcome and writes decoded
|
||||
* pixels (NV12 layout: Y to plane 0, interleaved CbCr to plane
|
||||
* 1) directly into the mapped dmabuf planes. Always returns
|
||||
* 0; decode-level failures are reported via @resp->status so
|
||||
* the kernel sees a structured response rather than a dropped
|
||||
* request.
|
||||
*/
|
||||
int daedalus_decoder_submit(struct daedalus_decoder *dec,
|
||||
const struct daedalus_req_decode *req,
|
||||
const uint8_t *bitstream,
|
||||
const struct daedalus_h264_meta *h264_meta);
|
||||
|
||||
/**
|
||||
* daedalus_decoder_drain_one - pop the next display-ordered frame, if any
|
||||
* @dec: initialised decoder
|
||||
* @codec_id: which codec context to drain (matches the REQ that just
|
||||
* called submit). VP9/AV1/H264 use independent contexts.
|
||||
* @resp: caller-allocated RESP_FRAME output (zeroed by callee).
|
||||
* On a successful drain (return 0), resp's status / width /
|
||||
* height / pix_fmt / luma_len / chroma_len / fnv1a_yuv /
|
||||
* output_src_pts are populated; flags is left at 0 (caller
|
||||
* adds HAS_PIXELS / SRC_CONSUMED). On EAGAIN, resp is
|
||||
* zeroed.
|
||||
*
|
||||
* Return: 0 on a frame returned, -EAGAIN if libavcodec needs more
|
||||
* input (display-order frame held inside DPB), <0 on a hard codec
|
||||
* error (resp->status set).
|
||||
*
|
||||
* After a successful drain, the dec's internal AVFrame holds the
|
||||
* decoded picture. Caller may immediately call
|
||||
* daedalus_decoder_pack_current(planes) to write that picture into
|
||||
* a CAPTURE buffer's dmabuf-mapped planes. Subsequent calls to
|
||||
* drain_one (without another submit) try to pull additional frames
|
||||
* from libavcodec's DPB.
|
||||
*/
|
||||
int daedalus_decoder_drain_one(struct daedalus_decoder *dec,
|
||||
uint32_t codec_id,
|
||||
struct daedalus_resp_frame *resp);
|
||||
|
||||
/**
|
||||
* daedalus_decoder_pack_current - pack the last drained frame into planes
|
||||
* @dec: initialised decoder; must have a frame from drain_one
|
||||
* @planes: mapped CAPTURE planes (open via GET_DMABUF using the
|
||||
* cookie that owns the frame's output_src_pts).
|
||||
* @capture_pix_fmt: V4L2 fourcc on the CAPTURE side (NV12M, NV12,
|
||||
* P010).
|
||||
*
|
||||
* Return: 0 on success, <0 on a pack failure (kernel sees only the
|
||||
* metadata, not pixels — typical when a format isn't wired yet).
|
||||
*/
|
||||
int daedalus_decoder_pack_current(struct daedalus_decoder *dec,
|
||||
const struct daedalus_capture_planes *planes,
|
||||
uint32_t capture_pix_fmt);
|
||||
int daedalus_decoder_run_request(struct daedalus_decoder *dec,
|
||||
const struct daedalus_req_decode *req,
|
||||
const uint8_t *bitstream,
|
||||
const struct daedalus_h264_meta *h264_meta,
|
||||
struct daedalus_resp_frame *resp,
|
||||
const struct daedalus_capture_planes *planes);
|
||||
|
||||
#endif /* DAEDALUS_V4L2_DECODER_H */
|
||||
|
||||
@@ -11,14 +11,31 @@
|
||||
#include <dlfcn.h>
|
||||
|
||||
/*
|
||||
* SONAME versions match Debian Trixie / FFmpeg 7.1.3 today. If
|
||||
* the system FFmpeg changes major, the daemon needs a rebuild;
|
||||
* we could add fallback paths (.so.60, .so.59, ...) but for
|
||||
* Phase 8.3 the pinned version is fine.
|
||||
* SONAME versions match the Kwiboo ffmpeg-v4l2-request-fourier
|
||||
* fork (FFmpeg 8.1) installed at the /opt/fourier prefix. The
|
||||
* fourier campaign's ld.so.conf.d/fourier.conf entry resolves
|
||||
* these sonames from /opt/fourier/lib via the ld cache, so
|
||||
* dlopen-by-soname works without LD_LIBRARY_PATH wrappers.
|
||||
*
|
||||
* Switched from Debian-stock sonames 61/61/59 (FFmpeg 7.1.3) on
|
||||
* 2026-05-21 to land daedalus-fourier kernel substitution into
|
||||
* the production decode path via patches in the Kwiboo fork
|
||||
* (see daedalus-v4l2#11 substitution arc): we own the fork
|
||||
* source in marfrit-packages, so we can layer NEON-DSP
|
||||
* substitution patches there for libavcodec/aarch64/h264dsp_init
|
||||
* → daedalus_recipe_dispatch_* thunks. The Debian-stock 7.1.3
|
||||
* is built outside the marfrit-packages source tree, which
|
||||
* would have made layering substitution patches awkward.
|
||||
*
|
||||
* Note: libavutil bumps soname 59 → 60 between FFmpeg 7.1 and
|
||||
* 8.1; libavformat + libavcodec each bump 61 → 62. The public
|
||||
* API surface the daemon uses (avcodec_send_packet /
|
||||
* receive_frame / AVCodecContext flags / AVFrame fields) is
|
||||
* stable across the bump.
|
||||
*/
|
||||
#define LIBAVFORMAT_SONAME "libavformat.so.61"
|
||||
#define LIBAVCODEC_SONAME "libavcodec.so.61"
|
||||
#define LIBAVUTIL_SONAME "libavutil.so.59"
|
||||
#define LIBAVFORMAT_SONAME "libavformat.so.62"
|
||||
#define LIBAVCODEC_SONAME "libavcodec.so.62"
|
||||
#define LIBAVUTIL_SONAME "libavutil.so.60"
|
||||
|
||||
/*
|
||||
* Resolve a symbol from a dlopen'd handle. Logs the failure
|
||||
|
||||
@@ -22,6 +22,8 @@
|
||||
|
||||
#include <libavutil/log.h>
|
||||
|
||||
#include <daedalus.h>
|
||||
|
||||
static volatile sig_atomic_t g_terminate = 0;
|
||||
|
||||
static void on_signal(int sig)
|
||||
@@ -120,6 +122,26 @@ int main(int argc, char **argv)
|
||||
/* Mute FFmpeg's own chattiness unless the user asked. */
|
||||
fm.av_log_set_level(verbose ? AV_LOG_INFO : AV_LOG_WARNING);
|
||||
|
||||
/*
|
||||
* Initialise daedalus-fourier early so we can log substrate
|
||||
* availability up front. daedalus_ctx_create_no_qpu() skips
|
||||
* the V3D Vulkan probe — we're not dispatching any kernels
|
||||
* yet, this is just the linkage sanity check + a marker in the
|
||||
* journal that the binary is wired against the right
|
||||
* daedalus-fourier version. Future work (per daedalus-v4l2#11)
|
||||
* promotes to daedalus_ctx_create() once shader-path resolution
|
||||
* is wired through the public API.
|
||||
*/
|
||||
daedalus_ctx *df_ctx = daedalus_ctx_create_no_qpu();
|
||||
if (df_ctx) {
|
||||
log_info("daedalus-fourier: linked, ctx alive (no_qpu mode; "
|
||||
"has_qpu=%d)",
|
||||
daedalus_ctx_has_qpu(df_ctx));
|
||||
} else {
|
||||
log_warn("daedalus-fourier: ctx_create_no_qpu returned NULL "
|
||||
"(out of memory?) — continuing without backend kernels");
|
||||
}
|
||||
|
||||
int rc;
|
||||
const char *cmd = argv[i++];
|
||||
if (strcmp(cmd, "parse") == 0) {
|
||||
@@ -132,6 +154,8 @@ int main(int argc, char **argv)
|
||||
rc = 2;
|
||||
}
|
||||
|
||||
if (df_ctx)
|
||||
daedalus_ctx_destroy(df_ctx);
|
||||
ffmpeg_loader_cleanup(&fm);
|
||||
log_cleanup();
|
||||
return rc;
|
||||
|
||||
@@ -0,0 +1,479 @@
|
||||
/* SPDX-License-Identifier: BSD-2-Clause */
|
||||
/*
|
||||
* test_av1_obu_synth — standalone unit test for av1_synth_sequence_header_obu.
|
||||
*
|
||||
* Builds as an opt-in executable target (test_av1_obu_synth) gated on
|
||||
* -DDAEDALUS_BUILD_TESTS=ON. Runs by default in the CI build matrix
|
||||
* to gate the OBU encoder against regressions.
|
||||
*
|
||||
* Each test case sets up a struct v4l2_ctrl_av1_sequence with known
|
||||
* field values, calls the synthesiser, then walks the output bit by
|
||||
* bit against a hand-computed expected encoding. The bit-walker uses
|
||||
* the same reader semantics as bitstream_writer: MSB-first within each
|
||||
* byte, with the OBU header byte / leb128 size at byte-aligned
|
||||
* positions and the RBSP payload starting at the byte right after.
|
||||
*/
|
||||
|
||||
#include "av1_obu_synth.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include <linux/v4l2-controls.h>
|
||||
|
||||
/*
 * Bit reader state for walking a byte buffer most-significant-bit first.
 * Set up with br_init(), consumed with br_get().
 */
struct br {
    /* Input window: first byte and total byte count. */
    const uint8_t *buf;
    size_t         bytes;

    /* Cursor: index of the current byte, then bit index in it (0 = MSB). */
    size_t         pos_bytes;
    int            pos_bit;

    /* Set to 1 by br_get() when a read runs past the end of the window. */
    int            overflow;
};
|
||||
|
||||
static void br_init(struct br *b, const uint8_t *buf, size_t bytes)
|
||||
{
|
||||
b->buf = buf;
|
||||
b->bytes = bytes;
|
||||
b->pos_bytes = 0;
|
||||
b->pos_bit = 0;
|
||||
b->overflow = 0;
|
||||
}
|
||||
|
||||
static uint32_t br_get(struct br *b, int n)
|
||||
{
|
||||
uint32_t v = 0;
|
||||
int i;
|
||||
for (i = 0; i < n; i++) {
|
||||
uint8_t bit;
|
||||
if (b->pos_bytes >= b->bytes) {
|
||||
b->overflow = 1;
|
||||
return 0;
|
||||
}
|
||||
bit = (b->buf[b->pos_bytes] >> (7 - b->pos_bit)) & 1u;
|
||||
v = (v << 1) | bit;
|
||||
b->pos_bit++;
|
||||
if (b->pos_bit == 8) {
|
||||
b->pos_bit = 0;
|
||||
b->pos_bytes++;
|
||||
}
|
||||
}
|
||||
return v;
|
||||
}
|
||||
|
||||
/* Round up to next byte; returns bytes consumed for boundary. */
|
||||
static void br_byte_align(struct br *b)
|
||||
{
|
||||
if (b->pos_bit != 0) {
|
||||
b->pos_bit = 0;
|
||||
b->pos_bytes++;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Test guard for functions returning int. When `cond` is false, prints
 * "FAIL <function>:<line>: " followed by the printf-style message and a
 * newline to stderr, then returns 1 from the enclosing function. When
 * `cond` is true it has no effect.
 */
#define CHECK(cond, ...) do {                                       \
        if (cond)                                                   \
            break;                                                  \
        fprintf(stderr, "FAIL %s:%d: ", __func__, __LINE__);        \
        fprintf(stderr, __VA_ARGS__);                               \
        fputc('\n', stderr);                                        \
        return 1;                                                   \
    } while (0)
|
||||
|
||||
/*
 * Equality guard for functions returning int. Both operands are converted
 * to uint32_t before comparison (wider values are truncated). On mismatch,
 * prints the function, line, field `name` and both values to stderr, then
 * returns 1 from the enclosing function.
 */
#define CHECK_EQ(actual, expected, name) do {                       \
        const uint32_t got_  = (uint32_t)(actual);                  \
        const uint32_t want_ = (uint32_t)(expected);                \
        if (got_ == want_)                                          \
            break;                                                  \
        fprintf(stderr, "FAIL %s:%d %s: got %u, expected %u\n",     \
                __func__, __LINE__, (name), got_, want_);           \
        return 1;                                                   \
    } while (0)
|
||||
|
||||
/*
|
||||
* Case 1: 1080p, profile 0 (4:2:0), 8-bit, color_range studio,
|
||||
* order_hint enabled with 7 bits, CDEF + restoration on, no film grain.
|
||||
* Covers the most common decode path libva-v4l2-request drives on
|
||||
* the daedalus daemon.
|
||||
*/
|
||||
static int test_profile0_1080p(void)
|
||||
{
|
||||
struct v4l2_ctrl_av1_sequence seq;
|
||||
uint8_t out[64];
|
||||
size_t n;
|
||||
struct br br;
|
||||
uint32_t bit;
|
||||
|
||||
memset(&seq, 0, sizeof(seq));
|
||||
seq.seq_profile = 0;
|
||||
seq.order_hint_bits = 7;
|
||||
seq.bit_depth = 8;
|
||||
seq.max_frame_width_minus_1 = 1919; /* 1920 */
|
||||
seq.max_frame_height_minus_1 = 1079; /* 1080 */
|
||||
seq.flags =
|
||||
V4L2_AV1_SEQUENCE_FLAG_USE_128X128_SUPERBLOCK |
|
||||
V4L2_AV1_SEQUENCE_FLAG_ENABLE_ORDER_HINT |
|
||||
V4L2_AV1_SEQUENCE_FLAG_ENABLE_CDEF |
|
||||
V4L2_AV1_SEQUENCE_FLAG_ENABLE_RESTORATION;
|
||||
/* COLOR_RANGE flag unset = studio swing (limited range, =0 in spec) */
|
||||
|
||||
n = av1_synth_sequence_header_obu(&seq, out, sizeof(out));
|
||||
CHECK(n > 0 && n <= sizeof(out), "synthesiser returned %zu bytes", n);
|
||||
|
||||
/* OBU header byte: 0x0A (obu_type=1, has_size_field=1). */
|
||||
CHECK_EQ(out[0], 0x0A, "OBU header byte");
|
||||
|
||||
/* leb128 size — payload fits in 1 byte for sub-128-byte payloads. */
|
||||
CHECK(n >= 2, "OBU has size field byte");
|
||||
CHECK((out[1] & 0x80) == 0, "leb128 single-byte form (no continuation)");
|
||||
{
|
||||
size_t payload_len = out[1] & 0x7fu;
|
||||
CHECK_EQ(n, 2 + payload_len, "total length matches header+leb+payload");
|
||||
}
|
||||
|
||||
/* Walk payload bits. */
|
||||
br_init(&br, out + 2, n - 2);
|
||||
|
||||
bit = br_get(&br, 3); CHECK_EQ(bit, 0, "seq_profile");
|
||||
bit = br_get(&br, 1); CHECK_EQ(bit, 0, "still_picture");
|
||||
bit = br_get(&br, 1); CHECK_EQ(bit, 0, "reduced_still_picture_header");
|
||||
bit = br_get(&br, 1); CHECK_EQ(bit, 0, "timing_info_present_flag");
|
||||
bit = br_get(&br, 1); CHECK_EQ(bit, 0, "initial_display_delay_present_flag");
|
||||
bit = br_get(&br, 5); CHECK_EQ(bit, 0, "operating_points_cnt_minus_1");
|
||||
bit = br_get(&br, 12); CHECK_EQ(bit, 0, "operating_point_idc[0]");
|
||||
bit = br_get(&br, 5); CHECK_EQ(bit, 13, "seq_level_idx[0]");
|
||||
bit = br_get(&br, 1); CHECK_EQ(bit, 0, "seq_tier[0]");
|
||||
|
||||
/* min_bits_for(1919) = 11; encoded value = 11 - 1 = 10 */
|
||||
bit = br_get(&br, 4); CHECK_EQ(bit, 10, "frame_width_bits_minus_1");
|
||||
/* min_bits_for(1079) = 11; same value */
|
||||
bit = br_get(&br, 4); CHECK_EQ(bit, 10, "frame_height_bits_minus_1");
|
||||
bit = br_get(&br, 11); CHECK_EQ(bit, 1919, "max_frame_width_minus_1");
|
||||
bit = br_get(&br, 11); CHECK_EQ(bit, 1079, "max_frame_height_minus_1");
|
||||
|
||||
bit = br_get(&br, 1); CHECK_EQ(bit, 0, "frame_id_numbers_present_flag");
|
||||
bit = br_get(&br, 1); CHECK_EQ(bit, 1, "use_128x128_superblock");
|
||||
bit = br_get(&br, 1); CHECK_EQ(bit, 0, "enable_filter_intra");
|
||||
bit = br_get(&br, 1); CHECK_EQ(bit, 0, "enable_intra_edge_filter");
|
||||
bit = br_get(&br, 1); CHECK_EQ(bit, 0, "enable_interintra_compound");
|
||||
bit = br_get(&br, 1); CHECK_EQ(bit, 0, "enable_masked_compound");
|
||||
bit = br_get(&br, 1); CHECK_EQ(bit, 0, "enable_warped_motion");
|
||||
bit = br_get(&br, 1); CHECK_EQ(bit, 0, "enable_dual_filter");
|
||||
bit = br_get(&br, 1); CHECK_EQ(bit, 1, "enable_order_hint");
|
||||
bit = br_get(&br, 1); CHECK_EQ(bit, 0, "enable_jnt_comp");
|
||||
bit = br_get(&br, 1); CHECK_EQ(bit, 0, "enable_ref_frame_mvs");
|
||||
bit = br_get(&br, 1); CHECK_EQ(bit, 1, "seq_choose_screen_detection_tools");
|
||||
bit = br_get(&br, 1); CHECK_EQ(bit, 1, "seq_choose_integer_mv");
|
||||
/* order_hint_bits=7 → order_hint_bits_minus_1 = 6 */
|
||||
bit = br_get(&br, 3); CHECK_EQ(bit, 6, "order_hint_bits_minus_1");
|
||||
bit = br_get(&br, 1); CHECK_EQ(bit, 0, "enable_superres");
|
||||
bit = br_get(&br, 1); CHECK_EQ(bit, 1, "enable_cdef");
|
||||
bit = br_get(&br, 1); CHECK_EQ(bit, 1, "enable_restoration");
|
||||
|
||||
/* color_config: high_bitdepth=0 (8-bit), monochrome=0,
|
||||
* color_description_present=0, color_range=0, subsampling forced (no bits),
|
||||
* chroma_sample_position=0 (2 bits when subsampling_x && subsampling_y),
|
||||
* separate_uv_delta_q=0. */
|
||||
bit = br_get(&br, 1); CHECK_EQ(bit, 0, "high_bitdepth");
|
||||
bit = br_get(&br, 1); CHECK_EQ(bit, 0, "monochrome");
|
||||
bit = br_get(&br, 1); CHECK_EQ(bit, 0, "color_description_present_flag");
|
||||
bit = br_get(&br, 1); CHECK_EQ(bit, 0, "color_range");
|
||||
bit = br_get(&br, 2); CHECK_EQ(bit, 0, "chroma_sample_position");
|
||||
bit = br_get(&br, 1); CHECK_EQ(bit, 0, "separate_uv_delta_q");
|
||||
bit = br_get(&br, 1); CHECK_EQ(bit, 0, "film_grain_params_present");
|
||||
|
||||
/* trailing_bits — single '1' then zero-fill */
|
||||
bit = br_get(&br, 1); CHECK_EQ(bit, 1, "trailing_bits stop_one");
|
||||
br_byte_align(&br);
|
||||
|
||||
CHECK(!br.overflow, "no bit-reader overflow");
|
||||
CHECK_EQ(br.pos_bytes, n - 2, "consumed exactly the payload");
|
||||
|
||||
printf(" profile0 1080p 8-bit: OK (%zu bytes)\n", n);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Case 2: profile 0, 10-bit, 4:2:0, monochrome.
|
||||
* Exercises the high_bitdepth + monochrome short-form color_config path. */
|
||||
static int test_profile0_monochrome_10bit(void)
|
||||
{
|
||||
struct v4l2_ctrl_av1_sequence seq;
|
||||
uint8_t out[64];
|
||||
size_t n;
|
||||
struct br br;
|
||||
uint32_t bit;
|
||||
|
||||
memset(&seq, 0, sizeof(seq));
|
||||
seq.seq_profile = 0;
|
||||
seq.order_hint_bits = 0;
|
||||
seq.bit_depth = 10;
|
||||
seq.max_frame_width_minus_1 = 1279;
|
||||
seq.max_frame_height_minus_1 = 719;
|
||||
seq.flags = V4L2_AV1_SEQUENCE_FLAG_MONO_CHROME;
|
||||
|
||||
n = av1_synth_sequence_header_obu(&seq, out, sizeof(out));
|
||||
CHECK(n > 0, "synthesiser returned %zu bytes", n);
|
||||
CHECK_EQ(out[0], 0x0A, "OBU header byte");
|
||||
|
||||
br_init(&br, out + 2, n - 2);
|
||||
bit = br_get(&br, 3); CHECK_EQ(bit, 0, "seq_profile");
|
||||
bit = br_get(&br, 1); CHECK_EQ(bit, 0, "still_picture");
|
||||
bit = br_get(&br, 1); CHECK_EQ(bit, 0, "reduced_still_picture_header");
|
||||
bit = br_get(&br, 1); CHECK_EQ(bit, 0, "timing_info_present_flag");
|
||||
bit = br_get(&br, 1); CHECK_EQ(bit, 0, "initial_display_delay_present_flag");
|
||||
bit = br_get(&br, 5); CHECK_EQ(bit, 0, "operating_points_cnt_minus_1");
|
||||
bit = br_get(&br, 12); CHECK_EQ(bit, 0, "operating_point_idc[0]");
|
||||
bit = br_get(&br, 5); CHECK_EQ(bit, 13, "seq_level_idx[0]");
|
||||
bit = br_get(&br, 1); CHECK_EQ(bit, 0, "seq_tier[0]");
|
||||
/* 1279 fits in 11 bits → width_bits_minus_1 = 10 */
|
||||
bit = br_get(&br, 4); CHECK_EQ(bit, 10, "frame_width_bits_minus_1");
|
||||
/* 719 fits in 10 bits → height_bits_minus_1 = 9 */
|
||||
bit = br_get(&br, 4); CHECK_EQ(bit, 9, "frame_height_bits_minus_1");
|
||||
bit = br_get(&br, 11); CHECK_EQ(bit, 1279, "max_frame_width_minus_1");
|
||||
bit = br_get(&br, 10); CHECK_EQ(bit, 719, "max_frame_height_minus_1");
|
||||
bit = br_get(&br, 1); CHECK_EQ(bit, 0, "frame_id_numbers_present_flag");
|
||||
bit = br_get(&br, 1); CHECK_EQ(bit, 0, "use_128x128_superblock");
|
||||
bit = br_get(&br, 1); CHECK_EQ(bit, 0, "enable_filter_intra");
|
||||
bit = br_get(&br, 1); CHECK_EQ(bit, 0, "enable_intra_edge_filter");
|
||||
bit = br_get(&br, 1); CHECK_EQ(bit, 0, "enable_interintra_compound");
|
||||
bit = br_get(&br, 1); CHECK_EQ(bit, 0, "enable_masked_compound");
|
||||
bit = br_get(&br, 1); CHECK_EQ(bit, 0, "enable_warped_motion");
|
||||
bit = br_get(&br, 1); CHECK_EQ(bit, 0, "enable_dual_filter");
|
||||
bit = br_get(&br, 1); CHECK_EQ(bit, 0, "enable_order_hint");
|
||||
/* enable_order_hint=0 → no jnt_comp / ref_frame_mvs / order_hint_bits */
|
||||
bit = br_get(&br, 1); CHECK_EQ(bit, 1, "seq_choose_screen_detection_tools");
|
||||
bit = br_get(&br, 1); CHECK_EQ(bit, 1, "seq_choose_integer_mv");
|
||||
bit = br_get(&br, 1); CHECK_EQ(bit, 0, "enable_superres");
|
||||
bit = br_get(&br, 1); CHECK_EQ(bit, 0, "enable_cdef");
|
||||
bit = br_get(&br, 1); CHECK_EQ(bit, 0, "enable_restoration");
|
||||
|
||||
/* color_config: high_bitdepth=1 (10-bit), seq_profile==0 so no twelve_bit,
|
||||
* monochrome=1, color_description_present=0, color_range=0.
|
||||
* Monochrome short-form: no subsampling/chroma_sample_position/uv_delta_q bits. */
|
||||
bit = br_get(&br, 1); CHECK_EQ(bit, 1, "high_bitdepth");
|
||||
bit = br_get(&br, 1); CHECK_EQ(bit, 1, "monochrome");
|
||||
bit = br_get(&br, 1); CHECK_EQ(bit, 0, "color_description_present_flag");
|
||||
bit = br_get(&br, 1); CHECK_EQ(bit, 0, "color_range");
|
||||
|
||||
bit = br_get(&br, 1); CHECK_EQ(bit, 0, "film_grain_params_present");
|
||||
bit = br_get(&br, 1); CHECK_EQ(bit, 1, "trailing_bits stop_one");
|
||||
|
||||
CHECK(!br.overflow, "no overflow");
|
||||
printf(" profile0 monochrome 10-bit: OK (%zu bytes)\n", n);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Case 3: reject illegal seq_profile + bit_depth combination. */
|
||||
static int test_reject_invalid_profile_bitdepth(void)
|
||||
{
|
||||
struct v4l2_ctrl_av1_sequence seq;
|
||||
uint8_t out[64];
|
||||
size_t n;
|
||||
|
||||
memset(&seq, 0, sizeof(seq));
|
||||
seq.seq_profile = 1; /* 4:4:4 only */
|
||||
seq.bit_depth = 12; /* but profile 1 doesn't allow 12-bit */
|
||||
seq.max_frame_width_minus_1 = 1919;
|
||||
seq.max_frame_height_minus_1 = 1079;
|
||||
|
||||
n = av1_synth_sequence_header_obu(&seq, out, sizeof(out));
|
||||
CHECK_EQ(n, 0, "expected 0 (rejected) for profile1+12bit");
|
||||
|
||||
printf(" reject profile1+12bit: OK\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Case 4: small out_cap → overflow path. */
|
||||
static int test_overflow(void)
|
||||
{
|
||||
struct v4l2_ctrl_av1_sequence seq;
|
||||
uint8_t out[4]; /* deliberately too small */
|
||||
size_t n;
|
||||
|
||||
memset(&seq, 0, sizeof(seq));
|
||||
seq.seq_profile = 0;
|
||||
seq.bit_depth = 8;
|
||||
seq.max_frame_width_minus_1 = 1919;
|
||||
seq.max_frame_height_minus_1 = 1079;
|
||||
|
||||
n = av1_synth_sequence_header_obu(&seq, out, sizeof(out));
|
||||
CHECK_EQ(n, 0, "expected 0 (overflow) for tiny out buffer");
|
||||
|
||||
printf(" out_cap overflow: OK\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Case 5: the Temporal Delimiter OBU is exactly the two bytes 0x12 0x00.
 * The buffer is pre-filled with 0xff so both bytes must really be written. */
static int test_temporal_delimiter(void)
{
	size_t n;
	uint8_t out[4];

	memset(out, 0xff, sizeof(out));

	n = av1_synth_temporal_delimiter_obu(out, sizeof(out));

	CHECK_EQ(n, 2, "TD length");
	CHECK_EQ(out[0], 0x12, "TD obu header byte (obu_type=2, has_size=1)");
	CHECK_EQ(out[1], 0x00, "TD leb128 size = 0");

	printf("  temporal delimiter: OK\n");
	return 0;
}
|
||||
|
||||
/* Test fixtures for Frame Header cases. */
|
||||
static void mk_seq_1080p_p0(struct v4l2_ctrl_av1_sequence *seq)
|
||||
{
|
||||
memset(seq, 0, sizeof(*seq));
|
||||
seq->seq_profile = 0;
|
||||
seq->order_hint_bits = 7;
|
||||
seq->bit_depth = 8;
|
||||
seq->max_frame_width_minus_1 = 1919;
|
||||
seq->max_frame_height_minus_1 = 1079;
|
||||
seq->flags = V4L2_AV1_SEQUENCE_FLAG_USE_128X128_SUPERBLOCK |
|
||||
V4L2_AV1_SEQUENCE_FLAG_ENABLE_ORDER_HINT |
|
||||
V4L2_AV1_SEQUENCE_FLAG_ENABLE_CDEF |
|
||||
V4L2_AV1_SEQUENCE_FLAG_ENABLE_RESTORATION;
|
||||
}
|
||||
|
||||
static void mk_frame_key_1080p(struct v4l2_ctrl_av1_frame *fr)
|
||||
{
|
||||
memset(fr, 0, sizeof(*fr));
|
||||
fr->frame_type = V4L2_AV1_KEY_FRAME;
|
||||
fr->frame_width_minus_1 = 1919;
|
||||
fr->frame_height_minus_1 = 1079;
|
||||
fr->render_width_minus_1 = 1919;
|
||||
fr->render_height_minus_1 = 1079;
|
||||
fr->primary_ref_frame = 7; /* PRIMARY_REF_NONE */
|
||||
fr->quantization.base_q_idx = 60;
|
||||
fr->loop_filter.level[0] = 16;
|
||||
fr->loop_filter.level[1] = 16;
|
||||
fr->loop_filter.level[2] = 16;
|
||||
fr->loop_filter.level[3] = 16;
|
||||
fr->cdef.bits = 0;
|
||||
fr->loop_restoration.frame_restoration_type[0] = V4L2_AV1_FRAME_RESTORE_NONE;
|
||||
fr->loop_restoration.frame_restoration_type[1] = V4L2_AV1_FRAME_RESTORE_NONE;
|
||||
fr->loop_restoration.frame_restoration_type[2] = V4L2_AV1_FRAME_RESTORE_NONE;
|
||||
fr->interpolation_filter = 0;
|
||||
fr->tx_mode = V4L2_AV1_TX_MODE_SELECT;
|
||||
fr->flags = V4L2_AV1_FRAME_FLAG_SHOW_FRAME;
|
||||
}
|
||||
|
||||
/* Case 6: KEY frame at 1080p — happy path, structural smoke. */
|
||||
static int test_frame_header_key_1080p(void)
|
||||
{
|
||||
struct v4l2_ctrl_av1_sequence seq;
|
||||
struct v4l2_ctrl_av1_frame fr;
|
||||
uint8_t out[256];
|
||||
size_t n;
|
||||
struct br br;
|
||||
uint32_t bit;
|
||||
|
||||
mk_seq_1080p_p0(&seq);
|
||||
mk_frame_key_1080p(&fr);
|
||||
|
||||
n = av1_synth_frame_header_obu(&seq, &fr, out, sizeof(out));
|
||||
CHECK(n > 0 && n <= sizeof(out), "FH synth returned %zu", n);
|
||||
|
||||
/* OBU header byte: obu_type=3 (FRAME_HEADER), has_size_field=1
|
||||
* → 0_0011_0_1_0 = 0x1A. */
|
||||
CHECK_EQ(out[0], 0x1A, "FH obu header byte");
|
||||
CHECK((out[1] & 0x80) == 0, "leb128 single byte");
|
||||
CHECK_EQ(n, 2 + (size_t)(out[1] & 0x7f), "total = header+leb+payload");
|
||||
|
||||
br_init(&br, out + 2, n - 2);
|
||||
bit = br_get(&br, 1); CHECK_EQ(bit, 0, "show_existing_frame");
|
||||
bit = br_get(&br, 2); CHECK_EQ(bit, 0, "frame_type=KEY");
|
||||
bit = br_get(&br, 1); CHECK_EQ(bit, 1, "show_frame");
|
||||
bit = br_get(&br, 1); CHECK_EQ(bit, 0, "disable_cdf_update");
|
||||
bit = br_get(&br, 1); CHECK_EQ(bit, 0, "allow_screen_content_tools");
|
||||
|
||||
printf(" KEY frame 1080p: OK (%zu bytes)\n", n);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Case 7: INTER frame — coverage smoke. */
|
||||
static int test_frame_header_inter(void)
|
||||
{
|
||||
struct v4l2_ctrl_av1_sequence seq;
|
||||
struct v4l2_ctrl_av1_frame fr;
|
||||
uint8_t out[256];
|
||||
size_t n;
|
||||
int i;
|
||||
|
||||
mk_seq_1080p_p0(&seq);
|
||||
mk_frame_key_1080p(&fr);
|
||||
fr.frame_type = V4L2_AV1_INTER_FRAME;
|
||||
fr.primary_ref_frame = 0;
|
||||
for (i = 0; i < V4L2_AV1_REFS_PER_FRAME; i++)
|
||||
fr.ref_frame_idx[i] = (int8_t)(i & 7);
|
||||
fr.flags |= V4L2_AV1_FRAME_FLAG_REFERENCE_SELECT;
|
||||
|
||||
n = av1_synth_frame_header_obu(&seq, &fr, out, sizeof(out));
|
||||
CHECK(n > 0, "INTER FH synth returned %zu", n);
|
||||
CHECK_EQ(out[0], 0x1A, "FH obu header");
|
||||
printf(" INTER frame: OK (%zu bytes)\n", n);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Case 8: SWITCH frame should be rejected. */
|
||||
static int test_frame_header_switch_rejected(void)
|
||||
{
|
||||
struct v4l2_ctrl_av1_sequence seq;
|
||||
struct v4l2_ctrl_av1_frame fr;
|
||||
uint8_t out[256];
|
||||
size_t n;
|
||||
|
||||
mk_seq_1080p_p0(&seq);
|
||||
mk_frame_key_1080p(&fr);
|
||||
fr.frame_type = V4L2_AV1_SWITCH_FRAME;
|
||||
n = av1_synth_frame_header_obu(&seq, &fr, out, sizeof(out));
|
||||
CHECK_EQ(n, 0, "SWITCH frame should be out of scope");
|
||||
printf(" SWITCH frame rejected: OK\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Case 9: segmentation enabled should be rejected. */
|
||||
static int test_frame_header_segmentation_rejected(void)
|
||||
{
|
||||
struct v4l2_ctrl_av1_sequence seq;
|
||||
struct v4l2_ctrl_av1_frame fr;
|
||||
uint8_t out[256];
|
||||
size_t n;
|
||||
|
||||
mk_seq_1080p_p0(&seq);
|
||||
mk_frame_key_1080p(&fr);
|
||||
fr.segmentation.flags = V4L2_AV1_SEGMENTATION_FLAG_ENABLED;
|
||||
n = av1_synth_frame_header_obu(&seq, &fr, out, sizeof(out));
|
||||
CHECK_EQ(n, 0, "segmentation-enabled should be out of scope");
|
||||
printf(" segmentation enabled rejected: OK\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Test driver.  Every case runs even after an earlier one has failed;
 * the results are OR-ed together and the exit status is 1 if any case
 * reported failure, 0 otherwise. */
int main(void)
{
	int failed = 0;

	printf("=== av1_synth_sequence_header_obu ===\n");

	failed |= test_profile0_1080p();
	failed |= test_profile0_monochrome_10bit();
	failed |= test_reject_invalid_profile_bitdepth();
	failed |= test_overflow();

	printf("=== av1_synth_temporal_delimiter_obu ===\n");
	failed |= test_temporal_delimiter();

	printf("=== av1_synth_frame_header_obu ===\n");
	failed |= test_frame_header_key_1080p();
	failed |= test_frame_header_inter();
	failed |= test_frame_header_switch_rejected();
	failed |= test_frame_header_segmentation_rejected();

	if (!failed) {
		printf("AV1 OBU synth tests PASSED\n");
		return 0;
	}

	fprintf(stderr, "AV1 OBU synth tests FAILED\n");
	return 1;
}
|
||||
@@ -28,12 +28,7 @@
|
||||
#include <linux/v4l2-controls.h>
|
||||
|
||||
#define DAEDALUS_PROTO_MAGIC 0x44303456u /* 'D04V' */
|
||||
#define DAEDALUS_PROTO_VERSION 1u /* pre-1.0; bumped for
|
||||
* REQ_DECODE.src_pts +
|
||||
* RESP_FRAME.flags +
|
||||
* RESP_FRAME.output_src_pts
|
||||
* (H.264 B-frame reorder fix,
|
||||
* daedalus-v4l2#6). */
|
||||
#define DAEDALUS_PROTO_VERSION 0u /* pre-1.0 */
|
||||
|
||||
/*
|
||||
* Wire-protocol message types.
|
||||
@@ -76,7 +71,18 @@ struct daedalus_msg_hdr {
|
||||
__u32 reserved;
|
||||
};
|
||||
|
||||
#define DAEDALUS_PROTO_MAX_PAYLOAD (64u * 1024u) /* 64 KiB */
|
||||
/*
|
||||
* Wire-protocol payload cap. Sized to comfortably hold real-world
|
||||
* H.264 / VP9 / AV1 access-unit bitstreams:
|
||||
* - 720p H.264 worst-case I-frame: ~200 KiB
|
||||
* - 1080p H.264 worst-case I-frame: ~500 KiB
|
||||
* - 4K H.264 worst-case I-frame: ~2 MiB (would need a bump)
|
||||
* 1 MiB is the conservative end of what cedrus / rkvdec / hantro
|
||||
* report as OUTPUT_MPLANE sizeimage. Allocations (chardev kmalloc
|
||||
* / kmemdup, daemon read buffer, vb2 plane backing) are sized per-
|
||||
* payload at runtime; this only sets the ceiling. Issue #19.
|
||||
*/
|
||||
#define DAEDALUS_PROTO_MAX_PAYLOAD (1024u * 1024u) /* 1 MiB */
|
||||
|
||||
/* -- REQ_DECODE / RESP_FRAME payload structures ---------------------- */
|
||||
|
||||
@@ -147,17 +153,6 @@ struct daedalus_req_decode {
|
||||
__u32 capture_plane_size[3];
|
||||
__u32 capture_plane_stride[3];
|
||||
__u32 flags;
|
||||
__u32 reserved0; /* explicit pad to 8-byte align src_pts */
|
||||
/*
|
||||
* The V4L2 OUTPUT (bitstream) buffer's vb2 timestamp at submission
|
||||
* time. The daemon sets pkt->pts = src_pts before
|
||||
* avcodec_send_packet so libavcodec's display-ordered
|
||||
* receive_frame can return frame->pts == src_pts of the bitstream
|
||||
* the frame's slices belong to. Decouples kernel cookie (decode
|
||||
* order, in-kernel identity) from display order — required for
|
||||
* H.264 B-frame correctness (daedalus-v4l2#6).
|
||||
*/
|
||||
__u64 src_pts;
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -224,31 +219,6 @@ enum daedalus_decode_status {
|
||||
* Fixed size — keeps wire parsing simple. No variable-length
|
||||
* pixel data in Phase 8.4; dmabuf in Phase 8.5 carries that.
|
||||
*/
|
||||
/**
|
||||
* DAEDALUS_RESP_FLAG_HAS_PIXELS - this RESP delivers a decoded frame's
|
||||
* pixels. The owning CAPTURE buffer is identified by output_src_pts
|
||||
* (matched against an in-flight item's src_pts on the kernel side),
|
||||
* NOT by the chardev message header's cookie. Required since
|
||||
* libavcodec's H.264 decoder reorders to display order — the cookie
|
||||
* the daemon just received the REQ on may not be the cookie whose
|
||||
* bitstream produced the frame just popped from receive_frame.
|
||||
*
|
||||
* DAEDALUS_RESP_FLAG_SRC_CONSUMED - the chardev header's cookie's
|
||||
* OUTPUT bitstream buffer is done from the daemon's perspective
|
||||
* (libavcodec has accepted the slice data via avcodec_send_packet).
|
||||
* Kernel releases src_buf for the cookie and runs job_finish so the
|
||||
* m2m scheduler can dispatch the next REQ. Independent of any
|
||||
* pixel delivery — the dst_buf paired with this cookie may still
|
||||
* be parked, awaiting a future RESP with HAS_PIXELS + matching
|
||||
* output_src_pts.
|
||||
*
|
||||
* Both flags may be set in a single message (steady-state path with
|
||||
* no codec reorder lag — the just-sent packet immediately yielded a
|
||||
* frame whose pts == this REQ's src_pts).
|
||||
*/
|
||||
#define DAEDALUS_RESP_FLAG_HAS_PIXELS 0x00000001u
|
||||
#define DAEDALUS_RESP_FLAG_SRC_CONSUMED 0x00000002u
|
||||
|
||||
struct daedalus_resp_frame {
|
||||
__u32 status;
|
||||
__u32 codec_id;
|
||||
@@ -258,16 +228,7 @@ struct daedalus_resp_frame {
|
||||
__u32 luma_len;
|
||||
__u32 chroma_len;
|
||||
__u32 fnv1a_yuv;
|
||||
__u32 flags; /* bitmask of DAEDALUS_RESP_FLAG_* */
|
||||
__u32 reserved0; /* explicit pad to 8-byte align output_src_pts */
|
||||
/*
|
||||
* Set when DAEDALUS_RESP_FLAG_HAS_PIXELS is in flags. Identifies
|
||||
* which OUTPUT bitstream's slices produced the pixels in this
|
||||
* RESP — kernel completes the CAPTURE buffer whose inflight item
|
||||
* has src_pts == output_src_pts. Ignored when HAS_PIXELS is
|
||||
* clear.
|
||||
*/
|
||||
__u64 output_src_pts;
|
||||
__u32 reserved;
|
||||
};
|
||||
|
||||
/* -- chardev ioctl ABI ----------------------------------------------- */
|
||||
|
||||
@@ -167,6 +167,26 @@ static int daedalus_chardev_release(struct inode *inode, struct file *file)
|
||||
}
|
||||
mutex_unlock(&dev->req_lock);
|
||||
|
||||
/*
|
||||
* Drain the V4L2-side in-flight list before the daemon goes
|
||||
* away. Any REQ_DECODE we already sent to the daemon won't
|
||||
* get a matching RESP_FRAME — without this drain,
|
||||
* v4l2_m2m_cancel_job() in the V4L2 consumer's close() path
|
||||
* (or in vb2's STREAMOFF path) blocks forever waiting for a
|
||||
* job_finish that will never arrive, and the consumer becomes
|
||||
* unkillable D-state. Issue #146.
|
||||
*
|
||||
* Done AFTER draining the request queue: any REQ_DECODE still
|
||||
* sitting in dev->req_queue is by definition not yet "in
|
||||
* flight" (the kernel never released it to the daemon), so it
|
||||
* doesn't need the m2m-job-finish dance — freeing the message
|
||||
* is sufficient. The inflight list holds entries the kernel
|
||||
* already committed to (added in device_run after the message
|
||||
* was queued or written), which is exactly what needs to be
|
||||
* failed back to vb2 here.
|
||||
*/
|
||||
daedalus_drain_inflight_on_disconnect();
|
||||
|
||||
mutex_lock(&dev->open_lock);
|
||||
dev->opened = 0;
|
||||
mutex_unlock(&dev->open_lock);
|
||||
|
||||
+145
-214
@@ -611,28 +611,8 @@ struct daedalus_inflight {
|
||||
struct list_head list;
|
||||
u32 cookie;
|
||||
struct daedalus_ctx *ctx;
|
||||
/*
|
||||
* src_buf / dst_buf decouple in the daedalus-v4l2#6 reorder fix.
|
||||
* src_buf is cleared (NULL'd) when DAEDALUS_RESP_FLAG_SRC_CONSUMED
|
||||
* arrives — that signals libavcodec has accepted the bitstream
|
||||
* even if no display-order frame is ready yet. dst_buf is cleared
|
||||
* when DAEDALUS_RESP_FLAG_HAS_PIXELS arrives — the daemon has
|
||||
* written pixels into this CAPTURE buffer. When both are NULL
|
||||
* the inflight entry is removed and freed.
|
||||
*/
|
||||
struct vb2_v4l2_buffer *src_buf;
|
||||
struct vb2_v4l2_buffer *dst_buf;
|
||||
/*
|
||||
* src_buf->vb2_buf.timestamp captured at device_run time.
|
||||
* Mirrored into REQ_DECODE.src_pts so the daemon can set
|
||||
* pkt->pts = src_pts on avcodec_send_packet, and read back
|
||||
* frame->pts to identify which OUTPUT bitstream produced the
|
||||
* current display-order frame. Kept here so the kernel can
|
||||
* stamp dst_buf.timestamp explicitly at HAS_PIXELS time even
|
||||
* though V4L2_BUF_FLAG_TIMESTAMP_COPY's automatic src->dst
|
||||
* pairing no longer applies (src/dst lifecycles decoupled).
|
||||
*/
|
||||
u64 src_pts;
|
||||
/*
|
||||
* Captured media_request the src_buf was bound to (if any).
|
||||
* Set by device_run from src_buf->vb2_buf.req_obj.req;
|
||||
@@ -643,22 +623,16 @@ struct daedalus_inflight {
|
||||
struct media_request *req;
|
||||
};
|
||||
|
||||
/*
|
||||
* Peek (don't remove). The split-completion path may receive
|
||||
* multiple RESP_FRAME messages on a single inflight item (one for
|
||||
* SRC_CONSUMED, one for HAS_PIXELS — possibly separated in time if
|
||||
* libavcodec held the picture for display reorder). Caller removes
|
||||
* the entry only when both src_buf and dst_buf have been cleared
|
||||
* from inside the inflight lock.
|
||||
*/
|
||||
static struct daedalus_inflight *
|
||||
daedalus_inflight_peek_locked(struct daedalus_dev *dev, u32 cookie)
|
||||
daedalus_inflight_pop_locked(struct daedalus_dev *dev, u32 cookie)
|
||||
{
|
||||
struct daedalus_inflight *e;
|
||||
|
||||
list_for_each_entry(e, &dev->inflight, list) {
|
||||
if (e->cookie == cookie)
|
||||
if (e->cookie == cookie) {
|
||||
list_del(&e->list);
|
||||
return e;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
@@ -731,7 +705,6 @@ static void daedalus_device_run(void *priv)
|
||||
size_t blen, payload_len;
|
||||
u32 cookie;
|
||||
int ret;
|
||||
bool claimed = false; /* src/dst removed from m2m rdy_queue */
|
||||
|
||||
src_buf = v4l2_m2m_next_src_buf(ctx->m2m_ctx);
|
||||
dst_buf = v4l2_m2m_next_dst_buf(ctx->m2m_ctx);
|
||||
@@ -822,17 +795,6 @@ static void daedalus_device_run(void *priv)
|
||||
|
||||
req->codec_id = cid;
|
||||
req->bitstream_len = (u32) blen;
|
||||
/*
|
||||
* Ferry the OUTPUT buffer's vb2 timestamp through to the
|
||||
* daemon for the H.264 B-frame display-reorder fix
|
||||
* (daedalus-v4l2#6). Daemon sets pkt->pts = src_pts before
|
||||
* avcodec_send_packet; libavcodec stamps frame->pts with
|
||||
* the same value when it eventually outputs the frame in
|
||||
* display order, letting the daemon route HAS_PIXELS RESPs
|
||||
* to the correct cookie even when libavcodec's display
|
||||
* order disagrees with V4L2's decode submission order.
|
||||
*/
|
||||
req->src_pts = (u64) src_buf->vb2_buf.timestamp;
|
||||
req->capture_width = ctx->dst_fmt.width;
|
||||
req->capture_height = ctx->dst_fmt.height;
|
||||
req->capture_pix_fmt = ctx->dst_fmt.pixelformat;
|
||||
@@ -857,34 +819,11 @@ static void daedalus_device_run(void *priv)
|
||||
inf = kzalloc(sizeof(*inf), GFP_KERNEL);
|
||||
if (!inf)
|
||||
goto fail_buf_error;
|
||||
|
||||
/*
|
||||
* Take both buffers off the m2m ready-queue HERE — before the
|
||||
* inflight list grows. Once src_consumed releases the src side
|
||||
* and the m2m scheduler can dispatch the next device_run, the
|
||||
* NEW device_run mustn't see this dst_buf (which we're still
|
||||
* holding for a future HAS_PIXELS). Without this claim,
|
||||
* v4l2_m2m_next_dst_buf at the next device_run returns the same
|
||||
* parked dst_buf, two inflight entries reference it, and the
|
||||
* later HAS_PIXELS triggers a list_del on an already-removed
|
||||
* vb2_buffer → kernel panic (observed on Pi CM5 hard reboot
|
||||
* during mpv vaapi-copy playback of 720p H.264, 2026-05-21).
|
||||
*
|
||||
* Both helpers are inline list_del+counter-decrement under the
|
||||
* q_ctx rdy_spinlock — safe to call from device_run on the
|
||||
* buffer we just peeked via next_*_buf above. Mirrors the
|
||||
* amphion vdec/venc pattern.
|
||||
*/
|
||||
v4l2_m2m_src_buf_remove_by_buf(ctx->m2m_ctx, src_buf);
|
||||
v4l2_m2m_dst_buf_remove_by_buf(ctx->m2m_ctx, dst_buf);
|
||||
claimed = true;
|
||||
|
||||
cookie = daedalus_next_cookie();
|
||||
inf->cookie = cookie;
|
||||
inf->ctx = ctx;
|
||||
inf->src_buf = src_buf;
|
||||
inf->dst_buf = dst_buf;
|
||||
inf->src_pts = req->src_pts;
|
||||
/*
|
||||
* Capture the bound media_request (if any) so the
|
||||
* completion path can call v4l2_ctrl_request_complete +
|
||||
@@ -932,13 +871,11 @@ static void daedalus_device_run(void *priv)
|
||||
|
||||
fail_buf_error:
|
||||
if (src_buf) {
|
||||
if (!claimed)
|
||||
v4l2_m2m_src_buf_remove(ctx->m2m_ctx);
|
||||
v4l2_m2m_src_buf_remove(ctx->m2m_ctx);
|
||||
v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_ERROR);
|
||||
}
|
||||
if (dst_buf) {
|
||||
if (!claimed)
|
||||
v4l2_m2m_dst_buf_remove(ctx->m2m_ctx);
|
||||
v4l2_m2m_dst_buf_remove(ctx->m2m_ctx);
|
||||
v4l2_m2m_buf_done(dst_buf, VB2_BUF_STATE_ERROR);
|
||||
}
|
||||
kfree(req);
|
||||
@@ -952,185 +889,179 @@ static const struct v4l2_m2m_ops daedalus_m2m_ops = {
|
||||
|
||||
/* -- chardev RESP_FRAME → buf_done bridge ---------------------------- */
|
||||
|
||||
/*
|
||||
* Pack the daemon's pixel delivery into the inflight item's CAPTURE
|
||||
* buffer. Called from daedalus_complete_resp_frame on the
|
||||
* HAS_PIXELS branch, after the lock has been dropped (vb2 ops may
|
||||
* sleep / take their own locks). The dst_buf reference was
|
||||
* snapshotted under the inflight lock and cleared from the entry,
|
||||
* so no other RESP can race for this buffer.
|
||||
*
|
||||
* pixels_len == 0 → dmabuf path (Phase 8.6+); the daemon mmap'd the
|
||||
* CAPTURE plane via GET_DMABUF and wrote pixels in place; we just
|
||||
* set the plane payloads. pixels_len > 0 → legacy Phase 8.5 inline
|
||||
* NV12 path; we memcpy from the chardev payload.
|
||||
*/
|
||||
static void daedalus_pack_pixels_into_dst(struct vb2_v4l2_buffer *dst_buf,
|
||||
const struct daedalus_resp_frame *fr,
|
||||
const u8 *pixels, size_t pixels_len)
|
||||
{
|
||||
struct vb2_buffer *vb = &dst_buf->vb2_buf;
|
||||
void *dst_y, *dst_uv;
|
||||
u32 y_size, uv_size;
|
||||
unsigned int p;
|
||||
|
||||
if (pixels_len) {
|
||||
y_size = min_t(u32, fr->luma_len,
|
||||
(u32) vb2_plane_size(vb, 0));
|
||||
uv_size = vb->num_planes > 1 ?
|
||||
min_t(u32, fr->chroma_len,
|
||||
(u32) vb2_plane_size(vb, 1)) : 0;
|
||||
dst_y = vb2_plane_vaddr(vb, 0);
|
||||
dst_uv = vb->num_planes > 1 ?
|
||||
vb2_plane_vaddr(vb, 1) : NULL;
|
||||
if (dst_y && y_size && pixels_len >= y_size)
|
||||
memcpy(dst_y, pixels, y_size);
|
||||
else
|
||||
y_size = 0;
|
||||
if (dst_uv && uv_size &&
|
||||
pixels_len >= y_size + uv_size)
|
||||
memcpy(dst_uv, pixels + y_size, uv_size);
|
||||
else
|
||||
uv_size = 0;
|
||||
vb2_set_plane_payload(vb, 0, y_size);
|
||||
if (vb->num_planes > 1)
|
||||
vb2_set_plane_payload(vb, 1, uv_size);
|
||||
} else {
|
||||
for (p = 0; p < vb->num_planes; p++)
|
||||
vb2_set_plane_payload(vb, p,
|
||||
vb2_plane_size(vb, p));
|
||||
}
|
||||
}
|
||||
|
||||
void daedalus_complete_resp_frame(u32 cookie,
|
||||
const struct daedalus_resp_frame *fr,
|
||||
const u8 *pixels, size_t pixels_len)
|
||||
{
|
||||
struct daedalus_dev *dev = g_daedalus_dev;
|
||||
struct daedalus_inflight *inf;
|
||||
struct daedalus_ctx *ctx = NULL;
|
||||
struct vb2_v4l2_buffer *src_to_complete = NULL;
|
||||
struct vb2_v4l2_buffer *dst_to_complete = NULL;
|
||||
struct media_request *req_to_complete = NULL;
|
||||
enum vb2_buffer_state state;
|
||||
u64 dst_timestamp = 0;
|
||||
bool entry_freed = false;
|
||||
bool has_pixels, src_consumed;
|
||||
void *dst_y, *dst_uv;
|
||||
u32 y_size, uv_size;
|
||||
|
||||
if (!dev)
|
||||
return;
|
||||
|
||||
state = (fr->status == DAEDALUS_DECODE_OK)
|
||||
? VB2_BUF_STATE_DONE : VB2_BUF_STATE_ERROR;
|
||||
has_pixels = !!(fr->flags & DAEDALUS_RESP_FLAG_HAS_PIXELS);
|
||||
src_consumed = !!(fr->flags & DAEDALUS_RESP_FLAG_SRC_CONSUMED);
|
||||
|
||||
if (!has_pixels && !src_consumed) {
|
||||
pr_warn_ratelimited(
|
||||
"daedalus_v4l2: RESP_FRAME cookie=%u with neither HAS_PIXELS nor SRC_CONSUMED — ignoring\n",
|
||||
cookie);
|
||||
return;
|
||||
}
|
||||
|
||||
mutex_lock(&dev->inflight_lock);
|
||||
inf = daedalus_inflight_peek_locked(dev, cookie);
|
||||
inf = daedalus_inflight_pop_locked(dev, cookie);
|
||||
mutex_unlock(&dev->inflight_lock);
|
||||
if (!inf) {
|
||||
mutex_unlock(&dev->inflight_lock);
|
||||
pr_warn_ratelimited(
|
||||
"daedalus_v4l2: RESP_FRAME for unknown cookie=%u\n",
|
||||
cookie);
|
||||
return;
|
||||
}
|
||||
|
||||
ctx = inf->ctx;
|
||||
state = (fr->status == DAEDALUS_DECODE_OK)
|
||||
? VB2_BUF_STATE_DONE : VB2_BUF_STATE_ERROR;
|
||||
|
||||
/*
|
||||
* Snapshot what this RESP completes and clear the matching
|
||||
* fields on the inflight item, so concurrent RESPs (e.g. a
|
||||
* later HAS_PIXELS arriving on the same cookie after this
|
||||
* SRC_CONSUMED clears src_buf) see the correct residual
|
||||
* state. Actual vb2 buf_done calls happen below the lock.
|
||||
* Two routes the daemon can take, both supported:
|
||||
*
|
||||
* Sanity check on output_src_pts only when HAS_PIXELS is
|
||||
* set — the daemon's output_src_pts should equal this
|
||||
* inflight's stored src_pts, since the daemon routes pixels
|
||||
* to the cookie of the OUTPUT bitstream that contained the
|
||||
* frame's slices (which is what we stored at device_run time).
|
||||
* Surface a mismatch loudly — indicates daemon-side pts→cookie
|
||||
* mapping bug, not silent data corruption.
|
||||
* (a) dmabuf path (Phase 8.6+) — daemon called
|
||||
* DAEDALUS_IOC_GET_DMABUF, mmap'd the CAPTURE buffer,
|
||||
* wrote pixels in place. RESP_FRAME carries metadata
|
||||
* only (pixels_len == 0). Each plane's payload is
|
||||
* the full plane size (the daemon wrote everything
|
||||
* the format requires).
|
||||
*
|
||||
* (b) Phase 8.5 inline path — daemon shipped raw NV12 in
|
||||
* the chardev payload (≤ 64 KiB cap). We memcpy
|
||||
* into the vb2 buffer. Plane payloads come from
|
||||
* the daemon's NV12 luma/chroma counts.
|
||||
*/
|
||||
if (has_pixels) {
|
||||
if (fr->output_src_pts != inf->src_pts)
|
||||
pr_warn_ratelimited(
|
||||
"daedalus_v4l2: RESP HAS_PIXELS cookie=%u output_src_pts=%llu but inflight.src_pts=%llu — daemon dispatch bug?\n",
|
||||
cookie,
|
||||
(unsigned long long) fr->output_src_pts,
|
||||
(unsigned long long) inf->src_pts);
|
||||
if (state == VB2_BUF_STATE_DONE) {
|
||||
struct vb2_buffer *vb = &inf->dst_buf->vb2_buf;
|
||||
unsigned int p;
|
||||
|
||||
dst_to_complete = inf->dst_buf;
|
||||
dst_timestamp = inf->src_pts;
|
||||
inf->dst_buf = NULL;
|
||||
if (pixels_len) {
|
||||
/* (b) inline NV12 copy — legacy 2-plane only */
|
||||
y_size = min_t(u32, fr->luma_len,
|
||||
(u32) vb2_plane_size(vb, 0));
|
||||
uv_size = vb->num_planes > 1 ?
|
||||
min_t(u32, fr->chroma_len,
|
||||
(u32) vb2_plane_size(vb, 1)) : 0;
|
||||
dst_y = vb2_plane_vaddr(vb, 0);
|
||||
dst_uv = vb->num_planes > 1 ?
|
||||
vb2_plane_vaddr(vb, 1) : NULL;
|
||||
if (dst_y && y_size && pixels_len >= y_size)
|
||||
memcpy(dst_y, pixels, y_size);
|
||||
else
|
||||
y_size = 0;
|
||||
if (dst_uv && uv_size &&
|
||||
pixels_len >= y_size + uv_size)
|
||||
memcpy(dst_uv, pixels + y_size, uv_size);
|
||||
else
|
||||
uv_size = 0;
|
||||
vb2_set_plane_payload(vb, 0, y_size);
|
||||
if (vb->num_planes > 1)
|
||||
vb2_set_plane_payload(vb, 1, uv_size);
|
||||
} else {
|
||||
/* (a) dmabuf path: plane is fully populated by
|
||||
* the daemon, so payload == sizeimage. */
|
||||
for (p = 0; p < vb->num_planes; p++)
|
||||
vb2_set_plane_payload(vb, p,
|
||||
vb2_plane_size(vb, p));
|
||||
}
|
||||
}
|
||||
|
||||
if (src_consumed) {
|
||||
src_to_complete = inf->src_buf;
|
||||
req_to_complete = inf->req;
|
||||
inf->src_buf = NULL;
|
||||
inf->req = NULL;
|
||||
}
|
||||
/*
|
||||
* Phase 8.14: if the src_buf was bound to a media_request
|
||||
* (libva-driven decode path), complete the per-request
|
||||
* control state BEFORE buf_done_and_job_finish. vb2-core's
|
||||
* buf_done unbinds the buffer's req_obj on its own, but the
|
||||
* control object stays bound until v4l2_ctrl_request_complete
|
||||
* runs — only after BOTH objects unbind does the request
|
||||
* transition to MEDIA_REQUEST_STATE_COMPLETE and wake any
|
||||
* userspace poll on the request fd.
|
||||
*
|
||||
* For non-request flows (test_m2m_stream direct QBUF) inf->req
|
||||
* is NULL and v4l2_ctrl_request_complete just no-ops.
|
||||
*/
|
||||
if (inf->req)
|
||||
v4l2_ctrl_request_complete(inf->req, &inf->ctx->hdl);
|
||||
|
||||
if (!inf->src_buf && !inf->dst_buf) {
|
||||
list_del(&inf->list);
|
||||
entry_freed = true;
|
||||
}
|
||||
/*
|
||||
* Use the buf_done_and_job_finish helper rather than plain
|
||||
* buf_done + job_finish: the helper pops the buffers off
|
||||
* the m2m queue before marking them done, otherwise the
|
||||
* scheduler immediately re-runs device_run on the same
|
||||
* still-queued src buffer. Caught during Phase 8.5 first
|
||||
* run — second REQ_DECODE with identical bitstream + oops
|
||||
* in stop_streaming when the test client tore down.
|
||||
*/
|
||||
v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, inf->ctx->m2m_ctx,
|
||||
state);
|
||||
|
||||
/*
|
||||
* Release our reference taken in device_run; safe to do
|
||||
* AFTER buf_done_and_job_finish (which dropped the vb2
|
||||
* reference) because we still hold this one. If the
|
||||
* refcount hits zero here, media-core releases the request.
|
||||
*/
|
||||
if (inf->req)
|
||||
media_request_put(inf->req);
|
||||
|
||||
kfree(inf);
|
||||
}
|
||||
|
||||
/* -- daemon disconnect drain ----------------------------------------- */
|
||||
|
||||
/*
 * Fail every job still on dev->inflight once the daemon is gone.
 *
 * Returns immediately when g_daedalus_dev is NULL. Otherwise the whole
 * dev->inflight list is moved onto a stack-local list while
 * inflight_lock is held, and the entries are then walked with the lock
 * released; each one is unlinked and reported through v4l2_warn() with
 * its cookie.
 *
 * NOTE(review): this text reads like two sides of a diff merged into
 * one body. dst_to_complete, dst_timestamp, src_to_complete,
 * req_to_complete, state, fr, pixels, pixels_len, ctx and entry_freed
 * are used below but never declared in this function, and inf is
 * dereferenced after the list_for_each_entry_safe() block has closed.
 * Presumably those statements belong to daedalus_complete_resp_frame()
 * - confirm against the real file before trusting the statement order
 * or the completion and kfree() sequence shown here.
 */
void daedalus_drain_inflight_on_disconnect(void)
|
||||
{
|
||||
struct daedalus_dev *dev = g_daedalus_dev;
|
||||
struct daedalus_inflight *inf, *tmp;
|
||||
LIST_HEAD(local);
|
||||
|
||||
if (!dev)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Splice the in-flight list onto a local list under the lock,
|
||||
* then process each entry with the lock dropped — every
|
||||
* v4l2_m2m_buf_done_and_job_finish call may itself try to
|
||||
* re-enter device_run via the scheduler (which would need to
|
||||
* walk dev->inflight again on a future REQ_DECODE), and
|
||||
* v4l2_m2m_buf_done can sleep via vb2's buffer-done dispatch.
|
||||
* Holding inflight_lock across either is a deadlock invitation.
|
||||
*/
|
||||
mutex_lock(&dev->inflight_lock);
|
||||
list_splice_init(&dev->inflight, &local);
|
||||
mutex_unlock(&dev->inflight_lock);
|
||||
|
||||
/*
|
||||
* Complete the CAPTURE side first (when applicable). vb2-core's
|
||||
* V4L2_BUF_FLAG_TIMESTAMP_COPY semantics no longer auto-copy
|
||||
* src→dst timestamps because src and dst are no longer paired
|
||||
* 1:1 in m2m's view — stamp dst explicitly from the inflight's
|
||||
* stored src_pts (= the OUTPUT vb2_buf.timestamp captured at
|
||||
* device_run). The V4L2 client gets the same display-PTS it
|
||||
* originally set on the OUTPUT side.
|
||||
*/
|
||||
if (dst_to_complete) {
|
||||
if (state == VB2_BUF_STATE_DONE)
|
||||
daedalus_pack_pixels_into_dst(dst_to_complete, fr,
|
||||
pixels, pixels_len);
|
||||
dst_to_complete->vb2_buf.timestamp = dst_timestamp;
|
||||
/* Walk the detached entries; the _safe iterator allows the list_del() below. */
list_for_each_entry_safe(inf, tmp, &local, list) {
|
||||
list_del(&inf->list);
|
||||
|
||||
v4l2_warn(&dev->v4l2_dev,
|
||||
"draining inflight cookie=%u (daemon disconnect)\n",
|
||||
inf->cookie);
|
||||
|
||||
/*
|
||||
* The buffer was already removed from m2m's rdy_queue at
|
||||
* device_run time (see the "Take both buffers off ..."
|
||||
* block). Just call buf_done here — calling
|
||||
* v4l2_m2m_dst_buf_remove_by_buf again would list_del a
|
||||
* list_head that's no longer linked, smashing the list.
|
||||
* Complete the per-request control state before
|
||||
* buf_done_and_job_finish, same ordering as the success
|
||||
* path in daedalus_complete_resp_frame(). For non-request
|
||||
* flows inf->req is NULL and v4l2_ctrl_request_complete
|
||||
* no-ops.
|
||||
*/
|
||||
v4l2_m2m_buf_done(dst_to_complete, state);
|
||||
}
|
||||
if (inf->req)
|
||||
v4l2_ctrl_request_complete(inf->req, &inf->ctx->hdl);
|
||||
|
||||
/*
|
||||
* Complete the OUTPUT side: release the bound media_request's
|
||||
* controls (libva-driven path), drop our request reference taken
|
||||
* in device_run, mark src done, then job_finish so the m2m
|
||||
* scheduler can dispatch the next pending REQ on this ctx. The
|
||||
* dst_buf for this cookie may still be parked (HAS_PIXELS hasn't
|
||||
* arrived yet — libavcodec is holding the frame for display-
|
||||
* order release). That's fine: the next device_run picks a
|
||||
* different next_dst_buf out of the CAPTURE queue and proceeds.
|
||||
*/
|
||||
if (src_to_complete) {
|
||||
if (req_to_complete)
|
||||
v4l2_ctrl_request_complete(req_to_complete, &ctx->hdl);
|
||||
/* Already off the rdy_queue (see device_run claim) — buf_done only. */
|
||||
v4l2_m2m_buf_done(src_to_complete, state);
|
||||
if (req_to_complete)
|
||||
media_request_put(req_to_complete);
|
||||
v4l2_m2m_job_finish(dev->m2m_dev, ctx->m2m_ctx);
|
||||
}
|
||||
/*
|
||||
* Mark both buffers ERROR and clear the m2m scheduler's
|
||||
* job_running flag. This is what unsticks
|
||||
* v4l2_m2m_cancel_job() inside the consumer's close()
|
||||
* path; without it, the consumer hangs in TASK_UNINTERRUPTIBLE
|
||||
* forever (issue #146).
|
||||
*/
|
||||
v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev,
|
||||
inf->ctx->m2m_ctx,
|
||||
VB2_BUF_STATE_ERROR);
|
||||
|
||||
if (inf->req)
|
||||
media_request_put(inf->req);
|
||||
|
||||
if (entry_freed)
|
||||
kfree(inf);
|
||||
}
|
||||
}
|
||||
|
||||
/* -- v4l2_ioctl_ops -------------------------------------------------- */
|
||||
|
||||
@@ -103,4 +103,27 @@ void daedalus_complete_resp_frame(u32 cookie,
|
||||
int daedalus_export_capture_dmabuf(u32 cookie, u32 plane, u32 flags,
|
||||
int *out_fd);
|
||||
|
||||
/**
|
||||
* daedalus_drain_inflight_on_disconnect() - fail all in-flight m2m jobs
|
||||
*
|
||||
* Called from daedalus_chardev_release() when the daemon disconnects
|
||||
* (graceful close, SIGKILL, daemon crash — anything that triggers
|
||||
* chardev release). Walks the in-flight list and, for every entry,
|
||||
* marks both src+dst buffers VB2_BUF_STATE_ERROR and calls
|
||||
* v4l2_m2m_buf_done_and_job_finish() to clear the m2m scheduler's
|
||||
* "job_running" flag.
|
||||
*
|
||||
* Without this, v4l2_m2m_cancel_job() (called from
|
||||
* v4l2_m2m_ctx_release() during the consumer's close() / task exit)
|
||||
* blocks forever waiting for a job_finish that the dead daemon will
|
||||
* never send — the consumer enters TASK_UNINTERRUPTIBLE and survives
|
||||
* SIGKILL until reboot. See issue #146 for the full trace.
|
||||
*
|
||||
* Safe to call with an empty in-flight list; no-op in that case.
|
||||
* Must NOT be called from atomic context — uses inflight_lock
|
||||
* (sleeping mutex) and v4l2_m2m_buf_done_and_job_finish (which can
|
||||
* sleep via vb2 buffer-done dispatch).
|
||||
*
* Context: process context only; takes inflight_lock (a mutex) and so
* may sleep.
*/
|
||||
void daedalus_drain_inflight_on_disconnect(void);
|
||||
|
||||
#endif /* DAEDALUS_V4L2_MAIN_H */
|
||||
|
||||
Reference in New Issue
Block a user