Merge pull request 'daemon: shadow_decoder wiring (PR-Q3a.1)' (#25 ) from noether/daemon-shadow-decoder-wiring into main

Reviewed-on: #25
daemon: shadow_decoder wiring (PR-Q3a.1)
2026-05-26 12:28:16 +00:00 · 2026-05-26 14:15:13 +02:00 · 2026-05-23 17:16:27 +00:00 · 2026-05-23 18:31:41 +02:00
10 changed files with 1129 additions and 6 deletions
@@ -40,6 +40,11 @@ pkg_check_modules(FFMPEG REQUIRED IMPORTED_TARGET
 # libdaedalus_core.a must precede -lvulkan because the static archive
 # references vulkan symbols and the linker resolves left-to-right.
 pkg_check_modules(DAEDALUS_FOURIER REQUIRED daedalus-fourier)
+# daedalus-decoder — frame-major UMA H.264 decoder.  Linked into the
+# shadow-mode path (env DAEDALUS_SHADOW_MODE=1) and inert otherwise.
+# Linked unconditionally to keep CMake configurations symmetrical
+# between production and shadow-mode runs.
+pkg_check_modules(DAEDALUS_DECODER REQUIRED daedalus-decoder)
 find_package(Vulkan REQUIRED)

 add_executable(daedalus_v4l2_daemon
@@ -48,6 +53,7 @@ add_executable(daedalus_v4l2_daemon
 	src/log.c
 	src/parser.c
 	src/decoder.c
+	src/shadow_decoder.c
 	src/chardev_client.c
 	src/dmabuf_capture.c
 	src/bitstream_writer.c
@@ -61,20 +67,25 @@ target_include_directories(daedalus_v4l2_daemon
 		${CMAKE_CURRENT_SOURCE_DIR}/../include
 		${FFMPEG_INCLUDE_DIRS}
 		${DAEDALUS_FOURIER_INCLUDE_DIRS}
+		${DAEDALUS_DECODER_INCLUDE_DIRS}
 )

 # dl for dlopen, pthread for future threading work.
 target_link_directories(daedalus_v4l2_daemon
 	PRIVATE
 		${DAEDALUS_FOURIER_LIBRARY_DIRS}
+		${DAEDALUS_DECODER_LIBRARY_DIRS}
 )

 target_link_libraries(daedalus_v4l2_daemon
 	PRIVATE
 		dl
 		pthread
-		# Order matters: libdaedalus_core.a first (so its undefined
-		# vulkan symbols register), then -lvulkan to satisfy them.
+		# Order matters for left-to-right linker resolution of
+		# static archives.  daedalus-decoder references symbols
+		# from daedalus-fourier; daedalus-fourier references
+		# vulkan symbols.  So: decoder, fourier, vulkan.
+		${DAEDALUS_DECODER_LIBRARIES}
 		${DAEDALUS_FOURIER_LIBRARIES}
 		Vulkan::Vulkan
 )
@@ -40,6 +40,19 @@
 #include <string.h>

 #define OBU_SEQUENCE_HEADER	 1
+#define OBU_TEMPORAL_DELIMITER	 2
+#define OBU_FRAME_HEADER	 3
+#define OBU_TILE_GROUP		 4
+#define OBU_FRAME		 6
+
+/* AV1 §3 ref-frame symbolic constants — values per the spec table.
+ * INTRA_FRAME is index 0 (used for intra-only); LAST_FRAME..ALTREF_FRAME
+ * are 1..7.  TOTAL_REFS_PER_FRAME = 8 (V4L2 mirrors this). */
+#define AV1_INTRA_FRAME		0
+#define AV1_LAST_FRAME		1
+#define AV1_NUM_REF_FRAMES	8	/* the DPB size */
+#define AV1_REFS_PER_FRAME	7	/* refs available to an inter frame */
+#define AV1_PRIMARY_REF_NONE	7

 /* Default operating-point level: 5.1 — supports any frame size up to
 * 4K@60fps.  Well past anything the V4L2 path is realistically driven
@@ -325,3 +338,560 @@ size_t av1_synth_sequence_header_obu(const struct v4l2_ctrl_av1_sequence *seq,

 	return w;
 }
+
+/* -------------------------------------------------------------------
+ * Shared OBU wrap helper (header byte + leb128 size + payload).  Used
+ * by frame_header_obu and the temporal_delimiter helper; the sequence
+ * header above predates this factor-out and keeps its inline
+ * assembly so its memory footprint stays predictable.
+ * ----------------------------------------------------------------- */
+static size_t wrap_obu(uint8_t obu_type, const uint8_t *payload,
+		       size_t payload_len, uint8_t *out, size_t out_cap)
+{
+	/* obu_header byte, MSB first: forbidden bit = 0, 4-bit obu_type,
+	 * extension_flag = 0, has_size_field = 1, reserved bit = 0. */
+	const uint8_t header =
+		(uint8_t)(((obu_type & 0xfu) << 3) | (1u << 1));
+	size_t pos;
+	size_t size_len;
+
+	/* Need room for at least the header and a one-byte size. */
+	if (out_cap < 2)
+		return 0;
+	out[0] = header;
+	pos = 1;
+
+	/* leb128 obu_size; a zero return is treated as "did not fit". */
+	size_len = leb128_put((uint32_t) payload_len, out + pos, out_cap - pos);
+	if (size_len == 0)
+		return 0;
+	pos += size_len;
+
+	if (payload_len > out_cap - pos)
+		return 0;
+	/* payload may be NULL for an empty OBU, so skip the copy then. */
+	if (payload_len != 0)
+		memcpy(out + pos, payload, payload_len);
+
+	/* Total bytes written: header + size field + payload. */
+	return pos + payload_len;
+}
+
+/* Write a Temporal Delimiter OBU (header byte + zero-length size
+ * field, no payload) into @out.  Returns the number of bytes written,
+ * or 0 when @out is NULL or @out_cap is too small. */
+size_t av1_synth_temporal_delimiter_obu(uint8_t *out, size_t out_cap)
+{
+	/* Public entry point: reject a NULL buffer the same way
+	 * av1_synth_frame_header_obu() does instead of writing through it. */
+	if (!out)
+		return 0;
+	return wrap_obu(OBU_TEMPORAL_DELIMITER, NULL, 0, out, out_cap);
+}
+
+/* -------------------------------------------------------------------
+ * Frame Header OBU — §5.9
+ *
+ * The encoder is sectioned to mirror the spec.  Each subsection
+ * helper writes into the shared bs_writer and signals "out of
+ * scope" by setting a sticky `*unsupported` flag that the top-level
+ * checks before returning.  This keeps the spec-mirror linear and
+ * the failure modes diagnosable.
+ * ----------------------------------------------------------------- */
+
+/* MiCols: the frame width counted in 4x4 units after rounding the
+ * width up to a whole number of 8x8 blocks, i.e.
+ * 2 * ((frame_width + 7) >> 3).  mi_rows_for() is the same
+ * computation applied to the height. */
+static uint32_t mi_cols_for(uint32_t frame_width)
+{
+	const uint32_t blocks_8x8 = (frame_width + 7u) / 8u;
+
+	return blocks_8x8 * 2u;	/* two 4x4 columns per 8x8 block */
+}
+/* MiRows: the frame height counted in 4x4 units after rounding the
+ * height up to a whole number of 8x8 blocks. */
+static uint32_t mi_rows_for(uint32_t frame_height)
+{
+	const uint32_t blocks_8x8 = (frame_height + 7u) / 8u;
+
+	return blocks_8x8 * 2u;	/* two 4x4 rows per 8x8 block */
+}
+
+/* tile_log2(blkSize, target) per AV1 §5.9.15: the smallest k for
+ * which (blkSize << k) >= target.  Returns 0 when blk already
+ * reaches target. */
+static int tile_log2_ge(int blk, int target)
+{
+	int k;
+
+	for (k = 0; (blk << k) < target; k++)
+		;
+	return k;
+}
+
+/* read_delta_q coding for one delta: a delta_coded bit, followed —
+ * only when the delta is non-zero — by the value as a 7-bit two's
+ * complement field. */
+static void emit_delta_q(struct bs_writer *bs, int delta)
+{
+	if (delta == 0) {
+		bsw_put_u(bs, 0u, 1);	/* delta_coded = 0 */
+		return;
+	}
+	bsw_put_u(bs, 1u, 1);		/* delta_coded = 1 */
+	/* Keeping the low 7 bits yields the two's complement pattern
+	 * for negative and positive values alike. */
+	bsw_put_u(bs, (uint32_t) delta & 0x7fu, 7);
+}
+
+/* §5.9.12 quantization_params
+ *
+ * Writes base_q_idx, the luma DC delta, the chroma deltas (only when
+ * there is more than one plane) and the quantizer-matrix fields, in
+ * spec order. */
+static void write_quantization_params(struct bs_writer *bs,
+				      const struct v4l2_av1_quantization *q,
+				      bool num_planes_gt_1,
+				      bool separate_uv_delta_q)
+{
+	const bool diff_uv_delta =
+		(q->flags & V4L2_AV1_QUANTIZATION_FLAG_DIFF_UV_DELTA) != 0;
+	const bool using_qmatrix =
+		(q->flags & V4L2_AV1_QUANTIZATION_FLAG_USING_QMATRIX) != 0;
+
+	bsw_put_u(bs, q->base_q_idx, 8);
+	emit_delta_q(bs, (int8_t) q->delta_q_y_dc);
+
+	if (num_planes_gt_1) {
+		/* diff_uv_delta is only coded when the sequence allows
+		 * separate U/V deltas. */
+		if (separate_uv_delta_q)
+			bsw_put_u(bs, diff_uv_delta ? 1u : 0u, 1);
+		emit_delta_q(bs, (int8_t) q->delta_q_u_dc);
+		emit_delta_q(bs, (int8_t) q->delta_q_u_ac);
+		if (separate_uv_delta_q && diff_uv_delta) {
+			emit_delta_q(bs, (int8_t) q->delta_q_v_dc);
+			emit_delta_q(bs, (int8_t) q->delta_q_v_ac);
+		}
+	}
+
+	bsw_put_u(bs, using_qmatrix ? 1u : 0u, 1);
+	if (!using_qmatrix)
+		return;
+
+	bsw_put_u(bs, q->qm_y, 4);
+	bsw_put_u(bs, q->qm_u, 4);
+	if (num_planes_gt_1 && separate_uv_delta_q)
+		bsw_put_u(bs, q->qm_v, 4);
+}
+
+/* §5.9.11 loop_filter_params
+ *
+ * Writes the filter levels, sharpness and the delta enable/update
+ * flags.  When delta_update is set, every per-entry update bit is
+ * written as 0. */
+static void write_loop_filter_params(struct bs_writer *bs,
+				     const struct v4l2_av1_loop_filter *lf,
+				     bool num_planes_gt_1,
+				     bool coded_lossless_or_allow_intrabc)
+{
+	bool deltas_enabled;
+	bool deltas_updated;
+	int n;
+
+	/* With CodedLossless or allow_intrabc the loop filter levels
+	 * are inferred and nothing is coded. */
+	if (coded_lossless_or_allow_intrabc)
+		return;
+
+	bsw_put_u(bs, lf->level[0], 6);
+	bsw_put_u(bs, lf->level[1], 6);
+	/* Chroma levels are only present when a luma level is non-zero. */
+	if (num_planes_gt_1 && (lf->level[0] != 0 || lf->level[1] != 0)) {
+		bsw_put_u(bs, lf->level[2], 6);
+		bsw_put_u(bs, lf->level[3], 6);
+	}
+	bsw_put_u(bs, lf->sharpness, 3);
+
+	/* loop_filter_delta_enabled */
+	deltas_enabled = (lf->flags & V4L2_AV1_LOOP_FILTER_FLAG_DELTA_ENABLED) != 0;
+	bsw_put_u(bs, deltas_enabled ? 1u : 0u, 1);
+	if (!deltas_enabled)
+		return;
+
+	/* loop_filter_delta_update */
+	deltas_updated = (lf->flags & V4L2_AV1_LOOP_FILTER_FLAG_DELTA_UPDATE) != 0;
+	bsw_put_u(bs, deltas_updated ? 1u : 0u, 1);
+	if (!deltas_updated)
+		return;
+
+	/* 8 update_ref_delta bits followed by 2 update_mode_delta bits,
+	 * all 0 ("no update"): V4L2 carries the cumulative state, and
+	 * differential coding would need previous-frame state that is
+	 * not tracked here. */
+	for (n = 0; n < 8 + 2; n++)
+		bsw_put_u(bs, 0u, 1);
+}
+
+/* Map a CDEF secondary strength to its 2-bit coded form.  The spec
+ * bumps a coded value of 3 to an effective strength of 4, so an
+ * effective 4 must be written back as 3; masking alone would turn it
+ * into 0.  Values 0..3 pass through unchanged, which also covers a
+ * producer that hands over the raw coded value.
+ * NOTE(review): assumes the V4L2 control carries the adjusted
+ * (0, 1, 2, 4) strengths — confirm against the control producer. */
+static uint32_t cdef_coded_sec_strength(uint8_t strength)
+{
+	return (strength == 4) ? 3u : ((uint32_t) strength & 0x3u);
+}
+
+/* §5.9.19 cdef_params
+ *
+ * Writes nothing when CDEF is disabled for the sequence or when the
+ * frame is coded lossless / uses intra block copy.  Otherwise writes
+ * damping, cdef_bits and (1 << cdef_bits) strength entries. */
+static void write_cdef_params(struct bs_writer *bs,
+			      const struct v4l2_av1_cdef *cdef,
+			      bool num_planes_gt_1,
+			      bool enable_cdef,
+			      bool coded_lossless_or_intrabc)
+{
+	unsigned int bits, count, i;
+
+	if (!enable_cdef || coded_lossless_or_intrabc)
+		return;
+
+	/* cdef_bits is a 2-bit field.  Derive the entry count from the
+	 * same masked value that is written, so the number of entries
+	 * always matches what a decoder will read and the loop cannot
+	 * run past 8 entries on an out-of-range control value. */
+	bits = cdef->bits & 0x3u;
+	count = 1u << bits;
+
+	bsw_put_u(bs, cdef->damping_minus_3, 2);
+	bsw_put_u(bs, bits, 2);
+	for (i = 0; i < count; i++) {
+		bsw_put_u(bs, cdef->y_pri_strength[i] & 0xfu, 4);
+		bsw_put_u(bs, cdef_coded_sec_strength(cdef->y_sec_strength[i]), 2);
+		if (num_planes_gt_1) {
+			bsw_put_u(bs, cdef->uv_pri_strength[i] & 0xfu, 4);
+			bsw_put_u(bs, cdef_coded_sec_strength(cdef->uv_sec_strength[i]), 2);
+		}
+	}
+}
+
+/* §5.9.20 lr_params — only RESTORE_NONE supported here.
+ *
+ * Returns 1 when the syntax was written (or nothing needed writing),
+ * 0 when any coded plane requests a restoration type other than
+ * NONE; nothing is written in that case. */
+static int write_lr_params(struct bs_writer *bs,
+			   const struct v4l2_av1_loop_restoration *lr,
+			   int num_planes,
+			   bool enable_restoration,
+			   bool coded_lossless_or_intrabc)
+{
+	/* Luma only for a single plane, otherwise luma plus both chroma. */
+	const int planes_to_check = (num_planes > 1) ? 3 : 1;
+	int plane;
+
+	/* lr_params is empty in these cases: success, no bits. */
+	if (coded_lossless_or_intrabc || !enable_restoration)
+		return 1;
+
+	/* Reject before emitting anything. */
+	for (plane = 0; plane < planes_to_check; plane++) {
+		if (lr->frame_restoration_type[plane] != V4L2_AV1_FRAME_RESTORE_NONE)
+			return 0;
+	}
+
+	/* One 2-bit lr_type of 0 (RESTORE_NONE) per plane. */
+	for (plane = 0; plane < num_planes; plane++)
+		bsw_put_u(bs, 0u, 2);
+	return 1;
+}
+
+/* §5.9.15 tile_info — single-tile uniform-spacing path only.
+ *
+ * Emits uniform_tile_spacing_flag = 1 and leaves TileColsLog2 =
+ * TileRowsLog2 = 0.  The decoder reads an increment bit only while
+ * the current log2 is below the maximum for that axis, so the "stop"
+ * bit for an axis is written only when that axis could hold more
+ * than one tile (more than one superblock).  With a single tile,
+ * context_update_tile_id and tile_size_bytes_minus_1 are not coded.
+ *
+ * Returns 1 on success.  Returns 0 without writing anything when the
+ * frame cannot be coded as a single tile (it exceeds the maximum
+ * tile width or area, so the minimum tile count is above one) or
+ * when the dimensions are outside the 1..65536 range AV1 can code. */
+static int write_tile_info_single_tile(struct bs_writer *bs,
+				       uint32_t frame_width,
+				       uint32_t frame_height,
+				       bool use_128_sb)
+{
+	const int sb_shift = use_128_sb ? 5 : 4;	/* log2(sb size), mi units */
+	const int sb_size_log2 = sb_shift + 2;		/* log2(sb size), pixels */
+	/* MAX_TILE_WIDTH = 4096, MAX_TILE_AREA = 4096 * 2304, both in
+	 * pixels; convert to superblock units. */
+	const int max_tile_width_sb = 4096 >> sb_size_log2;
+	const int max_tile_area_sb = (4096 * 2304) >> (2 * sb_size_log2);
+	uint32_t sb_cols, sb_rows;
+	int min_log2_cols, max_log2_cols, max_log2_rows;
+	int min_log2_area, min_log2_tiles;
+
+	/* Frame dimensions are coded in at most 16 bits; bounding them
+	 * here also keeps the int arithmetic below from overflowing. */
+	if (frame_width == 0 || frame_width > 65536u ||
+	    frame_height == 0 || frame_height > 65536u)
+		return 0;
+
+	sb_cols = (mi_cols_for(frame_width) + ((1u << sb_shift) - 1u)) >> sb_shift;
+	sb_rows = (mi_rows_for(frame_height) + ((1u << sb_shift) - 1u)) >> sb_shift;
+
+	min_log2_cols = tile_log2_ge(max_tile_width_sb, (int) sb_cols);
+	/* MAX_TILE_COLS = MAX_TILE_ROWS = 64 */
+	max_log2_cols = tile_log2_ge(1, (int) (sb_cols < 64u ? sb_cols : 64u));
+	max_log2_rows = tile_log2_ge(1, (int) (sb_rows < 64u ? sb_rows : 64u));
+	min_log2_area = tile_log2_ge(max_tile_area_sb, (int) (sb_rows * sb_cols));
+	min_log2_tiles = (min_log2_cols > min_log2_area) ? min_log2_cols
+							 : min_log2_area;
+
+	/* The decoder starts TileColsLog2 at minLog2TileCols and
+	 * TileRowsLog2 at max(minLog2Tiles - TileColsLog2, 0).  If either
+	 * minimum is above zero a single tile is not representable. */
+	if (min_log2_cols > 0 || min_log2_tiles > 0)
+		return 0;
+
+	bsw_put_u(bs, 1u, 1);		/* uniform_tile_spacing_flag */
+	if (max_log2_cols > 0)
+		bsw_put_u(bs, 0u, 1);	/* increment_tile_cols_log2 = 0 */
+	if (max_log2_rows > 0)
+		bsw_put_u(bs, 0u, 1);	/* increment_tile_rows_log2 = 0 */
+	return 1;
+}
+
+/* Write a single flag bit. */
+static void fh_put_flag(struct bs_writer *bs, bool value)
+{
+	bsw_put_u(bs, value ? 1u : 0u, 1);
+}
+
+/* True when the quantizer state that write_quantization_params()
+ * codes makes a decoder derive CodedLossless = 1 with segmentation
+ * disabled: base_q_idx and every coded delta are zero. */
+static bool fh_coded_lossless(const struct v4l2_av1_quantization *q,
+			      bool num_planes_gt_1,
+			      bool separate_uv_delta_q)
+{
+	if (q->base_q_idx != 0 || q->delta_q_y_dc != 0)
+		return false;
+	if (!num_planes_gt_1)
+		return true;
+	if (q->delta_q_u_dc != 0 || q->delta_q_u_ac != 0)
+		return false;
+	/* V deltas are only coded (and so only independent of U) when
+	 * diff_uv_delta is signalled. */
+	if (separate_uv_delta_q &&
+	    (q->flags & V4L2_AV1_QUANTIZATION_FLAG_DIFF_UV_DELTA))
+		return q->delta_q_v_dc == 0 && q->delta_q_v_ac == 0;
+	return true;
+}
+
+/* frame_size() (§5.9.5) including superres_params() (§5.9.8),
+ * followed by render_size() (§5.9.6). */
+static void fh_write_frame_and_render_size(struct bs_writer *bs,
+					   const struct v4l2_ctrl_av1_frame *fr,
+					   bool frame_size_override,
+					   int width_bits, int height_bits,
+					   bool enable_superres,
+					   bool use_superres)
+{
+	const bool render_differs =
+		(fr->render_width_minus_1 != fr->frame_width_minus_1) ||
+		(fr->render_height_minus_1 != fr->frame_height_minus_1);
+
+	if (frame_size_override) {
+		bsw_put_u(bs, fr->frame_width_minus_1, width_bits);
+		bsw_put_u(bs, fr->frame_height_minus_1, height_bits);
+	}
+
+	/* use_superres is only coded when the sequence enables superres;
+	 * the caller guarantees use_superres is false otherwise. */
+	if (enable_superres)
+		fh_put_flag(bs, use_superres);
+	if (use_superres) {
+		/* coded_denom = superres_denom - SUPERRES_DENOM_MIN(9) */
+		int denom = fr->superres_denom;
+
+		if (denom < 9)
+			denom = 9;
+		bsw_put_u(bs, (uint32_t)(denom - 9) & 0x7u, 3);
+	}
+
+	/* render_and_frame_size_different */
+	fh_put_flag(bs, render_differs);
+	if (render_differs) {
+		bsw_put_u(bs, fr->render_width_minus_1, 16);
+		bsw_put_u(bs, fr->render_height_minus_1, 16);
+	}
+}
+
+/* Encode a Frame Header OBU (§5.9) from the V4L2 sequence and frame
+ * controls.  Fields are written in uncompressed_header() order.
+ *
+ * Returns the number of bytes written to @out, or 0 when an argument
+ * is NULL, @out_cap is below 16, the output does not fit, or the
+ * frame needs syntax this encoder does not produce:
+ *   - SWITCH frames
+ *   - film grain present in the sequence
+ *   - segmentation enabled
+ *   - frames that must carry ref_order_hint[] (error-resilient with
+ *     order hints, unless all reference slots are refreshed on an
+ *     intra frame)
+ *   - frames that cannot be coded as a single tile
+ *   - loop restoration other than RESTORE_NONE
+ *   - non-IDENTITY global motion
+ *
+ * NOTE(review): the tile layout in fr->tile_info is not consulted;
+ * a single tile is always coded — confirm callers only pass
+ * single-tile frames.
+ */
+size_t av1_synth_frame_header_obu(const struct v4l2_ctrl_av1_sequence *seq,
+				  const struct v4l2_ctrl_av1_frame *fr,
+				  uint8_t *out, size_t out_cap)
+{
+	uint8_t rbsp[256];
+	struct bs_writer bs;
+	uint32_t sf, ff;
+	bool monochrome, enable_order_hint, enable_ref_frame_mvs;
+	bool enable_warped_motion_seq, enable_cdef_seq, enable_restoration_seq;
+	bool enable_superres_seq, separate_uv_delta_q, use_128_sb;
+	bool show_frame, showable_frame, error_resilient_mode;
+	bool disable_cdf_update, allow_screen_content_tools;
+	bool force_integer_mv, allow_intrabc, frame_size_override;
+	bool use_superres, frame_is_intra, key_shown;
+	bool coded_lossless, delta_q_present, lossless_or_intrabc;
+	uint8_t frame_type, refresh_frame_flags, order_hint_bits;
+	uint32_t frame_width, frame_height;
+	int num_planes, width_bits, height_bits;
+	int i;
+
+	if (!seq || !fr || !out || out_cap < 16)
+		return 0;
+
+	sf = seq->flags;
+	ff = fr->flags;
+
+	/* ---- sequence-level switches the frame header branches on ---- */
+	monochrome = !!(sf & V4L2_AV1_SEQUENCE_FLAG_MONO_CHROME);
+	num_planes = monochrome ? 1 : 3;
+	enable_order_hint = !!(sf & V4L2_AV1_SEQUENCE_FLAG_ENABLE_ORDER_HINT);
+	enable_ref_frame_mvs = !!(sf & V4L2_AV1_SEQUENCE_FLAG_ENABLE_REF_FRAME_MVS);
+	enable_warped_motion_seq = !!(sf & V4L2_AV1_SEQUENCE_FLAG_ENABLE_WARPED_MOTION);
+	enable_cdef_seq = !!(sf & V4L2_AV1_SEQUENCE_FLAG_ENABLE_CDEF);
+	enable_restoration_seq = !!(sf & V4L2_AV1_SEQUENCE_FLAG_ENABLE_RESTORATION);
+	enable_superres_seq = !!(sf & V4L2_AV1_SEQUENCE_FLAG_ENABLE_SUPERRES);
+	separate_uv_delta_q = !!(sf & V4L2_AV1_SEQUENCE_FLAG_SEPARATE_UV_DELTA_Q);
+	use_128_sb = !!(sf & V4L2_AV1_SEQUENCE_FLAG_USE_128X128_SUPERBLOCK);
+	order_hint_bits = enable_order_hint ? seq->order_hint_bits : 0;
+	if (order_hint_bits > 8)
+		order_hint_bits = 8;
+
+	/* ---- out-of-scope checks that need no bitstream state ---- */
+	frame_type = fr->frame_type;
+	if (frame_type == V4L2_AV1_SWITCH_FRAME)
+		return 0;
+	if (sf & V4L2_AV1_SEQUENCE_FLAG_FILM_GRAIN_PARAMS_PRESENT)
+		return 0;	/* film grain coding deferred */
+	if (fr->segmentation.flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED)
+		return 0;	/* only segmentation_enabled = 0 supported */
+
+	/* ---- frame-level state, normalised the way a decoder infers it ---- */
+	frame_is_intra = (frame_type == V4L2_AV1_KEY_FRAME ||
+			  frame_type == V4L2_AV1_INTRA_ONLY_FRAME);
+	show_frame = !!(ff & V4L2_AV1_FRAME_FLAG_SHOW_FRAME);
+	showable_frame = !!(ff & V4L2_AV1_FRAME_FLAG_SHOWABLE_FRAME);
+	key_shown = (frame_type == V4L2_AV1_KEY_FRAME && show_frame);
+
+	/* A shown key frame does not code error_resilient_mode; it is
+	 * inferred to be 1. */
+	error_resilient_mode = key_shown ||
+		!!(ff & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE);
+	disable_cdf_update = !!(ff & V4L2_AV1_FRAME_FLAG_DISABLE_CDF_UPDATE);
+	allow_screen_content_tools =
+		!!(ff & V4L2_AV1_FRAME_FLAG_ALLOW_SCREEN_CONTENT_TOOLS);
+	/* force_integer_mv is only coded with screen content tools. */
+	force_integer_mv = allow_screen_content_tools &&
+		!!(ff & V4L2_AV1_FRAME_FLAG_FORCE_INTEGER_MV);
+	frame_size_override = !!(ff & V4L2_AV1_FRAME_FLAG_FRAME_SIZE_OVERRIDE);
+	/* use_superres is inferred 0 when the sequence disables it. */
+	use_superres = enable_superres_seq &&
+		!!(ff & V4L2_AV1_FRAME_FLAG_USE_SUPERRES);
+	/* allow_intrabc is only coded on intra frames with screen content
+	 * tools and no superres scaling; otherwise it is inferred 0.
+	 * (!use_superres stands in for UpscaledWidth == FrameWidth.) */
+	allow_intrabc = frame_is_intra && allow_screen_content_tools &&
+		!use_superres &&
+		!!(ff & V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC);
+
+	/* refresh_frame_flags is inferred "all slots" on a shown key frame. */
+	refresh_frame_flags = key_shown ? 0xffu : fr->refresh_frame_flags;
+
+	/* ref_order_hint[] would have to be coded here; the per-slot
+	 * values are not reconstructed by this encoder. */
+	if (error_resilient_mode && enable_order_hint &&
+	    (!frame_is_intra || refresh_frame_flags != 0xffu))
+		return 0;
+
+	frame_width = fr->frame_width_minus_1 + 1;
+	frame_height = fr->frame_height_minus_1 + 1;
+	width_bits = min_bits_for((uint32_t) seq->max_frame_width_minus_1);
+	height_bits = min_bits_for((uint32_t) seq->max_frame_height_minus_1);
+
+	bsw_init(&bs, rbsp, sizeof(rbsp));
+
+	/* show_existing_frame: 0 (V4L2 doesn't surface the show-only path
+	 * — every fr ctrl describes a real decoded frame). */
+	fh_put_flag(&bs, false);
+
+	bsw_put_u(&bs, (uint32_t) frame_type, 2);
+	fh_put_flag(&bs, show_frame);
+	/* No decoder_model_info in the sequence header, so no
+	 * temporal_point_info here.  showable_frame is only coded for
+	 * frames that are not shown. */
+	if (!show_frame)
+		fh_put_flag(&bs, showable_frame);
+	if (!key_shown)
+		fh_put_flag(&bs, error_resilient_mode);
+
+	fh_put_flag(&bs, disable_cdf_update);
+	/* The sequence header always selects SELECT_SCREEN_CONTENT_TOOLS
+	 * and SELECT_INTEGER_MV, so both are coded per frame. */
+	fh_put_flag(&bs, allow_screen_content_tools);
+	if (allow_screen_content_tools)
+		fh_put_flag(&bs, force_integer_mv);
+
+	/* frame_id_numbers_present_flag = 0 in seq → no current_frame_id */
+
+	/* SWITCH frames were rejected above and the sequence header is
+	 * never a reduced still picture header, so this is always coded. */
+	fh_put_flag(&bs, frame_size_override);
+
+	if (enable_order_hint)
+		bsw_put_u(&bs, fr->order_hint, order_hint_bits);
+
+	if (!frame_is_intra && !error_resilient_mode)
+		bsw_put_u(&bs, fr->primary_ref_frame, 3);
+
+	/* refresh_frame_flags: coded on everything but a shown key frame. */
+	if (!key_shown)
+		bsw_put_u(&bs, refresh_frame_flags, 8);
+
+	if (frame_is_intra) {
+		/* KEY and INTRA_ONLY share the same layout here. */
+		fh_write_frame_and_render_size(&bs, fr, frame_size_override,
+					       width_bits, height_bits,
+					       enable_superres_seq, use_superres);
+		if (allow_screen_content_tools && !use_superres)
+			fh_put_flag(&bs, allow_intrabc);
+	} else {
+		/* frame_refs_short_signaling = 0: every ref_frame_idx is
+		 * coded explicitly.  The bit exists whenever order hints
+		 * are enabled. */
+		if (enable_order_hint)
+			fh_put_flag(&bs, false);
+		for (i = 0; i < AV1_REFS_PER_FRAME; i++) {
+			int8_t idx = fr->ref_frame_idx[i];
+
+			if (idx < 0)
+				idx = 0;
+			bsw_put_u(&bs, (uint32_t)(idx & 0x7), 3);
+		}
+
+		/* frame_size_with_refs(): answer found_ref = 0 for every
+		 * reference, which makes the explicit frame_size() and
+		 * render_size() follow the found_ref bits. */
+		if (frame_size_override && !error_resilient_mode) {
+			for (i = 0; i < AV1_REFS_PER_FRAME; i++)
+				fh_put_flag(&bs, false);
+		}
+		fh_write_frame_and_render_size(&bs, fr, frame_size_override,
+					       width_bits, height_bits,
+					       enable_superres_seq, use_superres);
+
+		/* allow_high_precision_mv is inferred 0 with integer MVs. */
+		if (!force_integer_mv)
+			fh_put_flag(&bs,
+				    !!(ff & V4L2_AV1_FRAME_FLAG_ALLOW_HIGH_PRECISION_MV));
+
+		/* read_interpolation_filter: is_filter_switchable + value */
+		{
+			int interp = fr->interpolation_filter;
+			bool switchable =
+				(interp == V4L2_AV1_INTERPOLATION_FILTER_SWITCHABLE);
+
+			fh_put_flag(&bs, switchable);
+			if (!switchable)
+				bsw_put_u(&bs, (uint32_t) interp & 0x3u, 2);
+		}
+		fh_put_flag(&bs,
+			    !!(ff & V4L2_AV1_FRAME_FLAG_IS_MOTION_MODE_SWITCHABLE));
+		if (!error_resilient_mode && enable_ref_frame_mvs)
+			fh_put_flag(&bs,
+				    !!(ff & V4L2_AV1_FRAME_FLAG_USE_REF_FRAME_MVS));
+	}
+
+	/* disable_frame_end_update_cdf: inferred 1 with disable_cdf_update */
+	if (!disable_cdf_update)
+		fh_put_flag(&bs,
+			    !!(ff & V4L2_AV1_FRAME_FLAG_DISABLE_FRAME_END_UPDATE_CDF));
+
+	/* tile_info: single-tile path */
+	if (!write_tile_info_single_tile(&bs, frame_width, frame_height,
+					 use_128_sb))
+		return 0;
+
+	/* quantization_params */
+	write_quantization_params(&bs, &fr->quantization,
+				  num_planes > 1, separate_uv_delta_q);
+
+	/* segmentation_params: segmentation_enabled = 0 (enabled
+	 * segmentation was rejected above) */
+	fh_put_flag(&bs, false);
+
+	/* delta_q_params: delta_q_present is only coded (and only ever
+	 * set) when base_q_idx is non-zero. */
+	delta_q_present = fr->quantization.base_q_idx > 0 &&
+		!!(fr->quantization.flags &
+		   V4L2_AV1_QUANTIZATION_FLAG_DELTA_Q_PRESENT);
+	if (fr->quantization.base_q_idx > 0) {
+		fh_put_flag(&bs, delta_q_present);
+		if (delta_q_present)
+			bsw_put_u(&bs, fr->quantization.delta_q_res & 0x3u, 2);
+	}
+	/* delta_lf_params */
+	if (delta_q_present && !allow_intrabc) {
+		bool delta_lf_present =
+			!!(fr->loop_filter.flags &
+			   V4L2_AV1_LOOP_FILTER_FLAG_DELTA_LF_PRESENT);
+
+		fh_put_flag(&bs, delta_lf_present);
+		if (delta_lf_present) {
+			bsw_put_u(&bs, fr->loop_filter.delta_lf_res & 0x3u, 2);
+			fh_put_flag(&bs,
+				    !!(fr->loop_filter.flags &
+				       V4L2_AV1_LOOP_FILTER_FLAG_DELTA_LF_MULTI));
+		}
+	}
+
+	/* CodedLossless is derived from the same quantizer fields that
+	 * were just coded, so encoder and decoder agree on every branch
+	 * gated on it. */
+	coded_lossless = fh_coded_lossless(&fr->quantization,
+					   num_planes > 1, separate_uv_delta_q);
+	lossless_or_intrabc = coded_lossless || allow_intrabc;
+
+	write_loop_filter_params(&bs, &fr->loop_filter, num_planes > 1,
+				 lossless_or_intrabc);
+	write_cdef_params(&bs, &fr->cdef, num_planes > 1, enable_cdef_seq,
+			  lossless_or_intrabc);
+	/* lr_params is gated on AllLossless, i.e. CodedLossless with no
+	 * superres scaling, rather than on CodedLossless alone. */
+	if (!write_lr_params(&bs, &fr->loop_restoration, num_planes,
+			     enable_restoration_seq,
+			     (coded_lossless && !use_superres) || allow_intrabc))
+		return 0;
+
+	/* read_tx_mode (§5.9.21): a single tx_mode_select bit.  0 means
+	 * TX_MODE_LARGEST; with CodedLossless nothing is coded and
+	 * ONLY_4X4 is inferred. */
+	if (!coded_lossless)
+		fh_put_flag(&bs, fr->tx_mode == V4L2_AV1_TX_MODE_SELECT);
+
+	/* frame_reference_mode (§5.9.23) */
+	if (!frame_is_intra)
+		fh_put_flag(&bs, !!(ff & V4L2_AV1_FRAME_FLAG_REFERENCE_SELECT));
+
+	/* skip_mode_params (§5.9.22) */
+	if (ff & V4L2_AV1_FRAME_FLAG_SKIP_MODE_ALLOWED)
+		fh_put_flag(&bs, !!(ff & V4L2_AV1_FRAME_FLAG_SKIP_MODE_PRESENT));
+
+	/* allow_warped_motion: coded between skip_mode_params and
+	 * reduced_tx_set unless it is inferred 0. */
+	if (!frame_is_intra && !error_resilient_mode && enable_warped_motion_seq)
+		fh_put_flag(&bs, !!(ff & V4L2_AV1_FRAME_FLAG_ALLOW_WARPED_MOTION));
+
+	/* reduced_tx_set */
+	fh_put_flag(&bs, !!(ff & V4L2_AV1_FRAME_FLAG_REDUCED_TX_SET));
+
+	/* global_motion_params: §5.9.24 — emit IDENTITY for each ref */
+	if (!frame_is_intra) {
+		for (i = AV1_LAST_FRAME; i < AV1_NUM_REF_FRAMES; i++) {
+			if (fr->global_motion.type[i] != V4L2_AV1_WARP_MODEL_IDENTITY)
+				return 0;	/* out of scope */
+			fh_put_flag(&bs, false);	/* is_global = 0 → identity */
+		}
+	}
+
+	/* film_grain_params: nothing is coded because the sequence has
+	 * film_grain_params_present = 0 (checked above). */
+
+	bsw_align_rbsp(&bs);
+	if (bsw_overflowed(&bs))
+		return 0;
+
+	return wrap_obu(OBU_FRAME_HEADER, rbsp, bsw_bytes(&bs), out, out_cap);
+}
@@ -12,9 +12,9 @@
 * to the tile-group bytes before handing the assembled stream to
 * libavcodec.
 *
- * This header covers the Sequence Header OBU (AV1 spec §5.5.1).
- * The Frame Header / Frame OBU synthesisers are separate modules
- * (follow-on tasks); they all share the same wire conventions:
+ * This header covers Sequence Header (§5.5.1), Temporal Delimiter
+ * (§5.6), and Frame Header (§5.9) OBUs.  All share the same wire
+ * conventions:
 *   - No emulation prevention (AV1 uses leb128 sized fields instead).
 *   - obu_has_size_field = 1 in the OBU header byte.
 *   - obu_extension_flag = 0 (no temporal_id / spatial_id encoding).
@@ -47,4 +47,73 @@
 size_t av1_synth_sequence_header_obu(const struct v4l2_ctrl_av1_sequence *seq,
 				     uint8_t *out, size_t out_cap);

+/*
+ * Encode an AV1 Temporal Delimiter OBU into @out.  Always exactly 2
+ * bytes: 0x12 (obu_type=TEMPORAL_DELIMITER, has_size_field=1) followed
+ * by 0x00 (leb128(0) — zero-payload).  Returns 2 on success, 0 if
+ * @out_cap < 2.
+ *
+ * Per the AV1 spec every temporal unit starts with a Temporal
+ * Delimiter OBU (syntax in §5.6).  The libavcodec AV1 parser uses TD
+ * OBUs as access-unit boundaries when fed full-bitstream input, so
+ * one must be emitted ahead of each synthesised frame.
+ */
+size_t av1_synth_temporal_delimiter_obu(uint8_t *out, size_t out_cap);
+
+/*
+ * Integration status (2026-05-23):
+ *
+ * The Sequence / Frame Header / Temporal Delimiter encoders below are
+ * standalone primitives.  They are NOT yet called from decoder.c — the
+ * AV1 decode hot path still passes the OUTPUT buffer straight to
+ * libavcodec, which only works if the V4L2 consumer happens to be
+ * sending a fully-OBU'd access unit (i.e. is not strictly following
+ * the V4L2 stateless AV1 "tile-group bytes only" contract).
+ *
+ * Wiring these primitives in requires a separate kernel-side change:
+ *
+ *   - extend daedalus_v4l2_proto.h with a `struct daedalus_av1_meta`
+ *     mirroring v4l2_ctrl_av1_sequence + v4l2_ctrl_av1_frame
+ *   - update kernel/daedalus_v4l2_main.c to capture
+ *     V4L2_CID_STATELESS_AV1_{SEQUENCE,FRAME} at device_run time and
+ *     ship the meta alongside the bitstream over the chardev
+ *   - update daemon/src/chardev_client.c to receive the meta
+ *   - update daemon/src/decoder.c to: synth TD + SH + FH OBUs, wrap
+ *     the OUTPUT bytes as an OBU_TILE_GROUP, concat in that order,
+ *     hand the assembled bitstream to libavcodec
+ *
+ * Tracked as a follow-on; see daedalus-v4l2 task notes.
+ */
+
+/*
+ * Encode an AV1 Frame Header OBU from the V4L2 stateless frame control
+ * (and the matching sequence control, which provides fields the
+ * frame-header encoder branches on per §5.9.1).
+ *
+ * Scope (this revision — libva-v4l2-request common-case path):
+ *   - Frame types KEY / INTER / INTRA_ONLY.  SWITCH frames return 0
+ *     (caller should fall back to libavcodec native parsing).
+ *   - segmentation_params() emits the "segmentation disabled" path
+ *     when V4L2_AV1_SEGMENTATION_FLAG_ENABLED is 0.  Enabled
+ *     segmentation returns 0.
+ *   - loop_restoration_params(): only RESTORE_NONE on all planes
+ *     supported.  Other restoration types return 0.
+ *   - global_motion: only IDENTITY warp model emitted.  Non-IDENTITY
+ *     entries return 0.
+ *   - film_grain_params(): treated as "not present" — only valid when
+ *     the sequence header has film_grain_params_present = 0.  If the
+ *     sequence claims film grain is present this revision returns 0
+ *     (the per-frame film-grain coding is a separate follow-on).
+ *
+ * Out-of-scope branches return 0 so the caller can surface a coverage
+ * warning and fall back to direct libavcodec parsing of the original
+ * bitstream where possible.
+ *
+ * @out_cap must be at least 128 bytes for any reasonable frame header;
+ * 256 bytes is a safe upper bound for the supported subset.
+ */
+size_t av1_synth_frame_header_obu(const struct v4l2_ctrl_av1_sequence *seq,
+				  const struct v4l2_ctrl_av1_frame *fr,
+				  uint8_t *out, size_t out_cap);
+
 #endif /* DAEDALUS_AV1_OBU_SYNTH_H */
@@ -6,6 +6,7 @@
 #include "ffmpeg_loader.h"
 #include "h264_nal_synth.h"
 #include "log.h"
+#include "shadow_decoder.h"

 #include <errno.h>
 #include <stdlib.h>
@@ -110,6 +111,13 @@ int daedalus_decoder_init(struct daedalus_decoder *dec,
 		loader->av_packet_free(&dec->pkt);
 		return -ENOMEM;
 	}
+	/*
+	 * Returns NULL when DAEDALUS_SHADOW_MODE != "1" or the loaded
+	 * libavcodec lacks the per-MB inspection callback.  Both are
+	 * the normal production state — the rest of decoder.c is
+	 * shadow-aware via NULL-safe shadow_decoder_* entry points.
+	 */
+	dec->shadow = shadow_decoder_create(loader);
 	return 0;
 }

@@ -117,6 +125,8 @@ void daedalus_decoder_cleanup(struct daedalus_decoder *dec)
 {
 	if (!dec || !dec->loader)
 		return;
+	if (dec->shadow)
+		shadow_decoder_destroy(dec->shadow);
 	if (dec->ctx_vp9)
 		dec->loader->avcodec_free_context(&dec->ctx_vp9);
 	if (dec->ctx_av1)
@@ -211,6 +221,16 @@ static int decoder_open_codec(struct daedalus_decoder *dec, uint32_t codec_id,
 	*cache = ctx;
 	*out = ctx;
 	log_info("decoder: opened %s context", codec->name);
+
+	/*
+	 * Shadow-mode hook on H.264 only: install the per-MB inspection
+	 * callback once the AVCodecContext is open.  NULL-safe — when
+	 * shadow mode is disabled (the normal production case) this
+	 * does nothing.
+	 */
+	if (codec_id == DAEDALUS_CODEC_H264)
+		shadow_decoder_install_cb(dec->shadow, ctx);
+
 	return 0;
 }

@@ -595,6 +615,16 @@ int daedalus_decoder_run_request(struct daedalus_decoder *dec,
 		goto out;
 	}

+	/*
+	 * Shadow-mode frame-boundary hook.  H.264-only — the per-MB
+	 * callback is only registered for H.264, so on VP9/AV1 frames
+	 * shadow->mbs_this_frame stays zero anyway, but keeping the
+	 * codec gate here makes the log lines easier to read.
+	 * NULL-safe.
+	 */
+	if (req->codec_id == DAEDALUS_CODEC_H264)
+		shadow_decoder_on_frame(dec->shadow, dec->frame);
+
 	{
 		struct AVFrame *fr = dec->frame;
 		const AVPixFmtDescriptor *desc =
@@ -21,6 +21,7 @@ struct ffmpeg_loader;
 struct AVCodecContext;
 struct AVPacket;
 struct AVFrame;
+struct shadow_decoder;

 /**
 * struct daedalus_decoder - per-daemon decoder state
@@ -31,6 +32,10 @@ struct AVFrame;
 * @ctx_h264:	lazily-opened H.264 AVCodecContext
 * @pkt:	shared AVPacket reused across requests
 * @frame:	shared AVFrame reused across requests
+ * @shadow:	env-gated daedalus-decoder shadow path; NULL when
+ *		DAEDALUS_SHADOW_MODE != "1" or libavcodec lacks the
+ *		per-MB inspection callback.  Production path doesn't
+ *		care; all shadow_decoder_* entry points are NULL-safe.
 */
 struct daedalus_decoder {
 	struct ffmpeg_loader	*loader;
@@ -39,6 +44,7 @@ struct daedalus_decoder {
 	struct AVCodecContext	*ctx_h264;
 	struct AVPacket		*pkt;
 	struct AVFrame		*frame;
+	struct shadow_decoder	*shadow;
 };

 /**
@@ -109,6 +109,24 @@ int ffmpeg_loader_init(struct ffmpeg_loader *loader)
 	RESOLVE(libavutil, LIBAVUTIL_SONAME, av_version_info);
 	RESOLVE(libavutil, LIBAVUTIL_SONAME, av_pix_fmt_desc_get);

+	/*
+	 * Optional symbols.  Resolved NULL-tolerantly — stock libavcodec
+	 * does not export these; the marfrit-packages
+	 * ffmpeg-v4l2-request-fourier fork does (patches 0016/0017).
+	 * Callers MUST NULL-check before invoking.  Clear any stale
+	 * dlerror() the previous lookups left behind so we read a clean
+	 * status here.
+	 */
+	(void) dlerror();
+	*(void **) &loader->ff_h264_set_mb_inspect_cb =
+		dlsym(loader->libavcodec, "ff_h264_set_mb_inspect_cb");
+	if (!loader->ff_h264_set_mb_inspect_cb) {
+		log_info("libavcodec lacks ff_h264_set_mb_inspect_cb "
+			 "(stock build, no daedalus-fourier 0016 patch) "
+			 "— shadow-mode unavailable");
+		(void) dlerror();	/* discard the not-found message */
+	}
+
 	{
 		unsigned int v = loader->avformat_version();
 		log_info("FFmpeg loaded: %s (libavformat %u.%u.%u)",
@@ -35,6 +35,14 @@
 #include <libavutil/avutil.h>
 #include <libavutil/pixdesc.h>

+/*
+ * Forward declaration must precede ff_h264_set_mb_inspect_cb's
+ * function-pointer signature below — otherwise the compiler treats
+ * `struct H264Context` as a parameter-scope declaration and the type
+ * is incompatible with later uses in shadow_decoder.c.
+ */
+struct H264Context;	/* opaque outside libavcodec */
+
 /**
 * struct ffmpeg_loader - resolved FFmpeg API entry points
 * @libavformat:	dlopen handle (close in cleanup)
@@ -88,6 +96,27 @@ struct ffmpeg_loader {
 	const char *(*av_get_media_type_string)(enum AVMediaType);
 	const char *(*av_version_info)(void);
 	const AVPixFmtDescriptor *(*av_pix_fmt_desc_get)(enum AVPixelFormat);
+
+	/*
+	 * Optional libavcodec symbols.  NULL when the loaded
+	 * libavcodec.so doesn't carry the corresponding marfrit-packages
+	 * patch.  Callers must NULL-check before invoking.
+	 *
+	 * ff_h264_set_mb_inspect_cb — marfrit-packages patch 0016.
+	 * Registers a per-MB callback that fires at the end of
+	 * ff_h264_hl_decode_mb.  Used by daedalus-v4l2's shadow-mode
+	 * path to drive daedalus-decoder's frame-major dispatch
+	 * alongside libavcodec's reference decode.  H264Context stays
+	 * opaque to the daemon — extraction of its private fields needs
+	 * the patched FFmpeg source-tree headers (see the CLI in
+	 * daedalus-decoder/tools/daedalus_decode_h264.c) and is
+	 * deferred to PR-Q3a.2.
+	 */
+	void  (*ff_h264_set_mb_inspect_cb)(struct AVCodecContext *avctx,
+					    void (*cb)(void *opaque,
+						       const struct H264Context *h,
+						       int mb_x, int mb_y),
+					    void *opaque);
 };

 /**
@@ -0,0 +1,162 @@
+/* SPDX-License-Identifier: BSD-2-Clause */
+/*
+ * shadow_decoder.c — env-gated parallel daedalus-decoder wiring.
+ *
+ * PR-Q3a.1 scope: prove the toolchain.
+ *
+ *   1. DAEDALUS_SHADOW_MODE=1 + libavcodec carries marfrit-packages
+ *      0016 (ff_h264_set_mb_inspect_cb) → shadow path active.
+ *   2. Per-MB callback fires on every macroblock libavcodec emits.
+ *      We only count the firings here.
+ *   3. Frame boundary creates a daedalus_decoder context lazily
+ *      (sized from the first AVFrame); destroy + recreate on
+ *      resolution change.
+ *   4. Per-frame log line surfaces MB count + has_qpu state.
+ *
+ * No daedalus_decoder_append_mb / flush_frame calls yet — that
+ * needs H264Context introspection which depends on the patched
+ * FFmpeg source-tree headers (DAEDALUS_FFMPEG_SRC) and lands in
+ * PR-Q3a.2.  This module's job here is to confirm the link
+ * survives, the callback resolves, the context creates, and
+ * tearing the path back down doesn't perturb the production
+ * AVFrame → V4L2 pipeline.
+ */
+#include "shadow_decoder.h"
+
+#include "ffmpeg_loader.h"
+#include "log.h"
+
+#include <libavcodec/avcodec.h>
+#include <libavutil/frame.h>
+
+#include <daedalus_decoder.h>
+
+#include <stdlib.h>
+#include <string.h>
+
+/*
+ * Shadow-mode state.  Allocated zeroed by shadow_decoder_create(); the
+ * per-frame MB counter is bumped by the per-MB callback and folded into
+ * the running totals by shadow_decoder_on_frame().
+ */
+struct shadow_decoder {
+	struct ffmpeg_loader *loader;	/* borrowed; provides ff_h264_set_mb_inspect_cb */
+	daedalus_decoder     *dec;	/* lazily created on first frame */
+	int                   ctx_w;	/* coded-frame width at last create */
+	int                   ctx_h;	/* coded-frame height at last create */
+	uint64_t              mbs_this_frame;	/* callback firings since the last frame hook */
+	uint64_t              total_frames;	/* frames seen by the frame hook */
+	uint64_t              total_mbs;	/* sum of all per-frame MB counts */
+};
+
+/*
+ * Per-MB inspection callback handed to ff_h264_set_mb_inspect_cb.
+ * Only counts invocations; the H264Context and MB coordinates are
+ * ignored in this PR.
+ *
+ * NOTE(review): the counter is a plain uint64_t.  If libavcodec invokes
+ * this from slice/frame worker threads the increment may race —
+ * confirm the codec's threading configuration.
+ */
+static void shadow_mb_inspect(void *opaque,
+			      const struct H264Context *h __attribute__((unused)),
+			      int mb_x __attribute__((unused)),
+			      int mb_y __attribute__((unused)))
+{
+	struct shadow_decoder *state = opaque;
+
+	state->mbs_this_frame += 1;
+}
+
+/*
+ * Allocate the shadow-mode handle.
+ *
+ * Returns NULL (shadow mode off) unless DAEDALUS_SHADOW_MODE is exactly
+ * "1" and the loader resolved ff_h264_set_mb_inspect_cb.  Also returns
+ * NULL on allocation failure.  The loader pointer is stored, not copied.
+ */
+struct shadow_decoder *shadow_decoder_create(struct ffmpeg_loader *loader)
+{
+	struct shadow_decoder *handle;
+	const char *mode = getenv("DAEDALUS_SHADOW_MODE");
+	int requested = mode != NULL && strcmp(mode, "1") == 0;
+
+	if (!requested)
+		return NULL;
+
+	if (!(loader && loader->ff_h264_set_mb_inspect_cb)) {
+		log_warn("shadow_decoder: DAEDALUS_SHADOW_MODE=1 set but "
+			 "libavcodec lacks ff_h264_set_mb_inspect_cb — disabled");
+		return NULL;
+	}
+
+	handle = calloc(1, sizeof(*handle));
+	if (handle == NULL) {
+		log_err("shadow_decoder: out of memory");
+		return NULL;
+	}
+
+	handle->loader = loader;
+	log_info("shadow_decoder: enabled (DAEDALUS_SHADOW_MODE=1, "
+		 "daedalus-decoder version %s)",
+		 daedalus_decoder_version());
+	return handle;
+}
+
+/*
+ * Release the shadow handle: destroy the daedalus_decoder context if
+ * one was created, log the lifetime frame/MB totals, free the handle.
+ * A NULL handle is a no-op.
+ */
+void shadow_decoder_destroy(struct shadow_decoder *sh)
+{
+	unsigned long long frames;
+	unsigned long long mbs;
+
+	if (sh == NULL)
+		return;
+
+	if (sh->dec != NULL)
+		daedalus_decoder_destroy(sh->dec);
+
+	frames = (unsigned long long) sh->total_frames;
+	mbs    = (unsigned long long) sh->total_mbs;
+	log_info("shadow_decoder: shutdown — observed %llu frames / %llu MBs",
+		 frames, mbs);
+
+	free(sh);
+}
+
+/*
+ * Register shadow_mb_inspect on @avctx with @sh as its opaque pointer.
+ * Does nothing when either argument is NULL.
+ */
+void shadow_decoder_install_cb(struct shadow_decoder *sh,
+			       struct AVCodecContext *avctx)
+{
+	if (sh && avctx) {
+		/*
+		 * A non-NULL handle implies the optional setter resolved:
+		 * shadow_decoder_create() refuses to allocate otherwise.
+		 */
+		sh->loader->ff_h264_set_mb_inspect_cb(avctx,
+						      shadow_mb_inspect, sh);
+		log_info("shadow_decoder: per-MB callback installed on H.264 ctx");
+	}
+}
+
+/*
+ * Ensure the daedalus_decoder context matches the frame's dimensions.
+ * Rounds up to the H.264 macroblock grid (16-pixel multiples) — the
+ * coded picture is always 16-aligned even when the displayed crop
+ * isn't.
+ *
+ * A failed create is latched: ctx_w/ctx_h keep the coded size that was
+ * attempted while dec stays NULL, so later frames of the same size
+ * return -1 immediately instead of re-running daedalus_decoder_create()
+ * and repeating the warning on every frame.  A different coded size
+ * triggers a fresh attempt.
+ *
+ * Returns 0 when a matching context is ready, -1 otherwise
+ * (non-positive dimensions, or create failed for this size).
+ */
+static int shadow_ensure_ctx(struct shadow_decoder *sh, int w, int h)
+{
+	int coded_w;
+	int coded_h;
+
+	/* Nothing sensible to size a context from; keep current state. */
+	if (w <= 0 || h <= 0)
+		return -1;
+
+	coded_w = (w + 15) & ~15;
+	coded_h = (h + 15) & ~15;
+
+	/* Same coded size as the last attempt: reuse its outcome. */
+	if (sh->ctx_w == coded_w && sh->ctx_h == coded_h)
+		return sh->dec ? 0 : -1;
+
+	if (sh->dec) {
+		daedalus_decoder_destroy(sh->dec);
+		sh->dec = NULL;
+	}
+
+	/* Record the size before creating so a failure is remembered. */
+	sh->ctx_w = coded_w;
+	sh->ctx_h = coded_h;
+
+	sh->dec = daedalus_decoder_create(coded_w, coded_h);
+	if (!sh->dec) {
+		log_warn("shadow_decoder: daedalus_decoder_create(%dx%d) "
+			 "failed — shadow dispatch skipped this stream",
+			 coded_w, coded_h);
+		return -1;
+	}
+	log_info("shadow_decoder: ctx ready (%dx%d coded, has_qpu=%d)",
+		 coded_w, coded_h, daedalus_decoder_has_qpu(sh->dec));
+	return 0;
+}
+
+/*
+ * Frame-boundary hook: make sure the daedalus_decoder context fits the
+ * frame, fold the per-frame MB count into the totals, log it next to
+ * the MB count implied by the frame size, then reset the counter.
+ * NULL @sh or @fr is a no-op.
+ */
+void shadow_decoder_on_frame(struct shadow_decoder *sh,
+			     const struct AVFrame *fr)
+{
+	uint64_t mb_cols;
+	uint64_t mb_rows;
+	uint64_t seen;
+
+	if (sh == NULL || fr == NULL)
+		return;
+
+	/* A failed context is tolerated; counting carries on regardless. */
+	(void) shadow_ensure_ctx(sh, fr->width, fr->height);
+
+	seen = sh->mbs_this_frame;
+	sh->total_frames += 1;
+	sh->total_mbs += seen;
+
+	/* 16x16 macroblock grid covering the frame, rounded up. */
+	mb_cols = (uint64_t) ((fr->width + 15) >> 4);
+	mb_rows = (uint64_t) ((fr->height + 15) >> 4);
+
+	log_info("shadow_decoder: frame #%llu %dx%d — %llu MBs observed "
+		 "(expected %llu)",
+		 (unsigned long long) sh->total_frames,
+		 fr->width, fr->height,
+		 (unsigned long long) seen,
+		 (unsigned long long) (mb_cols * mb_rows));
+
+	sh->mbs_this_frame = 0;
+}
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: BSD-2-Clause */
+/*
+ * shadow_decoder.h — env-gated parallel daedalus-decoder path.
+ *
+ * When the daemon is launched with DAEDALUS_SHADOW_MODE=1, shadow_decoder
+ * runs alongside libavcodec's normal H.264 decode: a per-MB inspection
+ * callback fires for every macroblock libavcodec emits, and a frame-
+ * boundary hook lets the shadow path observe and (in future PRs)
+ * dispatch the same frame's worth of work through daedalus-decoder's
+ * frame-major UMA pipeline.  Production output (AVFrame → V4L2 NV12)
+ * is unchanged regardless of this module's state.
+ *
+ * PR-Q3a.1 scope: wiring only.  The callback counts MBs and the per-
+ * frame hook logs the count.  No daedalus-decoder dispatch yet; that
+ * lands in PR-Q3a.2 along with the H264Context-introspection path
+ * gated on the patched FFmpeg source-tree headers.
+ *
+ * Disabled state (env unset or libavcodec lacks ff_h264_set_mb_inspect_cb)
+ * is a hard NULL — shadow_decoder_create() returns NULL, all other
+ * entry points are safe with NULL and become no-ops.
+ *
+ * The daedalus-decoder context, when active, is created lazily on the
+ * first observed frame (dimensions come from libavcodec's AVFrame, not
+ * from the SPS — keeps init independent of stream-header bring-up
+ * order) and re-created on resolution change.
+ */
+#ifndef DAEDALUS_V4L2_SHADOW_DECODER_H
+#define DAEDALUS_V4L2_SHADOW_DECODER_H
+
+#include <stdint.h>
+
+struct ffmpeg_loader;
+struct AVCodecContext;
+struct AVFrame;
+struct shadow_decoder;
+
+/**
+ * shadow_decoder_create - allocate shadow state if env-enabled
+ * @loader:	borrowed FFmpeg loader (must outlive the returned ctx)
+ *
+ * Probes DAEDALUS_SHADOW_MODE env var and the loader's optional
+ * ff_h264_set_mb_inspect_cb pointer.  Returns NULL when shadow mode
+ * is disabled or unsupported; that's the normal production state.
+ * Returns a usable handle otherwise.  Caller owns the handle and must
+ * call shadow_decoder_destroy.
+ */
+struct shadow_decoder *shadow_decoder_create(struct ffmpeg_loader *loader);
+
+/**
+ * shadow_decoder_destroy - tear down.  Safe with NULL.
+ */
+void shadow_decoder_destroy(struct shadow_decoder *sh);
+
+/**
+ * shadow_decoder_install_cb - install the per-MB inspection callback
+ *                             on a freshly-opened H.264 AVCodecContext
+ * @sh:		shadow handle from shadow_decoder_create(), or NULL
+ * @avctx:	H.264 codec context to hook, or NULL
+ *
+ * Safe with NULL @sh or NULL @avctx (NOP).  Should be called once per
+ * H.264 codec open; repeated calls just reinstall and are harmless.
+ */
+void shadow_decoder_install_cb(struct shadow_decoder *sh,
+			       struct AVCodecContext *avctx);
+
+/**
+ * shadow_decoder_on_frame - per-frame boundary hook
+ * @sh:	shadow handle from shadow_decoder_create(), or NULL
+ * @fr:	frame just returned by the decoder; its width and height are read
+ *
+ * Called after avcodec_receive_frame returns a frame.  Ensures the
+ * daedalus-decoder context matches the frame size, logs the per-frame
+ * MB counter, resets it, and (in future PRs) drives
+ * daedalus_decoder_flush_frame + the AVFrame-vs-shadow diff.  Safe
+ * with NULL @sh or NULL @fr.
+ */
+void shadow_decoder_on_frame(struct shadow_decoder *sh,
+			     const struct AVFrame *fr);
+
+#endif /* DAEDALUS_V4L2_SHADOW_DECODER_H */
@@ -306,6 +306,150 @@ static int test_overflow(void)
 	return 0;
 }

+/*
+ * Case 5: the Temporal Delimiter OBU is exactly two bytes, 0x12 0x00.
+ * The buffer is pre-filled with 0xff so untouched bytes stand out.
+ */
+static int test_temporal_delimiter(void)
+{
+	uint8_t out[4] = { 0xff, 0xff, 0xff, 0xff };
+	size_t n = av1_synth_temporal_delimiter_obu(out, sizeof(out));
+
+	CHECK_EQ(n, 2, "TD length");
+	CHECK_EQ(out[0], 0x12, "TD obu header byte (obu_type=2, has_size=1)");
+	CHECK_EQ(out[1], 0x00, "TD leb128 size = 0");
+
+	printf("  temporal delimiter: OK\n");
+	return 0;
+}
+
+/* Test fixtures for Frame Header cases. */
+
+/*
+ * Fill @seq with a zeroed sequence header, then set profile 0, 8-bit,
+ * 1920x1080 maximum frame size, 7 order-hint bits, and the 128x128
+ * superblock / order-hint / CDEF / restoration flags.
+ */
+static void mk_seq_1080p_p0(struct v4l2_ctrl_av1_sequence *seq)
+{
+	memset(seq, 0, sizeof(*seq));
+
+	seq->flags = V4L2_AV1_SEQUENCE_FLAG_USE_128X128_SUPERBLOCK |
+		     V4L2_AV1_SEQUENCE_FLAG_ENABLE_ORDER_HINT |
+		     V4L2_AV1_SEQUENCE_FLAG_ENABLE_CDEF |
+		     V4L2_AV1_SEQUENCE_FLAG_ENABLE_RESTORATION;
+
+	seq->seq_profile		= 0;
+	seq->bit_depth			= 8;
+	seq->order_hint_bits		= 7;
+
+	seq->max_frame_width_minus_1	= 1919;
+	seq->max_frame_height_minus_1	= 1079;
+}
+
+/*
+ * Fill @fr with a zeroed frame header describing a shown 1920x1080 KEY
+ * frame: no primary reference, base_q_idx 60, all four loop-filter
+ * levels at 16, CDEF bits 0, and no loop restoration on any plane.
+ */
+static void mk_frame_key_1080p(struct v4l2_ctrl_av1_frame *fr)
+{
+	int i;
+
+	memset(fr, 0, sizeof(*fr));
+
+	fr->flags			= V4L2_AV1_FRAME_FLAG_SHOW_FRAME;
+	fr->frame_type			= V4L2_AV1_KEY_FRAME;
+	fr->primary_ref_frame		= 7;	/* PRIMARY_REF_NONE */
+
+	fr->frame_width_minus_1		= 1919;
+	fr->frame_height_minus_1	= 1079;
+	fr->render_width_minus_1	= 1919;
+	fr->render_height_minus_1	= 1079;
+
+	fr->quantization.base_q_idx	= 60;
+	for (i = 0; i < 4; i++)
+		fr->loop_filter.level[i] = 16;
+
+	fr->cdef.bits			= 0;
+	for (i = 0; i < 3; i++)
+		fr->loop_restoration.frame_restoration_type[i] =
+			V4L2_AV1_FRAME_RESTORE_NONE;
+
+	fr->interpolation_filter	= 0;
+	fr->tx_mode			= V4L2_AV1_TX_MODE_SELECT;
+}
+
+/*
+ * Case 6: KEY frame at 1080p — happy path, structural smoke.
+ *
+ * Verifies the OBU envelope (header byte, single-byte leb128 size,
+ * total length) and then reads the first five payload fields back
+ * with the bit reader.  Returns 0 when every check passes.
+ * NOTE(review): CHECK/CHECK_EQ are defined outside this hunk;
+ * presumably they return non-zero from the test on mismatch — confirm.
+ */
+static int test_frame_header_key_1080p(void)
+{
+	struct v4l2_ctrl_av1_sequence seq;
+	struct v4l2_ctrl_av1_frame fr;
+	uint8_t out[256];
+	size_t n;
+	struct br br;
+	uint32_t bit;
+
+	mk_seq_1080p_p0(&seq);
+	mk_frame_key_1080p(&fr);
+
+	n = av1_synth_frame_header_obu(&seq, &fr, out, sizeof(out));
+	CHECK(n > 0 && n <= sizeof(out), "FH synth returned %zu", n);
+
+	/* OBU header byte: obu_type=3 (FRAME_HEADER), has_size_field=1
+	 * → 0_0011_0_1_0 = 0x1A. */
+	CHECK_EQ(out[0], 0x1A, "FH obu header byte");
+	/* High bit clear means the leb128 size fits in this one byte. */
+	CHECK((out[1] & 0x80) == 0, "leb128 single byte");
+	CHECK_EQ(n, 2 + (size_t)(out[1] & 0x7f), "total = header+leb+payload");
+
+	/* Payload starts after the 1-byte OBU header and 1-byte size. */
+	br_init(&br, out + 2, n - 2);
+	/* Field order below is significant: each br_get consumes bits. */
+	bit = br_get(&br, 1); CHECK_EQ(bit, 0, "show_existing_frame");
+	bit = br_get(&br, 2); CHECK_EQ(bit, 0, "frame_type=KEY");
+	bit = br_get(&br, 1); CHECK_EQ(bit, 1, "show_frame");
+	bit = br_get(&br, 1); CHECK_EQ(bit, 0, "disable_cdf_update");
+	bit = br_get(&br, 1); CHECK_EQ(bit, 0, "allow_screen_content_tools");
+
+	printf("  KEY frame 1080p: OK (%zu bytes)\n", n);
+	return 0;
+}
+
+/*
+ * Case 7: INTER frame — coverage smoke.
+ *
+ * Starts from the KEY-frame fixture, switches it to an INTER frame with
+ * primary_ref_frame 0, reference indices 0..6 and REFERENCE_SELECT set,
+ * then checks that synthesis yields a non-empty OBU that fits the
+ * buffer and carries the FRAME_HEADER header byte.  Returns 0 on
+ * success.
+ */
+static int test_frame_header_inter(void)
+{
+	struct v4l2_ctrl_av1_sequence seq;
+	struct v4l2_ctrl_av1_frame fr;
+	uint8_t out[256];
+	size_t n;
+	int i;
+
+	mk_seq_1080p_p0(&seq);
+	mk_frame_key_1080p(&fr);
+	fr.frame_type = V4L2_AV1_INTER_FRAME;
+	fr.primary_ref_frame = 0;
+	for (i = 0; i < V4L2_AV1_REFS_PER_FRAME; i++)
+		fr.ref_frame_idx[i] = (int8_t)(i & 7);
+	fr.flags |= V4L2_AV1_FRAME_FLAG_REFERENCE_SELECT;
+
+	n = av1_synth_frame_header_obu(&seq, &fr, out, sizeof(out));
+	/*
+	 * Bound the length as the KEY-frame case does: a return larger
+	 * than the buffer would mean the synthesiser overran it.
+	 */
+	CHECK(n > 0 && n <= sizeof(out), "INTER FH synth returned %zu", n);
+	CHECK_EQ(out[0], 0x1A, "FH obu header");
+	printf("  INTER frame: OK (%zu bytes)\n", n);
+	return 0;
+}
+
+/*
+ * Case 8: a SWITCH frame is out of scope for the synthesiser, which
+ * must signal that by returning 0.
+ */
+static int test_frame_header_switch_rejected(void)
+{
+	struct v4l2_ctrl_av1_sequence sequence;
+	struct v4l2_ctrl_av1_frame frame;
+	uint8_t out[256];
+
+	mk_seq_1080p_p0(&sequence);
+	mk_frame_key_1080p(&frame);
+	frame.frame_type = V4L2_AV1_SWITCH_FRAME;
+
+	size_t n = av1_synth_frame_header_obu(&sequence, &frame,
+					      out, sizeof(out));
+	CHECK_EQ(n, 0, "SWITCH frame should be out of scope");
+
+	printf("  SWITCH frame rejected: OK\n");
+	return 0;
+}
+
+/*
+ * Case 9: a frame with segmentation enabled is out of scope for the
+ * synthesiser, which must signal that by returning 0.
+ */
+static int test_frame_header_segmentation_rejected(void)
+{
+	struct v4l2_ctrl_av1_sequence sequence;
+	struct v4l2_ctrl_av1_frame frame;
+	uint8_t out[256];
+
+	mk_seq_1080p_p0(&sequence);
+	mk_frame_key_1080p(&frame);
+	frame.segmentation.flags = V4L2_AV1_SEGMENTATION_FLAG_ENABLED;
+
+	size_t n = av1_synth_frame_header_obu(&sequence, &frame,
+					      out, sizeof(out));
+	CHECK_EQ(n, 0, "segmentation-enabled should be out of scope");
+
+	printf("  segmentation enabled rejected: OK\n");
+	return 0;
+}
+
 int main(void)
 {
 	int fail = 0;
@@ -317,6 +461,15 @@ int main(void)
 	fail |= test_reject_invalid_profile_bitdepth();
 	fail |= test_overflow();

+	printf("=== av1_synth_temporal_delimiter_obu ===\n");
+	fail |= test_temporal_delimiter();
+
+	printf("=== av1_synth_frame_header_obu ===\n");
+	fail |= test_frame_header_key_1080p();
+	fail |= test_frame_header_inter();
+	fail |= test_frame_header_switch_rejected();
+	fail |= test_frame_header_segmentation_rejected();
+
 	if (fail) {
 		fprintf(stderr, "AV1 OBU synth tests FAILED\n");
 		return 1;
Author	SHA1	Message	Date
marfrit	6b1d90816d	Merge pull request 'daemon: shadow_decoder wiring (PR-Q3a.1)' (#25 ) from noether/daemon-shadow-decoder-wiring into main Reviewed-on: #25	2026-05-26 12:28:16 +00:00
marfrit	dbf01eddb8	daemon: shadow_decoder wiring (PR-Q3a.1) Toolchain plumbing for the upcoming daedalus-decoder shadow-mode path. Production behaviour is unchanged. What lands here: 1. CMake links libdaedalus_decoder via pkg-config. Static archive, so no .so dependency change in the daemon's link map. 2. ffmpeg_loader resolves ff_h264_set_mb_inspect_cb NULL-tolerantly. Stock libavcodec lacks the symbol (logged as INFO at startup); the marfrit-packages ffmpeg-v4l2-request-fourier fork's 0016 patch exports it. The shadow path activates only when both env DAEDALUS_SHADOW_MODE=1 AND the symbol resolves. 3. New shadow_decoder.[ch] module: - shadow_decoder_create() gates on env + symbol presence, returns NULL in production state (the common case). - shadow_decoder_install_cb() registers a per-MB callback on the H.264 AVCodecContext; lazily-created daedalus_decoder context will pick up dimensions from the first AVFrame. - shadow_decoder_on_frame() logs per-frame MB-observed count. Every entry point is NULL-safe so decoder.c stays clean of conditionals. 4. decoder.{c,h} grow a `struct shadow_decoder *shadow` field on daedalus_decoder. Install hook fires once per H.264 codec open; frame hook fires after each successful avcodec_receive_frame. PR-Q3a.1 scope ENDS here. The callback just counts MBs; no daedalus_decoder_append_mb or flush_frame yet. Real-coeffs / edges extraction needs the patched FFmpeg source-tree headers (DAEDALUS_FFMPEG_SRC) to introspect H264Context internals — that lands in PR-Q3a.2. dejavu-check: this path is daedalus-decoder's frame-major UMA dispatch architecture (one cmdbuf per frame, one submit) running alongside libavcodec's reference decode for validation. It is NOT per-kernel libavcodec function-pointer substitution. No new libavcodec patches; the existing 0016 callback is the only intercept point. Verified on hertz: - Build: clean, libdaedalus_decoder.a linked. 
- Disabled state (env unset OR symbol absent): no shadow log lines, daemon init continues normally, INFO logs "libavcodec lacks ff_h264_set_mb_inspect_cb (stock build, no daedalus-fourier 0016 patch) — shadow-mode unavailable". - Enabled state would require ffmpeg-v4l2-request-fourier .deb rebuilt with patches 0016/0017 deployed to hertz (current .deb release 10 predates them). That's a deployment task, separate from this PR. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>	2026-05-26 14:15:13 +02:00
marfrit	5d1ff51178	Merge pull request 'daemon: AV1 Frame Header OBU synthesiser + Temporal Delimiter' (#24 ) from noether/daemon-av1-frame-header-obu into main Reviewed-on: #24	2026-05-23 17:16:27 +00:00
claude-noether	9797a0daa6	daemon: AV1 Frame Header OBU synthesiser + Temporal Delimiter Extends the AV1 OBU encoder pack (PR #22 landed the Sequence Header half) with the two remaining pieces of the per-frame OBU assembly: - av1_synth_temporal_delimiter_obu() — trivial 2-byte OBU (0x12, 0x00) that AV1 temporal units must start with so libavcodec's parser can detect access-unit boundaries. - av1_synth_frame_header_obu() — encodes a Frame Header OBU (AV1 §5.9) from V4L2_CID_STATELESS_AV1_SEQUENCE + V4L2_CID_STATELESS_ AV1_FRAME controls. ## Frame Header scope The encoder covers the libva-v4l2-request common-case path: - frame_type: KEY / INTER / INTRA_ONLY supported. SWITCH returns 0. - tile_info: single-tile uniform-spacing only (forced tile_cols_log2 = tile_rows_log2 = 0). - quantization_params: full coverage (base_q_idx, delta_q_*, qmatrix). - loop_filter_params: full coverage (levels, sharpness, ref/mode deltas). - cdef_params: full coverage. - segmentation: only enabled=0 path supported (returns 0 if enabled). - loop_restoration: only RESTORE_NONE supported (returns 0 if any plane uses Wiener / SGRPROJ / SWITCHABLE). - global_motion: only IDENTITY warp model emitted (returns 0 if any ref uses ROTZOOM / AFFINE / TRANSLATION). - film_grain_params: only "not present" path — returns 0 if the sequence header has FILM_GRAIN_PARAMS_PRESENT set. Out-of-scope branches return 0 so a future decoder.c integration can surface a coverage warning and fall back to direct libavcodec parsing of the original bitstream where the consumer happens to ship a fully-OBU'd access unit. ## Integration status The new primitives are NOT yet wired into decoder.c. The AV1 decode hot path still passes the OUTPUT buffer straight to libavcodec, which works only when the V4L2 consumer is sending a fully-OBU'd access unit (not strictly the V4L2 stateless contract). 
A real wiring needs a separate kernel-side change: - daedalus_v4l2_proto.h: add struct daedalus_av1_meta mirroring v4l2_ctrl_av1_sequence + v4l2_ctrl_av1_frame - kernel/daedalus_v4l2_main.c: capture V4L2_CID_STATELESS_AV1_{SEQUENCE, FRAME} at device_run, ship over the chardev - daemon/src/chardev_client.c: receive meta - daemon/src/decoder.c: assemble TD + SH + FH + OBU_TILE_GROUP-wrapped OUTPUT bytes, send to libavcodec Tracked as a follow-on. ## Tests test_av1_obu_synth.c grows 5 new cases (9 total, all green on hertz): === av1_synth_temporal_delimiter_obu === temporal delimiter: OK === av1_synth_frame_header_obu === KEY frame 1080p: OK (13 bytes) INTER frame: OK (18 bytes) SWITCH frame rejected: OK segmentation enabled rejected: OK AV1 OBU synth tests PASSED Bit-walk of the KEY-frame happy path confirms the OBU envelope (obu_type=3 = FRAME_HEADER, has_size_field=1, leb128 size byte), then steps through show_existing_frame, frame_type, show_frame, disable_cdf_update, allow_screen_content_tools. Fuller bit-walks would tie the test to encoder details that are spec-driven and already linear in the source; structural smoke + spec-driven linearity is the right gate. Build clean on hertz (Pi 5, Debian trixie, 6.18.29+rpt-rpi-2712, gcc -Wall -Wextra -Wpedantic). No new warnings. Closes daedalus backlog task #159 (AV1 Frame Header OBU synthesiser; decoder.c integration deferred per task notes above).	2026-05-23 18:31:41 +02:00