scaffold: CMake + API skeleton + smoke test

First code on daedalus-decoder per the Phase 1 decisions merged 2026-05-24.
Repo skeleton only — no Vulkan pipeline yet, no shaders, no libavcodec
intercept. Establishes the build shape so subsequent work has a place to land.

Layout:

  LICENSE
      BSD-2-Clause (matches daedalus-fourier)

  .gitignore
      build/, CMake artefacts, *.spv

  CMakeLists.txt
      top-level — finds daedalus-fourier ≥0.1.0 via pkg-config (per §9.6
      decision: find_package, pinned to tagged release; .pc consumed via
      pkg_check_modules until we ship a CMake config), Vulkan via
      find_package, builds static lib + smoke test, GNUInstallDirs install

  include/daedalus_decoder.h
      public API surface:
        - daedalus_decoder_{create,destroy,version,has_qpu}
        - daedalus_decoder_set_output_format (NV12 default, RGBA opt-in per §5)
        - daedalus_decoder_append_mb + struct daedalus_decoder_mb_input
          (matches §3 per-MB descriptor)
        - daedalus_decoder_flush_frame (per-frame submit + wait)
        - daedalus_decoder_export_dmabuf (Vulkan-native VkImage export per
          §9.4 decision)
      Dimensions are CODED frame size (mod-16), not displayed — caller
      translates from SPS + crop offsets.

  src/internal.h
      internal mb_desc struct (matches shader std430 layout, to be nailed
      down once shaders exist) + per-ctx state

  src/daedalus_decoder.c
      stub bodies:
        - create/destroy with proper resource lifecycle
        - append_mb validates + writes CPU staging buffers (no GPU yet)
        - flush_frame returns -2 (not implemented) — Phase 1 work
        - export_dmabuf returns -1
        - has_qpu / version diagnostics

  tests/test_smoke.c
      link + lifecycle test: bad dims reject, OOB MB reject, null inputs
      reject, raster-order enforcement, mid-frame format-change reject,
      incomplete-frame flush reject. On hosts without V3D7 Vulkan, SKIPs
      gracefully (returns 0).

Verified on hertz (Pi 5 / V3D 7.1 / Mesa V3DV via daedalus-fourier 0.1.0):

  $ cmake -B build -G Ninja -DCMAKE_BUILD_TYPE=Release
  $ cmake --build build
  $ ctest --test-dir build --output-on-failure
  Test #1: smoke ... Passed
  $ ./build/test_smoke
  daedalus-decoder version: 0.0.1
  ctx created: 1920x1088, has_qpu=1
  smoke OK

Note the coded-vs-displayed dims trap: 1080p H.264 has coded height 1088 with
8 rows cropped via SPS frame_cropping_*. Header docstring on
daedalus_decoder_create() spells this out so future callers don't hit the
multiple-of-16 reject (smoke test caught it during scaffold write).

Next: Phase 1 implementation begins — IDCT 4×4 / 8×8 frame-scaled dispatch
(reusing daedalus-fourier shaders per Appendix A), intra prediction wavefront,
reconstruct stage, NV12 output via dmabuf export. Smoke test grows from "ctx
lifecycle works" to "I-frame-only Baseline decode bit-exact vs FFmpeg
reference".
2026-05-24 22:08:46 +02:00
parent 59885dd868
commit 08080f062c
7 changed files with 683 additions and 0 deletions
@@ -0,0 +1,182 @@
+/* SPDX-License-Identifier: BSD-2-Clause */
+/*
+ * daedalus-decoder — public C API.
+ *
+ * Frame-level GPU H.264 decoder targeting V3D7 (Raspberry Pi 5).  Built
+ * on daedalus-fourier's V3D compute primitives at frame granularity —
+ * one Vulkan submit per frame, one fence wait per frame, encoded
+ * bitstream in (via libavcodec's per-MB intercept), NV12 frame out.
+ *
+ * Per the 2026-05-24 Phase 1 design decisions:
+ *   - libavcodec intercept is at macroblock-level (substitution-arc
+ *     evolution): the caller is expected to drive the per-MB CABAC /
+ *     CAVLC entropy decode and feed each macroblock's descriptor +
+ *     coefficients via daedalus_decoder_append_mb().  flush_frame()
+ *     builds the per-frame VkCommandBuffer and submits.
+ *   - DPB is Vulkan-native VkImage with VK_EXT_external_memory_dma_buf
+ *     export.  The caller can obtain the output frame's dmabuf fd
+ *     via daedalus_decoder_export_dmabuf().
+ *   - Daemon integration shape: this library is statically linked into
+ *     daedalus_v4l2_daemon.  No IPC.
+ *
+ * STATUS: scaffold.  No GPU pipeline implemented yet; all functions
+ * are stubs that compile but do not decode anything.  See DESIGN.md
+ * for the architecture.
+ *
+ * ABI: pre-0.1 — every signature here may change.  Don't rely on
+ * stability yet.
+ */
+#ifndef DAEDALUS_DECODER_H
+#define DAEDALUS_DECODER_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* -------------------------------------------------------------------
+ * Opaque decoder context.  One per concurrent stream.
+ *
+ * Only a forward declaration is exposed here; callers hold a pointer
+ * obtained from daedalus_decoder_create() and release it with
+ * daedalus_decoder_destroy().
+ * ----------------------------------------------------------------- */
+typedef struct daedalus_decoder daedalus_decoder;
+
+/* -------------------------------------------------------------------
+ * Per-macroblock input.  Mirrors §3 of DESIGN.md.  The caller's
+ * libavcodec intercept populates this from the H264SliceContext
+ * fields after ff_h264_decode_mb_cabac/cavlc returns and before
+ * ff_h264_hl_decode_mb is supposed to run (we replace the latter).
+ *
+ * One instance describes a single macroblock and is passed to
+ * daedalus_decoder_append_mb().
+ * ----------------------------------------------------------------- */
+struct daedalus_decoder_mb_input {
+    /* Frame coordinates (macroblock units, not pixels). */
+    uint16_t mb_x;
+    uint16_t mb_y;
+
+    /* Type + quantisation. */
+    uint8_t  mb_type;            /* H.264 spec table 7-13/7-14/7-17/7-18 enum.
+                                  * NOTE(review): in the ITU-T text those
+                                  * tables cover P/B macroblock and
+                                  * sub-macroblock types; the I-slice
+                                  * mb_type table (7-11) is not listed even
+                                  * though intra types are referenced below
+                                  * — confirm the intended numbering. */
+    uint8_t  mb_qp_y;            /* luma QP; valid range not stated here — TODO confirm */
+    uint8_t  mb_qp_uv;           /* chroma QP; NOTE(review): a single value
+                                  * for both Cb and Cr — confirm */
+    uint8_t  cbp;                /* coded block pattern, 0..47 */
+
+    /* Intra prediction (used iff mb_type == I_NxN or I_16x16). */
+    uint8_t  intra_4x4_modes[16];   /* 16 entries; block ordering not stated here — TODO confirm */
+    uint8_t  intra_16x16_mode;
+    uint8_t  intra_chroma_mode;
+
+    /* Inter motion / partitions (used iff P_* or B_*).
+     * NOTE(review): four ref_idx / MV slots give one entry per 8x8
+     * partition; H.264 sub-macroblock partitions (8x4, 4x8, 4x4) can
+     * carry up to 16 motion vectors per list — confirm how those are
+     * meant to be represented. */
+    uint8_t  partition_mode;     /* P_16x16 / P_16x8 / P_8x16 / P_8x8 / etc. */
+    int8_t   ref_idx_l0[4];      /* per partition; -1 = not used */
+    int8_t   ref_idx_l1[4];      /* B only */
+    int16_t  mv_l0[4][2];        /* qpel precision (1/4 sample); (x, y) */
+    int16_t  mv_l1[4][2];        /* same shape as mv_l0; presumably B only, like ref_idx_l1 — confirm */
+
+    /* Deblocking filter parameters.
+     * NOTE(review): alpha_c0 / beta look like the slice-level
+     * slice_alpha_c0_offset_div2 / slice_beta_offset_div2 values —
+     * confirm units and range. */
+    uint8_t  deblock_disable;    /* 0 = enabled */
+    int8_t   deblock_alpha_c0;
+    int8_t   deblock_beta;
+
+    /* Transform coefficients — 256 luma (4x4 x 16) + 64 cb + 64 cr,
+     * column-major within each 4x4 block (matches FFmpeg convention).
+     * 64 coefficients per chroma plane is an 8x8 chroma block, i.e.
+     * the layout assumes 4:2:0 sampling.
+     * Caller-owned; copied during append, so the pointer is presumably
+     * not retained after daedalus_decoder_append_mb() returns —
+     * confirm against the implementation. */
+    const int16_t *coeffs;       /* points at exactly 384 int16_t */
+};
+
+/* -------------------------------------------------------------------
+ * Output frame format selector.  Chosen with
+ * daedalus_decoder_set_output_format(); the plane layout each value
+ * implies is described on daedalus_decoder_flush_frame().
+ * ----------------------------------------------------------------- */
+typedef enum {
+    DAEDALUS_DECODER_OUTPUT_NV12 = 0,   /* default; Stage 4 final — Y plane + interleaved UV plane */
+    DAEDALUS_DECODER_OUTPUT_RGBA = 1,   /* Stage 5 opt-in — single plane, 4 bytes per pixel */
+} daedalus_decoder_output_format;
+
+/* -------------------------------------------------------------------
+ * Lifecycle
+ * ----------------------------------------------------------------- */
+
+/* Create a decoder context for the given **coded** frame dimensions.
+ *
+ * width, height: pixels of the H.264 coded picture, NOT the displayed
+ * picture.  Both must be multiples of 16 (macroblock granularity).
+ * For displayed 1080p (1920×1080), the coded frame is 1920×1088; the
+ * SPS signals the visible area via `frame_cropping_flag` and the
+ * `frame_crop_*_offset` fields, which crop the bottom 8 rows.
+ * The caller is responsible for translating from SPS dims + crop
+ * rectangle to the values passed here; we decode the coded frame.
+ *
+ * Returns NULL on bad dimensions or allocation failure.  Returns a
+ * usable context with daedalus_decoder_has_qpu() == 0 when Vulkan
+ * init fails — callers that need GPU work should check has_qpu
+ * before relying on it.
+ *
+ * A non-NULL result must eventually be passed to
+ * daedalus_decoder_destroy().
+ */
+daedalus_decoder *daedalus_decoder_create(int width, int height);
+
+/* Free all resources held by a context returned from
+ * daedalus_decoder_create().  Safe with NULL. */
+void daedalus_decoder_destroy(daedalus_decoder *dec);
+
+/* Switch output format BEFORE the first append_mb call of a frame.
+ * Default is NV12.  Returns 0 on success, -1 if called mid-frame
+ * (caller must flush first).
+ *
+ * NOTE(review): the result for a NULL dec or a fmt value outside
+ * daedalus_decoder_output_format is not specified here — confirm
+ * against the implementation. */
+int daedalus_decoder_set_output_format(daedalus_decoder *dec,
+                                        daedalus_decoder_output_format fmt);
+
+/* -------------------------------------------------------------------
+ * Per-frame submission
+ * ----------------------------------------------------------------- */
+
+/* Append one macroblock's data to the current frame's descriptor SSBO
+ * + coefficient SSBO.  No GPU dispatch yet — just CPU-side writes.
+ * The 384 coefficients behind mb->coeffs are copied during the call.
+ *
+ * Must be called in raster order (mb_y * mb_width + mb_x) for the
+ * intra-prediction wavefront to work correctly in Phase 1.
+ *
+ * Returns 0 on success, negative on bounds violation or OOM.
+ * NOTE(review): the scaffold's smoke test also expects NULL inputs
+ * and out-of-raster-order macroblocks to be rejected; the specific
+ * error codes are not specified here — confirm and list them.
+ */
+int daedalus_decoder_append_mb(daedalus_decoder *dec,
+                                const struct daedalus_decoder_mb_input *mb);
+
+/* End-of-frame flush: builds the per-frame VkCommandBuffer with all
+ * pipeline stages, submits once, waits on a single fence, copies the
+ * NV12 (or RGBA when opted in) output into the caller-provided
+ * planes.
+ *
+ * W and H below are the coded width / height given to
+ * daedalus_decoder_create().
+ *
+ * For NV12:
+ *   out_y / y_stride: Y plane (W*H bytes minimum, at the given stride)
+ *   out_uv / uv_stride: interleaved UV plane (W*(H/2) bytes minimum)
+ *
+ * For RGBA: out_y receives 4*W*H bytes at y_stride; out_uv ignored.
+ *
+ * NOTE(review): the byte minimums above only hold when the stride
+ * equals the row width; with a larger stride the buffer presumably
+ * has to cover stride * rows bytes — confirm and state the exact
+ * requirement.  Whether out_y / out_uv may be NULL (e.g. when the
+ * frame is consumed only via daedalus_decoder_export_dmabuf()) is
+ * also not specified here.
+ *
+ * Returns 0 on success, negative on Vulkan failure or undecodable
+ * frame.  After return, the decoder is ready for the next frame's
+ * append calls.
+ *
+ * STATUS: per the scaffold commit notes this is not implemented yet
+ * (returns -2), and the smoke test expects a flush of an incomplete
+ * frame to be rejected.
+ */
+int daedalus_decoder_flush_frame(daedalus_decoder *dec,
+                                  uint8_t *out_y,  size_t y_stride,
+                                  uint8_t *out_uv, size_t uv_stride);
+
+/* Export the most-recently-decoded frame as a dma_buf fd.  The fd is
+ * owned by the caller and must be closed when done.  Lets V4L2
+ * consumers (daedalus_v4l2_daemon, libva-v4l2-request-fourier) attach
+ * the GPU-decoded surface directly to a CAPTURE plane without a CPU
+ * round-trip.
+ *
+ * plane: NOTE(review) not documented here; presumably the plane index
+ * of the output format (e.g. 0 = Y, 1 = UV for NV12) — confirm and
+ * state the valid range.
+ *
+ * Returns the dmabuf fd on success, -1 on failure.  Must be called
+ * AFTER flush_frame returns for the relevant frame.
+ *
+ * STATUS: per the scaffold commit notes this currently always
+ * returns -1.
+ */
+int daedalus_decoder_export_dmabuf(daedalus_decoder *dec, int plane);
+
+/* -------------------------------------------------------------------
+ * Diagnostics
+ * ----------------------------------------------------------------- */
+
+/* daedalus-decoder build version (semver string, e.g. "0.0.1+g0a1b2c3").
+ * Takes no context, so it can be called before daedalus_decoder_create().
+ * NOTE(review): ownership of the returned string is not stated —
+ * presumably static storage that must not be freed; confirm. */
+const char *daedalus_decoder_version(void);
+
+/* Whether the underlying daedalus-fourier context picked up a working
+ * V3D7 Vulkan instance.  Returns 0 if Vulkan init failed and the
+ * decoder is operating in stub / failure mode, non-zero otherwise.
+ * NOTE(review): the result for a NULL dec is not specified here —
+ * confirm against the implementation. */
+int daedalus_decoder_has_qpu(const daedalus_decoder *dec);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* DAEDALUS_DECODER_H */