Phase 8 skeleton: public C API + first end-to-end smoke test
include/daedalus.h: stable C API surface exposing the 5 cycles (VP9 IDCT 8x8, LPF wd=4, MC 8h, LPF wd=8; AV1 CDEF). Per-kernel recipe-dispatch helpers default to the verdict substrate from cycles 1-5 (QPU for cycles 1, 2 and 4; CPU for cycles 3 and 5); an explicit override is available for benchmarking and runtime-aware scheduling.

src/daedalus_core.c: NEON-path implementation of all 5 kernels wrapped behind the public API. The QPU path is stubbed out (returns -1) because wiring v3d_runner into daedalus_ctx is the next Phase 8 sub-step; with has_qpu=0 the recipe falls back to CPU cleanly.

tests/test_api_idct.c: 64-block IDCT through the public recipe dispatch, bit-exact vs the C reference. PASS 4096/4096 bytes — proves that the API surface compiles, the library links, dispatch routing works, and the NEON fallback delivers correct results.

docs/phase8_scoping.md: architecture options (A = userspace V4L2, B = kernel V4L2 shim, C = direct libva); option A is picked for v1; explicitly out-of-scope work is tracked.

Next Phase 8 sub-step: wire v3d_runner into daedalus_ctx so that has_qpu=1 and QPU dispatch goes through the API too. After that: V4L2 ioctl glue, bitstream parser, superblock loop.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,252 @@
|
||||
/*
|
||||
* daedalus-fourier core library — Phase 8 skeleton.
|
||||
*
|
||||
* Wraps cycles 1-5 kernels behind the public C API in
|
||||
* include/daedalus.h. Recipe dispatch routes per-kernel to the
|
||||
* verdict substrate from each cycle's Phase 7 doc.
|
||||
*
|
||||
* License: BSD-2-Clause. Links vendored FFmpeg LGPL-2.1+ +
|
||||
* dav1d BSD-2-Clause NEON snapshots.
|
||||
*/
|
||||
#include "../include/daedalus.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
|
||||
/* -------------------- Context -------------------- */
|
||||
|
||||
/*
 * Library context handed out by daedalus_ctx_create().
 *
 * In this Phase 8 skeleton the only state is the QPU-availability flag.
 * A full implementation is expected to also hold the v3d_runner and the
 * per-kernel pipeline handles.
 */
struct daedalus_ctx {
    int has_qpu; /* non-zero when the QPU substrate may be dispatched to */
};
|
||||
|
||||
daedalus_ctx *daedalus_ctx_create(void)
|
||||
{
|
||||
daedalus_ctx *ctx = calloc(1, sizeof(*ctx));
|
||||
if (!ctx) return NULL;
|
||||
/* Phase 8 deferred: real impl probes V3D Vulkan device; for now
|
||||
* default to CPU-only (NEON paths are always available). */
|
||||
ctx->has_qpu = 0;
|
||||
return ctx;
|
||||
}
|
||||
|
||||
int daedalus_ctx_has_qpu(const daedalus_ctx *ctx)
|
||||
{
|
||||
return ctx ? ctx->has_qpu : 0;
|
||||
}
|
||||
|
||||
/*
 * Release a context obtained from daedalus_ctx_create().
 *
 * Passing NULL is allowed.
 */
void daedalus_ctx_destroy(daedalus_ctx *ctx)
{
    /* free(NULL) is a no-op, so no guard is needed. */
    free(ctx);
}
|
||||
|
||||
/* -------------------- Recipe query -------------------- */
|
||||
|
||||
/*
 * Return the substrate the recipe prefers for kernel k.
 *
 * The VP9 IDCT 8x8 and both VP9 inner loop filters map to the QPU; VP9 MC 8h
 * and AV1 CDEF 8x8 map to the CPU. Any value outside the listed kernels
 * falls through to the CPU.
 */
daedalus_substrate daedalus_recipe_substrate_for(daedalus_kernel k)
{
    /* No default label on purpose: the fall-through return below covers
     * values that are not listed. */
    switch (k) {
    case DAEDALUS_KERNEL_VP9_IDCT8:
    case DAEDALUS_KERNEL_VP9_LPF4_INNER:
    case DAEDALUS_KERNEL_VP9_LPF8_INNER:
        return DAEDALUS_SUBSTRATE_QPU;

    case DAEDALUS_KERNEL_VP9_MC_8H:
    case DAEDALUS_KERNEL_AV1_CDEF_8X8:
        return DAEDALUS_SUBSTRATE_CPU;
    }

    return DAEDALUS_SUBSTRATE_CPU; /* defensive default */
}
|
||||
|
||||
/* -------------------- NEON externs (per cycle bench links) ----- */
|
||||
|
||||
/*
 * NEON kernels defined outside this translation unit.
 * NOTE(review): expected to come from the vendored FFmpeg / dav1d NEON
 * snapshots named in the file header -- confirm against the build files.
 */

/* VP9 8x8 inverse transform + add. `block` is non-const. */
extern void ff_vp9_idct_idct_8x8_add_neon(uint8_t *dst, ptrdiff_t stride,
                                          int16_t *block, int eob);

/* VP9 loop filter, "h_4_8" variant (used by the wd=4 dispatch path). */
extern void ff_vp9_loop_filter_h_4_8_neon(uint8_t *dst, ptrdiff_t stride,
                                          int E, int I, int H);

/* VP9 loop filter, "h_8_8" variant (used by the wd=8 dispatch path). */
extern void ff_vp9_loop_filter_h_8_8_neon(uint8_t *dst, ptrdiff_t stride,
                                          int E, int I, int H);

/* VP9 motion compensation, "put regular8 h" variant. */
extern void ff_vp9_put_regular8_h_neon(uint8_t *dst, ptrdiff_t dst_stride,
                                       const uint8_t *src,
                                       ptrdiff_t src_stride,
                                       int h, int mx, int my);

/* AV1 CDEF filter, 8-wide, 8 bits per component. */
extern void dav1d_cdef_filter8_8bpc_neon(uint8_t *dst, ptrdiff_t dst_stride,
                                         const uint16_t *tmp,
                                         int pri_strength, int sec_strength,
                                         int dir, int damping, int h,
                                         size_t edges);
|
||||
|
||||
/* -------------------- CPU dispatch implementations -------------- */
|
||||
|
||||
static int dispatch_idct8_cpu(daedalus_ctx *ctx,
|
||||
uint8_t *dst, size_t dst_stride,
|
||||
const int16_t *coeffs, size_t n_blocks,
|
||||
const daedalus_idct8_meta *meta)
|
||||
{
|
||||
(void) ctx;
|
||||
int16_t scratch[64];
|
||||
for (size_t i = 0; i < n_blocks; i++) {
|
||||
memcpy(scratch, coeffs + i * 64, 64 * sizeof(int16_t));
|
||||
ff_vp9_idct_idct_8x8_add_neon(dst + meta[i].dst_off,
|
||||
(ptrdiff_t) dst_stride,
|
||||
scratch, 64);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * CPU (NEON) path shared by the VP9 wd=4 and wd=8 loop-filter dispatchers.
 *
 * wd_8 selects the kernel: non-zero uses ff_vp9_loop_filter_h_8_8_neon(),
 * zero uses ff_vp9_loop_filter_h_4_8_neon(). Each of the n_edges entries in
 * meta supplies the destination offset and the E / I / H values passed to
 * the kernel.
 *
 * ctx is unused. Always returns 0.
 */
static int dispatch_lpf_cpu(daedalus_ctx *ctx, int wd_8,
                            uint8_t *dst, size_t dst_stride,
                            size_t n_edges, const daedalus_lpf_meta *meta)
{
    /* Both kernels share one signature, so pick once instead of per edge. */
    void (*const filter)(uint8_t *, ptrdiff_t, int, int, int) =
        wd_8 ? ff_vp9_loop_filter_h_8_8_neon
             : ff_vp9_loop_filter_h_4_8_neon;
    const ptrdiff_t stride = (ptrdiff_t) dst_stride;

    (void) ctx;

    for (size_t edge = 0; edge < n_edges; edge++) {
        const daedalus_lpf_meta *m = &meta[edge];

        filter(dst + m->dst_off, stride, m->E, m->I, m->H);
    }
    return 0;
}
|
||||
|
||||
/*
 * CPU (NEON) path for VP9 motion compensation, "8h" kernel.
 *
 * Calls ff_vp9_put_regular8_h_neon() once per entry in meta, writing to
 * dst + meta[i].dst_off and reading from src + meta[i].src_off + 3, with a
 * height of 8, mx taken from the metadata and my fixed to 0.
 *
 * ctx is unused. Always returns 0.
 */
static int dispatch_mc_8h_cpu(daedalus_ctx *ctx,
                              uint8_t *dst, size_t dst_stride,
                              const uint8_t *src, size_t src_stride,
                              size_t n_blocks, const daedalus_mc_meta *meta)
{
    const ptrdiff_t out_stride = (ptrdiff_t) dst_stride;
    const ptrdiff_t in_stride = (ptrdiff_t) src_stride;

    (void) ctx;

    for (size_t blk = 0; blk < n_blocks; blk++) {
        const daedalus_mc_meta *m = &meta[blk];
        uint8_t *out = dst + m->dst_off;
        /* NOTE(review): the +3 presumably skips the filter's left-hand taps
         * so that src_off can address the start of the tap window -- confirm
         * against the daedalus_mc_meta contract in the public header. */
        const uint8_t *in = src + m->src_off + 3;

        ff_vp9_put_regular8_h_neon(out, out_stride, in, in_stride,
                                   8, m->mx, 0);
    }
    return 0;
}
|
||||
|
||||
/*
 * CPU (NEON) path for the AV1 CDEF 8x8 kernel.
 *
 * Calls dav1d_cdef_filter8_8bpc_neon() once per entry in meta. The
 * destination is dst + meta[i].dst_off, the 16-bit input is
 * tmp + meta[i].tmp_off_u16 (an offset counted in uint16_t elements), and
 * the strengths, direction and damping come from the metadata. Height is
 * fixed to 8 and the edges argument to 0.
 *
 * ctx is unused. Always returns 0.
 */
static int dispatch_cdef_cpu(daedalus_ctx *ctx,
                             uint8_t *dst, size_t dst_stride,
                             const uint16_t *tmp,
                             size_t n_blocks, const daedalus_cdef_meta *meta)
{
    const ptrdiff_t stride = (ptrdiff_t) dst_stride;

    (void) ctx;

    for (size_t blk = 0; blk < n_blocks; blk++) {
        const daedalus_cdef_meta *m = &meta[blk];
        uint8_t *out = dst + m->dst_off;
        const uint16_t *in = tmp + m->tmp_off_u16;

        /* NOTE(review): edges = 0 presumably means "no edge flags set" --
         * confirm against dav1d's CDEF edge-flag definition. */
        dav1d_cdef_filter8_8bpc_neon(out, stride, in,
                                     m->pri_strength, m->sec_strength,
                                     m->dir, m->damping, 8, 0);
    }
    return 0;
}
|
||||
|
||||
/* -------------------- Public dispatch entry points -------------- */
|
||||
|
||||
/*
 * Shared routing body for the public dispatch entry points.
 *
 * Expands to a statement sequence, not an expression. It must be the last
 * thing in a function that has `ctx` and `sub` in scope, and it is written
 * as `ROUTE(...);` (the caller supplies the final semicolon).
 *
 *   1. AUTO is resolved through daedalus_recipe_substrate_for(_kernel).
 *   2. A QPU choice is downgraded to CPU when the context has no QPU.
 *   3. CPU returns _cpu_fn(ctx, <remaining arguments>).
 *   4. Anything else returns -1: the QPU path is not yet wired in the
 *      Phase 8 skeleton.
 */
#define ROUTE(_kernel, _cpu_fn, ...)                                        \
    daedalus_substrate eff =                                                \
        (sub == DAEDALUS_SUBSTRATE_AUTO)                                    \
            ? daedalus_recipe_substrate_for(_kernel)                        \
            : sub;                                                          \
    if (eff == DAEDALUS_SUBSTRATE_QPU && !daedalus_ctx_has_qpu(ctx))        \
        eff = DAEDALUS_SUBSTRATE_CPU;                                       \
    if (eff != DAEDALUS_SUBSTRATE_CPU)                                      \
        return -1;                                                          \
    return _cpu_fn(ctx, __VA_ARGS__)
|
||||
|
||||
int daedalus_dispatch_vp9_idct8(daedalus_ctx *ctx, daedalus_substrate sub,
|
||||
uint8_t *dst, size_t dst_stride,
|
||||
const int16_t *coeffs, size_t n_blocks,
|
||||
const daedalus_idct8_meta *meta)
|
||||
{
|
||||
ROUTE(DAEDALUS_KERNEL_VP9_IDCT8, dispatch_idct8_cpu,
|
||||
dst, dst_stride, coeffs, n_blocks, meta);
|
||||
}
|
||||
|
||||
/*
 * Dispatch a batch of VP9 wd=4 inner loop-filter edges on the requested
 * substrate.
 *
 * sub may be DAEDALUS_SUBSTRATE_AUTO, in which case the recipe's choice for
 * DAEDALUS_KERNEL_VP9_LPF4_INNER is used. A QPU choice falls back to the CPU
 * when the context reports no QPU.
 *
 * Returns the CPU path's result, or -1 when the effective substrate is not
 * the CPU (the QPU path is not wired yet).
 */
int daedalus_dispatch_vp9_lpf4(daedalus_ctx *ctx, daedalus_substrate sub,
                               uint8_t *dst, size_t dst_stride,
                               size_t n_edges, const daedalus_lpf_meta *meta)
{
    /* The leading 0 is dispatch_lpf_cpu()'s wd_8 flag: select the wd=4
     * kernel. */
    ROUTE(DAEDALUS_KERNEL_VP9_LPF4_INNER, dispatch_lpf_cpu,
          0, dst, dst_stride, n_edges, meta);
}
|
||||
|
||||
/*
 * Dispatch a batch of VP9 wd=8 inner loop-filter edges on the requested
 * substrate.
 *
 * sub may be DAEDALUS_SUBSTRATE_AUTO, in which case the recipe's choice for
 * DAEDALUS_KERNEL_VP9_LPF8_INNER is used. A QPU choice falls back to the CPU
 * when the context reports no QPU.
 *
 * Returns the CPU path's result, or -1 when the effective substrate is not
 * the CPU (the QPU path is not wired yet).
 */
int daedalus_dispatch_vp9_lpf8(daedalus_ctx *ctx, daedalus_substrate sub,
                               uint8_t *dst, size_t dst_stride,
                               size_t n_edges, const daedalus_lpf_meta *meta)
{
    /* The leading 1 is dispatch_lpf_cpu()'s wd_8 flag: select the wd=8
     * kernel. */
    ROUTE(DAEDALUS_KERNEL_VP9_LPF8_INNER, dispatch_lpf_cpu,
          1, dst, dst_stride, n_edges, meta);
}
|
||||
|
||||
/*
 * Dispatch a batch of VP9 "8h" motion-compensation blocks on the requested
 * substrate.
 *
 * sub may be DAEDALUS_SUBSTRATE_AUTO, in which case the recipe's choice for
 * DAEDALUS_KERNEL_VP9_MC_8H is used. A QPU choice falls back to the CPU when
 * the context reports no QPU.
 *
 * Returns the CPU path's result, or -1 when the effective substrate is not
 * the CPU (the QPU path is not wired yet).
 */
int daedalus_dispatch_vp9_mc_8h(daedalus_ctx *ctx, daedalus_substrate sub,
                                uint8_t *dst, size_t dst_stride,
                                const uint8_t *src, size_t src_stride,
                                size_t n_blocks, const daedalus_mc_meta *meta)
{
    ROUTE(DAEDALUS_KERNEL_VP9_MC_8H, dispatch_mc_8h_cpu,
          dst, dst_stride, src, src_stride, n_blocks, meta);
}
|
||||
|
||||
/*
 * Dispatch a batch of AV1 CDEF 8x8 blocks on the requested substrate.
 *
 * sub may be DAEDALUS_SUBSTRATE_AUTO, in which case the recipe's choice for
 * DAEDALUS_KERNEL_AV1_CDEF_8X8 is used. A QPU choice falls back to the CPU
 * when the context reports no QPU.
 *
 * Returns the CPU path's result, or -1 when the effective substrate is not
 * the CPU (the QPU path is not wired yet).
 */
int daedalus_dispatch_cdef_8x8(daedalus_ctx *ctx, daedalus_substrate sub,
                               uint8_t *dst, size_t dst_stride,
                               const uint16_t *tmp,
                               size_t n_blocks, const daedalus_cdef_meta *meta)
{
    ROUTE(DAEDALUS_KERNEL_AV1_CDEF_8X8, dispatch_cdef_cpu,
          dst, dst_stride, tmp, n_blocks, meta);
}
|
||||
|
||||
/* -------------------- Recipe convenience wrappers --------------- */
|
||||
|
||||
int daedalus_recipe_dispatch_vp9_idct8(daedalus_ctx *ctx,
|
||||
uint8_t *dst, size_t dst_stride,
|
||||
const int16_t *coeffs, size_t n_blocks,
|
||||
const daedalus_idct8_meta *meta)
|
||||
{
|
||||
return daedalus_dispatch_vp9_idct8(ctx, DAEDALUS_SUBSTRATE_AUTO,
|
||||
dst, dst_stride, coeffs, n_blocks, meta);
|
||||
}
|
||||
|
||||
/*
 * Recipe shortcut for the VP9 wd=4 inner loop filter: forwards every
 * argument to daedalus_dispatch_vp9_lpf4() with the substrate left to the
 * recipe (DAEDALUS_SUBSTRATE_AUTO), and returns its result.
 */
int daedalus_recipe_dispatch_vp9_lpf4(daedalus_ctx *ctx,
                                      uint8_t *dst, size_t dst_stride,
                                      size_t n_edges,
                                      const daedalus_lpf_meta *meta)
{
    const daedalus_substrate recipe_choice = DAEDALUS_SUBSTRATE_AUTO;

    return daedalus_dispatch_vp9_lpf4(ctx, recipe_choice,
                                      dst, dst_stride, n_edges, meta);
}
|
||||
|
||||
/*
 * Recipe shortcut for the VP9 wd=8 inner loop filter: forwards every
 * argument to daedalus_dispatch_vp9_lpf8() with the substrate left to the
 * recipe (DAEDALUS_SUBSTRATE_AUTO), and returns its result.
 */
int daedalus_recipe_dispatch_vp9_lpf8(daedalus_ctx *ctx,
                                      uint8_t *dst, size_t dst_stride,
                                      size_t n_edges,
                                      const daedalus_lpf_meta *meta)
{
    const daedalus_substrate recipe_choice = DAEDALUS_SUBSTRATE_AUTO;

    return daedalus_dispatch_vp9_lpf8(ctx, recipe_choice,
                                      dst, dst_stride, n_edges, meta);
}
|
||||
|
||||
/*
 * Recipe shortcut for VP9 "8h" motion compensation: forwards every argument
 * to daedalus_dispatch_vp9_mc_8h() with the substrate left to the recipe
 * (DAEDALUS_SUBSTRATE_AUTO), and returns its result.
 */
int daedalus_recipe_dispatch_vp9_mc_8h(daedalus_ctx *ctx,
                                       uint8_t *dst, size_t dst_stride,
                                       const uint8_t *src, size_t src_stride,
                                       size_t n_blocks,
                                       const daedalus_mc_meta *meta)
{
    const daedalus_substrate recipe_choice = DAEDALUS_SUBSTRATE_AUTO;

    return daedalus_dispatch_vp9_mc_8h(ctx, recipe_choice,
                                       dst, dst_stride,
                                       src, src_stride,
                                       n_blocks, meta);
}
|
||||
|
||||
/*
 * Recipe shortcut for AV1 CDEF 8x8: forwards every argument to
 * daedalus_dispatch_cdef_8x8() with the substrate left to the recipe
 * (DAEDALUS_SUBSTRATE_AUTO), and returns its result.
 */
int daedalus_recipe_dispatch_cdef_8x8(daedalus_ctx *ctx,
                                      uint8_t *dst, size_t dst_stride,
                                      const uint16_t *tmp,
                                      size_t n_blocks,
                                      const daedalus_cdef_meta *meta)
{
    const daedalus_substrate recipe_choice = DAEDALUS_SUBSTRATE_AUTO;

    return daedalus_dispatch_cdef_8x8(ctx, recipe_choice,
                                      dst, dst_stride, tmp, n_blocks, meta);
}
|
||||
Reference in New Issue
Block a user