From 737e87980dacf657ce80619c1f6bbe781a715497 Mon Sep 17 00:00:00 2001 From: claude-noether Date: Sat, 23 May 2026 19:59:53 +0200 Subject: [PATCH] QPU is default substrate: recipe table + ctx env-var override MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per the user decree 2026-05-23 — "what can be done in QPU will be done in QPU" — this lands two coupled changes that flip production-decode kernels with existing V3D shaders from CPU-by-recipe to QPU-by-recipe: 1) daedalus_recipe_substrate_for() returns SUBSTRATE_QPU for every kernel that has a shipped V3D compute shader: cycle 1 VP9 IDCT 8x8 QPU (was QPU; unchanged) cycle 2 VP9 LPF wd=4 QPU (was QPU; unchanged) cycle 3 VP9 MC 8h QPU (FLIPPED from CPU — v3d_mc_8h.spv) cycle 4 VP9 LPF wd=8 QPU (was QPU; unchanged) cycle 5 AV1 CDEF 8x8 QPU (FLIPPED from CPU — v3d_cdef.spv) cycle 6 H.264 IDCT 4x4 CPU (no shader yet; task #165) cycle 7 H.264 IDCT 8x8 CPU (no shader yet; task #165) cycle 8 H.264 deblock luma-v QPU (FLIPPED from CPU — v3d_h264deblock.spv) cycle 9 H.264 qpel mc20 CPU (no shader yet; task #165) The R-band cost/benefit framework still applies but is now superseded for substrate selection by the decree. Where R stays RED, the cost is in dispatch overhead, which is a fixable engineering issue (tasks 160 buffer-pool, 161 persistent cmdbuf, 162 dmabuf import). 2) daedalus_ctx_create_no_qpu() now honours an env-var override: set DAEDALUS_FORCE_QPU=1 in the process and create_no_qpu silently escalates to a full daedalus_ctx_create(). Lets the libavcodec substitution shims in marfrit-packages (which pthread_once a create_no_qpu ctx — see libavcodec/aarch64/h264_idct_daedalus.c) fire QPU paths without rebuilding those patches. Firefox / mpv consumers stay on the Vulkan-free path by default (env var unset). The daedalus-v4l2 daemon will set DAEDALUS_FORCE_QPU=1 explicitly before dlopen'ing libavcodec (separate daedalus-v4l2 follow-up). 
Smoke (hertz, Pi 5, kernel 6.18.29): === test_api_h264 === H264_IDCT4 recipe substrate: 1 (1=CPU, 2=QPU) H264_IDCT8 recipe substrate: 1 H264_DEBLOCK_LV recipe substrate: 2 ← flipped H264_QPEL_MC20 recipe substrate: 1 H.264 IDCT 4x4: 2048/2048 bytes bit-exact H.264 IDCT 8x8: 2048/2048 bytes bit-exact H.264 deblock luma v: 2048/2048 bytes bit-exact ← QPU path H.264 qpel mc20: 1024/1024 bytes bit-exact === test_api_idct === all substrates (CPU/QPU/AUTO) bit-exact === test_api_lpf === all substrates bit-exact wd=4 and wd=8 The dispatch wrapper's fallback logic (eff == SUBSTRATE_QPU && !ctx_has_qpu(ctx) → eff = SUBSTRATE_CPU) handles the case where the recipe says QPU but the consumer didn't opt in — it falls back to CPU silently, no regression. Closes daedalus-fourier tasks #163, #164. Refs the 2026-05-23 "QPU default substrate" decree. --- src/daedalus_core.c | 40 ++++++++++++++++++++++++++++++++++------ 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/src/daedalus_core.c b/src/daedalus_core.c index fd7d73b..7be1500 100644 --- a/src/daedalus_core.c +++ b/src/daedalus_core.c @@ -53,6 +53,25 @@ daedalus_ctx *daedalus_ctx_create(void) daedalus_ctx *daedalus_ctx_create_no_qpu(void) { + /* + * Per the "QPU is default substrate" decree 2026-05-23: + * setting DAEDALUS_FORCE_QPU=1 in the process env escalates this + * function to a full daedalus_ctx_create(), letting the libavcodec + * substitution shims (which call create_no_qpu via pthread_once) + * fire the V3D shaders that exist for cycles 1/2/3/4/5/8. Without + * this hook each consumer process (firefox, mpv, daemon) would + * need its own shim build to opt into QPU. + * + * Default behaviour (env var unset / not "1") is unchanged: pure + * NEON ctx, no implicit Vulkan init. Firefox / mpv consumers + * that dlopen libavcodec without opting in stay on the + * Vulkan-free path; the daemon explicitly sets + * DAEDALUS_FORCE_QPU=1 before loading libavcodec. 
+ */ + const char *force = getenv("DAEDALUS_FORCE_QPU"); + if (force && force[0] == '1' && force[1] == 0) + return daedalus_ctx_create(); + daedalus_ctx *ctx = calloc(1, sizeof(*ctx)); if (!ctx) return NULL; ctx->has_qpu = 0; @@ -84,16 +103,25 @@ void daedalus_ctx_destroy(daedalus_ctx *ctx) daedalus_substrate daedalus_recipe_substrate_for(daedalus_kernel k) { + /* + * Recipe table per the "QPU is default substrate" decree + * 2026-05-23. Any kernel that has a V3D compute shader returns + * SUBSTRATE_QPU; CPU is the fallback for kernels without a + * shader (still the case for H.264 IDCT 4x4 / IDCT 8x8 / qpel + * mc20 — covered by follow-on task 165). The dispatch + * wrappers already fall back to CPU automatically when the + * ctx doesn't have QPU available (daedalus_ctx_has_qpu == 0). + */ switch (k) { case DAEDALUS_KERNEL_VP9_IDCT8: return DAEDALUS_SUBSTRATE_QPU; case DAEDALUS_KERNEL_VP9_LPF4_INNER: return DAEDALUS_SUBSTRATE_QPU; - case DAEDALUS_KERNEL_VP9_MC_8H: return DAEDALUS_SUBSTRATE_CPU; + case DAEDALUS_KERNEL_VP9_MC_8H: return DAEDALUS_SUBSTRATE_QPU; /* v3d_mc_8h.spv */ case DAEDALUS_KERNEL_VP9_LPF8_INNER: return DAEDALUS_SUBSTRATE_QPU; - case DAEDALUS_KERNEL_AV1_CDEF_8X8: return DAEDALUS_SUBSTRATE_CPU; - case DAEDALUS_KERNEL_H264_IDCT4: return DAEDALUS_SUBSTRATE_CPU; - case DAEDALUS_KERNEL_H264_IDCT8: return DAEDALUS_SUBSTRATE_CPU; - case DAEDALUS_KERNEL_H264_DEBLOCK_LV: return DAEDALUS_SUBSTRATE_CPU; - case DAEDALUS_KERNEL_H264_QPEL_MC20: return DAEDALUS_SUBSTRATE_CPU; + case DAEDALUS_KERNEL_AV1_CDEF_8X8: return DAEDALUS_SUBSTRATE_QPU; /* v3d_cdef.spv */ + case DAEDALUS_KERNEL_H264_IDCT4: return DAEDALUS_SUBSTRATE_CPU; /* TODO task #165 */ + case DAEDALUS_KERNEL_H264_IDCT8: return DAEDALUS_SUBSTRATE_CPU; /* TODO task #165 */ + case DAEDALUS_KERNEL_H264_DEBLOCK_LV: return DAEDALUS_SUBSTRATE_QPU; /* v3d_h264deblock.spv */ + case DAEDALUS_KERNEL_H264_QPEL_MC20: return DAEDALUS_SUBSTRATE_CPU; /* TODO task #165 */ } return DAEDALUS_SUBSTRATE_CPU; }