phase1: substrate selector API + cross-substrate bit-exact ctest
Surfaces daedalus-fourier's substrate-override capability at the
decoder boundary. Lets tests run on CPU-only hosts (CI runners,
x86 dev boxes) AND cross-checks V3D shader output against NEON
reference on hosts that have both.
API additions (pre-0.1 ABI, additive):
- daedalus_decoder_substrate enum { AUTO, CPU, QPU }
(mirrors daedalus_substrate; isolated for ABI reasons).
- daedalus_decoder_set_substrate(dec, sub) setter, same
mid-frame-change restrictions as set_output_format.
- Default remains AUTO — the only sensible choice for production.
Internal:
- flush_frame now calls daedalus_dispatch_h264_idct{4,8} with an
explicit substrate instead of daedalus_recipe_dispatch_*. Mapped
via a small map_substrate() helper. No perf delta on AUTO (recipe
layer was just doing the same dispatch under the hood).
Test changes:
- test_smoke: new EXPECTs for set_substrate (valid + bogus).
- test_idct_bitexact: new argv[4] takes "auto" (default), "cpu", or
"qpu" to force the substrate.
- CMakeLists.txt: new ctest entry `idct_bitexact_cpu` re-runs the
QVGA case forcing the CPU path. Catches silent drift between
the V3D shader and the NEON reference; both must produce
identical output for the same coefficient input (and they do —
see ctest log below).
Verified on hertz (Pi 5 / V3D 7.1 / daedalus-fourier 0.1.0):
$ ctest --test-dir build --output-on-failure
Start 1: smoke
1/4 Test #1: smoke ............................ Passed 0.10 sec
Start 2: idct_bitexact
2/4 Test #2: idct_bitexact .................... Passed 0.03 sec
Start 3: idct_bitexact_cpu
3/4 Test #3: idct_bitexact_cpu ................ Passed 0.03 sec
Start 4: idct_bitexact_1080p
4/4 Test #4: idct_bitexact_1080p .............. Passed 0.06 sec
100% tests passed, 0 tests failed out of 4
CPU substrate produces byte-identical Y + Cb + Cr planes against the
same C reference that the AUTO/QPU path matches — confirming the V3D
shaders and the daedalus-fourier NEON path agree at the spec level.
Why we plumbed the lower-level dispatch instead of leaving recipe in
place: recipe is just a thin wrapper that calls dispatch with
AUTO. Once we needed substrate control, the wrapper became a
liability (would have required adding a parallel recipe API for each
substrate); going direct is simpler and the AUTO path is unchanged.
Coverage note: idct_bitexact_cpu runs only at QVGA (300 MBs), not at
1080p as well. The CPU path's wall time scales linearly with block
count, and a 1080p CPU run takes ~0.5s on hertz — fine standalone, but
it slows ctest enough that it would tempt opt-in gating. The bit-exact
content is the same regardless of frame size; the 1080p variant exists
only to catch index-arithmetic bugs that surface above small int
boundaries.
This commit is contained in:
+40
-11
@@ -41,6 +41,7 @@ daedalus_decoder *daedalus_decoder_create(int width, int height)
|
||||
dec->mb_height = height >> 4;
|
||||
dec->n_mbs = dec->mb_width * dec->mb_height;
|
||||
dec->output_fmt = DAEDALUS_DECODER_OUTPUT_NV12;
|
||||
dec->substrate = DAEDALUS_DECODER_SUBSTRATE_AUTO;
|
||||
|
||||
/* daedalus-fourier ctx — required. Phase 1 needs the QPU; if
|
||||
* Vulkan init fails the decoder is unusable. Caller can check
|
||||
@@ -86,6 +87,33 @@ int daedalus_decoder_set_output_format(daedalus_decoder *dec,
|
||||
return 0;
|
||||
}
|
||||
|
||||
int daedalus_decoder_set_substrate(daedalus_decoder *dec,
|
||||
daedalus_decoder_substrate sub)
|
||||
{
|
||||
if (!dec)
|
||||
return -1;
|
||||
if (dec->mbs_appended != 0)
|
||||
return -1;
|
||||
if (sub != DAEDALUS_DECODER_SUBSTRATE_AUTO &&
|
||||
sub != DAEDALUS_DECODER_SUBSTRATE_CPU &&
|
||||
sub != DAEDALUS_DECODER_SUBSTRATE_QPU)
|
||||
return -1;
|
||||
dec->substrate = sub;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Map our public substrate enum onto daedalus-fourier's. Same
|
||||
* ordering by intent — we duplicate the enum for ABI isolation. */
|
||||
static daedalus_substrate map_substrate(daedalus_decoder_substrate s)
|
||||
{
|
||||
switch (s) {
|
||||
case DAEDALUS_DECODER_SUBSTRATE_CPU: return DAEDALUS_SUBSTRATE_CPU;
|
||||
case DAEDALUS_DECODER_SUBSTRATE_QPU: return DAEDALUS_SUBSTRATE_QPU;
|
||||
case DAEDALUS_DECODER_SUBSTRATE_AUTO:
|
||||
default: return DAEDALUS_SUBSTRATE_AUTO;
|
||||
}
|
||||
}
|
||||
|
||||
int daedalus_decoder_append_mb(daedalus_decoder *dec,
|
||||
const struct daedalus_decoder_mb_input *mb)
|
||||
{
|
||||
@@ -267,16 +295,17 @@ int daedalus_decoder_flush_frame(daedalus_decoder *dec,
|
||||
* Skipping the dispatch when the partition is empty avoids the
|
||||
* shader-pool warm-up cost on the common case (a typical Baseline
|
||||
* stream is all-4x4 → 8x8 dispatch is no-op). */
|
||||
const daedalus_substrate sub = map_substrate(dec->substrate);
|
||||
if (bi4 > 0) {
|
||||
int dr = daedalus_recipe_dispatch_h264_idct4(dec->dctx,
|
||||
scratch_y, y_stride_int,
|
||||
coeffs4, bi4, meta4);
|
||||
int dr = daedalus_dispatch_h264_idct4(dec->dctx, sub,
|
||||
scratch_y, y_stride_int,
|
||||
coeffs4, bi4, meta4);
|
||||
if (dr != 0) { rc = -3; goto cleanup; }
|
||||
}
|
||||
if (bi8 > 0) {
|
||||
int dr = daedalus_recipe_dispatch_h264_idct8(dec->dctx,
|
||||
scratch_y, y_stride_int,
|
||||
coeffs8, bi8, meta8);
|
||||
int dr = daedalus_dispatch_h264_idct8(dec->dctx, sub,
|
||||
scratch_y, y_stride_int,
|
||||
coeffs8, bi8, meta8);
|
||||
if (dr != 0) { rc = -3; goto cleanup; }
|
||||
}
|
||||
|
||||
@@ -361,11 +390,11 @@ int daedalus_decoder_flush_frame(daedalus_decoder *dec,
|
||||
}
|
||||
/* assert cbi == n_chroma_blocks; loop math guarantees it */
|
||||
|
||||
int cr_rc = daedalus_recipe_dispatch_h264_idct4(dec->dctx,
|
||||
scratch_uv, chroma_w,
|
||||
chroma_coeffs,
|
||||
n_chroma_blocks,
|
||||
chroma_meta);
|
||||
int cr_rc = daedalus_dispatch_h264_idct4(dec->dctx, sub,
|
||||
scratch_uv, chroma_w,
|
||||
chroma_coeffs,
|
||||
n_chroma_blocks,
|
||||
chroma_meta);
|
||||
if (cr_rc != 0) {
|
||||
rc = -3;
|
||||
goto chroma_cleanup;
|
||||
|
||||
@@ -64,6 +64,9 @@ struct daedalus_decoder {
|
||||
|
||||
/* Output format. */
|
||||
daedalus_decoder_output_format output_fmt;
|
||||
|
||||
/* Dispatch substrate (AUTO by default — recipe-table-driven). */
|
||||
daedalus_decoder_substrate substrate;
|
||||
};
|
||||
|
||||
#endif /* DAEDALUS_DECODER_INTERNAL_H */
|
||||
|
||||
Reference in New Issue
Block a user