Files
claude-noether 44ca4e550f phase1: substrate selector API + cross-substrate bit-exact ctest
Surfaces daedalus-fourier's substrate-override capability at the
decoder boundary.  Lets tests run on CPU-only hosts (CI runners,
x86 dev boxes) AND cross-checks V3D shader output against NEON
reference on hosts that have both.

API additions (pre-0.1 ABI, additive):

  - daedalus_decoder_substrate enum { AUTO, CPU, QPU }
    (mirrors daedalus_substrate; isolated for ABI reasons).
  - daedalus_decoder_set_substrate(dec, sub) setter, same
    mid-frame-change restrictions as set_output_format.
  - Default remains AUTO — the only sensible choice for production.

Internal:

  - flush_frame now calls daedalus_dispatch_h264_idct{4,8} with an
    explicit substrate instead of daedalus_recipe_dispatch_*.  Mapped
    via a small map_substrate() helper.  No perf delta on AUTO (recipe
    layer was just doing the same dispatch under the hood).

Test changes:

  - test_smoke: new EXPECTs for set_substrate (valid + bogus).
  - test_idct_bitexact: new argv[4] takes "auto" (default), "cpu", or
    "qpu" to force the substrate.
  - CMakeLists.txt: new ctest entry `idct_bitexact_cpu` re-runs the
    QVGA case forcing the CPU path.  Catches silent drift between
    the V3D shader and the NEON reference; both must produce
    identical output for the same coefficient input (and they do —
    see ctest log below).

Verified on hertz (Pi 5 / V3D 7.1 / daedalus-fourier 0.1.0):

  $ ctest --test-dir build --output-on-failure
      Start 1: smoke
  1/4 Test #1: smoke ............................   Passed    0.10 sec
      Start 2: idct_bitexact
  2/4 Test #2: idct_bitexact ....................   Passed    0.03 sec
      Start 3: idct_bitexact_cpu
  3/4 Test #3: idct_bitexact_cpu ................   Passed    0.03 sec
      Start 4: idct_bitexact_1080p
  4/4 Test #4: idct_bitexact_1080p ..............   Passed    0.06 sec

  100% tests passed, 0 tests failed out of 4

CPU substrate produces byte-identical Y + Cb + Cr planes against the
same C reference that the AUTO/QPU path matches — confirming the V3D
shaders and the daedalus-fourier NEON path agree at the spec level.

Why we plumbed the lower-level dispatch instead of leaving recipe in
place: recipe is just a thin wrapper that calls dispatch with
AUTO.  Once we needed substrate control, the wrapper became a
liability (would have required adding a parallel recipe API for each
substrate); going direct is simpler and the AUTO path is unchanged.

Coverage note: idct_bitexact_cpu runs at QVGA (300 MBs); not also at
1080p because the CPU path's wall time scales linearly with block
count and a 1080p CPU run is ~0.5s on hertz — fine standalone but
slows ctest enough that it would tempt opt-in gating.  The bit-exact
content is the same regardless of frame size; the 1080p variant only
exists to gate index-arithmetic bugs that surface above small int
boundaries.
2026-05-24 23:07:45 +02:00

176 lines
6.6 KiB
C

/* SPDX-License-Identifier: BSD-2-Clause */
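/*
 * Scaffold smoke test — verifies the daedalus-decoder library links
 * cleanly against daedalus-fourier and the lifecycle entry points
 * don't immediately crash.  It also pushes one full frame of all-zero
 * coefficients through flush_frame and checks the output planes; no
 * real bitstream decoding is exercised.
 *
 * Returns 0 on success (or when the run is skipped because no decoder
 * context could be created), non-zero on any unexpected behaviour.
 */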
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "daedalus_decoder.h"
/*
 * EXPECT(cond, msg) — test assertion for functions that return int.
 *
 * Evaluates `cond` exactly once.  When it holds, execution continues
 * with the next statement.  When it does not, one line naming the
 * source file, the line number and `msg` is written to stderr and the
 * ENCLOSING FUNCTION returns 1 immediately, so nothing after the
 * failed EXPECT runs (including any cleanup code).
 *
 * The do/while(0) wrapper makes the macro behave as a single statement.
 */
#define EXPECT(cond, msg)                                               \
    do {                                                                \
        if (cond) {                                                     \
            break;                                                      \
        }                                                               \
        fprintf(stderr, "EXPECT FAIL %s:%d: %s\n",                      \
                __FILE__, __LINE__, msg);                               \
        return 1;                                                       \
    } while (0)
/*
 * Smoke-test entry point.
 *
 * Part 1 exercises create/destroy, the output-format and substrate
 * setters, and the argument / ordering validation of append_mb and
 * flush_frame on a 1920x1088 context.
 *
 * Part 2 appends a complete frame of all-zero coefficients, flushes it
 * into sentinel-filled buffers, and checks that both output planes
 * come back all zero.
 *
 * Returns 0 on success, or when a decoder context cannot be created
 * (reported as SKIP).  Returns 1 on the first failed expectation.
 * Note that a failed EXPECT returns straight out of main without
 * releasing the context or the output buffers; only the non-EXPECT
 * failure paths below clean up explicitly.
 */
int main(void)
{
    printf("daedalus-decoder version: %s\n", daedalus_decoder_version());

    /* Create / destroy null is a no-op. */
    daedalus_decoder_destroy(NULL);

    /* Bad dimensions rejected. */
    EXPECT(daedalus_decoder_create(0, 0) == NULL, "zero dims must reject");
    EXPECT(daedalus_decoder_create(1919, 1088) == NULL, "non-16-multiple width must reject");
    EXPECT(daedalus_decoder_create(1920, 1079) == NULL, "non-16-multiple height must reject");

    /* Valid 1088p create. */
    daedalus_decoder *dec = daedalus_decoder_create(1920, 1088);
    if (!dec) {
        /* Vulkan init failure on this host — degrades to skip, not fail.
         * (CI runners without V3D7 will hit this; the smoke test
         * shouldn't gate on hardware presence.) */
        fprintf(stderr, "SKIP: daedalus_decoder_create returned NULL "
                        "(Vulkan / V3D7 unavailable on this host)\n");
        return 0;
    }
    printf("ctx created: 1920x1088, has_qpu=%d\n",
           daedalus_decoder_has_qpu(dec));

    /* set_output_format on a virgin ctx is allowed
     * (mbs_appended == 0). */
    EXPECT(daedalus_decoder_set_output_format(dec, DAEDALUS_DECODER_OUTPUT_RGBA) == 0,
           "switch to RGBA on virgin ctx");
    EXPECT(daedalus_decoder_set_output_format(dec, DAEDALUS_DECODER_OUTPUT_NV12) == 0,
           "switch back to NV12");

    /* Substrate setter — same lifecycle rules. */
    EXPECT(daedalus_decoder_set_substrate(dec, DAEDALUS_DECODER_SUBSTRATE_CPU) == 0,
           "force CPU substrate on virgin ctx");
    EXPECT(daedalus_decoder_set_substrate(dec, DAEDALUS_DECODER_SUBSTRATE_QPU) == 0,
           "force QPU substrate on virgin ctx");
    EXPECT(daedalus_decoder_set_substrate(dec, DAEDALUS_DECODER_SUBSTRATE_AUTO) == 0,
           "back to AUTO");
    /* 99 is deliberately not a declared substrate value. */
    EXPECT(daedalus_decoder_set_substrate(dec, (daedalus_decoder_substrate) 99) == -1,
           "bogus substrate rejects");

    /* Append rejects out-of-bounds + null inputs. */
    int16_t coeffs[384] = {0};
    struct daedalus_decoder_mb_input mb = {0};
    mb.coeffs = coeffs;
    mb.mb_x = 0;
    mb.mb_y = 0;
    EXPECT(daedalus_decoder_append_mb(dec, NULL) == -1, "null mb rejects");
    {
        struct daedalus_decoder_mb_input mb2 = mb;
        mb2.coeffs = NULL;
        EXPECT(daedalus_decoder_append_mb(dec, &mb2) == -1, "null coeffs rejects");
    }
    {
        struct daedalus_decoder_mb_input mb2 = mb;
        mb2.mb_x = 9999;
        mb2.mb_y = 9999;
        EXPECT(daedalus_decoder_append_mb(dec, &mb2) == -1, "OOB coords reject");
    }

    /* Append first MB at raster index 0 — should succeed. */
    EXPECT(daedalus_decoder_append_mb(dec, &mb) == 0, "append (0,0)");

    /* Coordinates are written (mb_x, mb_y).  Jumping to (0,1) while
     * (1,0) is still pending violates raster order — reject. */
    {
        struct daedalus_decoder_mb_input mb2 = mb;
        mb2.mb_x = 0;
        mb2.mb_y = 1;
        EXPECT(daedalus_decoder_append_mb(dec, &mb2) == -1,
               "out-of-raster-order rejects");
    }

    /* In-order: (1,0). */
    mb.mb_x = 1;
    mb.mb_y = 0;
    EXPECT(daedalus_decoder_append_mb(dec, &mb) == 0, "append (1,0)");

    /* Flush an incomplete frame: should fail because mbs_appended != n_mbs. */
    EXPECT(daedalus_decoder_flush_frame(dec, NULL, 0, NULL, 0) == -1,
           "incomplete-frame flush rejects");

    /* set_output_format mid-frame (mbs_appended > 0) must reject. */
    EXPECT(daedalus_decoder_set_output_format(dec, DAEDALUS_DECODER_OUTPUT_RGBA) == -1,
           "mid-frame format change rejects");
    daedalus_decoder_destroy(dec);

    /* ---- Full-frame round-trip with all-zero coefficients.
     * Phase 1 stage 1 validation: flush_frame builds the per-frame IDCT
     * dispatch and a successful GPU round-trip returns 0.  IDCT of
     * all-zero coefficients with zero-initialised predicted = all-zero
     * output pixels. */
    dec = daedalus_decoder_create(1920, 1088);
    if (!dec) {
        fprintf(stderr, "SKIP roundtrip: ctx create failed\n");
        return 0;
    }

    static int16_t zero_coeffs[384] = {0};
    struct daedalus_decoder_mb_input zmb = {0};
    zmb.coeffs = zero_coeffs;

    int mb_width  = 1920 / 16;   /* 120 */
    int mb_height = 1088 / 16;   /* 68 */
    int n_mbs = mb_width * mb_height;

    /* Feed every macroblock, row by row (mb_x varies fastest). */
    for (int mby = 0; mby < mb_height; mby++) {
        for (int mbx = 0; mbx < mb_width; mbx++) {
            zmb.mb_x = (uint16_t) mbx;
            zmb.mb_y = (uint16_t) mby;
            if (daedalus_decoder_append_mb(dec, &zmb) != 0) {
                fprintf(stderr, "append (%d, %d) failed\n", mbx, mby);
                daedalus_decoder_destroy(dec);
                return 1;
            }
        }
    }
    printf("appended %d MBs (%dx%d)\n", n_mbs, mb_width, mb_height);

    /* One byte per luma sample; the UV buffer is half that size. */
    size_t y_size  = (size_t) 1920 * 1088;
    size_t uv_size = (size_t) 1920 * 1088 / 2;
    uint8_t *out_y  = malloc(y_size);
    uint8_t *out_uv = malloc(uv_size);
    if (!out_y || !out_uv) {
        /* Never hand a NULL buffer to memset / flush_frame. */
        fprintf(stderr, "output buffer allocation failed\n");
        free(out_y);
        free(out_uv);
        daedalus_decoder_destroy(dec);
        return 1;
    }

    /* Pre-fill with sentinel so any read-then-write bug becomes visible. */
    memset(out_y, 0xab, y_size);
    memset(out_uv, 0xcd, uv_size);

    int frc = daedalus_decoder_flush_frame(dec, out_y, 1920, out_uv, 1920);
    printf("flush_frame rc=%d\n", frc);
    EXPECT(frc == 0, "flush succeeds on full frame");

    /* Y plane should be all zero (clip255(IDCT(zeros)) = 0). */
    int y_nz = 0;
    for (size_t i = 0; i < y_size; i++) {
        if (out_y[i] != 0) {
            y_nz++;
        }
    }
    printf("Y non-zero bytes: %d / %zu\n", y_nz, y_size);
    EXPECT(y_nz == 0, "Y plane all zero for zero-coeff frame");

    /* UV plane should be all zero as well: the real chroma IDCT runs
     * with zero coeffs -> zero residual -> clip255(0+0) = 0.  (An
     * earlier memset stub filled chroma with a 128 placeholder; that
     * is no longer the expected value.)  The 0xcd sentinel above
     * guarantees we are observing post-dispatch writes, not the
     * leftover memset. */
    int uv_nz = 0;
    for (size_t i = 0; i < uv_size; i++) {
        if (out_uv[i] != 0) {
            uv_nz++;
        }
    }
    printf("UV non-zero bytes: %d / %zu\n", uv_nz, uv_size);
    EXPECT(uv_nz == 0, "UV plane all zero for zero-coeff frame");

    free(out_y);
    free(out_uv);
    daedalus_decoder_destroy(dec);
    printf("smoke OK\n");
    return 0;
}