Files
daedalus-decoder/tests/test_deblock_smoke.c
T

282 lines
11 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/* SPDX-License-Identifier: BSD-2-Clause */
/*
* test_deblock_smoke — Stage 2 PR-b smoke test for flush_frame's
* per-frame deblock dispatch.
*
* Strategy
* --------
*
* Bit-exact-against-C-reference would require transcribing ~400 lines
* of FFmpeg's deblock kernels into this test. daedalus-fourier's
* tests/test_api_h264 already does that for both CPU NEON and V3D QPU
* substrates per kernel. So here we instead validate the daedalus-
* decoder's *dispatch wiring* — that the frame's edge list correctly
* partitions into (plane × orient × bS-band) buckets, with correct
* dst_off math, and reaches both backends identically:
*
* 1. Build a frame with random coeffs + predicted + edges.
* 2. Decode it with substrate=CPU → out_cpu.
* 3. Decode it again (same input!) with substrate=QPU → out_qpu.
* 4. Assert out_cpu == out_qpu byte-for-byte.
*
* Plus an anti-no-op check:
*
* 5. Decode a third time with n_edges=0 on every MB → out_no_deblock.
* 6. Assert out_cpu != out_no_deblock (some bytes differ — deblock
* actually fired and changed pixels).
*
* The CPU↔QPU equivalence combined with daedalus-fourier's own kernel-
* level bit-exact gate gives transitive proof of spec-correct dispatch
* routing. This test is cheap (sub-second on QVGA) so it runs in
* every ctest invocation.
*
* Not in scope:
* - Spec-exact deblock semantics (caller's bS / alpha / beta derivation
* per H.264 §8.7 is the integrator's responsibility; the decoder
* just routes whatever edges it receives).
* - Frame-boundary edge handling (caller MUST set bS=0 there; we
* generate edges that respect this).
*/
#include "daedalus_decoder.h"
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* State of the test's pseudo-random generator. A state of zero is a fixed
 * point of the step below (every output would be zero), so callers seed it
 * with a non-zero value before drawing numbers. */
static uint64_t xs64_state;

/* Advance the xorshift64 generator (shift triple 13 / 7 / 17) by one step.
 * The new state is stored back into xs64_state and also returned. */
static uint64_t xs64(void)
{
    uint64_t s = xs64_state;

    s ^= s << 13;
    s ^= s >> 7;
    s ^= s << 17;

    xs64_state = s;
    return s;
}
/* Fill `out` with the synthetic deblock edge list for the macroblock at
 * (mb_x, mb_y) and return how many entries were written (always 16).
 *
 * The caller provides room for at least 16 entries. They are emitted in
 * this order:
 *   - luma (plane 0): 4 vertical edges (orient 0), then 4 horizontal
 *     edges (orient 1), edge_idx 0..3;
 *   - Cb (plane 1):   2 vertical, then 2 horizontal, edge_idx 0..1;
 *   - Cr (plane 2):   2 vertical, then 2 horizontal, edge_idx 0..1.
 *
 * Boundary strength:
 *   - edge_idx 0 is the macroblock boundary. It gets bS=4, except when it
 *     coincides with the frame boundary (vertical edge with mb_x == 0,
 *     horizontal edge with mb_y == 0), where it gets bS=0.
 *   - edge_idx >= 1 are internal edges and get a random bS in {1, 2, 3}.
 *
 * Every edge also receives random alpha in [20, 59], beta in [8, 23] and
 * four tc0 values in [0, 7]. The random draws happen in a fixed order per
 * edge (bS if internal, then alpha, beta, tc0[0..3]), so the generated
 * frame depends only on the xs64 seed.
 *
 * last_mb_x / last_mb_y are accepted but unused: only the left/top
 * macroblock boundary (edge_idx 0) can fall on the frame boundary. */
static int build_mb_edges(int mb_x, int mb_y, int last_mb_x, int last_mb_y,
                          struct daedalus_decoder_edge *out)
{
    (void) last_mb_x;
    (void) last_mb_y;

    int count = 0;

    for (int plane = 0; plane < 3; plane++) {
        /* Luma carries four edges per direction, each chroma plane two. */
        const int edges_per_dir = (plane == 0) ? 4 : 2;

        for (int orient = 0; orient < 2; orient++) {
            /* Vertical edge 0 touches the frame's left side when mb_x == 0;
             * horizontal edge 0 touches its top side when mb_y == 0. */
            const int on_frame_boundary =
                (orient == 0) ? (mb_x == 0) : (mb_y == 0);

            for (int e = 0; e < edges_per_dir; e++) {
                struct daedalus_decoder_edge *edge = &out[count];
                int strength;

                if (e != 0)
                    strength = (int)(1 + xs64() % 3);   /* internal edge */
                else if (on_frame_boundary)
                    strength = 0;                       /* skipped */
                else
                    strength = 4;                       /* MB boundary */

                edge->mb_x     = (uint16_t) mb_x;
                edge->mb_y     = (uint16_t) mb_y;
                edge->edge_idx = (uint8_t) e;
                edge->orient   = (uint8_t) orient;
                edge->plane    = (uint8_t) plane;
                edge->bS       = (uint8_t) strength;
                edge->alpha    = (uint8_t) (20 + (int)(xs64() % 40));
                edge->beta     = (uint8_t) ( 8 + (int)(xs64() % 16));
                for (int s = 0; s < 4; s++)
                    edge->tc0[s] = (int8_t) (xs64() % 8);

                count++;
            }
        }
    }

    return count;   /* 4 + 4 + 2 + 2 + 2 + 2 = 16 */
}
/* Push one complete frame through the decoder on the requested substrate.
 *
 * Macroblocks are appended in raster order (row by row); macroblock k takes
 * its coefficients and prediction from per_mb_coeffs[k] / per_mb_pred[k].
 * When with_edges is non-zero each macroblock also carries the 16 edges in
 * per_mb_edges[k]; otherwise it is appended with no edge list at all.
 * The frame is then flushed into out_y / out_uv, both using `width` as the
 * row stride (presumably luma plus interleaved chroma; confirm against
 * daedalus_decoder.h).
 *
 * `height` is accepted for call-site symmetry but not used.
 *
 * Returns 0 on success. On any decoder error a message is written to
 * stderr and -1 is returned. */
static int run_once(daedalus_decoder *dec, daedalus_decoder_substrate sub,
                    int mb_w, int mb_h,
                    const int16_t (*per_mb_coeffs)[384],
                    const uint8_t (*per_mb_pred)[384],
                    const struct daedalus_decoder_edge (*per_mb_edges)[16],
                    int with_edges,
                    int width, int height,
                    uint8_t *out_y, uint8_t *out_uv)
{
    (void) height;

    if (daedalus_decoder_set_substrate(dec, sub) != 0) {
        fprintf(stderr, "set_substrate failed\n");
        return -1;
    }

    struct daedalus_decoder_mb_input mb = {0};
    int mb_index = 0;   /* raster index: row * mb_w + col */

    for (int row = 0; row < mb_h; row++) {
        for (int col = 0; col < mb_w; col++, mb_index++) {
            mb.mb_x          = (uint16_t) col;
            mb.mb_y          = (uint16_t) row;
            mb.coeffs        = per_mb_coeffs[mb_index];
            mb.predicted     = per_mb_pred[mb_index];
            mb.transform_8x8 = 0;

            if (with_edges) {
                mb.edges   = per_mb_edges[mb_index];
                mb.n_edges = 16;
            } else {
                mb.edges   = NULL;
                mb.n_edges = 0;
            }

            if (daedalus_decoder_append_mb(dec, &mb) != 0) {
                fprintf(stderr, "append (%d,%d) failed\n", col, row);
                return -1;
            }
        }
    }

    const size_t stride = (size_t) width;
    const int flush_rc =
        daedalus_decoder_flush_frame(dec, out_y, stride, out_uv, stride);

    if (flush_rc == 0)
        return 0;

    fprintf(stderr, "flush_frame rc=%d sub=%d\n", flush_rc, (int) sub);
    return -1;
}
/* Parse one frame dimension given on the command line.
 *
 * `text` must be a plain decimal integer in [16, 16384]: at least one
 * 16-pixel macroblock, and small enough that every macroblock count and
 * byte size computed by this test stays well inside int / size_t. The
 * upper bound is a limit of this test harness, not of the decoder.
 *
 * On success stores the value in *out and returns 0. On failure prints a
 * message naming `what` to stderr and returns -1. */
static int parse_dimension(const char *text, const char *what, int *out)
{
    enum { MIN_DIM = 16, MAX_DIM = 16384 };
    char *end = NULL;
    long value = strtol(text, &end, 10);

    /* strtol saturates on overflow, so out-of-range input is caught by the
     * range check without needing errno. */
    if (end == text || *end != '\0' || value < MIN_DIM || value > MAX_DIM) {
        fprintf(stderr, "invalid %s '%s' (expected an integer in [%d, %d])\n",
                what, text, (int) MIN_DIM, (int) MAX_DIM);
        return -1;
    }
    *out = (int) value;
    return 0;
}

/* Entry point: test_deblock_smoke [width [height [seed]]]
 *
 * Defaults are 320x240 with a fixed seed. Generates one random frame
 * (coefficients, prediction, edge lists), decodes it three times
 * (CPU with edges, QPU with edges, CPU without edges) and checks that
 *   1. the CPU and QPU outputs are byte-identical, and
 *   2. the with-edges output differs from the no-edges output.
 *
 * Exit status: 0 on PASS, and also 0 when the decoder context cannot be
 * created (reported as SKIP); 1 on bad arguments, allocation failure,
 * decoder error or a failed check. All resources are released on every
 * path that has acquired any. */
int main(int argc, char **argv)
{
    const uint64_t default_seed = 0xdeadbeefcafebabeULL;
    int width = 320;
    int height = 240;
    uint64_t seed = default_seed;
    int rc = 1;

    /* Everything released at `done` is declared up front so the cleanup
     * path is valid no matter how early we bail out. */
    int16_t (*coeffs)[384] = NULL;
    uint8_t (*pred)[384] = NULL;
    struct daedalus_decoder_edge (*edges)[16] = NULL;
    daedalus_decoder *dec = NULL;
    uint8_t *out_cpu_y = NULL, *out_cpu_uv = NULL;
    uint8_t *out_qpu_y = NULL, *out_qpu_uv = NULL;
    uint8_t *out_nodb_y = NULL, *out_nodb_uv = NULL;
    int edge_total = 0, edge_non_skip = 0;
    size_t y_diffs = 0, uv_diffs = 0;
    size_t y_changed = 0, uv_changed = 0;

    if (argc > 1 && parse_dimension(argv[1], "width", &width) != 0)
        return 1;
    if (argc > 2 && parse_dimension(argv[2], "height", &height) != 0)
        return 1;
    if (argc > 3) {
        char *end = NULL;
        seed = strtoull(argv[3], &end, 0);
        if (end == argv[3] || *end != '\0') {
            fprintf(stderr, "invalid seed '%s'\n", argv[3]);
            return 1;
        }
        if (seed == 0) {
            /* Zero is a fixed point of xorshift: every draw would be 0 and
             * the "random" frame would be constant. */
            fprintf(stderr, "seed 0 is degenerate for xorshift; "
                            "using the default seed instead\n");
            seed = default_seed;
        }
    }
    xs64_state = seed;

    const int mb_w = width / 16;
    const int mb_h = height / 16;
    const int n_mbs = mb_w * mb_h;
    const size_t y_size = (size_t) width * (size_t) height;
    const size_t uv_size = y_size / 2;

    /* %llx + unsigned long long prints all 64 seed bits even where long
     * is only 32 bits wide. */
    printf("test_deblock_smoke: %dx%d (%d MBs), seed=0x%llx\n",
           width, height, n_mbs, (unsigned long long) seed);

    /* Allocate per-MB arrays. */
    coeffs = malloc((size_t) n_mbs * sizeof(*coeffs));
    pred   = malloc((size_t) n_mbs * sizeof(*pred));
    edges  = malloc((size_t) n_mbs * sizeof(*edges));
    if (!coeffs || !pred || !edges) {
        fprintf(stderr, "alloc fail\n");
        goto done;
    }

    /* Random residual coefficients in [-512, 511] and prediction bytes. */
    for (int mb = 0; mb < n_mbs; mb++) {
        for (int i = 0; i < 384; i++) {
            coeffs[mb][i] = (int16_t)((int)(xs64() % 1024) - 512);
            pred[mb][i]   = (uint8_t)(xs64() & 0xff);
        }
    }

    /* Per-MB edge lists, plus a tally of how many edges will be filtered. */
    for (int my = 0; my < mb_h; my++) {
        for (int mx = 0; mx < mb_w; mx++) {
            int idx = my * mb_w + mx;
            int n = build_mb_edges(mx, my, mb_w - 1, mb_h - 1, edges[idx]);
            edge_total += n;
            for (int k = 0; k < n; k++) {
                if (edges[idx][k].bS != 0)
                    edge_non_skip++;
            }
        }
    }
    printf("edges total=%d non-skip=%d (frame boundaries skipped)\n",
           edge_total, edge_non_skip);

    dec = daedalus_decoder_create(width, height);
    if (!dec) {
        fprintf(stderr, "SKIP: ctx create failed (Vulkan / V3D7 unavailable)\n");
        rc = 0;   /* a missing device is a skip, not a failure */
        goto done;
    }

    out_cpu_y   = malloc(y_size);
    out_cpu_uv  = malloc(uv_size);
    out_qpu_y   = malloc(y_size);
    out_qpu_uv  = malloc(uv_size);
    out_nodb_y  = malloc(y_size);
    out_nodb_uv = malloc(uv_size);
    if (!out_cpu_y || !out_cpu_uv || !out_qpu_y || !out_qpu_uv ||
        !out_nodb_y || !out_nodb_uv) {
        fprintf(stderr, "alloc fail (output buffers)\n");
        goto done;
    }

    /* Pass 1: substrate=CPU, with edges. */
    if (run_once(dec, DAEDALUS_DECODER_SUBSTRATE_CPU, mb_w, mb_h,
                 coeffs, pred, edges, /*with_edges*/1,
                 width, height, out_cpu_y, out_cpu_uv) != 0)
        goto done;

    /* Pass 2: substrate=QPU, with edges. */
    if (run_once(dec, DAEDALUS_DECODER_SUBSTRATE_QPU, mb_w, mb_h,
                 coeffs, pred, edges, /*with_edges*/1,
                 width, height, out_qpu_y, out_qpu_uv) != 0)
        goto done;

    /* Pass 3: substrate=CPU, no edges -> baseline without deblocking. */
    if (run_once(dec, DAEDALUS_DECODER_SUBSTRATE_CPU, mb_w, mb_h,
                 coeffs, pred, edges, /*with_edges*/0,
                 width, height, out_nodb_y, out_nodb_uv) != 0)
        goto done;

    /* Check 1: CPU vs QPU byte-exact. */
    for (size_t i = 0; i < y_size; i++) {
        if (out_cpu_y[i] != out_qpu_y[i])
            y_diffs++;
    }
    for (size_t i = 0; i < uv_size; i++) {
        if (out_cpu_uv[i] != out_qpu_uv[i])
            uv_diffs++;
    }
    printf("CPU vs QPU: Y diff %zu/%zu, UV diff %zu/%zu\n",
           y_diffs, y_size, uv_diffs, uv_size);
    if (y_diffs != 0 || uv_diffs != 0) {
        fprintf(stderr, "FAIL: CPU and QPU outputs differ — dispatch wiring broken\n");
        goto done;
    }

    /* Check 2: with-edges vs no-edges must differ -> deblock actually ran. */
    for (size_t i = 0; i < y_size; i++) {
        if (out_cpu_y[i] != out_nodb_y[i])
            y_changed++;
    }
    for (size_t i = 0; i < uv_size; i++) {
        if (out_cpu_uv[i] != out_nodb_uv[i])
            uv_changed++;
    }
    printf("With vs without deblock: Y changed %zu/%zu, UV changed %zu/%zu\n",
           y_changed, y_size, uv_changed, uv_size);
    if (y_changed == 0 && uv_changed == 0) {
        fprintf(stderr, "FAIL: deblock produced no pixel changes — likely a no-op\n");
        goto done;
    }

    printf("PASS (CPU≡QPU, deblock fired)\n");
    rc = 0;

done:
    /* Single exit path: free(NULL) is a no-op, the decoder is guarded
     * because its destroy function's NULL behaviour is not known here. */
    if (dec)
        daedalus_decoder_destroy(dec);
    free(out_nodb_uv); free(out_nodb_y);
    free(out_qpu_uv);  free(out_qpu_y);
    free(out_cpu_uv);  free(out_cpu_y);
    free(edges); free(pred); free(coeffs);
    return rc;
}