/* SPDX-License-Identifier: BSD-2-Clause */ /* * daedalus_decode_h264 — option A standalone test harness for * daedalus-decoder against real H.264 streams. * * Decodes an H.264 file via stock libavcodec (the reference), AND * in parallel runs the same frame through daedalus-decoder in * identity-passthrough mode (predicted = libavcodec's reconstructed * frame, coeffs = 0, no deblock edges). Writes both outputs as * NV12 YUV, then byte-exact diffs. * * PR-A1b purpose: validate the daedalus-decoder data path / API * contract at real-stream frame sizes (16k+ MBs at 1080p, real * H.264-decoded predicted-sample distributions), without yet * requiring per-MB internal state extraction from libavcodec. * Follow-up PRs (A2+) extend this harness to feed REAL per-MB * state (residual coeffs, pre-residual predicted, deblock edges) * via the per-MB inspection callback added in marfrit-packages * patch 0016 (PR #106). * * Identity-passthrough math: * - mb_input.predicted = AVFrame pixels at this MB's raster pos * - mb_input.coeffs = 384 int16's, all zero * - mb_input.edges = NULL, n_edges = 0 * Then flush_frame: * scratch_y/_uv pre-fill from predicted_y/_uv = AVFrame pixels * IDCT dispatches with all-zero coeffs add 0 (no-op) * No deblock dispatches (no edges) * copy-out to caller's planes * Result MUST equal AVFrame pixels byte-for-byte. * * Invoke: * daedalus_decode_h264 [--substrate cpu|qpu|auto] * [--max-frames N] * * * Exit status: * 0 — bit-exact match across all decoded frames * 1 — argument / setup error * 2 — decode error from libavcodec * 3 — daedalus-decoder error (ctx, append, flush) * 4 — bit-exact comparison failed (diff > 0 bytes) */ #define _POSIX_C_SOURCE 200809L #include "daedalus_decoder.h" #include #include #include /* Per-MB inspection callback API — provided by the patched FFmpeg * fork via marfrit-packages patches 0016 + 0017. 
* * When DAEDALUS_HAVE_H264_MB_INSPECT_COEFFS is defined (CMake sets it * alongside DAEDALUS_FFMPEG_SRC), we include libavcodec's INTERNAL * h264dec.h header to dereference H264Context fields — specifically * h->mb_inspect_coeffs (the 0017 side buffer holding pre-IDCT- * destruction sl->mb), h->cur_pic.f (pre-deblock reconstructed pixels), * and h->cur_pic.mb_type[mb_xy] for the mb-type gate. The same * configure-time config.h that built the static libavcodec.a is * picked up via -DHAVE_AV_CONFIG_H + -I path; ABI match is automatic. * * When only DAEDALUS_HAVE_H264_MB_INSPECT_CB is defined (no source * tree available — e.g. building against a distro-shipped patched * libavcodec), the H264Context stays opaque and we fall back to * identity-passthrough across all MBs. * * When neither is defined: stock libavcodec, no callback, identity- * passthrough only (PR-A1b behaviour). */ #ifdef DAEDALUS_HAVE_H264_MB_INSPECT_COEFFS # include "libavcodec/h264dec.h" # include "libavcodec/h264.h" /* IS_INTRA4x4 / IS_8x8DCT / IS_INTRA_PCM */ #elif defined(DAEDALUS_HAVE_H264_MB_INSPECT_CB) struct H264Context; #endif #if defined(DAEDALUS_HAVE_H264_MB_INSPECT_CB) || defined(DAEDALUS_HAVE_H264_MB_INSPECT_COEFFS) typedef void (*ff_h264_mb_inspect_cb)(void *opaque, const struct H264Context *h, int mb_x, int mb_y); void ff_h264_set_mb_inspect_cb(AVCodecContext *avctx, ff_h264_mb_inspect_cb cb, void *opaque); #endif #include #include #include #include #include static const char *substrate_str = "auto"; static int max_frames = -1; /* Inspection-callback state: per-frame counter + "each MB seen exactly * once" check. Bitmap, not raster-order — libavcodec's MB threading + * multi-slice frames mean MBs reach the callback out of strict order; * contract is "every MB fires the callback exactly once per frame". 
*
 * When real-coeff extraction is compiled in (PR-A3+), we ALSO maintain
 * a per-MB capture buffer (real-coeffs path) so the main loop can
 * drive daedalus_decoder_append_mb with REAL pre-residual P + real
 * coefficients for MBs that satisfy the gate (Intra_4x4, no 8x8 DCT,
 * no PCM). Other MBs stay on identity-passthrough. */
#ifdef DAEDALUS_HAVE_H264_MB_INSPECT_CB
/* Per-macroblock state recorded by inspect_cb() and consumed by the main
 * decode loop. One entry per MB, indexed mb_y * mb_w + mb_x. */
struct mb_capture {
    int      valid;             /* 1 = real-coeffs IDCT path, 0 = identity (predicted = pre_deblock_snap) */
    int16_t  coeffs[256];       /* luma: 16 blocks x 16 coeffs; blocks in raster order,
                                 * each block copied unchanged from h->mb_inspect_coeffs */
    uint8_t  predicted[256];    /* luma P recovered = clip(pre_deblock - ((IDCT(C) + 32) >> 6)),
                                 * 16x16 raster; only meaningful when valid == 1 */
    uint8_t  pre_deblock_snap_y[256];  /* luma 16×16 pre-deblock at callback time */
    uint8_t  pre_deblock_snap_cb[64];  /* Cb 8×8 pre-deblock */
    uint8_t  pre_deblock_snap_cr[64];  /* Cr 8×8 pre-deblock */
    int      qp_y;              /* QP_Y for this MB, read from h->cur_pic.qscale_table[mb_xy]
                                 * at callback time */
    int      mb_type_intra;     /* 1 if MB is intra (any flavour), 0 otherwise */
    int      transform_8x8;     /* 1 if 8×8 DCT (affects which internal edges fire) */
};

/* Callback bookkeeping passed to inspect_cb() as its opaque pointer.
 * The counters are accumulated by the callback; the main loop reads them
 * after each decoded frame. */
struct inspect_state {
    int n_cbs_this_frame;       /* callbacks received, including out-of-bounds ones */
    int mb_w, mb_h;             /* MB grid size, derived from the stream's codecpar dims */
    uint8_t *seen;              /* mb_w * mb_h entries, ONE BYTE per MB (not packed bits);
                                 * set to 1 when the callback fires for that MB */
    int duplicate_mbs;          /* callbacks for an MB whose seen[] entry was already set */
    int out_of_bounds;          /* callbacks with (mb_x, mb_y) outside the mb_w x mb_h grid */
#ifdef DAEDALUS_HAVE_H264_MB_INSPECT_COEFFS
    struct mb_capture *captures;   /* mb_w * mb_h entries */
    int real_coeffs_mbs;           /* count of MBs in real-coeffs IDCT path this frame */
    int skipped_intra16x16;        /* MBs rejected by the gate because they are Intra_16x16 */
    int skipped_8x8dct;            /* Intra_4x4-typed MBs rejected because they use the 8x8 transform */
    int skipped_other;             /* every other rejected MB (non-intra, PCM, ...) */
    /* Slice-level deblock params. The callback overwrites these on every
     * call from h->slice_ctx[0], so they hold the latest values seen. Per
     * H.264 spec these are constant per slice; we assume single-slice
     * frames in our test stream. */
    int slice_alpha_c0_offset;
    int slice_beta_offset;
    int slice_deblock_disable;     /* raw copy of sl->deblocking_filter. Despite the name this
                                    * uses FFmpeg's internal (inverted) convention: the main loop
                                    * treats 0 as "deblocking off" and non-zero as "on". */
#endif
};

#ifdef DAEDALUS_HAVE_H264_MB_INSPECT_COEFFS
/* H.264 §8.7.2.2/8.7.2.3 deblock filter tables — transcribed verbatim
 * from FFmpeg libavcodec/h264_loopfilter.c (LGPL-2.1+; algorithm + table
 * values come from the H.264 spec which is normative and unpatented).
* Tables are size 52*3 — FFmpeg's trick to absorb slice_alpha_c0_offset + * slice_beta_offset (in -12..+12) into the index without bounds-clamping. * Usage: alpha = alpha_table[qp + a] where a = 52 + slice_alpha_c0_offset * (8-bit only; high-bit-depth subtracts qp_bd_offset). */ static const uint8_t alpha_table[52*3] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 5, 6, 7, 8, 9, 10, 12, 13, 15, 17, 20, 22, 25, 28, 32, 36, 40, 45, 50, 56, 63, 71, 80, 90,101,113,127,144,162,182,203,226, 255,255, 255,255,255,255,255,255,255,255,255,255,255,255,255, 255,255,255,255,255,255,255,255,255,255,255,255,255, 255,255,255,255,255,255,255,255,255,255,255,255,255, 255,255,255,255,255,255,255,255,255,255,255,255,255, }; static const uint8_t beta_table[52*3] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, }; static const int8_t tc0_table[52*3][4] = { {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 
}, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 1 }, {-1, 0, 0, 1 }, {-1, 0, 0, 1 }, {-1, 0, 0, 1 }, {-1, 0, 1, 1 }, {-1, 0, 1, 1 }, {-1, 1, 1, 1 }, {-1, 1, 1, 1 }, {-1, 1, 1, 1 }, {-1, 1, 1, 1 }, {-1, 1, 1, 2 }, {-1, 1, 1, 2 }, {-1, 1, 1, 2 }, {-1, 1, 1, 2 }, {-1, 1, 2, 3 }, {-1, 1, 2, 3 }, {-1, 2, 2, 3 }, {-1, 2, 2, 4 }, {-1, 2, 3, 4 }, {-1, 2, 3, 4 }, {-1, 3, 3, 5 }, {-1, 3, 4, 6 }, {-1, 3, 4, 6 }, {-1, 4, 5, 7 }, {-1, 4, 5, 8 }, {-1, 4, 6, 9 }, {-1, 5, 7,10 }, {-1, 6, 8,11 }, {-1, 6, 8,13 }, {-1, 7,10,14 }, {-1, 8,11,16 }, {-1, 9,12,18 }, {-1,10,13,20 }, {-1,11,15,23 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 
}, }; /* H.264 §8.5.11 / Table 8-11: qP_y → qP_chroma mapping for chroma_qp_index_offset == 0. * For qP_y < 30, qP_c = qP_y. Above that, the spec table compresses. */ static const uint8_t chroma_qp_table[52] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 29, 30, 31, 32, 32, 33, 34, 34, 35, 35, 36, 36, 37, 37, 37, 38, 38, 38, 39, 39, 39, 39, }; /* libavcodec's sl->mb stores coefficients in RASTER (row-major) order, * not zig-zag scan order — h264_cavlc.c does * block[*scantable] = (level * qmul[*scantable] + 32) >> 6 * where *scantable advances through ff_zigzag_scan[] which contains * RASTER positions (row*4 + col). So sl->mb[i] = coef at raster * position i = (i/4, i%4) = (row, col). No inverse-zigzag needed; * just transpose row-major → column-major (daedalus's convention). */ /* H.264 §6.4.3 4x4 luma block scan within MB (z-scan). * Maps raster-block-idx (sb_y*4+sb_x) → libavcodec sl->mb's z-scan idx. * Z-scan happens to be its own inverse (symmetric mapping). */ static const uint8_t raster_to_zscan[16] = { 0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15 }; /* H.264 4x4 IDCT — transcribed from daedalus-fourier * tests/test_idct_bitexact.c (which itself mirrors h264_idct4_ref.c). * Outputs row-major 16-element residual; clip + shift happens in * the consumer. */ static void h264_idct4_butterfly(const int d[4], int out[4]) { int e = d[0] + d[2]; int f = d[0] - d[2]; int g = (d[1] >> 1) - d[3]; int h = d[1] + (d[3] >> 1); out[0] = e + h; out[1] = f + g; out[2] = f - g; out[3] = e - h; } static void ref_idct4_compute(const int16_t block[16], int out[16]) { /* block COLUMN-MAJOR: block[c*4+r] = coef at (row=r, col=c). * * Pass order: COLUMN-pass first, then ROW-pass — matches FFmpeg's * h264idct_template.c. 
The pass order matters for integer * arithmetic with `>>1` on signed values (which round toward -inf * for odd negatives in C); row-first vs column-first orders can * disagree by 1 unit at the intermediate stage, propagating to * the final pixel residual. * * (daedalus-fourier's tests/h264_idct4_ref.c does ROW-first, which * matches its NEON kernel + GPU shader bit-exact within the * package but DIVERGES from FFmpeg's IDCT for some inputs. PR-A3b * surfaces the divergence; investigating the fix is a daedalus- * fourier follow-up — see task #184.) */ int tmp[4][4]; /* Column pass: process each column c independently. */ for (int c = 0; c < 4; c++) { int d[4] = { block[c*4+0], block[c*4+1], block[c*4+2], block[c*4+3] }; int o[4]; h264_idct4_butterfly(d, o); for (int r = 0; r < 4; r++) tmp[r][c] = o[r]; } /* Row pass: process each row r. */ for (int r = 0; r < 4; r++) { int d[4] = { tmp[r][0], tmp[r][1], tmp[r][2], tmp[r][3] }; int o[4]; h264_idct4_butterfly(d, o); for (int c = 0; c < 4; c++) out[r*4+c] = o[c]; } } #endif /* DAEDALUS_HAVE_H264_MB_INSPECT_COEFFS */ static void inspect_cb(void *opaque, const struct H264Context *h, int mb_x, int mb_y) { struct inspect_state *st = opaque; #ifndef DAEDALUS_HAVE_H264_MB_INSPECT_COEFFS (void) h; #endif if (mb_x < 0 || mb_x >= st->mb_w || mb_y < 0 || mb_y >= st->mb_h) { st->out_of_bounds++; st->n_cbs_this_frame++; return; } const size_t idx = (size_t) mb_y * st->mb_w + (size_t) mb_x; if (st->seen[idx]) st->duplicate_mbs++; st->seen[idx] = 1; st->n_cbs_this_frame++; #ifdef DAEDALUS_HAVE_H264_MB_INSPECT_COEFFS /* Capture slice-level deblock params once. Per spec they're constant * per slice; for our single-slice test streams we just keep the * latest values seen. 
*/ { const H264SliceContext *sl = &h->slice_ctx[0]; st->slice_alpha_c0_offset = sl->slice_alpha_c0_offset; st->slice_beta_offset = sl->slice_beta_offset; st->slice_deblock_disable = sl->deblocking_filter; } /* Real-coeffs path: extract per-MB state for daedalus-decoder * IDCT validation on this MB. Gate: only Intra_4x4 + 4x4 transform * + non-PCM is supported in PR-A3b — other MB flavours fall back * to identity-passthrough in the main loop. */ struct mb_capture *cap = &st->captures[idx]; cap->valid = 0; /* default to passthrough */ const int mb_xy = mb_y * h->mb_stride + mb_x; const uint32_t mb_type = h->cur_pic.mb_type[mb_xy]; /* Capture state needed for deblock edge derivation, regardless * of whether this MB takes the real-coeffs IDCT path. */ cap->qp_y = h->cur_pic.qscale_table[mb_xy]; cap->mb_type_intra = IS_INTRA(mb_type) ? 1 : 0; cap->transform_8x8 = IS_8x8DCT(mb_type) ? 1 : 0; /* Snapshot pre-deblock pixels for all 3 planes at this MB's position. */ { const int y_stride = h->cur_pic.f->linesize[0]; const int uv_stride = h->cur_pic.f->linesize[1]; const uint8_t *mb_y_px = h->cur_pic.f->data[0] + (ptrdiff_t) mb_y * 16 * y_stride + mb_x * 16; const uint8_t *mb_cb_px = h->cur_pic.f->data[1] + (ptrdiff_t) mb_y * 8 * uv_stride + mb_x * 8; const uint8_t *mb_cr_px = h->cur_pic.f->data[2] + (ptrdiff_t) mb_y * 8 * uv_stride + mb_x * 8; for (int r = 0; r < 16; r++) memcpy(&cap->pre_deblock_snap_y[r * 16], &mb_y_px[r * y_stride], 16); for (int r = 0; r < 8; r++) { memcpy(&cap->pre_deblock_snap_cb[r * 8], &mb_cb_px[r * uv_stride], 8); memcpy(&cap->pre_deblock_snap_cr[r * 8], &mb_cr_px[r * uv_stride], 8); } } if (!IS_INTRA4x4(mb_type)) { if (IS_INTRA16x16(mb_type)) st->skipped_intra16x16++; else st->skipped_other++; return; } if (IS_8x8DCT(mb_type)) { st->skipped_8x8dct++; return; } if (IS_INTRA_PCM(mb_type)) { st->skipped_other++; return; } /* Snapshot luma pre-deblock pixels from cur_pic. 
*/ const uint8_t *luma_plane = h->cur_pic.f->data[0]; const int luma_stride = h->cur_pic.f->linesize[0]; const uint8_t *mb_pixels = luma_plane + (ptrdiff_t) mb_y * 16 * luma_stride + mb_x * 16; /* (pre_deblock_snap_y is already populated above for all 3 planes; * we use it later in the main loop as the daedalus predicted input.) */ /* Coefficients are in sl->mb at end of entropy decode but zeroed by * the time the callback fires (IDCT-add consumed them). Patch 0017 * preserves them in h->mb_inspect_coeffs[16 * 48] BEFORE IDCT runs, * so we read from there. */ const int16_t *zz_mb = h->mb_inspect_coeffs; /* layout matches sl->mb 8-bit half */ for (int r_block = 0; r_block < 16; r_block++) { const int z_block = raster_to_zscan[r_block]; const int16_t *block_raw = &zz_mb[z_block * 16]; /* sl->mb stores 16 int16 per block. Empirical finding (via * /tmp/idct_compare.c, 2026-05-26): daedalus-fourier's C ref * IDCT and FFmpeg's C ref IDCT produce IDENTICAL output for * the same input array — the "column-major vs row-major" * labelling is decoration; both functions implement the same * H.264 spec IDCT on a 16-int16 input. So we feed daedalus * the raw sl->mb data unchanged. Previous attempt to * transpose row-major→column-major was wrong — the transpose * changed the IDCT result. */ int16_t col[16]; memcpy(col, block_raw, 16 * sizeof(int16_t)); memcpy(&cap->coeffs[r_block * 16], col, 16 * sizeof(int16_t)); /* IDCT → row-major 16-int residual. */ int idct_row[16]; ref_idct4_compute(col, idct_row); /* P = clip(pre_deblock - ((IDCT + 32) >> 6)) for each pixel. * Symmetric: daedalus IDCT-add will undo the subtract, including * for saturating cases (where the same shift puts the value back * at the same clip boundary). 
*/ const int sb_y = r_block >> 2; const int sb_x = r_block & 3; for (int r = 0; r < 4; r++) { for (int c = 0; c < 4; c++) { const int pre_db = mb_pixels[(sb_y * 4 + r) * luma_stride + sb_x * 4 + c]; const int shift = (idct_row[r * 4 + c] + 32) >> 6; int p = pre_db - shift; if (p < 0) p = 0; if (p > 255) p = 255; cap->predicted[(sb_y * 4 + r) * 16 + (sb_x * 4 + c)] = (uint8_t) p; } } } cap->valid = 1; st->real_coeffs_mbs++; /* One-shot diagnostic enabled by DAEDALUS_DUMP_MB_3_0 env var. */ if (mb_x == 3 && mb_y == 0 && getenv("DAEDALUS_DUMP_MB_3_0")) { const int16_t *zz = &zz_mb[1 * 16]; /* z_block = raster_block = 1 */ const struct mb_capture *capdiag = &st->captures[mb_y * st->mb_w + mb_x]; fprintf(stderr, " MB(3,0) block z=1 raster coeffs (sl->mb):"); for (int p = 0; p < 16; p++) fprintf(stderr, " %d", (int) zz[p]); fprintf(stderr, "\n"); fprintf(stderr, " MB(3,0) block z=1 col_major coeffs (after transpose):"); for (int i = 0; i < 16; i++) fprintf(stderr, " %d", (int) capdiag->coeffs[1 * 16 + i]); fprintf(stderr, "\n"); /* Recompute IDCT for this block (already done in the loop above but * print here for visibility). 
*/ int idct_print[16]; ref_idct4_compute(&capdiag->coeffs[1 * 16], idct_print); fprintf(stderr, " MB(3,0) block z=1 IDCT row-major (raw, pre-shift):"); for (int i = 0; i < 16; i++) fprintf(stderr, " %d", idct_print[i]); fprintf(stderr, "\n"); fprintf(stderr, " MB(3,0) block z=1 IDCT (+32)>>6:"); for (int i = 0; i < 16; i++) fprintf(stderr, " %d", (idct_print[i] + 32) >> 6); fprintf(stderr, "\n"); const uint8_t *bpix = mb_pixels + 0 * luma_stride + 4; /* sb_y=0, sb_x=1 → cols 4..7 within MB */ fprintf(stderr, " MB(3,0) block z=1 pre_deblock pixels:\n"); for (int r = 0; r < 4; r++) { fprintf(stderr, " "); for (int c = 0; c < 4; c++) fprintf(stderr, " %3u", bpix[r * luma_stride + c]); fprintf(stderr, "\n"); } fprintf(stderr, " MB(3,0) block z=1 P_rec (= pre_deblock - shift):\n"); for (int r = 0; r < 4; r++) { fprintf(stderr, " "); for (int c = 0; c < 4; c++) fprintf(stderr, " %3u", capdiag->predicted[(0*4+r) * 16 + (1*4+c)]); fprintf(stderr, "\n"); } /* And what daedalus_decoder SHOULD produce: clip(P_rec + shift). */ fprintf(stderr, " MB(3,0) block z=1 expected daedalus output = clip(P_rec + shift):\n"); for (int r = 0; r < 4; r++) { fprintf(stderr, " "); for (int c = 0; c < 4; c++) { int p_rec = capdiag->predicted[(0*4+r) * 16 + (1*4+c)]; int sh = (idct_print[r*4+c] + 32) >> 6; int e = p_rec + sh; if (e < 0) e = 0; if (e > 255) e = 255; fprintf(stderr, " %3d", e); } fprintf(stderr, "\n"); } } #endif } #endif /* Extract one MB's predicted-samples block from a YUV420P AVFrame * (stock libavcodec) and pack it into the 384-byte mb_input.predicted * layout: 16x16 luma raster, then 8x8 Cb raster, then 8x8 Cr raster. * * AVFrame's data[] points at separate Y / U / V planes (or NV12's * interleaved UV — we handle both via the pix_fmt branch). 
*/
/*   fr         : decoded frame; AV_PIX_FMT_YUV420P and AV_PIX_FMT_NV12 are
 *                handled, any other format gets all-zero chroma
 *   mb_x, mb_y : macroblock coordinates (units of 16 luma pixels)
 *   out        : receives 256 luma + 64 Cb + 64 Cr bytes
 *
 * Every plane is addressed with its own linesize. (The Cr base offset
 * used to be computed with the Cb stride, which is only correct when
 * linesize[1] == linesize[2].) Offsets are formed in ptrdiff_t so large
 * frames and negative line sizes do not go through int arithmetic. */
static void pack_mb_predicted(const AVFrame *fr, int mb_x, int mb_y,
                              uint8_t out[384])
{
    uint8_t *const out_cb = out + 256;
    uint8_t *const out_cr = out + 256 + 64;

    /* Luma: 16 rows × 16 cols */
    const ptrdiff_t y_stride = fr->linesize[0];
    const uint8_t *y_src = fr->data[0]
                         + (ptrdiff_t) mb_y * 16 * y_stride
                         + (ptrdiff_t) mb_x * 16;
    for (int r = 0; r < 16; r++)
        memcpy(&out[r * 16], y_src + r * y_stride, 16);

    /* Chroma: 8 rows × 8 cols per component */
    if (fr->format == AV_PIX_FMT_YUV420P) {
        const ptrdiff_t cb_stride = fr->linesize[1];
        const ptrdiff_t cr_stride = fr->linesize[2];
        const uint8_t *cb_src = fr->data[1]
                              + (ptrdiff_t) mb_y * 8 * cb_stride
                              + (ptrdiff_t) mb_x * 8;
        const uint8_t *cr_src = fr->data[2]
                              + (ptrdiff_t) mb_y * 8 * cr_stride
                              + (ptrdiff_t) mb_x * 8;
        for (int r = 0; r < 8; r++) {
            memcpy(&out_cb[r * 8], cb_src + r * cb_stride, 8);
            memcpy(&out_cr[r * 8], cr_src + r * cr_stride, 8);
        }
    } else if (fr->format == AV_PIX_FMT_NV12) {
        /* NV12: interleaved UV plane, deinterleave into Cb/Cr halves.
         * An MB covers 8 UV pairs = 16 bytes per chroma row. */
        const ptrdiff_t uv_stride = fr->linesize[1];
        const uint8_t *uv_src = fr->data[1]
                              + (ptrdiff_t) mb_y * 8 * uv_stride
                              + (ptrdiff_t) mb_x * 16;
        for (int r = 0; r < 8; r++) {
            const uint8_t *row = uv_src + r * uv_stride;
            for (int c = 0; c < 8; c++) {
                out_cb[r * 8 + c] = row[c * 2 + 0];
                out_cr[r * 8 + c] = row[c * 2 + 1];
            }
        }
    } else {
        /* Unsupported pixel format — zero out chroma (test will fail loud) */
        memset(out_cb, 0, 128);
    }
}

/* Convert an AVFrame (YUV420P or NV12) to NV12 in caller-provided
 * planes. Used to write the reference YUV file.
*/ static void avframe_to_nv12(const AVFrame *fr, uint8_t *out_y, size_t y_stride, uint8_t *out_uv, size_t uv_stride, int width, int height) { /* Y plane: row-major copy from src linesize to dst stride */ for (int r = 0; r < height; r++) memcpy(&out_y[(size_t) r * y_stride], &fr->data[0][(size_t) r * fr->linesize[0]], (size_t) width); if (fr->format == AV_PIX_FMT_NV12) { for (int r = 0; r < height / 2; r++) memcpy(&out_uv[(size_t) r * uv_stride], &fr->data[1][(size_t) r * fr->linesize[1]], (size_t) width); } else if (fr->format == AV_PIX_FMT_YUV420P) { /* Interleave U+V → NV12 UV */ const int cw = width / 2, ch = height / 2; for (int r = 0; r < ch; r++) { for (int c = 0; c < cw; c++) { out_uv[(size_t) r * uv_stride + (size_t) c * 2 + 0] = fr->data[1][(size_t) r * fr->linesize[1] + c]; out_uv[(size_t) r * uv_stride + (size_t) c * 2 + 1] = fr->data[2][(size_t) r * fr->linesize[2] + c]; } } } } static int parse_args(int argc, char **argv, const char **in_path, const char **out_dadec_path, const char **out_ref_path) { int i = 1; while (i < argc && argv[i][0] == '-') { if (!strcmp(argv[i], "--substrate") && i + 1 < argc) { substrate_str = argv[++i]; } else if (!strcmp(argv[i], "--max-frames") && i + 1 < argc) { max_frames = atoi(argv[++i]); } else { fprintf(stderr, "unknown option: %s\n", argv[i]); return -1; } i++; } if (argc - i != 3) { fprintf(stderr, "usage: %s [--substrate cpu|qpu|auto] [--max-frames N] " " \n", argv[0]); return -1; } *in_path = argv[i + 0]; *out_dadec_path = argv[i + 1]; *out_ref_path = argv[i + 2]; return 0; } static daedalus_decoder_substrate parse_substrate(const char *s) { if (!strcmp(s, "cpu")) return DAEDALUS_DECODER_SUBSTRATE_CPU; if (!strcmp(s, "qpu")) return DAEDALUS_DECODER_SUBSTRATE_QPU; return DAEDALUS_DECODER_SUBSTRATE_AUTO; } int main(int argc, char **argv) { const char *in_path, *out_dadec_path, *out_ref_path; if (parse_args(argc, argv, &in_path, &out_dadec_path, &out_ref_path) != 0) return 1; /* ---- Open input via libavformat (so 
we get NAL framing for free * from the raw .h264 elementary stream demuxer). ---- */ AVFormatContext *fmt = NULL; if (avformat_open_input(&fmt, in_path, NULL, NULL) < 0) { fprintf(stderr, "avformat_open_input(%s) failed\n", in_path); return 2; } if (avformat_find_stream_info(fmt, NULL) < 0) { fprintf(stderr, "avformat_find_stream_info failed\n"); avformat_close_input(&fmt); return 2; } int vstream = -1; for (unsigned s = 0; s < fmt->nb_streams; s++) if (fmt->streams[s]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) { vstream = (int) s; break; } if (vstream < 0) { fprintf(stderr, "no video stream in %s\n", in_path); avformat_close_input(&fmt); return 2; } /* ---- Open H.264 decoder ---- */ const AVCodec *codec = avcodec_find_decoder(AV_CODEC_ID_H264); AVCodecContext *avctx = avcodec_alloc_context3(codec); avcodec_parameters_to_context(avctx, fmt->streams[vstream]->codecpar); #ifdef DAEDALUS_HAVE_H264_MB_INSPECT_COEFFS /* Patch 0017's coefficient side buffer lives in H264Context (single * per-stream); multi-threaded slice decode would race on it. */ avctx->thread_count = 1; avctx->thread_type = 0; /* PR-A6: keep libavcodec's deblock ON so AVFrame is the post-deblock * reference we validate daedalus against. Per-MB pre_deblock * snapshots taken in the inspection callback (before deblock crosses * into this MB's region) provide daedalus with pre-deblock input. 
*/ #endif if (avcodec_open2(avctx, codec, NULL) < 0) { fprintf(stderr, "avcodec_open2 failed\n"); avformat_close_input(&fmt); return 2; } AVPacket *pkt = av_packet_alloc(); AVFrame *fr = av_frame_alloc(); /* ---- Allocate output buffers + state needed before first decode ---- */ daedalus_decoder *dec = NULL; uint8_t *out_y_dadec = NULL, *out_uv_dadec = NULL; uint8_t *out_y_ref = NULL, *out_uv_ref = NULL; size_t y_size = 0, uv_size = 0; FILE *out_dadec_f = NULL, *out_ref_f = NULL; int rc = 0; int n_frames = 0; size_t total_y_diffs = 0, total_uv_diffs = 0; #ifdef DAEDALUS_HAVE_H264_MB_INSPECT_CB /* Init inspect state BEFORE the first avcodec_send_packet — the * callback fires from inside send_packet (i.e. before the first * receive_frame ever returns), so lazy-init after-the-fact * would miss the entire first frame. Use codecpar dims; round * up to MB granularity (H.264 codes 1080 height as 1088). */ struct inspect_state inspect_st = {0}; { const AVCodecParameters *cp = fmt->streams[vstream]->codecpar; const int W_round = (cp->width + 15) & ~15; const int H_round = (cp->height + 15) & ~15; inspect_st.mb_w = W_round / 16; inspect_st.mb_h = H_round / 16; inspect_st.seen = calloc(1, (size_t) inspect_st.mb_w * inspect_st.mb_h); if (!inspect_st.seen) { rc = 1; goto cleanup; } #ifdef DAEDALUS_HAVE_H264_MB_INSPECT_COEFFS inspect_st.captures = calloc((size_t) inspect_st.mb_w * inspect_st.mb_h, sizeof(*inspect_st.captures)); if (!inspect_st.captures) { rc = 1; goto cleanup; } #endif } ff_h264_set_mb_inspect_cb(avctx, inspect_cb, &inspect_st); int inspect_total_cbs = 0; int inspect_total_duplicate = 0; int inspect_total_oob = 0; int inspect_total_missing = 0; #endif /* ---- daedalus_decoder is lazy-created on the first AVFrame * (coded width/height come from the bitstream's SPS via * libavcodec). 
---- */ while (av_read_frame(fmt, pkt) >= 0) { if (pkt->stream_index != vstream) { av_packet_unref(pkt); continue; } if (avcodec_send_packet(avctx, pkt) < 0) { fprintf(stderr, "send_packet failed\n"); rc = 2; goto cleanup; } av_packet_unref(pkt); for (;;) { int ret = avcodec_receive_frame(avctx, fr); if (ret == AVERROR(EAGAIN)) break; if (ret < 0) { fprintf(stderr, "receive_frame failed: %d\n", ret); rc = 2; goto cleanup; } /* Lazily create the daedalus_decoder + output planes on * the first frame so the SPS-derived coded width/height * are known. */ if (!dec) { /* Coded (= MB-aligned) dimensions are on AVCodecContext, * not AVFrame (which carries the cropped display size). */ const int W = avctx->coded_width ? avctx->coded_width : fr->width; const int H = avctx->coded_height ? avctx->coded_height : fr->height; if ((W & 15) || (H & 15)) { fprintf(stderr, "coded dims %dx%d not mod-16; skip\n", W, H); rc = 2; goto cleanup; } dec = daedalus_decoder_create(W, H); if (!dec) { fprintf(stderr, "daedalus_decoder_create failed\n"); rc = 3; goto cleanup; } daedalus_decoder_set_substrate(dec, parse_substrate(substrate_str)); y_size = (size_t) W * (size_t) H; uv_size = y_size / 2; out_y_dadec = malloc(y_size); out_uv_dadec = malloc(uv_size); out_y_ref = malloc(y_size); out_uv_ref = malloc(uv_size); out_dadec_f = fopen(out_dadec_path, "wb"); out_ref_f = fopen(out_ref_path, "wb"); if (!out_y_dadec || !out_uv_dadec || !out_y_ref || !out_uv_ref || !out_dadec_f || !out_ref_f) { fprintf(stderr, "alloc / fopen failed\n"); rc = 1; goto cleanup; } printf("daedalus_decode_h264: %dx%d, substrate=%s\n", W, H, substrate_str); #ifdef DAEDALUS_HAVE_H264_MB_INSPECT_CB printf(" inspection callback: ACTIVE (patched libavcodec); " "mb-grid %dx%d\n", inspect_st.mb_w, inspect_st.mb_h); #else printf(" inspection callback: not built in (stock libavcodec)\n"); #endif } /* Pack each MB's predicted samples from the AVFrame. 
* Coeffs = 0; no edges; daedalus_decoder will reproduce * exactly the AVFrame pixels. Use coded_width/coded_height * for MB-grid alignment (e.g. 1920x1088 for 1080p display). */ const int coded_w = avctx->coded_width ? avctx->coded_width : avctx->width; const int coded_h = avctx->coded_height ? avctx->coded_height : avctx->height; const int mb_w = coded_w / 16; const int mb_h = coded_h / 16; uint8_t mb_pred[384]; int16_t mb_coeffs[384] = {0}; struct daedalus_decoder_edge mb_edges[16]; struct daedalus_decoder_mb_input mb = {0}; #ifdef DAEDALUS_HAVE_H264_MB_INSPECT_COEFFS /* PR-A6 edge derivation: a = 52 + slice_alpha_c0_offset, * b = 52 + slice_beta_offset (per FFmpeg loopfilter.c * convention; absorbs the offset into the tripled tables). */ const int slice_a = 52 + inspect_st.slice_alpha_c0_offset; const int slice_b = 52 + inspect_st.slice_beta_offset; /* FFmpeg's h264_slice.c inverts the spec's disable_deblocking_filter_idc * via `sl->deblocking_filter ^= 1` (line ~1901). Internal convention: * 0 = disabled (spec = 1) * 1 = enabled (spec = 0) * 2 = enabled-but-not-across-slice-boundaries (unchanged) * So deblock is OFF iff sl->deblocking_filter == 0. */ const int deblock_off = inspect_st.slice_deblock_disable == 0; #endif for (int my = 0; my < mb_h; my++) { for (int mx = 0; mx < mb_w; mx++) { /* Default: identity-passthrough — luma from AVFrame, * chroma from AVFrame, coeffs all zero, no edges. */ pack_mb_predicted(fr, mx, my, mb_pred); memset(mb_coeffs, 0, sizeof(mb_coeffs)); int n_edges = 0; #ifdef DAEDALUS_HAVE_H264_MB_INSPECT_COEFFS /* PR-A6: feed daedalus pre-deblock pixels from the * per-MB snapshots taken in the callback (AVFrame is * now post-deblock — used as reference, not as input). */ const int mb_idx = my * mb_w + mx; const struct mb_capture *cap = &inspect_st.captures[mb_idx]; /* Luma: P_rec for real-coeffs MBs, raw pre-deblock snap * otherwise (with zero coeffs). Both produce the same * pre-deblock state after daedalus IDCT-add. 
*/ if (cap->valid) { memcpy(mb_pred, cap->predicted, 256); for (int i = 0; i < 256; i++) mb_coeffs[i] = cap->coeffs[i]; } else { memcpy(mb_pred, cap->pre_deblock_snap_y, 256); } /* Chroma: always identity-passthrough from snap. * Chroma DC Hadamard + chroma residual extraction is * a follow-up (PR-A4). */ memcpy(mb_pred + 256, cap->pre_deblock_snap_cb, 64); memcpy(mb_pred + 256 + 64, cap->pre_deblock_snap_cr, 64); /* Derive deblock edges for this MB. Spec §8.7.2: * - Frame-boundary edges: skip (bS=0 — kernel reads p3 at -4). * - MB-boundary edges with intra neighbour: bS=4. * - Internal MB edges within intra MB: bS=3. * - 8x8 DCT MBs: internal edges only at col/row 8 (the * single 8x8-block boundary inside the MB). * For non-intra MB types in mixed streams the bS rules * differ; we'd need cbp/MV/ref info from sl context for * those. Our test stream is all-intra, so simplified. */ if (!deblock_off && cap->mb_type_intra && !getenv("DAEDALUS_SKIP_EDGES")) { const int qp_self = cap->qp_y; const int qp_left = (mx > 0) ? inspect_st.captures[mb_idx - 1].qp_y : qp_self; const int qp_top = (my > 0) ? inspect_st.captures[mb_idx - mb_w].qp_y : qp_self; const int qpc_self = chroma_qp_table[qp_self]; const int qpc_left = chroma_qp_table[qp_left]; const int qpc_top = chroma_qp_table[qp_top]; const int qp_avg_left = (qp_self + qp_left + 1) >> 1; const int qp_avg_top = (qp_self + qp_top + 1) >> 1; const int qpc_avg_left = (qpc_self + qpc_left + 1) >> 1; const int qpc_avg_top = (qpc_self + qpc_top + 1) >> 1; /* Helper macro to emit one edge. bS=0 (skip) * edges are still emitted with bS=0 — daedalus's * partitioner filters them out. 
*/ #define EMIT_EDGE(orient_, plane_, edge_idx_, bS_, qp_) do { \ if (n_edges >= 16) break; \ struct daedalus_decoder_edge *e = &mb_edges[n_edges++]; \ e->mb_x = (uint16_t) mx; \ e->mb_y = (uint16_t) my; \ e->edge_idx = (uint8_t) (edge_idx_); \ e->orient = (uint8_t) (orient_); \ e->plane = (uint8_t) (plane_); \ e->bS = (uint8_t) (bS_); \ e->alpha = alpha_table[(qp_) + slice_a]; \ e->beta = beta_table [(qp_) + slice_b]; \ const int8_t *tc = tc0_table[(qp_) + slice_a]; \ e->tc0[0] = tc[(bS_) <= 3 ? (bS_) : 0]; \ e->tc0[1] = tc[(bS_) <= 3 ? (bS_) : 0]; \ e->tc0[2] = tc[(bS_) <= 3 ? (bS_) : 0]; \ e->tc0[3] = tc[(bS_) <= 3 ? (bS_) : 0]; \ } while (0) /* Luma V edges: 4 at col 0, 4, 8, 12. Internal * edges at 4/12 are skipped for 8x8 DCT MBs. */ EMIT_EDGE(0, 0, 0, (mx > 0) ? 4 : 0, qp_avg_left); if (!cap->transform_8x8) EMIT_EDGE(0, 0, 1, 3, qp_self); EMIT_EDGE(0, 0, 2, 3, qp_self); if (!cap->transform_8x8) EMIT_EDGE(0, 0, 3, 3, qp_self); /* Luma H edges: 4 at row 0, 4, 8, 12. */ EMIT_EDGE(1, 0, 0, (my > 0) ? 4 : 0, qp_avg_top); if (!cap->transform_8x8) EMIT_EDGE(1, 0, 1, 3, qp_self); EMIT_EDGE(1, 0, 2, 3, qp_self); if (!cap->transform_8x8) EMIT_EDGE(1, 0, 3, 3, qp_self); /* Chroma V edges: 2 per plane (Cb=1, Cr=2). */ EMIT_EDGE(0, 1, 0, (mx > 0) ? 4 : 0, qpc_avg_left); if (!cap->transform_8x8) EMIT_EDGE(0, 1, 1, 3, qpc_self); EMIT_EDGE(0, 2, 0, (mx > 0) ? 4 : 0, qpc_avg_left); if (!cap->transform_8x8) EMIT_EDGE(0, 2, 1, 3, qpc_self); /* Chroma H edges. */ EMIT_EDGE(1, 1, 0, (my > 0) ? 4 : 0, qpc_avg_top); if (!cap->transform_8x8) EMIT_EDGE(1, 1, 1, 3, qpc_self); EMIT_EDGE(1, 2, 0, (my > 0) ? 4 : 0, qpc_avg_top); if (!cap->transform_8x8) EMIT_EDGE(1, 2, 1, 3, qpc_self); #undef EMIT_EDGE } #endif mb.mb_x = (uint16_t) mx; mb.mb_y = (uint16_t) my; mb.transform_8x8 = 0; mb.coeffs = mb_coeffs; mb.predicted = mb_pred; mb.edges = (n_edges > 0) ? 
mb_edges : NULL; mb.n_edges = (uint8_t) n_edges; if (mx == 0 && my == 0 && getenv("DAEDALUS_DUMP_EDGES")) { fprintf(stderr, "MB(0,0): n_edges=%d, deblock_off=%d, intra=%d\n", n_edges, deblock_off, cap->mb_type_intra); for (int e = 0; e < n_edges; e++) { fprintf(stderr, " edge[%d]: orient=%d plane=%d edge_idx=%d bS=%d alpha=%d beta=%d tc0=%d,%d,%d,%d\n", e, mb_edges[e].orient, mb_edges[e].plane, mb_edges[e].edge_idx, mb_edges[e].bS, mb_edges[e].alpha, mb_edges[e].beta, mb_edges[e].tc0[0], mb_edges[e].tc0[1], mb_edges[e].tc0[2], mb_edges[e].tc0[3]); } } if (daedalus_decoder_append_mb(dec, &mb) != 0) { fprintf(stderr, "append_mb (%d,%d) failed\n", mx, my); rc = 3; goto cleanup; } } } int frc = daedalus_decoder_flush_frame(dec, out_y_dadec, (size_t) coded_w, out_uv_dadec, (size_t) coded_w); if (frc != 0) { fprintf(stderr, "flush_frame frame %d rc=%d\n", n_frames, frc); rc = 3; goto cleanup; } /* Build the reference NV12 from the AVFrame for comparison. */ avframe_to_nv12(fr, out_y_ref, (size_t) coded_w, out_uv_ref, (size_t) coded_w, coded_w, coded_h); /* (PR-A3b's pre_deblock vs AVFrame DIAG check is removed in * PR-A6: with libavcodec's deblock now ENABLED, AVFrame is * post-deblock and intentionally differs from the per-MB * pre_deblock snapshots taken in the callback.) */ /* Byte-exact compare + first-diff diagnostic. 
*/ size_t y_diffs = 0, uv_diffs = 0; size_t y_first_diff = (size_t) -1; for (size_t i = 0; i < y_size; i++) if (out_y_dadec[i] != out_y_ref[i]) { if (y_first_diff == (size_t) -1) y_first_diff = i; y_diffs++; } for (size_t i = 0; i < uv_size; i++) if (out_uv_dadec[i] != out_uv_ref[i]) uv_diffs++; if (y_diffs && y_first_diff != (size_t) -1) { const size_t row = y_first_diff / (size_t) avctx->width; const size_t col = y_first_diff % (size_t) avctx->width; const size_t mb_x = col / 16; const size_t mb_y = row / 8; /* not row/16 — chroma row uses /8 so use raw row here */ #ifdef DAEDALUS_HAVE_H264_MB_INSPECT_COEFFS const int mb_idx = (int)(row / 16) * mb_w + (int) mb_x; const int real = (mb_idx >= 0 && mb_idx < mb_w * mb_h) ? inspect_st.captures[mb_idx].valid : -1; printf(" first Y diff @ byte %zu = (row %zu, col %zu) in MB(%zu,%zu) [real-coeffs=%d]; " "dadec=%u ref=%u\n", y_first_diff, row, col, mb_x, row / 16, real, out_y_dadec[y_first_diff], out_y_ref[y_first_diff]); #else (void) mb_x; (void) mb_y; printf(" first Y diff @ byte %zu = (row %zu, col %zu); dadec=%u ref=%u\n", y_first_diff, row, col, out_y_dadec[y_first_diff], out_y_ref[y_first_diff]); #endif } total_y_diffs += y_diffs; total_uv_diffs += uv_diffs; #ifdef DAEDALUS_HAVE_H264_MB_INSPECT_CB { const int expected = mb_w * mb_h; /* Count MBs that fired the callback. */ int seen_count = 0; for (int i = 0; i < expected; i++) if (inspect_st.seen[i]) seen_count++; int missing = expected - seen_count; if (missing || inspect_st.duplicate_mbs || inspect_st.out_of_bounds) { fprintf(stderr, " frame %d: callback invariants: fired=%d expected=%d " "missing=%d duplicates=%d oob=%d\n", n_frames, inspect_st.n_cbs_this_frame, expected, missing, inspect_st.duplicate_mbs, inspect_st.out_of_bounds); rc = 4; } inspect_total_cbs += inspect_st.n_cbs_this_frame; inspect_total_duplicate += inspect_st.duplicate_mbs; inspect_total_oob += inspect_st.out_of_bounds; inspect_total_missing += missing; /* Reset for next frame. 
*/ inspect_st.n_cbs_this_frame = 0; inspect_st.duplicate_mbs = 0; inspect_st.out_of_bounds = 0; memset(inspect_st.seen, 0, (size_t) expected); #ifdef DAEDALUS_HAVE_H264_MB_INSPECT_COEFFS printf(" frame %d: real-coeffs path %d MBs, " "skipped intra16x16=%d 8x8dct=%d other=%d\n", n_frames, inspect_st.real_coeffs_mbs, inspect_st.skipped_intra16x16, inspect_st.skipped_8x8dct, inspect_st.skipped_other); inspect_st.real_coeffs_mbs = 0; inspect_st.skipped_intra16x16 = 0; inspect_st.skipped_8x8dct = 0; inspect_st.skipped_other = 0; memset(inspect_st.captures, 0, (size_t) expected * sizeof(*inspect_st.captures)); #endif } #endif printf(" frame %d: Y diff %zu/%zu UV diff %zu/%zu%s\n", n_frames, y_diffs, y_size, uv_diffs, uv_size, (y_diffs || uv_diffs) ? " ***" : ""); /* Write both YUVs to disk. */ fwrite(out_y_dadec, 1, y_size, out_dadec_f); fwrite(out_uv_dadec, 1, uv_size, out_dadec_f); fwrite(out_y_ref, 1, y_size, out_ref_f); fwrite(out_uv_ref, 1, uv_size, out_ref_f); n_frames++; if (max_frames > 0 && n_frames >= max_frames) goto drained; } } /* Flush libavcodec for any remaining buffered frames. */ avcodec_send_packet(avctx, NULL); for (;;) { int ret = avcodec_receive_frame(avctx, fr); if (ret < 0) break; (void) ret; /* Same loop body as above would go here; omitted for brevity — * stock libavcodec rarely buffers I-only streams. 
*/ n_frames++; } drained: printf("\n%d frames decoded; total Y diff %zu, UV diff %zu\n", n_frames, total_y_diffs, total_uv_diffs); #ifdef DAEDALUS_HAVE_H264_MB_INSPECT_CB printf("inspection callback: %d total invocations, %d missing, %d duplicates, %d oob\n", inspect_total_cbs, inspect_total_missing, inspect_total_duplicate, inspect_total_oob); if (inspect_total_missing || inspect_total_duplicate || inspect_total_oob) rc = 4; #endif if (rc == 0 && (total_y_diffs || total_uv_diffs)) { printf("FAIL: daedalus-decoder output does NOT match libavcodec reference byte-for-byte\n"); rc = 4; } else if (rc == 0) { printf("PASS: byte-exact identity-passthrough across %d frames\n", n_frames); } else { printf("FAIL: %s\n", (total_y_diffs || total_uv_diffs) ? "byte-exact comparison failed" : "inspection callback invariants violated"); } cleanup: if (out_dadec_f) fclose(out_dadec_f); if (out_ref_f) fclose(out_ref_f); free(out_uv_ref); free(out_y_ref); free(out_uv_dadec);free(out_y_dadec); #ifdef DAEDALUS_HAVE_H264_MB_INSPECT_CB free(inspect_st.seen); # ifdef DAEDALUS_HAVE_H264_MB_INSPECT_COEFFS free(inspect_st.captures); # endif #endif if (dec) daedalus_decoder_destroy(dec); av_frame_free(&fr); av_packet_free(&pkt); avcodec_free_context(&avctx); avformat_close_input(&fmt); return rc; }