Files
daedalus-decoder/tools/daedalus_decode_h264.c
T

771 lines
33 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/* SPDX-License-Identifier: BSD-2-Clause */
/*
* daedalus_decode_h264 — option A standalone test harness for
* daedalus-decoder against real H.264 streams.
*
* Decodes an H.264 file via stock libavcodec (the reference), AND
* in parallel runs the same frame through daedalus-decoder in
* identity-passthrough mode (predicted = libavcodec's reconstructed
* frame, coeffs = 0, no deblock edges). Writes both outputs as
* NV12 YUV, then byte-exact diffs.
*
* PR-A1b purpose: validate the daedalus-decoder data path / API
* contract at real-stream frame sizes (16k+ MBs at 1080p, real
* H.264-decoded predicted-sample distributions), without yet
* requiring per-MB internal state extraction from libavcodec.
* Follow-up PRs (A2+) extend this harness to feed REAL per-MB
* state (residual coeffs, pre-residual predicted, deblock edges)
* via the per-MB inspection callback added in marfrit-packages
* patch 0016 (PR #106).
*
* Identity-passthrough math:
* - mb_input.predicted = AVFrame pixels at this MB's raster pos
* - mb_input.coeffs = 384 int16's, all zero
* - mb_input.edges = NULL, n_edges = 0
* Then flush_frame:
* scratch_y/_uv pre-fill from predicted_y/_uv = AVFrame pixels
* IDCT dispatches with all-zero coeffs add 0 (no-op)
* No deblock dispatches (no edges)
* copy-out to caller's planes
* Result MUST equal AVFrame pixels byte-for-byte.
*
* Invoke:
* daedalus_decode_h264 [--substrate cpu|qpu|auto]
* [--max-frames N]
* <input.h264> <output_dadec.yuv> <output_ref.yuv>
*
* Exit status:
* 0 — bit-exact match across all decoded frames
* 1 — argument / setup error
* 2 — decode error from libavcodec
* 3 — daedalus-decoder error (ctx, append, flush)
* 4 — bit-exact comparison failed (diff > 0 bytes)
*/
#define _POSIX_C_SOURCE 200809L
#include "daedalus_decoder.h"
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavutil/imgutils.h>
/* Per-MB inspection callback API — provided by the patched FFmpeg
* fork via marfrit-packages patches 0016 + 0017.
*
* When DAEDALUS_HAVE_H264_MB_INSPECT_COEFFS is defined (CMake sets it
* alongside DAEDALUS_FFMPEG_SRC), we include libavcodec's INTERNAL
* h264dec.h header to dereference H264Context fields — specifically
* h->mb_inspect_coeffs (the 0017 side buffer holding pre-IDCT-
* destruction sl->mb), h->cur_pic.f (pre-deblock reconstructed pixels),
* and h->cur_pic.mb_type[mb_xy] for the mb-type gate. The same
* configure-time config.h that built the static libavcodec.a is
* picked up via -DHAVE_AV_CONFIG_H + -I path; ABI match is automatic.
*
* When only DAEDALUS_HAVE_H264_MB_INSPECT_CB is defined (no source
* tree available — e.g. building against a distro-shipped patched
* libavcodec), the H264Context stays opaque and we fall back to
* identity-passthrough across all MBs.
*
* When neither is defined: stock libavcodec, no callback, identity-
* passthrough only (PR-A1b behaviour). */
#ifdef DAEDALUS_HAVE_H264_MB_INSPECT_COEFFS
# include "libavcodec/h264dec.h"
# include "libavcodec/h264.h" /* IS_INTRA4x4 / IS_8x8DCT / IS_INTRA_PCM */
#elif defined(DAEDALUS_HAVE_H264_MB_INSPECT_CB)
struct H264Context;
#endif
#if defined(DAEDALUS_HAVE_H264_MB_INSPECT_CB) || defined(DAEDALUS_HAVE_H264_MB_INSPECT_COEFFS)
typedef void (*ff_h264_mb_inspect_cb)(void *opaque,
const struct H264Context *h,
int mb_x, int mb_y);
void ff_h264_set_mb_inspect_cb(AVCodecContext *avctx,
ff_h264_mb_inspect_cb cb, void *opaque);
#endif
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <inttypes.h>
static const char *substrate_str = "auto";
static int max_frames = -1;
/* Inspection-callback state: per-frame counter + "each MB seen exactly
* once" check. Bitmap, not raster-order — libavcodec's MB threading +
* multi-slice frames mean MBs reach the callback out of strict order;
* contract is "every MB fires the callback exactly once per frame".
*
* When real-coeff extraction is compiled in (PR-A3+), we ALSO maintain
* a per-MB capture buffer (real-coeffs path) so the main loop can
* drive daedalus_decoder_append_mb with REAL pre-residual P + real
* coefficients for MBs that satisfy the gate (Intra_4x4, no 8x8 DCT,
* no PCM). Other MBs stay on identity-passthrough. */
#ifdef DAEDALUS_HAVE_H264_MB_INSPECT_CB
struct mb_capture {
int valid; /* 1 = real-coeffs path, 0 = identity passthrough */
int16_t coeffs[256]; /* luma, column-major within 4x4, raster block order */
uint8_t predicted[256]; /* luma P recovered = pre_deblock - clipped IDCT(C) */
};
struct inspect_state {
int n_cbs_this_frame;
int mb_w, mb_h;
uint8_t *seen; /* mb_w * mb_h bitmap */
int duplicate_mbs;
int out_of_bounds;
#ifdef DAEDALUS_HAVE_H264_MB_INSPECT_COEFFS
struct mb_capture *captures; /* mb_w * mb_h entries */
int real_coeffs_mbs; /* count of MBs in real-coeffs path this frame */
int skipped_intra16x16;
int skipped_8x8dct;
int skipped_other;
#endif
};
#ifdef DAEDALUS_HAVE_H264_MB_INSPECT_COEFFS
/* libavcodec's sl->mb stores coefficients in RASTER (row-major) order,
* not zig-zag scan order — h264_cavlc.c does
* block[*scantable] = (level * qmul[*scantable] + 32) >> 6
* where *scantable advances through ff_zigzag_scan[] which contains
* RASTER positions (row*4 + col). So sl->mb[i] = coef at raster
* position i = (i/4, i%4) = (row, col). No inverse-zigzag needed;
* just transpose row-major → column-major (daedalus's convention). */
/* H.264 §6.4.3 4x4 luma block scan within MB (z-scan).
* Maps raster-block-idx (sb_y*4+sb_x) → libavcodec sl->mb's z-scan idx.
* Z-scan happens to be its own inverse (symmetric mapping). */
static const uint8_t raster_to_zscan[16] = {
0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15
};
/* H.264 4x4 IDCT — transcribed from daedalus-fourier
* tests/test_idct_bitexact.c (which itself mirrors h264_idct4_ref.c).
* Outputs row-major 16-element residual; clip + shift happens in
* the consumer. */
static void h264_idct4_butterfly(const int d[4], int out[4]) {
int e = d[0] + d[2];
int f = d[0] - d[2];
int g = (d[1] >> 1) - d[3];
int h = d[1] + (d[3] >> 1);
out[0] = e + h;
out[1] = f + g;
out[2] = f - g;
out[3] = e - h;
}
static void ref_idct4_compute(const int16_t block[16], int out[16]) {
/* block COLUMN-MAJOR: block[c*4+r] = coef at (row=r, col=c). */
int tmp[4][4];
for (int r = 0; r < 4; r++) {
int d[4] = { block[0*4+r], block[1*4+r], block[2*4+r], block[3*4+r] };
int o[4];
h264_idct4_butterfly(d, o);
for (int c = 0; c < 4; c++) tmp[r][c] = o[c];
}
for (int c = 0; c < 4; c++) {
int d[4] = { tmp[0][c], tmp[1][c], tmp[2][c], tmp[3][c] };
int o[4];
h264_idct4_butterfly(d, o);
for (int r = 0; r < 4; r++) out[r*4+c] = o[r];
}
}
#endif /* DAEDALUS_HAVE_H264_MB_INSPECT_COEFFS */
static void inspect_cb(void *opaque,
const struct H264Context *h,
int mb_x, int mb_y)
{
struct inspect_state *st = opaque;
#ifndef DAEDALUS_HAVE_H264_MB_INSPECT_COEFFS
(void) h;
#endif
if (mb_x < 0 || mb_x >= st->mb_w || mb_y < 0 || mb_y >= st->mb_h) {
st->out_of_bounds++;
st->n_cbs_this_frame++;
return;
}
const size_t idx = (size_t) mb_y * st->mb_w + (size_t) mb_x;
if (st->seen[idx]) st->duplicate_mbs++;
st->seen[idx] = 1;
st->n_cbs_this_frame++;
#ifdef DAEDALUS_HAVE_H264_MB_INSPECT_COEFFS
/* Real-coeffs path: extract per-MB state for daedalus-decoder
* IDCT validation on this MB. Gate: only Intra_4x4 + 4x4 transform
* + non-PCM is supported in PR-A3b — other MB flavours fall back
* to identity-passthrough in the main loop. */
struct mb_capture *cap = &st->captures[idx];
cap->valid = 0; /* default to passthrough */
const int mb_xy = mb_y * h->mb_stride + mb_x;
const uint32_t mb_type = h->cur_pic.mb_type[mb_xy];
if (!IS_INTRA4x4(mb_type)) {
if (IS_INTRA16x16(mb_type)) st->skipped_intra16x16++;
else st->skipped_other++;
return;
}
if (IS_8x8DCT(mb_type)) { st->skipped_8x8dct++; return; }
if (IS_INTRA_PCM(mb_type)) { st->skipped_other++; return; }
/* Snapshot luma pre-deblock pixels from cur_pic. */
const uint8_t *luma_plane = h->cur_pic.f->data[0];
const int luma_stride = h->cur_pic.f->linesize[0];
const uint8_t *mb_pixels = luma_plane + (ptrdiff_t) mb_y * 16 * luma_stride
+ mb_x * 16;
/* Coefficients are in sl->mb at end of entropy decode but zeroed by
* the time the callback fires (IDCT-add consumed them). Patch 0017
* preserves them in h->mb_inspect_coeffs[16 * 48] BEFORE IDCT runs,
* so we read from there. */
const int16_t *zz_mb = h->mb_inspect_coeffs; /* layout matches sl->mb 8-bit half */
for (int r_block = 0; r_block < 16; r_block++) {
const int z_block = raster_to_zscan[r_block];
const int16_t *row_block = &zz_mb[z_block * 16];
/* sl->mb is row-major; daedalus wants column-major. Transpose. */
int16_t col[16];
for (int r = 0; r < 4; r++)
for (int c = 0; c < 4; c++)
col[c * 4 + r] = row_block[r * 4 + c];
memcpy(&cap->coeffs[r_block * 16], col, 16 * sizeof(int16_t));
/* IDCT → row-major 16-int residual. */
int idct_row[16];
ref_idct4_compute(col, idct_row);
/* P = clip(pre_deblock - ((IDCT + 32) >> 6)) for each pixel.
* Symmetric: daedalus IDCT-add will undo the subtract, including
* for saturating cases (where the same shift puts the value back
* at the same clip boundary). */
const int sb_y = r_block >> 2;
const int sb_x = r_block & 3;
for (int r = 0; r < 4; r++) {
for (int c = 0; c < 4; c++) {
const int pre_db = mb_pixels[(sb_y * 4 + r) * luma_stride + sb_x * 4 + c];
const int shift = (idct_row[r * 4 + c] + 32) >> 6;
int p = pre_db - shift;
if (p < 0) p = 0;
if (p > 255) p = 255;
cap->predicted[(sb_y * 4 + r) * 16 + (sb_x * 4 + c)] = (uint8_t) p;
}
}
}
cap->valid = 1;
st->real_coeffs_mbs++;
/* One-shot diagnostic: dump block 1 (z=raster=1) of MB(3, 0). */
if (mb_x == 3 && mb_y == 0) {
const int16_t *zz = &zz_mb[1 * 16]; /* z_block = raster_block = 1 */
const struct mb_capture *capdiag = &st->captures[mb_y * st->mb_w + mb_x];
fprintf(stderr, " MB(3,0) block z=1 raster coeffs (sl->mb):");
for (int p = 0; p < 16; p++) fprintf(stderr, " %d", (int) zz[p]);
fprintf(stderr, "\n");
fprintf(stderr, " MB(3,0) block z=1 col_major coeffs (after transpose):");
for (int i = 0; i < 16; i++) fprintf(stderr, " %d", (int) capdiag->coeffs[1 * 16 + i]);
fprintf(stderr, "\n");
/* Recompute IDCT for this block (already done in the loop above but
* print here for visibility). */
int idct_print[16];
ref_idct4_compute(&capdiag->coeffs[1 * 16], idct_print);
fprintf(stderr, " MB(3,0) block z=1 IDCT row-major (raw, pre-shift):");
for (int i = 0; i < 16; i++) fprintf(stderr, " %d", idct_print[i]);
fprintf(stderr, "\n");
fprintf(stderr, " MB(3,0) block z=1 IDCT (+32)>>6:");
for (int i = 0; i < 16; i++) fprintf(stderr, " %d", (idct_print[i] + 32) >> 6);
fprintf(stderr, "\n");
const uint8_t *bpix = mb_pixels + 0 * luma_stride + 4; /* sb_y=0, sb_x=1 → cols 4..7 within MB */
fprintf(stderr, " MB(3,0) block z=1 pre_deblock pixels:\n");
for (int r = 0; r < 4; r++) {
fprintf(stderr, " ");
for (int c = 0; c < 4; c++)
fprintf(stderr, " %3u", bpix[r * luma_stride + c]);
fprintf(stderr, "\n");
}
fprintf(stderr, " MB(3,0) block z=1 P_rec (= pre_deblock - shift):\n");
for (int r = 0; r < 4; r++) {
fprintf(stderr, " ");
for (int c = 0; c < 4; c++)
fprintf(stderr, " %3u", capdiag->predicted[(0*4+r) * 16 + (1*4+c)]);
fprintf(stderr, "\n");
}
/* And what daedalus_decoder SHOULD produce: clip(P_rec + shift). */
fprintf(stderr, " MB(3,0) block z=1 expected daedalus output = clip(P_rec + shift):\n");
for (int r = 0; r < 4; r++) {
fprintf(stderr, " ");
for (int c = 0; c < 4; c++) {
int p_rec = capdiag->predicted[(0*4+r) * 16 + (1*4+c)];
int sh = (idct_print[r*4+c] + 32) >> 6;
int e = p_rec + sh;
if (e < 0) e = 0; if (e > 255) e = 255;
fprintf(stderr, " %3d", e);
}
fprintf(stderr, "\n");
}
}
#endif
}
#endif
/* Extract one MB's predicted-samples block from a YUV420P AVFrame
* (stock libavcodec) and pack it into the 384-byte mb_input.predicted
* layout: 16x16 luma raster, then 8x8 Cb raster, then 8x8 Cr raster.
*
* AVFrame's data[] points at separate Y / U / V planes (or NV12's
* interleaved UV — we handle both via the pix_fmt branch). */
static void pack_mb_predicted(const AVFrame *fr, int mb_x, int mb_y,
uint8_t out[384])
{
const int y_off = mb_y * 16 * fr->linesize[0] + mb_x * 16;
const int uv_off = mb_y * 8 * fr->linesize[1] + mb_x * 8;
/* Luma: 16 rows × 16 cols */
for (int r = 0; r < 16; r++)
memcpy(&out[r * 16],
&fr->data[0][y_off + r * fr->linesize[0]],
16);
/* Chroma: 8 rows × 8 cols per component */
if (fr->format == AV_PIX_FMT_YUV420P) {
for (int r = 0; r < 8; r++) {
memcpy(&out[256 + r * 8],
&fr->data[1][uv_off + r * fr->linesize[1]], 8);
memcpy(&out[256 + 64 + r * 8],
&fr->data[2][uv_off + r * fr->linesize[2]], 8);
}
} else if (fr->format == AV_PIX_FMT_NV12) {
/* NV12: interleaved UV plane, deinterleave into Cb/Cr halves */
const int uv_off_nv12 = mb_y * 8 * fr->linesize[1] + mb_x * 16;
for (int r = 0; r < 8; r++) {
for (int c = 0; c < 8; c++) {
out[256 + r * 8 + c] = fr->data[1][uv_off_nv12 + r * fr->linesize[1] + c * 2 + 0];
out[256 + 64 + r * 8 + c] = fr->data[1][uv_off_nv12 + r * fr->linesize[1] + c * 2 + 1];
}
}
} else {
/* Unsupported pixel format — zero out chroma (test will fail loud) */
memset(&out[256], 0, 128);
}
}
/* Convert an AVFrame (YUV420P or NV12) to NV12 in caller-provided
* planes. Used to write the reference YUV file. */
static void avframe_to_nv12(const AVFrame *fr, uint8_t *out_y, size_t y_stride,
uint8_t *out_uv, size_t uv_stride,
int width, int height)
{
/* Y plane: row-major copy from src linesize to dst stride */
for (int r = 0; r < height; r++)
memcpy(&out_y[(size_t) r * y_stride],
&fr->data[0][(size_t) r * fr->linesize[0]],
(size_t) width);
if (fr->format == AV_PIX_FMT_NV12) {
for (int r = 0; r < height / 2; r++)
memcpy(&out_uv[(size_t) r * uv_stride],
&fr->data[1][(size_t) r * fr->linesize[1]],
(size_t) width);
} else if (fr->format == AV_PIX_FMT_YUV420P) {
/* Interleave U+V → NV12 UV */
const int cw = width / 2, ch = height / 2;
for (int r = 0; r < ch; r++) {
for (int c = 0; c < cw; c++) {
out_uv[(size_t) r * uv_stride + (size_t) c * 2 + 0] =
fr->data[1][(size_t) r * fr->linesize[1] + c];
out_uv[(size_t) r * uv_stride + (size_t) c * 2 + 1] =
fr->data[2][(size_t) r * fr->linesize[2] + c];
}
}
}
}
static int parse_args(int argc, char **argv,
const char **in_path,
const char **out_dadec_path,
const char **out_ref_path)
{
int i = 1;
while (i < argc && argv[i][0] == '-') {
if (!strcmp(argv[i], "--substrate") && i + 1 < argc) {
substrate_str = argv[++i];
} else if (!strcmp(argv[i], "--max-frames") && i + 1 < argc) {
max_frames = atoi(argv[++i]);
} else {
fprintf(stderr, "unknown option: %s\n", argv[i]);
return -1;
}
i++;
}
if (argc - i != 3) {
fprintf(stderr,
"usage: %s [--substrate cpu|qpu|auto] [--max-frames N] "
"<input.h264> <output_dadec.yuv> <output_ref.yuv>\n", argv[0]);
return -1;
}
*in_path = argv[i + 0];
*out_dadec_path = argv[i + 1];
*out_ref_path = argv[i + 2];
return 0;
}
static daedalus_decoder_substrate parse_substrate(const char *s)
{
if (!strcmp(s, "cpu")) return DAEDALUS_DECODER_SUBSTRATE_CPU;
if (!strcmp(s, "qpu")) return DAEDALUS_DECODER_SUBSTRATE_QPU;
return DAEDALUS_DECODER_SUBSTRATE_AUTO;
}
int main(int argc, char **argv)
{
const char *in_path, *out_dadec_path, *out_ref_path;
if (parse_args(argc, argv, &in_path, &out_dadec_path, &out_ref_path) != 0)
return 1;
/* ---- Open input via libavformat (so we get NAL framing for free
* from the raw .h264 elementary stream demuxer). ---- */
AVFormatContext *fmt = NULL;
if (avformat_open_input(&fmt, in_path, NULL, NULL) < 0) {
fprintf(stderr, "avformat_open_input(%s) failed\n", in_path);
return 2;
}
if (avformat_find_stream_info(fmt, NULL) < 0) {
fprintf(stderr, "avformat_find_stream_info failed\n");
avformat_close_input(&fmt); return 2;
}
int vstream = -1;
for (unsigned s = 0; s < fmt->nb_streams; s++)
if (fmt->streams[s]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
vstream = (int) s; break;
}
if (vstream < 0) {
fprintf(stderr, "no video stream in %s\n", in_path);
avformat_close_input(&fmt); return 2;
}
/* ---- Open H.264 decoder ---- */
const AVCodec *codec = avcodec_find_decoder(AV_CODEC_ID_H264);
AVCodecContext *avctx = avcodec_alloc_context3(codec);
avcodec_parameters_to_context(avctx, fmt->streams[vstream]->codecpar);
#ifdef DAEDALUS_HAVE_H264_MB_INSPECT_COEFFS
/* Patch 0017's coefficient side buffer lives in H264Context (single
* per-stream); multi-threaded slice decode would race on it. Force
* single-thread. Also disable libavcodec's deblock so AVFrame is
* pre-deblock and the P-recovery math is exact. */
avctx->thread_count = 1;
avctx->thread_type = 0;
avctx->skip_loop_filter = AVDISCARD_ALL;
#endif
if (avcodec_open2(avctx, codec, NULL) < 0) {
fprintf(stderr, "avcodec_open2 failed\n");
avformat_close_input(&fmt); return 2;
}
AVPacket *pkt = av_packet_alloc();
AVFrame *fr = av_frame_alloc();
/* ---- Allocate output buffers + state needed before first decode ---- */
daedalus_decoder *dec = NULL;
uint8_t *out_y_dadec = NULL, *out_uv_dadec = NULL;
uint8_t *out_y_ref = NULL, *out_uv_ref = NULL;
size_t y_size = 0, uv_size = 0;
FILE *out_dadec_f = NULL, *out_ref_f = NULL;
int rc = 0;
int n_frames = 0;
size_t total_y_diffs = 0, total_uv_diffs = 0;
#ifdef DAEDALUS_HAVE_H264_MB_INSPECT_CB
/* Init inspect state BEFORE the first avcodec_send_packet — the
* callback fires from inside send_packet (i.e. before the first
* receive_frame ever returns), so lazy-init after-the-fact
* would miss the entire first frame. Use codecpar dims; round
* up to MB granularity (H.264 codes 1080 height as 1088). */
struct inspect_state inspect_st = {0};
{
const AVCodecParameters *cp = fmt->streams[vstream]->codecpar;
const int W_round = (cp->width + 15) & ~15;
const int H_round = (cp->height + 15) & ~15;
inspect_st.mb_w = W_round / 16;
inspect_st.mb_h = H_round / 16;
inspect_st.seen = calloc(1, (size_t) inspect_st.mb_w * inspect_st.mb_h);
if (!inspect_st.seen) { rc = 1; goto cleanup; }
#ifdef DAEDALUS_HAVE_H264_MB_INSPECT_COEFFS
inspect_st.captures = calloc((size_t) inspect_st.mb_w * inspect_st.mb_h,
sizeof(*inspect_st.captures));
if (!inspect_st.captures) { rc = 1; goto cleanup; }
#endif
}
ff_h264_set_mb_inspect_cb(avctx, inspect_cb, &inspect_st);
int inspect_total_cbs = 0;
int inspect_total_duplicate = 0;
int inspect_total_oob = 0;
int inspect_total_missing = 0;
#endif
/* ---- daedalus_decoder is lazy-created on the first AVFrame
* (coded width/height come from the bitstream's SPS via
* libavcodec). ---- */
while (av_read_frame(fmt, pkt) >= 0) {
if (pkt->stream_index != vstream) { av_packet_unref(pkt); continue; }
if (avcodec_send_packet(avctx, pkt) < 0) {
fprintf(stderr, "send_packet failed\n");
rc = 2; goto cleanup;
}
av_packet_unref(pkt);
for (;;) {
int ret = avcodec_receive_frame(avctx, fr);
if (ret == AVERROR(EAGAIN)) break;
if (ret < 0) {
fprintf(stderr, "receive_frame failed: %d\n", ret);
rc = 2; goto cleanup;
}
/* Lazily create the daedalus_decoder + output planes on
* the first frame so the SPS-derived coded width/height
* are known. */
if (!dec) {
/* Coded (= MB-aligned) dimensions are on AVCodecContext,
* not AVFrame (which carries the cropped display size). */
const int W = avctx->coded_width ? avctx->coded_width : fr->width;
const int H = avctx->coded_height ? avctx->coded_height : fr->height;
if ((W & 15) || (H & 15)) {
fprintf(stderr, "coded dims %dx%d not mod-16; skip\n", W, H);
rc = 2; goto cleanup;
}
dec = daedalus_decoder_create(W, H);
if (!dec) {
fprintf(stderr, "daedalus_decoder_create failed\n");
rc = 3; goto cleanup;
}
daedalus_decoder_set_substrate(dec, parse_substrate(substrate_str));
y_size = (size_t) W * (size_t) H;
uv_size = y_size / 2;
out_y_dadec = malloc(y_size);
out_uv_dadec = malloc(uv_size);
out_y_ref = malloc(y_size);
out_uv_ref = malloc(uv_size);
out_dadec_f = fopen(out_dadec_path, "wb");
out_ref_f = fopen(out_ref_path, "wb");
if (!out_y_dadec || !out_uv_dadec || !out_y_ref || !out_uv_ref ||
!out_dadec_f || !out_ref_f) {
fprintf(stderr, "alloc / fopen failed\n");
rc = 1; goto cleanup;
}
printf("daedalus_decode_h264: %dx%d, substrate=%s\n",
W, H, substrate_str);
#ifdef DAEDALUS_HAVE_H264_MB_INSPECT_CB
printf(" inspection callback: ACTIVE (patched libavcodec); "
"mb-grid %dx%d\n", inspect_st.mb_w, inspect_st.mb_h);
#else
printf(" inspection callback: not built in (stock libavcodec)\n");
#endif
}
/* Pack each MB's predicted samples from the AVFrame.
* Coeffs = 0; no edges; daedalus_decoder will reproduce
* exactly the AVFrame pixels. Use coded_width/coded_height
* for MB-grid alignment (e.g. 1920x1088 for 1080p display). */
const int coded_w = avctx->coded_width ? avctx->coded_width : avctx->width;
const int coded_h = avctx->coded_height ? avctx->coded_height : avctx->height;
const int mb_w = coded_w / 16;
const int mb_h = coded_h / 16;
uint8_t mb_pred[384];
int16_t mb_coeffs[384] = {0};
struct daedalus_decoder_mb_input mb = {0};
for (int my = 0; my < mb_h; my++) {
for (int mx = 0; mx < mb_w; mx++) {
/* Default: identity-passthrough — luma from AVFrame,
* chroma from AVFrame, coeffs all zero. */
pack_mb_predicted(fr, mx, my, mb_pred);
memset(mb_coeffs, 0, sizeof(mb_coeffs));
#ifdef DAEDALUS_HAVE_H264_MB_INSPECT_COEFFS
/* Real-coeffs path: if the callback captured this MB
* as Intra_4x4 / 4x4-DCT, override luma predicted
* with the recovered P and use the real luma coeffs.
* Chroma stays identity-passthrough (PR-A3b scope —
* chroma DC Hadamard + 8x8 transform follow-ups). */
const int mb_idx = my * mb_w + mx;
const struct mb_capture *cap = &inspect_st.captures[mb_idx];
if (cap->valid) {
memcpy(mb_pred, cap->predicted, 256);
for (int i = 0; i < 256; i++)
mb_coeffs[i] = cap->coeffs[i];
}
#endif
mb.mb_x = (uint16_t) mx;
mb.mb_y = (uint16_t) my;
mb.transform_8x8 = 0;
mb.coeffs = mb_coeffs;
mb.predicted = mb_pred;
mb.edges = NULL;
mb.n_edges = 0;
if (daedalus_decoder_append_mb(dec, &mb) != 0) {
fprintf(stderr, "append_mb (%d,%d) failed\n", mx, my);
rc = 3; goto cleanup;
}
}
}
int frc = daedalus_decoder_flush_frame(dec,
out_y_dadec, (size_t) coded_w,
out_uv_dadec, (size_t) coded_w);
if (frc != 0) {
fprintf(stderr, "flush_frame frame %d rc=%d\n", n_frames, frc);
rc = 3; goto cleanup;
}
/* Build the reference NV12 from the AVFrame for comparison. */
avframe_to_nv12(fr, out_y_ref, (size_t) coded_w,
out_uv_ref, (size_t) coded_w,
coded_w, coded_h);
/* Byte-exact compare + first-diff diagnostic. */
size_t y_diffs = 0, uv_diffs = 0;
size_t y_first_diff = (size_t) -1;
for (size_t i = 0; i < y_size; i++)
if (out_y_dadec[i] != out_y_ref[i]) {
if (y_first_diff == (size_t) -1) y_first_diff = i;
y_diffs++;
}
for (size_t i = 0; i < uv_size; i++)
if (out_uv_dadec[i] != out_uv_ref[i]) uv_diffs++;
if (y_diffs && y_first_diff != (size_t) -1) {
const size_t row = y_first_diff / (size_t) avctx->width;
const size_t col = y_first_diff % (size_t) avctx->width;
const size_t mb_x = col / 16;
const size_t mb_y = row / 8; /* not row/16 — chroma row uses /8 so use raw row here */
#ifdef DAEDALUS_HAVE_H264_MB_INSPECT_COEFFS
const int mb_idx = (int)(row / 16) * mb_w + (int) mb_x;
const int real = (mb_idx >= 0 && mb_idx < mb_w * mb_h)
? inspect_st.captures[mb_idx].valid : -1;
printf(" first Y diff @ byte %zu = (row %zu, col %zu) in MB(%zu,%zu) [real-coeffs=%d]; "
"dadec=%u ref=%u\n",
y_first_diff, row, col, mb_x, row / 16,
real, out_y_dadec[y_first_diff], out_y_ref[y_first_diff]);
#else
(void) mb_x; (void) mb_y;
printf(" first Y diff @ byte %zu = (row %zu, col %zu); dadec=%u ref=%u\n",
y_first_diff, row, col,
out_y_dadec[y_first_diff], out_y_ref[y_first_diff]);
#endif
}
total_y_diffs += y_diffs;
total_uv_diffs += uv_diffs;
#ifdef DAEDALUS_HAVE_H264_MB_INSPECT_CB
{
const int expected = mb_w * mb_h;
/* Count MBs that fired the callback. */
int seen_count = 0;
for (int i = 0; i < expected; i++)
if (inspect_st.seen[i]) seen_count++;
int missing = expected - seen_count;
if (missing || inspect_st.duplicate_mbs || inspect_st.out_of_bounds) {
fprintf(stderr,
" frame %d: callback invariants: fired=%d expected=%d "
"missing=%d duplicates=%d oob=%d\n",
n_frames, inspect_st.n_cbs_this_frame, expected,
missing, inspect_st.duplicate_mbs, inspect_st.out_of_bounds);
rc = 4;
}
inspect_total_cbs += inspect_st.n_cbs_this_frame;
inspect_total_duplicate += inspect_st.duplicate_mbs;
inspect_total_oob += inspect_st.out_of_bounds;
inspect_total_missing += missing;
/* Reset for next frame. */
inspect_st.n_cbs_this_frame = 0;
inspect_st.duplicate_mbs = 0;
inspect_st.out_of_bounds = 0;
memset(inspect_st.seen, 0, (size_t) expected);
#ifdef DAEDALUS_HAVE_H264_MB_INSPECT_COEFFS
printf(" frame %d: real-coeffs path %d MBs, "
"skipped intra16x16=%d 8x8dct=%d other=%d\n",
n_frames, inspect_st.real_coeffs_mbs,
inspect_st.skipped_intra16x16,
inspect_st.skipped_8x8dct,
inspect_st.skipped_other);
inspect_st.real_coeffs_mbs = 0;
inspect_st.skipped_intra16x16 = 0;
inspect_st.skipped_8x8dct = 0;
inspect_st.skipped_other = 0;
memset(inspect_st.captures, 0,
(size_t) expected * sizeof(*inspect_st.captures));
#endif
}
#endif
printf(" frame %d: Y diff %zu/%zu UV diff %zu/%zu%s\n",
n_frames, y_diffs, y_size, uv_diffs, uv_size,
(y_diffs || uv_diffs) ? " ***" : "");
/* Write both YUVs to disk. */
fwrite(out_y_dadec, 1, y_size, out_dadec_f);
fwrite(out_uv_dadec, 1, uv_size, out_dadec_f);
fwrite(out_y_ref, 1, y_size, out_ref_f);
fwrite(out_uv_ref, 1, uv_size, out_ref_f);
n_frames++;
if (max_frames > 0 && n_frames >= max_frames) goto drained;
}
}
/* Flush libavcodec for any remaining buffered frames. */
avcodec_send_packet(avctx, NULL);
for (;;) {
int ret = avcodec_receive_frame(avctx, fr);
if (ret < 0) break;
(void) ret;
/* Same loop body as above would go here; omitted for brevity —
* stock libavcodec rarely buffers I-only streams. */
n_frames++;
}
drained:
printf("\n%d frames decoded; total Y diff %zu, UV diff %zu\n",
n_frames, total_y_diffs, total_uv_diffs);
#ifdef DAEDALUS_HAVE_H264_MB_INSPECT_CB
printf("inspection callback: %d total invocations, %d missing, %d duplicates, %d oob\n",
inspect_total_cbs, inspect_total_missing, inspect_total_duplicate, inspect_total_oob);
if (inspect_total_missing || inspect_total_duplicate || inspect_total_oob)
rc = 4;
#endif
if (rc == 0 && (total_y_diffs || total_uv_diffs)) {
printf("FAIL: daedalus-decoder output does NOT match libavcodec reference byte-for-byte\n");
rc = 4;
} else if (rc == 0) {
printf("PASS: byte-exact identity-passthrough across %d frames\n", n_frames);
} else {
printf("FAIL: %s\n",
(total_y_diffs || total_uv_diffs) ? "byte-exact comparison failed"
: "inspection callback invariants violated");
}
cleanup:
if (out_dadec_f) fclose(out_dadec_f);
if (out_ref_f) fclose(out_ref_f);
free(out_uv_ref); free(out_y_ref);
free(out_uv_dadec);free(out_y_dadec);
#ifdef DAEDALUS_HAVE_H264_MB_INSPECT_CB
free(inspect_st.seen);
# ifdef DAEDALUS_HAVE_H264_MB_INSPECT_COEFFS
free(inspect_st.captures);
# endif
#endif
if (dec) daedalus_decoder_destroy(dec);
av_frame_free(&fr);
av_packet_free(&pkt);
avcodec_free_context(&avctx);
avformat_close_input(&fmt);
return rc;
}