/* SPDX-License-Identifier: BSD-2-Clause */ /* * daedalus_decode_h264 — option A standalone test harness for * daedalus-decoder against real H.264 streams. * * Decodes an H.264 file via stock libavcodec (the reference), AND * in parallel runs the same frame through daedalus-decoder in * identity-passthrough mode (predicted = libavcodec's reconstructed * frame, coeffs = 0, no deblock edges). Writes both outputs as * NV12 YUV, then byte-exact diffs. * * PR-A1b purpose: validate the daedalus-decoder data path / API * contract at real-stream frame sizes (16k+ MBs at 1080p, real * H.264-decoded predicted-sample distributions), without yet * requiring per-MB internal state extraction from libavcodec. * Follow-up PRs (A2+) extend this harness to feed REAL per-MB * state (residual coeffs, pre-residual predicted, deblock edges) * via the per-MB inspection callback added in marfrit-packages * patch 0016 (PR #106). * * Identity-passthrough math: * - mb_input.predicted = AVFrame pixels at this MB's raster pos * - mb_input.coeffs = 384 int16's, all zero * - mb_input.edges = NULL, n_edges = 0 * Then flush_frame: * scratch_y/_uv pre-fill from predicted_y/_uv = AVFrame pixels * IDCT dispatches with all-zero coeffs add 0 (no-op) * No deblock dispatches (no edges) * copy-out to caller's planes * Result MUST equal AVFrame pixels byte-for-byte. * * Invoke: * daedalus_decode_h264 [--substrate cpu|qpu|auto] * [--max-frames N] * * * Exit status: * 0 — bit-exact match across all decoded frames * 1 — argument / setup error * 2 — decode error from libavcodec * 3 — daedalus-decoder error (ctx, append, flush) * 4 — bit-exact comparison failed (diff > 0 bytes) */ #define _POSIX_C_SOURCE 200809L #include "daedalus_decoder.h" #include #include #include #include #include #include #include #include static const char *substrate_str = "auto"; static int max_frames = -1; /* Extract one MB's predicted-samples block from a YUV420P AVFrame * (stock libavcodec) and pack it into the 384-byte mb_input.predicted * layout: 16x16 luma raster, then 8x8 Cb raster, then 8x8 Cr raster. * * AVFrame's data[] points at separate Y / U / V planes (or NV12's * interleaved UV — we handle both via the pix_fmt branch). */ static void pack_mb_predicted(const AVFrame *fr, int mb_x, int mb_y, uint8_t out[384]) { const int y_off = mb_y * 16 * fr->linesize[0] + mb_x * 16; const int uv_off = mb_y * 8 * fr->linesize[1] + mb_x * 8; /* Luma: 16 rows × 16 cols */ for (int r = 0; r < 16; r++) memcpy(&out[r * 16], &fr->data[0][y_off + r * fr->linesize[0]], 16); /* Chroma: 8 rows × 8 cols per component */ if (fr->format == AV_PIX_FMT_YUV420P) { for (int r = 0; r < 8; r++) { memcpy(&out[256 + r * 8], &fr->data[1][uv_off + r * fr->linesize[1]], 8); memcpy(&out[256 + 64 + r * 8], &fr->data[2][uv_off + r * fr->linesize[2]], 8); } } else if (fr->format == AV_PIX_FMT_NV12) { /* NV12: interleaved UV plane, deinterleave into Cb/Cr halves */ const int uv_off_nv12 = mb_y * 8 * fr->linesize[1] + mb_x * 16; for (int r = 0; r < 8; r++) { for (int c = 0; c < 8; c++) { out[256 + r * 8 + c] = fr->data[1][uv_off_nv12 + r * fr->linesize[1] + c * 2 + 0]; out[256 + 64 + r * 8 + c] = fr->data[1][uv_off_nv12 + r * fr->linesize[1] + c * 2 + 1]; } } } else { /* Unsupported pixel format — zero out chroma (test will fail loud) */ memset(&out[256], 0, 128); } } /* Convert an AVFrame (YUV420P or NV12) to NV12 in caller-provided * planes. Used to write the reference YUV file. */ static void avframe_to_nv12(const AVFrame *fr, uint8_t *out_y, size_t y_stride, uint8_t *out_uv, size_t uv_stride, int width, int height) { /* Y plane: row-major copy from src linesize to dst stride */ for (int r = 0; r < height; r++) memcpy(&out_y[(size_t) r * y_stride], &fr->data[0][(size_t) r * fr->linesize[0]], (size_t) width); if (fr->format == AV_PIX_FMT_NV12) { for (int r = 0; r < height / 2; r++) memcpy(&out_uv[(size_t) r * uv_stride], &fr->data[1][(size_t) r * fr->linesize[1]], (size_t) width); } else if (fr->format == AV_PIX_FMT_YUV420P) { /* Interleave U+V → NV12 UV */ const int cw = width / 2, ch = height / 2; for (int r = 0; r < ch; r++) { for (int c = 0; c < cw; c++) { out_uv[(size_t) r * uv_stride + (size_t) c * 2 + 0] = fr->data[1][(size_t) r * fr->linesize[1] + c]; out_uv[(size_t) r * uv_stride + (size_t) c * 2 + 1] = fr->data[2][(size_t) r * fr->linesize[2] + c]; } } } } static int parse_args(int argc, char **argv, const char **in_path, const char **out_dadec_path, const char **out_ref_path) { int i = 1; while (i < argc && argv[i][0] == '-') { if (!strcmp(argv[i], "--substrate") && i + 1 < argc) { substrate_str = argv[++i]; } else if (!strcmp(argv[i], "--max-frames") && i + 1 < argc) { max_frames = atoi(argv[++i]); } else { fprintf(stderr, "unknown option: %s\n", argv[i]); return -1; } i++; } if (argc - i != 3) { fprintf(stderr, "usage: %s [--substrate cpu|qpu|auto] [--max-frames N] " " \n", argv[0]); return -1; } *in_path = argv[i + 0]; *out_dadec_path = argv[i + 1]; *out_ref_path = argv[i + 2]; return 0; } static daedalus_decoder_substrate parse_substrate(const char *s) { if (!strcmp(s, "cpu")) return DAEDALUS_DECODER_SUBSTRATE_CPU; if (!strcmp(s, "qpu")) return DAEDALUS_DECODER_SUBSTRATE_QPU; return DAEDALUS_DECODER_SUBSTRATE_AUTO; } int main(int argc, char **argv) { const char *in_path, *out_dadec_path, *out_ref_path; if (parse_args(argc, argv, &in_path, &out_dadec_path, &out_ref_path) != 0) return 1; /* ---- Open input via libavformat (so we get NAL framing for free * from the raw .h264 elementary stream demuxer). ---- */ AVFormatContext *fmt = NULL; if (avformat_open_input(&fmt, in_path, NULL, NULL) < 0) { fprintf(stderr, "avformat_open_input(%s) failed\n", in_path); return 2; } if (avformat_find_stream_info(fmt, NULL) < 0) { fprintf(stderr, "avformat_find_stream_info failed\n"); avformat_close_input(&fmt); return 2; } int vstream = -1; for (unsigned s = 0; s < fmt->nb_streams; s++) if (fmt->streams[s]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) { vstream = (int) s; break; } if (vstream < 0) { fprintf(stderr, "no video stream in %s\n", in_path); avformat_close_input(&fmt); return 2; } /* ---- Open H.264 decoder ---- */ const AVCodec *codec = avcodec_find_decoder(AV_CODEC_ID_H264); AVCodecContext *avctx = avcodec_alloc_context3(codec); avcodec_parameters_to_context(avctx, fmt->streams[vstream]->codecpar); if (avcodec_open2(avctx, codec, NULL) < 0) { fprintf(stderr, "avcodec_open2 failed\n"); avformat_close_input(&fmt); return 2; } AVPacket *pkt = av_packet_alloc(); AVFrame *fr = av_frame_alloc(); /* ---- Create daedalus_decoder. Coded width/height come from * the bitstream's SPS via libavcodec (after the first packet * is decoded — defer creation until then). ---- */ daedalus_decoder *dec = NULL; uint8_t *out_y_dadec = NULL, *out_uv_dadec = NULL; uint8_t *out_y_ref = NULL, *out_uv_ref = NULL; size_t y_size = 0, uv_size = 0; FILE *out_dadec_f = NULL, *out_ref_f = NULL; int rc = 0; int n_frames = 0; size_t total_y_diffs = 0, total_uv_diffs = 0; while (av_read_frame(fmt, pkt) >= 0) { if (pkt->stream_index != vstream) { av_packet_unref(pkt); continue; } if (avcodec_send_packet(avctx, pkt) < 0) { fprintf(stderr, "send_packet failed\n"); rc = 2; goto cleanup; } av_packet_unref(pkt); for (;;) { int ret = avcodec_receive_frame(avctx, fr); if (ret == AVERROR(EAGAIN)) break; if (ret < 0) { fprintf(stderr, "receive_frame failed: %d\n", ret); rc = 2; goto cleanup; } /* Lazily create the daedalus_decoder + output planes on * the first frame so the SPS-derived coded width/height * are known. */ if (!dec) { const int W = fr->width; const int H = fr->height; if ((W & 15) || (H & 15)) { fprintf(stderr, "coded dims %dx%d not mod-16; skip\n", W, H); rc = 2; goto cleanup; } dec = daedalus_decoder_create(W, H); if (!dec) { fprintf(stderr, "daedalus_decoder_create failed\n"); rc = 3; goto cleanup; } daedalus_decoder_set_substrate(dec, parse_substrate(substrate_str)); y_size = (size_t) W * (size_t) H; uv_size = y_size / 2; out_y_dadec = malloc(y_size); out_uv_dadec = malloc(uv_size); out_y_ref = malloc(y_size); out_uv_ref = malloc(uv_size); out_dadec_f = fopen(out_dadec_path, "wb"); out_ref_f = fopen(out_ref_path, "wb"); if (!out_y_dadec || !out_uv_dadec || !out_y_ref || !out_uv_ref || !out_dadec_f || !out_ref_f) { fprintf(stderr, "alloc / fopen failed\n"); rc = 1; goto cleanup; } printf("daedalus_decode_h264: %dx%d, substrate=%s\n", W, H, substrate_str); } /* Pack each MB's predicted samples from the AVFrame. * Coeffs = 0; no edges; daedalus_decoder will reproduce * exactly the AVFrame pixels. */ const int mb_w = avctx->width / 16; const int mb_h = avctx->height / 16; uint8_t mb_pred[384]; int16_t mb_coeffs[384] = {0}; struct daedalus_decoder_mb_input mb = {0}; for (int my = 0; my < mb_h; my++) { for (int mx = 0; mx < mb_w; mx++) { pack_mb_predicted(fr, mx, my, mb_pred); mb.mb_x = (uint16_t) mx; mb.mb_y = (uint16_t) my; mb.transform_8x8 = 0; mb.coeffs = mb_coeffs; mb.predicted = mb_pred; mb.edges = NULL; mb.n_edges = 0; if (daedalus_decoder_append_mb(dec, &mb) != 0) { fprintf(stderr, "append_mb (%d,%d) failed\n", mx, my); rc = 3; goto cleanup; } } } int frc = daedalus_decoder_flush_frame(dec, out_y_dadec, (size_t) avctx->width, out_uv_dadec, (size_t) avctx->width); if (frc != 0) { fprintf(stderr, "flush_frame frame %d rc=%d\n", n_frames, frc); rc = 3; goto cleanup; } /* Build the reference NV12 from the AVFrame for comparison. */ avframe_to_nv12(fr, out_y_ref, (size_t) avctx->width, out_uv_ref, (size_t) avctx->width, avctx->width, avctx->height); /* Byte-exact compare. */ size_t y_diffs = 0, uv_diffs = 0; for (size_t i = 0; i < y_size; i++) if (out_y_dadec[i] != out_y_ref[i]) y_diffs++; for (size_t i = 0; i < uv_size; i++) if (out_uv_dadec[i] != out_uv_ref[i]) uv_diffs++; total_y_diffs += y_diffs; total_uv_diffs += uv_diffs; printf(" frame %d: Y diff %zu/%zu UV diff %zu/%zu%s\n", n_frames, y_diffs, y_size, uv_diffs, uv_size, (y_diffs || uv_diffs) ? " ***" : ""); /* Write both YUVs to disk. */ fwrite(out_y_dadec, 1, y_size, out_dadec_f); fwrite(out_uv_dadec, 1, uv_size, out_dadec_f); fwrite(out_y_ref, 1, y_size, out_ref_f); fwrite(out_uv_ref, 1, uv_size, out_ref_f); n_frames++; if (max_frames > 0 && n_frames >= max_frames) goto drained; } } /* Flush libavcodec for any remaining buffered frames. */ avcodec_send_packet(avctx, NULL); for (;;) { int ret = avcodec_receive_frame(avctx, fr); if (ret < 0) break; (void) ret; /* Same loop body as above would go here; omitted for brevity — * stock libavcodec rarely buffers I-only streams. */ n_frames++; } drained: printf("\n%d frames decoded; total Y diff %zu, UV diff %zu\n", n_frames, total_y_diffs, total_uv_diffs); if (total_y_diffs || total_uv_diffs) { printf("FAIL: daedalus-decoder output does NOT match libavcodec reference byte-for-byte\n"); rc = 4; } else { printf("PASS: byte-exact identity-passthrough across %d frames\n", n_frames); } cleanup: if (out_dadec_f) fclose(out_dadec_f); if (out_ref_f) fclose(out_ref_f); free(out_uv_ref); free(out_y_ref); free(out_uv_dadec);free(out_y_dadec); if (dec) daedalus_decoder_destroy(dec); av_frame_free(&fr); av_packet_free(&pkt); avcodec_free_context(&avctx); avformat_close_input(&fmt); return rc; }