Files
daedalus-decoder/tools/daedalus_decode_h264.c
T
2026-05-26 07:02:29 +02:00

457 lines
18 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/* SPDX-License-Identifier: BSD-2-Clause */
/*
* daedalus_decode_h264 — option A standalone test harness for
* daedalus-decoder against real H.264 streams.
*
* Decodes an H.264 file via stock libavcodec (the reference), AND
* in parallel runs the same frame through daedalus-decoder in
* identity-passthrough mode (predicted = libavcodec's reconstructed
* frame, coeffs = 0, no deblock edges). Writes both outputs as
* NV12 YUV, then byte-exact diffs.
*
* PR-A1b purpose: validate the daedalus-decoder data path / API
* contract at real-stream frame sizes (16k+ MBs at 1080p, real
* H.264-decoded predicted-sample distributions), without yet
* requiring per-MB internal state extraction from libavcodec.
* Follow-up PRs (A2+) extend this harness to feed REAL per-MB
* state (residual coeffs, pre-residual predicted, deblock edges)
* via the per-MB inspection callback added in marfrit-packages
* patch 0016 (PR #106).
*
* Identity-passthrough math:
* - mb_input.predicted = AVFrame pixels at this MB's raster pos
* - mb_input.coeffs = 384 int16's, all zero
* - mb_input.edges = NULL, n_edges = 0
* Then flush_frame:
* scratch_y/_uv pre-fill from predicted_y/_uv = AVFrame pixels
* IDCT dispatches with all-zero coeffs add 0 (no-op)
* No deblock dispatches (no edges)
* copy-out to caller's planes
* Result MUST equal AVFrame pixels byte-for-byte.
*
* Invoke:
* daedalus_decode_h264 [--substrate cpu|qpu|auto]
* [--max-frames N]
* <input.h264> <output_dadec.yuv> <output_ref.yuv>
*
* Exit status:
* 0 — bit-exact match across all decoded frames
* 1 — argument / setup error
* 2 — decode error from libavcodec
* 3 — daedalus-decoder error (ctx, append, flush)
* 4 — bit-exact comparison failed (diff > 0 bytes), or a per-MB
*     inspection-callback invariant (call count / raster order)
*     was violated
*/
#define _POSIX_C_SOURCE 200809L
#include "daedalus_decoder.h"
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavutil/imgutils.h>
/* Per-MB inspection callback API — provided by the patched FFmpeg
* fork via marfrit-packages 0016. The H264Context struct itself
* remains internal (declared in libavcodec/h264dec.h which isn't
* installed), so we only forward-declare it here and use it
* opaquely through the callback signature. Real per-MB state
* extraction (sl->mb coefficients, mb_type, etc.) will land in
* PR-A3 alongside an internal-header include path. */
#ifdef DAEDALUS_HAVE_H264_MB_INSPECT_CB
/* Incomplete type on purpose: this file only ever passes the pointer
 * through and never dereferences it. */
struct H264Context;
/* Shape of the per-MB hook: the registrant's own opaque pointer, the
 * decoder's (opaque to us) H264Context, and a macroblock coordinate
 * pair in MB units. */
typedef void (*ff_h264_mb_inspect_cb)(void *opaque,
const struct H264Context *h,
int mb_x, int mb_y);
/* Installs cb (and the opaque pointer handed back to it) on avctx.
 * The definition lives in the patched libavcodec, not in this file.
 * NOTE(review): when exactly the hook fires relative to
 * avcodec_send_packet()/avcodec_receive_frame() is defined by that
 * patch — confirm there before relying on ordering. */
void ff_h264_set_mb_inspect_cb(AVCodecContext *avctx,
ff_h264_mb_inspect_cb cb, void *opaque);
#endif
#include <errno.h>
#include <inttypes.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Raw text given to --substrate (default "auto"). parse_args() stores it;
 * main() converts it with parse_substrate() and echoes it in the banner. */
static const char *substrate_str = "auto";
/* Value given to --max-frames. main() only applies the cap when this is
 * greater than zero, so the default of -1 means "no limit". */
static int max_frames = -1;
/* Inspection-callback state: per-frame counter + raster-order check.
* Reset at end of each frame by the main loop. */
#ifdef DAEDALUS_HAVE_H264_MB_INSPECT_CB
/* Bookkeeping shared between inspect_cb() and the main loop, which reads
 * the counters after each output frame and then zeroes them. */
struct inspect_state {
    int n_cbs_this_frame;       /* callbacks seen since the last reset */
    int expected_mb_w;          /* frame width in MBs; <= 0 while unknown */
    int raster_violations;      /* out-of-order callbacks since the last reset */
    int last_mb_x, last_mb_y;   /* for raster-order detection */
};

/*
 * Per-macroblock inspection hook.
 *
 * opaque  - the struct inspect_state registered with the hook
 * h       - decoder context; unused here
 * mb_x/y  - macroblock coordinates reported by the decoder
 *
 * Counts the call and checks that (mb_x, mb_y) is the raster successor
 * of the previously reported macroblock. The first call after a reset
 * is accepted unconditionally because there is no predecessor.
 *
 * The hook can run before the caller has learned the frame width
 * (expected_mb_w still 0). Taking "% expected_mb_w" in that state is a
 * division by zero, so a width-independent check is used instead: the
 * successor must be either the next column of the same row, or column 0
 * of the next row.
 */
static void inspect_cb(void *opaque,
                       const struct H264Context *h,
                       int mb_x, int mb_y)
{
    (void) h;
    struct inspect_state *st = opaque;

    if (st->n_cbs_this_frame > 0) {
        const int next_x = st->last_mb_x + 1;
        int in_order;

        if (st->expected_mb_w > 0) {
            /* Width known: exactly one legal successor. */
            const int wraps = (next_x == st->expected_mb_w);
            in_order = (mb_x == next_x % st->expected_mb_w) &&
                       (mb_y == st->last_mb_y + wraps);
        } else {
            /* Width unknown: accept either "step right" or "start of the
             * next row"; anything else cannot be raster order. */
            in_order = (mb_x == next_x && mb_y == st->last_mb_y) ||
                       (mb_x == 0 && mb_y == st->last_mb_y + 1);
        }
        if (!in_order)
            st->raster_violations++;
    }

    st->last_mb_x = mb_x;
    st->last_mb_y = mb_y;
    st->n_cbs_this_frame++;
}
#endif
/* Extract one MB's predicted-samples block from an AVFrame and pack it
 * into the 384-byte mb_input.predicted layout:
 *   out[  0..255]  16x16 luma, raster order
 *   out[256..319]   8x8  Cb,   raster order
 *   out[320..383]   8x8  Cr,   raster order
 *
 * fr          - source frame; planar YUV420P and semi-planar NV12 are
 *               understood (selected on fr->format)
 * mb_x, mb_y  - macroblock position in MB units
 * out         - caller-provided 384-byte destination
 *
 * For any other pixel format the luma is still copied as 8-bit samples
 * and the chroma part is zero-filled.
 *
 * Each plane's offset is derived from that plane's own linesize; AVFrame
 * does not promise that the Cb and Cr strides are equal. */
static void pack_mb_predicted(const AVFrame *fr, int mb_x, int mb_y,
                              uint8_t out[384])
{
    const int y_off = mb_y * 16 * fr->linesize[0] + mb_x * 16;

    /* Luma: 16 rows x 16 cols */
    for (int r = 0; r < 16; r++)
        memcpy(&out[r * 16],
               &fr->data[0][y_off + r * fr->linesize[0]],
               16);

    /* Chroma: 8 rows x 8 cols per component */
    if (fr->format == AV_PIX_FMT_YUV420P) {
        const int cb_off = mb_y * 8 * fr->linesize[1] + mb_x * 8;
        const int cr_off = mb_y * 8 * fr->linesize[2] + mb_x * 8;
        for (int r = 0; r < 8; r++) {
            memcpy(&out[256 + r * 8],
                   &fr->data[1][cb_off + r * fr->linesize[1]], 8);
            memcpy(&out[256 + 64 + r * 8],
                   &fr->data[2][cr_off + r * fr->linesize[2]], 8);
        }
    } else if (fr->format == AV_PIX_FMT_NV12) {
        /* NV12: one interleaved UV plane, 16 bytes per MB row (8 Cb/Cr
         * pairs); deinterleave into the Cb and Cr halves. */
        const int uv_off = mb_y * 8 * fr->linesize[1] + mb_x * 16;
        for (int r = 0; r < 8; r++) {
            const uint8_t *row = &fr->data[1][uv_off + r * fr->linesize[1]];
            for (int c = 0; c < 8; c++) {
                out[256 + r * 8 + c]      = row[c * 2 + 0];
                out[256 + 64 + r * 8 + c] = row[c * 2 + 1];
            }
        }
    } else {
        /* Unsupported pixel format: zero the chroma so it cannot be
         * mistaken for real samples. */
        memset(&out[256], 0, 128);
    }
}
/* Convert an AVFrame (YUV420P or NV12) to NV12 in caller-provided
 * planes. Used to build the reference picture that is compared against
 * the daedalus-decoder output and written to the reference YUV file.
 *
 * fr                 - source frame
 * out_y, y_stride    - destination luma plane and its row pitch in bytes
 * out_uv, uv_stride  - destination interleaved UV plane and its row pitch
 * width, height      - luma dimensions to copy; the chroma plane covers
 *                      height / 2 rows of `width` bytes
 *
 * For any other pixel format the UV plane is filled with 0x80. Leaving it
 * unwritten would make the comparison and the output file depend on
 * whatever malloc() returned; 0x80 is deterministic and differs from the
 * zero chroma pack_mb_predicted() emits for unsupported formats, so such
 * a frame is always reported as a mismatch. */
static void avframe_to_nv12(const AVFrame *fr, uint8_t *out_y, size_t y_stride,
                            uint8_t *out_uv, size_t uv_stride,
                            int width, int height)
{
    const int chroma_rows = height / 2;

    /* Y plane: row-by-row copy from the source linesize to the dst stride */
    for (int r = 0; r < height; r++)
        memcpy(&out_y[(size_t) r * y_stride],
               &fr->data[0][(size_t) r * fr->linesize[0]],
               (size_t) width);

    if (fr->format == AV_PIX_FMT_NV12) {
        /* Already interleaved: a chroma row is `width` bytes wide. */
        for (int r = 0; r < chroma_rows; r++)
            memcpy(&out_uv[(size_t) r * uv_stride],
                   &fr->data[1][(size_t) r * fr->linesize[1]],
                   (size_t) width);
    } else if (fr->format == AV_PIX_FMT_YUV420P) {
        /* Interleave U+V -> NV12 UV */
        const int chroma_cols = width / 2;
        for (int r = 0; r < chroma_rows; r++) {
            const uint8_t *u_row = &fr->data[1][(size_t) r * fr->linesize[1]];
            const uint8_t *v_row = &fr->data[2][(size_t) r * fr->linesize[2]];
            uint8_t *dst = &out_uv[(size_t) r * uv_stride];
            for (int c = 0; c < chroma_cols; c++) {
                dst[(size_t) c * 2 + 0] = u_row[c];
                dst[(size_t) c * 2 + 1] = v_row[c];
            }
        }
    } else {
        /* Unsupported pixel format: write a defined value, see above. */
        for (int r = 0; r < chroma_rows; r++)
            memset(&out_uv[(size_t) r * uv_stride], 0x80, (size_t) width);
    }
}
static int parse_args(int argc, char **argv,
const char **in_path,
const char **out_dadec_path,
const char **out_ref_path)
{
int i = 1;
while (i < argc && argv[i][0] == '-') {
if (!strcmp(argv[i], "--substrate") && i + 1 < argc) {
substrate_str = argv[++i];
} else if (!strcmp(argv[i], "--max-frames") && i + 1 < argc) {
max_frames = atoi(argv[++i]);
} else {
fprintf(stderr, "unknown option: %s\n", argv[i]);
return -1;
}
i++;
}
if (argc - i != 3) {
fprintf(stderr,
"usage: %s [--substrate cpu|qpu|auto] [--max-frames N] "
"<input.h264> <output_dadec.yuv> <output_ref.yuv>\n", argv[0]);
return -1;
}
*in_path = argv[i + 0];
*out_dadec_path = argv[i + 1];
*out_ref_path = argv[i + 2];
return 0;
}
/*
 * Map the --substrate text to the decoder's substrate enum.
 *
 * "cpu" and "qpu" select the matching substrate; "auto" and every other
 * string select AUTO. An unrecognised string additionally produces a
 * warning on stderr, because the caller echoes the raw text as the
 * active substrate and a typo would otherwise go unnoticed.
 */
static daedalus_decoder_substrate parse_substrate(const char *s)
{
    if (!strcmp(s, "cpu"))
        return DAEDALUS_DECODER_SUBSTRATE_CPU;
    if (!strcmp(s, "qpu"))
        return DAEDALUS_DECODER_SUBSTRATE_QPU;
    if (strcmp(s, "auto") != 0)
        fprintf(stderr,
                "warning: unknown substrate '%s', falling back to auto\n", s);
    return DAEDALUS_DECODER_SUBSTRATE_AUTO;
}
/*
 * Harness entry point.
 *
 * Decodes the input with libavcodec and, for every output frame, feeds
 * the decoded pixels through daedalus-decoder as "predicted" samples
 * with all-zero coefficients and no deblock edges, then compares the
 * result byte-for-byte against an NV12 copy of the libavcodec frame and
 * appends both pictures to the two output files.
 *
 * Frames that libavcodec still holds when the input ends are drained
 * through the same per-frame path, so every frame counted in the summary
 * has actually been compared and written.
 *
 * Returns the exit status described in the file header comment. A run
 * that produces no frame at all is reported as a decode failure (2)
 * rather than a vacuous PASS; a failed write/close of an output file is
 * reported as 1.
 *
 * NOTE(review): pack_mb_predicted()/avframe_to_nv12() only understand
 * YUV420P and NV12. Other 4:2:0 variants (for example full-range streams
 * that libavcodec reports under a separate pixel-format id) go through
 * their "unsupported" branches — confirm whether those need support.
 */
int main(int argc, char **argv)
{
    const char *in_path, *out_dadec_path, *out_ref_path;
    if (parse_args(argc, argv, &in_path, &out_dadec_path, &out_ref_path) != 0)
        return 1;

    /* Everything the cleanup label releases starts out NULL so that any
     * failure below can simply jump there. */
    AVFormatContext *fmt = NULL;
    const AVCodec *codec = NULL;
    AVCodecContext *avctx = NULL;
    AVPacket *pkt = NULL;
    AVFrame *fr = NULL;
    daedalus_decoder *dec = NULL;
    uint8_t *out_y_dadec = NULL, *out_uv_dadec = NULL;
    uint8_t *out_y_ref = NULL, *out_uv_ref = NULL;
    size_t y_size = 0, uv_size = 0;
    FILE *out_dadec_f = NULL, *out_ref_f = NULL;
    int dec_w = 0, dec_h = 0;   /* coded size dec and the buffers were built for */
    int vstream = -1;
    int draining = 0;           /* set once the input is exhausted */
    int rc = 0;
    int n_frames = 0;
    size_t total_y_diffs = 0, total_uv_diffs = 0;
#ifdef DAEDALUS_HAVE_H264_MB_INSPECT_CB
    struct inspect_state inspect_st = {0};
    int inspect_total_cbs = 0;
    int inspect_total_violations = 0;
#endif

    /* ---- Open input via libavformat (so we get NAL framing for free
     *      from the raw .h264 elementary stream demuxer). ---- */
    if (avformat_open_input(&fmt, in_path, NULL, NULL) < 0) {
        fprintf(stderr, "avformat_open_input(%s) failed\n", in_path);
        return 2;
    }
    if (avformat_find_stream_info(fmt, NULL) < 0) {
        fprintf(stderr, "avformat_find_stream_info failed\n");
        rc = 2; goto cleanup;
    }
    for (unsigned s = 0; s < fmt->nb_streams; s++) {
        if (fmt->streams[s]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
            vstream = (int) s;
            break;
        }
    }
    if (vstream < 0) {
        fprintf(stderr, "no video stream in %s\n", in_path);
        rc = 2; goto cleanup;
    }

    /* ---- Open H.264 decoder ---- */
    codec = avcodec_find_decoder(AV_CODEC_ID_H264);
    if (!codec) {
        fprintf(stderr, "no H.264 decoder available in libavcodec\n");
        rc = 2; goto cleanup;
    }
    avctx = avcodec_alloc_context3(codec);
    if (!avctx) {
        fprintf(stderr, "avcodec_alloc_context3 failed\n");
        rc = 2; goto cleanup;
    }
    if (avcodec_parameters_to_context(avctx, fmt->streams[vstream]->codecpar) < 0) {
        fprintf(stderr, "avcodec_parameters_to_context failed\n");
        rc = 2; goto cleanup;
    }
    if (avcodec_open2(avctx, codec, NULL) < 0) {
        fprintf(stderr, "avcodec_open2 failed\n");
        rc = 2; goto cleanup;
    }
    pkt = av_packet_alloc();
    fr  = av_frame_alloc();
    if (!pkt || !fr) {
        fprintf(stderr, "av_packet_alloc / av_frame_alloc failed\n");
        rc = 1; goto cleanup;
    }

#ifdef DAEDALUS_HAVE_H264_MB_INSPECT_CB
    /* The hook runs while the first picture is being decoded, i.e. before
     * the first frame is handed back and the exact MB width is stored
     * below. Seed the width from the stream parameters when they are
     * known so the raster check has a width from the start; the value is
     * overwritten with the coded width once the first frame arrives. */
    {
        const int hint_w = avctx->coded_width > 0 ? avctx->coded_width
                                                  : avctx->width;
        if (hint_w > 0)
            inspect_st.expected_mb_w = (hint_w + 15) / 16;
    }
    ff_h264_set_mb_inspect_cb(avctx, inspect_cb, &inspect_st);
#endif

    /* ---- Decode loop. One iteration feeds one packet (or, once the
     *      input is exhausted, the flush request) and then pulls every
     *      frame the decoder is willing to return. ---- */
    for (;;) {
        if (!draining) {
            if (av_read_frame(fmt, pkt) < 0) {
                /* End of input: ask the decoder to release what it still
                 * buffers. The result is not checked; a failure simply
                 * shows up as "no more frames" below. */
                draining = 1;
                (void) avcodec_send_packet(avctx, NULL);
            } else if (pkt->stream_index != vstream) {
                av_packet_unref(pkt);
                continue;
            } else {
                const int sret = avcodec_send_packet(avctx, pkt);
                av_packet_unref(pkt);
                if (sret < 0) {
                    fprintf(stderr, "send_packet failed\n");
                    rc = 2; goto cleanup;
                }
            }
        }

        for (;;) {
            const int ret = avcodec_receive_frame(avctx, fr);
            if (ret < 0) {
                /* While draining, any negative result ends the run. */
                if (draining)
                    goto drained;
                if (ret == AVERROR(EAGAIN))
                    break;              /* decoder wants another packet */
                fprintf(stderr, "receive_frame failed: %d\n", ret);
                rc = 2; goto cleanup;
            }

            /* Coded (= MB-aligned) dimensions are on AVCodecContext, not
             * AVFrame (which carries the cropped display size), e.g.
             * 1920x1088 for 1080p display. */
            const int coded_w = avctx->coded_width  ? avctx->coded_width  : fr->width;
            const int coded_h = avctx->coded_height ? avctx->coded_height : fr->height;

            if (!dec) {
                /* Lazily create the daedalus_decoder + output planes on
                 * the first frame, when the coded size is known. */
                if (coded_w <= 0 || coded_h <= 0 ||
                    (coded_w & 15) || (coded_h & 15)) {
                    fprintf(stderr, "coded dims %dx%d not mod-16; skip\n",
                            coded_w, coded_h);
                    rc = 2; goto cleanup;
                }
                dec = daedalus_decoder_create(coded_w, coded_h);
                if (!dec) {
                    fprintf(stderr, "daedalus_decoder_create failed\n");
                    rc = 3; goto cleanup;
                }
                daedalus_decoder_set_substrate(dec, parse_substrate(substrate_str));
                dec_w = coded_w;
                dec_h = coded_h;
                y_size  = (size_t) coded_w * (size_t) coded_h;
                uv_size = y_size / 2;
                out_y_dadec  = malloc(y_size);
                out_uv_dadec = malloc(uv_size);
                out_y_ref    = malloc(y_size);
                out_uv_ref   = malloc(uv_size);
                out_dadec_f  = fopen(out_dadec_path, "wb");
                out_ref_f    = fopen(out_ref_path, "wb");
                if (!out_y_dadec || !out_uv_dadec || !out_y_ref || !out_uv_ref ||
                    !out_dadec_f || !out_ref_f) {
                    fprintf(stderr, "alloc / fopen failed\n");
                    rc = 1; goto cleanup;
                }
                printf("daedalus_decode_h264: %dx%d, substrate=%s\n",
                       coded_w, coded_h, substrate_str);
#ifdef DAEDALUS_HAVE_H264_MB_INSPECT_CB
                inspect_st.expected_mb_w = coded_w / 16;
                printf("  inspection callback: ACTIVE (patched libavcodec)\n");
#else
                printf("  inspection callback: not built in (stock libavcodec)\n");
#endif
            } else if (coded_w != dec_w || coded_h != dec_h) {
                /* dec and all four planes were sized for the first frame;
                 * carrying on with other dimensions would write past them. */
                fprintf(stderr,
                        "coded dims changed mid-stream (%dx%d -> %dx%d); not supported\n",
                        dec_w, dec_h, coded_w, coded_h);
                rc = 2; goto cleanup;
            }

            /* Pack each MB's predicted samples from the AVFrame.
             * Coeffs = 0; no edges; daedalus_decoder is expected to
             * reproduce exactly the AVFrame pixels. */
            const int mb_w = dec_w / 16;
            const int mb_h = dec_h / 16;
            uint8_t mb_pred[384];
            int16_t mb_coeffs[384] = {0};
            struct daedalus_decoder_mb_input mb = {0};
            for (int my = 0; my < mb_h; my++) {
                for (int mx = 0; mx < mb_w; mx++) {
                    pack_mb_predicted(fr, mx, my, mb_pred);
                    mb.mb_x          = (uint16_t) mx;
                    mb.mb_y          = (uint16_t) my;
                    mb.transform_8x8 = 0;
                    mb.coeffs        = mb_coeffs;
                    mb.predicted     = mb_pred;
                    mb.edges         = NULL;
                    mb.n_edges       = 0;
                    if (daedalus_decoder_append_mb(dec, &mb) != 0) {
                        fprintf(stderr, "append_mb (%d,%d) failed\n", mx, my);
                        rc = 3; goto cleanup;
                    }
                }
            }
            const int frc = daedalus_decoder_flush_frame(dec,
                                                         out_y_dadec,  (size_t) dec_w,
                                                         out_uv_dadec, (size_t) dec_w);
            if (frc != 0) {
                fprintf(stderr, "flush_frame frame %d rc=%d\n", n_frames, frc);
                rc = 3; goto cleanup;
            }

            /* Build the reference NV12 from the AVFrame for comparison. */
            avframe_to_nv12(fr, out_y_ref,  (size_t) dec_w,
                                out_uv_ref, (size_t) dec_w,
                            dec_w, dec_h);

            /* Byte-exact compare. */
            size_t y_diffs = 0, uv_diffs = 0;
            for (size_t i = 0; i < y_size; i++)
                if (out_y_dadec[i] != out_y_ref[i]) y_diffs++;
            for (size_t i = 0; i < uv_size; i++)
                if (out_uv_dadec[i] != out_uv_ref[i]) uv_diffs++;
            total_y_diffs  += y_diffs;
            total_uv_diffs += uv_diffs;

#ifdef DAEDALUS_HAVE_H264_MB_INSPECT_CB
            {
                const int expected = mb_w * mb_h;
                /* No packet is decoded between drained frames, so the
                 * per-frame call count is only checked for frames that
                 * came out of the packet-driven part of the loop. */
                if (!draining && inspect_st.n_cbs_this_frame != expected) {
                    fprintf(stderr,
                            "  frame %d: callback fired %d times, expected %d (mb_w*mb_h)\n",
                            n_frames, inspect_st.n_cbs_this_frame, expected);
                    rc = 4;  /* treat as bit-exact failure */
                }
                inspect_total_cbs        += inspect_st.n_cbs_this_frame;
                inspect_total_violations += inspect_st.raster_violations;
                /* Reset for next frame. */
                inspect_st.n_cbs_this_frame  = 0;
                inspect_st.raster_violations = 0;
                inspect_st.last_mb_x         = 0;
                inspect_st.last_mb_y         = 0;
            }
#endif
            printf("  frame %d: Y diff %zu/%zu  UV diff %zu/%zu%s\n",
                   n_frames, y_diffs, y_size, uv_diffs, uv_size,
                   (y_diffs || uv_diffs) ? "  ***" : "");

            /* Write both YUVs to disk; a short write would leave files
             * that no longer correspond to the frames reported above. */
            if (fwrite(out_y_dadec,  1, y_size,  out_dadec_f) != y_size  ||
                fwrite(out_uv_dadec, 1, uv_size, out_dadec_f) != uv_size ||
                fwrite(out_y_ref,    1, y_size,  out_ref_f)   != y_size  ||
                fwrite(out_uv_ref,   1, uv_size, out_ref_f)   != uv_size) {
                fprintf(stderr, "short write to output file at frame %d\n", n_frames);
                rc = 1; goto cleanup;
            }

            n_frames++;
            if (max_frames > 0 && n_frames >= max_frames) goto drained;
        }
    }

drained:
    printf("\n%d frames decoded; total Y diff %zu, UV diff %zu\n",
           n_frames, total_y_diffs, total_uv_diffs);
#ifdef DAEDALUS_HAVE_H264_MB_INSPECT_CB
    printf("inspection callback: %d total invocations, %d raster-order violations\n",
           inspect_total_cbs, inspect_total_violations);
    if (inspect_total_violations) rc = 4;
#endif
    if (rc == 0 && n_frames == 0) {
        /* Nothing was compared, so there is nothing to call a pass. */
        printf("FAIL: no frames decoded, nothing was verified\n");
        rc = 2;
    } else if (rc == 0 && (total_y_diffs || total_uv_diffs)) {
        printf("FAIL: daedalus-decoder output does NOT match libavcodec reference byte-for-byte\n");
        rc = 4;
    } else if (rc == 0) {
        printf("PASS: byte-exact identity-passthrough across %d frames\n", n_frames);
    } else {
        printf("FAIL: %s\n",
               (total_y_diffs || total_uv_diffs) ? "byte-exact comparison failed"
                                                 : "inspection callback invariants violated");
    }

cleanup:
    /* fclose() flushes buffered output, so its result matters too. */
    if (out_dadec_f && fclose(out_dadec_f) != 0) {
        fprintf(stderr, "fclose(%s) failed\n", out_dadec_path);
        if (rc == 0) rc = 1;
    }
    if (out_ref_f && fclose(out_ref_f) != 0) {
        fprintf(stderr, "fclose(%s) failed\n", out_ref_path);
        if (rc == 0) rc = 1;
    }
    free(out_uv_ref);
    free(out_y_ref);
    free(out_uv_dadec);
    free(out_y_dadec);
    if (dec) daedalus_decoder_destroy(dec);
    av_frame_free(&fr);
    av_packet_free(&pkt);
    avcodec_free_context(&avctx);
    avformat_close_input(&fmt);
    return rc;
}