Compare commits
5 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| df339c07fd | |||
| 9061350e82 | |||
| b597fc0098 | |||
| 35b4f163c6 | |||
| 44e92fa3dc |
+63
-3
@@ -195,6 +195,30 @@ if(DAEDALUS_BUILD_TOOLS)
|
||||
${DAEDALUS_FFMPEG_PREFIX}/lib/libswresample.a
|
||||
m z pthread)
|
||||
set(FFMPEG_CFLAGS_OTHER "-DDAEDALUS_HAVE_H264_MB_INSPECT_CB=1")
|
||||
|
||||
# PR-A3+ optional: also point at the patched FFmpeg SOURCE TREE
|
||||
# so the CLI can include libavcodec/h264dec.h directly and
|
||||
# dereference H264Context fields (the side-buffer mb_inspect_coeffs
|
||||
# added in marfrit-packages patch 0017, the cur_pic.f for
|
||||
# pre-deblock pixel access, etc.). When set, the internal-header
|
||||
# include codepath is compiled in.
|
||||
set(DAEDALUS_FFMPEG_SRC "" CACHE PATH
|
||||
"Path to patched FFmpeg source tree (= path to FFmpeg/ checkout where build was run; contains config.h + libavcodec/h264dec.h). Empty = h264dec.h includes are disabled.")
|
||||
if(DAEDALUS_FFMPEG_SRC)
  # Fail at configure time instead of deep inside the C compile: the
  # internal-header code path needs libavcodec/h264dec.h from this tree.
  if(NOT EXISTS "${DAEDALUS_FFMPEG_SRC}/libavcodec/h264dec.h")
    message(FATAL_ERROR
      "daedalus_decode_h264: DAEDALUS_FFMPEG_SRC='${DAEDALUS_FFMPEG_SRC}' "
      "does not contain libavcodec/h264dec.h")
  endif()
  # -DHAVE_AV_CONFIG_H (added below) makes the FFmpeg headers pull in the
  # tree's config.h; warn early when it is missing.
  if(NOT EXISTS "${DAEDALUS_FFMPEG_SRC}/config.h")
    message(WARNING
      "daedalus_decode_h264: no config.h in '${DAEDALUS_FFMPEG_SRC}'; "
      "point DAEDALUS_FFMPEG_SRC at the checkout where FFmpeg was configured")
  endif()
  message(STATUS "daedalus_decode_h264: FFmpeg source at ${DAEDALUS_FFMPEG_SRC}")

  # IMPORTANT: source tree FIRST in -I order — its
  # libavutil/common.h does #include "intmath.h" with HAVE_AV_CONFIG_H,
  # which resolves to libavutil/intmath.h (in the source tree
  # only — that header isn't installed since it's arch-dispatched).
  # The installed-prefix include path's libavutil/common.h is the
  # same file textually but resolves "intmath.h" against the
  # install dir where it doesn't exist.
  # NOTE(review): this assignment replaces FFMPEG_INCLUDE_DIRS rather than
  # prepending to it — confirm no installed-prefix include dir is expected
  # to follow it.
  set(FFMPEG_INCLUDE_DIRS "${DAEDALUS_FFMPEG_SRC}")

  # Normalise any space-separated flags already present into a CMake list,
  # then append the internal-header defines as separate list items.
  separate_arguments(FFMPEG_CFLAGS_OTHER UNIX_COMMAND "${FFMPEG_CFLAGS_OTHER}")
  list(APPEND FFMPEG_CFLAGS_OTHER
    -DDAEDALUS_HAVE_H264_MB_INSPECT_COEFFS=1
    -DHAVE_AV_CONFIG_H)
endif()
|
||||
else()
|
||||
pkg_check_modules(FFMPEG REQUIRED libavcodec libavformat libavutil)
|
||||
message(STATUS "daedalus_decode_h264: system FFmpeg (no inspection callback)")
|
||||
@@ -213,12 +237,48 @@ endif()
|
||||
|
||||
# ---- Install ------------------------------------------------------
|
||||
#
|
||||
# Library + public header + pkg-config file. CMake config exports are
# still to come once the API stabilises (the earlier scaffold install
# shipped only the static archive).
|
||||
# Installs:
|
||||
# - libdaedalus_decoder.a → ${CMAKE_INSTALL_LIBDIR}
|
||||
# - include/daedalus_decoder.h → ${CMAKE_INSTALL_INCLUDEDIR}
|
||||
# - daedalus-decoder.pc → ${CMAKE_INSTALL_LIBDIR}/pkgconfig
|
||||
#
|
||||
# The .pc lets sibling consumers (daedalus-v4l2 daemon, the
|
||||
# daedalus_decode_h264 CLI when built externally) discover the static
|
||||
# archive + headers via pkg-config. daedalus-fourier is declared as a
|
||||
# public `Requires:` because the consumer (which static-links
|
||||
# libdaedalus_decoder.a) also needs daedalus-fourier in its own link
|
||||
# line to resolve the daedalus_ctx_* / daedalus_recipe_* symbols this
|
||||
# archive references.
|
||||
#
|
||||
# Relocatable-prefix scheme mirrors daedalus-fourier's .pc generation:
|
||||
# `prefix` is derived from ${pcfiledir} so `cmake --install --prefix /foo`
|
||||
# produces a .pc that resolves prefix=/foo at lookup time, regardless of
|
||||
# what CMAKE_INSTALL_PREFIX was at configure time.
|
||||
|
||||
include(GNUInstallDirs)

# Static archive + public header.
install(TARGETS daedalus_decoder
  ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}")
install(FILES "${CMAKE_CURRENT_SOURCE_DIR}/include/daedalus_decoder.h"
  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}")

# Relative hop from the installed pkgconfig dir back to the prefix, so the
# .pc can derive `prefix` from ${pcfiledir}. CMAKE_INSTALL_FULL_LIBDIR is
# used because CMAKE_INSTALL_LIBDIR may be an absolute path, in which case
# "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}" is not a valid path.
file(RELATIVE_PATH PKGCONFIG_PCDIR_TO_PREFIX
  "${CMAKE_INSTALL_FULL_LIBDIR}/pkgconfig"
  "${CMAKE_INSTALL_PREFIX}")

# GNUInstallDirs allows absolute CMAKE_INSTALL_<dir> values; those must be
# written verbatim instead of being glued onto ${prefix}.
if(IS_ABSOLUTE "${CMAKE_INSTALL_LIBDIR}")
  set(DAEDALUS_PC_LIBDIR "${CMAKE_INSTALL_LIBDIR}")
else()
  set(DAEDALUS_PC_LIBDIR "\${prefix}/${CMAKE_INSTALL_LIBDIR}")
endif()
if(IS_ABSOLUTE "${CMAKE_INSTALL_INCLUDEDIR}")
  set(DAEDALUS_PC_INCLUDEDIR "${CMAKE_INSTALL_INCLUDEDIR}")
else()
  set(DAEDALUS_PC_INCLUDEDIR "\${prefix}/${CMAKE_INSTALL_INCLUDEDIR}")
endif()

# Generated into the build tree at configure time, then installed.
set(PKGCONFIG_OUT "${CMAKE_CURRENT_BINARY_DIR}/daedalus-decoder.pc")
file(WRITE "${PKGCONFIG_OUT}"
"prefix=\${pcfiledir}/${PKGCONFIG_PCDIR_TO_PREFIX}
exec_prefix=\${prefix}
libdir=${DAEDALUS_PC_LIBDIR}
includedir=${DAEDALUS_PC_INCLUDEDIR}

Name: daedalus-decoder
Description: Frame-major H.264 decoder on V3D7 via daedalus-fourier primitives
Version: ${PROJECT_VERSION}
Libs: -L\${libdir} -ldaedalus_decoder
Requires: daedalus-fourier
Cflags: -I\${includedir}
")
install(FILES "${PKGCONFIG_OUT}"
  DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig"
)
|
||||
|
||||
+556
-23
@@ -51,14 +51,32 @@
|
||||
#include <libavutil/imgutils.h>
|
||||
|
||||
/* Per-MB inspection callback API — provided by the patched FFmpeg
 * fork via marfrit-packages patches 0016 + 0017.
 *
 * When DAEDALUS_HAVE_H264_MB_INSPECT_COEFFS is defined (CMake sets it
 * alongside DAEDALUS_FFMPEG_SRC), we include libavcodec's INTERNAL
 * h264dec.h header to dereference H264Context fields — specifically
 * h->mb_inspect_coeffs (the 0017 side buffer holding pre-IDCT-
 * destruction sl->mb), h->cur_pic.f (pre-deblock reconstructed pixels),
 * and h->cur_pic.mb_type[mb_xy] for the mb-type gate.  The same
 * configure-time config.h that built the static libavcodec.a is
 * picked up via -DHAVE_AV_CONFIG_H + -I path; ABI match is automatic.
 *
 * When only DAEDALUS_HAVE_H264_MB_INSPECT_CB is defined (no source
 * tree available — e.g. building against a distro-shipped patched
 * libavcodec), libavcodec/h264dec.h is not installed, so H264Context is
 * only forward-declared here and used opaquely through the callback
 * signature; we fall back to identity-passthrough across all MBs.
 *
 * When neither is defined: stock libavcodec, no callback, identity-
 * passthrough only (PR-A1b behaviour). */
#ifdef DAEDALUS_HAVE_H264_MB_INSPECT_COEFFS
#  include "libavcodec/h264dec.h"
#  include "libavcodec/h264.h"   /* IS_INTRA4x4 / IS_8x8DCT / IS_INTRA_PCM */
#elif defined(DAEDALUS_HAVE_H264_MB_INSPECT_CB)
struct H264Context;
#endif
|
||||
|
||||
#if defined(DAEDALUS_HAVE_H264_MB_INSPECT_CB) || defined(DAEDALUS_HAVE_H264_MB_INSPECT_COEFFS)
|
||||
typedef void (*ff_h264_mb_inspect_cb)(void *opaque,
|
||||
const struct H264Context *h,
|
||||
int mb_x, int mb_y);
|
||||
@@ -76,35 +94,370 @@ static const char *substrate_str = "auto";
|
||||
static int max_frames = -1;
|
||||
|
||||
/* Inspection-callback state: per-frame counter + "each MB seen exactly
 * once" check.  Bitmap, not raster-order — libavcodec's MB threading +
 * multi-slice frames mean MBs reach the callback out of strict order;
 * contract is "every MB fires the callback exactly once per frame".
 * Reset at end of each frame.
 *
 * When real-coeff extraction is compiled in (PR-A3+), we ALSO maintain
 * a per-MB capture buffer (real-coeffs path) so the main loop can
 * drive daedalus_decoder_append_mb with REAL pre-residual P + real
 * coefficients for MBs that satisfy the gate (Intra_4x4, no 8x8 DCT,
 * no PCM).  Other MBs stay on identity-passthrough. */
|
||||
#ifdef DAEDALUS_HAVE_H264_MB_INSPECT_CB
|
||||
/* Everything the inspection callback records for one macroblock.  The
 * main loop reads these records back when it builds the per-MB input
 * for daedalus_decoder_append_mb. */
struct mb_capture {
    /* 1 = real-coeffs IDCT path, 0 = identity (predicted = pre_deblock_snap) */
    int valid;

    /* luma, raster block order, raw sl->mb layout */
    int16_t coeffs[256];

    /* luma P recovered = pre_deblock - clipped IDCT(C) */
    uint8_t predicted[256];

    /* Pre-deblock pixel snapshots taken at callback time:
     * luma 16×16, then Cb 8×8 and Cr 8×8. */
    uint8_t pre_deblock_snap_y[256];
    uint8_t pre_deblock_snap_cb[64];
    uint8_t pre_deblock_snap_cr[64];

    /* QP_Y for this MB (sl->qscale at callback time) */
    int qp_y;

    /* 1 if MB is intra (any flavour), 0 otherwise */
    int mb_type_intra;

    /* 1 if 8×8 DCT (affects which internal edges fire) */
    int transform_8x8;
};
|
||||
|
||||
/* State shared between the decode loop and inspect_cb(): per-frame
 * callback bookkeeping plus, when coefficient extraction is compiled in,
 * the per-MB capture array and gate statistics. */
struct inspect_state {
    int n_cbs_this_frame;        /* incremented once per inspect_cb() call */
    int mb_w, mb_h;              /* grid size in macroblocks */
    uint8_t *seen;               /* mb_w * mb_h bitmap */
    int duplicate_mbs;           /* same (mb_x, mb_y) seen twice this frame */
    int out_of_bounds;           /* (mb_x, mb_y) outside the coded grid */
#ifdef DAEDALUS_HAVE_H264_MB_INSPECT_COEFFS
    struct mb_capture *captures; /* mb_w * mb_h entries */
    int real_coeffs_mbs;         /* count of MBs in real-coeffs IDCT path this frame */
    int skipped_intra16x16;
    int skipped_8x8dct;
    int skipped_other;
    /* Slice-level deblock params, refreshed from h->slice_ctx[0] on every
     * callback (last value wins).  Per H.264 spec these are constant per
     * slice; we assume single-slice frames in our test stream. */
    int slice_alpha_c0_offset;
    int slice_beta_offset;
    /* Raw copy of sl->deblocking_filter.  Despite the name, the main loop
     * treats 0 as "deblocking off" (FFmpeg's internal convention). */
    int slice_deblock_disable;
#endif
};
|
||||
|
||||
#ifdef DAEDALUS_HAVE_H264_MB_INSPECT_COEFFS
|
||||
/* H.264 §8.7.2.2/8.7.2.3 deblock filter tables — transcribed verbatim
|
||||
* from FFmpeg libavcodec/h264_loopfilter.c (LGPL-2.1+; algorithm + table
|
||||
* values come from the H.264 spec which is normative and unpatented).
|
||||
* Tables are size 52*3 — FFmpeg's trick to absorb slice_alpha_c0_offset +
|
||||
* slice_beta_offset (in -12..+12) into the index without bounds-clamping.
|
||||
* Usage: alpha = alpha_table[qp + a] where a = 52 + slice_alpha_c0_offset
|
||||
* (8-bit only; high-bit-depth subtracts qp_bd_offset). */
|
||||
static const uint8_t alpha_table[52*3] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 4, 4, 5, 6,
|
||||
7, 8, 9, 10, 12, 13, 15, 17, 20, 22,
|
||||
25, 28, 32, 36, 40, 45, 50, 56, 63, 71,
|
||||
80, 90,101,113,127,144,162,182,203,226,
|
||||
255,255,
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,
|
||||
255,255,255,255,255,255,255,255,255,255,255,255,255,
|
||||
};
|
||||
static const uint8_t beta_table[52*3] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 2, 2, 2, 3,
|
||||
3, 3, 3, 4, 4, 4, 6, 6, 7, 7,
|
||||
8, 8, 9, 9, 10, 10, 11, 11, 12, 12,
|
||||
13, 13, 14, 14, 15, 15, 16, 16, 17, 17,
|
||||
18, 18,
|
||||
18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
|
||||
18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
|
||||
18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
|
||||
18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
|
||||
};
|
||||
static const int8_t tc0_table[52*3][4] = {
|
||||
{-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
|
||||
{-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
|
||||
{-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
|
||||
{-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
|
||||
{-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
|
||||
{-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
|
||||
{-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
|
||||
{-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
|
||||
{-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
|
||||
{-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
|
||||
{-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
|
||||
{-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 1 },
|
||||
{-1, 0, 0, 1 }, {-1, 0, 0, 1 }, {-1, 0, 0, 1 }, {-1, 0, 1, 1 }, {-1, 0, 1, 1 }, {-1, 1, 1, 1 },
|
||||
{-1, 1, 1, 1 }, {-1, 1, 1, 1 }, {-1, 1, 1, 1 }, {-1, 1, 1, 2 }, {-1, 1, 1, 2 }, {-1, 1, 1, 2 },
|
||||
{-1, 1, 1, 2 }, {-1, 1, 2, 3 }, {-1, 1, 2, 3 }, {-1, 2, 2, 3 }, {-1, 2, 2, 4 }, {-1, 2, 3, 4 },
|
||||
{-1, 2, 3, 4 }, {-1, 3, 3, 5 }, {-1, 3, 4, 6 }, {-1, 3, 4, 6 }, {-1, 4, 5, 7 }, {-1, 4, 5, 8 },
|
||||
{-1, 4, 6, 9 }, {-1, 5, 7,10 }, {-1, 6, 8,11 }, {-1, 6, 8,13 }, {-1, 7,10,14 }, {-1, 8,11,16 },
|
||||
{-1, 9,12,18 }, {-1,10,13,20 }, {-1,11,15,23 }, {-1,13,17,25 },
|
||||
{-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
|
||||
{-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
|
||||
{-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
|
||||
{-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
|
||||
{-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
|
||||
{-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
|
||||
{-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
|
||||
{-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
|
||||
{-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
|
||||
};
|
||||
|
||||
/* H.264 §8.5.11 / Table 8-11: qP_y → qP_chroma mapping for chroma_qp_index_offset == 0.
|
||||
* For qP_y < 30, qP_c = qP_y. Above that, the spec table compresses. */
|
||||
static const uint8_t chroma_qp_table[52] = {
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
||||
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 29, 30,
|
||||
31, 32, 32, 33, 34, 34, 35, 35, 36, 36, 37, 37, 37, 38, 38, 38,
|
||||
39, 39, 39, 39,
|
||||
};
|
||||
|
||||
/* libavcodec's sl->mb stores coefficients in RASTER (row-major) order,
|
||||
* not zig-zag scan order — h264_cavlc.c does
|
||||
* block[*scantable] = (level * qmul[*scantable] + 32) >> 6
|
||||
* where *scantable advances through ff_zigzag_scan[] which contains
|
||||
* RASTER positions (row*4 + col). So sl->mb[i] = coef at raster
|
||||
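* position i = (i/4, i%4) = (row, col).  No inverse-zigzag is needed,
* and no transpose either: inspect_cb copies each 16-coefficient block
* verbatim (an earlier row-major → column-major transpose changed the
* IDCT result and was dropped). */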
|
||||
|
||||
/* 4x4 luma block ordering inside a macroblock (H.264 §6.4.3 z-scan).
 * Index: raster block number (sb_y*4 + sb_x).  Value: the block's
 * position in libavcodec's z-scan-ordered sl->mb.  The permutation is
 * an involution, so the same table also maps z-scan back to raster. */
static const uint8_t raster_to_zscan[16] = {
     0,  1,  4,  5,   /* sb_y = 0 */
     2,  3,  6,  7,   /* sb_y = 1 */
     8,  9, 12, 13,   /* sb_y = 2 */
    10, 11, 14, 15,   /* sb_y = 3 */
};
|
||||
|
||||
/* H.264 4x4 IDCT — transcribed from daedalus-fourier
|
||||
* tests/test_idct_bitexact.c (which itself mirrors h264_idct4_ref.c).
|
||||
* Outputs row-major 16-element residual; clip + shift happens in
|
||||
* the consumer. */
|
||||
/* One 4-point butterfly of the H.264 4x4 inverse transform.
 * Reads d[0..3] and writes out[0..3]; every input is read before any
 * output is stored. */
static void h264_idct4_butterfly(const int d[4], int out[4]) {
    const int even_sum  = d[0] + d[2];
    const int even_diff = d[0] - d[2];
    const int odd_lo    = (d[1] >> 1) - d[3];
    const int odd_hi    = d[1] + (d[3] >> 1);

    out[0] = even_sum  + odd_hi;
    out[3] = even_sum  - odd_hi;
    out[1] = even_diff + odd_lo;
    out[2] = even_diff - odd_lo;
}
|
||||
/* Reference 4x4 inverse transform.
 *
 * block: 16 coefficients, indexed block[c*4 + r] (column-major).
 * out:   16 raw residuals, indexed out[r*4 + c] (row-major); the
 *        rounding "+32, >>6" is left to the caller.
 *
 * The column pass runs before the row pass.  The original author's note
 * is that this order mirrors FFmpeg's h264idct_template.c, and that it
 * matters: `>> 1` on an odd negative intermediate (which rounds toward
 * -inf on the usual arithmetic-shift targets) can make row-first and
 * column-first results differ by one unit.  daedalus-fourier's
 * tests/h264_idct4_ref.c is row-first and is reported to diverge from
 * FFmpeg for some inputs — see task #184. */
static void ref_idct4_compute(const int16_t block[16], int out[16]) {
    int vert[16];          /* column-pass result, vert[row*4 + col] */
    int in4[4], out4[4];
    int col, row, k;

    /* Pass 1: transform each column independently. */
    for (col = 0; col < 4; col++) {
        for (k = 0; k < 4; k++)
            in4[k] = block[col * 4 + k];
        h264_idct4_butterfly(in4, out4);
        for (row = 0; row < 4; row++)
            vert[row * 4 + col] = out4[row];
    }

    /* Pass 2: transform each row of the intermediate straight into out. */
    for (row = 0; row < 4; row++)
        h264_idct4_butterfly(&vert[row * 4], &out[row * 4]);
}
|
||||
#endif /* DAEDALUS_HAVE_H264_MB_INSPECT_COEFFS */
|
||||
|
||||
static void inspect_cb(void *opaque,
|
||||
const struct H264Context *h,
|
||||
int mb_x, int mb_y)
|
||||
{
|
||||
(void) h;
|
||||
struct inspect_state *st = opaque;
|
||||
#ifndef DAEDALUS_HAVE_H264_MB_INSPECT_COEFFS
|
||||
(void) h;
|
||||
#endif
|
||||
|
||||
if (mb_x < 0 || mb_x >= st->mb_w || mb_y < 0 || mb_y >= st->mb_h) {
|
||||
st->out_of_bounds++;
|
||||
} else {
|
||||
const size_t idx = (size_t) mb_y * st->mb_w + (size_t) mb_x;
|
||||
if (st->seen[idx]) st->duplicate_mbs++;
|
||||
st->seen[idx] = 1;
|
||||
st->n_cbs_this_frame++;
|
||||
return;
|
||||
}
|
||||
|
||||
const size_t idx = (size_t) mb_y * st->mb_w + (size_t) mb_x;
|
||||
if (st->seen[idx]) st->duplicate_mbs++;
|
||||
st->seen[idx] = 1;
|
||||
st->n_cbs_this_frame++;
|
||||
|
||||
#ifdef DAEDALUS_HAVE_H264_MB_INSPECT_COEFFS
|
||||
/* Capture slice-level deblock params once. Per spec they're constant
|
||||
* per slice; for our single-slice test streams we just keep the
|
||||
* latest values seen. */
|
||||
{
|
||||
const H264SliceContext *sl = &h->slice_ctx[0];
|
||||
st->slice_alpha_c0_offset = sl->slice_alpha_c0_offset;
|
||||
st->slice_beta_offset = sl->slice_beta_offset;
|
||||
st->slice_deblock_disable = sl->deblocking_filter;
|
||||
}
|
||||
|
||||
/* Real-coeffs path: extract per-MB state for daedalus-decoder
|
||||
* IDCT validation on this MB. Gate: only Intra_4x4 + 4x4 transform
|
||||
* + non-PCM is supported in PR-A3b — other MB flavours fall back
|
||||
* to identity-passthrough in the main loop. */
|
||||
struct mb_capture *cap = &st->captures[idx];
|
||||
cap->valid = 0; /* default to passthrough */
|
||||
|
||||
const int mb_xy = mb_y * h->mb_stride + mb_x;
|
||||
const uint32_t mb_type = h->cur_pic.mb_type[mb_xy];
|
||||
|
||||
/* Capture state needed for deblock edge derivation, regardless
|
||||
* of whether this MB takes the real-coeffs IDCT path. */
|
||||
cap->qp_y = h->cur_pic.qscale_table[mb_xy];
|
||||
cap->mb_type_intra = IS_INTRA(mb_type) ? 1 : 0;
|
||||
cap->transform_8x8 = IS_8x8DCT(mb_type) ? 1 : 0;
|
||||
|
||||
/* Snapshot pre-deblock pixels for all 3 planes at this MB's position. */
|
||||
{
|
||||
const int y_stride = h->cur_pic.f->linesize[0];
|
||||
const int uv_stride = h->cur_pic.f->linesize[1];
|
||||
const uint8_t *mb_y_px = h->cur_pic.f->data[0]
|
||||
+ (ptrdiff_t) mb_y * 16 * y_stride + mb_x * 16;
|
||||
const uint8_t *mb_cb_px = h->cur_pic.f->data[1]
|
||||
+ (ptrdiff_t) mb_y * 8 * uv_stride + mb_x * 8;
|
||||
const uint8_t *mb_cr_px = h->cur_pic.f->data[2]
|
||||
+ (ptrdiff_t) mb_y * 8 * uv_stride + mb_x * 8;
|
||||
for (int r = 0; r < 16; r++)
|
||||
memcpy(&cap->pre_deblock_snap_y[r * 16], &mb_y_px[r * y_stride], 16);
|
||||
for (int r = 0; r < 8; r++) {
|
||||
memcpy(&cap->pre_deblock_snap_cb[r * 8], &mb_cb_px[r * uv_stride], 8);
|
||||
memcpy(&cap->pre_deblock_snap_cr[r * 8], &mb_cr_px[r * uv_stride], 8);
|
||||
}
|
||||
}
|
||||
|
||||
if (!IS_INTRA4x4(mb_type)) {
|
||||
if (IS_INTRA16x16(mb_type)) st->skipped_intra16x16++;
|
||||
else st->skipped_other++;
|
||||
return;
|
||||
}
|
||||
if (IS_8x8DCT(mb_type)) { st->skipped_8x8dct++; return; }
|
||||
if (IS_INTRA_PCM(mb_type)) { st->skipped_other++; return; }
|
||||
|
||||
/* Snapshot luma pre-deblock pixels from cur_pic. */
|
||||
const uint8_t *luma_plane = h->cur_pic.f->data[0];
|
||||
const int luma_stride = h->cur_pic.f->linesize[0];
|
||||
const uint8_t *mb_pixels = luma_plane + (ptrdiff_t) mb_y * 16 * luma_stride
|
||||
+ mb_x * 16;
|
||||
|
||||
/* (pre_deblock_snap_y is already populated above for all 3 planes;
|
||||
* we use it later in the main loop as the daedalus predicted input.) */
|
||||
|
||||
/* Coefficients are in sl->mb at end of entropy decode but zeroed by
|
||||
* the time the callback fires (IDCT-add consumed them). Patch 0017
|
||||
* preserves them in h->mb_inspect_coeffs[16 * 48] BEFORE IDCT runs,
|
||||
* so we read from there. */
|
||||
const int16_t *zz_mb = h->mb_inspect_coeffs; /* layout matches sl->mb 8-bit half */
|
||||
|
||||
for (int r_block = 0; r_block < 16; r_block++) {
|
||||
const int z_block = raster_to_zscan[r_block];
|
||||
const int16_t *block_raw = &zz_mb[z_block * 16];
|
||||
|
||||
/* sl->mb stores 16 int16 per block. Empirical finding (via
|
||||
* /tmp/idct_compare.c, 2026-05-26): daedalus-fourier's C ref
|
||||
* IDCT and FFmpeg's C ref IDCT produce IDENTICAL output for
|
||||
* the same input array — the "column-major vs row-major"
|
||||
* labelling is decoration; both functions implement the same
|
||||
* H.264 spec IDCT on a 16-int16 input. So we feed daedalus
|
||||
* the raw sl->mb data unchanged. Previous attempt to
|
||||
* transpose row-major→column-major was wrong — the transpose
|
||||
* changed the IDCT result. */
|
||||
int16_t col[16];
|
||||
memcpy(col, block_raw, 16 * sizeof(int16_t));
|
||||
|
||||
memcpy(&cap->coeffs[r_block * 16], col, 16 * sizeof(int16_t));
|
||||
|
||||
/* IDCT → row-major 16-int residual. */
|
||||
int idct_row[16];
|
||||
ref_idct4_compute(col, idct_row);
|
||||
|
||||
/* P = clip(pre_deblock - ((IDCT + 32) >> 6)) for each pixel.
|
||||
* Symmetric: daedalus IDCT-add will undo the subtract, including
|
||||
* for saturating cases (where the same shift puts the value back
|
||||
* at the same clip boundary). */
|
||||
const int sb_y = r_block >> 2;
|
||||
const int sb_x = r_block & 3;
|
||||
for (int r = 0; r < 4; r++) {
|
||||
for (int c = 0; c < 4; c++) {
|
||||
const int pre_db = mb_pixels[(sb_y * 4 + r) * luma_stride + sb_x * 4 + c];
|
||||
const int shift = (idct_row[r * 4 + c] + 32) >> 6;
|
||||
int p = pre_db - shift;
|
||||
if (p < 0) p = 0;
|
||||
if (p > 255) p = 255;
|
||||
cap->predicted[(sb_y * 4 + r) * 16 + (sb_x * 4 + c)] = (uint8_t) p;
|
||||
}
|
||||
}
|
||||
}
|
||||
cap->valid = 1;
|
||||
st->real_coeffs_mbs++;
|
||||
|
||||
/* One-shot diagnostic enabled by DAEDALUS_DUMP_MB_3_0 env var. */
|
||||
if (mb_x == 3 && mb_y == 0 && getenv("DAEDALUS_DUMP_MB_3_0")) {
|
||||
const int16_t *zz = &zz_mb[1 * 16]; /* z_block = raster_block = 1 */
|
||||
const struct mb_capture *capdiag = &st->captures[mb_y * st->mb_w + mb_x];
|
||||
fprintf(stderr, " MB(3,0) block z=1 raster coeffs (sl->mb):");
|
||||
for (int p = 0; p < 16; p++) fprintf(stderr, " %d", (int) zz[p]);
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, " MB(3,0) block z=1 col_major coeffs (after transpose):");
|
||||
for (int i = 0; i < 16; i++) fprintf(stderr, " %d", (int) capdiag->coeffs[1 * 16 + i]);
|
||||
fprintf(stderr, "\n");
|
||||
/* Recompute IDCT for this block (already done in the loop above but
|
||||
* print here for visibility). */
|
||||
int idct_print[16];
|
||||
ref_idct4_compute(&capdiag->coeffs[1 * 16], idct_print);
|
||||
fprintf(stderr, " MB(3,0) block z=1 IDCT row-major (raw, pre-shift):");
|
||||
for (int i = 0; i < 16; i++) fprintf(stderr, " %d", idct_print[i]);
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, " MB(3,0) block z=1 IDCT (+32)>>6:");
|
||||
for (int i = 0; i < 16; i++) fprintf(stderr, " %d", (idct_print[i] + 32) >> 6);
|
||||
fprintf(stderr, "\n");
|
||||
const uint8_t *bpix = mb_pixels + 0 * luma_stride + 4; /* sb_y=0, sb_x=1 → cols 4..7 within MB */
|
||||
fprintf(stderr, " MB(3,0) block z=1 pre_deblock pixels:\n");
|
||||
for (int r = 0; r < 4; r++) {
|
||||
fprintf(stderr, " ");
|
||||
for (int c = 0; c < 4; c++)
|
||||
fprintf(stderr, " %3u", bpix[r * luma_stride + c]);
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
fprintf(stderr, " MB(3,0) block z=1 P_rec (= pre_deblock - shift):\n");
|
||||
for (int r = 0; r < 4; r++) {
|
||||
fprintf(stderr, " ");
|
||||
for (int c = 0; c < 4; c++)
|
||||
fprintf(stderr, " %3u", capdiag->predicted[(0*4+r) * 16 + (1*4+c)]);
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
/* And what daedalus_decoder SHOULD produce: clip(P_rec + shift). */
|
||||
fprintf(stderr, " MB(3,0) block z=1 expected daedalus output = clip(P_rec + shift):\n");
|
||||
for (int r = 0; r < 4; r++) {
|
||||
fprintf(stderr, " ");
|
||||
for (int c = 0; c < 4; c++) {
|
||||
int p_rec = capdiag->predicted[(0*4+r) * 16 + (1*4+c)];
|
||||
int sh = (idct_print[r*4+c] + 32) >> 6;
|
||||
int e = p_rec + sh;
|
||||
if (e < 0) e = 0; if (e > 255) e = 255;
|
||||
fprintf(stderr, " %3d", e);
|
||||
}
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -247,6 +600,18 @@ int main(int argc, char **argv)
|
||||
const AVCodec *codec = avcodec_find_decoder(AV_CODEC_ID_H264);
|
||||
AVCodecContext *avctx = avcodec_alloc_context3(codec);
|
||||
avcodec_parameters_to_context(avctx, fmt->streams[vstream]->codecpar);
|
||||
|
||||
#ifdef DAEDALUS_HAVE_H264_MB_INSPECT_COEFFS
|
||||
/* Patch 0017's coefficient side buffer lives in H264Context (single
|
||||
* per-stream); multi-threaded slice decode would race on it. */
|
||||
avctx->thread_count = 1;
|
||||
avctx->thread_type = 0;
|
||||
/* PR-A6: keep libavcodec's deblock ON so AVFrame is the post-deblock
|
||||
* reference we validate daedalus against. Per-MB pre_deblock
|
||||
* snapshots taken in the inspection callback (before deblock crosses
|
||||
* into this MB's region) provide daedalus with pre-deblock input. */
|
||||
#endif
|
||||
|
||||
if (avcodec_open2(avctx, codec, NULL) < 0) {
|
||||
fprintf(stderr, "avcodec_open2 failed\n");
|
||||
avformat_close_input(&fmt); return 2;
|
||||
@@ -280,6 +645,11 @@ int main(int argc, char **argv)
|
||||
inspect_st.mb_h = H_round / 16;
|
||||
inspect_st.seen = calloc(1, (size_t) inspect_st.mb_w * inspect_st.mb_h);
|
||||
if (!inspect_st.seen) { rc = 1; goto cleanup; }
|
||||
#ifdef DAEDALUS_HAVE_H264_MB_INSPECT_COEFFS
|
||||
inspect_st.captures = calloc((size_t) inspect_st.mb_w * inspect_st.mb_h,
|
||||
sizeof(*inspect_st.captures));
|
||||
if (!inspect_st.captures) { rc = 1; goto cleanup; }
|
||||
#endif
|
||||
}
|
||||
ff_h264_set_mb_inspect_cb(avctx, inspect_cb, &inspect_st);
|
||||
int inspect_total_cbs = 0;
|
||||
@@ -360,17 +730,133 @@ int main(int argc, char **argv)
|
||||
const int mb_h = coded_h / 16;
|
||||
uint8_t mb_pred[384];
|
||||
int16_t mb_coeffs[384] = {0};
|
||||
struct daedalus_decoder_edge mb_edges[16];
|
||||
struct daedalus_decoder_mb_input mb = {0};
|
||||
#ifdef DAEDALUS_HAVE_H264_MB_INSPECT_COEFFS
|
||||
/* PR-A6 edge derivation: a = 52 + slice_alpha_c0_offset,
|
||||
* b = 52 + slice_beta_offset (per FFmpeg loopfilter.c
|
||||
* convention; absorbs the offset into the tripled tables). */
|
||||
const int slice_a = 52 + inspect_st.slice_alpha_c0_offset;
|
||||
const int slice_b = 52 + inspect_st.slice_beta_offset;
|
||||
/* FFmpeg's h264_slice.c inverts the spec's disable_deblocking_filter_idc
|
||||
* via `sl->deblocking_filter ^= 1` (line ~1901). Internal convention:
|
||||
* 0 = disabled (spec = 1)
|
||||
* 1 = enabled (spec = 0)
|
||||
* 2 = enabled-but-not-across-slice-boundaries (unchanged)
|
||||
* So deblock is OFF iff sl->deblocking_filter == 0. */
|
||||
const int deblock_off = inspect_st.slice_deblock_disable == 0;
|
||||
#endif
|
||||
for (int my = 0; my < mb_h; my++) {
|
||||
for (int mx = 0; mx < mb_w; mx++) {
|
||||
/* Default: identity-passthrough — luma from AVFrame,
|
||||
* chroma from AVFrame, coeffs all zero, no edges. */
|
||||
pack_mb_predicted(fr, mx, my, mb_pred);
|
||||
memset(mb_coeffs, 0, sizeof(mb_coeffs));
|
||||
int n_edges = 0;
|
||||
|
||||
#ifdef DAEDALUS_HAVE_H264_MB_INSPECT_COEFFS
|
||||
/* PR-A6: feed daedalus pre-deblock pixels from the
|
||||
* per-MB snapshots taken in the callback (AVFrame is
|
||||
* now post-deblock — used as reference, not as input). */
|
||||
const int mb_idx = my * mb_w + mx;
|
||||
const struct mb_capture *cap = &inspect_st.captures[mb_idx];
|
||||
|
||||
/* Luma: P_rec for real-coeffs MBs, raw pre-deblock snap
|
||||
* otherwise (with zero coeffs). Both produce the same
|
||||
* pre-deblock state after daedalus IDCT-add. */
|
||||
if (cap->valid) {
|
||||
memcpy(mb_pred, cap->predicted, 256);
|
||||
for (int i = 0; i < 256; i++)
|
||||
mb_coeffs[i] = cap->coeffs[i];
|
||||
} else {
|
||||
memcpy(mb_pred, cap->pre_deblock_snap_y, 256);
|
||||
}
|
||||
/* Chroma: always identity-passthrough from snap.
|
||||
* Chroma DC Hadamard + chroma residual extraction is
|
||||
* a follow-up (PR-A4). */
|
||||
memcpy(mb_pred + 256, cap->pre_deblock_snap_cb, 64);
|
||||
memcpy(mb_pred + 256 + 64, cap->pre_deblock_snap_cr, 64);
|
||||
|
||||
/* Derive deblock edges for this MB. Spec §8.7.2:
|
||||
* - Frame-boundary edges: skip (bS=0 — kernel reads p3 at -4).
|
||||
* - MB-boundary edges with intra neighbour: bS=4.
|
||||
* - Internal MB edges within intra MB: bS=3.
|
||||
* - 8x8 DCT MBs: internal edges only at col/row 8 (the
|
||||
* single 8x8-block boundary inside the MB).
|
||||
* For non-intra MB types in mixed streams the bS rules
|
||||
* differ; we'd need cbp/MV/ref info from sl context for
|
||||
* those. Our test stream is all-intra, so simplified. */
|
||||
if (!deblock_off && cap->mb_type_intra && !getenv("DAEDALUS_SKIP_EDGES")) {
|
||||
const int qp_self = cap->qp_y;
|
||||
const int qp_left = (mx > 0)
|
||||
? inspect_st.captures[mb_idx - 1].qp_y : qp_self;
|
||||
const int qp_top = (my > 0)
|
||||
? inspect_st.captures[mb_idx - mb_w].qp_y : qp_self;
|
||||
const int qpc_self = chroma_qp_table[qp_self];
|
||||
const int qpc_left = chroma_qp_table[qp_left];
|
||||
const int qpc_top = chroma_qp_table[qp_top];
|
||||
const int qp_avg_left = (qp_self + qp_left + 1) >> 1;
|
||||
const int qp_avg_top = (qp_self + qp_top + 1) >> 1;
|
||||
const int qpc_avg_left = (qpc_self + qpc_left + 1) >> 1;
|
||||
const int qpc_avg_top = (qpc_self + qpc_top + 1) >> 1;
|
||||
|
||||
/* Helper macro to emit one edge. bS=0 (skip)
|
||||
* edges are still emitted with bS=0 — daedalus's
|
||||
* partitioner filters them out. */
|
||||
/* EMIT_EDGE(orient_, plane_, edge_idx_, bS_, qp_): append one deblock edge
 * descriptor for the current MB (mx, my) to mb_edges[] and bump n_edges.
 *   orient_   - 0 in the vertical-edge ("V") calls below, 1 in the
 *               horizontal-edge ("H") calls.
 *   plane_    - 0 = luma, 1 = Cb, 2 = Cr (as used by the calls below).
 *   edge_idx_ - index of the edge within the MB for that orient/plane.
 *   bS_       - boundary strength, stored verbatim in e->bS.
 *   qp_       - QP used to index the tables: alpha and tc0 are looked up
 *               at (qp_ + slice_a), beta at (qp_ + slice_b).
 * Does nothing once n_edges has reached 16. All four tc0 entries receive
 * the same value: column bS_ of the tc0_table row when bS_ <= 3, otherwise
 * column 0. bS_ and qp_ are expanded more than once, so callers must pass
 * side-effect-free expressions. */
#define EMIT_EDGE(orient_, plane_, edge_idx_, bS_, qp_) do { \
|
||||
if (n_edges >= 16) break; \
|
||||
struct daedalus_decoder_edge *e = &mb_edges[n_edges++]; \
|
||||
e->mb_x = (uint16_t) mx; \
|
||||
e->mb_y = (uint16_t) my; \
|
||||
e->edge_idx = (uint8_t) (edge_idx_); \
|
||||
e->orient = (uint8_t) (orient_); \
|
||||
e->plane = (uint8_t) (plane_); \
|
||||
e->bS = (uint8_t) (bS_); \
|
||||
e->alpha = alpha_table[(qp_) + slice_a]; \
|
||||
e->beta = beta_table [(qp_) + slice_b]; \
|
||||
const int8_t *tc = tc0_table[(qp_) + slice_a]; \
|
||||
e->tc0[0] = tc[(bS_) <= 3 ? (bS_) : 0]; \
|
||||
e->tc0[1] = tc[(bS_) <= 3 ? (bS_) : 0]; \
|
||||
e->tc0[2] = tc[(bS_) <= 3 ? (bS_) : 0]; \
|
||||
e->tc0[3] = tc[(bS_) <= 3 ? (bS_) : 0]; \
|
||||
} while (0)
|
||||
|
||||
/* Luma V edges: 4 at col 0, 4, 8, 12. Internal
|
||||
* edges at 4/12 are skipped for 8x8 DCT MBs. */
|
||||
EMIT_EDGE(0, 0, 0, (mx > 0) ? 4 : 0, qp_avg_left);
|
||||
if (!cap->transform_8x8) EMIT_EDGE(0, 0, 1, 3, qp_self);
|
||||
EMIT_EDGE(0, 0, 2, 3, qp_self);
|
||||
if (!cap->transform_8x8) EMIT_EDGE(0, 0, 3, 3, qp_self);
|
||||
|
||||
/* Luma H edges: 4 at row 0, 4, 8, 12. */
|
||||
EMIT_EDGE(1, 0, 0, (my > 0) ? 4 : 0, qp_avg_top);
|
||||
if (!cap->transform_8x8) EMIT_EDGE(1, 0, 1, 3, qp_self);
|
||||
EMIT_EDGE(1, 0, 2, 3, qp_self);
|
||||
if (!cap->transform_8x8) EMIT_EDGE(1, 0, 3, 3, qp_self);
|
||||
|
||||
/* Chroma V edges: 2 per plane (Cb=1, Cr=2). */
|
||||
EMIT_EDGE(0, 1, 0, (mx > 0) ? 4 : 0, qpc_avg_left);
|
||||
if (!cap->transform_8x8) EMIT_EDGE(0, 1, 1, 3, qpc_self);
|
||||
EMIT_EDGE(0, 2, 0, (mx > 0) ? 4 : 0, qpc_avg_left);
|
||||
if (!cap->transform_8x8) EMIT_EDGE(0, 2, 1, 3, qpc_self);
|
||||
|
||||
/* Chroma H edges. */
|
||||
EMIT_EDGE(1, 1, 0, (my > 0) ? 4 : 0, qpc_avg_top);
|
||||
if (!cap->transform_8x8) EMIT_EDGE(1, 1, 1, 3, qpc_self);
|
||||
EMIT_EDGE(1, 2, 0, (my > 0) ? 4 : 0, qpc_avg_top);
|
||||
if (!cap->transform_8x8) EMIT_EDGE(1, 2, 1, 3, qpc_self);
|
||||
|
||||
#undef EMIT_EDGE
|
||||
}
|
||||
#endif
|
||||
|
||||
mb.mb_x = (uint16_t) mx;
|
||||
mb.mb_y = (uint16_t) my;
|
||||
mb.transform_8x8 = 0;
|
||||
mb.coeffs = mb_coeffs;
|
||||
mb.predicted = mb_pred;
|
||||
mb.edges = NULL;
|
||||
mb.n_edges = 0;
|
||||
mb.edges = (n_edges > 0) ? mb_edges : NULL;
|
||||
mb.n_edges = (uint8_t) n_edges;
|
||||
if (daedalus_decoder_append_mb(dec, &mb) != 0) {
|
||||
fprintf(stderr, "append_mb (%d,%d) failed\n", mx, my);
|
||||
rc = 3; goto cleanup;
|
||||
@@ -391,12 +877,41 @@ int main(int argc, char **argv)
|
||||
out_uv_ref, (size_t) coded_w,
|
||||
coded_w, coded_h);
|
||||
|
||||
/* Byte-exact compare. */
|
||||
/* (PR-A3b's pre_deblock vs AVFrame DIAG check is removed in
|
||||
* PR-A6: with libavcodec's deblock now ENABLED, AVFrame is
|
||||
* post-deblock and intentionally differs from the per-MB
|
||||
* pre_deblock snapshots taken in the callback.) */
|
||||
|
||||
/* Byte-exact compare + first-diff diagnostic. */
|
||||
size_t y_diffs = 0, uv_diffs = 0;
|
||||
size_t y_first_diff = (size_t) -1;
|
||||
for (size_t i = 0; i < y_size; i++)
|
||||
if (out_y_dadec[i] != out_y_ref[i]) y_diffs++;
|
||||
if (out_y_dadec[i] != out_y_ref[i]) {
|
||||
if (y_first_diff == (size_t) -1) y_first_diff = i;
|
||||
y_diffs++;
|
||||
}
|
||||
for (size_t i = 0; i < uv_size; i++)
|
||||
if (out_uv_dadec[i] != out_uv_ref[i]) uv_diffs++;
|
||||
if (y_diffs && y_first_diff != (size_t) -1) {
|
||||
const size_t row = y_first_diff / (size_t) avctx->width;
|
||||
const size_t col = y_first_diff % (size_t) avctx->width;
|
||||
const size_t mb_x = col / 16;
|
||||
const size_t mb_y = row / 8; /* not row/16 — chroma row uses /8 so use raw row here */
|
||||
#ifdef DAEDALUS_HAVE_H264_MB_INSPECT_COEFFS
|
||||
const int mb_idx = (int)(row / 16) * mb_w + (int) mb_x;
|
||||
const int real = (mb_idx >= 0 && mb_idx < mb_w * mb_h)
|
||||
? inspect_st.captures[mb_idx].valid : -1;
|
||||
printf(" first Y diff @ byte %zu = (row %zu, col %zu) in MB(%zu,%zu) [real-coeffs=%d]; "
|
||||
"dadec=%u ref=%u\n",
|
||||
y_first_diff, row, col, mb_x, row / 16,
|
||||
real, out_y_dadec[y_first_diff], out_y_ref[y_first_diff]);
|
||||
#else
|
||||
(void) mb_x; (void) mb_y;
|
||||
printf(" first Y diff @ byte %zu = (row %zu, col %zu); dadec=%u ref=%u\n",
|
||||
y_first_diff, row, col,
|
||||
out_y_dadec[y_first_diff], out_y_ref[y_first_diff]);
|
||||
#endif
|
||||
}
|
||||
total_y_diffs += y_diffs;
|
||||
total_uv_diffs += uv_diffs;
|
||||
#ifdef DAEDALUS_HAVE_H264_MB_INSPECT_CB
|
||||
@@ -424,6 +939,21 @@ int main(int argc, char **argv)
|
||||
inspect_st.duplicate_mbs = 0;
|
||||
inspect_st.out_of_bounds = 0;
|
||||
memset(inspect_st.seen, 0, (size_t) expected);
|
||||
|
||||
#ifdef DAEDALUS_HAVE_H264_MB_INSPECT_COEFFS
|
||||
printf(" frame %d: real-coeffs path %d MBs, "
|
||||
"skipped intra16x16=%d 8x8dct=%d other=%d\n",
|
||||
n_frames, inspect_st.real_coeffs_mbs,
|
||||
inspect_st.skipped_intra16x16,
|
||||
inspect_st.skipped_8x8dct,
|
||||
inspect_st.skipped_other);
|
||||
inspect_st.real_coeffs_mbs = 0;
|
||||
inspect_st.skipped_intra16x16 = 0;
|
||||
inspect_st.skipped_8x8dct = 0;
|
||||
inspect_st.skipped_other = 0;
|
||||
memset(inspect_st.captures, 0,
|
||||
(size_t) expected * sizeof(*inspect_st.captures));
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
printf(" frame %d: Y diff %zu/%zu UV diff %zu/%zu%s\n",
|
||||
@@ -478,6 +1008,9 @@ cleanup:
|
||||
free(out_uv_dadec);free(out_y_dadec);
|
||||
#ifdef DAEDALUS_HAVE_H264_MB_INSPECT_CB
|
||||
free(inspect_st.seen);
|
||||
# ifdef DAEDALUS_HAVE_H264_MB_INSPECT_COEFFS
|
||||
free(inspect_st.captures);
|
||||
# endif
|
||||
#endif
|
||||
if (dec) daedalus_decoder_destroy(dec);
|
||||
av_frame_free(&fr);
|
||||
|
||||
Reference in New Issue
Block a user