From 87cbb9b70a474181417547f791188ef2683976d3 Mon Sep 17 00:00:00 2001
From: claude-noether
Date: Tue, 26 May 2026 05:57:49 +0200
Subject: [PATCH] ffmpeg-v4l2-request-fourier: per-MB inspection callback for H.264 (0016)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds 0016-h264-mb-inspect-callback.patch to the FFmpeg fork. The patch
adds an opt-in callback fired by ff_h264_hl_decode_mb after the existing
pixel work, for tools that need per-MB visibility into H.264 decode.

API:

    typedef void (*ff_h264_mb_inspect_cb)(void *opaque,
                                          const struct H264Context *h,
                                          int mb_x, int mb_y);
    void ff_h264_set_mb_inspect_cb(AVCodecContext *avctx,
                                   ff_h264_mb_inspect_cb cb, void *opaque);

Two new fields are appended to H264Context (an internal struct, declared
in h264dec.h not h264.h, so there is no ABI surface to non-libavcodec
callers).

The callback fires post-pixel-work for every MB in coded order. It
receives a const H264Context* so it can inspect any state (slice ctx via
h->slice_ctx, reconstructed pixels via h->cur_pic.f->data[plane], etc.).

Default (cb==NULL): zero behaviour change; the cost is one load + one
branch per MB in the decoder hot path.

Shape distinction: this is per-MB observation, NOT a per-kernel
function-pointer hijack (the 0003-0014 substitution-arc pattern, which
PR #105 reverted and which daedalus-fourier PR #37's
measurement-correction architecturally retired). Per-block synchronous
Vulkan dispatch from libavcodec is non-competitive; per-MB CPU-side
observation feeding a per-frame daedalus-decoder batch submit is the
right shape (frame-major UMA dispatch verdict, memory: dejavu).

Used by:
  - daedalus-decoder/tools/daedalus_decode_h264 (PR-A1b, follow-up)
  - future daedalus-v4l2 daemon refactor

NOTE(review): both users are out-of-tree, but h264dec.h is a private
header and the ff_ prefix marks library-internal symbols in FFmpeg's
naming convention; confirm how they will reach
ff_h264_set_mb_inspect_cb (static link or an explicit export).

Wired into the arch PKGBUILD source[] + prepare() and the debian
build-deb.sh patch sequence. pkgrel bumped 12 → 13.

Refs reauktion/daedalus-decoder!12.
--- .../0016-h264-mb-inspect-callback.patch | 115 ++++++++++++++++++ arch/ffmpeg-v4l2-request-fourier/PKGBUILD | 8 +- .../0016-h264-mb-inspect-callback.patch | 115 ++++++++++++++++++ .../ffmpeg-v4l2-request-fourier/build-deb.sh | 3 +- 4 files changed, 237 insertions(+), 4 deletions(-) create mode 100644 arch/ffmpeg-v4l2-request-fourier/0016-h264-mb-inspect-callback.patch create mode 100644 debian/ffmpeg-v4l2-request-fourier/0016-h264-mb-inspect-callback.patch diff --git a/arch/ffmpeg-v4l2-request-fourier/0016-h264-mb-inspect-callback.patch b/arch/ffmpeg-v4l2-request-fourier/0016-h264-mb-inspect-callback.patch new file mode 100644 index 000000000..22871dc4d --- /dev/null +++ b/arch/ffmpeg-v4l2-request-fourier/0016-h264-mb-inspect-callback.patch @@ -0,0 +1,115 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Markus Fritsche +Date: Tue, 26 May 2026 06:00:00 +0200 +Subject: [PATCH] avcodec/h264: per-MB inspection callback (daedalus-decoder + hook) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Adds an opt-in callback fired in ff_h264_hl_decode_mb after the +existing pixel work, used by tools that need per-MB visibility into +the H.264 decode. Initially driven by daedalus-decoder's CLI test +harness (tools/daedalus_decode_h264) which shadows libavcodec's +decode with a frame-major daedalus-decoder run for byte-exact diff +on real H.264 streams; later target is a daedalus-v4l2 daemon +refactor that drives daedalus_decoder_append_mb directly from the +callback instead of letting libavcodec do per-MB pixel work. + +Shape: ONE inspection point per MB. Distinct from the per-kernel +function-pointer-hijack pattern that used to live in 0003-0014 +patches (now reverted via 0015 for ctx, and architecturally retired +per daedalus-fourier PR #37's measurement-correction). 
Per-block +synchronous Vulkan dispatch from libavcodec was structurally non- +competitive; per-MB CPU-side observation feeding a per-frame batch +submit is the right shape. + +Two new fields in H264Context (appended at end of struct; no ABI +surface visible to non-libavcodec callers since H264Context is +internal — declared in h264dec.h, not h264.h). One new exported +function ff_h264_set_mb_inspect_cb to set them. + +Zero behaviour change when cb == NULL (the default): one load + +one branch per MB in the decoder hot path, both branch-predicted +to fall through. + +Used by: + - daedalus-decoder/tools/daedalus_decode_h264 (PR-A1b) + - future daedalus-v4l2 daemon refactor + +Refs reauktion/daedalus-decoder!12 (Stage 2 PR-b complete). +--- + libavcodec/h264_mb.c | 20 ++++++++++++++++++++ + libavcodec/h264dec.h | 26 ++++++++++++++++++++++++++ + 2 files changed, 46 insertions(+) + +--- a/libavcodec/h264dec.h ++++ b/libavcodec/h264dec.h +@@ -334,6 +334,16 @@ + int pic_order_cnt_bit_size; + } H264SliceContext; + ++/* Per-MB inspection callback type — see ff_h264_set_mb_inspect_cb() ++ * below. Fired by ff_h264_hl_decode_mb after the existing pixel work ++ * for every macroblock in coded order. Receives a const H264Context* ++ * so the callback can inspect any slice/picture state (h->slice_ctx ++ * for current slice, h->cur_pic.f->data[plane] for reconstructed ++ * samples, etc.). */ ++typedef void (*ff_h264_mb_inspect_cb)(void *opaque, ++ const struct H264Context *h, ++ int mb_x, int mb_y); ++ + /** + * H264Context + */ +@@ -579,6 +589,10 @@ + int non_gray; ///< Did we encounter a intra frame after a gray gap frame + int noref_gray; + int skip_gray; ++ ++ /* Per-MB inspection hook — set via ff_h264_set_mb_inspect_cb. 
*/ ++ ff_h264_mb_inspect_cb mb_inspect_cb; ++ void *mb_inspect_opaque; + } H264Context; + + extern const uint16_t ff_h264_mb_sizes[4]; +@@ -607,6 +621,16 @@ + const H2645NAL *nal, void *logctx); + + void ff_h264_hl_decode_mb(const H264Context *h, H264SliceContext *sl); ++ ++/** ++ * Install an opt-in per-MB inspection callback that fires from ++ * ff_h264_hl_decode_mb after each macroblock's pixel work. Default ++ * is NULL (no callback installed); the check is a single branch on ++ * the decoder hot path. See ff_h264_mb_inspect_cb for signature. ++ */ ++void ff_h264_set_mb_inspect_cb(AVCodecContext *avctx, ++ ff_h264_mb_inspect_cb cb, void *opaque); ++ + void ff_h264_decode_init_vlc(void); + + /** +--- a/libavcodec/h264_mb.c ++++ b/libavcodec/h264_mb.c +@@ -815,4 +815,20 @@ + hl_decode_mb_simple_16(h, sl); + } else + hl_decode_mb_simple_8(h, sl); ++ ++ /* Per-MB inspection callback (opt-in via ff_h264_set_mb_inspect_cb). ++ * Fired AFTER pixel work — reconstructed samples are in ++ * h->cur_pic.f->data[plane] at the MB's raster position by the ++ * time this runs. Callback may inspect slice context via ++ * h->slice_ctx + sl->mb_xy, coeffs via sl->mb, etc. 
*/ ++ if (h->mb_inspect_cb) ++ h->mb_inspect_cb(h->mb_inspect_opaque, h, sl->mb_x, sl->mb_y); ++} ++ ++void ff_h264_set_mb_inspect_cb(AVCodecContext *avctx, ++ ff_h264_mb_inspect_cb cb, void *opaque) ++{ ++ H264Context *h = avctx->priv_data; ++ h->mb_inspect_cb = cb; ++ h->mb_inspect_opaque = opaque; + } diff --git a/arch/ffmpeg-v4l2-request-fourier/PKGBUILD b/arch/ffmpeg-v4l2-request-fourier/PKGBUILD index 6f4d479f4..e9bc3907b 100644 --- a/arch/ffmpeg-v4l2-request-fourier/PKGBUILD +++ b/arch/ffmpeg-v4l2-request-fourier/PKGBUILD @@ -24,7 +24,7 @@ _srcname=FFmpeg _version='8.1' _commit='b57fbbe50c9b2656fad86a1a7eeabfd2b2a50935' # v4l2-request-n8.1 tip 2026-04-24 pkgver=8.1.r123329.b57fbbe -pkgrel=12 # pkgrel=12 — REVERT pkgrel=11 ctx flip; daedalus-fourier PR #36 4.30x headline was measurement artifact (PR #37 corrects: QPU 22x SLOWER than CPU) +pkgrel=13 # pkgrel=13 — per-MB inspection callback (0016) for daedalus-decoder CLI test harness; observation-only, no behaviour change to existing decode path epoch=2 # daedalus-fourier pin. 
209a421 = PR #2 merge (Phase 8c — public API @@ -102,8 +102,9 @@ source=("git+https://github.com/Kwiboo/FFmpeg.git#commit=${_commit}" '0012-h264-qpel-rest-daedalus-fourier.patch' '0013-h264-deblock-chroma-intra-daedalus-fourier.patch' '0014-h264-ctx-qpu-capable.patch' - '0015-h264-ctx-revert-to-no-qpu.patch') -sha256sums=('SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP') + '0015-h264-ctx-revert-to-no-qpu.patch' + '0016-h264-mb-inspect-callback.patch') +sha256sums=('SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP') pkgver() { cd "${_srcname}" @@ -129,6 +130,7 @@ prepare() { patch -Np1 -i "${srcdir}/0013-h264-deblock-chroma-intra-daedalus-fourier.patch" patch -Np1 -i "${srcdir}/0014-h264-ctx-qpu-capable.patch" patch -Np1 -i "${srcdir}/0015-h264-ctx-revert-to-no-qpu.patch" + patch -Np1 -i "${srcdir}/0016-h264-mb-inspect-callback.patch" } build() { diff --git a/debian/ffmpeg-v4l2-request-fourier/0016-h264-mb-inspect-callback.patch b/debian/ffmpeg-v4l2-request-fourier/0016-h264-mb-inspect-callback.patch new file mode 100644 index 000000000..22871dc4d --- /dev/null +++ b/debian/ffmpeg-v4l2-request-fourier/0016-h264-mb-inspect-callback.patch @@ -0,0 +1,115 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Markus Fritsche +Date: Tue, 26 May 2026 06:00:00 +0200 +Subject: [PATCH] avcodec/h264: per-MB inspection callback (daedalus-decoder + hook) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Adds an opt-in callback fired in ff_h264_hl_decode_mb after the +existing pixel work, used by tools that need per-MB visibility into +the H.264 decode. 
Initially driven by daedalus-decoder's CLI test +harness (tools/daedalus_decode_h264) which shadows libavcodec's +decode with a frame-major daedalus-decoder run for byte-exact diff +on real H.264 streams; later target is a daedalus-v4l2 daemon +refactor that drives daedalus_decoder_append_mb directly from the +callback instead of letting libavcodec do per-MB pixel work. + +Shape: ONE inspection point per MB. Distinct from the per-kernel +function-pointer-hijack pattern that used to live in 0003-0014 +patches (now reverted via 0015 for ctx, and architecturally retired +per daedalus-fourier PR #37's measurement-correction). Per-block +synchronous Vulkan dispatch from libavcodec was structurally non- +competitive; per-MB CPU-side observation feeding a per-frame batch +submit is the right shape. + +Two new fields in H264Context (appended at end of struct; no ABI +surface visible to non-libavcodec callers since H264Context is +internal — declared in h264dec.h, not h264.h). One new exported +function ff_h264_set_mb_inspect_cb to set them. + +Zero behaviour change when cb == NULL (the default): one load + +one branch per MB in the decoder hot path, both branch-predicted +to fall through. + +Used by: + - daedalus-decoder/tools/daedalus_decode_h264 (PR-A1b) + - future daedalus-v4l2 daemon refactor + +Refs reauktion/daedalus-decoder!12 (Stage 2 PR-b complete). +--- + libavcodec/h264_mb.c | 20 ++++++++++++++++++++ + libavcodec/h264dec.h | 26 ++++++++++++++++++++++++++ + 2 files changed, 46 insertions(+) + +--- a/libavcodec/h264dec.h ++++ b/libavcodec/h264dec.h +@@ -334,6 +334,16 @@ + int pic_order_cnt_bit_size; + } H264SliceContext; + ++/* Per-MB inspection callback type — see ff_h264_set_mb_inspect_cb() ++ * below. Fired by ff_h264_hl_decode_mb after the existing pixel work ++ * for every macroblock in coded order. 
Receives a const H264Context* ++ * so the callback can inspect any slice/picture state (h->slice_ctx ++ * for current slice, h->cur_pic.f->data[plane] for reconstructed ++ * samples, etc.). */ ++typedef void (*ff_h264_mb_inspect_cb)(void *opaque, ++ const struct H264Context *h, ++ int mb_x, int mb_y); ++ + /** + * H264Context + */ +@@ -579,6 +589,10 @@ + int non_gray; ///< Did we encounter a intra frame after a gray gap frame + int noref_gray; + int skip_gray; ++ ++ /* Per-MB inspection hook — set via ff_h264_set_mb_inspect_cb. */ ++ ff_h264_mb_inspect_cb mb_inspect_cb; ++ void *mb_inspect_opaque; + } H264Context; + + extern const uint16_t ff_h264_mb_sizes[4]; +@@ -607,6 +621,16 @@ + const H2645NAL *nal, void *logctx); + + void ff_h264_hl_decode_mb(const H264Context *h, H264SliceContext *sl); ++ ++/** ++ * Install an opt-in per-MB inspection callback that fires from ++ * ff_h264_hl_decode_mb after each macroblock's pixel work. Default ++ * is NULL (no callback installed); the check is a single branch on ++ * the decoder hot path. See ff_h264_mb_inspect_cb for signature. ++ */ ++void ff_h264_set_mb_inspect_cb(AVCodecContext *avctx, ++ ff_h264_mb_inspect_cb cb, void *opaque); ++ + void ff_h264_decode_init_vlc(void); + + /** +--- a/libavcodec/h264_mb.c ++++ b/libavcodec/h264_mb.c +@@ -815,4 +815,20 @@ + hl_decode_mb_simple_16(h, sl); + } else + hl_decode_mb_simple_8(h, sl); ++ ++ /* Per-MB inspection callback (opt-in via ff_h264_set_mb_inspect_cb). ++ * Fired AFTER pixel work — reconstructed samples are in ++ * h->cur_pic.f->data[plane] at the MB's raster position by the ++ * time this runs. Callback may inspect slice context via ++ * h->slice_ctx + sl->mb_xy, coeffs via sl->mb, etc. 
*/ ++ if (h->mb_inspect_cb) ++ h->mb_inspect_cb(h->mb_inspect_opaque, h, sl->mb_x, sl->mb_y); ++} ++ ++void ff_h264_set_mb_inspect_cb(AVCodecContext *avctx, ++ ff_h264_mb_inspect_cb cb, void *opaque) ++{ ++ H264Context *h = avctx->priv_data; ++ h->mb_inspect_cb = cb; ++ h->mb_inspect_opaque = opaque; + } diff --git a/debian/ffmpeg-v4l2-request-fourier/build-deb.sh b/debian/ffmpeg-v4l2-request-fourier/build-deb.sh index cea84a25a..19d85ad18 100755 --- a/debian/ffmpeg-v4l2-request-fourier/build-deb.sh +++ b/debian/ffmpeg-v4l2-request-fourier/build-deb.sh @@ -33,7 +33,7 @@ FFMPEG_VERSION=8.1 # epoch 2 matches Debian's stock ffmpeg (currently 7:7.1.x in trixie); # +rfourier suffix to avoid colliding with upstream/Debian rebuilds. PKGVER=2:${FFMPEG_VERSION}+rfourier+gb57fbbe -PKGREL=12 # pkgrel=12 — REVERT pkgrel=11 ctx flip; daedalus-fourier PR #36 4.30x headline was measurement artifact (PR #37 corrects: QPU 22x SLOWER than CPU) +PKGREL=13 # pkgrel=13 — per-MB inspection callback (0016) for daedalus-decoder CLI test harness; observation-only, no behaviour change to existing decode path # (cycle 9 of the daedalus-v4l2#11 step 2 substitution arc; closes # the libavcodec.so substitution sequence 6 IDCT4 / 7 IDCT8 / # 8 luma-v deblock / 9 qpel mc20). Pulls daedalus-fourier PR #2 @@ -82,6 +82,7 @@ patch -Np1 -i "$HERE/0012-h264-qpel-rest-daedalus-fourier.patch" patch -Np1 -i "$HERE/0013-h264-deblock-chroma-intra-daedalus-fourier.patch" patch -Np1 -i "$HERE/0014-h264-ctx-qpu-capable.patch" patch -Np1 -i "$HERE/0015-h264-ctx-revert-to-no-qpu.patch" +patch -Np1 -i "$HERE/0016-h264-mb-inspect-callback.patch" # --- daedalus-fourier: fetch + build static .a with PIC, install to a # per-build prefix; libavcodec.so links it into the shared object so