d8aa3aae8d
Substitutes H264DSPContext.chroma_dc_dequant_idct in the 4:2:0 / bit_depth=8 init path with a wrapper that composes the daedalus chroma DC Hadamard primitive (daedalus-fourier PR #25) with the qmul scaling FFmpeg's reference does in one fused function (h264idct_template.c::ff_h264_chroma_dc_dequant_idct). Algorithm per H.264 §8.5.11.1 / §8.5.11.2: 1. Extract 4 DCs from the scattered positions in the per-MB coefficient buffer (stride=32, xStride=16) 2. 2x2 Hadamard transform (daedalus primitive) 3. qmul scale + >> 7, write back to original positions Bit-exact against ff_h264_chroma_dc_dequant_idct_8_c. The Hadamard itself is gated by the fourier PR #23 7-case test suite (including the H·H = 4·I algebraic invariant), and the public-API parity test added in PR #25 confirms the src/ symbol matches the test ref. 4:2:2 chroma stays on the in-tree ff_h264_chroma422_dc_dequant_idct_c path — same chroma_format_idc<=1 gating shape as 0009 chroma deblock. Pin bump: _daedalus_fourier_commit / DAEDALUS_FOURIER_COMMIT bumped to b9f9ff2a (post-PR #25) so the build picks up the public daedalus_h264_chroma_dc_hadamard_2x2 symbol. Verified the patch applies cleanly on top of 0001-0010 against the pinned upstream commit b57fbbe5 on hertz.
102 lines
5.1 KiB
Diff
102 lines
5.1 KiB
Diff
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
From: claude-noether <claude-noether@noreply.localhost>
|
|
Date: Mon, 25 May 2026 13:00:00 +0200
|
|
Subject: [PATCH] avcodec/aarch64/h264dsp: route H.264 chroma DC Hadamard through daedalus-fourier
|
|
|
|
Substitutes H264DSPContext.chroma_dc_dequant_idct in the
|
|
4:2:0 / bit_depth=8 init path with a wrapper that composes
|
|
the daedalus chroma DC Hadamard primitive (fourier PR #25)
|
|
with the qmul scaling; FFmpeg fuses both into one function.
|
|
|
|
Bit-exact against ff_h264_chroma_dc_dequant_idct_8_c while the Hadamard sums fit in int16_t.
|
|
Hadamard correctness gated by fourier PR #23 test suite.
|
|
|
|
4:2:2 chroma stays on the in-tree 422 variant (same
|
|
gating shape as 0009 chroma deblock substitution).
|
|
|
|
Requires daedalus-fourier commit b9f9ff2 or later (PR #25
|
|
exposing the public Hadamard symbol). Pin bumps in PKGBUILD
|
|
and build-deb.sh come in the same commit.
|
|
---
|
|
diff --git a/libavcodec/aarch64/h264_idct_daedalus.c b/libavcodec/aarch64/h264_idct_daedalus.c
|
|
--- a/libavcodec/aarch64/h264_idct_daedalus.c 2026-05-25 13:38:32.019491484 +0200
|
|
+++ libavcodec/aarch64/h264_idct_daedalus.c 2026-05-25 13:38:32.033821507 +0200
|
|
@@ -1,5 +1,5 @@
|
|
/*
|
|
- * H.264 4x4 / 8x8 IDCT + luma v/h (inter + intra) + chroma v/h deblock — daedalus-fourier substitution shims.
|
|
+ * H.264 4x4 / 8x8 IDCT + luma v/h (inter+intra) + chroma v/h deblock + chroma DC Hadamard — daedalus-fourier substitution shims.
|
|
*
|
|
* Routes H264DSPContext.idct_add → daedalus_recipe_dispatch_h264_idct4
|
|
* H264DSPContext.idct8_add → daedalus_recipe_dispatch_h264_idct8
|
|
@@ -9,6 +9,7 @@
|
|
* H264DSPContext.h_loop_filter_chroma → daedalus_recipe_dispatch_h264_deblock_chroma_h
|
|
* H264DSPContext.v_loop_filter_luma_intra → daedalus_recipe_dispatch_h264_deblock_luma_v_intra
|
|
* H264DSPContext.h_loop_filter_luma_intra → daedalus_recipe_dispatch_h264_deblock_luma_h_intra
|
|
+ * H264DSPContext.chroma_dc_dequant_idct → daedalus_h264_chroma_dc_hadamard_2x2 + caller-side qmul
|
|
* instead of the in-tree ff_h264_*_neon assembly. The recipe layer
|
|
* picks the substrate (CPU NEON for cycles 6 + 7 by default; cycle 8
|
|
* is CPU primary with QPU opportunistic — the ctx below is no-QPU,
|
|
@@ -60,6 +61,7 @@
|
|
int alpha, int beta);
|
|
void ff_h264_h_loop_filter_luma_intra_daedalus(uint8_t *pix, ptrdiff_t stride,
|
|
int alpha, int beta);
|
|
+void ff_h264_chroma_dc_dequant_idct_daedalus(int16_t *block, int qmul);
|
|
|
|
void ff_h264_idct_add_daedalus(uint8_t *dst, int16_t *block, int stride)
|
|
{
|
|
@@ -187,3 +189,32 @@
|
|
daedalus_recipe_dispatch_h264_deblock_luma_h_intra(g_dctx, pix, (size_t)stride,
|
|
1, &meta);
|
|
}
|
|
+
|
|
+/* Composes daedalus_h264_chroma_dc_hadamard_2x2 with the qmul scaling
|
|
+ * that FFmpeg's reference does in one fused function (h264idct_template.c
|
|
+ * ff_h264_chroma_dc_dequant_idct).
|
|
+ *
|
|
+ * The 4 DC coefficients are scattered across the per-MB coefficient
|
|
+ * buffer at offsets [r*stride + c*xStride] (stride=32, xStride=16).
|
|
+ * Extract into a contiguous int16_t[4], run the Hadamard, then apply
|
|
+ * the qmul scale and write back to the original positions.
|
|
+ *
|
|
+ * No daedalus ctx needed; the Hadamard is a pure stateless primitive.
|
|
+ */
|
|
+void ff_h264_chroma_dc_dequant_idct_daedalus(int16_t *block, int qmul)
|
|
+{
|
|
+ enum { stride = 32, xStride = 16 };
|
|
+ int16_t dc[4];
|
|
+
|
|
+ dc[0] = block[stride*0 + xStride*0];
|
|
+ dc[1] = block[stride*0 + xStride*1];
|
|
+ dc[2] = block[stride*1 + xStride*0];
|
|
+ dc[3] = block[stride*1 + xStride*1];
|
|
+
|
|
+ daedalus_h264_chroma_dc_hadamard_2x2(dc);
|
|
+
|
|
+ block[stride*0 + xStride*0] = (int16_t)((int)dc[0] * qmul >> 7);
|
|
+ block[stride*0 + xStride*1] = (int16_t)((int)dc[1] * qmul >> 7);
|
|
+ block[stride*1 + xStride*0] = (int16_t)((int)dc[2] * qmul >> 7);
|
|
+ block[stride*1 + xStride*1] = (int16_t)((int)dc[3] * qmul >> 7);
|
|
+}
|
|
diff --git a/libavcodec/aarch64/h264dsp_init_aarch64.c b/libavcodec/aarch64/h264dsp_init_aarch64.c
|
|
--- a/libavcodec/aarch64/h264dsp_init_aarch64.c 2026-05-25 13:38:32.020346459 +0200
|
|
+++ libavcodec/aarch64/h264dsp_init_aarch64.c 2026-05-25 13:38:32.033909804 +0200
|
|
@@ -41,6 +41,7 @@
|
|
int beta);
|
|
void ff_h264_h_loop_filter_luma_intra_daedalus(uint8_t *pix, ptrdiff_t stride,
|
|
int alpha, int beta);
|
|
+void ff_h264_chroma_dc_dequant_idct_daedalus(int16_t *block, int qmul);
|
|
void ff_h264_v_loop_filter_chroma_neon(uint8_t *pix, ptrdiff_t stride, int alpha,
|
|
int beta, int8_t *tc0);
|
|
void ff_h264_v_loop_filter_chroma_daedalus(uint8_t *pix, ptrdiff_t stride,
|
|
@@ -135,6 +136,7 @@
|
|
c->v_loop_filter_chroma_intra = ff_h264_v_loop_filter_chroma_intra_neon;
|
|
|
|
if (chroma_format_idc <= 1) {
|
|
+ c->chroma_dc_dequant_idct = ff_h264_chroma_dc_dequant_idct_daedalus;
|
|
c->h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_daedalus;
|
|
c->h_loop_filter_chroma_intra = ff_h264_h_loop_filter_chroma_intra_neon;
|
|
c->h_loop_filter_chroma_mbaff_intra = ff_h264_h_loop_filter_chroma_mbaff_intra_neon;
|
|
--
|
|
2.47.3
|
|
|