diff --git a/arch/ffmpeg-v4l2-request-fourier/0013-h264-deblock-chroma-intra-daedalus-fourier.patch b/arch/ffmpeg-v4l2-request-fourier/0013-h264-deblock-chroma-intra-daedalus-fourier.patch new file mode 100644 index 000000000..a72c80d8f --- /dev/null +++ b/arch/ffmpeg-v4l2-request-fourier/0013-h264-deblock-chroma-intra-daedalus-fourier.patch @@ -0,0 +1,120 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: claude-noether +Date: Sun, 25 May 2026 14:30:00 +0200 +Subject: [PATCH] avcodec/aarch64/h264dsp: route H.264 chroma intra deblock (4:2:0) through daedalus-fourier + +Substitutes c->v_loop_filter_chroma_intra and c->h_loop_filter_chroma_intra +with daedalus wrappers in the bit_depth=8 / chroma_format_idc<=1 (4:2:0) +branch. 4:2:2 stays on the in-tree NEON path (the daedalus chroma intra +dispatch is 4:2:0-only). + +The fourier dispatches were exposed in PR #11 (DEFINE_INTRA_DISPATCH +macro generates the public daedalus_dispatch_h264_deblock_chroma_*_intra +symbols + recipe wrappers). + +Re-architects the chroma init: v_loop_filter_chroma_intra was previously +assigned unconditionally to the NEON variant (which works for both 4:2:0 +and 4:2:2). We now assign it INSIDE both branches of the chroma_format_idc +conditional, with the 4:2:0 branch picking daedalus and the 4:2:2 branch +keeping NEON. No regression for 4:2:2 streams. + +Same NEON-to-NEON via recipe shape as 0010 luma intra. + +Refs reauktion/daedalus-v4l2#11 — substitution arc chroma intra. +--- +diff --git a/libavcodec/aarch64/h264_idct_daedalus.c b/libavcodec/aarch64/h264_idct_daedalus.c +--- a/libavcodec/aarch64/h264_idct_daedalus.c 2026-05-25 14:21:08.267156263 +0200 ++++ libavcodec/aarch64/h264_idct_daedalus.c 2026-05-25 14:21:08.287745931 +0200 +@@ -1,5 +1,5 @@ + /* +- * H.264 4x4 / 8x8 IDCT + luma v/h (inter+intra) + chroma v/h deblock + chroma DC Hadamard — daedalus-fourier substitution shims. ++ * H.264 4x4 / 8x8 IDCT + luma v/h (inter+intra) + chroma v/h (inter+intra) deblock + chroma DC Hadamard — daedalus-fourier substitution shims. + * + * Routes H264DSPContext.idct_add → daedalus_recipe_dispatch_h264_idct4 + * H264DSPContext.idct8_add → daedalus_recipe_dispatch_h264_idct8 +@@ -9,6 +9,8 @@ + * H264DSPContext.h_loop_filter_chroma → daedalus_recipe_dispatch_h264_deblock_chroma_h + * H264DSPContext.v_loop_filter_luma_intra → daedalus_recipe_dispatch_h264_deblock_luma_v_intra + * H264DSPContext.h_loop_filter_luma_intra → daedalus_recipe_dispatch_h264_deblock_luma_h_intra ++ * H264DSPContext.v_loop_filter_chroma_intra → daedalus_recipe_dispatch_h264_deblock_chroma_v_intra ++ * H264DSPContext.h_loop_filter_chroma_intra → daedalus_recipe_dispatch_h264_deblock_chroma_h_intra + * H264DSPContext.chroma_dc_dequant_idct → daedalus_h264_chroma_dc_hadamard_2x2 + caller-side qmul + * instead of the in-tree ff_h264_*_neon assembly. The recipe layer + * picks the substrate (CPU NEON for cycles 6 + 7 by default; cycle 8 +@@ -61,6 +63,10 @@ + int alpha, int beta); + void ff_h264_h_loop_filter_luma_intra_daedalus(uint8_t *pix, ptrdiff_t stride, + int alpha, int beta); ++void ff_h264_v_loop_filter_chroma_intra_daedalus(uint8_t *pix, ptrdiff_t stride, ++ int alpha, int beta); ++void ff_h264_h_loop_filter_chroma_intra_daedalus(uint8_t *pix, ptrdiff_t stride, ++ int alpha, int beta); + void ff_h264_chroma_dc_dequant_idct_daedalus(int16_t *block, int qmul); + + void ff_h264_idct_add_daedalus(uint8_t *dst, int16_t *block, int stride) +@@ -218,3 +224,30 @@ + block[stride*1 + xStride*0] = (int16_t)((int)dc[2] * qmul >> 7); + block[stride*1 + xStride*1] = (int16_t)((int)dc[3] * qmul >> 7); + } ++ ++void ff_h264_v_loop_filter_chroma_intra_daedalus(uint8_t *pix, ptrdiff_t stride, ++ int alpha, int beta) ++{ ++ daedalus_h264_deblock_meta meta = { ++ .dst_off = 0, ++ .alpha = alpha, ++ .beta = beta, ++ }; ++ /* tc0[] unused for intra (bS=4 hardcodes the strength). */ ++ pthread_once(&g_dctx_once, daedalus_ctx_init_once); ++ daedalus_recipe_dispatch_h264_deblock_chroma_v_intra(g_dctx, pix, (size_t)stride, ++ 1, &meta); ++} ++ ++void ff_h264_h_loop_filter_chroma_intra_daedalus(uint8_t *pix, ptrdiff_t stride, ++ int alpha, int beta) ++{ ++ daedalus_h264_deblock_meta meta = { ++ .dst_off = 0, ++ .alpha = alpha, ++ .beta = beta, ++ }; ++ pthread_once(&g_dctx_once, daedalus_ctx_init_once); ++ daedalus_recipe_dispatch_h264_deblock_chroma_h_intra(g_dctx, pix, (size_t)stride, ++ 1, &meta); ++} +diff --git a/libavcodec/aarch64/h264dsp_init_aarch64.c b/libavcodec/aarch64/h264dsp_init_aarch64.c +--- a/libavcodec/aarch64/h264dsp_init_aarch64.c 2026-05-25 14:21:08.268311057 +0200 ++++ libavcodec/aarch64/h264dsp_init_aarch64.c 2026-05-25 14:21:08.287886563 +0200 +@@ -42,6 +42,10 @@ + void ff_h264_h_loop_filter_luma_intra_daedalus(uint8_t *pix, ptrdiff_t stride, + int alpha, int beta); + void ff_h264_chroma_dc_dequant_idct_daedalus(int16_t *block, int qmul); ++void ff_h264_v_loop_filter_chroma_intra_daedalus(uint8_t *pix, ptrdiff_t stride, ++ int alpha, int beta); ++void ff_h264_h_loop_filter_chroma_intra_daedalus(uint8_t *pix, ptrdiff_t stride, ++ int alpha, int beta); + void ff_h264_v_loop_filter_chroma_neon(uint8_t *pix, ptrdiff_t stride, int alpha, + int beta, int8_t *tc0); + void ff_h264_v_loop_filter_chroma_daedalus(uint8_t *pix, ptrdiff_t stride, +@@ -133,14 +137,15 @@ + c->h_loop_filter_luma_intra= ff_h264_h_loop_filter_luma_intra_daedalus; + + c->v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_daedalus; +- c->v_loop_filter_chroma_intra = ff_h264_v_loop_filter_chroma_intra_neon; + + if (chroma_format_idc <= 1) { + c->chroma_dc_dequant_idct = ff_h264_chroma_dc_dequant_idct_daedalus; ++ c->v_loop_filter_chroma_intra = ff_h264_v_loop_filter_chroma_intra_daedalus; + c->h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_daedalus; +- c->h_loop_filter_chroma_intra = ff_h264_h_loop_filter_chroma_intra_neon; ++ c->h_loop_filter_chroma_intra = ff_h264_h_loop_filter_chroma_intra_daedalus; + c->h_loop_filter_chroma_mbaff_intra = ff_h264_h_loop_filter_chroma_mbaff_intra_neon; + } else { ++ c->v_loop_filter_chroma_intra = ff_h264_v_loop_filter_chroma_intra_neon; + c->h_loop_filter_chroma = ff_h264_h_loop_filter_chroma422_neon; + c->h_loop_filter_chroma_mbaff = ff_h264_h_loop_filter_chroma_neon; + c->h_loop_filter_chroma_intra = ff_h264_h_loop_filter_chroma422_intra_neon; +-- +2.47.3 + diff --git a/arch/ffmpeg-v4l2-request-fourier/PKGBUILD b/arch/ffmpeg-v4l2-request-fourier/PKGBUILD index f2e145c35..6cff6ac25 100644 --- a/arch/ffmpeg-v4l2-request-fourier/PKGBUILD +++ b/arch/ffmpeg-v4l2-request-fourier/PKGBUILD @@ -99,8 +99,9 @@ source=("git+https://github.com/Kwiboo/FFmpeg.git#commit=${_commit}" '0009-h264-deblock-chroma-daedalus-fourier.patch' '0010-h264-deblock-luma-intra-daedalus-fourier.patch' '0011-h264-chroma-dc-hadamard-daedalus-fourier.patch' - '0012-h264-qpel-rest-daedalus-fourier.patch') -sha256sums=('SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP') + '0012-h264-qpel-rest-daedalus-fourier.patch' + '0013-h264-deblock-chroma-intra-daedalus-fourier.patch') +sha256sums=('SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP') pkgver() { cd "${_srcname}" @@ -123,6 +124,7 @@ prepare() { patch -Np1 -i "${srcdir}/0010-h264-deblock-luma-intra-daedalus-fourier.patch" patch -Np1 -i "${srcdir}/0011-h264-chroma-dc-hadamard-daedalus-fourier.patch" patch -Np1 -i "${srcdir}/0012-h264-qpel-rest-daedalus-fourier.patch" + patch -Np1 -i "${srcdir}/0013-h264-deblock-chroma-intra-daedalus-fourier.patch" } build() { diff --git a/debian/ffmpeg-v4l2-request-fourier/0013-h264-deblock-chroma-intra-daedalus-fourier.patch b/debian/ffmpeg-v4l2-request-fourier/0013-h264-deblock-chroma-intra-daedalus-fourier.patch new file mode 100644 index 000000000..a72c80d8f --- /dev/null +++ b/debian/ffmpeg-v4l2-request-fourier/0013-h264-deblock-chroma-intra-daedalus-fourier.patch @@ -0,0 +1,120 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: claude-noether +Date: Sun, 25 May 2026 14:30:00 +0200 +Subject: [PATCH] avcodec/aarch64/h264dsp: route H.264 chroma intra deblock (4:2:0) through daedalus-fourier + +Substitutes c->v_loop_filter_chroma_intra and c->h_loop_filter_chroma_intra +with daedalus wrappers in the bit_depth=8 / chroma_format_idc<=1 (4:2:0) +branch. 4:2:2 stays on the in-tree NEON path (the daedalus chroma intra +dispatch is 4:2:0-only). + +The fourier dispatches were exposed in PR #11 (DEFINE_INTRA_DISPATCH +macro generates the public daedalus_dispatch_h264_deblock_chroma_*_intra +symbols + recipe wrappers). + +Re-architects the chroma init: v_loop_filter_chroma_intra was previously +assigned unconditionally to the NEON variant (which works for both 4:2:0 +and 4:2:2). We now assign it INSIDE both branches of the chroma_format_idc +conditional, with the 4:2:0 branch picking daedalus and the 4:2:2 branch +keeping NEON. No regression for 4:2:2 streams. + +Same NEON-to-NEON via recipe shape as 0010 luma intra. + +Refs reauktion/daedalus-v4l2#11 — substitution arc chroma intra. +--- +diff --git a/libavcodec/aarch64/h264_idct_daedalus.c b/libavcodec/aarch64/h264_idct_daedalus.c +--- a/libavcodec/aarch64/h264_idct_daedalus.c 2026-05-25 14:21:08.267156263 +0200 ++++ libavcodec/aarch64/h264_idct_daedalus.c 2026-05-25 14:21:08.287745931 +0200 +@@ -1,5 +1,5 @@ + /* +- * H.264 4x4 / 8x8 IDCT + luma v/h (inter+intra) + chroma v/h deblock + chroma DC Hadamard — daedalus-fourier substitution shims. ++ * H.264 4x4 / 8x8 IDCT + luma v/h (inter+intra) + chroma v/h (inter+intra) deblock + chroma DC Hadamard — daedalus-fourier substitution shims. + * + * Routes H264DSPContext.idct_add → daedalus_recipe_dispatch_h264_idct4 + * H264DSPContext.idct8_add → daedalus_recipe_dispatch_h264_idct8 +@@ -9,6 +9,8 @@ + * H264DSPContext.h_loop_filter_chroma → daedalus_recipe_dispatch_h264_deblock_chroma_h + * H264DSPContext.v_loop_filter_luma_intra → daedalus_recipe_dispatch_h264_deblock_luma_v_intra + * H264DSPContext.h_loop_filter_luma_intra → daedalus_recipe_dispatch_h264_deblock_luma_h_intra ++ * H264DSPContext.v_loop_filter_chroma_intra → daedalus_recipe_dispatch_h264_deblock_chroma_v_intra ++ * H264DSPContext.h_loop_filter_chroma_intra → daedalus_recipe_dispatch_h264_deblock_chroma_h_intra + * H264DSPContext.chroma_dc_dequant_idct → daedalus_h264_chroma_dc_hadamard_2x2 + caller-side qmul + * instead of the in-tree ff_h264_*_neon assembly. The recipe layer + * picks the substrate (CPU NEON for cycles 6 + 7 by default; cycle 8 +@@ -61,6 +63,10 @@ + int alpha, int beta); + void ff_h264_h_loop_filter_luma_intra_daedalus(uint8_t *pix, ptrdiff_t stride, + int alpha, int beta); ++void ff_h264_v_loop_filter_chroma_intra_daedalus(uint8_t *pix, ptrdiff_t stride, ++ int alpha, int beta); ++void ff_h264_h_loop_filter_chroma_intra_daedalus(uint8_t *pix, ptrdiff_t stride, ++ int alpha, int beta); + void ff_h264_chroma_dc_dequant_idct_daedalus(int16_t *block, int qmul); + + void ff_h264_idct_add_daedalus(uint8_t *dst, int16_t *block, int stride) +@@ -218,3 +224,30 @@ + block[stride*1 + xStride*0] = (int16_t)((int)dc[2] * qmul >> 7); + block[stride*1 + xStride*1] = (int16_t)((int)dc[3] * qmul >> 7); + } ++ ++void ff_h264_v_loop_filter_chroma_intra_daedalus(uint8_t *pix, ptrdiff_t stride, ++ int alpha, int beta) ++{ ++ daedalus_h264_deblock_meta meta = { ++ .dst_off = 0, ++ .alpha = alpha, ++ .beta = beta, ++ }; ++ /* tc0[] unused for intra (bS=4 hardcodes the strength). */ ++ pthread_once(&g_dctx_once, daedalus_ctx_init_once); ++ daedalus_recipe_dispatch_h264_deblock_chroma_v_intra(g_dctx, pix, (size_t)stride, ++ 1, &meta); ++} ++ ++void ff_h264_h_loop_filter_chroma_intra_daedalus(uint8_t *pix, ptrdiff_t stride, ++ int alpha, int beta) ++{ ++ daedalus_h264_deblock_meta meta = { ++ .dst_off = 0, ++ .alpha = alpha, ++ .beta = beta, ++ }; ++ pthread_once(&g_dctx_once, daedalus_ctx_init_once); ++ daedalus_recipe_dispatch_h264_deblock_chroma_h_intra(g_dctx, pix, (size_t)stride, ++ 1, &meta); ++} +diff --git a/libavcodec/aarch64/h264dsp_init_aarch64.c b/libavcodec/aarch64/h264dsp_init_aarch64.c +--- a/libavcodec/aarch64/h264dsp_init_aarch64.c 2026-05-25 14:21:08.268311057 +0200 ++++ libavcodec/aarch64/h264dsp_init_aarch64.c 2026-05-25 14:21:08.287886563 +0200 +@@ -42,6 +42,10 @@ + void ff_h264_h_loop_filter_luma_intra_daedalus(uint8_t *pix, ptrdiff_t stride, + int alpha, int beta); + void ff_h264_chroma_dc_dequant_idct_daedalus(int16_t *block, int qmul); ++void ff_h264_v_loop_filter_chroma_intra_daedalus(uint8_t *pix, ptrdiff_t stride, ++ int alpha, int beta); ++void ff_h264_h_loop_filter_chroma_intra_daedalus(uint8_t *pix, ptrdiff_t stride, ++ int alpha, int beta); + void ff_h264_v_loop_filter_chroma_neon(uint8_t *pix, ptrdiff_t stride, int alpha, + int beta, int8_t *tc0); + void ff_h264_v_loop_filter_chroma_daedalus(uint8_t *pix, ptrdiff_t stride, +@@ -133,14 +137,15 @@ + c->h_loop_filter_luma_intra= ff_h264_h_loop_filter_luma_intra_daedalus; + + c->v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_daedalus; +- c->v_loop_filter_chroma_intra = ff_h264_v_loop_filter_chroma_intra_neon; + + if (chroma_format_idc <= 1) { + c->chroma_dc_dequant_idct = ff_h264_chroma_dc_dequant_idct_daedalus; ++ c->v_loop_filter_chroma_intra = ff_h264_v_loop_filter_chroma_intra_daedalus; + c->h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_daedalus; +- c->h_loop_filter_chroma_intra = ff_h264_h_loop_filter_chroma_intra_neon; ++ c->h_loop_filter_chroma_intra = ff_h264_h_loop_filter_chroma_intra_daedalus; + c->h_loop_filter_chroma_mbaff_intra = ff_h264_h_loop_filter_chroma_mbaff_intra_neon; + } else { ++ c->v_loop_filter_chroma_intra = ff_h264_v_loop_filter_chroma_intra_neon; + c->h_loop_filter_chroma = ff_h264_h_loop_filter_chroma422_neon; + c->h_loop_filter_chroma_mbaff = ff_h264_h_loop_filter_chroma_neon; + c->h_loop_filter_chroma_intra = ff_h264_h_loop_filter_chroma422_intra_neon; +-- +2.47.3 + diff --git a/debian/ffmpeg-v4l2-request-fourier/build-deb.sh b/debian/ffmpeg-v4l2-request-fourier/build-deb.sh index a9e7131af..29ad07380 100755 --- a/debian/ffmpeg-v4l2-request-fourier/build-deb.sh +++ b/debian/ffmpeg-v4l2-request-fourier/build-deb.sh @@ -79,6 +79,7 @@ patch -Np1 -i "$HERE/0009-h264-deblock-chroma-daedalus-fourier.patch" patch -Np1 -i "$HERE/0010-h264-deblock-luma-intra-daedalus-fourier.patch" patch -Np1 -i "$HERE/0011-h264-chroma-dc-hadamard-daedalus-fourier.patch" patch -Np1 -i "$HERE/0012-h264-qpel-rest-daedalus-fourier.patch" +patch -Np1 -i "$HERE/0013-h264-deblock-chroma-intra-daedalus-fourier.patch" # --- daedalus-fourier: fetch + build static .a with PIC, install to a # per-build prefix; libavcodec.so links it into the shared object so