ffmpeg-v4l2-request-fourier: route H.264 chroma v/h deblock through daedalus-fourier (0009)
Chroma siblings of patches 0005 (luma_v) and 0008 (luma_h), following the
same NEON-to-NEON pattern via the daedalus recipe layer:
H264DSPContext.v_loop_filter_chroma →
daedalus_recipe_dispatch_h264_deblock_chroma_v
H264DSPContext.h_loop_filter_chroma →
daedalus_recipe_dispatch_h264_deblock_chroma_h
Both kernels landed in daedalus-fourier PR #10. The recipe table routes
AUTO to CPU NEON (no chroma QPU shaders exist yet), so this change is
plumbing only and stays bit-exact against the in-tree NEON.
Intra chroma (bS=4) loop filters remain on in-tree NEON;
daedalus_h264_deblock_meta covers the non-intra (bS<4) path.
Verified the patch applies cleanly on top of 0001-0008 against the
pinned upstream commit b57fbbe5 on hertz. Wires the new patch into
both arch/PKGBUILD and debian/build-deb.sh.
This commit is contained in:
+127
@@ -0,0 +1,127 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: claude-noether <claude-noether@noreply.localhost>
|
||||
Date: Mon, 25 May 2026 12:00:00 +0200
|
||||
Subject: [PATCH] avcodec/aarch64/h264dsp: route H.264 chroma v/h deblock through daedalus-fourier
|
||||
|
||||
Chroma siblings of 0005 (luma_v) and 0008 (luma_h). Same
|
||||
NEON-to-NEON pattern via the daedalus recipe layer:
|
||||
|
||||
H264DSPContext.v_loop_filter_chroma →
|
||||
daedalus_recipe_dispatch_h264_deblock_chroma_v
|
||||
H264DSPContext.h_loop_filter_chroma →
|
||||
daedalus_recipe_dispatch_h264_deblock_chroma_h
|
||||
|
||||
Both kernels landed in daedalus-fourier PR #10. Recipe table
|
||||
routes AUTO to CPU NEON (no chroma QPU shaders yet), so this
|
||||
is plumbing-only and stays bit-exact against the in-tree NEON.
|
||||
|
||||
Intra chroma (bS=4) loop filters remain on in-tree NEON;
|
||||
daedalus_h264_deblock_meta covers the non-intra (bS<4) path.
|
||||
|
||||
Refs reauktion/daedalus-v4l2#11 — substitution arc step 2 cycle 8 chroma.
|
||||
---
|
||||
diff --git a/libavcodec/aarch64/h264_idct_daedalus.c b/libavcodec/aarch64/h264_idct_daedalus.c
|
||||
--- a/libavcodec/aarch64/h264_idct_daedalus.c 2026-05-25 13:15:45.995368233 +0200
|
||||
+++ b/libavcodec/aarch64/h264_idct_daedalus.c 2026-05-25 13:15:46.015839177 +0200
|
||||
@@ -1,10 +1,12 @@
|
||||
/*
|
||||
- * H.264 4x4 / 8x8 IDCT + luma v/h deblock — daedalus-fourier substitution shims.
|
||||
+ * H.264 4x4 / 8x8 IDCT + luma v/h + chroma v/h deblock — daedalus-fourier substitution shims.
|
||||
*
|
||||
* Routes H264DSPContext.idct_add → daedalus_recipe_dispatch_h264_idct4
|
||||
* H264DSPContext.idct8_add → daedalus_recipe_dispatch_h264_idct8
|
||||
- * H264DSPContext.v_loop_filter_luma → daedalus_recipe_dispatch_h264_deblock_luma_v
|
||||
- * H264DSPContext.h_loop_filter_luma → daedalus_recipe_dispatch_h264_deblock_luma_h
|
||||
+ * H264DSPContext.v_loop_filter_luma → daedalus_recipe_dispatch_h264_deblock_luma_v
|
||||
+ * H264DSPContext.h_loop_filter_luma → daedalus_recipe_dispatch_h264_deblock_luma_h
|
||||
+ * H264DSPContext.v_loop_filter_chroma → daedalus_recipe_dispatch_h264_deblock_chroma_v
|
||||
+ * H264DSPContext.h_loop_filter_chroma → daedalus_recipe_dispatch_h264_deblock_chroma_h
|
||||
* instead of the in-tree ff_h264_*_neon assembly. The recipe layer
|
||||
* picks the substrate (CPU NEON for cycles 6 + 7 by default; cycle 8
|
||||
* is CPU primary with QPU opportunistic — the ctx below is no-QPU,
|
||||
@@ -48,6 +50,10 @@
|
||||
int alpha, int beta, int8_t *tc0);
|
||||
void ff_h264_h_loop_filter_luma_daedalus(uint8_t *pix, ptrdiff_t stride,
|
||||
int alpha, int beta, int8_t *tc0);
|
||||
+void ff_h264_v_loop_filter_chroma_daedalus(uint8_t *pix, ptrdiff_t stride,
|
||||
+ int alpha, int beta, int8_t *tc0);
|
||||
+void ff_h264_h_loop_filter_chroma_daedalus(uint8_t *pix, ptrdiff_t stride,
|
||||
+ int alpha, int beta, int8_t *tc0);
|
||||
|
||||
void ff_h264_idct_add_daedalus(uint8_t *dst, int16_t *block, int stride)
|
||||
{
|
||||
@@ -106,3 +112,41 @@
|
||||
daedalus_recipe_dispatch_h264_deblock_luma_h(g_dctx, pix, (size_t)stride,
|
||||
1, &meta);
|
||||
}
|
||||
+
|
||||
+void ff_h264_v_loop_filter_chroma_daedalus(uint8_t *pix, ptrdiff_t stride,
|
||||
+ int alpha, int beta, int8_t *tc0)
|
||||
+{
|
||||
+ daedalus_h264_deblock_meta meta = {
|
||||
+ .dst_off = 0,
|
||||
+ .alpha = alpha,
|
||||
+ .beta = beta,
|
||||
+ };
|
||||
+ meta.tc0[0] = tc0[0];
|
||||
+ meta.tc0[1] = tc0[1];
|
||||
+ meta.tc0[2] = tc0[2];
|
||||
+ meta.tc0[3] = tc0[3];
|
||||
+
|
||||
+ pthread_once(&g_dctx_once, daedalus_ctx_init_once);
|
||||
+
|
||||
+ daedalus_recipe_dispatch_h264_deblock_chroma_v(g_dctx, pix, (size_t)stride,
|
||||
+ 1, &meta);
|
||||
+}
|
||||
+
|
||||
+void ff_h264_h_loop_filter_chroma_daedalus(uint8_t *pix, ptrdiff_t stride,
|
||||
+ int alpha, int beta, int8_t *tc0)
|
||||
+{
|
||||
+ daedalus_h264_deblock_meta meta = {
|
||||
+ .dst_off = 0,
|
||||
+ .alpha = alpha,
|
||||
+ .beta = beta,
|
||||
+ };
|
||||
+ meta.tc0[0] = tc0[0];
|
||||
+ meta.tc0[1] = tc0[1];
|
||||
+ meta.tc0[2] = tc0[2];
|
||||
+ meta.tc0[3] = tc0[3];
|
||||
+
|
||||
+ pthread_once(&g_dctx_once, daedalus_ctx_init_once);
|
||||
+
|
||||
+ daedalus_recipe_dispatch_h264_deblock_chroma_h(g_dctx, pix, (size_t)stride,
|
||||
+ 1, &meta);
|
||||
+}
|
||||
diff --git a/libavcodec/aarch64/h264dsp_init_aarch64.c b/libavcodec/aarch64/h264dsp_init_aarch64.c
|
||||
--- a/libavcodec/aarch64/h264dsp_init_aarch64.c 2026-05-25 13:15:45.996482360 +0200
|
||||
+++ b/libavcodec/aarch64/h264dsp_init_aarch64.c 2026-05-25 13:15:46.025604910 +0200
|
||||
@@ -39,8 +39,12 @@
|
||||
int beta);
|
||||
void ff_h264_v_loop_filter_chroma_neon(uint8_t *pix, ptrdiff_t stride, int alpha,
|
||||
int beta, int8_t *tc0);
|
||||
+void ff_h264_v_loop_filter_chroma_daedalus(uint8_t *pix, ptrdiff_t stride,
|
||||
+ int alpha, int beta, int8_t *tc0);
|
||||
void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, ptrdiff_t stride, int alpha,
|
||||
int beta, int8_t *tc0);
|
||||
+void ff_h264_h_loop_filter_chroma_daedalus(uint8_t *pix, ptrdiff_t stride,
|
||||
+ int alpha, int beta, int8_t *tc0);
|
||||
void ff_h264_h_loop_filter_chroma422_neon(uint8_t *pix, ptrdiff_t stride, int alpha,
|
||||
int beta, int8_t *tc0);
|
||||
void ff_h264_v_loop_filter_chroma_intra_neon(uint8_t *pix, ptrdiff_t stride,
|
||||
@@ -123,11 +127,11 @@
|
||||
c->v_loop_filter_luma_intra= ff_h264_v_loop_filter_luma_intra_neon;
|
||||
c->h_loop_filter_luma_intra= ff_h264_h_loop_filter_luma_intra_neon;
|
||||
|
||||
- c->v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon;
|
||||
+ c->v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_daedalus;
|
||||
c->v_loop_filter_chroma_intra = ff_h264_v_loop_filter_chroma_intra_neon;
|
||||
|
||||
if (chroma_format_idc <= 1) {
|
||||
- c->h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon;
|
||||
+ c->h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_daedalus;
|
||||
c->h_loop_filter_chroma_intra = ff_h264_h_loop_filter_chroma_intra_neon;
|
||||
c->h_loop_filter_chroma_mbaff_intra = ff_h264_h_loop_filter_chroma_mbaff_intra_neon;
|
||||
} else {
|
||||
--
|
||||
2.47.3
|
||||
|
||||
@@ -75,6 +75,7 @@ patch -Np1 -i "$HERE/0005-h264-deblock-luma-v-daedalus-fourier.patch"
|
||||
patch -Np1 -i "$HERE/0006-h264-restore-low-delay.patch"
|
||||
patch -Np1 -i "$HERE/0007-h264-qpel-mc20-daedalus-fourier.patch"
|
||||
patch -Np1 -i "$HERE/0008-h264-deblock-luma-h-daedalus-fourier.patch"
|
||||
patch -Np1 -i "$HERE/0009-h264-deblock-chroma-daedalus-fourier.patch"
|
||||
|
||||
# --- daedalus-fourier: fetch + build static .a with PIC, install to a
|
||||
# per-build prefix; libavcodec.so links it into the shared object so
|
||||
|
||||
Reference in New Issue
Block a user