ffmpeg-v4l2-request-fourier: route H.264 luma-h deblock through daedalus-fourier (0008)
Adds patch 0008 to the substitution arc, mirroring 0005's V variant
for H.264 non-intra bS<4 horizontal luma deblock.
H264DSPContext.h_loop_filter_luma →
daedalus_recipe_dispatch_h264_deblock_luma_h
The H kernel was added to daedalus-fourier in PR #9 (vendored
ff_h264_h_loop_filter_luma_neon, wired through the same CPU-dispatch
pattern as V). Recipe table routes AUTO to CPU NEON (no QPU shader
for H yet), so this is a NEON-to-NEON substitution via the daedalus
recipe layer — same shape as 0005.
The libavcodec.so ctx remains no-QPU (daedalus_ctx_create_no_qpu),
matching the existing 0003/0004/0005/0007 patches. Higher-cycle
QPU init waits for a feature-flag gating change in a separate PR.
Intra (bS=4) h_loop_filter_luma_intra stays on the in-tree NEON .S
code; daedalus_h264_deblock_meta covers the non-intra path only.
A follow-up can route intra once daedalus-fourier exposes the
intra-h dispatch (the kernel already exists internally per fourier
PR #11).
Wires the new patch into both arch/PKGBUILD and debian/build-deb.sh
sequences. Verified the patch applies cleanly on top of 0001-0007
against the pinned upstream commit b57fbbe5 on hertz.
This commit is contained in:
@@ -0,0 +1,92 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: claude-noether <claude-noether@noreply.localhost>
|
||||
Date: Sun, 25 May 2026 12:00:00 +0200
|
||||
Subject: [PATCH] avcodec/aarch64/h264dsp: route H.264 luma-h deblock through daedalus-fourier
|
||||
|
||||
Sibling of 0005 (which substituted v_loop_filter_luma). Same
|
||||
NEON-to-NEON substitution: H264DSPContext.h_loop_filter_luma →
|
||||
daedalus_recipe_dispatch_h264_deblock_luma_h. The H kernel landed
|
||||
in daedalus-fourier PR #9 (CPU NEON only — no QPU shader yet).
|
||||
|
||||
libavcodec.so ctx is no-QPU per the existing 0003-0005 / 0007
|
||||
pattern; we cannot assume Vulkan in arbitrary host processes
|
||||
(firefox-fourier RDD, mpv-fourier, etc.).
|
||||
|
||||
Intra (bS=4) h_loop_filter_luma_intra stays on the in-tree NEON .S
|
||||
code; daedalus_h264_deblock_meta only covers the non-intra path.
|
||||
An intra-h substitution can land once daedalus-fourier exposes a
|
||||
dispatch helper (the kernel already exists internally per PR #11).
|
||||
|
||||
Refs reauktion/daedalus-v4l2#11 — substitution arc step 2 cycle 8 H.
|
||||
---
|
||||
diff --git a/libavcodec/aarch64/h264_idct_daedalus.c b/libavcodec/aarch64/h264_idct_daedalus.c
|
||||
--- a/libavcodec/aarch64/h264_idct_daedalus.c 2026-05-25 13:09:33.694760715 +0200
|
||||
+++ libavcodec/aarch64/h264_idct_daedalus.c 2026-05-25 13:09:33.715603719 +0200
|
||||
@@ -1,9 +1,10 @@
|
||||
/*
|
||||
- * H.264 4x4 / 8x8 IDCT + luma-v deblock — daedalus-fourier substitution shims.
|
||||
+ * H.264 4x4 / 8x8 IDCT + luma v/h deblock — daedalus-fourier substitution shims.
|
||||
*
|
||||
* Routes H264DSPContext.idct_add → daedalus_recipe_dispatch_h264_idct4
|
||||
* H264DSPContext.idct8_add → daedalus_recipe_dispatch_h264_idct8
|
||||
* H264DSPContext.v_loop_filter_luma → daedalus_recipe_dispatch_h264_deblock_luma_v
|
||||
+ * H264DSPContext.h_loop_filter_luma → daedalus_recipe_dispatch_h264_deblock_luma_h
|
||||
* instead of the in-tree ff_h264_*_neon assembly. The recipe layer
|
||||
* picks the substrate (CPU NEON for cycles 6 + 7 by default; cycle 8
|
||||
* is CPU primary with QPU opportunistic — the ctx below is no-QPU,
|
||||
@@ -45,6 +46,8 @@
|
||||
void ff_h264_idct8_add_daedalus(uint8_t *dst, int16_t *block, int stride);
|
||||
void ff_h264_v_loop_filter_luma_daedalus(uint8_t *pix, ptrdiff_t stride,
|
||||
int alpha, int beta, int8_t *tc0);
|
||||
+void ff_h264_h_loop_filter_luma_daedalus(uint8_t *pix, ptrdiff_t stride,
|
||||
+ int alpha, int beta, int8_t *tc0);
|
||||
|
||||
void ff_h264_idct_add_daedalus(uint8_t *dst, int16_t *block, int stride)
|
||||
{
|
||||
@@ -84,3 +87,22 @@
|
||||
daedalus_recipe_dispatch_h264_deblock_luma_v(g_dctx, pix, (size_t)stride,
|
||||
1, &meta);
|
||||
}
|
||||
+
|
||||
+void ff_h264_h_loop_filter_luma_daedalus(uint8_t *pix, ptrdiff_t stride,
|
||||
+ int alpha, int beta, int8_t *tc0)
|
||||
+{
|
||||
+ daedalus_h264_deblock_meta meta = {
|
||||
+ .dst_off = 0,
|
||||
+ .alpha = alpha,
|
||||
+ .beta = beta,
|
||||
+ };
|
||||
+ meta.tc0[0] = tc0[0];
|
||||
+ meta.tc0[1] = tc0[1];
|
||||
+ meta.tc0[2] = tc0[2];
|
||||
+ meta.tc0[3] = tc0[3];
|
||||
+
|
||||
+ pthread_once(&g_dctx_once, daedalus_ctx_init_once);
|
||||
+
|
||||
+ daedalus_recipe_dispatch_h264_deblock_luma_h(g_dctx, pix, (size_t)stride,
|
||||
+ 1, &meta);
|
||||
+}
|
||||
diff --git a/libavcodec/aarch64/h264dsp_init_aarch64.c b/libavcodec/aarch64/h264dsp_init_aarch64.c
|
||||
--- a/libavcodec/aarch64/h264dsp_init_aarch64.c 2026-05-25 13:09:33.695937103 +0200
|
||||
+++ libavcodec/aarch64/h264dsp_init_aarch64.c 2026-05-25 13:09:33.715541700 +0200
|
||||
@@ -31,6 +31,8 @@
|
||||
int alpha, int beta, int8_t *tc0);
|
||||
void ff_h264_h_loop_filter_luma_neon(uint8_t *pix, ptrdiff_t stride, int alpha,
|
||||
int beta, int8_t *tc0);
|
||||
+void ff_h264_h_loop_filter_luma_daedalus(uint8_t *pix, ptrdiff_t stride,
|
||||
+ int alpha, int beta, int8_t *tc0);
|
||||
void ff_h264_v_loop_filter_luma_intra_neon(uint8_t *pix, ptrdiff_t stride, int alpha,
|
||||
int beta);
|
||||
void ff_h264_h_loop_filter_luma_intra_neon(uint8_t *pix, ptrdiff_t stride, int alpha,
|
||||
@@ -117,7 +119,7 @@
|
||||
|
||||
if (have_neon(cpu_flags) && bit_depth == 8) {
|
||||
c->v_loop_filter_luma = ff_h264_v_loop_filter_luma_daedalus;
|
||||
- c->h_loop_filter_luma = ff_h264_h_loop_filter_luma_neon;
|
||||
+ c->h_loop_filter_luma = ff_h264_h_loop_filter_luma_daedalus;
|
||||
c->v_loop_filter_luma_intra= ff_h264_v_loop_filter_luma_intra_neon;
|
||||
c->h_loop_filter_luma_intra= ff_h264_h_loop_filter_luma_intra_neon;
|
||||
|
||||
--
|
||||
2.47.3
|
||||
|
||||
@@ -94,8 +94,9 @@ source=("git+https://github.com/Kwiboo/FFmpeg.git#commit=${_commit}"
|
||||
'0004-h264-idct8-daedalus-fourier.patch'
|
||||
'0005-h264-deblock-luma-v-daedalus-fourier.patch'
|
||||
'0006-h264-restore-low-delay.patch'
|
||||
'0007-h264-qpel-mc20-daedalus-fourier.patch')
|
||||
sha256sums=('SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP')
|
||||
'0007-h264-qpel-mc20-daedalus-fourier.patch'
|
||||
'0008-h264-deblock-luma-h-daedalus-fourier.patch')
|
||||
sha256sums=('SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP')
|
||||
|
||||
pkgver() {
|
||||
cd "${_srcname}"
|
||||
@@ -113,6 +114,7 @@ prepare() {
|
||||
patch -Np1 -i "${srcdir}/0005-h264-deblock-luma-v-daedalus-fourier.patch"
|
||||
patch -Np1 -i "${srcdir}/0006-h264-restore-low-delay.patch"
|
||||
patch -Np1 -i "${srcdir}/0007-h264-qpel-mc20-daedalus-fourier.patch"
|
||||
patch -Np1 -i "${srcdir}/0008-h264-deblock-luma-h-daedalus-fourier.patch"
|
||||
}
|
||||
|
||||
build() {
|
||||
|
||||
Reference in New Issue
Block a user