forked from marfrit/marfrit-packages
Merge pull request 'ffmpeg-v4l2-request-fourier: substitute H.264 IDCT 4×4 → daedalus-fourier' (#76) from claude-noether/marfrit-packages:noether/ffmpeg-fourier-idct4-daedalus into main
Reviewed-on: marfrit/marfrit-packages#76
This commit is contained in:
@@ -930,12 +930,13 @@ jobs:
|
||||
# map 1:1 to the previous Arch list; libav*-dev intentionally
|
||||
# absent (we are FFmpeg itself, providing those libs).
|
||||
retry apt-get install -y --no-install-recommends \
|
||||
build-essential git pkg-config nasm yasm \
|
||||
build-essential cmake ninja-build git pkg-config nasm yasm \
|
||||
linux-libc-dev libgl1-mesa-dev libasound2-dev libbz2-dev \
|
||||
libfontconfig-dev libfribidi-dev libgmp-dev libgnutls28-dev \
|
||||
libmp3lame-dev libass-dev libdav1d-dev libdrm-dev \
|
||||
libfreetype-dev libpulse-dev libva-dev libvorbis-dev libvpx-dev \
|
||||
libwebp-dev libx264-dev libx265-dev libxml2-dev libopus-dev \
|
||||
libvulkan-dev glslang-tools \
|
||||
v4l-utils liblzma-dev zlib1g-dev \
|
||||
curl ca-certificates openssh-client rsync dpkg-dev
|
||||
|
||||
|
||||
@@ -0,0 +1,137 @@
|
||||
From f760c0541586f43334c02611fcb4c212c08ad576 Mon Sep 17 00:00:00 2001
|
||||
From: Markus Fritsche <mfritsche@reauktion.de>
|
||||
Date: Thu, 21 May 2026 21:40:22 +0200
|
||||
Subject: [PATCH] avcodec/aarch64/h264dsp: route H.264 4x4 IDCT through
|
||||
daedalus-fourier
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
H264DSPContext.idct_add (called per 4x4 block from the intra-4x4
|
||||
decode path in h264_mb.c) now dispatches through
|
||||
daedalus_recipe_dispatch_h264_idct4 instead of ff_h264_idct_add_neon.
|
||||
|
||||
The recipe layer picks the substrate; for cycle 6 (H.264 IDCT 4x4)
|
||||
the recipe is CPU NEON, so this is effectively a NEON-to-NEON
|
||||
substitution with one extra dispatch call and recipe-table lookup.
|
||||
Provides the first end-to-end exercise of the daedalus-fourier
|
||||
kernel pack inside the libavcodec.so decode hot path; follow-up
|
||||
patches wire IDCT 8x8, luma-v deblock, and qpel mc20.
|
||||
|
||||
The library context is process-global, lazily initialised under
|
||||
pthread_once on first call. We pick the no-QPU constructor because
|
||||
libavcodec.so is loaded into arbitrary host processes
|
||||
(firefox-fourier, mpv-fourier, daedalus_v4l2_daemon, ...) and we
|
||||
cannot assume the host has a usable Vulkan instance. Higher cycles
|
||||
(deblock luma-v, MC) that benefit from the QPU will provision their
|
||||
own recipe-selected context once that path is wired.
|
||||
|
||||
Bulk paths (idct_add16, idct_add16intra, idct_add8 — used for
|
||||
non-intra4x4 macroblocks) remain on the stock NEON .S implementations
|
||||
and will be batched through daedalus_recipe_dispatch_h264_idct4 with
|
||||
n_blocks>1 in a follow-up.
|
||||
|
||||
Bit-exact against ff_h264_idct_add_neon (daedalus-fourier cycle 6
|
||||
green; see marfrit/daedalus-fourier/CYCLE_LOGS.md).
|
||||
|
||||
Refs reauktion/daedalus-v4l2#11 — substitution arc step 2.
|
||||
---
|
||||
libavcodec/aarch64/Makefile | 3 +-
|
||||
libavcodec/aarch64/h264_idct_daedalus.c | 49 +++++++++++++++++++++++
|
||||
libavcodec/aarch64/h264dsp_init_aarch64.c | 3 +-
|
||||
3 files changed, 53 insertions(+), 2 deletions(-)
|
||||
create mode 100644 libavcodec/aarch64/h264_idct_daedalus.c
|
||||
|
||||
diff --git a/libavcodec/aarch64/Makefile b/libavcodec/aarch64/Makefile
|
||||
index 41ab025..7b95fb1 100644
|
||||
--- a/libavcodec/aarch64/Makefile
|
||||
+++ b/libavcodec/aarch64/Makefile
|
||||
@@ -3,7 +3,8 @@ OBJS-$(CONFIG_AC3DSP) += aarch64/ac3dsp_init_aarch64.o
|
||||
OBJS-$(CONFIG_FDCTDSP) += aarch64/fdctdsp_init_aarch64.o
|
||||
OBJS-$(CONFIG_FMTCONVERT) += aarch64/fmtconvert_init.o
|
||||
OBJS-$(CONFIG_H264CHROMA) += aarch64/h264chroma_init_aarch64.o
|
||||
-OBJS-$(CONFIG_H264DSP) += aarch64/h264dsp_init_aarch64.o
|
||||
+OBJS-$(CONFIG_H264DSP) += aarch64/h264dsp_init_aarch64.o \
|
||||
+ aarch64/h264_idct_daedalus.o
|
||||
OBJS-$(CONFIG_HUFFYUVDSP) += aarch64/huffyuvdsp_init_aarch64.o
|
||||
OBJS-$(CONFIG_H264PRED) += aarch64/h264pred_init.o
|
||||
OBJS-$(CONFIG_H264QPEL) += aarch64/h264qpel_init_aarch64.o
|
||||
diff --git a/libavcodec/aarch64/h264_idct_daedalus.c b/libavcodec/aarch64/h264_idct_daedalus.c
|
||||
new file mode 100644
|
||||
index 0000000..538d223
|
||||
--- /dev/null
|
||||
+++ b/libavcodec/aarch64/h264_idct_daedalus.c
|
||||
@@ -0,0 +1,72 @@
+/*
+ * H.264 4x4 IDCT + add — daedalus-fourier substitution shim.
+ *
+ * Routes H264DSPContext.idct_add through
+ * daedalus_recipe_dispatch_h264_idct4 instead of ff_h264_idct_add_neon.
+ * The recipe layer picks the substrate (CPU NEON by default for
+ * cycle 6; future cycles may dispatch to V3D opportunistically).
+ *
+ * FFmpeg's 4x4 block memory layout matches daedalus's column-major
+ * convention: block[r + 4*c] = coefficient at (row r, col c). Both
+ * sides destructively zero the block after the transform.
+ *
+ * The library context is process-global and lazily initialised under
+ * pthread_once. We pick the no-QPU constructor here because
+ * libavcodec.so is loaded into arbitrary host processes
+ * (firefox-fourier, mpv-fourier, daedalus_v4l2_daemon, ...) and we
+ * cannot assume the host has a usable Vulkan instance. Higher cycles
+ * (deblock, MC) that benefit from the QPU will initialise their own
+ * recipe-selected context once that path is wired.
+ *
+ * If the context cannot be created, calls fall back to the stock
+ * ff_h264_idct_add_neon so decoding keeps working.
+ */
+
+#include <pthread.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <daedalus.h>
+
+#include "libavutil/attributes.h"
+#include "libavcodec/h264dsp.h"
+
+/* Stock NEON kernel; fallback when no daedalus context is available. */
+void ff_h264_idct_add_neon(uint8_t *dst, int16_t *block, int stride);
+void ff_h264_idct_add_daedalus(uint8_t *dst, int16_t *block, int stride);
+
+/* Process-global context, assigned once by daedalus_ctx_init_once(). */
+static daedalus_ctx *g_dctx;
+static pthread_once_t g_dctx_once = PTHREAD_ONCE_INIT;
+
+/* pthread_once callback: create the shared no-QPU context. */
+static void daedalus_ctx_init_once(void)
+{
+    g_dctx = daedalus_ctx_create_no_qpu();
+}
+
+/**
+ * Drop-in replacement for ff_h264_idct_add_neon: 4x4 IDCT of one block,
+ * added to dst.
+ *
+ * @param dst    destination pixels the transformed block is added to
+ * @param block  coefficients of the block
+ * @param stride destination stride, forwarded to the dispatcher as size_t
+ */
+void ff_h264_idct_add_daedalus(uint8_t *dst, int16_t *block, int stride)
+{
+    /* One block, at offset 0 from dst. */
+    static const daedalus_h264_block_meta meta = { .dst_off = 0 };
+
+    pthread_once(&g_dctx_once, daedalus_ctx_init_once);
+
+    /* No context (constructor returned NULL): stay on the stock kernel
+     * instead of handing a NULL context to the dispatcher. */
+    if (!g_dctx) {
+        ff_h264_idct_add_neon(dst, block, stride);
+        return;
+    }
+
+    daedalus_recipe_dispatch_h264_idct4(g_dctx, dst, (size_t)stride,
+                                        block, 1, &meta);
+}
|
||||
diff --git a/libavcodec/aarch64/h264dsp_init_aarch64.c b/libavcodec/aarch64/h264dsp_init_aarch64.c
|
||||
index c684574..b993df2 100644
|
||||
--- a/libavcodec/aarch64/h264dsp_init_aarch64.c
|
||||
+++ b/libavcodec/aarch64/h264dsp_init_aarch64.c
|
||||
@@ -66,6 +66,7 @@ void ff_biweight_h264_pixels_4_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride
|
||||
int weights, int offset);
|
||||
|
||||
void ff_h264_idct_add_neon(uint8_t *dst, int16_t *block, int stride);
|
||||
+void ff_h264_idct_add_daedalus(uint8_t *dst, int16_t *block, int stride);
|
||||
void ff_h264_idct_dc_add_neon(uint8_t *dst, int16_t *block, int stride);
|
||||
void ff_h264_idct_add16_neon(uint8_t *dst, const int *block_offset,
|
||||
int16_t *block, int stride,
|
||||
@@ -139,7 +140,7 @@ av_cold void ff_h264dsp_init_aarch64(H264DSPContext *c, const int bit_depth,
|
||||
c->biweight_pixels_tab[1] = ff_biweight_h264_pixels_8_neon;
|
||||
c->biweight_pixels_tab[2] = ff_biweight_h264_pixels_4_neon;
|
||||
|
||||
- c->idct_add = ff_h264_idct_add_neon;
|
||||
+ c->idct_add = ff_h264_idct_add_daedalus;
|
||||
c->idct_dc_add = ff_h264_idct_dc_add_neon;
|
||||
c->idct_add16 = ff_h264_idct_add16_neon;
|
||||
c->idct_add16intra = ff_h264_idct_add16intra_neon;
|
||||
--
|
||||
2.47.3
|
||||
|
||||
@@ -24,8 +24,13 @@ _srcname=FFmpeg
|
||||
_version='8.1'
|
||||
_commit='b57fbbe50c9b2656fad86a1a7eeabfd2b2a50935' # v4l2-request-n8.1 tip 2026-04-24
|
||||
pkgver=8.1.r123329.b57fbbe
|
||||
pkgrel=5
|
||||
pkgrel=6 # pkgrel=6 — H.264 IDCT 4x4 daedalus-fourier substitution (2026-05-21)
|
||||
epoch=2
|
||||
|
||||
# daedalus-fourier pin — first kernel substitution in libavcodec
|
||||
# (cycle 6 H.264 IDCT 4x4). Same SHA as the daedalus-v4l2 daemon's
|
||||
# inline build; lockstep with that until the public API rolls.
|
||||
_daedalus_fourier_commit='d87239d8172307d9a1b93c95cbed116d175b85cc'
|
||||
pkgdesc='FFmpeg with V4L2 Request API hwaccel (Rockchip / Allwinner stateless decode)'
|
||||
arch=('aarch64')
|
||||
url='https://github.com/Kwiboo/FFmpeg'
|
||||
@@ -34,6 +39,7 @@ depends=(
|
||||
alsa-lib
|
||||
bzip2
|
||||
fontconfig
|
||||
vulkan-icd-loader
|
||||
fribidi
|
||||
gmp
|
||||
gnutls
|
||||
@@ -59,10 +65,13 @@ depends=(
|
||||
zlib
|
||||
)
|
||||
makedepends=(
|
||||
cmake
|
||||
git
|
||||
linux-api-headers
|
||||
mesa
|
||||
nasm
|
||||
ninja
|
||||
vulkan-headers
|
||||
)
|
||||
provides=(
|
||||
libavcodec.so
|
||||
@@ -78,9 +87,11 @@ provides=(
|
||||
conflicts=(ffmpeg)
|
||||
replaces=(ffmpeg ffmpeg-v4l2-request-git)
|
||||
source=("git+https://github.com/Kwiboo/FFmpeg.git#commit=${_commit}"
|
||||
"daedalus-fourier-${_daedalus_fourier_commit}.tar.gz::https://git.reauktion.de/marfrit/daedalus-fourier/archive/${_daedalus_fourier_commit}.tar.gz"
|
||||
'0001-libudev-bypass-fallback.patch'
|
||||
'0002-nv15-to-p010-unpack.patch')
|
||||
sha256sums=('SKIP' 'SKIP' 'SKIP')
|
||||
'0002-nv15-to-p010-unpack.patch'
|
||||
'0003-h264-idct4-daedalus-fourier.patch')
|
||||
sha256sums=('SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP')
|
||||
|
||||
pkgver() {
|
||||
cd "${_srcname}"
|
||||
@@ -93,9 +104,25 @@ prepare() {
|
||||
cd "${_srcname}"
|
||||
patch -Np1 -i "${srcdir}/0001-libudev-bypass-fallback.patch"
|
||||
patch -Np1 -i "${srcdir}/0002-nv15-to-p010-unpack.patch"
|
||||
patch -Np1 -i "${srcdir}/0003-h264-idct4-daedalus-fourier.patch"
|
||||
}
|
||||
|
||||
build() {
|
||||
# --- daedalus-fourier: build static .a with PIC, install to a
|
||||
# per-build prefix; libavcodec.so links it into the shared object so
|
||||
# H264DSPContext.idct_add (and follow-up kernels) dispatch through
|
||||
# the daedalus recipe layer instead of the in-tree NEON .S code. ---
|
||||
local _fourier_prefix="${srcdir}/fourier-prefix"
|
||||
mkdir -p "${_fourier_prefix}"
|
||||
pushd "${srcdir}"/daedalus-fourier >/dev/null
|
||||
cmake -B build -G Ninja \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DCMAKE_POSITION_INDEPENDENT_CODE=ON \
|
||||
-DCMAKE_INSTALL_PREFIX="${_fourier_prefix}"
|
||||
cmake --build build --target daedalus_core
|
||||
cmake --install build
|
||||
popd >/dev/null
|
||||
|
||||
cd "${_srcname}"
|
||||
|
||||
# FFmpeg's configure resolves the compiler via `which` and bakes the
|
||||
@@ -147,6 +174,9 @@ build() {
|
||||
--enable-libx265 \
|
||||
--enable-libwebp \
|
||||
\
|
||||
--extra-cflags="-I${_fourier_prefix}/include" \
|
||||
--extra-ldflags="-L${_fourier_prefix}/lib" \
|
||||
--extra-libs="-ldaedalus_core -lvulkan -lpthread" \
|
||||
--host-cflags='-fPIC'
|
||||
|
||||
make
|
||||
|
||||
@@ -0,0 +1,137 @@
|
||||
From f760c0541586f43334c02611fcb4c212c08ad576 Mon Sep 17 00:00:00 2001
|
||||
From: Markus Fritsche <mfritsche@reauktion.de>
|
||||
Date: Thu, 21 May 2026 21:40:22 +0200
|
||||
Subject: [PATCH] avcodec/aarch64/h264dsp: route H.264 4x4 IDCT through
|
||||
daedalus-fourier
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
H264DSPContext.idct_add (called per 4x4 block from the intra-4x4
|
||||
decode path in h264_mb.c) now dispatches through
|
||||
daedalus_recipe_dispatch_h264_idct4 instead of ff_h264_idct_add_neon.
|
||||
|
||||
The recipe layer picks the substrate; for cycle 6 (H.264 IDCT 4x4)
|
||||
the recipe is CPU NEON, so this is effectively a NEON-to-NEON
|
||||
substitution with one extra dispatch call and recipe-table lookup.
|
||||
Provides the first end-to-end exercise of the daedalus-fourier
|
||||
kernel pack inside the libavcodec.so decode hot path; follow-up
|
||||
patches wire IDCT 8x8, luma-v deblock, and qpel mc20.
|
||||
|
||||
The library context is process-global, lazily initialised under
|
||||
pthread_once on first call. We pick the no-QPU constructor because
|
||||
libavcodec.so is loaded into arbitrary host processes
|
||||
(firefox-fourier, mpv-fourier, daedalus_v4l2_daemon, ...) and we
|
||||
cannot assume the host has a usable Vulkan instance. Higher cycles
|
||||
(deblock luma-v, MC) that benefit from the QPU will provision their
|
||||
own recipe-selected context once that path is wired.
|
||||
|
||||
Bulk paths (idct_add16, idct_add16intra, idct_add8 — used for
|
||||
non-intra4x4 macroblocks) remain on the stock NEON .S implementations
|
||||
and will be batched through daedalus_recipe_dispatch_h264_idct4 with
|
||||
n_blocks>1 in a follow-up.
|
||||
|
||||
Bit-exact against ff_h264_idct_add_neon (daedalus-fourier cycle 6
|
||||
green; see marfrit/daedalus-fourier/CYCLE_LOGS.md).
|
||||
|
||||
Refs reauktion/daedalus-v4l2#11 — substitution arc step 2.
|
||||
---
|
||||
libavcodec/aarch64/Makefile | 3 +-
|
||||
libavcodec/aarch64/h264_idct_daedalus.c | 49 +++++++++++++++++++++++
|
||||
libavcodec/aarch64/h264dsp_init_aarch64.c | 3 +-
|
||||
3 files changed, 53 insertions(+), 2 deletions(-)
|
||||
create mode 100644 libavcodec/aarch64/h264_idct_daedalus.c
|
||||
|
||||
diff --git a/libavcodec/aarch64/Makefile b/libavcodec/aarch64/Makefile
|
||||
index 41ab025..7b95fb1 100644
|
||||
--- a/libavcodec/aarch64/Makefile
|
||||
+++ b/libavcodec/aarch64/Makefile
|
||||
@@ -3,7 +3,8 @@ OBJS-$(CONFIG_AC3DSP) += aarch64/ac3dsp_init_aarch64.o
|
||||
OBJS-$(CONFIG_FDCTDSP) += aarch64/fdctdsp_init_aarch64.o
|
||||
OBJS-$(CONFIG_FMTCONVERT) += aarch64/fmtconvert_init.o
|
||||
OBJS-$(CONFIG_H264CHROMA) += aarch64/h264chroma_init_aarch64.o
|
||||
-OBJS-$(CONFIG_H264DSP) += aarch64/h264dsp_init_aarch64.o
|
||||
+OBJS-$(CONFIG_H264DSP) += aarch64/h264dsp_init_aarch64.o \
|
||||
+ aarch64/h264_idct_daedalus.o
|
||||
OBJS-$(CONFIG_HUFFYUVDSP) += aarch64/huffyuvdsp_init_aarch64.o
|
||||
OBJS-$(CONFIG_H264PRED) += aarch64/h264pred_init.o
|
||||
OBJS-$(CONFIG_H264QPEL) += aarch64/h264qpel_init_aarch64.o
|
||||
diff --git a/libavcodec/aarch64/h264_idct_daedalus.c b/libavcodec/aarch64/h264_idct_daedalus.c
|
||||
new file mode 100644
|
||||
index 0000000..538d223
|
||||
--- /dev/null
|
||||
+++ b/libavcodec/aarch64/h264_idct_daedalus.c
|
||||
@@ -0,0 +1,72 @@
+/*
+ * H.264 4x4 IDCT + add — daedalus-fourier substitution shim.
+ *
+ * Routes H264DSPContext.idct_add through
+ * daedalus_recipe_dispatch_h264_idct4 instead of ff_h264_idct_add_neon.
+ * The recipe layer picks the substrate (CPU NEON by default for
+ * cycle 6; future cycles may dispatch to V3D opportunistically).
+ *
+ * FFmpeg's 4x4 block memory layout matches daedalus's column-major
+ * convention: block[r + 4*c] = coefficient at (row r, col c). Both
+ * sides destructively zero the block after the transform.
+ *
+ * The library context is process-global and lazily initialised under
+ * pthread_once. We pick the no-QPU constructor here because
+ * libavcodec.so is loaded into arbitrary host processes
+ * (firefox-fourier, mpv-fourier, daedalus_v4l2_daemon, ...) and we
+ * cannot assume the host has a usable Vulkan instance. Higher cycles
+ * (deblock, MC) that benefit from the QPU will initialise their own
+ * recipe-selected context once that path is wired.
+ *
+ * If the context cannot be created, calls fall back to the stock
+ * ff_h264_idct_add_neon so decoding keeps working.
+ */
+
+#include <pthread.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <daedalus.h>
+
+#include "libavutil/attributes.h"
+#include "libavcodec/h264dsp.h"
+
+/* Stock NEON kernel; fallback when no daedalus context is available. */
+void ff_h264_idct_add_neon(uint8_t *dst, int16_t *block, int stride);
+void ff_h264_idct_add_daedalus(uint8_t *dst, int16_t *block, int stride);
+
+/* Process-global context, assigned once by daedalus_ctx_init_once(). */
+static daedalus_ctx *g_dctx;
+static pthread_once_t g_dctx_once = PTHREAD_ONCE_INIT;
+
+/* pthread_once callback: create the shared no-QPU context. */
+static void daedalus_ctx_init_once(void)
+{
+    g_dctx = daedalus_ctx_create_no_qpu();
+}
+
+/**
+ * Drop-in replacement for ff_h264_idct_add_neon: 4x4 IDCT of one block,
+ * added to dst.
+ *
+ * @param dst    destination pixels the transformed block is added to
+ * @param block  coefficients of the block
+ * @param stride destination stride, forwarded to the dispatcher as size_t
+ */
+void ff_h264_idct_add_daedalus(uint8_t *dst, int16_t *block, int stride)
+{
+    /* One block, at offset 0 from dst. */
+    static const daedalus_h264_block_meta meta = { .dst_off = 0 };
+
+    pthread_once(&g_dctx_once, daedalus_ctx_init_once);
+
+    /* No context (constructor returned NULL): stay on the stock kernel
+     * instead of handing a NULL context to the dispatcher. */
+    if (!g_dctx) {
+        ff_h264_idct_add_neon(dst, block, stride);
+        return;
+    }
+
+    daedalus_recipe_dispatch_h264_idct4(g_dctx, dst, (size_t)stride,
+                                        block, 1, &meta);
+}
|
||||
diff --git a/libavcodec/aarch64/h264dsp_init_aarch64.c b/libavcodec/aarch64/h264dsp_init_aarch64.c
|
||||
index c684574..b993df2 100644
|
||||
--- a/libavcodec/aarch64/h264dsp_init_aarch64.c
|
||||
+++ b/libavcodec/aarch64/h264dsp_init_aarch64.c
|
||||
@@ -66,6 +66,7 @@ void ff_biweight_h264_pixels_4_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride
|
||||
int weights, int offset);
|
||||
|
||||
void ff_h264_idct_add_neon(uint8_t *dst, int16_t *block, int stride);
|
||||
+void ff_h264_idct_add_daedalus(uint8_t *dst, int16_t *block, int stride);
|
||||
void ff_h264_idct_dc_add_neon(uint8_t *dst, int16_t *block, int stride);
|
||||
void ff_h264_idct_add16_neon(uint8_t *dst, const int *block_offset,
|
||||
int16_t *block, int stride,
|
||||
@@ -139,7 +140,7 @@ av_cold void ff_h264dsp_init_aarch64(H264DSPContext *c, const int bit_depth,
|
||||
c->biweight_pixels_tab[1] = ff_biweight_h264_pixels_8_neon;
|
||||
c->biweight_pixels_tab[2] = ff_biweight_h264_pixels_4_neon;
|
||||
|
||||
- c->idct_add = ff_h264_idct_add_neon;
|
||||
+ c->idct_add = ff_h264_idct_add_daedalus;
|
||||
c->idct_dc_add = ff_h264_idct_dc_add_neon;
|
||||
c->idct_add16 = ff_h264_idct_add16_neon;
|
||||
c->idct_add16intra = ff_h264_idct_add16intra_neon;
|
||||
--
|
||||
2.47.3
|
||||
|
||||
+38
-1
@@ -33,7 +33,12 @@ FFMPEG_VERSION=8.1
|
||||
# epoch 2 matches Debian's stock ffmpeg (currently 7:7.1.x in trixie);
|
||||
# +rfourier suffix to avoid colliding with upstream/Debian rebuilds.
|
||||
PKGVER=2:${FFMPEG_VERSION}+rfourier+gb57fbbe
|
||||
PKGREL=2 # pkgrel=2 — Path A move to /opt/fourier prefix (2026-05-19)
|
||||
PKGREL=3 # pkgrel=3 — H.264 IDCT 4x4 daedalus-fourier substitution (2026-05-21)
|
||||
|
||||
# daedalus-fourier pin — first kernel substitution in libavcodec (cycle 6
|
||||
# H.264 IDCT 4x4). Same SHA as the daedalus-v4l2 daemon already ships
|
||||
# inline; rev in lockstep with the daemon when the public API rolls.
|
||||
DAEDALUS_FOURIER_COMMIT=d87239d8172307d9a1b93c95cbed116d175b85cc
|
||||
|
||||
HERE=$(dirname "$(readlink -f "$0")")
|
||||
|
||||
@@ -57,6 +62,34 @@ fi
|
||||
# Apply patches (same as Arch).
|
||||
patch -Np1 -i "$HERE/0001-libudev-bypass-fallback.patch"
|
||||
patch -Np1 -i "$HERE/0002-nv15-to-p010-unpack.patch"
|
||||
patch -Np1 -i "$HERE/0003-h264-idct4-daedalus-fourier.patch"
|
||||
|
||||
# --- daedalus-fourier: fetch + build static .a with PIC, install to a
|
||||
# per-build prefix; libavcodec.so links it into the shared object so
|
||||
# H264DSPContext.idct_add (and follow-up kernels) dispatch through the
|
||||
# daedalus recipe layer instead of the in-tree NEON .S code. ---
|
||||
#
|
||||
# PIC is mandatory — the static .a is linked into a .so, so all object
|
||||
# code must be relocatable. Vulkan is PUBLIC-linked by daedalus_core
|
||||
# (queryable QPU substrate); we add libvulkan1 to Debian Depends below
|
||||
# so dlopen of libavcodec.so.62 succeeds on stock trixie.
|
||||
FOURIER_PREFIX=$work/fourier-prefix
|
||||
mkdir -p "$FOURIER_PREFIX"
|
||||
|
||||
pushd "$work" >/dev/null
|
||||
curl --connect-timeout 10 --max-time 600 --retry 3 --retry-delay 5 -sSLfo daedalus-fourier.tar.gz \
|
||||
"https://git.reauktion.de/marfrit/daedalus-fourier/archive/${DAEDALUS_FOURIER_COMMIT}.tar.gz"
|
||||
tar xzf daedalus-fourier.tar.gz
|
||||
pushd daedalus-fourier >/dev/null
|
||||
cmake -B build -G Ninja \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DCMAKE_POSITION_INDEPENDENT_CODE=ON \
|
||||
-DCMAKE_INSTALL_PREFIX="$FOURIER_PREFIX"
|
||||
cmake --build build --target daedalus_core
|
||||
cmake --install build
|
||||
popd >/dev/null
|
||||
popd >/dev/null
|
||||
cd "$work/FFmpeg"
|
||||
|
||||
# Configure with Arch-parity flags. Drops the same set of features
|
||||
# (X11, AMF, CUDA, FireWire, AviSynth, Bluray, OpenMPT, JPEG-XL,
|
||||
@@ -73,6 +106,9 @@ patch -Np1 -i "$HERE/0002-nv15-to-p010-unpack.patch"
|
||||
--mandir=/opt/fourier/share/man \
|
||||
--extra-ldexeflags='-Wl,-rpath,/opt/fourier/lib' \
|
||||
--extra-ldsoflags='-Wl,-rpath,/opt/fourier/lib' \
|
||||
--extra-cflags="-I${FOURIER_PREFIX}/include" \
|
||||
--extra-ldflags="-L${FOURIER_PREFIX}/lib" \
|
||||
--extra-libs="-ldaedalus_core -lvulkan -lpthread" \
|
||||
--disable-debug \
|
||||
--disable-static \
|
||||
--disable-doc \
|
||||
@@ -147,6 +183,7 @@ Priority: optional
|
||||
Architecture: arm64
|
||||
Depends: libc6,
|
||||
libdrm2,
|
||||
libvulkan1,
|
||||
libfontconfig1,
|
||||
libfreetype6,
|
||||
libfribidi0,
|
||||
|
||||
@@ -1,3 +1,32 @@
|
||||
ffmpeg-v4l2-request-fourier (2:8.1+rfourier+gb57fbbe-3) bookworm trixie; urgency=medium
|
||||
|
||||
* Add 0003-h264-idct4-daedalus-fourier.patch — H264DSPContext.idct_add
|
||||
(per-block 4x4 IDCT, called from the intra-4x4 decode path in
|
||||
libavcodec/h264_mb.c) now dispatches through
|
||||
daedalus_recipe_dispatch_h264_idct4 instead of
|
||||
ff_h264_idct_add_neon. First end-to-end exercise of the
|
||||
daedalus-fourier kernel pack inside libavcodec.so on the
|
||||
production decode hot path (daedalus-v4l2#11 step 2 — cycle 6
|
||||
H.264 IDCT 4x4, NEON-by-recipe).
|
||||
* build-deb.sh: fetches + builds daedalus-fourier (pinned at
|
||||
d87239d, lockstep with the daemon's static link) with
|
||||
-fPIC into a per-build temp prefix, then passes
|
||||
--extra-cflags=-I.../include --extra-ldflags=-L.../lib
|
||||
--extra-libs="-ldaedalus_core -lvulkan -lpthread" to FFmpeg
|
||||
configure. Static-linked into libavcodec.so.62.
|
||||
* Bulk paths (idct_add16 / idct_add16intra / idct_add8) remain on
|
||||
the stock NEON .S code and will be batched through
|
||||
daedalus_recipe_dispatch_h264_idct4 with n_blocks>1 in a
|
||||
follow-up. Cycles 7/8/9 (IDCT 8x8 / luma-v deblock / qpel mc20)
|
||||
land in subsequent patches.
|
||||
* Depends gains libvulkan1 — daedalus_core PUBLIC-links Vulkan
|
||||
(queryable QPU substrate); the no-QPU constructor still works,
|
||||
but the loader refuses libavcodec.so.62 at dlopen time without
|
||||
libvulkan.so.1 present.
|
||||
* No ABI change; SONAMEs stay 62/62/60.
|
||||
|
||||
-- Markus Fritsche <mfritsche@reauktion.de> Thu, 21 May 2026 20:00:00 +0000
|
||||
|
||||
ffmpeg-v4l2-request-fourier (2:8.1+rfourier+gb57fbbe-1) bookworm trixie; urgency=medium
|
||||
|
||||
* Initial Debian packaging for the Kwiboo FFmpeg fork with V4L2
|
||||
|
||||
Reference in New Issue
Block a user