forked from marfrit/marfrit-packages
Compare commits
10 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 91022b390e | |||
| b736dd0529 | |||
| 0bfc4ab03e | |||
| 8729c2db92 | |||
| d449ec1073 | |||
| 9d30c34be9 | |||
| 1ca18ac130 | |||
| cf9eef6cfa | |||
| 5c69460722 | |||
| d11a52405d |
@@ -23,10 +23,10 @@ _module=daedalus_v4l2
|
||||
# content-equivalent to f0d4186 plus PR #4 (cosmetic menu ctrls).
|
||||
# PROTO_VERSION drops 1 → 0; lock-step install with
|
||||
# daedalus-v4l2 0.1.0.r33.5d8b436 REQUIRED.
|
||||
_commit=5d8b4369e58ab947d1c56b1f718293c57c6065b5
|
||||
_commit=872eec505eb91b561892d02a0526749348ddc121
|
||||
|
||||
pkgver=0.1.0.r33.5d8b436
|
||||
pkgrel=1 # reset for new upstream pin (5d8b436 — revert parking design)
|
||||
pkgver=0.1.0.r45.872eec5
|
||||
pkgrel=1 # reset for new upstream pin (872eec5 — PROTO_MAX_PAYLOAD 64 KiB -> 1 MiB, closes #19); lock-step with daedalus-v4l2 0.1.0.r45.872eec5 REQUIRED
|
||||
pkgdesc="V4L2 stateless decoder shim kernel module (DKMS) — Pi 5 / CM5"
|
||||
arch=('any')
|
||||
url="https://git.reauktion.de/reauktion/daedalus-v4l2"
|
||||
|
||||
@@ -23,12 +23,12 @@ _upstreampkg=daedalus-v4l2
|
||||
# (daedalus-v4l2#11). Daemon still needs daedalus-fourier at
|
||||
# build time (Arch packaging for that is a follow-up; Debian side
|
||||
# fetches inline via build-deb.sh).
|
||||
_commit=6e6dfa144da7bc7fa8be50c8da91d7d1c6132a2c
|
||||
_commit=872eec505eb91b561892d02a0526749348ddc121
|
||||
|
||||
# 0.1.0 (pre-1.0) + commit count + short sha. Bump the .Y on each
|
||||
# Phase 8.x close. pkgver() recomputes at build time.
|
||||
pkgver=0.1.0.r41.6e6dfa1
|
||||
pkgrel=1 # reset for new upstream pin (6e6dfa1 — soname 62 via /opt/fourier)
|
||||
pkgver=0.1.0.r45.872eec5
|
||||
pkgrel=1 # reset for new upstream pin (872eec5 — PROTO_MAX_PAYLOAD 64 KiB -> 1 MiB, closes #19); lock-step with daedalus-v4l2-dkms 0.1.0.r45.872eec5 REQUIRED
|
||||
pkgdesc="Userspace daemon for the daedalus-v4l2 V4L2 stateless decoder shim (VP9/AV1/H.264 on Pi 5 / CM5)"
|
||||
arch=('aarch64')
|
||||
url="https://git.reauktion.de/reauktion/daedalus-v4l2"
|
||||
|
||||
@@ -0,0 +1,82 @@
|
||||
From 0d1292ea99bc4e5fa2da438259fa01a2374e3e04 Mon Sep 17 00:00:00 2001
|
||||
From: Markus Fritsche <mfritsche@reauktion.de>
|
||||
Date: Fri, 22 May 2026 14:18:25 +0200
|
||||
Subject: [PATCH] avcodec/h264: restore AV_CODEC_FLAG_LOW_DELAY semantics
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
FFmpeg 8.x dropped the H.264 decoder's low_delay path —
|
||||
AV_CODEC_FLAG_LOW_DELAY no longer prevents
|
||||
h264_select_output_frame from running the display-order DPB
|
||||
output queue. V4L2-stateless-style consumers (daedalus-v4l2
|
||||
daemon, libva-v4l2-request-fourier) that set the flag end up
|
||||
seeing the 2-1-4-3 pair-swap pattern on B-frame streams again.
|
||||
|
||||
Restore the documented semantics:
|
||||
|
||||
- Early-exit at the top of h264_select_output_frame when the
|
||||
flag is set: emit the just-decoded picture immediately as
|
||||
next_output_pic, mirror the corruption / recovery-point
|
||||
tracking the main path performs, and skip the entire
|
||||
delayed_pic[] / POC reorder machinery.
|
||||
|
||||
- Suppress the SPS-driven has_b_frames clobber in
|
||||
h264_field_start when the flag is set, so the per-slice
|
||||
bitstream_restriction_flag re-pickup cannot reintroduce a
|
||||
nonzero reorder buffer mid-stream.
|
||||
|
||||
This is a fork-only change required by the daedalus-v4l2 daemon's
|
||||
one-frame-per-send_packet contract; upstream FFmpeg consumers that
|
||||
expect display-order output remain untouched (flag default = off).
|
||||
|
||||
Refs reauktion/daedalus-v4l2#11 — substitution arc step 2 deblock
|
||||
+ flag-restoration follow-up.
|
||||
---
|
||||
libavcodec/h264_slice.c | 23 +++++++++++++++++++++++
|
||||
1 file changed, 23 insertions(+)
|
||||
|
||||
diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c
|
||||
index 97fab70..a7bfbd6 100644
|
||||
--- a/libavcodec/h264_slice.c
|
||||
+++ b/libavcodec/h264_slice.c
|
||||
@@ -1308,6 +1308,28 @@ static int h264_select_output_frame(H264Context *h)
|
||||
cur->mmco_reset = h->mmco_reset;
|
||||
h->mmco_reset = 0;
|
||||
|
||||
+ /* AV_CODEC_FLAG_LOW_DELAY restore (FFmpeg 8.x dropped the H.264
|
||||
+ * decoder's low_delay path). Bypass the display-order DPB
|
||||
+ * output queue: emit the just-decoded picture immediately, in
|
||||
+ * decode order, one per send_packet. V4L2-stateless-style
|
||||
+ * consumers (daedalus-v4l2 daemon, libva-v4l2-request-fourier)
|
||||
+ * do their own POC-based reorder downstream and require this
|
||||
+ * behaviour. */
|
||||
+ if (h->avctx->flags & AV_CODEC_FLAG_LOW_DELAY) {
|
||||
+ h->next_output_pic = cur;
|
||||
+ h->next_outputed_poc = cur->poc;
|
||||
+ h->frame_recovered |= cur->recovered;
|
||||
+ cur->recovered |= h->frame_recovered & FRAME_RECOVERED_SEI;
|
||||
+ if (!cur->recovered) {
|
||||
+ if (!(h->avctx->flags & AV_CODEC_FLAG_OUTPUT_CORRUPT) &&
|
||||
+ !(h->avctx->flags2 & AV_CODEC_FLAG2_SHOW_ALL))
|
||||
+ h->next_output_pic = NULL;
|
||||
+ else
|
||||
+ cur->f->flags |= AV_FRAME_FLAG_CORRUPT;
|
||||
+ }
|
||||
+ return 0;
|
||||
+ }
|
||||
+
|
||||
if (sps->bitstream_restriction_flag ||
|
||||
h->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT) {
|
||||
h->avctx->has_b_frames = FFMAX(h->avctx->has_b_frames, sps->num_reorder_frames);
|
||||
@@ -1415,6 +1437,7 @@ static int h264_field_start(H264Context *h, const H264SliceContext *sl,
|
||||
sps = h->ps.sps;
|
||||
|
||||
if (sps->bitstream_restriction_flag &&
|
||||
+ !(h->avctx->flags & AV_CODEC_FLAG_LOW_DELAY) &&
|
||||
h->avctx->has_b_frames < sps->num_reorder_frames) {
|
||||
h->avctx->has_b_frames = sps->num_reorder_frames;
|
||||
}
|
||||
--
|
||||
2.47.3
|
||||
|
||||
@@ -0,0 +1,139 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Markus Fritsche <mfritsche@reauktion.de>
|
||||
Date: Sat, 23 May 2026 12:00:00 +0200
|
||||
Subject: [PATCH] avcodec/aarch64/h264qpel: route 8x8 mc20 through
|
||||
daedalus-fourier
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
H264QpelContext.put_h264_qpel_pixels_tab[1][2] (8x8 luma horizontal
|
||||
half-pel, 6-tap "put" variant — the canonical representative of the
|
||||
H.264 luma motion-compensation family) now dispatches through
|
||||
daedalus_recipe_dispatch_h264_qpel_mc20 instead of
|
||||
ff_put_h264_qpel8_mc20_neon.
|
||||
|
||||
Cycle 9 of the daedalus-v4l2#11 step 2 substitution arc; closes the
|
||||
4-cycle libavcodec.so substitution sequence (6 IDCT 4x4 / 7 IDCT 8x8 /
|
||||
8 luma-v deblock / 9 qpel mc20).
|
||||
|
||||
The recipe layer picks the substrate. Per docs/k9_h264qpel_mc20.md
|
||||
the verdict is CPU NEON: per-block 7.6 ns at 131 Mblock/s gives 135x
|
||||
margin over 30 fps 1080p, and the QPU dispatch floor (~250 ns)
|
||||
makes any V3D shader strictly worse. Substitution is plumbing-only,
|
||||
NEON-by-recipe — same daedalus_ctx_create_no_qpu pthread_once
|
||||
context shape the cycles 6/7/8 shims already own (kept SEPARATE
|
||||
from the H264DSP shim's ctx because H264QPEL is its own libavcodec
|
||||
Makefile module and link order does not guarantee a single .o
|
||||
owns the ctx symbol; one extra ~µs init per process, paid lazily).
|
||||
|
||||
Other H.264 luma MC variants (mc02, mc11, mc22 etc.) and the 16x16
|
||||
size tier stay on the in-tree NEON .S code. Per the cycle-9 phase-1
|
||||
rationale, mc20 8x8 is representative of the whole family's per-block
|
||||
cost — extending the substitution to other variants would multiply
|
||||
recipe-lookup overhead without changing the substrate verdict.
|
||||
|
||||
Bit-exact against ff_put_h264_qpel8_mc20_neon (daedalus-fourier
|
||||
cycle 9 green; M1 = 100% bit-exact across 10000 random blocks).
|
||||
|
||||
No SONAME change, no Depends change.
|
||||
|
||||
Refs reauktion/daedalus-v4l2#11 — substitution arc step 2 cycle 9.
|
||||
---
|
||||
libavcodec/aarch64/Makefile | 3 +-
|
||||
libavcodec/aarch64/h264_qpel_daedalus.c | 50 ++++++++++++++++++++++
|
||||
libavcodec/aarch64/h264qpel_init_aarch64.c | 4 +-
|
||||
3 files changed, 55 insertions(+), 2 deletions(-)
|
||||
create mode 100644 libavcodec/aarch64/h264_qpel_daedalus.c
|
||||
|
||||
diff --git a/libavcodec/aarch64/Makefile b/libavcodec/aarch64/Makefile
|
||||
--- a/libavcodec/aarch64/Makefile
|
||||
+++ b/libavcodec/aarch64/Makefile
|
||||
@@ -7,7 +7,8 @@ OBJS-$(CONFIG_H264DSP) += aarch64/h264dsp_init_aarch64.o \
|
||||
aarch64/h264_idct_daedalus.o
|
||||
OBJS-$(CONFIG_HUFFYUVDSP) += aarch64/huffyuvdsp_init_aarch64.o
|
||||
OBJS-$(CONFIG_H264PRED) += aarch64/h264pred_init.o
|
||||
-OBJS-$(CONFIG_H264QPEL) += aarch64/h264qpel_init_aarch64.o
|
||||
+OBJS-$(CONFIG_H264QPEL) += aarch64/h264qpel_init_aarch64.o \
|
||||
+ aarch64/h264_qpel_daedalus.o
|
||||
OBJS-$(CONFIG_HPELDSP) += aarch64/hpeldsp_init_aarch64.o
|
||||
OBJS-$(CONFIG_IDCTDSP) += aarch64/idctdsp_init_aarch64.o
|
||||
OBJS-$(CONFIG_ME_CMP) += aarch64/me_cmp_init_aarch64.o
|
||||
diff --git a/libavcodec/aarch64/h264_qpel_daedalus.c b/libavcodec/aarch64/h264_qpel_daedalus.c
|
||||
new file mode 100644
|
||||
--- /dev/null
|
||||
+++ b/libavcodec/aarch64/h264_qpel_daedalus.c
|
||||
@@ -0,0 +1,50 @@
|
||||
+/*
|
||||
+ * H.264 luma qpel mc20 (8x8, horizontal half-pel, 6-tap "put")
|
||||
+ * — daedalus-fourier substitution shim.
|
||||
+ *
|
||||
+ * Routes H264QpelContext.put_h264_qpel_pixels_tab[1][2] through
|
||||
+ * daedalus_recipe_dispatch_h264_qpel_mc20 instead of
|
||||
+ * ff_put_h264_qpel8_mc20_neon. The recipe layer picks the substrate
|
||||
+ * (CPU NEON for cycle 9; QPU not viable — per-block 7.6 ns vs
|
||||
+ * ~250 ns QPU dispatch floor, see docs/k9_h264qpel_mc20.md).
|
||||
+ *
|
||||
+ * Sibling to libavcodec/aarch64/h264_idct_daedalus.c. We keep a
|
||||
+ * SEPARATE process-global pthread_once context here instead of
|
||||
+ * sharing the H264DSP one because H264QPEL is its own libavcodec
|
||||
+ * Makefile module and link order does not guarantee a single .o
|
||||
+ * owns the ctx symbol. The cost is one extra
|
||||
+ * daedalus_ctx_create_no_qpu (~µs) per process; daemon and host
|
||||
+ * processes pay this lazily on first MC call.
|
||||
+ *
|
||||
+ * FFmpeg H264QpelContext convention: both dst and src use a SINGLE
|
||||
+ * stride and `src` already points at the leftmost OUTPUT column
|
||||
+ * (col 0); each output's 6-tap filter reads cols -2..+3 around it. This matches
|
||||
+ * daedalus_recipe_dispatch_h264_qpel_mc20's documented contract
|
||||
+ * directly, so dst_off = src_off = 0.
|
||||
+ */
|
||||
+
|
||||
+#include <pthread.h>
|
||||
+#include <stddef.h>
|
||||
+#include <stdint.h>
|
||||
+
|
||||
+#include <daedalus.h>
|
||||
+
|
||||
+#include "libavutil/attributes.h"
|
||||
+
|
||||
+static daedalus_ctx *g_dctx;
|
||||
+static pthread_once_t g_dctx_once = PTHREAD_ONCE_INIT;
|
||||
+
|
||||
+static void daedalus_ctx_init_once(void)
|
||||
+{
|
||||
+ g_dctx = daedalus_ctx_create_no_qpu();
|
||||
+}
|
||||
+
|
||||
+void ff_put_h264_qpel8_mc20_daedalus(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
|
||||
+
|
||||
+void ff_put_h264_qpel8_mc20_daedalus(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
|
||||
+{
|
||||
+ static const daedalus_h264_qpel_meta meta = { .dst_off = 0, .src_off = 0 };
|
||||
+ pthread_once(&g_dctx_once, daedalus_ctx_init_once);
|
||||
+ daedalus_recipe_dispatch_h264_qpel_mc20(g_dctx, dst, src, (size_t)stride,
|
||||
+ 1, &meta);
|
||||
+}
|
||||
diff --git a/libavcodec/aarch64/h264qpel_init_aarch64.c b/libavcodec/aarch64/h264qpel_init_aarch64.c
|
||||
--- a/libavcodec/aarch64/h264qpel_init_aarch64.c
|
||||
+++ b/libavcodec/aarch64/h264qpel_init_aarch64.c
|
||||
@@ -47,6 +47,8 @@ void ff_put_h264_qpel8_mc00_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t str
|
||||
void ff_put_h264_qpel8_mc10_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
|
||||
void ff_put_h264_qpel8_mc20_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
|
||||
void ff_put_h264_qpel8_mc30_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
|
||||
+void ff_put_h264_qpel8_mc20_daedalus(uint8_t *dst, const uint8_t *src,
|
||||
+ ptrdiff_t stride);
|
||||
void ff_put_h264_qpel8_mc01_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
|
||||
void ff_put_h264_qpel8_mc11_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
|
||||
void ff_put_h264_qpel8_mc21_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
|
||||
@@ -184,7 +186,7 @@ av_cold void ff_h264qpel_init_aarch64(H264QpelContext *c, int bit_depth)
|
||||
|
||||
c->put_h264_qpel_pixels_tab[1][ 0] = ff_put_h264_qpel8_mc00_neon;
|
||||
c->put_h264_qpel_pixels_tab[1][ 1] = ff_put_h264_qpel8_mc10_neon;
|
||||
- c->put_h264_qpel_pixels_tab[1][ 2] = ff_put_h264_qpel8_mc20_neon;
|
||||
+ c->put_h264_qpel_pixels_tab[1][ 2] = ff_put_h264_qpel8_mc20_daedalus;
|
||||
c->put_h264_qpel_pixels_tab[1][ 3] = ff_put_h264_qpel8_mc30_neon;
|
||||
c->put_h264_qpel_pixels_tab[1][ 4] = ff_put_h264_qpel8_mc01_neon;
|
||||
c->put_h264_qpel_pixels_tab[1][ 5] = ff_put_h264_qpel8_mc11_neon;
|
||||
--
|
||||
2.47.3
|
||||
@@ -24,13 +24,13 @@ _srcname=FFmpeg
|
||||
_version='8.1'
|
||||
_commit='b57fbbe50c9b2656fad86a1a7eeabfd2b2a50935' # v4l2-request-n8.1 tip 2026-04-24
|
||||
pkgver=8.1.r123329.b57fbbe
|
||||
pkgrel=8 # pkgrel=8 — H.264 luma-v deblock daedalus-fourier substitution (cycle 8, 2026-05-22)
|
||||
pkgrel=10 # pkgrel=10 — H.264 luma qpel mc20 daedalus-fourier substitution (cycle 9, 2026-05-23)
|
||||
epoch=2
|
||||
|
||||
# daedalus-fourier pin — first kernel substitution in libavcodec
|
||||
# (cycle 6 H.264 IDCT 4x4). Same SHA as the daedalus-v4l2 daemon's
|
||||
# inline build; lockstep with that until the public API rolls.
|
||||
_daedalus_fourier_commit='d87239d8172307d9a1b93c95cbed116d175b85cc'
|
||||
# daedalus-fourier pin. 209a421 = PR #2 merge (Phase 8c — public API
|
||||
# gains daedalus_recipe_dispatch_h264_qpel_mc20 + DAEDALUS_KERNEL_H264_QPEL_MC20).
|
||||
# Cycle 9 closes the libavcodec.so substitution arc started at cycle 6.
|
||||
_daedalus_fourier_commit='209a4218bcb98b91c04f07ad61513bb04adb13ad'
|
||||
pkgdesc='FFmpeg with V4L2 Request API hwaccel (Rockchip / Allwinner stateless decode)'
|
||||
arch=('aarch64')
|
||||
url='https://github.com/Kwiboo/FFmpeg'
|
||||
@@ -92,8 +92,10 @@ source=("git+https://github.com/Kwiboo/FFmpeg.git#commit=${_commit}"
|
||||
'0002-nv15-to-p010-unpack.patch'
|
||||
'0003-h264-idct4-daedalus-fourier.patch'
|
||||
'0004-h264-idct8-daedalus-fourier.patch'
|
||||
'0005-h264-deblock-luma-v-daedalus-fourier.patch')
|
||||
sha256sums=('SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP')
|
||||
'0005-h264-deblock-luma-v-daedalus-fourier.patch'
|
||||
'0006-h264-restore-low-delay.patch'
|
||||
'0007-h264-qpel-mc20-daedalus-fourier.patch')
|
||||
sha256sums=('SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP')
|
||||
|
||||
pkgver() {
|
||||
cd "${_srcname}"
|
||||
@@ -109,6 +111,8 @@ prepare() {
|
||||
patch -Np1 -i "${srcdir}/0003-h264-idct4-daedalus-fourier.patch"
|
||||
patch -Np1 -i "${srcdir}/0004-h264-idct8-daedalus-fourier.patch"
|
||||
patch -Np1 -i "${srcdir}/0005-h264-deblock-luma-v-daedalus-fourier.patch"
|
||||
patch -Np1 -i "${srcdir}/0006-h264-restore-low-delay.patch"
|
||||
patch -Np1 -i "${srcdir}/0007-h264-qpel-mc20-daedalus-fourier.patch"
|
||||
}
|
||||
|
||||
build() {
|
||||
|
||||
@@ -45,7 +45,7 @@ pkgver=26.0.6.r5.video1
|
||||
pkgrel=1
|
||||
pkgdesc="Patched Mesa libvulkan_panfrost.so adding VK_KHR_video_decode_h264 on Bifrost SBCs (sibling of mesa-panvk-bifrost-r4)"
|
||||
arch=('aarch64')
|
||||
url="https://github.com/marfrit/panvk-bifrost"
|
||||
url="https://git.reauktion.de/marfrit/panvk-bifrost"
|
||||
license=('MIT')
|
||||
|
||||
depends=(
|
||||
|
||||
@@ -34,7 +34,7 @@ pkgver=26.0.6.r4
|
||||
pkgrel=1
|
||||
pkgdesc="Patched Mesa libvulkan_panfrost.so exposing Bifrost-gen Mali to Vulkan apps (panvk-bifrost campaign)"
|
||||
arch=('aarch64')
|
||||
url="https://github.com/marfrit/panvk-bifrost"
|
||||
url="https://git.reauktion.de/marfrit/panvk-bifrost"
|
||||
license=('MIT')
|
||||
|
||||
# We co-install at /usr/lib/panvk-bifrost/ so no conflicts with stock mesa.
|
||||
|
||||
+3
-3
@@ -14,9 +14,9 @@
|
||||
# Sibling userspace package: ../daedalus-v4l2/build-deb.sh
|
||||
set -euo pipefail
|
||||
|
||||
UPSTREAM_COMMIT=5d8b4369e58ab947d1c56b1f718293c57c6065b5
|
||||
PKGVER=0.1.0+r33+g5d8b436
|
||||
PKGREL=1 # reset for new upstream pin (5d8b436 — revert parking design); still carries the #64 multi-kernel postinst fix
|
||||
UPSTREAM_COMMIT=872eec505eb91b561892d02a0526749348ddc121
|
||||
PKGVER=0.1.0+r45+g872eec5
|
||||
PKGREL=1 # reset for new upstream pin (872eec5 — PROTO_MAX_PAYLOAD 64 KiB -> 1 MiB, closes #19); lock-step with daedalus-v4l2 0.1.0+r45+g872eec5 REQUIRED
|
||||
MODULE_NAME=daedalus_v4l2
|
||||
|
||||
HERE=$(dirname "$(readlink -f "$0")")
|
||||
|
||||
+21
@@ -1,3 +1,24 @@
|
||||
daedalus-v4l2-dkms (0.1.0+r45+g872eec5-1) bookworm trixie; urgency=medium
|
||||
|
||||
* Bump to 872eec5 — picks up daedalus-v4l2 PR #20 (closes #19).
|
||||
Wire-protocol cap DAEDALUS_PROTO_MAX_PAYLOAD raised from 64 KiB
|
||||
to 1 MiB in include/daedalus_v4l2_proto.h. The kernel module
|
||||
inherits the larger DAEDALUS_MAX_BITSTREAM via the same #define
|
||||
and daedalus_fill_output_fmt now reports OUTPUT_MPLANE
|
||||
sizeimage = ~1 MiB instead of 65484.
|
||||
* Skips the r33 -> r45 commit range — between 5d8b436 and 872eec5
|
||||
only one kernel/include change landed (the PROTO_MAX_PAYLOAD
|
||||
bump above). The intervening daemon-only bumps (r37 / r39 /
|
||||
r41 / r43) didn't touch kernel/ or include/ at all.
|
||||
* Effective wire cap is min(kernel, daemon) — lock-step install
|
||||
WITH daedalus-v4l2 0.1.0+r45+g872eec5 REQUIRED.
|
||||
* Allocations (kmemdup / kmalloc on payload, vb2 plane backing)
|
||||
are dynamic and sized per-payload at runtime; the bump only
|
||||
sets the ceiling. KMALLOC_MAX_SIZE on aarch64 SLUB is several
|
||||
MiB so 1 MiB is well within bounds.
|
||||
|
||||
-- Markus Fritsche <mfritsche@reauktion.de> Fri, 22 May 2026 21:00:00 +0000
|
||||
|
||||
daedalus-v4l2-dkms (0.1.0+r33+g5d8b436-1) bookworm trixie; urgency=medium
|
||||
|
||||
* Bump to 5d8b436 — reverts daedalus-v4l2 PRs #7 + #8. Kernel
|
||||
|
||||
Vendored
+3
-3
@@ -19,9 +19,9 @@ set -euo pipefail
|
||||
# source tree we own in marfrit-packages. Headers + .pc files
|
||||
# come from ffmpeg-v4l2-request-fourier (installed by the CI
|
||||
# workflow before this script runs; see PKG_CONFIG_PATH below).
|
||||
UPSTREAM_COMMIT=6e6dfa144da7bc7fa8be50c8da91d7d1c6132a2c
|
||||
PKGVER=0.1.0+r41+g6e6dfa1
|
||||
PKGREL=1 # reset for new upstream pin (6e6dfa1 — soname 62 via /opt/fourier)
|
||||
UPSTREAM_COMMIT=872eec505eb91b561892d02a0526749348ddc121
|
||||
PKGVER=0.1.0+r45+g872eec5
|
||||
PKGREL=1 # reset for new upstream pin (872eec5 — PROTO_MAX_PAYLOAD 64 KiB -> 1 MiB, closes #19); lock-step with daedalus-v4l2-dkms 0.1.0+r45+g872eec5 REQUIRED
|
||||
|
||||
# daedalus-fourier pin. d87239d = marfrit/daedalus-fourier PR #1 merge
|
||||
# (install rules + pkg-config, enables this consumer to find_package
|
||||
|
||||
+43
@@ -1,3 +1,46 @@
|
||||
daedalus-v4l2 (0.1.0+r45+g872eec5-1) bookworm trixie; urgency=medium
|
||||
|
||||
* Bump to 872eec5 — picks up daedalus-v4l2 PR #20 (closes #19).
|
||||
Wire-protocol cap DAEDALUS_PROTO_MAX_PAYLOAD raised from 64 KiB
|
||||
to 1 MiB. DAEDALUS_MAX_BITSTREAM follows; daedalus_fill_output_fmt
|
||||
now reports OUTPUT_MPLANE sizeimage = ~1 MiB instead of 65484.
|
||||
libva-v4l2-request-fourier's S_FMT-driven OUTPUT-pool resize
|
||||
finally succeeds; Firefox no longer falls off to libmozavcodec
|
||||
SW when an H.264 slice exceeds 64 KiB (routine on any
|
||||
720p+ stream).
|
||||
* #define-only change in include/daedalus_v4l2_proto.h; struct
|
||||
layout unchanged. But effective cap is min(kernel, daemon) —
|
||||
lock-step install of this package WITH
|
||||
daedalus-v4l2-dkms 0.1.0+r45+g872eec5 REQUIRED.
|
||||
* Daemon-side allocations are dynamic (malloc-on-payload), so
|
||||
the practical growth is one ~1 MiB read buffer per daemon
|
||||
process at startup. Negligible on Pi 5 / 8 GB.
|
||||
* Moves r43 -> r45, landing on the same 872eec5 pin as daedalus-v4l2-dkms
|
||||
(which had been stuck at r33+g5d8b436 since the parking-design
|
||||
revert because the kernel module didn't change in r37/r39/r41/r43).
|
||||
|
||||
-- Markus Fritsche <mfritsche@reauktion.de> Fri, 22 May 2026 21:00:00 +0000
|
||||
|
||||
daedalus-v4l2 (0.1.0+r43+g1d8f5af-1) bookworm trixie; urgency=medium
|
||||
|
||||
* Bump to 1d8f5af — picks up daedalus-v4l2 PR #18 (closes #17).
|
||||
Daemon now drops degenerate (<4 byte) bitstreams at the REQ_DECODE
|
||||
entry instead of letting avcodec_send_packet return
|
||||
AVERROR_INVALIDDATA. Reply RESP_FRAME with status=
|
||||
DAEDALUS_DECODE_NO_FRAME so libva's V4L2 surface pool stays
|
||||
healthy.
|
||||
* Fixes the Firefox YouTube avc1 pause→resume regression observed
|
||||
on higgs: libva-v4l2-request-fourier flushes a 3-byte stub
|
||||
(presumably a bare NAL start code) into OUTPUT_MPLANE at the
|
||||
pause boundary; the old INVALIDDATA error path made Firefox
|
||||
fall off to libmozavcodec SW for the rest of the session. With
|
||||
this filter the daemon logs the sentinel as 'tiny bitstream 3
|
||||
bytes — dropping as no-op' and the next real REQ_DECODE
|
||||
proceeds normally.
|
||||
* Wire protocol unchanged. No daedalus-v4l2-dkms bump needed.
|
||||
|
||||
-- Markus Fritsche <mfritsche@reauktion.de> Fri, 22 May 2026 17:30:00 +0000
|
||||
|
||||
daedalus-v4l2 (0.1.0+r41+g6e6dfa1-1) bookworm trixie; urgency=medium
|
||||
|
||||
* Bump to 6e6dfa1 — daedalus-v4l2 PR #16. Daemon dlopens Kwiboo
|
||||
|
||||
@@ -0,0 +1,82 @@
|
||||
From 0d1292ea99bc4e5fa2da438259fa01a2374e3e04 Mon Sep 17 00:00:00 2001
|
||||
From: Markus Fritsche <mfritsche@reauktion.de>
|
||||
Date: Fri, 22 May 2026 14:18:25 +0200
|
||||
Subject: [PATCH] avcodec/h264: restore AV_CODEC_FLAG_LOW_DELAY semantics
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
FFmpeg 8.x dropped the H.264 decoder's low_delay path —
|
||||
AV_CODEC_FLAG_LOW_DELAY no longer prevents
|
||||
h264_select_output_frame from running the display-order DPB
|
||||
output queue. V4L2-stateless-style consumers (daedalus-v4l2
|
||||
daemon, libva-v4l2-request-fourier) that set the flag end up
|
||||
seeing the 2-1-4-3 pair-swap pattern on B-frame streams again.
|
||||
|
||||
Restore the documented semantics:
|
||||
|
||||
- Early-exit at the top of h264_select_output_frame when the
|
||||
flag is set: emit the just-decoded picture immediately as
|
||||
next_output_pic, mirror the corruption / recovery-point
|
||||
tracking the main path performs, and skip the entire
|
||||
delayed_pic[] / POC reorder machinery.
|
||||
|
||||
- Suppress the SPS-driven has_b_frames clobber in
|
||||
h264_field_start when the flag is set, so the per-slice
|
||||
bitstream_restriction_flag re-pickup cannot reintroduce a
|
||||
nonzero reorder buffer mid-stream.
|
||||
|
||||
This is a fork-only change required by the daedalus-v4l2 daemon's
|
||||
one-frame-per-send_packet contract; upstream FFmpeg consumers that
|
||||
expect display-order output remain untouched (flag default = off).
|
||||
|
||||
Refs reauktion/daedalus-v4l2#11 — substitution arc step 2 deblock
|
||||
+ flag-restoration follow-up.
|
||||
---
|
||||
libavcodec/h264_slice.c | 23 +++++++++++++++++++++++
|
||||
1 file changed, 23 insertions(+)
|
||||
|
||||
diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c
|
||||
index 97fab70..a7bfbd6 100644
|
||||
--- a/libavcodec/h264_slice.c
|
||||
+++ b/libavcodec/h264_slice.c
|
||||
@@ -1308,6 +1308,28 @@ static int h264_select_output_frame(H264Context *h)
|
||||
cur->mmco_reset = h->mmco_reset;
|
||||
h->mmco_reset = 0;
|
||||
|
||||
+ /* AV_CODEC_FLAG_LOW_DELAY restore (FFmpeg 8.x dropped the H.264
|
||||
+ * decoder's low_delay path). Bypass the display-order DPB
|
||||
+ * output queue: emit the just-decoded picture immediately, in
|
||||
+ * decode order, one per send_packet. V4L2-stateless-style
|
||||
+ * consumers (daedalus-v4l2 daemon, libva-v4l2-request-fourier)
|
||||
+ * do their own POC-based reorder downstream and require this
|
||||
+ * behaviour. */
|
||||
+ if (h->avctx->flags & AV_CODEC_FLAG_LOW_DELAY) {
|
||||
+ h->next_output_pic = cur;
|
||||
+ h->next_outputed_poc = cur->poc;
|
||||
+ h->frame_recovered |= cur->recovered;
|
||||
+ cur->recovered |= h->frame_recovered & FRAME_RECOVERED_SEI;
|
||||
+ if (!cur->recovered) {
|
||||
+ if (!(h->avctx->flags & AV_CODEC_FLAG_OUTPUT_CORRUPT) &&
|
||||
+ !(h->avctx->flags2 & AV_CODEC_FLAG2_SHOW_ALL))
|
||||
+ h->next_output_pic = NULL;
|
||||
+ else
|
||||
+ cur->f->flags |= AV_FRAME_FLAG_CORRUPT;
|
||||
+ }
|
||||
+ return 0;
|
||||
+ }
|
||||
+
|
||||
if (sps->bitstream_restriction_flag ||
|
||||
h->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT) {
|
||||
h->avctx->has_b_frames = FFMAX(h->avctx->has_b_frames, sps->num_reorder_frames);
|
||||
@@ -1415,6 +1437,7 @@ static int h264_field_start(H264Context *h, const H264SliceContext *sl,
|
||||
sps = h->ps.sps;
|
||||
|
||||
if (sps->bitstream_restriction_flag &&
|
||||
+ !(h->avctx->flags & AV_CODEC_FLAG_LOW_DELAY) &&
|
||||
h->avctx->has_b_frames < sps->num_reorder_frames) {
|
||||
h->avctx->has_b_frames = sps->num_reorder_frames;
|
||||
}
|
||||
--
|
||||
2.47.3
|
||||
|
||||
+139
@@ -0,0 +1,139 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Markus Fritsche <mfritsche@reauktion.de>
|
||||
Date: Sat, 23 May 2026 12:00:00 +0200
|
||||
Subject: [PATCH] avcodec/aarch64/h264qpel: route 8x8 mc20 through
|
||||
daedalus-fourier
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
H264QpelContext.put_h264_qpel_pixels_tab[1][2] (8x8 luma horizontal
|
||||
half-pel, 6-tap "put" variant — the canonical representative of the
|
||||
H.264 luma motion-compensation family) now dispatches through
|
||||
daedalus_recipe_dispatch_h264_qpel_mc20 instead of
|
||||
ff_put_h264_qpel8_mc20_neon.
|
||||
|
||||
Cycle 9 of the daedalus-v4l2#11 step 2 substitution arc; closes the
|
||||
4-cycle libavcodec.so substitution sequence (6 IDCT 4x4 / 7 IDCT 8x8 /
|
||||
8 luma-v deblock / 9 qpel mc20).
|
||||
|
||||
The recipe layer picks the substrate. Per docs/k9_h264qpel_mc20.md
|
||||
the verdict is CPU NEON: per-block 7.6 ns at 131 Mblock/s gives 135x
|
||||
margin over 30 fps 1080p, and the QPU dispatch floor (~250 ns)
|
||||
makes any V3D shader strictly worse. Substitution is plumbing-only,
|
||||
NEON-by-recipe — same daedalus_ctx_create_no_qpu pthread_once
|
||||
context shape the cycles 6/7/8 shims already own (kept SEPARATE
|
||||
from the H264DSP shim's ctx because H264QPEL is its own libavcodec
|
||||
Makefile module and link order does not guarantee a single .o
|
||||
owns the ctx symbol; one extra ~µs init per process, paid lazily).
|
||||
|
||||
Other H.264 luma MC variants (mc02, mc11, mc22 etc.) and the 16x16
|
||||
size tier stay on the in-tree NEON .S code. Per the cycle-9 phase-1
|
||||
rationale, mc20 8x8 is representative of the whole family's per-block
|
||||
cost — extending the substitution to other variants would multiply
|
||||
recipe-lookup overhead without changing the substrate verdict.
|
||||
|
||||
Bit-exact against ff_put_h264_qpel8_mc20_neon (daedalus-fourier
|
||||
cycle 9 green; M1 = 100% bit-exact across 10000 random blocks).
|
||||
|
||||
No SONAME change, no Depends change.
|
||||
|
||||
Refs reauktion/daedalus-v4l2#11 — substitution arc step 2 cycle 9.
|
||||
---
|
||||
libavcodec/aarch64/Makefile | 3 +-
|
||||
libavcodec/aarch64/h264_qpel_daedalus.c | 50 ++++++++++++++++++++++
|
||||
libavcodec/aarch64/h264qpel_init_aarch64.c | 4 +-
|
||||
3 files changed, 55 insertions(+), 2 deletions(-)
|
||||
create mode 100644 libavcodec/aarch64/h264_qpel_daedalus.c
|
||||
|
||||
diff --git a/libavcodec/aarch64/Makefile b/libavcodec/aarch64/Makefile
|
||||
--- a/libavcodec/aarch64/Makefile
|
||||
+++ b/libavcodec/aarch64/Makefile
|
||||
@@ -7,7 +7,8 @@ OBJS-$(CONFIG_H264DSP) += aarch64/h264dsp_init_aarch64.o \
|
||||
aarch64/h264_idct_daedalus.o
|
||||
OBJS-$(CONFIG_HUFFYUVDSP) += aarch64/huffyuvdsp_init_aarch64.o
|
||||
OBJS-$(CONFIG_H264PRED) += aarch64/h264pred_init.o
|
||||
-OBJS-$(CONFIG_H264QPEL) += aarch64/h264qpel_init_aarch64.o
|
||||
+OBJS-$(CONFIG_H264QPEL) += aarch64/h264qpel_init_aarch64.o \
|
||||
+ aarch64/h264_qpel_daedalus.o
|
||||
OBJS-$(CONFIG_HPELDSP) += aarch64/hpeldsp_init_aarch64.o
|
||||
OBJS-$(CONFIG_IDCTDSP) += aarch64/idctdsp_init_aarch64.o
|
||||
OBJS-$(CONFIG_ME_CMP) += aarch64/me_cmp_init_aarch64.o
|
||||
diff --git a/libavcodec/aarch64/h264_qpel_daedalus.c b/libavcodec/aarch64/h264_qpel_daedalus.c
|
||||
new file mode 100644
|
||||
--- /dev/null
|
||||
+++ b/libavcodec/aarch64/h264_qpel_daedalus.c
|
||||
@@ -0,0 +1,50 @@
|
||||
+/*
|
||||
+ * H.264 luma qpel mc20 (8x8, horizontal half-pel, 6-tap "put")
|
||||
+ * — daedalus-fourier substitution shim.
|
||||
+ *
|
||||
+ * Routes H264QpelContext.put_h264_qpel_pixels_tab[1][2] through
|
||||
+ * daedalus_recipe_dispatch_h264_qpel_mc20 instead of
|
||||
+ * ff_put_h264_qpel8_mc20_neon. The recipe layer picks the substrate
|
||||
+ * (CPU NEON for cycle 9; QPU not viable — per-block 7.6 ns vs
|
||||
+ * ~250 ns QPU dispatch floor, see docs/k9_h264qpel_mc20.md).
|
||||
+ *
|
||||
+ * Sibling to libavcodec/aarch64/h264_idct_daedalus.c. We keep a
|
||||
+ * SEPARATE process-global pthread_once context here instead of
|
||||
+ * sharing the H264DSP one because H264QPEL is its own libavcodec
|
||||
+ * Makefile module and link order does not guarantee a single .o
|
||||
+ * owns the ctx symbol. The cost is one extra
|
||||
+ * daedalus_ctx_create_no_qpu (~µs) per process; daemon and host
|
||||
+ * processes pay this lazily on first MC call.
|
||||
+ *
|
||||
+ * FFmpeg H264QpelContext convention: both dst and src use a SINGLE
|
||||
+ * stride and `src` already points at the leftmost OUTPUT column
|
||||
+ * (col 0); each output's 6-tap filter reads cols -2..+3 around it. This matches
|
||||
+ * daedalus_recipe_dispatch_h264_qpel_mc20's documented contract
|
||||
+ * directly, so dst_off = src_off = 0.
|
||||
+ */
|
||||
+
|
||||
+#include <pthread.h>
|
||||
+#include <stddef.h>
|
||||
+#include <stdint.h>
|
||||
+
|
||||
+#include <daedalus.h>
|
||||
+
|
||||
+#include "libavutil/attributes.h"
|
||||
+
|
||||
+static daedalus_ctx *g_dctx;
|
||||
+static pthread_once_t g_dctx_once = PTHREAD_ONCE_INIT;
|
||||
+
|
||||
+static void daedalus_ctx_init_once(void)
|
||||
+{
|
||||
+ g_dctx = daedalus_ctx_create_no_qpu();
|
||||
+}
|
||||
+
|
||||
+void ff_put_h264_qpel8_mc20_daedalus(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
|
||||
+
|
||||
+void ff_put_h264_qpel8_mc20_daedalus(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
|
||||
+{
|
||||
+ static const daedalus_h264_qpel_meta meta = { .dst_off = 0, .src_off = 0 };
|
||||
+ pthread_once(&g_dctx_once, daedalus_ctx_init_once);
|
||||
+ daedalus_recipe_dispatch_h264_qpel_mc20(g_dctx, dst, src, (size_t)stride,
|
||||
+ 1, &meta);
|
||||
+}
|
||||
diff --git a/libavcodec/aarch64/h264qpel_init_aarch64.c b/libavcodec/aarch64/h264qpel_init_aarch64.c
|
||||
--- a/libavcodec/aarch64/h264qpel_init_aarch64.c
|
||||
+++ b/libavcodec/aarch64/h264qpel_init_aarch64.c
|
||||
@@ -47,6 +47,8 @@ void ff_put_h264_qpel8_mc00_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t str
|
||||
void ff_put_h264_qpel8_mc10_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
|
||||
void ff_put_h264_qpel8_mc20_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
|
||||
void ff_put_h264_qpel8_mc30_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
|
||||
+void ff_put_h264_qpel8_mc20_daedalus(uint8_t *dst, const uint8_t *src,
|
||||
+ ptrdiff_t stride);
|
||||
void ff_put_h264_qpel8_mc01_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
|
||||
void ff_put_h264_qpel8_mc11_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
|
||||
void ff_put_h264_qpel8_mc21_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
|
||||
@@ -184,7 +186,7 @@ av_cold void ff_h264qpel_init_aarch64(H264QpelContext *c, int bit_depth)
|
||||
|
||||
c->put_h264_qpel_pixels_tab[1][ 0] = ff_put_h264_qpel8_mc00_neon;
|
||||
c->put_h264_qpel_pixels_tab[1][ 1] = ff_put_h264_qpel8_mc10_neon;
|
||||
- c->put_h264_qpel_pixels_tab[1][ 2] = ff_put_h264_qpel8_mc20_neon;
|
||||
+ c->put_h264_qpel_pixels_tab[1][ 2] = ff_put_h264_qpel8_mc20_daedalus;
|
||||
c->put_h264_qpel_pixels_tab[1][ 3] = ff_put_h264_qpel8_mc30_neon;
|
||||
c->put_h264_qpel_pixels_tab[1][ 4] = ff_put_h264_qpel8_mc01_neon;
|
||||
c->put_h264_qpel_pixels_tab[1][ 5] = ff_put_h264_qpel8_mc11_neon;
|
||||
--
|
||||
2.47.3
|
||||
+14
-11
@@ -33,18 +33,19 @@ FFMPEG_VERSION=8.1
|
||||
# epoch 2 — NOTE(review): Debian's stock ffmpeg carries epoch 7 (currently 7:7.1.x in trixie), so epoch 2 does not match it; confirm the intended epoch;
|
||||
# +rfourier suffix to avoid colliding with upstream/Debian rebuilds.
|
||||
PKGVER=2:${FFMPEG_VERSION}+rfourier+gb57fbbe
|
||||
PKGREL=8 # pkgrel=8 — H.264 luma-v deblock daedalus-fourier substitution
|
||||
# (cycle 8, non-intra bS<4 vertical luma). Stacks on cycles
|
||||
# 6/7 (IDCT 4x4 + 8x8). Wires H264DSPContext.v_loop_filter_luma
|
||||
# through daedalus_recipe_dispatch_h264_deblock_luma_v.
|
||||
# ctx stays no-QPU until a separate change gates Vulkan init
|
||||
# on a feature flag; cycle-8 dispatch is NEON-by-recipe for
|
||||
# now. (2026-05-22)
|
||||
PKGREL=10 # pkgrel=10 — H.264 luma qpel mc20 daedalus-fourier substitution
|
||||
# (cycle 9 of the daedalus-v4l2#11 step 2 substitution arc; closes
|
||||
# the libavcodec.so substitution sequence 6 IDCT4 / 7 IDCT8 /
|
||||
# 8 luma-v deblock / 9 qpel mc20). Pulls daedalus-fourier PR #2
|
||||
# which extends the public API with
|
||||
# daedalus_recipe_dispatch_h264_qpel_mc20. (2026-05-23)
|
||||
|
||||
# daedalus-fourier pin — first kernel substitution in libavcodec (cycle 6
|
||||
# H.264 IDCT 4x4). Same SHA as the daedalus-v4l2 daemon already ships
|
||||
# inline; rev in lockstep with the daemon when the public API rolls.
|
||||
DAEDALUS_FOURIER_COMMIT=d87239d8172307d9a1b93c95cbed116d175b85cc
|
||||
# daedalus-fourier pin. 209a421 = daedalus-fourier PR #2 merge — public
|
||||
# API now exposes daedalus_recipe_dispatch_h264_qpel_mc20 +
|
||||
# DAEDALUS_KERNEL_H264_QPEL_MC20. Cycle 9 plumbs the last H.264 NEON
|
||||
# kernel through the recipe layer. Daemon-side build (debian/daedalus-v4l2)
|
||||
# can bump in a follow-up; this PR only changes the libavcodec.so consumer.
|
||||
DAEDALUS_FOURIER_COMMIT=209a4218bcb98b91c04f07ad61513bb04adb13ad
|
||||
|
||||
HERE=$(dirname "$(readlink -f "$0")")
|
||||
|
||||
@@ -71,6 +72,8 @@ patch -Np1 -i "$HERE/0002-nv15-to-p010-unpack.patch"
|
||||
patch -Np1 -i "$HERE/0003-h264-idct4-daedalus-fourier.patch"
|
||||
patch -Np1 -i "$HERE/0004-h264-idct8-daedalus-fourier.patch"
|
||||
patch -Np1 -i "$HERE/0005-h264-deblock-luma-v-daedalus-fourier.patch"
|
||||
patch -Np1 -i "$HERE/0006-h264-restore-low-delay.patch"
|
||||
patch -Np1 -i "$HERE/0007-h264-qpel-mc20-daedalus-fourier.patch"
|
||||
|
||||
# --- daedalus-fourier: fetch + build static .a with PIC, install to a
|
||||
# per-build prefix; libavcodec.so links it into the shared object so
|
||||
|
||||
@@ -1,3 +1,62 @@
|
||||
ffmpeg-v4l2-request-fourier (2:8.1+rfourier+gb57fbbe-10) bookworm trixie; urgency=medium
|
||||
|
||||
* Add 0007-h264-qpel-mc20-daedalus-fourier.patch —
|
||||
H264QpelContext.put_h264_qpel_pixels_tab[1][2] (8x8 luma
|
||||
horizontal half-pel, 6-tap "put" — the canonical representative
|
||||
of the H.264 luma motion-compensation family) now dispatches
|
||||
through daedalus_recipe_dispatch_h264_qpel_mc20 instead of
|
||||
ff_put_h264_qpel8_mc20_neon. Cycle 9 of the daedalus-v4l2#11
|
||||
step 2 substitution arc; closes the 4-cycle libavcodec.so
|
||||
substitution sequence (6 IDCT4 / 7 IDCT8 / 8 luma-v deblock /
|
||||
9 qpel mc20).
|
||||
* Bumps daedalus-fourier pin d87239d → 209a421 (PR #2 — public
|
||||
API extended with daedalus_recipe_dispatch_h264_qpel_mc20 +
|
||||
DAEDALUS_KERNEL_H264_QPEL_MC20).
|
||||
* Cycle 9 is "CPU primary; QPU pointless" per
|
||||
docs/k9_h264qpel_mc20.md. Per-block 7.6 ns at 131 Mblock/s
|
||||
gives 135x margin over 30 fps 1080p; QPU dispatch floor at
|
||||
~250 ns makes any V3D shader strictly worse. Substitution
|
||||
is plumbing-only, NEON-by-recipe — same
|
||||
daedalus_ctx_create_no_qpu pthread_once shape the cycles 6/7/8
|
||||
shims already own (kept SEPARATE from the H264DSP shim's ctx
|
||||
because H264QPEL is its own libavcodec Makefile module and
|
||||
link order does not guarantee a single .o owns the ctx symbol;
|
||||
one extra ~µs init per process, paid lazily on first MC call).
|
||||
* Other H.264 luma MC variants (mc02, mc11, mc22 etc.) and the
|
||||
16x16 size tier stay on the in-tree NEON .S code. Per the
|
||||
cycle-9 phase-1 rationale, mc20 8x8 is representative of the
|
||||
whole family's per-block cost.
|
||||
* Bit-exact against ff_put_h264_qpel8_mc20_neon (daedalus-fourier
|
||||
cycle 9 green; 10000/10000 random blocks).
|
||||
* No SONAME change, no Depends change.
|
||||
|
||||
-- Markus Fritsche <mfritsche@reauktion.de> Sat, 23 May 2026 12:00:00 +0000
|
||||
|
||||
ffmpeg-v4l2-request-fourier (2:8.1+rfourier+gb57fbbe-9) bookworm trixie; urgency=medium
|
||||
|
||||
* Add 0006-h264-restore-low-delay.patch — restore the documented
|
||||
AV_CODEC_FLAG_LOW_DELAY semantics in the H.264 decoder. FFmpeg
|
||||
8.x dropped the H.264 low_delay code path entirely; setting the
|
||||
flag at avcodec_open2 no longer prevents the display-order DPB
|
||||
output queue from running. Visible on Firefox YouTube as the
|
||||
2-1-4-3 B-frame pair-swap, re-introduced silently by the
|
||||
SONAME 61→62 jump in daedalus-v4l2 PR #16.
|
||||
* h264_select_output_frame: early-exit when LOW_DELAY is set;
|
||||
emit the just-decoded picture as next_output_pic, mirror the
|
||||
corruption / recovery-point tracking, skip delayed_pic[] and
|
||||
the POC reorder machinery entirely.
|
||||
* h264_field_start: suppress the SPS-driven
|
||||
has_b_frames = sps->num_reorder_frames clobber when LOW_DELAY
|
||||
is set — without this the per-slice bitstream_restriction_flag
|
||||
re-pickup would reintroduce a nonzero reorder buffer mid-
|
||||
stream.
|
||||
* Restores the same one-frame-per-send_packet contract the
|
||||
daedalus-v4l2 daemon's decoder.c already relies on (the flag
|
||||
is set unconditionally for H.264). No daemon-side change.
|
||||
* No SONAME change, no Depends change.
|
||||
|
||||
-- Markus Fritsche <mfritsche@reauktion.de> Fri, 22 May 2026 13:30:00 +0000
|
||||
|
||||
ffmpeg-v4l2-request-fourier (2:8.1+rfourier+gb57fbbe-8) bookworm trixie; urgency=medium
|
||||
|
||||
* Add 0005-h264-deblock-luma-v-daedalus-fourier.patch —
|
||||
|
||||
Reference in New Issue
Block a user