ffmpeg-v4l2-request-fourier: substitute H.264 qpel mc20 → daedalus-fourier

H264QpelContext.put_h264_qpel_pixels_tab[1][2] (8x8 luma horizontal half-pel, 6-tap "put" — the canonical representative of the H.264 luma motion-compensation family) now dispatches through daedalus_recipe_dispatch_h264_qpel_mc20 instead of ff_put_h264_qpel8_mc20_neon.

Cycle 9 of the daedalus-v4l2#11 step 2 substitution arc; closes the 4-cycle libavcodec.so substitution sequence:

  cycle 6 (PR #76)  H.264 IDCT 4x4        done
  cycle 7 (PR #85)  H.264 IDCT 8x8        done
  cycle 8 (PR #86)  H.264 luma-v deblock  done
  cycle 9 (this)    H.264 qpel mc20

Bumps daedalus-fourier pin d87239d → 209a421 (PR #2 — public API gains daedalus_recipe_dispatch_h264_qpel_mc20 + DAEDALUS_KERNEL_H264_QPEL_MC20).

Verdict per docs/k9_h264qpel_mc20.md: CPU NEON. Per-block 7.6 ns at 131 Mblock/s gives 135× margin over 30 fps 1080p; QPU dispatch floor at ~250 ns makes any V3D shader strictly worse.

Substitution is plumbing-only — same daedalus_ctx_create_no_qpu pthread_once shape the cycles 6/7/8 shims already own (kept SEPARATE from the H264DSP shim's ctx because H264QPEL is its own libavcodec Makefile module and link order does not guarantee a single .o owns the ctx symbol; one extra ~µs init per process, paid lazily on first MC call).

Other H.264 luma MC variants (mc02, mc11, mc22 etc.) and the 16x16 size tier stay on the in-tree NEON .S code per the cycle-9 phase-1 rationale (mc20 8x8 is representative; remaining variants would multiply recipe-lookup overhead without changing the substrate verdict).

Bit-exact against ff_put_h264_qpel8_mc20_neon (daedalus-fourier cycle 9 green; 10000/10000 random blocks bit-exact, M3 = 131 Mblock/s).

No SONAME change, no Depends change. PKGREL 9 → 10.

Refs reauktion/daedalus-v4l2#11 — substitution arc step 2 cycle 9.
2026-05-23 03:32:29 +02:00
parent 8729c2db92
commit 0bfc4ab03e
5 changed files with 334 additions and 18 deletions
@@ -24,13 +24,13 @@ _srcname=FFmpeg
 _version='8.1'
 _commit='b57fbbe50c9b2656fad86a1a7eeabfd2b2a50935'  # v4l2-request-n8.1 tip 2026-04-24
 pkgver=8.1.r123329.b57fbbe
-pkgrel=9   # pkgrel=9 — restore AV_CODEC_FLAG_LOW_DELAY for H.264 (2026-05-22)
+pkgrel=10  # pkgrel=10 — H.264 luma qpel mc20 daedalus-fourier substitution (cycle 9, 2026-05-23)
 epoch=2

-# daedalus-fourier pin — first kernel substitution in libavcodec
-# (cycle 6 H.264 IDCT 4x4).  Same SHA as the daedalus-v4l2 daemon's
-# inline build; lockstep with that until the public API rolls.
-_daedalus_fourier_commit='d87239d8172307d9a1b93c95cbed116d175b85cc'
+# daedalus-fourier pin.  209a421 = PR #2 merge (Phase 8c — public API
+# gains daedalus_recipe_dispatch_h264_qpel_mc20 + DAEDALUS_KERNEL_H264_QPEL_MC20).
+# Cycle 9 closes the libavcodec.so substitution arc started at cycle 6.
+_daedalus_fourier_commit='209a4218bcb98b91c04f07ad61513bb04adb13ad'
 pkgdesc='FFmpeg with V4L2 Request API hwaccel (Rockchip / Allwinner stateless decode)'
 arch=('aarch64')
 url='https://github.com/Kwiboo/FFmpeg'
@@ -93,8 +93,9 @@ source=("git+https://github.com/Kwiboo/FFmpeg.git#commit=${_commit}"
        '0003-h264-idct4-daedalus-fourier.patch'
        '0004-h264-idct8-daedalus-fourier.patch'
        '0005-h264-deblock-luma-v-daedalus-fourier.patch'
-        '0006-h264-restore-low-delay.patch')
-sha256sums=('SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP')
+        '0006-h264-restore-low-delay.patch'
+        '0007-h264-qpel-mc20-daedalus-fourier.patch')
+sha256sums=('SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP')

 pkgver() {
  cd "${_srcname}"
@@ -111,6 +112,7 @@ prepare() {
  patch -Np1 -i "${srcdir}/0004-h264-idct8-daedalus-fourier.patch"
  patch -Np1 -i "${srcdir}/0005-h264-deblock-luma-v-daedalus-fourier.patch"
  patch -Np1 -i "${srcdir}/0006-h264-restore-low-delay.patch"
+  patch -Np1 -i "${srcdir}/0007-h264-qpel-mc20-daedalus-fourier.patch"
 }

 build() {