forked from marfrit/marfrit-packages
Compare commits
196 Commits
main
...
875156782e
| Author | SHA1 | Date | |
|---|---|---|---|
| 875156782e | |||
| 8f9487d355 | |||
| f07824adb7 | |||
| 2732a022f8 | |||
| 57f73f1afb | |||
| d8aa3aae8d | |||
| 1f58ff2b6b | |||
| 45be17fbdf | |||
| 7b9bb9b2d0 | |||
| babb280410 | |||
| 5b48d1c743 | |||
| 624f83e877 | |||
| 902de73a02 | |||
| c14c22f942 | |||
| b113e053f0 | |||
| 8ec4c57ad7 | |||
| beb09c4863 | |||
| 7708744eb2 | |||
| 5d97cf15d6 | |||
| 58f67d4b2c | |||
| 685f85c22e | |||
| 6896853544 | |||
| fd56eca3cb | |||
| 91022b390e | |||
| b736dd0529 | |||
| 0bfc4ab03e | |||
| 8729c2db92 | |||
| d449ec1073 | |||
| 9d30c34be9 | |||
| 1ca18ac130 | |||
| cf9eef6cfa | |||
| 5c69460722 | |||
| d11a52405d | |||
| 29e0852d11 | |||
| 510a31622c | |||
| db9ae16da9 | |||
| 493c762967 | |||
| 7ecbcb3c1b | |||
| 360e8eb6bf | |||
| 4db64917bc | |||
| 6288536223 | |||
| 09d8813507 | |||
| 8a3186b53c | |||
| b81e2251c2 | |||
| e7cc22e42d | |||
| 62b6b0a700 | |||
| a8f4a70887 | |||
| 6ee8f2748e | |||
| 711a921e66 | |||
| 9bf97fdb49 | |||
| a536e20218 | |||
| a1dba5f630 | |||
| 88a65cb6d0 | |||
| e641d679d3 | |||
| 877238bd1b | |||
| 27617e4cb0 | |||
| a2daab1b28 | |||
| 9146e83710 | |||
| abf8fb3077 | |||
| 1414dfeac2 | |||
| 41c1e0b6b9 | |||
| c9a4b82f2c | |||
| 736b6da176 | |||
| 34972ae9c1 | |||
| a9f1b833b9 | |||
| 83e8eca56d | |||
| 1c8c186681 | |||
| a0be2dcc9f | |||
| eb89f12c3e | |||
| ce2fff1a4f | |||
| 9301894997 | |||
| f21c1ff80a | |||
| e15b887d8d | |||
| b69db65037 | |||
| adcc824bf7 | |||
| 7213b23861 | |||
| 2cd3acd680 | |||
| 22ac3c9845 | |||
| 3275d06728 | |||
| 33b91cf7dc | |||
| a640633ea7 | |||
| 5f21a71770 | |||
| de3c2c6744 | |||
| e7e79e5a76 | |||
| 130a259c69 | |||
| 9580f33cb6 | |||
| eab66cfab8 | |||
| d2cecbcd05 | |||
| 2028eccc3c | |||
| 70c8c2b417 | |||
| 793187ff9e | |||
| 42bf6b1633 | |||
| 40719efc43 | |||
| e540384f50 | |||
| 9ca97374c8 | |||
| 902e855d92 | |||
| 64269d69ee | |||
| e976c88016 | |||
| 29cc145d44 | |||
| b16a3f1a77 | |||
| c2018413aa | |||
| 243e05ca5e | |||
| a29fe71666 | |||
| b0ffd4d74f | |||
| ab60acd9f4 | |||
| 6a417fcc9d | |||
| 1c77b05f68 | |||
| 051da5e8dc | |||
| a1ff6de652 | |||
| b471847b1c | |||
| 3abfdff943 | |||
| fce33b02a2 | |||
| da60fa7c49 | |||
| 20161d231f | |||
| c7bb14f369 | |||
| f8d1257d35 | |||
| f3b1087ac7 | |||
| 2299d7a02f | |||
| 489d6e3862 | |||
| 265bf669c8 | |||
| df09c1c55d | |||
| e8a5490d44 | |||
| ff9db4e273 | |||
| 108a3dabe6 | |||
| 31da35a549 | |||
| b4d1a47d6b | |||
| 3548a761a5 | |||
| 6f65462ec4 | |||
| a0e0d123b2 | |||
| f22f8f03ac | |||
| e236ec1f42 | |||
| 57e6bac3d4 | |||
| c818445611 | |||
| 45604801fa | |||
| 0a922a9f36 | |||
| 9b170e942c | |||
| cf169fd13e | |||
| 81895f991b | |||
| 011667c8e4 | |||
| 0e6f97f041 | |||
| fae7a7b80c | |||
| b851861931 | |||
| 45f4b5e56f | |||
| 3ddc45d625 | |||
| 1bd11eaf27 | |||
| 4776dc01d2 | |||
| de358b9461 | |||
| f3dd1c1886 | |||
| add049f0bd | |||
| 249e8461bb | |||
| 3293cd6542 | |||
| 6de7268b49 | |||
| dbae9832b6 | |||
| e49797ab21 | |||
| 47dfb33e98 | |||
| 5e16fbd603 | |||
| ab553ef008 | |||
| 7a5587c0c6 | |||
| 05bf33a1ec | |||
| 3f1a26cc1b | |||
| b248aa2ac8 | |||
| 06023bcf9d | |||
| 7542989f2b | |||
| 9e9447502e | |||
| eb1782e86f | |||
| a168342fa8 | |||
| 4820e53b18 | |||
| 248bef5503 | |||
| 8a49ac6061 | |||
| 4764f5f37f | |||
| dcb1da2f59 | |||
| 238c5cee7e | |||
| 70c943e948 | |||
| d6c4260eb8 | |||
| 1fed626900 | |||
| 0b2393cecc | |||
| 697413103d | |||
| b648276122 | |||
| 2e2c9b6361 | |||
| 0d311d61b4 | |||
| efc1bfd66a | |||
| 154fa2f14a | |||
| 2af63ce988 | |||
| e9bc6ebd27 | |||
| 201e671d61 | |||
| d63d1cef72 | |||
| 108c725c58 | |||
| 75dadb2925 | |||
| fedcc4a357 | |||
| 860ebf2df5 | |||
| 109858eae5 | |||
| 9041c1bf51 | |||
| f41e9a117b | |||
| 2f78136479 | |||
| 62a594ab59 | |||
| 81cc050bf3 |
Executable
+230
@@ -0,0 +1,230 @@
|
||||
#!/bin/bash
|
||||
# check-already-published.sh <recipe-dir>
|
||||
#
|
||||
# Decide whether a given recipe (arch/<name> or debian/<name>) is already
|
||||
# present in https://packages.reauktion.de/. Emits exactly one line to
|
||||
# stdout:
|
||||
#
|
||||
# skip=1 — package with this version-pkgrel-arch tuple already lives in
|
||||
# the pool; CI should short-circuit.
|
||||
# skip=0 — file is missing or HEAD failed; CI should build + publish.
|
||||
#
|
||||
# Design notes:
|
||||
# * For Arch recipes we source the PKGBUILD in a clean subshell so
|
||||
# shell expansions (epoch=, ${_pkgver/-/}, pkgname=() arrays) resolve
|
||||
# naturally. Only the first element of pkgname[] is checked — split
|
||||
# packages share one source tarball / one build, so any-one-missing
|
||||
# forces the full rebuild anyway.
|
||||
# * For Debian recipes we extract the bare top-level PKGVER= /
|
||||
# PKGREL= assignments (plus any other top-level VAR=value lines they
|
||||
# reference) via awk and re-evaluate them in an isolated subshell —
|
||||
# sourcing the entire build-deb.sh would run curl/tar/dpkg-deb
|
||||
# against a tempdir we don't want to materialise here.
|
||||
# * Epoch handling differs by ecosystem: Arch keeps `<epoch>:` in the
|
||||
# pool filename, Debian/reprepro strips it.
|
||||
# * curl --head is run without -f: the final HTTP status is read from
# -w '%{http_code}' and only 200 counts as "already published" — a 404 or
# any transport failure means "build it". -L follows mirrors. --max-time
# caps the worst-case latency per HEAD.
|
||||
set -euo pipefail
|
||||
|
||||
REPO_BASE="${REPO_BASE:-https://packages.reauktion.de}"
|
||||
HEAD_TIMEOUT="${HEAD_TIMEOUT:-15}"
|
||||
|
||||
RECIPE_DIR="${1:?usage: $0 <recipe-dir> (e.g. arch/distcc-avahi or debian/lmcp)}"
|
||||
|
||||
# Resolve relative to repo root if a leading path is passed; allow
|
||||
# both `arch/foo` and absolute paths.
|
||||
if [ ! -d "$RECIPE_DIR" ]; then
|
||||
echo "error: recipe dir not found: $RECIPE_DIR" >&2
|
||||
exit 2
|
||||
fi
|
||||
|
||||
# Ecosystem = leading path component for the common `arch/foo` form. For
# absolute or `./`-prefixed recipe paths that component is empty or `.`, so
# fall back to the name of the recipe's parent directory instead.
ecosystem="${RECIPE_DIR%%/*}"
case "$ecosystem" in
  arch|debian) ;;
  *) ecosystem="$(basename -- "$(dirname -- "$RECIPE_DIR")")" ;;
esac
|
||||
|
||||
# http_head <url>
#
# Issue a HEAD request (following redirects, bounded by $HEAD_TIMEOUT) and
# print exactly one HTTP status code on stdout. Any curl failure is
# reported as "000".
http_head() {
  local url="$1"
  local code
  # curl's -w output is already "000" on a transport error; capturing it
  # and overriding on failure avoids emitting the code twice ("000000").
  code=$(curl -sS -L --max-time "$HEAD_TIMEOUT" -o /dev/null \
    -w '%{http_code}' --head "$url") || code="000"
  printf '%s\n' "${code:-000}"
}
|
||||
|
||||
# emit <0|1>
#
# Write the single `skip=<value>` line (GITHUB_OUTPUT-style key=value) to
# stdout and terminate the script successfully.
emit() {
  printf 'skip=%s\n' "$1"
  exit 0
}
|
||||
|
||||
case "$ecosystem" in
|
||||
arch)
|
||||
pkgbuild="$RECIPE_DIR/PKGBUILD"
|
||||
[ -f "$pkgbuild" ] || { echo "error: $pkgbuild missing" >&2; exit 2; }
|
||||
|
||||
# Source in a fresh bash to capture variables. Some PKGBUILDs run
# functions or call commands at top level — those still execute here
# (PATH is not restricted); their output is discarded and failures ignored.
|
||||
eval "$(
|
||||
bash --noprofile --norc -c "
|
||||
set +e
|
||||
# Stub out anything that might shell out; we only need variable
|
||||
# assignments to land.
|
||||
cd '$RECIPE_DIR'
|
||||
source ./PKGBUILD >/dev/null 2>&1 || true
|
||||
# pkgname may be array; print first element.
|
||||
if declare -p pkgname 2>/dev/null | grep -q 'declare -a'; then
|
||||
first_name=\"\${pkgname[0]}\"
|
||||
else
|
||||
first_name=\"\$pkgname\"
|
||||
fi
|
||||
if declare -p arch 2>/dev/null | grep -q 'declare -a'; then
|
||||
first_arch=\"\${arch[0]}\"
|
||||
else
|
||||
first_arch=\"\$arch\"
|
||||
fi
|
||||
printf 'PB_NAME=%q\n' \"\$first_name\"
|
||||
printf 'PB_VER=%q\n' \"\$pkgver\"
|
||||
printf 'PB_REL=%q\n' \"\$pkgrel\"
|
||||
printf 'PB_EPOCH=%q\n' \"\${epoch:-}\"
|
||||
printf 'PB_ARCH=%q\n' \"\$first_arch\"
|
||||
"
|
||||
)"
|
||||
|
||||
if [ -z "${PB_NAME:-}" ] || [ -z "${PB_VER:-}" ] || [ -z "${PB_REL:-}" ]; then
|
||||
echo "error: failed to parse PKGBUILD ($RECIPE_DIR)" >&2
|
||||
emit 0
|
||||
fi
|
||||
|
||||
# Pool arch:
|
||||
# arch=('any') → any
|
||||
# arch=('aarch64' 'x86_64') → aarch64 (we publish for both, but the
|
||||
# aarch64 artifact is the canonical CI build)
|
||||
# arch=('aarch64') → aarch64
|
||||
case "$PB_ARCH" in
|
||||
any) pool_arch=any ;;
|
||||
*) pool_arch=aarch64 ;;
|
||||
esac
|
||||
|
||||
# Version string with optional epoch (epoch:pkgver-pkgrel).
|
||||
if [ -n "${PB_EPOCH:-}" ]; then
|
||||
ver_full="${PB_EPOCH}:${PB_VER}-${PB_REL}"
|
||||
else
|
||||
ver_full="${PB_VER}-${PB_REL}"
|
||||
fi
|
||||
|
||||
# Pool URL path (arch keeps any/aarch64 split; 'any' lands in the
|
||||
# aarch64 dir per current marfrit layout — both arches share the
|
||||
# blob via the publish-to-both-arches step in build.yml).
|
||||
pool_dir="arch/aarch64"
|
||||
|
||||
base_url="${REPO_BASE}/${pool_dir}/${PB_NAME}-${ver_full}-${pool_arch}.pkg.tar"
|
||||
for ext in zst xz gz; do
|
||||
code=$(http_head "${base_url}.${ext}")
|
||||
if [ "$code" = "200" ]; then
|
||||
emit 1
|
||||
fi
|
||||
done
|
||||
emit 0
|
||||
;;
|
||||
|
||||
debian)
|
||||
bd="$RECIPE_DIR/build-deb.sh"
|
||||
ctrl="$RECIPE_DIR/control"
|
||||
[ -f "$bd" ] || { echo "error: $bd missing" >&2; exit 2; }
|
||||
|
||||
# Pull top-level `VAR=value` lines until we've passed PKGREL, and
|
||||
# only those whose RHS is safe to re-evaluate (no command
|
||||
# substitution `$(...)`, no escaped `\$`, no embedded commands like
|
||||
# `DESTDIR=... meson ...`). This deliberately undershoots: we just
|
||||
# need PKGVER/PKGREL plus any version vars they reference. Anything
|
||||
# else (HERE=$(readlink ...), KERNELVER=\$(uname -r) inside a
|
||||
# HEREDOC, etc.) gets dropped.
|
||||
assigns=$(awk '
|
||||
/^[A-Z_][A-Z0-9_]*=/ {
|
||||
# split into LHS and RHS
|
||||
eq = index($0, "=")
|
||||
lhs = substr($0, 1, eq - 1)
|
||||
rhs = substr($0, eq + 1)
|
||||
# strip inline `# comment`
|
||||
hash = index(rhs, "#")
|
||||
if (hash > 1 && substr(rhs, hash-1, 1) == " ") rhs = substr(rhs, 1, hash - 2)
|
||||
# reject lines with command-subst or escaped-dollar or naked commands
|
||||
if (rhs ~ /\$\(/) next
|
||||
if (rhs ~ /\\\$/) next
|
||||
if (rhs ~ / [a-z]/) next # e.g. `DESTDIR="$ROOT" meson ...`
|
||||
print lhs "=" rhs
|
||||
if (lhs == "PKGREL") exit
|
||||
}
|
||||
' "$bd")
|
||||
|
||||
eval "$(
|
||||
bash --noprofile --norc -c "
|
||||
set +e
|
||||
$assigns
|
||||
printf 'PKGVER=%q\n' \"\${PKGVER:-}\"
|
||||
printf 'PKGREL=%q\n' \"\${PKGREL:-}\"
|
||||
"
|
||||
)"
|
||||
|
||||
if [ -z "${PKGVER:-}" ] || [ -z "${PKGREL:-}" ]; then
|
||||
echo "error: failed to parse PKGVER/PKGREL from $bd" >&2
|
||||
emit 0
|
||||
fi
|
||||
|
||||
# Strip epoch (`N:` prefix) — debian pool filenames omit it.
|
||||
ver_no_epoch="${PKGVER#*:}"
|
||||
# If PKGVER had no colon, ${PKGVER#*:} returns PKGVER unchanged (bash quirk:
|
||||
# the pattern must match for the prefix to be stripped). Guard explicitly.
|
||||
case "$PKGVER" in
|
||||
*:*) : ;;
|
||||
*) ver_no_epoch="$PKGVER" ;;
|
||||
esac
|
||||
|
||||
ver_full="${ver_no_epoch}-${PKGREL}"
|
||||
|
||||
# Architecture: parse control's `Architecture:` field.
|
||||
if [ ! -f "$ctrl" ]; then
|
||||
# Some recipes ship debian/control instead of ./control
|
||||
ctrl="$RECIPE_DIR/debian/control"
|
||||
fi
|
||||
# First `Architecture:` field of the control file, or empty when the file or
# field is missing. The `|| true` matters: under `set -euo pipefail` a failed
# lookup would otherwise abort the script before any skip= line is emitted,
# instead of falling through to the conservative default below.
ctrl_arch=$(awk '/^Architecture:/ { print $2; exit }' "$ctrl" 2>/dev/null || true)
|
||||
case "$ctrl_arch" in
|
||||
all) file_arch=all ;;
|
||||
arm64|any) file_arch=arm64 ;;
|
||||
amd64) file_arch=amd64 ;;
|
||||
*) file_arch=arm64 ;; # conservative default
|
||||
esac
|
||||
|
||||
pkg_name=$(basename "$RECIPE_DIR")
|
||||
|
||||
# Compare against the canonical Packages index (what apt actually
|
||||
# consults). reprepro refuses lower-version uploads, so checking
|
||||
# only an exact source-pkgrel URL produces an endless-rebuild trap
|
||||
# whenever source PKGREL has rolled back below pool head. We skip
|
||||
# if the pool's published version is >= the source version tuple.
|
||||
source_full="${ver_full}"
|
||||
if [ -n "${PKGVER#*:}" ] && [ "${PKGVER}" != "${PKGVER#*:}" ]; then
|
||||
# PKGVER had an epoch — keep it for dpkg --compare-versions.
|
||||
source_full="${PKGVER}-${PKGREL}"
|
||||
fi
|
||||
|
||||
# Determine suite: most recipes publish to both bookworm and trixie;
|
||||
# checking trixie is sufficient (changelogs share Distribution).
|
||||
suite="trixie"
|
||||
pkg_arch_label="$file_arch"
|
||||
[ "$file_arch" = "all" ] && pkg_arch_label="all"
|
||||
packages_url="${REPO_BASE}/debian/dists/${suite}/main/binary-arm64/Packages"
|
||||
[ "$file_arch" = "amd64" ] && packages_url="${REPO_BASE}/debian/dists/${suite}/main/binary-amd64/Packages"
|
||||
|
||||
pool_ver=$(set +o pipefail; curl -sS --max-time "$HEAD_TIMEOUT" "$packages_url" 2>/dev/null | awk -v p="$pkg_name" '$1=="Package:" && $2==p {found=1; next} found && $1=="Version:" {print $2; exit}')
|
||||
|
||||
if [ -n "$pool_ver" ] && command -v dpkg >/dev/null && dpkg --compare-versions "$pool_ver" ge "$source_full"; then
|
||||
echo "pool has $pool_ver >= source $source_full" >&2
|
||||
emit 1
|
||||
fi
|
||||
echo "pool has $pool_ver, source wants $source_full — build" >&2
|
||||
emit 0
|
||||
;;
|
||||
|
||||
*)
|
||||
echo "error: unsupported ecosystem '$ecosystem' (recipe-dir=$RECIPE_DIR)" >&2
|
||||
emit 0
|
||||
;;
|
||||
esac
|
||||
+952
-4
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,53 @@
|
||||
# Maintainer: Markus Fritsche <mfritsche@reauktion.de>
|
||||
# aish — AI-augmented conversational shell in LuaJIT.
|
||||
# Source of truth: git.reauktion.de/marfrit/aish
|
||||
|
||||
pkgname=aish
|
||||
pkgver=0.1.0
|
||||
pkgrel=1
|
||||
pkgdesc="AI-augmented conversational shell (LuaJIT, FFI-only)"
|
||||
arch=('any')
|
||||
url="https://git.reauktion.de/marfrit/aish"
|
||||
license=('MIT')
|
||||
depends=('luajit' 'readline' 'curl')
|
||||
# The _tag back-translation handles both clean releases (no '_') and
|
||||
# pre-release pkgvers (e.g. 0.1.0_rc1 → v0.1.0-rc1).
|
||||
_tag="v${pkgver//_/-}"
|
||||
source=("${pkgname}-${pkgver}.tar.gz::https://git.reauktion.de/marfrit/aish/archive/${_tag}.tar.gz")
|
||||
sha256sums=('9ebc3939e028832e39391ae33efacb5ec9bcd99d123cbc8ca1cd6ca9a640b5b5')
|
||||
|
||||
package() {
|
||||
cd "${pkgname}"
|
||||
local libdir="${pkgdir}/usr/share/lua/5.1/aish"
|
||||
|
||||
# Top-level modules
|
||||
install -Dm644 main.lua "${libdir}/main.lua"
|
||||
install -Dm644 broker.lua "${libdir}/broker.lua"
|
||||
install -Dm644 context.lua "${libdir}/context.lua"
|
||||
install -Dm644 executor.lua "${libdir}/executor.lua"
|
||||
install -Dm644 history.lua "${libdir}/history.lua"
|
||||
install -Dm644 mcp.lua "${libdir}/mcp.lua"
|
||||
install -Dm644 renderer.lua "${libdir}/renderer.lua"
|
||||
install -Dm644 repl.lua "${libdir}/repl.lua"
|
||||
install -Dm644 router.lua "${libdir}/router.lua"
|
||||
install -Dm644 safety.lua "${libdir}/safety.lua"
|
||||
install -Dm644 secrets.lua "${libdir}/secrets.lua"
|
||||
|
||||
# FFI bindings
|
||||
install -Dm644 ffi/curl.lua "${libdir}/ffi/curl.lua"
|
||||
install -Dm644 ffi/libc.lua "${libdir}/ffi/libc.lua"
|
||||
install -Dm644 ffi/pty.lua "${libdir}/ffi/pty.lua"
|
||||
install -Dm644 ffi/readline.lua "${libdir}/ffi/readline.lua"
|
||||
|
||||
# Vendored dependencies
|
||||
install -Dm644 vendor/dkjson.lua "${libdir}/vendor/dkjson.lua"
|
||||
|
||||
# Launch wrapper
|
||||
install -Dm755 bin/aish "${pkgdir}/usr/bin/aish"
|
||||
|
||||
# Documentation + example config
|
||||
install -Dm644 README.md "${pkgdir}/usr/share/doc/${pkgname}/README.md"
|
||||
install -Dm644 LICENSE "${pkgdir}/usr/share/doc/${pkgname}/LICENSE"
|
||||
install -Dm644 examples/config.lua \
|
||||
"${pkgdir}/usr/share/doc/${pkgname}/examples/config.lua"
|
||||
}
|
||||
@@ -8,13 +8,13 @@
|
||||
# NEXT.md alongside this PKGBUILD for the full rationale and the
|
||||
# validation log on PineTab2 (RK3566).
|
||||
#
|
||||
# Multi-arch: builds natively on x86_64 and aarch64. The x86_64 path
|
||||
# is primarily a development / CI host; the runtime target audience is
|
||||
# aarch64. The two patches are architecture-independent.
|
||||
# Cross-compiled from x86_64 using chromium's bundled clang (upstream
|
||||
# LLVM doesn't ship clang 23+ yet; chromium's internal fork is required).
|
||||
# Runtime target is aarch64. The three patches are architecture-independent.
|
||||
|
||||
pkgname=chromium-fourier
|
||||
pkgver=147.0.7727.116
|
||||
pkgrel=2
|
||||
pkgver=148.0.7778.178
|
||||
pkgrel=1
|
||||
epoch=1
|
||||
pkgdesc='Chromium with V4L2VDA HW video decode unlocked for mainline Linux Wayland on Rockchip'
|
||||
arch=('aarch64' 'x86_64')
|
||||
@@ -150,7 +150,6 @@ build() {
|
||||
'symbol_level=0'
|
||||
'is_cfi=false'
|
||||
'treat_warnings_as_errors=false'
|
||||
'enable_nacl=false'
|
||||
'enable_widevine=false'
|
||||
|
||||
# System toolchain (clang/lld from pacman)
|
||||
|
||||
@@ -73,16 +73,15 @@ diff --git a/ui/ozone/common/native_pixmap_egl_binding.cc b/ui/ozone/common/nati
|
||||
index 31877f4459..6855c1093e 100644
|
||||
--- a/ui/ozone/common/native_pixmap_egl_binding.cc
|
||||
+++ b/ui/ozone/common/native_pixmap_egl_binding.cc
|
||||
@@ -6,10 +6,13 @@
|
||||
|
||||
@@ -6,9 +6,12 @@
|
||||
|
||||
#include <array>
|
||||
|
||||
|
||||
+#include "base/containers/flat_map.h"
|
||||
#include "base/logging.h"
|
||||
#include "base/memory/scoped_refptr.h"
|
||||
+#include "base/no_destructor.h"
|
||||
#include "base/notreached.h"
|
||||
#include "base/numerics/safe_conversions.h"
|
||||
+#include "base/synchronization/lock.h"
|
||||
#include "ui/gfx/linux/drm_util_linux.h"
|
||||
#include "ui/gl/gl_bindings.h"
|
||||
|
||||
@@ -0,0 +1,76 @@
|
||||
# Maintainer: Markus Fritsche <fritsche.markus@gmail.com>
|
||||
#
|
||||
# daedalus-v4l2-dkms — DKMS package for the daedalus_v4l2 out-of-tree
|
||||
# kernel module (V4L2 stateless decoder shim for Pi 5 / CM5).
|
||||
#
|
||||
# Pair to daedalus-v4l2 (userspace daemon). When loaded, the module
|
||||
# registers /dev/videoNN (V4L2 m2m) + /dev/mediaNN (media controller) +
|
||||
# /dev/daedalus-v4l2 (chardev to the userspace daemon). Userspace
|
||||
# clients drive the V4L2 m2m + request API path; the daemon does the
|
||||
# actual FFmpeg-backed decode on /dev/daedalus-v4l2.
|
||||
#
|
||||
# Project: https://git.reauktion.de/reauktion/daedalus-v4l2
|
||||
# Sibling userspace package: daedalus-v4l2
|
||||
# Sibling consumer: libva-v4l2-request-fourier
|
||||
|
||||
pkgname=daedalus-v4l2-dkms
|
||||
_module=daedalus_v4l2
|
||||
|
||||
# Same pin as arch/daedalus-v4l2 — keep kernel module + daemon
|
||||
# bit-versioned together so the chardev wire protocol stays in sync.
|
||||
# 5d8b436 reverts PRs #7 + #8 (parking design that broke libva's
|
||||
# 1:1 contract — see daedalus-v4l2#9 + #10). Tree is
|
||||
# content-equivalent to f0d4186 plus PR #4 (cosmetic menu ctrls).
|
||||
# PROTO_VERSION drops 1 → 0; lock-step install with
|
||||
# daedalus-v4l2 0.1.0.r33.5d8b436 REQUIRED.
|
||||
_commit=872eec505eb91b561892d02a0526749348ddc121
|
||||
|
||||
pkgver=0.1.0.r45.872eec5
|
||||
pkgrel=1 # reset for new upstream pin (872eec5 — PROTO_MAX_PAYLOAD 64 KiB -> 1 MiB, closes #19); lock-step with daedalus-v4l2 0.1.0.r45.872eec5 REQUIRED
|
||||
pkgdesc="V4L2 stateless decoder shim kernel module (DKMS) — Pi 5 / CM5"
|
||||
arch=('any')
|
||||
url="https://git.reauktion.de/reauktion/daedalus-v4l2"
|
||||
license=('GPL-2.0-or-later')
|
||||
depends=('dkms')
|
||||
makedepends=('git')
|
||||
install="${pkgname}.install"
|
||||
|
||||
source=("git+https://git.reauktion.de/reauktion/daedalus-v4l2.git#commit=${_commit}"
|
||||
"dkms.conf"
|
||||
"${pkgname}.install")
|
||||
sha256sums=('SKIP'
|
||||
'SKIP'
|
||||
'SKIP')
|
||||
|
||||
pkgver() {
|
||||
cd "${srcdir}/daedalus-v4l2"
|
||||
printf '0.1.0.r%s.%s' \
|
||||
"$(git rev-list --count HEAD)" \
|
||||
"$(git rev-parse --short=7 HEAD)"
|
||||
}
|
||||
|
||||
package() {
|
||||
local _src="${pkgdir}/usr/src/${_module}-${pkgver}"
|
||||
|
||||
install -dm755 "${_src}"
|
||||
|
||||
# Install the kernel/ subdir and embed the shared proto header in
|
||||
# the same tree. The in-tree Makefile uses
|
||||
# `ccflags-y += -I$(src)/../include` (assuming the parent
|
||||
# daedalus-v4l2 layout); for DKMS we flatten by copying the header
|
||||
# into kernel/include/ and patching the Makefile to point there.
|
||||
cp -r "${srcdir}/daedalus-v4l2/kernel/." "${_src}/"
|
||||
install -Dm644 "${srcdir}/daedalus-v4l2/include/daedalus_v4l2_proto.h" \
|
||||
"${_src}/include/daedalus_v4l2_proto.h"
|
||||
sed -i 's|-I\$(src)/\.\./include|-I$(src)/include|' "${_src}/Makefile"
|
||||
|
||||
# dkms.conf at the root of the source tree (DKMS convention).
|
||||
# Substitute #MODULE_VERSION# placeholder with the actual pkgver
|
||||
# so dkms install/uninstall match what's on disk.
|
||||
install -Dm644 "${srcdir}/dkms.conf" "${_src}/dkms.conf"
|
||||
sed -i "s/#MODULE_VERSION#/${pkgver}/" "${_src}/dkms.conf"
|
||||
|
||||
# License
|
||||
install -Dm644 "${srcdir}/daedalus-v4l2/kernel/daedalus_v4l2_main.c" \
|
||||
"${pkgdir}/usr/share/licenses/${pkgname}/SPDX-HEADER"
|
||||
}
|
||||
@@ -0,0 +1,61 @@
|
||||
# post-install / post-upgrade hook for daedalus-v4l2-dkms.
|
||||
#
|
||||
# pacman + the dkms-helpers alpm hook will already attempt
|
||||
# `dkms install` on its own. This script's job is to emit a
|
||||
# loud, actionable warning when the module didn't actually
|
||||
# build for the running kernel — most commonly because the
|
||||
# kernel headers package isn't installed yet.
|
||||
#
|
||||
# Without this you get a silent failure: the package looks
|
||||
# installed but `modprobe daedalus_v4l2` returns ENOENT.
|
||||
|
||||
# _check_dkms_built <new-package-version>
#
# Returns 0 when `dkms status` reports daedalus_v4l2 as installed or loaded
# for the running kernel. Otherwise prints a remediation hint to stderr and
# returns 1. Returns 1 without output when the dkms binary is not available.
_check_dkms_built() {
    local name=daedalus_v4l2
    # pacman passes scriptlets the full package version
    # ([epoch:]pkgver-pkgrel), but the PKGBUILD registers the DKMS tree under
    # the bare pkgver. Strip epoch and pkgrel, otherwise the status query
    # below can never match and the warning fires on every install.
    local ver=${1:-}
    ver=${ver#*:}
    ver=${ver%-*}
    local kernelver
    kernelver=$(uname -r)

    if ! command -v dkms >/dev/null 2>&1; then
        return 1   # the hard-dep should have caught this
    fi

    local status
    status=$(dkms status -m "$name" -v "$ver" -k "$kernelver" 2>/dev/null || true)
    if printf '%s\n' "$status" | grep -q -E 'installed|loaded'; then
        return 0   # all good
    fi

    cat >&2 <<EOF
==> daedalus-v4l2-dkms: DKMS build did NOT land for kernel $kernelver.
==> dkms status -m $name -v $ver -k $kernelver:
==> $(printf '%s' "$status" | head -1)
==>
==> Most likely cause: kernel headers package is missing.
==> Arch / ALARM: pacman -S linux-rpi-headers (or linux-rpi5-headers)
==> Raspberry Pi OS: apt install linux-headers-rpi-2712
==>
==> After installing headers, finish the install with:
==> sudo dkms install $name/$ver -k $kernelver
==> sudo modprobe daedalus_v4l2
==>
==> Until then daedalus_v4l2 will NOT be loadable and the
==> userspace daedalus-v4l2 daemon will have nothing to talk to.
EOF
    return 1
}
|
||||
|
||||
post_install() {
|
||||
_check_dkms_built "$1" || true
|
||||
}
|
||||
|
||||
post_upgrade() {
|
||||
# New version pinned by the bump may have built fine, but if
|
||||
# a kernel-headers package was uninstalled / pruned since the
|
||||
# last upgrade we'd silently regress. Re-check.
|
||||
_check_dkms_built "$1" || true
|
||||
}
|
||||
|
||||
pre_remove() {
|
||||
# The dkms alpm hook handles dkms remove on its own; nothing
|
||||
# we need to add here.
|
||||
:
|
||||
}
|
||||
@@ -0,0 +1,19 @@
|
||||
# DKMS configuration for daedalus_v4l2 — V4L2 stateless decoder shim.
|
||||
#
|
||||
# Built against /lib/modules/$kernelver/build with the in-tree Makefile.
|
||||
# The Makefile uses `obj-m := daedalus_v4l2.o` and links
|
||||
# daedalus_v4l2_main.o + daedalus_v4l2_chardev.o into the final .ko.
|
||||
|
||||
PACKAGE_NAME="daedalus_v4l2"
|
||||
PACKAGE_VERSION="#MODULE_VERSION#"
|
||||
|
||||
# Single module produced by the Makefile.
|
||||
BUILT_MODULE_NAME[0]="daedalus_v4l2"
|
||||
DEST_MODULE_LOCATION[0]="/updates"
|
||||
|
||||
# Use the package's own Makefile — it already does
|
||||
# `$(MAKE) -C $(KERNELDIR) M=$(PWD) modules`.
|
||||
MAKE[0]="make KERNELDIR=/lib/modules/${kernelver}/build all"
|
||||
CLEAN="make KERNELDIR=/lib/modules/${kernelver}/build clean"
|
||||
|
||||
AUTOINSTALL="yes"
|
||||
@@ -0,0 +1,118 @@
|
||||
# Maintainer: Markus Fritsche <fritsche.markus@gmail.com>
|
||||
#
|
||||
# daedalus-v4l2 — userspace daemon + V4L2 m2m test tools.
|
||||
#
|
||||
# Pair to daedalus-v4l2-dkms (kernel module). Together they expose
|
||||
# /dev/videoNN + /dev/mediaNN as a V4L2 stateless decoder shim on Pi 5 /
|
||||
# CM5, decoding VP9 / AV1 / H.264 via dlopen'd FFmpeg in a single-
|
||||
# threaded daemon and shipping decoded NV12 / P010 back through dmabuf.
|
||||
# Consumed end-to-end by libva-v4l2-request-fourier (>= 1.0.0.r376) so
|
||||
# `ffmpeg -hwaccel vaapi` against vp9_small.ivf produces byte-exact NV12.
|
||||
#
|
||||
# Project: https://git.reauktion.de/reauktion/daedalus-v4l2
|
||||
# Sibling kernel package: daedalus-v4l2-dkms
|
||||
# Sibling consumer: libva-v4l2-request-fourier
|
||||
|
||||
pkgname=daedalus-v4l2
|
||||
_upstreampkg=daedalus-v4l2
|
||||
|
||||
# 6e6dfa1 = picks up daedalus-v4l2 PR #16 — daemon now dlopens
|
||||
# the Kwiboo fourier fork's libavcodec.so.62 / libavformat.so.62 /
|
||||
# libavutil.so.60 at /opt/fourier instead of Debian-stock soname
|
||||
# 61/61/59. First step on the daedalus-fourier substitution arc
|
||||
# (daedalus-v4l2#11). Daemon still needs daedalus-fourier at
|
||||
# build time (Arch packaging for that is a follow-up; Debian side
|
||||
# fetches inline via build-deb.sh).
|
||||
_commit=872eec505eb91b561892d02a0526749348ddc121
|
||||
|
||||
# 0.1.0 (pre-1.0) + commit count + short sha. Bump the .Y on each
|
||||
# Phase 8.x close. pkgver() recomputes at build time.
|
||||
pkgver=0.1.0.r45.872eec5
|
||||
pkgrel=1 # reset for new upstream pin (872eec5 — PROTO_MAX_PAYLOAD 64 KiB -> 1 MiB, closes #19); lock-step with daedalus-v4l2-dkms 0.1.0.r45.872eec5 REQUIRED
|
||||
pkgdesc="Userspace daemon for the daedalus-v4l2 V4L2 stateless decoder shim (VP9/AV1/H.264 on Pi 5 / CM5)"
|
||||
arch=('aarch64')
|
||||
url="https://git.reauktion.de/reauktion/daedalus-v4l2"
|
||||
license=('BSD-2-Clause' 'GPL-2.0-or-later')
|
||||
# Daemon dlopens libavformat.so.61 / libavcodec.so.61 / libavutil.so.59
|
||||
# at runtime (Option γ — see daemon/src/ffmpeg_loader.h). ffmpeg
|
||||
# provides those; we don't link them.
|
||||
depends=('ffmpeg-v4l2-request-fourier' 'libdrm')
|
||||
# Headers from libav*-dev needed at compile time for type-safe function
|
||||
# pointer signatures; pkg-config locates them.
|
||||
makedepends=('cmake' 'ninja' 'pkgconf' 'git' 'ffmpeg')
|
||||
optdepends=('daedalus-v4l2-dkms: kernel module providing /dev/video0 + /dev/daedalus-v4l2'
|
||||
'libva-v4l2-request-fourier: VA-API consumer routing through this daemon')
|
||||
install="${pkgname}.install"
|
||||
|
||||
source=("git+https://git.reauktion.de/reauktion/daedalus-v4l2.git#commit=${_commit}"
|
||||
"${pkgname}.install")
|
||||
sha256sums=('SKIP'
|
||||
'SKIP')
|
||||
|
||||
pkgver() {
|
||||
cd "${srcdir}/${_upstreampkg}"
|
||||
printf '0.1.0.r%s.%s' \
|
||||
"$(git rev-list --count HEAD)" \
|
||||
"$(git rev-parse --short=7 HEAD)"
|
||||
}
|
||||
|
||||
build() {
|
||||
cd "${srcdir}/${_upstreampkg}/daemon"
|
||||
cmake -B build -G Ninja \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DCMAKE_INSTALL_PREFIX=/usr
|
||||
cmake --build build
|
||||
|
||||
cd "${srcdir}/${_upstreampkg}/tools"
|
||||
make
|
||||
}
|
||||
|
||||
package() {
|
||||
cd "${srcdir}/${_upstreampkg}"
|
||||
|
||||
# Daemon binary
|
||||
install -Dm755 daemon/build/daedalus_v4l2_daemon \
|
||||
"${pkgdir}/usr/bin/daedalus_v4l2_daemon"
|
||||
|
||||
# Test tools (under /usr/libexec to keep them out of the default PATH
|
||||
# — they're for verification, not daily use).
|
||||
install -Dm755 tools/test_chardev_pingpong \
|
||||
"${pkgdir}/usr/libexec/daedalus-v4l2/test_chardev_pingpong"
|
||||
install -Dm755 tools/test_m2m_decode \
|
||||
"${pkgdir}/usr/libexec/daedalus-v4l2/test_m2m_decode"
|
||||
install -Dm755 tools/test_m2m_stream \
|
||||
"${pkgdir}/usr/libexec/daedalus-v4l2/test_m2m_stream"
|
||||
|
||||
# Shared wire-protocol header (kernel ↔ daemon); useful for
|
||||
# third-party clients of the chardev.
|
||||
install -Dm644 include/daedalus_v4l2_proto.h \
|
||||
"${pkgdir}/usr/include/daedalus_v4l2_proto.h"
|
||||
|
||||
# systemd unit + module autoload — without these the daemon never
|
||||
# starts and the libva/VAAPI consumer's REQ_DECODE has nobody on
|
||||
# the other end of /dev/daedalus-v4l2.
|
||||
install -Dm644 packaging/systemd/daedalus-v4l2.service \
|
||||
"${pkgdir}/usr/lib/systemd/system/daedalus-v4l2.service"
|
||||
install -Dm644 packaging/systemd/daedalus-v4l2.modules-load \
|
||||
"${pkgdir}/usr/lib/modules-load.d/daedalus-v4l2.conf"
|
||||
|
||||
# Documentation
|
||||
install -Dm644 README.md \
|
||||
"${pkgdir}/usr/share/doc/${pkgname}/README.md"
|
||||
for d in docs/*.md; do
|
||||
install -Dm644 "$d" "${pkgdir}/usr/share/doc/${pkgname}/$(basename "$d")"
|
||||
done
|
||||
|
||||
# Licenses: BSD-2-Clause for daemon/tools, GPL for the kernel proto
|
||||
# header; the SPDX headers in src/ are the canonical declaration but
|
||||
# ship a short note here for package-manager-driven license queries.
|
||||
install -dm755 "${pkgdir}/usr/share/licenses/${pkgname}"
|
||||
cat > "${pkgdir}/usr/share/licenses/${pkgname}/LICENSE" <<'EOF'
|
||||
daedalus-v4l2 userspace components are BSD-2-Clause licensed.
|
||||
The shared kernel↔daemon wire protocol header
|
||||
(/usr/include/daedalus_v4l2_proto.h) is GPL-2.0-or-later WITH
|
||||
Linux-syscall-note for kernel-side compatibility. See SPDX
|
||||
headers on individual source files for the canonical
|
||||
per-file declaration.
|
||||
EOF
|
||||
}
|
||||
@@ -0,0 +1,42 @@
|
||||
# post_install / post_upgrade for daedalus-v4l2.
|
||||
#
|
||||
# Enables (but doesn't start — leave that to the operator) the
|
||||
# daemon service so it comes up on next boot. Reloads systemd
|
||||
# so the new unit file is visible. Triggers modules-load so the
|
||||
# kernel module loads without a reboot if daedalus-v4l2-dkms is
|
||||
# also installed.
|
||||
|
||||
# _activate
#
# Best-effort activation after install/upgrade: reload systemd units, enable
# (not start) the daemon service, re-run modules-load, then print next-step
# hints to stderr. Every step tolerates failure (e.g. chroot installs).
_activate() {
    systemctl daemon-reload >/dev/null 2>&1 || true
    systemctl enable daedalus-v4l2.service >/dev/null 2>&1 || true

    # Trigger /usr/lib/modules-load.d/daedalus-v4l2.conf without a
    # reboot. Safe if the module is already loaded. The helper normally
    # lives under /usr/lib/systemd and is not on PATH, so prefer the
    # absolute path and only fall back to a PATH lookup.
    if [ -x /usr/lib/systemd/systemd-modules-load ]; then
        /usr/lib/systemd/systemd-modules-load >/dev/null 2>&1 || true
    else
        systemd-modules-load >/dev/null 2>&1 || true
    fi

    cat >&2 <<EOF
==> daedalus-v4l2: service enabled, will start on next boot.
==> To start now (requires the kernel module to be loaded):
==> sudo systemctl start daedalus-v4l2.service
==> Verify decode path:
==> journalctl -u daedalus-v4l2.service -f
EOF
}
|
||||
|
||||
post_install() {
|
||||
_activate
|
||||
}
|
||||
|
||||
post_upgrade() {
|
||||
_activate
|
||||
systemctl try-restart daedalus-v4l2.service >/dev/null 2>&1 || true
|
||||
}
|
||||
|
||||
pre_remove() {
|
||||
systemctl stop daedalus-v4l2.service >/dev/null 2>&1 || true
|
||||
systemctl disable daedalus-v4l2.service >/dev/null 2>&1 || true
|
||||
}
|
||||
|
||||
post_remove() {
|
||||
systemctl daemon-reload >/dev/null 2>&1 || true
|
||||
}
|
||||
@@ -0,0 +1,178 @@
|
||||
From 0cd6e669735e453ec8772f111065bbb2f70a5bc6 Mon Sep 17 00:00:00 2001
|
||||
From: Markus Fritsche <mfritsche@reauktion.de>
|
||||
Date: Mon, 18 May 2026 07:27:10 +0000
|
||||
Subject: [PATCH] avutil/hwcontext_v4l2request: unpack NV15 to P010 in
|
||||
transfer_data_from
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
V4L2_PIX_FMT_NV15 (RK3399/RK3588 rkvdec 10-bit 4:2:0 capture) is mapped to
|
||||
sw_format = AV_PIX_FMT_YUV420P10 in v4l2request_capture_pixelformats[]. The
|
||||
existing transfer_get_formats explicitly blanked the format list for that
|
||||
sw_format, so 'ffmpeg -hwaccel v4l2request -vf hwdownload,format=p010le' on
|
||||
a Hi10P / Main10 input failed at filter init with EINVAL before reaching
|
||||
the actual decode (which itself succeeds — 2 frames decoded cleanly).
|
||||
|
||||
Expose AV_PIX_FMT_P010 as the transfer target for NV15-backed surfaces and
|
||||
unpack the packed 10-bit samples into the standard high-bits-of-16 layout
|
||||
inside transfer_data_from. Luma and chroma share the same packing format
|
||||
(5 bytes per 4 samples, little endian); chroma plane is W × H/2 samples
|
||||
for 4:2:0.
|
||||
|
||||
The other 'needs custom unpack' sw_formats (YUV420P / Allwinner NV12_32L32
|
||||
tiled and YUV422P10 / rkvdec NV20) keep the original ENOSYS path because
|
||||
they need different unpack code that isn't covered by this patch.
|
||||
|
||||
Closes marfrit/marfrit-packages#21.
|
||||
---
|
||||
libavutil/hwcontext_v4l2request.c | 111 +++++++++++++++++++++++++++++-
|
||||
1 file changed, 110 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/libavutil/hwcontext_v4l2request.c b/libavutil/hwcontext_v4l2request.c
|
||||
index b6633d9081..3842160dfb 100644
|
||||
--- a/libavutil/hwcontext_v4l2request.c
|
||||
+++ b/libavutil/hwcontext_v4l2request.c
|
||||
@@ -1073,6 +1073,56 @@ fail:
|
||||
return ret;
|
||||
}
|
||||
|
||||
+/*
|
||||
+ * Unpack one NV15-packed 10-bit plane (5 bytes per 4 samples, little endian)
|
||||
+ * into a P010-style plane (10 bits in the high bits of a 16-bit container).
|
||||
+ * `dst_stride` is in bytes; `src_stride` is bytes per row of NV15 data.
|
||||
+ */
|
||||
+static void v4l2request_nv15_unpack_plane_to_p010(const uint8_t *src,
|
||||
+ uint16_t *dst,
|
||||
+ unsigned width,
|
||||
+ unsigned height,
|
||||
+ unsigned src_stride,
|
||||
+ unsigned dst_stride)
|
||||
+{
|
||||
+ for (unsigned y = 0; y < height; y++) {
|
||||
+ const uint8_t *s = src + y * src_stride;
|
||||
+ uint16_t *d = (uint16_t *)((uint8_t *)dst + y * dst_stride);
|
||||
+ unsigned x;
|
||||
+
|
||||
+ for (x = 0; x + 4 <= width; x += 4) {
|
||||
+ uint16_t a = (uint16_t)s[0] | ((uint16_t)(s[1] & 0x03) << 8);
|
||||
+ uint16_t b = ((uint16_t)s[1] >> 2) | ((uint16_t)(s[2] & 0x0F) << 6);
|
||||
+ uint16_t c = ((uint16_t)s[2] >> 4) | ((uint16_t)(s[3] & 0x3F) << 4);
|
||||
+ uint16_t e = ((uint16_t)s[3] >> 6) | ((uint16_t)s[4] << 2);
|
||||
+
|
||||
+ d[0] = (uint16_t)(a << 6);
|
||||
+ d[1] = (uint16_t)(b << 6);
|
||||
+ d[2] = (uint16_t)(c << 6);
|
||||
+ d[3] = (uint16_t)(e << 6);
|
||||
+
|
||||
+ d += 4;
|
||||
+ s += 5;
|
||||
+ }
|
||||
+
|
||||
+ if (x < width) {
|
||||
+ unsigned rem = width - x;
|
||||
+ uint16_t pix[4] = { 0, 0, 0, 0 };
|
||||
+
|
||||
+ pix[0] = (uint16_t)s[0] | ((uint16_t)(s[1] & 0x03) << 8);
|
||||
+ if (rem >= 2)
|
||||
+ pix[1] = ((uint16_t)s[1] >> 2) | ((uint16_t)(s[2] & 0x0F) << 6);
|
||||
+ if (rem >= 3)
|
||||
+ pix[2] = ((uint16_t)s[2] >> 4) | ((uint16_t)(s[3] & 0x3F) << 4);
|
||||
+ if (rem >= 4)
|
||||
+ pix[3] = ((uint16_t)s[3] >> 6) | ((uint16_t)s[4] << 2);
|
||||
+
|
||||
+ for (unsigned j = 0; j < rem; j++)
|
||||
+ d[j] = (uint16_t)(pix[j] << 6);
|
||||
+ }
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
static int v4l2request_transfer_get_formats(AVHWFramesContext *hwfc,
|
||||
enum AVHWFrameTransferDirection dir,
|
||||
enum AVPixelFormat **formats)
|
||||
@@ -1082,6 +1132,22 @@ static int v4l2request_transfer_get_formats(AVHWFramesContext *hwfc,
|
||||
if (dir == AV_HWFRAME_TRANSFER_DIRECTION_TO)
|
||||
return AVERROR(ENOSYS);
|
||||
|
||||
+ /*
|
||||
+ * NV15-backed surfaces (sw_format = YUV420P10) are exposed as P010 to
|
||||
+ * downstream filters: the unpack below converts the packed 10-bit
|
||||
+ * samples into the standard high-bits-of-16 layout. Hi10P / Main10
|
||||
+ * VAAPI/v4l2-request decode reaches userspace through this path.
|
||||
+ */
|
||||
+ if (hwfc->sw_format == AV_PIX_FMT_YUV420P10) {
|
||||
+ fmts = av_malloc_array(2, sizeof(*fmts));
|
||||
+ if (!fmts)
|
||||
+ return AVERROR(ENOMEM);
|
||||
+ fmts[0] = AV_PIX_FMT_P010;
|
||||
+ fmts[1] = AV_PIX_FMT_NONE;
|
||||
+ *formats = fmts;
|
||||
+ return 0;
|
||||
+ }
|
||||
+
|
||||
fmts = av_malloc_array(2, sizeof(*fmts));
|
||||
if (!fmts)
|
||||
return AVERROR(ENOMEM);
|
||||
@@ -1089,8 +1155,13 @@ static int v4l2request_transfer_get_formats(AVHWFramesContext *hwfc,
|
||||
fmts[0] = hwfc->sw_format;
|
||||
fmts[1] = AV_PIX_FMT_NONE;
|
||||
|
||||
+ /*
|
||||
+ * Tiled-NV12-32L32 (Allwinner) and NV20 (rkvdec 4:2:2 10-bit) still need
|
||||
+ * dedicated unpacks before hwdownload can consume them; leave them as
|
||||
+ * "no transfer formats" so the filter graph reports the limitation
|
||||
+ * rather than silently producing garbage.
|
||||
+ */
|
||||
if (hwfc->sw_format == AV_PIX_FMT_YUV420P ||
|
||||
- hwfc->sw_format == AV_PIX_FMT_YUV420P10 ||
|
||||
hwfc->sw_format == AV_PIX_FMT_YUV422P10)
|
||||
fmts[0] = AV_PIX_FMT_NONE;
|
||||
|
||||
@@ -1110,6 +1181,44 @@ static int v4l2request_transfer_data_from(AVHWFramesContext *hwfc,
|
||||
map = av_frame_alloc();
|
||||
if (!map)
|
||||
return AVERROR(ENOMEM);
|
||||
+
|
||||
+ /*
|
||||
+ * For NV15→P010, map the raw NV15 bytes (sw_format) and unpack into
|
||||
+ * dst's P010 storage. Otherwise fall through to the original byte-copy
|
||||
+ * path used for 1:1 sw_format matches (NV12, NV16, AFBC handled by DRM).
|
||||
+ */
|
||||
+ if (hwfc->sw_format == AV_PIX_FMT_YUV420P10) {
|
||||
+ /*
|
||||
+ * Only P010 is advertised by transfer_get_formats for this sw_format;
|
||||
+ * a caller that bypasses get_formats and asks for anything else would
|
||||
+ * silently corrupt output via av_frame_copy on NV15-packed bytes.
|
||||
+ * Reject explicitly.
|
||||
+ */
|
||||
+ if (dst->format != AV_PIX_FMT_P010) {
|
||||
+ ret = AVERROR(ENOSYS);
|
||||
+ goto fail;
|
||||
+ }
|
||||
+
|
||||
+ map->format = hwfc->sw_format;
|
||||
+ ret = v4l2request_map_frame(hwfc, map, src);
|
||||
+ if (ret)
|
||||
+ goto fail;
|
||||
+
|
||||
+ v4l2request_nv15_unpack_plane_to_p010(map->data[0],
|
||||
+ (uint16_t *)dst->data[0],
|
||||
+ dst->width, dst->height,
|
||||
+ map->linesize[0],
|
||||
+ dst->linesize[0]);
|
||||
+ /* NV15 chroma plane is W × H/2 samples (4:2:0, UV interleaved). */
|
||||
+ v4l2request_nv15_unpack_plane_to_p010(map->data[1],
|
||||
+ (uint16_t *)dst->data[1],
|
||||
+ dst->width, dst->height / 2,
|
||||
+ map->linesize[1],
|
||||
+ dst->linesize[1]);
|
||||
+ ret = 0;
|
||||
+ goto fail;
|
||||
+ }
|
||||
+
|
||||
map->format = dst->format;
|
||||
|
||||
ret = v4l2request_map_frame(hwfc, map, src);
|
||||
--
|
||||
2.47.3
|
||||
|
||||
@@ -0,0 +1,137 @@
|
||||
From f760c0541586f43334c02611fcb4c212c08ad576 Mon Sep 17 00:00:00 2001
|
||||
From: Markus Fritsche <mfritsche@reauktion.de>
|
||||
Date: Thu, 21 May 2026 21:40:22 +0200
|
||||
Subject: [PATCH] avcodec/aarch64/h264dsp: route H.264 4x4 IDCT through
|
||||
daedalus-fourier
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
H264DSPContext.idct_add (called per 4x4 block from the intra-4x4
|
||||
decode path in h264_mb.c) now dispatches through
|
||||
daedalus_recipe_dispatch_h264_idct4 instead of ff_h264_idct_add_neon.
|
||||
|
||||
The recipe layer picks the substrate; for cycle 6 (H.264 IDCT 4x4)
|
||||
the recipe is CPU NEON, so this is effectively a NEON-to-NEON
|
||||
substitution with one extra dispatch call and recipe-table lookup.
|
||||
Provides the first end-to-end exercise of the daedalus-fourier
|
||||
kernel pack inside the libavcodec.so decode hot path; follow-up
|
||||
patches wire IDCT 8x8, luma-v deblock, and qpel mc20.
|
||||
|
||||
The library context is process-global, lazily initialised under
|
||||
pthread_once on first call. We pick the no-QPU constructor because
|
||||
libavcodec.so is loaded into arbitrary host processes
|
||||
(firefox-fourier, mpv-fourier, daedalus_v4l2_daemon, ...) and we
|
||||
cannot assume the host has a usable Vulkan instance. Higher cycles
|
||||
(deblock luma-v, MC) that benefit from the QPU will provision their
|
||||
own recipe-selected context once that path is wired.
|
||||
|
||||
Bulk paths (idct_add16, idct_add16intra, idct_add8 — used for
|
||||
non-intra4x4 macroblocks) remain on the stock NEON .S implementations
|
||||
and will be batched through daedalus_recipe_dispatch_h264_idct4 with
|
||||
n_blocks>1 in a follow-up.
|
||||
|
||||
Bit-exact against ff_h264_idct_add_neon (daedalus-fourier cycle 6
|
||||
green; see marfrit/daedalus-fourier/CYCLE_LOGS.md).
|
||||
|
||||
Refs reauktion/daedalus-v4l2#11 — substitution arc step 2.
|
||||
---
|
||||
libavcodec/aarch64/Makefile | 3 +-
|
||||
libavcodec/aarch64/h264_idct_daedalus.c | 49 +++++++++++++++++++++++
|
||||
libavcodec/aarch64/h264dsp_init_aarch64.c | 3 +-
|
||||
3 files changed, 53 insertions(+), 2 deletions(-)
|
||||
create mode 100644 libavcodec/aarch64/h264_idct_daedalus.c
|
||||
|
||||
diff --git a/libavcodec/aarch64/Makefile b/libavcodec/aarch64/Makefile
|
||||
index 41ab025..7b95fb1 100644
|
||||
--- a/libavcodec/aarch64/Makefile
|
||||
+++ b/libavcodec/aarch64/Makefile
|
||||
@@ -3,7 +3,8 @@ OBJS-$(CONFIG_AC3DSP) += aarch64/ac3dsp_init_aarch64.o
|
||||
OBJS-$(CONFIG_FDCTDSP) += aarch64/fdctdsp_init_aarch64.o
|
||||
OBJS-$(CONFIG_FMTCONVERT) += aarch64/fmtconvert_init.o
|
||||
OBJS-$(CONFIG_H264CHROMA) += aarch64/h264chroma_init_aarch64.o
|
||||
-OBJS-$(CONFIG_H264DSP) += aarch64/h264dsp_init_aarch64.o
|
||||
+OBJS-$(CONFIG_H264DSP) += aarch64/h264dsp_init_aarch64.o \
|
||||
+ aarch64/h264_idct_daedalus.o
|
||||
OBJS-$(CONFIG_HUFFYUVDSP) += aarch64/huffyuvdsp_init_aarch64.o
|
||||
OBJS-$(CONFIG_H264PRED) += aarch64/h264pred_init.o
|
||||
OBJS-$(CONFIG_H264QPEL) += aarch64/h264qpel_init_aarch64.o
|
||||
diff --git a/libavcodec/aarch64/h264_idct_daedalus.c b/libavcodec/aarch64/h264_idct_daedalus.c
|
||||
new file mode 100644
|
||||
index 0000000..538d223
|
||||
--- /dev/null
|
||||
+++ b/libavcodec/aarch64/h264_idct_daedalus.c
|
||||
@@ -0,0 +1,49 @@
|
||||
+/*
|
||||
+ * H.264 4x4 IDCT + add — daedalus-fourier substitution shim.
|
||||
+ *
|
||||
+ * Routes H264DSPContext.idct_add through
|
||||
+ * daedalus_recipe_dispatch_h264_idct4 instead of ff_h264_idct_add_neon.
|
||||
+ * The recipe layer picks the substrate (CPU NEON by default for
|
||||
+ * cycle 6; future cycles may dispatch to V3D opportunistically).
|
||||
+ *
|
||||
+ * FFmpeg's 4x4 block memory layout matches daedalus's column-major
|
||||
+ * convention: block[r + 4*c] = coefficient at (row r, col c). Both
|
||||
+ * sides destructively zero the block after the transform.
|
||||
+ *
|
||||
+ * The library context is process-global and lazily initialised under
|
||||
+ * pthread_once. We pick the no-QPU constructor here because
|
||||
+ * libavcodec.so is loaded into arbitrary host processes
|
||||
+ * (firefox-fourier, mpv-fourier, daedalus_v4l2_daemon, ...) and we
|
||||
+ * cannot assume the host has a usable Vulkan instance. Higher cycles
|
||||
+ * (deblock, MC) that benefit from the QPU initialise their own
|
||||
+ * recipe-selected context once that path is wired.
|
||||
+ */
|
||||
+
|
||||
+#include <pthread.h>
|
||||
+#include <stddef.h>
|
||||
+#include <stdint.h>
|
||||
+
|
||||
+#include <daedalus.h>
|
||||
+
|
||||
+#include "libavutil/attributes.h"
|
||||
+#include "libavcodec/h264dsp.h"
|
||||
+
|
||||
+static daedalus_ctx *g_dctx;
|
||||
+static pthread_once_t g_dctx_once = PTHREAD_ONCE_INIT;
|
||||
+
|
||||
+static void daedalus_ctx_init_once(void)
|
||||
+{
|
||||
+ g_dctx = daedalus_ctx_create_no_qpu();
|
||||
+}
|
||||
+
|
||||
+void ff_h264_idct_add_daedalus(uint8_t *dst, int16_t *block, int stride);
|
||||
+
|
||||
+void ff_h264_idct_add_daedalus(uint8_t *dst, int16_t *block, int stride)
|
||||
+{
|
||||
+ static const daedalus_h264_block_meta meta = { .dst_off = 0 };
|
||||
+
|
||||
+ pthread_once(&g_dctx_once, daedalus_ctx_init_once);
|
||||
+
|
||||
+ daedalus_recipe_dispatch_h264_idct4(g_dctx, dst, (size_t)stride,
|
||||
+ block, 1, &meta);
|
||||
+}
|
||||
diff --git a/libavcodec/aarch64/h264dsp_init_aarch64.c b/libavcodec/aarch64/h264dsp_init_aarch64.c
|
||||
index c684574..b993df2 100644
|
||||
--- a/libavcodec/aarch64/h264dsp_init_aarch64.c
|
||||
+++ b/libavcodec/aarch64/h264dsp_init_aarch64.c
|
||||
@@ -66,6 +66,7 @@ void ff_biweight_h264_pixels_4_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride
|
||||
int weights, int offset);
|
||||
|
||||
void ff_h264_idct_add_neon(uint8_t *dst, int16_t *block, int stride);
|
||||
+void ff_h264_idct_add_daedalus(uint8_t *dst, int16_t *block, int stride);
|
||||
void ff_h264_idct_dc_add_neon(uint8_t *dst, int16_t *block, int stride);
|
||||
void ff_h264_idct_add16_neon(uint8_t *dst, const int *block_offset,
|
||||
int16_t *block, int stride,
|
||||
@@ -139,7 +140,7 @@ av_cold void ff_h264dsp_init_aarch64(H264DSPContext *c, const int bit_depth,
|
||||
c->biweight_pixels_tab[1] = ff_biweight_h264_pixels_8_neon;
|
||||
c->biweight_pixels_tab[2] = ff_biweight_h264_pixels_4_neon;
|
||||
|
||||
- c->idct_add = ff_h264_idct_add_neon;
|
||||
+ c->idct_add = ff_h264_idct_add_daedalus;
|
||||
c->idct_dc_add = ff_h264_idct_dc_add_neon;
|
||||
c->idct_add16 = ff_h264_idct_add16_neon;
|
||||
c->idct_add16intra = ff_h264_idct_add16intra_neon;
|
||||
--
|
||||
2.47.3
|
||||
|
||||
@@ -0,0 +1,107 @@
|
||||
From 1b286ddb4efaca26ec9b9e290e989fec77dc1c77 Mon Sep 17 00:00:00 2001
|
||||
From: Markus Fritsche <mfritsche@reauktion.de>
|
||||
Date: Fri, 22 May 2026 10:18:21 +0200
|
||||
Subject: [PATCH] avcodec/aarch64/h264dsp: route H.264 8x8 IDCT through
|
||||
daedalus-fourier
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
H264DSPContext.idct8_add (called per 8x8 block from the High-profile
|
||||
intra-8x8-DCT decode path in h264_mb.c) now dispatches through
|
||||
daedalus_recipe_dispatch_h264_idct8 instead of ff_h264_idct8_add_neon.
|
||||
|
||||
The recipe layer picks the substrate; for cycle 7 (H.264 IDCT 8x8)
|
||||
the recipe is CPU NEON, so this is effectively a NEON-to-NEON
|
||||
substitution layered on top of the cycle-6 IDCT 4x4 wiring. Same
|
||||
pthread_once global context, same destructive-zero semantics; FFmpeg
|
||||
column-major 8x8 storage block[r + 8*c] matches daedalus's convention.
|
||||
|
||||
Bulk path c->idct8_add4 (used for inter 8x8-DCT macroblocks) remains
|
||||
on the in-tree NEON .S code and will be batched through
|
||||
daedalus_recipe_dispatch_h264_idct8 with n_blocks>1 in a follow-up.
|
||||
|
||||
Bit-exact against ff_h264_idct8_add_neon (daedalus-fourier cycle 7
|
||||
green).
|
||||
|
||||
Refs reauktion/daedalus-v4l2#11 — substitution arc step 2 cycle 7.
|
||||
---
|
||||
libavcodec/aarch64/h264_idct_daedalus.c | 29 ++++++++++++++++-------
|
||||
libavcodec/aarch64/h264dsp_init_aarch64.c | 3 ++-
|
||||
2 files changed, 23 insertions(+), 9 deletions(-)
|
||||
|
||||
diff --git a/libavcodec/aarch64/h264_idct_daedalus.c b/libavcodec/aarch64/h264_idct_daedalus.c
|
||||
index 538d223..cbb98af 100644
|
||||
--- a/libavcodec/aarch64/h264_idct_daedalus.c
|
||||
+++ b/libavcodec/aarch64/h264_idct_daedalus.c
|
||||
@@ -1,14 +1,16 @@
|
||||
/*
|
||||
- * H.264 4x4 IDCT + add — daedalus-fourier substitution shim.
|
||||
+ * H.264 4x4 / 8x8 IDCT + add — daedalus-fourier substitution shims.
|
||||
*
|
||||
- * Routes H264DSPContext.idct_add through
|
||||
- * daedalus_recipe_dispatch_h264_idct4 instead of ff_h264_idct_add_neon.
|
||||
- * The recipe layer picks the substrate (CPU NEON by default for
|
||||
- * cycle 6; future cycles may dispatch to V3D opportunistically).
|
||||
+ * Routes H264DSPContext.idct_add → daedalus_recipe_dispatch_h264_idct4
|
||||
+ * H264DSPContext.idct8_add → daedalus_recipe_dispatch_h264_idct8
|
||||
+ * instead of the in-tree ff_h264_idct{,8}_add_neon assembly. The
|
||||
+ * recipe layer picks the substrate (CPU NEON by default for cycles
|
||||
+ * 6 + 7; future cycles may dispatch to V3D opportunistically).
|
||||
*
|
||||
- * FFmpeg's 4x4 block memory layout matches daedalus's column-major
|
||||
- * convention: block[r + 4*c] = coefficient at (row r, col c). Both
|
||||
- * sides destructively zero the block after the transform.
|
||||
+ * FFmpeg's 4x4 and 8x8 block memory layouts match daedalus's
|
||||
+ * column-major convention: block[r + N*c] = coefficient at
|
||||
+ * (row r, col c) for N ∈ {4, 8}. Both sides destructively zero the
|
||||
+ * block after the transform.
|
||||
*
|
||||
* The library context is process-global and lazily initialised under
|
||||
* pthread_once. We pick the no-QPU constructor here because
|
||||
@@ -37,6 +39,7 @@ static void daedalus_ctx_init_once(void)
|
||||
}
|
||||
|
||||
void ff_h264_idct_add_daedalus(uint8_t *dst, int16_t *block, int stride);
|
||||
+void ff_h264_idct8_add_daedalus(uint8_t *dst, int16_t *block, int stride);
|
||||
|
||||
void ff_h264_idct_add_daedalus(uint8_t *dst, int16_t *block, int stride)
|
||||
{
|
||||
@@ -47,3 +50,13 @@ void ff_h264_idct_add_daedalus(uint8_t *dst, int16_t *block, int stride)
|
||||
daedalus_recipe_dispatch_h264_idct4(g_dctx, dst, (size_t)stride,
|
||||
block, 1, &meta);
|
||||
}
|
||||
+
|
||||
+void ff_h264_idct8_add_daedalus(uint8_t *dst, int16_t *block, int stride)
|
||||
+{
|
||||
+ static const daedalus_h264_block_meta meta = { .dst_off = 0 };
|
||||
+
|
||||
+ pthread_once(&g_dctx_once, daedalus_ctx_init_once);
|
||||
+
|
||||
+ daedalus_recipe_dispatch_h264_idct8(g_dctx, dst, (size_t)stride,
|
||||
+ block, 1, &meta);
|
||||
+}
|
||||
diff --git a/libavcodec/aarch64/h264dsp_init_aarch64.c b/libavcodec/aarch64/h264dsp_init_aarch64.c
|
||||
index b993df2..741e551 100644
|
||||
--- a/libavcodec/aarch64/h264dsp_init_aarch64.c
|
||||
+++ b/libavcodec/aarch64/h264dsp_init_aarch64.c
|
||||
@@ -79,6 +79,7 @@ void ff_h264_idct_add8_neon(uint8_t **dest, const int *block_offset,
|
||||
const uint8_t nnzc[15 * 8]);
|
||||
|
||||
void ff_h264_idct8_add_neon(uint8_t *dst, int16_t *block, int stride);
|
||||
+void ff_h264_idct8_add_daedalus(uint8_t *dst, int16_t *block, int stride);
|
||||
void ff_h264_idct8_dc_add_neon(uint8_t *dst, int16_t *block, int stride);
|
||||
void ff_h264_idct8_add4_neon(uint8_t *dst, const int *block_offset,
|
||||
int16_t *block, int stride,
|
||||
@@ -146,7 +147,7 @@ av_cold void ff_h264dsp_init_aarch64(H264DSPContext *c, const int bit_depth,
|
||||
c->idct_add16intra = ff_h264_idct_add16intra_neon;
|
||||
if (chroma_format_idc <= 1)
|
||||
c->idct_add8 = ff_h264_idct_add8_neon;
|
||||
- c->idct8_add = ff_h264_idct8_add_neon;
|
||||
+ c->idct8_add = ff_h264_idct8_add_daedalus;
|
||||
c->idct8_dc_add = ff_h264_idct8_dc_add_neon;
|
||||
c->idct8_add4 = ff_h264_idct8_add4_neon;
|
||||
} else if (have_neon(cpu_flags) && bit_depth == 10) {
|
||||
--
|
||||
2.47.3
|
||||
|
||||
@@ -0,0 +1,121 @@
|
||||
From 68731c41d7ea68be0e912b128cb4e71fb56e8263 Mon Sep 17 00:00:00 2001
|
||||
From: Markus Fritsche <mfritsche@reauktion.de>
|
||||
Date: Fri, 22 May 2026 12:15:16 +0200
|
||||
Subject: [PATCH] avcodec/aarch64/h264dsp: route H.264 luma-v deblock through
|
||||
daedalus-fourier
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
H264DSPContext.v_loop_filter_luma (non-intra bS<4 vertical luma
|
||||
deblock, called per macroblock-row edge from the slice deblock
|
||||
loop) now dispatches through
|
||||
daedalus_recipe_dispatch_h264_deblock_luma_v instead of
|
||||
ff_h264_v_loop_filter_luma_neon.
|
||||
|
||||
The recipe layer picks the substrate; for cycle 8 the daedalus
|
||||
docstring marks the kernel "CPU primary; QPU opportunistic", but
|
||||
the libavcodec.so context here is built with
|
||||
daedalus_ctx_create_no_qpu — process-global pthread_once init,
|
||||
shared with cycles 6/7. QPU opportunism stays gated off until a
|
||||
follow-up adds an explicit feature flag (no implicit Vulkan init
|
||||
in arbitrary host processes). In the meantime cycle 8 is a
|
||||
plumbing-only substitution, NEON-to-NEON via the daedalus recipe.
|
||||
|
||||
Intra (bS=4) loop filter — c->v_loop_filter_luma_intra — stays on
|
||||
the in-tree NEON .S code; daedalus's daedalus_h264_deblock_meta
|
||||
only covers the non-intra path per its docstring.
|
||||
|
||||
FFmpeg `int alpha/beta/int8_t tc0[4]` → daedalus_h264_deblock_meta
|
||||
(int32_t alpha/beta + inline int8_t tc0[4]). pix already points
|
||||
to row 0 of the bottom block per FFmpeg's deblock convention,
|
||||
satisfying daedalus's `dst_off >= 4 * dst_stride` constraint.
|
||||
|
||||
Refs reauktion/daedalus-v4l2#11 — substitution arc step 2 cycle 8.
|
||||
---
|
||||
libavcodec/aarch64/h264_idct_daedalus.c | 36 +++++++++++++++++++----
|
||||
libavcodec/aarch64/h264dsp_init_aarch64.c | 4 ++-
|
||||
2 files changed, 33 insertions(+), 7 deletions(-)
|
||||
|
||||
diff --git a/libavcodec/aarch64/h264_idct_daedalus.c b/libavcodec/aarch64/h264_idct_daedalus.c
|
||||
index cbb98af..92365fa 100644
|
||||
--- a/libavcodec/aarch64/h264_idct_daedalus.c
|
||||
+++ b/libavcodec/aarch64/h264_idct_daedalus.c
|
||||
@@ -1,11 +1,14 @@
|
||||
/*
|
||||
- * H.264 4x4 / 8x8 IDCT + add — daedalus-fourier substitution shims.
|
||||
+ * H.264 4x4 / 8x8 IDCT + luma-v deblock — daedalus-fourier substitution shims.
|
||||
*
|
||||
- * Routes H264DSPContext.idct_add → daedalus_recipe_dispatch_h264_idct4
|
||||
- * H264DSPContext.idct8_add → daedalus_recipe_dispatch_h264_idct8
|
||||
- * instead of the in-tree ff_h264_idct{,8}_add_neon assembly. The
|
||||
- * recipe layer picks the substrate (CPU NEON by default for cycles
|
||||
- * 6 + 7; future cycles may dispatch to V3D opportunistically).
|
||||
+ * Routes H264DSPContext.idct_add → daedalus_recipe_dispatch_h264_idct4
|
||||
+ * H264DSPContext.idct8_add → daedalus_recipe_dispatch_h264_idct8
|
||||
+ * H264DSPContext.v_loop_filter_luma → daedalus_recipe_dispatch_h264_deblock_luma_v
|
||||
+ * instead of the in-tree ff_h264_*_neon assembly. The recipe layer
|
||||
+ * picks the substrate (CPU NEON for cycles 6 + 7 by default; cycle 8
|
||||
+ * is CPU primary with QPU opportunistic — the ctx below is no-QPU,
|
||||
+ * so cycle 8 stays on the CPU NEON path until a separate change
|
||||
+ * gates QPU init on a daedalus-fourier feature flag).
|
||||
*
|
||||
* FFmpeg's 4x4 and 8x8 block memory layouts match daedalus's
|
||||
* column-major convention: block[r + N*c] = coefficient at
|
||||
@@ -40,6 +43,8 @@ static void daedalus_ctx_init_once(void)
|
||||
|
||||
void ff_h264_idct_add_daedalus(uint8_t *dst, int16_t *block, int stride);
|
||||
void ff_h264_idct8_add_daedalus(uint8_t *dst, int16_t *block, int stride);
|
||||
+void ff_h264_v_loop_filter_luma_daedalus(uint8_t *pix, ptrdiff_t stride,
|
||||
+ int alpha, int beta, int8_t *tc0);
|
||||
|
||||
void ff_h264_idct_add_daedalus(uint8_t *dst, int16_t *block, int stride)
|
||||
{
|
||||
@@ -60,3 +65,22 @@ void ff_h264_idct8_add_daedalus(uint8_t *dst, int16_t *block, int stride)
|
||||
daedalus_recipe_dispatch_h264_idct8(g_dctx, dst, (size_t)stride,
|
||||
block, 1, &meta);
|
||||
}
|
||||
+
|
||||
+void ff_h264_v_loop_filter_luma_daedalus(uint8_t *pix, ptrdiff_t stride,
|
||||
+ int alpha, int beta, int8_t *tc0)
|
||||
+{
|
||||
+ daedalus_h264_deblock_meta meta = {
|
||||
+ .dst_off = 0,
|
||||
+ .alpha = alpha,
|
||||
+ .beta = beta,
|
||||
+ };
|
||||
+ meta.tc0[0] = tc0[0];
|
||||
+ meta.tc0[1] = tc0[1];
|
||||
+ meta.tc0[2] = tc0[2];
|
||||
+ meta.tc0[3] = tc0[3];
|
||||
+
|
||||
+ pthread_once(&g_dctx_once, daedalus_ctx_init_once);
|
||||
+
|
||||
+ daedalus_recipe_dispatch_h264_deblock_luma_v(g_dctx, pix, (size_t)stride,
|
||||
+ 1, &meta);
|
||||
+}
|
||||
diff --git a/libavcodec/aarch64/h264dsp_init_aarch64.c b/libavcodec/aarch64/h264dsp_init_aarch64.c
|
||||
index 741e551..85ac381 100644
|
||||
--- a/libavcodec/aarch64/h264dsp_init_aarch64.c
|
||||
+++ b/libavcodec/aarch64/h264dsp_init_aarch64.c
|
||||
@@ -27,6 +27,8 @@
|
||||
|
||||
void ff_h264_v_loop_filter_luma_neon(uint8_t *pix, ptrdiff_t stride, int alpha,
|
||||
int beta, int8_t *tc0);
|
||||
+void ff_h264_v_loop_filter_luma_daedalus(uint8_t *pix, ptrdiff_t stride,
|
||||
+ int alpha, int beta, int8_t *tc0);
|
||||
void ff_h264_h_loop_filter_luma_neon(uint8_t *pix, ptrdiff_t stride, int alpha,
|
||||
int beta, int8_t *tc0);
|
||||
void ff_h264_v_loop_filter_luma_intra_neon(uint8_t *pix, ptrdiff_t stride, int alpha,
|
||||
@@ -114,7 +116,7 @@ av_cold void ff_h264dsp_init_aarch64(H264DSPContext *c, const int bit_depth,
|
||||
int cpu_flags = av_get_cpu_flags();
|
||||
|
||||
if (have_neon(cpu_flags) && bit_depth == 8) {
|
||||
- c->v_loop_filter_luma = ff_h264_v_loop_filter_luma_neon;
|
||||
+ c->v_loop_filter_luma = ff_h264_v_loop_filter_luma_daedalus;
|
||||
c->h_loop_filter_luma = ff_h264_h_loop_filter_luma_neon;
|
||||
c->v_loop_filter_luma_intra= ff_h264_v_loop_filter_luma_intra_neon;
|
||||
c->h_loop_filter_luma_intra= ff_h264_h_loop_filter_luma_intra_neon;
|
||||
--
|
||||
2.47.3
|
||||
|
||||
@@ -0,0 +1,82 @@
|
||||
From 0d1292ea99bc4e5fa2da438259fa01a2374e3e04 Mon Sep 17 00:00:00 2001
|
||||
From: Markus Fritsche <mfritsche@reauktion.de>
|
||||
Date: Fri, 22 May 2026 14:18:25 +0200
|
||||
Subject: [PATCH] avcodec/h264: restore AV_CODEC_FLAG_LOW_DELAY semantics
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
FFmpeg 8.x dropped the H.264 decoder's low_delay path —
|
||||
AV_CODEC_FLAG_LOW_DELAY no longer prevents
|
||||
h264_select_output_frame from running the display-order DPB
|
||||
output queue. V4L2-stateless-style consumers (daedalus-v4l2
|
||||
daemon, libva-v4l2-request-fourier) that set the flag end up
|
||||
seeing the 2-1-4-3 pair-swap pattern on B-frame streams again.
|
||||
|
||||
Restore the documented semantics:
|
||||
|
||||
- Early-exit at the top of h264_select_output_frame when the
|
||||
flag is set: emit the just-decoded picture immediately as
|
||||
next_output_pic, mirror the corruption / recovery-point
|
||||
tracking the main path performs, and skip the entire
|
||||
delayed_pic[] / POC reorder machinery.
|
||||
|
||||
- Suppress the SPS-driven has_b_frames clobber in
|
||||
h264_field_start when the flag is set, so the per-slice
|
||||
bitstream_restriction_flag re-pickup cannot reintroduce a
|
||||
nonzero reorder buffer mid-stream.
|
||||
|
||||
This is a fork-only change required by the daedalus-v4l2 daemon's
|
||||
one-frame-per-send_packet contract; upstream FFmpeg consumers that
|
||||
expect display-order output remain untouched (flag default = off).
|
||||
|
||||
Refs reauktion/daedalus-v4l2#11 — substitution arc step 2 deblock
|
||||
+ flag-restoration follow-up.
|
||||
---
|
||||
libavcodec/h264_slice.c | 23 +++++++++++++++++++++++
|
||||
1 file changed, 23 insertions(+)
|
||||
|
||||
diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c
|
||||
index 97fab70..a7bfbd6 100644
|
||||
--- a/libavcodec/h264_slice.c
|
||||
+++ b/libavcodec/h264_slice.c
|
||||
@@ -1308,6 +1308,28 @@ static int h264_select_output_frame(H264Context *h)
|
||||
cur->mmco_reset = h->mmco_reset;
|
||||
h->mmco_reset = 0;
|
||||
|
||||
+ /* AV_CODEC_FLAG_LOW_DELAY restore (FFmpeg 8.x dropped the H.264
|
||||
+ * decoder's low_delay path). Bypass the display-order DPB
|
||||
+ * output queue: emit the just-decoded picture immediately, in
|
||||
+ * decode order, one per send_packet. V4L2-stateless-style
|
||||
+ * consumers (daedalus-v4l2 daemon, libva-v4l2-request-fourier)
|
||||
+ * do their own POC-based reorder downstream and require this
|
||||
+ * behaviour. */
|
||||
+ if (h->avctx->flags & AV_CODEC_FLAG_LOW_DELAY) {
|
||||
+ h->next_output_pic = cur;
|
||||
+ h->next_outputed_poc = cur->poc;
|
||||
+ h->frame_recovered |= cur->recovered;
|
||||
+ cur->recovered |= h->frame_recovered & FRAME_RECOVERED_SEI;
|
||||
+ if (!cur->recovered) {
|
||||
+ if (!(h->avctx->flags & AV_CODEC_FLAG_OUTPUT_CORRUPT) &&
|
||||
+ !(h->avctx->flags2 & AV_CODEC_FLAG2_SHOW_ALL))
|
||||
+ h->next_output_pic = NULL;
|
||||
+ else
|
||||
+ cur->f->flags |= AV_FRAME_FLAG_CORRUPT;
|
||||
+ }
|
||||
+ return 0;
|
||||
+ }
|
||||
+
|
||||
if (sps->bitstream_restriction_flag ||
|
||||
h->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT) {
|
||||
h->avctx->has_b_frames = FFMAX(h->avctx->has_b_frames, sps->num_reorder_frames);
|
||||
@@ -1415,6 +1437,7 @@ static int h264_field_start(H264Context *h, const H264SliceContext *sl,
|
||||
sps = h->ps.sps;
|
||||
|
||||
if (sps->bitstream_restriction_flag &&
|
||||
+ !(h->avctx->flags & AV_CODEC_FLAG_LOW_DELAY) &&
|
||||
h->avctx->has_b_frames < sps->num_reorder_frames) {
|
||||
h->avctx->has_b_frames = sps->num_reorder_frames;
|
||||
}
|
||||
--
|
||||
2.47.3
|
||||
|
||||
@@ -0,0 +1,139 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Markus Fritsche <mfritsche@reauktion.de>
|
||||
Date: Sat, 23 May 2026 12:00:00 +0200
|
||||
Subject: [PATCH] avcodec/aarch64/h264qpel: route 8x8 mc20 through
|
||||
daedalus-fourier
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
H264QpelContext.put_h264_qpel_pixels_tab[1][2] (8x8 luma horizontal
|
||||
half-pel, 6-tap "put" variant — the canonical representative of the
|
||||
H.264 luma motion-compensation family) now dispatches through
|
||||
daedalus_recipe_dispatch_h264_qpel_mc20 instead of
|
||||
ff_put_h264_qpel8_mc20_neon.
|
||||
|
||||
Cycle 9 of the daedalus-v4l2#11 step 2 substitution arc; closes the
|
||||
4-cycle libavcodec.so substitution sequence (6 IDCT 4x4 / 7 IDCT 8x8 /
|
||||
8 luma-v deblock / 9 qpel mc20).
|
||||
|
||||
The recipe layer picks the substrate. Per docs/k9_h264qpel_mc20.md
|
||||
the verdict is CPU NEON: per-block 7.6 ns at 131 Mblock/s gives 135x
|
||||
margin over 30 fps 1080p, and the QPU dispatch floor (~250 ns)
|
||||
makes any V3D shader strictly worse. Substitution is plumbing-only,
|
||||
NEON-by-recipe — same daedalus_ctx_create_no_qpu pthread_once
|
||||
context shape the cycles 6/7/8 shims already own (kept SEPARATE
|
||||
from the H264DSP shim's ctx because H264QPEL is its own libavcodec
|
||||
Makefile module and link order does not guarantee a single .o
|
||||
owns the ctx symbol; one extra ~µs init per process, paid lazily).
|
||||
|
||||
Other H.264 luma MC variants (mc02, mc11, mc22 etc.) and the 16x16
|
||||
size tier stay on the in-tree NEON .S code. Per the cycle-9 phase-1
|
||||
rationale, mc20 8x8 is representative of the whole family's per-block
|
||||
cost — extending the substitution to other variants would multiply
|
||||
recipe-lookup overhead without changing the substrate verdict.
|
||||
|
||||
Bit-exact against ff_put_h264_qpel8_mc20_neon (daedalus-fourier
|
||||
cycle 9 green; M1 = 100% bit-exact across 10000 random blocks).
|
||||
|
||||
No SONAME change, no Depends change.
|
||||
|
||||
Refs reauktion/daedalus-v4l2#11 — substitution arc step 2 cycle 9.
|
||||
---
|
||||
libavcodec/aarch64/Makefile | 3 +-
|
||||
libavcodec/aarch64/h264_qpel_daedalus.c | 50 ++++++++++++++++++++++
|
||||
libavcodec/aarch64/h264qpel_init_aarch64.c | 4 +-
|
||||
3 files changed, 55 insertions(+), 2 deletions(-)
|
||||
create mode 100644 libavcodec/aarch64/h264_qpel_daedalus.c
|
||||
|
||||
diff --git a/libavcodec/aarch64/Makefile b/libavcodec/aarch64/Makefile
|
||||
--- a/libavcodec/aarch64/Makefile
|
||||
+++ b/libavcodec/aarch64/Makefile
|
||||
@@ -7,7 +7,8 @@ OBJS-$(CONFIG_H264DSP) += aarch64/h264dsp_init_aarch64.o \
|
||||
aarch64/h264_idct_daedalus.o
|
||||
OBJS-$(CONFIG_HUFFYUVDSP) += aarch64/huffyuvdsp_init_aarch64.o
|
||||
OBJS-$(CONFIG_H264PRED) += aarch64/h264pred_init.o
|
||||
-OBJS-$(CONFIG_H264QPEL) += aarch64/h264qpel_init_aarch64.o
|
||||
+OBJS-$(CONFIG_H264QPEL) += aarch64/h264qpel_init_aarch64.o \
|
||||
+ aarch64/h264_qpel_daedalus.o
|
||||
OBJS-$(CONFIG_HPELDSP) += aarch64/hpeldsp_init_aarch64.o
|
||||
OBJS-$(CONFIG_IDCTDSP) += aarch64/idctdsp_init_aarch64.o
|
||||
OBJS-$(CONFIG_ME_CMP) += aarch64/me_cmp_init_aarch64.o
|
||||
diff --git a/libavcodec/aarch64/h264_qpel_daedalus.c b/libavcodec/aarch64/h264_qpel_daedalus.c
|
||||
new file mode 100644
|
||||
--- /dev/null
|
||||
+++ b/libavcodec/aarch64/h264_qpel_daedalus.c
|
||||
@@ -0,0 +1,50 @@
|
||||
+/*
|
||||
+ * H.264 luma qpel mc20 (8x8, horizontal half-pel, 6-tap "put")
|
||||
+ * — daedalus-fourier substitution shim.
|
||||
+ *
|
||||
+ * Routes H264QpelContext.put_h264_qpel_pixels_tab[1][2] through
|
||||
+ * daedalus_recipe_dispatch_h264_qpel_mc20 instead of
|
||||
+ * ff_put_h264_qpel8_mc20_neon. The recipe layer picks the substrate
|
||||
+ * (CPU NEON for cycle 9; QPU not viable — per-block 7.6 ns vs
|
||||
+ * ~250 ns QPU dispatch floor, see docs/k9_h264qpel_mc20.md).
|
||||
+ *
|
||||
+ * Sibling to libavcodec/aarch64/h264_idct_daedalus.c. We keep a
|
||||
+ * SEPARATE process-global pthread_once context here instead of
|
||||
+ * sharing the H264DSP one because H264QPEL is its own libavcodec
|
||||
+ * Makefile module and link order does not guarantee a single .o
|
||||
+ * owns the ctx symbol. The cost is one extra
|
||||
+ * daedalus_ctx_create_no_qpu (~µs) per process; daemon and host
|
||||
+ * processes pay this lazily on first MC call.
|
||||
+ *
|
||||
+ * FFmpeg H264QpelContext convention: both dst and src use a SINGLE
|
||||
+ * stride and `src` already points at the leftmost OUTPUT column
|
||||
+ * (col 0); per output pixel the 6-tap filter reads cols -2..+3. This matches
|
||||
+ * daedalus_recipe_dispatch_h264_qpel_mc20's documented contract
|
||||
+ * directly, so dst_off = src_off = 0.
|
||||
+ */
|
||||
+
|
||||
+#include <pthread.h>
|
||||
+#include <stddef.h>
|
||||
+#include <stdint.h>
|
||||
+
|
||||
+#include <daedalus.h>
|
||||
+
|
||||
+#include "libavutil/attributes.h"
|
||||
+
|
||||
+static daedalus_ctx *g_dctx;
|
||||
+static pthread_once_t g_dctx_once = PTHREAD_ONCE_INIT;
|
||||
+
|
||||
+static void daedalus_ctx_init_once(void)
|
||||
+{
|
||||
+ g_dctx = daedalus_ctx_create_no_qpu();
|
||||
+}
|
||||
+
|
||||
+void ff_put_h264_qpel8_mc20_daedalus(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
|
||||
+
|
||||
+void ff_put_h264_qpel8_mc20_daedalus(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
|
||||
+{
|
||||
+ static const daedalus_h264_qpel_meta meta = { .dst_off = 0, .src_off = 0 };
|
||||
+ pthread_once(&g_dctx_once, daedalus_ctx_init_once);
|
||||
+ daedalus_recipe_dispatch_h264_qpel_mc20(g_dctx, dst, src, (size_t)stride,
|
||||
+ 1, &meta);
|
||||
+}
|
||||
diff --git a/libavcodec/aarch64/h264qpel_init_aarch64.c b/libavcodec/aarch64/h264qpel_init_aarch64.c
|
||||
--- a/libavcodec/aarch64/h264qpel_init_aarch64.c
|
||||
+++ b/libavcodec/aarch64/h264qpel_init_aarch64.c
|
||||
@@ -47,6 +47,8 @@ void ff_put_h264_qpel8_mc00_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t str
|
||||
void ff_put_h264_qpel8_mc10_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
|
||||
void ff_put_h264_qpel8_mc20_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
|
||||
void ff_put_h264_qpel8_mc30_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
|
||||
+void ff_put_h264_qpel8_mc20_daedalus(uint8_t *dst, const uint8_t *src,
|
||||
+ ptrdiff_t stride);
|
||||
void ff_put_h264_qpel8_mc01_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
|
||||
void ff_put_h264_qpel8_mc11_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
|
||||
void ff_put_h264_qpel8_mc21_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
|
||||
@@ -184,7 +186,7 @@ av_cold void ff_h264qpel_init_aarch64(H264QpelContext *c, int bit_depth)
|
||||
|
||||
c->put_h264_qpel_pixels_tab[1][ 0] = ff_put_h264_qpel8_mc00_neon;
|
||||
c->put_h264_qpel_pixels_tab[1][ 1] = ff_put_h264_qpel8_mc10_neon;
|
||||
- c->put_h264_qpel_pixels_tab[1][ 2] = ff_put_h264_qpel8_mc20_neon;
|
||||
+ c->put_h264_qpel_pixels_tab[1][ 2] = ff_put_h264_qpel8_mc20_daedalus;
|
||||
c->put_h264_qpel_pixels_tab[1][ 3] = ff_put_h264_qpel8_mc30_neon;
|
||||
c->put_h264_qpel_pixels_tab[1][ 4] = ff_put_h264_qpel8_mc01_neon;
|
||||
c->put_h264_qpel_pixels_tab[1][ 5] = ff_put_h264_qpel8_mc11_neon;
|
||||
--
|
||||
2.47.3
|
||||
@@ -0,0 +1,92 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: claude-noether <claude-noether@noreply.localhost>
|
||||
Date: Sun, 25 May 2026 12:00:00 +0200
|
||||
Subject: [PATCH] avcodec/aarch64/h264dsp: route H.264 luma-h deblock through daedalus-fourier
|
||||
|
||||
Sibling of 0005 (which substituted v_loop_filter_luma). Same
|
||||
NEON-to-NEON substitution: H264DSPContext.h_loop_filter_luma →
|
||||
daedalus_recipe_dispatch_h264_deblock_luma_h. The H kernel landed
|
||||
in daedalus-fourier PR #9 (CPU NEON only — no QPU shader yet).
|
||||
|
||||
libavcodec.so ctx is no-QPU per the existing 0003-0005 / 0007
|
||||
pattern; we cannot assume Vulkan in arbitrary host processes
|
||||
(firefox-fourier RDD, mpv-fourier, etc.).
|
||||
|
||||
Intra (bS=4) h_loop_filter_luma_intra stays on the in-tree NEON .S
|
||||
code; daedalus_h264_deblock_meta only covers the non-intra path.
|
||||
An intra-h substitution can land once daedalus-fourier exposes a
|
||||
dispatch helper (the kernel already exists internally per PR #11).
|
||||
|
||||
Refs reauktion/daedalus-v4l2#11 — substitution arc step 2 cycle 8 H.
|
||||
---
|
||||
diff --git a/libavcodec/aarch64/h264_idct_daedalus.c b/libavcodec/aarch64/h264_idct_daedalus.c
|
||||
--- a/libavcodec/aarch64/h264_idct_daedalus.c 2026-05-25 13:09:33.694760715 +0200
|
||||
+++ libavcodec/aarch64/h264_idct_daedalus.c 2026-05-25 13:09:33.715603719 +0200
|
||||
@@ -1,9 +1,10 @@
|
||||
/*
|
||||
- * H.264 4x4 / 8x8 IDCT + luma-v deblock — daedalus-fourier substitution shims.
|
||||
+ * H.264 4x4 / 8x8 IDCT + luma v/h deblock — daedalus-fourier substitution shims.
|
||||
*
|
||||
* Routes H264DSPContext.idct_add → daedalus_recipe_dispatch_h264_idct4
|
||||
* H264DSPContext.idct8_add → daedalus_recipe_dispatch_h264_idct8
|
||||
* H264DSPContext.v_loop_filter_luma → daedalus_recipe_dispatch_h264_deblock_luma_v
|
||||
+ * H264DSPContext.h_loop_filter_luma → daedalus_recipe_dispatch_h264_deblock_luma_h
|
||||
* instead of the in-tree ff_h264_*_neon assembly. The recipe layer
|
||||
* picks the substrate (CPU NEON for cycles 6 + 7 by default; cycle 8
|
||||
* is CPU primary with QPU opportunistic — the ctx below is no-QPU,
|
||||
@@ -45,6 +46,8 @@
|
||||
void ff_h264_idct8_add_daedalus(uint8_t *dst, int16_t *block, int stride);
|
||||
void ff_h264_v_loop_filter_luma_daedalus(uint8_t *pix, ptrdiff_t stride,
|
||||
int alpha, int beta, int8_t *tc0);
|
||||
+void ff_h264_h_loop_filter_luma_daedalus(uint8_t *pix, ptrdiff_t stride,
|
||||
+ int alpha, int beta, int8_t *tc0);
|
||||
|
||||
void ff_h264_idct_add_daedalus(uint8_t *dst, int16_t *block, int stride)
|
||||
{
|
||||
@@ -84,3 +87,22 @@
|
||||
daedalus_recipe_dispatch_h264_deblock_luma_v(g_dctx, pix, (size_t)stride,
|
||||
1, &meta);
|
||||
}
|
||||
+
|
||||
+void ff_h264_h_loop_filter_luma_daedalus(uint8_t *pix, ptrdiff_t stride,
|
||||
+ int alpha, int beta, int8_t *tc0)
|
||||
+{
|
||||
+ daedalus_h264_deblock_meta meta = {
|
||||
+ .dst_off = 0,
|
||||
+ .alpha = alpha,
|
||||
+ .beta = beta,
|
||||
+ };
|
||||
+ meta.tc0[0] = tc0[0];
|
||||
+ meta.tc0[1] = tc0[1];
|
||||
+ meta.tc0[2] = tc0[2];
|
||||
+ meta.tc0[3] = tc0[3];
|
||||
+
|
||||
+ pthread_once(&g_dctx_once, daedalus_ctx_init_once);
|
||||
+
|
||||
+ daedalus_recipe_dispatch_h264_deblock_luma_h(g_dctx, pix, (size_t)stride,
|
||||
+ 1, &meta);
|
||||
+}
|
||||
diff --git a/libavcodec/aarch64/h264dsp_init_aarch64.c b/libavcodec/aarch64/h264dsp_init_aarch64.c
|
||||
--- a/libavcodec/aarch64/h264dsp_init_aarch64.c 2026-05-25 13:09:33.695937103 +0200
|
||||
+++ libavcodec/aarch64/h264dsp_init_aarch64.c 2026-05-25 13:09:33.715541700 +0200
|
||||
@@ -31,6 +31,8 @@
|
||||
int alpha, int beta, int8_t *tc0);
|
||||
void ff_h264_h_loop_filter_luma_neon(uint8_t *pix, ptrdiff_t stride, int alpha,
|
||||
int beta, int8_t *tc0);
|
||||
+void ff_h264_h_loop_filter_luma_daedalus(uint8_t *pix, ptrdiff_t stride,
|
||||
+ int alpha, int beta, int8_t *tc0);
|
||||
void ff_h264_v_loop_filter_luma_intra_neon(uint8_t *pix, ptrdiff_t stride, int alpha,
|
||||
int beta);
|
||||
void ff_h264_h_loop_filter_luma_intra_neon(uint8_t *pix, ptrdiff_t stride, int alpha,
|
||||
@@ -117,7 +119,7 @@
|
||||
|
||||
if (have_neon(cpu_flags) && bit_depth == 8) {
|
||||
c->v_loop_filter_luma = ff_h264_v_loop_filter_luma_daedalus;
|
||||
- c->h_loop_filter_luma = ff_h264_h_loop_filter_luma_neon;
|
||||
+ c->h_loop_filter_luma = ff_h264_h_loop_filter_luma_daedalus;
|
||||
c->v_loop_filter_luma_intra= ff_h264_v_loop_filter_luma_intra_neon;
|
||||
c->h_loop_filter_luma_intra= ff_h264_h_loop_filter_luma_intra_neon;
|
||||
|
||||
--
|
||||
2.47.3
|
||||
|
||||
@@ -0,0 +1,127 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: claude-noether <claude-noether@noreply.localhost>
|
||||
Date: Sun, 25 May 2026 12:00:00 +0200
|
||||
Subject: [PATCH] avcodec/aarch64/h264dsp: route H.264 chroma v/h deblock through daedalus-fourier
|
||||
|
||||
Chroma siblings of 0005 (luma_v) and 0008 (luma_h). Same
|
||||
NEON-to-NEON pattern via the daedalus recipe layer:
|
||||
|
||||
H264DSPContext.v_loop_filter_chroma →
|
||||
daedalus_recipe_dispatch_h264_deblock_chroma_v
|
||||
H264DSPContext.h_loop_filter_chroma →
|
||||
daedalus_recipe_dispatch_h264_deblock_chroma_h
|
||||
|
||||
Both kernels landed in daedalus-fourier PR #10. Recipe table
|
||||
routes AUTO to CPU NEON (no chroma QPU shaders yet), so this
|
||||
is plumbing-only and stays bit-exact against the in-tree NEON.
|
||||
|
||||
Intra chroma (bS=4) loop filters remain on in-tree NEON;
|
||||
daedalus_h264_deblock_meta covers the non-intra (bS<4) path.
|
||||
|
||||
Refs reauktion/daedalus-v4l2#11 — substitution arc step 2 cycle 8 chroma.
|
||||
---
|
||||
diff --git a/libavcodec/aarch64/h264_idct_daedalus.c b/libavcodec/aarch64/h264_idct_daedalus.c
|
||||
--- a/libavcodec/aarch64/h264_idct_daedalus.c 2026-05-25 13:15:45.995368233 +0200
|
||||
+++ libavcodec/aarch64/h264_idct_daedalus.c 2026-05-25 13:15:46.015839177 +0200
|
||||
@@ -1,10 +1,12 @@
|
||||
/*
|
||||
- * H.264 4x4 / 8x8 IDCT + luma v/h deblock — daedalus-fourier substitution shims.
|
||||
+ * H.264 4x4 / 8x8 IDCT + luma v/h + chroma v/h deblock — daedalus-fourier substitution shims.
|
||||
*
|
||||
* Routes H264DSPContext.idct_add → daedalus_recipe_dispatch_h264_idct4
|
||||
* H264DSPContext.idct8_add → daedalus_recipe_dispatch_h264_idct8
|
||||
- * H264DSPContext.v_loop_filter_luma → daedalus_recipe_dispatch_h264_deblock_luma_v
|
||||
- * H264DSPContext.h_loop_filter_luma → daedalus_recipe_dispatch_h264_deblock_luma_h
|
||||
+ * H264DSPContext.v_loop_filter_luma → daedalus_recipe_dispatch_h264_deblock_luma_v
|
||||
+ * H264DSPContext.h_loop_filter_luma → daedalus_recipe_dispatch_h264_deblock_luma_h
|
||||
+ * H264DSPContext.v_loop_filter_chroma → daedalus_recipe_dispatch_h264_deblock_chroma_v
|
||||
+ * H264DSPContext.h_loop_filter_chroma → daedalus_recipe_dispatch_h264_deblock_chroma_h
|
||||
* instead of the in-tree ff_h264_*_neon assembly. The recipe layer
|
||||
* picks the substrate (CPU NEON for cycles 6 + 7 by default; cycle 8
|
||||
* is CPU primary with QPU opportunistic — the ctx below is no-QPU,
|
||||
@@ -48,6 +50,10 @@
|
||||
int alpha, int beta, int8_t *tc0);
|
||||
void ff_h264_h_loop_filter_luma_daedalus(uint8_t *pix, ptrdiff_t stride,
|
||||
int alpha, int beta, int8_t *tc0);
|
||||
+void ff_h264_v_loop_filter_chroma_daedalus(uint8_t *pix, ptrdiff_t stride,
|
||||
+ int alpha, int beta, int8_t *tc0);
|
||||
+void ff_h264_h_loop_filter_chroma_daedalus(uint8_t *pix, ptrdiff_t stride,
|
||||
+ int alpha, int beta, int8_t *tc0);
|
||||
|
||||
void ff_h264_idct_add_daedalus(uint8_t *dst, int16_t *block, int stride)
|
||||
{
|
||||
@@ -106,3 +112,41 @@
|
||||
daedalus_recipe_dispatch_h264_deblock_luma_h(g_dctx, pix, (size_t)stride,
|
||||
1, &meta);
|
||||
}
|
||||
+
|
||||
+void ff_h264_v_loop_filter_chroma_daedalus(uint8_t *pix, ptrdiff_t stride,
|
||||
+ int alpha, int beta, int8_t *tc0)
|
||||
+{
|
||||
+ daedalus_h264_deblock_meta meta = {
|
||||
+ .dst_off = 0,
|
||||
+ .alpha = alpha,
|
||||
+ .beta = beta,
|
||||
+ };
|
||||
+ meta.tc0[0] = tc0[0];
|
||||
+ meta.tc0[1] = tc0[1];
|
||||
+ meta.tc0[2] = tc0[2];
|
||||
+ meta.tc0[3] = tc0[3];
|
||||
+
|
||||
+ pthread_once(&g_dctx_once, daedalus_ctx_init_once);
|
||||
+
|
||||
+ daedalus_recipe_dispatch_h264_deblock_chroma_v(g_dctx, pix, (size_t)stride,
|
||||
+ 1, &meta);
|
||||
+}
|
||||
+
|
||||
+void ff_h264_h_loop_filter_chroma_daedalus(uint8_t *pix, ptrdiff_t stride,
|
||||
+ int alpha, int beta, int8_t *tc0)
|
||||
+{
|
||||
+ daedalus_h264_deblock_meta meta = {
|
||||
+ .dst_off = 0,
|
||||
+ .alpha = alpha,
|
||||
+ .beta = beta,
|
||||
+ };
|
||||
+ meta.tc0[0] = tc0[0];
|
||||
+ meta.tc0[1] = tc0[1];
|
||||
+ meta.tc0[2] = tc0[2];
|
||||
+ meta.tc0[3] = tc0[3];
|
||||
+
|
||||
+ pthread_once(&g_dctx_once, daedalus_ctx_init_once);
|
||||
+
|
||||
+ daedalus_recipe_dispatch_h264_deblock_chroma_h(g_dctx, pix, (size_t)stride,
|
||||
+ 1, &meta);
|
||||
+}
|
||||
diff --git a/libavcodec/aarch64/h264dsp_init_aarch64.c b/libavcodec/aarch64/h264dsp_init_aarch64.c
|
||||
--- a/libavcodec/aarch64/h264dsp_init_aarch64.c 2026-05-25 13:15:45.996482360 +0200
|
||||
+++ libavcodec/aarch64/h264dsp_init_aarch64.c 2026-05-25 13:15:46.025604910 +0200
|
||||
@@ -39,8 +39,12 @@
|
||||
int beta);
|
||||
void ff_h264_v_loop_filter_chroma_neon(uint8_t *pix, ptrdiff_t stride, int alpha,
|
||||
int beta, int8_t *tc0);
|
||||
+void ff_h264_v_loop_filter_chroma_daedalus(uint8_t *pix, ptrdiff_t stride,
|
||||
+ int alpha, int beta, int8_t *tc0);
|
||||
void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, ptrdiff_t stride, int alpha,
|
||||
int beta, int8_t *tc0);
|
||||
+void ff_h264_h_loop_filter_chroma_daedalus(uint8_t *pix, ptrdiff_t stride,
|
||||
+ int alpha, int beta, int8_t *tc0);
|
||||
void ff_h264_h_loop_filter_chroma422_neon(uint8_t *pix, ptrdiff_t stride, int alpha,
|
||||
int beta, int8_t *tc0);
|
||||
void ff_h264_v_loop_filter_chroma_intra_neon(uint8_t *pix, ptrdiff_t stride,
|
||||
@@ -123,11 +127,11 @@
|
||||
c->v_loop_filter_luma_intra= ff_h264_v_loop_filter_luma_intra_neon;
|
||||
c->h_loop_filter_luma_intra= ff_h264_h_loop_filter_luma_intra_neon;
|
||||
|
||||
- c->v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon;
|
||||
+ c->v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_daedalus;
|
||||
c->v_loop_filter_chroma_intra = ff_h264_v_loop_filter_chroma_intra_neon;
|
||||
|
||||
if (chroma_format_idc <= 1) {
|
||||
- c->h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon;
|
||||
+ c->h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_daedalus;
|
||||
c->h_loop_filter_chroma_intra = ff_h264_h_loop_filter_chroma_intra_neon;
|
||||
c->h_loop_filter_chroma_mbaff_intra = ff_h264_h_loop_filter_chroma_mbaff_intra_neon;
|
||||
} else {
|
||||
--
|
||||
2.47.3
|
||||
|
||||
@@ -0,0 +1,126 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: claude-noether <claude-noether@noreply.localhost>
|
||||
Date: Sun, 25 May 2026 12:30:00 +0200
|
||||
Subject: [PATCH] avcodec/aarch64/h264dsp: route H.264 luma intra deblock through daedalus-fourier
|
||||
|
||||
Adds the bS=4 intra-strength variants of the already-substituted
|
||||
luma_v / luma_h deblock (0005, 0008). Macroblock edges where either
|
||||
neighbouring macroblock is intra-coded are (for frame macroblocks) assigned
|
||||
boundary strength 4 per H.264 §8.7.2.1; edges interior to an intra MB get bS=3.
|
||||
|
||||
H264DSPContext.v_loop_filter_luma_intra →
|
||||
daedalus_recipe_dispatch_h264_deblock_luma_v_intra
|
||||
H264DSPContext.h_loop_filter_luma_intra →
|
||||
daedalus_recipe_dispatch_h264_deblock_luma_h_intra
|
||||
|
||||
Both kernels landed in daedalus-fourier PR #11. Recipe table
|
||||
routes AUTO to CPU NEON (no intra QPU shaders yet) — plumbing-
|
||||
only NEON-to-NEON via daedalus, bit-exact against the in-tree
|
||||
FFmpeg NEON path.
|
||||
|
||||
Signature differs from bS<4: no tc0 argument. The wrapper
|
||||
passes daedalus_h264_deblock_meta with alpha/beta set; tc0[] is
|
||||
ignored by the intra dispatch (bS=4 hardcodes the strength).
|
||||
|
||||
Chroma intra variants are deferred to a follow-up PR because the
|
||||
chroma path has a 4:2:0 / 4:2:2 split (chroma_format_idc gating)
|
||||
that needs explicit conditional substitution to avoid running
|
||||
the 4:2:0-only daedalus dispatch on 4:2:2 chroma.
|
||||
|
||||
Refs reauktion/daedalus-v4l2#11 — substitution arc step 2 cycle 8 intra.
|
||||
---
|
||||
diff --git a/libavcodec/aarch64/h264_idct_daedalus.c b/libavcodec/aarch64/h264_idct_daedalus.c
|
||||
--- a/libavcodec/aarch64/h264_idct_daedalus.c 2026-05-25 13:18:54.992244965 +0200
|
||||
+++ libavcodec/aarch64/h264_idct_daedalus.c 2026-05-25 13:20:12.338122217 +0200
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
- * H.264 4x4 / 8x8 IDCT + luma v/h + chroma v/h deblock — daedalus-fourier substitution shims.
|
||||
+ * H.264 4x4 / 8x8 IDCT + luma v/h (inter + intra) + chroma v/h deblock — daedalus-fourier substitution shims.
|
||||
*
|
||||
* Routes H264DSPContext.idct_add → daedalus_recipe_dispatch_h264_idct4
|
||||
* H264DSPContext.idct8_add → daedalus_recipe_dispatch_h264_idct8
|
||||
@@ -7,6 +7,8 @@
|
||||
* H264DSPContext.h_loop_filter_luma → daedalus_recipe_dispatch_h264_deblock_luma_h
|
||||
* H264DSPContext.v_loop_filter_chroma → daedalus_recipe_dispatch_h264_deblock_chroma_v
|
||||
* H264DSPContext.h_loop_filter_chroma → daedalus_recipe_dispatch_h264_deblock_chroma_h
|
||||
+ * H264DSPContext.v_loop_filter_luma_intra → daedalus_recipe_dispatch_h264_deblock_luma_v_intra
|
||||
+ * H264DSPContext.h_loop_filter_luma_intra → daedalus_recipe_dispatch_h264_deblock_luma_h_intra
|
||||
* instead of the in-tree ff_h264_*_neon assembly. The recipe layer
|
||||
* picks the substrate (CPU NEON for cycles 6 + 7 by default; cycle 8
|
||||
* is CPU primary with QPU opportunistic — the ctx below is no-QPU,
|
||||
@@ -54,6 +56,10 @@
|
||||
int alpha, int beta, int8_t *tc0);
|
||||
void ff_h264_h_loop_filter_chroma_daedalus(uint8_t *pix, ptrdiff_t stride,
|
||||
int alpha, int beta, int8_t *tc0);
|
||||
+void ff_h264_v_loop_filter_luma_intra_daedalus(uint8_t *pix, ptrdiff_t stride,
|
||||
+ int alpha, int beta);
|
||||
+void ff_h264_h_loop_filter_luma_intra_daedalus(uint8_t *pix, ptrdiff_t stride,
|
||||
+ int alpha, int beta);
|
||||
|
||||
void ff_h264_idct_add_daedalus(uint8_t *dst, int16_t *block, int stride)
|
||||
{
|
||||
@@ -150,3 +156,34 @@
|
||||
daedalus_recipe_dispatch_h264_deblock_chroma_h(g_dctx, pix, (size_t)stride,
|
||||
1, &meta);
|
||||
}
|
||||
+
|
||||
+void ff_h264_v_loop_filter_luma_intra_daedalus(uint8_t *pix, ptrdiff_t stride,
|
||||
+ int alpha, int beta)
|
||||
+{
|
||||
+ daedalus_h264_deblock_meta meta = {
|
||||
+ .dst_off = 0,
|
||||
+ .alpha = alpha,
|
||||
+ .beta = beta,
|
||||
+ };
|
||||
+ /* tc0[] is ignored by the intra-strength dispatch (bS=4 hardcodes the strength). */
|
||||
+
|
||||
+ pthread_once(&g_dctx_once, daedalus_ctx_init_once);
|
||||
+
|
||||
+ daedalus_recipe_dispatch_h264_deblock_luma_v_intra(g_dctx, pix, (size_t)stride,
|
||||
+ 1, &meta);
|
||||
+}
|
||||
+
|
||||
+void ff_h264_h_loop_filter_luma_intra_daedalus(uint8_t *pix, ptrdiff_t stride,
|
||||
+ int alpha, int beta)
|
||||
+{
|
||||
+ daedalus_h264_deblock_meta meta = {
|
||||
+ .dst_off = 0,
|
||||
+ .alpha = alpha,
|
||||
+ .beta = beta,
|
||||
+ };
|
||||
+
|
||||
+ pthread_once(&g_dctx_once, daedalus_ctx_init_once);
|
||||
+
|
||||
+ daedalus_recipe_dispatch_h264_deblock_luma_h_intra(g_dctx, pix, (size_t)stride,
|
||||
+ 1, &meta);
|
||||
+}
|
||||
diff --git a/libavcodec/aarch64/h264dsp_init_aarch64.c b/libavcodec/aarch64/h264dsp_init_aarch64.c
|
||||
--- a/libavcodec/aarch64/h264dsp_init_aarch64.c 2026-05-25 13:18:54.993349573 +0200
|
||||
+++ libavcodec/aarch64/h264dsp_init_aarch64.c 2026-05-25 13:20:12.338265830 +0200
|
||||
@@ -35,8 +35,12 @@
|
||||
int alpha, int beta, int8_t *tc0);
|
||||
void ff_h264_v_loop_filter_luma_intra_neon(uint8_t *pix, ptrdiff_t stride, int alpha,
|
||||
int beta);
|
||||
+void ff_h264_v_loop_filter_luma_intra_daedalus(uint8_t *pix, ptrdiff_t stride,
|
||||
+ int alpha, int beta);
|
||||
void ff_h264_h_loop_filter_luma_intra_neon(uint8_t *pix, ptrdiff_t stride, int alpha,
|
||||
int beta);
|
||||
+void ff_h264_h_loop_filter_luma_intra_daedalus(uint8_t *pix, ptrdiff_t stride,
|
||||
+ int alpha, int beta);
|
||||
void ff_h264_v_loop_filter_chroma_neon(uint8_t *pix, ptrdiff_t stride, int alpha,
|
||||
int beta, int8_t *tc0);
|
||||
void ff_h264_v_loop_filter_chroma_daedalus(uint8_t *pix, ptrdiff_t stride,
|
||||
@@ -124,8 +128,8 @@
|
||||
if (have_neon(cpu_flags) && bit_depth == 8) {
|
||||
c->v_loop_filter_luma = ff_h264_v_loop_filter_luma_daedalus;
|
||||
c->h_loop_filter_luma = ff_h264_h_loop_filter_luma_daedalus;
|
||||
- c->v_loop_filter_luma_intra= ff_h264_v_loop_filter_luma_intra_neon;
|
||||
- c->h_loop_filter_luma_intra= ff_h264_h_loop_filter_luma_intra_neon;
|
||||
+ c->v_loop_filter_luma_intra= ff_h264_v_loop_filter_luma_intra_daedalus;
|
||||
+ c->h_loop_filter_luma_intra= ff_h264_h_loop_filter_luma_intra_daedalus;
|
||||
|
||||
c->v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_daedalus;
|
||||
c->v_loop_filter_chroma_intra = ff_h264_v_loop_filter_chroma_intra_neon;
|
||||
--
|
||||
2.47.3
|
||||
|
||||
@@ -0,0 +1,101 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: claude-noether <claude-noether@noreply.localhost>
|
||||
Date: Sun, 25 May 2026 13:00:00 +0200
|
||||
Subject: [PATCH] avcodec/aarch64/h264dsp: route H.264 chroma DC Hadamard through daedalus-fourier
|
||||
|
||||
Substitutes H264DSPContext.chroma_dc_dequant_idct in the
|
||||
4:2:0 / bit_depth=8 init path with a wrapper that composes
|
||||
the daedalus chroma DC Hadamard primitive (fourier PR #25)
|
||||
with the qmul scaling that FFmpeg performs in one fused function.
|
||||
|
||||
Bit-exact against ff_h264_chroma_dc_dequant_idct_8_c.
|
||||
Hadamard correctness gated by fourier PR #23 test suite.
|
||||
|
||||
4:2:2 chroma stays on the in-tree 422 variant (same
|
||||
gating shape as 0009 chroma deblock substitution).
|
||||
|
||||
Requires daedalus-fourier commit b9f9ff2 or later (PR #25
|
||||
exposing the public Hadamard symbol). Pin bumps in PKGBUILD
|
||||
and build-deb.sh come in the same commit.
|
||||
---
|
||||
diff --git a/libavcodec/aarch64/h264_idct_daedalus.c b/libavcodec/aarch64/h264_idct_daedalus.c
|
||||
--- a/libavcodec/aarch64/h264_idct_daedalus.c 2026-05-25 13:38:32.019491484 +0200
|
||||
+++ libavcodec/aarch64/h264_idct_daedalus.c 2026-05-25 13:38:32.033821507 +0200
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
- * H.264 4x4 / 8x8 IDCT + luma v/h (inter + intra) + chroma v/h deblock — daedalus-fourier substitution shims.
|
||||
+ * H.264 4x4 / 8x8 IDCT + luma v/h (inter+intra) + chroma v/h deblock + chroma DC Hadamard — daedalus-fourier substitution shims.
|
||||
*
|
||||
* Routes H264DSPContext.idct_add → daedalus_recipe_dispatch_h264_idct4
|
||||
* H264DSPContext.idct8_add → daedalus_recipe_dispatch_h264_idct8
|
||||
@@ -9,6 +9,7 @@
|
||||
* H264DSPContext.h_loop_filter_chroma → daedalus_recipe_dispatch_h264_deblock_chroma_h
|
||||
* H264DSPContext.v_loop_filter_luma_intra → daedalus_recipe_dispatch_h264_deblock_luma_v_intra
|
||||
* H264DSPContext.h_loop_filter_luma_intra → daedalus_recipe_dispatch_h264_deblock_luma_h_intra
|
||||
+ * H264DSPContext.chroma_dc_dequant_idct → daedalus_h264_chroma_dc_hadamard_2x2 + caller-side qmul
|
||||
* instead of the in-tree ff_h264_*_neon assembly. The recipe layer
|
||||
* picks the substrate (CPU NEON for cycles 6 + 7 by default; cycle 8
|
||||
* is CPU primary with QPU opportunistic — the ctx below is no-QPU,
|
||||
@@ -60,6 +61,7 @@
|
||||
int alpha, int beta);
|
||||
void ff_h264_h_loop_filter_luma_intra_daedalus(uint8_t *pix, ptrdiff_t stride,
|
||||
int alpha, int beta);
|
||||
+void ff_h264_chroma_dc_dequant_idct_daedalus(int16_t *block, int qmul);
|
||||
|
||||
void ff_h264_idct_add_daedalus(uint8_t *dst, int16_t *block, int stride)
|
||||
{
|
||||
@@ -187,3 +189,32 @@
|
||||
daedalus_recipe_dispatch_h264_deblock_luma_h_intra(g_dctx, pix, (size_t)stride,
|
||||
1, &meta);
|
||||
}
|
||||
+
|
||||
+/* Composes daedalus_h264_chroma_dc_hadamard_2x2 with the qmul scaling
|
||||
+ * that FFmpeg's reference does in one fused function (h264idct_template.c
|
||||
+ * ff_h264_chroma_dc_dequant_idct).
|
||||
+ *
|
||||
+ * The 4 DC coefficients are scattered across the per-MB coefficient
|
||||
+ * buffer at offsets [r*stride + c*xStride] (stride=32, xStride=16).
|
||||
+ * Extract into a contiguous int16[4], run the Hadamard, then apply
|
||||
+ * the qmul scale and write back to the original positions.
|
||||
+ *
|
||||
+ * No daedalus ctx needed; the Hadamard is a pure stateless primitive.
|
||||
+ */
|
||||
+void ff_h264_chroma_dc_dequant_idct_daedalus(int16_t *block, int qmul)
|
||||
+{
|
||||
+ enum { stride = 32, xStride = 16 };
|
||||
+ int16_t dc[4];
|
||||
+
|
||||
+ dc[0] = block[stride*0 + xStride*0];
|
||||
+ dc[1] = block[stride*0 + xStride*1];
|
||||
+ dc[2] = block[stride*1 + xStride*0];
|
||||
+ dc[3] = block[stride*1 + xStride*1];
|
||||
+
|
||||
+ daedalus_h264_chroma_dc_hadamard_2x2(dc);
|
||||
+
|
||||
+ block[stride*0 + xStride*0] = (int16_t)((int)dc[0] * qmul >> 7);
|
||||
+ block[stride*0 + xStride*1] = (int16_t)((int)dc[1] * qmul >> 7);
|
||||
+ block[stride*1 + xStride*0] = (int16_t)((int)dc[2] * qmul >> 7);
|
||||
+ block[stride*1 + xStride*1] = (int16_t)((int)dc[3] * qmul >> 7);
|
||||
+}
|
||||
diff --git a/libavcodec/aarch64/h264dsp_init_aarch64.c b/libavcodec/aarch64/h264dsp_init_aarch64.c
|
||||
--- a/libavcodec/aarch64/h264dsp_init_aarch64.c 2026-05-25 13:38:32.020346459 +0200
|
||||
+++ libavcodec/aarch64/h264dsp_init_aarch64.c 2026-05-25 13:38:32.033909804 +0200
|
||||
@@ -41,6 +41,7 @@
|
||||
int beta);
|
||||
void ff_h264_h_loop_filter_luma_intra_daedalus(uint8_t *pix, ptrdiff_t stride,
|
||||
int alpha, int beta);
|
||||
+void ff_h264_chroma_dc_dequant_idct_daedalus(int16_t *block, int qmul);
|
||||
void ff_h264_v_loop_filter_chroma_neon(uint8_t *pix, ptrdiff_t stride, int alpha,
|
||||
int beta, int8_t *tc0);
|
||||
void ff_h264_v_loop_filter_chroma_daedalus(uint8_t *pix, ptrdiff_t stride,
|
||||
@@ -135,6 +136,7 @@
|
||||
c->v_loop_filter_chroma_intra = ff_h264_v_loop_filter_chroma_intra_neon;
|
||||
|
||||
if (chroma_format_idc <= 1) {
|
||||
+ c->chroma_dc_dequant_idct = ff_h264_chroma_dc_dequant_idct_daedalus;
|
||||
c->h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_daedalus;
|
||||
c->h_loop_filter_chroma_intra = ff_h264_h_loop_filter_chroma_intra_neon;
|
||||
c->h_loop_filter_chroma_mbaff_intra = ff_h264_h_loop_filter_chroma_mbaff_intra_neon;
|
||||
--
|
||||
2.47.3
|
||||
|
||||
@@ -0,0 +1,245 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: claude-noether <claude-noether@noreply.localhost>
|
||||
Date: Sun, 25 May 2026 14:00:00 +0200
|
||||
Subject: [PATCH] avcodec/aarch64/h264qpel: route remaining qpel 8x8 positions through daedalus-fourier
|
||||
|
||||
Closes the H.264 qpel substitution. Extends 0007 (which routed only
|
||||
mc20 put_) to ALL 15 useful positions in BOTH the put_ and avg_
|
||||
tables, skipping mc00 (integer copy / pointer-only fast path).
|
||||
|
||||
29 substitutions total: 14 new put_ + 15 avg_. Each is a uniform
|
||||
wrapper around daedalus_recipe_dispatch_h264_qpel_{avg_,}mcXY exposed
|
||||
by daedalus-fourier PRs #15-#20.
|
||||
|
||||
All recipe-table entries route AUTO to CPU NEON (no QPU shaders
|
||||
for any qpel position other than mc20 yet), so this is plumbing-only
|
||||
NEON-to-NEON — bit-exact against the in-tree ff_*_h264_qpel8_*_neon
|
||||
path.
|
||||
|
||||
16x16 qpel tables ([0][...]) stay on the in-tree NEON. daedalus
|
||||
only exposes 8x8 today; 16x16 substitution can land once fourier
|
||||
provides those variants (likely just dispatching the 8x8 path four
|
||||
times with shifted dst/src offsets).
|
||||
|
||||
Refs reauktion/daedalus-v4l2#11 — substitution arc qpel buildout.
|
||||
---
|
||||
diff --git a/libavcodec/aarch64/h264_qpel_daedalus.c b/libavcodec/aarch64/h264_qpel_daedalus.c
|
||||
--- a/libavcodec/aarch64/h264_qpel_daedalus.c 2026-05-25 14:05:05.789298250 +0200
|
||||
+++ libavcodec/aarch64/h264_qpel_daedalus.c 2026-05-25 14:05:05.818358374 +0200
|
||||
@@ -1,10 +1,13 @@
|
||||
/*
|
||||
- * H.264 luma qpel mc20 (8x8, horizontal half-pel, 6-tap "put")
|
||||
- * — daedalus-fourier substitution shim.
|
||||
+ * H.264 luma qpel 8x8 — daedalus-fourier substitution shims (put_ + avg_).
|
||||
*
|
||||
- * Routes H264QpelContext.put_h264_qpel_pixels_tab[1][2] through
|
||||
- * daedalus_recipe_dispatch_h264_qpel_mc20 instead of
|
||||
- * ff_put_h264_qpel8_mc20_neon. The recipe layer picks the substrate
|
||||
+ * Routes ALL 15 useful positions in H264QpelContext's 8x8 put_ and
|
||||
+ * avg_ tables through daedalus_recipe_dispatch_h264_qpel_mc{XY}
|
||||
+ * (skipping mc00 which is integer copy / FFmpeg's pointer-only fast
|
||||
+ * path). Plumbing-only NEON-by-recipe — daedalus-fourier PRs #15-#20
|
||||
+ * exposed each variant via the same dispatch signature, so the
|
||||
+ * substitution is a uniform macro across put_/avg_ and across all
|
||||
+ * 15 mc positions. The recipe layer picks the substrate
|
||||
* (CPU NEON for cycle 9; QPU not viable — per-block 7.6 ns vs
|
||||
* ~250 ns QPU dispatch floor, see docs/k9_h264qpel_mc20.md).
|
||||
*
|
||||
@@ -48,3 +51,53 @@
|
||||
daedalus_recipe_dispatch_h264_qpel_mc20(g_dctx, dst, src, (size_t)stride,
|
||||
1, &meta);
|
||||
}
|
||||
+
|
||||
+
|
||||
+/* All other 8x8 qpel positions follow the same dispatch shape as mc20
|
||||
+ * above. The macro collapses ~600 LOC of one-wrapper-per-variant
|
||||
+ * boilerplate (29 variants total: 14 put_ + 15 avg_). */
|
||||
+#define DEFINE_QPEL_WRAPPER(type, suffix, dispatch_fn) \
|
||||
+void ff_ ## type ## _h264_qpel8_ ## suffix ## _daedalus(uint8_t *dst, \
|
||||
+ const uint8_t *src, ptrdiff_t stride); \
|
||||
+void ff_ ## type ## _h264_qpel8_ ## suffix ## _daedalus(uint8_t *dst, \
|
||||
+ const uint8_t *src, ptrdiff_t stride) \
|
||||
+{ \
|
||||
+ static const daedalus_h264_qpel_meta meta = { .dst_off = 0, .src_off = 0 }; \
|
||||
+ pthread_once(&g_dctx_once, daedalus_ctx_init_once); \
|
||||
+ dispatch_fn(g_dctx, dst, src, (size_t)stride, 1, &meta); \
|
||||
+}
|
||||
+
|
||||
+/* put_ variants (mc20 stays on the explicit definition above). */
|
||||
+DEFINE_QPEL_WRAPPER(put, mc10, daedalus_recipe_dispatch_h264_qpel_mc10)
|
||||
+DEFINE_QPEL_WRAPPER(put, mc30, daedalus_recipe_dispatch_h264_qpel_mc30)
|
||||
+DEFINE_QPEL_WRAPPER(put, mc01, daedalus_recipe_dispatch_h264_qpel_mc01)
|
||||
+DEFINE_QPEL_WRAPPER(put, mc11, daedalus_recipe_dispatch_h264_qpel_mc11)
|
||||
+DEFINE_QPEL_WRAPPER(put, mc21, daedalus_recipe_dispatch_h264_qpel_mc21)
|
||||
+DEFINE_QPEL_WRAPPER(put, mc31, daedalus_recipe_dispatch_h264_qpel_mc31)
|
||||
+DEFINE_QPEL_WRAPPER(put, mc02, daedalus_recipe_dispatch_h264_qpel_mc02)
|
||||
+DEFINE_QPEL_WRAPPER(put, mc12, daedalus_recipe_dispatch_h264_qpel_mc12)
|
||||
+DEFINE_QPEL_WRAPPER(put, mc22, daedalus_recipe_dispatch_h264_qpel_mc22)
|
||||
+DEFINE_QPEL_WRAPPER(put, mc32, daedalus_recipe_dispatch_h264_qpel_mc32)
|
||||
+DEFINE_QPEL_WRAPPER(put, mc03, daedalus_recipe_dispatch_h264_qpel_mc03)
|
||||
+DEFINE_QPEL_WRAPPER(put, mc13, daedalus_recipe_dispatch_h264_qpel_mc13)
|
||||
+DEFINE_QPEL_WRAPPER(put, mc23, daedalus_recipe_dispatch_h264_qpel_mc23)
|
||||
+DEFINE_QPEL_WRAPPER(put, mc33, daedalus_recipe_dispatch_h264_qpel_mc33)
|
||||
+
|
||||
+/* avg_ variants — all 15 useful positions. */
|
||||
+DEFINE_QPEL_WRAPPER(avg, mc10, daedalus_recipe_dispatch_h264_qpel_avg_mc10)
|
||||
+DEFINE_QPEL_WRAPPER(avg, mc20, daedalus_recipe_dispatch_h264_qpel_avg_mc20)
|
||||
+DEFINE_QPEL_WRAPPER(avg, mc30, daedalus_recipe_dispatch_h264_qpel_avg_mc30)
|
||||
+DEFINE_QPEL_WRAPPER(avg, mc01, daedalus_recipe_dispatch_h264_qpel_avg_mc01)
|
||||
+DEFINE_QPEL_WRAPPER(avg, mc11, daedalus_recipe_dispatch_h264_qpel_avg_mc11)
|
||||
+DEFINE_QPEL_WRAPPER(avg, mc21, daedalus_recipe_dispatch_h264_qpel_avg_mc21)
|
||||
+DEFINE_QPEL_WRAPPER(avg, mc31, daedalus_recipe_dispatch_h264_qpel_avg_mc31)
|
||||
+DEFINE_QPEL_WRAPPER(avg, mc02, daedalus_recipe_dispatch_h264_qpel_avg_mc02)
|
||||
+DEFINE_QPEL_WRAPPER(avg, mc12, daedalus_recipe_dispatch_h264_qpel_avg_mc12)
|
||||
+DEFINE_QPEL_WRAPPER(avg, mc22, daedalus_recipe_dispatch_h264_qpel_avg_mc22)
|
||||
+DEFINE_QPEL_WRAPPER(avg, mc32, daedalus_recipe_dispatch_h264_qpel_avg_mc32)
|
||||
+DEFINE_QPEL_WRAPPER(avg, mc03, daedalus_recipe_dispatch_h264_qpel_avg_mc03)
|
||||
+DEFINE_QPEL_WRAPPER(avg, mc13, daedalus_recipe_dispatch_h264_qpel_avg_mc13)
|
||||
+DEFINE_QPEL_WRAPPER(avg, mc23, daedalus_recipe_dispatch_h264_qpel_avg_mc23)
|
||||
+DEFINE_QPEL_WRAPPER(avg, mc33, daedalus_recipe_dispatch_h264_qpel_avg_mc33)
|
||||
+
|
||||
+#undef DEFINE_QPEL_WRAPPER
|
||||
diff --git a/libavcodec/aarch64/h264qpel_init_aarch64.c b/libavcodec/aarch64/h264qpel_init_aarch64.c
|
||||
--- a/libavcodec/aarch64/h264qpel_init_aarch64.c 2026-05-25 14:05:05.790403989 +0200
|
||||
+++ libavcodec/aarch64/h264qpel_init_aarch64.c 2026-05-25 14:05:05.819136071 +0200
|
||||
@@ -50,6 +50,64 @@
|
||||
void ff_put_h264_qpel8_mc30_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
|
||||
void ff_put_h264_qpel8_mc20_daedalus(uint8_t *dst, const uint8_t *src,
|
||||
ptrdiff_t stride);
|
||||
+void ff_put_h264_qpel8_mc10_daedalus(uint8_t *dst, const uint8_t *src,
|
||||
+ ptrdiff_t stride);
|
||||
+void ff_put_h264_qpel8_mc30_daedalus(uint8_t *dst, const uint8_t *src,
|
||||
+ ptrdiff_t stride);
|
||||
+void ff_put_h264_qpel8_mc01_daedalus(uint8_t *dst, const uint8_t *src,
|
||||
+ ptrdiff_t stride);
|
||||
+void ff_put_h264_qpel8_mc11_daedalus(uint8_t *dst, const uint8_t *src,
|
||||
+ ptrdiff_t stride);
|
||||
+void ff_put_h264_qpel8_mc21_daedalus(uint8_t *dst, const uint8_t *src,
|
||||
+ ptrdiff_t stride);
|
||||
+void ff_put_h264_qpel8_mc31_daedalus(uint8_t *dst, const uint8_t *src,
|
||||
+ ptrdiff_t stride);
|
||||
+void ff_put_h264_qpel8_mc02_daedalus(uint8_t *dst, const uint8_t *src,
|
||||
+ ptrdiff_t stride);
|
||||
+void ff_put_h264_qpel8_mc12_daedalus(uint8_t *dst, const uint8_t *src,
|
||||
+ ptrdiff_t stride);
|
||||
+void ff_put_h264_qpel8_mc22_daedalus(uint8_t *dst, const uint8_t *src,
|
||||
+ ptrdiff_t stride);
|
||||
+void ff_put_h264_qpel8_mc32_daedalus(uint8_t *dst, const uint8_t *src,
|
||||
+ ptrdiff_t stride);
|
||||
+void ff_put_h264_qpel8_mc03_daedalus(uint8_t *dst, const uint8_t *src,
|
||||
+ ptrdiff_t stride);
|
||||
+void ff_put_h264_qpel8_mc13_daedalus(uint8_t *dst, const uint8_t *src,
|
||||
+ ptrdiff_t stride);
|
||||
+void ff_put_h264_qpel8_mc23_daedalus(uint8_t *dst, const uint8_t *src,
|
||||
+ ptrdiff_t stride);
|
||||
+void ff_put_h264_qpel8_mc33_daedalus(uint8_t *dst, const uint8_t *src,
|
||||
+ ptrdiff_t stride);
|
||||
+void ff_avg_h264_qpel8_mc10_daedalus(uint8_t *dst, const uint8_t *src,
|
||||
+ ptrdiff_t stride);
|
||||
+void ff_avg_h264_qpel8_mc20_daedalus(uint8_t *dst, const uint8_t *src,
|
||||
+ ptrdiff_t stride);
|
||||
+void ff_avg_h264_qpel8_mc30_daedalus(uint8_t *dst, const uint8_t *src,
|
||||
+ ptrdiff_t stride);
|
||||
+void ff_avg_h264_qpel8_mc01_daedalus(uint8_t *dst, const uint8_t *src,
|
||||
+ ptrdiff_t stride);
|
||||
+void ff_avg_h264_qpel8_mc11_daedalus(uint8_t *dst, const uint8_t *src,
|
||||
+ ptrdiff_t stride);
|
||||
+void ff_avg_h264_qpel8_mc21_daedalus(uint8_t *dst, const uint8_t *src,
|
||||
+ ptrdiff_t stride);
|
||||
+void ff_avg_h264_qpel8_mc31_daedalus(uint8_t *dst, const uint8_t *src,
|
||||
+ ptrdiff_t stride);
|
||||
+void ff_avg_h264_qpel8_mc02_daedalus(uint8_t *dst, const uint8_t *src,
|
||||
+ ptrdiff_t stride);
|
||||
+void ff_avg_h264_qpel8_mc12_daedalus(uint8_t *dst, const uint8_t *src,
|
||||
+ ptrdiff_t stride);
|
||||
+void ff_avg_h264_qpel8_mc22_daedalus(uint8_t *dst, const uint8_t *src,
|
||||
+ ptrdiff_t stride);
|
||||
+void ff_avg_h264_qpel8_mc32_daedalus(uint8_t *dst, const uint8_t *src,
|
||||
+ ptrdiff_t stride);
|
||||
+void ff_avg_h264_qpel8_mc03_daedalus(uint8_t *dst, const uint8_t *src,
|
||||
+ ptrdiff_t stride);
|
||||
+void ff_avg_h264_qpel8_mc13_daedalus(uint8_t *dst, const uint8_t *src,
|
||||
+ ptrdiff_t stride);
|
||||
+void ff_avg_h264_qpel8_mc23_daedalus(uint8_t *dst, const uint8_t *src,
|
||||
+ ptrdiff_t stride);
|
||||
+void ff_avg_h264_qpel8_mc33_daedalus(uint8_t *dst, const uint8_t *src,
|
||||
+ ptrdiff_t stride);
|
||||
void ff_put_h264_qpel8_mc01_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
|
||||
void ff_put_h264_qpel8_mc11_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
|
||||
void ff_put_h264_qpel8_mc21_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
|
||||
@@ -164,21 +222,21 @@
|
||||
c->put_h264_qpel_pixels_tab[0][15] = ff_put_h264_qpel16_mc33_neon;
|
||||
|
||||
c->put_h264_qpel_pixels_tab[1][ 0] = ff_put_h264_qpel8_mc00_neon;
|
||||
- c->put_h264_qpel_pixels_tab[1][ 1] = ff_put_h264_qpel8_mc10_neon;
|
||||
+ c->put_h264_qpel_pixels_tab[1][ 1] = ff_put_h264_qpel8_mc10_daedalus;
|
||||
c->put_h264_qpel_pixels_tab[1][ 2] = ff_put_h264_qpel8_mc20_daedalus;
|
||||
- c->put_h264_qpel_pixels_tab[1][ 3] = ff_put_h264_qpel8_mc30_neon;
|
||||
- c->put_h264_qpel_pixels_tab[1][ 4] = ff_put_h264_qpel8_mc01_neon;
|
||||
- c->put_h264_qpel_pixels_tab[1][ 5] = ff_put_h264_qpel8_mc11_neon;
|
||||
- c->put_h264_qpel_pixels_tab[1][ 6] = ff_put_h264_qpel8_mc21_neon;
|
||||
- c->put_h264_qpel_pixels_tab[1][ 7] = ff_put_h264_qpel8_mc31_neon;
|
||||
- c->put_h264_qpel_pixels_tab[1][ 8] = ff_put_h264_qpel8_mc02_neon;
|
||||
- c->put_h264_qpel_pixels_tab[1][ 9] = ff_put_h264_qpel8_mc12_neon;
|
||||
- c->put_h264_qpel_pixels_tab[1][10] = ff_put_h264_qpel8_mc22_neon;
|
||||
- c->put_h264_qpel_pixels_tab[1][11] = ff_put_h264_qpel8_mc32_neon;
|
||||
- c->put_h264_qpel_pixels_tab[1][12] = ff_put_h264_qpel8_mc03_neon;
|
||||
- c->put_h264_qpel_pixels_tab[1][13] = ff_put_h264_qpel8_mc13_neon;
|
||||
- c->put_h264_qpel_pixels_tab[1][14] = ff_put_h264_qpel8_mc23_neon;
|
||||
- c->put_h264_qpel_pixels_tab[1][15] = ff_put_h264_qpel8_mc33_neon;
|
||||
+ c->put_h264_qpel_pixels_tab[1][ 3] = ff_put_h264_qpel8_mc30_daedalus;
|
||||
+ c->put_h264_qpel_pixels_tab[1][ 4] = ff_put_h264_qpel8_mc01_daedalus;
|
||||
+ c->put_h264_qpel_pixels_tab[1][ 5] = ff_put_h264_qpel8_mc11_daedalus;
|
||||
+ c->put_h264_qpel_pixels_tab[1][ 6] = ff_put_h264_qpel8_mc21_daedalus;
|
||||
+ c->put_h264_qpel_pixels_tab[1][ 7] = ff_put_h264_qpel8_mc31_daedalus;
|
||||
+ c->put_h264_qpel_pixels_tab[1][ 8] = ff_put_h264_qpel8_mc02_daedalus;
|
||||
+ c->put_h264_qpel_pixels_tab[1][ 9] = ff_put_h264_qpel8_mc12_daedalus;
|
||||
+ c->put_h264_qpel_pixels_tab[1][10] = ff_put_h264_qpel8_mc22_daedalus;
|
||||
+ c->put_h264_qpel_pixels_tab[1][11] = ff_put_h264_qpel8_mc32_daedalus;
|
||||
+ c->put_h264_qpel_pixels_tab[1][12] = ff_put_h264_qpel8_mc03_daedalus;
|
||||
+ c->put_h264_qpel_pixels_tab[1][13] = ff_put_h264_qpel8_mc13_daedalus;
|
||||
+ c->put_h264_qpel_pixels_tab[1][14] = ff_put_h264_qpel8_mc23_daedalus;
|
||||
+ c->put_h264_qpel_pixels_tab[1][15] = ff_put_h264_qpel8_mc33_daedalus;
|
||||
|
||||
c->avg_h264_qpel_pixels_tab[0][ 0] = ff_avg_h264_qpel16_mc00_neon;
|
||||
c->avg_h264_qpel_pixels_tab[0][ 1] = ff_avg_h264_qpel16_mc10_neon;
|
||||
@@ -198,21 +256,21 @@
|
||||
c->avg_h264_qpel_pixels_tab[0][15] = ff_avg_h264_qpel16_mc33_neon;
|
||||
|
||||
c->avg_h264_qpel_pixels_tab[1][ 0] = ff_avg_h264_qpel8_mc00_neon;
|
||||
- c->avg_h264_qpel_pixels_tab[1][ 1] = ff_avg_h264_qpel8_mc10_neon;
|
||||
- c->avg_h264_qpel_pixels_tab[1][ 2] = ff_avg_h264_qpel8_mc20_neon;
|
||||
- c->avg_h264_qpel_pixels_tab[1][ 3] = ff_avg_h264_qpel8_mc30_neon;
|
||||
- c->avg_h264_qpel_pixels_tab[1][ 4] = ff_avg_h264_qpel8_mc01_neon;
|
||||
- c->avg_h264_qpel_pixels_tab[1][ 5] = ff_avg_h264_qpel8_mc11_neon;
|
||||
- c->avg_h264_qpel_pixels_tab[1][ 6] = ff_avg_h264_qpel8_mc21_neon;
|
||||
- c->avg_h264_qpel_pixels_tab[1][ 7] = ff_avg_h264_qpel8_mc31_neon;
|
||||
- c->avg_h264_qpel_pixels_tab[1][ 8] = ff_avg_h264_qpel8_mc02_neon;
|
||||
- c->avg_h264_qpel_pixels_tab[1][ 9] = ff_avg_h264_qpel8_mc12_neon;
|
||||
- c->avg_h264_qpel_pixels_tab[1][10] = ff_avg_h264_qpel8_mc22_neon;
|
||||
- c->avg_h264_qpel_pixels_tab[1][11] = ff_avg_h264_qpel8_mc32_neon;
|
||||
- c->avg_h264_qpel_pixels_tab[1][12] = ff_avg_h264_qpel8_mc03_neon;
|
||||
- c->avg_h264_qpel_pixels_tab[1][13] = ff_avg_h264_qpel8_mc13_neon;
|
||||
- c->avg_h264_qpel_pixels_tab[1][14] = ff_avg_h264_qpel8_mc23_neon;
|
||||
- c->avg_h264_qpel_pixels_tab[1][15] = ff_avg_h264_qpel8_mc33_neon;
|
||||
+ c->avg_h264_qpel_pixels_tab[1][ 1] = ff_avg_h264_qpel8_mc10_daedalus;
|
||||
+ c->avg_h264_qpel_pixels_tab[1][ 2] = ff_avg_h264_qpel8_mc20_daedalus;
|
||||
+ c->avg_h264_qpel_pixels_tab[1][ 3] = ff_avg_h264_qpel8_mc30_daedalus;
|
||||
+ c->avg_h264_qpel_pixels_tab[1][ 4] = ff_avg_h264_qpel8_mc01_daedalus;
|
||||
+ c->avg_h264_qpel_pixels_tab[1][ 5] = ff_avg_h264_qpel8_mc11_daedalus;
|
||||
+ c->avg_h264_qpel_pixels_tab[1][ 6] = ff_avg_h264_qpel8_mc21_daedalus;
|
||||
+ c->avg_h264_qpel_pixels_tab[1][ 7] = ff_avg_h264_qpel8_mc31_daedalus;
|
||||
+ c->avg_h264_qpel_pixels_tab[1][ 8] = ff_avg_h264_qpel8_mc02_daedalus;
|
||||
+ c->avg_h264_qpel_pixels_tab[1][ 9] = ff_avg_h264_qpel8_mc12_daedalus;
|
||||
+ c->avg_h264_qpel_pixels_tab[1][10] = ff_avg_h264_qpel8_mc22_daedalus;
|
||||
+ c->avg_h264_qpel_pixels_tab[1][11] = ff_avg_h264_qpel8_mc32_daedalus;
|
||||
+ c->avg_h264_qpel_pixels_tab[1][12] = ff_avg_h264_qpel8_mc03_daedalus;
|
||||
+ c->avg_h264_qpel_pixels_tab[1][13] = ff_avg_h264_qpel8_mc13_daedalus;
|
||||
+ c->avg_h264_qpel_pixels_tab[1][14] = ff_avg_h264_qpel8_mc23_daedalus;
|
||||
+ c->avg_h264_qpel_pixels_tab[1][15] = ff_avg_h264_qpel8_mc33_daedalus;
|
||||
} else if (have_neon(cpu_flags) && bit_depth == 10) {
|
||||
c->put_h264_qpel_pixels_tab[0][ 1] = ff_put_h264_qpel16_mc10_neon_10;
|
||||
c->put_h264_qpel_pixels_tab[0][ 2] = ff_put_h264_qpel16_mc20_neon_10;
|
||||
--
|
||||
2.47.3
|
||||
|
||||
+120
@@ -0,0 +1,120 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: claude-noether <claude-noether@noreply.localhost>
|
||||
Date: Sun, 25 May 2026 14:30:00 +0200
|
||||
Subject: [PATCH] avcodec/aarch64/h264dsp: route H.264 chroma intra deblock (4:2:0) through daedalus-fourier
|
||||
|
||||
Substitutes c->v_loop_filter_chroma_intra and c->h_loop_filter_chroma_intra
|
||||
with daedalus wrappers in the bit_depth=8 / chroma_format_idc<=1 (4:2:0)
|
||||
branch. 4:2:2 stays on the in-tree NEON path (the daedalus chroma intra
|
||||
dispatch is 4:2:0-only).
|
||||
|
||||
The fourier dispatches were exposed in PR #11 (DEFINE_INTRA_DISPATCH
|
||||
macro generates the public daedalus_dispatch_h264_deblock_chroma_*_intra
|
||||
symbols + recipe wrappers).
|
||||
|
||||
Re-architects the chroma init: v_loop_filter_chroma_intra was previously
|
||||
assigned unconditionally to the NEON variant (which works for both 4:2:0
|
||||
and 4:2:2). We now assign it INSIDE both branches of the chroma_format_idc
|
||||
conditional, with the 4:2:0 branch picking daedalus and the 4:2:2 branch
|
||||
keeping NEON. No regression for 4:2:2 streams.
|
||||
|
||||
Same NEON-to-NEON via recipe shape as 0010 luma intra.
|
||||
|
||||
Refs reauktion/daedalus-v4l2#11 — substitution arc chroma intra.
|
||||
---
|
||||
diff --git a/libavcodec/aarch64/h264_idct_daedalus.c b/libavcodec/aarch64/h264_idct_daedalus.c
|
||||
--- a/libavcodec/aarch64/h264_idct_daedalus.c 2026-05-25 14:21:08.267156263 +0200
|
||||
+++ libavcodec/aarch64/h264_idct_daedalus.c 2026-05-25 14:21:08.287745931 +0200
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
- * H.264 4x4 / 8x8 IDCT + luma v/h (inter+intra) + chroma v/h deblock + chroma DC Hadamard — daedalus-fourier substitution shims.
|
||||
+ * H.264 4x4 / 8x8 IDCT + luma v/h (inter+intra) + chroma v/h (inter+intra) deblock + chroma DC Hadamard — daedalus-fourier substitution shims.
|
||||
*
|
||||
* Routes H264DSPContext.idct_add → daedalus_recipe_dispatch_h264_idct4
|
||||
* H264DSPContext.idct8_add → daedalus_recipe_dispatch_h264_idct8
|
||||
@@ -9,6 +9,8 @@
|
||||
* H264DSPContext.h_loop_filter_chroma → daedalus_recipe_dispatch_h264_deblock_chroma_h
|
||||
* H264DSPContext.v_loop_filter_luma_intra → daedalus_recipe_dispatch_h264_deblock_luma_v_intra
|
||||
* H264DSPContext.h_loop_filter_luma_intra → daedalus_recipe_dispatch_h264_deblock_luma_h_intra
|
||||
+ * H264DSPContext.v_loop_filter_chroma_intra → daedalus_recipe_dispatch_h264_deblock_chroma_v_intra
|
||||
+ * H264DSPContext.h_loop_filter_chroma_intra → daedalus_recipe_dispatch_h264_deblock_chroma_h_intra
|
||||
* H264DSPContext.chroma_dc_dequant_idct → daedalus_h264_chroma_dc_hadamard_2x2 + caller-side qmul
|
||||
* instead of the in-tree ff_h264_*_neon assembly. The recipe layer
|
||||
* picks the substrate (CPU NEON for cycles 6 + 7 by default; cycle 8
|
||||
@@ -61,6 +63,10 @@
|
||||
int alpha, int beta);
|
||||
void ff_h264_h_loop_filter_luma_intra_daedalus(uint8_t *pix, ptrdiff_t stride,
|
||||
int alpha, int beta);
|
||||
+void ff_h264_v_loop_filter_chroma_intra_daedalus(uint8_t *pix, ptrdiff_t stride,
|
||||
+ int alpha, int beta);
|
||||
+void ff_h264_h_loop_filter_chroma_intra_daedalus(uint8_t *pix, ptrdiff_t stride,
|
||||
+ int alpha, int beta);
|
||||
void ff_h264_chroma_dc_dequant_idct_daedalus(int16_t *block, int qmul);
|
||||
|
||||
void ff_h264_idct_add_daedalus(uint8_t *dst, int16_t *block, int stride)
|
||||
@@ -218,3 +224,30 @@
|
||||
block[stride*1 + xStride*0] = (int16_t)((int)dc[2] * qmul >> 7);
|
||||
block[stride*1 + xStride*1] = (int16_t)((int)dc[3] * qmul >> 7);
|
||||
}
|
||||
+
|
||||
+void ff_h264_v_loop_filter_chroma_intra_daedalus(uint8_t *pix, ptrdiff_t stride,
|
||||
+ int alpha, int beta)
|
||||
+{
|
||||
+ daedalus_h264_deblock_meta meta = {
|
||||
+ .dst_off = 0,
|
||||
+ .alpha = alpha,
|
||||
+ .beta = beta,
|
||||
+ };
|
||||
+ /* tc0[] unused for intra (bS=4 hardcodes the strength). */
|
||||
+ pthread_once(&g_dctx_once, daedalus_ctx_init_once);
|
||||
+ daedalus_recipe_dispatch_h264_deblock_chroma_v_intra(g_dctx, pix, (size_t)stride,
|
||||
+ 1, &meta);
|
||||
+}
|
||||
+
|
||||
+void ff_h264_h_loop_filter_chroma_intra_daedalus(uint8_t *pix, ptrdiff_t stride,
|
||||
+ int alpha, int beta)
|
||||
+{
|
||||
+ daedalus_h264_deblock_meta meta = {
|
||||
+ .dst_off = 0,
|
||||
+ .alpha = alpha,
|
||||
+ .beta = beta,
|
||||
+ };
|
||||
+ pthread_once(&g_dctx_once, daedalus_ctx_init_once);
|
||||
+ daedalus_recipe_dispatch_h264_deblock_chroma_h_intra(g_dctx, pix, (size_t)stride,
|
||||
+ 1, &meta);
|
||||
+}
|
||||
diff --git a/libavcodec/aarch64/h264dsp_init_aarch64.c b/libavcodec/aarch64/h264dsp_init_aarch64.c
|
||||
--- a/libavcodec/aarch64/h264dsp_init_aarch64.c 2026-05-25 14:21:08.268311057 +0200
|
||||
+++ libavcodec/aarch64/h264dsp_init_aarch64.c 2026-05-25 14:21:08.287886563 +0200
|
||||
@@ -42,6 +42,10 @@
|
||||
void ff_h264_h_loop_filter_luma_intra_daedalus(uint8_t *pix, ptrdiff_t stride,
|
||||
int alpha, int beta);
|
||||
void ff_h264_chroma_dc_dequant_idct_daedalus(int16_t *block, int qmul);
|
||||
+void ff_h264_v_loop_filter_chroma_intra_daedalus(uint8_t *pix, ptrdiff_t stride,
|
||||
+ int alpha, int beta);
|
||||
+void ff_h264_h_loop_filter_chroma_intra_daedalus(uint8_t *pix, ptrdiff_t stride,
|
||||
+ int alpha, int beta);
|
||||
void ff_h264_v_loop_filter_chroma_neon(uint8_t *pix, ptrdiff_t stride, int alpha,
|
||||
int beta, int8_t *tc0);
|
||||
void ff_h264_v_loop_filter_chroma_daedalus(uint8_t *pix, ptrdiff_t stride,
|
||||
@@ -133,14 +137,15 @@
|
||||
c->h_loop_filter_luma_intra= ff_h264_h_loop_filter_luma_intra_daedalus;
|
||||
|
||||
c->v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_daedalus;
|
||||
- c->v_loop_filter_chroma_intra = ff_h264_v_loop_filter_chroma_intra_neon;
|
||||
|
||||
if (chroma_format_idc <= 1) {
|
||||
c->chroma_dc_dequant_idct = ff_h264_chroma_dc_dequant_idct_daedalus;
|
||||
+ c->v_loop_filter_chroma_intra = ff_h264_v_loop_filter_chroma_intra_daedalus;
|
||||
c->h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_daedalus;
|
||||
- c->h_loop_filter_chroma_intra = ff_h264_h_loop_filter_chroma_intra_neon;
|
||||
+ c->h_loop_filter_chroma_intra = ff_h264_h_loop_filter_chroma_intra_daedalus;
|
||||
c->h_loop_filter_chroma_mbaff_intra = ff_h264_h_loop_filter_chroma_mbaff_intra_neon;
|
||||
} else {
|
||||
+ c->v_loop_filter_chroma_intra = ff_h264_v_loop_filter_chroma_intra_neon;
|
||||
c->h_loop_filter_chroma = ff_h264_h_loop_filter_chroma422_neon;
|
||||
c->h_loop_filter_chroma_mbaff = ff_h264_h_loop_filter_chroma_neon;
|
||||
c->h_loop_filter_chroma_intra = ff_h264_h_loop_filter_chroma422_intra_neon;
|
||||
--
|
||||
2.47.3
|
||||
|
||||
@@ -24,8 +24,13 @@ _srcname=FFmpeg
|
||||
_version='8.1'
|
||||
_commit='b57fbbe50c9b2656fad86a1a7eeabfd2b2a50935' # v4l2-request-n8.1 tip 2026-04-24
|
||||
pkgver=8.1.r123329.b57fbbe
|
||||
pkgrel=4
|
||||
pkgrel=10 # pkgrel=10 — H.264 luma qpel mc20 daedalus-fourier substitution (cycle 9, 2026-05-23)
|
||||
epoch=2
|
||||
|
||||
# daedalus-fourier pin. Earlier pin 209a421 = PR #2 merge (Phase 8c — public API
|
||||
# gains daedalus_recipe_dispatch_h264_qpel_mc20 + DAEDALUS_KERNEL_H264_QPEL_MC20).
|
||||
# Cycle 9 closes the libavcodec.so substitution arc started at cycle 6.
|
||||
_daedalus_fourier_commit='b9f9ff2a89c068aea54dcb52b543afddad28311e' # PR #25 — public chroma DC Hadamard symbol
|
||||
pkgdesc='FFmpeg with V4L2 Request API hwaccel (Rockchip / Allwinner stateless decode)'
|
||||
arch=('aarch64')
|
||||
url='https://github.com/Kwiboo/FFmpeg'
|
||||
@@ -34,6 +39,7 @@ depends=(
|
||||
alsa-lib
|
||||
bzip2
|
||||
fontconfig
|
||||
vulkan-icd-loader
|
||||
fribidi
|
||||
gmp
|
||||
gnutls
|
||||
@@ -59,10 +65,13 @@ depends=(
|
||||
zlib
|
||||
)
|
||||
makedepends=(
|
||||
cmake
|
||||
git
|
||||
linux-api-headers
|
||||
mesa
|
||||
nasm
|
||||
ninja
|
||||
vulkan-headers
|
||||
)
|
||||
provides=(
|
||||
libavcodec.so
|
||||
@@ -78,8 +87,21 @@ provides=(
|
||||
conflicts=(ffmpeg)
|
||||
replaces=(ffmpeg ffmpeg-v4l2-request-git)
|
||||
source=("git+https://github.com/Kwiboo/FFmpeg.git#commit=${_commit}"
|
||||
'0001-libudev-bypass-fallback.patch')
|
||||
sha256sums=('SKIP' 'SKIP')
|
||||
"daedalus-fourier-${_daedalus_fourier_commit}.tar.gz::https://git.reauktion.de/marfrit/daedalus-fourier/archive/${_daedalus_fourier_commit}.tar.gz"
|
||||
'0001-libudev-bypass-fallback.patch'
|
||||
'0002-nv15-to-p010-unpack.patch'
|
||||
'0003-h264-idct4-daedalus-fourier.patch'
|
||||
'0004-h264-idct8-daedalus-fourier.patch'
|
||||
'0005-h264-deblock-luma-v-daedalus-fourier.patch'
|
||||
'0006-h264-restore-low-delay.patch'
|
||||
'0007-h264-qpel-mc20-daedalus-fourier.patch'
|
||||
'0008-h264-deblock-luma-h-daedalus-fourier.patch'
|
||||
'0009-h264-deblock-chroma-daedalus-fourier.patch'
|
||||
'0010-h264-deblock-luma-intra-daedalus-fourier.patch'
|
||||
'0011-h264-chroma-dc-hadamard-daedalus-fourier.patch'
|
||||
'0012-h264-qpel-rest-daedalus-fourier.patch'
|
||||
'0013-h264-deblock-chroma-intra-daedalus-fourier.patch')
|
||||
sha256sums=('SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP')
|
||||
|
||||
pkgver() {
|
||||
cd "${_srcname}"
|
||||
@@ -91,9 +113,36 @@ pkgver() {
|
||||
prepare() {
|
||||
cd "${_srcname}"
|
||||
patch -Np1 -i "${srcdir}/0001-libudev-bypass-fallback.patch"
|
||||
patch -Np1 -i "${srcdir}/0002-nv15-to-p010-unpack.patch"
|
||||
patch -Np1 -i "${srcdir}/0003-h264-idct4-daedalus-fourier.patch"
|
||||
patch -Np1 -i "${srcdir}/0004-h264-idct8-daedalus-fourier.patch"
|
||||
patch -Np1 -i "${srcdir}/0005-h264-deblock-luma-v-daedalus-fourier.patch"
|
||||
patch -Np1 -i "${srcdir}/0006-h264-restore-low-delay.patch"
|
||||
patch -Np1 -i "${srcdir}/0007-h264-qpel-mc20-daedalus-fourier.patch"
|
||||
patch -Np1 -i "${srcdir}/0008-h264-deblock-luma-h-daedalus-fourier.patch"
|
||||
patch -Np1 -i "${srcdir}/0009-h264-deblock-chroma-daedalus-fourier.patch"
|
||||
patch -Np1 -i "${srcdir}/0010-h264-deblock-luma-intra-daedalus-fourier.patch"
|
||||
patch -Np1 -i "${srcdir}/0011-h264-chroma-dc-hadamard-daedalus-fourier.patch"
|
||||
patch -Np1 -i "${srcdir}/0012-h264-qpel-rest-daedalus-fourier.patch"
|
||||
patch -Np1 -i "${srcdir}/0013-h264-deblock-chroma-intra-daedalus-fourier.patch"
|
||||
}
|
||||
|
||||
build() {
|
||||
# --- daedalus-fourier: build static .a with PIC, install to a
|
||||
# per-build prefix; libavcodec.so links it into the shared object so
|
||||
# H264DSPContext.idct_add (and follow-up kernels) dispatch through
|
||||
# the daedalus recipe layer instead of the in-tree NEON .S code. ---
|
||||
local _fourier_prefix="${srcdir}/fourier-prefix"
|
||||
mkdir -p "${_fourier_prefix}"
|
||||
pushd "${srcdir}"/daedalus-fourier >/dev/null
|
||||
cmake -B build -G Ninja \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DCMAKE_POSITION_INDEPENDENT_CODE=ON \
|
||||
-DCMAKE_INSTALL_PREFIX="${_fourier_prefix}"
|
||||
cmake --build build --target daedalus_core
|
||||
cmake --install build
|
||||
popd >/dev/null
|
||||
|
||||
cd "${_srcname}"
|
||||
|
||||
# FFmpeg's configure resolves the compiler via `which` and bakes the
|
||||
@@ -145,6 +194,9 @@ build() {
|
||||
--enable-libx265 \
|
||||
--enable-libwebp \
|
||||
\
|
||||
--extra-cflags="-I${_fourier_prefix}/include" \
|
||||
--extra-ldflags="-L${_fourier_prefix}/lib" \
|
||||
--extra-libs="-ldaedalus_core -lvulkan -lpthread" \
|
||||
--host-cflags='-fPIC'
|
||||
|
||||
make
|
||||
+23
-22
@@ -18,27 +18,30 @@ This patch adds a sibling init path, `InitV4L2RequestDecoder`, that:
|
||||
* looks up the codec via two complementary mechanisms libavcodec
|
||||
uses for v4l2_request:
|
||||
- **named codec** (`h264_v4l2request`, `vp8_v4l2request`, etc.):
|
||||
the legacy AVCodec-per-hwaccel registration. ALARM, Debian,
|
||||
and most distros building with --enable-v4l2-request expose
|
||||
this (avcodec_find_decoder_by_name lookup).
|
||||
- **generic codec + AV_HWDEVICE_TYPE_DRM** in `hw_configs`:
|
||||
the modern hwaccel registration on some upstream-only ffmpeg
|
||||
builds.
|
||||
the legacy AVCodec-per-hwaccel registration.
|
||||
- **generic codec + hw_configs walk**: the modern hwaccel
|
||||
registration. Accepts EITHER AV_HWDEVICE_TYPE_DRM (legacy
|
||||
ffmpeg-v4l2-request-fork output prior to FFmpeg 7.1) OR
|
||||
AV_HWDEVICE_TYPE_V4L2REQUEST (FFmpeg 7.1+ dedicated enum,
|
||||
value 13 on Kwiboo's no-AMF tree, 14 on upstream-AMF tree).
|
||||
Mozilla's bundled libavutil headers may not have the V4L2REQUEST
|
||||
enumerator, so the test compares the integer value via an `(int)` cast.
|
||||
Probes named-codec first (explicit, portable) and falls back to
|
||||
walking the generic codec's `hw_configs` for the DRM device type;
|
||||
* creates an `AV_HWDEVICE_TYPE_DRM` hwdevice context bound to
|
||||
`/dev/dri/renderD128` via the new `av_hwdevice_ctx_create` wrapper
|
||||
(patch 2/4) and attaches it to the codec context;
|
||||
walking the generic codec's `hw_configs` for either device type;
|
||||
* creates an hwdevice context bound to `/dev/dri/renderD128`. Uses
|
||||
integer 13 (V4L2REQUEST as defined by Kwiboo's v4l2-request-n7.1.3
|
||||
tree, what our libavcodec61-fourier emits) cast to enum
|
||||
AVHWDeviceType for the av_hwdevice_ctx_create call;
|
||||
* reuses the existing `ChooseV4L2PixelFormat` get-format callback
|
||||
(already returns `AV_PIX_FMT_DRM_PRIME`) and the existing
|
||||
`apply_cropping = 0` constraint.
|
||||
|
||||
`InitV4L2RequestDecoder` is invoked **before** `InitV4L2Decoder` in
|
||||
`InitHWDecoderIfAllowed`. On Rockchip mainline it succeeds via either
|
||||
mechanism (ALARM uses the named codec). On Pi4 / Mediatek /
|
||||
vendor-MPP-stateful boards neither mechanism is registered for the
|
||||
codec, the function bails out, and the existing stateful
|
||||
`InitV4L2Decoder` runs as before. No regression of stateful boards.
|
||||
mechanism. On Pi4 / Mediatek / vendor-MPP-stateful boards neither
|
||||
mechanism is registered for the codec, the function bails out, and the
|
||||
existing stateful `InitV4L2Decoder` runs as before. No regression of
|
||||
stateful boards.
|
||||
|
||||
`mDRMDeviceContext` is unconditionally `av_buffer_unref`'d in
|
||||
`ProcessShutdown` (no-op when null). Gated behind
|
||||
@@ -46,9 +49,8 @@ codec, the function bails out, and the existing stateful
|
||||
|
||||
Bug 1969297.
|
||||
|
||||
diff --git a/dom/media/platforms/ffmpeg/FFmpegVideoDecoder.h b/dom/media/platforms/ffmpeg/FFmpegVideoDecoder.h
|
||||
--- a/dom/media/platforms/ffmpeg/FFmpegVideoDecoder.h 2026-03-18 19:22:14.000000000 +0000
|
||||
+++ b/dom/media/platforms/ffmpeg/FFmpegVideoDecoder.h 2026-04-27 20:43:39.347992674 +0000
|
||||
--- a/dom/media/platforms/ffmpeg/FFmpegVideoDecoder.h 2026-05-21 04:57:59.570946601 +0000
|
||||
+++ b/dom/media/platforms/ffmpeg/FFmpegVideoDecoder.h 2026-05-21 04:57:59.876488776 +0000
|
||||
@@ -225,7 +225,12 @@
|
||||
bool IsLinuxHDR() const;
|
||||
MediaResult InitVAAPIDecoder();
|
||||
@@ -73,9 +75,8 @@ diff --git a/dom/media/platforms/ffmpeg/FFmpegVideoDecoder.h b/dom/media/platfor
|
||||
// If video overlay is used we want to upload SW decoded frames to
|
||||
// DMABuf and present it as a external texture to rendering pipeline.
|
||||
bool mUploadSWDecodeToDMABuf = false;
|
||||
diff --git a/dom/media/platforms/ffmpeg/FFmpegVideoDecoder.cpp b/dom/media/platforms/ffmpeg/FFmpegVideoDecoder.cpp
|
||||
--- a/dom/media/platforms/ffmpeg/FFmpegVideoDecoder.cpp 2026-04-27 16:09:10.000000000 +0200
|
||||
+++ b/dom/media/platforms/ffmpeg/FFmpegVideoDecoder.cpp 2026-04-29 00:10:00.098884335 +0200
|
||||
--- a/dom/media/platforms/ffmpeg/FFmpegVideoDecoder.cpp 2026-05-21 04:57:59.566685221 +0000
|
||||
+++ b/dom/media/platforms/ffmpeg/FFmpegVideoDecoder.cpp 2026-05-21 04:58:00.136004159 +0000
|
||||
@@ -403,6 +403,129 @@
|
||||
return NS_OK;
|
||||
}
|
||||
@@ -90,7 +91,7 @@ diff --git a/dom/media/platforms/ffmpeg/FFmpegVideoDecoder.cpp b/dom/media/platf
|
||||
+ }
|
||||
+ const char* drmDevice = "/dev/dri/renderD128";
|
||||
+ if (mLib->av_hwdevice_ctx_create(&mDRMDeviceContext,
|
||||
+ AV_HWDEVICE_TYPE_DRM, drmDevice,
|
||||
+ (enum AVHWDeviceType)13, drmDevice,
|
||||
+ nullptr, 0) < 0) {
|
||||
+ FFMPEG_LOG(" av_hwdevice_ctx_create(DRM, %s) failed", drmDevice);
|
||||
+ return false;
|
||||
@@ -143,7 +144,7 @@ diff --git a/dom/media/platforms/ffmpeg/FFmpegVideoDecoder.cpp b/dom/media/platf
|
||||
+ for (int i = 0;; i++) {
|
||||
+ const AVCodecHWConfig* cfg = mLib->avcodec_get_hw_config(generic, i);
|
||||
+ if (!cfg) break;
|
||||
+ if (cfg->device_type == AV_HWDEVICE_TYPE_DRM) {
|
||||
+ if (cfg->device_type == AV_HWDEVICE_TYPE_DRM || (int)cfg->device_type == 13 || (int)cfg->device_type == 14) {
|
||||
+ codec = generic;
|
||||
+ FFMPEG_LOG(" using generic codec %s with DRM hwaccel", codec->name);
|
||||
+ break;
|
||||
@@ -13,7 +13,7 @@
|
||||
|
||||
pkgname=firefox-fourier
|
||||
pkgver=150.0.1
|
||||
pkgrel=1
|
||||
pkgrel=7
|
||||
pkgdesc='Firefox with V4L2 stateless HW video decode unlocked for mainline Linux Rockchip'
|
||||
arch=('aarch64' 'x86_64')
|
||||
url='https://www.mozilla.org/firefox'
|
||||
@@ -87,8 +87,15 @@ source=(
|
||||
'0002-libwrapper-hwdevice-ctx-create.patch'
|
||||
'0003-ffmpegvideo-v4l2-request-route.patch'
|
||||
'0004-prefs-v4l2-request.patch'
|
||||
'0005-rdd-sandbox-v4l2-media-ctl.patch'
|
||||
# Vendor-default prefs that gate the patched VAAPI path on RK3399 —
|
||||
# widget.dmabuf.force-enabled etc. See marfrit-packages#8 for evidence.
|
||||
'rockchip-fourier-defaults.js'
|
||||
# Plasma/GNOME start-menu entry — categorises under Internet, picks the
|
||||
# 128px firefox icon shipped under /usr/lib/firefox-fourier/browser/.
|
||||
'firefox-fourier.desktop'
|
||||
)
|
||||
sha256sums=('SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP')
|
||||
sha256sums=('SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP')
|
||||
|
||||
prepare() {
|
||||
cd "${srcdir}/firefox-${pkgver}"
|
||||
@@ -103,6 +110,7 @@ prepare() {
|
||||
patch -Np1 -i "${srcdir}/0002-libwrapper-hwdevice-ctx-create.patch"
|
||||
patch -Np1 -i "${srcdir}/0003-ffmpegvideo-v4l2-request-route.patch"
|
||||
patch -Np1 -i "${srcdir}/0004-prefs-v4l2-request.patch"
|
||||
patch -Np1 -i "${srcdir}/0005-rdd-sandbox-v4l2-media-ctl.patch"
|
||||
|
||||
cp "${srcdir}/mozconfig" .mozconfig
|
||||
}
|
||||
@@ -160,4 +168,26 @@ export MOZ_X11_EGL="${MOZ_X11_EGL:-1}"
|
||||
exec /usr/lib/firefox-fourier/firefox-fourier "$@"
|
||||
LAUNCHER
|
||||
chmod 0755 "${pkgdir}/usr/bin/firefox-fourier"
|
||||
|
||||
# Vendor-default prefs (RK3399 HW-decode unlock) — closes #8.
|
||||
# Lower precedence than user prefs / about:config; loaded by Firefox
|
||||
# at startup from the package install dir. The 0004 patch covers
|
||||
# media.ffmpeg.v4l2-request.enabled; this file covers the three
|
||||
# additional prefs that gate the path to the patched code.
|
||||
# Vendor-prefs install path: /usr/lib/firefox-fourier/defaults/preferences/
|
||||
# (Mozilla's canonical scan dir for third-party default-pref drops.) The
|
||||
# browser/defaults/preferences/ alternative looked promising but is NOT a
|
||||
# vendor-prefs scan location in Firefox 150 — empirically confirmed on
|
||||
# fresnel: file shipped there, VAAPI never engaged. Same file under
|
||||
# defaults/preferences/ → MOZ_LOG showed `Requesting pixel format
|
||||
# VAAPI_VLD` + dmabuf surfaces locking end-to-end.
|
||||
install -Dm644 "${srcdir}/rockchip-fourier-defaults.js" \
|
||||
"${pkgdir}/usr/lib/firefox-fourier/defaults/preferences/rockchip-fourier-defaults.js"
|
||||
|
||||
# Desktop entry — without this file the package would have no
|
||||
# start-menu entry (stock firefox.desktop disappears when our `provides`
|
||||
# replaces stock firefox). Plasma & GNOME pick this up via the
|
||||
# `Categories=Network;WebBrowser;` line → "Internet" submenu.
|
||||
install -Dm644 "${srcdir}/firefox-fourier.desktop" \
|
||||
"${pkgdir}/usr/share/applications/firefox-fourier.desktop"
|
||||
}
|
||||
|
||||
+295
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
+28
@@ -0,0 +1,28 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Alex Hochheiden <ahochheiden@mozilla.com>
|
||||
Date: Wed, 1 Apr 2026 18:11:37 +0000
|
||||
Subject: [PATCH] Bug 2023597 - Use `wasm32-wasip1` target for clang >= 22.1
|
||||
r=firefox-build-system-reviewers,sergesanspaille
|
||||
|
||||
https://github.com/llvm/llvm-project/pull/165345
|
||||
https://releases.llvm.org/22.1.0/tools/clang/docs/ReleaseNotes.html
|
||||
|
||||
Differential Revision: https://phabricator.services.mozilla.com/D291023
|
||||
---
|
||||
build/moz.configure/toolchain.configure | 3 +++
|
||||
1 file changed, 3 insertions(+)
|
||||
|
||||
diff --git a/build/moz.configure/toolchain.configure b/build/moz.configure/toolchain.configure
|
||||
index a37ed610cc43..c7d0c8bdf75c 100644
|
||||
--- a/build/moz.configure/toolchain.configure
|
||||
+++ b/build/moz.configure/toolchain.configure
|
||||
@@ -695,6 +695,9 @@ def check_compiler(configure_cache, compiler, language, target, android_version)
|
||||
# This makes clang define __ANDROID_API__ and use versioned library
|
||||
# directories from the NDK.
|
||||
toolchain = "%s%d" % (target.toolchain, android_version)
|
||||
+ elif target.kernel == "WASI" and info.type == "clang" and info.version >= Version("22.1"):
|
||||
+ # The wasm32-wasi target was renamed to wasm32-wasip1 in LLVM 22.1.
|
||||
+ toolchain = "wasm32-wasip1"
|
||||
else:
|
||||
toolchain = target.toolchain
|
||||
|
||||
@@ -0,0 +1,22 @@
|
||||
[Desktop Entry]
|
||||
Version=1.0
|
||||
Type=Application
|
||||
Name=Firefox (Fourier — V4L2 HW decode)
|
||||
GenericName=Web Browser
|
||||
Comment=Browse the Web with V4L2 stateless HW video decode (RK3588/RK3566)
|
||||
Exec=/usr/bin/firefox-fourier %u
|
||||
Icon=/usr/lib/firefox-fourier/browser/chrome/icons/default/default128.png
|
||||
Terminal=false
|
||||
StartupNotify=true
|
||||
StartupWMClass=firefox-fourier
|
||||
Categories=Network;WebBrowser;
|
||||
MimeType=text/html;text/xml;application/xhtml+xml;application/vnd.mozilla.xul+xml;application/rss+xml;application/rdf+xml;image/gif;image/jpeg;image/png;image/svg+xml;image/webp;image/avif;application/json;application/pdf;audio/flac;audio/ogg;audio/webm;video/ogg;video/webm;x-scheme-handler/http;x-scheme-handler/https;x-scheme-handler/chrome;x-scheme-handler/mailto;
|
||||
Actions=new-window;new-private-window;
|
||||
|
||||
[Desktop Action new-window]
|
||||
Name=Open a New Window
|
||||
Exec=/usr/bin/firefox-fourier --new-window %u
|
||||
|
||||
[Desktop Action new-private-window]
|
||||
Name=Open a New Private Window
|
||||
Exec=/usr/bin/firefox-fourier --private-window %u
|
||||
@@ -34,3 +34,20 @@ ac_add_options --with-distribution-id=de.reauktion.fourier
|
||||
|
||||
# Reduce build memory pressure on aarch64 — parallel link is heavy.
|
||||
mk_add_options MOZ_PARALLEL_BUILD=8
|
||||
|
||||
# Explicit distcc routing. fermi's makepkg.conf has
|
||||
# BUILDENV=(distcc ...) which auto-prepends /usr/lib/distcc/bin to
|
||||
# $PATH, but mach's configure picks up CC/CXX from the env directly
|
||||
# and the distcc wrappers won't fire unless we set them. Mirrors the
|
||||
# ffmpeg-v4l2-request-fourier pattern.
|
||||
#
|
||||
# Note: only the C/C++ portion of the build distributes; rustc and
|
||||
# the host-only build steps stay local. Empirically that's still a
|
||||
# 30-40% wall-clock win on a 4-worker pool. DISTCC_HOSTS comes from
|
||||
# makepkg.conf ('+zeroconf' by default — Avahi-discovers tesla,
|
||||
# dcc1, dcc2, ampere).
|
||||
if [[ ":${PATH}:" == *":/usr/lib/distcc/bin:"* ]]; then
|
||||
export CC="distcc gcc"
|
||||
export CXX="distcc g++"
|
||||
fi
|
||||
|
||||
|
||||
@@ -0,0 +1,19 @@
|
||||
// firefox-fourier — RK3399 V4L2-stateless HW-decode default prefs.
|
||||
//
|
||||
// The patch series (0001..0004) builds the VAAPI / V4L2-request routing
|
||||
// path through libavcodec, but the resulting code path is gated by three
|
||||
// other prefs that are 'false' upstream because the relevant probes don't
|
||||
// fire on panfrost EGL or trip the Intel-tuned cost heuristic. Without
|
||||
// these, firefox-fourier silently SW-decodes on a fresh profile despite
|
||||
// having all the unlock patches applied.
|
||||
//
|
||||
// Filed via marfrit/marfrit-packages#8 — see that issue for MOZ_LOG
|
||||
// evidence on fresnel (Pinebook Pro / RK3399).
|
||||
//
|
||||
// These are *vendor* defaults: lower precedence than user.js and
|
||||
// about:config user prefs. Power users who want to disable HW decode for
|
||||
// debugging can flip them in user prefs without touching this file.
|
||||
|
||||
pref("widget.dmabuf.force-enabled", true);
|
||||
pref("media.hardware-video-decoding.force-enabled", true);
|
||||
pref("media.ffvpx-hw.enabled", true);
|
||||
@@ -2,7 +2,7 @@
|
||||
# Upstream maintainers: Felix Yan, Antonio Rojas
|
||||
# Contributor: Andrea Scarpino <andrea@archlinux.org>
|
||||
#
|
||||
# kwin-fourier — KWin 6.6.4 with the V4L2-stateless implicit-sync
|
||||
# kwin-fourier — KWin 6.6.5 with the V4L2-stateless implicit-sync
|
||||
# transaction wait bypass. Hypothesis: KWin's
|
||||
# `Transaction::watchDmaBuf` calls DMA_BUF_IOCTL_EXPORT_SYNC_FILE on
|
||||
# every plane of every imported dmabuf and parks the transaction on a
|
||||
@@ -21,9 +21,9 @@
|
||||
# ../chromium-fourier/KWIN_PIVOT.md for the full diagnosis thread.
|
||||
|
||||
pkgname=kwin-fourier
|
||||
pkgver=6.6.4
|
||||
pkgver=6.6.5
|
||||
_dirver=$(echo $pkgver | cut -d. -f1-3)
|
||||
pkgrel=3
|
||||
pkgrel=1
|
||||
_upname=kwin
|
||||
epoch=1
|
||||
arch=(aarch64 x86_64)
|
||||
@@ -103,7 +103,7 @@ conflicts=(kwin)
|
||||
replaces=(kwin)
|
||||
source=(https://download.kde.org/stable/plasma/$_dirver/$_upname-$pkgver.tar.xz{,.sig}
|
||||
0001-transaction-bypass-watchDmaBuf-fence-wait.patch)
|
||||
sha256sums=('3f9439760580a977d018daf4b35b62e5a1700def7b21c8dfbfc789d21378d7ad'
|
||||
sha256sums=('6c187ce7a5506090b438ef900103836fa0537674dde8b31e5b497ef321643cb4'
|
||||
'SKIP'
|
||||
'SKIP')
|
||||
validpgpkeys=('E0A3EB202F8E57528E13E72FD7574483BB57B18D' # Jonathan Esk-Riddell <jr@jriddell.org>
|
||||
|
||||
@@ -6,8 +6,11 @@
|
||||
# tracks the campaign fork's git history directly, so iteration sweeps
|
||||
# (DEBUG removal, follow-up bugfixes) land in a clean linear log.
|
||||
#
|
||||
# Campaign: ~/src/libva-multiplanar/ (eight closed iterations as of
|
||||
# 2026-05-06; iter8 close is the production tip pinned below).
|
||||
# Campaign: ~/src/libva-multiplanar/ (iter8 close 2026-05-06) plus
|
||||
# ~/src/fresnel-fourier/ which carried the fork to iter38b — multi-device
|
||||
# probe so a single libva session serves all 5 codecs (rkvdec H.264 +
|
||||
# HEVC + VP9, hantro MPEG-2 + VP8) plus a bounds-check fix for
|
||||
# MAX_PROFILES. Pinned below.
|
||||
# Fork repo: https://git.reauktion.de/marfrit/libva-v4l2-request-fourier
|
||||
# Bootlin upstream: https://github.com/bootlin/libva-v4l2-request
|
||||
#
|
||||
@@ -18,18 +21,32 @@
|
||||
# Alternative: boltzmann via his subagent + marfrit-publish.
|
||||
|
||||
pkgname=libva-v4l2-request-fourier
|
||||
epoch=1
|
||||
_upstreampkg=libva-v4l2-request
|
||||
|
||||
# Pin the fork tip. 65969da = "iter8 Phase 4: tests/run_perf_binding_cell.sh"
|
||||
# — last commit on master before fresnel-fourier work started layering
|
||||
# MPEG-2 rewrites on top of it (2026-05-08). The libva-multiplanar campaign
|
||||
# closed iter8 at this commit; promote to a later pin only after a future
|
||||
# iteration closes cleanly.
|
||||
_commit=65969da3ee901442a8ca4e1f7f9697d5461d368a
|
||||
# Pin the fork tip. c454618 = PR #16 merge "picture, request_pool:
|
||||
# transparent OUTPUT-pool resize on bitstream overrun (#15)" —
|
||||
# follow-up root-cause fix to #13/#14. On a mid-stream bitstream-
|
||||
# budget overrun (typical cause: SPS-driven resolution upshift in an
|
||||
# adaptive-bitrate stream), codec_store_buffer now snapshots the in-
|
||||
# flight surface's accumulated bytes, releases its OUTPUT pool slot,
|
||||
# calls request_pool_resize (STREAMOFF → REQBUFS(0) → S_FMT with
|
||||
# 2×sizeimage hint, capped at 1 GiB, page-aligned → CREATE_BUFS →
|
||||
# mmap → media_request_alloc → STREAMON), re-acquires a slot, re-
|
||||
# mirrors the surface's source_{data,size,request_fd}, restores the
|
||||
# bytes, and continues. The frame survives instead of being dropped
|
||||
# back to libavcodec for surface recreation. CAPTURE side untouched
|
||||
# (per-queue V4L2 streaming independence).
|
||||
#
|
||||
# Prior pin (2860d75) = PR #14 merge — codec_store_buffer bounds-
|
||||
# check floor (#13).
|
||||
_commit=c454618ae11addce2e17b560f4deeacbed067d98
|
||||
|
||||
# Project version from meson.build (1.0.0) + commit count + short sha,
|
||||
# matching the ffmpeg-v4l2-request-fourier convention.
|
||||
pkgver=1.0.0.r280.65969da
|
||||
# matching the ffmpeg-v4l2-request-fourier convention. Recomputed at
|
||||
# build time by pkgver() below; the static value here is a placeholder
|
||||
# so AUR-style consumers see something coherent before src/ exists.
|
||||
pkgver=1.0.0.r390.c454618
|
||||
pkgrel=1
|
||||
pkgdesc="VA-API backend for V4L2 stateless decoders (multiplanar fork — fourier umbrella)"
|
||||
arch=('aarch64')
|
||||
@@ -55,7 +72,15 @@ build() {
|
||||
cd "${srcdir}/${_upstreampkg}-fourier"
|
||||
# meson_options.txt only exposes 'kernel_headers' — leave it empty to
|
||||
# use system /usr/include kernel UAPI headers. No per-codec toggles.
|
||||
arch-meson build --buildtype=release
|
||||
#
|
||||
# b_lto=false: override arch-meson's wrapper default of `-D b_lto=true`,
|
||||
# which the makepkg.conf OPTIONS=(..., !lto, ...) line does NOT actually
|
||||
# override (arch-meson hard-codes b_lto=true). The hand-built reproducer
|
||||
# from issue #17 shows: LTO/ICF kernel-folds per-codec helpers and HEVC's
|
||||
# multi-control-struct chain (SPS+PPS+DECODE_PARAMS+SLICE_PARAMS) gets a
|
||||
# wrong helper-instance pulled in at vaEndPicture → segfault. The 4 other
|
||||
# codecs (single-control-struct) tolerate the folding by accident.
|
||||
arch-meson build --buildtype=release -Db_lto=false
|
||||
meson compile -C build
|
||||
}
|
||||
|
||||
|
||||
@@ -1,6 +1,13 @@
|
||||
# Maintainer: Markus Fritsche <fritsche.markus@gmail.com>
|
||||
# Campaign: ohm_gl_fix Phase 6 Step 1
|
||||
#
|
||||
# DEPRECATED (2026-05-16): superseded by ../libva-v4l2-request-fourier/
|
||||
# which tracks the campaign fork's git history directly and adds the
|
||||
# iter38 multi-device probe (single libva session for rkvdec H.264/HEVC/VP9
|
||||
# + hantro MPEG-2/VP8). The successor declares
|
||||
# replaces=('libva-v4l2-request-ohm-gl-fix'), so installing it removes
|
||||
# this package automatically. See README.md for the full deprecation note.
|
||||
#
|
||||
# Forks libva-v4l2-request to add hantro-vpu multiplanar + modern
|
||||
# stateless UAPI support. Conflicts/replaces stock libva-v4l2-request.
|
||||
#
|
||||
@@ -11,7 +18,7 @@ pkgname=libva-v4l2-request-ohm-gl-fix
|
||||
_upstreampkg=libva-v4l2-request
|
||||
pkgver=1.0.0.r0.ga3c2476
|
||||
pkgrel=2
|
||||
pkgdesc="VA-API backend for V4L2 stateless decoders, hantro-vpu multiplanar fork"
|
||||
pkgdesc="DEPRECATED — use libva-v4l2-request-fourier. VA-API backend for V4L2 stateless decoders, hantro-vpu multiplanar fork"
|
||||
arch=('aarch64')
|
||||
url="https://github.com/bootlin/libva-v4l2-request"
|
||||
license=('LGPL2.1' 'MIT')
|
||||
|
||||
@@ -1,5 +1,24 @@
|
||||
# libva-v4l2-request-ohm-gl-fix
|
||||
|
||||
> ## ⚠ DEPRECATED — use [`libva-v4l2-request-fourier`](../libva-v4l2-request-fourier/) instead
|
||||
>
|
||||
> This package is the **experimental predecessor** build (tarball pin
|
||||
> + 18 stacked patches) and is no longer maintained as of 2026-05-16.
|
||||
> Its successor `libva-v4l2-request-fourier` tracks the campaign fork's
|
||||
> git history directly
|
||||
> ([git.reauktion.de/marfrit/libva-v4l2-request-fourier](https://git.reauktion.de/marfrit/libva-v4l2-request-fourier))
|
||||
> so iteration sweeps (DEBUG removal, follow-up bugfixes) land in a clean
|
||||
> linear log, and adds the iter38 multi-device probe that lets a single
|
||||
> libva session serve rkvdec H.264/HEVC/VP9 + hantro MPEG-2/VP8 without
|
||||
> needing `LIBVA_V4L2_REQUEST_VIDEO_PATH` overrides.
|
||||
>
|
||||
> `libva-v4l2-request-fourier` declares
|
||||
> `replaces=('libva-v4l2-request-ohm-gl-fix')`, so installing it will
|
||||
> remove this package automatically. Kept in-tree as historical reference
|
||||
> for the ohm_gl_fix Phase 6 audit trail.
|
||||
|
||||
---
|
||||
|
||||
Bootlin's libva-v4l2-request VA-API backend, with hantro-vpu
|
||||
multi-planar + chromium-149-era stateless H.264 patches developed
|
||||
in the [ohm_gl_fix campaign](../../../ohm_gl_fix/) Phase 6 Step 1
|
||||
|
||||
@@ -0,0 +1,168 @@
|
||||
# Maintainer: Markus Fritsche <mfritsche@reauktion.de>
|
||||
#
|
||||
# linux-ampere-fourier — CoolPi GenBook (RK3588) kernel built from the
|
||||
# kernel-agent fleet/ampere.yaml manifest applied to mainline v7.0-rc3.
|
||||
#
|
||||
# kafr2 baseline (2026-05-18): mainline v7.0-rc3 + the 10 scope-tagged
|
||||
# kernel-agent patches under patches/{soc,module,board,driver}/:
|
||||
# - 1 soc/rk3588 pwm15 pinctrl
|
||||
# - 6 board/coolpi-cm5-genbook DTS patches (pwm-fan, RK806 power-off,
|
||||
# speaker, USB-C PD, lid switch + USB3 PHY, microphone)
|
||||
# - 3 driver/media VP9-on-VDPU381 patches (Sarma's v8 series, imported
|
||||
# via marfrit/kernel-agent#12 closure and PR #24)
|
||||
#
|
||||
# Drops the prior f8f3ad9 baseline ("18 commits ahead") because that tip
|
||||
# black-screens ampere — kernel-agent's ka-promote produces this 10-patch
|
||||
# minimal set from fleet/ampere.yaml. End-to-end VP9 + AV1 (av1-vpu-dec
|
||||
# is mainline-7.0) decode verified bit-exact via kdirect on the
|
||||
# hand-built tip 48a8c78 before this package iteration was cut.
|
||||
#
|
||||
# Coexists with the user's other extlinux labels in
|
||||
# /boot/firmware/extlinux/extlinux.conf; never edits them. Adds a
|
||||
# managed `linux-ampere-fourier` label (the user sets `default` manually
|
||||
# after verifying boot).
|
||||
#
|
||||
# Bootloader path: /boot/firmware/ (vfat on mmcblk0p1). Kernel +
|
||||
# initramfs + DTB land there directly. Reverting = boot a different
|
||||
# extlinux label (e.g. arch_mainline, ubuntu_mainline).
|
||||
|
||||
pkgbase=linux-ampere-fourier
|
||||
pkgname=("$pkgbase" "$pkgbase-headers")
|
||||
pkgver=7.0rc3.kafr2
|
||||
pkgrel=1
|
||||
pkgdesc='CoolPi GenBook kernel (v7.0-rc3 + kernel-agent fleet/ampere.yaml — 6 board patches + 3 VP9-VDPU381 + 1 pwm15)'
|
||||
arch=(aarch64)
|
||||
url='https://git.reauktion.de/marfrit/kernel-agent'
|
||||
license=(GPL-2.0-only)
|
||||
makedepends=(
|
||||
bc cpio gettext kmod libelf pahole perl python tar xz
|
||||
ccache
|
||||
uboot-tools dtc
|
||||
)
|
||||
options=('!strip')
|
||||
|
||||
# Pinned tip of the kernel-agent-managed source tree for ampere.
|
||||
# 10 commits ahead of v7.0-rc3, exactly mirroring fleet/ampere.yaml's
|
||||
# manifest under apply order:
|
||||
# - c57d069 soc/rk3588: pwm15 pinctrl entries
|
||||
# - 05a915c board/genbook: pwm-fan with thermal cooling
|
||||
# - d007b90 module/coolpi-cm5: RK806 system-power-controller
|
||||
# - 3722eab board/genbook: speaker via audio-graph-card
|
||||
# - 3e42ab6 board/genbook: USB-C PD via FUSB302
|
||||
# - 7c241f2 board/genbook: lid switch + USB3 PHY lane
|
||||
# - dd545fa board/genbook: wire internal microphone
|
||||
# - 9ddcae5 driver/media: rkvdec-vp9 helper rename (Sarma)
|
||||
# - c5063d9 driver/media: rkvdec move vp9 to common (Sarma)
|
||||
# - 48a8c78 driver/media: rkvdec VP9 for VDPU381 (Sarma)
|
||||
#
|
||||
# This is the same tree state ka-promote ampere produces as cumulative.patch
|
||||
# (see marfrit/kernel-agent build/ampere/v7.0-rc3/manifest.lock for the
|
||||
# b2sum + per-patch sha256s).
|
||||
_commit=48a8c785de7f5320513052a64e544c6310d7b273
|
||||
|
||||
source=(
|
||||
# Local tarball produced by ./prebuild.sh from a local clone of the
|
||||
# linux-rk3588-marfrit branch. Not fetched from a URL because the
|
||||
# boltzmann working clone is shallow (gitea push rejects) and the
|
||||
# 260MB tarball isn't committed to marfrit-packages. Run prebuild.sh
|
||||
# before makepkg; see README in this dir.
|
||||
"linux-rk3588-marfrit-${_commit:0:7}.tar.gz"
|
||||
'config' # snapshot of running ampere kernel's /proc/config.gz (7.0.0-rc3-ARCH+)
|
||||
'linux-ampere-fourier.preset'
|
||||
'extlinux-add.hook'
|
||||
'extlinux-add.sh'
|
||||
)
|
||||
sha256sums=('SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP')
|
||||
|
||||
# kernelrelease becomes <VERSION>.<PATCHLEVEL>.<SUBLEVEL><EXTRAVERSION><LOCALVERSION>
|
||||
# i.e. 7.0.0-rc3-ampere-fourier. Module dir + LOCALVERSION suffix keep
|
||||
# this disjoint from the hand-managed /boot/firmware/Image-7.0.0-rc3-ARCH+
|
||||
# that's currently on the host.
|
||||
_kernver=7.0.0-rc3-ampere-fourier
|
||||
_srcdir=linux-rk3588-marfrit
|
||||
|
||||
# prepare — seed the kernel tree with the packaged config and record the
# resulting kernel release string in ./version for the package functions.
prepare() {
  cd "${_srcdir}"

  printf '%s\n' ":: writing config"
  cp "${srcdir}/config" .config

  # Tag the build with a LOCALVERSION suffix so it is distinguishable from
  # upstream-stock builds, and switch off the automatic suffix.
  scripts/config --set-str LOCALVERSION "-ampere-fourier"
  scripts/config --disable LOCALVERSION_AUTO

  printf '%s\n' ":: olddefconfig (accept new symbols sensibly)"
  make olddefconfig

  # Persist the release string; _package and _package-headers read it back.
  make -s kernelrelease > version
  printf '%s\n' ":: kernel release: $(cat version)"
}
|
||||
|
||||
# build — compile the kernel image, modules and device trees natively.
build() {
  cd "${_srcdir}"
  unset LDFLAGS

  # Native build only — no distcc per kernel-agent policy
  # (feedback_kernel_agent_no_distcc.md). ccache stays.
  #
  # CC/HOSTCC are passed on the make command line rather than exported:
  # the kernel's top-level Makefile assigns CC and HOSTCC itself, and a
  # makefile assignment takes precedence over an environment variable, so
  # an exported CC="ccache gcc" would be silently ignored and ccache never
  # used. Command-line variables override the Makefile's own assignments.
  make ${MAKEFLAGS:--j$(nproc)} CC="ccache gcc" HOSTCC="ccache gcc" \
    Image modules dtbs
}
|
||||
|
||||
# _package — assemble the kernel package: Image + DTB on the firmware
# partition, stripped modules under /usr, the mkinitcpio preset, and the
# pacman hook + script that maintain the managed extlinux entry.
_package() {
  pkgdesc='CoolPi GenBook overclocked kernel (ampere-fourier baseline)'
  depends=(coreutils kmod mkinitcpio uboot-tools)
  optdepends=('linux-firmware: firmware images needed for some devices')
  backup=("etc/mkinitcpio.d/${pkgbase}.preset")

  cd "${_srcdir}"

  local release fw_dir dtb_name mod_dir
  release=$(cat version)
  fw_dir="${pkgdir}/boot/firmware"
  dtb_name=rk3588-coolpi-cm5-genbook.dtb
  mod_dir="${pkgdir}/usr/lib/modules/${release}"

  # Kernel image goes onto the vfat firmware partition, where extlinux
  # looks for it.
  install -Dm644 arch/arm64/boot/Image "${fw_dir}/Image-ampere-fourier"

  # Single DTB for the GenBook target, installed flat under /boot/firmware/
  # (no subdir, matching the existing host convention).
  install -Dm644 "arch/arm64/boot/dts/rockchip/${dtb_name}" \
    "${fw_dir}/${dtb_name}-ampere-fourier"

  # Install stripped modules, then drop the source/build links that
  # modules_install leaves pointing into the build tree.
  ZSTD_CLEVEL=19 make INSTALL_MOD_PATH="${pkgdir}/usr" \
    INSTALL_MOD_STRIP=1 modules_install
  rm -f "${mod_dir}/source" "${mod_dir}/build"

  install -Dm644 "${srcdir}/${pkgbase}.preset" \
    "${pkgdir}/etc/mkinitcpio.d/${pkgbase}.preset"

  install -Dm755 "${srcdir}/extlinux-add.hook" \
    "${pkgdir}/usr/share/libalpm/hooks/95-${pkgbase}-extlinux.hook"
  install -Dm755 "${srcdir}/extlinux-add.sh" \
    "${pkgdir}/usr/share/libalpm/scripts/${pkgbase}-extlinux"
}
|
||||
|
||||
# _package-headers — stage the files out-of-tree module builds need under
# /usr/lib/modules/<release>/build and link that tree from /usr/src.
_package-headers() {
  pkgdesc='Headers and scripts for the linux-ampere-fourier kernel'
  depends=(pahole)

  cd "${_srcdir}"

  local release dest
  release=$(cat version)
  dest="${pkgdir}/usr/lib/modules/${release}/build"

  # Top-level build artefacts; this first install also creates ${dest}.
  install -Dt "${dest}" -m644 \
    .config Makefile Module.symvers System.map vmlinux version
  install -Dt "${dest}/kernel" -m644 kernel/Makefile
  install -Dt "${dest}/arch/arm64" -m644 arch/arm64/Makefile

  # Build scripts plus generic and arm64 headers.
  cp -a scripts "${dest}"
  cp -a arch/arm64/include "${dest}/arch/arm64/"
  cp -a include "${dest}/"

  # Every Kbuild / Kconfig* file, kept at its relative path in the tree.
  find . \( -name 'Kbuild' -o -name 'Kconfig*' \) \
    -exec install -Dm644 {} "${dest}/{}" \;

  install -d "${pkgdir}/usr/src"
  ln -sr "${dest}" "${pkgdir}/usr/src/${pkgbase}"
}
|
||||
|
||||
eval "package_${pkgbase}() { _package; }"
|
||||
eval "package_${pkgbase}-headers() { _package-headers; }"
|
||||
@@ -0,0 +1,70 @@
|
||||
# linux-ampere-fourier
|
||||
|
||||
Kernel package for ampere (CoolPi GenBook RK3588). Baselined on
|
||||
`marfrit/linux-rk3588-marfrit @ 48a8c78` (mainline v7.0-rc3 + the 10
|
||||
scope-tagged kernel-agent patches pinned in the PKGBUILD).
|
||||
|
||||
See `marfrit/kernel-agent/fleet/ampere.yaml` for the manifest +
|
||||
`marfrit/kernel-agent/patches/{soc,module,board}/...` for the
|
||||
scope-tagged board patches in the baseline.
|
||||
|
||||
## Build
|
||||
|
||||
The kernel source isn't on Gitea — boltzmann's working clone is
|
||||
shallow (Gitea refuses shallow pushes) and a 260MB tarball doesn't
|
||||
belong in `marfrit-packages`. Stage the source locally from a
|
||||
clone of the `linux-rk3588-marfrit` branch:
|
||||
|
||||
```sh
|
||||
cd arch/linux-ampere-fourier
|
||||
./prebuild.sh # produces linux-rk3588-marfrit-48a8c78.tar.gz
|
||||
makepkg -s --noconfirm # native aarch64 build; no distcc
|
||||
```
|
||||
|
||||
`prebuild.sh` looks at `$LINUX_RK3588_MARFRIT_TREE` (default
|
||||
`~/src/linux-rockchip`) for the kernel working tree. The tip commit
|
||||
must be reachable in that clone — fetch the `linux-rk3588-marfrit`
|
||||
branch first if you cloned from elsewhere.
|
||||
|
||||
## Build hosts
|
||||
|
||||
Native aarch64 only (per kernel-agent `feedback_kernel_agent_no_distcc.md`).
|
||||
Either ampere itself (8C/2.4GHz, 32GB, native arch) or boltzmann
|
||||
(Rock 5 ITX+, same uarch). fermi as fallback.
|
||||
|
||||
## Install
|
||||
|
||||
Adds a managed label to `/boot/firmware/extlinux/extlinux.conf`:
|
||||
|
||||
```
|
||||
label linux-ampere-fourier
|
||||
menu label linux-ampere-fourier (managed)
|
||||
kernel /Image-ampere-fourier
|
||||
initrd /initramfs-ampere-fourier.img
|
||||
fdt /rk3588-coolpi-cm5-genbook.dtb-ampere-fourier
|
||||
append <inherited from arch_mainline>
|
||||
```
|
||||
|
||||
Default label is NOT changed. After verifying boot of the managed
|
||||
label at the u-boot menu, you flip `default` manually. Reverting =
|
||||
boot a different label (e.g. `arch_mainline`, `ubuntu_mainline`).
|
||||
|
||||
## Boot path
|
||||
|
||||
ampere uses `/boot/firmware/` (vfat on mmcblk0p1, ~1G), distinct
|
||||
from fresnel's `/boot/` on root partition. The PKGBUILD installs
|
||||
Image, initramfs, and DTB directly under `/boot/firmware/`. No
|
||||
DTB subdir — single board target.
|
||||
|
||||
## Open follow-ups (per kernel-agent issue #6)
|
||||
|
||||
- **Ask 2** (VP9 enablement on RK3588 rkvdec) — addressed by the three
|
||||
driver/media VP9-on-VDPU381 patches in the kafr2 baseline (see PKGBUILD).
|
||||
- **Ask 3** (AV1 decoder integration) — backend libva work, not kernel.
|
||||
- Hosting the source tarball publicly so `prebuild.sh` isn't needed —
|
||||
candidate: Gitea release asset, or `packages.reauktion.de/sources/`.
|
||||
- Splitting the 12 non-board cherry-pick commits in the baseline
|
||||
(4 Shawn Lin phy, 2 Cristian Ciocaltea, etc.) into scope-tagged
|
||||
patches in kernel-agent — currently they ride along inside the
|
||||
pinned baseline rather than being explicit `includes:` in the
|
||||
manifest.
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,13 @@
|
||||
[Trigger]
|
||||
Operation = Install
|
||||
Operation = Upgrade
|
||||
Operation = Remove
|
||||
Type = Path
|
||||
Target = boot/firmware/Image-ampere-fourier
|
||||
Target = boot/firmware/rk3588-coolpi-cm5-genbook.dtb-ampere-fourier
|
||||
Target = boot/firmware/initramfs-ampere-fourier.img
|
||||
|
||||
[Action]
|
||||
Description = Updating extlinux entry for linux-ampere-fourier
|
||||
When = PostTransaction
|
||||
Exec = /usr/share/libalpm/scripts/linux-ampere-fourier-extlinux
|
||||
@@ -0,0 +1,62 @@
|
||||
#!/bin/sh
|
||||
# Add / update / remove the linux-ampere-fourier entry in
|
||||
# /boot/firmware/extlinux/extlinux.conf. Idempotent. Coexists with
|
||||
# the hand-managed labels in that file; never edits them. Default
|
||||
# label is NOT touched — user picks at u-boot menu.
|
||||
#
|
||||
# ampere boots from a vfat partition (mmcblk0p1) mounted at
|
||||
# /boot/firmware/, distinct from fresnel's /boot/ on root.
|
||||
|
||||
set -eu
|
||||
|
||||
CONF="/boot/firmware/extlinux/extlinux.conf"
|
||||
TAG_BEGIN="# >>> linux-ampere-fourier (managed) >>>"
|
||||
TAG_END="# <<< linux-ampere-fourier (managed) <<<"
|
||||
|
||||
# Copy APPEND from the user's `arch_mainline` label so the managed
|
||||
# entry inherits the same root= and console= settings the host's
|
||||
# bootloader already trusts. Falls back to a CoolPi GenBook default
|
||||
# if no arch_mainline label exists (first-time install on a fresh
|
||||
# bootloader config).
|
||||
EXISTING_APPEND=$(awk '
|
||||
/^[[:space:]]*label[[:space:]]+arch_mainline[[:space:]]*$/ { found=1; next }
|
||||
found && /^[[:space:]]*append[[:space:]]/ {
|
||||
sub(/^[[:space:]]*append[[:space:]]+/, "")
|
||||
print
|
||||
exit
|
||||
}
|
||||
/^[[:space:]]*label[[:space:]]/ { found=0 }
|
||||
' "$CONF" 2>/dev/null || true)
|
||||
|
||||
APPEND="${EXISTING_APPEND:-root=LABEL=arch rw rootwait rootfstype=btrfs rootflags=subvol=@,ssd,discard=async console=ttyS2,1500000 console=tty1 consoleblank=0 loglevel=7 cma=256M coherent_pool=2M}"
|
||||
|
||||
ENTRY=$(cat <<EOF
|
||||
${TAG_BEGIN}
|
||||
label linux-ampere-fourier
|
||||
menu label linux-ampere-fourier (managed)
|
||||
kernel /Image-ampere-fourier
|
||||
initrd /initramfs-ampere-fourier.img
|
||||
fdt /rk3588-coolpi-cm5-genbook.dtb-ampere-fourier
|
||||
append ${APPEND}
|
||||
${TAG_END}
|
||||
EOF
|
||||
)
|
||||
|
||||
# Nothing to manage when the bootloader config is absent: never fabricate
# an extlinux.conf, and don't abort the pacman transaction with a raw awk
# error under `set -e`.
if [ ! -f "$CONF" ]; then
    echo "linux-ampere-fourier: $CONF not found, extlinux entry not updated" >&2
    exit 0
fi

# Stage the rewrite next to $CONF so the final mv is a same-filesystem
# rename. A temp file in the default tmp dir would turn the mv into a
# cross-filesystem copy onto the boot partition, which can leave a
# truncated extlinux.conf if interrupted.
TMP=$(mktemp "${CONF}.XXXXXX")
# Remove the staging file on any exit path; harmless after a successful mv.
trap 'rm -f "$TMP"' EXIT

# Strip any prior managed block (everything between the begin/end tag
# lines, tags included); all other lines are copied through untouched.
awk -v b="$TAG_BEGIN" -v e="$TAG_END" '
    $0==b{skip=1; next}
    $0==e{skip=0; next}
    !skip{print}
' "$CONF" > "$TMP"

# Post-Remove: kernel files absent → don't re-add the entry
if [ -f "/boot/firmware/Image-ampere-fourier" ] \
   && [ -f "/boot/firmware/rk3588-coolpi-cm5-genbook.dtb-ampere-fourier" ]; then
    printf '%s\n' "$ENTRY" >> "$TMP"
    echo "linux-ampere-fourier: extlinux entry updated"
else
    echo "linux-ampere-fourier: kernel files absent, entry removed"
fi

mv "$TMP" "$CONF"
|
||||
@@ -0,0 +1,13 @@
|
||||
# mkinitcpio preset for linux-ampere-fourier
|
||||
#
|
||||
# ampere boots from /boot/firmware/ (vfat partition on mmcblk0p1). The
|
||||
# initramfs lands there too so extlinux can pick it up. Only one PRESET
|
||||
# because /boot/firmware is ~1G total — no room for a fallback image
|
||||
# alongside the primary.
|
||||
|
||||
ALL_kver="/boot/firmware/Image-ampere-fourier"
|
||||
ALL_microcode=()
|
||||
|
||||
PRESETS=('default')
|
||||
|
||||
default_image="/boot/firmware/initramfs-ampere-fourier.img"
|
||||
Executable
+68
@@ -0,0 +1,68 @@
|
||||
#!/bin/bash
|
||||
# prebuild — stage the kernel source tarball this PKGBUILD expects.
|
||||
#
|
||||
# linux-ampere-fourier's source is a snapshot of marfrit/linux-rk3588-marfrit
|
||||
# @ 48a8c78 (260MB), too big to commit to marfrit-packages and currently
|
||||
# unpushable to Gitea (boltzmann's working clone is shallow; gitea push
|
||||
# refuses shallow updates). Hosting the tarball outside Gitea would need
|
||||
# infrastructure setup that's not in scope for the first iteration.
|
||||
#
|
||||
# So: produce the tarball locally from the kernel working tree just
|
||||
# before makepkg runs. Idempotent — if an existing tarball matches the
|
||||
# expected sha256 we skip the archive step.
|
||||
#
|
||||
# Run from this directory: cd arch/linux-ampere-fourier && ./prebuild.sh
|
||||
# Override the kernel-tree location: LINUX_RK3588_MARFRIT_TREE=/path ./prebuild.sh
|
||||
#
|
||||
# Default tree location matches the boltzmann/ampere convention from
|
||||
# kernel-agent issue #6: $HOME/src/linux-rockchip.
|
||||
set -euo pipefail
|
||||
|
||||
TREE="${LINUX_RK3588_MARFRIT_TREE:-${HOME}/src/linux-rockchip}"
|
||||
COMMIT=48a8c785de7f5320513052a64e544c6310d7b273
|
||||
# Generated tarball sha varies with gzip version — script warns-not-fails.
|
||||
# Leave EXPECTED empty for fresh kafr2 builds; first successful build can
|
||||
# pin a canonical sha here if a reproducibility audit needs it.
|
||||
SHA256_EXPECTED=
|
||||
|
||||
HERE="$(cd "$(dirname "$0")" && pwd)"
|
||||
OUTPUT="${HERE}/linux-rk3588-marfrit-${COMMIT:0:7}.tar.gz"
|
||||
|
||||
# Reuse an existing tarball only when it can be verified against a pinned
# sha256. With no pin (SHA256_EXPECTED empty) there is nothing to verify
# against, so the tarball is still regenerated — but the message says so
# instead of reporting a bogus mismatch against an empty string.
if [ -f "$OUTPUT" ]; then
    have=$(sha256sum "$OUTPUT" | cut -d' ' -f1)
    if [ -z "$SHA256_EXPECTED" ]; then
        echo "prebuild: existing $OUTPUT (sha=$have) cannot be verified, no SHA256_EXPECTED pinned — regenerating" >&2
        rm -f "$OUTPUT"
    elif [ "$have" = "$SHA256_EXPECTED" ]; then
        echo "prebuild: $OUTPUT already exists with correct sha256"
        exit 0
    else
        echo "prebuild: existing $OUTPUT sha mismatch (have=$have, want=$SHA256_EXPECTED) — regenerating" >&2
        rm -f "$OUTPUT"
    fi
fi
|
||||
|
||||
if [ ! -d "$TREE/.git" ]; then
|
||||
echo "prebuild: kernel tree not at $TREE" >&2
|
||||
echo " set LINUX_RK3588_MARFRIT_TREE=/path/to/linux-rockchip and retry" >&2
|
||||
exit 2
|
||||
fi
|
||||
|
||||
cd "$TREE"
|
||||
|
||||
if ! git cat-file -e "$COMMIT" 2>/dev/null; then
|
||||
echo "prebuild: commit $COMMIT not found in $TREE" >&2
|
||||
echo " fetch the linux-rk3588-marfrit branch first:" >&2
|
||||
echo " git fetch <remote> linux-rk3588-marfrit" >&2
|
||||
exit 3
|
||||
fi
|
||||
|
||||
echo "prebuild: generating archive from $TREE @ $COMMIT..."
|
||||
git archive --format=tar.gz --prefix=linux-rk3588-marfrit/ "$COMMIT" -o "$OUTPUT"
|
||||
|
||||
# git archive emits a deterministic tar stream but gzip compression may
# vary by version, so a sha256 difference is informational: warn, don't
# fail. Only compare when a canonical sha is actually pinned; with
# SHA256_EXPECTED empty there is nothing to compare against and a warning
# would be pure noise.
have=$(sha256sum "$OUTPUT" | cut -d' ' -f1)
if [ -n "$SHA256_EXPECTED" ] && [ "$have" != "$SHA256_EXPECTED" ]; then
    echo "prebuild: WARNING $OUTPUT sha=$have (canonical=$SHA256_EXPECTED)" >&2
    echo "  probably a gzip-version difference; tar payload should be identical" >&2
fi
|
||||
|
||||
echo "prebuild: wrote $OUTPUT ($(du -h "$OUTPUT" | cut -f1), sha=$have)"
|
||||
+356
@@ -0,0 +1,356 @@
|
||||
From a202de1646d4c8f8ee2ebc2e4c100b621975754a Mon Sep 17 00:00:00 2001
|
||||
In-Reply-To: <20260429195306.239666-1-mfritsche@reauktion.de>
|
||||
References: <20260429195306.239666-1-mfritsche@reauktion.de>
|
||||
From: Markus Fritsche <mfritsche@reauktion.de>
|
||||
Date: Sat, 9 May 2026 16:16:07 +0200
|
||||
Subject: [PATCH RFC v2] media: videobuf2: add opt-in dma_resv producer fence
|
||||
helper
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
V4L2 producers historically don't propagate buffer-state-done into
|
||||
the dmabuf's dma_resv exclusive fence. Userspace consumers that
|
||||
import V4L2-produced dmabufs and wait on the dmabuf's implicit-sync
|
||||
fence (poll(POLLIN), DMA_BUF_IOCTL_EXPORT_SYNC_FILE,
|
||||
EGL_LINUX_DMA_BUF_EXT) currently see either zero fences or a stub
|
||||
fence from dma_fence_get_stub(). This is correct by accident for the
|
||||
common DQBUF-then-import case but represents a contract gap that
|
||||
breaks Wayland compositors importing CAPTURE buffers from a stateless
|
||||
H.264 decoder under continuous playback on implicit-sync GPU stacks
|
||||
(observed on RK3566 + hantro VPU + Mali-G52 panfrost; manifests as
|
||||
green frames -- BT.709 limited-range YUV(0,0,0) -> RGB(0,77,0) -- when
|
||||
the GPU samples the dmabuf before the producer's decode completes).
|
||||
|
||||
Add an opt-in API gated by both a per-driver runtime flag
|
||||
(vb2_queue::supports_release_fences) and a Kconfig
|
||||
(CONFIG_VIDEOBUF2_RELEASE_FENCES, default n) that lets producers
|
||||
populate a real dma_resv exclusive write fence on the dmabufs they
|
||||
export. Drivers call vb2_buffer_attach_release_fence(vb) at a
|
||||
finite-time-fenced point in their pipeline (typically m2m
|
||||
device_run, just before the HW kick); vb2_buffer_done() signals and
|
||||
puts the fence as part of its state transition.
|
||||
|
||||
The publish and signal paths are wrapped in
|
||||
dma_fence_begin_signalling() / dma_fence_end_signalling() so
|
||||
PROVE_LOCKING can validate that nothing taken in those critical
|
||||
sections deadlocks against the signal path. dma_resv_lock is
|
||||
sleepable but not taken on the signal path, so taking it inside the
|
||||
publish critical section is safe under lockdep.
|
||||
|
||||
Skips planes whose vb2_plane.dbuf is NULL -- buffers never exported
|
||||
via VIDIOC_EXPBUF (or imported via V4L2_MEMORY_DMABUF) have no
|
||||
dmabuf for userspace to wait on.
|
||||
|
||||
Drivers that don't opt in pay nothing: the helper is a no-op stub
|
||||
when CONFIG_VIDEOBUF2_RELEASE_FENCES=n, and an early-return check
|
||||
of supports_release_fences when =y but the flag is unset.
|
||||
|
||||
Validated on RK3566 PineTab2 with PROVE_LOCKING enabled: 30s of
|
||||
bbb_1080p30 H.264 stateless decode + zero-copy panfrost EGL import
|
||||
via dmabuf-wayland (mpv 0.41 + KWin 6.6.4 + Mesa panfrost 26.0.5)
|
||||
produces 31,816 dma_fence init/signal pairs across 5,724 vb2 buffer
|
||||
cycles with zero lockdep splats from videobuf2 / dma_resv code paths.
|
||||
|
||||
Subsequent patches in this series opt the hantro and rockchip-rga
|
||||
drivers in.
|
||||
|
||||
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
|
||||
Cc: Christian König <christian.koenig@amd.com>
|
||||
Cc: Nicolas Dufresne <nicolas@ndufresne.ca>
|
||||
Cc: Sumit Semwal <sumit.semwal@linaro.org>
|
||||
Cc: Hans Verkuil <hverkuil@xs4all.nl>
|
||||
Cc: Tomasz Figa <tfiga@chromium.org>
|
||||
Cc: linux-media@vger.kernel.org
|
||||
Cc: dri-devel@lists.freedesktop.org
|
||||
Cc: linaro-mm-sig@lists.linaro.org
|
||||
Signed-off-by: Markus Fritsche <mfritsche@reauktion.de>
|
||||
---
|
||||
drivers/media/common/videobuf2/Kconfig | 29 ++++
|
||||
.../media/common/videobuf2/videobuf2-core.c | 135 ++++++++++++++++++
|
||||
include/media/videobuf2-core.h | 51 +++++++
|
||||
3 files changed, 215 insertions(+)
|
||||
|
||||
diff --git a/drivers/media/common/videobuf2/Kconfig b/drivers/media/common/videobuf2/Kconfig
|
||||
index d2223a12c..bbfa26984 100644
|
||||
--- a/drivers/media/common/videobuf2/Kconfig
|
||||
+++ b/drivers/media/common/videobuf2/Kconfig
|
||||
@@ -30,3 +30,32 @@ config VIDEOBUF2_DMA_SG
|
||||
config VIDEOBUF2_DVB
|
||||
tristate
|
||||
select VIDEOBUF2_CORE
|
||||
+
|
||||
+config VIDEOBUF2_RELEASE_FENCES
|
||||
+ bool "videobuf2: opt-in dma_resv producer fences for V4L2 dmabuf exports"
|
||||
+ depends on VIDEOBUF2_CORE
|
||||
+ depends on DMA_SHARED_BUFFER
|
||||
+ default n
|
||||
+ help
|
||||
+ Enables an opt-in API that lets vb2 producers populate a dma_resv
|
||||
+ exclusive write fence on the dmabufs they export to userspace.
|
||||
+ The fence is signalled when the buffer transitions to
|
||||
+ VB2_BUF_STATE_DONE.
|
||||
+
|
||||
+ This gives userspace consumers that import V4L2-produced dmabufs
|
||||
+ and wait on the dmabuf's implicit-sync fence (poll(POLLIN),
|
||||
+ DMA_BUF_IOCTL_EXPORT_SYNC_FILE, EGL_LINUX_DMA_BUF_EXT) a real
|
||||
+ producer fence to wait on, instead of a stub fence from
|
||||
+ dma_fence_get_stub() that the dma_buf core substitutes when
|
||||
+ dma_resv is empty.
|
||||
+
|
||||
+ Drivers individually opt in by setting
|
||||
+ vb2_queue::supports_release_fences = true and calling
|
||||
+ vb2_buffer_attach_release_fence() at the right point in their
|
||||
+ pipeline (typically m2m device_run, just before HW kick).
|
||||
+
|
||||
+ Distributors leave this off unless targeting Wayland/EGL
|
||||
+ consumers of V4L2 stateless decoder output on
|
||||
+ implicit-sync-only GPU stacks (e.g. mainline panfrost).
|
||||
+
|
||||
+ If unsure, say N.
|
||||
diff --git a/drivers/media/common/videobuf2/videobuf2-core.c b/drivers/media/common/videobuf2/videobuf2-core.c
|
||||
index adf668b21..85d7fddbd 100644
|
||||
--- a/drivers/media/common/videobuf2/videobuf2-core.c
|
||||
+++ b/drivers/media/common/videobuf2/videobuf2-core.c
|
||||
@@ -26,6 +26,12 @@
|
||||
#include <linux/freezer.h>
|
||||
#include <linux/kthread.h>
|
||||
|
||||
+#ifdef CONFIG_VIDEOBUF2_RELEASE_FENCES
|
||||
+#include <linux/dma-fence.h>
|
||||
+#include <linux/dma-resv.h>
|
||||
+#include <linux/dma-buf.h>
|
||||
+#endif
|
||||
+
|
||||
#include <media/videobuf2-core.h>
|
||||
#include <media/v4l2-mc.h>
|
||||
|
||||
@@ -1173,6 +1179,120 @@ void *vb2_plane_cookie(struct vb2_buffer *vb, unsigned int plane_no)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(vb2_plane_cookie);
|
||||
|
||||
+#ifdef CONFIG_VIDEOBUF2_RELEASE_FENCES
|
||||
+/*
|
||||
+ * dma_resv release-fence integration.
|
||||
+ *
|
||||
+ * Optional, opt-in path that lets producers publish a real
|
||||
+ * dma_fence on their CAPTURE-side dmabufs so userspace consumers
|
||||
+ * (compositors, EGL importers) get spec-clean implicit-sync
|
||||
+ * semantics instead of the dma_buf core's stub fence. Drivers
|
||||
+ * call vb2_buffer_attach_release_fence() at a finite-time-fenced
|
||||
+ * point (typically m2m device_run) and the fence is signalled by
|
||||
+ * vb2_buffer_done(). Gated at runtime by
|
||||
+ * vb2_queue::supports_release_fences and at compile time by
|
||||
+ * CONFIG_VIDEOBUF2_RELEASE_FENCES.
|
||||
+ */
|
||||
+
|
||||
+static const char *vb2_dma_resv_get_driver_name(struct dma_fence *fence)
|
||||
+{
|
||||
+ return "videobuf2";
|
||||
+}
|
||||
+
|
||||
+static const char *vb2_dma_resv_get_timeline_name(struct dma_fence *fence)
|
||||
+{
|
||||
+ return "vb2-release-fence";
|
||||
+}
|
||||
+
|
||||
+static const struct dma_fence_ops vb2_dma_resv_fence_ops = {
|
||||
+ .get_driver_name = vb2_dma_resv_get_driver_name,
|
||||
+ .get_timeline_name = vb2_dma_resv_get_timeline_name,
|
||||
+};
|
||||
+
|
||||
+int vb2_buffer_attach_release_fence(struct vb2_buffer *vb)
+{
+	struct vb2_queue *q = vb->vb2_queue;
+	struct dma_fence *fence;
+	unsigned int plane;
+	int ret = 0;
+
+	if (!q->supports_release_fences)
+		return 0;
+
+	if (WARN_ON(vb->release_fence))
+		return -EINVAL;
+
+	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
+	if (!fence)
+		return -ENOMEM;
+
+	dma_fence_init(fence, &vb2_dma_resv_fence_ops, &q->dma_resv_fence_lock,
+		       q->dma_resv_fence_context,
+		       atomic64_inc_return(&q->dma_resv_fence_seqno));
+
+	/*
+	 * dma_resv_add_fence() requires a slot reserved by
+	 * dma_resv_reserve_fences() under the same lock hold. Reserving may
+	 * allocate, so this loop is deliberately not wrapped in a
+	 * dma_fence_begin_signalling() section. A plane that cannot get a
+	 * slot is skipped and the first such error is returned.
+	 * NOTE(review): confirm planes[].dbuf is set for EXPBUF'd MMAP buffers.
+	 */
+	for (plane = 0; plane < vb->num_planes; plane++) {
+		struct dma_buf *dbuf = vb->planes[plane].dbuf;
+		int err;
+
+		if (!dbuf)
+			continue;
+
+		dma_resv_lock(dbuf->resv, NULL);
+		err = dma_resv_reserve_fences(dbuf->resv, 1);
+		if (!err)
+			dma_resv_add_fence(dbuf->resv, fence, DMA_RESV_USAGE_WRITE);
+		dma_resv_unlock(dbuf->resv);
+		if (err && !ret)
+			ret = err;
+	}
+
+	/* The dma_fence_init() reference now belongs to vb->release_fence. */
+	vb->release_fence = fence;
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(vb2_buffer_attach_release_fence);
|
||||
+
|
||||
+static void vb2_buffer_signal_release_fence(struct vb2_buffer *vb,
+					    enum vb2_buffer_state state)
+{
+	struct dma_fence *pending = vb->release_fence;
+	bool annotation;
+
+	if (pending) {
+		/* Detach first; the local pointer still owns the reference. */
+		vb->release_fence = NULL;
+
+		annotation = dma_fence_begin_signalling();
+		if (state == VB2_BUF_STATE_ERROR)
+			dma_fence_set_error(pending, -EIO); /* before signal */
+		dma_fence_signal(pending);
+		dma_fence_end_signalling(annotation);
+		dma_fence_put(pending);
+	}
+}
|
||||
+#else /* !CONFIG_VIDEOBUF2_RELEASE_FENCES */
|
||||
+
|
||||
+int vb2_buffer_attach_release_fence(struct vb2_buffer *vb)
|
||||
+{
|
||||
+ return 0;
|
||||
+}
|
||||
+EXPORT_SYMBOL_GPL(vb2_buffer_attach_release_fence);
|
||||
+
|
||||
+static inline void vb2_buffer_signal_release_fence(struct vb2_buffer *vb,
|
||||
+ enum vb2_buffer_state state)
|
||||
+{
|
||||
+}
|
||||
+#endif /* CONFIG_VIDEOBUF2_RELEASE_FENCES */
|
||||
+
|
||||
void vb2_buffer_done(struct vb2_buffer *vb, enum vb2_buffer_state state)
|
||||
{
|
||||
struct vb2_queue *q = vb->vb2_queue;
|
||||
@@ -1199,6 +1319,9 @@ void vb2_buffer_done(struct vb2_buffer *vb, enum vb2_buffer_state state)
|
||||
if (state != VB2_BUF_STATE_QUEUED)
|
||||
__vb2_buf_mem_finish(vb);
|
||||
|
||||
+ if (state != VB2_BUF_STATE_QUEUED)
|
||||
+ vb2_buffer_signal_release_fence(vb, state);
|
||||
+
|
||||
spin_lock_irqsave(&q->done_lock, flags);
|
||||
if (state == VB2_BUF_STATE_QUEUED) {
|
||||
vb->state = VB2_BUF_STATE_QUEUED;
|
||||
@@ -2651,6 +2774,18 @@ int vb2_core_queue_init(struct vb2_queue *q)
|
||||
mutex_init(&q->mmap_lock);
|
||||
init_waitqueue_head(&q->done_wq);
|
||||
|
||||
+#ifdef CONFIG_VIDEOBUF2_RELEASE_FENCES
|
||||
+ /*
|
||||
+ * Per-queue dma_resv release-fence context. Drivers that
|
||||
+ * opt in via supports_release_fences and call
|
||||
+ * vb2_buffer_attach_release_fence() use these to allocate
|
||||
+ * fences on a single per-queue timeline.
|
||||
+ */
|
||||
+ q->dma_resv_fence_context = dma_fence_context_alloc(1);
|
||||
+ atomic64_set(&q->dma_resv_fence_seqno, 0);
|
||||
+ spin_lock_init(&q->dma_resv_fence_lock);
|
||||
+#endif
|
||||
+
|
||||
q->memory = VB2_MEMORY_UNKNOWN;
|
||||
|
||||
if (q->buf_struct_size == 0)
|
||||
diff --git a/include/media/videobuf2-core.h b/include/media/videobuf2-core.h
|
||||
index 4424d481d..766ff2194 100644
|
||||
--- a/include/media/videobuf2-core.h
|
||||
+++ b/include/media/videobuf2-core.h
|
||||
@@ -288,6 +288,16 @@ struct vb2_buffer {
|
||||
unsigned int skip_cache_sync_on_finish:1;
|
||||
|
||||
struct vb2_plane planes[VB2_MAX_PLANES];
|
||||
+#ifdef CONFIG_VIDEOBUF2_RELEASE_FENCES
|
||||
+ /*
|
||||
+ * Producer release fence published on each plane's
|
||||
+ * dmabuf->resv when the driver opts in via
|
||||
+ * vb2_buffer_attach_release_fence(). Signalled and put by
|
||||
+ * vb2_buffer_done() on transition to DONE/ERROR. NULL when
|
||||
+ * the driver did not opt in for this buffer.
|
||||
+ */
|
||||
+ struct dma_fence *release_fence;
|
||||
+#endif
|
||||
struct list_head queued_entry;
|
||||
struct list_head done_entry;
|
||||
#ifdef CONFIG_VIDEO_ADV_DEBUG
|
||||
@@ -648,6 +658,19 @@ struct vb2_queue {
|
||||
spinlock_t done_lock;
|
||||
wait_queue_head_t done_wq;
|
||||
|
||||
+#ifdef CONFIG_VIDEOBUF2_RELEASE_FENCES
|
||||
+ /*
|
||||
+ * dma_resv release-fence context. Drivers that set
|
||||
+ * supports_release_fences and call
|
||||
+ * vb2_buffer_attach_release_fence() use these to allocate
|
||||
+ * fences on a per-queue timeline.
|
||||
+ */
|
||||
+ u64 dma_resv_fence_context;
|
||||
+ atomic64_t dma_resv_fence_seqno;
|
||||
+ spinlock_t dma_resv_fence_lock;
|
||||
+#endif
|
||||
+
|
||||
+ unsigned int supports_release_fences:1;
|
||||
unsigned int streaming:1;
|
||||
unsigned int start_streaming_called:1;
|
||||
unsigned int error:1;
|
||||
@@ -735,6 +758,34 @@ void *vb2_plane_cookie(struct vb2_buffer *vb, unsigned int plane_no);
|
||||
*/
|
||||
void vb2_buffer_done(struct vb2_buffer *vb, enum vb2_buffer_state state);
|
||||
|
||||
+/**
|
||||
+ * vb2_buffer_attach_release_fence() - opt-in dma_resv release fence.
|
||||
+ * @vb: the buffer being committed to the producer.
|
||||
+ *
|
||||
+ * Drivers that have set vb2_queue::supports_release_fences may call
|
||||
+ * this from any sleepable context where they have committed to
|
||||
+ * running the operation in finite time -- typically m2m
|
||||
+ * device_run(), just before the HW kick. The helper allocates a
|
||||
+ * dma_fence on the queue's per-queue timeline, attaches it as
|
||||
+ * DMA_RESV_USAGE_WRITE on each plane's dmabuf->resv, and stashes
|
||||
+ * it in vb->release_fence. vb2_buffer_done() signals and puts the
|
||||
+ * fence as part of the buffer's state transition.
|
||||
+ *
|
||||
+ * Skips planes whose vb2_plane.dbuf is NULL -- buffers never
|
||||
+ * exported via VIDIOC_EXPBUF (or imported via V4L2_MEMORY_DMABUF)
|
||||
+ * have no dmabuf for userspace to wait on.
|
||||
+ *
|
||||
+ * No-op when vb2_queue::supports_release_fences is not set
|
||||
+ * (regardless of CONFIG_VIDEOBUF2_RELEASE_FENCES). When
|
||||
+ * CONFIG_VIDEOBUF2_RELEASE_FENCES=n, this is a stub that returns 0.
|
||||
+ *
|
||||
+ * Returns 0 on success or when the no-op stub is in effect,
|
||||
+ * negative errno on allocation failure when fence publishing was
|
||||
+ * attempted. Best-effort: drivers should ignore the return value
|
||||
+ * unless they want diagnostics.
|
||||
+ */
|
||||
+int vb2_buffer_attach_release_fence(struct vb2_buffer *vb);
|
||||
+
|
||||
/**
|
||||
* vb2_discard_done() - discard all buffers marked as DONE.
|
||||
* @q: pointer to &struct vb2_queue with videobuf2 queue.
|
||||
--
|
||||
2.53.0
|
||||
|
||||
+95
@@ -0,0 +1,95 @@
|
||||
From 1844c263bde8dd244d7db46f8c508e7c70da459c Mon Sep 17 00:00:00 2001
|
||||
In-Reply-To: <20260429195306.239666-1-mfritsche@reauktion.de>
|
||||
References: <20260429195306.239666-1-mfritsche@reauktion.de>
|
||||
From: Markus Fritsche <mfritsche@reauktion.de>
|
||||
Date: Sat, 9 May 2026 16:24:01 +0200
|
||||
Subject: [PATCH RFC v2] media: hantro: attach dma_resv release fence at
|
||||
device_run
|
||||
|
||||
Opt the hantro driver into the new vb2 release-fence helper so its
|
||||
CAPTURE-side dmabufs carry a real producer fence that Wayland
|
||||
compositors and other implicit-sync consumers can wait on, instead
|
||||
of the dma_buf core's stub fence.
|
||||
|
||||
Attach point is m2m device_run, immediately after
|
||||
v4l2_m2m_buf_copy_metadata() and before ctx->codec_ops->run().
|
||||
Per Nicolas Dufresne's v1 review (lore.kernel.org/linux-media/
|
||||
3d8deeb15581b754e4c061d4c4a13657aa08bc3c.camel@ndufresne.ca/),
|
||||
this satisfies the dma_fence finite-time contract: the m2m core
|
||||
has committed to running the job by this point, codec_ops->run
|
||||
either kicks the HW (decode-complete signals the fence via
|
||||
vb2_buffer_done) or fails immediately (job_finish with
|
||||
VB2_BUF_STATE_ERROR signals with -EIO). PM and clocks are already
|
||||
up by this point, so no allocation context restrictions.
|
||||
|
||||
The CAPTURE queue is opted in with supports_release_fences=true at
|
||||
queue_init.
|
||||
|
||||
Userspace consumers that import hantro CAPTURE dmabufs and wait on
|
||||
their implicit-sync fence (Wayland zwp_linux_dmabuf_v1 +
|
||||
panfrost EGL_LINUX_DMA_BUF_EXT) now wait on a real fence
|
||||
representing the producer's actual completion, fixing green-frame
|
||||
corruption observed on RK3566 PineTab2 + Mali-G52 panfrost (the
|
||||
GPU was sampling zero pages because the dmabuf's implicit fence
|
||||
was the dma_buf core's pre-signalled stub).
|
||||
|
||||
Validated end-to-end on PineTab2 (RK3566 / hantro G1 / Mali-G52
|
||||
mainline panfrost): 30s of bbb_1080p30 H.264 stateless decode +
|
||||
zero-copy panfrost EGL import via dmabuf-wayland (mpv 0.41 +
|
||||
KWin 6.6.4 + Mesa panfrost 26.0.5) renders correctly with no
|
||||
green-frame corruption and no PROVE_LOCKING splats.
|
||||
|
||||
Cc: Ezequiel Garcia <ezequiel@vanguardiasur.com.ar>
|
||||
Cc: Philipp Zabel <p.zabel@pengutronix.de>
|
||||
Cc: Nicolas Dufresne <nicolas@ndufresne.ca>
|
||||
Cc: linux-media@vger.kernel.org
|
||||
Cc: linux-rockchip@lists.infradead.org
|
||||
Signed-off-by: Markus Fritsche <mfritsche@reauktion.de>
|
||||
---
|
||||
.../media/platform/verisilicon/hantro_drv.c | 23 +++++++++++++++++++
|
||||
1 file changed, 23 insertions(+)
|
||||
|
||||
diff --git a/drivers/media/platform/verisilicon/hantro_drv.c b/drivers/media/platform/verisilicon/hantro_drv.c
|
||||
index 2e81877f6..6a66c47ed 100644
|
||||
--- a/drivers/media/platform/verisilicon/hantro_drv.c
|
||||
+++ b/drivers/media/platform/verisilicon/hantro_drv.c
|
||||
@@ -186,6 +186,22 @@ static void device_run(void *priv)
|
||||
|
||||
v4l2_m2m_buf_copy_metadata(src, dst);
|
||||
|
||||
+ /*
|
||||
+ * Attach a producer fence on the CAPTURE-side dmabuf so userspace
|
||||
+ * importers (e.g. Wayland compositors) get spec-clean implicit-sync
|
||||
+ * semantics. Called from device_run rather than buf_queue: the
|
||||
+ * dma_fence finite-time contract requires that once a fence is
|
||||
+ * published, the producer must signal it in finite time. By the
|
||||
+ * time we reach device_run, the m2m core has committed to running
|
||||
+ * this job, and the next hop (codec_ops->run) either kicks the HW
|
||||
+ * (decode-complete signals the fence via vb2_buffer_done) or
|
||||
+ * fails immediately (job_finish with VB2_BUF_STATE_ERROR signals
|
||||
+ * the fence with -EIO). Either path resolves the fence in finite
|
||||
+ * time. Best-effort: a NOMEM here means we lose implicit-sync
|
||||
+ * precision for this frame, no functional regression.
|
||||
+ */
|
||||
+ (void)vb2_buffer_attach_release_fence(&dst->vb2_buf);
|
||||
+
|
||||
if (ctx->codec_ops->run(ctx))
|
||||
goto err_cancel_job;
|
||||
|
||||
@@ -249,6 +265,13 @@ queue_init(void *priv, struct vb2_queue *src_vq, struct vb2_queue *dst_vq)
|
||||
dst_vq->lock = &ctx->dev->vpu_mutex;
|
||||
dst_vq->dev = ctx->dev->v4l2_dev.dev;
|
||||
|
||||
+ /*
|
||||
+ * Opt the CAPTURE queue into vb2 release-fence publishing.
|
||||
+ * No-op unless CONFIG_VIDEOBUF2_RELEASE_FENCES=y; runtime cost
|
||||
+ * is one extra fence allocation + dma_resv update per device_run.
|
||||
+ */
|
||||
+ dst_vq->supports_release_fences = true;
|
||||
+
|
||||
return vb2_queue_init(dst_vq);
|
||||
}
|
||||
|
||||
--
|
||||
2.53.0
|
||||
|
||||
+117
@@ -0,0 +1,117 @@
|
||||
From 2c63a63bf65739763051dc4ce7ce2ffaf2d514c4 Mon Sep 17 00:00:00 2001
|
||||
In-Reply-To: <20260429195306.239666-1-mfritsche@reauktion.de>
|
||||
References: <20260429195306.239666-1-mfritsche@reauktion.de>
|
||||
From: Markus Fritsche <mfritsche@reauktion.de>
|
||||
Date: Sat, 9 May 2026 16:50:51 +0200
|
||||
Subject: [PATCH RFC v2] media: rockchip-rga: attach dma_resv release fence at
|
||||
device_run
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Opt the rockchip-rga driver into the new vb2 release-fence helper.
|
||||
|
||||
Same shape as the hantro patch: attach a producer fence on the
|
||||
CAPTURE-side dmabuf at m2m device_run, signalled by
|
||||
vb2_buffer_done() when RGA completes the m2m operation.
|
||||
|
||||
Differs from hantro in one mechanical detail: rga's device_run
|
||||
wraps the entire body in spin_lock_irqsave(&rga->ctrl_lock). Our
|
||||
helper calls dma_resv_lock(), which is sleepable, so the
|
||||
buffer-fetch + fence-attach sequence has to run above the spinlock.
|
||||
Restructure device_run so:
|
||||
|
||||
- v4l2_m2m_next_src_buf / next_dst_buf,
|
||||
- src->sequence increment,
|
||||
- vb2_buffer_attach_release_fence(&dst->vb2_buf)
|
||||
|
||||
run before spin_lock_irqsave; only the rga->curr assignment and
|
||||
rga_hw_start() (the actual HW kick) remain inside the spinlock.
|
||||
|
||||
This is safe under the m2m-job ownership model: by the time
|
||||
device_run is called, the m2m core has selected this context and
|
||||
serializes one device_run per context, so v4l2_m2m_next_*_buf
|
||||
returns stable pointers until the corresponding *_buf_remove in
|
||||
rga_isr. ctrl_lock was previously protecting per-device state
|
||||
(rga->curr) and the HW register access, neither of which depends on
|
||||
the buffer-fetch happening inside the lock.
|
||||
|
||||
The CAPTURE queue is opted in with supports_release_fences=true at
|
||||
queue_init.
|
||||
|
||||
Userspace consumers of RGA-produced dmabufs (image-processing
|
||||
pipelines, screen-rotation servers, gstreamer flows on Rockchip
|
||||
boards) get spec-clean implicit-sync semantics, matching what
|
||||
hantro does in the previous patch in this series.
|
||||
|
||||
Sven Püschel's ongoing "media: platform: rga: Add RGA3 support"
|
||||
v5 series (linux-rockchip 2026-04-28) restructures rga.c
|
||||
substantially. If that lands first, the device_run restructure
|
||||
here will need a rebase against the new shape; the locking story
|
||||
itself is invariant.
|
||||
|
||||
Cc: Jacob Chen <jacob-chen@iotwrt.com>
|
||||
Cc: Ezequiel Garcia <ezequiel@vanguardiasur.com.ar>
|
||||
Cc: Sven Püschel <s.pueschel@pengutronix.de>
|
||||
Cc: Heiko Stuebner <heiko@sntech.de>
|
||||
Cc: Hans Verkuil <hverkuil@xs4all.nl>
|
||||
Cc: linux-media@vger.kernel.org
|
||||
Cc: linux-rockchip@lists.infradead.org
|
||||
Signed-off-by: Markus Fritsche <mfritsche@reauktion.de>
|
||||
---
|
||||
drivers/media/platform/rockchip/rga/rga.c | 27 +++++++++++++++++++----
|
||||
1 file changed, 23 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/drivers/media/platform/rockchip/rga/rga.c b/drivers/media/platform/rockchip/rga/rga.c
|
||||
index fea63b94c..03030c7ea 100644
|
||||
--- a/drivers/media/platform/rockchip/rga/rga.c
|
||||
+++ b/drivers/media/platform/rockchip/rga/rga.c
|
||||
@@ -38,15 +38,28 @@ static void device_run(void *prv)
|
||||
struct vb2_v4l2_buffer *src, *dst;
|
||||
unsigned long flags;
|
||||
|
||||
- spin_lock_irqsave(&rga->ctrl_lock, flags);
|
||||
-
|
||||
- rga->curr = ctx;
|
||||
-
|
||||
+ /*
|
||||
+ * Fetch the next-job buffers and (best-effort) attach a producer
|
||||
+ * fence on CAPTURE before taking ctrl_lock below.
|
||||
+ * vb2_buffer_attach_release_fence() takes dma_resv_lock, which is
|
||||
+ * sleepable; ctrl_lock is taken with spin_lock_irqsave so any
|
||||
+ * sleepable call must happen above it. Buffer ownership is
|
||||
+ * already committed at this point: the m2m core has selected
|
||||
+ * this context for device_run and serializes one device_run per
|
||||
+ * context, so v4l2_m2m_next_*_buf returns stable pointers until
|
||||
+ * the corresponding *_buf_remove in rga_isr.
|
||||
+ */
|
||||
src = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
|
||||
src->sequence = ctx->osequence++;
|
||||
|
||||
dst = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
|
||||
|
||||
+ (void)vb2_buffer_attach_release_fence(&dst->vb2_buf);
|
||||
+
|
||||
+ spin_lock_irqsave(&rga->ctrl_lock, flags);
|
||||
+
|
||||
+ rga->curr = ctx;
|
||||
+
|
||||
rga_hw_start(rga, vb_to_rga(src), vb_to_rga(dst));
|
||||
|
||||
spin_unlock_irqrestore(&rga->ctrl_lock, flags);
|
||||
@@ -123,6 +136,12 @@ queue_init(void *priv, struct vb2_queue *src_vq, struct vb2_queue *dst_vq)
|
||||
dst_vq->lock = &ctx->rga->mutex;
|
||||
dst_vq->dev = ctx->rga->v4l2_dev.dev;
|
||||
|
||||
+ /*
|
||||
+ * Opt the CAPTURE queue into vb2 release-fence publishing.
|
||||
+ * Compile-time gated by CONFIG_VIDEOBUF2_RELEASE_FENCES.
|
||||
+ */
|
||||
+ dst_vq->supports_release_fences = true;
|
||||
+
|
||||
return vb2_queue_init(dst_vq);
|
||||
}
|
||||
|
||||
--
|
||||
2.53.0
|
||||
|
||||
@@ -10,8 +10,8 @@
|
||||
pkgbase=linux-fresnel-fourier
|
||||
pkgname=("$pkgbase" "$pkgbase-headers")
|
||||
pkgver=7.0
|
||||
pkgrel=1
|
||||
pkgdesc='Pinebook Pro kernel (mmind/linux-rockchip v7.0 + OC OPP + PBP DTS tweaks)'
|
||||
pkgrel=14
|
||||
pkgdesc='Pinebook Pro kernel (mmind/linux-rockchip v7.0 + OC OPP + PBP DTS tweaks + vb2_dma_resv RFC v2)'
|
||||
arch=(aarch64)
|
||||
url='https://git.reauktion.de/marfrit/kernel-agent'
|
||||
license=(GPL-2.0-only)
|
||||
@@ -23,15 +23,20 @@ makedepends=(
|
||||
options=('!strip')
|
||||
source=(
|
||||
"https://git.kernel.org/torvalds/t/linux-${pkgver}.tar.gz"
|
||||
# board/pinebook-pro
|
||||
'0001-arm64-dts-rk3399-pinebook-pro-add-OC-OPP-tables-1704-2184.patch'
|
||||
'0002-arm64-dts-rk3399-pinebook-pro-enable-hdmi-sound.patch'
|
||||
'0003-arm64-dts-rk3399-pinebook-pro-spi1-max-freq-10MHz.patch'
|
||||
# subsystem/media/videobuf2/dma-resv-release-fence (RFC v2, in kernel-agent)
|
||||
'0004-media-videobuf2-add-opt-in-dma_resv-producer-fence-h.patch'
|
||||
'0005-media-hantro-attach-dma_resv-release-fence-at-device.patch'
|
||||
'0006-media-rockchip-rga-attach-dma_resv-release-fence-at-.patch'
|
||||
'config' # snapshot of fresnel /usr/lib/modules/6.19.10-1-eos-arm/build/.config
|
||||
'linux-fresnel-fourier.preset'
|
||||
'extlinux-add.hook'
|
||||
'extlinux-add.sh'
|
||||
)
|
||||
sha256sums=('SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP')
|
||||
sha256sums=('SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP' 'SKIP')
|
||||
|
||||
_kernver=${pkgver}.0-fresnel-fourier
|
||||
_srcdir=linux-${pkgver}
|
||||
|
||||
@@ -0,0 +1,19 @@
|
||||
# makepkg outputs and intermediate state: build-time artifacts that must
# never be committed. Each pattern is listed once.
src/
pkg/
*.pkg.tar.zst
*.pkg.tar.zst.sig
*-build.log*
*-prepare.log
*-package_*.log
*.tar.xz
*.tar.zst
*.tar.sign
*.patch.zst
*.patch.zst.sig
logpipe.*
|
||||
+4647
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,247 @@
|
||||
# Maintainer: Markus Fritsche <fritsche.markus@gmail.com>
|
||||
# Forked from: linux-pinetab2 by Danct12 <danct12@disroot.org>
|
||||
# Original Contributor: Jan Alexander Steffens (heftig) <heftig@archlinux.org>
|
||||
#
|
||||
# linux-pinetab2-danctnix-besser: linux-pinetab2 + the BESser bes2600
|
||||
# driver patchset. Soft-upstream fork of linux-pinetab2 — drop-in
|
||||
# replacement, same kernel version, only the bes2600 staging driver
|
||||
# differs. See git.reauktion.de/marfrit/besser and
|
||||
# git.reauktion.de/marfrit/bes2600-dkms for full provenance.
|
||||
#
|
||||
# ─────────────────────────────────────────────────────────────────────
|
||||
# kernel-agent flow (effective pkgrel=4):
|
||||
#
|
||||
# The single 0001-bes2600-besser-kernel-agent-cumulative.patch is
|
||||
# regenerated by `ka-promote ohm` in
|
||||
# git.reauktion.de/marfrit/kernel-agent against fleet/ohm.yaml. Its
|
||||
# b2sum below MUST match the cumulative.b2sum recorded in the
|
||||
# co-located manifest.lock. Sibling manifest.lock pins the source-of-
|
||||
# truth manifest sha256 and per-include patch sha256s.
|
||||
#
|
||||
# To update for a new patch in the manifest:
|
||||
# 1. Edit kernel-agent:fleet/ohm.yaml (add/remove includes)
|
||||
# 2. `ka-promote ohm` → writes new cumulative.patch + manifest.lock
|
||||
# to kernel-agent/build/ohm/v7.0-danctnix1/
|
||||
# 3. Copy both files into this directory (replacing existing ones)
|
||||
# 4. Update the b2sum in this PKGBUILD for the cumulative
|
||||
# 5. Bump pkgrel
|
||||
# ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
pkgbase=linux-pinetab2-danctnix-besser
|
||||
pkgver=7.0.danctnix1
|
||||
pkgrel=5
|
||||
pkgdesc='PineTab2 (BESser bes2600 driver patchset, kernel-agent managed)'
|
||||
_srcname=linux-pinetab2
|
||||
_srctag=v${pkgver%.*}-${pkgver##*.}
|
||||
arch=(aarch64)
|
||||
_url_git="https://codeberg.org/DanctNIX/${_srcname}"
|
||||
url="${_url_git}/commits/tag/$_srctag"
|
||||
license=(GPL-2.0-only)
|
||||
makedepends=(
|
||||
bc
|
||||
cpio
|
||||
gettext
|
||||
git
|
||||
libelf
|
||||
pahole
|
||||
perl
|
||||
python
|
||||
tar
|
||||
xz
|
||||
)
|
||||
options=(
|
||||
!debug
|
||||
!strip
|
||||
)
|
||||
source=(
|
||||
https://cdn.kernel.org/pub/linux/kernel/v${pkgver%%.*}.x/linux-${pkgver%.*}.tar.{xz,sign}
|
||||
${_url_git}/releases/download/${_srctag}/${_srctag}.patch.zst{,.sig}
|
||||
0001-bes2600-besser-kernel-agent-cumulative.patch
|
||||
config
|
||||
)
|
||||
validpgpkeys=(
|
||||
ABAF11C65A2970B130ABE3C479BE3E4300411886 # Linus Torvalds
|
||||
647F28654894E3BD457199BE38DBBDC86092693E # Greg Kroah-Hartman
|
||||
F09A933C0FE0331E558CA4E166CAB7EAA45DD781 # Danct12
|
||||
)
|
||||
b2sums=('3d9795083c8938f80f480de0d10bfd9c525640e59d5c7f22983de3f12ee42c84c31be902cafb05579ddb1c32bac5ed06b0d4953f9705450be185bd2d9ab08f89'
|
||||
'SKIP'
|
||||
'71fe98221e802b315e54b4b10d3e8c8f376695a36bae3541d876e5776a37f3fa33c8f8dfa6e51fcbd6f5396add02e5166634165f2351836a0ea0453c172fe56c'
|
||||
'SKIP'
|
||||
'50397711a6a3ba522283685a9e7397aeed6663f353f7cba214d4bb88bc98516065b2fca9a36ce13c52644617879f69f39c5305e86db5d9fb25c4dae5434eb9c4'
|
||||
'656a998ab40cb85ee4c00f087b071a91632a6c091da2c84b0f74236b51d2dea6e9db6886625f80ad81dc249d8494ec47cd79d6dd9ea4f5e44f3cde857f861e10')
|
||||
|
||||
export KBUILD_BUILD_HOST=archlinux
|
||||
export KBUILD_BUILD_USER=$pkgbase
|
||||
export KBUILD_BUILD_TIMESTAMP="$(date -Ru${SOURCE_DATE_EPOCH:+d @$SOURCE_DATE_EPOCH})"
|
||||
|
||||
prepare() {
|
||||
cd linux-${pkgver%.*}
|
||||
|
||||
echo "Setting version..."
|
||||
echo "-$pkgrel" > localversion.10-pkgrel
|
||||
echo "${pkgbase#linux}" > localversion.20-pkgname
|
||||
|
||||
local src
|
||||
for src in "${source[@]}"; do
|
||||
src="${src%%::*}"
|
||||
src="${src##*/}"
|
||||
src="${src%.zst}"
|
||||
[[ $src = *.patch ]] || continue
|
||||
echo "Applying patch: $src..."
|
||||
patch -Np1 < "../$src"
|
||||
done
|
||||
|
||||
echo "Setting config..."
|
||||
cp ../config .config
|
||||
make olddefconfig
|
||||
diff -u ../config .config || :
|
||||
|
||||
make -s kernelrelease > version
|
||||
echo "Prepared $pkgbase version $(<version)"
|
||||
}
|
||||
|
||||
build() {
|
||||
cd linux-${pkgver%.*}
|
||||
make DTC_FLAGS="-@" all
|
||||
make -C tools/bpf/bpftool vmlinux.h feature-clang-bpf-co-re=1
|
||||
}
|
||||
|
||||
_package() {
|
||||
pkgdesc="The $pkgdesc kernel and modules"
|
||||
depends=(
|
||||
coreutils
|
||||
kmod
|
||||
mkinitcpio
|
||||
)
|
||||
optdepends=(
|
||||
'wireless-regdb: to set the correct wireless channels of your country'
|
||||
'linux-firmware: firmware images needed for some devices'
|
||||
)
|
||||
provides=(
|
||||
KSMBD-MODULE
|
||||
WIREGUARD-MODULE
|
||||
"linux-pinetab2=$pkgver-$pkgrel"
|
||||
)
|
||||
conflicts=(linux-pinetab2)
|
||||
replaces=(
|
||||
wireguard-arch
|
||||
)
|
||||
|
||||
cd linux-${pkgver%.*}
|
||||
local modulesdir="$pkgdir/usr/lib/modules/$(<version)"
|
||||
|
||||
echo "Installing boot image..."
|
||||
# systemd expects to find the kernel here to allow hibernation
|
||||
# https://github.com/systemd/systemd/commit/edda44605f06a41fb86b7ab8128dcf99161d2344
|
||||
install -Dm644 "$(make -s image_name)" "$modulesdir/vmlinuz"
|
||||
|
||||
# Used by mkinitcpio to name the kernel
|
||||
echo "$pkgbase" | install -Dm644 /dev/stdin "$modulesdir/pkgbase"
|
||||
|
||||
echo "Installing modules..."
|
||||
ZSTD_CLEVEL=19 make INSTALL_MOD_PATH="$pkgdir/usr" INSTALL_MOD_STRIP=1 \
|
||||
DEPMOD=/doesnt/exist modules_install # Suppress depmod
|
||||
|
||||
echo "Installing device trees..."
|
||||
make INSTALL_DTBS_PATH="$pkgdir/boot/dtbs" dtbs_install
|
||||
|
||||
# Removing unnecessary device trees (keep only pinetab2 variants).
|
||||
# find -delete is robust to nullglob/cwd quirks where a bash for-loop
|
||||
# silently no-op'd in the makepkg environment.
|
||||
find "$pkgdir"/boot/dtbs/rockchip/ -mindepth 1 -maxdepth 1 -type f \
|
||||
! -name 'rk3566-pinetab2-*' -delete
|
||||
|
||||
# remove build link
|
||||
rm "$modulesdir"/build
|
||||
}
|
||||
|
||||
_package-headers() {
|
||||
pkgdesc="Headers and scripts for building modules for the $pkgdesc kernel"
|
||||
depends=(pahole)
|
||||
|
||||
cd linux-${pkgver%.*}
|
||||
local builddir="$pkgdir/usr/lib/modules/$(<version)/build"
|
||||
|
||||
echo "Installing build files..."
|
||||
install -Dt "$builddir" -m644 .config Makefile Module.symvers System.map \
|
||||
localversion.* version vmlinux tools/bpf/bpftool/vmlinux.h
|
||||
install -Dt "$builddir/kernel" -m644 kernel/Makefile
|
||||
install -Dt "$builddir/arch/arm64" -m644 arch/arm64/Makefile
|
||||
cp -t "$builddir" -a scripts
|
||||
|
||||
# required when DEBUG_INFO_BTF_MODULES is enabled
|
||||
install -Dt "$builddir/tools/bpf/resolve_btfids" tools/bpf/resolve_btfids/resolve_btfids
|
||||
|
||||
echo "Installing headers..."
|
||||
cp -t "$builddir" -a include
|
||||
cp -t "$builddir/arch/arm64" -a arch/arm64/include
|
||||
install -Dt "$builddir/arch/arm64/kernel" -m644 arch/arm64/kernel/asm-offsets.s
|
||||
|
||||
install -Dt "$builddir/drivers/md" -m644 drivers/md/*.h
|
||||
install -Dt "$builddir/net/mac80211" -m644 net/mac80211/*.h
|
||||
|
||||
# https://bugs.archlinux.org/task/13146
|
||||
install -Dt "$builddir/drivers/media/i2c" -m644 drivers/media/i2c/msp3400-driver.h
|
||||
|
||||
# https://bugs.archlinux.org/task/20402
|
||||
install -Dt "$builddir/drivers/media/usb/dvb-usb" -m644 drivers/media/usb/dvb-usb/*.h
|
||||
install -Dt "$builddir/drivers/media/dvb-frontends" -m644 drivers/media/dvb-frontends/*.h
|
||||
install -Dt "$builddir/drivers/media/tuners" -m644 drivers/media/tuners/*.h
|
||||
|
||||
# https://bugs.archlinux.org/task/71392
|
||||
install -Dt "$builddir/drivers/iio/common/hid-sensors" -m644 drivers/iio/common/hid-sensors/*.h
|
||||
|
||||
echo "Installing KConfig files..."
|
||||
find . -name 'Kconfig*' -exec install -Dm644 {} "$builddir/{}" \;
|
||||
|
||||
echo "Removing unneeded architectures..."
|
||||
local arch
|
||||
for arch in "$builddir"/arch/*/; do
|
||||
[[ $arch = */arm64/ ]] && continue
|
||||
echo "Removing $(basename "$arch")"
|
||||
rm -r "$arch"
|
||||
done
|
||||
|
||||
echo "Removing documentation..."
|
||||
rm -r "$builddir/Documentation"
|
||||
|
||||
echo "Removing broken symlinks..."
|
||||
find -L "$builddir" -type l -printf 'Removing %P\n' -delete
|
||||
|
||||
echo "Removing loose objects..."
|
||||
find "$builddir" -type f -name '*.o' -printf 'Removing %P\n' -delete
|
||||
|
||||
echo "Stripping build tools..."
|
||||
local file
|
||||
while read -rd '' file; do
|
||||
case "$(file -Sib "$file")" in
|
||||
application/x-sharedlib\;*) # Libraries (.so)
|
||||
strip -v $STRIP_SHARED "$file" ;;
|
||||
application/x-archive\;*) # Libraries (.a)
|
||||
strip -v $STRIP_STATIC "$file" ;;
|
||||
application/x-executable\;*) # Binaries
|
||||
strip -v $STRIP_BINARIES "$file" ;;
|
||||
application/x-pie-executable\;*) # Relocatable binaries
|
||||
strip -v $STRIP_SHARED "$file" ;;
|
||||
esac
|
||||
done < <(find "$builddir" -type f -perm -u+x ! -name vmlinux -print0)
|
||||
|
||||
echo "Stripping vmlinux..."
|
||||
strip -v $STRIP_STATIC "$builddir/vmlinux"
|
||||
|
||||
echo "Adding symlink..."
|
||||
mkdir -p "$pkgdir/usr/src"
|
||||
ln -sr "$builddir" "$pkgdir/usr/src/$pkgbase"
|
||||
}
|
||||
|
||||
pkgname=(
|
||||
"$pkgbase"
|
||||
"$pkgbase-headers"
|
||||
)
|
||||
for _p in "${pkgname[@]}"; do
|
||||
eval "package_$_p() {
|
||||
$(declare -f "_package${_p#$pkgbase}")
|
||||
_package${_p#$pkgbase}
|
||||
}"
|
||||
done
|
||||
@@ -0,0 +1,52 @@
|
||||
# linux-pinetab2-danctnix-besser
|
||||
|
||||
DanctNIX PineTab2 kernel + the BESser bes2600 driver patchset.
|
||||
**kernel-agent managed** as of pkgrel=4 (2026-05-18).
|
||||
|
||||
## TL;DR
|
||||
|
||||
- **What**: drop-in replacement for `linux-pinetab2` with the BESser
|
||||
bes2600 patchset applied.
|
||||
- **For**: PineTab2 (RK3566) with the on-board BES2600 SDIO WiFi/BT chip.
|
||||
- **Install**: `pacman -U linux-pinetab2-danctnix-besser-*.pkg.tar.zst
|
||||
linux-pinetab2-danctnix-besser-headers-*.pkg.tar.zst`. It `provides`
|
||||
and `conflicts` with `linux-pinetab2`, so it slots in cleanly.
|
||||
- **Source of truth for patches**:
|
||||
[git.reauktion.de/marfrit/kernel-agent](https://git.reauktion.de/marfrit/kernel-agent)
|
||||
`fleet/ohm.yaml`.
|
||||
|
||||
## How the patch cumulative is built
|
||||
|
||||
1. `kernel-agent/fleet/ohm.yaml` lists the patch series-dir includes
|
||||
for this kernel.
|
||||
2. `kernel-agent/bin/ka-promote ohm` walks those includes, concatenates
|
||||
them into a single `cumulative.patch`, and emits a `manifest.lock`
|
||||
recording per-patch sha256s and the cumulative b2sum.
|
||||
3. The cumulative is dropped into this directory as
|
||||
`0001-bes2600-besser-kernel-agent-cumulative.patch`, the b2sum in
|
||||
`PKGBUILD` is updated to match, and `pkgrel` is bumped.
|
||||
4. `manifest.lock` is committed alongside the cumulative for an
|
||||
audit trail (which patches went in, in what order).
|
||||
|
||||
## pkgrel history
|
||||
|
||||
| pkgrel | Date | Notes |
|
||||
|--------|------------|-------------------------------------------------|
|
||||
| 1–3 | 2026-05-08…05-18 | hand-managed in `marfrit/besser/danctnix-besser-pkgbuild/` (DEPRECATED) |
|
||||
| 4 | 2026-05-18 | first kernel-agent-managed release; same source contents as the hand-managed pkgrel=3 (Patch I + SCS workaround folded into single cumulative); fixes pkgrel=3's duplicated `0003-arm64-...patch` source-array bug |
|
||||
|
||||
## Known caveats
|
||||
|
||||
- **`CONFIG_SHADOW_CALL_STACK=n`** in the config: GCC 15.2.1 fails to
|
||||
build `arch/arm64/lib/xor-neon.c` with `arm_neon.h` `#pragma GCC
|
||||
target()` blocks and `-fsanitize=shadow-call-stack`. See
|
||||
`reference_arm64_scs_arm_neon_gcc15` memory and the Makefile
|
||||
workaround patch included in the cumulative. Track upstream GCC
|
||||
resolution before re-enabling.
|
||||
- The `cumulative-c5x-danctnix` series-dir in kernel-agent is currently
|
||||
a single-file interim (the working hand-curated cumulative) because
|
||||
the per-series mirrors created in `kernel-agent#17` use DKMS-style
|
||||
root paths instead of in-tree staging paths and do not apply
|
||||
cleanly. Per-series reconstruction is a separate kernel-agent
|
||||
follow-up. The b2sum integrity check in `PKGBUILD` still pins the
|
||||
exact bytes shipped.
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,37 @@
|
||||
baseline:
|
||||
ref: v7.0-danctnix1
|
||||
tree: DanctNIX/linux-pinetab2
|
||||
upstream_compat: linux-7.0
|
||||
url: https://codeberg.org/DanctNIX/linux-pinetab2
|
||||
cumulative:
|
||||
b2sum: 50397711a6a3ba522283685a9e7397aeed6663f353f7cba214d4bb88bc98516065b2fca9a36ce13c52644617879f69f39c5305e86db5d9fb25c4dae5434eb9c4
|
||||
path: cumulative.patch
|
||||
size: 162716
|
||||
generated_at: '2026-05-18T17:16:06.455474+00:00'
|
||||
host: ohm
|
||||
ka_promote_version: 1
|
||||
manifest:
|
||||
path: fleet/ohm.yaml
|
||||
sha256: da59ac2c965e5ad9c5004a115b10a37abf47ed3ecc8b7f5ab426470d2ee7b442
|
||||
resolved_patches:
|
||||
- apply_order: 1
|
||||
from_series: true
|
||||
include: driver/bes2600/cumulative-c5x-danctnix/0001-bes2600-besser-cumulative-series.patch
|
||||
sha256: e477a170567487fef84fe13be5b0a1f0498247ff1f201000d0085a2e49ff9026
|
||||
size: 148149
|
||||
- apply_order: 2
|
||||
from_series: true
|
||||
include: driver/bes2600/scan-filter-5ghz-danctnix/0001-bes2600-filter-5ghz-scan-and-allow-single-channel.patch
|
||||
sha256: 31e67569e00daead0784214aced1e077d3270cf1407baa0b330d474e17ec3931
|
||||
size: 7735
|
||||
- apply_order: 3
|
||||
from_series: true
|
||||
include: arch/arm64/xor-neon-ffixed-x18-scs-build-fix-danctnix/0001-arm64-xor-neon-ffixed-x18-build-fix.patch
|
||||
sha256: a49c50f0ebffc499970c24908b832c3e61c96ed87de35b3a82178eff587f94f1
|
||||
size: 1574
|
||||
- apply_order: 4
|
||||
from_series: true
|
||||
include: driver/bes2600/queue-pending-record-lock-bh-danctnix/0001-bes2600-take-pending-record-lock-with-bh.patch
|
||||
sha256: 089862e5f6da5783ed0db979144e4fa07cff7f743809a0bebd715c75a3bb8eb5
|
||||
size: 5258
|
||||
schema_version: 1
|
||||
+6
-3
@@ -3,15 +3,18 @@
|
||||
# Source of truth: git.reauktion.de/marfrit/lmcp
|
||||
|
||||
pkgname=lmcp
|
||||
pkgver=0.5.4
|
||||
pkgver=1.2.1
|
||||
pkgrel=1
|
||||
pkgdesc="Lightweight MCP (Model Context Protocol) server in pure Lua"
|
||||
arch=('any')
|
||||
url="https://git.reauktion.de/marfrit/lmcp"
|
||||
license=('MIT')
|
||||
depends=('lua' 'lua-socket')
|
||||
source=("${pkgname}-${pkgver}.tar.gz::https://git.reauktion.de/marfrit/lmcp/archive/v${pkgver}.tar.gz")
|
||||
sha256sums=('af72b8c1d88255456b75d2c53cd5c451a8923417e5498ef31858539397e09caf')
|
||||
# The _tag back-translation handles both clean releases (no '_') and
|
||||
# pre-release pkgvers (e.g. 1.2.0_rc1 → v1.2.0-rc1).
|
||||
_tag="v${pkgver//_/-}"
|
||||
source=("${pkgname}-${pkgver}.tar.gz::https://git.reauktion.de/marfrit/lmcp/archive/${_tag}.tar.gz")
|
||||
sha256sums=('bf9cce1a84c66b1b74c5aec923c5960d60ae33c221afc8d47ce0d74b8f7ee609')
|
||||
|
||||
package() {
|
||||
cd "${pkgname}"
|
||||
|
||||
+57
@@ -0,0 +1,57 @@
|
||||
From: claude-noether (on behalf of mfritsche)
|
||||
Date: 2026-05-19
|
||||
Subject: panvk: expose VK_KHR/EXT_robustness2 + nullDescriptor on Bifrost (PAN_ARCH 6/7)
|
||||
|
||||
Without this, Mesa's Zink driver refuses to use PanVk-Bifrost as its Vulkan
|
||||
backend, falling back silently to llvmpipe (software rasterizer) for all
|
||||
GL-via-Zink on Bifrost SBCs. That defeats the entire purpose of having a
|
||||
Vulkan driver on Bifrost — GL acceleration via Zink is the most natural
|
||||
near-term consumer.
|
||||
|
||||
panvk_vX_nir_lower_descriptors.c:1309 and panvk_vX_shader.c:1355 already
|
||||
plumb dev->vk.enabled_features.nullDescriptor arch-agnostically — the gate
|
||||
at panvk_vX_physical_device.c was set conservatively when Bifrost was
|
||||
unmaintained, not because of hardware incapability.
|
||||
|
||||
iter1–7 of the panvk-bifrost campaign validated basic driver functionality
|
||||
on Mali-G52 r1 MC1 (PAN_ARCH=7). This patch is the iter8 follow-up.
|
||||
|
||||
robustBufferAccess2 and robustImageAccess2 are NOT flipped — they're
|
||||
independent rb2 features Zink doesn't require, gated differently
|
||||
(robustBufferAccess2 = PAN_ARCH >= 11, robustImageAccess2 = false), and
|
||||
out of scope for iter8.
|
||||
|
||||
---
|
||||
src/panfrost/vulkan/panvk_vX_physical_device.c | 6 +++---
|
||||
1 file changed, 3 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/src/panfrost/vulkan/panvk_vX_physical_device.c b/src/panfrost/vulkan/panvk_vX_physical_device.c
|
||||
--- a/src/panfrost/vulkan/panvk_vX_physical_device.c
|
||||
+++ b/src/panfrost/vulkan/panvk_vX_physical_device.c
|
||||
@@ -91,7 +91,7 @@ get_device_extensions(const struct panvk_physical_device *device,
|
||||
.KHR_pipeline_binary = true,
|
||||
.KHR_pipeline_executable_properties = true,
|
||||
.KHR_pipeline_library = true,
|
||||
- .KHR_robustness2 = PAN_ARCH >= 10,
|
||||
+ .KHR_robustness2 = true,
|
||||
.KHR_sampler_mirror_clamp_to_edge = true,
|
||||
.KHR_sampler_ycbcr_conversion = true,
|
||||
.KHR_separate_depth_stencil_layouts = true,
|
||||
@@ -168,7 +168,7 @@ get_device_extensions(const struct panvk_physical_device *device,
|
||||
.EXT_queue_family_foreign = true,
|
||||
.EXT_robustness = pan_arch(device->kmod.dev->props.gpu_id) >= 9,
|
||||
.EXT_image_robustness = true,
|
||||
- .EXT_robustness2 = PAN_ARCH >= 10,
|
||||
+ .EXT_robustness2 = true,
|
||||
.EXT_sampler_filter_minmax = PAN_ARCH >= 10,
|
||||
.EXT_scalar_block_layout = true,
|
||||
.EXT_separate_stencil_usage = true,
|
||||
@@ -493,7 +493,7 @@ get_device_features(const struct panvk_physical_device *device,
|
||||
/* VK_KHR_robustness2 */
|
||||
.robustBufferAccess2 = PAN_ARCH >= 11,
|
||||
.robustImageAccess2 = false,
|
||||
- .nullDescriptor = PAN_ARCH >= 10,
|
||||
+ .nullDescriptor = true,
|
||||
|
||||
/* VK_KHR_shader_clock */
|
||||
.shaderSubgroupClock = device->kmod.dev->props.gpu_can_query_timestamp,
|
||||
@@ -0,0 +1,47 @@
|
||||
From: claude-noether (on behalf of mfritsche)
|
||||
Date: 2026-05-20
|
||||
Subject: panvk: expose Vulkan 1.1 + 1.2 on Bifrost (PAN_ARCH 6/7)
|
||||
|
||||
ANGLE (Chromium's GL stack) requires apiVersion >= 1.1 to initialize. Without
|
||||
this, Brave / Chromium's GPU process fails at GL info collection:
|
||||
|
||||
vk_renderer.cpp:2659 (initialize): ANGLE Requires a minimum Vulkan device
|
||||
version of 1.1
|
||||
Display::initialize error 0: Internal Vulkan error (-9): The requested
|
||||
version of Vulkan is not supported by the driver
|
||||
|
||||
Stacked on iter8's robustness2 patch, this enables ANGLE → PanVk-Bifrost →
|
||||
Skia (via --enable-features=Vulkan) on Bifrost SBCs.
|
||||
|
||||
PanVk-Bifrost already supports the bulk of 1.1-promoted features as extensions
|
||||
(multiview, maintenance1-3, descriptor update template, 16-bit storage,
|
||||
sampler ycbcr, variable pointers, etc. — all
|
||||
visible in iter0 vulkaninfo). The version bump primarily bundles them.
|
||||
|
||||
Risk: Vulkan 1.1 has features beyond what iter1–7 exercised (protected memory,
|
||||
full subgroup ops). Specific app failures will be characterizable.
|
||||
|
||||
1.2 is also flipped — Brave's Vulkan path may want descriptor indexing,
|
||||
buffer device address, etc. (all listed in iter0 vulkaninfo as supported
|
||||
extensions, just gated as 1.0-with-extensions, not 1.2-core).
|
||||
|
||||
---
|
||||
src/panfrost/vulkan/panvk_vX_physical_device.c | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/src/panfrost/vulkan/panvk_vX_physical_device.c b/src/panfrost/vulkan/panvk_vX_physical_device.c
|
||||
--- a/src/panfrost/vulkan/panvk_vX_physical_device.c
|
||||
+++ b/src/panfrost/vulkan/panvk_vX_physical_device.c
|
||||
@@ -38,8 +38,8 @@ get_device_extensions(const struct panvk_physical_device *device,
|
||||
struct vk_device_extension_table *ext)
|
||||
{
|
||||
*ext = (struct vk_device_extension_table){
|
||||
- .KHR_8bit_storage = true,
|
||||
- .KHR_16bit_storage = true,
|
||||
- bool has_vk1_1 = PAN_ARCH >= 10;
|
||||
- bool has_vk1_2 = PAN_ARCH >= 10;
|
||||
+ .KHR_8bit_storage = true,
|
||||
+ .KHR_16bit_storage = true,
|
||||
+ bool has_vk1_1 = true;
|
||||
+ bool has_vk1_2 = true;
|
||||
*ext = (struct vk_device_extension_table){
|
||||
@@ -0,0 +1,328 @@
|
||||
--- a/src/panfrost/vulkan/panvk_shader.h 2026-04-29 22:19:00.000000000 +0200
|
||||
+++ b/src/panfrost/vulkan/panvk_shader.h 2026-05-20 18:52:53.312698258 +0200
|
||||
@@ -150,6 +150,10 @@
|
||||
struct {
|
||||
#if PAN_ARCH < 9
|
||||
int32_t raw_vertex_offset;
|
||||
+ uint32_t num_vertices; /* iter13: XFB needs per-draw vertex count */
|
||||
+ /* aligned_u64 attribute below inserts the 4-byte alignment gap
|
||||
+ * after num_vertices automatically — no explicit pad needed. */
|
||||
+ aligned_u64 xfb_address[4]; /* iter13: 4 transform feedback buffer base addresses */
|
||||
#endif
|
||||
int32_t first_vertex;
|
||||
int32_t base_instance;
|
||||
--- a/src/panfrost/vulkan/panvk_vX_physical_device.c 2026-05-20 19:09:29.711145446 +0200
|
||||
+++ b/src/panfrost/vulkan/panvk_vX_physical_device.c 2026-05-20 18:52:54.832720445 +0200
|
||||
@@ -169,6 +169,7 @@
|
||||
.EXT_provoking_vertex = true,
|
||||
.EXT_queue_family_foreign = true,
|
||||
.EXT_robustness2 = true,
|
||||
+ .EXT_transform_feedback = PAN_ARCH < 9, /* iter13: JM-class only for now */
|
||||
.EXT_sampler_filter_minmax = PAN_ARCH >= 10,
|
||||
.EXT_scalar_block_layout = true,
|
||||
.EXT_separate_stencil_usage = true,
|
||||
@@ -495,6 +496,10 @@
|
||||
.robustImageAccess2 = false,
|
||||
.nullDescriptor = true,
|
||||
|
||||
+ /* VK_EXT_transform_feedback (iter13) */
|
||||
+ .transformFeedback = PAN_ARCH < 9,
|
||||
+ .geometryStreams = false,
|
||||
+
|
||||
/* VK_KHR_shader_clock */
|
||||
.shaderSubgroupClock = device->kmod.dev->props.gpu_can_query_timestamp,
|
||||
.shaderDeviceClock = device->kmod.dev->props.timestamp_device_coherent,
|
||||
@@ -1020,6 +1025,18 @@
|
||||
.robustStorageBufferAccessSizeAlignment = 1,
|
||||
.robustUniformBufferAccessSizeAlignment = 1,
|
||||
|
||||
+ /* VK_EXT_transform_feedback (iter13) */
|
||||
+ .maxTransformFeedbackStreams = 1,
|
||||
+ .maxTransformFeedbackBuffers = 4,
|
||||
+ .maxTransformFeedbackBufferSize = UINT32_MAX,
|
||||
+ .maxTransformFeedbackStreamDataSize = 512,
|
||||
+ .maxTransformFeedbackBufferDataSize = 512,
|
||||
+ .maxTransformFeedbackBufferDataStride = 2048,
|
||||
+ .transformFeedbackQueries = false,
|
||||
+ .transformFeedbackStreamsLinesTriangles = false,
|
||||
+ .transformFeedbackRasterizationStreamSelect = false,
|
||||
+ .transformFeedbackDraw = false,
|
||||
+
|
||||
/* VK_EXT_shader_object */
|
||||
/* We do not currently support VK_EXT_shader_object but this is used
|
||||
* internally by vk_shader
|
||||
--- a/src/panfrost/vulkan/panvk_vX_shader.c 2026-04-29 22:19:00.000000000 +0200
|
||||
+++ b/src/panfrost/vulkan/panvk_vX_shader.c 2026-05-20 18:52:56.556745611 +0200
|
||||
@@ -21,6 +21,7 @@
|
||||
#include "panvk_physical_device.h"
|
||||
#include "panvk_sampler.h"
|
||||
#include "panvk_shader.h"
|
||||
+#include "pan_nir.h" /* iter13: pan_nir_lower_xfb */
|
||||
|
||||
#include "spirv/nir_spirv.h"
|
||||
#include "util/memstream.h"
|
||||
@@ -100,6 +101,20 @@
|
||||
case nir_intrinsic_load_raw_vertex_offset_pan:
|
||||
val = load_sysval(b, graphics, bit_size, vs.raw_vertex_offset);
|
||||
break;
|
||||
+ case nir_intrinsic_load_num_vertices: /* iter13: XFB index calc */
|
||||
+ val = load_sysval(b, graphics, bit_size, vs.num_vertices);
|
||||
+ break;
|
||||
+ case nir_intrinsic_load_xfb_address: { /* iter13: XFB buffer N base address */
|
||||
+ unsigned idx = nir_intrinsic_base(intr);
|
||||
+ switch (idx) {
|
||||
+ case 0: val = load_sysval(b, graphics, bit_size, vs.xfb_address[0]); break;
|
||||
+ case 1: val = load_sysval(b, graphics, bit_size, vs.xfb_address[1]); break;
|
||||
+ case 2: val = load_sysval(b, graphics, bit_size, vs.xfb_address[2]); break;
|
||||
+ case 3: val = load_sysval(b, graphics, bit_size, vs.xfb_address[3]); break;
|
||||
+ default: return false;
|
||||
+ }
|
||||
+ break;
|
||||
+ }
|
||||
case nir_intrinsic_load_layer_id:
|
||||
assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
|
||||
val = load_sysval(b, graphics, bit_size, layer_id);
|
||||
@@ -457,6 +472,7 @@
|
||||
core_max_id);
|
||||
|
||||
pan_preprocess_nir(nir, pdev->kmod.dev->props.gpu_id);
|
||||
+
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -870,6 +886,18 @@
|
||||
nir_var_shader_in | nir_var_shader_out, UINT32_MAX);
|
||||
NIR_PASS(_, nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
|
||||
glsl_type_size, nir_lower_io_use_interpolated_input_intrinsics);
|
||||
+
|
||||
+#if PAN_ARCH < 9
|
||||
+ /* iter13: VK_EXT_transform_feedback — runs AFTER nir_lower_io so that
|
||||
+ * shader outputs are now store_output intrinsics that pan_nir_lower_xfb
|
||||
+ * can rewrite to nir_store_global+nir_load_xfb_address. */
|
||||
+ if (nir->info.stage == MESA_SHADER_VERTEX &&
|
||||
+ nir->info.has_transform_feedback_varyings) {
|
||||
+ NIR_PASS(_, nir, nir_opt_constant_folding);
|
||||
+ NIR_PASS(_, nir, nir_io_add_intrinsic_xfb_info);
|
||||
+ NIR_PASS(_, nir, pan_nir_lower_xfb);
|
||||
+ }
|
||||
+#endif
|
||||
}
|
||||
|
||||
static VkResult
|
||||
@@ -1288,6 +1316,9 @@
|
||||
.view_mask = (state && state->rp) ? state->rp->view_mask : 0,
|
||||
.robust2_modes = robust2_modes,
|
||||
.robust_descriptors = dev->vk.enabled_features.nullDescriptor,
|
||||
+ /* iter13: XFB shaders must disable IDVS (matches Panfrost-Gallium). */
|
||||
+ .no_idvs = (info->stage == MESA_SHADER_VERTEX) &&
|
||||
+ info->nir->info.has_transform_feedback_varyings,
|
||||
};
|
||||
|
||||
switch (info->stage) {
|
||||
--- a/src/panfrost/vulkan/panvk_cmd_draw.h 2026-04-29 22:19:00.000000000 +0200
|
||||
+++ b/src/panfrost/vulkan/panvk_cmd_draw.h 2026-05-20 18:52:57.748763011 +0200
|
||||
@@ -135,6 +135,19 @@
|
||||
struct panvk_graphics_sysvals sysvals;
|
||||
|
||||
#if PAN_ARCH < 9
|
||||
+ /* iter13: VK_EXT_transform_feedback state (JM-class only for now). */
|
||||
+ struct {
|
||||
+ bool active;
|
||||
+ uint32_t buffer_count;
|
||||
+ struct {
|
||||
+ uint64_t addr;
|
||||
+ uint64_t offset;
|
||||
+ uint64_t size;
|
||||
+ } buffers[4];
|
||||
+ } xfb;
|
||||
+#endif
|
||||
+
|
||||
+#if PAN_ARCH < 9
|
||||
struct panvk_shader_link link;
|
||||
#endif
|
||||
|
||||
--- a/src/panfrost/vulkan/panvk_vX_cmd_draw.c 2026-04-29 22:19:00.000000000 +0200
|
||||
+++ b/src/panfrost/vulkan/panvk_vX_cmd_draw.c 2026-05-20 19:10:23.031919662 +0200
|
||||
@@ -10,6 +10,7 @@
|
||||
#include "panvk_entrypoints.h"
|
||||
|
||||
#include "pan_desc.h"
|
||||
+#include "pan_compiler.h" /* PAN_SHADER_OOB_ADDRESS */
|
||||
#include "pan_util.h"
|
||||
|
||||
static void
|
||||
@@ -722,6 +723,35 @@
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals, vs.raw_vertex_offset,
|
||||
info->vertex.raw_offset);
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals, layer_id, info->layer_id);
|
||||
+
|
||||
+ /* iter13: VK_EXT_transform_feedback sysvals — always set (per draw),
|
||||
+ * reflect bound XFB state. set_gfx_sysval is a no-op if value unchanged. */
|
||||
+ set_gfx_sysval(cmdbuf, dirty_sysvals, vs.num_vertices, info->vertex.count);
|
||||
+ {
|
||||
+ const struct panvk_cmd_graphics_state *_gfx = &cmdbuf->state.gfx;
|
||||
+ /* iter13: default each XFB buffer address to PAN_SHADER_OOB_ADDRESS
|
||||
+ * (= 1<<63). This is the Panfrost-Gallium memory-sink idiom — the
|
||||
+ * Bifrost MMU silently discards stores to this address, so a pipeline
|
||||
+ * with XFB outputs used in a non-XFB draw (or in an XFB draw with
|
||||
+ * fewer bound buffers than the shader declares) is safe instead of
|
||||
+ * faulting. See gallium/drivers/panfrost/pan_cmdstream.c PAN_SYSVAL_XFB. */
|
||||
+ uint64_t _xa0 = PAN_SHADER_OOB_ADDRESS, _xa1 = PAN_SHADER_OOB_ADDRESS,
|
||||
+ _xa2 = PAN_SHADER_OOB_ADDRESS, _xa3 = PAN_SHADER_OOB_ADDRESS;
|
||||
+ if (_gfx->xfb.active) {
|
||||
+ if (_gfx->xfb.buffer_count > 0 && _gfx->xfb.buffers[0].addr)
|
||||
+ _xa0 = _gfx->xfb.buffers[0].addr + _gfx->xfb.buffers[0].offset;
|
||||
+ if (_gfx->xfb.buffer_count > 1 && _gfx->xfb.buffers[1].addr)
|
||||
+ _xa1 = _gfx->xfb.buffers[1].addr + _gfx->xfb.buffers[1].offset;
|
||||
+ if (_gfx->xfb.buffer_count > 2 && _gfx->xfb.buffers[2].addr)
|
||||
+ _xa2 = _gfx->xfb.buffers[2].addr + _gfx->xfb.buffers[2].offset;
|
||||
+ if (_gfx->xfb.buffer_count > 3 && _gfx->xfb.buffers[3].addr)
|
||||
+ _xa3 = _gfx->xfb.buffers[3].addr + _gfx->xfb.buffers[3].offset;
|
||||
+ }
|
||||
+ set_gfx_sysval(cmdbuf, dirty_sysvals, vs.xfb_address[0], _xa0);
|
||||
+ set_gfx_sysval(cmdbuf, dirty_sysvals, vs.xfb_address[1], _xa1);
|
||||
+ set_gfx_sysval(cmdbuf, dirty_sysvals, vs.xfb_address[2], _xa2);
|
||||
+ set_gfx_sysval(cmdbuf, dirty_sysvals, vs.xfb_address[3], _xa3);
|
||||
+ }
|
||||
#endif
|
||||
|
||||
if (dyn_gfx_state_dirty(cmdbuf, CB_BLEND_CONSTANTS)) {
|
||||
--- a/src/panfrost/vulkan/meson.build 2026-04-29 22:19:00.000000000 +0200
|
||||
+++ b/src/panfrost/vulkan/meson.build 2026-05-20 18:53:04.484861338 +0200
|
||||
@@ -73,6 +73,7 @@
|
||||
jm_inc_dir = ['jm']
|
||||
jm_files = [
|
||||
'jm/panvk_vX_bind_queue.c',
|
||||
+ 'jm/panvk_vX_cmd_xfb.c', # iter13
|
||||
'jm/panvk_vX_cmd_buffer.c',
|
||||
'jm/panvk_vX_cmd_dispatch.c',
|
||||
'jm/panvk_vX_cmd_draw.c',
|
||||
--- a/src/panfrost/vulkan/jm/panvk_vX_cmd_buffer.c 2026-04-29 22:19:00.000000000 +0200
|
||||
+++ b/src/panfrost/vulkan/jm/panvk_vX_cmd_buffer.c 2026-05-20 19:10:26.163965149 +0200
|
||||
@@ -473,5 +473,12 @@
|
||||
|
||||
vk_command_buffer_begin(&cmdbuf->vk, pBeginInfo);
|
||||
|
||||
+#if PAN_ARCH < 9
|
||||
+ /* iter13: clear XFB state on Begin so a reused command buffer does not
|
||||
+ * inherit stale xfb.buffer_count / xfb.active / xfb.buffers[] from a
|
||||
+ * prior recording. */
|
||||
+ memset(&cmdbuf->state.gfx.xfb, 0, sizeof(cmdbuf->state.gfx.xfb));
|
||||
+#endif
|
||||
+
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
--- a/src/panfrost/vulkan/jm/panvk_vX_cmd_xfb.c 2026-05-18 12:50:53.067999996 +0200
|
||||
+++ b/src/panfrost/vulkan/jm/panvk_vX_cmd_xfb.c 2026-05-20 19:10:27.175979847 +0200
|
||||
@@ -0,0 +1,111 @@
|
||||
+/*
|
||||
+ * Copyright © 2026 mfritsche / claude-noether
|
||||
+ * SPDX-License-Identifier: MIT
|
||||
+ *
|
||||
+ * iter13: VK_EXT_transform_feedback command handlers for the JM
|
||||
+ * architecture path (Bifrost v6/v7; the XFB state is gated on PAN_ARCH < 9).
|
||||
+ *
|
||||
+ * The runtime contract:
|
||||
+ * - vkCmdBindTransformFeedbackBuffersEXT: stash (gpu_addr, offset, size)
|
||||
+ * for each slot into cmdbuf->state.gfx.xfb.buffers[].
|
||||
+ * - vkCmdBeginTransformFeedbackEXT: set cmdbuf->state.gfx.xfb.active = true.
|
||||
+ * The next draw re-emits vs.xfb_address[] through set_gfx_sysval.
|
||||
+ * - vkCmdEndTransformFeedbackEXT: set active = false.
|
||||
+ *
|
||||
+ * Counter buffers (firstCounterBuffer/counterBufferCount/pCounterBuffers/
|
||||
+ * pCounterBufferOffsets) are accepted by API but ignored — v1 doesn't
|
||||
+ * support pause/resume. transformFeedbackDraw is advertised as false.
|
||||
+ *
|
||||
+ * Per-draw integration: panvk_vX_cmd_draw.c reads cmdbuf->state.gfx.xfb
|
||||
+ * and populates vs.xfb_address[i] for shader use. The pan_nir_lower_xfb
|
||||
+ * pass in panvk_vX_shader.c emits nir_load_xfb_address(i) which lowers
|
||||
+ * (via panvk_vX_shader.c sysval handler) to a load from the per-draw
|
||||
+ * sysval push area.
|
||||
+ */
|
||||
+
|
||||
+#include "vk_log.h"
|
||||
+#include "util/log.h"
|
||||
+
|
||||
+#include "panvk_cmd_buffer.h"
|
||||
+#include "panvk_cmd_draw.h"
|
||||
+#include "panvk_buffer.h"
|
||||
+#include "panvk_entrypoints.h"
|
||||
+
|
||||
+VKAPI_ATTR void VKAPI_CALL
|
||||
+panvk_per_arch(CmdBindTransformFeedbackBuffersEXT)(
|
||||
+ VkCommandBuffer commandBuffer,
|
||||
+ uint32_t firstBinding,
|
||||
+ uint32_t bindingCount,
|
||||
+ const VkBuffer *pBuffers,
|
||||
+ const VkDeviceSize *pOffsets,
|
||||
+ const VkDeviceSize *pSizes)
|
||||
+{
|
||||
+ VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
|
||||
+ struct panvk_cmd_graphics_state *gfx = &cmdbuf->state.gfx;
|
||||
+
|
||||
+ for (uint32_t i = 0; i < bindingCount; i++) {
|
||||
+ uint32_t slot = firstBinding + i;
|
||||
+ if (slot >= 4)
|
||||
+ continue;
|
||||
+
|
||||
+ VK_FROM_HANDLE(panvk_buffer, buf, pBuffers[i]);
|
||||
+ gfx->xfb.buffers[slot].addr = panvk_buffer_gpu_ptr(buf, 0);
|
||||
+ gfx->xfb.buffers[slot].offset = pOffsets[i];
|
||||
+ gfx->xfb.buffers[slot].size =
|
||||
+ (pSizes != NULL && pSizes[i] != VK_WHOLE_SIZE)
|
||||
+ ? pSizes[i]
|
||||
+ : (buf->vk.size - pOffsets[i]);
|
||||
+ }
|
||||
+
|
||||
+ if (firstBinding + bindingCount > gfx->xfb.buffer_count)
|
||||
+ gfx->xfb.buffer_count = firstBinding + bindingCount;
|
||||
+}
|
||||
+
|
||||
+VKAPI_ATTR void VKAPI_CALL
|
||||
+panvk_per_arch(CmdBeginTransformFeedbackEXT)(
|
||||
+ VkCommandBuffer commandBuffer,
|
||||
+ uint32_t firstCounterBuffer,
|
||||
+ uint32_t counterBufferCount,
|
||||
+ const VkBuffer *pCounterBuffers,
|
||||
+ const VkDeviceSize *pCounterBufferOffsets)
|
||||
+{
|
||||
+ VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
|
||||
+ struct panvk_cmd_graphics_state *gfx = &cmdbuf->state.gfx;
|
||||
+
|
||||
+ /* Counter buffers ignored in v1 — see VkPhysicalDeviceTransformFeedback
|
||||
+ * PropertiesEXT.transformFeedbackDraw = false in panvk_vX_physical_device.c.
|
||||
+ * App is spec-compliant if it does not pass counter buffers (which our
|
||||
+ * features advertisement allows), but warn loudly if it does so we do not
|
||||
+ * silently produce wrong capture state. */
|
||||
+ (void)firstCounterBuffer;
|
||||
+ (void)pCounterBufferOffsets;
|
||||
+ if (counterBufferCount > 0 && pCounterBuffers != NULL) {
|
||||
+ mesa_logw("panvk: CmdBeginTransformFeedbackEXT: counter buffers not "
|
||||
+ "implemented (transformFeedbackDraw=false); XFB resume will "
|
||||
+ "restart at buffer offset 0");
|
||||
+ }
|
||||
+
|
||||
+ gfx->xfb.active = true;
|
||||
+ /* Per-draw set_gfx_sysval picks up the change automatically — no
|
||||
+ * explicit dirty marking required (set_gfx_sysval uses memcmp +
|
||||
+ * BITSET to detect state diffs and re-emit sysvals). */
|
||||
+}
|
||||
+
|
||||
+VKAPI_ATTR void VKAPI_CALL
|
||||
+panvk_per_arch(CmdEndTransformFeedbackEXT)(
|
||||
+ VkCommandBuffer commandBuffer,
|
||||
+ uint32_t firstCounterBuffer,
|
||||
+ uint32_t counterBufferCount,
|
||||
+ const VkBuffer *pCounterBuffers,
|
||||
+ const VkDeviceSize *pCounterBufferOffsets)
|
||||
+{
|
||||
+ VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
|
||||
+ struct panvk_cmd_graphics_state *gfx = &cmdbuf->state.gfx;
|
||||
+
|
||||
+ (void)firstCounterBuffer;
|
||||
+ (void)counterBufferCount;
|
||||
+ (void)pCounterBuffers;
|
||||
+ (void)pCounterBufferOffsets;
|
||||
+
|
||||
+ gfx->xfb.active = false;
|
||||
+}
|
||||
@@ -0,0 +1,629 @@
|
||||
diff -urN a/src/panfrost/vulkan/meson.build b/src/panfrost/vulkan/meson.build
|
||||
--- a/src/panfrost/vulkan/meson.build 2026-05-21 14:04:02.529474145 +0200
|
||||
+++ b/src/panfrost/vulkan/meson.build 2026-05-21 14:04:04.106755486 +0200
|
||||
@@ -123,6 +123,7 @@
|
||||
'panvk_vX_nir_lower_input_attachment_loads.c',
|
||||
'panvk_vX_sampler.c',
|
||||
'panvk_vX_shader.c',
|
||||
+ 'panvk_vX_xfb_lower.c',
|
||||
sha1_h,
|
||||
]
|
||||
|
||||
diff -urN a/src/panfrost/vulkan/panvk_shader.h b/src/panfrost/vulkan/panvk_shader.h
|
||||
--- a/src/panfrost/vulkan/panvk_shader.h 2026-05-21 14:04:02.525251986 +0200
|
||||
+++ b/src/panfrost/vulkan/panvk_shader.h 2026-05-21 14:04:04.084251800 +0200
|
||||
@@ -154,6 +154,8 @@
|
||||
/* aligned_u64 attribute below inserts the 4-byte alignment gap
|
||||
* after num_vertices automatically — no explicit pad needed. */
|
||||
aligned_u64 xfb_address[4]; /* iter13: 4 transform feedback buffer base addresses */
|
||||
+ uint32_t xfb_topology; /* iter17: panvk_xfb_topology enum value */
|
||||
+ uint32_t xfb_output_count; /* iter17: per-instance output verts after decomp */
|
||||
#endif
|
||||
int32_t first_vertex;
|
||||
int32_t base_instance;
|
||||
@@ -569,4 +571,76 @@
|
||||
struct pan_compute_dim local_size, const void *bin_ptr, size_t bin_size,
|
||||
struct panvk_shader **shader_out);
|
||||
|
||||
+
|
||||
+#if PAN_ARCH < 9
|
||||
+/* iter17: encoding for vs.xfb_topology sysval. Maps VkPrimitiveTopology values
|
||||
+ * we need to distinguish at shader runtime for XFB capture. LIST topologies
|
||||
+ * use the iter13 single-store fast path; non-LIST need per-vertex decomposition. */
|
||||
+enum panvk_xfb_topology {
|
||||
+ PANVK_XFB_TOPO_LIST = 0,
|
||||
+ PANVK_XFB_TOPO_LINE_STRIP = 1,
|
||||
+ PANVK_XFB_TOPO_TRI_STRIP = 2,
|
||||
+ PANVK_XFB_TOPO_TRI_FAN = 3,
|
||||
+ PANVK_XFB_TOPO_LINE_LIST_ADJ = 4,
|
||||
+ PANVK_XFB_TOPO_LINE_STRIP_ADJ = 5,
|
||||
+ PANVK_XFB_TOPO_TRI_LIST_ADJ = 6,
|
||||
+ PANVK_XFB_TOPO_TRI_STRIP_ADJ = 7,
|
||||
+};
|
||||
+
|
||||
+#include "panvk_macros.h"
|
||||
+struct nir_shader;
|
||||
+bool panvk_per_arch(nir_lower_xfb)(struct nir_shader *nir);
|
||||
+
|
||||
+/* Map VkPrimitiveTopology to panvk_xfb_topology enum (driver-side helper). */
|
||||
+static inline uint32_t
|
||||
+panvk_vk_topology_to_xfb_enum(VkPrimitiveTopology topo)
|
||||
+{
|
||||
+ switch (topo) {
|
||||
+ case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
|
||||
+ return PANVK_XFB_TOPO_LINE_STRIP;
|
||||
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
|
||||
+ return PANVK_XFB_TOPO_TRI_STRIP;
|
||||
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
|
||||
+ return PANVK_XFB_TOPO_TRI_FAN;
|
||||
+ case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
|
||||
+ return PANVK_XFB_TOPO_LINE_LIST_ADJ;
|
||||
+ case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
|
||||
+ return PANVK_XFB_TOPO_LINE_STRIP_ADJ;
|
||||
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
|
||||
+ return PANVK_XFB_TOPO_TRI_LIST_ADJ;
|
||||
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
|
||||
+ return PANVK_XFB_TOPO_TRI_STRIP_ADJ;
|
||||
+ case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
|
||||
+ case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
|
||||
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
|
||||
+ default:
|
||||
+ return PANVK_XFB_TOPO_LIST;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/* Compute the per-instance output vertex count for a given (topology, input count). */
|
||||
+static inline uint32_t
|
||||
+panvk_xfb_output_count(VkPrimitiveTopology topo, uint32_t input_count)
|
||||
+{
|
||||
+ switch (topo) {
|
||||
+ case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
|
||||
+ return input_count >= 1 ? 2u * (input_count - 1u) : 0u;
|
||||
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
|
||||
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
|
||||
+ return input_count >= 2 ? 3u * (input_count - 2u) : 0u;
|
||||
+ case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
|
||||
+ return (input_count / 4u) * 2u;
|
||||
+ case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
|
||||
+ return input_count >= 3 ? 2u * (input_count - 3u) : 0u;
|
||||
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
|
||||
+ return (input_count / 6u) * 3u;
|
||||
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
|
||||
+ return input_count >= 6 ? 3u * (input_count / 2u - 2u) : 0u;
|
||||
+ default:
|
||||
+ return input_count; /* LIST topologies: 1:1 mapping */
|
||||
+ }
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
+
|
||||
#endif
|
||||
diff -urN a/src/panfrost/vulkan/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/panvk_vX_cmd_draw.c
|
||||
--- a/src/panfrost/vulkan/panvk_vX_cmd_draw.c 2026-05-21 14:04:02.528576354 +0200
|
||||
+++ b/src/panfrost/vulkan/panvk_vX_cmd_draw.c 2026-05-21 14:04:04.091357598 +0200
|
||||
@@ -727,6 +727,20 @@
|
||||
/* iter13: VK_EXT_transform_feedback sysvals — always set (per draw),
|
||||
* reflect bound XFB state. set_gfx_sysval is a no-op if value unchanged. */
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals, vs.num_vertices, info->vertex.count);
|
||||
+
|
||||
+ /* iter17: XFB primitive-decomposition sysvals.
|
||||
+ * xfb_topology = enum value for the current bound topology.
|
||||
+ * xfb_output_count = per-instance output vertex count after decomposition.
|
||||
+ * For LIST topologies, output_count == input vertex count and the shader
|
||||
+ * takes the iter13 single-store fast path. */
|
||||
+ {
|
||||
+ VkPrimitiveTopology vk_topo =
|
||||
+ cmdbuf->vk.dynamic_graphics_state.ia.primitive_topology;
|
||||
+ uint32_t topo_enum = panvk_vk_topology_to_xfb_enum(vk_topo);
|
||||
+ uint32_t out_count = panvk_xfb_output_count(vk_topo, info->vertex.count);
|
||||
+ set_gfx_sysval(cmdbuf, dirty_sysvals, vs.xfb_topology, topo_enum);
|
||||
+ set_gfx_sysval(cmdbuf, dirty_sysvals, vs.xfb_output_count, out_count);
|
||||
+ }
|
||||
{
|
||||
const struct panvk_cmd_graphics_state *_gfx = &cmdbuf->state.gfx;
|
||||
/* iter13: default each XFB buffer address to PAN_SHADER_OOB_ADDRESS
|
||||
diff -urN a/src/panfrost/vulkan/panvk_vX_shader.c b/src/panfrost/vulkan/panvk_vX_shader.c
|
||||
--- a/src/panfrost/vulkan/panvk_vX_shader.c 2026-05-21 14:04:02.527576494 +0200
|
||||
+++ b/src/panfrost/vulkan/panvk_vX_shader.c 2026-05-21 14:04:04.098356619 +0200
|
||||
@@ -895,7 +895,10 @@
|
||||
nir->info.has_transform_feedback_varyings) {
|
||||
NIR_PASS(_, nir, nir_opt_constant_folding);
|
||||
NIR_PASS(_, nir, nir_io_add_intrinsic_xfb_info);
|
||||
- NIR_PASS(_, nir, pan_nir_lower_xfb);
|
||||
+ /* iter17: panvk-specific replacement for pan_nir_lower_xfb that handles
|
||||
+ * primitive decomposition for non-LIST topologies. Single-store LIST
|
||||
+ * fast path matches iter13 behavior. */
|
||||
+ NIR_PASS(_, nir, panvk_per_arch(nir_lower_xfb));
|
||||
}
|
||||
#endif
|
||||
}
|
||||
diff -urN a/src/panfrost/vulkan/panvk_vX_xfb_lower.c b/src/panfrost/vulkan/panvk_vX_xfb_lower.c
|
||||
--- a/src/panfrost/vulkan/panvk_vX_xfb_lower.c 1970-01-01 01:00:00.000000000 +0100
|
||||
+++ b/src/panfrost/vulkan/panvk_vX_xfb_lower.c 2026-05-21 14:04:04.115354242 +0200
|
||||
@@ -0,0 +1,486 @@
|
||||
+/*
|
||||
+ * Copyright © 2026 mfritsche / claude-noether
|
||||
+ * SPDX-License-Identifier: MIT
|
||||
+ *
|
||||
+ * iter17: panvk-specific replacement for pan_nir_lower_xfb that handles
|
||||
+ * primitive decomposition for transform_feedback on non-LIST topologies
|
||||
+ * (TRIANGLE_STRIP/FAN, LINE_STRIP, *_WITH_ADJACENCY).
|
||||
+ *
|
||||
+ * Approach: emit a topology dispatch at the start of each store_output
|
||||
+ * lowering. The shader reads vs.xfb_topology sysval at runtime and branches
|
||||
+ * into per-topology emission logic. For each affected topology, the lowered
|
||||
+ * code emits guarded conditional stores — one per primitive this vertex
|
||||
+ * contributes to, computing the output buffer position via primitive index
|
||||
+ * and slot within the decomposed primitive.
|
||||
+ *
|
||||
+ * For LIST topologies (POINT/LINE/TRIANGLE LIST), takes a fast path that
|
||||
+ * matches iter13's single-store behavior.
|
||||
+ *
|
||||
+ * For TRIANGLE_FAN, the central vertex (v=0) contributes to ALL primitives
|
||||
+ * as slot 2 — handled via a NIR loop over the num_vertices - 2 primitives.
|
||||
+ *
|
||||
+ * See ~/src/panvk-bifrost/iter17/phase{0,1,2}_*.md for full design context.
|
||||
+ */
|
||||
+
|
||||
+#include "panvk_macros.h"
|
||||
+
|
||||
+#if PAN_ARCH < 9
|
||||
+
|
||||
+#include "panvk_shader.h"
|
||||
+
|
||||
+#include "compiler/nir/nir_builder.h"
|
||||
+#include "pan_nir.h"
|
||||
+
|
||||
+#include <vulkan/vulkan_core.h>
|
||||
+
|
||||
+/* ----- Address arithmetic ----- */
|
||||
+
|
||||
+static nir_def *
|
||||
+xfb_store_addr(nir_builder *b, nir_def *buf, nir_def *out_idx,
|
||||
+ uint16_t stride, uint16_t offset_bytes)
|
||||
+{
|
||||
+ nir_def *byte_off = nir_iadd_imm(b,
|
||||
+ nir_imul_imm(b, out_idx, stride), offset_bytes);
|
||||
+ return nir_iadd(b, buf, nir_u2u64(b, byte_off));
|
||||
+}
|
||||
+
|
||||
+static void
|
||||
+emit_list_store(nir_builder *b, nir_def *buf, nir_def *output_count,
|
||||
+ nir_def *instance_id, nir_def *raw_vid, nir_def *value,
|
||||
+ uint16_t stride, uint16_t offset_bytes)
|
||||
+{
|
||||
+ nir_def *out_idx = nir_iadd(b,
|
||||
+ nir_imul(b, instance_id, output_count), raw_vid);
|
||||
+ nir_def *addr = xfb_store_addr(b, buf, out_idx, stride, offset_bytes);
|
||||
+ nir_store_global(b, value, addr);
|
||||
+}
|
||||
+
|
||||
+static void
|
||||
+emit_prim_store(nir_builder *b, nir_def *buf, nir_def *output_count,
|
||||
+ nir_def *instance_id, nir_def *eligible,
|
||||
+ nir_def *prim_idx, nir_def *slot,
|
||||
+ uint32_t verts_per_prim,
|
||||
+ nir_def *value, uint16_t stride, uint16_t offset_bytes)
|
||||
+{
|
||||
+ nir_push_if(b, eligible);
|
||||
+ {
|
||||
+ nir_def *out_idx = nir_iadd(b,
|
||||
+ nir_imul(b, instance_id, output_count),
|
||||
+ nir_iadd(b, nir_imul_imm(b, prim_idx, verts_per_prim), slot));
|
||||
+ nir_def *addr = xfb_store_addr(b, buf, out_idx, stride, offset_bytes);
|
||||
+ nir_store_global(b, value, addr);
|
||||
+ }
|
||||
+ nir_pop_if(b, NULL);
|
||||
+}
|
||||
+
|
||||
+/* ----- Per-topology emission ----- */
|
||||
+
|
||||
+/* TRIANGLE_STRIP: vertex v contributes to prims v, v-1, v-2 (per eligibility). */
|
||||
+static void
|
||||
+emit_tri_strip(nir_builder *b, nir_def *v, nir_def *N,
|
||||
+ nir_def *buf, nir_def *output_count, nir_def *instance_id,
|
||||
+ nir_def *value, uint16_t stride, uint16_t offset_bytes)
|
||||
+{
|
||||
+ nir_def *Nm2 = nir_iadd_imm(b, N, -2);
|
||||
+ nir_def *Nm1 = nir_iadd_imm(b, N, -1);
|
||||
+
|
||||
+ /* Prim v, slot 0: v < N-2 */
|
||||
+ emit_prim_store(b, buf, output_count, instance_id,
|
||||
+ nir_ult(b, v, Nm2),
|
||||
+ v, nir_imm_int(b, 0), 3, value, stride, offset_bytes);
|
||||
+
|
||||
+ /* Prim v-1, slot = 1 if prim even else 2: 1 <= v < N-1 */
|
||||
+ {
|
||||
+ nir_def *prim = nir_iadd_imm(b, v, -1);
|
||||
+ nir_def *parity = nir_iand_imm(b, prim, 1u);
|
||||
+ nir_def *slot = nir_iadd_imm(b, parity, 1);
|
||||
+ nir_def *eligible = nir_iand(b,
|
||||
+ nir_uge(b, v, nir_imm_int(b, 1)),
|
||||
+ nir_ult(b, v, Nm1));
|
||||
+ emit_prim_store(b, buf, output_count, instance_id, eligible,
|
||||
+ prim, slot, 3, value, stride, offset_bytes);
|
||||
+ }
|
||||
+
|
||||
+ /* Prim v-2, slot = 2 if prim even else 1: 2 <= v < N */
|
||||
+ {
|
||||
+ nir_def *prim = nir_iadd_imm(b, v, -2);
|
||||
+ nir_def *parity = nir_iand_imm(b, prim, 1u);
|
||||
+ nir_def *slot = nir_isub(b, nir_imm_int(b, 2), parity);
|
||||
+ nir_def *eligible = nir_iand(b,
|
||||
+ nir_uge(b, v, nir_imm_int(b, 2)),
|
||||
+ nir_ult(b, v, N));
|
||||
+ emit_prim_store(b, buf, output_count, instance_id, eligible,
|
||||
+ prim, slot, 3, value, stride, offset_bytes);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/* LINE_STRIP: vertex v contributes to prim v slot 0 + prim v-1 slot 1. */
|
||||
+static void
|
||||
+emit_line_strip(nir_builder *b, nir_def *v, nir_def *N,
|
||||
+ nir_def *buf, nir_def *output_count, nir_def *instance_id,
|
||||
+ nir_def *value, uint16_t stride, uint16_t offset_bytes)
|
||||
+{
|
||||
+ nir_def *Nm1 = nir_iadd_imm(b, N, -1);
|
||||
+
|
||||
+ /* Prim v, slot 0: v < N-1 */
|
||||
+ emit_prim_store(b, buf, output_count, instance_id,
|
||||
+ nir_ult(b, v, Nm1),
|
||||
+ v, nir_imm_int(b, 0), 2, value, stride, offset_bytes);
|
||||
+
|
||||
+ /* Prim v-1, slot 1: 1 <= v < N */
|
||||
+ {
|
||||
+ nir_def *prim = nir_iadd_imm(b, v, -1);
|
||||
+ nir_def *eligible = nir_iand(b,
|
||||
+ nir_uge(b, v, nir_imm_int(b, 1)),
|
||||
+ nir_ult(b, v, N));
|
||||
+ emit_prim_store(b, buf, output_count, instance_id, eligible,
|
||||
+ prim, nir_imm_int(b, 1), 2, value, stride, offset_bytes);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/* TRIANGLE_FAN: prim p emits {p+1, p+2, 0}.
|
||||
+ * vertex v=0: contributes to ALL prims as slot 2 (loop required)
|
||||
+ * vertex v>=1: contributes to prim v-1 as slot 0 (if 1 <= v <= N-2)
|
||||
+ * vertex v>=2: contributes to prim v-2 as slot 1 (if 2 <= v <= N-1)
|
||||
+ */
|
||||
+static void
|
||||
+emit_tri_fan(nir_builder *b, nir_def *v, nir_def *N,
|
||||
+ nir_def *buf, nir_def *output_count, nir_def *instance_id,
|
||||
+ nir_def *value, uint16_t stride, uint16_t offset_bytes)
|
||||
+{
|
||||
+ nir_def *Nm1 = nir_iadd_imm(b, N, -1);
|
||||
+ nir_def *Nm2 = nir_iadd_imm(b, N, -2);
|
||||
+
|
||||
+ /* Prim v-1, slot 0: 1 <= v < N-1 */
|
||||
+ {
|
||||
+ nir_def *prim = nir_iadd_imm(b, v, -1);
|
||||
+ nir_def *eligible = nir_iand(b,
|
||||
+ nir_uge(b, v, nir_imm_int(b, 1)),
|
||||
+ nir_ult(b, v, Nm1));
|
||||
+ emit_prim_store(b, buf, output_count, instance_id, eligible,
|
||||
+ prim, nir_imm_int(b, 0), 3, value, stride, offset_bytes);
|
||||
+ }
|
||||
+
|
||||
+ /* Prim v-2, slot 1: 2 <= v < N */
|
||||
+ {
|
||||
+ nir_def *prim = nir_iadd_imm(b, v, -2);
|
||||
+ nir_def *eligible = nir_iand(b,
|
||||
+ nir_uge(b, v, nir_imm_int(b, 2)),
|
||||
+ nir_ult(b, v, N));
|
||||
+ emit_prim_store(b, buf, output_count, instance_id, eligible,
|
||||
+ prim, nir_imm_int(b, 1), 3, value, stride, offset_bytes);
|
||||
+ }
|
||||
+
|
||||
+ /* Central vertex (v == 0): loop over all prims, write to slot 2. */
|
||||
+ nir_push_if(b, nir_ieq_imm(b, v, 0));
|
||||
+ {
|
||||
+ nir_variable *p_var = nir_local_variable_create(b->impl,
|
||||
+ glsl_uint_type(), "fan_p");
|
||||
+ nir_store_var(b, p_var, nir_imm_int(b, 0), 0x1);
|
||||
+ nir_push_loop(b);
|
||||
+ {
|
||||
+ nir_def *p = nir_load_var(b, p_var);
|
||||
+ nir_push_if(b, nir_uge(b, p, Nm2));
|
||||
+ {
|
||||
+ nir_jump(b, nir_jump_break);
|
||||
+ }
|
||||
+ nir_pop_if(b, NULL);
|
||||
+
|
||||
+ nir_def *out_idx = nir_iadd(b,
|
||||
+ nir_imul(b, instance_id, output_count),
|
||||
+ nir_iadd_imm(b, nir_imul_imm(b, p, 3), 2));
|
||||
+ nir_def *addr = xfb_store_addr(b, buf, out_idx, stride, offset_bytes);
|
||||
+ nir_store_global(b, value, addr);
|
||||
+
|
||||
+ nir_store_var(b, p_var, nir_iadd_imm(b, p, 1), 0x1);
|
||||
+ }
|
||||
+ nir_pop_loop(b, NULL);
|
||||
+ }
|
||||
+ nir_pop_if(b, NULL);
|
||||
+}
|
||||
+
|
||||
+/* LINE_LIST_WITH_ADJACENCY: 4-vertex groups [4i..4i+3]; output {4i+1, 4i+2}.
|
||||
+ * v contributes if v%4 == 1: prim v/4 slot 0
|
||||
+ * v contributes if v%4 == 2: prim v/4 slot 1
|
||||
+ * Only complete groups form a primitive (prim < N/4). Without that bound the
|
||||
+ * vertices of a trailing partial group would be stored past output_count,
|
||||
+ * which panvk_xfb_output_count() computes as (N/4)*2.
|
||||
+ */
|
||||
+static void
|
||||
+emit_line_list_adj(nir_builder *b, nir_def *v, nir_def *N,
|
||||
+ nir_def *buf, nir_def *output_count, nir_def *instance_id,
|
||||
+ nir_def *value, uint16_t stride, uint16_t offset_bytes)
|
||||
+{
|
||||
+ nir_def *vmod4 = nir_iand_imm(b, v, 3u);
|
||||
+ nir_def *prim = nir_ushr_imm(b, v, 2); /* v / 4 */
|
||||
+ /* True only for vertices that belong to a complete 4-vertex group. */
|
||||
+ nir_def *complete = nir_ult(b, prim, nir_ushr_imm(b, N, 2));
|
||||
+
|
||||
+ emit_prim_store(b, buf, output_count, instance_id,
|
||||
+ nir_iand(b, complete, nir_ieq_imm(b, vmod4, 1)),
|
||||
+ prim, nir_imm_int(b, 0), 2, value, stride, offset_bytes);
|
||||
+
|
||||
+ emit_prim_store(b, buf, output_count, instance_id,
|
||||
+ nir_iand(b, complete, nir_ieq_imm(b, vmod4, 2)),
|
||||
+ prim, nir_imm_int(b, 1), 2, value, stride, offset_bytes);
|
||||
+}
|
||||
+
|
||||
+/* LINE_STRIP_WITH_ADJACENCY: N vertices form N-3 prims; prim p emits {p+1, p+2}.
|
||||
+ * v contributes to prim v-1 slot 0 (1 <= v <= N-3)
|
||||
+ * v contributes to prim v-2 slot 1 (2 <= v <= N-2)
|
||||
+ * Vertices 0 and N-1 are adjacency-only and are never stored.
|
||||
+ */
|
||||
+static void
|
||||
+emit_line_strip_adj(nir_builder *b, nir_def *v, nir_def *N,
|
||||
+ nir_def *buf, nir_def *output_count, nir_def *instance_id,
|
||||
+ nir_def *value, uint16_t stride, uint16_t offset_bytes)
|
||||
+{
|
||||
+ /* The caller only reaches this with output_count > 0, i.e. N >= 4, so
|
||||
+ * neither subtraction below wraps. */
|
||||
+ nir_def *Nm1 = nir_iadd_imm(b, N, -1);
|
||||
+ nir_def *Nm2 = nir_iadd_imm(b, N, -2);
|
||||
+
|
||||
+ /* Prim v-1, slot 0: prim < N-3 ⇔ 1 <= v <= N-3 ⇔ v >= 1 AND v < N-2 */
|
||||
+ {
|
||||
+ nir_def *prim = nir_iadd_imm(b, v, -1);
|
||||
+ nir_def *eligible = nir_iand(b,
|
||||
+ nir_uge(b, v, nir_imm_int(b, 1)),
|
||||
+ nir_ult(b, v, Nm2));
|
||||
+ emit_prim_store(b, buf, output_count, instance_id, eligible,
|
||||
+ prim, nir_imm_int(b, 0), 2, value, stride, offset_bytes);
|
||||
+ }
|
||||
+
|
||||
+ /* Prim v-2, slot 1: prim < N-3 ⇔ 2 <= v <= N-2 ⇔ v >= 2 AND v < N-1 */
|
||||
+ {
|
||||
+ nir_def *prim = nir_iadd_imm(b, v, -2);
|
||||
+ nir_def *eligible = nir_iand(b,
|
||||
+ nir_uge(b, v, nir_imm_int(b, 2)),
|
||||
+ nir_ult(b, v, Nm1));
|
||||
+ emit_prim_store(b, buf, output_count, instance_id, eligible,
|
||||
+ prim, nir_imm_int(b, 1), 2, value, stride, offset_bytes);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/* TRIANGLE_LIST_WITH_ADJACENCY: 6-vertex groups; output {6i, 6i+2, 6i+4}.
|
||||
+ * v contributes if v%6 == 0: prim v/6 slot 0
|
||||
+ * v contributes if v%6 == 2: prim v/6 slot 1
|
||||
+ * v contributes if v%6 == 4: prim v/6 slot 2
|
||||
+ * Only complete groups form a primitive (prim < N/6). Without that bound the
|
||||
+ * vertices of a trailing partial group would be stored past output_count,
|
||||
+ * which panvk_xfb_output_count() computes as (N/6)*3.
|
||||
+ */
|
||||
+static void
|
||||
+emit_tri_list_adj(nir_builder *b, nir_def *v, nir_def *N,
|
||||
+ nir_def *buf, nir_def *output_count, nir_def *instance_id,
|
||||
+ nir_def *value, uint16_t stride, uint16_t offset_bytes)
|
||||
+{
|
||||
+ nir_def *vmod6 = nir_umod_imm(b, v, 6);
|
||||
+ nir_def *prim = nir_udiv_imm(b, v, 6);
|
||||
+ /* True only for vertices that belong to a complete 6-vertex group. */
|
||||
+ nir_def *complete = nir_ult(b, prim, nir_udiv_imm(b, N, 6));
|
||||
+
|
||||
+ for (uint32_t slot = 0; slot < 3; slot++) {
|
||||
+ emit_prim_store(b, buf, output_count, instance_id,
|
||||
+ nir_iand(b, complete, nir_ieq_imm(b, vmod6, slot * 2)),
|
||||
+ prim, nir_imm_int(b, slot), 3, value, stride, offset_bytes);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/* TRIANGLE_STRIP_WITH_ADJACENCY: prim i emits:
|
||||
+ * even i: {2i, 2i+2, 2i+4} (slots 0, 1, 2 ← input indices 2i, 2i+2, 2i+4)
|
||||
+ * odd i: {2i, 2i+4, 2i+2} (slots 0, 1, 2 ← input indices 2i, 2i+4, 2i+2)
|
||||
+ *
|
||||
+ * Only EVEN input vertices contribute (since all output indices are 2*something).
|
||||
+ * For even input v:
|
||||
+ * prim v/2 slot 0 (always, if v/2 < N/2-2)
|
||||
+ * prim (v-2)/2 slot 1 if (v-2)/2 even, slot 2 if odd (when v >= 2)
|
||||
+ * prim (v-4)/2 slot 2 if (v-4)/2 even, slot 1 if odd (when v >= 4)
|
||||
+ */
|
||||
+static void
|
||||
+emit_tri_strip_adj(nir_builder *b, nir_def *v, nir_def *N,
|
||||
+ nir_def *buf, nir_def *output_count, nir_def *instance_id,
|
||||
+ nir_def *value, uint16_t stride, uint16_t offset_bytes)
|
||||
+{
|
||||
+ /* Bail for odd input vertices — they never contribute. */
|
||||
+ nir_def *v_is_even = nir_ieq_imm(b, nir_iand_imm(b, v, 1u), 0);
|
||||
+ nir_push_if(b, v_is_even);
|
||||
+ {
|
||||
+ nir_def *N_half = nir_ushr_imm(b, N, 1);
|
||||
+ nir_def *max_prim = nir_iadd_imm(b, N_half, -2); /* N/2 - 2 */
|
||||
+ nir_def *v_half = nir_ushr_imm(b, v, 1);
|
||||
+
|
||||
+ /* Prim v/2 slot 0: v/2 < N/2 - 2 */
|
||||
+ emit_prim_store(b, buf, output_count, instance_id,
|
||||
+ nir_ult(b, v_half, max_prim),
|
||||
+ v_half, nir_imm_int(b, 0), 3, value, stride, offset_bytes);
|
||||
+
|
||||
+ /* Prim (v-2)/2 = v/2 - 1: v >= 2 AND prim < N/2-2 */
|
||||
+ {
|
||||
+ nir_def *prim = nir_iadd_imm(b, v_half, -1);
|
||||
+ nir_def *parity = nir_iand_imm(b, prim, 1u);
|
||||
+ nir_def *slot = nir_iadd_imm(b, parity, 1); /* even→1, odd→2 */
|
||||
+ nir_def *eligible = nir_iand(b,
|
||||
+ nir_uge(b, v, nir_imm_int(b, 2)),
|
||||
+ nir_ult(b, prim, max_prim));
|
||||
+ emit_prim_store(b, buf, output_count, instance_id, eligible,
|
||||
+ prim, slot, 3, value, stride, offset_bytes);
|
||||
+ }
|
||||
+
|
||||
+ /* Prim (v-4)/2 = v/2 - 2: v >= 4 AND prim < N/2-2 */
|
||||
+ {
|
||||
+ nir_def *prim = nir_iadd_imm(b, v_half, -2);
|
||||
+ nir_def *parity = nir_iand_imm(b, prim, 1u);
|
||||
+ nir_def *slot = nir_isub(b, nir_imm_int(b, 2), parity); /* even→2, odd→1 */
|
||||
+ nir_def *eligible = nir_iand(b,
|
||||
+ nir_uge(b, v, nir_imm_int(b, 4)),
|
||||
+ nir_ult(b, prim, max_prim));
|
||||
+ emit_prim_store(b, buf, output_count, instance_id, eligible,
|
||||
+ prim, slot, 3, value, stride, offset_bytes);
|
||||
+ }
|
||||
+ }
|
||||
+ nir_pop_if(b, NULL);
|
||||
+}
|
||||
+
|
||||
+/* ----- Main lowering: per store_output XFB channel ----- */
|
||||
+
|
||||
+static void
|
||||
+lower_xfb_output_iter17(nir_builder *b, nir_intrinsic_instr *intr,
|
||||
+ unsigned channel_idx, unsigned num_components,
|
||||
+ unsigned buffer, unsigned offset_words)
|
||||
+{
|
||||
+ assert(buffer < MAX_XFB_BUFFERS);
|
||||
+ assert(nir_intrinsic_component(intr) == 0);
|
||||
+
|
||||
+ uint16_t stride = b->shader->info.xfb_stride[buffer] * 4;
|
||||
+ assert(stride != 0);
|
||||
+ uint16_t offset_bytes = offset_words * 4;
|
||||
+
|
||||
+ BITSET_SET(b->shader->info.system_values_read, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE);
|
||||
+ BITSET_SET(b->shader->info.system_values_read, SYSTEM_VALUE_INSTANCE_ID);
|
||||
+
|
||||
+ nir_def *topology = load_sysval(b, graphics, 32, vs.xfb_topology);
|
||||
+ nir_def *out_count = load_sysval(b, graphics, 32, vs.xfb_output_count);
|
||||
+ nir_def *N = nir_load_num_vertices(b);
|
||||
+ nir_def *v = nir_load_raw_vertex_id_pan(b);
|
||||
+ nir_def *instance = nir_load_instance_id(b);
|
||||
+ nir_def *buf = nir_load_xfb_address(b, 64, .base = buffer);
|
||||
+
|
||||
+ nir_def *src = intr->src[0].ssa;
|
||||
+ nir_component_mask_t mask = nir_component_mask(num_components);
|
||||
+ nir_def *value = nir_channels(b, src, mask << channel_idx);
|
||||
+
|
||||
+ /* Topology dispatch ladder. LIST first (fast path). */
|
||||
+ nir_push_if(b, nir_ieq_imm(b, topology, PANVK_XFB_TOPO_LIST));
|
||||
+ {
|
||||
+ emit_list_store(b, buf, out_count, instance, v, value,
|
||||
+ stride, offset_bytes);
|
||||
+ }
|
||||
+ nir_push_else(b, NULL);
|
||||
+ {
|
||||
+ /* iter17 Janet Finding 3: gate all non-LIST emission on
|
||||
+ * output_count > 0. For degenerate input counts (N < min required
|
||||
+ * for the topology), output_count is 0 and we must emit NO stores
|
||||
+ * — otherwise N-2 / N-3 / etc. arithmetic underflows in the
|
||||
+ * eligibility predicates and we falsely fire stores. */
|
||||
+ nir_push_if(b, nir_ult(b, nir_imm_int(b, 0), out_count));
|
||||
+ {
|
||||
+ nir_push_if(b, nir_ieq_imm(b, topology, PANVK_XFB_TOPO_TRI_STRIP));
|
||||
+ {
|
||||
+ emit_tri_strip(b, v, N, buf, out_count, instance, value,
|
||||
+ stride, offset_bytes);
|
||||
+ }
|
||||
+ nir_push_else(b, NULL);
|
||||
+ {
|
||||
+ nir_push_if(b, nir_ieq_imm(b, topology, PANVK_XFB_TOPO_LINE_STRIP));
|
||||
+ {
|
||||
+ emit_line_strip(b, v, N, buf, out_count, instance, value,
|
||||
+ stride, offset_bytes);
|
||||
+ }
|
||||
+ nir_push_else(b, NULL);
|
||||
+ {
|
||||
+ nir_push_if(b, nir_ieq_imm(b, topology, PANVK_XFB_TOPO_TRI_FAN));
|
||||
+ {
|
||||
+ emit_tri_fan(b, v, N, buf, out_count, instance, value,
|
||||
+ stride, offset_bytes);
|
||||
+ }
|
||||
+ nir_push_else(b, NULL);
|
||||
+ {
|
||||
+ nir_push_if(b, nir_ieq_imm(b, topology, PANVK_XFB_TOPO_LINE_LIST_ADJ));
|
||||
+ {
|
||||
+ emit_line_list_adj(b, v, N, buf, out_count, instance, value,
|
||||
+ stride, offset_bytes);
|
||||
+ }
|
||||
+ nir_push_else(b, NULL);
|
||||
+ {
|
||||
+ nir_push_if(b, nir_ieq_imm(b, topology, PANVK_XFB_TOPO_LINE_STRIP_ADJ));
|
||||
+ {
|
||||
+ emit_line_strip_adj(b, v, N, buf, out_count, instance, value,
|
||||
+ stride, offset_bytes);
|
||||
+ }
|
||||
+ nir_push_else(b, NULL);
|
||||
+ {
|
||||
+ nir_push_if(b, nir_ieq_imm(b, topology, PANVK_XFB_TOPO_TRI_LIST_ADJ));
|
||||
+ {
|
||||
+ emit_tri_list_adj(b, v, N, buf, out_count, instance, value,
|
||||
+ stride, offset_bytes);
|
||||
+ }
|
||||
+ nir_push_else(b, NULL);
|
||||
+ {
|
||||
+ /* TRI_STRIP_ADJ — last case */
|
||||
+ emit_tri_strip_adj(b, v, N, buf, out_count, instance, value,
|
||||
+ stride, offset_bytes);
|
||||
+ }
|
||||
+ nir_pop_if(b, NULL);
|
||||
+ }
|
||||
+ nir_pop_if(b, NULL);
|
||||
+ }
|
||||
+ nir_pop_if(b, NULL);
|
||||
+ }
|
||||
+ nir_pop_if(b, NULL);
|
||||
+ }
|
||||
+ nir_pop_if(b, NULL);
|
||||
+ }
|
||||
+ nir_pop_if(b, NULL);
|
||||
+ }
|
||||
+ nir_pop_if(b, NULL); /* Janet Finding 3: close output_count > 0 guard */
|
||||
+ }
|
||||
+ nir_pop_if(b, NULL);
|
||||
+}
|
||||
+
|
||||
+/* Mirror of pan_nir_lower_xfb's lower_xfb: load_vertex_id rewrite +
|
||||
+ * dispatch store_output through our topology-aware emission. */
|
||||
+static bool
|
||||
+lower_xfb_iter17(nir_builder *b, nir_intrinsic_instr *intr,
|
||||
+ UNUSED void *data)
|
||||
+{
|
||||
+ if (intr->intrinsic == nir_intrinsic_load_vertex_id) {
|
||||
+ b->cursor = nir_instr_remove(&intr->instr);
|
||||
+ nir_def *repl = nir_iadd(b, nir_load_raw_vertex_id_pan(b),
|
||||
+ nir_load_raw_vertex_offset_pan(b));
|
||||
+ nir_def_rewrite_uses(&intr->def, repl);
|
||||
+ return true;
|
||||
+ }
|
||||
+
|
||||
+ if (intr->intrinsic != nir_intrinsic_store_output)
|
||||
+ return false;
|
||||
+
|
||||
+ bool progress = false;
|
||||
+ b->cursor = nir_before_instr(&intr->instr);
|
||||
+
|
||||
+ /* io_xfb has only out[0,1]; the other 2 channels are in io_xfb2.
|
||||
+ * Outer loop selects which annotation; inner picks which channel. */
|
||||
+ for (unsigned i = 0; i < 2; ++i) {
|
||||
+ nir_io_xfb xfb = i ? nir_intrinsic_io_xfb2(intr)
|
||||
+ : nir_intrinsic_io_xfb(intr);
|
||||
+ for (unsigned j = 0; j < 2; ++j) {
|
||||
+ if (!xfb.out[j].num_components)
|
||||
+ continue;
|
||||
+ lower_xfb_output_iter17(b, intr, i * 2 + j, xfb.out[j].num_components,
|
||||
+ xfb.out[j].buffer, xfb.out[j].offset);
|
||||
+ progress = true;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ if (progress)
|
||||
+ nir_instr_remove(&intr->instr);
|
||||
+ return progress;
|
||||
+}
|
||||
+
|
||||
+bool
|
||||
+panvk_per_arch(nir_lower_xfb)(nir_shader *nir)
|
||||
+{
|
||||
+ return nir_shader_intrinsics_pass(
|
||||
+ nir, lower_xfb_iter17, nir_metadata_control_flow, NULL);
|
||||
+}
|
||||
+
|
||||
+#endif /* PAN_ARCH < 9 */
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,181 @@
|
||||
# Maintainer: Markus Fritsche <fritsche.markus@gmail.com>
|
||||
#
|
||||
# mesa-panvk-bifrost-video — sibling of mesa-panvk-bifrost (r4) that adds
|
||||
# VK_KHR_video_decode_h264 on Mali Bifrost SBCs (PAN_ARCH 6/7) backed by
|
||||
# the SoC's V4L2-stateless hantro VPU (RK3566/RK3568).
|
||||
#
|
||||
# Campaign: ~/src/panvk-bifrost-video/ — Phase 4 byte-exact validated
|
||||
# 2026-05-21 (48/48 BBB display frames match ffmpeg+libva-v4l2-request-
|
||||
# fourier byte-for-byte on the same hantro). Phase 5 second-model review
|
||||
# completed; load-bearing findings (output_map OOB, static counter,
|
||||
# session_init unwind, probe_hantro gate) all applied.
|
||||
#
|
||||
# What it does (on top of r4):
|
||||
# - 0001..0004: inherited from mesa-panvk-bifrost (robustness2/null-
|
||||
# descriptor, vk1.1/1.2 advertisement, EXT_transform_feedback, XFB
|
||||
# primitive decomposition) — symlinked from the r4 package directory
|
||||
# so the patches don't drift between siblings.
|
||||
# - 0005: VK_KHR_video_queue + VK_KHR_video_decode_queue +
|
||||
# VK_KHR_video_decode_h264 backed by V4L2-stateless hantro.
|
||||
# Touches 14 files in src/panfrost/vulkan/; full diff in
|
||||
# 0005-panvk-bifrost-video-KHR-video-decode-h264.patch.
|
||||
#
|
||||
# Co-existence:
|
||||
# - Installs to /usr/lib/panvk-bifrost-video/ (parallel to r4's
|
||||
# /usr/lib/panvk-bifrost/). Pick at runtime via VK_ICD_FILENAMES.
|
||||
# - r4 stays the recommended default for the Chromium-GPU-process
|
||||
# consumer (no video needed there). Use this package when the
|
||||
# consumer wants Vulkan video decode (mpv-fourier, ffmpeg-vulkan,
|
||||
# future Chromium-VulkanVideoDecoder).
|
||||
#
|
||||
# Phase 1 limitations to know about (documented in source comments):
|
||||
# - Single video session per device (active_video singleton)
|
||||
# - Synchronous decode at record time — no pipelining yet
|
||||
# - Hardcoded /dev/video1 + /dev/media0 (matches RK3566/68, blocks
|
||||
# other SoCs without a topology-walk port)
|
||||
# - Bitstream source buffer assumed HOST_VISIBLE (true on panvk-
|
||||
# bifrost, would need fallback on other backends)
|
||||
#
|
||||
# Build target: arch-aarch64 runner via marfrit-packages Gitea Actions.
|
||||
# Mesa build is slow (~30-60min on Cortex-A55).
|
||||
|
||||
pkgname=mesa-panvk-bifrost-video
|
||||
_mesaver=26.0.6
|
||||
pkgver=26.0.6.r5.video1
|
||||
pkgrel=1
|
||||
pkgdesc="Patched Mesa libvulkan_panfrost.so adding VK_KHR_video_decode_h264 on Bifrost SBCs (sibling of mesa-panvk-bifrost-r4)"
|
||||
arch=('aarch64')
|
||||
url="https://git.reauktion.de/marfrit/panvk-bifrost"
|
||||
license=('MIT')
|
||||
|
||||
depends=(
|
||||
'mesa' # for shared mesa runtime libs
|
||||
'libdrm'
|
||||
'wayland'
|
||||
'libxcb'
|
||||
'libx11'
|
||||
'libxshmfence'
|
||||
'zlib'
|
||||
'zstd'
|
||||
'libelf'
|
||||
'libffi'
|
||||
'expat'
|
||||
'llvm-libs'
|
||||
'lm_sensors'
|
||||
)
|
||||
makedepends=(
|
||||
'meson'
|
||||
'ninja'
|
||||
'glslang'
|
||||
'python-mako'
|
||||
'python-packaging'
|
||||
'wayland-protocols'
|
||||
'libxrandr'
|
||||
'xorgproto'
|
||||
'libdrm'
|
||||
'llvm'
|
||||
'libclc'
|
||||
'spirv-llvm-translator'
|
||||
'spirv-tools'
|
||||
'rust-bindgen'
|
||||
'patch'
|
||||
)
|
||||
|
||||
source=(
|
||||
"https://archive.mesa3d.org/mesa-${_mesaver}.tar.xz"
|
||||
"0001-panvk-expose-robustness2-nullDescriptor-bifrost.patch"
|
||||
"0002-panvk-expose-vulkan-1.1-1.2-on-bifrost.patch"
|
||||
"0003-panvk-bifrost-vk-ext-transform-feedback.patch"
|
||||
"0004-panvk-bifrost-xfb-primitive-decomposition.patch"
|
||||
"0005-panvk-bifrost-video-KHR-video-decode-h264.patch"
|
||||
"icd.json"
|
||||
)
|
||||
# Mesa tarball checksum is not pinned here ('SKIP'); it is the same upstream version as the sibling r4 package.
|
||||
sha256sums=(
|
||||
'SKIP' # mesa tarball — co-trust w/ r4 sibling
|
||||
'SKIP' # patches are local
|
||||
'SKIP'
|
||||
'SKIP'
|
||||
'SKIP'
|
||||
'SKIP'
|
||||
'SKIP' # icd.json
|
||||
)
|
||||
|
||||
prepare() {
|
||||
cd "mesa-${_mesaver}"
|
||||
|
||||
# r1+r2: small sed-based edits inherited from r4 (verbatim from the
|
||||
# sibling PKGBUILD — keep in sync).
|
||||
sed -i 's|\.KHR_robustness2 = PAN_ARCH >= 10,|.KHR_robustness2 = true,|' src/panfrost/vulkan/panvk_vX_physical_device.c
|
||||
sed -i 's|\.EXT_robustness2 = PAN_ARCH >= 10,|.EXT_robustness2 = true,|' src/panfrost/vulkan/panvk_vX_physical_device.c
|
||||
sed -i 's|\.nullDescriptor = PAN_ARCH >= 10,|.nullDescriptor = true,|' src/panfrost/vulkan/panvk_vX_physical_device.c
|
||||
sed -i 's|bool has_vk1_1 = PAN_ARCH >= 10;|bool has_vk1_1 = true;|' src/panfrost/vulkan/panvk_vX_physical_device.c
|
||||
sed -i 's|bool has_vk1_2 = PAN_ARCH >= 10;|bool has_vk1_2 = true;|' src/panfrost/vulkan/panvk_vX_physical_device.c
|
||||
|
||||
# r3: EXT_transform_feedback for Bifrost.
|
||||
patch -p1 < "${srcdir}/0003-panvk-bifrost-vk-ext-transform-feedback.patch"
|
||||
|
||||
# r4: XFB primitive decomposition NIR pass.
|
||||
patch -p1 < "${srcdir}/0004-panvk-bifrost-xfb-primitive-decomposition.patch"
|
||||
|
||||
# video: VK_KHR_video_decode_h264 via V4L2-hantro.
|
||||
patch -p1 < "${srcdir}/0005-panvk-bifrost-video-KHR-video-decode-h264.patch"
|
||||
|
||||
# Sanity-check r1..r4 (inherited).
|
||||
grep -q "KHR_robustness2 = true," src/panfrost/vulkan/panvk_vX_physical_device.c
|
||||
grep -q "EXT_robustness2 = true," src/panfrost/vulkan/panvk_vX_physical_device.c
|
||||
grep -q "nullDescriptor = true," src/panfrost/vulkan/panvk_vX_physical_device.c
|
||||
grep -q "has_vk1_1 = true;" src/panfrost/vulkan/panvk_vX_physical_device.c
|
||||
grep -q "has_vk1_2 = true;" src/panfrost/vulkan/panvk_vX_physical_device.c
|
||||
grep -q "EXT_transform_feedback = PAN_ARCH < 9," src/panfrost/vulkan/panvk_vX_physical_device.c
|
||||
test -f src/panfrost/vulkan/jm/panvk_vX_cmd_xfb.c
|
||||
grep -q "panvk_per_arch(nir_lower_xfb)" src/panfrost/vulkan/panvk_vX_shader.c
|
||||
test -f src/panfrost/vulkan/panvk_vX_xfb_lower.c
|
||||
|
||||
# Sanity-check video patch landed.
|
||||
grep -q "KHR_video_queue = PAN_ARCH < 9 && panvk_v4l2_probe_hantro()" \
|
||||
src/panfrost/vulkan/panvk_vX_physical_device.c
|
||||
grep -q "PANVK_QUEUE_FAMILY_VIDEO_DECODE" src/panfrost/vulkan/panvk_device.h
|
||||
test -f src/panfrost/vulkan/panvk_video_decode.c
|
||||
test -f src/panfrost/vulkan/panvk_video_decode.h
|
||||
test -f src/panfrost/vulkan/panvk_v4l2.c
|
||||
test -f src/panfrost/vulkan/panvk_v4l2_h264.c
|
||||
test -f src/panfrost/vulkan/panvk_v4l2_h264_slice_header.c
|
||||
test -f src/panfrost/vulkan/panvk_v4l2_h264_slice_header.h
|
||||
grep -q "panvk_v4l2_h264_slice_header.c" src/panfrost/vulkan/meson.build
|
||||
grep -q "panvk_video_queue_submit_noop" src/panfrost/vulkan/panvk_vX_device.c
|
||||
}
|
||||
|
||||
build() {
|
||||
cd "mesa-${_mesaver}"
|
||||
# Mirror r4's narrow build profile.
|
||||
meson setup build/ \
|
||||
--prefix=/usr \
|
||||
--libdir=lib \
|
||||
--buildtype=release \
|
||||
-Dvulkan-drivers=panfrost \
|
||||
-Dgallium-drivers= \
|
||||
-Dplatforms=wayland,x11 \
|
||||
-Dglx=disabled \
|
||||
-Degl=disabled \
|
||||
-Dgles1=disabled \
|
||||
-Dgles2=disabled \
|
||||
-Dvulkan-layers= \
|
||||
-Dtools= \
|
||||
-Dgallium-rusticl=false \
|
||||
-Dmicrosoft-clc=disabled
|
||||
meson compile -C build
|
||||
}
|
||||
|
||||
package() {
|
||||
cd "${srcdir}/mesa-${_mesaver}"
|
||||
|
||||
# Co-install path — parallel to r4's /usr/lib/panvk-bifrost/.
|
||||
install -Dm755 build/src/panfrost/vulkan/libvulkan_panfrost.so \
|
||||
"$pkgdir/usr/lib/panvk-bifrost-video/libvulkan_panfrost.so"
|
||||
|
||||
# ICD JSON pointing at the video build. Opt-in via VK_ICD_FILENAMES;
|
||||
# NOT in /usr/share/vulkan/icd.d/ so it doesn't override stock or r4.
|
||||
install -Dm644 "$srcdir/icd.json" \
|
||||
"$pkgdir/usr/lib/panvk-bifrost-video/icd.json"
|
||||
}
|
||||
@@ -0,0 +1,40 @@
|
||||
# mesa-panvk-bifrost-video
|
||||
|
||||
Patched Mesa `libvulkan_panfrost.so` that **adds `VK_KHR_video_decode_h264`** on Mali Bifrost SBCs (PAN_ARCH 6/7, RK3566/RK3568 class hardware), backed by the SoC's V4L2-stateless **hantro** VPU.
|
||||
|
||||
This is a **sibling** of [mesa-panvk-bifrost](../mesa-panvk-bifrost/) (the r4 package that exposes Bifrost to Chromium's Vulkan compositor). Pick this one when the consumer wants Vulkan **video decode** in addition; pick r4 for compositor-only.
|
||||
|
||||
## Status
|
||||
|
||||
Phase 4 byte-exact validated 2026-05-21: 48/48 unique BBB display frames decoded by this package are byte-identical to `ffmpeg+libva-v4l2-request-fourier` running on the same hantro hardware. Phase 5 second-model review completed; all load-bearing findings addressed. First publish via marfrit-packages CI 2026-05-22 (PR #79 merge did not auto-fire Actions; this re-trigger restores the standard build/sign/publish path).
|
||||
|
||||
## How to use
|
||||
|
||||
```sh
|
||||
# Co-installs alongside r4 and stock mesa.
|
||||
sudo pacman -S mesa-panvk-bifrost-video
|
||||
|
||||
# Opt in (not on the default loader search path).
|
||||
export VK_ICD_FILENAMES=/usr/lib/panvk-bifrost-video/icd.json
|
||||
export PAN_I_WANT_A_BROKEN_VULKAN_DRIVER=1 # mesa-upstream gate
|
||||
|
||||
# Run a Vulkan video consumer.
|
||||
vulkan-video-dec-simple-test -i your.h264 --codec h264 --noPresent --maxFrameCount 50
|
||||
# or
|
||||
ffmpeg -hwaccel vulkan -i your.mp4 ...
|
||||
```
|
||||
|
||||
## Phase 1 limitations
|
||||
|
||||
Documented in source comments and worth knowing before relying on this in production:
|
||||
|
||||
- **Single video session per device.** Concurrent `VkVideoSessionKHR` objects on the same device clobber each other (`active_video` singleton). Sufficient for current single-stream consumers.
|
||||
- **Synchronous decode at record time.** The full V4L2 ioctl dance runs to completion inside `vkCmdDecodeVideoKHR`. No pipelining. Throughput is bounded by hantro's ~1.16× realtime on 1080p H.264.
|
||||
- **Hardcoded `/dev/video1` + `/dev/media0`.** Matches RK3566/68 but won't work on other SoCs without a topology-walk port (see `libva-v4l2-request-fourier` for the full version).
|
||||
- **Bitstream source buffer assumed HOST_VISIBLE.** True on panvk-bifrost (no DEVICE_LOCAL-only memory types exist), but the code silently skips decode if the app bound the buffer to non-host-visible memory.
|
||||
|
||||
## Co-existence
|
||||
|
||||
- Installs to `/usr/lib/panvk-bifrost-video/` — parallel to r4's `/usr/lib/panvk-bifrost/` and stock `/usr/lib/`.
|
||||
- Opt-in via `VK_ICD_FILENAMES`; does NOT register itself in `/usr/share/vulkan/icd.d/`.
|
||||
- Three drivers coexist without conflict; the user picks at runtime which to use.
|
||||
@@ -0,0 +1,7 @@
|
||||
{
|
||||
"ICD": {
|
||||
"api_version": "1.4.335",
|
||||
"library_path": "/usr/lib/panvk-bifrost-video/libvulkan_panfrost.so"
|
||||
},
|
||||
"file_format_version": "1.0.1"
|
||||
}
|
||||
@@ -0,0 +1,57 @@
|
||||
From: claude-noether (on behalf of mfritsche)
|
||||
Date: 2026-05-19
|
||||
Subject: panvk: expose VK_KHR/EXT_robustness2 + nullDescriptor on Bifrost (PAN_ARCH 6/7)
|
||||
|
||||
Without this, Mesa's Zink driver refuses to use PanVk-Bifrost as its Vulkan
|
||||
backend, falling back silently to llvmpipe (software rasterizer) for all
|
||||
GL-via-Zink on Bifrost SBCs. That defeats the entire purpose of having a
|
||||
Vulkan driver on Bifrost — GL acceleration via Zink is the most natural
|
||||
near-term consumer.
|
||||
|
||||
panvk_vX_nir_lower_descriptors.c:1309 and panvk_vX_shader.c:1355 already
|
||||
plumb dev->vk.enabled_features.nullDescriptor arch-agnostically — the gate
|
||||
at panvk_vX_physical_device.c was set conservatively when Bifrost was
|
||||
unmaintained, not because of hardware incapability.
|
||||
|
||||
iter1–7 of the panvk-bifrost campaign proved fundamental driver functions
|
||||
on Mali-G52 r1 MC1 (PAN_ARCH=7). This patch is the iter8 follow-up.
|
||||
|
||||
robustBufferAccess2 and robustImageAccess2 are NOT flipped — they're
|
||||
independent rb2 features Zink doesn't require, gated differently
|
||||
(robustBufferAccess2 = PAN_ARCH >= 11, robustImageAccess2 = false), and
|
||||
out of scope for iter8.
|
||||
|
||||
---
|
||||
src/panfrost/vulkan/panvk_vX_physical_device.c | 6 +++---
|
||||
1 file changed, 3 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/src/panfrost/vulkan/panvk_vX_physical_device.c b/src/panfrost/vulkan/panvk_vX_physical_device.c
|
||||
--- a/src/panfrost/vulkan/panvk_vX_physical_device.c
|
||||
+++ b/src/panfrost/vulkan/panvk_vX_physical_device.c
|
||||
@@ -91,7 +91,7 @@ get_device_extensions(const struct panvk_physical_device *device,
|
||||
.KHR_pipeline_binary = true,
|
||||
.KHR_pipeline_executable_properties = true,
|
||||
.KHR_pipeline_library = true,
|
||||
- .KHR_robustness2 = PAN_ARCH >= 10,
|
||||
+ .KHR_robustness2 = true,
|
||||
.KHR_sampler_mirror_clamp_to_edge = true,
|
||||
.KHR_sampler_ycbcr_conversion = true,
|
||||
.KHR_separate_depth_stencil_layouts = true,
|
||||
@@ -168,7 +168,7 @@ get_device_extensions(const struct panvk_physical_device *device,
|
||||
.EXT_queue_family_foreign = true,
|
||||
.EXT_robustness = pan_arch(device->kmod.dev->props.gpu_id) >= 9,
|
||||
.EXT_image_robustness = true,
|
||||
- .EXT_robustness2 = PAN_ARCH >= 10,
|
||||
+ .EXT_robustness2 = true,
|
||||
.EXT_sampler_filter_minmax = PAN_ARCH >= 10,
|
||||
.EXT_scalar_block_layout = true,
|
||||
.EXT_separate_stencil_usage = true,
|
||||
@@ -493,7 +493,7 @@ get_device_features(const struct panvk_physical_device *device,
|
||||
/* VK_KHR_robustness2 */
|
||||
.robustBufferAccess2 = PAN_ARCH >= 11,
|
||||
.robustImageAccess2 = false,
|
||||
- .nullDescriptor = PAN_ARCH >= 10,
|
||||
+ .nullDescriptor = true,
|
||||
|
||||
/* VK_KHR_shader_clock */
|
||||
.shaderSubgroupClock = device->kmod.dev->props.gpu_can_query_timestamp,
|
||||
@@ -0,0 +1,47 @@
|
||||
From: claude-noether (on behalf of mfritsche)
|
||||
Date: 2026-05-20
|
||||
Subject: panvk: expose Vulkan 1.1 + 1.2 on Bifrost (PAN_ARCH 6/7)
|
||||
|
||||
ANGLE (Chromium's GL stack) requires apiVersion >= 1.1 to initialize. Without
|
||||
this, Brave / Chromium's GPU process fails at GL info collection:
|
||||
|
||||
vk_renderer.cpp:2659 (initialize): ANGLE Requires a minimum Vulkan device
|
||||
version of 1.1
|
||||
Display::initialize error 0: Internal Vulkan error (-9): The requested
|
||||
version of Vulkan is not supported by the driver
|
||||
|
||||
Stack-up with iter8's robustness2 patch enables ANGLE → PanVk-Bifrost →
|
||||
Skia (via --enable-features=Vulkan) on Bifrost SBCs.
|
||||
|
||||
PanVk-Bifrost already supports the bulk of 1.1-promoted features as extensions
|
||||
(multiview, maintenance1-3, descriptor update template, 16-bit storage,
|
||||
sampler ycbcr, variable pointers, etc. — all
|
||||
visible in iter0 vulkaninfo). The version bump primarily bundles them.
|
||||
|
||||
Risk: Vulkan 1.1 has features beyond what iter1–7 exercised (protected memory,
|
||||
full subgroup ops). Specific app failures will be characterizable.
|
||||
|
||||
1.2 is also flipped — Brave's Vulkan path may want descriptor indexing,
|
||||
buffer device address, etc. (all listed in iter0 vulkaninfo as supported
|
||||
extensions, just gated as 1.0-with-extensions, not 1.2-core).
|
||||
|
||||
---
|
||||
src/panfrost/vulkan/panvk_vX_physical_device.c | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/src/panfrost/vulkan/panvk_vX_physical_device.c b/src/panfrost/vulkan/panvk_vX_physical_device.c
|
||||
--- a/src/panfrost/vulkan/panvk_vX_physical_device.c
|
||||
+++ b/src/panfrost/vulkan/panvk_vX_physical_device.c
|
||||
@@ -38,8 +38,8 @@ get_device_extensions(const struct panvk_physical_device *device,
|
||||
struct vk_device_extension_table *ext)
|
||||
{
|
||||
*ext = (struct vk_device_extension_table){
|
||||
- .KHR_8bit_storage = true,
|
||||
- .KHR_16bit_storage = true,
|
||||
- bool has_vk1_1 = PAN_ARCH >= 10;
|
||||
- bool has_vk1_2 = PAN_ARCH >= 10;
|
||||
+ .KHR_8bit_storage = true,
|
||||
+ .KHR_16bit_storage = true,
|
||||
+ bool has_vk1_1 = true;
|
||||
+ bool has_vk1_2 = true;
|
||||
*ext = (struct vk_device_extension_table){
|
||||
@@ -0,0 +1,328 @@
|
||||
--- a/src/panfrost/vulkan/panvk_shader.h 2026-04-29 22:19:00.000000000 +0200
|
||||
+++ b/src/panfrost/vulkan/panvk_shader.h 2026-05-20 18:52:53.312698258 +0200
|
||||
@@ -150,6 +150,10 @@
|
||||
struct {
|
||||
#if PAN_ARCH < 9
|
||||
int32_t raw_vertex_offset;
|
||||
+ uint32_t num_vertices; /* iter13: XFB needs per-draw vertex count */
|
||||
+ /* aligned_u64 attribute below inserts the 4-byte alignment gap
|
||||
+ * after num_vertices automatically — no explicit pad needed. */
|
||||
+ aligned_u64 xfb_address[4]; /* iter13: 4 transform feedback buffer base addresses */
|
||||
#endif
|
||||
int32_t first_vertex;
|
||||
int32_t base_instance;
|
||||
--- a/src/panfrost/vulkan/panvk_vX_physical_device.c 2026-05-20 19:09:29.711145446 +0200
|
||||
+++ b/src/panfrost/vulkan/panvk_vX_physical_device.c 2026-05-20 18:52:54.832720445 +0200
|
||||
@@ -169,6 +169,7 @@
|
||||
.EXT_provoking_vertex = true,
|
||||
.EXT_queue_family_foreign = true,
|
||||
.EXT_robustness2 = true,
|
||||
+ .EXT_transform_feedback = PAN_ARCH < 9, /* iter13: JM-class only for now */
|
||||
.EXT_sampler_filter_minmax = PAN_ARCH >= 10,
|
||||
.EXT_scalar_block_layout = true,
|
||||
.EXT_separate_stencil_usage = true,
|
||||
@@ -495,6 +496,10 @@
|
||||
.robustImageAccess2 = false,
|
||||
.nullDescriptor = true,
|
||||
|
||||
+ /* VK_EXT_transform_feedback (iter13) */
|
||||
+ .transformFeedback = PAN_ARCH < 9,
|
||||
+ .geometryStreams = false,
|
||||
+
|
||||
/* VK_KHR_shader_clock */
|
||||
.shaderSubgroupClock = device->kmod.dev->props.gpu_can_query_timestamp,
|
||||
.shaderDeviceClock = device->kmod.dev->props.timestamp_device_coherent,
|
||||
@@ -1020,6 +1025,18 @@
|
||||
.robustStorageBufferAccessSizeAlignment = 1,
|
||||
.robustUniformBufferAccessSizeAlignment = 1,
|
||||
|
||||
+ /* VK_EXT_transform_feedback (iter13) */
|
||||
+ .maxTransformFeedbackStreams = 1,
|
||||
+ .maxTransformFeedbackBuffers = 4,
|
||||
+ .maxTransformFeedbackBufferSize = UINT32_MAX,
|
||||
+ .maxTransformFeedbackStreamDataSize = 512,
|
||||
+ .maxTransformFeedbackBufferDataSize = 512,
|
||||
+ .maxTransformFeedbackBufferDataStride = 2048,
|
||||
+ .transformFeedbackQueries = false,
|
||||
+ .transformFeedbackStreamsLinesTriangles = false,
|
||||
+ .transformFeedbackRasterizationStreamSelect = false,
|
||||
+ .transformFeedbackDraw = false,
|
||||
+
|
||||
/* VK_EXT_shader_object */
|
||||
/* We do not currently support VK_EXT_shader_object but this is used
|
||||
* internally by vk_shader
|
||||
--- a/src/panfrost/vulkan/panvk_vX_shader.c 2026-04-29 22:19:00.000000000 +0200
|
||||
+++ b/src/panfrost/vulkan/panvk_vX_shader.c 2026-05-20 18:52:56.556745611 +0200
|
||||
@@ -21,6 +21,7 @@
|
||||
#include "panvk_physical_device.h"
|
||||
#include "panvk_sampler.h"
|
||||
#include "panvk_shader.h"
|
||||
+#include "pan_nir.h" /* iter13: pan_nir_lower_xfb */
|
||||
|
||||
#include "spirv/nir_spirv.h"
|
||||
#include "util/memstream.h"
|
||||
@@ -100,6 +101,20 @@
|
||||
case nir_intrinsic_load_raw_vertex_offset_pan:
|
||||
val = load_sysval(b, graphics, bit_size, vs.raw_vertex_offset);
|
||||
break;
|
||||
+ case nir_intrinsic_load_num_vertices: /* iter13: XFB index calc */
|
||||
+ val = load_sysval(b, graphics, bit_size, vs.num_vertices);
|
||||
+ break;
|
||||
+ case nir_intrinsic_load_xfb_address: { /* iter13: XFB buffer N base address */
|
||||
+ unsigned idx = nir_intrinsic_base(intr);
|
||||
+ switch (idx) {
|
||||
+ case 0: val = load_sysval(b, graphics, bit_size, vs.xfb_address[0]); break;
|
||||
+ case 1: val = load_sysval(b, graphics, bit_size, vs.xfb_address[1]); break;
|
||||
+ case 2: val = load_sysval(b, graphics, bit_size, vs.xfb_address[2]); break;
|
||||
+ case 3: val = load_sysval(b, graphics, bit_size, vs.xfb_address[3]); break;
|
||||
+ default: return false;
|
||||
+ }
|
||||
+ break;
|
||||
+ }
|
||||
case nir_intrinsic_load_layer_id:
|
||||
assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
|
||||
val = load_sysval(b, graphics, bit_size, layer_id);
|
||||
@@ -457,6 +472,7 @@
|
||||
core_max_id);
|
||||
|
||||
pan_preprocess_nir(nir, pdev->kmod.dev->props.gpu_id);
|
||||
+
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -870,6 +886,18 @@
|
||||
nir_var_shader_in | nir_var_shader_out, UINT32_MAX);
|
||||
NIR_PASS(_, nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
|
||||
glsl_type_size, nir_lower_io_use_interpolated_input_intrinsics);
|
||||
+
|
||||
+#if PAN_ARCH < 9
|
||||
+ /* iter13: VK_EXT_transform_feedback — runs AFTER nir_lower_io so that
|
||||
+ * shader outputs are now store_output intrinsics that pan_nir_lower_xfb
|
||||
+ * can rewrite to nir_store_global+nir_load_xfb_address. */
|
||||
+ if (nir->info.stage == MESA_SHADER_VERTEX &&
|
||||
+ nir->info.has_transform_feedback_varyings) {
|
||||
+ NIR_PASS(_, nir, nir_opt_constant_folding);
|
||||
+ NIR_PASS(_, nir, nir_io_add_intrinsic_xfb_info);
|
||||
+ NIR_PASS(_, nir, pan_nir_lower_xfb);
|
||||
+ }
|
||||
+#endif
|
||||
}
|
||||
|
||||
static VkResult
|
||||
@@ -1288,6 +1316,9 @@
|
||||
.view_mask = (state && state->rp) ? state->rp->view_mask : 0,
|
||||
.robust2_modes = robust2_modes,
|
||||
.robust_descriptors = dev->vk.enabled_features.nullDescriptor,
|
||||
+ /* iter13: XFB shaders must disable IDVS (matches Panfrost-Gallium). */
|
||||
+ .no_idvs = (info->stage == MESA_SHADER_VERTEX) &&
|
||||
+ info->nir->info.has_transform_feedback_varyings,
|
||||
};
|
||||
|
||||
switch (info->stage) {
|
||||
--- a/src/panfrost/vulkan/panvk_cmd_draw.h 2026-04-29 22:19:00.000000000 +0200
|
||||
+++ b/src/panfrost/vulkan/panvk_cmd_draw.h 2026-05-20 18:52:57.748763011 +0200
|
||||
@@ -135,6 +135,19 @@
|
||||
struct panvk_graphics_sysvals sysvals;
|
||||
|
||||
#if PAN_ARCH < 9
|
||||
+ /* iter13: VK_EXT_transform_feedback state (JM-class only for now). */
|
||||
+ struct {
|
||||
+ bool active;
|
||||
+ uint32_t buffer_count;
|
||||
+ struct {
|
||||
+ uint64_t addr;
|
||||
+ uint64_t offset;
|
||||
+ uint64_t size;
|
||||
+ } buffers[4];
|
||||
+ } xfb;
|
||||
+#endif
|
||||
+
|
||||
+#if PAN_ARCH < 9
|
||||
struct panvk_shader_link link;
|
||||
#endif
|
||||
|
||||
--- a/src/panfrost/vulkan/panvk_vX_cmd_draw.c 2026-04-29 22:19:00.000000000 +0200
|
||||
+++ b/src/panfrost/vulkan/panvk_vX_cmd_draw.c 2026-05-20 19:10:23.031919662 +0200
|
||||
@@ -10,6 +10,7 @@
|
||||
#include "panvk_entrypoints.h"
|
||||
|
||||
#include "pan_desc.h"
|
||||
+#include "pan_compiler.h" /* PAN_SHADER_OOB_ADDRESS */
|
||||
#include "pan_util.h"
|
||||
|
||||
static void
|
||||
@@ -722,6 +723,35 @@
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals, vs.raw_vertex_offset,
|
||||
info->vertex.raw_offset);
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals, layer_id, info->layer_id);
|
||||
+
|
||||
+ /* iter13: VK_EXT_transform_feedback sysvals — always set (per draw),
|
||||
+ * reflect bound XFB state. set_gfx_sysval is a no-op if value unchanged. */
|
||||
+ set_gfx_sysval(cmdbuf, dirty_sysvals, vs.num_vertices, info->vertex.count);
|
||||
+ {
|
||||
+ const struct panvk_cmd_graphics_state *_gfx = &cmdbuf->state.gfx;
|
||||
+ /* iter13: default each XFB buffer address to PAN_SHADER_OOB_ADDRESS
|
||||
+ * (= 1<<63). This is the Panfrost-Gallium memory-sink idiom — the
|
||||
+ * Bifrost MMU silently discards stores to this address, so a pipeline
|
||||
+ * with XFB outputs used in a non-XFB draw (or in an XFB draw with
|
||||
+ * fewer bound buffers than the shader declares) is safe instead of
|
||||
+ * faulting. See gallium/drivers/panfrost/pan_cmdstream.c PAN_SYSVAL_XFB. */
|
||||
+ uint64_t _xa0 = PAN_SHADER_OOB_ADDRESS, _xa1 = PAN_SHADER_OOB_ADDRESS,
|
||||
+ _xa2 = PAN_SHADER_OOB_ADDRESS, _xa3 = PAN_SHADER_OOB_ADDRESS;
|
||||
+ if (_gfx->xfb.active) {
|
||||
+ if (_gfx->xfb.buffer_count > 0 && _gfx->xfb.buffers[0].addr)
|
||||
+ _xa0 = _gfx->xfb.buffers[0].addr + _gfx->xfb.buffers[0].offset;
|
||||
+ if (_gfx->xfb.buffer_count > 1 && _gfx->xfb.buffers[1].addr)
|
||||
+ _xa1 = _gfx->xfb.buffers[1].addr + _gfx->xfb.buffers[1].offset;
|
||||
+ if (_gfx->xfb.buffer_count > 2 && _gfx->xfb.buffers[2].addr)
|
||||
+ _xa2 = _gfx->xfb.buffers[2].addr + _gfx->xfb.buffers[2].offset;
|
||||
+ if (_gfx->xfb.buffer_count > 3 && _gfx->xfb.buffers[3].addr)
|
||||
+ _xa3 = _gfx->xfb.buffers[3].addr + _gfx->xfb.buffers[3].offset;
|
||||
+ }
|
||||
+ set_gfx_sysval(cmdbuf, dirty_sysvals, vs.xfb_address[0], _xa0);
|
||||
+ set_gfx_sysval(cmdbuf, dirty_sysvals, vs.xfb_address[1], _xa1);
|
||||
+ set_gfx_sysval(cmdbuf, dirty_sysvals, vs.xfb_address[2], _xa2);
|
||||
+ set_gfx_sysval(cmdbuf, dirty_sysvals, vs.xfb_address[3], _xa3);
|
||||
+ }
|
||||
#endif
|
||||
|
||||
if (dyn_gfx_state_dirty(cmdbuf, CB_BLEND_CONSTANTS)) {
|
||||
--- a/src/panfrost/vulkan/meson.build 2026-04-29 22:19:00.000000000 +0200
|
||||
+++ b/src/panfrost/vulkan/meson.build 2026-05-20 18:53:04.484861338 +0200
|
||||
@@ -73,6 +73,7 @@
|
||||
jm_inc_dir = ['jm']
|
||||
jm_files = [
|
||||
'jm/panvk_vX_bind_queue.c',
|
||||
+ 'jm/panvk_vX_cmd_xfb.c', # iter13
|
||||
'jm/panvk_vX_cmd_buffer.c',
|
||||
'jm/panvk_vX_cmd_dispatch.c',
|
||||
'jm/panvk_vX_cmd_draw.c',
|
||||
--- a/src/panfrost/vulkan/jm/panvk_vX_cmd_buffer.c 2026-04-29 22:19:00.000000000 +0200
|
||||
+++ b/src/panfrost/vulkan/jm/panvk_vX_cmd_buffer.c 2026-05-20 19:10:26.163965149 +0200
|
||||
@@ -473,5 +473,12 @@
|
||||
|
||||
vk_command_buffer_begin(&cmdbuf->vk, pBeginInfo);
|
||||
|
||||
+#if PAN_ARCH < 9
|
||||
+ /* iter13: clear XFB state on Begin so a reused command buffer does not
|
||||
+ * inherit stale xfb.buffer_count / xfb.active / xfb.buffers[] from a
|
||||
+ * prior recording. */
|
||||
+ memset(&cmdbuf->state.gfx.xfb, 0, sizeof(cmdbuf->state.gfx.xfb));
|
||||
+#endif
|
||||
+
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
--- a/src/panfrost/vulkan/jm/panvk_vX_cmd_xfb.c 2026-05-18 12:50:53.067999996 +0200
|
||||
+++ b/src/panfrost/vulkan/jm/panvk_vX_cmd_xfb.c 2026-05-20 19:10:27.175979847 +0200
|
||||
@@ -0,0 +1,111 @@
|
||||
+/*
|
||||
+ * Copyright © 2026 mfritsche / claude-noether
|
||||
+ * SPDX-License-Identifier: MIT
|
||||
+ *
|
||||
+ * iter13: VK_EXT_transform_feedback command handlers for the JM
|
||||
+ * architecture path (Bifrost v6/v7; all XFB state is gated on PAN_ARCH < 9).
|
||||
+ *
|
||||
+ * The runtime contract:
|
||||
+ * - vkCmdBindTransformFeedbackBuffersEXT: stash (gpu_addr, offset, size)
|
||||
+ * for each slot into cmdbuf->state.gfx.xfb.buffers[].
|
||||
+ * - vkCmdBeginTransformFeedbackEXT: set cmdbuf->state.gfx.xfb.active = true.
|
||||
+ * No explicit dirty marking: the next draw's set_gfx_sysval re-emits vs.xfb_address[].
|
||||
+ * - vkCmdEndTransformFeedbackEXT: set active = false.
|
||||
+ *
|
||||
+ * Counter buffers (firstCounterBuffer/counterBufferCount/pCounterBuffers/
|
||||
+ * pCounterBufferOffsets) are accepted by API but ignored — v1 doesn't
|
||||
+ * support pause/resume. transformFeedbackDraw is advertised as false.
|
||||
+ *
|
||||
+ * Per-draw integration: panvk_vX_cmd_draw.c reads cmdbuf->state.gfx.xfb
|
||||
+ * and populates vs.xfb_address[i] for shader use. The pan_nir_lower_xfb
|
||||
+ * pass in panvk_vX_shader.c emits nir_load_xfb_address(i) which lowers
|
||||
+ * (via panvk_vX_shader.c sysval handler) to a load from the per-draw
|
||||
+ * sysval push area.
|
||||
+ */
|
||||
+
|
||||
+#include "vk_log.h"
|
||||
+#include "util/log.h"
|
||||
+
|
||||
+#include "panvk_cmd_buffer.h"
|
||||
+#include "panvk_cmd_draw.h"
|
||||
+#include "panvk_buffer.h"
|
||||
+#include "panvk_entrypoints.h"
|
||||
+
|
||||
+VKAPI_ATTR void VKAPI_CALL
|
||||
+panvk_per_arch(CmdBindTransformFeedbackBuffersEXT)(
|
||||
+ VkCommandBuffer commandBuffer,
|
||||
+ uint32_t firstBinding,
|
||||
+ uint32_t bindingCount,
|
||||
+ const VkBuffer *pBuffers,
|
||||
+ const VkDeviceSize *pOffsets,
|
||||
+ const VkDeviceSize *pSizes)
|
||||
+{
|
||||
+ VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
|
||||
+ struct panvk_cmd_graphics_state *gfx = &cmdbuf->state.gfx;
|
||||
+
|
||||
+ for (uint32_t i = 0; i < bindingCount; i++) {
|
||||
+ uint32_t slot = firstBinding + i;
|
||||
+ if (slot >= 4)
|
||||
+ continue;
|
||||
+
|
||||
+ VK_FROM_HANDLE(panvk_buffer, buf, pBuffers[i]);
|
||||
+ gfx->xfb.buffers[slot].addr = panvk_buffer_gpu_ptr(buf, 0);
|
||||
+ gfx->xfb.buffers[slot].offset = pOffsets[i];
|
||||
+ gfx->xfb.buffers[slot].size =
|
||||
+ (pSizes != NULL && pSizes[i] != VK_WHOLE_SIZE)
|
||||
+ ? pSizes[i]
|
||||
+ : (buf->vk.size - pOffsets[i]);
|
||||
+ }
|
||||
+
|
||||
+ if (firstBinding + bindingCount > gfx->xfb.buffer_count)
|
||||
+ gfx->xfb.buffer_count = firstBinding + bindingCount;
|
||||
+}
|
||||
+
|
||||
+VKAPI_ATTR void VKAPI_CALL
|
||||
+panvk_per_arch(CmdBeginTransformFeedbackEXT)(
|
||||
+ VkCommandBuffer commandBuffer,
|
||||
+ uint32_t firstCounterBuffer,
|
||||
+ uint32_t counterBufferCount,
|
||||
+ const VkBuffer *pCounterBuffers,
|
||||
+ const VkDeviceSize *pCounterBufferOffsets)
|
||||
+{
|
||||
+ VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
|
||||
+ struct panvk_cmd_graphics_state *gfx = &cmdbuf->state.gfx;
|
||||
+
|
||||
+ /* Counter buffers ignored in v1 — see VkPhysicalDeviceTransformFeedback
|
||||
+ * PropertiesEXT.transformFeedbackDraw = false in panvk_vX_physical_device.c.
|
||||
+ * App is spec-compliant if it does not pass counter buffers (which our
|
||||
+ * features advertisement allows), but warn loudly if it does so we do not
|
||||
+ * silently produce wrong capture state. */
|
||||
+ (void)firstCounterBuffer;
|
||||
+ (void)pCounterBufferOffsets;
|
||||
+ if (counterBufferCount > 0 && pCounterBuffers != NULL) {
|
||||
+ mesa_logw("panvk: CmdBeginTransformFeedbackEXT: counter buffers not "
|
||||
+ "implemented (transformFeedbackDraw=false); XFB resume will "
|
||||
+ "restart at buffer offset 0");
|
||||
+ }
|
||||
+
|
||||
+ gfx->xfb.active = true;
|
||||
+ /* Per-draw set_gfx_sysval picks up the change automatically — no
|
||||
+ * explicit dirty marking required (set_gfx_sysval uses memcmp +
|
||||
+ * BITSET to detect state diffs and re-emit sysvals). */
|
||||
+}
|
||||
+
|
||||
+VKAPI_ATTR void VKAPI_CALL
|
||||
+panvk_per_arch(CmdEndTransformFeedbackEXT)(
|
||||
+ VkCommandBuffer commandBuffer,
|
||||
+ uint32_t firstCounterBuffer,
|
||||
+ uint32_t counterBufferCount,
|
||||
+ const VkBuffer *pCounterBuffers,
|
||||
+ const VkDeviceSize *pCounterBufferOffsets)
|
||||
+{
|
||||
+ VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
|
||||
+ struct panvk_cmd_graphics_state *gfx = &cmdbuf->state.gfx;
|
||||
+
|
||||
+ (void)firstCounterBuffer;
|
||||
+ (void)counterBufferCount;
|
||||
+ (void)pCounterBuffers;
|
||||
+ (void)pCounterBufferOffsets;
|
||||
+
|
||||
+ gfx->xfb.active = false;
|
||||
+}
|
||||
@@ -0,0 +1,629 @@
|
||||
diff -urN a/src/panfrost/vulkan/meson.build b/src/panfrost/vulkan/meson.build
|
||||
--- a/src/panfrost/vulkan/meson.build 2026-05-21 14:04:02.529474145 +0200
|
||||
+++ b/src/panfrost/vulkan/meson.build 2026-05-21 14:04:04.106755486 +0200
|
||||
@@ -123,6 +123,7 @@
|
||||
'panvk_vX_nir_lower_input_attachment_loads.c',
|
||||
'panvk_vX_sampler.c',
|
||||
'panvk_vX_shader.c',
|
||||
+ 'panvk_vX_xfb_lower.c',
|
||||
sha1_h,
|
||||
]
|
||||
|
||||
diff -urN a/src/panfrost/vulkan/panvk_shader.h b/src/panfrost/vulkan/panvk_shader.h
|
||||
--- a/src/panfrost/vulkan/panvk_shader.h 2026-05-21 14:04:02.525251986 +0200
|
||||
+++ b/src/panfrost/vulkan/panvk_shader.h 2026-05-21 14:04:04.084251800 +0200
|
||||
@@ -154,6 +154,8 @@
|
||||
/* aligned_u64 attribute below inserts the 4-byte alignment gap
|
||||
* after num_vertices automatically — no explicit pad needed. */
|
||||
aligned_u64 xfb_address[4]; /* iter13: 4 transform feedback buffer base addresses */
|
||||
+ uint32_t xfb_topology; /* iter17: panvk_xfb_topology enum value */
|
||||
+ uint32_t xfb_output_count; /* iter17: per-instance output verts after decomp */
|
||||
#endif
|
||||
int32_t first_vertex;
|
||||
int32_t base_instance;
|
||||
@@ -569,4 +571,76 @@
|
||||
struct pan_compute_dim local_size, const void *bin_ptr, size_t bin_size,
|
||||
struct panvk_shader **shader_out);
|
||||
|
||||
+
|
||||
+#if PAN_ARCH < 9
|
||||
+/* iter17: encoding for vs.xfb_topology sysval. Maps VkPrimitiveTopology values
|
||||
+ * we need to distinguish at shader runtime for XFB capture. LIST topologies
|
||||
+ * use the iter13 single-store fast path; non-LIST need per-vertex decomposition. */
|
||||
+enum panvk_xfb_topology {
|
||||
+ PANVK_XFB_TOPO_LIST = 0,
|
||||
+ PANVK_XFB_TOPO_LINE_STRIP = 1,
|
||||
+ PANVK_XFB_TOPO_TRI_STRIP = 2,
|
||||
+ PANVK_XFB_TOPO_TRI_FAN = 3,
|
||||
+ PANVK_XFB_TOPO_LINE_LIST_ADJ = 4,
|
||||
+ PANVK_XFB_TOPO_LINE_STRIP_ADJ = 5,
|
||||
+ PANVK_XFB_TOPO_TRI_LIST_ADJ = 6,
|
||||
+ PANVK_XFB_TOPO_TRI_STRIP_ADJ = 7,
|
||||
+};
|
||||
+
|
||||
+#include "panvk_macros.h"
|
||||
+struct nir_shader;
|
||||
+bool panvk_per_arch(nir_lower_xfb)(struct nir_shader *nir);
|
||||
+
|
||||
+/* Map VkPrimitiveTopology to panvk_xfb_topology enum (driver-side helper). */
|
||||
+static inline uint32_t
|
||||
+panvk_vk_topology_to_xfb_enum(VkPrimitiveTopology topo)
|
||||
+{
|
||||
+ switch (topo) {
|
||||
+ case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
|
||||
+ return PANVK_XFB_TOPO_LINE_STRIP;
|
||||
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
|
||||
+ return PANVK_XFB_TOPO_TRI_STRIP;
|
||||
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
|
||||
+ return PANVK_XFB_TOPO_TRI_FAN;
|
||||
+ case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
|
||||
+ return PANVK_XFB_TOPO_LINE_LIST_ADJ;
|
||||
+ case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
|
||||
+ return PANVK_XFB_TOPO_LINE_STRIP_ADJ;
|
||||
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
|
||||
+ return PANVK_XFB_TOPO_TRI_LIST_ADJ;
|
||||
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
|
||||
+ return PANVK_XFB_TOPO_TRI_STRIP_ADJ;
|
||||
+ case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
|
||||
+ case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
|
||||
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
|
||||
+ default:
|
||||
+ return PANVK_XFB_TOPO_LIST;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/* Compute the per-instance output vertex count for a given (topology, input count). */
|
||||
+static inline uint32_t
|
||||
+panvk_xfb_output_count(VkPrimitiveTopology topo, uint32_t input_count)
|
||||
+{
|
||||
+ switch (topo) {
|
||||
+ case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
|
||||
+ return input_count >= 1 ? 2u * (input_count - 1u) : 0u;
|
||||
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
|
||||
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
|
||||
+ return input_count >= 2 ? 3u * (input_count - 2u) : 0u;
|
||||
+ case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
|
||||
+ return (input_count / 4u) * 2u;
|
||||
+ case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
|
||||
+ return input_count >= 3 ? 2u * (input_count - 3u) : 0u;
|
||||
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
|
||||
+ return (input_count / 6u) * 3u;
|
||||
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
|
||||
+ return input_count >= 6 ? 3u * (input_count / 2u - 2u) : 0u;
|
||||
+ default:
|
||||
+ return input_count; /* LIST topologies: 1:1 mapping */
|
||||
+ }
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
+
|
||||
#endif
|
||||
diff -urN a/src/panfrost/vulkan/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/panvk_vX_cmd_draw.c
|
||||
--- a/src/panfrost/vulkan/panvk_vX_cmd_draw.c 2026-05-21 14:04:02.528576354 +0200
|
||||
+++ b/src/panfrost/vulkan/panvk_vX_cmd_draw.c 2026-05-21 14:04:04.091357598 +0200
|
||||
@@ -727,6 +727,20 @@
|
||||
/* iter13: VK_EXT_transform_feedback sysvals — always set (per draw),
|
||||
* reflect bound XFB state. set_gfx_sysval is a no-op if value unchanged. */
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals, vs.num_vertices, info->vertex.count);
|
||||
+
|
||||
+ /* iter17: XFB primitive-decomposition sysvals.
|
||||
+ * xfb_topology = enum value for the current bound topology.
|
||||
+ * xfb_output_count = per-instance output vertex count after decomposition.
|
||||
+ * For LIST topologies, output_count == input vertex count and the shader
|
||||
+ * takes the iter13 single-store fast path. */
|
||||
+ {
|
||||
+ VkPrimitiveTopology vk_topo =
|
||||
+ cmdbuf->vk.dynamic_graphics_state.ia.primitive_topology;
|
||||
+ uint32_t topo_enum = panvk_vk_topology_to_xfb_enum(vk_topo);
|
||||
+ uint32_t out_count = panvk_xfb_output_count(vk_topo, info->vertex.count);
|
||||
+ set_gfx_sysval(cmdbuf, dirty_sysvals, vs.xfb_topology, topo_enum);
|
||||
+ set_gfx_sysval(cmdbuf, dirty_sysvals, vs.xfb_output_count, out_count);
|
||||
+ }
|
||||
{
|
||||
const struct panvk_cmd_graphics_state *_gfx = &cmdbuf->state.gfx;
|
||||
/* iter13: default each XFB buffer address to PAN_SHADER_OOB_ADDRESS
|
||||
diff -urN a/src/panfrost/vulkan/panvk_vX_shader.c b/src/panfrost/vulkan/panvk_vX_shader.c
|
||||
--- a/src/panfrost/vulkan/panvk_vX_shader.c 2026-05-21 14:04:02.527576494 +0200
|
||||
+++ b/src/panfrost/vulkan/panvk_vX_shader.c 2026-05-21 14:04:04.098356619 +0200
|
||||
@@ -895,7 +895,10 @@
|
||||
nir->info.has_transform_feedback_varyings) {
|
||||
NIR_PASS(_, nir, nir_opt_constant_folding);
|
||||
NIR_PASS(_, nir, nir_io_add_intrinsic_xfb_info);
|
||||
- NIR_PASS(_, nir, pan_nir_lower_xfb);
|
||||
+ /* iter17: panvk-specific replacement for pan_nir_lower_xfb that handles
|
||||
+ * primitive decomposition for non-LIST topologies. Single-store LIST
|
||||
+ * fast path matches iter13 behavior. */
|
||||
+ NIR_PASS(_, nir, panvk_per_arch(nir_lower_xfb));
|
||||
}
|
||||
#endif
|
||||
}
|
||||
diff -urN a/src/panfrost/vulkan/panvk_vX_xfb_lower.c b/src/panfrost/vulkan/panvk_vX_xfb_lower.c
|
||||
--- a/src/panfrost/vulkan/panvk_vX_xfb_lower.c 1970-01-01 01:00:00.000000000 +0100
|
||||
+++ b/src/panfrost/vulkan/panvk_vX_xfb_lower.c 2026-05-21 14:04:04.115354242 +0200
|
||||
@@ -0,0 +1,486 @@
|
||||
+/*
|
||||
+ * Copyright © 2026 mfritsche / claude-noether
|
||||
+ * SPDX-License-Identifier: MIT
|
||||
+ *
|
||||
+ * iter17: panvk-specific replacement for pan_nir_lower_xfb that handles
|
||||
+ * primitive decomposition for transform_feedback on non-LIST topologies
|
||||
+ * (TRIANGLE_STRIP/FAN, LINE_STRIP, *_WITH_ADJACENCY).
|
||||
+ *
|
||||
+ * Approach: emit a topology dispatch at the start of each store_output
|
||||
+ * lowering. The shader reads vs.xfb_topology sysval at runtime and branches
|
||||
+ * into per-topology emission logic. For each affected topology, the lowered
|
||||
+ * code emits guarded conditional stores — one per primitive this vertex
|
||||
+ * contributes to, computing the output buffer position via primitive index
|
||||
+ * and slot within the decomposed primitive.
|
||||
+ *
|
||||
+ * For LIST topologies (POINT/LINE/TRIANGLE LIST), takes a fast path that
|
||||
+ * matches iter13's single-store behavior.
|
||||
+ *
|
||||
+ * For TRIANGLE_FAN, the central vertex (v=0) contributes to ALL primitives
|
||||
+ * as slot 2 — handled via a NIR loop over all num_vertices - 2 primitives.
|
||||
+ *
|
||||
+ * See ~/src/panvk-bifrost/iter17/phase{0,1,2}_*.md for full design context.
|
||||
+ */
|
||||
+
|
||||
+#include "panvk_macros.h"
|
||||
+
|
||||
+#if PAN_ARCH < 9
|
||||
+
|
||||
+#include "panvk_shader.h"
|
||||
+
|
||||
+#include "compiler/nir/nir_builder.h"
|
||||
+#include "pan_nir.h"
|
||||
+
|
||||
+#include <vulkan/vulkan_core.h>
|
||||
+
|
||||
+/* ----- Address arithmetic ----- */
|
||||
+/* Returns the 64-bit address buf + out_idx * stride + offset_bytes. */
|
||||
+static nir_def *
|
||||
+xfb_store_addr(nir_builder *b, nir_def *buf, nir_def *out_idx,
|
||||
+ uint16_t stride, uint16_t offset_bytes)
|
||||
+{
|
||||
+ nir_def *byte_off = nir_iadd_imm(b, /* computed at out_idx's bit size */
|
||||
+ nir_imul_imm(b, out_idx, stride), offset_bytes);
|
||||
+ return nir_iadd(b, buf, nir_u2u64(b, byte_off)); /* widen, then add to base */
|
||||
+}
|
||||
+/* LIST path: one unconditional store at instance_id * output_count + raw_vid. */
|
||||
+static void
|
||||
+emit_list_store(nir_builder *b, nir_def *buf, nir_def *output_count,
|
||||
+ nir_def *instance_id, nir_def *raw_vid, nir_def *value,
|
||||
+ uint16_t stride, uint16_t offset_bytes)
|
||||
+{
|
||||
+ nir_def *out_idx = nir_iadd(b, /* element index within the XFB buffer */
|
||||
+ nir_imul(b, instance_id, output_count), raw_vid);
|
||||
+ nir_def *addr = xfb_store_addr(b, buf, out_idx, stride, offset_bytes);
|
||||
+ nir_store_global(b, value, addr);
|
||||
+}
|
||||
+/* If eligible: store at instance_id*output_count + prim_idx*verts_per_prim + slot. */
|
||||
+static void
|
||||
+emit_prim_store(nir_builder *b, nir_def *buf, nir_def *output_count,
|
||||
+ nir_def *instance_id, nir_def *eligible,
|
||||
+ nir_def *prim_idx, nir_def *slot,
|
||||
+ uint32_t verts_per_prim,
|
||||
+ nir_def *value, uint16_t stride, uint16_t offset_bytes)
|
||||
+{
|
||||
+ nir_push_if(b, eligible); /* everything below is emitted inside this if */
|
||||
+ {
|
||||
+ nir_def *out_idx = nir_iadd(b,
|
||||
+ nir_imul(b, instance_id, output_count),
|
||||
+ nir_iadd(b, nir_imul_imm(b, prim_idx, verts_per_prim), slot));
|
||||
+ nir_def *addr = xfb_store_addr(b, buf, out_idx, stride, offset_bytes);
|
||||
+ nir_store_global(b, value, addr);
|
||||
+ }
|
||||
+ nir_pop_if(b, NULL);
|
||||
+}
|
||||
+
|
||||
+/* ----- Per-topology emission ----- */
|
||||
+
|
||||
+/* TRIANGLE_STRIP: v feeds prims v, v-1, v-2 when they exist; odd prims swap slots 1/2. */
|
||||
+static void
|
||||
+emit_tri_strip(nir_builder *b, nir_def *v, nir_def *N,
|
||||
+ nir_def *buf, nir_def *output_count, nir_def *instance_id,
|
||||
+ nir_def *value, uint16_t stride, uint16_t offset_bytes)
|
||||
+{
|
||||
+ nir_def *Nm2 = nir_iadd_imm(b, N, -2); /* prim count; wraps if N < 2 */
|
||||
+ nir_def *Nm1 = nir_iadd_imm(b, N, -1);
|
||||
+
|
||||
+ /* Prim v, slot 0: v < N-2 */
|
||||
+ emit_prim_store(b, buf, output_count, instance_id,
|
||||
+ nir_ult(b, v, Nm2),
|
||||
+ v, nir_imm_int(b, 0), 3, value, stride, offset_bytes);
|
||||
+
|
||||
+ /* Prim v-1, slot = 1 if prim even else 2: 1 <= v < N-1 */
|
||||
+ {
|
||||
+ nir_def *prim = nir_iadd_imm(b, v, -1);
|
||||
+ nir_def *parity = nir_iand_imm(b, prim, 1u);
|
||||
+ nir_def *slot = nir_iadd_imm(b, parity, 1); /* even→1, odd→2 */
|
||||
+ nir_def *eligible = nir_iand(b,
|
||||
+ nir_uge(b, v, nir_imm_int(b, 1)),
|
||||
+ nir_ult(b, v, Nm1));
|
||||
+ emit_prim_store(b, buf, output_count, instance_id, eligible,
|
||||
+ prim, slot, 3, value, stride, offset_bytes);
|
||||
+ }
|
||||
+
|
||||
+ /* Prim v-2, slot = 2 if prim even else 1: 2 <= v < N */
|
||||
+ {
|
||||
+ nir_def *prim = nir_iadd_imm(b, v, -2);
|
||||
+ nir_def *parity = nir_iand_imm(b, prim, 1u);
|
||||
+ nir_def *slot = nir_isub(b, nir_imm_int(b, 2), parity); /* even→2, odd→1 */
|
||||
+ nir_def *eligible = nir_iand(b,
|
||||
+ nir_uge(b, v, nir_imm_int(b, 2)),
|
||||
+ nir_ult(b, v, N));
|
||||
+ emit_prim_store(b, buf, output_count, instance_id, eligible,
|
||||
+ prim, slot, 3, value, stride, offset_bytes);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/* LINE_STRIP: N-1 segments; vertex v is slot 0 of prim v and slot 1 of prim v-1. */
|
||||
+static void
|
||||
+emit_line_strip(nir_builder *b, nir_def *v, nir_def *N,
|
||||
+ nir_def *buf, nir_def *output_count, nir_def *instance_id,
|
||||
+ nir_def *value, uint16_t stride, uint16_t offset_bytes)
|
||||
+{
|
||||
+ nir_def *Nm1 = nir_iadd_imm(b, N, -1); /* segment count */
|
||||
+
|
||||
+ /* Prim v, slot 0: v < N-1 (the last vertex starts no segment) */
|
||||
+ emit_prim_store(b, buf, output_count, instance_id,
|
||||
+ nir_ult(b, v, Nm1),
|
||||
+ v, nir_imm_int(b, 0), 2, value, stride, offset_bytes);
|
||||
+
|
||||
+ /* Prim v-1, slot 1: 1 <= v < N (the first vertex ends no segment) */
|
||||
+ {
|
||||
+ nir_def *prim = nir_iadd_imm(b, v, -1);
|
||||
+ nir_def *eligible = nir_iand(b,
|
||||
+ nir_uge(b, v, nir_imm_int(b, 1)),
|
||||
+ nir_ult(b, v, N));
|
||||
+ emit_prim_store(b, buf, output_count, instance_id, eligible,
|
||||
+ prim, nir_imm_int(b, 1), 2, value, stride, offset_bytes);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/* TRIANGLE_FAN: N-2 prims; prim p emits {p+1, p+2, 0}.
|
||||
+ * vertex v=0: contributes to ALL prims as slot 2 (loop required)
|
||||
+ * vertex v>=1: contributes to prim v-1 as slot 0 (if 1 <= v <= N-2)
|
||||
+ * vertex v>=2: contributes to prim v-2 as slot 1 (if 2 <= v <= N-1)
|
||||
+ */
|
||||
+static void
|
||||
+emit_tri_fan(nir_builder *b, nir_def *v, nir_def *N,
|
||||
+ nir_def *buf, nir_def *output_count, nir_def *instance_id,
|
||||
+ nir_def *value, uint16_t stride, uint16_t offset_bytes)
|
||||
+{
|
||||
+ nir_def *Nm1 = nir_iadd_imm(b, N, -1);
|
||||
+ nir_def *Nm2 = nir_iadd_imm(b, N, -2); /* prim count, also the loop bound below */
|
||||
+
|
||||
+ /* Prim v-1, slot 0: 1 <= v < N-1 */
|
||||
+ {
|
||||
+ nir_def *prim = nir_iadd_imm(b, v, -1);
|
||||
+ nir_def *eligible = nir_iand(b,
|
||||
+ nir_uge(b, v, nir_imm_int(b, 1)),
|
||||
+ nir_ult(b, v, Nm1));
|
||||
+ emit_prim_store(b, buf, output_count, instance_id, eligible,
|
||||
+ prim, nir_imm_int(b, 0), 3, value, stride, offset_bytes);
|
||||
+ }
|
||||
+
|
||||
+ /* Prim v-2, slot 1: 2 <= v < N */
|
||||
+ {
|
||||
+ nir_def *prim = nir_iadd_imm(b, v, -2);
|
||||
+ nir_def *eligible = nir_iand(b,
|
||||
+ nir_uge(b, v, nir_imm_int(b, 2)),
|
||||
+ nir_ult(b, v, N));
|
||||
+ emit_prim_store(b, buf, output_count, instance_id, eligible,
|
||||
+ prim, nir_imm_int(b, 1), 3, value, stride, offset_bytes);
|
||||
+ }
|
||||
+
|
||||
+ /* Central vertex (v == 0): loop p over [0, N-2), writing slot 2 of every prim. */
|
||||
+ nir_push_if(b, nir_ieq_imm(b, v, 0));
|
||||
+ {
|
||||
+ nir_variable *p_var = nir_local_variable_create(b->impl,
|
||||
+ glsl_uint_type(), "fan_p");
|
||||
+ nir_store_var(b, p_var, nir_imm_int(b, 0), 0x1); /* p = 0 */
|
||||
+ nir_push_loop(b);
|
||||
+ {
|
||||
+ nir_def *p = nir_load_var(b, p_var);
|
||||
+ nir_push_if(b, nir_uge(b, p, Nm2)); /* leave the loop once p >= N-2 */
|
||||
+ {
|
||||
+ nir_jump(b, nir_jump_break);
|
||||
+ }
|
||||
+ nir_pop_if(b, NULL);
|
||||
+
|
||||
+ nir_def *out_idx = nir_iadd(b,
|
||||
+ nir_imul(b, instance_id, output_count),
|
||||
+ nir_iadd_imm(b, nir_imul_imm(b, p, 3), 2));
|
||||
+ nir_def *addr = xfb_store_addr(b, buf, out_idx, stride, offset_bytes);
|
||||
+ nir_store_global(b, value, addr);
|
||||
+
|
||||
+ nir_store_var(b, p_var, nir_iadd_imm(b, p, 1), 0x1); /* p++ */
|
||||
+ }
|
||||
+ nir_pop_loop(b, NULL);
|
||||
+ }
|
||||
+ nir_pop_if(b, NULL);
|
||||
+}
|
||||
+
|
||||
+/* LINE_LIST_WITH_ADJACENCY: 4-vertex groups [4i..4i+3]; output {4i+1, 4i+2}.
|
||||
+ * v%4 == 1 stores prim v/4 slot 0; v%4 == 2 stores prim v/4 slot 1.
|
||||
+ * Only complete groups (prim < N/4) store; a trailing partial group is dropped.
|
||||
+ */
|
||||
+static void
|
||||
+emit_line_list_adj(nir_builder *b, nir_def *v, nir_def *N,
|
||||
+ nir_def *buf, nir_def *output_count, nir_def *instance_id,
|
||||
+ nir_def *value, uint16_t stride, uint16_t offset_bytes)
|
||||
+{
|
||||
+ nir_def *vmod4 = nir_iand_imm(b, v, 3u);
|
||||
+ nir_def *prim = nir_ushr_imm(b, v, 2); /* v / 4 */
|
||||
+ /* Without this, N % 4 != 0 lets e.g. v=5 of N=6 store into a prim that does not exist. */
|
||||
+ nir_def *complete = nir_ult(b, prim, nir_ushr_imm(b, N, 2));
|
||||
+ emit_prim_store(b, buf, output_count, instance_id,
|
||||
+ nir_iand(b, complete, nir_ieq_imm(b, vmod4, 1)),
|
||||
+ prim, nir_imm_int(b, 0), 2, value, stride, offset_bytes);
|
||||
+
|
||||
+ emit_prim_store(b, buf, output_count, instance_id,
|
||||
+ nir_iand(b, complete, nir_ieq_imm(b, vmod4, 2)),
|
||||
+ prim, nir_imm_int(b, 1), 2, value, stride, offset_bytes);
|
||||
+}
|
||||
+
|
||||
+/* LINE_STRIP_WITH_ADJACENCY: N-3 prims; prim p emits {p+1, p+2}.
|
||||
+ * v contributes to prim v-1 slot 0 (1 <= v <= N-3)
|
||||
+ * v contributes to prim v-2 slot 1 (2 <= v <= N-2)
|
||||
+ */
|
||||
+static void
|
||||
+emit_line_strip_adj(nir_builder *b, nir_def *v, nir_def *N,
|
||||
+ nir_def *buf, nir_def *output_count, nir_def *instance_id,
|
||||
+ nir_def *value, uint16_t stride, uint16_t offset_bytes)
|
||||
+{
|
||||
+ nir_def *Nm1 = nir_iadd_imm(b, N, -1);
|
||||
+ nir_def *Nm2 = nir_iadd_imm(b, N, -2);
|
||||
+
|
||||
+ /* Prim v-1, slot 0: 1 <= v <= N-3 ⇔ v >= 1 AND v < N-2 */
|
||||
+ {
|
||||
+ nir_def *prim = nir_iadd_imm(b, v, -1);
|
||||
+ nir_def *eligible = nir_iand(b,
|
||||
+ nir_uge(b, v, nir_imm_int(b, 1)),
|
||||
+ nir_ult(b, v, Nm2));
|
||||
+ /* v = N-2 starts no segment: its only successor is the tail adjacency vertex. */
|
||||
+ emit_prim_store(b, buf, output_count, instance_id, eligible,
|
||||
+ prim, nir_imm_int(b, 0), 2, value, stride, offset_bytes);
|
||||
+ }
|
||||
+
|
||||
+ /* Prim v-2, slot 1: 2 <= v <= N-2 ⇔ v >= 2 AND v < N-1 (v = N-1 is adjacency-only) */
|
||||
+ {
|
||||
+ nir_def *prim = nir_iadd_imm(b, v, -2);
|
||||
+ nir_def *eligible = nir_iand(b,
|
||||
+ nir_uge(b, v, nir_imm_int(b, 2)),
|
||||
+ nir_ult(b, v, Nm1));
|
||||
+ emit_prim_store(b, buf, output_count, instance_id, eligible,
|
||||
+ prim, nir_imm_int(b, 1), 2, value, stride, offset_bytes);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/* TRIANGLE_LIST_WITH_ADJACENCY: 6-vertex groups; output {6i, 6i+2, 6i+4}.
|
||||
+ * v contributes if v%6 == 0, 2 or 4: prim v/6, slot (v%6)/2.
|
||||
+ * Only complete groups (prim < N/6) store; a trailing partial group is
|
||||
+ * dropped instead of storing into a primitive that does not exist.
|
||||
+ */
|
||||
+static void
|
||||
+emit_tri_list_adj(nir_builder *b, nir_def *v, nir_def *N,
|
||||
+ nir_def *buf, nir_def *output_count, nir_def *instance_id,
|
||||
+ nir_def *value, uint16_t stride, uint16_t offset_bytes)
|
||||
+{
|
||||
+ nir_def *vmod6 = nir_umod_imm(b, v, 6);
|
||||
+ nir_def *prim = nir_udiv_imm(b, v, 6);
|
||||
+ /* Without this, N % 6 != 0 lets e.g. v=6 of N=8 store into prim 1 of 1. */
|
||||
+ nir_def *complete = nir_ult(b, prim, nir_udiv_imm(b, N, 6));
|
||||
+ for (uint32_t slot = 0; slot < 3; slot++) {
|
||||
+ emit_prim_store(b, buf, output_count, instance_id,
|
||||
+ nir_iand(b, complete, nir_ieq_imm(b, vmod6, slot * 2)),
|
||||
+ prim, nir_imm_int(b, slot), 3, value, stride, offset_bytes);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/* TRIANGLE_STRIP_WITH_ADJACENCY: N/2 - 2 prims; prim i emits:
|
||||
+ * even i: {2i, 2i+2, 2i+4} (slots 0, 1, 2 ← input indices 2i, 2i+2, 2i+4)
|
||||
+ * odd i: {2i, 2i+4, 2i+2} (slots 0, 1, 2 ← input indices 2i, 2i+4, 2i+2)
|
||||
+ * NOTE(review): confirm the odd-i slot order against the Vulkan spec's p_i listing.
|
||||
+ * Only EVEN input vertices contribute (since all output indices are 2*something).
|
||||
+ * For even input v:
|
||||
+ * prim v/2 slot 0 (always, if v/2 < N/2-2)
|
||||
+ * prim (v-2)/2 slot 1 if (v-2)/2 even, slot 2 if odd (when v >= 2)
|
||||
+ * prim (v-4)/2 slot 2 if (v-4)/2 even, slot 1 if odd (when v >= 4)
|
||||
+ */
|
||||
+static void
|
||||
+emit_tri_strip_adj(nir_builder *b, nir_def *v, nir_def *N,
|
||||
+ nir_def *buf, nir_def *output_count, nir_def *instance_id,
|
||||
+ nir_def *value, uint16_t stride, uint16_t offset_bytes)
|
||||
+{
|
||||
+ /* Skip odd input vertices — they never contribute. */
|
||||
+ nir_def *v_is_even = nir_ieq_imm(b, nir_iand_imm(b, v, 1u), 0);
|
||||
+ nir_push_if(b, v_is_even);
|
||||
+ {
|
||||
+ nir_def *N_half = nir_ushr_imm(b, N, 1);
|
||||
+ nir_def *max_prim = nir_iadd_imm(b, N_half, -2); /* N/2 - 2: exclusive prim bound */
|
||||
+ nir_def *v_half = nir_ushr_imm(b, v, 1);
|
||||
+
|
||||
+ /* Prim v/2 slot 0: v/2 < N/2 - 2 */
|
||||
+ emit_prim_store(b, buf, output_count, instance_id,
|
||||
+ nir_ult(b, v_half, max_prim),
|
||||
+ v_half, nir_imm_int(b, 0), 3, value, stride, offset_bytes);
|
||||
+
|
||||
+ /* Prim (v-2)/2 = v/2 - 1: v >= 2 AND prim < N/2-2 */
|
||||
+ {
|
||||
+ nir_def *prim = nir_iadd_imm(b, v_half, -1);
|
||||
+ nir_def *parity = nir_iand_imm(b, prim, 1u);
|
||||
+ nir_def *slot = nir_iadd_imm(b, parity, 1); /* even→1, odd→2 */
|
||||
+ nir_def *eligible = nir_iand(b,
|
||||
+ nir_uge(b, v, nir_imm_int(b, 2)),
|
||||
+ nir_ult(b, prim, max_prim));
|
||||
+ emit_prim_store(b, buf, output_count, instance_id, eligible,
|
||||
+ prim, slot, 3, value, stride, offset_bytes);
|
||||
+ }
|
||||
+
|
||||
+ /* Prim (v-4)/2 = v/2 - 2: v >= 4 AND prim < N/2-2 */
|
||||
+ {
|
||||
+ nir_def *prim = nir_iadd_imm(b, v_half, -2);
|
||||
+ nir_def *parity = nir_iand_imm(b, prim, 1u);
|
||||
+ nir_def *slot = nir_isub(b, nir_imm_int(b, 2), parity); /* even→2, odd→1 */
|
||||
+ nir_def *eligible = nir_iand(b,
|
||||
+ nir_uge(b, v, nir_imm_int(b, 4)),
|
||||
+ nir_ult(b, prim, max_prim));
|
||||
+ emit_prim_store(b, buf, output_count, instance_id, eligible,
|
||||
+ prim, slot, 3, value, stride, offset_bytes);
|
||||
+ }
|
||||
+ }
|
||||
+ nir_pop_if(b, NULL);
|
||||
+}
|
||||
+
|
||||
+/* ----- Main lowering: per store_output XFB channel ----- */
|
||||
+/* Lowers one XFB output of a store_output into topology-dispatched global stores. */
|
||||
+static void
|
||||
+lower_xfb_output_iter17(nir_builder *b, nir_intrinsic_instr *intr,
|
||||
+ unsigned channel_idx, unsigned num_components,
|
||||
+ unsigned buffer, unsigned offset_words)
|
||||
+{
|
||||
+ assert(buffer < MAX_XFB_BUFFERS);
|
||||
+ assert(nir_intrinsic_component(intr) == 0);
|
||||
+
|
||||
+ uint16_t stride = b->shader->info.xfb_stride[buffer] * 4;
|
||||
+ assert(stride != 0);
|
||||
+ uint16_t offset_bytes = offset_words * 4;
|
||||
+
|
||||
+ BITSET_SET(b->shader->info.system_values_read, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE);
|
||||
+ BITSET_SET(b->shader->info.system_values_read, SYSTEM_VALUE_INSTANCE_ID);
|
||||
+
|
||||
+ nir_def *topology = load_sysval(b, graphics, 32, vs.xfb_topology);
|
||||
+ nir_def *out_count = load_sysval(b, graphics, 32, vs.xfb_output_count);
|
||||
+ nir_def *N = nir_load_num_vertices(b);
|
||||
+ nir_def *v = nir_load_raw_vertex_id_pan(b);
|
||||
+ nir_def *instance = nir_load_instance_id(b);
|
||||
+ nir_def *buf = nir_load_xfb_address(b, 64, .base = buffer);
|
||||
+
|
||||
+ nir_def *src = intr->src[0].ssa;
|
||||
+ nir_component_mask_t mask = nir_component_mask(num_components);
|
||||
+ nir_def *value = nir_channels(b, src, mask << channel_idx);
|
||||
+
|
||||
+ /* Topology dispatch ladder. LIST first (fast path). */
|
||||
+ nir_push_if(b, nir_ieq_imm(b, topology, PANVK_XFB_TOPO_LIST));
|
||||
+ {
|
||||
+ emit_list_store(b, buf, out_count, instance, v, value,
|
||||
+ stride, offset_bytes);
|
||||
+ }
|
||||
+ nir_push_else(b, NULL);
|
||||
+ {
|
||||
+ /* Gate all non-LIST emission on output_count > 0 (iter17 review
|
||||
+ * finding 3). For degenerate input counts (N < min required for the
|
||||
+ * topology), output_count is 0 and we must emit NO stores: otherwise
|
||||
+ * the unsigned N-1 / N-2 bounds wrap around in the eligibility
|
||||
+ * predicates and stores fire for vertices that form no primitive. */
|
||||
+ nir_push_if(b, nir_ult(b, nir_imm_int(b, 0), out_count));
|
||||
+ {
|
||||
+ nir_push_if(b, nir_ieq_imm(b, topology, PANVK_XFB_TOPO_TRI_STRIP));
|
||||
+ {
|
||||
+ emit_tri_strip(b, v, N, buf, out_count, instance, value,
|
||||
+ stride, offset_bytes);
|
||||
+ }
|
||||
+ nir_push_else(b, NULL);
|
||||
+ {
|
||||
+ nir_push_if(b, nir_ieq_imm(b, topology, PANVK_XFB_TOPO_LINE_STRIP));
|
||||
+ {
|
||||
+ emit_line_strip(b, v, N, buf, out_count, instance, value,
|
||||
+ stride, offset_bytes);
|
||||
+ }
|
||||
+ nir_push_else(b, NULL);
|
||||
+ {
|
||||
+ nir_push_if(b, nir_ieq_imm(b, topology, PANVK_XFB_TOPO_TRI_FAN));
|
||||
+ {
|
||||
+ emit_tri_fan(b, v, N, buf, out_count, instance, value,
|
||||
+ stride, offset_bytes);
|
||||
+ }
|
||||
+ nir_push_else(b, NULL);
|
||||
+ {
|
||||
+ nir_push_if(b, nir_ieq_imm(b, topology, PANVK_XFB_TOPO_LINE_LIST_ADJ));
|
||||
+ {
|
||||
+ emit_line_list_adj(b, v, N, buf, out_count, instance, value,
|
||||
+ stride, offset_bytes);
|
||||
+ }
|
||||
+ nir_push_else(b, NULL);
|
||||
+ {
|
||||
+ nir_push_if(b, nir_ieq_imm(b, topology, PANVK_XFB_TOPO_LINE_STRIP_ADJ));
|
||||
+ {
|
||||
+ emit_line_strip_adj(b, v, N, buf, out_count, instance, value,
|
||||
+ stride, offset_bytes);
|
||||
+ }
|
||||
+ nir_push_else(b, NULL);
|
||||
+ {
|
||||
+ nir_push_if(b, nir_ieq_imm(b, topology, PANVK_XFB_TOPO_TRI_LIST_ADJ));
|
||||
+ {
|
||||
+ emit_tri_list_adj(b, v, N, buf, out_count, instance, value,
|
||||
+ stride, offset_bytes);
|
||||
+ }
|
||||
+ nir_push_else(b, NULL);
|
||||
+ {
|
||||
+ /* TRI_STRIP_ADJ — last case; any other topology value also lands here */
|
||||
+ emit_tri_strip_adj(b, v, N, buf, out_count, instance, value,
|
||||
+ stride, offset_bytes);
|
||||
+ }
|
||||
+ nir_pop_if(b, NULL);
|
||||
+ }
|
||||
+ nir_pop_if(b, NULL);
|
||||
+ }
|
||||
+ nir_pop_if(b, NULL);
|
||||
+ }
|
||||
+ nir_pop_if(b, NULL);
|
||||
+ }
|
||||
+ nir_pop_if(b, NULL);
|
||||
+ }
|
||||
+ nir_pop_if(b, NULL);
|
||||
+ }
|
||||
+ nir_pop_if(b, NULL); /* closes the output_count > 0 guard */
|
||||
+ }
|
||||
+ nir_pop_if(b, NULL);
|
||||
+}
|
||||
+
|
||||
+/* Per-intrinsic callback: replaces load_vertex_id with raw id + raw offset, and
|
||||
+ * expands each XFB-annotated store_output via lower_xfb_output_iter17. */
|
||||
+static bool
|
||||
+lower_xfb_iter17(nir_builder *b, nir_intrinsic_instr *intr,
|
||||
+ UNUSED void *data)
|
||||
+{
|
||||
+ if (intr->intrinsic == nir_intrinsic_load_vertex_id) {
|
||||
+ b->cursor = nir_instr_remove(&intr->instr);
|
||||
+ nir_def *repl = nir_iadd(b, nir_load_raw_vertex_id_pan(b),
|
||||
+ nir_load_raw_vertex_offset_pan(b));
|
||||
+ nir_def_rewrite_uses(&intr->def, repl);
|
||||
+ return true;
|
||||
+ }
|
||||
+
|
||||
+ if (intr->intrinsic != nir_intrinsic_store_output)
|
||||
+ return false;
|
||||
+
|
||||
+ bool progress = false;
|
||||
+ b->cursor = nir_before_instr(&intr->instr);
|
||||
+
|
||||
+ /* io_xfb holds out[0,1]; the other two entries live in io_xfb2.
|
||||
+ * Outer loop picks the annotation, inner loop the entry within it. */
|
||||
+ for (unsigned i = 0; i < 2; ++i) {
|
||||
+ nir_io_xfb xfb = i ? nir_intrinsic_io_xfb2(intr)
|
||||
+ : nir_intrinsic_io_xfb(intr);
|
||||
+ for (unsigned j = 0; j < 2; ++j) {
|
||||
+ if (!xfb.out[j].num_components)
|
||||
+ continue;
|
||||
+ lower_xfb_output_iter17(b, intr, i * 2 + j, xfb.out[j].num_components,
|
||||
+ xfb.out[j].buffer, xfb.out[j].offset);
|
||||
+ progress = true;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ if (progress)
|
||||
+ nir_instr_remove(&intr->instr); /* store_output is dropped once lowered */
|
||||
+ return progress;
|
||||
+}
|
||||
+/* Pass entry point. Preserves no metadata: the lowering inserts ifs and loops. */
|
||||
+bool
|
||||
+panvk_per_arch(nir_lower_xfb)(nir_shader *nir)
|
||||
+{
|
||||
+ return nir_shader_intrinsics_pass(
|
||||
+ nir, lower_xfb_iter17, nir_metadata_none, NULL);
|
||||
+}
|
||||
+
|
||||
+#endif /* PAN_ARCH < 9 */
|
||||
@@ -0,0 +1,50 @@
|
||||
From: marfrit-packages noether <claude-noether@reauktion.de>
|
||||
Subject: [PATCH] panvk: report fragmentStoresAndAtomics = true on Bifrost
|
||||
|
||||
Backports Mesa main's unconditional advertisement of
|
||||
fragmentStoresAndAtomics for panvk (snapshot ref: src/panfrost/vulkan/
|
||||
panvk_vX_physical_device.c at commit-time 2026-05-06; the line reads
|
||||
`.fragmentStoresAndAtomics = true,` on main with no PAN_ARCH gate).
|
||||
|
||||
Motivation: Chromium Dawn's WebGPU initializer in
|
||||
third_party/dawn/src/dawn/native/vulkan/PhysicalDeviceVk.cpp:250
|
||||
unconditionally rejects any Vulkan adapter that doesn't advertise this
|
||||
feature, causing Dawn to fall back to the SwiftShader CPU adapter
|
||||
on PineTab2 / RK3566 / Mali-G52 r1 MC1 (PAN_ARCH 7). With this patch the
|
||||
device advertises true, satisfying Dawn's gate. Tracked at
|
||||
https://git.reauktion.de/marfrit/panvk-bifrost/issues/2.
|
||||
|
||||
The disjunction with `instance->force_enable_shader_atomics` is
|
||||
preserved only as a marker, not a working switch: in compiler terms it's dead code
|
||||
(`true || X == true`), but it leaves the DRI option
|
||||
`pan_force_enable_shader_atomics` semantically wired so future
|
||||
rebases or downstream debugging can see the link to the runtime knob.
|
||||
|
||||
Caveat: the existing DRI option's description in src/util/driconf.h
|
||||
still labels this as "may not work reliably and is for debug purposes
|
||||
only". Mesa main's choice to ship it as default-on for all panvk
|
||||
architectures (including Bifrost, which is non-conformant per the
|
||||
PAN_I_WANT_A_BROKEN_VULKAN_DRIVER gate) reflects an upstream judgment
|
||||
that the practical risk is acceptable. Verify-before-ship for this
|
||||
package: dEQP-VK.glsl.atomic_operations.* + dEQP-VK.image.store.*
|
||||
deltas vs the r4 baseline must show no new fails. Pass counts may rise
|
||||
(tests that previously NotSupported now run); the load-bearing line is
|
||||
the Failed column staying at zero.
|
||||
|
||||
---
|
||||
src/panfrost/vulkan/panvk_vX_physical_device.c | 3 +--
|
||||
1 file changed, 1 insertion(+), 2 deletions(-)
|
||||
|
||||
diff --git a/src/panfrost/vulkan/panvk_vX_physical_device.c b/src/panfrost/vulkan/panvk_vX_physical_device.c
|
||||
--- a/src/panfrost/vulkan/panvk_vX_physical_device.c
|
||||
+++ b/src/panfrost/vulkan/panvk_vX_physical_device.c
|
||||
@@ -280,8 +280,7 @@
|
||||
.vertexPipelineStoresAndAtomics =
|
||||
(PAN_ARCH >= 13 && instance->enable_vertex_pipeline_stores_atomics) ||
|
||||
instance->force_enable_shader_atomics,
|
||||
- .fragmentStoresAndAtomics =
|
||||
- (PAN_ARCH >= 10) || instance->force_enable_shader_atomics,
|
||||
+ .fragmentStoresAndAtomics = true || instance->force_enable_shader_atomics,
|
||||
.shaderTessellationAndGeometryPointSize = false,
|
||||
.shaderImageGatherExtended = true,
|
||||
.shaderStorageImageExtendedFormats = true,
|
||||
@@ -0,0 +1,51 @@
|
||||
From: marfrit-packages noether <claude-noether@reauktion.de>
|
||||
Subject: [PATCH] panvk: advertise VK_EXT_legacy_dithering on Bifrost
|
||||
|
||||
Backports Mesa main's flip — vanilla 26.0.6 doesn't have the extension
|
||||
in the panvk advertisement list; main does (line 172 / 647 on snapshot
|
||||
617da94, 2026-05-06).
|
||||
|
||||
VK_EXT_legacy_dithering exposes the classic OpenGL-style dithering
|
||||
behavior to Vulkan apps. Pure-software composition; no new HW path.
|
||||
ARM's own libmali driver release r51p0 (BXODROIDN2PL, Aug 2024) lists
|
||||
this extension in its Vulkan implementation for ODROID-N2 boards
|
||||
using the same Mali-G52 architecture family — confirms ARM ships it
|
||||
for Mali-G52-class hardware.
|
||||
|
||||
Consumer benefit: dithering matters for low-bit-depth framebuffers
|
||||
(RGB565 / RGB5A1 — common on portable / battery-saving renders)
|
||||
where banding is visible. DXVK / vkd3d-proton both opt in when
|
||||
available.
|
||||
|
||||
Verify-before-ship: vulkaninfo lists the extension and
|
||||
VkPhysicalDeviceLegacyDitheringFeaturesEXT.legacyDithering == true.
|
||||
|
||||
Cross-refs:
|
||||
- marfrit/panvk-bifrost research/r6_r7_mali_g52_feature_audit_2026-05-24.md
|
||||
- ARM blob r51p0 strings dump (in-blob extension confirmed)
|
||||
|
||||
---
|
||||
src/panfrost/vulkan/panvk_vX_physical_device.c | 5 +++++
|
||||
1 file changed, 5 insertions(+)
|
||||
|
||||
diff --git a/src/panfrost/vulkan/panvk_vX_physical_device.c b/src/panfrost/vulkan/panvk_vX_physical_device.c
|
||||
--- a/src/panfrost/vulkan/panvk_vX_physical_device.c
|
||||
+++ b/src/panfrost/vulkan/panvk_vX_physical_device.c
|
||||
@@ -156,6 +156,7 @@
|
||||
.EXT_image_drm_format_modifier = true,
|
||||
.EXT_image_robustness = true,
|
||||
.EXT_index_type_uint8 = true,
|
||||
+ .EXT_legacy_dithering = true,
|
||||
.EXT_line_rasterization = true,
|
||||
.EXT_load_store_op_none = true,
|
||||
.EXT_non_seamless_cube_map = true,
|
||||
@@ -552,6 +553,9 @@
|
||||
|
||||
/* VK_EXT_multisampled_render_to_single_sampled */
|
||||
.multisampledRenderToSingleSampled = true,
|
||||
+
|
||||
+ /* VK_EXT_legacy_dithering */
|
||||
+ .legacyDithering = true,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -0,0 +1,103 @@
|
||||
From: marfrit-packages noether <claude-noether@reauktion.de>
|
||||
Subject: [PATCH] panvk-bifrost: fix XFB store channel-extract for packed varyings
|
||||
|
||||
iter19 — fixes a reliable SIGSEGV during vkCreateGraphicsPipeline on any
|
||||
shader that uses XFB-bound varyings declared with non-zero `layout
|
||||
(component=N)` qualifiers. Surfaced by
|
||||
dEQP-VK.transform_feedback.simple.holes_vert; backtrace lands 11 frames
|
||||
into libvulkan_panfrost.so called from `vkt::TransformFeedback::
|
||||
TransformFeedbackHolesInstance::iterate`.
|
||||
|
||||
Root cause: `lower_xfb_output_iter17` (and upstream `lower_xfb_output`,
|
||||
which carries a `// TODO` on the same assertion) computes the source-
|
||||
channel mask as `mask << channel_idx`, where `channel_idx` is the
|
||||
varying-location component (0..3) but `src` only contains channels for
|
||||
the source-side range starting at `nir_intrinsic_component(intr)`. For
|
||||
`flat out float vegeta` declared with `component=2`, NIR emits
|
||||
`store_output src=<vec1>, component=2`, and the lowering computes
|
||||
`mask << 2` against a single-component src — out-of-range; the
|
||||
resulting malformed nir_def then segfaults inside downstream NIR
|
||||
constant-folding (`nir_constant_expressions.c::evaluate_*`).
|
||||
|
||||
The assertion `assert(nir_intrinsic_component(intr) == 0)` was inherited
|
||||
from upstream `pan_nir_lower_xfb.c` as a documented `// TODO`; release
|
||||
builds (-DNDEBUG) elide it. The fix translates `channel_idx` to the
|
||||
source-channel space by subtracting `nir_intrinsic_component(intr)`
|
||||
before shifting the mask, and replaces the elided asserts with explicit
|
||||
release-mode guards (the patch closes the same release-mode-elision
|
||||
class as the original bug).
|
||||
|
||||
Verified on PineTab2 (Mali-G52 r1 MC1, PAN_ARCH 7) against vulkan-cts
|
||||
1.3.10.0:
|
||||
- holes_vert / holes_extra_draw_vert no longer SIGSEGV (now Fail on
|
||||
color-check; that is a separate iter20 finding — the rasterized
|
||||
varying gets removed alongside the XFB-bound one).
|
||||
- basic_*: 36/36 Pass. depth_clip_*: 1 Pass + 4 NotSupported.
|
||||
lines_or_triangles*: 16 NotSupported. 0 Fail across the full set.
|
||||
- holes_geom / holes_extra_draw_geom remain NotSupported
|
||||
(geometryShader not on G52) — unchanged.
|
||||
|
||||
Caveat: max_output_components_64/_128/_256 were never reached on the
|
||||
r5 sweep (watchdog killed transform_feedback after the holes_vert
|
||||
crash). With this fix in place, those tests now run and surface
|
||||
*their own pre-existing* coredumps — confirmed on shipped r6 baseline
|
||||
too. They are NOT regressions from this patch; they are latent crashes
|
||||
unmasked by it. iter20+ territory.
|
||||
|
||||
Phase 5 (2nd-model) review: APPROVE WITH CHANGES (non-blocking).
|
||||
Changes applied: release-mode defensive guards on both preconditions
|
||||
plus a dispatcher-side comment clarifying the i*2+j semantics.
|
||||
|
||||
Cross-refs:
|
||||
- iter19/phase{0,1,2,3}_holes_vert*.md in panvk-bifrost repo
|
||||
|
||||
---
|
||||
src/panfrost/vulkan/panvk_vX_xfb_lower.c | 24 +++++++++++++++++++++---
|
||||
1 file changed, 21 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/src/panfrost/vulkan/panvk_vX_xfb_lower.c b/src/panfrost/vulkan/panvk_vX_xfb_lower.c
|
||||
@@ -339,7 +339,20 @@
|
||||
unsigned buffer, unsigned offset_words)
|
||||
{
|
||||
assert(buffer < MAX_XFB_BUFFERS);
|
||||
- assert(nir_intrinsic_component(intr) == 0);
|
||||
+
|
||||
+ /* iter19: nir_intrinsic_component(intr) is the source-channel base —
|
||||
+ * for a packed varying like `layout (location=0, component=2) flat out
|
||||
+ * float vegeta`, NIR emits store_output with component=2 and a single-
|
||||
+ * component src. The XFB iteration index `channel_idx` (0..3) is the
|
||||
+ * varying-location component, not the source channel. Translate by
|
||||
+ * subtracting the base before shifting the mask. Fixes the long-
|
||||
+ * standing `assert(nir_intrinsic_component(intr) == 0) // TODO` in
|
||||
+ * upstream pan_nir_lower_xfb that surfaces on holes_vert. */
|
||||
+ const unsigned base_comp = nir_intrinsic_component(intr);
|
||||
+ /* Defensive against release-build elision: this is precisely the
|
||||
+ * bug class the patch is fixing, so don't re-introduce it. */
|
||||
+ if (channel_idx < base_comp)
|
||||
+ return;
|
||||
|
||||
uint16_t stride = b->shader->info.xfb_stride[buffer] * 4;
|
||||
assert(stride != 0);
|
||||
@@ -357,7 +370,11 @@
|
||||
|
||||
nir_def *src = intr->src[0].ssa;
|
||||
nir_component_mask_t mask = nir_component_mask(num_components);
|
||||
- nir_def *value = nir_channels(b, src, mask << channel_idx);
|
||||
+ const unsigned src_channel = channel_idx - base_comp;
|
||||
+ /* Same defensive class as the channel_idx >= base_comp guard above. */
|
||||
+ if (src_channel + num_components > src->num_components)
|
||||
+ return;
|
||||
+ nir_def *value = nir_channels(b, src, mask << src_channel);
|
||||
|
||||
/* Topology dispatch ladder. LIST first (fast path). */
|
||||
nir_push_if(b, nir_ieq_imm(b, topology, PANVK_XFB_TOPO_LIST));
|
||||
@@ -465,6 +482,9 @@
|
||||
for (unsigned j = 0; j < 2; ++j) {
|
||||
if (!xfb.out[j].num_components)
|
||||
continue;
|
||||
+ /* `i*2+j` is the varying-location component (0..3) — io_xfb covers
|
||||
+ * slots 0..1, io_xfb2 covers 2..3. The leaf translates this into
|
||||
+ * a source-channel index by subtracting nir_intrinsic_component(intr). */
|
||||
lower_xfb_output_iter17(b, intr, i * 2 + j, xfb.out[j].num_components,
|
||||
xfb.out[j].buffer, xfb.out[j].offset);
|
||||
progress = true;
|
||||
@@ -0,0 +1,229 @@
|
||||
# Maintainer: Markus Fritsche <fritsche.markus@gmail.com>
|
||||
#
|
||||
# mesa-panvk-bifrost — patched Mesa libvulkan_panfrost.so that exposes
|
||||
# Bifrost-gen Mali (Mali-G31/G52/G72/G76, PAN_ARCH 6/7) to Chromium-family
|
||||
# browsers' Vulkan compositor.
|
||||
#
|
||||
# Campaign: ~/src/panvk-bifrost/ — iter9 close (technical milestone
|
||||
# 2026-05-20, operator-confirmed Brave window). Goal close requires
|
||||
# this package built + published + installable per the
|
||||
# feedback_package_done_means_installable.md 3-point check.
|
||||
#
|
||||
# What it does:
|
||||
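# - Builds Mesa 26.0.6 (matching ohm's stock mesa pkg) with sed edits
|
||||
# against src/panfrost/vulkan/panvk_vX_physical_device.c:
|
||||
# (1) expose VK_KHR/EXT_robustness2 + nullDescriptor on PAN_ARCH 6/7
|
||||
# (2) set has_vk1_1/has_vk1_2 to true on Bifrost
|
||||
# plus patch files 0003-0007 (VK_EXT_transform_feedback, XFB fixes,
|
||||
# fragmentStoresAndAtomics, VK_EXT_legacy_dithering); see prepare().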
|
||||
# - Installs the patched libvulkan_panfrost.so to /usr/lib/panvk-bifrost/
|
||||
# (NOT /usr/lib — co-installs alongside stock mesa).
|
||||
# - Registers a custom ICD JSON at a non-default path so the system
|
||||
# Vulkan loader only picks up our driver when VK_ICD_FILENAMES is set.
|
||||
# - Ships /usr/bin/brave-vulkan launcher that wires up env vars + flags.
|
||||
#
|
||||
# Co-existence: stock /usr/lib/libvulkan_panfrost.so is untouched. Stock
|
||||
# /usr/share/vulkan/icd.d/panfrost_icd.json is untouched. Users opt in
|
||||
# via brave-vulkan (or by setting VK_ICD_FILENAMES manually).
|
||||
#
|
||||
# Build target: arch-aarch64 runner via marfrit-packages Gitea Actions.
|
||||
# Mesa build is slow (~30-60min on Cortex-A55). Build deps installed
|
||||
# inside the runner via pacman -S in the workflow.
|
||||
|
||||
pkgname=mesa-panvk-bifrost
|
||||
_mesaver=26.0.6
|
||||
pkgver=26.0.6.r7
|
||||
pkgrel=1
|
||||
pkgdesc="Patched Mesa libvulkan_panfrost.so exposing Bifrost-gen Mali to Vulkan apps (panvk-bifrost campaign)"
|
||||
arch=('aarch64')
|
||||
url="https://git.reauktion.de/marfrit/panvk-bifrost"
|
||||
license=('MIT')
|
||||
|
||||
# We co-install at /usr/lib/panvk-bifrost/ so no conflicts with stock mesa.
|
||||
# We DO provide a script that requires brave to be installed.
|
||||
depends=(
|
||||
'mesa' # for shared mesa runtime libs (libgallium-mesa etc.)
|
||||
'libdrm'
|
||||
'wayland'
|
||||
'libxcb'
|
||||
'libx11'
|
||||
'libxshmfence'
|
||||
'zlib'
|
||||
'zstd'
|
||||
'libdisplay-info'
|
||||
'expat'
|
||||
'systemd-libs' # libudev
|
||||
'spirv-tools'
|
||||
)
|
||||
optdepends=(
|
||||
'brave-bin: for the brave-vulkan launcher script'
|
||||
)
|
||||
makedepends=(
|
||||
'meson'
|
||||
'ninja'
|
||||
'pkgconf'
|
||||
'bison'
|
||||
'flex'
|
||||
'python'
|
||||
'python-yaml'
|
||||
'python-mako'
|
||||
'glslang'
|
||||
'spirv-tools'
|
||||
'llvm'
|
||||
'clang'
|
||||
'libclc'
|
||||
'spirv-llvm-translator'
|
||||
'vulkan-headers'
|
||||
'wayland-protocols'
|
||||
)
|
||||
|
||||
source=(
|
||||
"https://archive.mesa3d.org/mesa-${_mesaver}.tar.xz"
|
||||
"0001-panvk-expose-robustness2-nullDescriptor-bifrost.patch"
|
||||
"0002-panvk-expose-vulkan-1.1-1.2-on-bifrost.patch"
|
||||
"0003-panvk-bifrost-vk-ext-transform-feedback.patch"
|
||||
"0004-panvk-bifrost-xfb-primitive-decomposition.patch"
|
||||
"0005-panvk-bifrost-fragment-stores-atomics.patch"
|
||||
"0006-panvk-bifrost-legacy-dithering.patch"
|
||||
"0007-panvk-bifrost-xfb-component-base-fix.patch"
|
||||
"brave-vulkan"
|
||||
"icd.json"
|
||||
)
|
||||
sha256sums=(
|
||||
'SKIP' # TODO: pin once we know the upstream tarball is stable. archive.mesa3d.org tarballs are stable, so we can hash-pin in iter10.
|
||||
'SKIP'
|
||||
'SKIP'
|
||||
'SKIP'
|
||||
'SKIP'
|
||||
'SKIP'
|
||||
'SKIP'
|
||||
'SKIP'
|
||||
'SKIP'
|
||||
'SKIP'
|
||||
)
|
||||
|
||||
prepare() {
  cd "mesa-${_mesaver}"

  local _vkdir=src/panfrost/vulkan
  local _physdev="${_vkdir}/panvk_vX_physical_device.c"
  local _xfb_lower="${_vkdir}/panvk_vX_xfb_lower.c"
  local _sym _needle

  # iter8: robustness2 + nullDescriptor exposure on Bifrost. Three
  # substitutions in panvk_vX_physical_device.c, done with sed because the
  # upstream context drifts between Mesa releases and unified-diff patching
  # is brittle (hunk-rejects were hit during manual application).
  for _sym in KHR_robustness2 EXT_robustness2 nullDescriptor; do
    sed -i "s|\\.${_sym} = PAN_ARCH >= 10,|.${_sym} = true,|" "${_physdev}"
  done

  # iter9: bump api version to 1.1/1.2 for Bifrost. NOTE: this only affects
  # extension exposure flags, NOT the reported apiVersion (which is set by
  # get_api_version() further down and gated separately). The brave-vulkan
  # launcher sets MESA_VK_VERSION_OVERRIDE=1.2 at runtime to deal with the
  # latter, so no source change is needed for that path.
  for _sym in has_vk1_1 has_vk1_2; do
    sed -i "s|bool ${_sym} = PAN_ARCH >= 10;|bool ${_sym} = true;|" "${_physdev}"
  done

  # iter13: VK_EXT_transform_feedback implementation for Bifrost (PAN_ARCH<9).
  # A real unified-diff patch; the change is too large for sed.
  # Phase-doc context: ~/src/panvk-bifrost/phase{4,5,6}_iter13_close.md.
  # Unlocks ANGLE-Vulkan → GLES3 → WebGL2 / WebGPU on Brave (chrome://gpu
  # reports "Hardware accelerated" across the board for the affected paths).
  patch -p1 -i "${srcdir}/0003-panvk-bifrost-vk-ext-transform-feedback.patch"

  # iter17: XFB primitive decomposition for non-LIST topologies (TRI_STRIP,
  # TRI_FAN, LINE_STRIP, *_WITH_ADJACENCY). A panvk-specific NIR pass
  # (panvk_per_arch(nir_lower_xfb)) replaces upstream pan_nir_lower_xfb.
  # Closes the 162 dEQP-VK winding_* failures from iter15 (958 P / 81 F /
  # 0 Crash on full XFB CTS — the remaining 81 fails are by-design resume_*
  # tests, transformFeedbackDraw=false).
  # Phase-doc context: ~/src/panvk-bifrost/iter17/phase{0,1,2,4,5,6,8}_*.md.
  patch -p1 -i "${srcdir}/0004-panvk-bifrost-xfb-primitive-decomposition.patch"

  # r5 (2026-05-23): advertise .fragmentStoresAndAtomics = true on Bifrost
  # to satisfy Chromium Dawn's WebGPU init gate
  # (third_party/dawn/src/dawn/native/vulkan/PhysicalDeviceVk.cpp:250).
  # Backports Mesa main's unconditional flip (same line as on main as of
  # 2026-05-06). The disjunction with instance->force_enable_shader_atomics
  # is preserved as a documented kill-switch even though the compiler folds
  # it away. Closes marfrit/panvk-bifrost#2.
  # Verify-before-ship: dEQP-VK.glsl.atomic_operations.* and
  # dEQP-VK.image.store.* show no new Failed vs r4 baseline.
  patch -p1 -i "${srcdir}/0005-panvk-bifrost-fragment-stores-atomics.patch"

  # r6 (2026-05-25): advertise VK_EXT_legacy_dithering. Backports Mesa main's
  # unconditional flip. Pure-software composition; vk_render_pass already
  # gates on enabled_features.legacyDithering and panvk_vX_blend + pan_format
  # already plumb the dithered BLEND descriptor (BFMT2 table has
  # MALI_BLEND_AU encodings for RGB565/RGB5A1/RGBA4/RGB10A2 on PAN_ARCH 7).
  # Closes the EXT_legacy_dithering gap surfaced by marfrit/panvk-bifrost
  # research/r6_r7_*. ARM blob r51p0 confirms the extension as
  # Mali-G52-architecture supported.
  patch -p1 -i "${srcdir}/0006-panvk-bifrost-legacy-dithering.patch"

  # r7 (2026-05-25): XFB store channel-extract fix for packed varyings.
  # Eliminates a reliable SIGSEGV in vkCreateGraphicsPipeline whenever an
  # XFB-bound vertex output is declared with non-zero `layout (component=N)`.
  # Surfaced by dEQP-VK.transform_feedback.simple.holes_vert (now Fails on
  # color-check rather than crashing; the color-check residual is a separate
  # iter20 finding).
  # Phase-doc context: ~/src/panvk-bifrost/iter19/phase{0,1,2,3}_*.md.
  # Phase 5 reviewed; release-mode-elision defensive guards applied.
  patch -p1 -i "${srcdir}/0007-panvk-bifrost-xfb-component-base-fix.patch"

  # Sanity-check that every change landed. Each check is a plain command, so
  # the first miss fails prepare().
  local _physdev_needles=(
    'KHR_robustness2 = true,'
    'EXT_robustness2 = true,'
    'nullDescriptor = true,'
    'has_vk1_1 = true;'
    'has_vk1_2 = true;'
    'EXT_transform_feedback = PAN_ARCH < 9,'   # iter13
  )
  for _needle in "${_physdev_needles[@]}"; do
    grep -q "${_needle}" "${_physdev}"
  done
  # iter13: the new command-stream file exists.
  [[ -f "${_vkdir}/jm/panvk_vX_cmd_xfb.c" ]]
  # iter17: the pan_nir_lower_xfb call site has been replaced.
  grep -q 'panvk_per_arch(nir_lower_xfb)' "${_vkdir}/panvk_vX_shader.c"
  # r5: fragmentStoresAndAtomics = true landed.
  grep -q 'fragmentStoresAndAtomics = true ||' "${_physdev}"
  # r6: VK_EXT_legacy_dithering advertised (extension bit + feature bit).
  for _needle in '\.EXT_legacy_dithering = true,' '\.legacyDithering = true,'; do
    grep -q "${_needle}" "${_physdev}"
  done
  # iter17 again: topology plumbing in the shader header, new lowering file.
  for _needle in 'xfb_topology' 'panvk_xfb_topology'; do
    grep -q "${_needle}" "${_vkdir}/panvk_shader.h"
  done
  [[ -f "${_xfb_lower}" ]]
  # r7: XFB channel-base correction landed.
  grep -q 'iter19: nir_intrinsic_component(intr) is the source-channel base' "${_xfb_lower}"
  grep -q 'mask << src_channel' "${_xfb_lower}"
}
|
||||
|
||||
build() {
  # Minimal Mesa configuration: only the panvk Vulkan driver is enabled.
  # GLES/GLX/EGL and every gallium driver are switched off, which keeps the
  # build narrow and avoids pulling the entire Mesa runtime into this
  # package's dependency graph.
  local _meson_opts=(
    --prefix=/usr
    --libdir=lib
    --buildtype=release
    -Dvulkan-drivers=panfrost
    -Dgallium-drivers=
    -Dplatforms=wayland,x11
    -Dglx=disabled
    -Degl=disabled
    -Dgles1=disabled
    -Dgles2=disabled
    -Dvulkan-layers=
    -Dtools=
    -Dgallium-rusticl=false
    -Dmicrosoft-clc=disabled
  )

  cd "mesa-${_mesaver}"
  meson setup build/ "${_meson_opts[@]}"
  meson compile -C build
}
|
||||
|
||||
package() {
  # Everything driver-related lives in a private directory so nothing clashes
  # with stock mesa's /usr/lib/libvulkan_panfrost.so.
  local _dest="${pkgdir}/usr/lib/panvk-bifrost"

  cd "${srcdir}/mesa-${_mesaver}"

  # Patched driver: co-install path, NOT /usr/lib.
  install -Dm755 build/src/panfrost/vulkan/libvulkan_panfrost.so \
    "${_dest}/libvulkan_panfrost.so"

  # Custom ICD JSON. Deliberately NOT under /usr/share/vulkan/icd.d/ (the
  # default loader search path): the user has to opt in via VK_ICD_FILENAMES.
  install -Dm644 "${srcdir}/icd.json" "${_dest}/icd.json"

  # The brave-vulkan launcher wires up env + flags.
  install -Dm755 "${srcdir}/brave-vulkan" "${pkgdir}/usr/bin/brave-vulkan"

  # license=('MIT') has no shared copy in the distribution's common licenses
  # package, so the package must ship the licence text itself. Mesa keeps it
  # in docs/license.rst.
  install -Dm644 docs/license.rst \
    "${pkgdir}/usr/share/licenses/${pkgname}/license.rst"
}
|
||||
@@ -0,0 +1,83 @@
|
||||
# mesa-panvk-bifrost
|
||||
|
||||
Patched Mesa `libvulkan_panfrost.so` exposing Bifrost-gen Mali GPUs
|
||||
(Mali-G31/G52/G72/G76, PAN_ARCH 6/7) to Chromium-family browsers'
|
||||
Vulkan compositor.
|
||||
|
||||
Result of the **panvk-bifrost** campaign ([`~/src/panvk-bifrost/`](../../../panvk-bifrost/)),
|
||||
iter9 close 2026-05-20 (technical milestone — operator-confirmed Brave
|
||||
window) followed by this package as the actual delivery.
|
||||
|
||||
## What it solves
|
||||
|
||||
Stock Chromium / Brave on Bifrost SBCs (PineTab2, etc.) currently dies
|
||||
at GL bindings init:
|
||||
|
||||
```
|
||||
ERROR: ui/gl/gl_context_egl.cc:120 GLES3 is unsupported and ES version fallback is disabled
|
||||
ERROR: ui/gl/init/gl_factory.cc:111 Requested GL implementation not found
|
||||
ERROR: components/viz/service/main/viz_main_impl.cc:189 Exiting GPU process due to errors during initialization
|
||||
```
|
||||
|
||||
This package makes Brave's **Vulkan compositor path** work on Bifrost,
|
||||
side-stepping the failing GL stack. Browser chrome and standard page
|
||||
rendering work.
|
||||
|
||||
## Known limitations (not addressed)
|
||||
|
||||
- **WebGL / WebGL2** in-page: experimental. ANGLE needs `VK_EXT_transform_feedback`
|
||||
to expose GLES3 contexts; stock PanVk lacks it on Bifrost, so this package
|
||||
patches it in (patches 0003/0004/0007 in the PKGBUILD). Sites using WebGL
|
||||
may still degrade or refuse.
|
||||
- **VAAPI hardware video decode**: unrelated to this package — see the
|
||||
`libva-v4l2-request-fourier` package for that path.
|
||||
|
||||
## Install
|
||||
|
||||
```sh
|
||||
# Ensure [marfrit] is in /etc/pacman.conf, then:
|
||||
sudo pacman -Sy mesa-panvk-bifrost
|
||||
```
|
||||
|
||||
## Use
|
||||
|
||||
```sh
|
||||
brave-vulkan # launches Brave with Vulkan
|
||||
brave-vulkan https://www.example.com
|
||||
brave-vulkan --your-flags-here # extra args passed through
|
||||
```
|
||||
|
||||
The launcher sets:
|
||||
|
||||
- `VK_ICD_FILENAMES=/usr/lib/panvk-bifrost/icd.json` (the patched driver)
|
||||
- `PAN_I_WANT_A_BROKEN_VULKAN_DRIVER=1` (Mesa upstream gate)
|
||||
- `MESA_VK_VERSION_OVERRIDE=1.2` (apiVersion bump for ANGLE)
|
||||
- Brave flags: `--use-gl=disabled --enable-features=Vulkan --use-vulkan=native --ozone-platform=x11 --no-sandbox --disable-gpu-sandbox --ignore-gpu-blocklist`
|
||||
|
||||
## What's in the package
|
||||
|
||||
- `/usr/lib/panvk-bifrost/libvulkan_panfrost.so` — patched Mesa Vulkan driver (Mesa 26.0.6 + sed-applied feature-flag edits + patches 0003–0007)
|
||||
- `/usr/lib/panvk-bifrost/icd.json` — Vulkan ICD JSON pointing at the patched .so (NOT auto-loaded; only via `VK_ICD_FILENAMES`)
|
||||
- `/usr/bin/brave-vulkan` — launcher script
|
||||
|
||||
System Mesa is untouched. The stock `/usr/lib/libvulkan_panfrost.so` and
|
||||
`/usr/share/vulkan/icd.d/panfrost_icd.json` continue to work for any
|
||||
other Vulkan app.
|
||||
|
||||
## Co-existence
|
||||
|
||||
Both stock Mesa and this package can be installed. Stock Vulkan apps
|
||||
(`vulkaninfo`, `vkcube`, etc.) use the stock driver by default. Only
|
||||
apps started via `brave-vulkan` (or with `VK_ICD_FILENAMES` manually
|
||||
set to our path) use the patched driver.
|
||||
|
||||
## Campaign close criterion
|
||||
|
||||
Per [`feedback_package_done_means_installable`](file:///home/mfritsche/.claude/projects/-home-mfritsche-src/memory/feedback_package_done_means_installable.md)
|
||||
three-point check:
|
||||
|
||||
1. PR merged to `marfrit-packages`
|
||||
2. CI green AND `packages.reauktion.de/arch/aarch64/mesa-panvk-bifrost-*.pkg.tar.zst` exists
|
||||
3. `pacman -Ss mesa-panvk-bifrost` on a fresh consumer host (e.g. ohm
|
||||
after `pacman -Syu` from clean state) returns the package + brave-vulkan launches
|
||||
|
||||
When all three pass, panvk-bifrost iter9 closes.
|
||||
@@ -0,0 +1,61 @@
|
||||
#!/bin/bash
# brave-vulkan — launch Brave with the PanVk-Bifrost Vulkan compositor on
# Bifrost SBCs (PineTab2 / Mali-G52 r1 MC1 and similar).
#
# Side-steps the GL stack failures stock Brave hits on Bifrost
# (the README "Consumer-side benefit" path of the panvk-bifrost campaign).
#
# Provided by the mesa-panvk-bifrost package. See:
#   /usr/share/doc/mesa-panvk-bifrost/README
#   ~/src/panvk-bifrost/phase8_iteration9_close.md (campaign close)
#
# Usage:          brave-vulkan [brave args...]
# Equivalent to:  brave [VULKAN_FLAGS] [your args]

set -e

# --- Driver selection -------------------------------------------------------

# Point the loader at this package's ICD manifest so the patched driver is
# used instead of the stock /usr/share/vulkan/icd.d/panfrost_icd.json.
export VK_ICD_FILENAMES=/usr/lib/panvk-bifrost/icd.json

# PanVk's "I know it's not conformant" gate: the patched driver still refuses
# to enumerate Bifrost without this variable (Mesa upstream choice, kept for
# compatibility).
export PAN_I_WANT_A_BROKEN_VULKAN_DRIVER=1

# ANGLE (Chromium's GL stack) requires device.apiVersion >= 1.1. The patched
# libvulkan_panfrost.so still has a PAN_ARCH>=10 gate inside
# get_api_version(); overriding at runtime is easier than a further patch.
export MESA_VK_VERSION_OVERRIDE=1.2

# --- Session environment ----------------------------------------------------

# Locate the live Plasma session's Xauthority file. Its suffix is randomized
# on each boot, so recover the current path from the Xwayland command line.
if [[ -z "${XAUTHORITY:-}" ]]; then
    xauth_file=$(pgrep -fa Xwayland 2>/dev/null \
        | grep -oE "/run/user/$(id -u)/xauth_[A-Za-z0-9]+" \
        | head -1)
    [[ -z "$xauth_file" ]] || export XAUTHORITY="$xauth_file"
fi

# Fill in the usual session variables only when the caller has not set them:
# the per-user runtime dir, the active Plasma Wayland socket, and XWayland :1.
export XDG_RUNTIME_DIR="${XDG_RUNTIME_DIR:-/run/user/$(id -u)}"
export WAYLAND_DISPLAY="${WAYLAND_DISPLAY:-wayland-0}"
export DISPLAY="${DISPLAY:-:1}"

# --- Launch -----------------------------------------------------------------

vulkan_flags=(
    --use-gl=disabled
    --enable-features=Vulkan
    --use-vulkan=native
    --ozone-platform=x11
    --no-sandbox
    --disable-gpu-sandbox
    --ignore-gpu-blocklist
)

# Caller-supplied arguments go last.
exec brave "${vulkan_flags[@]}" "$@"
|
||||
@@ -0,0 +1,7 @@
|
||||
{
|
||||
"ICD": {
|
||||
"api_version": "1.4.335",
|
||||
"library_path": "/usr/lib/panvk-bifrost/libvulkan_panfrost.so"
|
||||
},
|
||||
"file_format_version": "1.0.1"
|
||||
}
|
||||
@@ -21,14 +21,14 @@
|
||||
# pkgbase stays as qt6-base so $_pkgfn (= ${pkgbase/6-/} = "qtbase")
|
||||
# resolves correctly. The "-fourier" suffix lives only in the
|
||||
# directory name and the commit history; epoch=1 gives our local
|
||||
# build strict precedence over upstream pkgrel=2 until upstream lands
|
||||
# build strict precedence over upstream pkgrel=N until upstream lands
|
||||
# the GL_R8/ES3 fix and we can drop the epoch.
|
||||
pkgbase=qt6-base
|
||||
pkgname=(qt6-base-fourier
|
||||
qt6-xcb-private-headers-fourier)
|
||||
_pkgver=6.11.0
|
||||
_pkgver=6.11.1
|
||||
pkgver=${_pkgver/-/}
|
||||
pkgrel=3
|
||||
pkgrel=2
|
||||
epoch=1
|
||||
arch=(aarch64 x86_64)
|
||||
url='https://www.qt.io'
|
||||
@@ -42,7 +42,6 @@ depends=(brotli
|
||||
double-conversion
|
||||
fontconfig
|
||||
freetype2
|
||||
gcc-libs
|
||||
glib2
|
||||
glibc
|
||||
harfbuzz
|
||||
@@ -51,6 +50,7 @@ depends=(brotli
|
||||
libb2
|
||||
libcups
|
||||
libdrm
|
||||
libgcc
|
||||
libgl
|
||||
libice
|
||||
libinput
|
||||
@@ -58,6 +58,7 @@ depends=(brotli
|
||||
libpng
|
||||
libproxy
|
||||
libsm
|
||||
libstdc++
|
||||
liburing
|
||||
libx11
|
||||
libxcb
|
||||
@@ -115,7 +116,7 @@ source=(git+https://code.qt.io/qt/$_pkgfn#tag=v$_pkgver
|
||||
0001-qopengltextureglyphcache-pick-GL_R8-on-ES3.patch
|
||||
0002-qrhigles2-RED_OR_ALPHA8-pick-GL_R8-on-ES3.patch
|
||||
0003-qopengltextureuploader-pick-GL_R8-on-ES3.patch)
|
||||
sha256sums=('2223c075e95d86f8dbf6395b025a74d996c418f094453c903290e3c2663fbed2'
|
||||
sha256sums=('2eafe504fae873d20f206b5661e2e10506879455cb2d370f42c5bb72ccf7a8a1'
|
||||
'5411edbe215c24b30448fac69bd0ba7c882f545e8cf05027b2b6e2227abc5e78'
|
||||
'4b93f6a79039e676a56f9d6990a324a64a36f143916065973ded89adc621e094'
|
||||
'SKIP'
|
||||
@@ -126,9 +127,8 @@ prepare() {
|
||||
patch -d $_pkgfn -p1 < qt6-base-cflags.patch # Use system CFLAGS
|
||||
patch -d $_pkgfn -p1 < qt6-base-nostrip.patch # Don't strip binaries with qmake
|
||||
|
||||
# cherry-pick needs git author identity; git-cli refuses without it.
|
||||
git -C $_pkgfn -c user.email=fourier@build -c user.name='qt6-base-fourier build' \
|
||||
cherry-pick -n 8b54513cdcf62047376a5d27d784ad68a8f235bf # Fix qdbus crashes
|
||||
# 8b54513cdcf6 (qdbus crash fix) cherry-pick removed: landed upstream
|
||||
# in 6.11.1. Re-add if qdbus regressions re-surface.
|
||||
|
||||
# qt6-base-fourier — three small runtime-checks that pick GL_R8 over
|
||||
# GL_ALPHA when the live GL context is ES 3 or newer. See the
|
||||
@@ -165,7 +165,7 @@ build() {
|
||||
-DFEATURE_system_sqlite=ON \
|
||||
-DFEATURE_system_xcb_xinput=ON \
|
||||
-DFEATURE_no_direct_extern_access=$_no_direct_extern_access \
|
||||
-DFEATURE_sql_ibase=OFF \
|
||||
-DFEATURE_mimetype_database=OFF \
|
||||
-DCMAKE_INTERPROCEDURAL_OPTIMIZATION=ON \
|
||||
-DCMAKE_MESSAGE_LOG_LEVEL=STATUS
|
||||
cmake --build build
|
||||
@@ -188,7 +188,7 @@ package_qt6-xcb-private-headers-fourier() {
|
||||
conflicts=(qt6-xcb-private-headers)
|
||||
replaces=(qt6-xcb-private-headers)
|
||||
|
||||
depends=("qt6-base-fourier=$pkgver")
|
||||
depends=("qt6-base-fourier=$epoch:$pkgver")
|
||||
optdepends=()
|
||||
groups=()
|
||||
|
||||
|
||||
+85
@@ -0,0 +1,85 @@
|
||||
#!/bin/bash
# Build aish_<ver>_all.deb from this directory using dpkg-deb directly.
# Run from inside the runner container, which has dpkg installed.
#
# Matches the lmcp build-deb.sh pattern: no dh/debhelper, no Build-Depends
# beyond `dpkg`, structurally a normal apt package (Architecture: all).
#
# Steps: download the pinned upstream tarball, verify its SHA-256, stage the
# files under a temporary package root, write DEBIAN/control, run dpkg-deb.
set -euo pipefail

PKGVER=0.1.0
UPSTREAM_TAG=v0.1.0
PKGREL=1
AISH_TARBALL_SHA256=9ebc3939e028832e39391ae33efacb5ec9bcd99d123cbc8ca1cd6ca9a640b5b5
# Directory containing this script; debian/ is read from here and the .deb is
# written here.
HERE=$(dirname "$(readlink -f "$0")")

# Reproducible build: pin all file mtimes + ar member timestamps to a fixed
# epoch tied to this packaging release (aish v0.1.0 — 2026-05-25 00:00 UTC).
# Without this, repeat builds produce different byte streams and reprepro
# refuses re-includes with "size expected: X, got: Y".
export SOURCE_DATE_EPOCH=1779667200

work=$(mktemp -d)
# Single quotes defer expansion until the trap fires, and the inner double
# quotes keep a temp path containing whitespace (e.g. from an unusual TMPDIR)
# as one argument instead of letting rm split it into several paths.
trap 'rm -rf -- "$work"' EXIT

cd "$work"
curl --connect-timeout 10 --max-time 600 --retry 3 --retry-delay 5 -sSLfo aish.tar.gz \
    "https://git.reauktion.de/marfrit/aish/archive/${UPSTREAM_TAG}.tar.gz"
# Abort (set -e) when the download does not match the pinned checksum.
echo "$AISH_TARBALL_SHA256  aish.tar.gz" | sha256sum -c
tar xzf aish.tar.gz

ROOT="$work/pkgroot"
LIBDIR="$ROOT/usr/share/lua/5.1/aish"
mkdir -p "$ROOT/DEBIAN" \
         "$LIBDIR/ffi" \
         "$LIBDIR/vendor" \
         "$ROOT/usr/bin" \
         "$ROOT/usr/share/doc/aish/examples"

# Top-level modules
for m in main broker context executor history mcp renderer repl router safety secrets; do
    cp "aish/${m}.lua" "$LIBDIR/${m}.lua"
done

# FFI bindings
for m in curl libc pty readline; do
    cp "aish/ffi/${m}.lua" "$LIBDIR/ffi/${m}.lua"
done

# Vendored dependencies
cp aish/vendor/dkjson.lua "$LIBDIR/vendor/dkjson.lua"

# Launch wrapper
install -m 755 aish/bin/aish "$ROOT/usr/bin/aish"

# Documentation + example config
cp aish/README.md "$ROOT/usr/share/doc/aish/"
cp aish/LICENSE "$ROOT/usr/share/doc/aish/"
cp aish/examples/config.lua "$ROOT/usr/share/doc/aish/examples/"
cp "$HERE/debian/copyright" "$ROOT/usr/share/doc/aish/copyright"
cp "$HERE/debian/changelog" "$ROOT/usr/share/doc/aish/changelog.Debian"
# -n keeps the original name and timestamp out of the gzip header.
gzip -9 -n "$ROOT/usr/share/doc/aish/changelog.Debian"

# Binary control file. Continuation lines of a multi-line field must start
# with a space; a lone " ." is a paragraph break inside Description.
cat > "$ROOT/DEBIAN/control" <<EOF
Package: aish
Version: ${PKGVER}-${PKGREL}
Section: shells
Priority: optional
Architecture: all
Depends: luajit, libreadline8t64 | libreadline8, libcurl4t64 | libcurl4
Maintainer: Markus Fritsche <mfritsche@reauktion.de>
Homepage: https://git.reauktion.de/marfrit/aish
Description: AI-augmented conversational shell (LuaJIT, FFI-only)
 aish is an interactive REPL that interleaves shell execution and
 language-model conversation against llama.cpp HTTP brokers. Pure
 LuaJIT 2.x with FFI bindings to libcurl, GNU readline, and libc.
 .
 Modules install under /usr/share/lua/5.1/aish/. The launcher is
 /usr/bin/aish. Example configuration is at
 /usr/share/doc/aish/examples/config.lua (copy to
 ~/.config/aish/config.lua and adapt).
EOF

# Build the .deb. Output lands next to this script ($HERE), not in the
# caller's working directory.
DEB_OUT=aish_${PKGVER}-${PKGREL}_all.deb
dpkg-deb --root-owner-group --build "$ROOT" "$HERE/$DEB_OUT"
echo "built: $HERE/$DEB_OUT"
|
||||
Vendored
+14
@@ -0,0 +1,14 @@
|
||||
aish (0.1.0-1) bookworm trixie; urgency=medium
|
||||
|
||||
* Initial release packaged for marfrit overlay repo. Phases 0-10
|
||||
complete (102 closed issues): local llama.cpp + cloud broker
|
||||
routing via hossenfelder, MCP tool calls with confirm-gate and
|
||||
per-tool auto_approve, Chuck Norris autonomous mode with
|
||||
destructive-op heuristic, cross-session memory.jsonl, multi-model
|
||||
routing + GBNF grammar passthrough, project file-tree context,
|
||||
cost/usage observability, /tokenize endpoint integration, project
|
||||
overlay (.aish.lua + sha256-pinned trust ledger), cloud preplanner
|
||||
→ local executor split.
|
||||
* Source-of-truth: git.reauktion.de/marfrit/aish, tagged v0.1.0.
|
||||
|
||||
-- Markus Fritsche <mfritsche@reauktion.de> Mon, 25 May 2026 00:00:00 +0000
|
||||
Vendored
+20
@@ -0,0 +1,20 @@
|
||||
Source: aish
|
||||
Section: shells
|
||||
Priority: optional
|
||||
Maintainer: Markus Fritsche <mfritsche@reauktion.de>
|
||||
Standards-Version: 4.6.2
|
||||
Homepage: https://git.reauktion.de/marfrit/aish
|
||||
|
||||
Package: aish
|
||||
Architecture: all
|
||||
Depends: ${misc:Depends}, luajit, libreadline8t64 | libreadline8, libcurl4t64 | libcurl4
|
||||
Description: AI-augmented conversational shell (LuaJIT, FFI-only)
|
||||
aish is an interactive REPL that interleaves shell execution and language-
|
||||
model conversation against llama.cpp HTTP brokers. Implementation is pure
|
||||
LuaJIT 2.x with FFI bindings to libcurl, GNU readline, and libc — no C
|
||||
extensions, no build step.
|
||||
.
|
||||
Modules install under /usr/share/lua/5.1/aish/. The launcher is
|
||||
/usr/bin/aish. Example configuration is at
|
||||
/usr/share/doc/aish/examples/config.lua (copy to ~/.config/aish/config.lua
|
||||
and adapt).
|
||||
Vendored
+30
@@ -0,0 +1,30 @@
|
||||
Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
|
||||
Upstream-Name: aish
|
||||
Source: https://git.reauktion.de/marfrit/aish
|
||||
|
||||
Files: *
|
||||
Copyright: 2026 Markus Fritsche <mfritsche@reauktion.de>
|
||||
License: MIT
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
.
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND.
|
||||
|
||||
Files: vendor/dkjson.lua
|
||||
Copyright: 2010-2014 David Heiko Kolf
|
||||
License: MIT
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including the rights to use, copy,
|
||||
modify, merge, publish, distribute, sublicense, and/or sell copies of the
|
||||
Software, and to permit persons to whom the Software is furnished to do so,
|
||||
subject to the following conditions: the above copyright notice and this
|
||||
permission notice shall be included in all copies or substantial portions of
|
||||
the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND.
|
||||
+150
@@ -0,0 +1,150 @@
|
||||
#!/bin/bash
# Package pre-built chromium-fourier artifacts into a .deb.
#
# Chromium can't be compiled natively on any available aarch64 runner
# (clang version wall — chromium requires its internal clang fork).
# The build is cross-compiled on CT 220 (data, x86_64 Ryzen 7).
# This script expects the build artifacts to exist at BUILD_DIR
# (default: fetched from CT 220 via SSH).
#
# Sibling Arch package: ../../arch/chromium-fourier/PKGBUILD
set -euo pipefail

PKGVER=148.0.7778.178
EPOCH=1
PKGREL=1
ARCH=arm64

# Directory containing this script; debian/ is read from here and the .deb is
# written here.
HERE=$(dirname "$(readlink -f "$0")")
# Fixed timestamp for reproducible output: 1779854400 = 2026-05-27 04:00 UTC.
# NOTE(review): debian/changelog is dated 2026-05-24 09:00 +0200 (epoch
# 1779606000). The value is left untouched so rebuilt artifacts stay
# byte-identical; confirm which date was intended.
export SOURCE_DATE_EPOCH=1779854400

# Optional caller override: a directory that already holds the build outputs.
BUILD_DIR="${BUILD_DIR:-}"

work=$(mktemp -d)
# Single quotes defer expansion until the trap fires, and the inner double
# quotes keep a temp path containing whitespace as a single rm argument.
trap 'rm -rf -- "$work"' EXIT

if [ -z "$BUILD_DIR" ]; then
    echo "BUILD_DIR not set — fetching artifacts from CT 220 on data..."
    BUILD_DIR="$work/artifacts"
    mkdir -p "$BUILD_DIR"
    # Stream a tar of the selected outputs out of the container and unpack it
    # locally; pipefail makes a failure on either side abort the script.
    ssh root@data "pct exec 220 -- tar -cf - -C /build/chromium/src/out/Default \
        chrome chrome_crashpad_handler \
        libEGL.so libGLESv2.so libvk_swiftshader.so libvulkan.so.1 \
        vk_swiftshader_icd.json \
        chrome_100_percent.pak chrome_200_percent.pak resources.pak \
        v8_context_snapshot.bin snapshot_blob.bin icudtl.dat \
        locales/" | tar -xf - -C "$BUILD_DIR"
fi

ROOT="$work/pkgroot"

# Mandatory binaries: a missing file makes install fail and aborts the build.
install -Dm755 "$BUILD_DIR/chrome" "$ROOT/usr/lib/chromium/chromium"
install -Dm755 "$BUILD_DIR/chrome_crashpad_handler" "$ROOT/usr/lib/chromium/chrome_crashpad_handler"

# Optional artifacts: each is packaged only when the build produced it.
for so in libEGL.so libGLESv2.so libvk_swiftshader.so libvulkan.so.1; do
    if [ -f "$BUILD_DIR/$so" ]; then
        install -Dm755 "$BUILD_DIR/$so" "$ROOT/usr/lib/chromium/$so"
    fi
done

# When nothing matches, the glob stays literal and the -f test skips it.
for icd in "$BUILD_DIR"/*_icd.json; do
    if [ -f "$icd" ]; then
        install -Dm644 "$icd" "$ROOT/usr/lib/chromium/$(basename "$icd")"
    fi
done

for f in chrome_100_percent.pak chrome_200_percent.pak resources.pak \
         v8_context_snapshot.bin snapshot_blob.bin icudtl.dat; do
    if [ -f "$BUILD_DIR/$f" ]; then
        install -Dm644 "$BUILD_DIR/$f" "$ROOT/usr/lib/chromium/$f"
    fi
done

if [ -d "$BUILD_DIR/locales" ]; then
    install -dm755 "$ROOT/usr/lib/chromium/locales"
    cp -r "$BUILD_DIR/locales/"* "$ROOT/usr/lib/chromium/locales/"
fi

# Launcher. The heredoc delimiter is quoted, so the text below is written
# verbatim (no expansion at package-build time). It adds
# --disable-features=Vulkan unless the user passed a Vulkan-related flag.
install -dm755 "$ROOT/usr/bin"
cat > "$ROOT/usr/bin/chromium-fourier" <<'LAUNCHER'
#!/bin/bash
USER_HANDLES_VULKAN=0
for arg in "$@"; do
    case "$arg" in
        --use-vulkan*|--enable-features=*Vulkan*|--disable-features=*Vulkan*|--use-angle=vulkan*)
            USER_HANDLES_VULKAN=1
            break
            ;;
    esac
done

vulkan_default=()
if [ "$USER_HANDLES_VULKAN" = 0 ]; then
    vulkan_default=(--disable-features=Vulkan)
fi

exec /usr/lib/chromium/chromium \
    --ozone-platform=wayland \
    --use-gl=angle --use-angle=gles \
    --enable-features=AcceleratedVideoDecoder \
    "${vulkan_default[@]}" \
    "$@"
LAUNCHER
chmod 0755 "$ROOT/usr/bin/chromium-fourier"

mkdir -p "$ROOT/usr/share/doc/chromium-fourier" "$ROOT/DEBIAN"
install -Dm644 "$HERE/debian/copyright" \
    "$ROOT/usr/share/doc/chromium-fourier/copyright"
install -Dm644 "$HERE/debian/changelog" \
    "$ROOT/usr/share/doc/chromium-fourier/changelog.Debian"
# -n keeps the original name and timestamp out of the gzip header.
gzip -9 -n "$ROOT/usr/share/doc/chromium-fourier/changelog.Debian"

# Installed-Size is the staged tree's size in KiB as reported by du -sk.
ISIZE=$(du -sk "$ROOT" | awk '{print $1}')
# Binary control file. Continuation lines of a multi-line field must start
# with a space; a lone " ." is a paragraph break inside Description.
cat > "$ROOT/DEBIAN/control" <<EOF
Package: chromium-fourier
Version: ${EPOCH}:${PKGVER}-${PKGREL}
Section: web
Priority: optional
Architecture: ${ARCH}
Installed-Size: ${ISIZE}
Depends: libasound2,
 libatk-bridge2.0-0,
 libatk1.0-0,
 libcairo2,
 libcups2,
 libdbus-1-3,
 libdrm2,
 libexpat1,
 libfontconfig1,
 libfreetype6,
 libgbm1,
 libglib2.0-0,
 libgtk-3-0,
 libnspr4,
 libnss3,
 libpango-1.0-0,
 libpulse0,
 libva2,
 libwayland-client0,
 libx11-6,
 libxcb1,
 libxkbcommon0,
 libpipewire-0.3-0,
 fonts-liberation,
 v4l-utils
Provides: www-browser
Conflicts: chromium
Maintainer: Markus Fritsche <mfritsche@reauktion.de>
Homepage: https://www.chromium.org/
Description: Chromium with V4L2 HW video decode for Rockchip (Wayland + mainline)
 Chromium ${PKGVER} with three patches enabling V4L2 hardware video
 decoding on mainline Linux / Wayland for Rockchip SoCs (RK3566 hantro,
 RK3588 VDPU381).
 .
 Cross-compiled from x86_64 using chromium's bundled clang (upstream
 LLVM cannot compile chromium). Runtime target is aarch64.
 .
 Patches: enable-v4l2-decoder-default, wayland-allow-direct-egl-gles2,
 nv12-external-oes-on-modifier-external-only.
 .
 Launcher at /usr/bin/chromium-fourier defaults to Wayland + ANGLE/GLES
 with Vulkan disabled (panvk on RK3566 breaks V4L2 dispatch).
EOF

# The epoch's colon is written as %3a in the output file name.
DEB_OUT="chromium-fourier_${EPOCH}%3a${PKGVER}-${PKGREL}_${ARCH}.deb"
dpkg-deb --root-owner-group --build "$ROOT" "$HERE/$DEB_OUT"
echo "built: $HERE/$DEB_OUT"
|
||||
+8
@@ -0,0 +1,8 @@
|
||||
chromium-fourier (1:148.0.7778.178-1) trixie; urgency=medium
|
||||
|
||||
* Chromium 148.0.7778.178 with V4L2 HW decode patches for Rockchip.
|
||||
* Cross-compiled from x86_64 using chromium's bundled clang.
|
||||
* Three fourier patches: enable-v4l2-decoder-default,
|
||||
wayland-allow-direct-egl-gles2, nv12-external-oes-on-modifier-external-only.
|
||||
|
||||
-- Markus Fritsche <mfritsche@reauktion.de> Sun, 24 May 2026 09:00:00 +0200
|
||||
+32
@@ -0,0 +1,32 @@
|
||||
Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
|
||||
Upstream-Name: Chromium
|
||||
Upstream-Contact: chromium-dev@chromium.org
|
||||
Source: https://www.chromium.org/
|
||||
|
||||
Files: *
|
||||
Copyright: The Chromium Authors
|
||||
License: BSD-3-Clause
|
||||
|
||||
Files: debian/*
|
||||
Copyright: 2026 Markus Fritsche <mfritsche@reauktion.de>
|
||||
License: BSD-3-Clause
|
||||
|
||||
License: BSD-3-Clause
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
.
|
||||
1. Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
.
|
||||
3. Neither the name of the copyright holder nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED.
|
||||
Vendored
+1
-1
@@ -16,7 +16,7 @@ work=$(mktemp -d)
|
||||
trap "rm -rf $work" EXIT
|
||||
|
||||
cd "$work"
|
||||
curl -sSLfo his.tar.gz \
|
||||
curl --connect-timeout 10 --max-time 600 --retry 3 --retry-delay 5 -sSLfo his.tar.gz \
|
||||
"https://git.reauktion.de/marfrit/claude-his-agent/archive/v${PKGVER}.tar.gz"
|
||||
echo "$HIS_TARBALL_SHA256 his.tar.gz" | sha256sum -c
|
||||
tar xzf his.tar.gz
|
||||
|
||||
+200
@@ -0,0 +1,200 @@
|
||||
#!/bin/bash
|
||||
# Build daedalus-v4l2-dkms_<ver>_all.deb (kernel module via DKMS).
|
||||
#
|
||||
# Installs kernel/ source tree to /usr/src/daedalus_v4l2-${PKGVER}/
|
||||
# plus a dkms.conf. Postinst registers with DKMS (dkms add + build +
|
||||
# install). Prerm deregisters. Result: the daedalus_v4l2 module
|
||||
# auto-rebuilds against any installed kernel headers without users
|
||||
# needing to remember to dkms-add it.
|
||||
#
|
||||
# Architecture: all. The kernel module itself is per-kernel-version,
|
||||
# but the SOURCE package is arch-independent.
|
||||
#
|
||||
# Sibling Arch package: ../../arch/daedalus-v4l2-dkms/PKGBUILD
|
||||
# Sibling userspace package: ../daedalus-v4l2/build-deb.sh
|
||||
set -euo pipefail
|
||||
|
||||
UPSTREAM_COMMIT=872eec505eb91b561892d02a0526749348ddc121
|
||||
PKGVER=0.1.0+r45+g872eec5
|
||||
PKGREL=1 # reset for new upstream pin (872eec5 — PROTO_MAX_PAYLOAD 64 KiB -> 1 MiB, closes #19); lock-step with daedalus-v4l2 0.1.0+r45+g872eec5 REQUIRED
|
||||
MODULE_NAME=daedalus_v4l2
|
||||
|
||||
HERE=$(dirname "$(readlink -f "$0")")
|
||||
|
||||
# Reproducible build. 1779231600 = 2026-05-19 23:00 UTC (NOTE: Phase 8.13 close is dated 2026-05-18 23:00 UTC elsewhere — confirm which day is intended).
|
||||
export SOURCE_DATE_EPOCH=1779231600
|
||||
|
||||
work=$(mktemp -d)
|
||||
# Remove the scratch directory on every exit path. Single quotes defer
# expansion until the trap fires and keep the path quoted, so a TMPDIR
# containing whitespace or glob characters cannot make rm -rf operate on
# the wrong paths.
trap 'rm -rf "$work"' EXIT
|
||||
|
||||
cd "$work"
|
||||
curl --connect-timeout 10 --max-time 600 --retry 3 --retry-delay 5 -sSLfo daedalus-v4l2.tar.gz \
|
||||
"https://git.reauktion.de/reauktion/daedalus-v4l2/archive/${UPSTREAM_COMMIT}.tar.gz"
|
||||
tar xzf daedalus-v4l2.tar.gz
|
||||
SRCDIR=daedalus-v4l2
|
||||
|
||||
ROOT="$work/pkgroot"
|
||||
SRCROOT="$ROOT/usr/src/${MODULE_NAME}-${PKGVER}"
|
||||
|
||||
mkdir -p "$SRCROOT/include" \
|
||||
"$ROOT/DEBIAN" \
|
||||
"$ROOT/usr/share/doc/daedalus-v4l2-dkms"
|
||||
|
||||
# Copy kernel/ source files to the DKMS source dir.
|
||||
cp -r "$work/$SRCDIR/kernel/." "$SRCROOT/"
|
||||
|
||||
# Embed the shared protocol header inline (rather than referencing
|
||||
# ../include/ which doesn't exist after DKMS extracts the tree).
|
||||
# Patch the Makefile to find it at $SRCROOT/include/ instead.
|
||||
install -m 644 "$work/$SRCDIR/include/daedalus_v4l2_proto.h" \
|
||||
"$SRCROOT/include/daedalus_v4l2_proto.h"
|
||||
sed -i 's|-I\$(src)/\.\./include|-I$(src)/include|' "$SRCROOT/Makefile"
|
||||
|
||||
# Generate dkms.conf with the actual version substituted.
|
||||
cat > "$SRCROOT/dkms.conf" <<EOF
|
||||
PACKAGE_NAME="${MODULE_NAME}"
|
||||
PACKAGE_VERSION="${PKGVER}"
|
||||
|
||||
BUILT_MODULE_NAME[0]="${MODULE_NAME}"
|
||||
DEST_MODULE_LOCATION[0]="/updates"
|
||||
|
||||
MAKE[0]="make KERNELDIR=/lib/modules/\${kernelver}/build all"
|
||||
CLEAN="make KERNELDIR=/lib/modules/\${kernelver}/build clean"
|
||||
|
||||
AUTOINSTALL="yes"
|
||||
EOF
|
||||
|
||||
# Doc
|
||||
install -m 644 "$work/$SRCDIR/README.md" \
|
||||
"$ROOT/usr/share/doc/daedalus-v4l2-dkms/README.md"
|
||||
install -Dm644 "$HERE/debian/copyright" "$ROOT/usr/share/doc/daedalus-v4l2-dkms/copyright"
|
||||
install -Dm644 "$HERE/debian/changelog" "$ROOT/usr/share/doc/daedalus-v4l2-dkms/changelog.Debian"
|
||||
gzip -9 -n "$ROOT/usr/share/doc/daedalus-v4l2-dkms/changelog.Debian"
|
||||
|
||||
# DKMS post-install / pre-remove hooks.
|
||||
cat > "$ROOT/DEBIAN/postinst" <<EOF
|
||||
#!/bin/sh
|
||||
set -e
|
||||
|
||||
NAME=${MODULE_NAME}
|
||||
VERSION=${PKGVER}
|
||||
|
||||
# Yellow + bold ANSI for the warning so it stands out in apt's
|
||||
# stream of "Setting up" lines. Disable colour on non-TTY.
|
||||
if [ -t 1 ]; then
|
||||
Y=\$(printf '\\033[1;33m'); R=\$(printf '\\033[0m')
|
||||
else
|
||||
Y=''; R=''
|
||||
fi
|
||||
|
||||
warn() {
|
||||
printf '%s==> daedalus-v4l2-dkms: %s%s\\n' "\$Y" "\$1" "\$R" >&2
|
||||
}
|
||||
|
||||
if [ "\$1" = "configure" ]; then
|
||||
if ! command -v dkms >/dev/null 2>&1; then
|
||||
warn "dkms not installed; module \$NAME/\$VERSION not registered."
|
||||
warn "Install 'dkms' then run: dkms add \$NAME/\$VERSION && dkms autoinstall"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
dkms add "\$NAME/\$VERSION" 2>/dev/null || true
|
||||
|
||||
# Enumerate every kernel whose headers are actually present
|
||||
# (/lib/modules/<kver>/build resolves to a directory). We iterate
|
||||
# all of them — not just \$(uname -r) — so that installing this
|
||||
# package after a kernel update covers the newly-installed kernel
|
||||
# too, and so that a later kernel-headers install for a previously
|
||||
# uncovered version gets picked up on dpkg-reconfigure. Without
|
||||
# this, autoinstall (which targets only the running kernel) leaves
|
||||
# /dev/daedalus-v4l2 absent after a kernel switch + reboot
|
||||
# (marfrit/marfrit-packages#64).
|
||||
kvers=''
|
||||
for d in /lib/modules/*/build; do
|
||||
[ -d "\$d" ] || continue
|
||||
k=\$(basename "\$(dirname "\$d")")
|
||||
kvers="\$kvers \$k"
|
||||
done
|
||||
|
||||
if [ -z "\$kvers" ]; then
|
||||
warn ""
|
||||
warn "No kernels with headers found under /lib/modules/*/build."
|
||||
warn "Install kernel headers (e.g. linux-headers-rpi-2712 on Pi OS)"
|
||||
warn "then finish with:"
|
||||
warn " sudo dkms autoinstall \$NAME/\$VERSION"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
failed=''
|
||||
for k in \$kvers; do
|
||||
dkms autoinstall -k "\$k" "\$NAME/\$VERSION" >/dev/null 2>&1 || true
|
||||
s=\$(dkms status -m "\$NAME" -v "\$VERSION" -k "\$k" 2>/dev/null || true)
|
||||
if ! printf '%s\\n' "\$s" | grep -q -E 'installed|loaded'; then
|
||||
failed="\$failed \$k"
|
||||
fi
|
||||
done
|
||||
|
||||
if [ -n "\$failed" ]; then
|
||||
warn ""
|
||||
warn "DKMS build did NOT land for kernel(s):\$failed"
|
||||
warn ""
|
||||
warn "Most likely cause: kernel headers missing for those versions."
|
||||
warn " Raspberry Pi OS / Pi 5: apt install linux-headers-rpi-2712"
|
||||
warn " Debian generic: apt install linux-headers-<version>"
|
||||
warn ""
|
||||
warn "After installing headers, finish with:"
|
||||
for k in \$failed; do
|
||||
warn " sudo dkms autoinstall -k \$k \$NAME/\$VERSION"
|
||||
done
|
||||
warn " sudo modprobe daedalus_v4l2 (after booting that kernel)"
|
||||
warn ""
|
||||
warn "Until then daedalus_v4l2 will NOT be loadable on those kernels"
|
||||
warn "and the userspace daedalus-v4l2 daemon will have nothing to talk to."
|
||||
fi
|
||||
fi
|
||||
|
||||
#DEBHELPER#
|
||||
EOF
|
||||
chmod 755 "$ROOT/DEBIAN/postinst"
|
||||
|
||||
# DKMS pre-remove hook. NAME/VERSION are baked in at build time from
# MODULE_NAME/PKGVER; every other dollar sign is escaped so it is
# evaluated when the maintainer script runs, not while this heredoc is
# written.
#
# Deregister on "upgrade" and "deconfigure" as well as "remove" (the same
# set the dh_dkms prerm template handles). On upgrade dpkg replaces the
# old /usr/src/<name>-<version> tree, so leaving the old version
# registered would strand a DKMS entry whose source is gone. The incoming
# package's postinst re-adds and rebuilds.
cat > "$ROOT/DEBIAN/prerm" <<EOF
#!/bin/sh
set -e

NAME=${MODULE_NAME}
VERSION=${PKGVER}

case "\$1" in
    remove|upgrade|deconfigure)
        if command -v dkms >/dev/null 2>&1; then
            dkms remove "\$NAME/\$VERSION" --all || true
        fi
        ;;
esac

#DEBHELPER#
EOF
chmod 755 "$ROOT/DEBIAN/prerm"
|
||||
|
||||
cat > "$ROOT/DEBIAN/control" <<EOF
|
||||
Package: daedalus-v4l2-dkms
|
||||
Version: ${PKGVER}-${PKGREL}
|
||||
Section: kernel
|
||||
Priority: optional
|
||||
Architecture: all
|
||||
Depends: dkms (>= 2.1.0.0)
|
||||
Recommends: daedalus-v4l2, linux-headers-rpi-2712 | linux-headers-rpi | linux-headers-generic | linux-headers
|
||||
Maintainer: Markus Fritsche <mfritsche@reauktion.de>
|
||||
Homepage: https://git.reauktion.de/reauktion/daedalus-v4l2
|
||||
Description: V4L2 stateless decoder shim kernel module (DKMS) — Pi 5 / CM5
|
||||
Out-of-tree V4L2 m2m kernel module for the daedalus-v4l2 stack on
|
||||
Raspberry Pi 5 / CM5. Registers /dev/videoNN (V4L2 stateless m2m
|
||||
decoder), /dev/mediaNN (media controller with request API), and
|
||||
/dev/daedalus-v4l2 (chardev bridge to the userspace daemon).
|
||||
.
|
||||
The actual decode happens in the userspace daemon shipped by the
|
||||
daedalus-v4l2 package — this module is just the kernel-side V4L2
|
||||
plumbing. Install both to actually serve VAAPI / V4L2 clients.
|
||||
.
|
||||
Built via DKMS against the running kernel's headers.
|
||||
EOF
|
||||
|
||||
DEB_OUT="daedalus-v4l2-dkms_${PKGVER}-${PKGREL}_all.deb"
|
||||
dpkg-deb --root-owner-group --build "$ROOT" "$HERE/$DEB_OUT"
|
||||
echo "built: $HERE/$DEB_OUT"
|
||||
+167
@@ -0,0 +1,167 @@
|
||||
daedalus-v4l2-dkms (0.1.0+r45+g872eec5-1) bookworm trixie; urgency=medium
|
||||
|
||||
* Bump to 872eec5 — picks up daedalus-v4l2 PR #20 (closes #19).
|
||||
Wire-protocol cap DAEDALUS_PROTO_MAX_PAYLOAD raised from 64 KiB
|
||||
to 1 MiB in include/daedalus_v4l2_proto.h. The kernel module
|
||||
inherits the larger DAEDALUS_MAX_BITSTREAM via the same #define
|
||||
and daedalus_fill_output_fmt now reports OUTPUT_MPLANE
|
||||
sizeimage = ~1 MiB instead of 65484.
|
||||
* Skips the r33 -> r45 commit range — between 5d8b436 and 872eec5
|
||||
only one kernel/include change landed (the PROTO_MAX_PAYLOAD
|
||||
bump above). The intervening daemon-only bumps (r37 / r39 /
|
||||
r41 / r43) didn't touch kernel/ or include/ at all.
|
||||
* Effective wire cap is min(kernel, daemon) — lock-step install
|
||||
WITH daedalus-v4l2 0.1.0+r45+g872eec5 REQUIRED.
|
||||
* Allocations (kmemdup / kmalloc on payload, vb2 plane backing)
|
||||
are dynamic and sized per-payload at runtime; the bump only
|
||||
sets the ceiling. KMALLOC_MAX_SIZE on aarch64 SLUB is several
|
||||
MiB so 1 MiB is well within bounds.
|
||||
|
||||
-- Markus Fritsche <mfritsche@reauktion.de> Fri, 22 May 2026 21:00:00 +0000
|
||||
|
||||
daedalus-v4l2-dkms (0.1.0+r33+g5d8b436-1) bookworm trixie; urgency=medium
|
||||
|
||||
* Bump to 5d8b436 — reverts daedalus-v4l2 PRs #7 + #8. Kernel
|
||||
module returns to the pre-#7 buf_done_and_job_finish completion
|
||||
model: no src/dst lifecycle decoupling, no parked dst_bufs, no
|
||||
1:1-contract violation against libva-v4l2-request-fourier
|
||||
(closes daedalus-v4l2#9 + #10 as won't-fix at this layer; proper
|
||||
fix tracked at daedalus-v4l2#11).
|
||||
* Wire-protocol drops 1 → 0; lock-step install with daedalus-v4l2
|
||||
0.1.0+r33+g5d8b436 REQUIRED.
|
||||
* Carries forward the #64 multi-kernel postinst fix.
|
||||
|
||||
-- Markus Fritsche <mfritsche@reauktion.de> Thu, 21 May 2026 14:50:00 +0000
|
||||
|
||||
daedalus-v4l2-dkms (0.1.0+r30+g6ffe92b-1) bookworm trixie; urgency=medium
|
||||
|
||||
* Bump to 6ffe92b — fixes the kernel panic regression introduced
|
||||
by 79256dc's split-completion design (closes daedalus-v4l2#8).
|
||||
`device_run` now removes both src + dst from `m2m_ctx`'s
|
||||
rdy_queue at pickup time, not at `buf_done` time. Without
|
||||
this, after `SRC_CONSUMED`'s `job_finish` released the m2m
|
||||
scheduler, the NEXT `device_run` saw the still-queued parked
|
||||
dst_buf and paired it with a fresh src — two inflight entries
|
||||
referencing the same vb2_buffer, the later `HAS_PIXELS`
|
||||
triggered list_del on an already-detached list_head, smashing
|
||||
the rdy_queue → hard reboot on Pi CM5 during `mpv vaapi-copy`
|
||||
playback of 720p H.264 (2026-05-21).
|
||||
* Wire protocol unchanged — DAEDALUS_PROTO_VERSION stays at 1.
|
||||
Daemon (userspace daedalus-v4l2 package) need NOT bump in
|
||||
lockstep with this DKMS update; the existing
|
||||
daedalus-v4l2 0.1.0+r28+g79256dc is wire-compatible with
|
||||
daedalus-v4l2-dkms 0.1.0+r30+g6ffe92b.
|
||||
* Carries forward the #64 multi-kernel postinst fix.
|
||||
|
||||
-- Markus Fritsche <mfritsche@reauktion.de> Thu, 21 May 2026 14:00:00 +0000
|
||||
|
||||
daedalus-v4l2-dkms (0.1.0+r28+g79256dc-1) bookworm trixie; urgency=medium
|
||||
|
||||
* Bump to 79256dc — H.264 B-frame display reorder fix (closes
|
||||
daedalus-v4l2#6). libavcodec's H.264 decoder reorders output to
|
||||
display order before returning from avcodec_receive_frame; the
|
||||
daemon was binding each REQ_DECODE's pixels to the cookie of the
|
||||
bitstream that triggered the receive_frame call, not the cookie
|
||||
of the bitstream that actually produced the picture. For B-frame
|
||||
sequences this paired cookie N's CAPTURE buffer with cookie N-2's
|
||||
pixels and silently lost intermediate frames — visible as
|
||||
"2 1 4 3 6 5" frame pairing in mpv / Firefox on Pi CM5.
|
||||
* Wire-protocol bump (DAEDALUS_PROTO_VERSION 0 → 1): REQ_DECODE
|
||||
gains __u64 src_pts; RESP_FRAME gains __u32 flags +
|
||||
__u64 output_src_pts. Kernel + daemon must install atomically
|
||||
(this package + daedalus-v4l2 0.1.0+r28+g79256dc).
|
||||
* Carries forward the #64 multi-kernel postinst fix from -2:
|
||||
autoinstall for every /lib/modules/*/build that resolves to real
|
||||
headers, not just $(uname -r).
|
||||
* Closes #64.
|
||||
|
||||
-- Markus Fritsche <mfritsche@reauktion.de> Thu, 21 May 2026 12:00:00 +0000
|
||||
|
||||
daedalus-v4l2-dkms (0.1.0+r24+gf0d4186-2) bookworm trixie; urgency=medium
|
||||
|
||||
* postinst: autoinstall for every installed kernel with headers, not
|
||||
just the running one. Previously `dkms autoinstall $NAME/$VERSION`
|
||||
built only against `$(uname -r)`, so installing the package on
|
||||
kernel A and then rebooting into a separately-installed kernel B
|
||||
left /lib/modules/B/updates/dkms/ empty — /dev/daedalus-v4l2 absent,
|
||||
daedalus daemon nothing to talk to, browser/VAAPI silently falling
|
||||
back to software with no obvious diagnostic. Now we enumerate every
|
||||
/lib/modules/*/build that resolves to a real directory and run
|
||||
`dkms autoinstall -k <kver>` for each, reporting per-kernel failure
|
||||
only when headers are missing. Closes #64.
|
||||
|
||||
-- Markus Fritsche <mfritsche@reauktion.de> Thu, 21 May 2026 09:30:00 +0000
|
||||
|
||||
daedalus-v4l2-dkms (0.1.0+r24+gf0d4186-1) bookworm trixie; urgency=medium
|
||||
|
||||
* Bump to f0d4186 — per-ctx vb2 lock fix. daedalus_queue_init now
|
||||
uses ctx->vb_mutex instead of ctx->dev->m2m_lock for each
|
||||
vb2_queue's lock, unblocking Firefox's multi-process VAAPI
|
||||
clients (they were colliding on the device-wide mutex and one
|
||||
would EBUSY-fail S_FMT while another was mid-streamon).
|
||||
|
||||
-- Markus Fritsche <mfritsche@reauktion.de> Wed, 20 May 2026 23:00:00 +0000
|
||||
|
||||
daedalus-v4l2-dkms (0.1.0+r22+g462aa4b-1) bookworm trixie; urgency=medium
|
||||
|
||||
* Bump to 462aa4b — kernel device_run() now calls
|
||||
v4l2_ctrl_request_setup() before reading the H.264 stateless
|
||||
control values from the bound media_request, so the values
|
||||
daedalus ships to the userspace daemon match what the V4L2
|
||||
client (libva-v4l2-request-fourier) actually set. Closes the
|
||||
libva→kernel control-binding gap that was causing decoded
|
||||
frames to come back as best-effort zero garbage from libavcodec.
|
||||
* Wire-ABI lockstep with daedalus-v4l2 0.1.0+r22+g462aa4b.
|
||||
|
||||
-- Markus Fritsche <mfritsche@reauktion.de> Wed, 20 May 2026 22:00:00 +0000
|
||||
|
||||
daedalus-v4l2-dkms (0.1.0+r20+g3dd0eb0-1) bookworm trixie; urgency=medium
|
||||
|
||||
* Bump to 3dd0eb0 — DAEMON-PPS kernel-side changes. device_run()
|
||||
now reads the V4L2 H.264 stateless control values from the bound
|
||||
media_request and ships them to the daemon inside REQ_DECODE
|
||||
via the new struct daedalus_h264_meta block (gated on
|
||||
DAEDALUS_REQ_FLAG_H264_META). Required for H.264 decode to
|
||||
work via the libva-v4l2-request -> daedalus daemon path; daemon
|
||||
synthesises AnnexB SPS+PPS NAL units from the structs.
|
||||
* Wire-ABI lockstep with daedalus-v4l2 0.1.0+r20+g3dd0eb0 — install
|
||||
both packages together.
|
||||
|
||||
-- Markus Fritsche <mfritsche@reauktion.de> Wed, 20 May 2026 21:00:00 +0000
|
||||
|
||||
daedalus-v4l2-dkms (0.1.0+r18+g481279c-1) bookworm trixie; urgency=medium
|
||||
|
||||
* Bump to 481279c in lockstep with the userspace daedalus-v4l2
|
||||
package (which ships the new systemd unit + modules-load.d
|
||||
drop-in). No kernel-module behaviour change in this commit —
|
||||
bump is purely to keep DKMS pkgver and userspace pkgver aligned
|
||||
so /etc/modules-load.d/daedalus-v4l2.conf has a module to load.
|
||||
|
||||
-- Markus Fritsche <mfritsche@reauktion.de> Wed, 20 May 2026 16:00:00 +0000
|
||||
|
||||
daedalus-v4l2-dkms (0.1.0+r17+gf0cd29a-1) bookworm trixie; urgency=medium
|
||||
|
||||
* Bump to f0cd29a — Linux 6.18 compatibility fix. v4l2_fh_add /
|
||||
v4l2_fh_del gained a `struct file *filp` second argument at
|
||||
v6.18; module source now uses LINUX_VERSION_CODE conditional so
|
||||
it builds against 6.12 (Pi 5 stock RPi-2712 kernel) AND 6.18+
|
||||
(RPi-2712 / arm64). Verified DKMS rebuild on higgs
|
||||
(6.18.29+rpt-rpi-2712) succeeds + /dev/daedalus-v4l2 appears.
|
||||
* Recommends widened so apt pulls the right kernel-headers
|
||||
metapackage on RPi OS: linux-headers-rpi-2712 first, then
|
||||
linux-headers-rpi, then linux-headers-generic, then
|
||||
linux-headers.
|
||||
|
||||
-- Markus Fritsche <mfritsche@reauktion.de> Wed, 20 May 2026 10:30:00 +0000
|
||||
|
||||
daedalus-v4l2-dkms (0.1.0+r16+gf55b2cd-1) bookworm trixie; urgency=medium
|
||||
|
||||
* Initial Debian DKMS packaging for the daedalus_v4l2 kernel module.
|
||||
* Pinned to f55b2cd (Phase 8.13 close): kernel-side framework
|
||||
integration (V4L2 m2m, dmabuf-export, media controller, request
|
||||
API, NV12 single-plane + NV12M + P010 CAPTURE) that closes the
|
||||
libva→/dev/video0→daemon round-trip with byte-exact pixels.
|
||||
* Auto-builds via DKMS against the running kernel's headers.
|
||||
* Companion userspace package: daedalus-v4l2 (daemon + tools).
|
||||
|
||||
-- Markus Fritsche <mfritsche@reauktion.de> Mon, 18 May 2026 23:00:00 +0000
|
||||
+24
@@ -0,0 +1,24 @@
|
||||
Source: daedalus-v4l2-dkms
|
||||
Section: kernel
|
||||
Priority: optional
|
||||
Maintainer: Markus Fritsche <mfritsche@reauktion.de>
|
||||
Build-Depends: debhelper-compat (= 13)
|
||||
Standards-Version: 4.6.2
|
||||
Homepage: https://git.reauktion.de/reauktion/daedalus-v4l2
|
||||
|
||||
Package: daedalus-v4l2-dkms
|
||||
Architecture: all
|
||||
Depends: ${misc:Depends}, dkms (>= 2.1.0.0)
|
||||
Recommends: daedalus-v4l2,
|
||||
linux-headers-rpi-2712 | linux-headers-rpi | linux-headers-generic | linux-headers
|
||||
Description: V4L2 stateless decoder shim kernel module (DKMS) — Pi 5 / CM5
|
||||
Out-of-tree V4L2 m2m kernel module for the daedalus-v4l2 stack on
|
||||
Raspberry Pi 5 / CM5. Registers /dev/videoNN (V4L2 stateless m2m
|
||||
decoder), /dev/mediaNN (media controller with request API), and
|
||||
/dev/daedalus-v4l2 (chardev bridge to the userspace daemon).
|
||||
.
|
||||
The actual decode happens in the userspace daemon shipped by the
|
||||
daedalus-v4l2 package — this module is just the kernel-side V4L2
|
||||
plumbing. Install both to actually serve VAAPI / V4L2 clients.
|
||||
.
|
||||
Built via DKMS against the running kernel's headers.
|
||||
+21
@@ -0,0 +1,21 @@
|
||||
Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
|
||||
Upstream-Name: daedalus-v4l2
|
||||
Upstream-Contact: Markus Fritsche <fritsche.markus@gmail.com>
|
||||
Source: https://git.reauktion.de/reauktion/daedalus-v4l2
|
||||
|
||||
Files: *
|
||||
Copyright: 2026 Markus Fritsche <fritsche.markus@gmail.com>
|
||||
License: GPL-2.0-or-later
|
||||
Comment:
|
||||
Kernel module (loadable into the Linux kernel) — GPL-2.0-or-later.
|
||||
The shared protocol header carries an additional Linux-syscall-note
|
||||
exception so userspace inclusion is BSD-clean.
|
||||
|
||||
License: GPL-2.0-or-later
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
.
|
||||
On Debian systems, the complete text of the GNU General Public
|
||||
License v2 can be found in `/usr/share/common-licenses/GPL-2'.
|
||||
+214
@@ -0,0 +1,214 @@
|
||||
#!/bin/bash
|
||||
# Build daedalus-v4l2_<ver>_arm64.deb (userspace daemon + test tools).
|
||||
#
|
||||
# Mirrors arch/daedalus-v4l2 (Arch Linux build). The companion DKMS
|
||||
# package (debian/daedalus-v4l2-dkms) carries the kernel module
|
||||
# separately so apt/dpkg can split kernel-version-tied and userspace
|
||||
# upgrade cadence.
|
||||
#
|
||||
# Sibling Arch package: ../../arch/daedalus-v4l2/PKGBUILD
|
||||
# Sibling DKMS package: ../daedalus-v4l2-dkms/build-deb.sh
|
||||
# Upstream repo: https://git.reauktion.de/reauktion/daedalus-v4l2
|
||||
set -euo pipefail
|
||||
|
||||
# 6e6dfa1 = picks up daedalus-v4l2 PR #16 — daemon now dlopens
|
||||
# the Kwiboo fourier fork's libavcodec.so.62 / libavformat.so.62 /
|
||||
# libavutil.so.60 at /opt/fourier instead of Debian-stock soname
|
||||
# 61/61/59. First step on the daedalus-fourier substitution arc
|
||||
# (daedalus-v4l2#11): routes the daemon through the libavcodec
|
||||
# source tree we own in marfrit-packages. Headers + .pc files
|
||||
# come from ffmpeg-v4l2-request-fourier (installed by the CI
|
||||
# workflow before this script runs; see PKG_CONFIG_PATH below).
|
||||
UPSTREAM_COMMIT=872eec505eb91b561892d02a0526749348ddc121
|
||||
PKGVER=0.1.0+r45+g872eec5
|
||||
PKGREL=1 # reset for new upstream pin (872eec5 — PROTO_MAX_PAYLOAD 64 KiB -> 1 MiB, closes #19); lock-step with daedalus-v4l2-dkms 0.1.0+r45+g872eec5 REQUIRED
|
||||
|
||||
# daedalus-fourier pin. d87239d = marfrit/daedalus-fourier PR #1 merge
|
||||
# (install rules + pkg-config, enables this consumer to find_package
|
||||
# + link). Bump in lockstep with the upstream daemon when daedalus-
|
||||
# fourier's API or installed shaders are changed by a new consumer.
|
||||
DAEDALUS_FOURIER_COMMIT=d87239d8172307d9a1b93c95cbed116d175b85cc
|
||||
|
||||
HERE=$(dirname "$(readlink -f "$0")")
|
||||
|
||||
# Reproducible build. 1779231600 = 2026-05-19 23:00 UTC (NOTE: Phase 8.13 close is dated 2026-05-18 23:00 UTC elsewhere — confirm which day is intended).
|
||||
export SOURCE_DATE_EPOCH=1779231600
|
||||
|
||||
work=$(mktemp -d)
|
||||
# Remove the scratch directory on every exit path. Single quotes defer
# expansion until the trap fires and keep the path quoted, so a TMPDIR
# containing whitespace or glob characters cannot make rm -rf operate on
# the wrong paths.
trap 'rm -rf "$work"' EXIT
|
||||
|
||||
# --- daedalus-fourier: fetch + build + install to per-build prefix ---
|
||||
#
|
||||
# Static-linked into the daemon, so the temp prefix is only for the
|
||||
# duration of this build script. Requires libvulkan-dev + glslang-tools
|
||||
# on the runner (already needed for the daedalus-fourier benches).
|
||||
FOURIER_PREFIX=$work/fourier-prefix
|
||||
mkdir -p "$FOURIER_PREFIX"
|
||||
|
||||
cd "$work"
|
||||
curl --connect-timeout 10 --max-time 600 --retry 3 --retry-delay 5 -sSLfo daedalus-fourier.tar.gz \
|
||||
"https://git.reauktion.de/marfrit/daedalus-fourier/archive/${DAEDALUS_FOURIER_COMMIT}.tar.gz"
|
||||
tar xzf daedalus-fourier.tar.gz
|
||||
cd daedalus-fourier
|
||||
cmake -B build -G Ninja \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DCMAKE_INSTALL_PREFIX="$FOURIER_PREFIX"
|
||||
cmake --build build --target daedalus_core
|
||||
cmake --install build
|
||||
|
||||
# --- daedalus-v4l2: fetch + build daemon against installed daedalus-fourier ---
|
||||
|
||||
cd "$work"
|
||||
curl --connect-timeout 10 --max-time 600 --retry 3 --retry-delay 5 -sSLfo daedalus-v4l2.tar.gz \
|
||||
"https://git.reauktion.de/reauktion/daedalus-v4l2/archive/${UPSTREAM_COMMIT}.tar.gz"
|
||||
tar xzf daedalus-v4l2.tar.gz
|
||||
SRCDIR=daedalus-v4l2
|
||||
|
||||
# Build daemon (CMake) — point pkg-config at the daedalus-fourier
|
||||
# temp prefix so pkg_check_modules(DAEDALUS_FOURIER …) resolves to it.
|
||||
cd "$SRCDIR/daemon"
|
||||
PKG_CONFIG_PATH="$FOURIER_PREFIX/lib/pkgconfig:/opt/fourier/lib/pkgconfig" \
|
||||
cmake -B build -G Ninja \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DCMAKE_INSTALL_PREFIX=/usr
|
||||
cmake --build build
|
||||
|
||||
# Build test tools (in-tree Makefile)
|
||||
cd "$work/$SRCDIR/tools"
|
||||
make
|
||||
|
||||
# Stage
|
||||
ROOT="$work/pkgroot"
|
||||
mkdir -p "$ROOT/DEBIAN" \
|
||||
"$ROOT/usr/bin" \
|
||||
"$ROOT/usr/libexec/daedalus-v4l2" \
|
||||
"$ROOT/usr/include" \
|
||||
"$ROOT/usr/share/doc/daedalus-v4l2" \
|
||||
"$ROOT/lib/systemd/system" \
|
||||
"$ROOT/usr/lib/modules-load.d"
|
||||
|
||||
install -m 755 "$work/$SRCDIR/daemon/build/daedalus_v4l2_daemon" \
|
||||
"$ROOT/usr/bin/daedalus_v4l2_daemon"
|
||||
|
||||
install -m 755 "$work/$SRCDIR/tools/test_chardev_pingpong" \
|
||||
"$ROOT/usr/libexec/daedalus-v4l2/test_chardev_pingpong"
|
||||
install -m 755 "$work/$SRCDIR/tools/test_m2m_decode" \
|
||||
"$ROOT/usr/libexec/daedalus-v4l2/test_m2m_decode"
|
||||
install -m 755 "$work/$SRCDIR/tools/test_m2m_stream" \
|
||||
"$ROOT/usr/libexec/daedalus-v4l2/test_m2m_stream"
|
||||
|
||||
install -m 644 "$work/$SRCDIR/include/daedalus_v4l2_proto.h" \
|
||||
"$ROOT/usr/include/daedalus_v4l2_proto.h"
|
||||
|
||||
# systemd unit + module autoload — without these the daemon never
|
||||
# starts and the libva/VAAPI consumer's REQ_DECODE has nobody on
|
||||
# the other end of /dev/daedalus-v4l2.
|
||||
install -m 644 "$work/$SRCDIR/packaging/systemd/daedalus-v4l2.service" \
|
||||
"$ROOT/lib/systemd/system/daedalus-v4l2.service"
|
||||
install -m 644 "$work/$SRCDIR/packaging/systemd/daedalus-v4l2.modules-load" \
|
||||
"$ROOT/usr/lib/modules-load.d/daedalus-v4l2.conf"
|
||||
|
||||
install -m 644 "$work/$SRCDIR/README.md" \
|
||||
"$ROOT/usr/share/doc/daedalus-v4l2/README.md"
|
||||
for d in "$work/$SRCDIR/docs/"*.md; do
|
||||
install -m 644 "$d" "$ROOT/usr/share/doc/daedalus-v4l2/$(basename "$d")"
|
||||
done
|
||||
|
||||
install -Dm644 "$HERE/debian/copyright" "$ROOT/usr/share/doc/daedalus-v4l2/copyright"
|
||||
install -Dm644 "$HERE/debian/changelog" "$ROOT/usr/share/doc/daedalus-v4l2/changelog.Debian"
|
||||
gzip -9 -n "$ROOT/usr/share/doc/daedalus-v4l2/changelog.Debian"
|
||||
|
||||
# DEBIAN/postinst — enable service + reload modules-load.d so the
|
||||
# kernel module loads now if daedalus-v4l2-dkms is also installed.
|
||||
# Starts the service only if /dev/daedalus-v4l2 already exists
|
||||
# (mirroring the ConditionPathExists= in the .service file), which
|
||||
# may not be true on the very first install before the user reboots
|
||||
# or manually modprobes; in that case the operator starts it later.
|
||||
cat > "$ROOT/DEBIAN/postinst" <<'POSTINST'
|
||||
#!/bin/sh
|
||||
set -e
|
||||
|
||||
if [ "$1" = "configure" ]; then
|
||||
# Reload systemd so the new unit file is visible.
|
||||
if command -v systemctl >/dev/null 2>&1; then
|
||||
systemctl daemon-reload >/dev/null 2>&1 || true
|
||||
systemctl enable daedalus-v4l2.service >/dev/null 2>&1 || true
|
||||
fi
|
||||
|
||||
# Trigger /usr/lib/modules-load.d/daedalus-v4l2.conf without a
|
||||
# reboot. Harmless if the module is already loaded; logs to
|
||||
# journal if it can't load (most common cause: dkms hasn't built
|
||||
# the module yet for the running kernel — see daedalus-v4l2-dkms
|
||||
# postinst for the loud-warning path).
|
||||
if command -v systemd-modules-load >/dev/null 2>&1; then
|
||||
systemd-modules-load >/dev/null 2>&1 || true
|
||||
fi
|
||||
|
||||
# Auto-start if /dev/daedalus-v4l2 came up (i.e. module loaded
|
||||
# successfully). ConditionPathExists in the unit file means
|
||||
# `systemctl start` is a no-op if the device isn't there yet —
|
||||
# avoids spurious failures during apt install on a host where
|
||||
# daedalus-v4l2-dkms hasn't built yet.
|
||||
if [ -e /dev/daedalus-v4l2 ] && command -v systemctl >/dev/null 2>&1; then
|
||||
systemctl start daedalus-v4l2.service >/dev/null 2>&1 || true
|
||||
fi
|
||||
fi
|
||||
|
||||
#DEBHELPER#
|
||||
POSTINST
|
||||
chmod 755 "$ROOT/DEBIAN/postinst"
|
||||
|
||||
cat > "$ROOT/DEBIAN/prerm" <<'PRERM'
|
||||
#!/bin/sh
|
||||
set -e
|
||||
|
||||
if [ "$1" = "remove" ] && command -v systemctl >/dev/null 2>&1; then
|
||||
systemctl stop daedalus-v4l2.service >/dev/null 2>&1 || true
|
||||
systemctl disable daedalus-v4l2.service >/dev/null 2>&1 || true
|
||||
fi
|
||||
|
||||
#DEBHELPER#
|
||||
PRERM
|
||||
chmod 755 "$ROOT/DEBIAN/prerm"
|
||||
|
||||
cat > "$ROOT/DEBIAN/postrm" <<'POSTRM'
|
||||
#!/bin/sh
|
||||
set -e
|
||||
|
||||
if command -v systemctl >/dev/null 2>&1; then
|
||||
systemctl daemon-reload >/dev/null 2>&1 || true
|
||||
fi
|
||||
|
||||
#DEBHELPER#
|
||||
POSTRM
|
||||
chmod 755 "$ROOT/DEBIAN/postrm"
|
||||
|
||||
cat > "$ROOT/DEBIAN/control" <<EOF
|
||||
Package: daedalus-v4l2
|
||||
Version: ${PKGVER}-${PKGREL}
|
||||
Section: video
|
||||
Priority: optional
|
||||
Architecture: arm64
|
||||
Depends: ffmpeg-v4l2-request-fourier (>= 2:8.1+rfourier), libdrm2
|
||||
Recommends: daedalus-v4l2-dkms
|
||||
Maintainer: Markus Fritsche <mfritsche@reauktion.de>
|
||||
Homepage: https://git.reauktion.de/reauktion/daedalus-v4l2
|
||||
Description: Userspace daemon for the daedalus_v4l2 stateless decoder shim
|
||||
daedalus-v4l2 ships the userspace daemon that backs the daedalus_v4l2
|
||||
out-of-tree V4L2 kernel module on Raspberry Pi 5 / CM5. Together they
|
||||
expose /dev/videoNN + /dev/mediaNN as a V4L2 stateless decoder for
|
||||
VP9, AV1, and H.264 — actual decoding happens in this single-threaded
|
||||
daemon via dlopen'd FFmpeg, with decoded NV12 / P010 frames shipped
|
||||
back through dmabuf.
|
||||
.
|
||||
Consumed end-to-end by libva-v4l2-request-fourier (>= 1.0.0+r376) so
|
||||
that 'ffmpeg -hwaccel vaapi' against vp9_small.ivf produces a
|
||||
byte-exact NV12 frame.
|
||||
.
|
||||
The kernel module ships separately in daedalus-v4l2-dkms; install
|
||||
both to actually serve V4L2 clients.
|
||||
EOF
|
||||
|
||||
DEB_OUT="daedalus-v4l2_${PKGVER}-${PKGREL}_arm64.deb"
|
||||
dpkg-deb --root-owner-group --build "$ROOT" "$HERE/$DEB_OUT"
|
||||
echo "built: $HERE/$DEB_OUT"
|
||||
+250
@@ -0,0 +1,250 @@
|
||||
daedalus-v4l2 (0.1.0+r45+g872eec5-1) bookworm trixie; urgency=medium
|
||||
|
||||
* Bump to 872eec5 — picks up daedalus-v4l2 PR #20 (closes #19).
|
||||
Wire-protocol cap DAEDALUS_PROTO_MAX_PAYLOAD raised from 64 KiB
|
||||
to 1 MiB. DAEDALUS_MAX_BITSTREAM follows; daedalus_fill_output_fmt
|
||||
now reports OUTPUT_MPLANE sizeimage = ~1 MiB instead of 65484.
|
||||
libva-v4l2-request-fourier's S_FMT-driven OUTPUT-pool resize
|
||||
finally succeeds; Firefox no longer falls off to libmozavcodec
|
||||
SW when an H.264 slice exceeds 64 KiB (routine on any
|
||||
720p+ stream).
|
||||
* #define-only change in include/daedalus_v4l2_proto.h; struct
|
||||
layout unchanged. But effective cap is min(kernel, daemon) —
|
||||
lock-step install of this package WITH
|
||||
daedalus-v4l2-dkms 0.1.0+r45+g872eec5 REQUIRED.
|
||||
* Daemon-side allocations are dynamic (malloc-on-payload), so
|
||||
the practical growth is one ~1 MiB read buffer per daemon
|
||||
process at startup. Negligible on Pi 5 / 8 GB.
|
||||
* Picks up the same r43 -> r45 transition as daedalus-v4l2-dkms
|
||||
(which had been stuck at r33+g5d8b436 since the parking-design
|
||||
revert because the kernel module didn't change in r37/r39/r41/r43).
|
||||
|
||||
-- Markus Fritsche <mfritsche@reauktion.de> Fri, 22 May 2026 21:00:00 +0000
|
||||
|
||||
daedalus-v4l2 (0.1.0+r43+g1d8f5af-1) bookworm trixie; urgency=medium
|
||||
|
||||
* Bump to 1d8f5af — picks up daedalus-v4l2 PR #18 (closes #17).
|
||||
Daemon now drops degenerate (<4 byte) bitstreams at the REQ_DECODE
|
||||
entry instead of letting avcodec_send_packet return
|
||||
AVERROR_INVALIDDATA. Reply RESP_FRAME with status=
|
||||
DAEDALUS_DECODE_NO_FRAME so libva's V4L2 surface pool stays
|
||||
healthy.
|
||||
* Fixes the Firefox YouTube avc1 pause→resume regression observed
|
||||
on higgs: libva-v4l2-request-fourier flushes a 3-byte stub
|
||||
(presumably a bare NAL start code) into OUTPUT_MPLANE at the
|
||||
pause boundary; the old INVALIDDATA error path made Firefox
|
||||
fall off to libmozavcodec SW for the rest of the session. With
|
||||
this filter the daemon logs the sentinel as 'tiny bitstream 3
|
||||
bytes — dropping as no-op' and the next real REQ_DECODE
|
||||
proceeds normally.
|
||||
* Wire protocol unchanged. No daedalus-v4l2-dkms bump needed.
|
||||
|
||||
-- Markus Fritsche <mfritsche@reauktion.de> Fri, 22 May 2026 17:30:00 +0000
|
||||
|
||||
daedalus-v4l2 (0.1.0+r41+g6e6dfa1-1) bookworm trixie; urgency=medium
|
||||
|
||||
* Bump to 6e6dfa1 — daedalus-v4l2 PR #16. Daemon dlopens Kwiboo
|
||||
fourier fork's libavcodec.so.62 / libavformat.so.62 /
|
||||
libavutil.so.60 at /opt/fourier instead of Debian-stock
|
||||
soname 61/61/59. First step on the daedalus-fourier
|
||||
substitution arc (daedalus-v4l2#11): the next PR series
|
||||
layers daedalus_recipe_dispatch_h264_* substitution patches
|
||||
into ffmpeg-v4l2-request-fourier's H264DSPContext NEON init,
|
||||
reaching the daemon's production decode path.
|
||||
* Build: PKG_CONFIG_PATH now includes /opt/fourier/lib/pkgconfig
|
||||
so daemon's pkg_check_modules picks up the Kwiboo .pc files.
|
||||
* CI workflow build-deps: libavcodec-dev / libavformat-dev /
|
||||
libavutil-dev (Debian stock 7.1.3) → ffmpeg-v4l2-request-fourier
|
||||
(provides /opt/fourier/include + .pc files).
|
||||
* Wire protocol unchanged. No daedalus-v4l2-dkms bump.
|
||||
|
||||
-- Markus Fritsche <mfritsche@reauktion.de> Thu, 21 May 2026 21:30:00 +0000
|
||||
|
||||
daedalus-v4l2 (0.1.0+r39+g3bc0da1-1) bookworm trixie; urgency=medium
|
||||
|
||||
* Bump to 3bc0da1 — picks up daedalus-v4l2 PR #15. Per-frame
|
||||
`decoder: OK ...` log line gains `decode_us=N` (libavcodec
|
||||
send_packet + receive_frame wall-clock cost in microseconds).
|
||||
New `decoder stats` summary line every 60 decoded frames with
|
||||
codec, fps, avg decode_us, MBs/s throughput, B/MB bitrate.
|
||||
* Pure observability — no decode-path behaviour change.
|
||||
Establishes baseline metrics for the substitution work in
|
||||
daedalus-v4l2#11 step 2 (replacing libavcodec primitives with
|
||||
daedalus-fourier kernels one cycle at a time).
|
||||
* On Pi CM5 / bbb 720p H.264 baseline: ~4 ms decode_us / 24 fps
|
||||
/ 90 K MBs/s — workload is well under 1 % of any single
|
||||
daedalus-fourier kernel's NEON ceiling.
|
||||
* Wire protocol unchanged. No daedalus-v4l2-dkms bump needed.
|
||||
|
||||
-- Markus Fritsche <mfritsche@reauktion.de> Thu, 21 May 2026 18:30:00 +0000
|
||||
|
||||
daedalus-v4l2 (0.1.0+r37+g77e14e5-1) bookworm trixie; urgency=medium
|
||||
|
||||
* Bump to 77e14e5 — picks up daedalus-v4l2 PRs #12 + #13.
|
||||
* #12 (LOW_DELAY half-measure): the daemon now sets
|
||||
AV_CODEC_FLAG_LOW_DELAY on the H.264 AVCodecContext so libavcodec
|
||||
emits frames in decode order ~99% of the time (a few stragglers
|
||||
at GOP boundaries when the stream's SPS num_reorder_frames
|
||||
overrides the flag). Visible improvement vs the 2-1-4-3
|
||||
pair-swap on Firefox YouTube + mpv playback; not a permanent
|
||||
fix (see #11 for the architectural plan).
|
||||
* #13 (daedalus-fourier linkage): the daemon now pkg-config-links
|
||||
against the daedalus-fourier kernel library (marfrit/
|
||||
daedalus-fourier) and logs substrate availability at startup.
|
||||
No kernels dispatched yet — this is the build-time / link-time
|
||||
foundation for the H.264 daemon-rewrite plan in #11
|
||||
(substituting daedalus-fourier IDCT 4×4 / IDCT 8×8 / luma
|
||||
deblock primitives for libavcodec's per-MB pixel math, one
|
||||
cycle at a time, measuring CPU saved per substitution).
|
||||
* Build-deb.sh now fetches + builds + installs daedalus-fourier
|
||||
(pinned at d87239d, marfrit/daedalus-fourier PR #1) into a
|
||||
per-build temp prefix, then builds the daemon with
|
||||
PKG_CONFIG_PATH pointing at it. daedalus-fourier is
|
||||
statically linked into the daemon binary, so the resulting
|
||||
.deb has no new runtime deps. Requires libvulkan-dev +
|
||||
glslang-tools on the CI runner (the daedalus-fourier benches
|
||||
already needed those).
|
||||
* Wire protocol unchanged — DAEDALUS_PROTO_VERSION stays at 0.
|
||||
No daedalus-v4l2-dkms bump needed.
|
||||
|
||||
-- Markus Fritsche <mfritsche@reauktion.de> Thu, 21 May 2026 16:30:00 +0000
|
||||
|
||||
daedalus-v4l2 (0.1.0+r33+g5d8b436-1) bookworm trixie; urgency=medium
|
||||
|
||||
* Bump to 5d8b436 — reverts daedalus-v4l2 PRs #7 + #8 (the parking
|
||||
design that broke libva-v4l2-request-fourier's 1:1 CAPTURE
|
||||
contract; see daedalus-v4l2#9 + #10). After daemon-r28+g79256dc
|
||||
landed, mpv (--hwdec=vaapi-copy) failed pre-playing with
|
||||
"Unable to dequeue buffer: Resource temporarily unavailable" /
|
||||
"Failed to end picture decode" because the daemon parked CAPTURE
|
||||
buffers waiting for libavcodec to release H.264 B-frames in
|
||||
display order — violating the V4L2 stateless 1:1 contract.
|
||||
Firefox tolerated the mess (visible "2 1 4 3" pair-swap); mpv
|
||||
bailed.
|
||||
* This bump restores f0d4186-equivalent behaviour, plus PR #4
|
||||
(cosmetic H.264 DECODE_MODE / START_CODE menu controls). PR #7
|
||||
+ PR #8 wire-protocol additions (src_pts / output_src_pts /
|
||||
RESP_FRAME flags) are reverted — DAEDALUS_PROTO_VERSION drops
|
||||
back from 1 → 0. Lock-step install with daedalus-v4l2-dkms
|
||||
0.1.0+r33+g5d8b436 REQUIRED.
|
||||
* Visible regression: H.264 B-frame streams in Firefox revert to
|
||||
the original "2 1 4 3 6 5" pair-swap visual. The proper fix
|
||||
(concurrent in-flight requests in daemon + display-order reorder
|
||||
in libva-v4l2-request-fourier) is tracked at daedalus-v4l2#11.
|
||||
|
||||
-- Markus Fritsche <mfritsche@reauktion.de> Thu, 21 May 2026 14:50:00 +0000
|
||||
|
||||
daedalus-v4l2 (0.1.0+r28+g79256dc-1) bookworm trixie; urgency=medium
|
||||
|
||||
* Bump to 79256dc — H.264 B-frame display reorder fix (closes
|
||||
daedalus-v4l2#6 + #4 menu controls). Daemon side: the
|
||||
avcodec_send_packet → receive_frame loop now stamps pkt->pts =
|
||||
req->src_pts so libavcodec's display-ordered frame->pts identifies
|
||||
which OUTPUT bitstream's pixels each drained frame belongs to.
|
||||
chardev_client maintains a (src_pts → cookie) lookup table so the
|
||||
daemon can ship pixels to the cookie of the *originating*
|
||||
bitstream, not the cookie of whatever REQ triggered the
|
||||
receive_frame call. Multiple RESP_FRAME messages per REQ_DECODE
|
||||
are now possible (one for the just-consumed src, one or more for
|
||||
drained pixels).
|
||||
* Wire-protocol bump (DAEDALUS_PROTO_VERSION 0 → 1): REQ_DECODE
|
||||
gains __u64 src_pts; RESP_FRAME gains __u32 flags +
|
||||
__u64 output_src_pts. Daemon + kernel must install atomically
|
||||
(this package + daedalus-v4l2-dkms 0.1.0+r28+g79256dc).
|
||||
* Also subsumes 79256dc's predecessor 7ff2d89 — H.264 DECODE_MODE +
|
||||
START_CODE menu-control registration that retires the
|
||||
"Unable to set control(s) error_idx=2/2" warning libva-v4l2-
|
||||
request emitted on every context init.
|
||||
|
||||
-- Markus Fritsche <mfritsche@reauktion.de> Thu, 21 May 2026 12:00:00 +0000
|
||||
|
||||
daedalus-v4l2 (0.1.0+r24+gf0d4186-1) bookworm trixie; urgency=medium
|
||||
|
||||
* Bump to f0d4186 — kernel per-ctx vb2 lock fix. daedalus_queue_init
|
||||
was wiring src_vq->lock and dst_vq->lock to ctx->dev->m2m_lock (a
|
||||
device-wide mutex), serialising every vb2 ioctl across all
|
||||
concurrent clients of /dev/video0. For Firefox (which spawns
|
||||
separate content + RDD + GPU processes that each open the device
|
||||
and run libva probe simultaneously), one libva session's
|
||||
S_FMT(OUTPUT_MPLANE) hit EBUSY while another was mid-streamon —
|
||||
Firefox VAAPI playback fell apart at startup.
|
||||
* Fix gives each open() its own ctx->vb_mutex; vb2 ioctls run
|
||||
independently per client. Matches cedrus / rkvdec / hantro
|
||||
pattern.
|
||||
* Verified on higgs: Firefox YouTube playback engages VAAPI cleanly,
|
||||
sustained ~230 fps decode at 640x368 through the daedalus daemon,
|
||||
zero EBUSY in stderr or daemon journal.
|
||||
|
||||
-- Markus Fritsche <mfritsche@reauktion.de> Wed, 20 May 2026 23:00:00 +0000
|
||||
|
||||
daedalus-v4l2 (0.1.0+r22+g462aa4b-1) bookworm trixie; urgency=medium
|
||||
|
||||
* Bump to 462aa4b — kernel-side fix for control-binding gap that
|
||||
closes the libva→daemon SPS/PPS pipeline. Kernel device_run now
|
||||
calls v4l2_ctrl_request_setup() before reading ctrl->p_cur, so
|
||||
the daemon's daedalus_h264_meta block actually carries THIS
|
||||
request's V4L2 stateless H.264 control values instead of stale
|
||||
/default ones. Pairs with libva-v4l2-request-fourier r382+gc1bb444
|
||||
(Fix 3 + Fix 4 from issue libva-v4l2-request-fourier#8).
|
||||
* After-fix on higgs (Pi CM5): ffmpeg -hwaccel vaapi -i h264.mp4
|
||||
produces unique decoded P-frames (per-frame fnv1a hashes differ)
|
||||
and zero "error while decoding MB" / "reference frames exceeds
|
||||
max" warnings.
|
||||
|
||||
-- Markus Fritsche <mfritsche@reauktion.de> Wed, 20 May 2026 22:00:00 +0000
|
||||
|
||||
daedalus-v4l2 (0.1.0+r20+g3dd0eb0-1) bookworm trixie; urgency=medium
|
||||
|
||||
* Bump to 3dd0eb0 — DAEMON-PPS H.264 SPS/PPS NAL synthesiser.
|
||||
Daemon now reconstructs AnnexB SPS+PPS NAL units from the V4L2
|
||||
stateless H.264 control structs (forwarded by the kernel via
|
||||
a new struct daedalus_h264_meta block in REQ_DECODE) and
|
||||
prepends them to the slice bitstream before feeding libavcodec.
|
||||
Without this, ffmpeg -hwaccel vaapi on H.264 sources failed
|
||||
with "non-existing PPS 0 referenced" even after LIBVA-1/-2
|
||||
routing correctly delivered the request.
|
||||
* Wire protocol: new DAEDALUS_REQ_FLAG_H264_META bit + struct
|
||||
daedalus_h264_meta; daemon and kernel must be installed in
|
||||
lockstep (this package + daedalus-v4l2-dkms 0.1.0+r20+g3dd0eb0).
|
||||
* VP9 / AV1 paths unchanged.
|
||||
|
||||
-- Markus Fritsche <mfritsche@reauktion.de> Wed, 20 May 2026 21:00:00 +0000
|
||||
|
||||
daedalus-v4l2 (0.1.0+r18+g481279c-1) bookworm trixie; urgency=medium
|
||||
|
||||
* Bump to 481279c. Upstream landed the systemd unit + modules-load.d
|
||||
drop-in (packaging/systemd/daedalus-v4l2.{service,modules-load}).
|
||||
* Package now ships /lib/systemd/system/daedalus-v4l2.service and
|
||||
/usr/lib/modules-load.d/daedalus-v4l2.conf.
|
||||
* postinst: daemon-reload, enable the service, trigger
|
||||
systemd-modules-load, and start if /dev/daedalus-v4l2 is already
|
||||
present. Operator no longer needs to remember the modprobe +
|
||||
systemctl-enable dance after install.
|
||||
* prerm/postrm: stop + disable + daemon-reload.
|
||||
|
||||
-- Markus Fritsche <mfritsche@reauktion.de> Wed, 20 May 2026 16:00:00 +0000
|
||||
|
||||
daedalus-v4l2 (0.1.0+r17+gf0cd29a-1) bookworm trixie; urgency=medium
|
||||
|
||||
* Bump to f0cd29a alongside daedalus-v4l2-dkms. No userspace
|
||||
change — the daemon binary in this release is bit-identical to
|
||||
the f55b2cd one (the f0cd29a commit only touches kernel/). Bump
|
||||
keeps the userspace + DKMS pkgver lockstep so depmod /
|
||||
LIBVA_DRIVER_NAME selection stays correct on hosts that
|
||||
upgrade.
|
||||
|
||||
-- Markus Fritsche <mfritsche@reauktion.de> Wed, 20 May 2026 10:30:00 +0000
|
||||
|
||||
daedalus-v4l2 (0.1.0+r16+gf55b2cd-1) bookworm trixie; urgency=medium
|
||||
|
||||
* Initial Debian packaging for the daedalus-v4l2 userspace daemon.
|
||||
* Pinned to f55b2cd (Phase 8.13 close): first commit where the full
|
||||
ffmpeg -hwaccel vaapi → libva-v4l2-request-fourier → /dev/video0
|
||||
→ daemon path lands a pixel-correct decoded NV12 frame back in
|
||||
ffmpeg.
|
||||
* Codecs: VP9, AV1, H.264 (all via dlopen'd FFmpeg 7.1.3).
|
||||
* Capture formats: NV12M (2 plane), NV12 (1 plane, for libva),
|
||||
P010 (10-bit single plane).
|
||||
* Companion package: daedalus-v4l2-dkms (kernel module).
|
||||
|
||||
-- Markus Fritsche <mfritsche@reauktion.de> Mon, 18 May 2026 23:00:00 +0000
|
||||
Vendored
+34
@@ -0,0 +1,34 @@
|
||||
Source: daedalus-v4l2
|
||||
Section: video
|
||||
Priority: optional
|
||||
Maintainer: Markus Fritsche <mfritsche@reauktion.de>
|
||||
Build-Depends: debhelper-compat (= 13),
|
||||
cmake,
|
||||
ninja-build,
|
||||
pkg-config,
|
||||
libavformat-dev (>= 7.1),
|
||||
libavcodec-dev (>= 7.1),
|
||||
libavutil-dev (>= 7.1)
|
||||
Standards-Version: 4.6.2
|
||||
Homepage: https://git.reauktion.de/reauktion/daedalus-v4l2
|
||||
|
||||
Package: daedalus-v4l2
|
||||
Architecture: arm64
|
||||
Depends: ${misc:Depends}, ${shlibs:Depends},
|
||||
ffmpeg (>= 7.1),
|
||||
libdrm2
|
||||
Recommends: daedalus-v4l2-dkms
|
||||
Description: Userspace daemon for the daedalus_v4l2 stateless decoder shim
|
||||
daedalus-v4l2 ships the userspace daemon that backs the daedalus_v4l2
|
||||
out-of-tree V4L2 kernel module on Raspberry Pi 5 / CM5. Together they
|
||||
expose /dev/videoNN + /dev/mediaNN as a V4L2 stateless decoder for
|
||||
VP9, AV1, and H.264 — actual decoding happens in this single-threaded
|
||||
daemon via dlopen'd FFmpeg, with decoded NV12 / P010 frames shipped
|
||||
back through dmabuf.
|
||||
.
|
||||
Consumed end-to-end by libva-v4l2-request-fourier (>= 1.0.0+r376) so
|
||||
that 'ffmpeg -hwaccel vaapi' against vp9_small.ivf produces a
|
||||
byte-exact NV12 frame.
|
||||
.
|
||||
The kernel module ships separately in daedalus-v4l2-dkms; install
|
||||
both to actually serve V4L2 clients.
|
||||
+40
@@ -0,0 +1,40 @@
|
||||
Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
|
||||
Upstream-Name: daedalus-v4l2
|
||||
Upstream-Contact: Markus Fritsche <fritsche.markus@gmail.com>
|
||||
Source: https://git.reauktion.de/reauktion/daedalus-v4l2
|
||||
|
||||
Files: *
|
||||
Copyright: 2026 Markus Fritsche <fritsche.markus@gmail.com>
|
||||
License: BSD-2-Clause
|
||||
|
||||
Files: include/daedalus_v4l2_proto.h
|
||||
Copyright: 2026 Markus Fritsche <fritsche.markus@gmail.com>
|
||||
License: GPL-2.0-or-later WITH Linux-syscall-note
|
||||
Comment:
|
||||
Shared kernel↔daemon wire-protocol header. GPL-2.0-or-later (matches
|
||||
the kernel module that includes it) with the standard
|
||||
Linux-syscall-note exception so userspace inclusion is BSD-clean.
|
||||
|
||||
License: BSD-2-Clause
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
.
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES ARE DISCLAIMED.
|
||||
|
||||
License: GPL-2.0-or-later
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
.
|
||||
On Debian systems, the complete text of the GNU General Public
|
||||
License v2 can be found in `/usr/share/common-licenses/GPL-2'.
|
||||
@@ -0,0 +1,166 @@
|
||||
--- a/libavutil/hwcontext_v4l2request.c
|
||||
+++ b/libavutil/hwcontext_v4l2request.c
|
||||
@@ -19,12 +19,13 @@
|
||||
#include "config.h"
|
||||
|
||||
#include <fcntl.h>
|
||||
#include <linux/dma-buf.h>
|
||||
#include <linux/media.h>
|
||||
#include <sys/ioctl.h>
|
||||
+#include <sys/stat.h>
|
||||
#include <sys/mman.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <drm_fourcc.h>
|
||||
#include <libudev.h>
|
||||
|
||||
@@ -690,12 +691,125 @@
|
||||
}
|
||||
|
||||
udev_enumerate_unref(enumerate);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Brute-force fallback used when libudev's scan fails (e.g. inside firefox's
 * RDD sandbox where Mozilla's broker rejects fd-relative openat used by
 * systemd's chase() symlink resolver).
 *
 * Walks /dev/video0 .. /dev/video63 and returns the first node that is a
 * character device whose device number equals `devnum`.
 *
 * Returns a string allocated with av_strdup() that the caller must release
 * with av_free(), or NULL when no node matches (or the allocation fails).
 */
static char *v4l2request_devnum_to_video_path_brute(dev_t devnum)
{
    char path[32];
    struct stat st;

    for (int i = 0; i < 64; i++) {
        snprintf(path, sizeof(path), "/dev/video%d", i);

        /* Missing or inaccessible nodes are simply skipped. */
        if (stat(path, &st) < 0)
            continue;

        /*
         * st_rdev only identifies a device for special files, so require a
         * character device before comparing; anything else that happens to
         * sit at /dev/videoN must not be handed to the V4L2 probe.
         */
        if (S_ISCHR(st.st_mode) && st.st_rdev == devnum)
            return av_strdup(path);
    }

    return NULL;
}
|
||||
+
|
||||
+/* Brute-force version of v4l2request_probe_video_devices: replaces the
|
||||
+ * udev_device_new_from_devnum + udev_device_get_devnode flow with
|
||||
+ * stat()-based major/minor matching against /dev/video[0..63]. */
|
||||
+static int v4l2request_probe_video_devices_brute(AVHWFramesContext *hwfc,
|
||||
+ uint32_t pixelformat,
|
||||
+ uint32_t buffersize)
|
||||
+{
|
||||
+ AVV4L2RequestFramesContext *fctx = hwfc->hwctx;
|
||||
+ AVV4L2RequestFramesContextInternal *fctxi = fctx->internal;
|
||||
+ struct media_device_info device_info;
|
||||
+ struct media_v2_topology topology = {0};
|
||||
+ struct media_v2_interface *interfaces;
|
||||
+ char *path;
|
||||
+ dev_t devnum;
|
||||
+ int ret;
|
||||
+
|
||||
+ if (ioctl(fctxi->media_fd, MEDIA_IOC_DEVICE_INFO, &device_info) < 0)
|
||||
+ return AVERROR(errno);
|
||||
+
|
||||
+ if (ioctl(fctxi->media_fd, MEDIA_IOC_G_TOPOLOGY, &topology) < 0)
|
||||
+ return AVERROR(errno);
|
||||
+
|
||||
+ if (!topology.num_interfaces)
|
||||
+ return AVERROR(ENOENT);
|
||||
+
|
||||
+ interfaces = av_calloc(topology.num_interfaces,
|
||||
+ sizeof(struct media_v2_interface));
|
||||
+ if (!interfaces)
|
||||
+ return AVERROR(ENOMEM);
|
||||
+
|
||||
+ topology.ptr_interfaces = (__u64)(uintptr_t)interfaces;
|
||||
+ if (ioctl(fctxi->media_fd, MEDIA_IOC_G_TOPOLOGY, &topology) < 0) {
|
||||
+ ret = AVERROR(errno);
|
||||
+ goto fail;
|
||||
+ }
|
||||
+
|
||||
+ ret = AVERROR(ENOENT);
|
||||
+ for (unsigned i = 0; i < topology.num_interfaces; i++) {
|
||||
+ if (interfaces[i].intf_type != MEDIA_INTF_T_V4L_VIDEO)
|
||||
+ continue;
|
||||
+
|
||||
+ devnum = makedev(interfaces[i].devnode.major,
|
||||
+ interfaces[i].devnode.minor);
|
||||
+ path = v4l2request_devnum_to_video_path_brute(devnum);
|
||||
+ if (!path)
|
||||
+ continue;
|
||||
+
|
||||
+ ret = v4l2request_probe_video_device(hwfc, path, pixelformat, buffersize);
|
||||
+ if (!ret) {
|
||||
+ av_log(hwfc, AV_LOG_INFO,
|
||||
+ "Using V4L2 media driver %s (brute-force) for %s\n",
|
||||
+ device_info.driver, av_fourcc2str(pixelformat));
|
||||
+ av_free(path);
|
||||
+ break;
|
||||
+ }
|
||||
+ av_free(path);
|
||||
+ }
|
||||
+
|
||||
+fail:
|
||||
+ av_free(interfaces);
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+/* Brute-force fallback for v4l2request_probe_media_devices(). Iterates
|
||||
+ * /dev/media[0..15], opens each, probes via topology+stat. */
|
||||
+static int v4l2request_probe_media_devices_brute(AVHWFramesContext *hwfc,
|
||||
+ uint32_t pixelformat,
|
||||
+ uint32_t buffersize)
|
||||
+{
|
||||
+ AVV4L2RequestFramesContext *fctx = hwfc->hwctx;
|
||||
+ AVV4L2RequestFramesContextInternal *fctxi = fctx->internal;
|
||||
+ char path[32];
|
||||
+ int ret = AVERROR(ENOENT);
|
||||
+
|
||||
+ for (int i = 0; i < 16; i++) {
|
||||
+ snprintf(path, sizeof(path), "/dev/media%d", i);
|
||||
+
|
||||
+ fctxi->media_fd = open(path, O_RDWR);
|
||||
+ if (fctxi->media_fd < 0)
|
||||
+ continue;
|
||||
+
|
||||
+ ret = v4l2request_probe_video_devices_brute(hwfc, pixelformat,
|
||||
+ buffersize);
|
||||
+ if (!ret)
|
||||
+ return 0;
|
||||
+
|
||||
+ close(fctxi->media_fd);
|
||||
+ fctxi->media_fd = -1;
|
||||
+ }
|
||||
+
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
static int v4l2request_open_decoder(AVHWFramesContext *hwfc)
|
||||
{
|
||||
AVV4L2RequestFramesContext *fctx = hwfc->hwctx;
|
||||
uint32_t buffersize;
|
||||
struct udev *udev;
|
||||
int ret;
|
||||
@@ -712,12 +826,23 @@
|
||||
|
||||
buffersize = FFMAX(hwfc->width * hwfc->height * 3 / 2, 256 * 1024);
|
||||
|
||||
// Probe all media devices (auto-detection)
|
||||
ret = v4l2request_probe_media_devices(hwfc, udev, fctx->pixelformat, buffersize);
|
||||
|
||||
+ // Brute-force fallback when libudev fails. Firefox-fourier hits this
|
||||
+ // because Mozilla's RDD sandbox blocks fd-relative openat used by
|
||||
+ // systemd's chase() symlink resolver inside udev_enumerate_scan_devices.
|
||||
+ if (ret < 0) {
|
||||
+ av_log(hwfc, AV_LOG_INFO,
|
||||
+ "libudev probe failed (%d), falling back to brute-force /dev/media*\n",
|
||||
+ ret);
|
||||
+ ret = v4l2request_probe_media_devices_brute(hwfc, fctx->pixelformat,
|
||||
+ buffersize);
|
||||
+ }
|
||||
+
|
||||
udev_unref(udev);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static AVBufferRef *v4l2request_v4l2_buffer_alloc(AVHWFramesContext *hwfc,
|
||||
struct v4l2_format *format)
|
||||
@@ -0,0 +1,178 @@
|
||||
From 0cd6e669735e453ec8772f111065bbb2f70a5bc6 Mon Sep 17 00:00:00 2001
|
||||
From: Markus Fritsche <mfritsche@reauktion.de>
|
||||
Date: Mon, 18 May 2026 07:27:10 +0000
|
||||
Subject: [PATCH] avutil/hwcontext_v4l2request: unpack NV15 to P010 in
|
||||
transfer_data_from
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
V4L2_PIX_FMT_NV15 (RK3399/RK3588 rkvdec 10-bit 4:2:0 capture) is mapped to
|
||||
sw_format = AV_PIX_FMT_YUV420P10 in v4l2request_capture_pixelformats[]. The
|
||||
existing transfer_get_formats explicitly blanked the format list for that
|
||||
sw_format, so 'ffmpeg -hwaccel v4l2request -vf hwdownload,format=p010le' on
|
||||
a Hi10P / Main10 input failed at filter init with EINVAL before reaching
|
||||
the actual decode (which itself succeeds — 2 frames decoded cleanly).
|
||||
|
||||
Expose AV_PIX_FMT_P010 as the transfer target for NV15-backed surfaces and
|
||||
unpack the packed 10-bit samples into the standard high-bits-of-16 layout
|
||||
inside transfer_data_from. Luma and chroma share the same packing format
|
||||
(5 bytes per 4 samples, little endian); chroma plane is W × H/2 samples
|
||||
for 4:2:0.
|
||||
|
||||
The other 'needs custom unpack' sw_formats (YUV420P / Allwinner NV12_32L32
|
||||
tiled and YUV422P10 / rkvdec NV20) keep the original ENOSYS path because
|
||||
they need different unpack code that isn't covered by this patch.
|
||||
|
||||
Closes marfrit/marfrit-packages#21.
|
||||
---
|
||||
libavutil/hwcontext_v4l2request.c | 111 +++++++++++++++++++++++++++++-
|
||||
1 file changed, 110 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/libavutil/hwcontext_v4l2request.c b/libavutil/hwcontext_v4l2request.c
|
||||
index b6633d9081..3842160dfb 100644
|
||||
--- a/libavutil/hwcontext_v4l2request.c
|
||||
+++ b/libavutil/hwcontext_v4l2request.c
|
||||
@@ -1073,6 +1073,56 @@ fail:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Unpack one NV15-packed 10-bit plane (5 bytes per 4 samples, little endian)
 * into a P010-style plane (10 bits in the high bits of a 16-bit container).
 *
 * src        - first byte of the packed plane
 * dst        - first sample of the destination plane
 * width      - number of samples to produce per row
 * height     - number of rows
 * src_stride - bytes per row of NV15 data
 * dst_stride - bytes (not samples) per destination row
 *
 * Each group of 5 source bytes holds samples at bit offsets 0, 10, 20 and 30.
 * Every sample is written as `value << 6`. When width is not a multiple of 4,
 * only the remaining 1-3 samples of the last group are written; destination
 * samples beyond `width` are left untouched.
 */
static void v4l2request_nv15_unpack_plane_to_p010(const uint8_t *src,
                                                  uint16_t *dst,
                                                  unsigned width,
                                                  unsigned height,
                                                  unsigned src_stride,
                                                  unsigned dst_stride)
{
    for (unsigned y = 0; y < height; y++) {
        /* Row offsets are computed in size_t so y * stride cannot wrap. */
        const uint8_t *s = src + (size_t)y * src_stride;
        uint16_t *d = (uint16_t *)((uint8_t *)dst + (size_t)y * dst_stride);
        unsigned x;

        /* Full groups: 5 packed bytes -> 4 output samples. */
        for (x = 0; x + 4 <= width; x += 4) {
            uint16_t a = (uint16_t)s[0] | ((uint16_t)(s[1] & 0x03) << 8);
            uint16_t b = ((uint16_t)s[1] >> 2) | ((uint16_t)(s[2] & 0x0F) << 6);
            uint16_t c = ((uint16_t)s[2] >> 4) | ((uint16_t)(s[3] & 0x3F) << 4);
            uint16_t e = ((uint16_t)s[3] >> 6) | ((uint16_t)s[4] << 2);

            d[0] = (uint16_t)(a << 6);
            d[1] = (uint16_t)(b << 6);
            d[2] = (uint16_t)(c << 6);
            d[3] = (uint16_t)(e << 6);

            d += 4;
            s += 5;
        }

        /*
         * Partial trailing group. The loop above exits with x + 4 > width,
         * so 1 to 3 samples remain here and the fourth sample of the group
         * is never needed.
         */
        if (x < width) {
            unsigned rem = width - x;
            uint16_t pix[3] = { 0, 0, 0 };

            pix[0] = (uint16_t)s[0] | ((uint16_t)(s[1] & 0x03) << 8);
            if (rem >= 2)
                pix[1] = ((uint16_t)s[1] >> 2) | ((uint16_t)(s[2] & 0x0F) << 6);
            if (rem >= 3)
                pix[2] = ((uint16_t)s[2] >> 4) | ((uint16_t)(s[3] & 0x3F) << 4);

            for (unsigned j = 0; j < rem; j++)
                d[j] = (uint16_t)(pix[j] << 6);
        }
    }
}
|
||||
+
|
||||
static int v4l2request_transfer_get_formats(AVHWFramesContext *hwfc,
|
||||
enum AVHWFrameTransferDirection dir,
|
||||
enum AVPixelFormat **formats)
|
||||
@@ -1082,6 +1132,22 @@ static int v4l2request_transfer_get_formats(AVHWFramesContext *hwfc,
|
||||
if (dir == AV_HWFRAME_TRANSFER_DIRECTION_TO)
|
||||
return AVERROR(ENOSYS);
|
||||
|
||||
+ /*
|
||||
+ * NV15-backed surfaces (sw_format = YUV420P10) are exposed as P010 to
|
||||
+ * downstream filters: the unpack below converts the packed 10-bit
|
||||
+ * samples into the standard high-bits-of-16 layout. Hi10P / Main10
|
||||
+ * VAAPI/v4l2-request decode reaches userspace through this path.
|
||||
+ */
|
||||
+ if (hwfc->sw_format == AV_PIX_FMT_YUV420P10) {
|
||||
+ fmts = av_malloc_array(2, sizeof(*fmts));
|
||||
+ if (!fmts)
|
||||
+ return AVERROR(ENOMEM);
|
||||
+ fmts[0] = AV_PIX_FMT_P010;
|
||||
+ fmts[1] = AV_PIX_FMT_NONE;
|
||||
+ *formats = fmts;
|
||||
+ return 0;
|
||||
+ }
|
||||
+
|
||||
fmts = av_malloc_array(2, sizeof(*fmts));
|
||||
if (!fmts)
|
||||
return AVERROR(ENOMEM);
|
||||
@@ -1089,8 +1155,13 @@ static int v4l2request_transfer_get_formats(AVHWFramesContext *hwfc,
|
||||
fmts[0] = hwfc->sw_format;
|
||||
fmts[1] = AV_PIX_FMT_NONE;
|
||||
|
||||
+ /*
|
||||
+ * Tiled-NV12-32L32 (Allwinner) and NV20 (rkvdec 4:2:2 10-bit) still need
|
||||
+ * dedicated unpacks before hwdownload can consume them; leave them as
|
||||
+ * "no transfer formats" so the filter graph reports the limitation
|
||||
+ * rather than silently producing garbage.
|
||||
+ */
|
||||
if (hwfc->sw_format == AV_PIX_FMT_YUV420P ||
|
||||
- hwfc->sw_format == AV_PIX_FMT_YUV420P10 ||
|
||||
hwfc->sw_format == AV_PIX_FMT_YUV422P10)
|
||||
fmts[0] = AV_PIX_FMT_NONE;
|
||||
|
||||
@@ -1110,6 +1181,44 @@ static int v4l2request_transfer_data_from(AVHWFramesContext *hwfc,
|
||||
map = av_frame_alloc();
|
||||
if (!map)
|
||||
return AVERROR(ENOMEM);
|
||||
+
|
||||
+ /*
|
||||
+ * For NV15→P010, map the raw NV15 bytes (sw_format) and unpack into
|
||||
+ * dst's P010 storage. Otherwise fall through to the original byte-copy
|
||||
+ * path used for 1:1 sw_format matches (NV12, NV16, AFBC handled by DRM).
|
||||
+ */
|
||||
+ if (hwfc->sw_format == AV_PIX_FMT_YUV420P10) {
|
||||
+ /*
|
||||
+ * Only P010 is advertised by transfer_get_formats for this sw_format;
|
||||
+ * a caller that bypasses get_formats and asks for anything else would
|
||||
+ * silently corrupt output via av_frame_copy on NV15-packed bytes.
|
||||
+ * Reject explicitly.
|
||||
+ */
|
||||
+ if (dst->format != AV_PIX_FMT_P010) {
|
||||
+ ret = AVERROR(ENOSYS);
|
||||
+ goto fail;
|
||||
+ }
|
||||
+
|
||||
+ map->format = hwfc->sw_format;
|
||||
+ ret = v4l2request_map_frame(hwfc, map, src);
|
||||
+ if (ret)
|
||||
+ goto fail;
|
||||
+
|
||||
+ v4l2request_nv15_unpack_plane_to_p010(map->data[0],
|
||||
+ (uint16_t *)dst->data[0],
|
||||
+ dst->width, dst->height,
|
||||
+ map->linesize[0],
|
||||
+ dst->linesize[0]);
|
||||
+ /* NV15 chroma plane is W × H/2 samples (4:2:0, UV interleaved). */
|
||||
+ v4l2request_nv15_unpack_plane_to_p010(map->data[1],
|
||||
+ (uint16_t *)dst->data[1],
|
||||
+ dst->width, dst->height / 2,
|
||||
+ map->linesize[1],
|
||||
+ dst->linesize[1]);
|
||||
+ ret = 0;
|
||||
+ goto fail;
|
||||
+ }
|
||||
+
|
||||
map->format = dst->format;
|
||||
|
||||
ret = v4l2request_map_frame(hwfc, map, src);
|
||||
--
|
||||
2.47.3
|
||||
|
||||
@@ -0,0 +1,137 @@
|
||||
From f760c0541586f43334c02611fcb4c212c08ad576 Mon Sep 17 00:00:00 2001
|
||||
From: Markus Fritsche <mfritsche@reauktion.de>
|
||||
Date: Thu, 21 May 2026 21:40:22 +0200
|
||||
Subject: [PATCH] avcodec/aarch64/h264dsp: route H.264 4x4 IDCT through
|
||||
daedalus-fourier
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
H264DSPContext.idct_add (called per 4x4 block from the intra-4x4
|
||||
decode path in h264_mb.c) now dispatches through
|
||||
daedalus_recipe_dispatch_h264_idct4 instead of ff_h264_idct_add_neon.
|
||||
|
||||
The recipe layer picks the substrate; for cycle 6 (H.264 IDCT 4x4)
|
||||
the recipe is CPU NEON, so this is effectively a NEON-to-NEON
|
||||
substitution with one extra dispatch call and recipe-table lookup.
|
||||
Provides the first end-to-end exercise of the daedalus-fourier
|
||||
kernel pack inside the libavcodec.so decode hot path; follow-up
|
||||
patches wire IDCT 8x8, luma-v deblock, and qpel mc20.
|
||||
|
||||
The library context is process-global, lazily initialised under
|
||||
pthread_once on first call. We pick the no-QPU constructor because
|
||||
libavcodec.so is loaded into arbitrary host processes
|
||||
(firefox-fourier, mpv-fourier, daedalus_v4l2_daemon, ...) and we
|
||||
cannot assume the host has a usable Vulkan instance. Higher cycles
|
||||
(deblock luma-v, MC) that benefit from the QPU will provision their
|
||||
own recipe-selected context once that path is wired.
|
||||
|
||||
Bulk paths (idct_add16, idct_add16intra, idct_add8 — used for
|
||||
non-intra4x4 macroblocks) remain on the stock NEON .S implementations
|
||||
and will be batched through daedalus_recipe_dispatch_h264_idct4 with
|
||||
n_blocks>1 in a follow-up.
|
||||
|
||||
Bit-exact against ff_h264_idct_add_neon (daedalus-fourier cycle 6
|
||||
green; see marfrit/daedalus-fourier/CYCLE_LOGS.md).
|
||||
|
||||
Refs reauktion/daedalus-v4l2#11 — substitution arc step 2.
|
||||
---
|
||||
libavcodec/aarch64/Makefile | 3 +-
|
||||
libavcodec/aarch64/h264_idct_daedalus.c | 49 +++++++++++++++++++++++
|
||||
libavcodec/aarch64/h264dsp_init_aarch64.c | 3 +-
|
||||
3 files changed, 53 insertions(+), 2 deletions(-)
|
||||
create mode 100644 libavcodec/aarch64/h264_idct_daedalus.c
|
||||
|
||||
diff --git a/libavcodec/aarch64/Makefile b/libavcodec/aarch64/Makefile
|
||||
index 41ab025..7b95fb1 100644
|
||||
--- a/libavcodec/aarch64/Makefile
|
||||
+++ b/libavcodec/aarch64/Makefile
|
||||
@@ -3,7 +3,8 @@ OBJS-$(CONFIG_AC3DSP) += aarch64/ac3dsp_init_aarch64.o
|
||||
OBJS-$(CONFIG_FDCTDSP) += aarch64/fdctdsp_init_aarch64.o
|
||||
OBJS-$(CONFIG_FMTCONVERT) += aarch64/fmtconvert_init.o
|
||||
OBJS-$(CONFIG_H264CHROMA) += aarch64/h264chroma_init_aarch64.o
|
||||
-OBJS-$(CONFIG_H264DSP) += aarch64/h264dsp_init_aarch64.o
|
||||
+OBJS-$(CONFIG_H264DSP) += aarch64/h264dsp_init_aarch64.o \
|
||||
+ aarch64/h264_idct_daedalus.o
|
||||
OBJS-$(CONFIG_HUFFYUVDSP) += aarch64/huffyuvdsp_init_aarch64.o
|
||||
OBJS-$(CONFIG_H264PRED) += aarch64/h264pred_init.o
|
||||
OBJS-$(CONFIG_H264QPEL) += aarch64/h264qpel_init_aarch64.o
|
||||
diff --git a/libavcodec/aarch64/h264_idct_daedalus.c b/libavcodec/aarch64/h264_idct_daedalus.c
|
||||
new file mode 100644
|
||||
index 0000000..538d223
|
||||
--- /dev/null
|
||||
+++ b/libavcodec/aarch64/h264_idct_daedalus.c
|
||||
@@ -0,0 +1,49 @@
|
||||
+/*
|
||||
+ * H.264 4x4 IDCT + add — daedalus-fourier substitution shim.
|
||||
+ *
|
||||
+ * Routes H264DSPContext.idct_add through
|
||||
+ * daedalus_recipe_dispatch_h264_idct4 instead of ff_h264_idct_add_neon.
|
||||
+ * The recipe layer picks the substrate (CPU NEON by default for
|
||||
+ * cycle 6; future cycles may dispatch to V3D opportunistically).
|
||||
+ *
|
||||
+ * FFmpeg's 4x4 block memory layout matches daedalus's column-major
|
||||
+ * convention: block[r + 4*c] = coefficient at (row r, col c). Both
|
||||
+ * sides destructively zero the block after the transform.
|
||||
+ *
|
||||
+ * The library context is process-global and lazily initialised under
|
||||
+ * pthread_once. We pick the no-QPU constructor here because
|
||||
+ * libavcodec.so is loaded into arbitrary host processes
|
||||
+ * (firefox-fourier, mpv-fourier, daedalus_v4l2_daemon, ...) and we
|
||||
+ * cannot assume the host has a usable Vulkan instance. Higher cycles
|
||||
+ * (deblock, MC) that benefit from the QPU initialise their own
|
||||
+ * recipe-selected context once that path is wired.
|
||||
+ */
|
||||
+
|
||||
+#include <pthread.h>
|
||||
+#include <stddef.h>
|
||||
+#include <stdint.h>
|
||||
+
|
||||
+#include <daedalus.h>
|
||||
+
|
||||
+#include "libavutil/attributes.h"
|
||||
+#include "libavcodec/h264dsp.h"
|
||||
+
|
||||
+static daedalus_ctx *g_dctx;
|
||||
+static pthread_once_t g_dctx_once = PTHREAD_ONCE_INIT;
|
||||
+
|
||||
+static void daedalus_ctx_init_once(void)
|
||||
+{
|
||||
+ g_dctx = daedalus_ctx_create_no_qpu();
|
||||
+}
|
||||
+
|
||||
+void ff_h264_idct_add_daedalus(uint8_t *dst, int16_t *block, int stride);
|
||||
+
|
||||
+void ff_h264_idct_add_daedalus(uint8_t *dst, int16_t *block, int stride)
|
||||
+{
|
||||
+ static const daedalus_h264_block_meta meta = { .dst_off = 0 };
|
||||
+
|
||||
+ pthread_once(&g_dctx_once, daedalus_ctx_init_once);
|
||||
+
|
||||
+ daedalus_recipe_dispatch_h264_idct4(g_dctx, dst, (size_t)stride,
|
||||
+ block, 1, &meta);
|
||||
+}
|
||||
diff --git a/libavcodec/aarch64/h264dsp_init_aarch64.c b/libavcodec/aarch64/h264dsp_init_aarch64.c
|
||||
index c684574..b993df2 100644
|
||||
--- a/libavcodec/aarch64/h264dsp_init_aarch64.c
|
||||
+++ b/libavcodec/aarch64/h264dsp_init_aarch64.c
|
||||
@@ -66,6 +66,7 @@ void ff_biweight_h264_pixels_4_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride
|
||||
int weights, int offset);
|
||||
|
||||
void ff_h264_idct_add_neon(uint8_t *dst, int16_t *block, int stride);
|
||||
+void ff_h264_idct_add_daedalus(uint8_t *dst, int16_t *block, int stride);
|
||||
void ff_h264_idct_dc_add_neon(uint8_t *dst, int16_t *block, int stride);
|
||||
void ff_h264_idct_add16_neon(uint8_t *dst, const int *block_offset,
|
||||
int16_t *block, int stride,
|
||||
@@ -139,7 +140,7 @@ av_cold void ff_h264dsp_init_aarch64(H264DSPContext *c, const int bit_depth,
|
||||
c->biweight_pixels_tab[1] = ff_biweight_h264_pixels_8_neon;
|
||||
c->biweight_pixels_tab[2] = ff_biweight_h264_pixels_4_neon;
|
||||
|
||||
- c->idct_add = ff_h264_idct_add_neon;
|
||||
+ c->idct_add = ff_h264_idct_add_daedalus;
|
||||
c->idct_dc_add = ff_h264_idct_dc_add_neon;
|
||||
c->idct_add16 = ff_h264_idct_add16_neon;
|
||||
c->idct_add16intra = ff_h264_idct_add16intra_neon;
|
||||
--
|
||||
2.47.3
|
||||
|
||||
@@ -0,0 +1,107 @@
|
||||
From 1b286ddb4efaca26ec9b9e290e989fec77dc1c77 Mon Sep 17 00:00:00 2001
|
||||
From: Markus Fritsche <mfritsche@reauktion.de>
|
||||
Date: Fri, 22 May 2026 10:18:21 +0200
|
||||
Subject: [PATCH] avcodec/aarch64/h264dsp: route H.264 8x8 IDCT through
|
||||
daedalus-fourier
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
H264DSPContext.idct8_add (called per 8x8 block from the High-profile
|
||||
intra-8x8-DCT decode path in h264_mb.c) now dispatches through
|
||||
daedalus_recipe_dispatch_h264_idct8 instead of ff_h264_idct8_add_neon.
|
||||
|
||||
The recipe layer picks the substrate; for cycle 7 (H.264 IDCT 8x8)
|
||||
the recipe is CPU NEON, so this is effectively a NEON-to-NEON
|
||||
substitution layered on top of the cycle-6 IDCT 4x4 wiring. Same
|
||||
pthread_once global context, same destructive-zero semantics; FFmpeg
|
||||
column-major 8x8 storage block[r + 8*c] matches daedalus's convention.
|
||||
|
||||
Bulk path c->idct8_add4 (used for inter 8x8-DCT macroblocks) remains
|
||||
on the in-tree NEON .S code and will be batched through
|
||||
daedalus_recipe_dispatch_h264_idct8 with n_blocks>1 in a follow-up.
|
||||
|
||||
Bit-exact against ff_h264_idct8_add_neon (daedalus-fourier cycle 7
|
||||
green).
|
||||
|
||||
Refs reauktion/daedalus-v4l2#11 — substitution arc step 2 cycle 7.
|
||||
---
|
||||
libavcodec/aarch64/h264_idct_daedalus.c | 29 ++++++++++++++++-------
|
||||
libavcodec/aarch64/h264dsp_init_aarch64.c | 3 ++-
|
||||
2 files changed, 23 insertions(+), 9 deletions(-)
|
||||
|
||||
diff --git a/libavcodec/aarch64/h264_idct_daedalus.c b/libavcodec/aarch64/h264_idct_daedalus.c
|
||||
index 538d223..cbb98af 100644
|
||||
--- a/libavcodec/aarch64/h264_idct_daedalus.c
|
||||
+++ b/libavcodec/aarch64/h264_idct_daedalus.c
|
||||
@@ -1,14 +1,16 @@
|
||||
/*
|
||||
- * H.264 4x4 IDCT + add — daedalus-fourier substitution shim.
|
||||
+ * H.264 4x4 / 8x8 IDCT + add — daedalus-fourier substitution shims.
|
||||
*
|
||||
- * Routes H264DSPContext.idct_add through
|
||||
- * daedalus_recipe_dispatch_h264_idct4 instead of ff_h264_idct_add_neon.
|
||||
- * The recipe layer picks the substrate (CPU NEON by default for
|
||||
- * cycle 6; future cycles may dispatch to V3D opportunistically).
|
||||
+ * Routes H264DSPContext.idct_add → daedalus_recipe_dispatch_h264_idct4
|
||||
+ * H264DSPContext.idct8_add → daedalus_recipe_dispatch_h264_idct8
|
||||
+ * instead of the in-tree ff_h264_idct{,8}_add_neon assembly. The
|
||||
+ * recipe layer picks the substrate (CPU NEON by default for cycles
|
||||
+ * 6 + 7; future cycles may dispatch to V3D opportunistically).
|
||||
*
|
||||
- * FFmpeg's 4x4 block memory layout matches daedalus's column-major
|
||||
- * convention: block[r + 4*c] = coefficient at (row r, col c). Both
|
||||
- * sides destructively zero the block after the transform.
|
||||
+ * FFmpeg's 4x4 and 8x8 block memory layouts match daedalus's
|
||||
+ * column-major convention: block[r + N*c] = coefficient at
|
||||
+ * (row r, col c) for N ∈ {4, 8}. Both sides destructively zero the
|
||||
+ * block after the transform.
|
||||
*
|
||||
* The library context is process-global and lazily initialised under
|
||||
* pthread_once. We pick the no-QPU constructor here because
|
||||
@@ -37,6 +39,7 @@ static void daedalus_ctx_init_once(void)
|
||||
}
|
||||
|
||||
void ff_h264_idct_add_daedalus(uint8_t *dst, int16_t *block, int stride);
|
||||
+void ff_h264_idct8_add_daedalus(uint8_t *dst, int16_t *block, int stride);
|
||||
|
||||
void ff_h264_idct_add_daedalus(uint8_t *dst, int16_t *block, int stride)
|
||||
{
|
||||
@@ -47,3 +50,13 @@ void ff_h264_idct_add_daedalus(uint8_t *dst, int16_t *block, int stride)
|
||||
daedalus_recipe_dispatch_h264_idct4(g_dctx, dst, (size_t)stride,
|
||||
block, 1, &meta);
|
||||
}
|
||||
+
|
||||
+void ff_h264_idct8_add_daedalus(uint8_t *dst, int16_t *block, int stride)
|
||||
+{
|
||||
+ static const daedalus_h264_block_meta meta = { .dst_off = 0 };
|
||||
+
|
||||
+ pthread_once(&g_dctx_once, daedalus_ctx_init_once);
|
||||
+
|
||||
+ daedalus_recipe_dispatch_h264_idct8(g_dctx, dst, (size_t)stride,
|
||||
+ block, 1, &meta);
|
||||
+}
|
||||
diff --git a/libavcodec/aarch64/h264dsp_init_aarch64.c b/libavcodec/aarch64/h264dsp_init_aarch64.c
|
||||
index b993df2..741e551 100644
|
||||
--- a/libavcodec/aarch64/h264dsp_init_aarch64.c
|
||||
+++ b/libavcodec/aarch64/h264dsp_init_aarch64.c
|
||||
@@ -79,6 +79,7 @@ void ff_h264_idct_add8_neon(uint8_t **dest, const int *block_offset,
|
||||
const uint8_t nnzc[15 * 8]);
|
||||
|
||||
void ff_h264_idct8_add_neon(uint8_t *dst, int16_t *block, int stride);
|
||||
+void ff_h264_idct8_add_daedalus(uint8_t *dst, int16_t *block, int stride);
|
||||
void ff_h264_idct8_dc_add_neon(uint8_t *dst, int16_t *block, int stride);
|
||||
void ff_h264_idct8_add4_neon(uint8_t *dst, const int *block_offset,
|
||||
int16_t *block, int stride,
|
||||
@@ -146,7 +147,7 @@ av_cold void ff_h264dsp_init_aarch64(H264DSPContext *c, const int bit_depth,
|
||||
c->idct_add16intra = ff_h264_idct_add16intra_neon;
|
||||
if (chroma_format_idc <= 1)
|
||||
c->idct_add8 = ff_h264_idct_add8_neon;
|
||||
- c->idct8_add = ff_h264_idct8_add_neon;
|
||||
+ c->idct8_add = ff_h264_idct8_add_daedalus;
|
||||
c->idct8_dc_add = ff_h264_idct8_dc_add_neon;
|
||||
c->idct8_add4 = ff_h264_idct8_add4_neon;
|
||||
} else if (have_neon(cpu_flags) && bit_depth == 10) {
|
||||
--
|
||||
2.47.3
|
||||
|
||||
+121
@@ -0,0 +1,121 @@
|
||||
From 68731c41d7ea68be0e912b128cb4e71fb56e8263 Mon Sep 17 00:00:00 2001
|
||||
From: Markus Fritsche <mfritsche@reauktion.de>
|
||||
Date: Fri, 22 May 2026 12:15:16 +0200
|
||||
Subject: [PATCH] avcodec/aarch64/h264dsp: route H.264 luma-v deblock through
|
||||
daedalus-fourier
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
H264DSPContext.v_loop_filter_luma (non-intra bS<4 vertical luma
|
||||
deblock, called per macroblock-row edge from the slice deblock
|
||||
loop) now dispatches through
|
||||
daedalus_recipe_dispatch_h264_deblock_luma_v instead of
|
||||
ff_h264_v_loop_filter_luma_neon.
|
||||
|
||||
The recipe layer picks the substrate; for cycle 8 the daedalus
|
||||
docstring marks the kernel "CPU primary; QPU opportunistic", but
|
||||
the libavcodec.so context here is built with
|
||||
daedalus_ctx_create_no_qpu — process-global pthread_once init,
|
||||
shared with cycles 6/7. QPU opportunism stays gated off until a
|
||||
follow-up adds an explicit feature flag (no implicit Vulkan init
|
||||
in arbitrary host processes). In the meantime cycle 8 is a
|
||||
plumbing-only substitution, NEON-to-NEON via the daedalus recipe.
|
||||
|
||||
Intra (bS=4) loop filter — c->v_loop_filter_luma_intra — stays on
|
||||
the in-tree NEON .S code; daedalus's daedalus_h264_deblock_meta
|
||||
only covers the non-intra path per its docstring.
|
||||
|
||||
FFmpeg `int alpha/beta/int8_t tc0[4]` → daedalus_h264_deblock_meta
|
||||
(int32_t alpha/beta + inline int8_t tc0[4]). pix already points
|
||||
to row 0 of the bottom block per FFmpeg's deblock convention,
|
||||
satisfying daedalus's `dst_off >= 4 * dst_stride` constraint.
|
||||
|
||||
Refs reauktion/daedalus-v4l2#11 — substitution arc step 2 cycle 8.
|
||||
---
|
||||
libavcodec/aarch64/h264_idct_daedalus.c | 36 +++++++++++++++++++----
|
||||
libavcodec/aarch64/h264dsp_init_aarch64.c | 4 ++-
|
||||
2 files changed, 33 insertions(+), 7 deletions(-)
|
||||
|
||||
diff --git a/libavcodec/aarch64/h264_idct_daedalus.c b/libavcodec/aarch64/h264_idct_daedalus.c
|
||||
index cbb98af..92365fa 100644
|
||||
--- a/libavcodec/aarch64/h264_idct_daedalus.c
|
||||
+++ b/libavcodec/aarch64/h264_idct_daedalus.c
|
||||
@@ -1,11 +1,14 @@
|
||||
/*
|
||||
- * H.264 4x4 / 8x8 IDCT + add — daedalus-fourier substitution shims.
|
||||
+ * H.264 4x4 / 8x8 IDCT + luma-v deblock — daedalus-fourier substitution shims.
|
||||
*
|
||||
- * Routes H264DSPContext.idct_add → daedalus_recipe_dispatch_h264_idct4
|
||||
- * H264DSPContext.idct8_add → daedalus_recipe_dispatch_h264_idct8
|
||||
- * instead of the in-tree ff_h264_idct{,8}_add_neon assembly. The
|
||||
- * recipe layer picks the substrate (CPU NEON by default for cycles
|
||||
- * 6 + 7; future cycles may dispatch to V3D opportunistically).
|
||||
+ * Routes H264DSPContext.idct_add → daedalus_recipe_dispatch_h264_idct4
|
||||
+ * H264DSPContext.idct8_add → daedalus_recipe_dispatch_h264_idct8
|
||||
+ * H264DSPContext.v_loop_filter_luma → daedalus_recipe_dispatch_h264_deblock_luma_v
|
||||
+ * instead of the in-tree ff_h264_*_neon assembly. The recipe layer
|
||||
+ * picks the substrate (CPU NEON for cycles 6 + 7 by default; cycle 8
|
||||
+ * is CPU primary with QPU opportunistic — the ctx below is no-QPU,
|
||||
+ * so cycle 8 stays on the CPU NEON path until a separate change
|
||||
+ * gates QPU init on a daedalus-fourier feature flag).
|
||||
*
|
||||
* FFmpeg's 4x4 and 8x8 block memory layouts match daedalus's
|
||||
* column-major convention: block[r + N*c] = coefficient at
|
||||
@@ -40,6 +43,8 @@ static void daedalus_ctx_init_once(void)
|
||||
|
||||
void ff_h264_idct_add_daedalus(uint8_t *dst, int16_t *block, int stride);
|
||||
void ff_h264_idct8_add_daedalus(uint8_t *dst, int16_t *block, int stride);
|
||||
+void ff_h264_v_loop_filter_luma_daedalus(uint8_t *pix, ptrdiff_t stride,
|
||||
+ int alpha, int beta, int8_t *tc0);
|
||||
|
||||
void ff_h264_idct_add_daedalus(uint8_t *dst, int16_t *block, int stride)
|
||||
{
|
||||
@@ -60,3 +65,22 @@ void ff_h264_idct8_add_daedalus(uint8_t *dst, int16_t *block, int stride)
|
||||
daedalus_recipe_dispatch_h264_idct8(g_dctx, dst, (size_t)stride,
|
||||
block, 1, &meta);
|
||||
}
|
||||
+
|
||||
+void ff_h264_v_loop_filter_luma_daedalus(uint8_t *pix, ptrdiff_t stride,
|
||||
+ int alpha, int beta, int8_t *tc0)
|
||||
+{
|
||||
+ daedalus_h264_deblock_meta meta = {
|
||||
+ .dst_off = 0,
|
||||
+ .alpha = alpha,
|
||||
+ .beta = beta,
|
||||
+ };
|
||||
+ meta.tc0[0] = tc0[0];
|
||||
+ meta.tc0[1] = tc0[1];
|
||||
+ meta.tc0[2] = tc0[2];
|
||||
+ meta.tc0[3] = tc0[3];
|
||||
+
|
||||
+ pthread_once(&g_dctx_once, daedalus_ctx_init_once);
|
||||
+
|
||||
+ daedalus_recipe_dispatch_h264_deblock_luma_v(g_dctx, pix, (size_t)stride,
|
||||
+ 1, &meta);
|
||||
+}
|
||||
diff --git a/libavcodec/aarch64/h264dsp_init_aarch64.c b/libavcodec/aarch64/h264dsp_init_aarch64.c
|
||||
index 741e551..85ac381 100644
|
||||
--- a/libavcodec/aarch64/h264dsp_init_aarch64.c
|
||||
+++ b/libavcodec/aarch64/h264dsp_init_aarch64.c
|
||||
@@ -27,6 +27,8 @@
|
||||
|
||||
void ff_h264_v_loop_filter_luma_neon(uint8_t *pix, ptrdiff_t stride, int alpha,
|
||||
int beta, int8_t *tc0);
|
||||
+void ff_h264_v_loop_filter_luma_daedalus(uint8_t *pix, ptrdiff_t stride,
|
||||
+ int alpha, int beta, int8_t *tc0);
|
||||
void ff_h264_h_loop_filter_luma_neon(uint8_t *pix, ptrdiff_t stride, int alpha,
|
||||
int beta, int8_t *tc0);
|
||||
void ff_h264_v_loop_filter_luma_intra_neon(uint8_t *pix, ptrdiff_t stride, int alpha,
|
||||
@@ -114,7 +116,7 @@ av_cold void ff_h264dsp_init_aarch64(H264DSPContext *c, const int bit_depth,
|
||||
int cpu_flags = av_get_cpu_flags();
|
||||
|
||||
if (have_neon(cpu_flags) && bit_depth == 8) {
|
||||
- c->v_loop_filter_luma = ff_h264_v_loop_filter_luma_neon;
|
||||
+ c->v_loop_filter_luma = ff_h264_v_loop_filter_luma_daedalus;
|
||||
c->h_loop_filter_luma = ff_h264_h_loop_filter_luma_neon;
|
||||
c->v_loop_filter_luma_intra= ff_h264_v_loop_filter_luma_intra_neon;
|
||||
c->h_loop_filter_luma_intra= ff_h264_h_loop_filter_luma_intra_neon;
|
||||
--
|
||||
2.47.3
|
||||
|
||||
@@ -0,0 +1,82 @@
|
||||
From 0d1292ea99bc4e5fa2da438259fa01a2374e3e04 Mon Sep 17 00:00:00 2001
|
||||
From: Markus Fritsche <mfritsche@reauktion.de>
|
||||
Date: Fri, 22 May 2026 14:18:25 +0200
|
||||
Subject: [PATCH] avcodec/h264: restore AV_CODEC_FLAG_LOW_DELAY semantics
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
FFmpeg 8.x dropped the H.264 decoder's low_delay path —
|
||||
AV_CODEC_FLAG_LOW_DELAY no longer prevents
|
||||
h264_select_output_frame from running the display-order DPB
|
||||
output queue. V4L2-stateless-style consumers (daedalus-v4l2
|
||||
daemon, libva-v4l2-request-fourier) that set the flag end up
|
||||
seeing the 2-1-4-3 pair-swap pattern on B-frame streams again.
|
||||
|
||||
Restore the documented semantics:
|
||||
|
||||
- Early-exit at the top of h264_select_output_frame when the
|
||||
flag is set: emit the just-decoded picture immediately as
|
||||
next_output_pic, mirror the corruption / recovery-point
|
||||
tracking the main path performs, and skip the entire
|
||||
delayed_pic[] / POC reorder machinery.
|
||||
|
||||
- Suppress the SPS-driven has_b_frames clobber in
|
||||
h264_field_start when the flag is set, so the per-slice
|
||||
bitstream_restriction_flag re-pickup cannot reintroduce a
|
||||
nonzero reorder buffer mid-stream.
|
||||
|
||||
This is a fork-only change required by the daedalus-v4l2 daemon's
|
||||
one-frame-per-send_packet contract; upstream FFmpeg consumers that
|
||||
expect display-order output remain untouched (flag default = off).
|
||||
|
||||
Refs reauktion/daedalus-v4l2#11 — substitution arc step 2 deblock
|
||||
+ flag-restoration follow-up.
|
||||
---
|
||||
libavcodec/h264_slice.c | 23 +++++++++++++++++++++++
|
||||
1 file changed, 23 insertions(+)
|
||||
|
||||
diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c
|
||||
index 97fab70..a7bfbd6 100644
|
||||
--- a/libavcodec/h264_slice.c
|
||||
+++ b/libavcodec/h264_slice.c
|
||||
@@ -1308,6 +1308,28 @@ static int h264_select_output_frame(H264Context *h)
|
||||
cur->mmco_reset = h->mmco_reset;
|
||||
h->mmco_reset = 0;
|
||||
|
||||
+ /* AV_CODEC_FLAG_LOW_DELAY restore (FFmpeg 8.x dropped the H.264
|
||||
+ * decoder's low_delay path). Bypass the display-order DPB
|
||||
+ * output queue: emit the just-decoded picture immediately, in
|
||||
+ * decode order, one per send_packet. V4L2-stateless-style
|
||||
+ * consumers (daedalus-v4l2 daemon, libva-v4l2-request-fourier)
|
||||
+ * do their own POC-based reorder downstream and require this
|
||||
+ * behaviour. */
|
||||
+ if (h->avctx->flags & AV_CODEC_FLAG_LOW_DELAY) {
|
||||
+ h->next_output_pic = cur;
|
||||
+ h->next_outputed_poc = cur->poc;
|
||||
+ h->frame_recovered |= cur->recovered;
|
||||
+ cur->recovered |= h->frame_recovered & FRAME_RECOVERED_SEI;
|
||||
+ if (!cur->recovered) {
|
||||
+ if (!(h->avctx->flags & AV_CODEC_FLAG_OUTPUT_CORRUPT) &&
|
||||
+ !(h->avctx->flags2 & AV_CODEC_FLAG2_SHOW_ALL))
|
||||
+ h->next_output_pic = NULL;
|
||||
+ else
|
||||
+ cur->f->flags |= AV_FRAME_FLAG_CORRUPT;
|
||||
+ }
|
||||
+ return 0;
|
||||
+ }
|
||||
+
|
||||
if (sps->bitstream_restriction_flag ||
|
||||
h->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT) {
|
||||
h->avctx->has_b_frames = FFMAX(h->avctx->has_b_frames, sps->num_reorder_frames);
|
||||
@@ -1415,6 +1437,7 @@ static int h264_field_start(H264Context *h, const H264SliceContext *sl,
|
||||
sps = h->ps.sps;
|
||||
|
||||
if (sps->bitstream_restriction_flag &&
|
||||
+ !(h->avctx->flags & AV_CODEC_FLAG_LOW_DELAY) &&
|
||||
h->avctx->has_b_frames < sps->num_reorder_frames) {
|
||||
h->avctx->has_b_frames = sps->num_reorder_frames;
|
||||
}
|
||||
--
|
||||
2.47.3
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user