forked from marfrit/marfrit-packages
Compare commits
11 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 70c8c2b417 | |||
| 793187ff9e | |||
| 42bf6b1633 | |||
| 40719efc43 | |||
| e540384f50 | |||
| 9ca97374c8 | |||
| 902e855d92 | |||
| 64269d69ee | |||
| e976c88016 | |||
| 29cc145d44 | |||
| b16a3f1a77 |
+19
-30
@@ -1144,39 +1144,25 @@ jobs:
|
||||
echo "$result" >> "$GITHUB_OUTPUT"
|
||||
echo "decision: $result"
|
||||
|
||||
- name: install build-deps (sans ffmpeg — see [marfrit] step)
|
||||
- name: install build-deps
|
||||
if: steps.skip-check.outputs.skip != '1'
|
||||
run: |
|
||||
set -e
|
||||
retry() { for i in 1 2 3; do "$@" && return 0; rc=$?; echo "retry $i (exit=$rc)" >&2; sleep $((i*5)); done; return 1; }
|
||||
# Do NOT pull stock 'ffmpeg' here: the arch-aarch64 runner has
|
||||
# ffmpeg-v4l2-request-fourier pre-installed from the mpv-aarch64
|
||||
# job (configured via [marfrit]), and pacman -S ffmpeg would
|
||||
# conflict on the libav* drop-in. Daedalus build only needs
|
||||
# libavcodec/libavformat headers, which the fourier package
|
||||
# already supplies. Keep cmake/ninja/pkgconf/libdrm here; the
|
||||
# ffmpeg-dev equivalent comes via the next step.
|
||||
retry pacman -Syu --noconfirm --needed \
|
||||
dpkg openssh rsync curl base-devel git cmake ninja pkgconf \
|
||||
libdrm
|
||||
|
||||
- name: ensure ffmpeg-v4l2-request-fourier installed (link-time ABI source)
|
||||
if: steps.skip-check.outputs.skip != '1'
|
||||
run: |
|
||||
set -e
|
||||
# Idempotent: pre-install the marfrit fourier ffmpeg so cmake
|
||||
# finds libavcodec / libavformat / libavutil headers + .so's.
|
||||
# Mirrors mpv-fourier-debian's [marfrit] step.
|
||||
curl -sLo /tmp/marfrit.gpg https://packages.reauktion.de/marfrit.gpg
|
||||
pacman-key --add /tmp/marfrit.gpg
|
||||
pacman-key --lsign-key 92D5E96D8F63C75E4116AA1FF5C8C4603D0D250C
|
||||
rm -f /tmp/marfrit.gpg
|
||||
if ! grep -q '^\[marfrit\]' /etc/pacman.conf; then
|
||||
printf '\n[marfrit]\nServer = https://packages.reauktion.de/arch/$arch\nSigLevel = Required\n' >> /etc/pacman.conf
|
||||
fi
|
||||
pacman -Sy --noconfirm
|
||||
rm -f /var/cache/pacman/pkg/ffmpeg-v4l2-request-fourier-*-aarch64.pkg.tar.*
|
||||
printf 'y\ny\ny\n' | pacman -S --needed marfrit/ffmpeg-v4l2-request-fourier
|
||||
export DEBIAN_FRONTEND=noninteractive
|
||||
retry apt-get update -qq
|
||||
# libav*-dev provide the headers for the libraries the daedalus daemon dlopens at
|
||||
# runtime — Debian's stock packages match the trixie ABI the
|
||||
# daemon will encounter on Pi 5 hosts (both ship libavcodec
|
||||
# 61.x). The fourier ffmpeg fork isn't needed here; the
|
||||
# daemon never link-binds against libav (Option γ — dlopen
|
||||
# at runtime), so any header set with the right struct
|
||||
# definitions works.
|
||||
retry apt-get install -y --no-install-recommends \
|
||||
build-essential cmake ninja-build pkg-config git \
|
||||
libavcodec-dev libavformat-dev libavutil-dev libdrm-dev \
|
||||
linux-libc-dev \
|
||||
curl ca-certificates openssh-client rsync dpkg-dev
|
||||
|
||||
- name: install hertz deploy ssh key
|
||||
if: steps.skip-check.outputs.skip != '1'
|
||||
@@ -1238,7 +1224,10 @@ jobs:
|
||||
run: |
|
||||
set -e
|
||||
retry() { for i in 1 2 3; do "$@" && return 0; rc=$?; echo "retry $i (exit=$rc)" >&2; sleep $((i*5)); done; return 1; }
|
||||
retry pacman -Syu --noconfirm --needed dpkg openssh rsync curl tar gzip
|
||||
export DEBIAN_FRONTEND=noninteractive
|
||||
retry apt-get update -qq
|
||||
retry apt-get install -y --no-install-recommends \
|
||||
dpkg-dev openssh-client rsync curl ca-certificates tar gzip
|
||||
|
||||
- name: install hertz deploy ssh key
|
||||
if: steps.skip-check.outputs.skip != '1'
|
||||
|
||||
@@ -18,10 +18,10 @@ _module=daedalus_v4l2
|
||||
|
||||
# Same pin as arch/daedalus-v4l2 — keep kernel module + daemon
|
||||
# bit-versioned together so the chardev wire protocol stays in sync.
|
||||
_commit=481279c9bffd19e32c8f3299897e9b63fc5a24aa
|
||||
_commit=462aa4b4804972189d2f1b1ab5782f1ca1e8c423
|
||||
|
||||
pkgver=0.1.0.r18.481279c
|
||||
pkgrel=1 # reset for new upstream pin (481279c — Phase 8.13 close)
|
||||
pkgver=0.1.0.r22.462aa4b
|
||||
pkgrel=1 # reset for new upstream pin (462aa4b — kernel v4l2_ctrl_request_setup() control-binding fix)
|
||||
pkgdesc="V4L2 stateless decoder shim kernel module (DKMS) — Pi 5 / CM5"
|
||||
arch=('any')
|
||||
url="https://git.reauktion.de/reauktion/daedalus-v4l2"
|
||||
|
||||
@@ -21,12 +21,12 @@ _upstreampkg=daedalus-v4l2
|
||||
# ffmpeg -hwaccel vaapi → libva → /dev/video0 → daemon path lands a
|
||||
# pixel-correct decoded frame back in ffmpeg. Promote to a later pin
|
||||
# only after a future phase closes cleanly.
|
||||
_commit=481279c9bffd19e32c8f3299897e9b63fc5a24aa
|
||||
_commit=462aa4b4804972189d2f1b1ab5782f1ca1e8c423
|
||||
|
||||
# 0.1.0 (pre-1.0) + commit count + short sha. Bump the .Y on each
|
||||
# Phase 8.x close. pkgver() recomputes at build time.
|
||||
pkgver=0.1.0.r18.481279c
|
||||
pkgrel=1 # reset for new upstream pin (481279c — Phase 8.13 close)
|
||||
pkgver=0.1.0.r22.462aa4b
|
||||
pkgrel=1 # reset for new upstream pin (462aa4b — kernel v4l2_ctrl_request_setup() control-binding fix)
|
||||
pkgdesc="Userspace daemon for the daedalus-v4l2 V4L2 stateless decoder shim (VP9/AV1/H.264 on Pi 5 / CM5)"
|
||||
arch=('aarch64')
|
||||
url="https://git.reauktion.de/reauktion/daedalus-v4l2"
|
||||
|
||||
@@ -24,31 +24,29 @@ pkgname=libva-v4l2-request-fourier
|
||||
epoch=1
|
||||
_upstreampkg=libva-v4l2-request
|
||||
|
||||
# Pin the fork tip. de27e95 = "v4l2: log error_idx + failing ctrl id
|
||||
# on S_EXT_CTRLS failure" — Phase 8.13 diagnostic that surfaced the
|
||||
# real root cause of the libva→daedalus_v4l2 request-completion
|
||||
# timeout (turned out the EINVAL libva was logging was a harmless
|
||||
# H264/HEVC probe; actual VP9 stateless control SET worked all along).
|
||||
# Pin the fork tip. c1bb444 = PR #9 merge "h264: max_num_ref_frames
|
||||
# fallback + libva-boundary instrumentation (#8)" — addresses the
|
||||
# libva-side portion of marfrit/libva-v4l2-request-fourier#8: the
|
||||
# daedalus_v4l2-via-libavcodec strict consumer rejected frames whose
|
||||
# sps.max_num_ref_frames was 0 (older ffmpeg-vaapi paths leave the
|
||||
# field unset and HW decoders tolerated it). Adds a per-profile spec
|
||||
# minimum fallback (counts valid DPB entries first; 1 for baseline /
|
||||
# 4 for main+high if even that is 0) and a one-line request_log at
|
||||
# h264_set_controls entry dumping raw VAAPI bitfields (seq_fields,
|
||||
# pic_fields, num_ref_frames, bit_depth_*) for disambiguating where
|
||||
# the still-open PPS-flag-zero portion of issue #8 originates.
|
||||
#
|
||||
# Prior pin (7ac934e) was iter38b — fresnel-fourier multi-device probe
|
||||
# + MAX_PROFILES bounds-check fix. de27e95 added the daedalus_v4l2
|
||||
# probe slot (b5b3acf), the meson option gate (2146341), and the
|
||||
# S_EXT_CTRLS diagnostic (de27e95 itself). c332d34 (LIBVA-1) added
|
||||
# the per-codec dispatch: rpi-hevc-dec + daedalus_v4l2 both probe each
|
||||
# other as alts, VP9/AV1/H.264 route to daedalus via new 'd' kind,
|
||||
# HEVC stays on 'p' (rpi-hevc-dec). 9898331 (LIBVA-2) completes that
|
||||
# by adding video_fd_daedalus to any_fd_supports_output_format's probe
|
||||
# array — without it, H.264/VP9/AV1 profiles never got advertised on
|
||||
# Pi 5 mixed deployments (rpi-hevc-dec primary, daedalus alt) and
|
||||
# ffmpeg bailed with "No support for codec h264 profile 578" before
|
||||
# the per-codec dispatch could fire.
|
||||
_commit=989833114a7708ad999dc68309cbc181d9913bdb
|
||||
# Prior pin (9898331) was the LIBVA-2 close — config-side video_fd_daedalus
|
||||
# added to the profile enumeration probe array. c332d34 (LIBVA-1) added
|
||||
# the per-codec dispatch. Earlier de27e95 (Phase 8.13 diagnostic),
|
||||
# 7ac934e (iter38b multi-device probe + MAX_PROFILES bounds-check fix).
|
||||
_commit=c1bb444d0731ca7ca831c7e01ec4b52ed736ca8e
|
||||
|
||||
# Project version from meson.build (1.0.0) + commit count + short sha,
|
||||
# matching the ffmpeg-v4l2-request-fourier convention. Recomputed at
|
||||
# build time by pkgver() below; the static value here is a placeholder
|
||||
# so AUR-style consumers see something coherent before src/ exists.
|
||||
pkgver=1.0.0.r380.9898331
|
||||
pkgver=1.0.0.r382.c1bb444
|
||||
pkgrel=1
|
||||
pkgdesc="VA-API backend for V4L2 stateless decoders (multiplanar fork — fourier umbrella)"
|
||||
arch=('aarch64')
|
||||
|
||||
@@ -0,0 +1,328 @@
|
||||
--- a/src/panfrost/vulkan/panvk_shader.h 2026-04-29 22:19:00.000000000 +0200
|
||||
+++ b/src/panfrost/vulkan/panvk_shader.h 2026-05-20 18:52:53.312698258 +0200
|
||||
@@ -150,6 +150,10 @@
|
||||
struct {
|
||||
#if PAN_ARCH < 9
|
||||
int32_t raw_vertex_offset;
|
||||
+ uint32_t num_vertices; /* iter13: XFB needs per-draw vertex count */
|
||||
+ /* aligned_u64 attribute below inserts the 4-byte alignment gap
|
||||
+ * after num_vertices automatically — no explicit pad needed. */
|
||||
+ aligned_u64 xfb_address[4]; /* iter13: 4 transform feedback buffer base addresses */
|
||||
#endif
|
||||
int32_t first_vertex;
|
||||
int32_t base_instance;
|
||||
--- a/src/panfrost/vulkan/panvk_vX_physical_device.c 2026-05-20 19:09:29.711145446 +0200
|
||||
+++ b/src/panfrost/vulkan/panvk_vX_physical_device.c 2026-05-20 18:52:54.832720445 +0200
|
||||
@@ -169,6 +169,7 @@
|
||||
.EXT_provoking_vertex = true,
|
||||
.EXT_queue_family_foreign = true,
|
||||
.EXT_robustness2 = true,
|
||||
+ .EXT_transform_feedback = PAN_ARCH < 9, /* iter13: JM-class only for now */
|
||||
.EXT_sampler_filter_minmax = PAN_ARCH >= 10,
|
||||
.EXT_scalar_block_layout = true,
|
||||
.EXT_separate_stencil_usage = true,
|
||||
@@ -495,6 +496,10 @@
|
||||
.robustImageAccess2 = false,
|
||||
.nullDescriptor = true,
|
||||
|
||||
+ /* VK_EXT_transform_feedback (iter13) */
|
||||
+ .transformFeedback = PAN_ARCH < 9,
|
||||
+ .geometryStreams = false,
|
||||
+
|
||||
/* VK_KHR_shader_clock */
|
||||
.shaderSubgroupClock = device->kmod.dev->props.gpu_can_query_timestamp,
|
||||
.shaderDeviceClock = device->kmod.dev->props.timestamp_device_coherent,
|
||||
@@ -1020,6 +1025,18 @@
|
||||
.robustStorageBufferAccessSizeAlignment = 1,
|
||||
.robustUniformBufferAccessSizeAlignment = 1,
|
||||
|
||||
+ /* VK_EXT_transform_feedback (iter13) */
|
||||
+ .maxTransformFeedbackStreams = 1,
|
||||
+ .maxTransformFeedbackBuffers = 4,
|
||||
+ .maxTransformFeedbackBufferSize = UINT32_MAX,
|
||||
+ .maxTransformFeedbackStreamDataSize = 512,
|
||||
+ .maxTransformFeedbackBufferDataSize = 512,
|
||||
+ .maxTransformFeedbackBufferDataStride = 2048,
|
||||
+ .transformFeedbackQueries = false,
|
||||
+ .transformFeedbackStreamsLinesTriangles = false,
|
||||
+ .transformFeedbackRasterizationStreamSelect = false,
|
||||
+ .transformFeedbackDraw = false,
|
||||
+
|
||||
/* VK_EXT_shader_object */
|
||||
/* We do not currently support VK_EXT_shader_object but this is used
|
||||
* internally by vk_shader
|
||||
--- a/src/panfrost/vulkan/panvk_vX_shader.c 2026-04-29 22:19:00.000000000 +0200
|
||||
+++ b/src/panfrost/vulkan/panvk_vX_shader.c 2026-05-20 18:52:56.556745611 +0200
|
||||
@@ -21,6 +21,7 @@
|
||||
#include "panvk_physical_device.h"
|
||||
#include "panvk_sampler.h"
|
||||
#include "panvk_shader.h"
|
||||
+#include "pan_nir.h" /* iter13: pan_nir_lower_xfb */
|
||||
|
||||
#include "spirv/nir_spirv.h"
|
||||
#include "util/memstream.h"
|
||||
@@ -100,6 +101,20 @@
|
||||
case nir_intrinsic_load_raw_vertex_offset_pan:
|
||||
val = load_sysval(b, graphics, bit_size, vs.raw_vertex_offset);
|
||||
break;
|
||||
+ case nir_intrinsic_load_num_vertices: /* iter13: XFB index calc */
|
||||
+ val = load_sysval(b, graphics, bit_size, vs.num_vertices);
|
||||
+ break;
|
||||
+ case nir_intrinsic_load_xfb_address: { /* iter13: XFB buffer N base address */
|
||||
+ unsigned idx = nir_intrinsic_base(intr);
|
||||
+ switch (idx) {
|
||||
+ case 0: val = load_sysval(b, graphics, bit_size, vs.xfb_address[0]); break;
|
||||
+ case 1: val = load_sysval(b, graphics, bit_size, vs.xfb_address[1]); break;
|
||||
+ case 2: val = load_sysval(b, graphics, bit_size, vs.xfb_address[2]); break;
|
||||
+ case 3: val = load_sysval(b, graphics, bit_size, vs.xfb_address[3]); break;
|
||||
+ default: return false;
|
||||
+ }
|
||||
+ break;
|
||||
+ }
|
||||
case nir_intrinsic_load_layer_id:
|
||||
assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
|
||||
val = load_sysval(b, graphics, bit_size, layer_id);
|
||||
@@ -457,6 +472,7 @@
|
||||
core_max_id);
|
||||
|
||||
pan_preprocess_nir(nir, pdev->kmod.dev->props.gpu_id);
|
||||
+
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -870,6 +886,18 @@
|
||||
nir_var_shader_in | nir_var_shader_out, UINT32_MAX);
|
||||
NIR_PASS(_, nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
|
||||
glsl_type_size, nir_lower_io_use_interpolated_input_intrinsics);
|
||||
+
|
||||
+#if PAN_ARCH < 9
|
||||
+ /* iter13: VK_EXT_transform_feedback — runs AFTER nir_lower_io so that
|
||||
+ * shader outputs are now store_output intrinsics that pan_nir_lower_xfb
|
||||
+ * can rewrite to nir_store_global+nir_load_xfb_address. */
|
||||
+ if (nir->info.stage == MESA_SHADER_VERTEX &&
|
||||
+ nir->info.has_transform_feedback_varyings) {
|
||||
+ NIR_PASS(_, nir, nir_opt_constant_folding);
|
||||
+ NIR_PASS(_, nir, nir_io_add_intrinsic_xfb_info);
|
||||
+ NIR_PASS(_, nir, pan_nir_lower_xfb);
|
||||
+ }
|
||||
+#endif
|
||||
}
|
||||
|
||||
static VkResult
|
||||
@@ -1288,6 +1316,9 @@
|
||||
.view_mask = (state && state->rp) ? state->rp->view_mask : 0,
|
||||
.robust2_modes = robust2_modes,
|
||||
.robust_descriptors = dev->vk.enabled_features.nullDescriptor,
|
||||
+ /* iter13: XFB shaders must disable IDVS (matches Panfrost-Gallium). */
|
||||
+ .no_idvs = (info->stage == MESA_SHADER_VERTEX) &&
|
||||
+ info->nir->info.has_transform_feedback_varyings,
|
||||
};
|
||||
|
||||
switch (info->stage) {
|
||||
--- a/src/panfrost/vulkan/panvk_cmd_draw.h 2026-04-29 22:19:00.000000000 +0200
|
||||
+++ b/src/panfrost/vulkan/panvk_cmd_draw.h 2026-05-20 18:52:57.748763011 +0200
|
||||
@@ -135,6 +135,19 @@
|
||||
struct panvk_graphics_sysvals sysvals;
|
||||
|
||||
#if PAN_ARCH < 9
|
||||
+ /* iter13: VK_EXT_transform_feedback state (JM-class only for now). */
|
||||
+ struct {
|
||||
+ bool active;
|
||||
+ uint32_t buffer_count;
|
||||
+ struct {
|
||||
+ uint64_t addr;
|
||||
+ uint64_t offset;
|
||||
+ uint64_t size;
|
||||
+ } buffers[4];
|
||||
+ } xfb;
|
||||
+#endif
|
||||
+
|
||||
+#if PAN_ARCH < 9
|
||||
struct panvk_shader_link link;
|
||||
#endif
|
||||
|
||||
--- a/src/panfrost/vulkan/panvk_vX_cmd_draw.c 2026-04-29 22:19:00.000000000 +0200
|
||||
+++ b/src/panfrost/vulkan/panvk_vX_cmd_draw.c 2026-05-20 19:10:23.031919662 +0200
|
||||
@@ -10,6 +10,7 @@
|
||||
#include "panvk_entrypoints.h"
|
||||
|
||||
#include "pan_desc.h"
|
||||
+#include "pan_compiler.h" /* PAN_SHADER_OOB_ADDRESS */
|
||||
#include "pan_util.h"
|
||||
|
||||
static void
|
||||
@@ -722,6 +723,35 @@
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals, vs.raw_vertex_offset,
|
||||
info->vertex.raw_offset);
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals, layer_id, info->layer_id);
|
||||
+
|
||||
+ /* iter13: VK_EXT_transform_feedback sysvals — always set (per draw),
|
||||
+ * reflect bound XFB state. set_gfx_sysval is a no-op if value unchanged. */
|
||||
+ set_gfx_sysval(cmdbuf, dirty_sysvals, vs.num_vertices, info->vertex.count);
|
||||
+ {
|
||||
+ const struct panvk_cmd_graphics_state *_gfx = &cmdbuf->state.gfx;
|
||||
+ /* iter13: default each XFB buffer address to PAN_SHADER_OOB_ADDRESS
|
||||
+ * (= 1<<63). This is the Panfrost-Gallium memory-sink idiom — the
|
||||
+ * Bifrost MMU silently discards stores to this address, so a pipeline
|
||||
+ * with XFB outputs used in a non-XFB draw (or in an XFB draw with
|
||||
+ * fewer bound buffers than the shader declares) is safe instead of
|
||||
+ * faulting. See gallium/drivers/panfrost/pan_cmdstream.c PAN_SYSVAL_XFB. */
|
||||
+ uint64_t _xa0 = PAN_SHADER_OOB_ADDRESS, _xa1 = PAN_SHADER_OOB_ADDRESS,
|
||||
+ _xa2 = PAN_SHADER_OOB_ADDRESS, _xa3 = PAN_SHADER_OOB_ADDRESS;
|
||||
+ if (_gfx->xfb.active) {
|
||||
+ if (_gfx->xfb.buffer_count > 0 && _gfx->xfb.buffers[0].addr)
|
||||
+ _xa0 = _gfx->xfb.buffers[0].addr + _gfx->xfb.buffers[0].offset;
|
||||
+ if (_gfx->xfb.buffer_count > 1 && _gfx->xfb.buffers[1].addr)
|
||||
+ _xa1 = _gfx->xfb.buffers[1].addr + _gfx->xfb.buffers[1].offset;
|
||||
+ if (_gfx->xfb.buffer_count > 2 && _gfx->xfb.buffers[2].addr)
|
||||
+ _xa2 = _gfx->xfb.buffers[2].addr + _gfx->xfb.buffers[2].offset;
|
||||
+ if (_gfx->xfb.buffer_count > 3 && _gfx->xfb.buffers[3].addr)
|
||||
+ _xa3 = _gfx->xfb.buffers[3].addr + _gfx->xfb.buffers[3].offset;
|
||||
+ }
|
||||
+ set_gfx_sysval(cmdbuf, dirty_sysvals, vs.xfb_address[0], _xa0);
|
||||
+ set_gfx_sysval(cmdbuf, dirty_sysvals, vs.xfb_address[1], _xa1);
|
||||
+ set_gfx_sysval(cmdbuf, dirty_sysvals, vs.xfb_address[2], _xa2);
|
||||
+ set_gfx_sysval(cmdbuf, dirty_sysvals, vs.xfb_address[3], _xa3);
|
||||
+ }
|
||||
#endif
|
||||
|
||||
if (dyn_gfx_state_dirty(cmdbuf, CB_BLEND_CONSTANTS)) {
|
||||
--- a/src/panfrost/vulkan/meson.build 2026-04-29 22:19:00.000000000 +0200
|
||||
+++ b/src/panfrost/vulkan/meson.build 2026-05-20 18:53:04.484861338 +0200
|
||||
@@ -73,6 +73,7 @@
|
||||
jm_inc_dir = ['jm']
|
||||
jm_files = [
|
||||
'jm/panvk_vX_bind_queue.c',
|
||||
+ 'jm/panvk_vX_cmd_xfb.c', # iter13
|
||||
'jm/panvk_vX_cmd_buffer.c',
|
||||
'jm/panvk_vX_cmd_dispatch.c',
|
||||
'jm/panvk_vX_cmd_draw.c',
|
||||
--- a/src/panfrost/vulkan/jm/panvk_vX_cmd_buffer.c 2026-04-29 22:19:00.000000000 +0200
|
||||
+++ b/src/panfrost/vulkan/jm/panvk_vX_cmd_buffer.c 2026-05-20 19:10:26.163965149 +0200
|
||||
@@ -473,5 +473,12 @@
|
||||
|
||||
vk_command_buffer_begin(&cmdbuf->vk, pBeginInfo);
|
||||
|
||||
+#if PAN_ARCH < 9
|
||||
+ /* iter13: clear XFB state on Begin so a reused command buffer does not
|
||||
+ * inherit stale xfb.buffer_count / xfb.active / xfb.buffers[] from a
|
||||
+ * prior recording. */
|
||||
+ memset(&cmdbuf->state.gfx.xfb, 0, sizeof(cmdbuf->state.gfx.xfb));
|
||||
+#endif
|
||||
+
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
--- a/src/panfrost/vulkan/jm/panvk_vX_cmd_xfb.c 2026-05-18 12:50:53.067999996 +0200
|
||||
+++ b/src/panfrost/vulkan/jm/panvk_vX_cmd_xfb.c 2026-05-20 19:10:27.175979847 +0200
|
||||
@@ -0,0 +1,111 @@
|
||||
+/*
|
||||
+ * Copyright © 2026 mfritsche / claude-noether
|
||||
+ * SPDX-License-Identifier: MIT
|
||||
+ *
|
||||
+ * iter13: VK_EXT_transform_feedback command handlers for the JM
|
||||
+ * architecture path (Bifrost v6/v7 + Valhall-JM v9).
|
||||
+ *
|
||||
+ * The runtime contract:
|
||||
+ * - vkCmdBindTransformFeedbackBuffersEXT: stash (gpu_addr, offset, size)
|
||||
+ * for each slot into cmdbuf->state.gfx.xfb.buffers[].
|
||||
+ * - vkCmdBeginTransformFeedbackEXT: set cmdbuf->state.gfx.xfb.active = true.
|
||||
+ * No explicit dirty marking: the next draw's set_gfx_sysval() calls re-emit vs.xfb_address[].
|
||||
+ * - vkCmdEndTransformFeedbackEXT: set active = false.
|
||||
+ *
|
||||
+ * Counter buffers (firstCounterBuffer/counterBufferCount/pCounterBuffers/
|
||||
+ * pCounterBufferOffsets) are accepted by API but ignored — v1 doesn't
|
||||
+ * support pause/resume. transformFeedbackDraw is advertised as false.
|
||||
+ *
|
||||
+ * Per-draw integration: jm/panvk_vX_cmd_draw.c reads cmdbuf->state.gfx.xfb
|
||||
+ * and populates vs.xfb_address[i] for shader use. The pan_nir_lower_xfb
|
||||
+ * pass in panvk_vX_shader.c emits nir_load_xfb_address(i) which lowers
|
||||
+ * (via panvk_vX_shader.c sysval handler) to a load from the per-draw
|
||||
+ * sysval push area.
|
||||
+ */
|
||||
+
|
||||
+#include "vk_log.h"
|
||||
+#include "util/log.h"
|
||||
+
|
||||
+#include "panvk_cmd_buffer.h"
|
||||
+#include "panvk_cmd_draw.h"
|
||||
+#include "panvk_buffer.h"
|
||||
+#include "panvk_entrypoints.h"
|
||||
+
|
||||
+VKAPI_ATTR void VKAPI_CALL
|
||||
+panvk_per_arch(CmdBindTransformFeedbackBuffersEXT)(
|
||||
+ VkCommandBuffer commandBuffer,
|
||||
+ uint32_t firstBinding,
|
||||
+ uint32_t bindingCount,
|
||||
+ const VkBuffer *pBuffers,
|
||||
+ const VkDeviceSize *pOffsets,
|
||||
+ const VkDeviceSize *pSizes)
|
||||
+{
|
||||
+ VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
|
||||
+ struct panvk_cmd_graphics_state *gfx = &cmdbuf->state.gfx;
|
||||
+
|
||||
+ for (uint32_t i = 0; i < bindingCount; i++) {
|
||||
+ uint32_t slot = firstBinding + i;
|
||||
+ if (slot >= 4)
|
||||
+ continue;
|
||||
+
|
||||
+ VK_FROM_HANDLE(panvk_buffer, buf, pBuffers[i]);
|
||||
+ gfx->xfb.buffers[slot].addr = panvk_buffer_gpu_ptr(buf, 0);
|
||||
+ gfx->xfb.buffers[slot].offset = pOffsets[i];
|
||||
+ gfx->xfb.buffers[slot].size =
|
||||
+ (pSizes != NULL && pSizes[i] != VK_WHOLE_SIZE)
|
||||
+ ? pSizes[i]
|
||||
+ : (buf->vk.size - pOffsets[i]);
|
||||
+ }
|
||||
+
|
||||
+ if (firstBinding + bindingCount > gfx->xfb.buffer_count)
|
||||
+ gfx->xfb.buffer_count = firstBinding + bindingCount;
|
||||
+}
|
||||
+
|
||||
+VKAPI_ATTR void VKAPI_CALL
|
||||
+panvk_per_arch(CmdBeginTransformFeedbackEXT)(
|
||||
+ VkCommandBuffer commandBuffer,
|
||||
+ uint32_t firstCounterBuffer,
|
||||
+ uint32_t counterBufferCount,
|
||||
+ const VkBuffer *pCounterBuffers,
|
||||
+ const VkDeviceSize *pCounterBufferOffsets)
|
||||
+{
|
||||
+ VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
|
||||
+ struct panvk_cmd_graphics_state *gfx = &cmdbuf->state.gfx;
|
||||
+
|
||||
+ /* Counter buffers ignored in v1 — see VkPhysicalDeviceTransformFeedback
|
||||
+ * PropertiesEXT.transformFeedbackDraw = false in panvk_vX_physical_device.c.
|
||||
+ * App is spec-compliant if it does not pass counter buffers (which our
|
||||
+ * features advertisement allows), but warn loudly if it does so we do not
|
||||
+ * silently produce wrong capture state. */
|
||||
+ (void)firstCounterBuffer;
|
||||
+ (void)pCounterBufferOffsets;
|
||||
+ if (counterBufferCount > 0 && pCounterBuffers != NULL) {
|
||||
+ mesa_logw("panvk: CmdBeginTransformFeedbackEXT: counter buffers not "
|
||||
+ "implemented (transformFeedbackDraw=false); XFB resume will "
|
||||
+ "restart at buffer offset 0");
|
||||
+ }
|
||||
+
|
||||
+ gfx->xfb.active = true;
|
||||
+ /* Per-draw set_gfx_sysval picks up the change automatically — no
|
||||
+ * explicit dirty marking required (set_gfx_sysval uses memcmp +
|
||||
+ * BITSET to detect state diffs and re-emit sysvals). */
|
||||
+}
|
||||
+
|
||||
+VKAPI_ATTR void VKAPI_CALL
|
||||
+panvk_per_arch(CmdEndTransformFeedbackEXT)(
|
||||
+ VkCommandBuffer commandBuffer,
|
||||
+ uint32_t firstCounterBuffer,
|
||||
+ uint32_t counterBufferCount,
|
||||
+ const VkBuffer *pCounterBuffers,
|
||||
+ const VkDeviceSize *pCounterBufferOffsets)
|
||||
+{
|
||||
+ VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
|
||||
+ struct panvk_cmd_graphics_state *gfx = &cmdbuf->state.gfx;
|
||||
+
|
||||
+ (void)firstCounterBuffer;
|
||||
+ (void)counterBufferCount;
|
||||
+ (void)pCounterBuffers;
|
||||
+ (void)pCounterBufferOffsets;
|
||||
+
|
||||
+ gfx->xfb.active = false;
|
||||
+}
|
||||
@@ -30,7 +30,7 @@
|
||||
|
||||
pkgname=mesa-panvk-bifrost
|
||||
_mesaver=26.0.6
|
||||
pkgver=26.0.6.r2
|
||||
pkgver=26.0.6.r3
|
||||
pkgrel=1
|
||||
pkgdesc="Patched Mesa libvulkan_panfrost.so exposing Bifrost-gen Mali to Vulkan apps (panvk-bifrost campaign)"
|
||||
arch=('aarch64')
|
||||
@@ -79,6 +79,7 @@ source=(
|
||||
"https://archive.mesa3d.org/mesa-${_mesaver}.tar.xz"
|
||||
"0001-panvk-expose-robustness2-nullDescriptor-bifrost.patch"
|
||||
"0002-panvk-expose-vulkan-1.1-1.2-on-bifrost.patch"
|
||||
"0003-panvk-bifrost-vk-ext-transform-feedback.patch"
|
||||
"brave-vulkan"
|
||||
"icd.json"
|
||||
)
|
||||
@@ -88,6 +89,7 @@ sha256sums=(
|
||||
'SKIP'
|
||||
'SKIP'
|
||||
'SKIP'
|
||||
'SKIP'
|
||||
)
|
||||
|
||||
prepare() {
|
||||
@@ -107,12 +109,23 @@ prepare() {
|
||||
sed -i 's|bool has_vk1_1 = PAN_ARCH >= 10;|bool has_vk1_1 = true;|' src/panfrost/vulkan/panvk_vX_physical_device.c
|
||||
sed -i 's|bool has_vk1_2 = PAN_ARCH >= 10;|bool has_vk1_2 = true;|' src/panfrost/vulkan/panvk_vX_physical_device.c
|
||||
|
||||
# iter13: VK_EXT_transform_feedback implementation for Bifrost (PAN_ARCH<9).
|
||||
# Applied as a real unified-diff patch — the change is too large for sed.
|
||||
# Phase-doc context: ~/src/panvk-bifrost/phase{4,5,6}_iter13_close.md.
|
||||
# Unlocks ANGLE-Vulkan → GLES3 → WebGL2 / WebGPU on Brave (chrome://gpu
|
||||
# reports "Hardware accelerated" across the board for the affected paths).
|
||||
patch -p1 < "${srcdir}/0003-panvk-bifrost-vk-ext-transform-feedback.patch"
|
||||
|
||||
# Sanity-check the patches landed.
|
||||
grep -q "KHR_robustness2 = true," src/panfrost/vulkan/panvk_vX_physical_device.c
|
||||
grep -q "EXT_robustness2 = true," src/panfrost/vulkan/panvk_vX_physical_device.c
|
||||
grep -q "nullDescriptor = true," src/panfrost/vulkan/panvk_vX_physical_device.c
|
||||
grep -q "has_vk1_1 = true;" src/panfrost/vulkan/panvk_vX_physical_device.c
|
||||
grep -q "has_vk1_2 = true;" src/panfrost/vulkan/panvk_vX_physical_device.c
|
||||
# iter13 sanity:
|
||||
grep -q "EXT_transform_feedback = PAN_ARCH < 9," src/panfrost/vulkan/panvk_vX_physical_device.c
|
||||
grep -q "pan_nir_lower_xfb" src/panfrost/vulkan/panvk_vX_shader.c
|
||||
test -f src/panfrost/vulkan/jm/panvk_vX_cmd_xfb.c
|
||||
}
|
||||
|
||||
build() {
|
||||
|
||||
+3
-3
@@ -14,9 +14,9 @@
|
||||
# Sibling userspace package: ../daedalus-v4l2/build-deb.sh
|
||||
set -euo pipefail
|
||||
|
||||
UPSTREAM_COMMIT=481279c9bffd19e32c8f3299897e9b63fc5a24aa
|
||||
PKGVER=0.1.0+r18+g481279c
|
||||
PKGREL=1 # reset for new upstream pin (481279c — Phase 8.13 close)
|
||||
UPSTREAM_COMMIT=462aa4b4804972189d2f1b1ab5782f1ca1e8c423
|
||||
PKGVER=0.1.0+r22+g462aa4b
|
||||
PKGREL=1 # reset for new upstream pin (462aa4b — kernel v4l2_ctrl_request_setup() control-binding fix)
|
||||
MODULE_NAME=daedalus_v4l2
|
||||
|
||||
HERE=$(dirname "$(readlink -f "$0")")
|
||||
|
||||
+27
@@ -1,3 +1,30 @@
|
||||
daedalus-v4l2-dkms (0.1.0+r22+g462aa4b-1) bookworm trixie; urgency=medium
|
||||
|
||||
* Bump to 462aa4b — kernel device_run() now calls
|
||||
v4l2_ctrl_request_setup() before reading the H.264 stateless
|
||||
control values from the bound media_request, so the values
|
||||
daedalus ships to the userspace daemon match what the V4L2
|
||||
client (libva-v4l2-request-fourier) actually set. Closes the
|
||||
libva→kernel control-binding gap that was causing decoded
|
||||
frames to come back as best-effort zero garbage from libavcodec.
|
||||
* Wire-ABI lockstep with daedalus-v4l2 0.1.0+r22+g462aa4b.
|
||||
|
||||
-- Markus Fritsche <mfritsche@reauktion.de> Wed, 20 May 2026 22:00:00 +0000
|
||||
|
||||
daedalus-v4l2-dkms (0.1.0+r20+g3dd0eb0-1) bookworm trixie; urgency=medium
|
||||
|
||||
* Bump to 3dd0eb0 — DAEMON-PPS kernel-side changes. device_run()
|
||||
now reads the V4L2 H.264 stateless control values from the bound
|
||||
media_request and ships them to the daemon inside REQ_DECODE
|
||||
via the new struct daedalus_h264_meta block (gated on
|
||||
DAEDALUS_REQ_FLAG_H264_META). Required for H.264 decode to
|
||||
work via the libva-v4l2-request -> daedalus daemon path; daemon
|
||||
synthesises AnnexB SPS+PPS NAL units from the structs.
|
||||
* Wire-ABI lockstep with daedalus-v4l2 0.1.0+r20+g3dd0eb0 — install
|
||||
both packages together.
|
||||
|
||||
-- Markus Fritsche <mfritsche@reauktion.de> Wed, 20 May 2026 21:00:00 +0000
|
||||
|
||||
daedalus-v4l2-dkms (0.1.0+r18+g481279c-1) bookworm trixie; urgency=medium
|
||||
|
||||
* Bump to 481279c in lockstep with the userspace daedalus-v4l2
|
||||
|
||||
Vendored
+3
-3
@@ -15,9 +15,9 @@ set -euo pipefail
|
||||
# end-to-end via libva (consumer target hit)" — first commit where the
|
||||
# full ffmpeg -hwaccel vaapi → libva → /dev/video0 → daemon path lands
|
||||
# a pixel-correct decoded frame back in ffmpeg.
|
||||
UPSTREAM_COMMIT=481279c9bffd19e32c8f3299897e9b63fc5a24aa
|
||||
PKGVER=0.1.0+r18+g481279c
|
||||
PKGREL=1 # reset for new upstream pin (481279c — Phase 8.13 close)
|
||||
UPSTREAM_COMMIT=462aa4b4804972189d2f1b1ab5782f1ca1e8c423
|
||||
PKGVER=0.1.0+r22+g462aa4b
|
||||
PKGREL=1 # reset for new upstream pin (462aa4b — kernel v4l2_ctrl_request_setup() control-binding fix)
|
||||
|
||||
HERE=$(dirname "$(readlink -f "$0")")
|
||||
|
||||
|
||||
+33
@@ -1,3 +1,36 @@
|
||||
daedalus-v4l2 (0.1.0+r22+g462aa4b-1) bookworm trixie; urgency=medium
|
||||
|
||||
* Bump to 462aa4b — kernel-side fix for control-binding gap that
|
||||
closes the libva→daemon SPS/PPS pipeline. Kernel device_run now
|
||||
calls v4l2_ctrl_request_setup() before reading ctrl->p_cur, so
|
||||
the daemon's daedalus_h264_meta block actually carries THIS
|
||||
request's V4L2 stateless H.264 control values instead of stale
|
||||
/default ones. Pairs with libva-v4l2-request-fourier r382+gc1bb444
|
||||
(Fix 3 + Fix 4 from issue libva-v4l2-request-fourier#8).
|
||||
* After-fix on higgs (Pi CM5): ffmpeg -hwaccel vaapi -i h264.mp4
|
||||
produces unique decoded P-frames (per-frame fnv1a hashes differ)
|
||||
and zero "error while decoding MB" / "reference frames exceeds
|
||||
max" warnings.
|
||||
|
||||
-- Markus Fritsche <mfritsche@reauktion.de> Wed, 20 May 2026 22:00:00 +0000
|
||||
|
||||
daedalus-v4l2 (0.1.0+r20+g3dd0eb0-1) bookworm trixie; urgency=medium
|
||||
|
||||
* Bump to 3dd0eb0 — DAEMON-PPS H.264 SPS/PPS NAL synthesiser.
|
||||
Daemon now reconstructs AnnexB SPS+PPS NAL units from the V4L2
|
||||
stateless H.264 control structs (forwarded by the kernel via
|
||||
a new struct daedalus_h264_meta block in REQ_DECODE) and
|
||||
prepends them to the slice bitstream before feeding libavcodec.
|
||||
Without this, ffmpeg -hwaccel vaapi on H.264 sources failed
|
||||
with "non-existing PPS 0 referenced" even after LIBVA-1/-2
|
||||
routing correctly delivered the request.
|
||||
* Wire protocol: new DAEDALUS_REQ_FLAG_H264_META bit + struct
|
||||
daedalus_h264_meta; daemon and kernel must be installed in
|
||||
lockstep (this package + daedalus-v4l2-dkms 0.1.0+r20+g3dd0eb0).
|
||||
* VP9 / AV1 paths unchanged.
|
||||
|
||||
-- Markus Fritsche <mfritsche@reauktion.de> Wed, 20 May 2026 21:00:00 +0000
|
||||
|
||||
daedalus-v4l2 (0.1.0+r18+g481279c-1) bookworm trixie; urgency=medium
|
||||
|
||||
* Bump to 481279c. Upstream landed the systemd unit + modules-load.d
|
||||
|
||||
+15
-9
@@ -10,15 +10,21 @@
|
||||
# Upstream fork: https://git.reauktion.de/marfrit/libva-v4l2-request-fourier
|
||||
set -euo pipefail
|
||||
|
||||
# Same pin as the Arch PKGBUILD. 9898331 = LIBVA-2 close — completes
|
||||
# the per-codec dispatch from c332d34 (LIBVA-1) by adding video_fd_
|
||||
# daedalus to any_fd_supports_output_format's probe array. Without
|
||||
# it, H.264/VP9/AV1 profiles never got advertised on Pi 5 mixed
|
||||
# deployments (rpi-hevc-dec primary, daedalus_v4l2 alt) — ffmpeg
|
||||
# bailed with "No support for codec h264 profile 578" before the
|
||||
# per-codec dispatch could even fire.
|
||||
UPSTREAM_COMMIT=989833114a7708ad999dc68309cbc181d9913bdb
|
||||
PKGVER=1.0.0+r380+g9898331
|
||||
# Same pin as the Arch PKGBUILD. c1bb444 = PR #9 merge "h264:
|
||||
# max_num_ref_frames fallback + libva-boundary instrumentation (#8)" —
|
||||
# addresses the libva-side portion of marfrit/libva-v4l2-request-fourier#8.
|
||||
# The daedalus_v4l2-via-libavcodec strict consumer rejected frames whose
|
||||
# sps.max_num_ref_frames was 0 (older ffmpeg-vaapi paths leave the field
|
||||
# unset and HW decoders tolerated it). PR adds a per-profile spec
|
||||
# minimum fallback (counts valid DPB entries first; 1 for baseline / 4
|
||||
# for main+high if even that is 0) and a one-line request_log at
|
||||
# h264_set_controls entry dumping raw VAAPI bitfields for diagnosing
|
||||
# the still-open PPS-flag-zero portion of issue #8.
|
||||
#
|
||||
# Prior pin (9898331) was the LIBVA-2 close — config-side
|
||||
# video_fd_daedalus added to the profile enumeration probe array.
|
||||
UPSTREAM_COMMIT=c1bb444d0731ca7ca831c7e01ec4b52ed736ca8e
|
||||
PKGVER=1.0.0+r382+gc1bb444
|
||||
PKGREL=1
|
||||
|
||||
HERE=$(dirname "$(readlink -f "$0")")
|
||||
|
||||
Reference in New Issue
Block a user