From 4db64917bcbc95030b456381f3597b805ba7e5cf Mon Sep 17 00:00:00 2001
From: Markus Fritsche <mfritsche@reauktion.de>
Date: Fri, 22 May 2026 09:49:59 +0200
Subject: [PATCH] mesa-panvk-bifrost-video: r1-r4 patches as real files
 (symlinks broke CI)

The original PR #79 used symlinks for 0001..0004 patches (pointing into
../mesa-panvk-bifrost/) to avoid drift between siblings. CI's
"cp -r arch/mesa-panvk-bifrost-video /tmp/build-..." preserves the
symlinks, but the destination /tmp/build-... has no sibling dir to
resolve them against, so makepkg errors with:

  ==> ERROR: 0001-panvk-expose-robustness2-nullDescriptor-bifrost.patch
             was not found in the build directory and is not a URL.

Each Arch PKGBUILD owns its source files per convention; the
duplication risk is low because r1..r4 are closed-release patches.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 ...e-robustness2-nullDescriptor-bifrost.patch |  58 +-
 ...nvk-expose-vulkan-1.1-1.2-on-bifrost.patch |  48 +-
 ...vk-bifrost-vk-ext-transform-feedback.patch | 329 ++++++++-
 ...-bifrost-xfb-primitive-decomposition.patch | 630 +++++++++++++++++-
 4 files changed, 1061 insertions(+), 4 deletions(-)
 mode change 120000 => 100644 arch/mesa-panvk-bifrost-video/0001-panvk-expose-robustness2-nullDescriptor-bifrost.patch
 mode change 120000 => 100644 arch/mesa-panvk-bifrost-video/0002-panvk-expose-vulkan-1.1-1.2-on-bifrost.patch
 mode change 120000 => 100644 arch/mesa-panvk-bifrost-video/0003-panvk-bifrost-vk-ext-transform-feedback.patch
 mode change 120000 => 100644 arch/mesa-panvk-bifrost-video/0004-panvk-bifrost-xfb-primitive-decomposition.patch

diff --git a/arch/mesa-panvk-bifrost-video/0001-panvk-expose-robustness2-nullDescriptor-bifrost.patch b/arch/mesa-panvk-bifrost-video/0001-panvk-expose-robustness2-nullDescriptor-bifrost.patch
deleted file mode 120000
index 0dcf8589b4..0000000000
--- a/arch/mesa-panvk-bifrost-video/0001-panvk-expose-robustness2-nullDescriptor-bifrost.patch
+++ /dev/null
@@ -1 +0,0 @@
-../mesa-panvk-bifrost/0001-panvk-expose-robustness2-nullDescriptor-bifrost.patch
\ No newline at end of file
diff --git a/arch/mesa-panvk-bifrost-video/0001-panvk-expose-robustness2-nullDescriptor-bifrost.patch b/arch/mesa-panvk-bifrost-video/0001-panvk-expose-robustness2-nullDescriptor-bifrost.patch
new file mode 100644
index 0000000000..8d2a377c5d
--- /dev/null
+++ b/arch/mesa-panvk-bifrost-video/0001-panvk-expose-robustness2-nullDescriptor-bifrost.patch
@@ -0,0 +1,57 @@
+From: claude-noether (on behalf of mfritsche)
+Date: 2026-05-19
+Subject: panvk: expose VK_KHR/EXT_robustness2 + nullDescriptor on Bifrost (PAN_ARCH 6/7)
+
+Without this, Mesa's Zink driver refuses to use PanVk-Bifrost as its Vulkan
+backend, falling back silently to llvmpipe (software rasterizer) for all
+GL-via-Zink on Bifrost SBCs. That defeats the entire purpose of having a
+Vulkan driver on Bifrost — GL acceleration via Zink is the most natural
+near-term consumer.
+
+panvk_vX_nir_lower_descriptors.c:1309 and panvk_vX_shader.c:1355 already
+plumb dev->vk.enabled_features.nullDescriptor arch-agnostically — the gate
+at panvk_vX_physical_device.c was set conservatively when Bifrost was
+unmaintained, not because of hardware incapability.
+
+iter1–7 of the panvk-bifrost campaign proved fundamental driver functions
+on Mali-G52 r1 MC1 (PAN_ARCH=7). This patch is the iter8 follow-up.
+
+robustBufferAccess2 and robustImageAccess2 are NOT flipped — they're
+independent rb2 features Zink doesn't require, gated differently
+(robustBufferAccess2 = PAN_ARCH >= 11, robustImageAccess2 = false), and
+out of scope for iter8.
+
+---
+ src/panfrost/vulkan/panvk_vX_physical_device.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/src/panfrost/vulkan/panvk_vX_physical_device.c b/src/panfrost/vulkan/panvk_vX_physical_device.c
+--- a/src/panfrost/vulkan/panvk_vX_physical_device.c
++++ b/src/panfrost/vulkan/panvk_vX_physical_device.c
+@@ -91,7 +91,7 @@ get_device_extensions(const struct panvk_physical_device *device,
+       .KHR_pipeline_binary = true,
+       .KHR_pipeline_executable_properties = true,
+       .KHR_pipeline_library = true,
+-      .KHR_robustness2 = PAN_ARCH >= 10,
++      .KHR_robustness2 = true,
+       .KHR_sampler_mirror_clamp_to_edge = true,
+       .KHR_sampler_ycbcr_conversion = true,
+       .KHR_separate_depth_stencil_layouts = true,
+@@ -168,7 +168,7 @@ get_device_extensions(const struct panvk_physical_device *device,
+       .EXT_queue_family_foreign = true,
+       .EXT_robustness = pan_arch(device->kmod.dev->props.gpu_id) >= 9,
+       .EXT_image_robustness = true,
+-      .EXT_robustness2 = PAN_ARCH >= 10,
++      .EXT_robustness2 = true,
+       .EXT_sampler_filter_minmax = PAN_ARCH >= 10,
+       .EXT_scalar_block_layout = true,
+       .EXT_separate_stencil_usage = true,
+@@ -493,7 +493,7 @@ get_device_features(const struct panvk_physical_device *device,
+       /* VK_KHR_robustness2 */
+       .robustBufferAccess2 = PAN_ARCH >= 11,
+       .robustImageAccess2 = false,
+-      .nullDescriptor = PAN_ARCH >= 10,
++      .nullDescriptor = true,
+
+       /* VK_KHR_shader_clock */
+       .shaderSubgroupClock = device->kmod.dev->props.gpu_can_query_timestamp,
diff --git a/arch/mesa-panvk-bifrost-video/0002-panvk-expose-vulkan-1.1-1.2-on-bifrost.patch b/arch/mesa-panvk-bifrost-video/0002-panvk-expose-vulkan-1.1-1.2-on-bifrost.patch
deleted file mode 120000
index 1d7a265b90..0000000000
--- a/arch/mesa-panvk-bifrost-video/0002-panvk-expose-vulkan-1.1-1.2-on-bifrost.patch
+++ /dev/null
@@ -1 +0,0 @@
-../mesa-panvk-bifrost/0002-panvk-expose-vulkan-1.1-1.2-on-bifrost.patch
\ No newline at end of file
diff --git a/arch/mesa-panvk-bifrost-video/0002-panvk-expose-vulkan-1.1-1.2-on-bifrost.patch b/arch/mesa-panvk-bifrost-video/0002-panvk-expose-vulkan-1.1-1.2-on-bifrost.patch
new file mode 100644
index 0000000000..f44ffcbb56
--- /dev/null
+++ b/arch/mesa-panvk-bifrost-video/0002-panvk-expose-vulkan-1.1-1.2-on-bifrost.patch
@@ -0,0 +1,47 @@
+From: claude-noether (on behalf of mfritsche)
+Date: 2026-05-20
+Subject: panvk: expose Vulkan 1.1 + 1.2 on Bifrost (PAN_ARCH 6/7)
+
+ANGLE (Chromium's GL stack) requires apiVersion >= 1.1 to initialize. Without
+this, Brave / Chromium's GPU process fails at GL info collection:
+
+  vk_renderer.cpp:2659 (initialize): ANGLE Requires a minimum Vulkan device
+                                     version of 1.1
+  Display::initialize error 0: Internal Vulkan error (-9): The requested
+                               version of Vulkan is not supported by the driver
+
+Stack-up with iter8's robustness2 patch enables ANGLE → PanVk-Bifrost →
+Skia (via --enable-features=Vulkan) on Bifrost SBCs.
+
+PanVk-Bifrost already supports the bulk of 1.1-promoted features as extensions
+(multiview, maintenance1-3, descriptor update template, 16-bit storage,
+sampler ycbcr, variable pointers, etc. — all
+visible in iter0 vulkaninfo). The version bump primarily bundles them.
+
+Risk: Vulkan 1.1 has features beyond what iter1–7 exercised (protected memory,
+full subgroup ops). Specific app failures will be characterizable.
+
+1.2 is also flipped — Brave's Vulkan path may want descriptor indexing,
+buffer device address, etc. (all listed in iter0 vulkaninfo as supported
+extensions, just gated as 1.0-with-extensions, not 1.2-core).
+
+---
+ src/panfrost/vulkan/panvk_vX_physical_device.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/src/panfrost/vulkan/panvk_vX_physical_device.c b/src/panfrost/vulkan/panvk_vX_physical_device.c
+--- a/src/panfrost/vulkan/panvk_vX_physical_device.c
++++ b/src/panfrost/vulkan/panvk_vX_physical_device.c
+@@ -38,8 +38,8 @@ get_device_extensions(const struct panvk_physical_device *device,
+                       struct vk_device_extension_table *ext)
+ {
+    *ext = (struct vk_device_extension_table){
+-      .KHR_8bit_storage = true,
+-      .KHR_16bit_storage = true,
+-      bool has_vk1_1 = PAN_ARCH >= 10;
+-      bool has_vk1_2 = PAN_ARCH >= 10;
++      .KHR_8bit_storage = true,
++      .KHR_16bit_storage = true,
++      bool has_vk1_1 = true;
++      bool has_vk1_2 = true;
+       *ext = (struct vk_device_extension_table){
diff --git a/arch/mesa-panvk-bifrost-video/0003-panvk-bifrost-vk-ext-transform-feedback.patch b/arch/mesa-panvk-bifrost-video/0003-panvk-bifrost-vk-ext-transform-feedback.patch
deleted file mode 120000
index 7aebd6f385..0000000000
--- a/arch/mesa-panvk-bifrost-video/0003-panvk-bifrost-vk-ext-transform-feedback.patch
+++ /dev/null
@@ -1 +0,0 @@
-../mesa-panvk-bifrost/0003-panvk-bifrost-vk-ext-transform-feedback.patch
\ No newline at end of file
diff --git a/arch/mesa-panvk-bifrost-video/0003-panvk-bifrost-vk-ext-transform-feedback.patch b/arch/mesa-panvk-bifrost-video/0003-panvk-bifrost-vk-ext-transform-feedback.patch
new file mode 100644
index 0000000000..4d162fc2e9
--- /dev/null
+++ b/arch/mesa-panvk-bifrost-video/0003-panvk-bifrost-vk-ext-transform-feedback.patch
@@ -0,0 +1,328 @@
+--- a/src/panfrost/vulkan/panvk_shader.h	2026-04-29 22:19:00.000000000 +0200
++++ b/src/panfrost/vulkan/panvk_shader.h	2026-05-20 18:52:53.312698258 +0200
+@@ -150,6 +150,10 @@
+    struct {
+ #if PAN_ARCH < 9
+       int32_t raw_vertex_offset;
++      uint32_t num_vertices;       /* iter13: XFB needs per-draw vertex count */
++      /* aligned_u64 attribute below inserts the 4-byte alignment gap
++       * after num_vertices automatically — no explicit pad needed. */
++      aligned_u64 xfb_address[4];  /* iter13: 4 transform feedback buffer base addresses */
+ #endif
+       int32_t first_vertex;
+       int32_t base_instance;
+--- a/src/panfrost/vulkan/panvk_vX_physical_device.c	2026-05-20 19:09:29.711145446 +0200
++++ b/src/panfrost/vulkan/panvk_vX_physical_device.c	2026-05-20 18:52:54.832720445 +0200
+@@ -169,6 +169,7 @@
+       .EXT_provoking_vertex = true,
+       .EXT_queue_family_foreign = true,
+       .EXT_robustness2 = true,
++      .EXT_transform_feedback = PAN_ARCH < 9,   /* iter13: JM-class only for now */
+       .EXT_sampler_filter_minmax = PAN_ARCH >= 10,
+       .EXT_scalar_block_layout = true,
+       .EXT_separate_stencil_usage = true,
+@@ -495,6 +496,10 @@
+       .robustImageAccess2 = false,
+       .nullDescriptor = true,
+ 
++      /* VK_EXT_transform_feedback (iter13) */
++      .transformFeedback = PAN_ARCH < 9,
++      .geometryStreams = false,
++
+       /* VK_KHR_shader_clock */
+       .shaderSubgroupClock = device->kmod.dev->props.gpu_can_query_timestamp,
+       .shaderDeviceClock = device->kmod.dev->props.timestamp_device_coherent,
+@@ -1020,6 +1025,18 @@
+       .robustStorageBufferAccessSizeAlignment = 1,
+       .robustUniformBufferAccessSizeAlignment = 1,
+ 
++      /* VK_EXT_transform_feedback (iter13) */
++      .maxTransformFeedbackStreams = 1,
++      .maxTransformFeedbackBuffers = 4,
++      .maxTransformFeedbackBufferSize = UINT32_MAX,
++      .maxTransformFeedbackStreamDataSize = 512,
++      .maxTransformFeedbackBufferDataSize = 512,
++      .maxTransformFeedbackBufferDataStride = 2048,
++      .transformFeedbackQueries = false,
++      .transformFeedbackStreamsLinesTriangles = false,
++      .transformFeedbackRasterizationStreamSelect = false,
++      .transformFeedbackDraw = false,
++
+       /* VK_EXT_shader_object */
+       /* We do not currently support VK_EXT_shader_object but this is used
+        * internally by vk_shader
+--- a/src/panfrost/vulkan/panvk_vX_shader.c	2026-04-29 22:19:00.000000000 +0200
++++ b/src/panfrost/vulkan/panvk_vX_shader.c	2026-05-20 18:52:56.556745611 +0200
+@@ -21,6 +21,7 @@
+ #include "panvk_physical_device.h"
+ #include "panvk_sampler.h"
+ #include "panvk_shader.h"
++#include "pan_nir.h"   /* iter13: pan_nir_lower_xfb */
+ 
+ #include "spirv/nir_spirv.h"
+ #include "util/memstream.h"
+@@ -100,6 +101,20 @@
+    case nir_intrinsic_load_raw_vertex_offset_pan:
+       val = load_sysval(b, graphics, bit_size, vs.raw_vertex_offset);
+       break;
++   case nir_intrinsic_load_num_vertices:    /* iter13: XFB index calc */
++      val = load_sysval(b, graphics, bit_size, vs.num_vertices);
++      break;
++   case nir_intrinsic_load_xfb_address: {   /* iter13: XFB buffer N base address */
++      unsigned idx = nir_intrinsic_base(intr);
++      switch (idx) {
++      case 0: val = load_sysval(b, graphics, bit_size, vs.xfb_address[0]); break;
++      case 1: val = load_sysval(b, graphics, bit_size, vs.xfb_address[1]); break;
++      case 2: val = load_sysval(b, graphics, bit_size, vs.xfb_address[2]); break;
++      case 3: val = load_sysval(b, graphics, bit_size, vs.xfb_address[3]); break;
++      default: return false;
++      }
++      break;
++   }
+    case nir_intrinsic_load_layer_id:
+       assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
+       val = load_sysval(b, graphics, bit_size, layer_id);
+@@ -457,6 +472,7 @@
+             core_max_id);
+ 
+    pan_preprocess_nir(nir, pdev->kmod.dev->props.gpu_id);
++
+ }
+ 
+ static void
+@@ -870,6 +886,18 @@
+             nir_var_shader_in | nir_var_shader_out, UINT32_MAX);
+    NIR_PASS(_, nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
+             glsl_type_size, nir_lower_io_use_interpolated_input_intrinsics);
++
++#if PAN_ARCH < 9
++   /* iter13: VK_EXT_transform_feedback — runs AFTER nir_lower_io so that
++    * shader outputs are now store_output intrinsics that pan_nir_lower_xfb
++    * can rewrite to nir_store_global+nir_load_xfb_address. */
++   if (nir->info.stage == MESA_SHADER_VERTEX &&
++       nir->info.has_transform_feedback_varyings) {
++      NIR_PASS(_, nir, nir_opt_constant_folding);
++      NIR_PASS(_, nir, nir_io_add_intrinsic_xfb_info);
++      NIR_PASS(_, nir, pan_nir_lower_xfb);
++   }
++#endif
+ }
+ 
+ static VkResult
+@@ -1288,6 +1316,9 @@
+       .view_mask = (state && state->rp) ? state->rp->view_mask : 0,
+       .robust2_modes = robust2_modes,
+       .robust_descriptors = dev->vk.enabled_features.nullDescriptor,
++      /* iter13: XFB shaders must disable IDVS (matches Panfrost-Gallium). */
++      .no_idvs = (info->stage == MESA_SHADER_VERTEX) &&
++                 info->nir->info.has_transform_feedback_varyings,
+    };
+ 
+    switch (info->stage) {
+--- a/src/panfrost/vulkan/panvk_cmd_draw.h	2026-04-29 22:19:00.000000000 +0200
++++ b/src/panfrost/vulkan/panvk_cmd_draw.h	2026-05-20 18:52:57.748763011 +0200
+@@ -135,6 +135,19 @@
+    struct panvk_graphics_sysvals sysvals;
+ 
+ #if PAN_ARCH < 9
++   /* iter13: VK_EXT_transform_feedback state (JM-class only for now). */
++   struct {
++      bool active;
++      uint32_t buffer_count;
++      struct {
++         uint64_t addr;
++         uint64_t offset;
++         uint64_t size;
++      } buffers[4];
++   } xfb;
++#endif
++
++#if PAN_ARCH < 9
+    struct panvk_shader_link link;
+ #endif
+ 
+--- a/src/panfrost/vulkan/panvk_vX_cmd_draw.c	2026-04-29 22:19:00.000000000 +0200
++++ b/src/panfrost/vulkan/panvk_vX_cmd_draw.c	2026-05-20 19:10:23.031919662 +0200
+@@ -10,6 +10,7 @@
+ #include "panvk_entrypoints.h"
+ 
+ #include "pan_desc.h"
++#include "pan_compiler.h"   /* PAN_SHADER_OOB_ADDRESS */
+ #include "pan_util.h"
+ 
+ static void
+@@ -722,6 +723,35 @@
+    set_gfx_sysval(cmdbuf, dirty_sysvals, vs.raw_vertex_offset,
+                   info->vertex.raw_offset);
+    set_gfx_sysval(cmdbuf, dirty_sysvals, layer_id, info->layer_id);
++
++   /* iter13: VK_EXT_transform_feedback sysvals — always set (per draw),
++    * reflect bound XFB state. set_gfx_sysval is a no-op if value unchanged. */
++   set_gfx_sysval(cmdbuf, dirty_sysvals, vs.num_vertices, info->vertex.count);
++   {
++      const struct panvk_cmd_graphics_state *_gfx = &cmdbuf->state.gfx;
++      /* iter13: default each XFB buffer address to PAN_SHADER_OOB_ADDRESS
++       * (= 1<<63). This is the Panfrost-Gallium memory-sink idiom — the
++       * Bifrost MMU silently discards stores to this address, so a pipeline
++       * with XFB outputs used in a non-XFB draw (or in an XFB draw with
++       * fewer bound buffers than the shader declares) is safe instead of
++       * faulting. See gallium/drivers/panfrost/pan_cmdstream.c PAN_SYSVAL_XFB. */
++      uint64_t _xa0 = PAN_SHADER_OOB_ADDRESS, _xa1 = PAN_SHADER_OOB_ADDRESS,
++               _xa2 = PAN_SHADER_OOB_ADDRESS, _xa3 = PAN_SHADER_OOB_ADDRESS;
++      if (_gfx->xfb.active) {
++         if (_gfx->xfb.buffer_count > 0 && _gfx->xfb.buffers[0].addr)
++            _xa0 = _gfx->xfb.buffers[0].addr + _gfx->xfb.buffers[0].offset;
++         if (_gfx->xfb.buffer_count > 1 && _gfx->xfb.buffers[1].addr)
++            _xa1 = _gfx->xfb.buffers[1].addr + _gfx->xfb.buffers[1].offset;
++         if (_gfx->xfb.buffer_count > 2 && _gfx->xfb.buffers[2].addr)
++            _xa2 = _gfx->xfb.buffers[2].addr + _gfx->xfb.buffers[2].offset;
++         if (_gfx->xfb.buffer_count > 3 && _gfx->xfb.buffers[3].addr)
++            _xa3 = _gfx->xfb.buffers[3].addr + _gfx->xfb.buffers[3].offset;
++      }
++      set_gfx_sysval(cmdbuf, dirty_sysvals, vs.xfb_address[0], _xa0);
++      set_gfx_sysval(cmdbuf, dirty_sysvals, vs.xfb_address[1], _xa1);
++      set_gfx_sysval(cmdbuf, dirty_sysvals, vs.xfb_address[2], _xa2);
++      set_gfx_sysval(cmdbuf, dirty_sysvals, vs.xfb_address[3], _xa3);
++   }
+ #endif
+ 
+    if (dyn_gfx_state_dirty(cmdbuf, CB_BLEND_CONSTANTS)) {
+--- a/src/panfrost/vulkan/meson.build	2026-04-29 22:19:00.000000000 +0200
++++ b/src/panfrost/vulkan/meson.build	2026-05-20 18:53:04.484861338 +0200
+@@ -73,6 +73,7 @@
+ jm_inc_dir = ['jm']
+ jm_files = [
+   'jm/panvk_vX_bind_queue.c',
++  'jm/panvk_vX_cmd_xfb.c',   # iter13
+   'jm/panvk_vX_cmd_buffer.c',
+   'jm/panvk_vX_cmd_dispatch.c',
+   'jm/panvk_vX_cmd_draw.c',
+--- a/src/panfrost/vulkan/jm/panvk_vX_cmd_buffer.c	2026-04-29 22:19:00.000000000 +0200
++++ b/src/panfrost/vulkan/jm/panvk_vX_cmd_buffer.c	2026-05-20 19:10:26.163965149 +0200
+@@ -473,5 +473,12 @@
+ 
+    vk_command_buffer_begin(&cmdbuf->vk, pBeginInfo);
+ 
++#if PAN_ARCH < 9
++   /* iter13: clear XFB state on Begin so a reused command buffer does not
++    * inherit stale xfb.buffer_count / xfb.active / xfb.buffers[] from a
++    * prior recording. */
++   memset(&cmdbuf->state.gfx.xfb, 0, sizeof(cmdbuf->state.gfx.xfb));
++#endif
++
+    return VK_SUCCESS;
+ }
+--- a/src/panfrost/vulkan/jm/panvk_vX_cmd_xfb.c	2026-05-18 12:50:53.067999996 +0200
++++ b/src/panfrost/vulkan/jm/panvk_vX_cmd_xfb.c	2026-05-20 19:10:27.175979847 +0200
+@@ -0,0 +1,111 @@
++/*
++ * Copyright © 2026 mfritsche / claude-noether
++ * SPDX-License-Identifier: MIT
++ *
++ * iter13: VK_EXT_transform_feedback command handlers for the JM
++ * architecture path (Bifrost v6/v7; all XFB state is gated on PAN_ARCH < 9).
++ *
++ * The runtime contract:
++ *   - vkCmdBindTransformFeedbackBuffersEXT: stash (gpu_addr, offset, size)
++ *     for each slot into cmdbuf->state.gfx.xfb.buffers[].
++ *   - vkCmdBeginTransformFeedbackEXT: set cmdbuf->state.gfx.xfb.active = true.
++ *     No explicit dirty marking; the next draw's set_gfx_sysval picks it up.
++ *   - vkCmdEndTransformFeedbackEXT: set active = false.
++ *
++ * Counter buffers (firstCounterBuffer/counterBufferCount/pCounterBuffers/
++ * pCounterBufferOffsets) are accepted by API but ignored — v1 doesn't
++ * support pause/resume. transformFeedbackDraw is advertised as false.
++ *
++ * Per-draw integration: panvk_vX_cmd_draw.c reads cmdbuf->state.gfx.xfb
++ * and populates vs.xfb_address[i] for shader use. The pan_nir_lower_xfb
++ * pass in panvk_vX_shader.c emits nir_load_xfb_address(i) which lowers
++ * (via panvk_vX_shader.c sysval handler) to a load from the per-draw
++ * sysval push area.
++ */
++
++#include "vk_log.h"
++#include "util/log.h"
++
++#include "panvk_cmd_buffer.h"
++#include "panvk_cmd_draw.h"
++#include "panvk_buffer.h"
++#include "panvk_entrypoints.h"
++
++VKAPI_ATTR void VKAPI_CALL
++panvk_per_arch(CmdBindTransformFeedbackBuffersEXT)(
++   VkCommandBuffer commandBuffer,
++   uint32_t firstBinding,
++   uint32_t bindingCount,
++   const VkBuffer *pBuffers,
++   const VkDeviceSize *pOffsets,
++   const VkDeviceSize *pSizes)
++{
++   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
++   struct panvk_cmd_graphics_state *gfx = &cmdbuf->state.gfx;
++
++   for (uint32_t i = 0; i < bindingCount; i++) {
++      uint32_t slot = firstBinding + i;
++      if (slot >= 4)
++         continue;
++
++      VK_FROM_HANDLE(panvk_buffer, buf, pBuffers[i]);
++      gfx->xfb.buffers[slot].addr = panvk_buffer_gpu_ptr(buf, 0);
++      gfx->xfb.buffers[slot].offset = pOffsets[i];
++      gfx->xfb.buffers[slot].size =
++         (pSizes != NULL && pSizes[i] != VK_WHOLE_SIZE)
++            ? pSizes[i]
++            : (buf->vk.size - pOffsets[i]);
++   }
++
++   if (firstBinding + bindingCount > gfx->xfb.buffer_count)
++      gfx->xfb.buffer_count = firstBinding + bindingCount;
++}
++
++VKAPI_ATTR void VKAPI_CALL
++panvk_per_arch(CmdBeginTransformFeedbackEXT)(
++   VkCommandBuffer commandBuffer,
++   uint32_t firstCounterBuffer,
++   uint32_t counterBufferCount,
++   const VkBuffer *pCounterBuffers,
++   const VkDeviceSize *pCounterBufferOffsets)
++{
++   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
++   struct panvk_cmd_graphics_state *gfx = &cmdbuf->state.gfx;
++
++   /* Counter buffers ignored in v1 — see VkPhysicalDeviceTransformFeedback
++    * PropertiesEXT.transformFeedbackDraw = false in panvk_vX_physical_device.c.
++    * App is spec-compliant if it does not pass counter buffers (which our
++    * features advertisement allows), but warn loudly if it does so we do not
++    * silently produce wrong capture state. */
++   (void)firstCounterBuffer;
++   (void)pCounterBufferOffsets;
++   if (counterBufferCount > 0 && pCounterBuffers != NULL) {
++      mesa_logw("panvk: CmdBeginTransformFeedbackEXT: counter buffers not "
++                "implemented (transformFeedbackDraw=false); XFB resume will "
++                "restart at buffer offset 0");
++   }
++
++   gfx->xfb.active = true;
++   /* Per-draw set_gfx_sysval picks up the change automatically — no
++    * explicit dirty marking required (set_gfx_sysval uses memcmp +
++    * BITSET to detect state diffs and re-emit sysvals). */
++}
++
++VKAPI_ATTR void VKAPI_CALL
++panvk_per_arch(CmdEndTransformFeedbackEXT)(
++   VkCommandBuffer commandBuffer,
++   uint32_t firstCounterBuffer,
++   uint32_t counterBufferCount,
++   const VkBuffer *pCounterBuffers,
++   const VkDeviceSize *pCounterBufferOffsets)
++{
++   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
++   struct panvk_cmd_graphics_state *gfx = &cmdbuf->state.gfx;
++
++   (void)firstCounterBuffer;
++   (void)counterBufferCount;
++   (void)pCounterBuffers;
++   (void)pCounterBufferOffsets;
++
++   gfx->xfb.active = false;
++}
diff --git a/arch/mesa-panvk-bifrost-video/0004-panvk-bifrost-xfb-primitive-decomposition.patch b/arch/mesa-panvk-bifrost-video/0004-panvk-bifrost-xfb-primitive-decomposition.patch
deleted file mode 120000
index e9ba2ffbbb..0000000000
--- a/arch/mesa-panvk-bifrost-video/0004-panvk-bifrost-xfb-primitive-decomposition.patch
+++ /dev/null
@@ -1 +0,0 @@
-../mesa-panvk-bifrost/0004-panvk-bifrost-xfb-primitive-decomposition.patch
\ No newline at end of file
diff --git a/arch/mesa-panvk-bifrost-video/0004-panvk-bifrost-xfb-primitive-decomposition.patch b/arch/mesa-panvk-bifrost-video/0004-panvk-bifrost-xfb-primitive-decomposition.patch
new file mode 100644
index 0000000000..c015062ff6
--- /dev/null
+++ b/arch/mesa-panvk-bifrost-video/0004-panvk-bifrost-xfb-primitive-decomposition.patch
@@ -0,0 +1,629 @@
+diff -urN a/src/panfrost/vulkan/meson.build b/src/panfrost/vulkan/meson.build
+--- a/src/panfrost/vulkan/meson.build	2026-05-21 14:04:02.529474145 +0200
++++ b/src/panfrost/vulkan/meson.build	2026-05-21 14:04:04.106755486 +0200
+@@ -123,6 +123,7 @@
+   'panvk_vX_nir_lower_input_attachment_loads.c',
+   'panvk_vX_sampler.c',
+   'panvk_vX_shader.c',
++  'panvk_vX_xfb_lower.c',
+   sha1_h,
+ ]
+ 
+diff -urN a/src/panfrost/vulkan/panvk_shader.h b/src/panfrost/vulkan/panvk_shader.h
+--- a/src/panfrost/vulkan/panvk_shader.h	2026-05-21 14:04:02.525251986 +0200
++++ b/src/panfrost/vulkan/panvk_shader.h	2026-05-21 14:04:04.084251800 +0200
+@@ -154,6 +154,8 @@
+       /* aligned_u64 attribute below inserts the 4-byte alignment gap
+        * after num_vertices automatically — no explicit pad needed. */
+       aligned_u64 xfb_address[4];  /* iter13: 4 transform feedback buffer base addresses */
++      uint32_t xfb_topology;       /* iter17: panvk_xfb_topology enum value */
++      uint32_t xfb_output_count;   /* iter17: per-instance output verts after decomp */
+ #endif
+       int32_t first_vertex;
+       int32_t base_instance;
+@@ -569,4 +571,76 @@
+    struct pan_compute_dim local_size, const void *bin_ptr, size_t bin_size,
+    struct panvk_shader **shader_out);
+ 
++
++#if PAN_ARCH < 9
++/* iter17: encoding for vs.xfb_topology sysval. Maps VkPrimitiveTopology values
++ * we need to distinguish at shader runtime for XFB capture. LIST topologies
++ * use the iter13 single-store fast path; non-LIST need per-vertex decomposition. */
++enum panvk_xfb_topology {
++   PANVK_XFB_TOPO_LIST            = 0,
++   PANVK_XFB_TOPO_LINE_STRIP      = 1,
++   PANVK_XFB_TOPO_TRI_STRIP       = 2,
++   PANVK_XFB_TOPO_TRI_FAN         = 3,
++   PANVK_XFB_TOPO_LINE_LIST_ADJ   = 4,
++   PANVK_XFB_TOPO_LINE_STRIP_ADJ  = 5,
++   PANVK_XFB_TOPO_TRI_LIST_ADJ    = 6,
++   PANVK_XFB_TOPO_TRI_STRIP_ADJ   = 7,
++};
++
++#include "panvk_macros.h"
++struct nir_shader;
++bool panvk_per_arch(nir_lower_xfb)(struct nir_shader *nir);
++
++/* Map VkPrimitiveTopology to panvk_xfb_topology enum (driver-side helper). */
++static inline uint32_t
++panvk_vk_topology_to_xfb_enum(VkPrimitiveTopology topo)
++{
++   switch (topo) {
++   case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
++      return PANVK_XFB_TOPO_LINE_STRIP;
++   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
++      return PANVK_XFB_TOPO_TRI_STRIP;
++   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
++      return PANVK_XFB_TOPO_TRI_FAN;
++   case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
++      return PANVK_XFB_TOPO_LINE_LIST_ADJ;
++   case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
++      return PANVK_XFB_TOPO_LINE_STRIP_ADJ;
++   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
++      return PANVK_XFB_TOPO_TRI_LIST_ADJ;
++   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
++      return PANVK_XFB_TOPO_TRI_STRIP_ADJ;
++   case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
++   case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
++   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
++   default:
++      return PANVK_XFB_TOPO_LIST;
++   }
++}
++
++/* Compute the per-instance output vertex count for a given (topology, input count). */
++static inline uint32_t
++panvk_xfb_output_count(VkPrimitiveTopology topo, uint32_t input_count)
++{
++   switch (topo) {
++   case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
++      return input_count >= 1 ? 2u * (input_count - 1u) : 0u;
++   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
++   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
++      return input_count >= 2 ? 3u * (input_count - 2u) : 0u;
++   case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
++      return (input_count / 4u) * 2u;
++   case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
++      return input_count >= 3 ? 2u * (input_count - 3u) : 0u;
++   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
++      return (input_count / 6u) * 3u;
++   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
++      return input_count >= 6 ? 3u * (input_count / 2u - 2u) : 0u;
++   default:
++      return input_count;  /* LIST topologies: 1:1 mapping */
++   }
++}
++#endif
++
++
+ #endif
+diff -urN a/src/panfrost/vulkan/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/panvk_vX_cmd_draw.c
+--- a/src/panfrost/vulkan/panvk_vX_cmd_draw.c	2026-05-21 14:04:02.528576354 +0200
++++ b/src/panfrost/vulkan/panvk_vX_cmd_draw.c	2026-05-21 14:04:04.091357598 +0200
+@@ -727,6 +727,20 @@
+    /* iter13: VK_EXT_transform_feedback sysvals — always set (per draw),
+     * reflect bound XFB state. set_gfx_sysval is a no-op if value unchanged. */
+    set_gfx_sysval(cmdbuf, dirty_sysvals, vs.num_vertices, info->vertex.count);
++
++   /* iter17: XFB primitive-decomposition sysvals.
++    * xfb_topology = enum value for the current bound topology.
++    * xfb_output_count = per-instance output vertex count after decomposition.
++    * For LIST topologies, output_count == input vertex count and the shader
++    * takes the iter13 single-store fast path. */
++   {
++      VkPrimitiveTopology vk_topo =
++         cmdbuf->vk.dynamic_graphics_state.ia.primitive_topology;
++      uint32_t topo_enum = panvk_vk_topology_to_xfb_enum(vk_topo);
++      uint32_t out_count = panvk_xfb_output_count(vk_topo, info->vertex.count);
++      set_gfx_sysval(cmdbuf, dirty_sysvals, vs.xfb_topology, topo_enum);
++      set_gfx_sysval(cmdbuf, dirty_sysvals, vs.xfb_output_count, out_count);
++   }
+    {
+       const struct panvk_cmd_graphics_state *_gfx = &cmdbuf->state.gfx;
+       /* iter13: default each XFB buffer address to PAN_SHADER_OOB_ADDRESS
+diff -urN a/src/panfrost/vulkan/panvk_vX_shader.c b/src/panfrost/vulkan/panvk_vX_shader.c
+--- a/src/panfrost/vulkan/panvk_vX_shader.c	2026-05-21 14:04:02.527576494 +0200
++++ b/src/panfrost/vulkan/panvk_vX_shader.c	2026-05-21 14:04:04.098356619 +0200
+@@ -895,7 +895,10 @@
+        nir->info.has_transform_feedback_varyings) {
+       NIR_PASS(_, nir, nir_opt_constant_folding);
+       NIR_PASS(_, nir, nir_io_add_intrinsic_xfb_info);
+-      NIR_PASS(_, nir, pan_nir_lower_xfb);
++      /* iter17: panvk-specific replacement for pan_nir_lower_xfb that handles
++       * primitive decomposition for non-LIST topologies. Single-store LIST
++       * fast path matches iter13 behavior. */
++      NIR_PASS(_, nir, panvk_per_arch(nir_lower_xfb));
+    }
+ #endif
+ }
+diff -urN a/src/panfrost/vulkan/panvk_vX_xfb_lower.c b/src/panfrost/vulkan/panvk_vX_xfb_lower.c
+--- a/src/panfrost/vulkan/panvk_vX_xfb_lower.c	1970-01-01 01:00:00.000000000 +0100
++++ b/src/panfrost/vulkan/panvk_vX_xfb_lower.c	2026-05-21 14:04:04.115354242 +0200
+@@ -0,0 +1,486 @@
++/*
++ * Copyright © 2026 mfritsche / claude-noether
++ * SPDX-License-Identifier: MIT
++ *
++ * iter17: panvk-specific replacement for pan_nir_lower_xfb that handles
++ * primitive decomposition for transform_feedback on non-LIST topologies
++ * (TRIANGLE_STRIP/FAN, LINE_STRIP, *_WITH_ADJACENCY).
++ *
++ * Approach: emit a topology dispatch at the start of each store_output
++ * lowering. The shader reads vs.xfb_topology sysval at runtime and branches
++ * into per-topology emission logic. For each affected topology, the lowered
++ * code emits guarded conditional stores — one per primitive this vertex
++ * contributes to, computing the output buffer position via primitive index
++ * and slot within the decomposed primitive.
++ *
++ * For LIST topologies (POINT/LINE/TRIANGLE LIST), takes a fast path that
++ * matches iter13's single-store behavior.
++ *
++ * For TRIANGLE_FAN, the central vertex (v=0) contributes to ALL primitives
++ * as slot 2 — handled via a NIR loop bounded by num_vertices.
++ *
++ * See ~/src/panvk-bifrost/iter17/phase{0,1,2}_*.md for full design context.
++ */
++
++#include "panvk_macros.h"
++
++#if PAN_ARCH < 9
++
++#include "panvk_shader.h"
++
++#include "compiler/nir/nir_builder.h"
++#include "pan_nir.h"
++
++#include <vulkan/vulkan_core.h>
++
++/* ----- Address arithmetic ----- */
++
++static nir_def *
++xfb_store_addr(nir_builder *b, nir_def *buf, nir_def *out_idx,
++               uint16_t stride, uint16_t offset_bytes)
++{
++   nir_def *byte_off = nir_iadd_imm(b,
++      nir_imul_imm(b, out_idx, stride), offset_bytes);
++   return nir_iadd(b, buf, nir_u2u64(b, byte_off));
++}
++
++/* LIST path: one store per vertex at instance_id * output_count + raw_vid. */
++static void
++emit_list_store(nir_builder *b, nir_def *buf, nir_def *output_count,
++                nir_def *instance_id, nir_def *raw_vid, nir_def *value,
++                uint16_t stride, uint16_t offset_bytes)
++{
++   nir_def *inst_base = nir_imul(b, instance_id, output_count);
++   nir_def *idx = nir_iadd(b, inst_base, raw_vid);
++   nir_store_global(b, value, xfb_store_addr(b, buf, idx, stride, offset_bytes));
++}
++
++/* Stores `value` to slot `slot` of primitive `prim_idx`, only if `eligible`. */
++static void
++emit_prim_store(nir_builder *b, nir_def *buf, nir_def *output_count,
++                nir_def *instance_id, nir_def *eligible,
++                nir_def *prim_idx, nir_def *slot,
++                uint32_t verts_per_prim,
++                nir_def *value, uint16_t stride, uint16_t offset_bytes)
++{
++   nir_push_if(b, eligible);
++
++   nir_def *inst_base = nir_imul(b, instance_id, output_count);
++   nir_def *prim_base = nir_imul_imm(b, prim_idx, verts_per_prim);
++   nir_def *idx = nir_iadd(b, inst_base, nir_iadd(b, prim_base, slot));
++   nir_store_global(b, value, xfb_store_addr(b, buf, idx, stride, offset_bytes));
++
++   nir_pop_if(b, NULL);
++}
++
++/* ----- Per-topology emission ----- */
++
++/* TRIANGLE_STRIP: vertex v can be a corner of triangles v, v-1 and v-2.
++ * Odd triangles take their last two corners in swapped order, which is why
++ * the slot used for v-1 / v-2 depends on that triangle's parity. */
++static void
++emit_tri_strip(nir_builder *b, nir_def *v, nir_def *N,
++               nir_def *buf, nir_def *output_count, nir_def *instance_id,
++               nir_def *value, uint16_t stride, uint16_t offset_bytes)
++{
++   nir_def *n_minus_2 = nir_iadd_imm(b, N, -2);
++   nir_def *n_minus_1 = nir_iadd_imm(b, N, -1);
++
++   /* Triangle v, slot 0: requires v < N-2. */
++   nir_def *in_a = nir_ult(b, v, n_minus_2);
++   emit_prim_store(b, buf, output_count, instance_id, in_a,
++                   v, nir_imm_int(b, 0), 3, value, stride, offset_bytes);
++
++   /* Triangle v-1: slot 1 if that triangle is even, slot 2 if it is odd.
++    * Requires 1 <= v < N-1. */
++   nir_def *tri_b = nir_iadd_imm(b, v, -1);
++   nir_def *odd_b = nir_iand_imm(b, tri_b, 1u);
++   nir_def *slot_b = nir_iadd_imm(b, odd_b, 1);
++   nir_def *lo_b = nir_uge(b, v, nir_imm_int(b, 1));
++   nir_def *hi_b = nir_ult(b, v, n_minus_1);
++   nir_def *in_b = nir_iand(b, lo_b, hi_b);
++   emit_prim_store(b, buf, output_count, instance_id, in_b,
++                   tri_b, slot_b, 3, value, stride, offset_bytes);
++
++   /* Triangle v-2: slot 2 if that triangle is even, slot 1 if it is odd.
++    * Requires 2 <= v < N. */
++   nir_def *tri_c = nir_iadd_imm(b, v, -2);
++   nir_def *odd_c = nir_iand_imm(b, tri_c, 1u);
++   nir_def *slot_c = nir_isub(b, nir_imm_int(b, 2), odd_c);
++   nir_def *lo_c = nir_uge(b, v, nir_imm_int(b, 2));
++   nir_def *hi_c = nir_ult(b, v, N);
++   nir_def *in_c = nir_iand(b, lo_c, hi_c);
++   emit_prim_store(b, buf, output_count, instance_id, in_c,
++                   tri_c, slot_c, 3, value, stride, offset_bytes);
++}
++
++/* LINE_STRIP: segment p is {p, p+1}, so vertex v is written as
++ *   slot 0 of segment v    (v < N-1)
++ *   slot 1 of segment v-1  (1 <= v < N) */
++static void
++emit_line_strip(nir_builder *b, nir_def *v, nir_def *N,
++                nir_def *buf, nir_def *output_count, nir_def *instance_id,
++                nir_def *value, uint16_t stride, uint16_t offset_bytes)
++{
++   nir_def *last = nir_iadd_imm(b, N, -1);
++
++   /* Start point of segment v. */
++   nir_def *starts = nir_ult(b, v, last);
++   emit_prim_store(b, buf, output_count, instance_id, starts,
++                   v, nir_imm_int(b, 0), 2, value, stride, offset_bytes);
++
++   /* End point of segment v-1. */
++   nir_def *prev = nir_iadd_imm(b, v, -1);
++   nir_def *not_first = nir_uge(b, v, nir_imm_int(b, 1));
++   nir_def *in_range = nir_ult(b, v, N);
++   nir_def *ends = nir_iand(b, not_first, in_range);
++   emit_prim_store(b, buf, output_count, instance_id, ends,
++                   prev, nir_imm_int(b, 1), 2, value, stride, offset_bytes);
++}
++
++/* TRIANGLE_FAN: prim p emits {p+1, p+2, 0}; the loop below covers p in [0, N-2).
++ *   vertex v=0: stored as slot 2 of every prim (hence the loop)
++ *   vertex v>=1: stored as slot 0 of prim v-1 (if 1 <= v <= N-2)
++ *   vertex v>=2: stored as slot 1 of prim v-2 (if 2 <= v <= N-1)
++ */
++static void
++emit_tri_fan(nir_builder *b, nir_def *v, nir_def *N,
++             nir_def *buf, nir_def *output_count, nir_def *instance_id,
++             nir_def *value, uint16_t stride, uint16_t offset_bytes)
++{
++   nir_def *Nm1 = nir_iadd_imm(b, N, -1);
++   nir_def *Nm2 = nir_iadd_imm(b, N, -2);
++
++   /* Rim vertex as first corner: prim v-1, slot 0, valid for 1 <= v < N-1 */
++   {
++      nir_def *prim = nir_iadd_imm(b, v, -1);
++      nir_def *eligible = nir_iand(b,
++         nir_uge(b, v, nir_imm_int(b, 1)),
++         nir_ult(b, v, Nm1));
++      emit_prim_store(b, buf, output_count, instance_id, eligible,
++                      prim, nir_imm_int(b, 0), 3, value, stride, offset_bytes);
++   }
++
++   /* Rim vertex as second corner: prim v-2, slot 1, valid for 2 <= v < N */
++   {
++      nir_def *prim = nir_iadd_imm(b, v, -2);
++      nir_def *eligible = nir_iand(b,
++         nir_uge(b, v, nir_imm_int(b, 2)),
++         nir_ult(b, v, N));
++      emit_prim_store(b, buf, output_count, instance_id, eligible,
++                      prim, nir_imm_int(b, 1), 3, value, stride, offset_bytes);
++   }
++
++   /* Central vertex (v == 0): walk p = 0 .. N-3 and write slot 2 of each prim. */
++   nir_push_if(b, nir_ieq_imm(b, v, 0));
++   {
++      nir_variable *p_var = nir_local_variable_create(b->impl,
++         glsl_uint_type(), "fan_p");
++      nir_store_var(b, p_var, nir_imm_int(b, 0), 0x1);  /* p = 0 */
++      nir_push_loop(b);
++      {
++         nir_def *p = nir_load_var(b, p_var);
++         nir_push_if(b, nir_uge(b, p, Nm2));  /* leave once p >= N-2 */
++         {
++            nir_jump(b, nir_jump_break);
++         }
++         nir_pop_if(b, NULL);
++
++         nir_def *out_idx = nir_iadd(b,  /* instance base + 3*p + 2 */
++            nir_imul(b, instance_id, output_count),
++            nir_iadd_imm(b, nir_imul_imm(b, p, 3), 2));
++         nir_def *addr = xfb_store_addr(b, buf, out_idx, stride, offset_bytes);
++         nir_store_global(b, value, addr);
++
++         nir_store_var(b, p_var, nir_iadd_imm(b, p, 1), 0x1);  /* p++ */
++      }
++      nir_pop_loop(b, NULL);
++   }
++   nir_pop_if(b, NULL);
++}
++
++/* LINE_LIST_WITH_ADJACENCY: 4-vertex groups [4i..4i+3]; output {4i+1, 4i+2}.
++ *   v%4 == 1: prim v/4 slot 0;  v%4 == 2: prim v/4 slot 1
++ *   Only the N/4 complete groups emit; a trailing partial group is dropped.
++ */
++static void
++emit_line_list_adj(nir_builder *b, nir_def *v, nir_def *N,
++                   nir_def *buf, nir_def *output_count, nir_def *instance_id,
++                   nir_def *value, uint16_t stride, uint16_t offset_bytes)
++{
++   nir_def *vmod4 = nir_iand_imm(b, v, 3u);
++   nir_def *prim = nir_ushr_imm(b, v, 2);  /* v / 4 */
++   nir_def *complete = nir_ult(b, prim, nir_ushr_imm(b, N, 2));
++
++   emit_prim_store(b, buf, output_count, instance_id,
++      nir_iand(b, complete, nir_ieq_imm(b, vmod4, 1)),
++      prim, nir_imm_int(b, 0), 2, value, stride, offset_bytes);
++
++   emit_prim_store(b, buf, output_count, instance_id,
++      nir_iand(b, complete, nir_ieq_imm(b, vmod4, 2)),
++      prim, nir_imm_int(b, 1), 2, value, stride, offset_bytes);
++}
++
++/* LINE_STRIP_WITH_ADJACENCY: N vertices form N-3 segments; segment p emits
++ * {p+1, p+2}. Vertices 0 and N-1 are adjacency-only and are never stored.
++ *   v contributes to prim v-1 slot 0 (1 <= v <= N-3)
++ *   v contributes to prim v-2 slot 1 (2 <= v <= N-2)
++ * Both upper bounds follow from requiring prim < N-3. */
++static void
++emit_line_strip_adj(nir_builder *b, nir_def *v, nir_def *N,
++                    nir_def *buf, nir_def *output_count, nir_def *instance_id,
++                    nir_def *value, uint16_t stride, uint16_t offset_bytes)
++{
++   nir_def *Nm1 = nir_iadd_imm(b, N, -1);
++   nir_def *Nm2 = nir_iadd_imm(b, N, -2);
++
++   /* Prim v-1, slot 0: v >= 1 AND v-1 < N-3, i.e. v < N-2 */
++   {
++      nir_def *prim = nir_iadd_imm(b, v, -1);
++      nir_def *eligible = nir_iand(b,
++         nir_uge(b, v, nir_imm_int(b, 1)),
++         nir_ult(b, v, Nm2));
++      emit_prim_store(b, buf, output_count, instance_id, eligible,
++                      prim, nir_imm_int(b, 0), 2, value, stride, offset_bytes);
++   }
++
++   /* Prim v-2, slot 1: v >= 2 AND v-2 < N-3, i.e. v < N-1 */
++   {
++      nir_def *prim = nir_iadd_imm(b, v, -2);
++      nir_def *eligible = nir_iand(b,
++         nir_uge(b, v, nir_imm_int(b, 2)),
++         nir_ult(b, v, Nm1));
++      emit_prim_store(b, buf, output_count, instance_id, eligible,
++                      prim, nir_imm_int(b, 1), 2, value, stride, offset_bytes);
++   }
++}
++
++/* TRIANGLE_LIST_WITH_ADJACENCY: 6-vertex groups; output {6i, 6i+2, 6i+4}.
++ *   v%6 == 0 / 2 / 4: prim v/6 slot 0 / 1 / 2
++ *   Odd positions in a group are adjacency-only and are never stored.
++ *   Only the N/6 complete groups emit; a trailing partial group is dropped.
++ */
++static void
++emit_tri_list_adj(nir_builder *b, nir_def *v, nir_def *N,
++                  nir_def *buf, nir_def *output_count, nir_def *instance_id,
++                  nir_def *value, uint16_t stride, uint16_t offset_bytes)
++{
++   nir_def *vmod6 = nir_umod_imm(b, v, 6);
++   nir_def *prim = nir_udiv_imm(b, v, 6);
++   nir_def *complete = nir_ult(b, prim, nir_udiv_imm(b, N, 6));
++
++   for (uint32_t slot = 0; slot < 3; slot++) {
++      emit_prim_store(b, buf, output_count, instance_id,
++         nir_iand(b, complete, nir_ieq_imm(b, vmod6, slot * 2)),
++         prim, nir_imm_int(b, slot), 3, value, stride, offset_bytes);
++   }
++}
++
++/* TRIANGLE_STRIP_WITH_ADJACENCY: there are N/2 - 2 prims; prim i emits:
++ *   even i: {2i, 2i+2, 2i+4}    (slots 0, 1, 2 <- input indices 2i, 2i+2, 2i+4)
++ *   odd  i: {2i, 2i+4, 2i+2}    (slots 0, 1, 2 <- input indices 2i, 2i+4, 2i+2)
++ *
++ * Only EVEN input vertices are stored (every emitted index above is even).
++ * For even input v, each store below is also gated on prim < N/2 - 2:
++ *   prim v/2     slot 0
++ *   prim (v-2)/2 slot 1 if (v-2)/2 even, slot 2 if odd   (when v >= 2)
++ *   prim (v-4)/2 slot 2 if (v-4)/2 even, slot 1 if odd   (when v >= 4)
++ */
++static void
++emit_tri_strip_adj(nir_builder *b, nir_def *v, nir_def *N,
++                   nir_def *buf, nir_def *output_count, nir_def *instance_id,
++                   nir_def *value, uint16_t stride, uint16_t offset_bytes)
++{
++   /* Skip odd input vertices entirely: none of the stores below apply. */
++   nir_def *v_is_even = nir_ieq_imm(b, nir_iand_imm(b, v, 1u), 0);
++   nir_push_if(b, v_is_even);
++   {
++      nir_def *N_half = nir_ushr_imm(b, N, 1);
++      nir_def *max_prim = nir_iadd_imm(b, N_half, -2);  /* N/2 - 2 */
++      nir_def *v_half = nir_ushr_imm(b, v, 1);
++
++      /* Prim v/2, slot 0: eligible when v/2 < N/2 - 2 */
++      emit_prim_store(b, buf, output_count, instance_id,
++         nir_ult(b, v_half, max_prim),
++         v_half, nir_imm_int(b, 0), 3, value, stride, offset_bytes);
++
++      /* Prim (v-2)/2 = v/2 - 1: eligible when v >= 2 AND prim < N/2 - 2 */
++      {
++         nir_def *prim = nir_iadd_imm(b, v_half, -1);
++         nir_def *parity = nir_iand_imm(b, prim, 1u);
++         nir_def *slot = nir_iadd_imm(b, parity, 1);  /* even -> 1, odd -> 2 */
++         nir_def *eligible = nir_iand(b,
++            nir_uge(b, v, nir_imm_int(b, 2)),
++            nir_ult(b, prim, max_prim));
++         emit_prim_store(b, buf, output_count, instance_id, eligible,
++                         prim, slot, 3, value, stride, offset_bytes);
++      }
++
++      /* Prim (v-4)/2 = v/2 - 2: eligible when v >= 4 AND prim < N/2 - 2 */
++      {
++         nir_def *prim = nir_iadd_imm(b, v_half, -2);
++         nir_def *parity = nir_iand_imm(b, prim, 1u);
++         nir_def *slot = nir_isub(b, nir_imm_int(b, 2), parity);  /* even -> 2, odd -> 1 */
++         nir_def *eligible = nir_iand(b,
++            nir_uge(b, v, nir_imm_int(b, 4)),
++            nir_ult(b, prim, max_prim));
++         emit_prim_store(b, buf, output_count, instance_id, eligible,
++                         prim, slot, 3, value, stride, offset_bytes);
++      }
++   }
++   nir_pop_if(b, NULL);
++}
++
++/* ----- Main lowering: per store_output XFB channel ----- */
++/* Emits the runtime topology dispatch that stores one XFB output of `intr`. */
++static void
++lower_xfb_output_iter17(nir_builder *b, nir_intrinsic_instr *intr,
++                        unsigned channel_idx, unsigned num_components,
++                        unsigned buffer, unsigned offset_words)
++{
++   assert(buffer < MAX_XFB_BUFFERS);
++   assert(nir_intrinsic_component(intr) == 0);
++   /* NOTE(review): the "* 4" presumably converts dwords to bytes -- confirm. */
++   uint16_t stride = b->shader->info.xfb_stride[buffer] * 4;
++   assert(stride != 0);
++   uint16_t offset_bytes = offset_words * 4;
++   /* Flag the vertex-id and instance-id system values as read by the shader. */
++   BITSET_SET(b->shader->info.system_values_read, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE);
++   BITSET_SET(b->shader->info.system_values_read, SYSTEM_VALUE_INSTANCE_ID);
++   /* Runtime inputs shared by every topology branch below. */
++   nir_def *topology = load_sysval(b, graphics, 32, vs.xfb_topology);
++   nir_def *out_count = load_sysval(b, graphics, 32, vs.xfb_output_count);
++   nir_def *N = nir_load_num_vertices(b);
++   nir_def *v = nir_load_raw_vertex_id_pan(b);
++   nir_def *instance = nir_load_instance_id(b);
++   nir_def *buf = nir_load_xfb_address(b, 64, .base = buffer);
++   /* Take num_components channels of the stored value, from channel_idx up. */
++   nir_def *src = intr->src[0].ssa;
++   nir_component_mask_t mask = nir_component_mask(num_components);
++   nir_def *value = nir_channels(b, src, mask << channel_idx);
++
++   /* Topology dispatch ladder: LIST is tested first and gets a single store. */
++   nir_push_if(b, nir_ieq_imm(b, topology, PANVK_XFB_TOPO_LIST));
++   {
++      emit_list_store(b, buf, out_count, instance, v, value,
++                      stride, offset_bytes);
++   }
++   nir_push_else(b, NULL);
++   {
++      /* Gate every non-LIST topology on output_count > 0. output_count is
++       * computed outside this file; it is expected to be 0 when the draw has
++       * too few vertices to form a single primitive. In that case no store
++       * may be emitted, because the N-1 / N-2 style bounds in the emitters
++       * would wrap around (unsigned) and let stores through. */
++      nir_push_if(b, nir_ult(b, nir_imm_int(b, 0), out_count));
++      {
++      nir_push_if(b, nir_ieq_imm(b, topology, PANVK_XFB_TOPO_TRI_STRIP));
++      {
++         emit_tri_strip(b, v, N, buf, out_count, instance, value,
++                        stride, offset_bytes);
++      }
++      nir_push_else(b, NULL);
++      {
++         nir_push_if(b, nir_ieq_imm(b, topology, PANVK_XFB_TOPO_LINE_STRIP));
++         {
++            emit_line_strip(b, v, N, buf, out_count, instance, value,
++                            stride, offset_bytes);
++         }
++         nir_push_else(b, NULL);
++         {
++            nir_push_if(b, nir_ieq_imm(b, topology, PANVK_XFB_TOPO_TRI_FAN));
++            {
++               emit_tri_fan(b, v, N, buf, out_count, instance, value,
++                            stride, offset_bytes);
++            }
++            nir_push_else(b, NULL);
++            {
++               nir_push_if(b, nir_ieq_imm(b, topology, PANVK_XFB_TOPO_LINE_LIST_ADJ));
++               {
++                  emit_line_list_adj(b, v, N, buf, out_count, instance, value,
++                                     stride, offset_bytes);
++               }
++               nir_push_else(b, NULL);
++               {
++                  nir_push_if(b, nir_ieq_imm(b, topology, PANVK_XFB_TOPO_LINE_STRIP_ADJ));
++                  {
++                     emit_line_strip_adj(b, v, N, buf, out_count, instance, value,
++                                         stride, offset_bytes);
++                  }
++                  nir_push_else(b, NULL);
++                  {
++                     nir_push_if(b, nir_ieq_imm(b, topology, PANVK_XFB_TOPO_TRI_LIST_ADJ));
++                     {
++                        emit_tri_list_adj(b, v, N, buf, out_count, instance, value,
++                                          stride, offset_bytes);
++                     }
++                     nir_push_else(b, NULL);
++                     {
++                        /* Any remaining value is treated as TRI_STRIP_ADJ. */
++                        emit_tri_strip_adj(b, v, N, buf, out_count, instance, value,
++                                           stride, offset_bytes);
++                     }
++                     nir_pop_if(b, NULL);
++                  }
++                  nir_pop_if(b, NULL);
++               }
++               nir_pop_if(b, NULL);
++            }
++            nir_pop_if(b, NULL);
++         }
++         nir_pop_if(b, NULL);
++      }
++      nir_pop_if(b, NULL);
++      }
++      nir_pop_if(b, NULL);  /* closes the output_count > 0 guard */
++   }
++   nir_pop_if(b, NULL);
++}
++
++/* Per-intrinsic callback modelled on pan_nir_lower_xfb's lower_xfb: replaces
++ * load_vertex_id and routes store_output through the topology-aware path. */
++static bool
++lower_xfb_iter17(nir_builder *b, nir_intrinsic_instr *intr,
++                 UNUSED void *data)
++{
++   switch (intr->intrinsic) {
++   case nir_intrinsic_load_vertex_id: {
++      b->cursor = nir_instr_remove(&intr->instr);
++      nir_def *raw_id = nir_load_raw_vertex_id_pan(b);
++      nir_def *raw_off = nir_load_raw_vertex_offset_pan(b);
++      nir_def_rewrite_uses(&intr->def, nir_iadd(b, raw_id, raw_off));
++      return true;
++   }
++   case nir_intrinsic_store_output:
++      break;
++   default:
++      return false;
++   }
++
++   b->cursor = nir_before_instr(&intr->instr);
++   /* Channels 0-1 are described by io_xfb, channels 2-3 by io_xfb2. */
++   bool lowered = false;
++   for (unsigned ch = 0; ch < 4; ++ch) {
++      nir_io_xfb xfb = ch < 2 ? nir_intrinsic_io_xfb(intr)
++                              : nir_intrinsic_io_xfb2(intr);
++      unsigned n = xfb.out[ch & 1].num_components;
++      if (n) {
++         lower_xfb_output_iter17(b, intr, ch, n, xfb.out[ch & 1].buffer,
++                                 xfb.out[ch & 1].offset);
++         lowered = true;
++      }
++   }
++
++   if (lowered)
++      nir_instr_remove(&intr->instr);
++   return lowered;
++}
++
++bool
++panvk_per_arch(nir_lower_xfb)(nir_shader *nir)
++{
++   /* The callback inserts ifs and a loop, so no CF metadata stays valid. */
++   return nir_shader_intrinsics_pass(nir, lower_xfb_iter17, nir_metadata_none, NULL);
++}
++
++#endif /* PAN_ARCH < 9 */