#!/usr/bin/env python3
"""
iter13: apply VK_EXT_transform_feedback implementation to Mesa 26.0.6 PanVk.

Run from inside /home/mfritsche/mesa-build/mesa-26.0.6/ on ohm.
Idempotent — checks if changes are already present and skips if so.

The implementation is single-variant (Vulkan spec allows undefined behavior
for XFB-output shaders bound outside Begin/EndTransformFeedback, so we don't
need defensive two-variant compilation for v1).

Root directory resolution (first match wins):
  1. the MESA_ROOT environment variable, if set and non-empty
  2. the current working directory, if its basename starts with "mesa-"
  3. the directory containing this script

Steps (in execution order):
  1. patch  src/panfrost/vulkan/panvk_shader.h
  2. patch  src/panfrost/vulkan/panvk_vX_physical_device.c
  3. patch  src/panfrost/vulkan/panvk_vX_shader.c
  4. patch  src/panfrost/vulkan/panvk_cmd_draw.h
  5. patch  src/panfrost/vulkan/panvk_vX_cmd_draw.c   (arch-templated, NOT jm/)
  6. create src/panfrost/vulkan/jm/panvk_vX_cmd_xfb.c
  7. patch  src/panfrost/vulkan/meson.build

NOTE(review): every `old` anchor below must match the Mesa tree byte-for-byte,
including indentation and line wrapping. Re-check the anchors against the
checkout whenever the Mesa version changes.
"""

import os
import sys


def resolve_root():
    """Return the directory that relative patch paths are resolved against.

    An explicit MESA_ROOT always wins; the "cwd looks like a mesa-* tree"
    heuristic is only a fallback, and the script's own directory is the
    last resort.
    """
    explicit = os.environ.get("MESA_ROOT")
    if explicit:
        return explicit
    cwd = os.getcwd()
    if os.path.basename(cwd).startswith("mesa-"):
        return cwd
    return os.path.abspath(os.path.dirname(__file__))


ROOT = resolve_root()


def replace_once(path, old, new, marker_in_new=None):
    """Replace the single occurrence of `old` with `new` in the file at `path`.

    `path` is joined onto ROOT (an absolute `path` is used as-is). If
    `marker_in_new` is given and already present in the file, the patch is
    treated as already applied and the file is left untouched.

    Exits the process with status 2 if the file is missing, or if `old`
    does not occur exactly once.
    """
    full = os.path.join(ROOT, path)
    try:
        # Patched sources contain non-ASCII characters (e.g. em dashes in
        # comments), so never rely on the locale's default encoding.
        with open(full, encoding="utf-8") as f:
            content = f.read()
    except FileNotFoundError:
        print(f"  [FAIL] {path} — file not found under {ROOT}")
        sys.exit(2)

    if marker_in_new and marker_in_new in content:
        print(f"  [skip] {path} — already patched ({marker_in_new!r} present)")
        return

    count = content.count(old)
    if count == 0:
        print(f"  [FAIL] {path} — expected pattern not found:\n    {old[:100]!r}")
        sys.exit(2)
    if count > 1:
        print(f"  [FAIL] {path} — pattern matches {count} times, need exactly 1")
        sys.exit(2)

    with open(full, "w", encoding="utf-8") as f:
        f.write(content.replace(old, new, 1))
    print(f"  [ok]   {path}")


def create_file(path, content, skip_if_exists=True):
    """Write `content` to `path` (relative to ROOT), creating parent dirs.

    When `skip_if_exists` is true (the default) an existing file is left
    untouched, which keeps the script idempotent.
    """
    full = os.path.join(ROOT, path)
    if skip_if_exists and os.path.exists(full):
        print(f"  [skip] {path} — exists")
        return
    os.makedirs(os.path.dirname(full), exist_ok=True)
    with open(full, "w", encoding="utf-8") as f:
        f.write(content)
    print(f"  [ok]   {path} (created)")


# ============================================================
# 1. panvk_shader.h — extend vs sysval struct (PAN_ARCH < 9)
# ============================================================
def patch_shader_header():
    """Add num_vertices and xfb_address[4] to the vertex-shader sysvals."""
    print("\n[1/7] panvk_shader.h — add num_vertices + xfb_address[4] to vs sysvals")
    replace_once(
        "src/panfrost/vulkan/panvk_shader.h",
        """   struct {
#if PAN_ARCH < 9
      int32_t raw_vertex_offset;
#endif
      int32_t first_vertex;
      int32_t base_instance;
      uint32_t noperspective_varyings;
   } vs;""",
        """   struct {
#if PAN_ARCH < 9
      int32_t raw_vertex_offset;
      uint32_t num_vertices; /* iter13: XFB needs per-draw vertex count */
      uint32_t _pad_xfb; /* keep 8-byte alignment before u64 array */
      aligned_u64 xfb_address[4]; /* iter13: 4 transform feedback buffer base addresses */
#endif
      int32_t first_vertex;
      int32_t base_instance;
      uint32_t noperspective_varyings;
   } vs;""",
        marker_in_new="xfb_address[4]",
    )


# ============================================================
# 2. panvk_vX_physical_device.c — expose ext + features + properties
# ============================================================
def patch_physical_device():
    """Expose the extension, its features and its properties."""
    print("\n[2/7] panvk_vX_physical_device.c — expose VK_EXT_transform_feedback")

    # A. Add extension to the ext list (find a stable nearby line)
    replace_once(
        "src/panfrost/vulkan/panvk_vX_physical_device.c",
        "      .EXT_robustness2 = true,",
        """      .EXT_robustness2 = true,
      .EXT_transform_feedback = PAN_ARCH < 9, /* iter13: JM-class only for now */""",
        marker_in_new="EXT_transform_feedback",
    )

    # B. Add features. The features block has /* VK_KHR_robustness2 */ nearby.
    replace_once(
        "src/panfrost/vulkan/panvk_vX_physical_device.c",
        """      /* VK_KHR_robustness2 */
      .robustBufferAccess2 = PAN_ARCH >= 11,
      .robustImageAccess2 = false,
      .nullDescriptor = true,""",
        """      /* VK_KHR_robustness2 */
      .robustBufferAccess2 = PAN_ARCH >= 11,
      .robustImageAccess2 = false,
      .nullDescriptor = true,

      /* VK_EXT_transform_feedback (iter13) */
      .transformFeedback = PAN_ARCH < 9,
      .geometryStreams = false,""",
        marker_in_new=".transformFeedback = PAN_ARCH < 9",
    )

    # C. Add properties. Anchor to the existing /* VK_KHR_robustness2 */
    #    properties block near line 1019 and append right after it.
    replace_once(
        "src/panfrost/vulkan/panvk_vX_physical_device.c",
        """      /* VK_KHR_robustness2 */
      .robustStorageBufferAccessSizeAlignment = 1,
      .robustUniformBufferAccessSizeAlignment = 1,""",
        """      /* VK_KHR_robustness2 */
      .robustStorageBufferAccessSizeAlignment = 1,
      .robustUniformBufferAccessSizeAlignment = 1,

      /* VK_EXT_transform_feedback (iter13) */
      .maxTransformFeedbackStreams = 1,
      .maxTransformFeedbackBuffers = 4,
      .maxTransformFeedbackBufferSize = UINT32_MAX,
      .maxTransformFeedbackStreamDataSize = 512,
      .maxTransformFeedbackBufferDataSize = 512,
      .maxTransformFeedbackBufferDataStride = 2048,
      .transformFeedbackQueries = false,
      .transformFeedbackStreamsLinesTriangles = false,
      .transformFeedbackRasterizationStreamSelect = false,
      .transformFeedbackDraw = false,""",
        marker_in_new="maxTransformFeedbackStreams",
    )


# ============================================================
# 3. panvk_vX_shader.c — intrinsic lowering + NIR pass wiring
# ============================================================
def patch_shader_lowering():
    """Lower the XFB sysval intrinsics and wire in pan_nir_lower_xfb."""
    print("\n[3/7] panvk_vX_shader.c — intrinsic lowering + pan_nir_lower_xfb wiring")

    # A. Add intrinsic cases inside the PAN_ARCH < 9 block.
    #    Anchor to the existing `vs.raw_vertex_offset` case.
    replace_once(
        "src/panfrost/vulkan/panvk_vX_shader.c",
        """#if PAN_ARCH < 9
   case nir_intrinsic_load_raw_vertex_offset_pan:
      val = load_sysval(b, graphics, bit_size, vs.raw_vertex_offset);
      break;""",
        """#if PAN_ARCH < 9
   case nir_intrinsic_load_raw_vertex_offset_pan:
      val = load_sysval(b, graphics, bit_size, vs.raw_vertex_offset);
      break;
   case nir_intrinsic_load_num_vertices: /* iter13: XFB index calc */
      val = load_sysval(b, graphics, bit_size, vs.num_vertices);
      break;
   case nir_intrinsic_load_xfb_address: { /* iter13: XFB buffer N base address */
      unsigned idx = nir_intrinsic_base(intr);
      switch (idx) {
      case 0: val = load_sysval(b, graphics, bit_size, vs.xfb_address[0]); break;
      case 1: val = load_sysval(b, graphics, bit_size, vs.xfb_address[1]); break;
      case 2: val = load_sysval(b, graphics, bit_size, vs.xfb_address[2]); break;
      case 3: val = load_sysval(b, graphics, bit_size, vs.xfb_address[3]); break;
      default: return false;
      }
      break;
   }""",
        marker_in_new="load_num_vertices",
    )

    # B. Wire pan_nir_lower_xfb into the lowering chain, right after
    #    nir_lower_system_values runs.
    replace_once(
        "src/panfrost/vulkan/panvk_vX_shader.c",
        """   NIR_PASS(_, nir, nir_lower_system_values);

   nir_lower_compute_system_values_options options = {""",
        """   NIR_PASS(_, nir, nir_lower_system_values);

#if PAN_ARCH < 9
   /* iter13: VK_EXT_transform_feedback — if the shader has XFB output
    * decorations, run the Mesa standard XFB-info NIR pass + Panfrost's
    * own NIR lowering that turns store_output into nir_store_global
    * to the per-buffer base address (the panvk lowering above wires
    * nir_load_xfb_address to vs.xfb_address[N]). Single-variant: if
    * an app binds an XFB pipeline outside vkCmdBeginTransformFeedback,
    * the writes go to address 0 — undefined behavior per spec.
    */
   if (nir->info.stage == MESA_SHADER_VERTEX && nir->xfb_info != NULL) {
      NIR_PASS(_, nir, pan_nir_lower_xfb);
   }
#endif

   nir_lower_compute_system_values_options options = {""",
        marker_in_new="pan_nir_lower_xfb",
    )

    # C. Add #include for pan_nir.h (where pan_nir_lower_xfb is declared).
    replace_once(
        "src/panfrost/vulkan/panvk_vX_shader.c",
        '#include "panvk_shader.h"',
        '#include "panvk_shader.h"\n#include "pan_nir.h" /* iter13: pan_nir_lower_xfb */',
        marker_in_new='/* iter13: pan_nir_lower_xfb */',
    )


# ============================================================
# 4. panvk_cmd_draw.h — add XFB state struct + pipeline state member
# ============================================================
def patch_cmd_draw_header():
    """Add the per-command-buffer XFB state next to the sysvals member."""
    print("\n[4/7] panvk_cmd_draw.h — add panvk_xfb_state to cmd buffer state")
    # The XFB state is added as an anonymous struct member directly after
    # the existing `sysvals` member of the graphics state.
    replace_once(
        "src/panfrost/vulkan/panvk_cmd_draw.h",
        "   struct panvk_graphics_sysvals sysvals;",
        """   struct panvk_graphics_sysvals sysvals;

#if PAN_ARCH < 9
   /* iter13: VK_EXT_transform_feedback state (JM-class only for now). */
   struct {
      bool active;
      uint32_t buffer_count;
      struct {
         uint64_t addr;
         uint64_t offset;
         uint64_t size;
      } buffers[4];
   } xfb;
#endif""",
        marker_in_new="iter13: VK_EXT_transform_feedback state",
    )


# ============================================================
# 5. panvk_vX_cmd_draw.c (arch-templated, NOT jm/) — populate XFB sysvals
# ============================================================
def patch_cmd_draw():
    """Populate vs.num_vertices and vs.xfb_address[] on every draw."""
    print("\n[5/7] panvk_vX_cmd_draw.c — populate vs.num_vertices + vs.xfb_address[] inside the PAN_ARCH<9 block")
    # Insert just inside the existing `#if PAN_ARCH < 9` block where
    # raw_vertex_offset is set. info->vertex.count is available in scope.
    replace_once(
        "src/panfrost/vulkan/panvk_vX_cmd_draw.c",
        """#if PAN_ARCH < 9
   set_gfx_sysval(cmdbuf, dirty_sysvals, vs.raw_vertex_offset,
                  info->vertex.raw_offset);
   set_gfx_sysval(cmdbuf, dirty_sysvals, layer_id, info->layer_id);
#endif""",
        """#if PAN_ARCH < 9
   set_gfx_sysval(cmdbuf, dirty_sysvals, vs.raw_vertex_offset,
                  info->vertex.raw_offset);
   set_gfx_sysval(cmdbuf, dirty_sysvals, layer_id, info->layer_id);

   /* iter13: VK_EXT_transform_feedback sysvals — always set (per draw),
    * reflect bound XFB state. set_gfx_sysval is a no-op if value unchanged.
    */
   set_gfx_sysval(cmdbuf, dirty_sysvals, vs.num_vertices, info->vertex.count);
   {
      const struct panvk_cmd_graphics_state *_gfx = &cmdbuf->state.gfx;
      uint64_t _xa0 = 0, _xa1 = 0, _xa2 = 0, _xa3 = 0;
      if (_gfx->xfb.active) {
         if (_gfx->xfb.buffer_count > 0)
            _xa0 = _gfx->xfb.buffers[0].addr + _gfx->xfb.buffers[0].offset;
         if (_gfx->xfb.buffer_count > 1)
            _xa1 = _gfx->xfb.buffers[1].addr + _gfx->xfb.buffers[1].offset;
         if (_gfx->xfb.buffer_count > 2)
            _xa2 = _gfx->xfb.buffers[2].addr + _gfx->xfb.buffers[2].offset;
         if (_gfx->xfb.buffer_count > 3)
            _xa3 = _gfx->xfb.buffers[3].addr + _gfx->xfb.buffers[3].offset;
      }
      set_gfx_sysval(cmdbuf, dirty_sysvals, vs.xfb_address[0], _xa0);
      set_gfx_sysval(cmdbuf, dirty_sysvals, vs.xfb_address[1], _xa1);
      set_gfx_sysval(cmdbuf, dirty_sysvals, vs.xfb_address[2], _xa2);
      set_gfx_sysval(cmdbuf, dirty_sysvals, vs.xfb_address[3], _xa3);
   }
#endif""",
        marker_in_new="iter13: VK_EXT_transform_feedback sysvals",
    )


# ============================================================
# 6. NEW: jm/panvk_vX_cmd_xfb.c — Vulkan command handlers
# ============================================================
XFB_C_SOURCE = r'''/*
 * Copyright © 2026 mfritsche / claude-noether
 * SPDX-License-Identifier: MIT
 *
 * iter13: VK_EXT_transform_feedback command handlers for the JM
 * architecture path (Bifrost v6/v7 + Valhall-JM v9).
 *
 * The runtime contract:
 *  - vkCmdBindTransformFeedbackBuffersEXT: stash (gpu_addr, offset, size)
 *    for each slot into cmdbuf->state.gfx.xfb.buffers[].
 *  - vkCmdBeginTransformFeedbackEXT: set cmdbuf->state.gfx.xfb.active = true.
 *    Mark sysvals dirty so the next draw re-emits vs.xfb_address[].
 *  - vkCmdEndTransformFeedbackEXT: set active = false.
 *
 * Counter buffers (firstCounterBuffer/counterBufferCount/pCounterBuffers/
 * pCounterBufferOffsets) are accepted by API but ignored — v1 doesn't
 * support pause/resume. transformFeedbackDraw is advertised as false.
 *
 * Per-draw integration: jm/panvk_vX_cmd_draw.c reads cmdbuf->state.gfx.xfb
 * and populates vs.xfb_address[i] for shader use. The pan_nir_lower_xfb
 * pass in panvk_vX_shader.c emits nir_load_xfb_address(i) which lowers
 * (via panvk_vX_shader.c sysval handler) to a load from the per-draw
 * sysval push area.
 */

#include "vk_log.h"

#include "panvk_cmd_buffer.h"
#include "panvk_cmd_draw.h"
#include "panvk_buffer.h"
#include "panvk_entrypoints.h"

VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdBindTransformFeedbackBuffersEXT)(
   VkCommandBuffer commandBuffer, uint32_t firstBinding,
   uint32_t bindingCount, const VkBuffer *pBuffers,
   const VkDeviceSize *pOffsets, const VkDeviceSize *pSizes)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   struct panvk_cmd_graphics_state *gfx = &cmdbuf->state.gfx;

   for (uint32_t i = 0; i < bindingCount; i++) {
      uint32_t slot = firstBinding + i;
      if (slot >= 4)
         continue;

      VK_FROM_HANDLE(panvk_buffer, buf, pBuffers[i]);
      gfx->xfb.buffers[slot].addr = panvk_buffer_gpu_ptr(buf, 0);
      gfx->xfb.buffers[slot].offset = pOffsets[i];
      gfx->xfb.buffers[slot].size =
         (pSizes != NULL && pSizes[i] != VK_WHOLE_SIZE)
            ? pSizes[i]
            : (buf->vk.size - pOffsets[i]);
   }

   if (firstBinding + bindingCount > gfx->xfb.buffer_count)
      gfx->xfb.buffer_count = firstBinding + bindingCount;
}

VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdBeginTransformFeedbackEXT)(
   VkCommandBuffer commandBuffer, uint32_t firstCounterBuffer,
   uint32_t counterBufferCount, const VkBuffer *pCounterBuffers,
   const VkDeviceSize *pCounterBufferOffsets)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   struct panvk_cmd_graphics_state *gfx = &cmdbuf->state.gfx;

   /* Counter buffers ignored in v1 — see VkPhysicalDeviceTransformFeedback
    * PropertiesEXT.transformFeedbackDraw = false in panvk_vX_physical_device.c.
    */
   (void)firstCounterBuffer;
   (void)counterBufferCount;
   (void)pCounterBuffers;
   (void)pCounterBufferOffsets;

   gfx->xfb.active = true;
   /* Per-draw set_gfx_sysval picks up the change automatically — no
    * explicit dirty marking required (set_gfx_sysval uses memcmp +
    * BITSET to detect state diffs and re-emit sysvals). */
}

VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdEndTransformFeedbackEXT)(
   VkCommandBuffer commandBuffer, uint32_t firstCounterBuffer,
   uint32_t counterBufferCount, const VkBuffer *pCounterBuffers,
   const VkDeviceSize *pCounterBufferOffsets)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   struct panvk_cmd_graphics_state *gfx = &cmdbuf->state.gfx;

   (void)firstCounterBuffer;
   (void)counterBufferCount;
   (void)pCounterBuffers;
   (void)pCounterBufferOffsets;

   gfx->xfb.active = false;
}
'''


def create_xfb_handlers():
    """Create the new C file holding the XFB Vulkan command handlers."""
    print("\n[6/7] jm/panvk_vX_cmd_xfb.c — XFB Vulkan command handlers (NEW FILE)")
    create_file("src/panfrost/vulkan/jm/panvk_vX_cmd_xfb.c", XFB_C_SOURCE)


# ============================================================
# 7. meson.build — register the new file in the jm_files array
# ============================================================
def patch_meson():
    """Register jm/panvk_vX_cmd_xfb.c in the jm_files list."""
    print("\n[7/7] meson.build — register jm/panvk_vX_cmd_xfb.c")
    replace_once(
        "src/panfrost/vulkan/meson.build",
        "jm_files = [\n  'jm/panvk_vX_bind_queue.c',",
        "jm_files = [\n  'jm/panvk_vX_bind_queue.c',\n  'jm/panvk_vX_cmd_xfb.c', # iter13",
        marker_in_new="iter13",
    )


def main():
    """Apply all seven steps in order; any failure exits with status 2."""
    print(f"[iter13] applying patches under {ROOT}")
    patch_shader_header()
    patch_physical_device()
    patch_shader_lowering()
    patch_cmd_draw_header()
    patch_cmd_draw()
    create_xfb_handlers()
    patch_meson()
    print("\n[iter13] all patches applied — run incremental ninja build next")


if __name__ == "__main__":
    main()