--- a/src/panfrost/vulkan/panvk_shader.h 2026-04-29 22:19:00.000000000 +0200 +++ b/src/panfrost/vulkan/panvk_shader.h 2026-05-20 18:52:53.312698258 +0200 @@ -150,6 +150,10 @@ struct { #if PAN_ARCH < 9 int32_t raw_vertex_offset; + uint32_t num_vertices; /* iter13: XFB needs per-draw vertex count */ + /* raw_vertex_offset and num_vertices together fill one 8-byte slot, so + * xfb_address[] needs no explicit pad (assumes aligned_u64 is 8-byte aligned). */ + aligned_u64 xfb_address[4]; /* iter13: 4 transform feedback buffer base addresses */ #endif int32_t first_vertex; int32_t base_instance; --- a/src/panfrost/vulkan/panvk_vX_physical_device.c 2026-05-20 19:09:29.711145446 +0200 +++ b/src/panfrost/vulkan/panvk_vX_physical_device.c 2026-05-20 18:52:54.832720445 +0200 @@ -169,6 +169,7 @@ .EXT_provoking_vertex = true, .EXT_queue_family_foreign = true, .EXT_robustness2 = true, + .EXT_transform_feedback = PAN_ARCH < 9, /* iter13: JM-class only for now */ .EXT_sampler_filter_minmax = PAN_ARCH >= 10, .EXT_scalar_block_layout = true, .EXT_separate_stencil_usage = true, @@ -495,6 +496,10 @@ .robustImageAccess2 = false, .nullDescriptor = true, + /* VK_EXT_transform_feedback (iter13) */ + .transformFeedback = PAN_ARCH < 9, + .geometryStreams = false, + /* VK_KHR_shader_clock */ .shaderSubgroupClock = device->kmod.dev->props.gpu_can_query_timestamp, .shaderDeviceClock = device->kmod.dev->props.timestamp_device_coherent, @@ -1020,6 +1025,18 @@ .robustStorageBufferAccessSizeAlignment = 1, .robustUniformBufferAccessSizeAlignment = 1, + /* VK_EXT_transform_feedback (iter13) */ + .maxTransformFeedbackStreams = 1, + .maxTransformFeedbackBuffers = 4, + .maxTransformFeedbackBufferSize = UINT32_MAX, + .maxTransformFeedbackStreamDataSize = 512, + .maxTransformFeedbackBufferDataSize = 512, + .maxTransformFeedbackBufferDataStride = 2048, + .transformFeedbackQueries = false, + .transformFeedbackStreamsLinesTriangles = false, + .transformFeedbackRasterizationStreamSelect = false, + .transformFeedbackDraw = false, + /* 
VK_EXT_shader_object */ /* We do not currently support VK_EXT_shader_object but this is used * internally by vk_shader --- a/src/panfrost/vulkan/panvk_vX_shader.c 2026-04-29 22:19:00.000000000 +0200 +++ b/src/panfrost/vulkan/panvk_vX_shader.c 2026-05-20 18:52:56.556745611 +0200 @@ -21,6 +21,7 @@ #include "panvk_physical_device.h" #include "panvk_sampler.h" #include "panvk_shader.h" +#include "pan_nir.h" /* iter13: pan_nir_lower_xfb */ #include "spirv/nir_spirv.h" #include "util/memstream.h" @@ -100,6 +101,20 @@ case nir_intrinsic_load_raw_vertex_offset_pan: val = load_sysval(b, graphics, bit_size, vs.raw_vertex_offset); break; + case nir_intrinsic_load_num_vertices: /* iter13: XFB index calc */ + val = load_sysval(b, graphics, bit_size, vs.num_vertices); + break; + case nir_intrinsic_load_xfb_address: { /* iter13: XFB buffer N base address */ + unsigned idx = nir_intrinsic_base(intr); + switch (idx) { + case 0: val = load_sysval(b, graphics, bit_size, vs.xfb_address[0]); break; + case 1: val = load_sysval(b, graphics, bit_size, vs.xfb_address[1]); break; + case 2: val = load_sysval(b, graphics, bit_size, vs.xfb_address[2]); break; + case 3: val = load_sysval(b, graphics, bit_size, vs.xfb_address[3]); break; + default: return false; + } + break; + } case nir_intrinsic_load_layer_id: assert(b->shader->info.stage == MESA_SHADER_FRAGMENT); val = load_sysval(b, graphics, bit_size, layer_id); @@ -457,6 +472,7 @@ core_max_id); pan_preprocess_nir(nir, pdev->kmod.dev->props.gpu_id); + } static void @@ -870,6 +886,18 @@ nir_var_shader_in | nir_var_shader_out, UINT32_MAX); NIR_PASS(_, nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out, glsl_type_size, nir_lower_io_use_interpolated_input_intrinsics); + +#if PAN_ARCH < 9 + /* iter13: VK_EXT_transform_feedback — runs AFTER nir_lower_io so that + * shader outputs are now store_output intrinsics that pan_nir_lower_xfb + * can rewrite to nir_store_global+nir_load_xfb_address. 
*/ + if (nir->info.stage == MESA_SHADER_VERTEX && + nir->info.has_transform_feedback_varyings) { + NIR_PASS(_, nir, nir_opt_constant_folding); + NIR_PASS(_, nir, nir_io_add_intrinsic_xfb_info); + NIR_PASS(_, nir, pan_nir_lower_xfb); + } +#endif } static VkResult @@ -1288,6 +1316,9 @@ .view_mask = (state && state->rp) ? state->rp->view_mask : 0, .robust2_modes = robust2_modes, .robust_descriptors = dev->vk.enabled_features.nullDescriptor, + /* iter13: XFB shaders must disable IDVS (matches Panfrost-Gallium). */ + .no_idvs = (info->stage == MESA_SHADER_VERTEX) && + info->nir->info.has_transform_feedback_varyings, }; switch (info->stage) { --- a/src/panfrost/vulkan/panvk_cmd_draw.h 2026-04-29 22:19:00.000000000 +0200 +++ b/src/panfrost/vulkan/panvk_cmd_draw.h 2026-05-20 18:52:57.748763011 +0200 @@ -135,6 +135,19 @@ struct panvk_graphics_sysvals sysvals; #if PAN_ARCH < 9 + /* iter13: VK_EXT_transform_feedback state (JM-class only for now). */ + struct { + bool active; + uint32_t buffer_count; + struct { + uint64_t addr; + uint64_t offset; + uint64_t size; + } buffers[4]; + } xfb; +#endif + +#if PAN_ARCH < 9 struct panvk_shader_link link; #endif --- a/src/panfrost/vulkan/panvk_vX_cmd_draw.c 2026-04-29 22:19:00.000000000 +0200 +++ b/src/panfrost/vulkan/panvk_vX_cmd_draw.c 2026-05-20 19:10:23.031919662 +0200 @@ -10,6 +10,7 @@ #include "panvk_entrypoints.h" #include "pan_desc.h" +#include "pan_compiler.h" /* PAN_SHADER_OOB_ADDRESS */ #include "pan_util.h" static void @@ -722,6 +723,35 @@ set_gfx_sysval(cmdbuf, dirty_sysvals, vs.raw_vertex_offset, info->vertex.raw_offset); set_gfx_sysval(cmdbuf, dirty_sysvals, layer_id, info->layer_id); + + /* iter13: VK_EXT_transform_feedback sysvals — always set (per draw), + * reflect bound XFB state. set_gfx_sysval is a no-op if value unchanged. 
*/ + set_gfx_sysval(cmdbuf, dirty_sysvals, vs.num_vertices, info->vertex.count); + { + const struct panvk_cmd_graphics_state *_gfx = &cmdbuf->state.gfx; + /* iter13: default each XFB buffer address to PAN_SHADER_OOB_ADDRESS + * (= 1<<63). This is the Panfrost-Gallium memory-sink idiom — the + * Bifrost MMU silently discards stores to this address, so a pipeline + * with XFB outputs used in a non-XFB draw (or in an XFB draw with + * fewer bound buffers than the shader declares) is safe instead of + * faulting. See gallium/drivers/panfrost/pan_cmdstream.c PAN_SYSVAL_XFB. */ + uint64_t _xa0 = PAN_SHADER_OOB_ADDRESS, _xa1 = PAN_SHADER_OOB_ADDRESS, + _xa2 = PAN_SHADER_OOB_ADDRESS, _xa3 = PAN_SHADER_OOB_ADDRESS; + if (_gfx->xfb.active) { + if (_gfx->xfb.buffer_count > 0 && _gfx->xfb.buffers[0].addr) + _xa0 = _gfx->xfb.buffers[0].addr + _gfx->xfb.buffers[0].offset; + if (_gfx->xfb.buffer_count > 1 && _gfx->xfb.buffers[1].addr) + _xa1 = _gfx->xfb.buffers[1].addr + _gfx->xfb.buffers[1].offset; + if (_gfx->xfb.buffer_count > 2 && _gfx->xfb.buffers[2].addr) + _xa2 = _gfx->xfb.buffers[2].addr + _gfx->xfb.buffers[2].offset; + if (_gfx->xfb.buffer_count > 3 && _gfx->xfb.buffers[3].addr) + _xa3 = _gfx->xfb.buffers[3].addr + _gfx->xfb.buffers[3].offset; + } + set_gfx_sysval(cmdbuf, dirty_sysvals, vs.xfb_address[0], _xa0); + set_gfx_sysval(cmdbuf, dirty_sysvals, vs.xfb_address[1], _xa1); + set_gfx_sysval(cmdbuf, dirty_sysvals, vs.xfb_address[2], _xa2); + set_gfx_sysval(cmdbuf, dirty_sysvals, vs.xfb_address[3], _xa3); + } #endif if (dyn_gfx_state_dirty(cmdbuf, CB_BLEND_CONSTANTS)) { --- a/src/panfrost/vulkan/meson.build 2026-04-29 22:19:00.000000000 +0200 +++ b/src/panfrost/vulkan/meson.build 2026-05-20 18:53:04.484861338 +0200 @@ -73,6 +73,7 @@ jm_inc_dir = ['jm'] jm_files = [ 'jm/panvk_vX_bind_queue.c', + 'jm/panvk_vX_cmd_xfb.c', # iter13 'jm/panvk_vX_cmd_buffer.c', 'jm/panvk_vX_cmd_dispatch.c', 'jm/panvk_vX_cmd_draw.c', --- a/src/panfrost/vulkan/jm/panvk_vX_cmd_buffer.c 
2026-04-29 22:19:00.000000000 +0200 +++ b/src/panfrost/vulkan/jm/panvk_vX_cmd_buffer.c 2026-05-20 19:10:26.163965149 +0200 @@ -473,5 +473,12 @@ vk_command_buffer_begin(&cmdbuf->vk, pBeginInfo); +#if PAN_ARCH < 9 + /* iter13: clear XFB state on Begin so a reused command buffer does not + * inherit stale xfb.buffer_count / xfb.active / xfb.buffers[] from a + * prior recording. */ + memset(&cmdbuf->state.gfx.xfb, 0, sizeof(cmdbuf->state.gfx.xfb)); +#endif + return VK_SUCCESS; } --- a/src/panfrost/vulkan/jm/panvk_vX_cmd_xfb.c 2026-05-18 12:50:53.067999996 +0200 +++ b/src/panfrost/vulkan/jm/panvk_vX_cmd_xfb.c 2026-05-20 19:10:27.175979847 +0200 @@ -0,0 +1,111 @@ +/* + * Copyright © 2026 mfritsche / claude-noether + * SPDX-License-Identifier: MIT + * + * iter13: VK_EXT_transform_feedback command handlers for the JM + * architecture path (Bifrost v6/v7; all XFB code is guarded by PAN_ARCH < 9). + * + * The runtime contract: + * - vkCmdBindTransformFeedbackBuffersEXT: stash (gpu_addr, offset, size) + * for each slot into cmdbuf->state.gfx.xfb.buffers[]. + * - vkCmdBeginTransformFeedbackEXT: set cmdbuf->state.gfx.xfb.active = true. + * No explicit dirtying: the next draw's set_gfx_sysval updates vs.xfb_address[]. + * - vkCmdEndTransformFeedbackEXT: set active = false. + * + * Counter buffers (firstCounterBuffer/counterBufferCount/pCounterBuffers/ + * pCounterBufferOffsets) are accepted by the API but ignored — v1 doesn't + * support pause/resume. transformFeedbackDraw is advertised as false. + * + * Per-draw integration: panvk_vX_cmd_draw.c reads cmdbuf->state.gfx.xfb + * and populates vs.xfb_address[i] for shader use. The pan_nir_lower_xfb + * pass run from panvk_vX_shader.c emits nir_load_xfb_address(i), which lowers + * (via the panvk_vX_shader.c sysval handler) to a load from the per-draw + * sysval push area. 
+ */ + +#include "vk_log.h" +#include "util/log.h" + +#include "panvk_cmd_buffer.h" +#include "panvk_cmd_draw.h" +#include "panvk_buffer.h" +#include "panvk_entrypoints.h" + +VKAPI_ATTR void VKAPI_CALL +panvk_per_arch(CmdBindTransformFeedbackBuffersEXT)( + VkCommandBuffer commandBuffer, + uint32_t firstBinding, + uint32_t bindingCount, + const VkBuffer *pBuffers, + const VkDeviceSize *pOffsets, + const VkDeviceSize *pSizes) +{ + VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); + struct panvk_cmd_graphics_state *gfx = &cmdbuf->state.gfx; + + for (uint32_t i = 0; i < bindingCount; i++) { + uint32_t slot = firstBinding + i; + if (slot >= 4) + continue; + + VK_FROM_HANDLE(panvk_buffer, buf, pBuffers[i]); + gfx->xfb.buffers[slot].addr = panvk_buffer_gpu_ptr(buf, 0); + gfx->xfb.buffers[slot].offset = pOffsets[i]; + gfx->xfb.buffers[slot].size = + (pSizes != NULL && pSizes[i] != VK_WHOLE_SIZE) + ? pSizes[i] + : (buf->vk.size - pOffsets[i]); + } + + if (firstBinding + bindingCount > gfx->xfb.buffer_count) + gfx->xfb.buffer_count = firstBinding + bindingCount; +} + +VKAPI_ATTR void VKAPI_CALL +panvk_per_arch(CmdBeginTransformFeedbackEXT)( + VkCommandBuffer commandBuffer, + uint32_t firstCounterBuffer, + uint32_t counterBufferCount, + const VkBuffer *pCounterBuffers, + const VkDeviceSize *pCounterBufferOffsets) +{ + VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); + struct panvk_cmd_graphics_state *gfx = &cmdbuf->state.gfx; + + /* Counter buffers are ignored in v1 (no pause/resume support). + * NOTE(review): transformFeedbackDraw = false presumably only rules out + * vkCmdDrawIndirectByteCountEXT, so an app may still legally pass counter + * buffers here (confirm against the spec). Warn loudly when it does so we + * do not silently produce wrong capture state. 
*/ + (void)firstCounterBuffer; + (void)pCounterBufferOffsets; + if (counterBufferCount > 0 && pCounterBuffers != NULL) { + mesa_logw("panvk: CmdBeginTransformFeedbackEXT: counter buffers not " + "implemented (transformFeedbackDraw=false); XFB resume will " + "restart at buffer offset 0"); + } + + gfx->xfb.active = true; + /* Per-draw set_gfx_sysval picks up the change automatically — no + * explicit dirty marking required (set_gfx_sysval uses memcmp + + * BITSET to detect state diffs and re-emit sysvals). */ +} + +VKAPI_ATTR void VKAPI_CALL +panvk_per_arch(CmdEndTransformFeedbackEXT)( + VkCommandBuffer commandBuffer, + uint32_t firstCounterBuffer, + uint32_t counterBufferCount, + const VkBuffer *pCounterBuffers, + const VkDeviceSize *pCounterBufferOffsets) +{ + VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); + struct panvk_cmd_graphics_state *gfx = &cmdbuf->state.gfx; + + (void)firstCounterBuffer; + (void)counterBufferCount; + (void)pCounterBuffers; + (void)pCounterBufferOffsets; + + gfx->xfb.active = false; +}