diff --git a/arch/mesa-panvk-bifrost/0003-panvk-bifrost-vk-ext-transform-feedback.patch b/arch/mesa-panvk-bifrost/0003-panvk-bifrost-vk-ext-transform-feedback.patch new file mode 100644 index 000000000..4d162fc2e --- /dev/null +++ b/arch/mesa-panvk-bifrost/0003-panvk-bifrost-vk-ext-transform-feedback.patch @@ -0,0 +1,328 @@ +--- a/src/panfrost/vulkan/panvk_shader.h 2026-04-29 22:19:00.000000000 +0200 ++++ b/src/panfrost/vulkan/panvk_shader.h 2026-05-20 18:52:53.312698258 +0200 +@@ -150,6 +150,10 @@ + struct { + #if PAN_ARCH < 9 + int32_t raw_vertex_offset; ++ uint32_t num_vertices; /* iter13: XFB needs per-draw vertex count */ ++ /* aligned_u64 attribute below inserts the 4-byte alignment gap ++ * after num_vertices automatically — no explicit pad needed. */ ++ aligned_u64 xfb_address[4]; /* iter13: 4 transform feedback buffer base addresses */ + #endif + int32_t first_vertex; + int32_t base_instance; +--- a/src/panfrost/vulkan/panvk_vX_physical_device.c 2026-05-20 19:09:29.711145446 +0200 ++++ b/src/panfrost/vulkan/panvk_vX_physical_device.c 2026-05-20 18:52:54.832720445 +0200 +@@ -169,6 +169,7 @@ + .EXT_provoking_vertex = true, + .EXT_queue_family_foreign = true, + .EXT_robustness2 = true, ++ .EXT_transform_feedback = PAN_ARCH < 9, /* iter13: JM-class only for now */ + .EXT_sampler_filter_minmax = PAN_ARCH >= 10, + .EXT_scalar_block_layout = true, + .EXT_separate_stencil_usage = true, +@@ -495,6 +496,10 @@ + .robustImageAccess2 = false, + .nullDescriptor = true, + ++ /* VK_EXT_transform_feedback (iter13) */ ++ .transformFeedback = PAN_ARCH < 9, ++ .geometryStreams = false, ++ + /* VK_KHR_shader_clock */ + .shaderSubgroupClock = device->kmod.dev->props.gpu_can_query_timestamp, + .shaderDeviceClock = device->kmod.dev->props.timestamp_device_coherent, +@@ -1020,6 +1025,18 @@ + .robustStorageBufferAccessSizeAlignment = 1, + .robustUniformBufferAccessSizeAlignment = 1, + ++ /* VK_EXT_transform_feedback (iter13) */ ++ .maxTransformFeedbackStreams = 1, ++ 
.maxTransformFeedbackBuffers = 4, ++ .maxTransformFeedbackBufferSize = UINT32_MAX, ++ .maxTransformFeedbackStreamDataSize = 512, ++ .maxTransformFeedbackBufferDataSize = 512, ++ .maxTransformFeedbackBufferDataStride = 2048, ++ .transformFeedbackQueries = false, ++ .transformFeedbackStreamsLinesTriangles = false, ++ .transformFeedbackRasterizationStreamSelect = false, ++ .transformFeedbackDraw = false, ++ + /* VK_EXT_shader_object */ + /* We do not currently support VK_EXT_shader_object but this is used + * internally by vk_shader +--- a/src/panfrost/vulkan/panvk_vX_shader.c 2026-04-29 22:19:00.000000000 +0200 ++++ b/src/panfrost/vulkan/panvk_vX_shader.c 2026-05-20 18:52:56.556745611 +0200 +@@ -21,6 +21,7 @@ + #include "panvk_physical_device.h" + #include "panvk_sampler.h" + #include "panvk_shader.h" ++#include "pan_nir.h" /* iter13: pan_nir_lower_xfb */ + + #include "spirv/nir_spirv.h" + #include "util/memstream.h" +@@ -100,6 +101,20 @@ + case nir_intrinsic_load_raw_vertex_offset_pan: + val = load_sysval(b, graphics, bit_size, vs.raw_vertex_offset); + break; ++ case nir_intrinsic_load_num_vertices: /* iter13: XFB index calc */ ++ val = load_sysval(b, graphics, bit_size, vs.num_vertices); ++ break; ++ case nir_intrinsic_load_xfb_address: { /* iter13: XFB buffer N base address */ ++ unsigned idx = nir_intrinsic_base(intr); ++ switch (idx) { ++ case 0: val = load_sysval(b, graphics, bit_size, vs.xfb_address[0]); break; ++ case 1: val = load_sysval(b, graphics, bit_size, vs.xfb_address[1]); break; ++ case 2: val = load_sysval(b, graphics, bit_size, vs.xfb_address[2]); break; ++ case 3: val = load_sysval(b, graphics, bit_size, vs.xfb_address[3]); break; ++ default: return false; ++ } ++ break; ++ } + case nir_intrinsic_load_layer_id: + assert(b->shader->info.stage == MESA_SHADER_FRAGMENT); + val = load_sysval(b, graphics, bit_size, layer_id); +@@ -457,6 +472,7 @@ + core_max_id); + + pan_preprocess_nir(nir, pdev->kmod.dev->props.gpu_id); ++ + } + + static void +@@ -870,6 
+886,18 @@ + nir_var_shader_in | nir_var_shader_out, UINT32_MAX); + NIR_PASS(_, nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out, + glsl_type_size, nir_lower_io_use_interpolated_input_intrinsics); ++ ++#if PAN_ARCH < 9 ++ /* iter13: VK_EXT_transform_feedback — runs AFTER nir_lower_io so that ++ * shader outputs are now store_output intrinsics that pan_nir_lower_xfb ++ * can rewrite to nir_store_global+nir_load_xfb_address. */ ++ if (nir->info.stage == MESA_SHADER_VERTEX && ++ nir->info.has_transform_feedback_varyings) { ++ NIR_PASS(_, nir, nir_opt_constant_folding); ++ NIR_PASS(_, nir, nir_io_add_intrinsic_xfb_info); ++ NIR_PASS(_, nir, pan_nir_lower_xfb); ++ } ++#endif + } + + static VkResult +@@ -1288,6 +1316,9 @@ + .view_mask = (state && state->rp) ? state->rp->view_mask : 0, + .robust2_modes = robust2_modes, + .robust_descriptors = dev->vk.enabled_features.nullDescriptor, ++ /* iter13: XFB shaders must disable IDVS (matches Panfrost-Gallium). */ ++ .no_idvs = (info->stage == MESA_SHADER_VERTEX) && ++ info->nir->info.has_transform_feedback_varyings, + }; + + switch (info->stage) { +--- a/src/panfrost/vulkan/panvk_cmd_draw.h 2026-04-29 22:19:00.000000000 +0200 ++++ b/src/panfrost/vulkan/panvk_cmd_draw.h 2026-05-20 18:52:57.748763011 +0200 +@@ -135,6 +135,19 @@ + struct panvk_graphics_sysvals sysvals; + + #if PAN_ARCH < 9 ++ /* iter13: VK_EXT_transform_feedback state (JM-class only for now). 
*/ ++ struct { ++ bool active; ++ uint32_t buffer_count; ++ struct { ++ uint64_t addr; ++ uint64_t offset; ++ uint64_t size; ++ } buffers[4]; ++ } xfb; ++#endif ++ ++#if PAN_ARCH < 9 + struct panvk_shader_link link; + #endif + +--- a/src/panfrost/vulkan/panvk_vX_cmd_draw.c 2026-04-29 22:19:00.000000000 +0200 ++++ b/src/panfrost/vulkan/panvk_vX_cmd_draw.c 2026-05-20 19:10:23.031919662 +0200 +@@ -10,6 +10,7 @@ + #include "panvk_entrypoints.h" + + #include "pan_desc.h" ++#include "pan_compiler.h" /* PAN_SHADER_OOB_ADDRESS */ + #include "pan_util.h" + + static void +@@ -722,6 +723,35 @@ + set_gfx_sysval(cmdbuf, dirty_sysvals, vs.raw_vertex_offset, + info->vertex.raw_offset); + set_gfx_sysval(cmdbuf, dirty_sysvals, layer_id, info->layer_id); ++ ++ /* iter13: VK_EXT_transform_feedback sysvals — always set (per draw), ++ * reflect bound XFB state. set_gfx_sysval is a no-op if value unchanged. */ ++ set_gfx_sysval(cmdbuf, dirty_sysvals, vs.num_vertices, info->vertex.count); ++ { ++ const struct panvk_cmd_graphics_state *_gfx = &cmdbuf->state.gfx; ++ /* iter13: default each XFB buffer address to PAN_SHADER_OOB_ADDRESS ++ * (= 1<<63). This is the Panfrost-Gallium memory-sink idiom — the ++ * Bifrost MMU silently discards stores to this address, so a pipeline ++ * with XFB outputs used in a non-XFB draw (or in an XFB draw with ++ * fewer bound buffers than the shader declares) is safe instead of ++ * faulting. See gallium/drivers/panfrost/pan_cmdstream.c PAN_SYSVAL_XFB. 
*/ ++ uint64_t _xa0 = PAN_SHADER_OOB_ADDRESS, _xa1 = PAN_SHADER_OOB_ADDRESS, ++ _xa2 = PAN_SHADER_OOB_ADDRESS, _xa3 = PAN_SHADER_OOB_ADDRESS; ++ if (_gfx->xfb.active) { ++ if (_gfx->xfb.buffer_count > 0 && _gfx->xfb.buffers[0].addr) ++ _xa0 = _gfx->xfb.buffers[0].addr + _gfx->xfb.buffers[0].offset; ++ if (_gfx->xfb.buffer_count > 1 && _gfx->xfb.buffers[1].addr) ++ _xa1 = _gfx->xfb.buffers[1].addr + _gfx->xfb.buffers[1].offset; ++ if (_gfx->xfb.buffer_count > 2 && _gfx->xfb.buffers[2].addr) ++ _xa2 = _gfx->xfb.buffers[2].addr + _gfx->xfb.buffers[2].offset; ++ if (_gfx->xfb.buffer_count > 3 && _gfx->xfb.buffers[3].addr) ++ _xa3 = _gfx->xfb.buffers[3].addr + _gfx->xfb.buffers[3].offset; ++ } ++ set_gfx_sysval(cmdbuf, dirty_sysvals, vs.xfb_address[0], _xa0); ++ set_gfx_sysval(cmdbuf, dirty_sysvals, vs.xfb_address[1], _xa1); ++ set_gfx_sysval(cmdbuf, dirty_sysvals, vs.xfb_address[2], _xa2); ++ set_gfx_sysval(cmdbuf, dirty_sysvals, vs.xfb_address[3], _xa3); ++ } + #endif + + if (dyn_gfx_state_dirty(cmdbuf, CB_BLEND_CONSTANTS)) { +--- a/src/panfrost/vulkan/meson.build 2026-04-29 22:19:00.000000000 +0200 ++++ b/src/panfrost/vulkan/meson.build 2026-05-20 18:53:04.484861338 +0200 +@@ -73,6 +73,7 @@ + jm_inc_dir = ['jm'] + jm_files = [ + 'jm/panvk_vX_bind_queue.c', ++ 'jm/panvk_vX_cmd_xfb.c', # iter13 + 'jm/panvk_vX_cmd_buffer.c', + 'jm/panvk_vX_cmd_dispatch.c', + 'jm/panvk_vX_cmd_draw.c', +--- a/src/panfrost/vulkan/jm/panvk_vX_cmd_buffer.c 2026-04-29 22:19:00.000000000 +0200 ++++ b/src/panfrost/vulkan/jm/panvk_vX_cmd_buffer.c 2026-05-20 19:10:26.163965149 +0200 +@@ -473,5 +473,12 @@ + + vk_command_buffer_begin(&cmdbuf->vk, pBeginInfo); + ++#if PAN_ARCH < 9 ++ /* iter13: clear XFB state on Begin so a reused command buffer does not ++ * inherit stale xfb.buffer_count / xfb.active / xfb.buffers[] from a ++ * prior recording. 
*/ ++ memset(&cmdbuf->state.gfx.xfb, 0, sizeof(cmdbuf->state.gfx.xfb)); ++#endif ++ + return VK_SUCCESS; + } +--- a/src/panfrost/vulkan/jm/panvk_vX_cmd_xfb.c 2026-05-18 12:50:53.067999996 +0200 ++++ b/src/panfrost/vulkan/jm/panvk_vX_cmd_xfb.c 2026-05-20 19:10:27.175979847 +0200 +@@ -0,0 +1,111 @@ ++/* ++ * Copyright © 2026 mfritsche / claude-noether ++ * SPDX-License-Identifier: MIT ++ * ++ * iter13: VK_EXT_transform_feedback command handlers for the JM ++ * architecture path (Bifrost v6/v7; advertised only when PAN_ARCH < 9). ++ * ++ * The runtime contract: ++ * - vkCmdBindTransformFeedbackBuffersEXT: stash (gpu_addr, offset, size) ++ * for each slot into cmdbuf->state.gfx.xfb.buffers[]. ++ * - vkCmdBeginTransformFeedbackEXT: set cmdbuf->state.gfx.xfb.active = true. ++ * No explicit dirty marking: each draw re-derives vs.xfb_address[]. ++ * - vkCmdEndTransformFeedbackEXT: set active = false. ++ * ++ * Counter buffers (firstCounterBuffer/counterBufferCount/pCounterBuffers/ ++ * pCounterBufferOffsets) are accepted by API but ignored — v1 doesn't ++ * support pause/resume. transformFeedbackDraw is advertised as false. ++ * ++ * Per-draw integration: panvk_vX_cmd_draw.c reads cmdbuf->state.gfx.xfb ++ * and populates vs.xfb_address[i] for shader use. The pan_nir_lower_xfb ++ * pass in panvk_vX_shader.c emits nir_load_xfb_address(i) which lowers ++ * (via panvk_vX_shader.c sysval handler) to a load from the per-draw ++ * sysval push area. 
++ */ ++ ++#include "vk_log.h" ++#include "util/log.h" ++ ++#include "panvk_cmd_buffer.h" ++#include "panvk_cmd_draw.h" ++#include "panvk_buffer.h" ++#include "panvk_entrypoints.h" ++ ++VKAPI_ATTR void VKAPI_CALL ++panvk_per_arch(CmdBindTransformFeedbackBuffersEXT)( ++ VkCommandBuffer commandBuffer, ++ uint32_t firstBinding, ++ uint32_t bindingCount, ++ const VkBuffer *pBuffers, ++ const VkDeviceSize *pOffsets, ++ const VkDeviceSize *pSizes) ++{ ++ VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); ++ struct panvk_cmd_graphics_state *gfx = &cmdbuf->state.gfx; ++ ++ for (uint32_t i = 0; i < bindingCount; i++) { ++ uint32_t slot = firstBinding + i; ++ if (slot >= 4) ++ continue; ++ ++ VK_FROM_HANDLE(panvk_buffer, buf, pBuffers[i]); ++ gfx->xfb.buffers[slot].addr = panvk_buffer_gpu_ptr(buf, 0); ++ gfx->xfb.buffers[slot].offset = pOffsets[i]; ++ gfx->xfb.buffers[slot].size = ++ (pSizes != NULL && pSizes[i] != VK_WHOLE_SIZE) ++ ? pSizes[i] ++ : (buf->vk.size - pOffsets[i]); ++ } ++ ++ if (firstBinding + bindingCount > gfx->xfb.buffer_count) ++ gfx->xfb.buffer_count = firstBinding + bindingCount; ++} ++ ++VKAPI_ATTR void VKAPI_CALL ++panvk_per_arch(CmdBeginTransformFeedbackEXT)( ++ VkCommandBuffer commandBuffer, ++ uint32_t firstCounterBuffer, ++ uint32_t counterBufferCount, ++ const VkBuffer *pCounterBuffers, ++ const VkDeviceSize *pCounterBufferOffsets) ++{ ++ VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); ++ struct panvk_cmd_graphics_state *gfx = &cmdbuf->state.gfx; ++ ++ /* Counter buffers ignored in v1 — see VkPhysicalDeviceTransformFeedback ++ * PropertiesEXT.transformFeedbackDraw = false in panvk_vX_physical_device.c. ++ * App is spec-compliant if it does not pass counter buffers (which our ++ * features advertisement allows), but warn loudly if it does so we do not ++ * silently produce wrong capture state. 
*/ ++ (void)firstCounterBuffer; ++ (void)pCounterBufferOffsets; ++ if (counterBufferCount > 0 && pCounterBuffers != NULL) { ++ mesa_logw("panvk: CmdBeginTransformFeedbackEXT: counter buffers not " ++ "implemented (transformFeedbackDraw=false); XFB resume will " ++ "restart at buffer offset 0"); ++ } ++ ++ gfx->xfb.active = true; ++ /* Per-draw set_gfx_sysval picks up the change automatically — no ++ * explicit dirty marking required (set_gfx_sysval uses memcmp + ++ * BITSET to detect state diffs and re-emit sysvals). */ ++} ++ ++VKAPI_ATTR void VKAPI_CALL ++panvk_per_arch(CmdEndTransformFeedbackEXT)( ++ VkCommandBuffer commandBuffer, ++ uint32_t firstCounterBuffer, ++ uint32_t counterBufferCount, ++ const VkBuffer *pCounterBuffers, ++ const VkDeviceSize *pCounterBufferOffsets) ++{ ++ VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); ++ struct panvk_cmd_graphics_state *gfx = &cmdbuf->state.gfx; ++ ++ (void)firstCounterBuffer; ++ (void)counterBufferCount; ++ (void)pCounterBuffers; ++ (void)pCounterBufferOffsets; ++ ++ gfx->xfb.active = false; ++} diff --git a/arch/mesa-panvk-bifrost/PKGBUILD b/arch/mesa-panvk-bifrost/PKGBUILD index dd9133882..9d62b835a 100644 --- a/arch/mesa-panvk-bifrost/PKGBUILD +++ b/arch/mesa-panvk-bifrost/PKGBUILD @@ -30,7 +30,7 @@ pkgname=mesa-panvk-bifrost _mesaver=26.0.6 -pkgver=26.0.6.r2 +pkgver=26.0.6.r3 pkgrel=1 pkgdesc="Patched Mesa libvulkan_panfrost.so exposing Bifrost-gen Mali to Vulkan apps (panvk-bifrost campaign)" arch=('aarch64') @@ -79,6 +79,7 @@ source=( "https://archive.mesa3d.org/mesa-${_mesaver}.tar.xz" "0001-panvk-expose-robustness2-nullDescriptor-bifrost.patch" "0002-panvk-expose-vulkan-1.1-1.2-on-bifrost.patch" + "0003-panvk-bifrost-vk-ext-transform-feedback.patch" "brave-vulkan" "icd.json" ) @@ -88,6 +89,7 @@ sha256sums=( 'SKIP' 'SKIP' 'SKIP' + 'SKIP' ) prepare() { @@ -107,12 +109,23 @@ prepare() { sed -i 's|bool has_vk1_1 = PAN_ARCH >= 10;|bool has_vk1_1 = true;|' src/panfrost/vulkan/panvk_vX_physical_device.c sed 
-i 's|bool has_vk1_2 = PAN_ARCH >= 10;|bool has_vk1_2 = true;|' src/panfrost/vulkan/panvk_vX_physical_device.c + # iter13: VK_EXT_transform_feedback implementation for Bifrost (PAN_ARCH<9). + # Applied as a real unified-diff patch — the change is too large for sed. + # Phase-doc context: ~/src/panvk-bifrost/phase{4,5,6}_iter13_close.md. + # Unlocks ANGLE-Vulkan → GLES3 → WebGL2 / WebGPU on Brave (chrome://gpu + # reports "Hardware accelerated" across the board for the affected paths). + patch -p1 < "${srcdir}/0003-panvk-bifrost-vk-ext-transform-feedback.patch" + # Sanity-check the patches landed. grep -q "KHR_robustness2 = true," src/panfrost/vulkan/panvk_vX_physical_device.c grep -q "EXT_robustness2 = true," src/panfrost/vulkan/panvk_vX_physical_device.c grep -q "nullDescriptor = true," src/panfrost/vulkan/panvk_vX_physical_device.c grep -q "has_vk1_1 = true;" src/panfrost/vulkan/panvk_vX_physical_device.c grep -q "has_vk1_2 = true;" src/panfrost/vulkan/panvk_vX_physical_device.c + # iter13 sanity: + grep -q "EXT_transform_feedback = PAN_ARCH < 9," src/panfrost/vulkan/panvk_vX_physical_device.c + grep -q "pan_nir_lower_xfb" src/panfrost/vulkan/panvk_vX_shader.c + test -f src/panfrost/vulkan/jm/panvk_vX_cmd_xfb.c } build() {