/* * iter13 minimal Vulkan transform feedback probe. * * Goal: drive a single-stream, single-buffer VK_EXT_transform_feedback * capture end-to-end on (patched) PanVk-Bifrost — 3 vertices, each emitting * one vec4 with a known pattern, captured into a host-visible buffer, read * back and verified byte-exactly. * * Uses VK_EXT_transform_feedback. If the extension isn't exposed by the * driver, the probe exits with an error before doing any GPU work. * * Pipeline shape: * - vertex shader (probe_xfb.vert) writes a vec4 per vertex * - no fragment shader needed (rasterizerDiscardEnable=VK_TRUE) * - dynamic rendering with 0 color attachments * - vkCmdBindTransformFeedbackBuffersEXT + vkCmdBeginTransformFeedbackEXT * wrap a vkCmdDraw(3, 1, 0, 0) * - readback buffer is 3*16 = 48 bytes * * Pure Vulkan 1.0 core + VK_KHR_dynamic_rendering + VK_EXT_transform_feedback. */ #include #include #include #include #include #include #include #define VERTEX_COUNT 3 #define XFB_BUFFER_BYTES (VERTEX_COUNT * 16) /* 3 vec4s = 48 bytes */ #define VSPV_PATH "probe_xfb.vert.spv" #define STEP(name) do { fprintf(stderr, "[step] " name "\n"); fflush(stderr); } while (0) #define VK_CHECK(call) do { \ VkResult _r = (call); \ if (_r != VK_SUCCESS) { \ fprintf(stderr, "[fail] " #call " => %d at %s:%d\n", \ (int)_r, __FILE__, __LINE__); \ exit(2); \ } \ } while (0) static uint32_t *read_spv(const char *path, size_t *out_bytes) { FILE *f = fopen(path, "rb"); if (!f) { fprintf(stderr, "[fail] open %s: %s\n", path, strerror(errno)); exit(3); } fseek(f, 0, SEEK_END); long n = ftell(f); fseek(f, 0, SEEK_SET); uint32_t *buf = malloc((size_t)n); fread(buf, 1, (size_t)n, f); fclose(f); *out_bytes = (size_t)n; return buf; } static uint32_t pick_memtype(const VkPhysicalDeviceMemoryProperties *mp, uint32_t type_bits, VkMemoryPropertyFlags want) { for (uint32_t i = 0; i < mp->memoryTypeCount; i++) { if ((type_bits & (1u << i)) && (mp->memoryTypes[i].propertyFlags & want) == want) return i; } fprintf(stderr, "[fail] no memtype\n"); exit(4); } static uint32_t pick_host_visible(const VkPhysicalDeviceMemoryProperties *mp, uint32_t type_bits) { VkMemoryPropertyFlags pref = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; for (uint32_t i = 0; i < mp->memoryTypeCount; i++) { if ((type_bits & (1u << i)) && (mp->memoryTypes[i].propertyFlags & pref) == pref) return i; } for (uint32_t i = 0; i < mp->memoryTypeCount; i++) { if ((type_bits & (1u << i)) && (mp->memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) return i; } fprintf(stderr, "[fail] no HOST_VISIBLE\n"); exit(4); } int main(void) { STEP("vkCreateInstance"); const char *inst_exts[] = { "VK_KHR_get_physical_device_properties2" }; VkApplicationInfo app = { .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, .pApplicationName = "panvk-bifrost iter13 XFB probe", .apiVersion = VK_API_VERSION_1_0, }; VkInstanceCreateInfo ici = { .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, .pApplicationInfo = &app, .enabledExtensionCount = 1, .ppEnabledExtensionNames = inst_exts, }; VkInstance inst; VK_CHECK(vkCreateInstance(&ici, NULL, &inst)); uint32_t n_phys = 0; VK_CHECK(vkEnumeratePhysicalDevices(inst, &n_phys, NULL)); VkPhysicalDevice *phys = calloc(n_phys, sizeof(*phys)); VK_CHECK(vkEnumeratePhysicalDevices(inst, &n_phys, phys)); VkPhysicalDevice gpu = phys[0]; /* Check VK_EXT_transform_feedback is exposed before we proceed. */ uint32_t ext_count = 0; vkEnumerateDeviceExtensionProperties(gpu, NULL, &ext_count, NULL); VkExtensionProperties *exts = calloc(ext_count, sizeof(*exts)); vkEnumerateDeviceExtensionProperties(gpu, NULL, &ext_count, exts); int has_xfb = 0; for (uint32_t i = 0; i < ext_count; i++) { if (!strcmp(exts[i].extensionName, "VK_EXT_transform_feedback")) has_xfb = 1; } free(exts); if (!has_xfb) { fprintf(stderr, "[fail] VK_EXT_transform_feedback NOT exposed by driver " "(this is the iter13 implementation gap — re-run on a Mesa " "build with the iter13 patches applied)\n"); return 9; } fprintf(stderr, "[info] VK_EXT_transform_feedback present on device\n"); VkPhysicalDeviceMemoryProperties mp; vkGetPhysicalDeviceMemoryProperties(gpu, &mp); /* Query the transform feedback features struct via vkGetPhysicalDeviceFeatures2. */ PFN_vkGetPhysicalDeviceFeatures2KHR pGetFeats2 = (PFN_vkGetPhysicalDeviceFeatures2KHR)vkGetInstanceProcAddr( inst, "vkGetPhysicalDeviceFeatures2KHR"); if (!pGetFeats2) { fprintf(stderr, "[fail] no vkGetPhysicalDeviceFeatures2KHR\n"); return 5; } VkPhysicalDeviceTransformFeedbackFeaturesEXT xfb_feats = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT, }; VkPhysicalDeviceFeatures2 feats2 = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, .pNext = &xfb_feats, }; pGetFeats2(gpu, &feats2); fprintf(stderr, "[info] transformFeedback=%u geometryStreams=%u\n", xfb_feats.transformFeedback, xfb_feats.geometryStreams); if (!xfb_feats.transformFeedback) { fprintf(stderr, "[fail] transformFeedback feature is FALSE — driver exposes ext but not feature\n"); return 10; } /* ---- queue family ---- */ uint32_t n_qf = 0; vkGetPhysicalDeviceQueueFamilyProperties(gpu, &n_qf, NULL); VkQueueFamilyProperties *qfp = calloc(n_qf, sizeof(*qfp)); vkGetPhysicalDeviceQueueFamilyProperties(gpu, &n_qf, qfp); uint32_t qfam = UINT32_MAX; for (uint32_t i = 0; i < n_qf; i++) { if (qfp[i].queueFlags & VK_QUEUE_GRAPHICS_BIT) { qfam = i; break; } } /* ---- device with XFB + dynamic_rendering enabled ---- */ STEP("vkCreateDevice (+VK_EXT_transform_feedback, +dynamic_rendering chain)"); const char *dev_exts[] = { "VK_KHR_multiview", "VK_KHR_maintenance2", "VK_KHR_create_renderpass2", "VK_KHR_depth_stencil_resolve", "VK_KHR_dynamic_rendering", "VK_EXT_transform_feedback", }; VkPhysicalDeviceTransformFeedbackFeaturesEXT enable_xfb = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT, .transformFeedback = VK_TRUE, .geometryStreams = VK_FALSE, }; VkPhysicalDeviceDynamicRenderingFeaturesKHR dyn_feat = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_FEATURES_KHR, .pNext = &enable_xfb, .dynamicRendering = VK_TRUE, }; float qprio = 1.0f; VkDeviceQueueCreateInfo qci = { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, .queueFamilyIndex = qfam, .queueCount = 1, .pQueuePriorities = &qprio, }; VkDeviceCreateInfo dci = { .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, .pNext = &dyn_feat, .queueCreateInfoCount = 1, .pQueueCreateInfos = &qci, .enabledExtensionCount = sizeof(dev_exts)/sizeof(dev_exts[0]), .ppEnabledExtensionNames = dev_exts, }; VkDevice dev; VK_CHECK(vkCreateDevice(gpu, &dci, NULL, &dev)); VkQueue queue; vkGetDeviceQueue(dev, qfam, 0, &queue); /* ---- XFB function pointers ---- */ PFN_vkCmdBindTransformFeedbackBuffersEXT pBindXfb = (PFN_vkCmdBindTransformFeedbackBuffersEXT)vkGetDeviceProcAddr( dev, "vkCmdBindTransformFeedbackBuffersEXT"); PFN_vkCmdBeginTransformFeedbackEXT pBeginXfb = (PFN_vkCmdBeginTransformFeedbackEXT)vkGetDeviceProcAddr( dev, "vkCmdBeginTransformFeedbackEXT"); PFN_vkCmdEndTransformFeedbackEXT pEndXfb = (PFN_vkCmdEndTransformFeedbackEXT)vkGetDeviceProcAddr( dev, "vkCmdEndTransformFeedbackEXT"); PFN_vkCmdBeginRenderingKHR pBeginRendering = (PFN_vkCmdBeginRenderingKHR)vkGetDeviceProcAddr(dev, "vkCmdBeginRenderingKHR"); PFN_vkCmdEndRenderingKHR pEndRendering = (PFN_vkCmdEndRenderingKHR)vkGetDeviceProcAddr(dev, "vkCmdEndRenderingKHR"); if (!pBindXfb || !pBeginXfb || !pEndXfb || !pBeginRendering || !pEndRendering) { fprintf(stderr, "[fail] one or more XFB / dynamic_rendering entry points missing\n"); return 11; } /* ---- XFB capture buffer (host-visible) ---- */ STEP("vkCreateBuffer XFB capture (host-visible)"); VkBufferCreateInfo xfb_bci = { .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .size = XFB_BUFFER_BYTES, .usage = VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, }; VkBuffer xfb_buf; VK_CHECK(vkCreateBuffer(dev, &xfb_bci, NULL, &xfb_buf)); VkMemoryRequirements xfb_mr; vkGetBufferMemoryRequirements(dev, xfb_buf, &xfb_mr); VkMemoryAllocateInfo xfb_mai = { .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, .allocationSize = xfb_mr.size, .memoryTypeIndex = pick_host_visible(&mp, xfb_mr.memoryTypeBits), }; VkDeviceMemory xfb_mem; VK_CHECK(vkAllocateMemory(dev, &xfb_mai, NULL, &xfb_mem)); VK_CHECK(vkBindBufferMemory(dev, xfb_buf, xfb_mem, 0)); /* Pre-fill with sentinel so we can detect "GPU never wrote" vs "wrong write". */ void *mapped = NULL; VK_CHECK(vkMapMemory(dev, xfb_mem, 0, VK_WHOLE_SIZE, 0, &mapped)); uint32_t *u32 = (uint32_t *)mapped; for (uint32_t i = 0; i < XFB_BUFFER_BYTES / 4; i++) u32[i] = 0xDEADBEEFu; /* ---- pipeline (vertex stage only, raster-discard, no color attachment) ---- */ STEP("vkCreatePipelineLayout + vert shader"); VkPipelineLayoutCreateInfo plci = { .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, }; VkPipelineLayout pl; VK_CHECK(vkCreatePipelineLayout(dev, &plci, NULL, &pl)); size_t spv_bytes = 0; uint32_t *spv = read_spv(VSPV_PATH, &spv_bytes); VkShaderModuleCreateInfo smci = { .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, .codeSize = spv_bytes, .pCode = spv, }; VkShaderModule vsm; VK_CHECK(vkCreateShaderModule(dev, &smci, NULL, &vsm)); free(spv); VkPipelineShaderStageCreateInfo stages[1] = { { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, .stage = VK_SHADER_STAGE_VERTEX_BIT, .module = vsm, .pName = "main" }, }; VkPipelineVertexInputStateCreateInfo vi = { .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, }; VkPipelineInputAssemblyStateCreateInfo ia = { .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, }; VkViewport vp_dummy = { 0, 0, 1, 1, 0.0f, 1.0f }; VkRect2D sc_dummy = {{0,0}, {1,1}}; VkPipelineViewportStateCreateInfo vp = { .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, .viewportCount = 1, .pViewports = &vp_dummy, .scissorCount = 1, .pScissors = &sc_dummy, }; VkPipelineRasterizationStateCreateInfo rs = { .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, .rasterizerDiscardEnable = VK_TRUE, /* THE point — no rasterization */ .polygonMode = VK_POLYGON_MODE_FILL, .cullMode = VK_CULL_MODE_NONE, .lineWidth = 1.0f, }; VkPipelineMultisampleStateCreateInfo ms = { .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT, }; VkPipelineRenderingCreateInfoKHR pri = { .sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO_KHR, .colorAttachmentCount = 0, /* No color attachment with raster discard. */ }; VkGraphicsPipelineCreateInfo gpci = { .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, .pNext = &pri, .stageCount = 1, .pStages = stages, .pVertexInputState = &vi, .pInputAssemblyState = &ia, .pViewportState = &vp, .pRasterizationState = &rs, .pMultisampleState = &ms, .layout = pl, }; STEP("vkCreateGraphicsPipelines (raster-discard + XFB-output VS)"); VkPipeline pipe; VK_CHECK(vkCreateGraphicsPipelines(dev, VK_NULL_HANDLE, 1, &gpci, NULL, &pipe)); /* ---- command buffer ---- */ VkCommandPoolCreateInfo cpoolci = { .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, .queueFamilyIndex = qfam, }; VkCommandPool cpool; VK_CHECK(vkCreateCommandPool(dev, &cpoolci, NULL, &cpool)); VkCommandBufferAllocateInfo cbai = { .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, .commandPool = cpool, .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, .commandBufferCount = 1, }; VkCommandBuffer cb; VK_CHECK(vkAllocateCommandBuffers(dev, &cbai, &cb)); STEP("record (bind XFB buffer + begin XFB + draw + end XFB)"); VkCommandBufferBeginInfo cbbi = { .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, }; VK_CHECK(vkBeginCommandBuffer(cb, &cbbi)); /* Bind XFB buffer to slot 0 */ VkDeviceSize xfb_offset = 0, xfb_size = XFB_BUFFER_BYTES; pBindXfb(cb, 0, 1, &xfb_buf, &xfb_offset, &xfb_size); /* Dynamic rendering with NO color attachments (raster-discard). * Render-area is required by the spec to be > 0 even if discarded; * use 1x1. */ VkRenderingInfoKHR ri = { .sType = VK_STRUCTURE_TYPE_RENDERING_INFO_KHR, .renderArea = {{0,0}, {1,1}}, .layerCount = 1, .colorAttachmentCount = 0, }; pBeginRendering(cb, &ri); vkCmdBindPipeline(cb, VK_PIPELINE_BIND_POINT_GRAPHICS, pipe); pBeginXfb(cb, 0, 0, NULL, NULL); vkCmdDraw(cb, VERTEX_COUNT, 1, 0, 0); pEndXfb(cb, 0, 0, NULL, NULL); pEndRendering(cb); /* Sync XFB writes for host read. */ VkBufferMemoryBarrier bb = { .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, .srcAccessMask = VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT, .dstAccessMask = VK_ACCESS_HOST_READ_BIT, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .buffer = xfb_buf, .offset = 0, .size = VK_WHOLE_SIZE, }; vkCmdPipelineBarrier(cb, VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT, VK_PIPELINE_STAGE_HOST_BIT, 0, 0, NULL, 1, &bb, 0, NULL); VK_CHECK(vkEndCommandBuffer(cb)); /* ---- submit ---- */ VkFenceCreateInfo fci = { .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO }; VkFence fence; VK_CHECK(vkCreateFence(dev, &fci, NULL, &fence)); VkSubmitInfo si = { .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, .commandBufferCount = 1, .pCommandBuffers = &cb, }; STEP("submit + wait (10s)"); VK_CHECK(vkQueueSubmit(queue, 1, &si, fence)); VkResult wr = vkWaitForFences(dev, 1, &fence, VK_TRUE, 10ULL * 1000 * 1000 * 1000); if (wr != VK_SUCCESS) { fprintf(stderr, "[fail] vkWaitForFences => %d\n", wr); return 7; } /* ---- verify ---- */ STEP("readback + verify"); VkMappedMemoryRange mmr = { .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, .memory = xfb_mem, .offset = 0, .size = VK_WHOLE_SIZE, }; vkInvalidateMappedMemoryRanges(dev, 1, &mmr); /* Expected: each vec4 = (vertex_id, 0, 4660.0, 51966.0) as float32 */ int mismatches = 0; float *floats = (float *)mapped; for (uint32_t v = 0; v < VERTEX_COUNT; v++) { float got[4] = { floats[v*4 + 0], floats[v*4 + 1], floats[v*4 + 2], floats[v*4 + 3] }; float want[4] = { (float)v, 0.0f, (float)0x1234, (float)0xcafe }; for (int c = 0; c < 4; c++) { if (got[c] != want[c]) { fprintf(stderr, "[diff] vertex %u comp %d: got=%f want=%f\n", v, c, got[c], want[c]); mismatches++; } } fprintf(stderr, "[info] vertex %u: (%f, %f, %f, %f)\n", v, got[0], got[1], got[2], got[3]); } /* ---- teardown ---- */ vkUnmapMemory(dev, xfb_mem); vkDestroyFence(dev, fence, NULL); vkDestroyCommandPool(dev, cpool, NULL); vkDestroyPipeline(dev, pipe, NULL); vkDestroyShaderModule(dev, vsm, NULL); vkDestroyPipelineLayout(dev, pl, NULL); vkDestroyBuffer(dev, xfb_buf, NULL); vkFreeMemory(dev, xfb_mem, NULL); vkDestroyDevice(dev, NULL); vkDestroyInstance(inst, NULL); free(phys); free(qfp); if (mismatches == 0) { fprintf(stderr, "[PASS] PanVk-Bifrost transform feedback: 3 vertices captured correctly.\n"); return 0; } else { fprintf(stderr, "[FAIL] %d mismatches across 3 vertices.\n", mismatches); return 1; } }