/* * iter1 minimal Vulkan compute probe for panvk-bifrost campaign. * * Goal: drive a single-invocation compute dispatch end-to-end on PanVk-Bifrost * (PineTab2 / Mali-G52 r1 MC1) and verify the shader wrote 0xCAFEBABE into a * host-visible storage buffer. * * If this works, iter2 moves to graphics. If it fails, the failure point names * which hypothesis in phase0_findings.md was right. * * Pure Vulkan 1.0 core. No instance/device extensions requested. * * Build: make * Run: PAN_I_WANT_A_BROKEN_VULKAN_DRIVER=1 ./probe_compute * Trace: PAN_I_WANT_A_BROKEN_VULKAN_DRIVER=1 \ * VK_INSTANCE_LAYERS=VK_LAYER_KHRONOS_validation ./probe_compute */ #include #include #include #include #include #include #define EXPECTED_PATTERN 0xCAFEBABEu #define BUFFER_BYTES 16 /* one uint32, but allocate a little extra */ #define SPV_PATH "probe_compute.spv" #define STEP(name) do { fprintf(stderr, "[step] " name "\n"); fflush(stderr); } while (0) #define VK_CHECK(call) do { \ VkResult _r = (call); \ if (_r != VK_SUCCESS) { \ fprintf(stderr, "[fail] " #call " => %d at %s:%d\n", \ (int)_r, __FILE__, __LINE__); \ exit(2); \ } \ } while (0) static uint32_t *read_spv(const char *path, size_t *out_bytes) { FILE *f = fopen(path, "rb"); if (!f) { fprintf(stderr, "[fail] open %s: %s\n", path, strerror(errno)); exit(3); } fseek(f, 0, SEEK_END); long n = ftell(f); fseek(f, 0, SEEK_SET); if (n <= 0 || (n & 3)) { fprintf(stderr, "[fail] bad SPV size %ld\n", n); exit(3); } uint32_t *buf = malloc((size_t)n); if (fread(buf, 1, (size_t)n, f) != (size_t)n) { fprintf(stderr, "[fail] short read\n"); exit(3); } fclose(f); *out_bytes = (size_t)n; return buf; } static uint32_t pick_host_visible_memtype(const VkPhysicalDeviceMemoryProperties *mp, uint32_t type_bits) { /* Prefer DEVICE_LOCAL|HOST_VISIBLE|HOST_COHERENT (no manual flush/invalidate). */ const uint32_t want_pref = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; for (uint32_t i = 0; i < mp->memoryTypeCount; i++) { if ((type_bits & (1u << i)) && (mp->memoryTypes[i].propertyFlags & want_pref) == want_pref) return i; } /* Fallback: any HOST_VISIBLE. */ for (uint32_t i = 0; i < mp->memoryTypeCount; i++) { if ((type_bits & (1u << i)) && (mp->memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) return i; } fprintf(stderr, "[fail] no HOST_VISIBLE memory type matches type_bits=0x%x\n", type_bits); exit(4); } int main(void) { /* ---- instance ---------------------------------------------------------- */ STEP("vkCreateInstance"); VkApplicationInfo app = { .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, .pApplicationName = "panvk-bifrost iter1 compute probe", .applicationVersion = 1, .pEngineName = "none", .engineVersion = 1, .apiVersion = VK_API_VERSION_1_0, }; VkInstanceCreateInfo ici = { .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, .pApplicationInfo = &app, }; VkInstance inst; VK_CHECK(vkCreateInstance(&ici, NULL, &inst)); /* ---- enumerate + pick first physical device --------------------------- */ STEP("vkEnumeratePhysicalDevices"); uint32_t n_phys = 0; VK_CHECK(vkEnumeratePhysicalDevices(inst, &n_phys, NULL)); if (n_phys == 0) { fprintf(stderr, "[fail] no physical devices\n"); return 5; } VkPhysicalDevice *phys = calloc(n_phys, sizeof(*phys)); VK_CHECK(vkEnumeratePhysicalDevices(inst, &n_phys, phys)); VkPhysicalDevice gpu = phys[0]; VkPhysicalDeviceProperties pp; vkGetPhysicalDeviceProperties(gpu, &pp); fprintf(stderr, "[info] gpu='%s' apiVersion=%u.%u.%u driverVersion=%u\n", pp.deviceName, VK_VERSION_MAJOR(pp.apiVersion), VK_VERSION_MINOR(pp.apiVersion), VK_VERSION_PATCH(pp.apiVersion), pp.driverVersion); VkPhysicalDeviceMemoryProperties mp; vkGetPhysicalDeviceMemoryProperties(gpu, &mp); /* ---- queue family: graphics-or-compute -------------------------------- */ STEP("vkGetPhysicalDeviceQueueFamilyProperties"); uint32_t n_qf = 0; vkGetPhysicalDeviceQueueFamilyProperties(gpu, &n_qf, NULL); VkQueueFamilyProperties *qfp = calloc(n_qf, sizeof(*qfp)); vkGetPhysicalDeviceQueueFamilyProperties(gpu, &n_qf, qfp); uint32_t qfam = UINT32_MAX; for (uint32_t i = 0; i < n_qf; i++) { if (qfp[i].queueFlags & VK_QUEUE_COMPUTE_BIT) { qfam = i; break; } } if (qfam == UINT32_MAX) { fprintf(stderr, "[fail] no compute queue family\n"); return 6; } fprintf(stderr, "[info] using queue family %u (flags=0x%x)\n", qfam, qfp[qfam].queueFlags); /* ---- device ----------------------------------------------------------- */ STEP("vkCreateDevice"); float qprio = 1.0f; VkDeviceQueueCreateInfo qci = { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, .queueFamilyIndex = qfam, .queueCount = 1, .pQueuePriorities = &qprio, }; VkDeviceCreateInfo dci = { .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, .queueCreateInfoCount = 1, .pQueueCreateInfos = &qci, }; VkDevice dev; VK_CHECK(vkCreateDevice(gpu, &dci, NULL, &dev)); VkQueue queue; vkGetDeviceQueue(dev, qfam, 0, &queue); /* ---- storage buffer + memory ----------------------------------------- */ STEP("vkCreateBuffer (storage, host-visible)"); VkBufferCreateInfo bci = { .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .size = BUFFER_BYTES, .usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, }; VkBuffer buf; VK_CHECK(vkCreateBuffer(dev, &bci, NULL, &buf)); VkMemoryRequirements mr; vkGetBufferMemoryRequirements(dev, buf, &mr); fprintf(stderr, "[info] buffer memReq size=%llu alignment=%llu typeBits=0x%x\n", (unsigned long long)mr.size, (unsigned long long)mr.alignment, mr.memoryTypeBits); STEP("vkAllocateMemory"); VkMemoryAllocateInfo mai = { .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, .allocationSize = mr.size, .memoryTypeIndex = pick_host_visible_memtype(&mp, mr.memoryTypeBits), }; VkDeviceMemory mem; VK_CHECK(vkAllocateMemory(dev, &mai, NULL, &mem)); VK_CHECK(vkBindBufferMemory(dev, buf, mem, 0)); /* Pre-write a known initial pattern so we can tell if the GPU did anything. */ STEP("vkMapMemory (pre-write 0xDEADBEEF sentinel)"); void *mapped = NULL; VK_CHECK(vkMapMemory(dev, mem, 0, VK_WHOLE_SIZE, 0, &mapped)); uint32_t *u32 = (uint32_t *)mapped; for (size_t i = 0; i < BUFFER_BYTES / 4; i++) u32[i] = 0xDEADBEEFu; /* ---- descriptor set --------------------------------------------------- */ STEP("vkCreateDescriptorSetLayout"); VkDescriptorSetLayoutBinding dslb = { .binding = 0, .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, .descriptorCount = 1, .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, }; VkDescriptorSetLayoutCreateInfo dslci = { .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, .bindingCount = 1, .pBindings = &dslb, }; VkDescriptorSetLayout dsl; VK_CHECK(vkCreateDescriptorSetLayout(dev, &dslci, NULL, &dsl)); STEP("vkCreateDescriptorPool"); VkDescriptorPoolSize dps = { VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1 }; VkDescriptorPoolCreateInfo dpci = { .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, .maxSets = 1, .poolSizeCount = 1, .pPoolSizes = &dps, }; VkDescriptorPool dpool; VK_CHECK(vkCreateDescriptorPool(dev, &dpci, NULL, &dpool)); STEP("vkAllocateDescriptorSets"); VkDescriptorSetAllocateInfo dsai = { .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, .descriptorPool = dpool, .descriptorSetCount = 1, .pSetLayouts = &dsl, }; VkDescriptorSet dset; VK_CHECK(vkAllocateDescriptorSets(dev, &dsai, &dset)); STEP("vkUpdateDescriptorSets"); VkDescriptorBufferInfo dbi = { buf, 0, VK_WHOLE_SIZE }; VkWriteDescriptorSet wds = { .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, .dstSet = dset, .dstBinding = 0, .descriptorCount = 1, .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, .pBufferInfo = &dbi, }; vkUpdateDescriptorSets(dev, 1, &wds, 0, NULL); /* ---- shader module + pipeline ---------------------------------------- */ STEP("vkCreateShaderModule (from " SPV_PATH ")"); size_t spv_bytes = 0; uint32_t *spv = read_spv(SPV_PATH, &spv_bytes); VkShaderModuleCreateInfo smci = { .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, .codeSize = spv_bytes, .pCode = spv, }; VkShaderModule sm; VK_CHECK(vkCreateShaderModule(dev, &smci, NULL, &sm)); free(spv); STEP("vkCreatePipelineLayout"); VkPipelineLayoutCreateInfo plci = { .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, .setLayoutCount = 1, .pSetLayouts = &dsl, }; VkPipelineLayout pl; VK_CHECK(vkCreatePipelineLayout(dev, &plci, NULL, &pl)); STEP("vkCreateComputePipelines"); VkComputePipelineCreateInfo cpci = { .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, .stage = { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, .stage = VK_SHADER_STAGE_COMPUTE_BIT, .module = sm, .pName = "main", }, .layout = pl, }; VkPipeline pipe; VK_CHECK(vkCreateComputePipelines(dev, VK_NULL_HANDLE, 1, &cpci, NULL, &pipe)); /* ---- command buffer --------------------------------------------------- */ STEP("vkCreateCommandPool"); VkCommandPoolCreateInfo cpoolci = { .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, .queueFamilyIndex = qfam, }; VkCommandPool cpool; VK_CHECK(vkCreateCommandPool(dev, &cpoolci, NULL, &cpool)); STEP("vkAllocateCommandBuffers"); VkCommandBufferAllocateInfo cbai = { .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, .commandPool = cpool, .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, .commandBufferCount = 1, }; VkCommandBuffer cb; VK_CHECK(vkAllocateCommandBuffers(dev, &cbai, &cb)); STEP("vkBeginCommandBuffer + record dispatch"); VkCommandBufferBeginInfo cbbi = { .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, }; VK_CHECK(vkBeginCommandBuffer(cb, &cbbi)); vkCmdBindPipeline(cb, VK_PIPELINE_BIND_POINT_COMPUTE, pipe); vkCmdBindDescriptorSets(cb, VK_PIPELINE_BIND_POINT_COMPUTE, pl, 0, 1, &dset, 0, NULL); vkCmdDispatch(cb, 1, 1, 1); /* Barrier: shader storage write must be visible to host read. */ VkMemoryBarrier mb = { .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, .dstAccessMask = VK_ACCESS_HOST_READ_BIT, }; vkCmdPipelineBarrier(cb, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0, 1, &mb, 0, NULL, 0, NULL); VK_CHECK(vkEndCommandBuffer(cb)); /* ---- submit + wait ---------------------------------------------------- */ STEP("vkCreateFence"); VkFenceCreateInfo fci = { .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO }; VkFence fence; VK_CHECK(vkCreateFence(dev, &fci, NULL, &fence)); STEP("vkQueueSubmit"); VkSubmitInfo si = { .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, .commandBufferCount = 1, .pCommandBuffers = &cb, }; VK_CHECK(vkQueueSubmit(queue, 1, &si, fence)); STEP("vkWaitForFences (5s timeout)"); VkResult wr = vkWaitForFences(dev, 1, &fence, VK_TRUE, 5ULL * 1000 * 1000 * 1000); if (wr == VK_TIMEOUT) { fprintf(stderr, "[fail] fence TIMEOUT — GPU did not complete dispatch in 5s\n"); return 7; } if (wr != VK_SUCCESS) { fprintf(stderr, "[fail] vkWaitForFences => %d\n", wr); return 8; } /* ---- readback + verify ---------------------------------------------- */ STEP("vkInvalidateMappedMemoryRanges + readback"); VkMappedMemoryRange mmr = { .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, .memory = mem, .offset = 0, .size = VK_WHOLE_SIZE, }; /* Safe to invalidate even on COHERENT memory — it's a no-op then. */ vkInvalidateMappedMemoryRanges(dev, 1, &mmr); uint32_t got = u32[0]; fprintf(stderr, "[info] buffer[0] = 0x%08x (expected 0x%08x)\n", got, EXPECTED_PATTERN); int ok = (got == EXPECTED_PATTERN); /* ---- teardown -------------------------------------------------------- */ vkUnmapMemory(dev, mem); vkDestroyFence(dev, fence, NULL); vkDestroyPipeline(dev, pipe, NULL); vkDestroyPipelineLayout(dev, pl, NULL); vkDestroyShaderModule(dev, sm, NULL); vkDestroyDescriptorPool(dev, dpool, NULL); vkDestroyDescriptorSetLayout(dev, dsl, NULL); vkDestroyCommandPool(dev, cpool, NULL); vkDestroyBuffer(dev, buf, NULL); vkFreeMemory(dev, mem, NULL); vkDestroyDevice(dev, NULL); vkDestroyInstance(inst, NULL); if (ok) { fprintf(stderr, "[PASS] PanVk-Bifrost compute dispatch wrote the expected pattern.\n"); return 0; } else { fprintf(stderr, "[FAIL] readback mismatch.\n"); return 1; } }