a4e7d8ab90
panvk-bifrost campaigns (r1..r4 Vulkan compositor + r5.video1 Vulkan
video decode) shipped before this repo existed; the deliverable
patches live in marfrit-packages, but the reasoning chain, phase docs,
and source-state evidence lived only in local working trees on the
development host.
This retrofit imports:
- mesa-panvk-bifrost/ — r1..r4 era phase docs (iter1..iter18)
(libmali stub blobs at iter18/blob/ excluded
— 109MB of RE artifacts replaced with a README
pointer)
- mesa-panvk-bifrost-video/ — sibling campaign phase docs + probe
- evidence/ — frozen .tgz source snapshots at each milestone
(basis for the 0005 patch diff generation)
Future iterations should branch off here from day one, so each iter is
a commit rather than a snapshot. See [[feedback-session-local-process-pins]]
for the process drift this retrofit closes.
Total: 1.9 MB across 124 files.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
370 lines
14 KiB
C
370 lines
14 KiB
C
/*
 * iter1 minimal Vulkan compute probe for panvk-bifrost campaign.
 *
 * Goal: drive a single-invocation compute dispatch end-to-end on PanVk-Bifrost
 * (PineTab2 / Mali-G52 r1 MC1) and verify the shader wrote 0xCAFEBABE into a
 * host-visible storage buffer.
 *
 * If this works, iter2 moves to graphics. If it fails, the failure point names
 * which hypothesis in phase0_findings.md was right.
 *
 * Pure Vulkan 1.0 core. No instance/device extensions requested.
 *
 * Build: make
 * Run:   PAN_I_WANT_A_BROKEN_VULKAN_DRIVER=1 ./probe_compute
 * Trace: PAN_I_WANT_A_BROKEN_VULKAN_DRIVER=1 \
 *        VK_INSTANCE_LAYERS=VK_LAYER_KHRONOS_validation ./probe_compute
 */
|
|
|
|
#include <errno.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <stdint.h>
|
|
#include <vulkan/vulkan.h>
|
|
|
|
/* Word main() compares against the first buffer element after the dispatch. */
#define EXPECTED_PATTERN 0xCAFEBABEu
|
|
#define BUFFER_BYTES 16 /* one uint32, but allocate a little extra */
|
|
/* SPIR-V binary that main() loads through read_spv(); relative to the CWD. */
#define SPV_PATH "probe_compute.spv"
|
|
|
|
/*
 * Print a "[step] <name>" progress line to stderr and flush immediately, so
 * the last step reached is visible even if the driver hangs or crashes.
 *
 * The name is passed as a "%s" argument instead of being pasted into the
 * format string: a '%' inside a step name is printed literally rather than
 * being interpreted as a conversion, and non-literal strings are accepted.
 */
#define STEP(name) do { fprintf(stderr, "[step] %s\n", (name)); fflush(stderr); } while (0)
|
|
|
|
/*
 * Evaluate a Vulkan call exactly once. Any result other than VK_SUCCESS is
 * reported on stderr together with the stringified call and its source
 * location, and the process exits with status 2.
 */
#define VK_CHECK(call)                                                  \
    do {                                                                \
        VkResult vk_check_res_ = (call);                                \
        if (vk_check_res_ != VK_SUCCESS) {                              \
            fprintf(stderr,                                             \
                    "[fail] " #call " => %d at %s:%d\n",                \
                    (int)vk_check_res_, __FILE__, __LINE__);            \
            exit(2);                                                    \
        }                                                               \
    } while (0)
|
|
|
|
/*
 * Load a whole SPIR-V binary from disk into a freshly allocated buffer.
 *
 * path:      file to read; opened in binary mode.
 * out_bytes: receives the file size in bytes.
 *
 * Returns a malloc'd buffer that the caller must free(). The function never
 * returns NULL: every failure (open, seek, size that is not a positive
 * multiple of 4, allocation, short read) prints a "[fail]" line to stderr and
 * terminates the process with exit status 3.
 */
static uint32_t *read_spv(const char *path, size_t *out_bytes)
{
    FILE *f = fopen(path, "rb");
    if (!f) {
        fprintf(stderr, "[fail] open %s: %s\n", path, strerror(errno));
        exit(3);
    }

    /* Determine the size by seeking to the end of the file. */
    if (fseek(f, 0, SEEK_END) != 0) {
        fprintf(stderr, "[fail] seek %s: %s\n", path, strerror(errno));
        exit(3);
    }
    long n = ftell(f);
    /* ftell() reports failure as -1, which the size check below rejects.
     * SPIR-V is a stream of 32-bit words, so the size must be a multiple of 4. */
    if (n <= 0 || (n & 3)) {
        fprintf(stderr, "[fail] bad SPV size %ld\n", n);
        exit(3);
    }
    if (fseek(f, 0, SEEK_SET) != 0) {
        fprintf(stderr, "[fail] seek %s: %s\n", path, strerror(errno));
        exit(3);
    }

    uint32_t *buf = malloc((size_t)n);
    if (!buf) {
        /* Without this check a failed allocation would hand NULL to fread(). */
        fprintf(stderr, "[fail] out of memory reading %s (%ld bytes)\n", path, n);
        exit(3);
    }
    if (fread(buf, 1, (size_t)n, f) != (size_t)n) {
        fprintf(stderr, "[fail] short read\n");
        exit(3);
    }
    fclose(f);

    *out_bytes = (size_t)n;
    return buf;
}
|
|
|
|
static uint32_t pick_host_visible_memtype(const VkPhysicalDeviceMemoryProperties *mp,
|
|
uint32_t type_bits)
|
|
{
|
|
/* Prefer DEVICE_LOCAL|HOST_VISIBLE|HOST_COHERENT (no manual flush/invalidate). */
|
|
const uint32_t want_pref =
|
|
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
|
|
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
|
|
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
|
|
for (uint32_t i = 0; i < mp->memoryTypeCount; i++) {
|
|
if ((type_bits & (1u << i)) &&
|
|
(mp->memoryTypes[i].propertyFlags & want_pref) == want_pref)
|
|
return i;
|
|
}
|
|
/* Fallback: any HOST_VISIBLE. */
|
|
for (uint32_t i = 0; i < mp->memoryTypeCount; i++) {
|
|
if ((type_bits & (1u << i)) &&
|
|
(mp->memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT))
|
|
return i;
|
|
}
|
|
fprintf(stderr, "[fail] no HOST_VISIBLE memory type matches type_bits=0x%x\n", type_bits);
|
|
exit(4);
|
|
}
|
|
|
|
int main(void)
|
|
{
|
|
/* ---- instance ---------------------------------------------------------- */
|
|
STEP("vkCreateInstance");
|
|
VkApplicationInfo app = {
|
|
.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO,
|
|
.pApplicationName = "panvk-bifrost iter1 compute probe",
|
|
.applicationVersion = 1,
|
|
.pEngineName = "none",
|
|
.engineVersion = 1,
|
|
.apiVersion = VK_API_VERSION_1_0,
|
|
};
|
|
VkInstanceCreateInfo ici = {
|
|
.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
|
|
.pApplicationInfo = &app,
|
|
};
|
|
VkInstance inst;
|
|
VK_CHECK(vkCreateInstance(&ici, NULL, &inst));
|
|
|
|
/* ---- enumerate + pick first physical device --------------------------- */
|
|
STEP("vkEnumeratePhysicalDevices");
|
|
uint32_t n_phys = 0;
|
|
VK_CHECK(vkEnumeratePhysicalDevices(inst, &n_phys, NULL));
|
|
if (n_phys == 0) { fprintf(stderr, "[fail] no physical devices\n"); return 5; }
|
|
VkPhysicalDevice *phys = calloc(n_phys, sizeof(*phys));
|
|
VK_CHECK(vkEnumeratePhysicalDevices(inst, &n_phys, phys));
|
|
VkPhysicalDevice gpu = phys[0];
|
|
|
|
VkPhysicalDeviceProperties pp;
|
|
vkGetPhysicalDeviceProperties(gpu, &pp);
|
|
fprintf(stderr, "[info] gpu='%s' apiVersion=%u.%u.%u driverVersion=%u\n",
|
|
pp.deviceName,
|
|
VK_VERSION_MAJOR(pp.apiVersion),
|
|
VK_VERSION_MINOR(pp.apiVersion),
|
|
VK_VERSION_PATCH(pp.apiVersion),
|
|
pp.driverVersion);
|
|
|
|
VkPhysicalDeviceMemoryProperties mp;
|
|
vkGetPhysicalDeviceMemoryProperties(gpu, &mp);
|
|
|
|
/* ---- queue family: graphics-or-compute -------------------------------- */
|
|
STEP("vkGetPhysicalDeviceQueueFamilyProperties");
|
|
uint32_t n_qf = 0;
|
|
vkGetPhysicalDeviceQueueFamilyProperties(gpu, &n_qf, NULL);
|
|
VkQueueFamilyProperties *qfp = calloc(n_qf, sizeof(*qfp));
|
|
vkGetPhysicalDeviceQueueFamilyProperties(gpu, &n_qf, qfp);
|
|
uint32_t qfam = UINT32_MAX;
|
|
for (uint32_t i = 0; i < n_qf; i++) {
|
|
if (qfp[i].queueFlags & VK_QUEUE_COMPUTE_BIT) { qfam = i; break; }
|
|
}
|
|
if (qfam == UINT32_MAX) { fprintf(stderr, "[fail] no compute queue family\n"); return 6; }
|
|
fprintf(stderr, "[info] using queue family %u (flags=0x%x)\n", qfam, qfp[qfam].queueFlags);
|
|
|
|
/* ---- device ----------------------------------------------------------- */
|
|
STEP("vkCreateDevice");
|
|
float qprio = 1.0f;
|
|
VkDeviceQueueCreateInfo qci = {
|
|
.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
|
|
.queueFamilyIndex = qfam,
|
|
.queueCount = 1,
|
|
.pQueuePriorities = &qprio,
|
|
};
|
|
VkDeviceCreateInfo dci = {
|
|
.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
|
|
.queueCreateInfoCount = 1,
|
|
.pQueueCreateInfos = &qci,
|
|
};
|
|
VkDevice dev;
|
|
VK_CHECK(vkCreateDevice(gpu, &dci, NULL, &dev));
|
|
|
|
VkQueue queue;
|
|
vkGetDeviceQueue(dev, qfam, 0, &queue);
|
|
|
|
/* ---- storage buffer + memory ----------------------------------------- */
|
|
STEP("vkCreateBuffer (storage, host-visible)");
|
|
VkBufferCreateInfo bci = {
|
|
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
|
|
.size = BUFFER_BYTES,
|
|
.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
|
|
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
|
|
};
|
|
VkBuffer buf;
|
|
VK_CHECK(vkCreateBuffer(dev, &bci, NULL, &buf));
|
|
|
|
VkMemoryRequirements mr;
|
|
vkGetBufferMemoryRequirements(dev, buf, &mr);
|
|
fprintf(stderr, "[info] buffer memReq size=%llu alignment=%llu typeBits=0x%x\n",
|
|
(unsigned long long)mr.size,
|
|
(unsigned long long)mr.alignment,
|
|
mr.memoryTypeBits);
|
|
|
|
STEP("vkAllocateMemory");
|
|
VkMemoryAllocateInfo mai = {
|
|
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
|
|
.allocationSize = mr.size,
|
|
.memoryTypeIndex = pick_host_visible_memtype(&mp, mr.memoryTypeBits),
|
|
};
|
|
VkDeviceMemory mem;
|
|
VK_CHECK(vkAllocateMemory(dev, &mai, NULL, &mem));
|
|
VK_CHECK(vkBindBufferMemory(dev, buf, mem, 0));
|
|
|
|
/* Pre-write a known initial pattern so we can tell if the GPU did anything. */
|
|
STEP("vkMapMemory (pre-write 0xDEADBEEF sentinel)");
|
|
void *mapped = NULL;
|
|
VK_CHECK(vkMapMemory(dev, mem, 0, VK_WHOLE_SIZE, 0, &mapped));
|
|
uint32_t *u32 = (uint32_t *)mapped;
|
|
for (size_t i = 0; i < BUFFER_BYTES / 4; i++) u32[i] = 0xDEADBEEFu;
|
|
|
|
/* ---- descriptor set --------------------------------------------------- */
|
|
STEP("vkCreateDescriptorSetLayout");
|
|
VkDescriptorSetLayoutBinding dslb = {
|
|
.binding = 0,
|
|
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
|
.descriptorCount = 1,
|
|
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
|
|
};
|
|
VkDescriptorSetLayoutCreateInfo dslci = {
|
|
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
|
|
.bindingCount = 1,
|
|
.pBindings = &dslb,
|
|
};
|
|
VkDescriptorSetLayout dsl;
|
|
VK_CHECK(vkCreateDescriptorSetLayout(dev, &dslci, NULL, &dsl));
|
|
|
|
STEP("vkCreateDescriptorPool");
|
|
VkDescriptorPoolSize dps = { VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1 };
|
|
VkDescriptorPoolCreateInfo dpci = {
|
|
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
|
|
.maxSets = 1,
|
|
.poolSizeCount = 1,
|
|
.pPoolSizes = &dps,
|
|
};
|
|
VkDescriptorPool dpool;
|
|
VK_CHECK(vkCreateDescriptorPool(dev, &dpci, NULL, &dpool));
|
|
|
|
STEP("vkAllocateDescriptorSets");
|
|
VkDescriptorSetAllocateInfo dsai = {
|
|
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
|
|
.descriptorPool = dpool,
|
|
.descriptorSetCount = 1,
|
|
.pSetLayouts = &dsl,
|
|
};
|
|
VkDescriptorSet dset;
|
|
VK_CHECK(vkAllocateDescriptorSets(dev, &dsai, &dset));
|
|
|
|
STEP("vkUpdateDescriptorSets");
|
|
VkDescriptorBufferInfo dbi = { buf, 0, VK_WHOLE_SIZE };
|
|
VkWriteDescriptorSet wds = {
|
|
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
|
|
.dstSet = dset,
|
|
.dstBinding = 0,
|
|
.descriptorCount = 1,
|
|
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
|
.pBufferInfo = &dbi,
|
|
};
|
|
vkUpdateDescriptorSets(dev, 1, &wds, 0, NULL);
|
|
|
|
/* ---- shader module + pipeline ---------------------------------------- */
|
|
STEP("vkCreateShaderModule (from " SPV_PATH ")");
|
|
size_t spv_bytes = 0;
|
|
uint32_t *spv = read_spv(SPV_PATH, &spv_bytes);
|
|
VkShaderModuleCreateInfo smci = {
|
|
.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
|
|
.codeSize = spv_bytes,
|
|
.pCode = spv,
|
|
};
|
|
VkShaderModule sm;
|
|
VK_CHECK(vkCreateShaderModule(dev, &smci, NULL, &sm));
|
|
free(spv);
|
|
|
|
STEP("vkCreatePipelineLayout");
|
|
VkPipelineLayoutCreateInfo plci = {
|
|
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
|
|
.setLayoutCount = 1,
|
|
.pSetLayouts = &dsl,
|
|
};
|
|
VkPipelineLayout pl;
|
|
VK_CHECK(vkCreatePipelineLayout(dev, &plci, NULL, &pl));
|
|
|
|
STEP("vkCreateComputePipelines");
|
|
VkComputePipelineCreateInfo cpci = {
|
|
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
|
|
.stage = {
|
|
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
|
|
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
|
|
.module = sm,
|
|
.pName = "main",
|
|
},
|
|
.layout = pl,
|
|
};
|
|
VkPipeline pipe;
|
|
VK_CHECK(vkCreateComputePipelines(dev, VK_NULL_HANDLE, 1, &cpci, NULL, &pipe));
|
|
|
|
/* ---- command buffer --------------------------------------------------- */
|
|
STEP("vkCreateCommandPool");
|
|
VkCommandPoolCreateInfo cpoolci = {
|
|
.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
|
|
.queueFamilyIndex = qfam,
|
|
};
|
|
VkCommandPool cpool;
|
|
VK_CHECK(vkCreateCommandPool(dev, &cpoolci, NULL, &cpool));
|
|
|
|
STEP("vkAllocateCommandBuffers");
|
|
VkCommandBufferAllocateInfo cbai = {
|
|
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
|
|
.commandPool = cpool,
|
|
.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
|
|
.commandBufferCount = 1,
|
|
};
|
|
VkCommandBuffer cb;
|
|
VK_CHECK(vkAllocateCommandBuffers(dev, &cbai, &cb));
|
|
|
|
STEP("vkBeginCommandBuffer + record dispatch");
|
|
VkCommandBufferBeginInfo cbbi = {
|
|
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
|
|
.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
|
|
};
|
|
VK_CHECK(vkBeginCommandBuffer(cb, &cbbi));
|
|
|
|
vkCmdBindPipeline(cb, VK_PIPELINE_BIND_POINT_COMPUTE, pipe);
|
|
vkCmdBindDescriptorSets(cb, VK_PIPELINE_BIND_POINT_COMPUTE, pl, 0, 1, &dset, 0, NULL);
|
|
vkCmdDispatch(cb, 1, 1, 1);
|
|
|
|
/* Barrier: shader storage write must be visible to host read. */
|
|
VkMemoryBarrier mb = {
|
|
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
|
|
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
|
|
.dstAccessMask = VK_ACCESS_HOST_READ_BIT,
|
|
};
|
|
vkCmdPipelineBarrier(cb,
|
|
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
|
|
0, 1, &mb, 0, NULL, 0, NULL);
|
|
|
|
VK_CHECK(vkEndCommandBuffer(cb));
|
|
|
|
/* ---- submit + wait ---------------------------------------------------- */
|
|
STEP("vkCreateFence");
|
|
VkFenceCreateInfo fci = { .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO };
|
|
VkFence fence;
|
|
VK_CHECK(vkCreateFence(dev, &fci, NULL, &fence));
|
|
|
|
STEP("vkQueueSubmit");
|
|
VkSubmitInfo si = {
|
|
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
|
|
.commandBufferCount = 1,
|
|
.pCommandBuffers = &cb,
|
|
};
|
|
VK_CHECK(vkQueueSubmit(queue, 1, &si, fence));
|
|
|
|
STEP("vkWaitForFences (5s timeout)");
|
|
VkResult wr = vkWaitForFences(dev, 1, &fence, VK_TRUE, 5ULL * 1000 * 1000 * 1000);
|
|
if (wr == VK_TIMEOUT) { fprintf(stderr, "[fail] fence TIMEOUT — GPU did not complete dispatch in 5s\n"); return 7; }
|
|
if (wr != VK_SUCCESS) { fprintf(stderr, "[fail] vkWaitForFences => %d\n", wr); return 8; }
|
|
|
|
/* ---- readback + verify ---------------------------------------------- */
|
|
STEP("vkInvalidateMappedMemoryRanges + readback");
|
|
VkMappedMemoryRange mmr = {
|
|
.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
|
|
.memory = mem,
|
|
.offset = 0,
|
|
.size = VK_WHOLE_SIZE,
|
|
};
|
|
/* Safe to invalidate even on COHERENT memory — it's a no-op then. */
|
|
vkInvalidateMappedMemoryRanges(dev, 1, &mmr);
|
|
|
|
uint32_t got = u32[0];
|
|
fprintf(stderr, "[info] buffer[0] = 0x%08x (expected 0x%08x)\n", got, EXPECTED_PATTERN);
|
|
int ok = (got == EXPECTED_PATTERN);
|
|
|
|
/* ---- teardown -------------------------------------------------------- */
|
|
vkUnmapMemory(dev, mem);
|
|
vkDestroyFence(dev, fence, NULL);
|
|
vkDestroyPipeline(dev, pipe, NULL);
|
|
vkDestroyPipelineLayout(dev, pl, NULL);
|
|
vkDestroyShaderModule(dev, sm, NULL);
|
|
vkDestroyDescriptorPool(dev, dpool, NULL);
|
|
vkDestroyDescriptorSetLayout(dev, dsl, NULL);
|
|
vkDestroyCommandPool(dev, cpool, NULL);
|
|
vkDestroyBuffer(dev, buf, NULL);
|
|
vkFreeMemory(dev, mem, NULL);
|
|
vkDestroyDevice(dev, NULL);
|
|
vkDestroyInstance(inst, NULL);
|
|
|
|
if (ok) {
|
|
fprintf(stderr, "[PASS] PanVk-Bifrost compute dispatch wrote the expected pattern.\n");
|
|
return 0;
|
|
} else {
|
|
fprintf(stderr, "[FAIL] readback mismatch.\n");
|
|
return 1;
|
|
}
|
|
}
|