/*
 * v3d_runner — implementation. See v3d_runner.h.
 *
 * License: BSD-2-Clause.
 */

#include "v3d_runner.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Error-check helpers. Each evaluates `call` exactly once; if the result is
 * not VK_SUCCESS it logs the numeric result, source location and call text
 * to stderr and then:
 *   CHK       returns -1 from the enclosing function,
 *   CHK_NULL  returns NULL from the enclosing function,
 *   CHK_GOTO  jumps to `label` (for functions that must unwind resources).
 */
#define CHK(call) do { VkResult r__ = (call); if (r__ != VK_SUCCESS) { \
        fprintf(stderr, "v3d_runner: vulkan error %d at %s:%d (%s)\n", \
                (int) r__, __FILE__, __LINE__, #call); return -1; } } while (0)

#define CHK_NULL(call) do { VkResult r__ = (call); if (r__ != VK_SUCCESS) { \
        fprintf(stderr, "v3d_runner: vulkan error %d at %s:%d (%s)\n", \
                (int) r__, __FILE__, __LINE__, #call); return NULL; } } while (0)

#define CHK_GOTO(call, label) do { VkResult r__ = (call); if (r__ != VK_SUCCESS) { \
        fprintf(stderr, "v3d_runner: vulkan error %d at %s:%d (%s)\n", \
                (int) r__, __FILE__, __LINE__, #call); goto label; } } while (0)

/* Runner state: one instance, one physical/logical device, one queue and
 * one command pool, plus cached device name and memory properties. */
struct v3d_runner {
    VkInstance       instance;
    VkPhysicalDevice phys;
    VkDevice         device;
    VkQueue          queue;
    uint32_t         queue_family;
    VkCommandPool    pool;
    char             device_name[VK_MAX_PHYSICAL_DEVICE_NAME_SIZE];
    VkPhysicalDeviceMemoryProperties mem_props;
};

/*
 * Select the first physical device whose deviceName contains "V3D".
 *
 * On success stores the handle in *out, copies the full deviceName array
 * into name_out and returns 0.  Returns -1 (after logging) when enumeration
 * fails, no devices exist, allocation fails, or no name matches.
 */
static int pick_v3d_physical_device(VkInstance inst, VkPhysicalDevice *out,
                                    char name_out[VK_MAX_PHYSICAL_DEVICE_NAME_SIZE])
{
    uint32_t n = 0;
    if (vkEnumeratePhysicalDevices(inst, &n, NULL) != VK_SUCCESS || n == 0) {
        fprintf(stderr, "v3d_runner: no Vulkan physical devices\n");
        return -1;
    }

    VkPhysicalDevice *pds = calloc(n, sizeof(*pds));
    if (!pds)
        return -1;

    /* VK_INCOMPLETE still fills the first n entries, so it is acceptable;
     * any other non-success result leaves the array contents unusable. */
    VkResult res = vkEnumeratePhysicalDevices(inst, &n, pds);
    if (res != VK_SUCCESS && res != VK_INCOMPLETE) {
        fprintf(stderr, "v3d_runner: vkEnumeratePhysicalDevices failed (%d)\n",
                (int) res);
        free(pds);
        return -1;
    }

    int picked = -1;
    for (uint32_t i = 0; i < n; i++) {
        VkPhysicalDeviceProperties p;
        vkGetPhysicalDeviceProperties(pds[i], &p);
        if (strstr(p.deviceName, "V3D") != NULL) {
            *out = pds[i];
            memcpy(name_out, p.deviceName, sizeof(p.deviceName));
            picked = 0;
            break;
        }
    }
    free(pds);

    if (picked != 0)
        fprintf(stderr, "v3d_runner: no V3D device found (looked for "
                        "\"V3D\" substring in deviceName)\n");
    return picked;
}

/*
 * Return the index of the first queue family advertising
 * VK_QUEUE_COMPUTE_BIT, or UINT32_MAX if there is none (or on allocation
 * failure).
 */
static uint32_t pick_compute_queue_family(VkPhysicalDevice phys)
{
    uint32_t n = 0;
    vkGetPhysicalDeviceQueueFamilyProperties(phys, &n, NULL);
    if (n == 0)
        return UINT32_MAX;

    VkQueueFamilyProperties *q = calloc(n, sizeof(*q));
    if (!q)
        return UINT32_MAX;
    vkGetPhysicalDeviceQueueFamilyProperties(phys, &n, q);

    uint32_t out = UINT32_MAX;
    for (uint32_t i = 0; i < n; i++) {
        if (q[i].queueFlags & VK_QUEUE_COMPUTE_BIT) {
            out = i;
            break;
        }
    }
    free(q);
    return out;
}

/*
 * Create a runner: Vulkan instance, V3D physical device, logical device
 * with one compute-capable queue, and a resettable command pool.
 *
 * Returns the new runner, or NULL after logging to stderr.  On failure
 * everything created so far (including the runner allocation itself) is
 * released.
 */
v3d_runner *v3d_runner_create(void)
{
    v3d_runner *r = calloc(1, sizeof(*r));
    if (!r)
        return NULL;

    /* Instance — Vulkan 1.3 to inherit 1.2 promoted features. */
    VkApplicationInfo app = {
        .sType            = VK_STRUCTURE_TYPE_APPLICATION_INFO,
        .pApplicationName = "daedalus-fourier",
        .apiVersion       = VK_API_VERSION_1_3,
    };
    VkInstanceCreateInfo ici = {
        .sType            = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
        .pApplicationInfo = &app,
    };
    CHK_GOTO(vkCreateInstance(&ici, NULL, &r->instance), fail);

    if (pick_v3d_physical_device(r->instance, &r->phys, r->device_name) != 0)
        goto fail_instance;
    vkGetPhysicalDeviceMemoryProperties(r->phys, &r->mem_props);

    r->queue_family = pick_compute_queue_family(r->phys);
    if (r->queue_family == UINT32_MAX) {
        fprintf(stderr, "v3d_runner: no compute queue family\n");
        goto fail_instance;
    }

    /* Enable 8-bit + 16-bit storage features.  Both are exposed on
     * V3D 7.1 per vulkaninfo_v3d_7_1_7_hertz.txt; the kernel
     * declares storageBuffer8BitAccess (uint8_t dst[]) and
     * storageBuffer16BitAccess (int16_t coeffs[]). */
    VkPhysicalDevice16BitStorageFeatures f16 = {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES,
        .storageBuffer16BitAccess           = VK_TRUE,
        .uniformAndStorageBuffer16BitAccess = VK_TRUE,
    };
    VkPhysicalDevice8BitStorageFeatures f8 = {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES,
        .pNext = &f16,
        .storageBuffer8BitAccess           = VK_TRUE,
        .uniformAndStorageBuffer8BitAccess = VK_TRUE,
    };
    VkPhysicalDeviceFeatures2 f2 = {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
        .pNext = &f8,
    };

    float qprio = 1.0f;
    VkDeviceQueueCreateInfo dqci = {
        .sType            = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
        .queueFamilyIndex = r->queue_family,
        .queueCount       = 1,
        .pQueuePriorities = &qprio,
    };
    VkDeviceCreateInfo dci = {
        .sType                = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
        .pNext                = &f2,
        .queueCreateInfoCount = 1,
        .pQueueCreateInfos    = &dqci,
    };
    if (vkCreateDevice(r->phys, &dci, NULL, &r->device) != VK_SUCCESS) {
        fprintf(stderr, "v3d_runner: vkCreateDevice failed\n");
        goto fail_instance;
    }
    vkGetDeviceQueue(r->device, r->queue_family, 0, &r->queue);

    VkCommandPoolCreateInfo cpci = {
        .sType            = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
        .flags            = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
        .queueFamilyIndex = r->queue_family,
    };
    if (vkCreateCommandPool(r->device, &cpci, NULL, &r->pool) != VK_SUCCESS) {
        fprintf(stderr, "v3d_runner: vkCreateCommandPool failed\n");
        goto fail_device;
    }
    return r;

    /* Staged unwind: each label releases only what was created before
     * the jump. */
fail_device:
    vkDestroyDevice(r->device, NULL);
fail_instance:
    vkDestroyInstance(r->instance, NULL);
fail:
    free(r);
    return NULL;
}

/*
 * Tear down a runner.  Accepts NULL.  Waits for the device to go idle
 * before destroying the command pool, device and instance, then frees r.
 */
void v3d_runner_destroy(v3d_runner *r)
{
    if (!r)
        return;
    if (r->device != VK_NULL_HANDLE)
        vkDeviceWaitIdle(r->device);
    if (r->pool != VK_NULL_HANDLE)
        vkDestroyCommandPool(r->device, r->pool, NULL);
    if (r->device != VK_NULL_HANDLE)
        vkDestroyDevice(r->device, NULL);
    if (r->instance != VK_NULL_HANDLE)
        vkDestroyInstance(r->instance, NULL);
    free(r);
}

/* Plain accessors; r must be non-NULL. */
VkDevice v3d_runner_device(v3d_runner *r)            { return r->device; }
VkQueue v3d_runner_queue(v3d_runner *r)              { return r->queue; }
uint32_t v3d_runner_queue_family(v3d_runner *r)      { return r->queue_family; }
VkCommandPool v3d_runner_cmd_pool(v3d_runner *r)     { return r->pool; }
const char *v3d_runner_device_name(v3d_runner *r)    { return r->device_name; }

/* ---- Buffers ---------------------------------------------------- */

/*
 * Return the index of the first memory type that is allowed by type_bits
 * and has every flag in `wanted`, or -1 if none qualifies.
 */
static int find_memory_type(const VkPhysicalDeviceMemoryProperties *p,
                            uint32_t type_bits,
                            VkMemoryPropertyFlags wanted)
{
    for (uint32_t i = 0; i < p->memoryTypeCount; i++) {
        if ((type_bits & (1u << i)) &&
            (p->memoryTypes[i].propertyFlags & wanted) == wanted)
            return (int) i;
    }
    return -1;
}

/*
 * Create a storage/transfer buffer of `size` bytes backed by
 * HOST_VISIBLE | HOST_COHERENT memory and map it persistently.
 *
 * On success fills *out (buffer, memory, mapped, size) and returns 0.
 * On failure logs to stderr, releases anything already created, leaves
 * *out zeroed and returns -1.
 */
int v3d_runner_create_buffer(v3d_runner *r, size_t size, v3d_buffer *out)
{
    memset(out, 0, sizeof(*out));
    out->size = size;

    VkBufferCreateInfo bci = {
        .sType       = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
        .size        = size,
        .usage       = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
                       VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
                       VK_BUFFER_USAGE_TRANSFER_DST_BIT,
        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
    };
    CHK_GOTO(vkCreateBuffer(r->device, &bci, NULL, &out->buffer), fail);

    VkMemoryRequirements req;
    vkGetBufferMemoryRequirements(r->device, out->buffer, &req);

    /* HOST_VISIBLE | HOST_COHERENT is the unified-memory zero-copy
     * path on Pi 5: CPU and GPU see the same LPDDR4x physical pages,
     * no explicit flush/invalidate needed (the COHERENT bit asserts
     * that). */
    int mt = find_memory_type(&r->mem_props, req.memoryTypeBits,
                              VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                              VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
    if (mt < 0) {
        fprintf(stderr, "v3d_runner: no HOST_VISIBLE|COHERENT memory type\n");
        goto fail_buffer;
    }

    VkMemoryAllocateInfo mai = {
        .sType           = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
        .allocationSize  = req.size,
        .memoryTypeIndex = (uint32_t) mt,
    };
    CHK_GOTO(vkAllocateMemory(r->device, &mai, NULL, &out->memory), fail_buffer);
    CHK_GOTO(vkBindBufferMemory(r->device, out->buffer, out->memory, 0), fail_memory);
    CHK_GOTO(vkMapMemory(r->device, out->memory, 0, VK_WHOLE_SIZE, 0, &out->mapped),
             fail_memory);
    return 0;

    /* Staged unwind so a failed call never leaks the buffer or memory. */
fail_memory:
    vkFreeMemory(r->device, out->memory, NULL);
fail_buffer:
    vkDestroyBuffer(r->device, out->buffer, NULL);
fail:
    memset(out, 0, sizeof(*out));
    return -1;
}

/*
 * Unmap (if mapped), destroy and free a buffer, then zero *buf.
 * No-op when buf is NULL or holds no buffer handle.
 */
void v3d_runner_destroy_buffer(v3d_runner *r, v3d_buffer *buf)
{
    if (!buf || buf->buffer == VK_NULL_HANDLE)
        return;
    if (buf->mapped)
        vkUnmapMemory(r->device, buf->memory);
    vkDestroyBuffer(r->device, buf->buffer, NULL);
    vkFreeMemory(r->device, buf->memory, NULL);
    memset(buf, 0, sizeof(*buf));
}

/* ---- Pipelines -------------------------------------------------- */

/*
 * Read a whole SPIR-V file into a malloc'ed buffer.
 *
 * Returns the buffer (caller frees) and stores its byte size in *out_size,
 * or returns NULL after logging when the file cannot be opened, sized or
 * read, or when its size is not a positive multiple of 4.
 */
static uint32_t *read_spv(const char *path, size_t *out_size)
{
    FILE *f = fopen(path, "rb");
    if (!f) {
        perror(path);
        return NULL;
    }

    /* A failed seek leaves sz at -1 and is reported as a bad size. */
    long sz = -1;
    if (fseek(f, 0, SEEK_END) == 0)
        sz = ftell(f);
    if (sz <= 0 || (sz & 3)) {
        fprintf(stderr, "%s: bad SPIR-V size %ld\n", path, sz);
        fclose(f);
        return NULL;
    }
    if (fseek(f, 0, SEEK_SET) != 0) {
        perror(path);
        fclose(f);
        return NULL;
    }

    uint32_t *buf = malloc((size_t) sz);
    if (!buf || fread(buf, 1, (size_t) sz, f) != (size_t) sz) {
        perror("read");
        fclose(f);
        free(buf);
        return NULL;
    }
    fclose(f);
    *out_size = (size_t) sz;
    return buf;
}

/*
 * Build a compute pipeline from the SPIR-V file at spv_path.
 *
 * The pipeline gets one descriptor set with n_ssbos storage-buffer
 * bindings (binding i for i in [0, n_ssbos)), an optional push-constant
 * range of push_const_size bytes, entry point "main", and its own
 * descriptor pool holding a single allocated set.
 *
 * Returns 0 and fills *out on success.  On failure logs to stderr,
 * destroys every Vulkan object created so far, leaves *out zeroed and
 * returns -1.
 */
int v3d_runner_create_pipeline(v3d_runner *r, const char *spv_path,
                               uint32_t n_ssbos, uint32_t push_const_size,
                               v3d_pipeline *out)
{
    VkResult vr;

    memset(out, 0, sizeof(*out));
    out->n_ssbos = n_ssbos;
    out->push_const_size = push_const_size;

    /* Descriptor set layout: n_ssbos SSBO bindings, compute-only. */
    VkDescriptorSetLayoutBinding *binds = calloc(n_ssbos, sizeof(*binds));
    if (!binds)
        goto fail;
    for (uint32_t i = 0; i < n_ssbos; i++) {
        binds[i] = (VkDescriptorSetLayoutBinding){
            .binding         = i,
            .descriptorType  = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .descriptorCount = 1,
            .stageFlags      = VK_SHADER_STAGE_COMPUTE_BIT,
        };
    }
    VkDescriptorSetLayoutCreateInfo dslci = {
        .sType        = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
        .bindingCount = n_ssbos,
        .pBindings    = binds,
    };
    vr = vkCreateDescriptorSetLayout(r->device, &dslci, NULL, &out->ds_layout);
    free(binds);
    if (vr != VK_SUCCESS) {
        fprintf(stderr, "vkCreateDescriptorSetLayout = %d\n", (int) vr);
        goto fail;
    }

    VkPushConstantRange pcr = {
        .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
        .offset     = 0,
        .size       = push_const_size,
    };
    VkPipelineLayoutCreateInfo plci = {
        .sType                  = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
        .setLayoutCount         = 1,
        .pSetLayouts            = &out->ds_layout,
        .pushConstantRangeCount = push_const_size ? 1 : 0,
        .pPushConstantRanges    = push_const_size ? &pcr : NULL,
    };
    CHK_GOTO(vkCreatePipelineLayout(r->device, &plci, NULL, &out->layout),
             fail_ds_layout);

    size_t spv_size = 0;
    uint32_t *spv = read_spv(spv_path, &spv_size);
    if (!spv)
        goto fail_layout;

    VkShaderModuleCreateInfo smci = {
        .sType    = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
        .codeSize = spv_size,
        .pCode    = spv,
    };
    VkShaderModule shader;
    vr = vkCreateShaderModule(r->device, &smci, NULL, &shader);
    free(spv);
    if (vr != VK_SUCCESS) {
        fprintf(stderr, "vkCreateShaderModule(%s) = %d\n", spv_path, (int) vr);
        goto fail_layout;
    }

    VkComputePipelineCreateInfo cpci = {
        .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
        .stage = {
            .sType  = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
            .stage  = VK_SHADER_STAGE_COMPUTE_BIT,
            .module = shader,
            .pName  = "main",
        },
        .layout = out->layout,
    };
    vr = vkCreateComputePipelines(r->device, VK_NULL_HANDLE, 1, &cpci, NULL,
                                  &out->pipeline);
    /* The module is only needed during pipeline creation. */
    vkDestroyShaderModule(r->device, shader, NULL);
    if (vr != VK_SUCCESS) {
        fprintf(stderr, "vkCreateComputePipelines = %d\n", (int) vr);
        goto fail_layout;
    }

    /* Single descriptor pool + set for this pipeline. */
    VkDescriptorPoolSize ps = {
        .type            = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
        .descriptorCount = n_ssbos,
    };
    VkDescriptorPoolCreateInfo dpci = {
        .sType         = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
        .maxSets       = 1,
        .poolSizeCount = 1,
        .pPoolSizes    = &ps,
    };
    CHK_GOTO(vkCreateDescriptorPool(r->device, &dpci, NULL, &out->pool),
             fail_pipeline);

    VkDescriptorSetAllocateInfo dsai = {
        .sType              = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
        .descriptorPool     = out->pool,
        .descriptorSetCount = 1,
        .pSetLayouts        = &out->ds_layout,
    };
    CHK_GOTO(vkAllocateDescriptorSets(r->device, &dsai, &out->desc_set),
             fail_pool);
    return 0;

    /* Staged unwind.  v3d_runner_destroy_pipeline() ignores a pipeline
     * whose `pipeline` handle is null, so partial state must be released
     * here or it would be unreachable. */
fail_pool:
    vkDestroyDescriptorPool(r->device, out->pool, NULL);
fail_pipeline:
    vkDestroyPipeline(r->device, out->pipeline, NULL);
fail_layout:
    vkDestroyPipelineLayout(r->device, out->layout, NULL);
fail_ds_layout:
    vkDestroyDescriptorSetLayout(r->device, out->ds_layout, NULL);
fail:
    memset(out, 0, sizeof(*out));
    return -1;
}

/*
 * Destroy a pipeline and the layout, descriptor pool and set layout that
 * were created with it, then zero *p.  No-op when p is NULL or holds no
 * pipeline handle.
 */
void v3d_runner_destroy_pipeline(v3d_runner *r, v3d_pipeline *p)
{
    if (!p || p->pipeline == VK_NULL_HANDLE)
        return;
    vkDestroyPipeline(r->device, p->pipeline, NULL);
    vkDestroyPipelineLayout(r->device, p->layout, NULL);
    vkDestroyDescriptorPool(r->device, p->pool, NULL);  /* frees its set */
    vkDestroyDescriptorSetLayout(r->device, p->ds_layout, NULL);
    memset(p, 0, sizeof(*p));
}

/*
 * Point binding i of the pipeline's descriptor set at bufs[i] (offset 0,
 * range bufs[i].size) for i in [0, n).
 *
 * n must equal the pipeline's n_ssbos.  Returns 0 on success, -1 on a
 * count mismatch (logged) or allocation failure.
 */
int v3d_runner_bind_buffers(v3d_runner *r, v3d_pipeline *p,
                            const v3d_buffer *bufs, uint32_t n)
{
    if (n != p->n_ssbos) {
        fprintf(stderr, "bind_buffers: n=%u != pipeline n_ssbos=%u\n",
                n, p->n_ssbos);
        return -1;
    }

    VkDescriptorBufferInfo *bi = calloc(n, sizeof(*bi));
    VkWriteDescriptorSet   *wr = calloc(n, sizeof(*wr));
    if (!bi || !wr) {
        free(bi);
        free(wr);
        return -1;
    }

    for (uint32_t i = 0; i < n; i++) {
        bi[i].buffer = bufs[i].buffer;
        bi[i].offset = 0;
        bi[i].range  = bufs[i].size;
        wr[i] = (VkWriteDescriptorSet){
            .sType           = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
            .dstSet          = p->desc_set,
            .dstBinding      = i,
            .descriptorCount = 1,
            .descriptorType  = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .pBufferInfo     = &bi[i],
        };
    }
    vkUpdateDescriptorSets(r->device, n, wr, 0, NULL);

    free(bi);
    free(wr);
    return 0;
}

/* ---- Command buffers ------------------------------------------- */

/*
 * Allocate one primary command buffer from the runner's pool.
 * Returns VK_NULL_HANDLE on failure.
 */
VkCommandBuffer v3d_runner_alloc_cmdbuf(v3d_runner *r)
{
    VkCommandBufferAllocateInfo cbai = {
        .sType              = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
        .commandPool        = r->pool,
        .level              = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
        .commandBufferCount = 1,
    };
    VkCommandBuffer cb = VK_NULL_HANDLE;
    if (vkAllocateCommandBuffers(r->device, &cbai, &cb) != VK_SUCCESS)
        return VK_NULL_HANDLE;
    return cb;
}

/*
 * Submit one command buffer to the runner's queue and block until the
 * queue is idle.  Returns 0 on success, -1 (after logging) on failure.
 */
int v3d_runner_submit_wait(v3d_runner *r, VkCommandBuffer cb)
{
    VkSubmitInfo si = {
        .sType              = VK_STRUCTURE_TYPE_SUBMIT_INFO,
        .commandBufferCount = 1,
        .pCommandBuffers    = &cb,
    };
    CHK(vkQueueSubmit(r->queue, 1, &si, VK_NULL_HANDLE));
    CHK(vkQueueWaitIdle(r->queue));
    return 0;
}