98553278dd
Phase 2 of the QPU-default substrate campaign — eliminate
vkAllocateCommandBuffers from the dispatch hot path.
Attaches a VkCommandBuffer to each v3d_pipeline, allocated once in
v3d_runner_create_pipeline() and freed in destroy_pipeline(). The
five dispatch_*_qpu sites switch from v3d_runner_alloc_cmdbuf() to
v3d_runner_pipeline_cmdbuf_reset() — vkResetCommandBuffer is O(1)
versus the driver-side allocation walk. Pool was already created
with VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT so reset is
permitted.
Microbench (hertz, Pi 5, kernel 6.18.29, V3D 7.1):
before (task 160 pool only):
steady-state p50: 76.44 us
steady-state mean: 77.95 us
after (task 160 pool + task 161 persistent cb):
steady-state p50: 54.56 us
steady-state mean: 56.00 us
-> 28% per-dispatch reduction
The remaining ~54 us steady-state is dominated by vkQueueWaitIdle +
shader execution + the two memcpy(in/out) on the dst buffer — task
162 (dmabuf import for dst) targets the memcpy half.
test_api_idct stays bit-exact across CPU/QPU/AUTO substrates.
Refs daedalus-fourier task #161.
580 lines
19 KiB
C
580 lines
19 KiB
C
/*
|
|
* v3d_runner — implementation. See v3d_runner.h.
|
|
*
|
|
* License: BSD-2-Clause.
|
|
*/
|
|
#include "v3d_runner.h"
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
/* Run a Vulkan call that yields a VkResult. If the result is anything
 * other than VK_SUCCESS, print the result code, the source location and
 * the text of the call to stderr, then return -1 from the enclosing
 * function. */
#define CHK(call)                                                          \
    do {                                                                   \
        VkResult chk_result_ = (call);                                     \
        if (chk_result_ != VK_SUCCESS) {                                   \
            fprintf(stderr,                                                \
                    "v3d_runner: vulkan error %d at %s:%d (%s)\n",         \
                    chk_result_, __FILE__, __LINE__, #call);               \
            return -1;                                                     \
        }                                                                  \
    } while (0)

/* Same as CHK, but for functions returning a pointer: the enclosing
 * function returns NULL on failure. */
#define CHK_NULL(call)                                                     \
    do {                                                                   \
        VkResult chk_result_ = (call);                                     \
        if (chk_result_ != VK_SUCCESS) {                                   \
            fprintf(stderr,                                                \
                    "v3d_runner: vulkan error %d at %s:%d (%s)\n",         \
                    chk_result_, __FILE__, __LINE__, #call);               \
            return NULL;                                                   \
        }                                                                  \
    } while (0)
|
|
|
|
/* The buffer pool uses power-of-two size classes, the smallest being
 * 2^8 bytes (256 B) and the largest 2^23 bytes (8 MiB). Cycle 1's
 * largest dispatch (n_blocks ≈ 8K) is well under 8 MiB; anything bigger
 * than the largest class is allocated outside the pool. */
#define V3D_POOL_MIN_LOG2  8
#define V3D_POOL_MAX_LOG2  23
/* Number of size classes, both ends inclusive. */
#define V3D_POOL_BUCKETS   (V3D_POOL_MAX_LOG2 - V3D_POOL_MIN_LOG2 + 1)
|
|
|
|
struct v3d_pool_entry {
|
|
v3d_buffer buf;
|
|
struct v3d_pool_entry *next;
|
|
};
|
|
|
|
struct v3d_runner {
|
|
VkInstance instance;
|
|
VkPhysicalDevice phys;
|
|
VkDevice device;
|
|
VkQueue queue;
|
|
uint32_t queue_family;
|
|
VkCommandPool pool;
|
|
char device_name[VK_MAX_PHYSICAL_DEVICE_NAME_SIZE];
|
|
VkPhysicalDeviceMemoryProperties mem_props;
|
|
|
|
/* Buffer pool: per-bucket freelist of previously-released
|
|
* v3d_buffer. bucket index = ceil_log2(size) - V3D_POOL_MIN_LOG2.
|
|
* pool_total_bytes accumulates every successful vkAllocateMemory
|
|
* we've done through the pool — never decreases (the freelist
|
|
* just hands buffers around, no vkFreeMemory until destroy).
|
|
*/
|
|
struct v3d_pool_entry *pool_free[V3D_POOL_BUCKETS];
|
|
size_t pool_total_bytes;
|
|
};
|
|
|
|
static int pick_v3d_physical_device(VkInstance inst, VkPhysicalDevice *out,
|
|
char name_out[VK_MAX_PHYSICAL_DEVICE_NAME_SIZE])
|
|
{
|
|
uint32_t n = 0;
|
|
if (vkEnumeratePhysicalDevices(inst, &n, NULL) != VK_SUCCESS || n == 0) {
|
|
fprintf(stderr, "v3d_runner: no Vulkan physical devices\n");
|
|
return -1;
|
|
}
|
|
VkPhysicalDevice *pds = malloc(n * sizeof(*pds));
|
|
if (!pds) return -1;
|
|
vkEnumeratePhysicalDevices(inst, &n, pds);
|
|
|
|
int picked = -1;
|
|
for (uint32_t i = 0; i < n; i++) {
|
|
VkPhysicalDeviceProperties p;
|
|
vkGetPhysicalDeviceProperties(pds[i], &p);
|
|
if (strstr(p.deviceName, "V3D") != NULL) {
|
|
*out = pds[i];
|
|
memcpy(name_out, p.deviceName, sizeof(p.deviceName));
|
|
picked = 0;
|
|
break;
|
|
}
|
|
}
|
|
free(pds);
|
|
if (picked != 0)
|
|
fprintf(stderr, "v3d_runner: no V3D device found (looked for "
|
|
"\"V3D\" substring in deviceName)\n");
|
|
return picked;
|
|
}
|
|
|
|
/*
 * Return the index of the first queue family on `phys` that advertises
 * VK_QUEUE_COMPUTE_BIT, or UINT32_MAX if there is none or the property
 * array cannot be allocated.
 */
static uint32_t pick_compute_queue_family(VkPhysicalDevice phys)
{
    uint32_t family_count = 0;
    uint32_t chosen = UINT32_MAX;
    VkQueueFamilyProperties *families;

    /* First call fetches the count, second call fills the array. */
    vkGetPhysicalDeviceQueueFamilyProperties(phys, &family_count, NULL);
    families = malloc(family_count * sizeof(*families));
    if (families == NULL)
        return UINT32_MAX;
    vkGetPhysicalDeviceQueueFamilyProperties(phys, &family_count, families);

    for (uint32_t idx = 0; idx < family_count && chosen == UINT32_MAX; idx++) {
        if ((families[idx].queueFlags & VK_QUEUE_COMPUTE_BIT) != 0)
            chosen = idx;
    }

    free(families);
    return chosen;
}
|
|
|
|
v3d_runner *v3d_runner_create(void)
|
|
{
|
|
v3d_runner *r = calloc(1, sizeof(*r));
|
|
if (!r) return NULL;
|
|
|
|
/* Instance — Vulkan 1.3 to inherit 1.2 promoted features. */
|
|
VkApplicationInfo app = {
|
|
.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO,
|
|
.pApplicationName = "daedalus-fourier",
|
|
.apiVersion = VK_API_VERSION_1_3,
|
|
};
|
|
VkInstanceCreateInfo ici = {
|
|
.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
|
|
.pApplicationInfo = &app,
|
|
};
|
|
CHK_NULL(vkCreateInstance(&ici, NULL, &r->instance));
|
|
|
|
if (pick_v3d_physical_device(r->instance, &r->phys, r->device_name) != 0) {
|
|
vkDestroyInstance(r->instance, NULL);
|
|
free(r);
|
|
return NULL;
|
|
}
|
|
|
|
vkGetPhysicalDeviceMemoryProperties(r->phys, &r->mem_props);
|
|
|
|
r->queue_family = pick_compute_queue_family(r->phys);
|
|
if (r->queue_family == UINT32_MAX) {
|
|
fprintf(stderr, "v3d_runner: no compute queue family\n");
|
|
vkDestroyInstance(r->instance, NULL);
|
|
free(r);
|
|
return NULL;
|
|
}
|
|
|
|
/* Enable 8-bit + 16-bit storage features. Both are exposed on
|
|
* V3D 7.1 per vulkaninfo_v3d_7_1_7_hertz.txt; the kernel
|
|
* declares storageBuffer8BitAccess (uint8_t dst[]) and
|
|
* storageBuffer16BitAccess (int16_t coeffs[]).
|
|
*/
|
|
VkPhysicalDevice16BitStorageFeatures f16 = {
|
|
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES,
|
|
.storageBuffer16BitAccess = VK_TRUE,
|
|
.uniformAndStorageBuffer16BitAccess = VK_TRUE,
|
|
};
|
|
VkPhysicalDevice8BitStorageFeatures f8 = {
|
|
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES,
|
|
.pNext = &f16,
|
|
.storageBuffer8BitAccess = VK_TRUE,
|
|
.uniformAndStorageBuffer8BitAccess = VK_TRUE,
|
|
};
|
|
VkPhysicalDeviceFeatures2 f2 = {
|
|
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
|
|
.pNext = &f8,
|
|
};
|
|
|
|
float qprio = 1.0f;
|
|
VkDeviceQueueCreateInfo dqci = {
|
|
.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
|
|
.queueFamilyIndex = r->queue_family,
|
|
.queueCount = 1,
|
|
.pQueuePriorities = &qprio,
|
|
};
|
|
VkDeviceCreateInfo dci = {
|
|
.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
|
|
.pNext = &f2,
|
|
.queueCreateInfoCount = 1,
|
|
.pQueueCreateInfos = &dqci,
|
|
};
|
|
if (vkCreateDevice(r->phys, &dci, NULL, &r->device) != VK_SUCCESS) {
|
|
fprintf(stderr, "v3d_runner: vkCreateDevice failed\n");
|
|
vkDestroyInstance(r->instance, NULL);
|
|
free(r);
|
|
return NULL;
|
|
}
|
|
vkGetDeviceQueue(r->device, r->queue_family, 0, &r->queue);
|
|
|
|
VkCommandPoolCreateInfo cpci = {
|
|
.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
|
|
.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
|
|
.queueFamilyIndex = r->queue_family,
|
|
};
|
|
if (vkCreateCommandPool(r->device, &cpci, NULL, &r->pool) != VK_SUCCESS) {
|
|
fprintf(stderr, "v3d_runner: vkCreateCommandPool failed\n");
|
|
vkDestroyDevice(r->device, NULL);
|
|
vkDestroyInstance(r->instance, NULL);
|
|
free(r);
|
|
return NULL;
|
|
}
|
|
|
|
return r;
|
|
}
|
|
|
|
/*
 * Tear down a runner created by v3d_runner_create(). A NULL argument is
 * ignored. Waits for the device to go idle, destroys every pooled
 * buffer, then the command pool, device and instance, and finally frees
 * the runner itself.
 */
void v3d_runner_destroy(v3d_runner *r)
{
    if (r == NULL)
        return;

    if (r->device != VK_NULL_HANDLE)
        vkDeviceWaitIdle(r->device);

    /* Pool entries own VkBuffer/VkDeviceMemory handles, and destroying
     * those needs a live device, so empty every freelist first. */
    for (int bucket = 0; bucket < V3D_POOL_BUCKETS; bucket++) {
        struct v3d_pool_entry *node;

        while ((node = r->pool_free[bucket]) != NULL) {
            r->pool_free[bucket] = node->next;
            v3d_runner_destroy_buffer(r, &node->buf);
            free(node);
        }
    }

    if (r->pool != VK_NULL_HANDLE) {
        vkDestroyCommandPool(r->device, r->pool, NULL);
    }
    if (r->device != VK_NULL_HANDLE) {
        vkDestroyDevice(r->device, NULL);
    }
    if (r->instance != VK_NULL_HANDLE) {
        vkDestroyInstance(r->instance, NULL);
    }
    free(r);
}
|
|
|
|
/* ---- Buffer pool ----------------------------------------------- */
|
|
|
|
/* ceil_log2 for buffer pool bucket selection. */
|
|
static int v3d_pool_bucket_for(size_t size)
|
|
{
|
|
int log2;
|
|
size_t m;
|
|
|
|
if (size <= ((size_t)1 << V3D_POOL_MIN_LOG2))
|
|
return 0;
|
|
m = size - 1;
|
|
log2 = 0;
|
|
while (m) { log2++; m >>= 1; }
|
|
if (log2 < V3D_POOL_MIN_LOG2) log2 = V3D_POOL_MIN_LOG2;
|
|
if (log2 > V3D_POOL_MAX_LOG2) return -1;
|
|
return log2 - V3D_POOL_MIN_LOG2;
|
|
}
|
|
|
|
/*
 * Obtain a buffer of at least `size` bytes, reusing a pooled one when
 * the matching size class has a free entry.
 *
 * Returns -1 for a NULL runner, NULL `out`, or zero size. Oversize
 * requests (no bucket) are created directly at the requested size and
 * not counted in pool_total_bytes. On a pool miss a new buffer is
 * created at the full bucket size and, if that succeeds, the bucket
 * size is added to pool_total_bytes. Otherwise returns whatever
 * v3d_runner_create_buffer() returned, or 0 on a pool hit.
 */
int v3d_runner_acquire_buffer(v3d_runner *r, size_t size, v3d_buffer *out)
{
    if (r == NULL || out == NULL || size == 0)
        return -1;

    const int idx = v3d_pool_bucket_for(size);
    if (idx < 0) {
        /* Too large for any size class: allocate outside the pool.
         * v3d_runner_release_buffer() recomputes the bucket from the
         * buffer's size and destroys such buffers rather than pooling
         * them. */
        return v3d_runner_create_buffer(r, size, out);
    }

    /* Hit: pop the head of this bucket's freelist. */
    struct v3d_pool_entry *hit = r->pool_free[idx];
    if (hit != NULL) {
        r->pool_free[idx] = hit->next;
        *out = hit->buf;
        free(hit);
        return 0;
    }

    /* Miss: create a buffer sized to the whole class so later requests
     * in the same class can reuse it after release. */
    const size_t class_bytes = (size_t)1 << (idx + V3D_POOL_MIN_LOG2);
    const int rc = v3d_runner_create_buffer(r, class_bytes, out);
    if (rc != 0)
        return rc;

    r->pool_total_bytes += class_bytes;
    return 0;
}
|
|
|
|
/*
 * Give a buffer back. A buffer whose size fits a pool size class is
 * pushed onto that class's freelist; otherwise it is destroyed. In
 * every case `*buf` is zeroed afterwards. A NULL runner, a NULL `buf`,
 * or a buffer with a null VkBuffer handle is ignored.
 */
void v3d_runner_release_buffer(v3d_runner *r, v3d_buffer *buf)
{
    if (r == NULL || buf == NULL || buf->buffer == VK_NULL_HANDLE)
        return;

    const int idx = v3d_pool_bucket_for(buf->size);

    /* A freelist node is only needed for poolable sizes. */
    struct v3d_pool_entry *node = (idx >= 0) ? malloc(sizeof(*node)) : NULL;

    if (node == NULL) {
        /* Either the buffer is oversize and was never pooled, or the
         * node allocation failed. Destroying it keeps things leak-free;
         * the pool simply degrades to non-pooled behaviour. */
        v3d_runner_destroy_buffer(r, buf);
        memset(buf, 0, sizeof(*buf));
        return;
    }

    /* Push onto the head of the bucket's freelist. */
    node->buf = *buf;
    node->next = r->pool_free[idx];
    r->pool_free[idx] = node;

    memset(buf, 0, sizeof(*buf));
}
|
|
|
|
/* Report the pool's running byte total (see pool_total_bytes in
 * struct v3d_runner). Returns 0 for a NULL runner. */
size_t v3d_runner_pool_total_bytes(v3d_runner *r)
{
    if (r == NULL)
        return 0;
    return r->pool_total_bytes;
}
|
|
|
|
VkDevice v3d_runner_device(v3d_runner *r) { return r->device; }
|
|
VkQueue v3d_runner_queue(v3d_runner *r) { return r->queue; }
|
|
uint32_t v3d_runner_queue_family(v3d_runner *r) { return r->queue_family; }
|
|
VkCommandPool v3d_runner_cmd_pool(v3d_runner *r) { return r->pool; }
|
|
const char *v3d_runner_device_name(v3d_runner *r) { return r->device_name; }
|
|
|
|
/* ---- Buffers ---------------------------------------------------- */
|
|
|
|
/*
 * Find the lowest-indexed memory type that is allowed by `type_bits`
 * (bit i set means type i is acceptable) and whose property flags
 * include every bit in `wanted`.
 *
 * Returns the memory type index, or -1 if no type qualifies.
 */
static int find_memory_type(VkPhysicalDeviceMemoryProperties *p,
                            uint32_t type_bits, VkMemoryPropertyFlags wanted)
{
    for (uint32_t idx = 0; idx < p->memoryTypeCount; idx++) {
        const uint32_t mask = 1u << idx;

        if ((type_bits & mask) == 0)
            continue;   /* not permitted for this resource */
        if ((p->memoryTypes[idx].propertyFlags & wanted) != wanted)
            continue;   /* lacks at least one requested property */
        return (int) idx;
    }
    return -1;
}
|
|
|
|
/* Local helper for v3d_runner_create_buffer(): run a Vulkan call and, on
 * any result other than VK_SUCCESS, log it in the same format as CHK and
 * jump to the function's `fail` label so partial state is released. */
#define V3D_BUF_TRY(call)                                                  \
    do {                                                                   \
        VkResult buf_try_vr_ = (call);                                     \
        if (buf_try_vr_ != VK_SUCCESS) {                                   \
            fprintf(stderr,                                                \
                    "v3d_runner: vulkan error %d at %s:%d (%s)\n",         \
                    buf_try_vr_, __FILE__, __LINE__, #call);               \
            goto fail;                                                     \
        }                                                                  \
    } while (0)

/*
 * Create a storage/transfer buffer of `size` bytes backed by
 * HOST_VISIBLE | HOST_COHERENT memory and map it persistently.
 *
 *   r     runner supplying the device and memory properties.
 *   size  requested buffer size in bytes.
 *   out   receives buffer, memory, mapped pointer and size on success.
 *
 * Returns 0 on success, -1 on failure. On failure any buffer or memory
 * already created is released and `*out` is left fully zeroed, so no
 * Vulkan object leaks and no dangling handle is exposed.
 */
int v3d_runner_create_buffer(v3d_runner *r, size_t size, v3d_buffer *out)
{
    int have_buffer = 0;   /* set once vkCreateBuffer has succeeded */
    int have_memory = 0;   /* set once vkAllocateMemory has succeeded */

    memset(out, 0, sizeof(*out));
    out->size = size;

    VkBufferCreateInfo bci = {
        .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
        .size = size,
        .usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT
               | VK_BUFFER_USAGE_TRANSFER_SRC_BIT
               | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
    };
    V3D_BUF_TRY(vkCreateBuffer(r->device, &bci, NULL, &out->buffer));
    have_buffer = 1;

    VkMemoryRequirements req;
    vkGetBufferMemoryRequirements(r->device, out->buffer, &req);

    /* HOST_VISIBLE | HOST_COHERENT is the unified-memory zero-copy
     * path on Pi 5: CPU and GPU see the same LPDDR4x physical pages,
     * no explicit flush/invalidate needed (the COHERENT bit asserts
     * that). */
    int mt = find_memory_type(&r->mem_props, req.memoryTypeBits,
                              VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
                              | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
    if (mt < 0) {
        fprintf(stderr, "v3d_runner: no HOST_VISIBLE|COHERENT memory type\n");
        goto fail;
    }

    VkMemoryAllocateInfo mai = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
        .allocationSize = req.size,
        .memoryTypeIndex = (uint32_t) mt,
    };
    V3D_BUF_TRY(vkAllocateMemory(r->device, &mai, NULL, &out->memory));
    have_memory = 1;

    V3D_BUF_TRY(vkBindBufferMemory(r->device, out->buffer, out->memory, 0));
    V3D_BUF_TRY(vkMapMemory(r->device, out->memory, 0, VK_WHOLE_SIZE, 0, &out->mapped));
    return 0;

fail:
    /* Mapping is the last step, so nothing is mapped on this path. */
    if (have_buffer)
        vkDestroyBuffer(r->device, out->buffer, NULL);
    if (have_memory)
        vkFreeMemory(r->device, out->memory, NULL);
    memset(out, 0, sizeof(*out));
    return -1;
}

#undef V3D_BUF_TRY
|
|
|
|
/*
 * Destroy a buffer made by v3d_runner_create_buffer(): unmap it if it
 * is mapped, destroy the VkBuffer, free its memory, and zero `*buf`.
 * Does nothing when `buf` is NULL or holds a null VkBuffer handle.
 */
void v3d_runner_destroy_buffer(v3d_runner *r, v3d_buffer *buf)
{
    if (buf == NULL)
        return;
    if (buf->buffer == VK_NULL_HANDLE)
        return;

    if (buf->mapped != NULL) {
        vkUnmapMemory(r->device, buf->memory);
    }
    vkDestroyBuffer(r->device, buf->buffer, NULL);
    vkFreeMemory(r->device, buf->memory, NULL);

    /* Clear the handles so a second destroy on the same struct is a no-op. */
    memset(buf, 0, sizeof(*buf));
}
|
|
|
|
/* ---- Pipelines -------------------------------------------------- */
|
|
|
|
/*
 * Read a whole SPIR-V binary into a freshly malloc'd word buffer.
 *
 *   path      file to read (opened in binary mode).
 *   out_size  receives the file size in bytes on success; untouched on
 *             failure.
 *
 * Returns the buffer (caller frees with free()), or NULL if the file
 * cannot be opened, measured or fully read, if its size is zero or not
 * a multiple of 4, or if allocation fails. A diagnostic is printed to
 * stderr in each failure case.
 */
static uint32_t *read_spv(const char *path, size_t *out_size)
{
    uint32_t *words = NULL;
    long sz = 0;

    FILE *f = fopen(path, "rb");
    if (!f) { perror(path); return NULL; }

    /* Measure the file. Every seek/tell step is checked so a failure is
     * reported as an I/O error rather than as a bogus size. */
    if (fseek(f, 0, SEEK_END) != 0 || (sz = ftell(f)) < 0 ||
        fseek(f, 0, SEEK_SET) != 0) {
        perror(path);
        goto out;
    }

    /* SPIR-V is a stream of 32-bit words: reject empty or ragged files. */
    if (sz == 0 || (sz & 3)) {
        fprintf(stderr, "%s: bad SPIR-V size %ld\n", path, sz);
        goto out;
    }

    words = malloc((size_t)sz);
    if (!words) {
        fprintf(stderr, "%s: out of memory reading %ld bytes\n", path, sz);
        goto out;
    }

    if (fread(words, 1, (size_t)sz, f) != (size_t)sz) {
        /* A short read may be EOF rather than an error, in which case
         * errno is not meaningful, so avoid perror here. */
        fprintf(stderr, "%s: short read\n", path);
        free(words);
        words = NULL;
        goto out;
    }

    *out_size = (size_t)sz;

out:
    fclose(f);
    return words;
}
|
|
|
|
int v3d_runner_create_pipeline(v3d_runner *r, const char *spv_path,
|
|
uint32_t n_ssbos, uint32_t push_const_size,
|
|
v3d_pipeline *out)
|
|
{
|
|
memset(out, 0, sizeof(*out));
|
|
out->n_ssbos = n_ssbos;
|
|
out->push_const_size = push_const_size;
|
|
|
|
/* Descriptor set layout: n_ssbos SSBO bindings, compute-only. */
|
|
VkDescriptorSetLayoutBinding *binds = calloc(n_ssbos, sizeof(*binds));
|
|
if (!binds) return -1;
|
|
for (uint32_t i = 0; i < n_ssbos; i++) {
|
|
binds[i] = (VkDescriptorSetLayoutBinding){
|
|
.binding = i,
|
|
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
|
.descriptorCount = 1,
|
|
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
|
|
};
|
|
}
|
|
VkDescriptorSetLayoutCreateInfo dslci = {
|
|
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
|
|
.bindingCount = n_ssbos,
|
|
.pBindings = binds,
|
|
};
|
|
VkResult vr = vkCreateDescriptorSetLayout(r->device, &dslci, NULL,
|
|
&out->ds_layout);
|
|
free(binds);
|
|
if (vr != VK_SUCCESS) {
|
|
fprintf(stderr, "vkCreateDescriptorSetLayout = %d\n", vr); return -1;
|
|
}
|
|
|
|
VkPushConstantRange pcr = {
|
|
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
|
|
.offset = 0,
|
|
.size = push_const_size,
|
|
};
|
|
VkPipelineLayoutCreateInfo plci = {
|
|
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
|
|
.setLayoutCount = 1,
|
|
.pSetLayouts = &out->ds_layout,
|
|
.pushConstantRangeCount = push_const_size ? 1 : 0,
|
|
.pPushConstantRanges = push_const_size ? &pcr : NULL,
|
|
};
|
|
CHK(vkCreatePipelineLayout(r->device, &plci, NULL, &out->layout));
|
|
|
|
size_t spv_size = 0;
|
|
uint32_t *spv = read_spv(spv_path, &spv_size);
|
|
if (!spv) return -1;
|
|
VkShaderModuleCreateInfo smci = {
|
|
.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
|
|
.codeSize = spv_size,
|
|
.pCode = spv,
|
|
};
|
|
VkShaderModule shader;
|
|
vr = vkCreateShaderModule(r->device, &smci, NULL, &shader);
|
|
free(spv);
|
|
if (vr != VK_SUCCESS) {
|
|
fprintf(stderr, "vkCreateShaderModule(%s) = %d\n", spv_path, vr);
|
|
return -1;
|
|
}
|
|
|
|
VkComputePipelineCreateInfo cpci = {
|
|
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
|
|
.stage = {
|
|
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
|
|
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
|
|
.module = shader,
|
|
.pName = "main",
|
|
},
|
|
.layout = out->layout,
|
|
};
|
|
vr = vkCreateComputePipelines(r->device, VK_NULL_HANDLE, 1, &cpci, NULL,
|
|
&out->pipeline);
|
|
vkDestroyShaderModule(r->device, shader, NULL);
|
|
if (vr != VK_SUCCESS) {
|
|
fprintf(stderr, "vkCreateComputePipelines = %d\n", vr); return -1;
|
|
}
|
|
|
|
/* Single descriptor pool + set for this pipeline. */
|
|
VkDescriptorPoolSize ps = {
|
|
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
|
.descriptorCount = n_ssbos,
|
|
};
|
|
VkDescriptorPoolCreateInfo dpci = {
|
|
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
|
|
.maxSets = 1,
|
|
.poolSizeCount = 1,
|
|
.pPoolSizes = &ps,
|
|
};
|
|
CHK(vkCreateDescriptorPool(r->device, &dpci, NULL, &out->pool));
|
|
|
|
VkDescriptorSetAllocateInfo dsai = {
|
|
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
|
|
.descriptorPool = out->pool,
|
|
.descriptorSetCount = 1,
|
|
.pSetLayouts = &out->ds_layout,
|
|
};
|
|
CHK(vkAllocateDescriptorSets(r->device, &dsai, &out->desc_set));
|
|
|
|
/* Persistent command buffer — pool was created with
|
|
* RESET_COMMAND_BUFFER_BIT (see v3d_runner_create) so dispatch
|
|
* sites can call vkResetCommandBuffer on this same cb instead
|
|
* of paying vkAllocateCommandBuffers per call. */
|
|
VkCommandBufferAllocateInfo cbai = {
|
|
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
|
|
.commandPool = r->pool,
|
|
.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
|
|
.commandBufferCount = 1,
|
|
};
|
|
CHK(vkAllocateCommandBuffers(r->device, &cbai, &out->cb));
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Release everything owned by a pipeline struct and zero it. Does
 * nothing when `p` is NULL or its `pipeline` handle is null.
 */
void v3d_runner_destroy_pipeline(v3d_runner *r, v3d_pipeline *p)
{
    if (p == NULL)
        return;
    if (p->pipeline == VK_NULL_HANDLE)
        return;

    /* The persistent command buffer goes back to the runner's pool. */
    if (p->cb != VK_NULL_HANDLE) {
        vkFreeCommandBuffers(r->device, r->pool, 1, &p->cb);
    }

    vkDestroyPipeline(r->device, p->pipeline, NULL);
    vkDestroyPipelineLayout(r->device, p->layout, NULL);
    /* Destroying the descriptor pool also frees the set allocated from it. */
    vkDestroyDescriptorPool(r->device, p->pool, NULL);
    vkDestroyDescriptorSetLayout(r->device, p->ds_layout, NULL);

    memset(p, 0, sizeof(*p));
}
|
|
|
|
/*
 * Reset the pipeline's persistent command buffer so it can be
 * re-recorded. `r` is unused. Returns 0 on success, -1 if `p` is NULL,
 * has no command buffer, or vkResetCommandBuffer fails.
 */
int v3d_runner_pipeline_cmdbuf_reset(v3d_runner *r, v3d_pipeline *p)
{
    (void) r;

    if (p == NULL)
        return -1;
    if (p->cb == VK_NULL_HANDLE)
        return -1;

    if (vkResetCommandBuffer(p->cb, 0) != VK_SUCCESS)
        return -1;
    return 0;
}
|
|
|
|
int v3d_runner_bind_buffers(v3d_runner *r, v3d_pipeline *p,
|
|
const v3d_buffer *bufs, uint32_t n)
|
|
{
|
|
if (n != p->n_ssbos) {
|
|
fprintf(stderr, "bind_buffers: n=%u != pipeline n_ssbos=%u\n",
|
|
n, p->n_ssbos);
|
|
return -1;
|
|
}
|
|
VkDescriptorBufferInfo *bi = calloc(n, sizeof(*bi));
|
|
VkWriteDescriptorSet *wr = calloc(n, sizeof(*wr));
|
|
if (!bi || !wr) { free(bi); free(wr); return -1; }
|
|
for (uint32_t i = 0; i < n; i++) {
|
|
bi[i].buffer = bufs[i].buffer;
|
|
bi[i].offset = 0;
|
|
bi[i].range = bufs[i].size;
|
|
wr[i] = (VkWriteDescriptorSet){
|
|
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
|
|
.dstSet = p->desc_set,
|
|
.dstBinding = i,
|
|
.descriptorCount = 1,
|
|
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
|
.pBufferInfo = &bi[i],
|
|
};
|
|
}
|
|
vkUpdateDescriptorSets(r->device, n, wr, 0, NULL);
|
|
free(bi); free(wr);
|
|
return 0;
|
|
}
|
|
|
|
/* ---- Command buffers ------------------------------------------- */
|
|
|
|
/*
 * Allocate one primary command buffer from the runner's command pool.
 * Returns the new handle, or VK_NULL_HANDLE if allocation fails.
 */
VkCommandBuffer v3d_runner_alloc_cmdbuf(v3d_runner *r)
{
    VkCommandBuffer fresh = VK_NULL_HANDLE;
    const VkCommandBufferAllocateInfo request = {
        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
        .commandPool = r->pool,
        .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
        .commandBufferCount = 1,
    };

    const VkResult vr = vkAllocateCommandBuffers(r->device, &request, &fresh);
    return (vr == VK_SUCCESS) ? fresh : VK_NULL_HANDLE;
}
|
|
|
|
/*
 * Submit one command buffer to the runner's queue and block until the
 * queue is idle. No fence is used; completion is observed through
 * vkQueueWaitIdle. Returns 0 on success, or -1 after logging to stderr
 * if either Vulkan call fails.
 */
int v3d_runner_submit_wait(v3d_runner *r, VkCommandBuffer cb)
{
    VkResult vr;
    VkSubmitInfo si = {
        .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
        .commandBufferCount = 1,
        .pCommandBuffers = &cb,
    };

    vr = vkQueueSubmit(r->queue, 1, &si, VK_NULL_HANDLE);
    if (vr != VK_SUCCESS) {
        fprintf(stderr, "v3d_runner: vulkan error %d at %s:%d (%s)\n",
                vr, __FILE__, __LINE__,
                "vkQueueSubmit(r->queue, 1, &si, VK_NULL_HANDLE)");
        return -1;
    }

    vr = vkQueueWaitIdle(r->queue);
    if (vr != VK_SUCCESS) {
        fprintf(stderr, "v3d_runner: vulkan error %d at %s:%d (%s)\n",
                vr, __FILE__, __LINE__,
                "vkQueueWaitIdle(r->queue)");
        return -1;
    }

    return 0;
}
|