d66f22f333
First QPU IDCT8 kernel running and bit-exact on V3D 7.1 via Mesa
v3dv compute. Five iterations through a Phase 7→Phase 4' loopback;
production kernel is v4.
New files:
- src/v3d_runner.{c,h} — reusable Vulkan compute plumbing (instance,
V3D device picker, HOST_VISIBLE|COHERENT
SSBOs with mmap, compute pipeline from .spv,
enables storageBuffer{8,16}BitAccess)
- src/v3d_idct8.comp — VP9 8x8 DCT_DCT IDCT add, v4 production:
256 invocations/WG, 2 blocks/subgroup
(no idle lanes), uint8 dst SSBO (race-free
per phase5 finding 5), unrolled writes
(no chained ternary), oob-flag pattern
(barrier-safe per phase5 finding 7)
- tests/bench_v3d_idct.c — M1' bit-exact gate + M2 throughput vs C ref
- docs/phase7.md — full iteration journey + decision verdict
CMakeLists.txt updated to build the new shader, library, and bench
when DAEDALUS_BUILD_VULKAN=ON.
Iteration record (1920x1088 luma, 32640 blocks/dispatch, N=3):
  ver  change                            R                 ns/block
  v1   first-light                       0.230             533
  v2   kill ternary + 2-blocks-per-sg    0.474             258
  v3   per-pass scope oN                 0.481             254 (noise)
  v4   WG 64 -> 256 invocations          0.947             129
  v5   packed uint32 coeff reads         0.938             130 (noise, reverted)
  v4   final N=3                         0.918 +/- 0.033
Bit-exactness 100.0000% across all iterations (10000-block sample
on 128x128, 32640-block sample on 1080p) against both the C
reference (tests/vp9_idct8_ref.c) and the vendored FFmpeg NEON
ff_vp9_idct_idct_8x8_add_neon.
Key learning over the Phase 5 review's prediction model: the
chained ternary was NOT a spill killer on V3D 7.1 (shaderdb
showed 0:0 spills:fills even in v1). The actual lever was
workgroup-size-driven latency hiding — going from 64 to 256
invocations doubled throughput with the same compiled code
(270 inst, 2 threads, 21 max-temps, 0 spills) because the
v3dv scheduler had 4x more in-flight work to overlap TMU
latency.
Verdict per phase1.md decision rules: YELLOW band (0.5 <= R < 1.0),
well clear of the lower bound and close to the GREEN boundary. Phase 1 YELLOW rule:
add M4 (concurrent CPU+QPU throughput) before honest-close or
continue. M4 is the next measurement, not more shader tuning —
at R = 0.92 with all 4 A76 cores still 100% free for other work,
the question is whether the system aggregate beats pure 4-core
NEON.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
436 lines
15 KiB
C
436 lines
15 KiB
C
/*
|
|
* v3d_runner — implementation. See v3d_runner.h.
|
|
*
|
|
* License: BSD-2-Clause.
|
|
*/
|
|
#include "v3d_runner.h"
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
/* Evaluate a Vulkan call exactly once. If the result is anything other than
 * VK_SUCCESS, print the numeric result, the source location and the call
 * text to stderr, then return -1 from the enclosing function. */
#define CHK(call)                                                          \
    do {                                                                   \
        VkResult r__ = (call);                                             \
        if (r__ != VK_SUCCESS) {                                           \
            fprintf(stderr, "v3d_runner: vulkan error %d at %s:%d (%s)\n", \
                    r__, __FILE__, __LINE__, #call);                       \
            return -1;                                                     \
        }                                                                  \
    } while (0)
|
|
|
|
/* Same check as CHK, for functions that return a pointer: on any result
 * other than VK_SUCCESS the diagnostic is printed to stderr and the
 * enclosing function returns NULL. */
#define CHK_NULL(call)                                                     \
    do {                                                                   \
        VkResult r__ = (call);                                             \
        if (r__ != VK_SUCCESS) {                                           \
            fprintf(stderr, "v3d_runner: vulkan error %d at %s:%d (%s)\n", \
                    r__, __FILE__, __LINE__, #call);                       \
            return NULL;                                                   \
        }                                                                  \
    } while (0)
|
|
|
|
struct v3d_runner {
|
|
VkInstance instance;
|
|
VkPhysicalDevice phys;
|
|
VkDevice device;
|
|
VkQueue queue;
|
|
uint32_t queue_family;
|
|
VkCommandPool pool;
|
|
char device_name[VK_MAX_PHYSICAL_DEVICE_NAME_SIZE];
|
|
VkPhysicalDeviceMemoryProperties mem_props;
|
|
};
|
|
|
|
static int pick_v3d_physical_device(VkInstance inst, VkPhysicalDevice *out,
|
|
char name_out[VK_MAX_PHYSICAL_DEVICE_NAME_SIZE])
|
|
{
|
|
uint32_t n = 0;
|
|
if (vkEnumeratePhysicalDevices(inst, &n, NULL) != VK_SUCCESS || n == 0) {
|
|
fprintf(stderr, "v3d_runner: no Vulkan physical devices\n");
|
|
return -1;
|
|
}
|
|
VkPhysicalDevice *pds = malloc(n * sizeof(*pds));
|
|
if (!pds) return -1;
|
|
vkEnumeratePhysicalDevices(inst, &n, pds);
|
|
|
|
int picked = -1;
|
|
for (uint32_t i = 0; i < n; i++) {
|
|
VkPhysicalDeviceProperties p;
|
|
vkGetPhysicalDeviceProperties(pds[i], &p);
|
|
if (strstr(p.deviceName, "V3D") != NULL) {
|
|
*out = pds[i];
|
|
memcpy(name_out, p.deviceName, sizeof(p.deviceName));
|
|
picked = 0;
|
|
break;
|
|
}
|
|
}
|
|
free(pds);
|
|
if (picked != 0)
|
|
fprintf(stderr, "v3d_runner: no V3D device found (looked for "
|
|
"\"V3D\" substring in deviceName)\n");
|
|
return picked;
|
|
}
|
|
|
|
/*
 * Return the index of the first queue family on `phys` that advertises
 * VK_QUEUE_COMPUTE_BIT, or UINT32_MAX when there is none or the temporary
 * property array cannot be allocated.
 */
static uint32_t pick_compute_queue_family(VkPhysicalDevice phys)
{
    uint32_t count = 0;
    vkGetPhysicalDeviceQueueFamilyProperties(phys, &count, NULL);

    VkQueueFamilyProperties *families = malloc(count * sizeof(*families));
    if (families == NULL)
        return UINT32_MAX;
    vkGetPhysicalDeviceQueueFamilyProperties(phys, &count, families);

    /* The scan stops as soon as `found` leaves its sentinel value. */
    uint32_t found = UINT32_MAX;
    for (uint32_t idx = 0; idx < count && found == UINT32_MAX; idx++) {
        if ((families[idx].queueFlags & VK_QUEUE_COMPUTE_BIT) != 0)
            found = idx;
    }

    free(families);
    return found;
}
|
|
|
|
v3d_runner *v3d_runner_create(void)
|
|
{
|
|
v3d_runner *r = calloc(1, sizeof(*r));
|
|
if (!r) return NULL;
|
|
|
|
/* Instance — Vulkan 1.3 to inherit 1.2 promoted features. */
|
|
VkApplicationInfo app = {
|
|
.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO,
|
|
.pApplicationName = "daedalus-fourier",
|
|
.apiVersion = VK_API_VERSION_1_3,
|
|
};
|
|
VkInstanceCreateInfo ici = {
|
|
.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
|
|
.pApplicationInfo = &app,
|
|
};
|
|
CHK_NULL(vkCreateInstance(&ici, NULL, &r->instance));
|
|
|
|
if (pick_v3d_physical_device(r->instance, &r->phys, r->device_name) != 0) {
|
|
vkDestroyInstance(r->instance, NULL);
|
|
free(r);
|
|
return NULL;
|
|
}
|
|
|
|
vkGetPhysicalDeviceMemoryProperties(r->phys, &r->mem_props);
|
|
|
|
r->queue_family = pick_compute_queue_family(r->phys);
|
|
if (r->queue_family == UINT32_MAX) {
|
|
fprintf(stderr, "v3d_runner: no compute queue family\n");
|
|
vkDestroyInstance(r->instance, NULL);
|
|
free(r);
|
|
return NULL;
|
|
}
|
|
|
|
/* Enable 8-bit + 16-bit storage features. Both are exposed on
|
|
* V3D 7.1 per vulkaninfo_v3d_7_1_7_hertz.txt; the kernel
|
|
* declares storageBuffer8BitAccess (uint8_t dst[]) and
|
|
* storageBuffer16BitAccess (int16_t coeffs[]).
|
|
*/
|
|
VkPhysicalDevice16BitStorageFeatures f16 = {
|
|
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES,
|
|
.storageBuffer16BitAccess = VK_TRUE,
|
|
.uniformAndStorageBuffer16BitAccess = VK_TRUE,
|
|
};
|
|
VkPhysicalDevice8BitStorageFeatures f8 = {
|
|
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES,
|
|
.pNext = &f16,
|
|
.storageBuffer8BitAccess = VK_TRUE,
|
|
.uniformAndStorageBuffer8BitAccess = VK_TRUE,
|
|
};
|
|
VkPhysicalDeviceFeatures2 f2 = {
|
|
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
|
|
.pNext = &f8,
|
|
};
|
|
|
|
float qprio = 1.0f;
|
|
VkDeviceQueueCreateInfo dqci = {
|
|
.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
|
|
.queueFamilyIndex = r->queue_family,
|
|
.queueCount = 1,
|
|
.pQueuePriorities = &qprio,
|
|
};
|
|
VkDeviceCreateInfo dci = {
|
|
.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
|
|
.pNext = &f2,
|
|
.queueCreateInfoCount = 1,
|
|
.pQueueCreateInfos = &dqci,
|
|
};
|
|
if (vkCreateDevice(r->phys, &dci, NULL, &r->device) != VK_SUCCESS) {
|
|
fprintf(stderr, "v3d_runner: vkCreateDevice failed\n");
|
|
vkDestroyInstance(r->instance, NULL);
|
|
free(r);
|
|
return NULL;
|
|
}
|
|
vkGetDeviceQueue(r->device, r->queue_family, 0, &r->queue);
|
|
|
|
VkCommandPoolCreateInfo cpci = {
|
|
.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
|
|
.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
|
|
.queueFamilyIndex = r->queue_family,
|
|
};
|
|
if (vkCreateCommandPool(r->device, &cpci, NULL, &r->pool) != VK_SUCCESS) {
|
|
fprintf(stderr, "v3d_runner: vkCreateCommandPool failed\n");
|
|
vkDestroyDevice(r->device, NULL);
|
|
vkDestroyInstance(r->instance, NULL);
|
|
free(r);
|
|
return NULL;
|
|
}
|
|
|
|
return r;
|
|
}
|
|
|
|
/*
 * Tear down a runner: wait for the device to go idle, then destroy the
 * command pool, the device and the instance (each only if its handle is
 * non-null) and free the runner itself. A NULL `r` is ignored.
 */
void v3d_runner_destroy(v3d_runner *r)
{
    if (r == NULL)
        return;

    /* Wait for the device before destroying anything. */
    if (r->device != VK_NULL_HANDLE) {
        vkDeviceWaitIdle(r->device);
    }
    if (r->pool != VK_NULL_HANDLE) {
        vkDestroyCommandPool(r->device, r->pool, NULL);
    }
    if (r->device != VK_NULL_HANDLE) {
        vkDestroyDevice(r->device, NULL);
    }
    if (r->instance != VK_NULL_HANDLE) {
        vkDestroyInstance(r->instance, NULL);
    }

    free(r);
}
|
|
|
|
/* Accessor: the runner's logical device handle. `r` must be non-NULL. */
VkDevice v3d_runner_device(v3d_runner *r)
{
    return r->device;
}
|
|
/* Accessor: the runner's queue handle. `r` must be non-NULL. */
VkQueue v3d_runner_queue(v3d_runner *r)
{
    return r->queue;
}
|
|
/* Accessor: index of the queue family the runner's queue and command pool
 * were created for. `r` must be non-NULL. */
uint32_t v3d_runner_queue_family(v3d_runner *r)
{
    return r->queue_family;
}
|
|
/* Accessor: the runner's command pool handle. `r` must be non-NULL. */
VkCommandPool v3d_runner_cmd_pool(v3d_runner *r)
{
    return r->pool;
}
|
|
/* Accessor: the selected device's name. The returned pointer refers to
 * storage inside `*r`. `r` must be non-NULL. */
const char *v3d_runner_device_name(v3d_runner *r)
{
    return r->device_name;
}
|
|
|
|
/* ---- Buffers ---------------------------------------------------- */
|
|
|
|
/*
 * Find the lowest-indexed memory type that is permitted by `type_bits`
 * (bit i set means type i is allowed) and whose property flags include
 * every bit of `wanted`.
 *
 * Returns the memory type index, or -1 if no type qualifies.
 */
static int find_memory_type(VkPhysicalDeviceMemoryProperties *p,
                            uint32_t type_bits, VkMemoryPropertyFlags wanted)
{
    for (uint32_t idx = 0; idx < p->memoryTypeCount; idx++) {
        /* Skip types the resource cannot use. */
        if (!(type_bits & (1u << idx)))
            continue;

        VkMemoryPropertyFlags have = p->memoryTypes[idx].propertyFlags;
        if ((have & wanted) != wanted)
            continue;

        return (int) idx;
    }
    return -1;
}
|
|
|
|
/*
 * Create a storage buffer of `size` bytes, back it with HOST_VISIBLE |
 * HOST_COHERENT memory, and map the whole allocation.
 *
 *   r     runner whose device and cached memory properties are used.
 *   size  requested buffer size in bytes.
 *   out   receives buffer, memory, mapped pointer and size on success.
 *
 * Returns 0 on success, -1 on failure (a diagnostic goes to stderr).
 *
 * On failure every object created here is destroyed and `*out` is left
 * zeroed, so nothing leaks and a later v3d_runner_destroy_buffer() on the
 * same struct is a harmless no-op. The original returned straight out of
 * the function and left the buffer and/or memory behind in a
 * half-initialized `*out`.
 */
int v3d_runner_create_buffer(v3d_runner *r, size_t size, v3d_buffer *out)
{
    VkResult vr;
    VkBuffer buffer = VK_NULL_HANDLE;
    VkDeviceMemory memory = VK_NULL_HANDLE;
    void *mapped = NULL;

    memset(out, 0, sizeof(*out));

    VkBufferCreateInfo bci = {
        .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
        .size = size,
        .usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT
               | VK_BUFFER_USAGE_TRANSFER_SRC_BIT
               | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
    };
    vr = vkCreateBuffer(r->device, &bci, NULL, &buffer);
    if (vr != VK_SUCCESS) {
        fprintf(stderr, "v3d_runner: vkCreateBuffer = %d\n", (int) vr);
        /* Output contents are not trusted after a failed call. */
        buffer = VK_NULL_HANDLE;
        goto fail;
    }

    VkMemoryRequirements req;
    vkGetBufferMemoryRequirements(r->device, buffer, &req);

    /* HOST_VISIBLE | HOST_COHERENT is the unified-memory zero-copy
     * path on Pi 5: CPU and GPU see the same LPDDR4x physical pages,
     * no explicit flush/invalidate needed (the COHERENT bit asserts
     * that). */
    int mt = find_memory_type(&r->mem_props, req.memoryTypeBits,
                              VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
                              | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
    if (mt < 0) {
        fprintf(stderr, "v3d_runner: no HOST_VISIBLE|COHERENT memory type\n");
        goto fail;
    }

    VkMemoryAllocateInfo mai = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
        .allocationSize = req.size,
        .memoryTypeIndex = (uint32_t) mt,
    };
    vr = vkAllocateMemory(r->device, &mai, NULL, &memory);
    if (vr != VK_SUCCESS) {
        fprintf(stderr, "v3d_runner: vkAllocateMemory = %d\n", (int) vr);
        memory = VK_NULL_HANDLE;
        goto fail;
    }

    vr = vkBindBufferMemory(r->device, buffer, memory, 0);
    if (vr != VK_SUCCESS) {
        fprintf(stderr, "v3d_runner: vkBindBufferMemory = %d\n", (int) vr);
        goto fail;
    }

    vr = vkMapMemory(r->device, memory, 0, VK_WHOLE_SIZE, 0, &mapped);
    if (vr != VK_SUCCESS) {
        fprintf(stderr, "v3d_runner: vkMapMemory = %d\n", (int) vr);
        goto fail;
    }

    /* Publish only a fully constructed buffer. */
    out->buffer = buffer;
    out->memory = memory;
    out->mapped = mapped;
    out->size = size;
    return 0;

fail:
    /* Same release order as v3d_runner_destroy_buffer(): buffer, then memory. */
    if (buffer != VK_NULL_HANDLE)
        vkDestroyBuffer(r->device, buffer, NULL);
    if (memory != VK_NULL_HANDLE)
        vkFreeMemory(r->device, memory, NULL);
    memset(out, 0, sizeof(*out));
    return -1;
}
|
|
|
|
/*
 * Release a buffer made by v3d_runner_create_buffer(): unmap it if it is
 * mapped, destroy the buffer, free its memory, then zero `*buf`.
 * Does nothing when `buf` is NULL or holds no buffer handle.
 */
void v3d_runner_destroy_buffer(v3d_runner *r, v3d_buffer *buf)
{
    if (buf == NULL)
        return;
    if (buf->buffer == VK_NULL_HANDLE)
        return;

    if (buf->mapped != NULL) {
        vkUnmapMemory(r->device, buf->memory);
    }
    vkDestroyBuffer(r->device, buf->buffer, NULL);
    vkFreeMemory(r->device, buf->memory, NULL);

    /* Zeroing makes a repeated destroy hit the guard above. */
    memset(buf, 0, sizeof(*buf));
}
|
|
|
|
/* ---- Pipelines -------------------------------------------------- */
|
|
|
|
/*
 * Load a whole SPIR-V binary from `path` into a freshly malloc'd buffer.
 *
 * The file must be non-empty and a multiple of 4 bytes long. On success
 * the buffer is returned (the caller frees it) and `*out_size` receives
 * the size in bytes. On any failure a diagnostic is written to stderr,
 * NULL is returned and `*out_size` is left untouched.
 */
static uint32_t *read_spv(const char *path, size_t *out_size)
{
    FILE *fp = fopen(path, "rb");
    if (fp == NULL) {
        perror(path);
        return NULL;
    }

    /* Measure the file by seeking to the end, then rewind. */
    fseek(fp, 0, SEEK_END);
    const long nbytes = ftell(fp);
    fseek(fp, 0, SEEK_SET);

    uint32_t *words = NULL;
    if (nbytes <= 0 || (nbytes & 3) != 0) {
        fprintf(stderr, "%s: bad SPIR-V size %ld\n", path, nbytes);
    } else {
        words = malloc(nbytes);
        if (words == NULL
            || fread(words, 1, nbytes, fp) != (size_t) nbytes) {
            perror("read");
            free(words);
            words = NULL;
        } else {
            *out_size = nbytes;
        }
    }

    fclose(fp);
    return words;
}
|
|
|
|
int v3d_runner_create_pipeline(v3d_runner *r, const char *spv_path,
|
|
uint32_t n_ssbos, uint32_t push_const_size,
|
|
v3d_pipeline *out)
|
|
{
|
|
memset(out, 0, sizeof(*out));
|
|
out->n_ssbos = n_ssbos;
|
|
out->push_const_size = push_const_size;
|
|
|
|
/* Descriptor set layout: n_ssbos SSBO bindings, compute-only. */
|
|
VkDescriptorSetLayoutBinding *binds = calloc(n_ssbos, sizeof(*binds));
|
|
if (!binds) return -1;
|
|
for (uint32_t i = 0; i < n_ssbos; i++) {
|
|
binds[i] = (VkDescriptorSetLayoutBinding){
|
|
.binding = i,
|
|
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
|
.descriptorCount = 1,
|
|
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
|
|
};
|
|
}
|
|
VkDescriptorSetLayoutCreateInfo dslci = {
|
|
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
|
|
.bindingCount = n_ssbos,
|
|
.pBindings = binds,
|
|
};
|
|
VkResult vr = vkCreateDescriptorSetLayout(r->device, &dslci, NULL,
|
|
&out->ds_layout);
|
|
free(binds);
|
|
if (vr != VK_SUCCESS) {
|
|
fprintf(stderr, "vkCreateDescriptorSetLayout = %d\n", vr); return -1;
|
|
}
|
|
|
|
VkPushConstantRange pcr = {
|
|
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
|
|
.offset = 0,
|
|
.size = push_const_size,
|
|
};
|
|
VkPipelineLayoutCreateInfo plci = {
|
|
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
|
|
.setLayoutCount = 1,
|
|
.pSetLayouts = &out->ds_layout,
|
|
.pushConstantRangeCount = push_const_size ? 1 : 0,
|
|
.pPushConstantRanges = push_const_size ? &pcr : NULL,
|
|
};
|
|
CHK(vkCreatePipelineLayout(r->device, &plci, NULL, &out->layout));
|
|
|
|
size_t spv_size = 0;
|
|
uint32_t *spv = read_spv(spv_path, &spv_size);
|
|
if (!spv) return -1;
|
|
VkShaderModuleCreateInfo smci = {
|
|
.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
|
|
.codeSize = spv_size,
|
|
.pCode = spv,
|
|
};
|
|
VkShaderModule shader;
|
|
vr = vkCreateShaderModule(r->device, &smci, NULL, &shader);
|
|
free(spv);
|
|
if (vr != VK_SUCCESS) {
|
|
fprintf(stderr, "vkCreateShaderModule(%s) = %d\n", spv_path, vr);
|
|
return -1;
|
|
}
|
|
|
|
VkComputePipelineCreateInfo cpci = {
|
|
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
|
|
.stage = {
|
|
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
|
|
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
|
|
.module = shader,
|
|
.pName = "main",
|
|
},
|
|
.layout = out->layout,
|
|
};
|
|
vr = vkCreateComputePipelines(r->device, VK_NULL_HANDLE, 1, &cpci, NULL,
|
|
&out->pipeline);
|
|
vkDestroyShaderModule(r->device, shader, NULL);
|
|
if (vr != VK_SUCCESS) {
|
|
fprintf(stderr, "vkCreateComputePipelines = %d\n", vr); return -1;
|
|
}
|
|
|
|
/* Single descriptor pool + set for this pipeline. */
|
|
VkDescriptorPoolSize ps = {
|
|
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
|
.descriptorCount = n_ssbos,
|
|
};
|
|
VkDescriptorPoolCreateInfo dpci = {
|
|
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
|
|
.maxSets = 1,
|
|
.poolSizeCount = 1,
|
|
.pPoolSizes = &ps,
|
|
};
|
|
CHK(vkCreateDescriptorPool(r->device, &dpci, NULL, &out->pool));
|
|
|
|
VkDescriptorSetAllocateInfo dsai = {
|
|
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
|
|
.descriptorPool = out->pool,
|
|
.descriptorSetCount = 1,
|
|
.pSetLayouts = &out->ds_layout,
|
|
};
|
|
CHK(vkAllocateDescriptorSets(r->device, &dsai, &out->desc_set));
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Destroy the objects held by `p` (pipeline, pipeline layout, descriptor
 * pool, descriptor-set layout) and zero the struct.
 * Does nothing when `p` is NULL or its pipeline handle is null.
 */
void v3d_runner_destroy_pipeline(v3d_runner *r, v3d_pipeline *p)
{
    if (p == NULL)
        return;
    if (p->pipeline == VK_NULL_HANDLE)
        return;

    VkDevice dev = r->device;
    vkDestroyPipeline(dev, p->pipeline, NULL);
    vkDestroyPipelineLayout(dev, p->layout, NULL);
    /* Destroying the pool also frees the descriptor set allocated from it. */
    vkDestroyDescriptorPool(dev, p->pool, NULL);
    vkDestroyDescriptorSetLayout(dev, p->ds_layout, NULL);

    memset(p, 0, sizeof(*p));
}
|
|
|
|
int v3d_runner_bind_buffers(v3d_runner *r, v3d_pipeline *p,
|
|
const v3d_buffer *bufs, uint32_t n)
|
|
{
|
|
if (n != p->n_ssbos) {
|
|
fprintf(stderr, "bind_buffers: n=%u != pipeline n_ssbos=%u\n",
|
|
n, p->n_ssbos);
|
|
return -1;
|
|
}
|
|
VkDescriptorBufferInfo *bi = calloc(n, sizeof(*bi));
|
|
VkWriteDescriptorSet *wr = calloc(n, sizeof(*wr));
|
|
if (!bi || !wr) { free(bi); free(wr); return -1; }
|
|
for (uint32_t i = 0; i < n; i++) {
|
|
bi[i].buffer = bufs[i].buffer;
|
|
bi[i].offset = 0;
|
|
bi[i].range = bufs[i].size;
|
|
wr[i] = (VkWriteDescriptorSet){
|
|
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
|
|
.dstSet = p->desc_set,
|
|
.dstBinding = i,
|
|
.descriptorCount = 1,
|
|
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
|
.pBufferInfo = &bi[i],
|
|
};
|
|
}
|
|
vkUpdateDescriptorSets(r->device, n, wr, 0, NULL);
|
|
free(bi); free(wr);
|
|
return 0;
|
|
}
|
|
|
|
/* ---- Command buffers ------------------------------------------- */
|
|
|
|
/*
 * Allocate one primary command buffer from the runner's command pool.
 * Returns the new command buffer, or VK_NULL_HANDLE if allocation fails.
 */
VkCommandBuffer v3d_runner_alloc_cmdbuf(v3d_runner *r)
{
    VkCommandBuffer cmd = VK_NULL_HANDLE;
    VkCommandBufferAllocateInfo info = {
        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
        .commandPool = r->pool,
        .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
        .commandBufferCount = 1,
    };

    VkResult status = vkAllocateCommandBuffers(r->device, &info, &cmd);
    if (status == VK_SUCCESS)
        return cmd;
    return VK_NULL_HANDLE;
}
|
|
|
|
/*
 * Submit `cb` to the runner's queue with no fence and no semaphores, then
 * block until the queue is idle.
 * Returns 0 on success; on a Vulkan error CHK prints a diagnostic and the
 * function returns -1.
 */
int v3d_runner_submit_wait(v3d_runner *r, VkCommandBuffer cb)
{
    VkSubmitInfo si = {
        .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
        .commandBufferCount = 1,
        .pCommandBuffers = &cb,
    };

    /* No fence is passed; completion is observed by draining the queue. */
    CHK(vkQueueSubmit(r->queue, 1, &si, VK_NULL_HANDLE));
    CHK(vkQueueWaitIdle(r->queue));

    return 0;
}
|