v3d_runner: buffer pool for QPU dispatch hot path #6

Merged
marfrit merged 2 commits from noether/v3d-buffer-pool into main 2026-05-23 18:59:19 +00:00
3 changed files with 44 additions and 10 deletions
Showing only changes of commit 98553278dd - Show all commits
+10 -10
View File
@@ -325,8 +325,8 @@ static int dispatch_idct8_qpu(daedalus_ctx *ctx,
._pad = 0, ._pad = 0,
}; };
VkCommandBuffer cb = v3d_runner_alloc_cmdbuf(ctx->runner); if (v3d_runner_pipeline_cmdbuf_reset(ctx->runner, &ctx->idct8_pipe)) goto fail;
if (cb == VK_NULL_HANDLE) goto fail; VkCommandBuffer cb = ctx->idct8_pipe.cb;
VkCommandBufferBeginInfo cbbi = { .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO }; VkCommandBufferBeginInfo cbbi = { .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO };
vkBeginCommandBuffer(cb, &cbbi); vkBeginCommandBuffer(cb, &cbbi);
vkCmdBindPipeline(cb, VK_PIPELINE_BIND_POINT_COMPUTE, vkCmdBindPipeline(cb, VK_PIPELINE_BIND_POINT_COMPUTE,
@@ -442,8 +442,8 @@ static int dispatch_lpf_qpu(daedalus_ctx *ctx, int wd_8,
if (v3d_runner_bind_buffers(ctx->runner, p, binds, 2)) goto fail; if (v3d_runner_bind_buffers(ctx->runner, p, binds, 2)) goto fail;
uint32_t wg_count = (uint32_t)((n_edges + 31) / 32); uint32_t wg_count = (uint32_t)((n_edges + 31) / 32);
VkCommandBuffer cb = v3d_runner_alloc_cmdbuf(ctx->runner); if (v3d_runner_pipeline_cmdbuf_reset(ctx->runner, p)) goto fail;
if (cb == VK_NULL_HANDLE) goto fail; VkCommandBuffer cb = p->cb;
VkCommandBufferBeginInfo cbbi = { .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO }; VkCommandBufferBeginInfo cbbi = { .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO };
vkBeginCommandBuffer(cb, &cbbi); vkBeginCommandBuffer(cb, &cbbi);
vkCmdBindPipeline(cb, VK_PIPELINE_BIND_POINT_COMPUTE, p->pipeline); vkCmdBindPipeline(cb, VK_PIPELINE_BIND_POINT_COMPUTE, p->pipeline);
@@ -530,8 +530,8 @@ static int dispatch_mc_8h_qpu(daedalus_ctx *ctx,
mc_pc pc = { .n_blocks = (uint32_t) n_blocks, mc_pc pc = { .n_blocks = (uint32_t) n_blocks,
.dst_stride_u8 = (uint32_t) dst_stride, .dst_stride_u8 = (uint32_t) dst_stride,
.src_stride_u8 = (uint32_t) src_stride }; .src_stride_u8 = (uint32_t) src_stride };
VkCommandBuffer cb = v3d_runner_alloc_cmdbuf(ctx->runner); if (v3d_runner_pipeline_cmdbuf_reset(ctx->runner, &ctx->mc8h_pipe)) goto fail;
if (cb == VK_NULL_HANDLE) goto fail; VkCommandBuffer cb = ctx->mc8h_pipe.cb;
VkCommandBufferBeginInfo cbbi = { .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO }; VkCommandBufferBeginInfo cbbi = { .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO };
vkBeginCommandBuffer(cb, &cbbi); vkBeginCommandBuffer(cb, &cbbi);
vkCmdBindPipeline(cb, VK_PIPELINE_BIND_POINT_COMPUTE, ctx->mc8h_pipe.pipeline); vkCmdBindPipeline(cb, VK_PIPELINE_BIND_POINT_COMPUTE, ctx->mc8h_pipe.pipeline);
@@ -615,8 +615,8 @@ static int dispatch_cdef_qpu(daedalus_ctx *ctx,
cdef_pc pc = { .n_blocks = (uint32_t) n_blocks, cdef_pc pc = { .n_blocks = (uint32_t) n_blocks,
.tmp_stride_u16 = 16, .tmp_stride_u16 = 16,
.dst_stride_u8 = (uint32_t) dst_stride }; .dst_stride_u8 = (uint32_t) dst_stride };
VkCommandBuffer cb = v3d_runner_alloc_cmdbuf(ctx->runner); if (v3d_runner_pipeline_cmdbuf_reset(ctx->runner, &ctx->cdef_pipe)) goto fail;
if (cb == VK_NULL_HANDLE) goto fail; VkCommandBuffer cb = ctx->cdef_pipe.cb;
VkCommandBufferBeginInfo cbbi = { .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO }; VkCommandBufferBeginInfo cbbi = { .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO };
vkBeginCommandBuffer(cb, &cbbi); vkBeginCommandBuffer(cb, &cbbi);
vkCmdBindPipeline(cb, VK_PIPELINE_BIND_POINT_COMPUTE, ctx->cdef_pipe.pipeline); vkCmdBindPipeline(cb, VK_PIPELINE_BIND_POINT_COMPUTE, ctx->cdef_pipe.pipeline);
@@ -691,8 +691,8 @@ static int dispatch_h264_deblock_qpu(daedalus_ctx *ctx,
uint32_t wg_count = (uint32_t)((n_edges + 15) / 16); uint32_t wg_count = (uint32_t)((n_edges + 15) / 16);
h264deblock_pc pc = { .n_edges = (uint32_t) n_edges, h264deblock_pc pc = { .n_edges = (uint32_t) n_edges,
.dst_stride_u8 = (uint32_t) dst_stride }; .dst_stride_u8 = (uint32_t) dst_stride };
VkCommandBuffer cb = v3d_runner_alloc_cmdbuf(ctx->runner); if (v3d_runner_pipeline_cmdbuf_reset(ctx->runner, &ctx->h264deblock_pipe)) goto fail;
if (cb == VK_NULL_HANDLE) goto fail; VkCommandBuffer cb = ctx->h264deblock_pipe.cb;
VkCommandBufferBeginInfo cbbi = { .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO }; VkCommandBufferBeginInfo cbbi = { .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO };
vkBeginCommandBuffer(cb, &cbbi); vkBeginCommandBuffer(cb, &cbbi);
vkCmdBindPipeline(cb, VK_PIPELINE_BIND_POINT_COMPUTE, ctx->h264deblock_pipe.pipeline); vkCmdBindPipeline(cb, VK_PIPELINE_BIND_POINT_COMPUTE, ctx->h264deblock_pipe.pipeline);
+22
View File
@@ -486,12 +486,27 @@ int v3d_runner_create_pipeline(v3d_runner *r, const char *spv_path,
.pSetLayouts = &out->ds_layout, .pSetLayouts = &out->ds_layout,
}; };
CHK(vkAllocateDescriptorSets(r->device, &dsai, &out->desc_set)); CHK(vkAllocateDescriptorSets(r->device, &dsai, &out->desc_set));
/* Persistent command buffer — pool was created with
* RESET_COMMAND_BUFFER_BIT (see v3d_runner_create) so dispatch
* sites can call vkResetCommandBuffer on this same cb instead
* of paying vkAllocateCommandBuffers per call. */
VkCommandBufferAllocateInfo cbai = {
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
.commandPool = r->pool,
.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
.commandBufferCount = 1,
};
CHK(vkAllocateCommandBuffers(r->device, &cbai, &out->cb));
return 0; return 0;
} }
void v3d_runner_destroy_pipeline(v3d_runner *r, v3d_pipeline *p) void v3d_runner_destroy_pipeline(v3d_runner *r, v3d_pipeline *p)
{ {
if (!p || p->pipeline == VK_NULL_HANDLE) return; if (!p || p->pipeline == VK_NULL_HANDLE) return;
if (p->cb != VK_NULL_HANDLE)
vkFreeCommandBuffers(r->device, r->pool, 1, &p->cb);
vkDestroyPipeline(r->device, p->pipeline, NULL); vkDestroyPipeline(r->device, p->pipeline, NULL);
vkDestroyPipelineLayout(r->device, p->layout, NULL); vkDestroyPipelineLayout(r->device, p->layout, NULL);
vkDestroyDescriptorPool(r->device, p->pool, NULL); /* frees its set */ vkDestroyDescriptorPool(r->device, p->pool, NULL); /* frees its set */
@@ -499,6 +514,13 @@ void v3d_runner_destroy_pipeline(v3d_runner *r, v3d_pipeline *p)
memset(p, 0, sizeof(*p)); memset(p, 0, sizeof(*p));
} }
/* Reset the pipeline's persistent command buffer so it can be recorded
 * again. Returns 0 when vkResetCommandBuffer reports VK_SUCCESS, and -1
 * when @p is NULL, @p holds no command buffer, or the reset fails.
 * @r is accepted for API symmetry but is not used here. */
int v3d_runner_pipeline_cmdbuf_reset(v3d_runner *r, v3d_pipeline *p)
{
    (void) r;

    /* Nothing to reset without a pipeline that owns a command buffer. */
    if (!p) {
        return -1;
    }
    if (p->cb == VK_NULL_HANDLE) {
        return -1;
    }

    /* Flags = 0: do not ask the driver to release the cb's resources. */
    if (vkResetCommandBuffer(p->cb, 0) != VK_SUCCESS) {
        return -1;
    }
    return 0;
}
int v3d_runner_bind_buffers(v3d_runner *r, v3d_pipeline *p, int v3d_runner_bind_buffers(v3d_runner *r, v3d_pipeline *p,
const v3d_buffer *bufs, uint32_t n) const v3d_buffer *bufs, uint32_t n)
{ {
+12
View File
@@ -34,6 +34,12 @@ typedef struct {
VkDescriptorSet desc_set; VkDescriptorSet desc_set;
uint32_t n_ssbos; uint32_t n_ssbos;
uint32_t push_const_size; uint32_t push_const_size;
/* Persistent command buffer. Allocated at create-pipeline time;
* dispatch sites use v3d_runner_pipeline_cmdbuf_reset() to
* vkResetCommandBuffer instead of paying vkAllocateCommandBuffers
* per dispatch. Pool flagged RESET_COMMAND_BUFFER_BIT so reset
* is permitted. */
VkCommandBuffer cb;
} v3d_pipeline; } v3d_pipeline;
/* /*
@@ -121,6 +127,12 @@ int v3d_runner_bind_buffers(v3d_runner *r,
/* Allocate a primary command buffer from the runner's pool. */ /* Allocate a primary command buffer from the runner's pool. */
VkCommandBuffer v3d_runner_alloc_cmdbuf(v3d_runner *r); VkCommandBuffer v3d_runner_alloc_cmdbuf(v3d_runner *r);
/* Reset @p->cb so it can be re-recorded. Returns 0 on success, -1 if
 * @p is NULL, @p has no command buffer, or vkResetCommandBuffer fails.
 * @r is currently unused by the implementation.
 * Replaces v3d_runner_alloc_cmdbuf() on the dispatch hot path —
 * vkResetCommandBuffer is O(1) vs vkAllocateCommandBuffers' ~1-5us
 * driver cost.
 * NOTE(review): Vulkan requires that the command buffer not be in the
 * pending state when it is reset; presumably the submit-and-wait call
 * guarantees this between dispatches — confirm against callers. */
int v3d_runner_pipeline_cmdbuf_reset(v3d_runner *r, v3d_pipeline *p);
/* Submit `cb` to the queue and wait for completion. The classic /* Submit `cb` to the queue and wait for completion. The classic
* timed operation. Returns 0 on success. * timed operation. Returns 0 on success.
*/ */