v3d_runner: persistent per-pipeline command buffer
Phase 2 of the QPU-default substrate campaign — eliminate
vkAllocateCommandBuffers from the dispatch hot path.
Attaches a VkCommandBuffer to each v3d_pipeline, allocated once in
v3d_runner_create_pipeline() and freed in v3d_runner_destroy_pipeline(). The
five dispatch_*_qpu sites switch from v3d_runner_alloc_cmdbuf() to
v3d_runner_pipeline_cmdbuf_reset() — vkResetCommandBuffer is O(1)
versus the driver-side allocation walk. Pool was already created
with VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT so reset is
permitted.
Microbench (hertz, Pi 5, kernel 6.18.29, V3D 7.1):
before (task 160 pool only):
steady-state p50: 76.44 us
steady-state mean: 77.95 us
after (task 160 pool + task 161 persistent cb):
steady-state p50: 54.56 us
steady-state mean: 56.00 us
-> 28% per-dispatch reduction
The remaining ~54 us steady-state is dominated by vkQueueWaitIdle +
shader execution + the two memcpy(in/out) on the dst buffer — task
162 (dmabuf import for dst) targets the memcpy half.
test_api_idct stays bit-exact across CPU/QPU/AUTO substrates.
Refs daedalus-fourier task #161.
This commit is contained in:
@@ -486,12 +486,27 @@ int v3d_runner_create_pipeline(v3d_runner *r, const char *spv_path,
|
||||
.pSetLayouts = &out->ds_layout,
|
||||
};
|
||||
CHK(vkAllocateDescriptorSets(r->device, &dsai, &out->desc_set));
|
||||
|
||||
/* Persistent command buffer — pool was created with
|
||||
* RESET_COMMAND_BUFFER_BIT (see v3d_runner_create) so dispatch
|
||||
* sites can call vkResetCommandBuffer on this same cb instead
|
||||
* of paying vkAllocateCommandBuffers per call. */
|
||||
VkCommandBufferAllocateInfo cbai = {
|
||||
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
|
||||
.commandPool = r->pool,
|
||||
.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
|
||||
.commandBufferCount = 1,
|
||||
};
|
||||
CHK(vkAllocateCommandBuffers(r->device, &cbai, &out->cb));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void v3d_runner_destroy_pipeline(v3d_runner *r, v3d_pipeline *p)
|
||||
{
|
||||
if (!p || p->pipeline == VK_NULL_HANDLE) return;
|
||||
if (p->cb != VK_NULL_HANDLE)
|
||||
vkFreeCommandBuffers(r->device, r->pool, 1, &p->cb);
|
||||
vkDestroyPipeline(r->device, p->pipeline, NULL);
|
||||
vkDestroyPipelineLayout(r->device, p->layout, NULL);
|
||||
vkDestroyDescriptorPool(r->device, p->pool, NULL); /* frees its set */
|
||||
@@ -499,6 +514,13 @@ void v3d_runner_destroy_pipeline(v3d_runner *r, v3d_pipeline *p)
|
||||
memset(p, 0, sizeof(*p));
|
||||
}
|
||||
|
||||
/*
 * Reset the command buffer stored on a pipeline so it can be recorded again.
 *
 * r - runner handle; accepted for signature symmetry, not used here.
 * p - pipeline whose p->cb is reset.
 *
 * Returns 0 when vkResetCommandBuffer reports VK_SUCCESS. Returns -1 when
 * p is NULL, when p->cb is VK_NULL_HANDLE, or when the reset call reports
 * anything other than VK_SUCCESS.
 */
int v3d_runner_pipeline_cmdbuf_reset(v3d_runner *r, v3d_pipeline *p)
{
    (void) r;

    /* Nothing to reset without a pipeline or without a command buffer. */
    if (!p)
        return -1;
    if (p->cb == VK_NULL_HANDLE)
        return -1;

    /* Flags argument is 0: no reset flags are requested. */
    if (vkResetCommandBuffer(p->cb, 0) != VK_SUCCESS)
        return -1;

    return 0;
}
|
||||
|
||||
int v3d_runner_bind_buffers(v3d_runner *r, v3d_pipeline *p,
|
||||
const v3d_buffer *bufs, uint32_t n)
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user