v3d_runner: persistent per-pipeline command buffer
Phase 2 of the QPU-default substrate campaign — eliminate
vkAllocateCommandBuffers from the dispatch hot path.
Attach a VkCommandBuffer to each v3d_pipeline; it is allocated once in
v3d_runner_create_pipeline() and freed in destroy_pipeline(). The
five dispatch_*_qpu sites switch from v3d_runner_alloc_cmdbuf() to
v3d_runner_pipeline_cmdbuf_reset(): vkResetCommandBuffer is O(1),
versus the driver-side allocation walk. The pool was already created
with VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, so resetting an
individual command buffer is permitted.
Microbench (hertz, Pi 5, kernel 6.18.29, V3D 7.1):
  before (task 160 pool only):
    steady-state p50:  76.44 us
    steady-state mean: 77.95 us
  after (task 160 pool + task 161 persistent cb):
    steady-state p50:  54.56 us
    steady-state mean: 56.00 us
  -> ~28% per-dispatch reduction
The remaining ~54 us of steady-state time is dominated by
vkQueueWaitIdle, shader execution, and the two memcpy calls (in/out)
on the dst buffer; task 162 (dmabuf import for dst) targets the memcpy half.
test_api_idct stays bit-exact across CPU/QPU/AUTO substrates.
Refs daedalus-fourier task #161.
This commit is contained in:
+10
-10
@@ -325,8 +325,8 @@ static int dispatch_idct8_qpu(daedalus_ctx *ctx,
|
||||
._pad = 0,
|
||||
};
|
||||
|
||||
VkCommandBuffer cb = v3d_runner_alloc_cmdbuf(ctx->runner);
|
||||
if (cb == VK_NULL_HANDLE) goto fail;
|
||||
if (v3d_runner_pipeline_cmdbuf_reset(ctx->runner, &ctx->idct8_pipe)) goto fail;
|
||||
VkCommandBuffer cb = ctx->idct8_pipe.cb;
|
||||
VkCommandBufferBeginInfo cbbi = { .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO };
|
||||
vkBeginCommandBuffer(cb, &cbbi);
|
||||
vkCmdBindPipeline(cb, VK_PIPELINE_BIND_POINT_COMPUTE,
|
||||
@@ -442,8 +442,8 @@ static int dispatch_lpf_qpu(daedalus_ctx *ctx, int wd_8,
|
||||
if (v3d_runner_bind_buffers(ctx->runner, p, binds, 2)) goto fail;
|
||||
|
||||
uint32_t wg_count = (uint32_t)((n_edges + 31) / 32);
|
||||
VkCommandBuffer cb = v3d_runner_alloc_cmdbuf(ctx->runner);
|
||||
if (cb == VK_NULL_HANDLE) goto fail;
|
||||
if (v3d_runner_pipeline_cmdbuf_reset(ctx->runner, p)) goto fail;
|
||||
VkCommandBuffer cb = p->cb;
|
||||
VkCommandBufferBeginInfo cbbi = { .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO };
|
||||
vkBeginCommandBuffer(cb, &cbbi);
|
||||
vkCmdBindPipeline(cb, VK_PIPELINE_BIND_POINT_COMPUTE, p->pipeline);
|
||||
@@ -530,8 +530,8 @@ static int dispatch_mc_8h_qpu(daedalus_ctx *ctx,
|
||||
mc_pc pc = { .n_blocks = (uint32_t) n_blocks,
|
||||
.dst_stride_u8 = (uint32_t) dst_stride,
|
||||
.src_stride_u8 = (uint32_t) src_stride };
|
||||
VkCommandBuffer cb = v3d_runner_alloc_cmdbuf(ctx->runner);
|
||||
if (cb == VK_NULL_HANDLE) goto fail;
|
||||
if (v3d_runner_pipeline_cmdbuf_reset(ctx->runner, &ctx->mc8h_pipe)) goto fail;
|
||||
VkCommandBuffer cb = ctx->mc8h_pipe.cb;
|
||||
VkCommandBufferBeginInfo cbbi = { .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO };
|
||||
vkBeginCommandBuffer(cb, &cbbi);
|
||||
vkCmdBindPipeline(cb, VK_PIPELINE_BIND_POINT_COMPUTE, ctx->mc8h_pipe.pipeline);
|
||||
@@ -615,8 +615,8 @@ static int dispatch_cdef_qpu(daedalus_ctx *ctx,
|
||||
cdef_pc pc = { .n_blocks = (uint32_t) n_blocks,
|
||||
.tmp_stride_u16 = 16,
|
||||
.dst_stride_u8 = (uint32_t) dst_stride };
|
||||
VkCommandBuffer cb = v3d_runner_alloc_cmdbuf(ctx->runner);
|
||||
if (cb == VK_NULL_HANDLE) goto fail;
|
||||
if (v3d_runner_pipeline_cmdbuf_reset(ctx->runner, &ctx->cdef_pipe)) goto fail;
|
||||
VkCommandBuffer cb = ctx->cdef_pipe.cb;
|
||||
VkCommandBufferBeginInfo cbbi = { .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO };
|
||||
vkBeginCommandBuffer(cb, &cbbi);
|
||||
vkCmdBindPipeline(cb, VK_PIPELINE_BIND_POINT_COMPUTE, ctx->cdef_pipe.pipeline);
|
||||
@@ -691,8 +691,8 @@ static int dispatch_h264_deblock_qpu(daedalus_ctx *ctx,
|
||||
uint32_t wg_count = (uint32_t)((n_edges + 15) / 16);
|
||||
h264deblock_pc pc = { .n_edges = (uint32_t) n_edges,
|
||||
.dst_stride_u8 = (uint32_t) dst_stride };
|
||||
VkCommandBuffer cb = v3d_runner_alloc_cmdbuf(ctx->runner);
|
||||
if (cb == VK_NULL_HANDLE) goto fail;
|
||||
if (v3d_runner_pipeline_cmdbuf_reset(ctx->runner, &ctx->h264deblock_pipe)) goto fail;
|
||||
VkCommandBuffer cb = ctx->h264deblock_pipe.cb;
|
||||
VkCommandBufferBeginInfo cbbi = { .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO };
|
||||
vkBeginCommandBuffer(cb, &cbbi);
|
||||
vkCmdBindPipeline(cb, VK_PIPELINE_BIND_POINT_COMPUTE, ctx->h264deblock_pipe.pipeline);
|
||||
|
||||
Reference in New Issue
Block a user