/* * v3d_runner — minimal Vulkan compute plumbing for V3D 7.1 on Pi 5. * * Factored out of tests/bench_vulkan_dispatch.c so successive kernel * benches can reuse the device/queue/buffer/pipeline machinery * without copy-paste. Kept deliberately small and concrete — no * generality beyond what daedalus-fourier needs. * * License: BSD-2-Clause. */ #ifndef DAEDALUS_V3D_RUNNER_H #define DAEDALUS_V3D_RUNNER_H #include #include #include typedef struct v3d_runner v3d_runner; /* Host-visible SSBO. .mapped is a CPU-side pointer to .size bytes. */ typedef struct { VkBuffer buffer; VkDeviceMemory memory; void *mapped; size_t size; } v3d_buffer; /* Compute pipeline + its descriptor set (one set per pipeline). */ typedef struct { VkPipeline pipeline; VkPipelineLayout layout; VkDescriptorSetLayout ds_layout; VkDescriptorPool pool; VkDescriptorSet desc_set; uint32_t n_ssbos; uint32_t push_const_size; /* Persistent command buffer. Allocated at create-pipeline time; * dispatch sites use v3d_runner_pipeline_cmdbuf_reset() to * vkResetCommandBuffer instead of paying vkAllocateCommandBuffers * per dispatch. Pool flagged RESET_COMMAND_BUFFER_BIT so reset * is permitted. */ VkCommandBuffer cb; } v3d_pipeline; /* * Create runner: Vulkan instance, V3D physical device, logical * device with storageBuffer{8,16}BitAccess features enabled, * compute queue, command pool. * * Returns NULL on failure (writes errors to stderr). */ v3d_runner *v3d_runner_create(void); void v3d_runner_destroy(v3d_runner *r); /* Expose a few internals for code that wants direct vkCmd*. */ VkDevice v3d_runner_device(v3d_runner *r); VkQueue v3d_runner_queue(v3d_runner *r); uint32_t v3d_runner_queue_family(v3d_runner *r); VkCommandPool v3d_runner_cmd_pool(v3d_runner *r); const char *v3d_runner_device_name(v3d_runner *r); /* Storage buffer, HOST_VISIBLE | HOST_COHERENT, mapped on the * host side. The mapping persists for the lifetime of the buffer. * * Returns 0 on success, non-zero on failure. * * NOTE: prefer v3d_runner_acquire_buffer() on the dispatch hot path — * create_buffer/destroy_buffer go straight to vkAllocateMemory each * call, which on V3D7's Mesa stack costs ~10-50us. The acquire/ * release pair pulls from a freelist and pays vkAllocateMemory only * on a cache miss. */ int v3d_runner_create_buffer(v3d_runner *r, size_t size, v3d_buffer *out); void v3d_runner_destroy_buffer(v3d_runner *r, v3d_buffer *buf); /* * Pooled buffer acquisition. Returns a v3d_buffer whose .size is the * smallest power-of-2 >= the requested size (so callers can pool * across similar-sized requests). Backed by HOST_VISIBLE | * HOST_COHERENT memory; mapped pointer is valid. * * On cache hit: zero-cost reuse of a previously-released buffer. * On miss: falls through to v3d_runner_create_buffer(). Release with * v3d_runner_release_buffer(); pool drains in v3d_runner_destroy(). * * Lifetime contract: the returned buffer's .mapped contents are * UNINITIALISED — the previous user's data may still be present. * Callers that need a clean buffer must memset themselves. This is * deliberate; the dispatch hot paths immediately overwrite the * buffer with new coefficients / meta anyway. * * Thread-safety: NOT thread-safe. A daedalus_ctx is single-threaded * by API contract; the pool inherits that constraint. */ int v3d_runner_acquire_buffer(v3d_runner *r, size_t size, v3d_buffer *out); void v3d_runner_release_buffer(v3d_runner *r, v3d_buffer *buf); /* Pool diagnostics: total allocated bytes (sum across all size * classes, including currently-released entries). Useful for * watermark logging. */ size_t v3d_runner_pool_total_bytes(v3d_runner *r); /* Compute pipeline from a SPIR-V file path. The descriptor-set * layout exposes `n_ssbos` storage buffer bindings at binding * indices 0..n_ssbos-1, all visible to the compute stage. A push * constant range of `push_const_size` bytes is added if non-zero. * * The single descriptor set is pre-allocated; bind buffers via * v3d_runner_bind_buffers(). */ int v3d_runner_create_pipeline(v3d_runner *r, const char *spv_path, uint32_t n_ssbos, uint32_t push_const_size, v3d_pipeline *out); void v3d_runner_destroy_pipeline(v3d_runner *r, v3d_pipeline *p); /* Bind SSBOs to the pipeline's descriptor set. `bufs` must have * exactly `p->n_ssbos` entries, in binding order. Idempotent — * rebind freely between dispatches if buffers change. */ int v3d_runner_bind_buffers(v3d_runner *r, v3d_pipeline *p, const v3d_buffer *bufs, uint32_t n); /* Allocate a primary command buffer from the runner's pool. */ VkCommandBuffer v3d_runner_alloc_cmdbuf(v3d_runner *r); /* Reset @p->cb so it can be re-recorded. Returns 0 on success. * Replaces v3d_runner_alloc_cmdbuf() on the dispatch hot path — * vkResetCommandBuffer is O(1) vs vkAllocateCommandBuffers' ~1-5us * driver cost. */ int v3d_runner_pipeline_cmdbuf_reset(v3d_runner *r, v3d_pipeline *p); /* Submit `cb` to the queue and wait for completion. The classic * timed operation. Returns 0 on success. */ int v3d_runner_submit_wait(v3d_runner *r, VkCommandBuffer cb); #endif /* DAEDALUS_V3D_RUNNER_H */