Files
panvk-bifrost/mesa-panvk-bifrost/iter13/applied_state/jm_panvk_vX_cmd_buffer.c
T
marfrit a4e7d8ab90 initial seed: retrofit campaign lineage from local working trees
panvk-bifrost campaigns (r1..r4 Vulkan compositor + r5.video1 Vulkan
video decode) shipped before this repo existed; the deliverable
patches live in marfrit-packages, but the reasoning chain, phase docs,
and source-state evidence lived only in local working trees on the
development host.

This retrofit imports:
- mesa-panvk-bifrost/   — r1..r4 era phase docs (iter1..iter18)
                          (libmali stub blobs at iter18/blob/ excluded
                          — 109MB of RE artifacts replaced with a README
                          pointer)
- mesa-panvk-bifrost-video/ — sibling campaign phase docs + probe
- evidence/             — frozen .tgz source snapshots at each milestone
                          (basis for the 0005 patch diff generation)

Future iterations should branch off here from day one, so each iter is
a commit rather than a snapshot. See [[feedback-session-local-process-pins]]
for the process drift this retrofit closes.

Total: 1.9 MB across 124 files.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-23 05:25:37 +02:00

485 lines
16 KiB
C

/*
* Copyright © 2021 Collabora Ltd.
*
* Derived from tu_cmd_buffer.c which is:
* Copyright © 2016 Red Hat.
* Copyright © 2016 Bas Nieuwenhuizen
* Copyright © 2015 Intel Corporation
*
* SPDX-License-Identifier: MIT
*/
#include "genxml/gen_macros.h"
#include "panvk_buffer.h"
#include "panvk_cmd_alloc.h"
#include "panvk_cmd_buffer.h"
#include "panvk_cmd_desc_state.h"
#include "panvk_cmd_draw.h"
#include "panvk_cmd_fb_preload.h"
#include "panvk_cmd_pool.h"
#include "panvk_cmd_push_constant.h"
#include "panvk_device.h"
#include "panvk_entrypoints.h"
#include "panvk_instance.h"
#include "panvk_meta.h"
#include "panvk_physical_device.h"
#include "panvk_priv_bo.h"
#include "pan_desc.h"
#include "pan_encoder.h"
#include "pan_props.h"
#include "pan_samples.h"
#include "vk_descriptor_update_template.h"
#include "vk_format.h"
static VkResult
panvk_cmd_prepare_fragment_job(struct panvk_cmd_buffer *cmdbuf, uint64_t fbd)
{
const struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info;
struct panvk_batch *batch = cmdbuf->cur_batch;
struct pan_ptr job_ptr = panvk_cmd_alloc_desc(cmdbuf, FRAGMENT_JOB);
if (!job_ptr.gpu)
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
GENX(pan_emit_fragment_job_payload)(fbinfo, fbd, job_ptr.cpu);
pan_section_pack(job_ptr.cpu, FRAGMENT_JOB, HEADER, header) {
header.type = MALI_JOB_TYPE_FRAGMENT;
header.index = 1;
}
pan_jc_add_job(&batch->frag_jc, MALI_JOB_TYPE_FRAGMENT, false, false, 0, 0,
&job_ptr, false);
util_dynarray_append(&batch->jobs, job_ptr.cpu);
return VK_SUCCESS;
}
void
panvk_per_arch(cmd_close_batch)(struct panvk_cmd_buffer *cmdbuf)
{
struct panvk_batch *batch = cmdbuf->cur_batch;
if (!batch)
return;
struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info;
assert(batch);
if (!batch->fb.desc.gpu && !batch->vtc_jc.first_job) {
if (util_dynarray_num_elements(&batch->event_ops,
struct panvk_cmd_event_op) == 0) {
/* Content-less batch, let's drop it */
vk_free(&cmdbuf->vk.pool->alloc, batch);
} else {
/* Batch has no jobs but is needed for synchronization, let's add a
* NULL job so the SUBMIT ioctl doesn't choke on it.
*/
struct pan_ptr ptr = panvk_cmd_alloc_desc(cmdbuf, JOB_HEADER);
if (ptr.gpu) {
util_dynarray_append(&batch->jobs, ptr.cpu);
pan_jc_add_job(&batch->vtc_jc, MALI_JOB_TYPE_NULL, false, false, 0,
0, &ptr, false);
}
list_addtail(&batch->node, &cmdbuf->batches);
}
cmdbuf->cur_batch = NULL;
return;
}
struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
struct panvk_physical_device *phys_dev =
to_panvk_physical_device(dev->vk.physical);
list_addtail(&batch->node, &cmdbuf->batches);
if (batch->tlsinfo.tls.size) {
unsigned thread_tls_alloc =
pan_query_thread_tls_alloc(&phys_dev->kmod.dev->props);
unsigned core_id_range;
pan_query_core_count(&phys_dev->kmod.dev->props, &core_id_range);
unsigned size = pan_get_total_stack_size(batch->tlsinfo.tls.size,
thread_tls_alloc, core_id_range);
batch->tlsinfo.tls.ptr =
panvk_cmd_alloc_dev_mem(cmdbuf, tls, size, 4096).gpu;
}
if (batch->tlsinfo.wls.size) {
assert(batch->wls_total_size);
batch->tlsinfo.wls.ptr =
panvk_cmd_alloc_dev_mem(cmdbuf, tls, batch->wls_total_size, 4096).gpu;
}
if (batch->tls.cpu)
GENX(pan_emit_tls)(&batch->tlsinfo, batch->tls.cpu);
if (batch->fb.desc.cpu) {
panvk_per_arch(cmd_select_tile_size)(cmdbuf);
/* At this point, we should know sample count and the tile size should have
* been calculated */
assert(fbinfo->nr_samples > 0 && fbinfo->tile_size > 0);
fbinfo->sample_positions =
dev->sample_positions->addr.dev +
pan_sample_positions_offset(pan_sample_pattern(fbinfo->nr_samples));
fbinfo->first_provoking_vertex =
cmdbuf->state.gfx.render.first_provoking_vertex != U_TRISTATE_NO;
VkResult result = panvk_per_arch(cmd_fb_preload)(cmdbuf, fbinfo);
if (result != VK_SUCCESS)
return;
uint32_t view_mask = cmdbuf->state.gfx.render.view_mask;
assert(view_mask == 0 || util_bitcount(view_mask) <= batch->fb.layer_count);
uint32_t enabled_layer_count = view_mask ?
util_bitcount(view_mask) :
batch->fb.layer_count;
for (uint32_t i = 0; i < enabled_layer_count; i++) {
uint32_t layer_id = (view_mask != 0) ? u_bit_scan(&view_mask) : i;
VkResult result;
uint64_t fbd = batch->fb.desc.gpu + (batch->fb.desc_stride * layer_id);
result = panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf, layer_id);
if (result != VK_SUCCESS)
break;
fbd |= GENX(pan_emit_fbd)(
&cmdbuf->state.gfx.render.fb.info, layer_id, &batch->tlsinfo,
&batch->tiler.ctx,
batch->fb.desc.cpu + (batch->fb.desc_stride * layer_id));
result = panvk_cmd_prepare_fragment_job(cmdbuf, fbd);
if (result != VK_SUCCESS)
break;
}
}
cmdbuf->cur_batch = NULL;
}
VkResult
panvk_per_arch(cmd_alloc_fb_desc)(struct panvk_cmd_buffer *cmdbuf)
{
struct panvk_batch *batch = cmdbuf->cur_batch;
if (batch->fb.desc.gpu)
return VK_SUCCESS;
const struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info;
bool has_zs_ext = fbinfo->zs.view.zs || fbinfo->zs.view.s;
batch->fb.layer_count = cmdbuf->state.gfx.render.layer_count;
unsigned fbd_size = pan_size(FRAMEBUFFER);
if (has_zs_ext)
fbd_size = ALIGN_POT(fbd_size, pan_alignment(ZS_CRC_EXTENSION)) +
pan_size(ZS_CRC_EXTENSION);
fbd_size = ALIGN_POT(fbd_size, pan_alignment(RENDER_TARGET)) +
(MAX2(fbinfo->rt_count, 1) * pan_size(RENDER_TARGET));
batch->fb.bo_count = cmdbuf->state.gfx.render.fb.bo_count;
memcpy(batch->fb.bos, cmdbuf->state.gfx.render.fb.bos,
batch->fb.bo_count * sizeof(batch->fb.bos[0]));
batch->fb.desc =
panvk_cmd_alloc_dev_mem(cmdbuf, desc, fbd_size * batch->fb.layer_count,
pan_alignment(FRAMEBUFFER));
batch->fb.desc_stride = fbd_size;
memset(&cmdbuf->state.gfx.render.fb.info.bifrost.pre_post.dcds, 0,
sizeof(cmdbuf->state.gfx.render.fb.info.bifrost.pre_post.dcds));
return batch->fb.desc.gpu ? VK_SUCCESS : VK_ERROR_OUT_OF_DEVICE_MEMORY;
}
VkResult
panvk_per_arch(cmd_alloc_tls_desc)(struct panvk_cmd_buffer *cmdbuf, bool gfx)
{
struct panvk_batch *batch = cmdbuf->cur_batch;
assert(batch);
if (!batch->tls.gpu) {
batch->tls = panvk_cmd_alloc_desc(cmdbuf, LOCAL_STORAGE);
if (!batch->tls.gpu)
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
}
return VK_SUCCESS;
}
VkResult
panvk_per_arch(cmd_prepare_tiler_context)(struct panvk_cmd_buffer *cmdbuf,
uint32_t layer_idx)
{
struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
struct panvk_physical_device *phys_dev =
to_panvk_physical_device(cmdbuf->vk.base.device->physical);
struct panvk_batch *batch = cmdbuf->cur_batch;
uint64_t tiler_desc;
if (batch->tiler.ctx_descs.gpu) {
tiler_desc =
batch->tiler.ctx_descs.gpu + (pan_size(TILER_CONTEXT) * layer_idx);
goto out_set_layer_ctx;
}
const struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info;
uint32_t layer_count = cmdbuf->state.gfx.render.layer_count;
batch->tiler.heap_desc = panvk_cmd_alloc_desc(cmdbuf, TILER_HEAP);
batch->tiler.ctx_descs =
panvk_cmd_alloc_desc_array(cmdbuf, layer_count, TILER_CONTEXT);
if (!batch->tiler.heap_desc.gpu || !batch->tiler.ctx_descs.gpu)
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
tiler_desc =
batch->tiler.ctx_descs.gpu + (pan_size(TILER_CONTEXT) * layer_idx);
pan_pack(&batch->tiler.heap_templ, TILER_HEAP, cfg) {
cfg.size = pan_kmod_bo_size(dev->tiler_heap->bo);
cfg.base = dev->tiler_heap->addr.dev;
cfg.bottom = dev->tiler_heap->addr.dev;
cfg.top = cfg.base + cfg.size;
}
pan_pack(&batch->tiler.ctx_templ, TILER_CONTEXT, cfg) {
cfg.hierarchy_mask = panvk_select_tiler_hierarchy_mask(
phys_dev, &cmdbuf->state.gfx, pan_kmod_bo_size(dev->tiler_heap->bo));
cfg.fb_width = fbinfo->width;
cfg.fb_height = fbinfo->height;
cfg.heap = batch->tiler.heap_desc.gpu;
cfg.sample_pattern = pan_sample_pattern(fbinfo->nr_samples);
}
memcpy(batch->tiler.heap_desc.cpu, &batch->tiler.heap_templ,
sizeof(batch->tiler.heap_templ));
struct mali_tiler_context_packed *ctxs = batch->tiler.ctx_descs.cpu;
assert(layer_count > 0);
for (uint32_t i = 0; i < layer_count; i++) {
STATIC_ASSERT(
!(pan_size(TILER_CONTEXT) & (pan_alignment(TILER_CONTEXT) - 1)));
memcpy(&ctxs[i], &batch->tiler.ctx_templ, sizeof(*ctxs));
}
out_set_layer_ctx:
if (PAN_ARCH >= 9)
batch->tiler.ctx.valhall.desc = tiler_desc;
else
batch->tiler.ctx.bifrost.desc = tiler_desc;
return VK_SUCCESS;
}
struct panvk_batch *
panvk_per_arch(cmd_open_batch)(struct panvk_cmd_buffer *cmdbuf)
{
assert(!cmdbuf->cur_batch);
cmdbuf->cur_batch =
vk_zalloc(&cmdbuf->vk.pool->alloc, sizeof(*cmdbuf->cur_batch), 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
cmdbuf->cur_batch->jobs = UTIL_DYNARRAY_INIT;
cmdbuf->cur_batch->event_ops = UTIL_DYNARRAY_INIT;
assert(cmdbuf->cur_batch);
return cmdbuf->cur_batch;
}
VKAPI_ATTR VkResult VKAPI_CALL
panvk_per_arch(EndCommandBuffer)(VkCommandBuffer commandBuffer)
{
VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
panvk_per_arch(cmd_close_batch)(cmdbuf);
panvk_pool_flush_maps(&cmdbuf->desc_pool);
return vk_command_buffer_end(&cmdbuf->vk);
}
VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdPipelineBarrier2)(VkCommandBuffer commandBuffer,
const VkDependencyInfo *pDependencyInfo)
{
VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
/* Caches are flushed/invalidated at batch boundaries for now, nothing to do
* for memory barriers assuming we implement barriers with the creation of a
* new batch.
* FIXME: We can probably do better with a CacheFlush job that has the
* barrier flag set to true.
*/
if (cmdbuf->cur_batch) {
bool preload_fb =
cmdbuf->cur_batch && cmdbuf->cur_batch->vtc_jc.first_tiler;
panvk_per_arch(cmd_close_batch)(cmdbuf);
if (preload_fb)
panvk_per_arch(cmd_preload_fb_after_batch_split)(cmdbuf);
panvk_per_arch(cmd_open_batch)(cmdbuf);
}
for (uint32_t i = 0; i < pDependencyInfo->imageMemoryBarrierCount; i++) {
const VkImageMemoryBarrier2 *barrier = &pDependencyInfo->pImageMemoryBarriers[i];
panvk_per_arch(cmd_transition_image_layout)(commandBuffer, barrier);
}
/* If we had any layout transition dispatches, the batch will be closed at
* this point, therefore establishing the sync between itself and the
* commands that follow.
*/
}
static void
panvk_reset_cmdbuf(struct vk_command_buffer *vk_cmdbuf,
VkCommandBufferResetFlags flags)
{
struct panvk_cmd_buffer *cmdbuf =
container_of(vk_cmdbuf, struct panvk_cmd_buffer, vk);
vk_command_buffer_reset(&cmdbuf->vk);
list_for_each_entry_safe(struct panvk_batch, batch, &cmdbuf->batches, node) {
list_del(&batch->node);
util_dynarray_fini(&batch->jobs);
util_dynarray_fini(&batch->event_ops);
vk_free(&cmdbuf->vk.pool->alloc, batch);
}
panvk_pool_reset(&cmdbuf->desc_pool);
panvk_pool_reset(&cmdbuf->tls_pool);
panvk_pool_reset(&cmdbuf->varying_pool);
panvk_cmd_buffer_obj_list_reset(cmdbuf, push_sets);
memset(&cmdbuf->state, 0, sizeof(cmdbuf->state));
}
static void
panvk_destroy_cmdbuf(struct vk_command_buffer *vk_cmdbuf)
{
struct panvk_cmd_buffer *cmdbuf =
container_of(vk_cmdbuf, struct panvk_cmd_buffer, vk);
struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
list_for_each_entry_safe(struct panvk_batch, batch, &cmdbuf->batches, node) {
list_del(&batch->node);
util_dynarray_fini(&batch->jobs);
util_dynarray_fini(&batch->event_ops);
vk_free(&cmdbuf->vk.pool->alloc, batch);
}
panvk_pool_cleanup(&cmdbuf->desc_pool);
panvk_pool_cleanup(&cmdbuf->tls_pool);
panvk_pool_cleanup(&cmdbuf->varying_pool);
panvk_cmd_buffer_obj_list_cleanup(cmdbuf, push_sets);
vk_command_buffer_finish(&cmdbuf->vk);
vk_free(&dev->vk.alloc, cmdbuf);
}
static VkResult
panvk_create_cmdbuf(struct vk_command_pool *vk_pool, VkCommandBufferLevel level,
struct vk_command_buffer **cmdbuf_out)
{
struct panvk_device *device =
container_of(vk_pool->base.device, struct panvk_device, vk);
struct panvk_cmd_pool *pool =
container_of(vk_pool, struct panvk_cmd_pool, vk);
struct panvk_cmd_buffer *cmdbuf;
cmdbuf = vk_zalloc(&device->vk.alloc, sizeof(*cmdbuf), 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (!cmdbuf)
return panvk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
VkResult result = vk_command_buffer_init(
&pool->vk, &cmdbuf->vk, &panvk_per_arch(cmd_buffer_ops), level);
if (result != VK_SUCCESS) {
vk_free(&device->vk.alloc, cmdbuf);
return result;
}
panvk_cmd_buffer_obj_list_init(cmdbuf, push_sets);
cmdbuf->vk.dynamic_graphics_state.vi = &cmdbuf->state.gfx.dynamic.vi;
cmdbuf->vk.dynamic_graphics_state.ms.sample_locations =
&cmdbuf->state.gfx.dynamic.sl;
struct panvk_pool_properties desc_pool_props = {
.create_flags =
panvk_device_adjust_bo_flags(device, PAN_KMOD_BO_FLAG_WB_MMAP),
.slab_size = 64 * 1024,
.label = "Command buffer descriptor pool",
.prealloc = true,
.owns_bos = true,
.needs_locking = false,
};
panvk_pool_init(&cmdbuf->desc_pool, device, &pool->desc_bo_pool, NULL,
&desc_pool_props);
struct panvk_pool_properties tls_pool_props = {
.create_flags =
panvk_device_adjust_bo_flags(device, PAN_KMOD_BO_FLAG_NO_MMAP),
.slab_size = 64 * 1024,
.label = "TLS pool",
.prealloc = false,
.owns_bos = true,
.needs_locking = false,
};
panvk_pool_init(&cmdbuf->tls_pool, device, &pool->tls_bo_pool, &pool->tls_big_bo_pool,
&tls_pool_props);
struct panvk_pool_properties var_pool_props = {
.create_flags =
panvk_device_adjust_bo_flags(device, PAN_KMOD_BO_FLAG_NO_MMAP),
.slab_size = 64 * 1024,
.label = "Varying pool",
.prealloc = false,
.owns_bos = true,
.needs_locking = false,
};
panvk_pool_init(&cmdbuf->varying_pool, device, &pool->varying_bo_pool, NULL,
&var_pool_props);
list_inithead(&cmdbuf->batches);
*cmdbuf_out = &cmdbuf->vk;
return VK_SUCCESS;
}
const struct vk_command_buffer_ops panvk_per_arch(cmd_buffer_ops) = {
.create = panvk_create_cmdbuf,
.reset = panvk_reset_cmdbuf,
.destroy = panvk_destroy_cmdbuf,
};
VKAPI_ATTR VkResult VKAPI_CALL
panvk_per_arch(BeginCommandBuffer)(VkCommandBuffer commandBuffer,
const VkCommandBufferBeginInfo *pBeginInfo)
{
VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
vk_command_buffer_begin(&cmdbuf->vk, pBeginInfo);
#if PAN_ARCH < 9
/* iter13: clear XFB state on Begin so a reused command buffer does not
* inherit stale xfb.buffer_count / xfb.active / xfb.buffers[] from a
* prior recording. */
memset(&cmdbuf->state.gfx.xfb, 0, sizeof(cmdbuf->state.gfx.xfb));
#endif
return VK_SUCCESS;
}