initial seed: retrofit campaign lineage from local working trees
panvk-bifrost campaigns (r1..r4 Vulkan compositor + r5.video1 Vulkan
video decode) shipped before this repo existed; the deliverable
patches live in marfrit-packages, but the reasoning chain, phase docs,
and source-state evidence lived only in local working trees on the
development host.
This retrofit imports:
- mesa-panvk-bifrost/ — r1..r4 era phase docs (iter1..iter18)
(libmali stub blobs at iter18/blob/ excluded
— 109MB of RE artifacts replaced with a README
pointer)
- mesa-panvk-bifrost-video/ — sibling campaign phase docs + probe
- evidence/ — frozen .tgz source snapshots at each milestone
(basis for the 0005 patch diff generation)
Future iterations should branch off here from day one, so each iter is
a commit rather than a snapshot. See [[feedback-session-local-process-pins]]
for the process drift this retrofit closes.
Total: 1.9 MB across 124 files.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,39 @@
|
||||
# iter13 XFB probe — build glue.
|
||||
|
||||
CC ?= cc
|
||||
CFLAGS ?= -O0 -g -Wall -Wextra -std=c11
|
||||
LDLIBS ?= -lvulkan
|
||||
|
||||
PROBE = probe_xfb
|
||||
NOPROBE = probe_xfb_nodraw
|
||||
SRC = probe_xfb.c
|
||||
NOSRC = probe_xfb_nodraw.c
|
||||
VERT = probe_xfb.vert
|
||||
VSPV = probe_xfb.vert.spv
|
||||
|
||||
all: $(PROBE) $(NOPROBE) $(VSPV)
|
||||
|
||||
$(PROBE): $(SRC)
|
||||
$(CC) $(CFLAGS) -o $@ $< $(LDLIBS)
|
||||
|
||||
$(NOPROBE): $(NOSRC)
|
||||
$(CC) $(CFLAGS) -o $@ $< $(LDLIBS)
|
||||
|
||||
# glslangValidator + xfb-aware compile. The -V flag enables Vulkan SPIR-V output.
|
||||
# xfb_buffer / xfb_offset / xfb_stride decorations are honored when the SPIR-V
|
||||
# is targeted at Vulkan (which is the default for -V).
|
||||
$(VSPV): $(VERT)
|
||||
glslangValidator -V $< -o $@
|
||||
|
||||
run: all
|
||||
PAN_I_WANT_A_BROKEN_VULKAN_DRIVER=1 ./$(PROBE)
|
||||
|
||||
run-patched-mesa: all
|
||||
VK_ICD_FILENAMES=/usr/lib/panvk-bifrost/icd.json \
|
||||
PAN_I_WANT_A_BROKEN_VULKAN_DRIVER=1 \
|
||||
./$(PROBE)
|
||||
|
||||
# Remove every artifact produced by `all`: both probe binaries and the
# compiled vertex shader.
clean:
	rm -f $(PROBE) $(NOPROBE) $(VSPV)
|
||||
|
||||
.PHONY: all run run-patched-mesa clean
|
||||
@@ -0,0 +1,484 @@
|
||||
/*
|
||||
* Copyright © 2021 Collabora Ltd.
|
||||
*
|
||||
* Derived from tu_cmd_buffer.c which is:
|
||||
* Copyright © 2016 Red Hat.
|
||||
* Copyright © 2016 Bas Nieuwenhuizen
|
||||
* Copyright © 2015 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "genxml/gen_macros.h"
|
||||
|
||||
#include "panvk_buffer.h"
|
||||
#include "panvk_cmd_alloc.h"
|
||||
#include "panvk_cmd_buffer.h"
|
||||
#include "panvk_cmd_desc_state.h"
|
||||
#include "panvk_cmd_draw.h"
|
||||
#include "panvk_cmd_fb_preload.h"
|
||||
#include "panvk_cmd_pool.h"
|
||||
#include "panvk_cmd_push_constant.h"
|
||||
#include "panvk_device.h"
|
||||
#include "panvk_entrypoints.h"
|
||||
#include "panvk_instance.h"
|
||||
#include "panvk_meta.h"
|
||||
#include "panvk_physical_device.h"
|
||||
#include "panvk_priv_bo.h"
|
||||
|
||||
#include "pan_desc.h"
|
||||
#include "pan_encoder.h"
|
||||
#include "pan_props.h"
|
||||
#include "pan_samples.h"
|
||||
|
||||
#include "vk_descriptor_update_template.h"
|
||||
#include "vk_format.h"
|
||||
|
||||
static VkResult
|
||||
panvk_cmd_prepare_fragment_job(struct panvk_cmd_buffer *cmdbuf, uint64_t fbd)
|
||||
{
|
||||
const struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info;
|
||||
struct panvk_batch *batch = cmdbuf->cur_batch;
|
||||
struct pan_ptr job_ptr = panvk_cmd_alloc_desc(cmdbuf, FRAGMENT_JOB);
|
||||
|
||||
if (!job_ptr.gpu)
|
||||
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
|
||||
|
||||
GENX(pan_emit_fragment_job_payload)(fbinfo, fbd, job_ptr.cpu);
|
||||
|
||||
pan_section_pack(job_ptr.cpu, FRAGMENT_JOB, HEADER, header) {
|
||||
header.type = MALI_JOB_TYPE_FRAGMENT;
|
||||
header.index = 1;
|
||||
}
|
||||
|
||||
pan_jc_add_job(&batch->frag_jc, MALI_JOB_TYPE_FRAGMENT, false, false, 0, 0,
|
||||
&job_ptr, false);
|
||||
util_dynarray_append(&batch->jobs, job_ptr.cpu);
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
void
|
||||
panvk_per_arch(cmd_close_batch)(struct panvk_cmd_buffer *cmdbuf)
|
||||
{
|
||||
struct panvk_batch *batch = cmdbuf->cur_batch;
|
||||
|
||||
if (!batch)
|
||||
return;
|
||||
|
||||
struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info;
|
||||
|
||||
assert(batch);
|
||||
|
||||
if (!batch->fb.desc.gpu && !batch->vtc_jc.first_job) {
|
||||
if (util_dynarray_num_elements(&batch->event_ops,
|
||||
struct panvk_cmd_event_op) == 0) {
|
||||
/* Content-less batch, let's drop it */
|
||||
vk_free(&cmdbuf->vk.pool->alloc, batch);
|
||||
} else {
|
||||
/* Batch has no jobs but is needed for synchronization, let's add a
|
||||
* NULL job so the SUBMIT ioctl doesn't choke on it.
|
||||
*/
|
||||
struct pan_ptr ptr = panvk_cmd_alloc_desc(cmdbuf, JOB_HEADER);
|
||||
|
||||
if (ptr.gpu) {
|
||||
util_dynarray_append(&batch->jobs, ptr.cpu);
|
||||
pan_jc_add_job(&batch->vtc_jc, MALI_JOB_TYPE_NULL, false, false, 0,
|
||||
0, &ptr, false);
|
||||
}
|
||||
|
||||
list_addtail(&batch->node, &cmdbuf->batches);
|
||||
}
|
||||
cmdbuf->cur_batch = NULL;
|
||||
return;
|
||||
}
|
||||
|
||||
struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
|
||||
struct panvk_physical_device *phys_dev =
|
||||
to_panvk_physical_device(dev->vk.physical);
|
||||
|
||||
list_addtail(&batch->node, &cmdbuf->batches);
|
||||
|
||||
if (batch->tlsinfo.tls.size) {
|
||||
unsigned thread_tls_alloc =
|
||||
pan_query_thread_tls_alloc(&phys_dev->kmod.dev->props);
|
||||
unsigned core_id_range;
|
||||
|
||||
pan_query_core_count(&phys_dev->kmod.dev->props, &core_id_range);
|
||||
|
||||
unsigned size = pan_get_total_stack_size(batch->tlsinfo.tls.size,
|
||||
thread_tls_alloc, core_id_range);
|
||||
batch->tlsinfo.tls.ptr =
|
||||
panvk_cmd_alloc_dev_mem(cmdbuf, tls, size, 4096).gpu;
|
||||
}
|
||||
|
||||
if (batch->tlsinfo.wls.size) {
|
||||
assert(batch->wls_total_size);
|
||||
batch->tlsinfo.wls.ptr =
|
||||
panvk_cmd_alloc_dev_mem(cmdbuf, tls, batch->wls_total_size, 4096).gpu;
|
||||
}
|
||||
|
||||
if (batch->tls.cpu)
|
||||
GENX(pan_emit_tls)(&batch->tlsinfo, batch->tls.cpu);
|
||||
|
||||
if (batch->fb.desc.cpu) {
|
||||
panvk_per_arch(cmd_select_tile_size)(cmdbuf);
|
||||
|
||||
/* At this point, we should know sample count and the tile size should have
|
||||
* been calculated */
|
||||
assert(fbinfo->nr_samples > 0 && fbinfo->tile_size > 0);
|
||||
|
||||
fbinfo->sample_positions =
|
||||
dev->sample_positions->addr.dev +
|
||||
pan_sample_positions_offset(pan_sample_pattern(fbinfo->nr_samples));
|
||||
fbinfo->first_provoking_vertex =
|
||||
cmdbuf->state.gfx.render.first_provoking_vertex != U_TRISTATE_NO;
|
||||
|
||||
VkResult result = panvk_per_arch(cmd_fb_preload)(cmdbuf, fbinfo);
|
||||
if (result != VK_SUCCESS)
|
||||
return;
|
||||
|
||||
uint32_t view_mask = cmdbuf->state.gfx.render.view_mask;
|
||||
assert(view_mask == 0 || util_bitcount(view_mask) <= batch->fb.layer_count);
|
||||
uint32_t enabled_layer_count = view_mask ?
|
||||
util_bitcount(view_mask) :
|
||||
batch->fb.layer_count;
|
||||
|
||||
for (uint32_t i = 0; i < enabled_layer_count; i++) {
|
||||
uint32_t layer_id = (view_mask != 0) ? u_bit_scan(&view_mask) : i;
|
||||
VkResult result;
|
||||
|
||||
uint64_t fbd = batch->fb.desc.gpu + (batch->fb.desc_stride * layer_id);
|
||||
|
||||
result = panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf, layer_id);
|
||||
if (result != VK_SUCCESS)
|
||||
break;
|
||||
|
||||
fbd |= GENX(pan_emit_fbd)(
|
||||
&cmdbuf->state.gfx.render.fb.info, layer_id, &batch->tlsinfo,
|
||||
&batch->tiler.ctx,
|
||||
batch->fb.desc.cpu + (batch->fb.desc_stride * layer_id));
|
||||
|
||||
result = panvk_cmd_prepare_fragment_job(cmdbuf, fbd);
|
||||
if (result != VK_SUCCESS)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
cmdbuf->cur_batch = NULL;
|
||||
}
|
||||
|
||||
VkResult
|
||||
panvk_per_arch(cmd_alloc_fb_desc)(struct panvk_cmd_buffer *cmdbuf)
|
||||
{
|
||||
struct panvk_batch *batch = cmdbuf->cur_batch;
|
||||
|
||||
if (batch->fb.desc.gpu)
|
||||
return VK_SUCCESS;
|
||||
|
||||
const struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info;
|
||||
bool has_zs_ext = fbinfo->zs.view.zs || fbinfo->zs.view.s;
|
||||
batch->fb.layer_count = cmdbuf->state.gfx.render.layer_count;
|
||||
unsigned fbd_size = pan_size(FRAMEBUFFER);
|
||||
|
||||
if (has_zs_ext)
|
||||
fbd_size = ALIGN_POT(fbd_size, pan_alignment(ZS_CRC_EXTENSION)) +
|
||||
pan_size(ZS_CRC_EXTENSION);
|
||||
|
||||
fbd_size = ALIGN_POT(fbd_size, pan_alignment(RENDER_TARGET)) +
|
||||
(MAX2(fbinfo->rt_count, 1) * pan_size(RENDER_TARGET));
|
||||
|
||||
batch->fb.bo_count = cmdbuf->state.gfx.render.fb.bo_count;
|
||||
memcpy(batch->fb.bos, cmdbuf->state.gfx.render.fb.bos,
|
||||
batch->fb.bo_count * sizeof(batch->fb.bos[0]));
|
||||
|
||||
batch->fb.desc =
|
||||
panvk_cmd_alloc_dev_mem(cmdbuf, desc, fbd_size * batch->fb.layer_count,
|
||||
pan_alignment(FRAMEBUFFER));
|
||||
batch->fb.desc_stride = fbd_size;
|
||||
|
||||
memset(&cmdbuf->state.gfx.render.fb.info.bifrost.pre_post.dcds, 0,
|
||||
sizeof(cmdbuf->state.gfx.render.fb.info.bifrost.pre_post.dcds));
|
||||
|
||||
return batch->fb.desc.gpu ? VK_SUCCESS : VK_ERROR_OUT_OF_DEVICE_MEMORY;
|
||||
}
|
||||
|
||||
VkResult
|
||||
panvk_per_arch(cmd_alloc_tls_desc)(struct panvk_cmd_buffer *cmdbuf, bool gfx)
|
||||
{
|
||||
struct panvk_batch *batch = cmdbuf->cur_batch;
|
||||
|
||||
assert(batch);
|
||||
if (!batch->tls.gpu) {
|
||||
batch->tls = panvk_cmd_alloc_desc(cmdbuf, LOCAL_STORAGE);
|
||||
if (!batch->tls.gpu)
|
||||
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
|
||||
}
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
VkResult
|
||||
panvk_per_arch(cmd_prepare_tiler_context)(struct panvk_cmd_buffer *cmdbuf,
|
||||
uint32_t layer_idx)
|
||||
{
|
||||
struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
|
||||
struct panvk_physical_device *phys_dev =
|
||||
to_panvk_physical_device(cmdbuf->vk.base.device->physical);
|
||||
struct panvk_batch *batch = cmdbuf->cur_batch;
|
||||
uint64_t tiler_desc;
|
||||
|
||||
if (batch->tiler.ctx_descs.gpu) {
|
||||
tiler_desc =
|
||||
batch->tiler.ctx_descs.gpu + (pan_size(TILER_CONTEXT) * layer_idx);
|
||||
goto out_set_layer_ctx;
|
||||
}
|
||||
|
||||
const struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info;
|
||||
uint32_t layer_count = cmdbuf->state.gfx.render.layer_count;
|
||||
batch->tiler.heap_desc = panvk_cmd_alloc_desc(cmdbuf, TILER_HEAP);
|
||||
batch->tiler.ctx_descs =
|
||||
panvk_cmd_alloc_desc_array(cmdbuf, layer_count, TILER_CONTEXT);
|
||||
if (!batch->tiler.heap_desc.gpu || !batch->tiler.ctx_descs.gpu)
|
||||
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
|
||||
|
||||
tiler_desc =
|
||||
batch->tiler.ctx_descs.gpu + (pan_size(TILER_CONTEXT) * layer_idx);
|
||||
|
||||
pan_pack(&batch->tiler.heap_templ, TILER_HEAP, cfg) {
|
||||
cfg.size = pan_kmod_bo_size(dev->tiler_heap->bo);
|
||||
cfg.base = dev->tiler_heap->addr.dev;
|
||||
cfg.bottom = dev->tiler_heap->addr.dev;
|
||||
cfg.top = cfg.base + cfg.size;
|
||||
}
|
||||
|
||||
pan_pack(&batch->tiler.ctx_templ, TILER_CONTEXT, cfg) {
|
||||
cfg.hierarchy_mask = panvk_select_tiler_hierarchy_mask(
|
||||
phys_dev, &cmdbuf->state.gfx, pan_kmod_bo_size(dev->tiler_heap->bo));
|
||||
cfg.fb_width = fbinfo->width;
|
||||
cfg.fb_height = fbinfo->height;
|
||||
cfg.heap = batch->tiler.heap_desc.gpu;
|
||||
cfg.sample_pattern = pan_sample_pattern(fbinfo->nr_samples);
|
||||
}
|
||||
|
||||
memcpy(batch->tiler.heap_desc.cpu, &batch->tiler.heap_templ,
|
||||
sizeof(batch->tiler.heap_templ));
|
||||
|
||||
struct mali_tiler_context_packed *ctxs = batch->tiler.ctx_descs.cpu;
|
||||
|
||||
assert(layer_count > 0);
|
||||
for (uint32_t i = 0; i < layer_count; i++) {
|
||||
STATIC_ASSERT(
|
||||
!(pan_size(TILER_CONTEXT) & (pan_alignment(TILER_CONTEXT) - 1)));
|
||||
|
||||
memcpy(&ctxs[i], &batch->tiler.ctx_templ, sizeof(*ctxs));
|
||||
}
|
||||
|
||||
out_set_layer_ctx:
|
||||
if (PAN_ARCH >= 9)
|
||||
batch->tiler.ctx.valhall.desc = tiler_desc;
|
||||
else
|
||||
batch->tiler.ctx.bifrost.desc = tiler_desc;
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
struct panvk_batch *
|
||||
panvk_per_arch(cmd_open_batch)(struct panvk_cmd_buffer *cmdbuf)
|
||||
{
|
||||
assert(!cmdbuf->cur_batch);
|
||||
cmdbuf->cur_batch =
|
||||
vk_zalloc(&cmdbuf->vk.pool->alloc, sizeof(*cmdbuf->cur_batch), 8,
|
||||
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
|
||||
cmdbuf->cur_batch->jobs = UTIL_DYNARRAY_INIT;
|
||||
cmdbuf->cur_batch->event_ops = UTIL_DYNARRAY_INIT;
|
||||
assert(cmdbuf->cur_batch);
|
||||
return cmdbuf->cur_batch;
|
||||
}
|
||||
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
panvk_per_arch(EndCommandBuffer)(VkCommandBuffer commandBuffer)
|
||||
{
|
||||
VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
|
||||
|
||||
panvk_per_arch(cmd_close_batch)(cmdbuf);
|
||||
|
||||
panvk_pool_flush_maps(&cmdbuf->desc_pool);
|
||||
|
||||
return vk_command_buffer_end(&cmdbuf->vk);
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
panvk_per_arch(CmdPipelineBarrier2)(VkCommandBuffer commandBuffer,
|
||||
const VkDependencyInfo *pDependencyInfo)
|
||||
{
|
||||
VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
|
||||
|
||||
/* Caches are flushed/invalidated at batch boundaries for now, nothing to do
|
||||
* for memory barriers assuming we implement barriers with the creation of a
|
||||
* new batch.
|
||||
* FIXME: We can probably do better with a CacheFlush job that has the
|
||||
* barrier flag set to true.
|
||||
*/
|
||||
if (cmdbuf->cur_batch) {
|
||||
bool preload_fb =
|
||||
cmdbuf->cur_batch && cmdbuf->cur_batch->vtc_jc.first_tiler;
|
||||
|
||||
panvk_per_arch(cmd_close_batch)(cmdbuf);
|
||||
|
||||
if (preload_fb)
|
||||
panvk_per_arch(cmd_preload_fb_after_batch_split)(cmdbuf);
|
||||
|
||||
panvk_per_arch(cmd_open_batch)(cmdbuf);
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < pDependencyInfo->imageMemoryBarrierCount; i++) {
|
||||
const VkImageMemoryBarrier2 *barrier = &pDependencyInfo->pImageMemoryBarriers[i];
|
||||
|
||||
panvk_per_arch(cmd_transition_image_layout)(commandBuffer, barrier);
|
||||
}
|
||||
|
||||
/* If we had any layout transition dispatches, the batch will be closed at
|
||||
* this point, therefore establishing the sync between itself and the
|
||||
* commands that follow.
|
||||
*/
|
||||
}
|
||||
|
||||
static void
|
||||
panvk_reset_cmdbuf(struct vk_command_buffer *vk_cmdbuf,
|
||||
VkCommandBufferResetFlags flags)
|
||||
{
|
||||
struct panvk_cmd_buffer *cmdbuf =
|
||||
container_of(vk_cmdbuf, struct panvk_cmd_buffer, vk);
|
||||
|
||||
vk_command_buffer_reset(&cmdbuf->vk);
|
||||
|
||||
list_for_each_entry_safe(struct panvk_batch, batch, &cmdbuf->batches, node) {
|
||||
list_del(&batch->node);
|
||||
util_dynarray_fini(&batch->jobs);
|
||||
util_dynarray_fini(&batch->event_ops);
|
||||
|
||||
vk_free(&cmdbuf->vk.pool->alloc, batch);
|
||||
}
|
||||
|
||||
panvk_pool_reset(&cmdbuf->desc_pool);
|
||||
panvk_pool_reset(&cmdbuf->tls_pool);
|
||||
panvk_pool_reset(&cmdbuf->varying_pool);
|
||||
panvk_cmd_buffer_obj_list_reset(cmdbuf, push_sets);
|
||||
|
||||
memset(&cmdbuf->state, 0, sizeof(cmdbuf->state));
|
||||
}
|
||||
|
||||
static void
|
||||
panvk_destroy_cmdbuf(struct vk_command_buffer *vk_cmdbuf)
|
||||
{
|
||||
struct panvk_cmd_buffer *cmdbuf =
|
||||
container_of(vk_cmdbuf, struct panvk_cmd_buffer, vk);
|
||||
struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
|
||||
|
||||
list_for_each_entry_safe(struct panvk_batch, batch, &cmdbuf->batches, node) {
|
||||
list_del(&batch->node);
|
||||
util_dynarray_fini(&batch->jobs);
|
||||
util_dynarray_fini(&batch->event_ops);
|
||||
|
||||
vk_free(&cmdbuf->vk.pool->alloc, batch);
|
||||
}
|
||||
|
||||
panvk_pool_cleanup(&cmdbuf->desc_pool);
|
||||
panvk_pool_cleanup(&cmdbuf->tls_pool);
|
||||
panvk_pool_cleanup(&cmdbuf->varying_pool);
|
||||
panvk_cmd_buffer_obj_list_cleanup(cmdbuf, push_sets);
|
||||
vk_command_buffer_finish(&cmdbuf->vk);
|
||||
vk_free(&dev->vk.alloc, cmdbuf);
|
||||
}
|
||||
|
||||
static VkResult
|
||||
panvk_create_cmdbuf(struct vk_command_pool *vk_pool, VkCommandBufferLevel level,
|
||||
struct vk_command_buffer **cmdbuf_out)
|
||||
{
|
||||
struct panvk_device *device =
|
||||
container_of(vk_pool->base.device, struct panvk_device, vk);
|
||||
struct panvk_cmd_pool *pool =
|
||||
container_of(vk_pool, struct panvk_cmd_pool, vk);
|
||||
struct panvk_cmd_buffer *cmdbuf;
|
||||
|
||||
cmdbuf = vk_zalloc(&device->vk.alloc, sizeof(*cmdbuf), 8,
|
||||
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
|
||||
if (!cmdbuf)
|
||||
return panvk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
VkResult result = vk_command_buffer_init(
|
||||
&pool->vk, &cmdbuf->vk, &panvk_per_arch(cmd_buffer_ops), level);
|
||||
if (result != VK_SUCCESS) {
|
||||
vk_free(&device->vk.alloc, cmdbuf);
|
||||
return result;
|
||||
}
|
||||
|
||||
panvk_cmd_buffer_obj_list_init(cmdbuf, push_sets);
|
||||
cmdbuf->vk.dynamic_graphics_state.vi = &cmdbuf->state.gfx.dynamic.vi;
|
||||
cmdbuf->vk.dynamic_graphics_state.ms.sample_locations =
|
||||
&cmdbuf->state.gfx.dynamic.sl;
|
||||
|
||||
struct panvk_pool_properties desc_pool_props = {
|
||||
.create_flags =
|
||||
panvk_device_adjust_bo_flags(device, PAN_KMOD_BO_FLAG_WB_MMAP),
|
||||
.slab_size = 64 * 1024,
|
||||
.label = "Command buffer descriptor pool",
|
||||
.prealloc = true,
|
||||
.owns_bos = true,
|
||||
.needs_locking = false,
|
||||
};
|
||||
panvk_pool_init(&cmdbuf->desc_pool, device, &pool->desc_bo_pool, NULL,
|
||||
&desc_pool_props);
|
||||
|
||||
struct panvk_pool_properties tls_pool_props = {
|
||||
.create_flags =
|
||||
panvk_device_adjust_bo_flags(device, PAN_KMOD_BO_FLAG_NO_MMAP),
|
||||
.slab_size = 64 * 1024,
|
||||
.label = "TLS pool",
|
||||
.prealloc = false,
|
||||
.owns_bos = true,
|
||||
.needs_locking = false,
|
||||
};
|
||||
panvk_pool_init(&cmdbuf->tls_pool, device, &pool->tls_bo_pool, &pool->tls_big_bo_pool,
|
||||
&tls_pool_props);
|
||||
|
||||
struct panvk_pool_properties var_pool_props = {
|
||||
.create_flags =
|
||||
panvk_device_adjust_bo_flags(device, PAN_KMOD_BO_FLAG_NO_MMAP),
|
||||
.slab_size = 64 * 1024,
|
||||
.label = "Varying pool",
|
||||
.prealloc = false,
|
||||
.owns_bos = true,
|
||||
.needs_locking = false,
|
||||
};
|
||||
panvk_pool_init(&cmdbuf->varying_pool, device, &pool->varying_bo_pool, NULL,
|
||||
&var_pool_props);
|
||||
|
||||
list_inithead(&cmdbuf->batches);
|
||||
*cmdbuf_out = &cmdbuf->vk;
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
const struct vk_command_buffer_ops panvk_per_arch(cmd_buffer_ops) = {
|
||||
.create = panvk_create_cmdbuf,
|
||||
.reset = panvk_reset_cmdbuf,
|
||||
.destroy = panvk_destroy_cmdbuf,
|
||||
};
|
||||
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
panvk_per_arch(BeginCommandBuffer)(VkCommandBuffer commandBuffer,
|
||||
const VkCommandBufferBeginInfo *pBeginInfo)
|
||||
{
|
||||
VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
|
||||
|
||||
vk_command_buffer_begin(&cmdbuf->vk, pBeginInfo);
|
||||
|
||||
#if PAN_ARCH < 9
|
||||
/* iter13: clear XFB state on Begin so a reused command buffer does not
|
||||
* inherit stale xfb.buffer_count / xfb.active / xfb.buffers[] from a
|
||||
* prior recording. */
|
||||
memset(&cmdbuf->state.gfx.xfb, 0, sizeof(cmdbuf->state.gfx.xfb));
|
||||
#endif
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,111 @@
|
||||
/*
|
||||
* Copyright © 2026 mfritsche / claude-noether
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* iter13: VK_EXT_transform_feedback command handlers for the JM
|
||||
* architecture path (Bifrost v6/v7 + Valhall-JM v9).
|
||||
*
|
||||
* The runtime contract:
|
||||
* - vkCmdBindTransformFeedbackBuffersEXT: stash (gpu_addr, offset, size)
|
||||
* for each slot into cmdbuf->state.gfx.xfb.buffers[].
|
||||
* - vkCmdBeginTransformFeedbackEXT: set cmdbuf->state.gfx.xfb.active = true.
|
||||
 *     No explicit dirty marking is done: the per-draw sysval update detects
 *     the change and re-emits vs.xfb_address[].
|
||||
* - vkCmdEndTransformFeedbackEXT: set active = false.
|
||||
*
|
||||
* Counter buffers (firstCounterBuffer/counterBufferCount/pCounterBuffers/
|
||||
* pCounterBufferOffsets) are accepted by API but ignored — v1 doesn't
|
||||
* support pause/resume. transformFeedbackDraw is advertised as false.
|
||||
*
|
||||
* Per-draw integration: jm/panvk_vX_cmd_draw.c reads cmdbuf->state.gfx.xfb
|
||||
* and populates vs.xfb_address[i] for shader use. The pan_nir_lower_xfb
|
||||
* pass in panvk_vX_shader.c emits nir_load_xfb_address(i) which lowers
|
||||
* (via panvk_vX_shader.c sysval handler) to a load from the per-draw
|
||||
* sysval push area.
|
||||
*/
|
||||
|
||||
#include "vk_log.h"
|
||||
#include "util/log.h"
|
||||
|
||||
#include "panvk_cmd_buffer.h"
|
||||
#include "panvk_cmd_draw.h"
|
||||
#include "panvk_buffer.h"
|
||||
#include "panvk_entrypoints.h"
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
panvk_per_arch(CmdBindTransformFeedbackBuffersEXT)(
|
||||
VkCommandBuffer commandBuffer,
|
||||
uint32_t firstBinding,
|
||||
uint32_t bindingCount,
|
||||
const VkBuffer *pBuffers,
|
||||
const VkDeviceSize *pOffsets,
|
||||
const VkDeviceSize *pSizes)
|
||||
{
|
||||
VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
|
||||
struct panvk_cmd_graphics_state *gfx = &cmdbuf->state.gfx;
|
||||
|
||||
for (uint32_t i = 0; i < bindingCount; i++) {
|
||||
uint32_t slot = firstBinding + i;
|
||||
if (slot >= 4)
|
||||
continue;
|
||||
|
||||
VK_FROM_HANDLE(panvk_buffer, buf, pBuffers[i]);
|
||||
gfx->xfb.buffers[slot].addr = panvk_buffer_gpu_ptr(buf, 0);
|
||||
gfx->xfb.buffers[slot].offset = pOffsets[i];
|
||||
gfx->xfb.buffers[slot].size =
|
||||
(pSizes != NULL && pSizes[i] != VK_WHOLE_SIZE)
|
||||
? pSizes[i]
|
||||
: (buf->vk.size - pOffsets[i]);
|
||||
}
|
||||
|
||||
if (firstBinding + bindingCount > gfx->xfb.buffer_count)
|
||||
gfx->xfb.buffer_count = firstBinding + bindingCount;
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
panvk_per_arch(CmdBeginTransformFeedbackEXT)(
|
||||
VkCommandBuffer commandBuffer,
|
||||
uint32_t firstCounterBuffer,
|
||||
uint32_t counterBufferCount,
|
||||
const VkBuffer *pCounterBuffers,
|
||||
const VkDeviceSize *pCounterBufferOffsets)
|
||||
{
|
||||
VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
|
||||
struct panvk_cmd_graphics_state *gfx = &cmdbuf->state.gfx;
|
||||
|
||||
/* Counter buffers ignored in v1 — see VkPhysicalDeviceTransformFeedback
|
||||
* PropertiesEXT.transformFeedbackDraw = false in panvk_vX_physical_device.c.
|
||||
* App is spec-compliant if it does not pass counter buffers (which our
|
||||
* features advertisement allows), but warn loudly if it does so we do not
|
||||
* silently produce wrong capture state. */
|
||||
(void)firstCounterBuffer;
|
||||
(void)pCounterBufferOffsets;
|
||||
if (counterBufferCount > 0 && pCounterBuffers != NULL) {
|
||||
mesa_logw("panvk: CmdBeginTransformFeedbackEXT: counter buffers not "
|
||||
"implemented (transformFeedbackDraw=false); XFB resume will "
|
||||
"restart at buffer offset 0");
|
||||
}
|
||||
|
||||
gfx->xfb.active = true;
|
||||
/* Per-draw set_gfx_sysval picks up the change automatically — no
|
||||
* explicit dirty marking required (set_gfx_sysval uses memcmp +
|
||||
* BITSET to detect state diffs and re-emit sysvals). */
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
panvk_per_arch(CmdEndTransformFeedbackEXT)(
|
||||
VkCommandBuffer commandBuffer,
|
||||
uint32_t firstCounterBuffer,
|
||||
uint32_t counterBufferCount,
|
||||
const VkBuffer *pCounterBuffers,
|
||||
const VkDeviceSize *pCounterBufferOffsets)
|
||||
{
|
||||
VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
|
||||
struct panvk_cmd_graphics_state *gfx = &cmdbuf->state.gfx;
|
||||
|
||||
(void)firstCounterBuffer;
|
||||
(void)counterBufferCount;
|
||||
(void)pCounterBuffers;
|
||||
(void)pCounterBufferOffsets;
|
||||
|
||||
gfx->xfb.active = false;
|
||||
}
|
||||
@@ -0,0 +1,275 @@
|
||||
# Copyright © 2021 Collabora Ltd.
|
||||
#
|
||||
# Derived from the freedreno driver which is:
|
||||
# Copyright © 2017 Intel Corporation
|
||||
# SPDX-License-Identifier: MIT
|
||||
|
||||
panvk_entrypoints = custom_target(
|
||||
'panvk_entrypoints.[ch]',
|
||||
input : [vk_entrypoints_gen, vk_api_xml],
|
||||
output : ['panvk_entrypoints.h', 'panvk_entrypoints.c'],
|
||||
command : [
|
||||
prog_python, '@INPUT0@', '--xml', '@INPUT1@', '--proto', '--weak',
|
||||
'--out-h', '@OUTPUT0@', '--out-c', '@OUTPUT1@', '--prefix', 'panvk',
|
||||
'--device-prefix', 'panvk_v6', '--device-prefix', 'panvk_v7',
|
||||
'--device-prefix', 'panvk_v9', '--device-prefix', 'panvk_v10',
|
||||
'--device-prefix', 'panvk_v12', '--device-prefix', 'panvk_v13',
|
||||
'--beta', with_vulkan_beta.to_string()
|
||||
],
|
||||
depend_files : vk_entrypoints_gen_depend_files,
|
||||
)
|
||||
|
||||
panvk_tracepoints = custom_target(
|
||||
'panvk_tracepoints.[ch]',
|
||||
input: 'panvk_tracepoints.py',
|
||||
output: ['panvk_tracepoints.h',
|
||||
'panvk_tracepoints_perfetto.h',
|
||||
'panvk_tracepoints.c'],
|
||||
command: [
|
||||
prog_python, '@INPUT@',
|
||||
'--import-path', join_paths(dir_source_root, 'src/util/perf/'),
|
||||
'--utrace-hdr', '@OUTPUT0@',
|
||||
'--perfetto-hdr', '@OUTPUT1@',
|
||||
'--utrace-src', '@OUTPUT2@',
|
||||
],
|
||||
depend_files: u_trace_py,
|
||||
)
|
||||
|
||||
libpanvk_files = files(
|
||||
'panvk_buffer.c',
|
||||
'panvk_cmd_pool.c',
|
||||
'panvk_device_memory.c',
|
||||
'panvk_host_copy.c',
|
||||
'panvk_image.c',
|
||||
'panvk_instance.c',
|
||||
'panvk_mempool.c',
|
||||
'panvk_physical_device.c',
|
||||
'panvk_priv_bo.c',
|
||||
'panvk_sparse.c',
|
||||
'panvk_utrace.c',
|
||||
'panvk_wsi.c',
|
||||
)
|
||||
libpanvk_files += [sha1_h]
|
||||
|
||||
panvk_deps = []
|
||||
panvk_flags = []
|
||||
panvk_per_arch_libs = []
|
||||
|
||||
bifrost_archs = [6, 7]
|
||||
bifrost_inc_dir = ['bifrost']
|
||||
bifrost_files = [
|
||||
'bifrost/panvk_vX_meta_desc_copy.c',
|
||||
]
|
||||
|
||||
valhall_archs = [9, 10]
|
||||
valhall_inc_dir = ['valhall']
|
||||
valhall_files = []
|
||||
|
||||
fifthgen_archs = [12, 13]
|
||||
fifthgen_inc_dir = ['fifthgen']
|
||||
fifthgen_files = []
|
||||
|
||||
# Job-manager (JM) backend: the architectures it is built for, its include
# directory and its per-arch sources (kept in alphabetical order).
jm_archs = [6, 7]
jm_inc_dir = ['jm']
jm_files = [
  'jm/panvk_vX_bind_queue.c',
  'jm/panvk_vX_cmd_buffer.c',
  'jm/panvk_vX_cmd_dispatch.c',
  'jm/panvk_vX_cmd_draw.c',
  'jm/panvk_vX_cmd_event.c',
  'jm/panvk_vX_cmd_precomp.c',
  'jm/panvk_vX_cmd_query.c',
  'jm/panvk_vX_cmd_xfb.c', # iter13
  'jm/panvk_vX_event.c',
  'jm/panvk_vX_gpu_queue.c',
]
|
||||
|
||||
csf_archs = [10, 12, 13]
|
||||
csf_inc_dir = ['csf']
|
||||
csf_files = [
|
||||
'csf/panvk_vX_bind_queue.c',
|
||||
'csf/panvk_vX_cmd_buffer.c',
|
||||
'csf/panvk_vX_cmd_dispatch.c',
|
||||
'csf/panvk_vX_cmd_draw.c',
|
||||
'csf/panvk_vX_cmd_event.c',
|
||||
'csf/panvk_vX_cmd_query.c',
|
||||
'csf/panvk_vX_cmd_precomp.c',
|
||||
'csf/panvk_vX_event.c',
|
||||
'csf/panvk_vX_exception_handler.c',
|
||||
'csf/panvk_vX_gpu_queue.c',
|
||||
'csf/panvk_vX_instr.c',
|
||||
'csf/panvk_vX_utrace.c',
|
||||
]
|
||||
|
||||
common_per_arch_files = [
|
||||
panvk_entrypoints[0],
|
||||
panvk_tracepoints[0],
|
||||
'panvk_vX_blend.c',
|
||||
'panvk_vX_buffer_view.c',
|
||||
'panvk_vX_cmd_fb_preload.c',
|
||||
'panvk_vX_cmd_desc_state.c',
|
||||
'panvk_vX_cmd_dispatch.c',
|
||||
'panvk_vX_cmd_draw.c',
|
||||
'panvk_vX_cmd_meta.c',
|
||||
'panvk_vX_cmd_push_constant.c',
|
||||
'panvk_vX_descriptor_set.c',
|
||||
'panvk_vX_descriptor_set_layout.c',
|
||||
'panvk_vX_device.c',
|
||||
'panvk_vX_physical_device.c',
|
||||
'panvk_vX_precomp_cache.c',
|
||||
'panvk_vX_query_pool.c',
|
||||
'panvk_vX_image_view.c',
|
||||
'panvk_vX_nir_lower_descriptors.c',
|
||||
'panvk_vX_nir_lower_input_attachment_loads.c',
|
||||
'panvk_vX_sampler.c',
|
||||
'panvk_vX_shader.c',
|
||||
sha1_h,
|
||||
]
|
||||
|
||||
foreach arch : [6, 7, 10, 12, 13]
|
||||
per_arch_files = common_per_arch_files
|
||||
inc_panvk_per_arch = []
|
||||
|
||||
if arch in bifrost_archs
|
||||
inc_panvk_per_arch += bifrost_inc_dir
|
||||
per_arch_files += bifrost_files
|
||||
elif arch in valhall_archs
|
||||
inc_panvk_per_arch += valhall_inc_dir
|
||||
per_arch_files += valhall_files
|
||||
elif arch in fifthgen_archs
|
||||
inc_panvk_per_arch += fifthgen_inc_dir
|
||||
per_arch_files += fifthgen_files
|
||||
endif
|
||||
|
||||
if arch in jm_archs
|
||||
inc_panvk_per_arch += jm_inc_dir
|
||||
per_arch_files += jm_files
|
||||
elif arch in csf_archs
|
||||
inc_panvk_per_arch += csf_inc_dir
|
||||
per_arch_files += csf_files
|
||||
endif
|
||||
|
||||
panvk_per_arch_libs += static_library(
|
||||
'panvk_v@0@'.format(arch),
|
||||
per_arch_files,
|
||||
include_directories : [
|
||||
inc_include,
|
||||
inc_src,
|
||||
inc_panfrost,
|
||||
inc_panvk_per_arch,
|
||||
],
|
||||
dependencies : [
|
||||
idep_nir_headers,
|
||||
idep_pan_packers,
|
||||
idep_vulkan_util_headers,
|
||||
idep_vulkan_runtime_headers,
|
||||
idep_vulkan_wsi_headers,
|
||||
idep_mesautil,
|
||||
dep_libdrm,
|
||||
dep_valgrind,
|
||||
idep_libpan_per_arch[arch.to_string()],
|
||||
],
|
||||
c_args : [no_override_init_args, panvk_flags, '-DPAN_ARCH=@0@'.format(arch)],
|
||||
gnu_symbol_visibility : 'hidden',
|
||||
)
|
||||
endforeach
|
||||
|
||||
if with_perfetto
|
||||
panvk_deps += dep_perfetto
|
||||
libpanvk_files += ['panvk_utrace_perfetto.cc']
|
||||
endif
|
||||
|
||||
if with_platform_wayland
|
||||
panvk_deps += dep_wayland_client
|
||||
endif
|
||||
|
||||
if with_platform_android
|
||||
libpanvk_files += files('panvk_android.c')
|
||||
endif
|
||||
|
||||
libvulkan_panfrost = shared_library(
|
||||
'vulkan_panfrost',
|
||||
[libpanvk_files, panvk_entrypoints, panvk_tracepoints],
|
||||
include_directories : [
|
||||
inc_include,
|
||||
inc_src,
|
||||
inc_panfrost,
|
||||
],
|
||||
link_whole : [panvk_per_arch_libs],
|
||||
link_with : [
|
||||
libpanfrost_shared,
|
||||
libpanfrost_decode,
|
||||
libpanfrost_lib,
|
||||
libpanfrost_compiler,
|
||||
],
|
||||
dependencies : [
|
||||
dep_dl,
|
||||
dep_elf,
|
||||
dep_libdrm,
|
||||
dep_m,
|
||||
dep_thread,
|
||||
dep_valgrind,
|
||||
idep_nir,
|
||||
idep_pan_packers,
|
||||
panvk_deps,
|
||||
idep_vulkan_util,
|
||||
idep_vulkan_runtime,
|
||||
idep_vulkan_wsi,
|
||||
idep_mesautil,
|
||||
],
|
||||
c_args : [no_override_init_args, panvk_flags],
|
||||
link_args : [vulkan_icd_link_args, ld_args_bsymbolic, ld_args_gc_sections, ld_args_build_id],
|
||||
gnu_symbol_visibility : 'hidden',
|
||||
install : true,
|
||||
)
|
||||
|
||||
if with_symbols_check
|
||||
test(
|
||||
'panvk symbols check',
|
||||
symbols_check,
|
||||
args : [
|
||||
'--lib', libvulkan_panfrost,
|
||||
'--symbols-file', vulkan_icd_symbols,
|
||||
symbols_check_args,
|
||||
],
|
||||
suite : ['panfrost'],
|
||||
)
|
||||
endif
|
||||
|
||||
icd_file_name = libname_prefix + 'vulkan_panfrost.' + libname_suffix
|
||||
|
||||
panfrost_icd = custom_target(
|
||||
'panfrost_icd',
|
||||
input : [vk_icd_gen, vk_api_xml],
|
||||
output : 'panfrost_icd.' + vulkan_manifest_suffix,
|
||||
command : [
|
||||
prog_python, '@INPUT0@',
|
||||
'--api-version', '1.4', '--xml', '@INPUT1@',
|
||||
'--sizeof-pointer', sizeof_pointer,
|
||||
'--icd-lib-path', vulkan_icd_lib_path,
|
||||
'--icd-filename', icd_file_name,
|
||||
'--out', '@OUTPUT@',
|
||||
],
|
||||
build_by_default : true,
|
||||
install_dir : with_vulkan_icd_dir,
|
||||
install_tag : 'runtime',
|
||||
install : true,
|
||||
)
|
||||
|
||||
_dev_icdname = 'panfrost_devenv_icd.@0@.json'.format(host_machine.cpu())
|
||||
_dev_icd = custom_target(
|
||||
'panfrost_devenv_icd',
|
||||
input : [vk_icd_gen, vk_api_xml],
|
||||
output : _dev_icdname,
|
||||
command : [
|
||||
prog_python, '@INPUT0@',
|
||||
'--api-version', '1.4', '--xml', '@INPUT1@',
|
||||
'--sizeof-pointer', sizeof_pointer,
|
||||
'--icd-lib-path', meson.current_build_dir(),
|
||||
'--icd-filename', icd_file_name,
|
||||
'--out', '@OUTPUT@',
|
||||
],
|
||||
build_by_default : true,
|
||||
)
|
||||
|
||||
devenv.append('VK_DRIVER_FILES', _dev_icd.full_path())
|
||||
@@ -0,0 +1,501 @@
|
||||
/*
|
||||
* Copyright © 2024 Collabora Ltd.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#ifndef PANVK_CMD_DRAW_H
|
||||
#define PANVK_CMD_DRAW_H
|
||||
|
||||
#ifndef PAN_ARCH
|
||||
#error "PAN_ARCH must be defined"
|
||||
#endif
|
||||
|
||||
#include "panvk_blend.h"
|
||||
#include "panvk_cmd_desc_state.h"
|
||||
#include "panvk_cmd_query.h"
|
||||
#include "panvk_entrypoints.h"
|
||||
#include "panvk_image.h"
|
||||
#include "panvk_image_view.h"
|
||||
#include "panvk_physical_device.h"
|
||||
#include "panvk_shader.h"
|
||||
|
||||
#include "vk_command_buffer.h"
|
||||
#include "vk_format.h"
|
||||
#include "util/u_tristate.h"
|
||||
|
||||
#include "pan_props.h"
|
||||
|
||||
/* Size of the vertex-buffer binding table kept in the graphics state. */
#define MAX_VBS 16

struct panvk_cmd_buffer;

/* One bound vertex buffer: an address plus the size recorded with it. */
struct panvk_attrib_buf {
   uint64_t address;
   unsigned size;
};
|
||||
|
||||
struct panvk_resolve_attachment {
|
||||
VkResolveModeFlagBits mode;
|
||||
struct panvk_image_view *dst_iview;
|
||||
};
|
||||
|
||||
struct panvk_rendering_state {
|
||||
VkRenderingFlags flags;
|
||||
uint32_t layer_count;
|
||||
uint32_t view_mask;
|
||||
enum u_tristate first_provoking_vertex;
|
||||
|
||||
enum vk_rp_attachment_flags bound_attachments;
|
||||
struct {
|
||||
struct panvk_image_view *iviews[MAX_RTS];
|
||||
/* If non-null, preload_iviews[i] overrides iviews[i] for preloads. */
|
||||
struct panvk_image_view *preload_iviews[MAX_RTS];
|
||||
VkFormat fmts[MAX_RTS];
|
||||
uint8_t samples[MAX_RTS];
|
||||
struct panvk_resolve_attachment resolve[MAX_RTS];
|
||||
} color_attachments;
|
||||
|
||||
struct pan_image_view zs_pview;
|
||||
struct pan_image_view s_pview;
|
||||
|
||||
struct {
|
||||
struct panvk_image_view *iview;
|
||||
/* If non-null, preload_iview overrides iview for preloads. */
|
||||
struct panvk_image_view *preload_iview;
|
||||
VkFormat fmt;
|
||||
struct panvk_resolve_attachment resolve;
|
||||
} z_attachment, s_attachment;
|
||||
|
||||
struct {
|
||||
struct pan_fb_info info;
|
||||
bool crc_valid[MAX_RTS];
|
||||
|
||||
/* nr_samples to be used before framebuffer / tiler descriptor are emitted */
|
||||
uint32_t nr_samples;
|
||||
|
||||
#if PAN_ARCH < 9
|
||||
uint32_t bo_count;
|
||||
struct pan_kmod_bo *bos[(MAX_RTS * PANVK_MAX_PLANES) + 2];
|
||||
#endif
|
||||
} fb;
|
||||
|
||||
#if PAN_ARCH >= 10
|
||||
struct pan_ptr fbds;
|
||||
uint64_t tiler;
|
||||
|
||||
/* When a secondary command buffer has to flush draws, it disturbs the
|
||||
* inherited context, and the primary command buffer needs to know. */
|
||||
bool invalidate_inherited_ctx;
|
||||
|
||||
/* True if the last render pass was suspended. */
|
||||
bool suspended;
|
||||
|
||||
/* Blocks that can patch to flip the provoking vertex mode if we need to
|
||||
* emit FBDs/TDs before we know which mode the application is using */
|
||||
struct cs_maybe *maybe_set_tds_provoking_vertex;
|
||||
struct cs_maybe *maybe_set_fbds_provoking_vertex;
|
||||
|
||||
struct {
|
||||
/* != 0 if the render pass contains one or more occlusion queries to
|
||||
* signal. */
|
||||
uint64_t chain;
|
||||
|
||||
/* Point to the syncobj of the last occlusion query that was passed
|
||||
* to a draw. */
|
||||
uint64_t last;
|
||||
} oq;
|
||||
#endif
|
||||
};
|
||||
|
||||
/* Bit indices into panvk_cmd_graphics_state::dirty. The final entry is the
 * number of bits and must stay last. */
enum panvk_cmd_graphics_dirty_state {
   PANVK_CMD_GRAPHICS_DIRTY_VS,
   PANVK_CMD_GRAPHICS_DIRTY_FS,
   PANVK_CMD_GRAPHICS_DIRTY_VB,
   PANVK_CMD_GRAPHICS_DIRTY_IB,
   PANVK_CMD_GRAPHICS_DIRTY_OQ,
   PANVK_CMD_GRAPHICS_DIRTY_DESC_STATE,
   PANVK_CMD_GRAPHICS_DIRTY_RENDER_STATE,
   PANVK_CMD_GRAPHICS_DIRTY_VS_PUSH_UNIFORMS,
   PANVK_CMD_GRAPHICS_DIRTY_FS_PUSH_UNIFORMS,

   PANVK_CMD_GRAPHICS_DIRTY_STATE_COUNT,
};
|
||||
|
||||
struct panvk_cmd_graphics_state {
|
||||
struct panvk_descriptor_state desc_state;
|
||||
|
||||
struct {
|
||||
struct vk_vertex_input_state vi;
|
||||
struct vk_sample_locations_state sl;
|
||||
} dynamic;
|
||||
|
||||
struct panvk_occlusion_query_state occlusion_query;
|
||||
#if PAN_ARCH >= 10
|
||||
struct panvk_prims_generated_query_state prims_generated_query;
|
||||
#endif
|
||||
struct panvk_graphics_sysvals sysvals;
|
||||
|
||||
#if PAN_ARCH < 9
|
||||
/* iter13: VK_EXT_transform_feedback state (JM-class only for now). */
|
||||
struct {
|
||||
bool active;
|
||||
uint32_t buffer_count;
|
||||
struct {
|
||||
uint64_t addr;
|
||||
uint64_t offset;
|
||||
uint64_t size;
|
||||
} buffers[4];
|
||||
} xfb;
|
||||
#endif
|
||||
|
||||
#if PAN_ARCH < 9
|
||||
struct panvk_shader_link link;
|
||||
#endif
|
||||
|
||||
struct {
|
||||
const struct panvk_shader *shader;
|
||||
struct panvk_shader_desc_state desc;
|
||||
uint64_t blend_descs[MAX_RTS];
|
||||
uint64_t push_uniforms;
|
||||
bool required;
|
||||
#if PAN_ARCH < 9
|
||||
uint64_t rsd;
|
||||
#endif
|
||||
} fs;
|
||||
|
||||
struct {
|
||||
const struct panvk_shader *shader;
|
||||
struct panvk_shader_desc_state desc;
|
||||
uint64_t push_uniforms;
|
||||
#if PAN_ARCH < 9
|
||||
uint64_t attribs;
|
||||
uint64_t attrib_bufs;
|
||||
uint64_t indirect_attribs_infos;
|
||||
uint64_t indirect_attrib_bufs_infos;
|
||||
uint64_t indirect_varying_bufs_infos;
|
||||
bool previous_draw_was_indirect;
|
||||
#endif
|
||||
} vs;
|
||||
|
||||
struct {
|
||||
struct panvk_attrib_buf bufs[MAX_VBS];
|
||||
unsigned count;
|
||||
} vb;
|
||||
|
||||
#if PAN_ARCH >= 10
|
||||
struct {
|
||||
uint32_t attribs_changing_on_base_instance;
|
||||
} vi;
|
||||
#endif
|
||||
|
||||
/* Index buffer */
|
||||
struct {
|
||||
uint64_t dev_addr;
|
||||
uint64_t size;
|
||||
uint8_t index_size;
|
||||
} ib;
|
||||
|
||||
struct {
|
||||
struct panvk_blend_info info;
|
||||
} cb;
|
||||
|
||||
struct panvk_rendering_state render;
|
||||
|
||||
bool vk_meta;
|
||||
|
||||
#if PAN_ARCH < 9
|
||||
uint64_t vpd;
|
||||
#endif
|
||||
|
||||
#if PAN_ARCH >= 10
|
||||
uint64_t tsd;
|
||||
#endif
|
||||
|
||||
BITSET_DECLARE(dirty, PANVK_CMD_GRAPHICS_DIRTY_STATE_COUNT);
|
||||
};
|
||||
|
||||
/* Dirty-bit helpers.
 *
 * dyn_gfx_state_dirty() tests a MESA_VK_DYNAMIC_* bit in the common dynamic
 * graphics state. The gfx_state_*() macros operate on the driver's own
 * PANVK_CMD_GRAPHICS_DIRTY_* bitset in (__cmdbuf)->state.gfx.dirty. */
#define dyn_gfx_state_dirty(__cmdbuf, __name) \
   BITSET_TEST((__cmdbuf)->vk.dynamic_graphics_state.dirty, \
               MESA_VK_DYNAMIC_##__name)

#define gfx_state_dirty(__cmdbuf, __name) \
   BITSET_TEST((__cmdbuf)->state.gfx.dirty, PANVK_CMD_GRAPHICS_DIRTY_##__name)

#define gfx_state_set_dirty(__cmdbuf, __name) \
   BITSET_SET((__cmdbuf)->state.gfx.dirty, PANVK_CMD_GRAPHICS_DIRTY_##__name)

#define gfx_state_clear_all_dirty(__cmdbuf) \
   BITSET_ZERO((__cmdbuf)->state.gfx.dirty)

#define gfx_state_set_all_dirty(__cmdbuf) \
   BITSET_ONES((__cmdbuf)->state.gfx.dirty)
|
||||
|
||||
/* Store __val into the graphics sysval __name. The value is staged in a
 * temporary sysvals struct and compared byte-wise with the current one; only
 * when they differ is the sysval updated and the FAU words it covers marked
 * in the __dirty bitset. */
#define set_gfx_sysval(__cmdbuf, __dirty, __name, __val) \
   do { \
      struct panvk_graphics_sysvals __staged_sysvals; \
      __staged_sysvals.__name = __val; \
      if (memcmp(&(__cmdbuf)->state.gfx.sysvals.__name, \
                 &__staged_sysvals.__name, \
                 sizeof(__staged_sysvals.__name)) != 0) { \
         (__cmdbuf)->state.gfx.sysvals.__name = __staged_sysvals.__name; \
         BITSET_SET_RANGE(__dirty, sysval_fau_start(graphics, __name), \
                          sysval_fau_end(graphics, __name)); \
      } \
   } while (0)
|
||||
|
||||
#if PAN_ARCH >= 10
/* Device-level draw context, only present on PAN_ARCH >= 10. */
struct panvk_device_draw_context {
   struct panvk_priv_bo *fns_bo;
   uint64_t fn_set_fbds_provoking_vertex_stride;
};
#endif
|
||||
|
||||
static inline void
|
||||
panvk_depth_range(const struct panvk_cmd_graphics_state *state,
|
||||
const struct vk_viewport_state *vp,
|
||||
float *z_min, float *z_max)
|
||||
{
|
||||
float a = vp->depth_clip_negative_one_to_one ?
|
||||
state->sysvals.viewport.offset.z - state->sysvals.viewport.scale.z :
|
||||
state->sysvals.viewport.offset.z;
|
||||
float b = state->sysvals.viewport.offset.z + state->sysvals.viewport.scale.z;
|
||||
*z_min = MIN2(a, b);
|
||||
*z_max = MAX2(a, b);
|
||||
}
|
||||
|
||||
static inline uint32_t
|
||||
panvk_select_tiler_hierarchy_mask(const struct panvk_physical_device *phys_dev,
|
||||
const struct panvk_cmd_graphics_state *state,
|
||||
unsigned bin_ptr_mem_budget)
|
||||
{
|
||||
struct pan_tiler_features tiler_features =
|
||||
pan_query_tiler_features(&phys_dev->kmod.dev->props);
|
||||
|
||||
uint32_t hierarchy_mask = GENX(pan_select_tiler_hierarchy_mask)(
|
||||
state->render.fb.info.width, state->render.fb.info.height,
|
||||
tiler_features.max_levels, state->render.fb.info.tile_size,
|
||||
bin_ptr_mem_budget);
|
||||
|
||||
return hierarchy_mask;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
fs_required(const struct panvk_cmd_graphics_state *state,
|
||||
const struct vk_dynamic_graphics_state *dyn_state)
|
||||
{
|
||||
const struct panvk_shader_variant *fs =
|
||||
panvk_shader_only_variant(state->fs.shader);
|
||||
const struct pan_shader_info *fs_info = fs ? &fs->info : NULL;
|
||||
const struct vk_color_blend_state *cb = &dyn_state->cb;
|
||||
const struct vk_rasterization_state *rs = &dyn_state->rs;
|
||||
|
||||
if (rs->rasterizer_discard_enable || !fs_info)
|
||||
return false;
|
||||
|
||||
/* If we generally have side effects */
|
||||
if (fs_info->fs.sidefx)
|
||||
return true;
|
||||
|
||||
/* If colour is written we need to execute */
|
||||
for (unsigned i = 0; i < cb->attachment_count; ++i) {
|
||||
if ((cb->color_write_enables & BITFIELD_BIT(i)) &&
|
||||
cb->attachments[i].write_mask)
|
||||
return true;
|
||||
}
|
||||
|
||||
/* If alpha-to-coverage is enabled, we need to run the fragment shader even
|
||||
* if we don't have a color attachment, so depth/stencil updates can be
|
||||
* discarded if alpha, and thus coverage, is 0. */
|
||||
if (dyn_state->ms.alpha_to_coverage_enable)
|
||||
return true;
|
||||
|
||||
/* If the sample mask is updated, we need to run the fragment shader,
|
||||
* otherwise the fixed-function depth/stencil results will apply to all
|
||||
* samples. */
|
||||
if (fs_info->outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK))
|
||||
return true;
|
||||
|
||||
/* If depth is written and not implied we need to execute.
|
||||
* TODO: Predicate on Z/S writes being enabled */
|
||||
return (fs_info->fs.writes_depth || fs_info->fs.writes_stencil);
|
||||
}
|
||||
|
||||
static inline bool
|
||||
cached_fs_required(ASSERTED const struct panvk_cmd_graphics_state *state,
|
||||
ASSERTED const struct vk_dynamic_graphics_state *dyn_state,
|
||||
bool cached_value)
|
||||
{
|
||||
/* Make sure the cached value was properly initialized. */
|
||||
assert(fs_required(state, dyn_state) == cached_value);
|
||||
return cached_value;
|
||||
}
|
||||
|
||||
/* Evaluate to the bound fragment shader, or NULL when the cached
 * fs.required flag says the fragment shader is not needed. */
#define get_fs(__cmdbuf) \
   (!cached_fs_required(&(__cmdbuf)->state.gfx, \
                        &(__cmdbuf)->vk.dynamic_graphics_state, \
                        (__cmdbuf)->state.gfx.fs.required) \
       ? NULL \
       : (__cmdbuf)->state.gfx.fs.shader)
|
||||
|
||||
/* Anything that might change the value returned by get_fs() makes users of the
 * fragment shader dirty, because not using the fragment shader (when
 * fs_required() returns false) impacts various other things, like VS -> FS
 * linking in the JM backend, or the update of the fragment shader pointer in
 * the CSF backend. Call gfx_state_dirty(cmdbuf, FS) if you only care about
 * fragment shader updates.
 *
 * The macro tests its own __cmdbuf argument, so it works whatever the
 * caller's command buffer variable is named. */
#define fs_user_dirty(__cmdbuf) \
   (gfx_state_dirty(__cmdbuf, FS) || \
    dyn_gfx_state_dirty(__cmdbuf, RS_RASTERIZER_DISCARD_ENABLE) || \
    dyn_gfx_state_dirty(__cmdbuf, CB_ATTACHMENT_COUNT) || \
    dyn_gfx_state_dirty(__cmdbuf, CB_COLOR_WRITE_ENABLES) || \
    dyn_gfx_state_dirty(__cmdbuf, CB_WRITE_MASKS) || \
    dyn_gfx_state_dirty(__cmdbuf, MS_ALPHA_TO_COVERAGE_ENABLE))
|
||||
|
||||
/* Reset every dirty flag once a draw has been recorded.
 *
 * If the draw did not use the bound fragment shader (get_fs() differs from
 * fs.shader), the FS flag is set again afterwards, and FS_PUSH_UNIFORMS is
 * set again too if it was dirty before the reset. Both conditions are
 * sampled before anything is cleared. */
#define clear_dirty_after_draw(__cmdbuf) \
   do { \
      bool __fs_was_skipped = \
         (__cmdbuf)->state.gfx.fs.shader != get_fs(__cmdbuf); \
      bool __fs_push_pending = \
         __fs_was_skipped && gfx_state_dirty(__cmdbuf, FS_PUSH_UNIFORMS); \
      vk_dynamic_graphics_state_clear_dirty( \
         &(__cmdbuf)->vk.dynamic_graphics_state); \
      gfx_state_clear_all_dirty(__cmdbuf); \
      if (__fs_was_skipped) \
         gfx_state_set_dirty(__cmdbuf, FS); \
      if (__fs_push_pending) \
         gfx_state_set_dirty(__cmdbuf, FS_PUSH_UNIFORMS); \
   } while (0)
|
||||
|
||||
|
||||
#if PAN_ARCH >= 10
|
||||
VkResult
|
||||
panvk_per_arch(device_draw_context_init)(struct panvk_device *dev);
|
||||
|
||||
void
|
||||
panvk_per_arch(device_draw_context_cleanup)(struct panvk_device *dev);
|
||||
#endif
|
||||
|
||||
void
|
||||
panvk_per_arch(cmd_init_render_state)(struct panvk_cmd_buffer *cmdbuf,
|
||||
const VkRenderingInfo *pRenderingInfo);
|
||||
|
||||
void
|
||||
panvk_per_arch(cmd_force_fb_preload)(struct panvk_cmd_buffer *cmdbuf,
|
||||
const VkRenderingInfo *render_info);
|
||||
|
||||
void
|
||||
panvk_per_arch(cmd_preload_render_area_border)(struct panvk_cmd_buffer *cmdbuf,
|
||||
const VkRenderingInfo *render_info);
|
||||
|
||||
void panvk_per_arch(cmd_select_tile_size)(struct panvk_cmd_buffer *cmdbuf);
|
||||
|
||||
/* Parameters of a single draw: index, vertex and instance ranges, indirect
 * draw buffers, and on PAN_ARCH < 9 the raw vertex offset and layer id. */
struct panvk_draw_info {
   struct {
      uint32_t size;
      uint32_t offset;
   } index;

   struct {
#if PAN_ARCH < 9
      int32_t raw_offset;
#endif
      int32_t base;
      uint32_t count;
   } vertex;

   struct {
      int32_t base;
      uint32_t count;
   } instance;

   /* Indirect draw parameters. */
   struct {
      uint64_t buffer_dev_addr;
      uint64_t count_buffer_dev_addr;
      uint32_t draw_count;
      uint32_t stride;
   } indirect;

#if PAN_ARCH < 9
   uint32_t layer_id;
#endif
};
|
||||
|
||||
void
|
||||
panvk_per_arch(cmd_prepare_draw_sysvals)(struct panvk_cmd_buffer *cmdbuf,
|
||||
const struct panvk_draw_info *info);
|
||||
|
||||
static inline uint32_t
|
||||
color_attachment_written_mask(
|
||||
const struct panvk_shader_variant *fs,
|
||||
const struct vk_color_attachment_location_state *cal)
|
||||
{
|
||||
uint32_t written_by_shader =
|
||||
(fs->info.outputs_written >> FRAG_RESULT_DATA0) & BITFIELD_MASK(8);
|
||||
uint32_t catt_written_mask = 0;
|
||||
|
||||
for (uint32_t i = 0; i < MAX_RTS; i++) {
|
||||
if (cal->color_map[i] == MESA_VK_ATTACHMENT_UNUSED)
|
||||
continue;
|
||||
|
||||
uint32_t shader_rt = cal->color_map[i];
|
||||
|
||||
if (written_by_shader & BITFIELD_BIT(shader_rt))
|
||||
catt_written_mask |= BITFIELD_BIT(i);
|
||||
}
|
||||
|
||||
return catt_written_mask;
|
||||
}
|
||||
|
||||
static inline uint32_t
|
||||
color_attachment_read_mask(const struct panvk_shader_variant *fs,
|
||||
const struct vk_input_attachment_location_state *ial,
|
||||
uint8_t color_attachment_mask)
|
||||
{
|
||||
uint32_t color_attachment_count =
|
||||
ial->color_attachment_count == MESA_VK_COLOR_ATTACHMENT_COUNT_UNKNOWN
|
||||
? util_last_bit(color_attachment_mask)
|
||||
: ial->color_attachment_count;
|
||||
uint32_t catt_read_mask = 0;
|
||||
|
||||
for (uint32_t i = 0; i < color_attachment_count; i++) {
|
||||
if (ial->color_map[i] == MESA_VK_ATTACHMENT_UNUSED)
|
||||
continue;
|
||||
|
||||
uint32_t catt_idx = ial->color_map[i] + 1;
|
||||
if (fs->fs.input_attachment_read & BITFIELD_BIT(catt_idx)) {
|
||||
assert(color_attachment_mask & BITFIELD_BIT(i));
|
||||
catt_read_mask |= BITFIELD_BIT(i);
|
||||
}
|
||||
}
|
||||
|
||||
return catt_read_mask;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
z_attachment_read(const struct panvk_shader_variant *fs,
|
||||
const struct vk_input_attachment_location_state *ial)
|
||||
{
|
||||
uint32_t depth_mask = ial->depth_att == MESA_VK_ATTACHMENT_NO_INDEX
|
||||
? BITFIELD_BIT(0)
|
||||
: ial->depth_att != MESA_VK_ATTACHMENT_UNUSED
|
||||
? BITFIELD_BIT(ial->depth_att + 1)
|
||||
: 0;
|
||||
return depth_mask & fs->fs.input_attachment_read;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
s_attachment_read(const struct panvk_shader_variant *fs,
|
||||
const struct vk_input_attachment_location_state *ial)
|
||||
{
|
||||
uint32_t stencil_mask = ial->stencil_att == MESA_VK_ATTACHMENT_NO_INDEX
|
||||
? BITFIELD_BIT(0)
|
||||
: ial->stencil_att != MESA_VK_ATTACHMENT_UNUSED
|
||||
? BITFIELD_BIT(ial->stencil_att + 1)
|
||||
: 0;
|
||||
|
||||
return stencil_mask & fs->fs.input_attachment_read;
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,572 @@
|
||||
/*
|
||||
* Copyright © 2021 Collabora Ltd.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#ifndef PANVK_SHADER_H
|
||||
#define PANVK_SHADER_H
|
||||
|
||||
#ifndef PAN_ARCH
|
||||
#error "PAN_ARCH must be defined"
|
||||
#endif
|
||||
|
||||
#include "compiler/pan_compiler.h"
|
||||
|
||||
#include "pan_desc.h"
|
||||
#include "pan_earlyzs.h"
|
||||
|
||||
#include "panvk_cmd_push_constant.h"
|
||||
#include "panvk_descriptor_set.h"
|
||||
#include "panvk_macros.h"
|
||||
#include "panvk_mempool.h"
|
||||
|
||||
#include "vk_pipeline_layout.h"
|
||||
|
||||
#include "vk_shader.h"
|
||||
|
||||
/* Per-arch shader ops table, defined outside this header. */
extern const struct vk_device_shader_ops panvk_per_arch(device_shader_ops);

/* Array sizes used throughout the driver state for render targets and
 * vertex shader attributes. */
#define MAX_RTS        8
#define MAX_VS_ATTRIBS 16
|
||||
|
||||
/* Per-stage resource limits. Uniform buffers and input attachments use the
 * per-set limits on every architecture. */
#define MAX_PER_STAGE_UNIFORM_BUFFERS   MAX_PER_SET_UNIFORM_BUFFERS
#define MAX_PER_STAGE_INPUT_ATTACHMENTS MAX_PER_SET_INPUT_ATTACHMENTS

#if PAN_ARCH < 9

/* The MAX_PER_SET values could theoretically be used here as well (UBOs
 * aside, which are really limited to 256 on the shader side), but Bifrost
 * has to copy some tables around, at an extra memory/processing cost, so
 * smaller limits are picked. */
#define MAX_PER_STAGE_SAMPLED_IMAGES  256
#define MAX_PER_STAGE_SAMPLERS        128
#define MAX_PER_STAGE_STORAGE_BUFFERS 64
#define MAX_PER_STAGE_STORAGE_IMAGES  32

#else

#define MAX_PER_STAGE_SAMPLED_IMAGES  MAX_PER_SET_SAMPLED_IMAGES
#define MAX_PER_STAGE_SAMPLERS        MAX_PER_SET_SAMPLERS
#define MAX_PER_STAGE_STORAGE_BUFFERS MAX_PER_SET_STORAGE_BUFFERS
#define MAX_PER_STAGE_STORAGE_IMAGES  MAX_PER_SET_STORAGE_IMAGES

#endif

/* Sum of all the per-stage limits above. */
#define MAX_PER_STAGE_RESOURCES ( \
   MAX_PER_STAGE_SAMPLED_IMAGES + MAX_PER_STAGE_SAMPLERS + \
   MAX_PER_STAGE_UNIFORM_BUFFERS + MAX_PER_STAGE_STORAGE_BUFFERS + \
   MAX_PER_STAGE_STORAGE_IMAGES + MAX_PER_STAGE_INPUT_ATTACHMENTS)
|
||||
|
||||
struct nir_shader;
struct pan_blend_state;
struct panvk_device;

/* Identifiers of the varying buffers. PANVK_VARY_BUF_MAX is the number of
 * identifiers and must stay last. */
enum panvk_varying_buf_id {
   PANVK_VARY_BUF_GENERAL,
   PANVK_VARY_BUF_POSITION,
   PANVK_VARY_BUF_PSIZ,

   /* Keep last */
   PANVK_VARY_BUF_MAX,
};
|
||||
|
||||
#if PAN_ARCH < 9
|
||||
enum panvk_desc_table_id {
|
||||
PANVK_DESC_TABLE_USER = 0,
|
||||
PANVK_DESC_TABLE_CS_DYN_SSBOS = MAX_SETS,
|
||||
PANVK_DESC_TABLE_COMPUTE_COUNT = PANVK_DESC_TABLE_CS_DYN_SSBOS + 1,
|
||||
PANVK_DESC_TABLE_VS_DYN_SSBOS = MAX_SETS,
|
||||
PANVK_DESC_TABLE_FS_DYN_SSBOS = MAX_SETS + 1,
|
||||
PANVK_DESC_TABLE_GFX_COUNT = PANVK_DESC_TABLE_FS_DYN_SSBOS + 1,
|
||||
};
|
||||
#endif
|
||||
|
||||
/* Attachment identifiers: color attachments map to their own index, and
 * 255 stands for the depth/stencil attachment. */
#define PANVK_COLOR_ATTACHMENT(x) (x)
#define PANVK_ZS_ATTACHMENT       255

/* One entry of the input attachment map stored in the graphics sysvals. */
struct panvk_input_attachment_info {
   uint32_t target;
   uint32_t conversion;
};
|
||||
|
||||
/* Input attachment map size: one slot per color attachment (8), one for
 * depth, one for stencil, plus a final slot for the attachment that has no
 * InputAttachmentIndex attribute. */
#define INPUT_ATTACHMENT_MAP_SIZE 11

/* Size in bytes of one FAU word. */
#define FAU_WORD_SIZE sizeof(uint64_t)

/* A uint64_t forced to 8-byte alignment. */
#define aligned_u64 __attribute__((aligned(sizeof(uint64_t)))) uint64_t
|
||||
|
||||
/* System values which are common to both graphics and compute. These are
|
||||
* always at the same offset in both graphics and compute allowing us to
|
||||
* compile the shader without knowing which queue it will be dispatched on.
|
||||
*/
|
||||
struct panvk_common_sysvals_inner {
|
||||
/* Address of sysval/push constant buffer used for indirect loads */
|
||||
aligned_u64 push_uniforms;
|
||||
|
||||
/* Address of the printf buffer */
|
||||
aligned_u64 printf_buffer_address;
|
||||
} __attribute__((aligned(FAU_WORD_SIZE)));
|
||||
|
||||
struct panvk_common_sysvals {
|
||||
uint32_t _pad[4];
|
||||
struct panvk_common_sysvals_inner common;
|
||||
} __attribute__((aligned(FAU_WORD_SIZE)));
|
||||
|
||||
static_assert((offsetof(struct panvk_common_sysvals, common) %
|
||||
FAU_WORD_SIZE) == 0,
|
||||
"struct panvk_graphics_sysvals_inner must be 8-byte aligned");
|
||||
static_assert((sizeof(struct panvk_common_sysvals_inner) %
|
||||
FAU_WORD_SIZE) == 0,
|
||||
"struct panvk_graphics_sysvals_inner must be 8-byte aligned");
|
||||
|
||||
/* Range of FAU words covered by the common sysvals: first word, word count,
 * and one-past-the-last word. */
#define SYSVALS_COMMON_START \
   (offsetof(struct panvk_common_sysvals, common) / FAU_WORD_SIZE)

#define SYSVALS_COMMON_COUNT \
   (sizeof(struct panvk_common_sysvals_inner) / FAU_WORD_SIZE)

#define SYSVALS_COMMON_END (SYSVALS_COMMON_START + SYSVALS_COMMON_COUNT)
|
||||
|
||||
struct panvk_graphics_sysvals {
|
||||
/* Blend constants MUST come first because their position cannot depend on
|
||||
* the FAU packing of the fragment shader.
|
||||
*/
|
||||
struct {
|
||||
float constants[4];
|
||||
} blend;
|
||||
|
||||
/* This must be at the same offset for both compute and graphics */
|
||||
struct panvk_common_sysvals_inner common;
|
||||
|
||||
struct {
|
||||
struct {
|
||||
float x, y, z;
|
||||
} scale, offset;
|
||||
} viewport;
|
||||
|
||||
struct {
|
||||
#if PAN_ARCH < 9
|
||||
int32_t raw_vertex_offset;
|
||||
uint32_t num_vertices; /* iter13: XFB needs per-draw vertex count */
|
||||
/* aligned_u64 attribute below inserts the 4-byte alignment gap
|
||||
* after num_vertices automatically — no explicit pad needed. */
|
||||
aligned_u64 xfb_address[4]; /* iter13: 4 transform feedback buffer base addresses */
|
||||
#endif
|
||||
int32_t first_vertex;
|
||||
int32_t base_instance;
|
||||
uint32_t noperspective_varyings;
|
||||
} vs;
|
||||
|
||||
struct {
|
||||
aligned_u64 blend_descs[MAX_RTS];
|
||||
} fs;
|
||||
|
||||
struct panvk_input_attachment_info iam[INPUT_ATTACHMENT_MAP_SIZE];
|
||||
|
||||
#if PAN_ARCH < 9
|
||||
/* gl_Layer on Bifrost is a bit of hack. We have to issue one draw per
|
||||
* layer, and filter primitives at the VS level.
|
||||
*/
|
||||
int32_t layer_id;
|
||||
|
||||
struct {
|
||||
aligned_u64 sets[PANVK_DESC_TABLE_GFX_COUNT];
|
||||
} desc;
|
||||
#endif
|
||||
} __attribute__((aligned(FAU_WORD_SIZE)));
|
||||
|
||||
static_assert(offsetof(struct panvk_graphics_sysvals, blend) == 0,
|
||||
"panvk_graphics_sysvals::blend must be at the start");
|
||||
static_assert(offsetof(struct panvk_graphics_sysvals, common) ==
|
||||
offsetof(struct panvk_common_sysvals, common),
|
||||
"Common sysvals must be at the same offset everywhere");
|
||||
static_assert((sizeof(struct panvk_graphics_sysvals) % FAU_WORD_SIZE) == 0,
|
||||
"struct panvk_graphics_sysvals must be 8-byte aligned");
|
||||
#if PAN_ARCH < 9
|
||||
static_assert((offsetof(struct panvk_graphics_sysvals, desc) % FAU_WORD_SIZE) ==
|
||||
0,
|
||||
"panvk_graphics_sysvals::desc must be 8-byte aligned");
|
||||
#endif
|
||||
|
||||
struct panvk_compute_sysvals {
|
||||
struct {
|
||||
uint32_t x, y, z;
|
||||
} base;
|
||||
|
||||
uint32_t _pad;
|
||||
|
||||
/* This must be at the same offset for both compute and graphics */
|
||||
struct panvk_common_sysvals_inner common;
|
||||
|
||||
struct {
|
||||
uint32_t x, y, z;
|
||||
} num_work_groups;
|
||||
struct {
|
||||
uint32_t x, y, z;
|
||||
} local_group_size;
|
||||
|
||||
#if PAN_ARCH < 9
|
||||
struct {
|
||||
aligned_u64 sets[PANVK_DESC_TABLE_COMPUTE_COUNT];
|
||||
} desc;
|
||||
#endif
|
||||
} __attribute__((aligned(FAU_WORD_SIZE)));
|
||||
|
||||
static_assert(offsetof(struct panvk_compute_sysvals, common) ==
|
||||
offsetof(struct panvk_common_sysvals, common),
|
||||
"Common sysvals must be at the same offset everywhere");
|
||||
static_assert((sizeof(struct panvk_compute_sysvals) % FAU_WORD_SIZE) == 0,
|
||||
"struct panvk_compute_sysvals must be 8-byte aligned");
|
||||
#if PAN_ARCH < 9
|
||||
static_assert((offsetof(struct panvk_compute_sysvals, desc) % FAU_WORD_SIZE) ==
|
||||
0,
|
||||
"panvk_compute_sysvals::desc must be 8-byte aligned");
|
||||
#endif
|
||||
|
||||
/* Magic base offset used before push constants are packed, so driver
 * sysvals can easily be told apart from user push constants. It is NOT the
 * final offset in the push constant buffer (AKA FAU). */
#define SYSVALS_PUSH_CONST_BASE MAX_PUSH_CONSTANTS_SIZE
|
||||
|
||||
/* Size in bytes of a sysval field, per sysval family. sysval_size()
 * dispatches on the family name (common, graphics or compute). */
#define common_sysval_size(__name) \
   sizeof(((struct panvk_common_sysvals *)NULL)->common.__name)

#define graphics_sysval_size(__name) \
   sizeof(((struct panvk_graphics_sysvals *)NULL)->__name)

#define compute_sysval_size(__name) \
   sizeof(((struct panvk_compute_sysvals *)NULL)->__name)

#define sysval_size(__ptype, __name) __ptype##_sysval_size(__name)

/* Byte offset of a sysval field, per sysval family. sysval_offset()
 * dispatches on the family name in the same way. */
#define common_sysval_offset(__name) \
   offsetof(struct panvk_common_sysvals, common.__name)

#define graphics_sysval_offset(__name) \
   offsetof(struct panvk_graphics_sysvals, __name)

#define compute_sysval_offset(__name) \
   offsetof(struct panvk_compute_sysvals, __name)

#define sysval_offset(__ptype, __name) __ptype##_sysval_offset(__name)
|
||||
|
||||
/* Helpers for array-typed sysvals and for converting byte ranges into FAU
 * word indices. All *_end macros return the index of the LAST word covered
 * (inclusive).
 *
 * __idx is parenthesized everywhere so that expression arguments such as
 * `i + 1` scale correctly by the entry size. */

/* Size in bytes of one element of the array sysval __name. */
#define sysval_entry_size(__ptype, __name) \
   sizeof(((struct panvk_##__ptype##_sysvals *)NULL)->__name[0])

/* Byte offset of element __idx of the array sysval __name. */
#define sysval_entry_offset(__ptype, __name, __idx) \
   (sysval_offset(__ptype, __name) + \
    (sysval_entry_size(__ptype, __name) * (__idx)))

/* First FAU word covered by the sysval __name. */
#define sysval_fau_start(__ptype, __name) \
   (sysval_offset(__ptype, __name) / FAU_WORD_SIZE)

/* Last FAU word covered by the sysval __name. */
#define sysval_fau_end(__ptype, __name) \
   ((sysval_offset(__ptype, __name) + sysval_size(__ptype, __name) - 1) / \
    FAU_WORD_SIZE)

/* First FAU word covered by element __idx of the array sysval __name. */
#define sysval_fau_entry_start(__ptype, __name, __idx) \
   (sysval_entry_offset(__ptype, __name, __idx) / FAU_WORD_SIZE)

/* Last FAU word covered by element __idx of the array sysval __name. */
#define sysval_fau_entry_end(__ptype, __name, __idx) \
   ((sysval_entry_offset(__ptype, __name, (__idx) + 1) - 1) / FAU_WORD_SIZE)
|
||||
|
||||
/* Translate a byte offset in the unpacked layout into the shader's packed
 * FAU layout. The word index becomes the prefix sum of the shader's
 * used_<kind> bitset up to that word; the byte offset within the word is
 * kept as is. */
#define shader_remapped_fau_offset(__shader, __kind, __offset) \
   ((FAU_WORD_SIZE * BITSET_PREFIX_SUM((__shader)->fau.used_##__kind, \
                                       (__offset) / FAU_WORD_SIZE)) + \
    ((__offset) % FAU_WORD_SIZE))

/* Packed offset of a sysval. */
#define shader_remapped_sysval_offset(__shader, __offset) \
   shader_remapped_fau_offset(__shader, sysvals, __offset)

/* Packed offset of a push constant; push constants are placed after the
 * shader's sysval_count sysval words. */
#define shader_remapped_push_const_offset(__shader, __offset) \
   (((__shader)->fau.sysval_count * FAU_WORD_SIZE) + \
    shader_remapped_fau_offset(__shader, push_consts, __offset))
|
||||
|
||||
/* Mark / query FAU usage in a shader's fau.used_sysvals and
 * fau.used_push_consts bitsets. Ranges are inclusive word ranges. */

/* Mark every FAU word of the sysval __name as used. */
#define shader_use_sysval(__shader, __ptype, __name) \
   BITSET_SET_RANGE((__shader)->fau.used_sysvals, \
                    sysval_fau_start(__ptype, __name), \
                    sysval_fau_end(__ptype, __name))

/* Test the FAU words of the sysval __name. */
#define shader_uses_sysval(__shader, __ptype, __name) \
   BITSET_TEST_RANGE((__shader)->fau.used_sysvals, \
                     sysval_fau_start(__ptype, __name), \
                     sysval_fau_end(__ptype, __name))

/* Test the FAU words of element __idx of the array sysval __name. */
#define shader_uses_sysval_entry(__shader, __ptype, __name, __idx) \
   BITSET_TEST_RANGE((__shader)->fau.used_sysvals, \
                     sysval_fau_entry_start(__ptype, __name, __idx), \
                     sysval_fau_entry_end(__ptype, __name, __idx))

/* Mark the sysval words covering bytes [__base, __base + __range) as used. */
#define shader_use_sysval_range(__shader, __base, __range) \
   BITSET_SET_RANGE((__shader)->fau.used_sysvals, (__base) / FAU_WORD_SIZE, \
                    ((__base) + (__range) - 1) / FAU_WORD_SIZE)

/* Mark the push constant words covering bytes [__base, __base + __range)
 * as used. */
#define shader_use_push_const_range(__shader, __base, __range) \
   BITSET_SET_RANGE((__shader)->fau.used_push_consts, \
                    (__base) / FAU_WORD_SIZE, \
                    ((__base) + (__range) - 1) / FAU_WORD_SIZE)
|
||||
|
||||
/* Emit a NIR push constant load of the whole sysval __name. The component
 * count is the sysval size divided by the component size, and the load uses
 * SYSVALS_PUSH_CONST_BASE as its base. */
#define load_sysval(__b, __ptype, __bitsz, __name) \
   nir_load_push_constant( \
      __b, sysval_size(__ptype, __name) / ((__bitsz) / 8), __bitsz, \
      nir_imm_int(__b, sysval_offset(__ptype, __name)), \
      .base = SYSVALS_PUSH_CONST_BASE)

/* Emit a NIR push constant load of one element of the array sysval __name,
 * selected by the dynamic index __dyn_idx. The base points at the start of
 * the array and the range covers the whole array. */
#define load_sysval_entry(__b, __ptype, __bitsz, __name, __dyn_idx) \
   nir_load_push_constant( \
      __b, sysval_entry_size(__ptype, __name) / ((__bitsz) / 8), __bitsz, \
      nir_imul_imm(__b, __dyn_idx, sysval_entry_size(__ptype, __name)), \
      .base = SYSVALS_PUSH_CONST_BASE + sysval_offset(__ptype, __name), \
      .range = sysval_size(__ptype, __name))
|
||||
|
||||
#if PAN_ARCH < 9
/* Kinds of descriptor tables on PAN_ARCH < 9. The COUNT entry is the number
 * of valid kinds and must stay last. */
enum panvk_bifrost_desc_table_type {
   PANVK_BIFROST_DESC_TABLE_INVALID = -1,

   /* A UBO descriptor takes 8 bytes. */
   PANVK_BIFROST_DESC_TABLE_UBO = 0,

   /* An image is described by a <3DAttributeBuffer,Attribute> pair, each
    * half living in its own table. */
   PANVK_BIFROST_DESC_TABLE_IMG,

   /* Texture and sampler descriptors take 32 bytes. */
   PANVK_BIFROST_DESC_TABLE_TEXTURE,
   PANVK_BIFROST_DESC_TABLE_SAMPLER,

   PANVK_BIFROST_DESC_TABLE_COUNT,
};
#endif
|
||||
|
||||
/* Copy-descriptor handle: the table id lives in the bits above bit 27 and
 * the index in the low 28 bits. Both arguments are parenthesized so that
 * expression arguments (e.g. `a | b`) are shifted/combined as a whole. */
#define COPY_DESC_HANDLE(table, idx)           (((table) << 28) | (idx))
#define COPY_DESC_HANDLE_EXTRACT_INDEX(handle) ((handle) & BITFIELD_MASK(28))
#define COPY_DESC_HANDLE_EXTRACT_TABLE(handle) ((handle) >> 28)
|
||||
|
||||
/* Number of FAU words needed for each sysval layout, the larger of the two,
 * and the number of FAU words covering the user push constants. */
#define MAX_COMPUTE_SYSVAL_FAUS \
   (sizeof(struct panvk_compute_sysvals) / FAU_WORD_SIZE)

#define MAX_GFX_SYSVAL_FAUS \
   (sizeof(struct panvk_graphics_sysvals) / FAU_WORD_SIZE)

#define MAX_SYSVAL_FAUS MAX2(MAX_COMPUTE_SYSVAL_FAUS, MAX_GFX_SYSVAL_FAUS)

#define MAX_PUSH_CONST_FAUS (MAX_PUSH_CONSTANTS_SIZE / FAU_WORD_SIZE)
|
||||
|
||||
struct panvk_shader_fau_info {
|
||||
BITSET_DECLARE(used_sysvals, MAX_SYSVAL_FAUS);
|
||||
BITSET_DECLARE(used_push_consts, MAX_PUSH_CONST_FAUS);
|
||||
uint32_t sysval_count;
|
||||
uint32_t total_count;
|
||||
};
|
||||
|
||||
struct panvk_shader_desc_info {
|
||||
uint32_t used_set_mask;
|
||||
|
||||
#if PAN_ARCH < 9
|
||||
struct {
|
||||
uint32_t map[MAX_DYNAMIC_UNIFORM_BUFFERS];
|
||||
uint32_t count;
|
||||
} dyn_ubos;
|
||||
struct {
|
||||
uint32_t map[MAX_DYNAMIC_STORAGE_BUFFERS];
|
||||
uint32_t count;
|
||||
} dyn_ssbos;
|
||||
struct {
|
||||
struct panvk_priv_mem map;
|
||||
uint32_t count[PANVK_BIFROST_DESC_TABLE_COUNT];
|
||||
} others;
|
||||
#else
|
||||
struct {
|
||||
uint32_t map[MAX_DYNAMIC_BUFFERS];
|
||||
uint32_t count;
|
||||
} dyn_bufs;
|
||||
uint32_t fs_varying_attr_desc_count;
|
||||
#endif
|
||||
};
|
||||
|
||||
struct panvk_shader_variant {
|
||||
struct pan_shader_info info;
|
||||
|
||||
union {
|
||||
struct {
|
||||
struct pan_compute_dim local_size;
|
||||
} cs;
|
||||
|
||||
struct {
|
||||
struct pan_earlyzs_lut earlyzs_lut;
|
||||
uint32_t input_attachment_read;
|
||||
} fs;
|
||||
};
|
||||
|
||||
struct panvk_shader_desc_info desc_info;
|
||||
|
||||
struct panvk_shader_fau_info fau;
|
||||
|
||||
const void *bin_ptr;
|
||||
uint32_t bin_size;
|
||||
bool own_bin;
|
||||
|
||||
struct panvk_priv_mem code_mem;
|
||||
|
||||
#if PAN_ARCH < 9
|
||||
struct panvk_priv_mem rsd;
|
||||
#else
|
||||
union {
|
||||
struct panvk_priv_mem spd;
|
||||
struct {
|
||||
#if PAN_ARCH < 12
|
||||
struct panvk_priv_mem pos_points;
|
||||
struct panvk_priv_mem pos_triangles;
|
||||
struct panvk_priv_mem var;
|
||||
#else
|
||||
struct panvk_priv_mem all_points;
|
||||
struct panvk_priv_mem all_triangles;
|
||||
#endif
|
||||
} spds;
|
||||
};
|
||||
#endif
|
||||
|
||||
const char *nir_str;
|
||||
const char *asm_str;
|
||||
};
|
||||
|
||||
/* Indices of the variants stored for a vertex shader. */
enum panvk_vs_variant {
   /* Hardware vertex shader, when next stage is fragment */
   PANVK_VS_VARIANT_HW,

   /* Number of vertex shader variants; must stay last. */
   PANVK_VS_VARIANTS,
};
|
||||
|
||||
struct panvk_shader {
|
||||
struct vk_shader vk;
|
||||
|
||||
struct panvk_shader_variant variants[];
|
||||
};
|
||||
|
||||
static inline unsigned
|
||||
panvk_shader_num_variants(mesa_shader_stage stage)
|
||||
{
|
||||
if (stage == MESA_SHADER_VERTEX)
|
||||
return PANVK_VS_VARIANTS;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static const char *panvk_vs_shader_variant_name[] = {
|
||||
[PANVK_VS_VARIANT_HW] = NULL,
|
||||
};
|
||||
|
||||
static const char *
|
||||
panvk_shader_variant_name(const struct panvk_shader *shader,
|
||||
struct panvk_shader_variant *variant)
|
||||
{
|
||||
unsigned i = variant - shader->variants;
|
||||
assert(i < panvk_shader_num_variants(shader->vk.stage));
|
||||
|
||||
if (shader->vk.stage == MESA_SHADER_VERTEX) {
|
||||
assert(i < ARRAY_SIZE(panvk_vs_shader_variant_name));
|
||||
return panvk_vs_shader_variant_name[i];
|
||||
}
|
||||
|
||||
assert(panvk_shader_num_variants(shader->vk.stage) == 1);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static const struct panvk_shader_variant *
|
||||
panvk_shader_only_variant(const struct panvk_shader *shader)
|
||||
{
|
||||
if (!shader)
|
||||
return NULL;
|
||||
|
||||
assert(panvk_shader_num_variants(shader->vk.stage) == 1);
|
||||
return &shader->variants[0];
|
||||
}
|
||||
|
||||
static const struct panvk_shader_variant *
|
||||
panvk_shader_hw_variant(const struct panvk_shader *shader)
|
||||
{
|
||||
if (!shader)
|
||||
return NULL;
|
||||
|
||||
return &shader->variants[0];
|
||||
}
|
||||
|
||||
static inline uint64_t
|
||||
panvk_shader_variant_get_dev_addr(const struct panvk_shader_variant *shader)
|
||||
{
|
||||
return shader != NULL ? panvk_priv_mem_dev_addr(shader->code_mem) : 0;
|
||||
}
|
||||
|
||||
/*
 * Loop over all variants of __shader. __var is declared by the macro as a
 * `struct panvk_shader_variant *` scoped to the loop.
 *
 * __shader is expanded several times (including in the loop condition), so
 * pass an expression without side effects.
 */
#define panvk_shader_foreach_variant(__shader, __var) \
   for (struct panvk_shader_variant *__var = (__shader)->variants; \
        __var < (__shader)->variants + \
                   panvk_shader_num_variants((__shader)->vk.stage); \
        ++__var)
|
||||
|
||||
#if PAN_ARCH < 9
|
||||
struct panvk_shader_link {
|
||||
struct {
|
||||
struct panvk_priv_mem attribs;
|
||||
} vs, fs;
|
||||
unsigned buf_strides[PANVK_VARY_BUF_MAX];
|
||||
};
|
||||
|
||||
VkResult panvk_per_arch(link_shaders)(struct panvk_pool *desc_pool,
|
||||
const struct panvk_shader_variant *vs,
|
||||
const struct panvk_shader_variant *fs,
|
||||
struct panvk_shader_link *link);
|
||||
|
||||
static inline void
|
||||
panvk_shader_link_cleanup(struct panvk_shader_link *link)
|
||||
{
|
||||
panvk_pool_free_mem(&link->vs.attribs);
|
||||
panvk_pool_free_mem(&link->fs.attribs);
|
||||
}
|
||||
#endif
|
||||
|
||||
bool panvk_per_arch(nir_lower_input_attachment_loads)(
|
||||
nir_shader *nir,
|
||||
const struct vk_graphics_pipeline_state *state,
|
||||
uint32_t *input_attachment_read_out);
|
||||
|
||||
void panvk_per_arch(nir_lower_descriptors)(
|
||||
nir_shader *nir, struct panvk_device *dev,
|
||||
const struct vk_pipeline_robustness_state *rs, uint32_t set_layout_count,
|
||||
struct vk_descriptor_set_layout *const *set_layouts,
|
||||
const struct vk_graphics_pipeline_state *state,
|
||||
struct panvk_shader_desc_info *desc_info);
|
||||
|
||||
/* This a stripped-down version of panvk_shader for internal shaders that
|
||||
* are managed by vk_meta (blend and preload shaders). Those don't need the
|
||||
* complexity inherent to user provided shaders as they're not exposed. */
|
||||
struct panvk_internal_shader {
|
||||
struct vk_shader vk;
|
||||
struct pan_shader_info info;
|
||||
struct panvk_priv_mem code_mem;
|
||||
|
||||
#if PAN_ARCH < 9
|
||||
struct panvk_priv_mem rsd;
|
||||
#else
|
||||
struct panvk_priv_mem spd;
|
||||
#endif
|
||||
};
|
||||
|
||||
/* Handle <-> object cast helpers for internal shaders, exposed through the
 * VkShaderEXT handle type. */
VK_DEFINE_NONDISP_HANDLE_CASTS(panvk_internal_shader,
                               vk.base,
                               VkShaderEXT,
                               VK_OBJECT_TYPE_SHADER_EXT)
|
||||
|
||||
/* Lock/unlock pair around shader compilation. NOTE(review): presumably
 * serializes access to compiler state that is not thread-safe -- confirm
 * against the implementation. */
void
panvk_per_arch(compiler_lock)(void);

void
panvk_per_arch(compiler_unlock)(void);
|
||||
|
||||
/*
 * Create an internal shader from a NIR shader using the given compiler
 * inputs. The new object is returned through shader_out.
 */
VkResult
panvk_per_arch(create_internal_shader)(
   struct panvk_device *dev,
   nir_shader *nir,
   struct pan_compile_inputs *compiler_inputs,
   struct panvk_internal_shader **shader_out);
|
||||
|
||||
VkResult panvk_per_arch(create_shader_from_binary)(
|
||||
struct panvk_device *dev, const struct pan_shader_info *info,
|
||||
struct pan_compute_dim local_size, const void *bin_ptr, size_t bin_size,
|
||||
struct panvk_shader **shader_out);
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,956 @@
|
||||
/*
|
||||
* Copyright © 2024 Collabora Ltd.
|
||||
* Copyright © 2024 Arm Ltd.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "panvk_buffer.h"
|
||||
#include "panvk_cmd_buffer.h"
|
||||
#include "panvk_device_memory.h"
|
||||
#include "panvk_entrypoints.h"
|
||||
|
||||
#include "pan_desc.h"
|
||||
#include "pan_compiler.h" /* PAN_SHADER_OOB_ADDRESS */
|
||||
#include "pan_util.h"
|
||||
|
||||
static void
|
||||
att_set_clear_preload(const VkRenderingAttachmentInfo *att, bool *clear, bool *preload)
|
||||
{
|
||||
switch (att->loadOp) {
|
||||
case VK_ATTACHMENT_LOAD_OP_CLEAR:
|
||||
*clear = true;
|
||||
break;
|
||||
case VK_ATTACHMENT_LOAD_OP_LOAD:
|
||||
*preload = true;
|
||||
break;
|
||||
case VK_ATTACHMENT_LOAD_OP_NONE:
|
||||
case VK_ATTACHMENT_LOAD_OP_DONT_CARE:
|
||||
/* This is a very frustrating corner case. From the spec:
|
||||
*
|
||||
* VK_ATTACHMENT_STORE_OP_NONE specifies the contents within the
|
||||
* render area are not accessed by the store operation as long as
|
||||
* no values are written to the attachment during the render pass.
|
||||
*
|
||||
* With VK_ATTACHMENT_LOAD_OP_DONT_CARE + VK_ATTACHMENT_STORE_OP_NONE,
|
||||
* we need to preserve the contents throughout partial renders. The
|
||||
* easiest way to do that is forcing a preload, so that partial stores
|
||||
* for unused attachments will be no-op'd by writing existing contents.
|
||||
*
|
||||
* TODO: disable preload when we have clean_pixel_write_enable = false
|
||||
* as an optimization
|
||||
*/
|
||||
*preload |= att->storeOp == VK_ATTACHMENT_STORE_OP_NONE;
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE("Unsupported loadOp");
|
||||
}
|
||||
}
|
||||
|
||||
static struct panvk_image_view *
|
||||
get_ms2ss_image_view(struct panvk_image_view *iview, uint32_t nr_samples)
|
||||
{
|
||||
assert(nr_samples >= 2 && nr_samples <= 16);
|
||||
assert(iview->pview.nr_samples == 1);
|
||||
assert(iview->vk.image->create_flags &
|
||||
VK_IMAGE_CREATE_MULTISAMPLED_RENDER_TO_SINGLE_SAMPLED_BIT_EXT);
|
||||
|
||||
/* sample count 2 is at index 0, 4 at 1, .. */
|
||||
uint32_t vidx = 0;
|
||||
switch (nr_samples) {
|
||||
case VK_SAMPLE_COUNT_2_BIT:
|
||||
vidx = 0;
|
||||
break;
|
||||
case VK_SAMPLE_COUNT_4_BIT:
|
||||
vidx = 1;
|
||||
break;
|
||||
case VK_SAMPLE_COUNT_8_BIT:
|
||||
vidx = 2;
|
||||
break;
|
||||
case VK_SAMPLE_COUNT_16_BIT:
|
||||
vidx = 3;
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE("unhandled sample count");
|
||||
}
|
||||
assert(iview->ms_views[vidx] != VK_NULL_HANDLE);
|
||||
|
||||
struct panvk_image_view *res =
|
||||
panvk_image_view_from_handle(iview->ms_views[vidx]);
|
||||
|
||||
assert(res->pview.nr_samples == nr_samples);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
static void
|
||||
render_state_set_color_attachment(struct panvk_cmd_buffer *cmdbuf,
|
||||
const VkRenderingAttachmentInfo *att,
|
||||
uint32_t index)
|
||||
{
|
||||
struct panvk_physical_device *phys_dev =
|
||||
to_panvk_physical_device(cmdbuf->vk.base.device->physical);
|
||||
struct panvk_cmd_graphics_state *state = &cmdbuf->state.gfx;
|
||||
struct pan_fb_info *fbinfo = &state->render.fb.info;
|
||||
VK_FROM_HANDLE(panvk_image_view, iview, att->imageView);
|
||||
|
||||
struct panvk_image_view *iview_ss = NULL;
|
||||
const bool ms2ss = cmdbuf->state.gfx.render.fb.nr_samples > 1 &&
|
||||
iview->pview.nr_samples == 1;
|
||||
|
||||
if (ms2ss) {
|
||||
iview_ss = iview;
|
||||
iview =
|
||||
get_ms2ss_image_view(iview, cmdbuf->state.gfx.render.fb.nr_samples);
|
||||
}
|
||||
|
||||
struct panvk_image *img =
|
||||
container_of(iview->vk.image, struct panvk_image, vk);
|
||||
|
||||
state->render.bound_attachments |= MESA_VK_RP_ATTACHMENT_COLOR_BIT(index);
|
||||
state->render.color_attachments.iviews[index] = iview;
|
||||
state->render.color_attachments.preload_iviews[index] =
|
||||
ms2ss ? iview_ss : NULL;
|
||||
state->render.color_attachments.fmts[index] = iview->vk.format;
|
||||
state->render.color_attachments.samples[index] = img->vk.samples;
|
||||
|
||||
#if PAN_ARCH < 9
|
||||
for (uint8_t p = 0; p < ARRAY_SIZE(iview->pview.planes); p++) {
|
||||
struct pan_image_plane_ref pref =
|
||||
pan_image_view_get_plane(&iview->pview, p);
|
||||
|
||||
if (!pref.image)
|
||||
continue;
|
||||
|
||||
assert(pref.plane_idx < ARRAY_SIZE(img->planes));
|
||||
assert(img->planes[pref.plane_idx].mem->bo != NULL);
|
||||
state->render.fb.bos[state->render.fb.bo_count++] =
|
||||
img->planes[pref.plane_idx].mem->bo;
|
||||
}
|
||||
#endif
|
||||
|
||||
fbinfo->rts[index].view = &iview->pview;
|
||||
fbinfo->rts[index].crc_valid = &state->render.fb.crc_valid[index];
|
||||
state->render.fb.nr_samples =
|
||||
MAX2(state->render.fb.nr_samples,
|
||||
pan_image_view_get_nr_samples(&iview->pview));
|
||||
|
||||
if (att->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) {
|
||||
enum pipe_format fmt = vk_format_to_pipe_format(iview->vk.format);
|
||||
union pipe_color_union *col =
|
||||
(union pipe_color_union *)&att->clearValue.color;
|
||||
pan_pack_color(phys_dev->formats.blendable,
|
||||
fbinfo->rts[index].clear_value, col, fmt, false);
|
||||
}
|
||||
|
||||
att_set_clear_preload(att, &fbinfo->rts[index].clear,
|
||||
&fbinfo->rts[index].preload);
|
||||
|
||||
if (att->resolveMode != VK_RESOLVE_MODE_NONE) {
|
||||
struct panvk_resolve_attachment *resolve_info =
|
||||
&state->render.color_attachments.resolve[index];
|
||||
VK_FROM_HANDLE(panvk_image_view, resolve_iview, att->resolveImageView);
|
||||
|
||||
/* VUID-VkRenderingAttachmentInfo-imageView-06862 and
|
||||
* VUID-VkRenderingAttachmentInfo-imageView-06863:
|
||||
* If resolveMode != NONE, then
|
||||
* resolveView == NULL iff. multisampledRenderToSingleSampledEnable */
|
||||
assert(ms2ss == (resolve_iview == NULL));
|
||||
|
||||
resolve_info->mode = att->resolveMode;
|
||||
if (!ms2ss) {
|
||||
resolve_info->dst_iview = resolve_iview;
|
||||
} else {
|
||||
assert(iview_ss);
|
||||
resolve_info->dst_iview = iview_ss;
|
||||
assert(resolve_info->dst_iview->pview.nr_samples == 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
render_state_set_z_attachment(struct panvk_cmd_buffer *cmdbuf,
|
||||
const VkRenderingAttachmentInfo *att)
|
||||
{
|
||||
struct panvk_cmd_graphics_state *state = &cmdbuf->state.gfx;
|
||||
struct pan_fb_info *fbinfo = &state->render.fb.info;
|
||||
VK_FROM_HANDLE(panvk_image_view, iview, att->imageView);
|
||||
|
||||
struct panvk_image_view *iview_ss = NULL;
|
||||
const bool ms2ss = cmdbuf->state.gfx.render.fb.nr_samples > 1 &&
|
||||
iview->pview.nr_samples == 1;
|
||||
|
||||
if (ms2ss) {
|
||||
iview_ss = iview;
|
||||
iview =
|
||||
get_ms2ss_image_view(iview, cmdbuf->state.gfx.render.fb.nr_samples);
|
||||
}
|
||||
|
||||
struct panvk_image *img =
|
||||
container_of(iview->vk.image, struct panvk_image, vk);
|
||||
|
||||
#if PAN_ARCH < 9
|
||||
/* Depth plane always comes first. */
|
||||
state->render.fb.bos[state->render.fb.bo_count++] = img->planes[0].mem->bo;
|
||||
#endif
|
||||
|
||||
state->render.z_attachment.fmt = iview->vk.format;
|
||||
state->render.bound_attachments |= MESA_VK_RP_ATTACHMENT_DEPTH_BIT;
|
||||
|
||||
state->render.zs_pview = iview->pview;
|
||||
fbinfo->zs.view.zs = &state->render.zs_pview;
|
||||
|
||||
/* Fixup view format when the image is multiplanar. */
|
||||
if (panvk_image_is_planar_depth_stencil(img))
|
||||
state->render.zs_pview.format = panvk_image_depth_only_pfmt(img);
|
||||
|
||||
state->render.zs_pview.planes[0] = (struct pan_image_plane_ref){
|
||||
.image = &img->planes[0].image,
|
||||
.plane_idx = 0,
|
||||
};
|
||||
state->render.zs_pview.planes[1] = (struct pan_image_plane_ref){0};
|
||||
state->render.fb.nr_samples =
|
||||
MAX2(state->render.fb.nr_samples,
|
||||
pan_image_view_get_nr_samples(&iview->pview));
|
||||
state->render.z_attachment.iview = iview;
|
||||
state->render.z_attachment.preload_iview = ms2ss ? iview_ss : NULL;
|
||||
|
||||
/* D24S8 is a single plane format where the depth/stencil are interleaved.
|
||||
* If we touch the depth component, we need to make sure the stencil
|
||||
* component is preserved, hence the preload, and the view format adjusment.
|
||||
*/
|
||||
if (panvk_image_is_interleaved_depth_stencil(img)) {
|
||||
fbinfo->zs.preload.s = true;
|
||||
cmdbuf->state.gfx.render.zs_pview.format =
|
||||
img->planes[0].image.props.format;
|
||||
} else {
|
||||
state->render.zs_pview.format = panvk_image_depth_only_pfmt(img);
|
||||
}
|
||||
|
||||
if (att->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR)
|
||||
fbinfo->zs.clear_value.depth = att->clearValue.depthStencil.depth;
|
||||
|
||||
att_set_clear_preload(att, &fbinfo->zs.clear.z, &fbinfo->zs.preload.z);
|
||||
|
||||
if (att->resolveMode != VK_RESOLVE_MODE_NONE) {
|
||||
struct panvk_resolve_attachment *resolve_info =
|
||||
&state->render.z_attachment.resolve;
|
||||
VK_FROM_HANDLE(panvk_image_view, resolve_iview, att->resolveImageView);
|
||||
|
||||
resolve_info->mode = att->resolveMode;
|
||||
if (!ms2ss) {
|
||||
resolve_info->dst_iview = resolve_iview;
|
||||
} else {
|
||||
assert(iview_ss);
|
||||
resolve_info->dst_iview = iview_ss;
|
||||
assert(resolve_info->dst_iview->pview.nr_samples == 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
render_state_set_s_attachment(struct panvk_cmd_buffer *cmdbuf,
|
||||
const VkRenderingAttachmentInfo *att)
|
||||
{
|
||||
struct panvk_cmd_graphics_state *state = &cmdbuf->state.gfx;
|
||||
struct pan_fb_info *fbinfo = &state->render.fb.info;
|
||||
VK_FROM_HANDLE(panvk_image_view, iview, att->imageView);
|
||||
|
||||
struct panvk_image_view *iview_ss = NULL;
|
||||
const bool ms2ss = cmdbuf->state.gfx.render.fb.nr_samples > 1 &&
|
||||
iview->pview.nr_samples == 1;
|
||||
|
||||
if (ms2ss) {
|
||||
iview_ss = iview;
|
||||
iview =
|
||||
get_ms2ss_image_view(iview, cmdbuf->state.gfx.render.fb.nr_samples);
|
||||
}
|
||||
|
||||
struct panvk_image *img =
|
||||
container_of(iview->vk.image, struct panvk_image, vk);
|
||||
|
||||
#if PAN_ARCH < 9
|
||||
/* The stencil plane is always last. */
|
||||
state->render.fb.bos[state->render.fb.bo_count++] =
|
||||
img->planes[img->plane_count - 1].mem->bo;
|
||||
#endif
|
||||
|
||||
state->render.s_attachment.fmt = iview->vk.format;
|
||||
state->render.bound_attachments |= MESA_VK_RP_ATTACHMENT_STENCIL_BIT;
|
||||
|
||||
state->render.s_pview = iview->pview;
|
||||
fbinfo->zs.view.s = &state->render.s_pview;
|
||||
|
||||
if (panvk_image_is_planar_depth_stencil(img)) {
|
||||
state->render.s_pview.format = panvk_image_stencil_only_pfmt(img);
|
||||
state->render.s_pview.planes[0] = (struct pan_image_plane_ref){0};
|
||||
state->render.s_pview.planes[1] = (struct pan_image_plane_ref){
|
||||
.image = &img->planes[1].image,
|
||||
.plane_idx = 0,
|
||||
};
|
||||
} else {
|
||||
state->render.s_pview.format = panvk_image_stencil_only_pfmt(img);
|
||||
state->render.s_pview.planes[0] = (struct pan_image_plane_ref){
|
||||
.image = &img->planes[0].image,
|
||||
.plane_idx = 0,
|
||||
};
|
||||
state->render.s_pview.planes[1] = (struct pan_image_plane_ref){0};
|
||||
}
|
||||
|
||||
state->render.fb.nr_samples =
|
||||
MAX2(state->render.fb.nr_samples,
|
||||
pan_image_view_get_nr_samples(&iview->pview));
|
||||
state->render.s_attachment.iview = iview;
|
||||
state->render.s_attachment.preload_iview = ms2ss ? iview_ss : NULL;
|
||||
|
||||
/* If the depth and stencil attachments point to the same image,
|
||||
* and the format is D24S8, we can combine them in a single view
|
||||
* addressing both components.
|
||||
*/
|
||||
if (state->render.s_pview.format == PIPE_FORMAT_X24S8_UINT &&
|
||||
state->render.z_attachment.iview &&
|
||||
state->render.z_attachment.iview->vk.image == iview->vk.image) {
|
||||
state->render.zs_pview.format = PIPE_FORMAT_Z24_UNORM_S8_UINT;
|
||||
fbinfo->zs.preload.s = false;
|
||||
fbinfo->zs.view.s = NULL;
|
||||
|
||||
/* If there was no depth attachment, and the image format is D24S8,
|
||||
* we use the depth+stencil slot, so we can benefit from AFBC, which
|
||||
* is not supported on the stencil-only slot on Bifrost.
|
||||
*/
|
||||
} else if (img->vk.format == VK_FORMAT_D24_UNORM_S8_UINT &&
|
||||
state->render.s_pview.format == PIPE_FORMAT_X24S8_UINT &&
|
||||
fbinfo->zs.view.zs == NULL) {
|
||||
fbinfo->zs.view.zs = &state->render.s_pview;
|
||||
state->render.s_pview.format = PIPE_FORMAT_Z24_UNORM_S8_UINT;
|
||||
fbinfo->zs.preload.z = true;
|
||||
fbinfo->zs.view.s = NULL;
|
||||
}
|
||||
|
||||
if (att->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR)
|
||||
fbinfo->zs.clear_value.stencil = att->clearValue.depthStencil.stencil;
|
||||
|
||||
att_set_clear_preload(att, &fbinfo->zs.clear.s, &fbinfo->zs.preload.s);
|
||||
|
||||
if (att->resolveMode != VK_RESOLVE_MODE_NONE) {
|
||||
struct panvk_resolve_attachment *resolve_info =
|
||||
&state->render.s_attachment.resolve;
|
||||
VK_FROM_HANDLE(panvk_image_view, resolve_iview, att->resolveImageView);
|
||||
|
||||
resolve_info->mode = att->resolveMode;
|
||||
if (!ms2ss) {
|
||||
resolve_info->dst_iview = resolve_iview;
|
||||
} else {
|
||||
assert(iview_ss);
|
||||
resolve_info->dst_iview = iview_ss;
|
||||
assert(resolve_info->dst_iview->pview.nr_samples == 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
panvk_per_arch(cmd_init_render_state)(struct panvk_cmd_buffer *cmdbuf,
|
||||
const VkRenderingInfo *pRenderingInfo)
|
||||
{
|
||||
struct panvk_physical_device *phys_dev =
|
||||
to_panvk_physical_device(cmdbuf->vk.base.device->physical);
|
||||
struct panvk_cmd_graphics_state *state = &cmdbuf->state.gfx;
|
||||
struct pan_fb_info *fbinfo = &state->render.fb.info;
|
||||
uint32_t att_width = UINT32_MAX, att_height = UINT32_MAX;
|
||||
|
||||
state->render.flags = pRenderingInfo->flags;
|
||||
|
||||
BITSET_SET(state->dirty, PANVK_CMD_GRAPHICS_DIRTY_RENDER_STATE);
|
||||
|
||||
#if PAN_ARCH < 9
|
||||
state->render.fb.bo_count = 0;
|
||||
memset(state->render.fb.bos, 0, sizeof(state->render.fb.bos));
|
||||
#endif
|
||||
|
||||
state->render.first_provoking_vertex = U_TRISTATE_UNSET;
|
||||
#if PAN_ARCH >= 10
|
||||
state->render.maybe_set_tds_provoking_vertex = NULL;
|
||||
state->render.maybe_set_fbds_provoking_vertex = NULL;
|
||||
#endif
|
||||
memset(state->render.fb.crc_valid, 0, sizeof(state->render.fb.crc_valid));
|
||||
memset(&state->render.color_attachments, 0,
|
||||
sizeof(state->render.color_attachments));
|
||||
memset(&state->render.z_attachment, 0, sizeof(state->render.z_attachment));
|
||||
memset(&state->render.s_attachment, 0, sizeof(state->render.s_attachment));
|
||||
state->render.bound_attachments = 0;
|
||||
|
||||
const VkMultisampledRenderToSingleSampledInfoEXT *ms2ss_info =
|
||||
vk_find_struct_const(pRenderingInfo,
|
||||
MULTISAMPLED_RENDER_TO_SINGLE_SAMPLED_INFO_EXT);
|
||||
const bool ms2ss = ms2ss_info
|
||||
? ms2ss_info->multisampledRenderToSingleSampledEnable
|
||||
: VK_FALSE;
|
||||
|
||||
cmdbuf->state.gfx.render.layer_count = pRenderingInfo->viewMask ?
|
||||
util_last_bit(pRenderingInfo->viewMask) :
|
||||
pRenderingInfo->layerCount;
|
||||
cmdbuf->state.gfx.render.view_mask = pRenderingInfo->viewMask;
|
||||
*fbinfo = (struct pan_fb_info){
|
||||
.tile_buf_budget = pan_query_optimal_tib_size(PAN_ARCH, phys_dev->model),
|
||||
.z_tile_buf_budget = pan_query_optimal_z_tib_size(PAN_ARCH, phys_dev->model),
|
||||
.nr_samples = 0,
|
||||
.rt_count = pRenderingInfo->colorAttachmentCount,
|
||||
};
|
||||
/* In case ms2ss is enabled, use the provided sample count.
|
||||
* All attachments need to have sample count == 1 or the provided value.
|
||||
* But, if all attachments have 1, we would end up choosing the wrong value
|
||||
* if we don't set it here already. */
|
||||
cmdbuf->state.gfx.render.fb.nr_samples =
|
||||
ms2ss ? ms2ss_info->rasterizationSamples : 1;
|
||||
|
||||
assert(pRenderingInfo->colorAttachmentCount <= ARRAY_SIZE(fbinfo->rts));
|
||||
|
||||
for (uint32_t i = 0; i < pRenderingInfo->colorAttachmentCount; i++) {
|
||||
const VkRenderingAttachmentInfo *att =
|
||||
&pRenderingInfo->pColorAttachments[i];
|
||||
VK_FROM_HANDLE(panvk_image_view, iview, att->imageView);
|
||||
|
||||
if (!iview)
|
||||
continue;
|
||||
|
||||
render_state_set_color_attachment(cmdbuf, att, i);
|
||||
att_width = MIN2(iview->vk.extent.width, att_width);
|
||||
att_height = MIN2(iview->vk.extent.height, att_height);
|
||||
}
|
||||
|
||||
if (pRenderingInfo->pDepthAttachment &&
|
||||
pRenderingInfo->pDepthAttachment->imageView != VK_NULL_HANDLE) {
|
||||
const VkRenderingAttachmentInfo *att = pRenderingInfo->pDepthAttachment;
|
||||
VK_FROM_HANDLE(panvk_image_view, iview, att->imageView);
|
||||
|
||||
if (iview) {
|
||||
assert(iview->vk.image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT);
|
||||
render_state_set_z_attachment(cmdbuf, att);
|
||||
att_width = MIN2(iview->vk.extent.width, att_width);
|
||||
att_height = MIN2(iview->vk.extent.height, att_height);
|
||||
}
|
||||
}
|
||||
|
||||
if (pRenderingInfo->pStencilAttachment &&
|
||||
pRenderingInfo->pStencilAttachment->imageView != VK_NULL_HANDLE) {
|
||||
const VkRenderingAttachmentInfo *att = pRenderingInfo->pStencilAttachment;
|
||||
VK_FROM_HANDLE(panvk_image_view, iview, att->imageView);
|
||||
|
||||
if (iview) {
|
||||
assert(iview->vk.image->aspects & VK_IMAGE_ASPECT_STENCIL_BIT);
|
||||
render_state_set_s_attachment(cmdbuf, att);
|
||||
att_width = MIN2(iview->vk.extent.width, att_width);
|
||||
att_height = MIN2(iview->vk.extent.height, att_height);
|
||||
}
|
||||
}
|
||||
|
||||
fbinfo->draw_extent.minx = pRenderingInfo->renderArea.offset.x;
|
||||
fbinfo->draw_extent.maxx = pRenderingInfo->renderArea.offset.x +
|
||||
pRenderingInfo->renderArea.extent.width - 1;
|
||||
fbinfo->draw_extent.miny = pRenderingInfo->renderArea.offset.y;
|
||||
fbinfo->draw_extent.maxy = pRenderingInfo->renderArea.offset.y +
|
||||
pRenderingInfo->renderArea.extent.height - 1;
|
||||
|
||||
fbinfo->frame_bounding_box = fbinfo->draw_extent;
|
||||
|
||||
if (state->render.bound_attachments) {
|
||||
fbinfo->width = att_width;
|
||||
fbinfo->height = att_height;
|
||||
} else {
|
||||
fbinfo->width = fbinfo->draw_extent.maxx + 1;
|
||||
fbinfo->height = fbinfo->draw_extent.maxy + 1;
|
||||
}
|
||||
|
||||
assert(fbinfo->width && fbinfo->height);
|
||||
}
|
||||
|
||||
void
|
||||
panvk_per_arch(cmd_select_tile_size)(struct panvk_cmd_buffer *cmdbuf)
|
||||
{
|
||||
struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info;
|
||||
|
||||
/* In case we never emitted tiler/framebuffer descriptors, we emit the
|
||||
* current sample count and compute tile size */
|
||||
if (fbinfo->nr_samples == 0) {
|
||||
fbinfo->nr_samples = cmdbuf->state.gfx.render.fb.nr_samples;
|
||||
GENX(pan_select_tile_size)(fbinfo);
|
||||
|
||||
#if PAN_ARCH != 6
|
||||
if (fbinfo->cbuf_allocation > fbinfo->tile_buf_budget) {
|
||||
vk_perf(VK_LOG_OBJS(&cmdbuf->vk.base),
|
||||
"Using too much tile-memory, disabling pipelining");
|
||||
}
|
||||
#endif
|
||||
} else {
|
||||
/* In case we already emitted tiler/framebuffer descriptors, we ensure
|
||||
* that the sample count didn't change (this should never happen) */
|
||||
assert(fbinfo->nr_samples == cmdbuf->state.gfx.render.fb.nr_samples);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
panvk_per_arch(cmd_force_fb_preload)(struct panvk_cmd_buffer *cmdbuf,
|
||||
const VkRenderingInfo *render_info)
|
||||
{
|
||||
/* We force preloading for all active attachments when the render area is
|
||||
* unaligned or when a barrier flushes prior draw calls in the middle of a
|
||||
* render pass. The two cases can be distinguished by whether a
|
||||
* render_info is provided.
|
||||
*
|
||||
* When the render area is unaligned, we force preloading to preserve
|
||||
* contents falling outside of the render area. We also make sure the
|
||||
* initial attachment clears are performed.
|
||||
*/
|
||||
struct panvk_cmd_graphics_state *state = &cmdbuf->state.gfx;
|
||||
struct pan_fb_info *fbinfo = &state->render.fb.info;
|
||||
VkClearAttachment clear_atts[MAX_RTS + 2];
|
||||
uint32_t clear_att_count = 0;
|
||||
|
||||
if (!state->render.bound_attachments)
|
||||
return;
|
||||
|
||||
for (unsigned i = 0; i < fbinfo->rt_count; i++) {
|
||||
if (!fbinfo->rts[i].view)
|
||||
continue;
|
||||
|
||||
fbinfo->rts[i].preload = true;
|
||||
|
||||
if (fbinfo->rts[i].clear) {
|
||||
if (render_info) {
|
||||
const VkRenderingAttachmentInfo *att =
|
||||
&render_info->pColorAttachments[i];
|
||||
|
||||
clear_atts[clear_att_count++] = (VkClearAttachment){
|
||||
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
|
||||
.colorAttachment = i,
|
||||
.clearValue = att->clearValue,
|
||||
};
|
||||
}
|
||||
fbinfo->rts[i].clear = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (fbinfo->zs.view.zs) {
|
||||
fbinfo->zs.preload.z = true;
|
||||
|
||||
if (fbinfo->zs.clear.z) {
|
||||
if (render_info) {
|
||||
const VkRenderingAttachmentInfo *att =
|
||||
render_info->pDepthAttachment;
|
||||
|
||||
clear_atts[clear_att_count++] = (VkClearAttachment){
|
||||
.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT,
|
||||
.clearValue = att->clearValue,
|
||||
};
|
||||
}
|
||||
fbinfo->zs.clear.z = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (fbinfo->zs.view.s ||
|
||||
(fbinfo->zs.view.zs &&
|
||||
util_format_is_depth_and_stencil(fbinfo->zs.view.zs->format))) {
|
||||
fbinfo->zs.preload.s = true;
|
||||
|
||||
if (fbinfo->zs.clear.s) {
|
||||
if (render_info) {
|
||||
const VkRenderingAttachmentInfo *att =
|
||||
render_info->pStencilAttachment;
|
||||
|
||||
clear_atts[clear_att_count++] = (VkClearAttachment){
|
||||
.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT,
|
||||
.clearValue = att->clearValue,
|
||||
};
|
||||
}
|
||||
|
||||
fbinfo->zs.clear.s = false;
|
||||
}
|
||||
}
|
||||
|
||||
#if PAN_ARCH >= 10
|
||||
/* insert a barrier for preload */
|
||||
const VkMemoryBarrier2 mem_barrier = {
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2,
|
||||
.srcStageMask = VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT |
|
||||
VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT |
|
||||
VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
|
||||
.srcAccessMask = VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT |
|
||||
VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
|
||||
.dstStageMask = VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT,
|
||||
.dstAccessMask = VK_ACCESS_2_SHADER_SAMPLED_READ_BIT,
|
||||
};
|
||||
const VkDependencyInfo dep_info = {
|
||||
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
|
||||
.memoryBarrierCount = 1,
|
||||
.pMemoryBarriers = &mem_barrier,
|
||||
};
|
||||
panvk_per_arch(CmdPipelineBarrier2)(panvk_cmd_buffer_to_handle(cmdbuf),
|
||||
&dep_info);
|
||||
#endif
|
||||
|
||||
if (clear_att_count && render_info) {
|
||||
VkClearRect clear_rect = {
|
||||
.rect = render_info->renderArea,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = render_info->viewMask ? 1 : render_info->layerCount,
|
||||
};
|
||||
|
||||
panvk_per_arch(CmdClearAttachments)(panvk_cmd_buffer_to_handle(cmdbuf),
|
||||
clear_att_count, clear_atts, 1,
|
||||
&clear_rect);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
panvk_per_arch(cmd_preload_render_area_border)(
|
||||
struct panvk_cmd_buffer *cmdbuf, const VkRenderingInfo *render_info)
|
||||
{
|
||||
const unsigned meta_tile_size = pan_meta_tile_size(PAN_ARCH);
|
||||
struct panvk_cmd_graphics_state *state = &cmdbuf->state.gfx;
|
||||
struct pan_fb_info *fbinfo = &state->render.fb.info;
|
||||
|
||||
bool render_area_is_aligned =
|
||||
((fbinfo->draw_extent.minx | fbinfo->draw_extent.miny) %
|
||||
meta_tile_size) == 0 &&
|
||||
(fbinfo->draw_extent.maxx + 1 == fbinfo->width ||
|
||||
(fbinfo->draw_extent.maxx % meta_tile_size) == (meta_tile_size - 1)) &&
|
||||
(fbinfo->draw_extent.maxy + 1 == fbinfo->height ||
|
||||
(fbinfo->draw_extent.maxy % meta_tile_size) == (meta_tile_size - 1));
|
||||
|
||||
/* If the render area is aligned on the meta tile size, we're good. */
|
||||
if (!render_area_is_aligned)
|
||||
panvk_per_arch(cmd_force_fb_preload)(cmdbuf, render_info);
|
||||
}
|
||||
|
||||
static void
|
||||
prepare_iam_sysvals(struct panvk_cmd_buffer *cmdbuf, BITSET_WORD *dirty_sysvals)
|
||||
{
|
||||
const struct vk_input_attachment_location_state *ial =
|
||||
&cmdbuf->vk.dynamic_graphics_state.ial;
|
||||
struct panvk_input_attachment_info iam[INPUT_ATTACHMENT_MAP_SIZE];
|
||||
uint32_t catt_count =
|
||||
ial->color_attachment_count == MESA_VK_COLOR_ATTACHMENT_COUNT_UNKNOWN
|
||||
? MAX_RTS
|
||||
: ial->color_attachment_count;
|
||||
|
||||
memset(iam, ~0, sizeof(iam));
|
||||
|
||||
assert(catt_count <= MAX_RTS);
|
||||
|
||||
for (uint32_t i = 0; i < catt_count; i++) {
|
||||
if (ial->color_map[i] == MESA_VK_ATTACHMENT_UNUSED ||
|
||||
!(cmdbuf->state.gfx.render.bound_attachments &
|
||||
MESA_VK_RP_ATTACHMENT_COLOR_BIT(i)))
|
||||
continue;
|
||||
|
||||
VkFormat fmt = cmdbuf->state.gfx.render.color_attachments.fmts[i];
|
||||
enum pipe_format pfmt = vk_format_to_pipe_format(fmt);
|
||||
struct mali_internal_conversion_packed conv;
|
||||
uint32_t ia_idx = ial->color_map[i] + 1;
|
||||
assert(ia_idx < ARRAY_SIZE(iam));
|
||||
|
||||
iam[ia_idx].target = PANVK_COLOR_ATTACHMENT(i);
|
||||
|
||||
pan_pack(&conv, INTERNAL_CONVERSION, cfg) {
|
||||
cfg.memory_format =
|
||||
GENX(pan_dithered_format_from_pipe_format)(pfmt, false);
|
||||
#if PAN_ARCH < 9
|
||||
cfg.register_format =
|
||||
vk_format_is_uint(fmt) ? MALI_REGISTER_FILE_FORMAT_U32
|
||||
: vk_format_is_sint(fmt) ? MALI_REGISTER_FILE_FORMAT_I32
|
||||
: MALI_REGISTER_FILE_FORMAT_F32;
|
||||
#endif
|
||||
}
|
||||
|
||||
iam[ia_idx].conversion = conv.opaque[0];
|
||||
}
|
||||
|
||||
if (ial->depth_att != MESA_VK_ATTACHMENT_UNUSED) {
|
||||
uint32_t ia_idx =
|
||||
ial->depth_att == MESA_VK_ATTACHMENT_NO_INDEX ? 0 : ial->depth_att + 1;
|
||||
|
||||
assert(ia_idx < ARRAY_SIZE(iam));
|
||||
iam[ia_idx].target = PANVK_ZS_ATTACHMENT;
|
||||
|
||||
#if PAN_ARCH < 9
|
||||
/* On v7, we need to pass the depth format around. If we use a conversion
|
||||
* of zero, like we do on v9+, the GPU reports an INVALID_INSTR_ENC. */
|
||||
VkFormat fmt = cmdbuf->state.gfx.render.z_attachment.fmt;
|
||||
enum pipe_format pfmt = vk_format_to_pipe_format(fmt);
|
||||
struct mali_internal_conversion_packed conv;
|
||||
|
||||
pan_pack(&conv, INTERNAL_CONVERSION, cfg) {
|
||||
cfg.register_format = MALI_REGISTER_FILE_FORMAT_F32;
|
||||
cfg.memory_format =
|
||||
GENX(pan_dithered_format_from_pipe_format)(pfmt, false);
|
||||
}
|
||||
iam[ia_idx].conversion = conv.opaque[0];
|
||||
#endif
|
||||
}
|
||||
|
||||
if (ial->stencil_att != MESA_VK_ATTACHMENT_UNUSED) {
|
||||
uint32_t ia_idx =
|
||||
ial->stencil_att == MESA_VK_ATTACHMENT_NO_INDEX ? 0 : ial->stencil_att + 1;
|
||||
|
||||
assert(ia_idx < ARRAY_SIZE(iam));
|
||||
iam[ia_idx].target = PANVK_ZS_ATTACHMENT;
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < ARRAY_SIZE(iam); i++)
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals, iam[i], iam[i]);
|
||||
}
|
||||
|
||||
/* Empirical value: it was picked so that
 * dEQP-VK.draw.renderpass.inverted_depth_ranges.nodepthclamp_deltazero
 * passes.
 */
#define MIN_DEPTH_CLIP_RANGE 37.7E-06f
|
||||
|
||||
void
|
||||
panvk_per_arch(cmd_prepare_draw_sysvals)(struct panvk_cmd_buffer *cmdbuf,
|
||||
const struct panvk_draw_info *info)
|
||||
{
|
||||
struct vk_color_blend_state *cb = &cmdbuf->vk.dynamic_graphics_state.cb;
|
||||
const struct panvk_shader_variant *fs =
|
||||
panvk_shader_only_variant(get_fs(cmdbuf));
|
||||
uint32_t noperspective_varyings = fs ? fs->info.varyings.noperspective : 0;
|
||||
BITSET_DECLARE(dirty_sysvals, MAX_SYSVAL_FAUS) = {0};
|
||||
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals, vs.noperspective_varyings,
|
||||
noperspective_varyings);
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals, vs.first_vertex, info->vertex.base);
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals, vs.base_instance, info->instance.base);
|
||||
|
||||
#if PAN_ARCH < 9
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals, vs.raw_vertex_offset,
|
||||
info->vertex.raw_offset);
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals, layer_id, info->layer_id);
|
||||
|
||||
/* iter13: VK_EXT_transform_feedback sysvals — always set (per draw),
|
||||
* reflect bound XFB state. set_gfx_sysval is a no-op if value unchanged. */
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals, vs.num_vertices, info->vertex.count);
|
||||
{
|
||||
const struct panvk_cmd_graphics_state *_gfx = &cmdbuf->state.gfx;
|
||||
/* iter13: default each XFB buffer address to PAN_SHADER_OOB_ADDRESS
|
||||
* (= 1<<63). This is the Panfrost-Gallium memory-sink idiom — the
|
||||
* Bifrost MMU silently discards stores to this address, so a pipeline
|
||||
* with XFB outputs used in a non-XFB draw (or in an XFB draw with
|
||||
* fewer bound buffers than the shader declares) is safe instead of
|
||||
* faulting. See gallium/drivers/panfrost/pan_cmdstream.c PAN_SYSVAL_XFB. */
|
||||
uint64_t _xa0 = PAN_SHADER_OOB_ADDRESS, _xa1 = PAN_SHADER_OOB_ADDRESS,
|
||||
_xa2 = PAN_SHADER_OOB_ADDRESS, _xa3 = PAN_SHADER_OOB_ADDRESS;
|
||||
if (_gfx->xfb.active) {
|
||||
if (_gfx->xfb.buffer_count > 0 && _gfx->xfb.buffers[0].addr)
|
||||
_xa0 = _gfx->xfb.buffers[0].addr + _gfx->xfb.buffers[0].offset;
|
||||
if (_gfx->xfb.buffer_count > 1 && _gfx->xfb.buffers[1].addr)
|
||||
_xa1 = _gfx->xfb.buffers[1].addr + _gfx->xfb.buffers[1].offset;
|
||||
if (_gfx->xfb.buffer_count > 2 && _gfx->xfb.buffers[2].addr)
|
||||
_xa2 = _gfx->xfb.buffers[2].addr + _gfx->xfb.buffers[2].offset;
|
||||
if (_gfx->xfb.buffer_count > 3 && _gfx->xfb.buffers[3].addr)
|
||||
_xa3 = _gfx->xfb.buffers[3].addr + _gfx->xfb.buffers[3].offset;
|
||||
}
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals, vs.xfb_address[0], _xa0);
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals, vs.xfb_address[1], _xa1);
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals, vs.xfb_address[2], _xa2);
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals, vs.xfb_address[3], _xa3);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (dyn_gfx_state_dirty(cmdbuf, CB_BLEND_CONSTANTS)) {
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(cb->blend_constants); i++) {
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals, blend.constants[i],
|
||||
cb->blend_constants[i]);
|
||||
}
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < MAX_RTS; i++) {
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals, fs.blend_descs[i],
|
||||
cmdbuf->state.gfx.fs.blend_descs[i]);
|
||||
}
|
||||
|
||||
if (dyn_gfx_state_dirty(cmdbuf, VP_VIEWPORTS) ||
|
||||
dyn_gfx_state_dirty(cmdbuf, VP_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE) ||
|
||||
dyn_gfx_state_dirty(cmdbuf, RS_DEPTH_CLIP_ENABLE) ||
|
||||
dyn_gfx_state_dirty(cmdbuf, RS_DEPTH_CLAMP_ENABLE)) {
|
||||
const struct vk_rasterization_state *rs =
|
||||
&cmdbuf->vk.dynamic_graphics_state.rs;
|
||||
const struct vk_viewport_state *vp =
|
||||
&cmdbuf->vk.dynamic_graphics_state.vp;
|
||||
const VkViewport *viewport = &vp->viewports[0];
|
||||
|
||||
/* Doing the viewport transform in the vertex shader and then depth
|
||||
* clipping with the viewport depth range gets a similar result to
|
||||
* clipping in clip-space, but loses precision when the viewport depth
|
||||
* range is very small. When minDepth == maxDepth, this completely
|
||||
* flattens the clip-space depth and results in never clipping.
|
||||
*
|
||||
* To work around this, set a lower limit on depth range when clipping is
|
||||
* enabled. This results in slightly incorrect fragment depth values, and
|
||||
* doesn't help with the precision loss, but at least clipping isn't
|
||||
* completely broken.
|
||||
*/
|
||||
float z_min = viewport->minDepth;
|
||||
float z_max = viewport->maxDepth;
|
||||
if (vk_rasterization_state_depth_clip_enable(rs) &&
|
||||
fabsf(z_max - z_min) < MIN_DEPTH_CLIP_RANGE) {
|
||||
float z_sign = z_min <= z_max ? 1.0f : -1.0f;
|
||||
|
||||
float z_center = 0.5f * (z_max + z_min);
|
||||
/* Bump offset off-center if necessary, to not go out of range */
|
||||
z_center = CLAMP(z_center, 0.5f * MIN_DEPTH_CLIP_RANGE,
|
||||
1.0f - 0.5f * MIN_DEPTH_CLIP_RANGE);
|
||||
|
||||
z_min = z_center - 0.5f * z_sign * MIN_DEPTH_CLIP_RANGE;
|
||||
z_max = z_center + 0.5f * z_sign * MIN_DEPTH_CLIP_RANGE;
|
||||
}
|
||||
|
||||
/* Upload the viewport scale. Defined as (px/2, py/2, pz) at the start of
|
||||
* section 24.5 ("Controlling the Viewport") of the Vulkan spec. At the
|
||||
* end of the section, the spec defines:
|
||||
*
|
||||
* px = width
|
||||
* py = height
|
||||
* pz = maxDepth - minDepth if negativeOneToOne is false
|
||||
* pz = (maxDepth - minDepth) / 2 if negativeOneToOne is true
|
||||
*/
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals, viewport.scale.x,
|
||||
0.5f * viewport->width);
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals, viewport.scale.y,
|
||||
0.5f * viewport->height);
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals, viewport.scale.z,
|
||||
vp->depth_clip_negative_one_to_one ?
|
||||
0.5f * (z_max - z_min) : z_max - z_min);
|
||||
|
||||
/* Upload the viewport offset. Defined as (ox, oy, oz) at the start of
|
||||
* section 24.5 ("Controlling the Viewport") of the Vulkan spec. At the
|
||||
* end of the section, the spec defines:
|
||||
*
|
||||
* ox = x + width/2
|
||||
* oy = y + height/2
|
||||
* oz = minDepth if negativeOneToOne is false
|
||||
* oz = (maxDepth + minDepth) / 2 if negativeOneToOne is true
|
||||
*/
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals, viewport.offset.x,
|
||||
(0.5f * viewport->width) + viewport->x);
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals, viewport.offset.y,
|
||||
(0.5f * viewport->height) + viewport->y);
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals, viewport.offset.z,
|
||||
vp->depth_clip_negative_one_to_one ?
|
||||
0.5f * (z_min + z_max) : z_min);
|
||||
|
||||
}
|
||||
|
||||
if (dyn_gfx_state_dirty(cmdbuf, INPUT_ATTACHMENT_MAP))
|
||||
prepare_iam_sysvals(cmdbuf, dirty_sysvals);
|
||||
|
||||
const struct panvk_shader_variant *vs =
|
||||
panvk_shader_hw_variant(cmdbuf->state.gfx.vs.shader);
|
||||
|
||||
#if PAN_ARCH < 9
|
||||
struct panvk_descriptor_state *desc_state = &cmdbuf->state.gfx.desc_state;
|
||||
struct panvk_shader_desc_state *vs_desc_state = &cmdbuf->state.gfx.vs.desc;
|
||||
struct panvk_shader_desc_state *fs_desc_state = &cmdbuf->state.gfx.fs.desc;
|
||||
|
||||
if (gfx_state_dirty(cmdbuf, DESC_STATE) || gfx_state_dirty(cmdbuf, VS)) {
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals,
|
||||
desc.sets[PANVK_DESC_TABLE_VS_DYN_SSBOS],
|
||||
vs_desc_state->dyn_ssbos);
|
||||
}
|
||||
|
||||
if (gfx_state_dirty(cmdbuf, DESC_STATE) || gfx_state_dirty(cmdbuf, FS)) {
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals,
|
||||
desc.sets[PANVK_DESC_TABLE_FS_DYN_SSBOS],
|
||||
fs_desc_state->dyn_ssbos);
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < MAX_SETS; i++) {
|
||||
uint32_t used_set_mask =
|
||||
vs->desc_info.used_set_mask | (fs ? fs->desc_info.used_set_mask : 0);
|
||||
|
||||
if (used_set_mask & BITFIELD_BIT(i)) {
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals, desc.sets[i],
|
||||
desc_state->sets[i]->descs.dev);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/* We mask the dirty sysvals by the shader usage, and only flag
|
||||
* the push uniforms dirty if those intersect. */
|
||||
BITSET_DECLARE(dirty_shader_sysvals, MAX_SYSVAL_FAUS);
|
||||
BITSET_AND(dirty_shader_sysvals, dirty_sysvals, vs->fau.used_sysvals);
|
||||
if (!BITSET_IS_EMPTY(dirty_shader_sysvals))
|
||||
gfx_state_set_dirty(cmdbuf, VS_PUSH_UNIFORMS);
|
||||
|
||||
if (fs) {
|
||||
BITSET_AND(dirty_shader_sysvals, dirty_sysvals, fs->fau.used_sysvals);
|
||||
|
||||
/* If blend constants are not read by the blend shader, we can consider
|
||||
* they are not read at all, so clear the dirty bits to avoid re-emitting
|
||||
* FAUs when we can. */
|
||||
if (!cmdbuf->state.gfx.cb.info.shader_loads_blend_const)
|
||||
BITSET_CLEAR_COUNT(dirty_shader_sysvals, 0, 4);
|
||||
|
||||
if (!BITSET_IS_EMPTY(dirty_shader_sysvals))
|
||||
gfx_state_set_dirty(cmdbuf, FS_PUSH_UNIFORMS);
|
||||
}
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
panvk_per_arch(CmdBindVertexBuffers2)(VkCommandBuffer commandBuffer,
|
||||
uint32_t firstBinding,
|
||||
uint32_t bindingCount,
|
||||
const VkBuffer *pBuffers,
|
||||
const VkDeviceSize *pOffsets,
|
||||
const VkDeviceSize *pSizes,
|
||||
const VkDeviceSize *pStrides)
|
||||
{
|
||||
VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
|
||||
|
||||
assert(firstBinding + bindingCount <= MAX_VBS);
|
||||
|
||||
if (pStrides) {
|
||||
vk_cmd_set_vertex_binding_strides(&cmdbuf->vk, firstBinding,
|
||||
bindingCount, pStrides);
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < bindingCount; i++) {
|
||||
VK_FROM_HANDLE(panvk_buffer, buffer, pBuffers[i]);
|
||||
|
||||
if (buffer) {
|
||||
cmdbuf->state.gfx.vb.bufs[firstBinding + i].address =
|
||||
panvk_buffer_gpu_ptr(buffer, pOffsets[i]);
|
||||
cmdbuf->state.gfx.vb.bufs[firstBinding + i].size = panvk_buffer_range(
|
||||
buffer, pOffsets[i], pSizes ? pSizes[i] : VK_WHOLE_SIZE);
|
||||
} else {
|
||||
cmdbuf->state.gfx.vb.bufs[firstBinding + i].address = 0;
|
||||
cmdbuf->state.gfx.vb.bufs[firstBinding + i].size = 0;
|
||||
}
|
||||
}
|
||||
|
||||
cmdbuf->state.gfx.vb.count =
|
||||
MAX2(cmdbuf->state.gfx.vb.count, firstBinding + bindingCount);
|
||||
gfx_state_set_dirty(cmdbuf, VB);
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
panvk_per_arch(CmdBindIndexBuffer2)(VkCommandBuffer commandBuffer,
|
||||
VkBuffer buffer, VkDeviceSize offset,
|
||||
VkDeviceSize size, VkIndexType indexType)
|
||||
{
|
||||
VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
|
||||
VK_FROM_HANDLE(panvk_buffer, buf, buffer);
|
||||
|
||||
if (buf) {
|
||||
cmdbuf->state.gfx.ib.size = panvk_buffer_range(buf, offset, size);
|
||||
assert(cmdbuf->state.gfx.ib.size <= UINT32_MAX);
|
||||
cmdbuf->state.gfx.ib.dev_addr = panvk_buffer_gpu_ptr(buf, offset);
|
||||
} else {
|
||||
cmdbuf->state.gfx.ib.size = 0;
|
||||
/* In case of NullDescriptors, we need to set a non-NULL address and rely
|
||||
* on out-of-bounds behavior against the zero size of the buffer. Note
|
||||
* that this only works for v10+, as v9 does not have a way to specify the
|
||||
* index buffer size. */
|
||||
cmdbuf->state.gfx.ib.dev_addr = PAN_ARCH >= 10 ? 0x1000 : 0;
|
||||
}
|
||||
cmdbuf->state.gfx.ib.index_size = vk_index_type_to_bytes(indexType);
|
||||
|
||||
gfx_state_set_dirty(cmdbuf, IB);
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,442 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
iter13: apply VK_EXT_transform_feedback implementation to Mesa 26.0.6 PanVk.
|
||||
|
||||
Run from inside /home/mfritsche/mesa-build/mesa-26.0.6/ on ohm.
|
||||
Idempotent — checks if changes are already present and skips if so.
|
||||
|
||||
The implementation is single-variant (Vulkan spec allows undefined behavior
|
||||
for XFB-output shaders bound outside Begin/EndTransformFeedback, so we
|
||||
don't need defensive two-variant compilation for v1).
|
||||
|
||||
Files modified:
|
||||
1. src/panfrost/vulkan/panvk_shader.h
|
||||
2. src/panfrost/vulkan/panvk_vX_physical_device.c
|
||||
3. src/panfrost/vulkan/panvk_vX_shader.c
|
||||
4. src/panfrost/vulkan/panvk_cmd_draw.h
|
||||
5. src/panfrost/vulkan/panvk_vX_cmd_draw.c
|
||||
6. src/panfrost/vulkan/meson.build
|
||||
Files created:
|
||||
7. src/panfrost/vulkan/jm/panvk_vX_cmd_xfb.c
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
def resolve_root(environ, cwd, script_dir):
    """Pick the Mesa source root that all patch paths are joined onto.

    Precedence, highest first:
      1. an explicit ``MESA_ROOT`` entry in ``environ``;
      2. ``cwd``, when its basename starts with ``mesa-`` (the script is
         normally run from inside the unpacked Mesa tree);
      3. ``script_dir``, the directory containing this script.

    An explicit MESA_ROOT must win: previously the cwd heuristic silently
    overrode it whenever the script was launched from any ``mesa-*`` directory.
    """
    if "MESA_ROOT" in environ:
        return environ["MESA_ROOT"]
    if os.path.basename(cwd).startswith("mesa-"):
        return cwd
    return script_dir


ROOT = resolve_root(os.environ, os.getcwd(),
                    os.path.abspath(os.path.dirname(__file__)))

print(f"[iter13] applying patches under {ROOT}")
|
||||
|
||||
|
||||
def replace_once(path, old, new, marker_in_new=None):
    """Replace the single occurrence of `old` with `new` in the file at `path`.

    `path` is relative to ROOT. If `marker_in_new` is given and already present
    in the file, the patch is treated as applied and the file is left alone,
    which makes the script idempotent.

    The process exits with status 2 when the file cannot be read, when `old`
    is not found, or when `old` matches more than once.

    Files are read and written as UTF-8 explicitly: the patched sources and
    the injected snippets contain non-ASCII characters, so relying on the
    locale's default encoding is not safe.
    """
    full = os.path.join(ROOT, path)
    try:
        with open(full, encoding="utf-8") as f:
            content = f.read()
    except OSError as exc:
        # Report in the same style as the other failure paths instead of
        # dumping a traceback.
        print(f" [FAIL] {path} — cannot read: {exc}")
        sys.exit(2)

    if marker_in_new and marker_in_new in content:
        print(f" [skip] {path} — already patched ({marker_in_new!r} present)")
        return

    count = content.count(old)
    if count == 0:
        print(f" [FAIL] {path} — expected pattern not found:\n {old[:100]!r}")
        sys.exit(2)
    if count > 1:
        print(f" [FAIL] {path} — pattern matches {count} times, need exactly 1")
        sys.exit(2)

    with open(full, "w", encoding="utf-8") as f:
        f.write(content.replace(old, new))
    print(f" [ok] {path}")
|
||||
|
||||
|
||||
def create_file(path, content, skip_if_exists=True):
    """Write `content` to a new file at `path` (relative to ROOT).

    With `skip_if_exists` (the default) an existing file is left untouched so
    re-running the script is harmless. Missing parent directories are created.

    The file is written as UTF-8 explicitly because the generated source
    contains non-ASCII characters (e.g. the copyright sign), which the
    locale's default encoding may not be able to represent.
    """
    full = os.path.join(ROOT, path)
    if skip_if_exists and os.path.exists(full):
        print(f" [skip] {path} — exists")
        return

    parent = os.path.dirname(full)
    if parent:
        # os.makedirs("") raises, so only create a directory when there is one.
        os.makedirs(parent, exist_ok=True)

    with open(full, "w", encoding="utf-8") as f:
        f.write(content)
    print(f" [ok] {path} (created)")
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 1. panvk_shader.h — extend vs sysval struct (PAN_ARCH < 9)
|
||||
# ============================================================
|
||||
|
||||
print("\n[1/7] panvk_shader.h — add num_vertices + xfb_address[4] to vs sysvals")
|
||||
replace_once(
|
||||
"src/panfrost/vulkan/panvk_shader.h",
|
||||
""" struct {
|
||||
#if PAN_ARCH < 9
|
||||
int32_t raw_vertex_offset;
|
||||
#endif
|
||||
int32_t first_vertex;
|
||||
int32_t base_instance;
|
||||
uint32_t noperspective_varyings;
|
||||
} vs;""",
|
||||
""" struct {
|
||||
#if PAN_ARCH < 9
|
||||
int32_t raw_vertex_offset;
|
||||
uint32_t num_vertices; /* iter13: XFB needs per-draw vertex count */
|
||||
uint32_t _pad_xfb; /* keep 8-byte alignment before u64 array */
|
||||
aligned_u64 xfb_address[4]; /* iter13: 4 transform feedback buffer base addresses */
|
||||
#endif
|
||||
int32_t first_vertex;
|
||||
int32_t base_instance;
|
||||
uint32_t noperspective_varyings;
|
||||
} vs;""",
|
||||
marker_in_new="xfb_address[4]",
|
||||
)
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 2. panvk_vX_physical_device.c — expose ext + features + properties
|
||||
# ============================================================
|
||||
|
||||
print("\n[2/7] panvk_vX_physical_device.c — expose VK_EXT_transform_feedback")
|
||||
|
||||
# A. Add extension to the ext list (find a stable nearby line)
|
||||
replace_once(
|
||||
"src/panfrost/vulkan/panvk_vX_physical_device.c",
|
||||
" .EXT_robustness2 = true,",
|
||||
""" .EXT_robustness2 = true,
|
||||
.EXT_transform_feedback = PAN_ARCH < 9, /* iter13: JM-class only for now */""",
|
||||
marker_in_new="EXT_transform_feedback",
|
||||
)
|
||||
|
||||
# B. Add features. The features block has /* VK_KHR_robustness2 */ nearby.
|
||||
replace_once(
|
||||
"src/panfrost/vulkan/panvk_vX_physical_device.c",
|
||||
""" /* VK_KHR_robustness2 */
|
||||
.robustBufferAccess2 = PAN_ARCH >= 11,
|
||||
.robustImageAccess2 = false,
|
||||
.nullDescriptor = true,""",
|
||||
""" /* VK_KHR_robustness2 */
|
||||
.robustBufferAccess2 = PAN_ARCH >= 11,
|
||||
.robustImageAccess2 = false,
|
||||
.nullDescriptor = true,
|
||||
|
||||
/* VK_EXT_transform_feedback (iter13) */
|
||||
.transformFeedback = PAN_ARCH < 9,
|
||||
.geometryStreams = false,""",
|
||||
marker_in_new=".transformFeedback = PAN_ARCH < 9",
|
||||
)
|
||||
|
||||
# C. Add properties. Anchor to the existing /* VK_KHR_robustness2 */ properties
|
||||
# block near line 1019. We'll add right after it.
|
||||
replace_once(
|
||||
"src/panfrost/vulkan/panvk_vX_physical_device.c",
|
||||
""" /* VK_KHR_robustness2 */
|
||||
.robustStorageBufferAccessSizeAlignment = 1,
|
||||
.robustUniformBufferAccessSizeAlignment = 1,""",
|
||||
""" /* VK_KHR_robustness2 */
|
||||
.robustStorageBufferAccessSizeAlignment = 1,
|
||||
.robustUniformBufferAccessSizeAlignment = 1,
|
||||
|
||||
/* VK_EXT_transform_feedback (iter13) */
|
||||
.maxTransformFeedbackStreams = 1,
|
||||
.maxTransformFeedbackBuffers = 4,
|
||||
.maxTransformFeedbackBufferSize = UINT32_MAX,
|
||||
.maxTransformFeedbackStreamDataSize = 512,
|
||||
.maxTransformFeedbackBufferDataSize = 512,
|
||||
.maxTransformFeedbackBufferDataStride = 2048,
|
||||
.transformFeedbackQueries = false,
|
||||
.transformFeedbackStreamsLinesTriangles = false,
|
||||
.transformFeedbackRasterizationStreamSelect = false,
|
||||
.transformFeedbackDraw = false,""",
|
||||
marker_in_new="maxTransformFeedbackStreams",
|
||||
)
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 3. panvk_vX_shader.c — intrinsic lowering + NIR pass wiring
|
||||
# ============================================================
|
||||
|
||||
print("\n[3/7] panvk_vX_shader.c — intrinsic lowering + pan_nir_lower_xfb wiring")
|
||||
|
||||
# A. Add intrinsic cases inside the PAN_ARCH < 9 block.
|
||||
# Anchor to the existing `vs.raw_vertex_offset` case.
|
||||
replace_once(
|
||||
"src/panfrost/vulkan/panvk_vX_shader.c",
|
||||
"""#if PAN_ARCH < 9
|
||||
case nir_intrinsic_load_raw_vertex_offset_pan:
|
||||
val = load_sysval(b, graphics, bit_size, vs.raw_vertex_offset);
|
||||
break;""",
|
||||
"""#if PAN_ARCH < 9
|
||||
case nir_intrinsic_load_raw_vertex_offset_pan:
|
||||
val = load_sysval(b, graphics, bit_size, vs.raw_vertex_offset);
|
||||
break;
|
||||
case nir_intrinsic_load_num_vertices: /* iter13: XFB index calc */
|
||||
val = load_sysval(b, graphics, bit_size, vs.num_vertices);
|
||||
break;
|
||||
case nir_intrinsic_load_xfb_address: { /* iter13: XFB buffer N base address */
|
||||
unsigned idx = nir_intrinsic_base(intr);
|
||||
switch (idx) {
|
||||
case 0: val = load_sysval(b, graphics, bit_size, vs.xfb_address[0]); break;
|
||||
case 1: val = load_sysval(b, graphics, bit_size, vs.xfb_address[1]); break;
|
||||
case 2: val = load_sysval(b, graphics, bit_size, vs.xfb_address[2]); break;
|
||||
case 3: val = load_sysval(b, graphics, bit_size, vs.xfb_address[3]); break;
|
||||
default: return false;
|
||||
}
|
||||
break;
|
||||
}""",
|
||||
marker_in_new="load_num_vertices",
|
||||
)
|
||||
|
||||
# B. Wire pan_nir_lower_xfb into the lowering chain.
|
||||
# We want it right after nir_lower_system_values runs.
|
||||
# Look for the existing call.
|
||||
replace_once(
|
||||
"src/panfrost/vulkan/panvk_vX_shader.c",
|
||||
""" NIR_PASS(_, nir, nir_lower_system_values);
|
||||
|
||||
nir_lower_compute_system_values_options options = {""",
|
||||
""" NIR_PASS(_, nir, nir_lower_system_values);
|
||||
|
||||
#if PAN_ARCH < 9
|
||||
/* iter13: VK_EXT_transform_feedback — if the shader has XFB output
|
||||
* decorations, run the Mesa standard XFB-info NIR pass + Panfrost's
|
||||
* own NIR lowering that turns store_output into nir_store_global
|
||||
* to the per-buffer base address (the panvk lowering above wires
|
||||
* nir_load_xfb_address to vs.xfb_address[N]). Single-variant: if
|
||||
* an app binds an XFB pipeline outside vkCmdBeginTransformFeedback,
|
||||
* the writes go to address 0 — undefined behavior per spec. */
|
||||
if (nir->info.stage == MESA_SHADER_VERTEX &&
|
||||
nir->xfb_info != NULL) {
|
||||
NIR_PASS(_, nir, pan_nir_lower_xfb);
|
||||
}
|
||||
#endif
|
||||
|
||||
nir_lower_compute_system_values_options options = {""",
|
||||
marker_in_new="pan_nir_lower_xfb",
|
||||
)
|
||||
|
||||
# C. Add #include for pan_nir.h at the top (where pan_nir_lower_xfb is declared)
|
||||
replace_once(
|
||||
"src/panfrost/vulkan/panvk_vX_shader.c",
|
||||
'#include "panvk_shader.h"',
|
||||
'#include "panvk_shader.h"\n#include "pan_nir.h" /* iter13: pan_nir_lower_xfb */',
|
||||
marker_in_new='/* iter13: pan_nir_lower_xfb */',
|
||||
)
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 4. panvk_cmd_draw.h — add XFB state struct + pipeline state member
|
||||
# ============================================================
|
||||
|
||||
print("\n[4/7] panvk_cmd_draw.h — add panvk_xfb_state to cmd buffer state")
|
||||
|
||||
# Inject the XFB state as an anonymous `xfb` struct placed directly after the
# `sysvals` member in panvk_cmd_draw.h, guarded by PAN_ARCH < 9.
# Alternatives considered and dropped: a self-contained inclusion at the top
# of the header, and a forward declaration with the struct defined in
# jm/panvk_vX_cmd_xfb.c.
|
||||
replace_once(
|
||||
"src/panfrost/vulkan/panvk_cmd_draw.h",
|
||||
" struct panvk_graphics_sysvals sysvals;",
|
||||
""" struct panvk_graphics_sysvals sysvals;
|
||||
|
||||
#if PAN_ARCH < 9
|
||||
/* iter13: VK_EXT_transform_feedback state (JM-class only for now). */
|
||||
struct {
|
||||
bool active;
|
||||
uint32_t buffer_count;
|
||||
struct {
|
||||
uint64_t addr;
|
||||
uint64_t offset;
|
||||
uint64_t size;
|
||||
} buffers[4];
|
||||
} xfb;
|
||||
#endif""",
|
||||
marker_in_new="iter13: VK_EXT_transform_feedback state",
|
||||
)
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 5. panvk_vX_cmd_draw.c (arch-templated, NOT jm/) — populate XFB sysvals
|
||||
# ============================================================
|
||||
|
||||
print("\n[5/7] panvk_vX_cmd_draw.c — populate vs.num_vertices + vs.xfb_address[] inside the PAN_ARCH<9 block")
|
||||
|
||||
# Insert just inside the existing `#if PAN_ARCH < 9` block where
|
||||
# raw_vertex_offset is set. info->vertex.count is available in scope.
|
||||
replace_once(
|
||||
"src/panfrost/vulkan/panvk_vX_cmd_draw.c",
|
||||
"""#if PAN_ARCH < 9
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals, vs.raw_vertex_offset,
|
||||
info->vertex.raw_offset);
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals, layer_id, info->layer_id);
|
||||
#endif""",
|
||||
"""#if PAN_ARCH < 9
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals, vs.raw_vertex_offset,
|
||||
info->vertex.raw_offset);
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals, layer_id, info->layer_id);
|
||||
|
||||
/* iter13: VK_EXT_transform_feedback sysvals — always set (per draw),
|
||||
* reflect bound XFB state. set_gfx_sysval is a no-op if value unchanged. */
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals, vs.num_vertices, info->vertex.count);
|
||||
{
|
||||
const struct panvk_cmd_graphics_state *_gfx = &cmdbuf->state.gfx;
|
||||
uint64_t _xa0 = 0, _xa1 = 0, _xa2 = 0, _xa3 = 0;
|
||||
if (_gfx->xfb.active) {
|
||||
if (_gfx->xfb.buffer_count > 0)
|
||||
_xa0 = _gfx->xfb.buffers[0].addr + _gfx->xfb.buffers[0].offset;
|
||||
if (_gfx->xfb.buffer_count > 1)
|
||||
_xa1 = _gfx->xfb.buffers[1].addr + _gfx->xfb.buffers[1].offset;
|
||||
if (_gfx->xfb.buffer_count > 2)
|
||||
_xa2 = _gfx->xfb.buffers[2].addr + _gfx->xfb.buffers[2].offset;
|
||||
if (_gfx->xfb.buffer_count > 3)
|
||||
_xa3 = _gfx->xfb.buffers[3].addr + _gfx->xfb.buffers[3].offset;
|
||||
}
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals, vs.xfb_address[0], _xa0);
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals, vs.xfb_address[1], _xa1);
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals, vs.xfb_address[2], _xa2);
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals, vs.xfb_address[3], _xa3);
|
||||
}
|
||||
#endif""",
|
||||
marker_in_new="iter13: VK_EXT_transform_feedback sysvals",
|
||||
)
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 6. NEW: jm/panvk_vX_cmd_xfb.c — Vulkan command handlers
|
||||
# ============================================================
|
||||
|
||||
print("\n[6/7] jm/panvk_vX_cmd_xfb.c — XFB Vulkan command handlers (NEW FILE)")
|
||||
|
||||
xfb_c = r'''/*
|
||||
* Copyright © 2026 mfritsche / claude-noether
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* iter13: VK_EXT_transform_feedback command handlers for the JM
|
||||
* architecture path (Bifrost v6/v7 + Valhall-JM v9).
|
||||
*
|
||||
* The runtime contract:
|
||||
* - vkCmdBindTransformFeedbackBuffersEXT: stash (gpu_addr, offset, size)
|
||||
* for each slot into cmdbuf->state.gfx.xfb.buffers[].
|
||||
* - vkCmdBeginTransformFeedbackEXT: set cmdbuf->state.gfx.xfb.active = true.
|
||||
* Mark sysvals dirty so the next draw re-emits vs.xfb_address[].
|
||||
* - vkCmdEndTransformFeedbackEXT: set active = false.
|
||||
*
|
||||
* Counter buffers (firstCounterBuffer/counterBufferCount/pCounterBuffers/
|
||||
* pCounterBufferOffsets) are accepted by API but ignored — v1 doesn't
|
||||
* support pause/resume. transformFeedbackDraw is advertised as false.
|
||||
*
|
||||
* Per-draw integration: jm/panvk_vX_cmd_draw.c reads cmdbuf->state.gfx.xfb
|
||||
* and populates vs.xfb_address[i] for shader use. The pan_nir_lower_xfb
|
||||
* pass in panvk_vX_shader.c emits nir_load_xfb_address(i) which lowers
|
||||
* (via panvk_vX_shader.c sysval handler) to a load from the per-draw
|
||||
* sysval push area.
|
||||
*/
|
||||
|
||||
#include "vk_log.h"
|
||||
|
||||
#include "panvk_cmd_buffer.h"
|
||||
#include "panvk_cmd_draw.h"
|
||||
#include "panvk_buffer.h"
|
||||
#include "panvk_entrypoints.h"
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
panvk_per_arch(CmdBindTransformFeedbackBuffersEXT)(
|
||||
VkCommandBuffer commandBuffer,
|
||||
uint32_t firstBinding,
|
||||
uint32_t bindingCount,
|
||||
const VkBuffer *pBuffers,
|
||||
const VkDeviceSize *pOffsets,
|
||||
const VkDeviceSize *pSizes)
|
||||
{
|
||||
VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
|
||||
struct panvk_cmd_graphics_state *gfx = &cmdbuf->state.gfx;
|
||||
|
||||
for (uint32_t i = 0; i < bindingCount; i++) {
|
||||
uint32_t slot = firstBinding + i;
|
||||
if (slot >= 4)
|
||||
continue;
|
||||
|
||||
VK_FROM_HANDLE(panvk_buffer, buf, pBuffers[i]);
|
||||
gfx->xfb.buffers[slot].addr = panvk_buffer_gpu_ptr(buf, 0);
|
||||
gfx->xfb.buffers[slot].offset = pOffsets[i];
|
||||
gfx->xfb.buffers[slot].size =
|
||||
(pSizes != NULL && pSizes[i] != VK_WHOLE_SIZE)
|
||||
? pSizes[i]
|
||||
: (buf->vk.size - pOffsets[i]);
|
||||
}
|
||||
|
||||
if (firstBinding + bindingCount > gfx->xfb.buffer_count)
|
||||
gfx->xfb.buffer_count = firstBinding + bindingCount;
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
panvk_per_arch(CmdBeginTransformFeedbackEXT)(
|
||||
VkCommandBuffer commandBuffer,
|
||||
uint32_t firstCounterBuffer,
|
||||
uint32_t counterBufferCount,
|
||||
const VkBuffer *pCounterBuffers,
|
||||
const VkDeviceSize *pCounterBufferOffsets)
|
||||
{
|
||||
VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
|
||||
struct panvk_cmd_graphics_state *gfx = &cmdbuf->state.gfx;
|
||||
|
||||
/* Counter buffers ignored in v1 — see VkPhysicalDeviceTransformFeedback
|
||||
* PropertiesEXT.transformFeedbackDraw = false in panvk_vX_physical_device.c.
|
||||
*/
|
||||
(void)firstCounterBuffer;
|
||||
(void)counterBufferCount;
|
||||
(void)pCounterBuffers;
|
||||
(void)pCounterBufferOffsets;
|
||||
|
||||
gfx->xfb.active = true;
|
||||
/* Per-draw set_gfx_sysval picks up the change automatically — no
|
||||
* explicit dirty marking required (set_gfx_sysval uses memcmp +
|
||||
* BITSET to detect state diffs and re-emit sysvals). */
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
panvk_per_arch(CmdEndTransformFeedbackEXT)(
|
||||
VkCommandBuffer commandBuffer,
|
||||
uint32_t firstCounterBuffer,
|
||||
uint32_t counterBufferCount,
|
||||
const VkBuffer *pCounterBuffers,
|
||||
const VkDeviceSize *pCounterBufferOffsets)
|
||||
{
|
||||
VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
|
||||
struct panvk_cmd_graphics_state *gfx = &cmdbuf->state.gfx;
|
||||
|
||||
(void)firstCounterBuffer;
|
||||
(void)counterBufferCount;
|
||||
(void)pCounterBuffers;
|
||||
(void)pCounterBufferOffsets;
|
||||
|
||||
gfx->xfb.active = false;
|
||||
}
|
||||
'''
|
||||
create_file("src/panfrost/vulkan/jm/panvk_vX_cmd_xfb.c", xfb_c)
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 7. meson.build — register the new file in the jm_files array
|
||||
# ============================================================
|
||||
|
||||
print("\n[7/7] meson.build — register jm/panvk_vX_cmd_xfb.c")
|
||||
replace_once(
|
||||
"src/panfrost/vulkan/meson.build",
|
||||
"jm_files = [\n 'jm/panvk_vX_bind_queue.c',",
|
||||
"jm_files = [\n 'jm/panvk_vX_bind_queue.c',\n 'jm/panvk_vX_cmd_xfb.c', # iter13",
|
||||
marker_in_new="iter13",
|
||||
)
|
||||
|
||||
|
||||
print("\n[iter13] all patches applied — run incremental ninja build next")
|
||||
@@ -0,0 +1,438 @@
|
||||
/*
|
||||
* iter13 minimal Vulkan transform feedback probe.
|
||||
*
|
||||
* Goal: drive a single-stream, single-buffer VK_EXT_transform_feedback
|
||||
* capture end-to-end on (patched) PanVk-Bifrost — 3 vertices, each emitting
|
||||
* one vec4 with a known pattern, captured into a host-visible buffer, read
|
||||
* back and verified byte-exactly.
|
||||
*
|
||||
* Uses VK_EXT_transform_feedback. If the extension isn't exposed by the
|
||||
* driver, the probe exits with an error before doing any GPU work.
|
||||
*
|
||||
* Pipeline shape:
|
||||
* - vertex shader (probe_xfb.vert) writes a vec4 per vertex
|
||||
* - no fragment shader needed (rasterizerDiscardEnable=VK_TRUE)
|
||||
* - dynamic rendering with 0 color attachments
|
||||
* - vkCmdBindTransformFeedbackBuffersEXT + vkCmdBeginTransformFeedbackEXT
|
||||
* wrap a vkCmdDraw(3, 1, 0, 0)
|
||||
* - readback buffer is 3*16 = 48 bytes
|
||||
*
|
||||
* Pure Vulkan 1.0 core + VK_KHR_dynamic_rendering + VK_EXT_transform_feedback.
|
||||
*/
|
||||
|
||||
#include <errno.h>
|
||||
#include <stddef.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include <vulkan/vulkan.h>
|
||||
|
||||
#define VERTEX_COUNT 3
|
||||
#define XFB_BUFFER_BYTES (VERTEX_COUNT * 16) /* 3 vec4s = 48 bytes */
|
||||
#define VSPV_PATH "probe_xfb.vert.spv"
|
||||
|
||||
#define STEP(name) do { fprintf(stderr, "[step] " name "\n"); fflush(stderr); } while (0)
|
||||
|
||||
#define VK_CHECK(call) do { \
|
||||
VkResult _r = (call); \
|
||||
if (_r != VK_SUCCESS) { \
|
||||
fprintf(stderr, "[fail] " #call " => %d at %s:%d\n", \
|
||||
(int)_r, __FILE__, __LINE__); \
|
||||
exit(2); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
/*
 * Load a whole SPIR-V binary from `path` into a freshly malloc'ed buffer.
 *
 * On success, returns the buffer (owned by the caller, release with free())
 * and stores its size in bytes in *out_bytes.
 *
 * Any failure is fatal: a "[fail]" diagnostic is printed to stderr and the
 * process exits with status 3. That covers open/seek/read errors, allocation
 * failure, and a file that is empty or whose size is not a multiple of 4
 * (SPIR-V is a stream of 32-bit words, and vkCreateShaderModule requires
 * codeSize to be a multiple of 4).
 */
static uint32_t *read_spv(const char *path, size_t *out_bytes)
{
    FILE *f = fopen(path, "rb");
    if (!f) { fprintf(stderr, "[fail] open %s: %s\n", path, strerror(errno)); exit(3); }

    if (fseek(f, 0, SEEK_END) != 0) {
        fprintf(stderr, "[fail] seek %s: %s\n", path, strerror(errno));
        exit(3);
    }

    /* ftell() returns -1 on error; reject that before it is cast to size_t. */
    long n = ftell(f);
    if (n < 0) {
        fprintf(stderr, "[fail] tell %s: %s\n", path, strerror(errno));
        exit(3);
    }
    if (n == 0 || (n % 4) != 0) {
        fprintf(stderr, "[fail] %s: size %ld is not a non-zero multiple of 4\n",
                path, n);
        exit(3);
    }

    if (fseek(f, 0, SEEK_SET) != 0) {
        fprintf(stderr, "[fail] rewind %s: %s\n", path, strerror(errno));
        exit(3);
    }

    uint32_t *buf = malloc((size_t)n);
    if (!buf) {
        fprintf(stderr, "[fail] malloc %ld bytes for %s\n", n, path);
        exit(3);
    }

    /* A short read would hand a partially uninitialized module to the driver. */
    if (fread(buf, 1, (size_t)n, f) != (size_t)n) {
        fprintf(stderr, "[fail] short read on %s\n", path);
        exit(3);
    }

    fclose(f);
    *out_bytes = (size_t)n;
    return buf;
}
|
||||
|
||||
/*
 * Return the index of the first memory type that is both permitted by
 * `type_bits` (bit i set means type i is allowed) and has every property
 * flag in `want` set.
 *
 * If no memory type qualifies, prints "[fail] no memtype" to stderr and
 * exits with status 4.
 */
static uint32_t pick_memtype(const VkPhysicalDeviceMemoryProperties *mp,
                             uint32_t type_bits, VkMemoryPropertyFlags want)
{
    uint32_t idx = 0;

    while (idx < mp->memoryTypeCount) {
        const VkMemoryPropertyFlags have = mp->memoryTypes[idx].propertyFlags;
        const uint32_t allowed = type_bits & (1u << idx);

        if (allowed != 0 && (have & want) == want) {
            return idx;
        }
        idx++;
    }

    fprintf(stderr, "[fail] no memtype\n");
    exit(4);
}
|
||||
|
||||
static uint32_t pick_host_visible(const VkPhysicalDeviceMemoryProperties *mp,
|
||||
uint32_t type_bits)
|
||||
{
|
||||
VkMemoryPropertyFlags pref =
|
||||
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
|
||||
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
|
||||
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
|
||||
for (uint32_t i = 0; i < mp->memoryTypeCount; i++) {
|
||||
if ((type_bits & (1u << i)) &&
|
||||
(mp->memoryTypes[i].propertyFlags & pref) == pref) return i;
|
||||
}
|
||||
for (uint32_t i = 0; i < mp->memoryTypeCount; i++) {
|
||||
if ((type_bits & (1u << i)) &&
|
||||
(mp->memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT))
|
||||
return i;
|
||||
}
|
||||
fprintf(stderr, "[fail] no HOST_VISIBLE\n"); exit(4);
|
||||
}
|
||||
|
||||
int main(void)
|
||||
{
|
||||
STEP("vkCreateInstance");
|
||||
const char *inst_exts[] = { "VK_KHR_get_physical_device_properties2" };
|
||||
VkApplicationInfo app = {
|
||||
.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO,
|
||||
.pApplicationName = "panvk-bifrost iter13 XFB probe",
|
||||
.apiVersion = VK_API_VERSION_1_0,
|
||||
};
|
||||
VkInstanceCreateInfo ici = {
|
||||
.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
|
||||
.pApplicationInfo = &app,
|
||||
.enabledExtensionCount = 1,
|
||||
.ppEnabledExtensionNames = inst_exts,
|
||||
};
|
||||
VkInstance inst;
|
||||
VK_CHECK(vkCreateInstance(&ici, NULL, &inst));
|
||||
|
||||
uint32_t n_phys = 0;
|
||||
VK_CHECK(vkEnumeratePhysicalDevices(inst, &n_phys, NULL));
|
||||
VkPhysicalDevice *phys = calloc(n_phys, sizeof(*phys));
|
||||
VK_CHECK(vkEnumeratePhysicalDevices(inst, &n_phys, phys));
|
||||
VkPhysicalDevice gpu = phys[0];
|
||||
|
||||
/* Check VK_EXT_transform_feedback is exposed before we proceed. */
|
||||
uint32_t ext_count = 0;
|
||||
vkEnumerateDeviceExtensionProperties(gpu, NULL, &ext_count, NULL);
|
||||
VkExtensionProperties *exts = calloc(ext_count, sizeof(*exts));
|
||||
vkEnumerateDeviceExtensionProperties(gpu, NULL, &ext_count, exts);
|
||||
int has_xfb = 0;
|
||||
for (uint32_t i = 0; i < ext_count; i++) {
|
||||
if (!strcmp(exts[i].extensionName, "VK_EXT_transform_feedback"))
|
||||
has_xfb = 1;
|
||||
}
|
||||
free(exts);
|
||||
if (!has_xfb) {
|
||||
fprintf(stderr, "[fail] VK_EXT_transform_feedback NOT exposed by driver "
|
||||
"(this is the iter13 implementation gap — re-run on a Mesa "
|
||||
"build with the iter13 patches applied)\n");
|
||||
return 9;
|
||||
}
|
||||
fprintf(stderr, "[info] VK_EXT_transform_feedback present on device\n");
|
||||
|
||||
VkPhysicalDeviceMemoryProperties mp;
|
||||
vkGetPhysicalDeviceMemoryProperties(gpu, &mp);
|
||||
|
||||
/* Query the transform feedback features struct via vkGetPhysicalDeviceFeatures2. */
|
||||
PFN_vkGetPhysicalDeviceFeatures2KHR pGetFeats2 =
|
||||
(PFN_vkGetPhysicalDeviceFeatures2KHR)vkGetInstanceProcAddr(
|
||||
inst, "vkGetPhysicalDeviceFeatures2KHR");
|
||||
if (!pGetFeats2) { fprintf(stderr, "[fail] no vkGetPhysicalDeviceFeatures2KHR\n"); return 5; }
|
||||
|
||||
VkPhysicalDeviceTransformFeedbackFeaturesEXT xfb_feats = {
|
||||
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT,
|
||||
};
|
||||
VkPhysicalDeviceFeatures2 feats2 = {
|
||||
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
|
||||
.pNext = &xfb_feats,
|
||||
};
|
||||
pGetFeats2(gpu, &feats2);
|
||||
fprintf(stderr, "[info] transformFeedback=%u geometryStreams=%u\n",
|
||||
xfb_feats.transformFeedback, xfb_feats.geometryStreams);
|
||||
if (!xfb_feats.transformFeedback) {
|
||||
fprintf(stderr, "[fail] transformFeedback feature is FALSE — driver exposes ext but not feature\n");
|
||||
return 10;
|
||||
}
|
||||
|
||||
/* ---- queue family ---- */
|
||||
uint32_t n_qf = 0;
|
||||
vkGetPhysicalDeviceQueueFamilyProperties(gpu, &n_qf, NULL);
|
||||
VkQueueFamilyProperties *qfp = calloc(n_qf, sizeof(*qfp));
|
||||
vkGetPhysicalDeviceQueueFamilyProperties(gpu, &n_qf, qfp);
|
||||
uint32_t qfam = UINT32_MAX;
|
||||
for (uint32_t i = 0; i < n_qf; i++) {
|
||||
if (qfp[i].queueFlags & VK_QUEUE_GRAPHICS_BIT) { qfam = i; break; }
|
||||
}
|
||||
|
||||
/* ---- device with XFB + dynamic_rendering enabled ---- */
|
||||
STEP("vkCreateDevice (+VK_EXT_transform_feedback, +dynamic_rendering chain)");
|
||||
const char *dev_exts[] = {
|
||||
"VK_KHR_multiview", "VK_KHR_maintenance2",
|
||||
"VK_KHR_create_renderpass2", "VK_KHR_depth_stencil_resolve",
|
||||
"VK_KHR_dynamic_rendering",
|
||||
"VK_EXT_transform_feedback",
|
||||
};
|
||||
|
||||
VkPhysicalDeviceTransformFeedbackFeaturesEXT enable_xfb = {
|
||||
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT,
|
||||
.transformFeedback = VK_TRUE,
|
||||
.geometryStreams = VK_FALSE,
|
||||
};
|
||||
VkPhysicalDeviceDynamicRenderingFeaturesKHR dyn_feat = {
|
||||
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_FEATURES_KHR,
|
||||
.pNext = &enable_xfb,
|
||||
.dynamicRendering = VK_TRUE,
|
||||
};
|
||||
float qprio = 1.0f;
|
||||
VkDeviceQueueCreateInfo qci = {
|
||||
.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
|
||||
.queueFamilyIndex = qfam, .queueCount = 1, .pQueuePriorities = &qprio,
|
||||
};
|
||||
VkDeviceCreateInfo dci = {
|
||||
.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
|
||||
.pNext = &dyn_feat,
|
||||
.queueCreateInfoCount = 1, .pQueueCreateInfos = &qci,
|
||||
.enabledExtensionCount = sizeof(dev_exts)/sizeof(dev_exts[0]),
|
||||
.ppEnabledExtensionNames = dev_exts,
|
||||
};
|
||||
VkDevice dev;
|
||||
VK_CHECK(vkCreateDevice(gpu, &dci, NULL, &dev));
|
||||
|
||||
VkQueue queue;
|
||||
vkGetDeviceQueue(dev, qfam, 0, &queue);
|
||||
|
||||
/* ---- XFB function pointers ---- */
|
||||
PFN_vkCmdBindTransformFeedbackBuffersEXT pBindXfb =
|
||||
(PFN_vkCmdBindTransformFeedbackBuffersEXT)vkGetDeviceProcAddr(
|
||||
dev, "vkCmdBindTransformFeedbackBuffersEXT");
|
||||
PFN_vkCmdBeginTransformFeedbackEXT pBeginXfb =
|
||||
(PFN_vkCmdBeginTransformFeedbackEXT)vkGetDeviceProcAddr(
|
||||
dev, "vkCmdBeginTransformFeedbackEXT");
|
||||
PFN_vkCmdEndTransformFeedbackEXT pEndXfb =
|
||||
(PFN_vkCmdEndTransformFeedbackEXT)vkGetDeviceProcAddr(
|
||||
dev, "vkCmdEndTransformFeedbackEXT");
|
||||
PFN_vkCmdBeginRenderingKHR pBeginRendering =
|
||||
(PFN_vkCmdBeginRenderingKHR)vkGetDeviceProcAddr(dev, "vkCmdBeginRenderingKHR");
|
||||
PFN_vkCmdEndRenderingKHR pEndRendering =
|
||||
(PFN_vkCmdEndRenderingKHR)vkGetDeviceProcAddr(dev, "vkCmdEndRenderingKHR");
|
||||
if (!pBindXfb || !pBeginXfb || !pEndXfb || !pBeginRendering || !pEndRendering) {
|
||||
fprintf(stderr, "[fail] one or more XFB / dynamic_rendering entry points missing\n");
|
||||
return 11;
|
||||
}
|
||||
|
||||
/* ---- XFB capture buffer (host-visible) ---- */
|
||||
STEP("vkCreateBuffer XFB capture (host-visible)");
|
||||
VkBufferCreateInfo xfb_bci = {
|
||||
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
|
||||
.size = XFB_BUFFER_BYTES,
|
||||
.usage = VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT |
|
||||
VK_BUFFER_USAGE_TRANSFER_DST_BIT,
|
||||
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
|
||||
};
|
||||
VkBuffer xfb_buf;
|
||||
VK_CHECK(vkCreateBuffer(dev, &xfb_bci, NULL, &xfb_buf));
|
||||
|
||||
VkMemoryRequirements xfb_mr;
|
||||
vkGetBufferMemoryRequirements(dev, xfb_buf, &xfb_mr);
|
||||
VkMemoryAllocateInfo xfb_mai = {
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
|
||||
.allocationSize = xfb_mr.size,
|
||||
.memoryTypeIndex = pick_host_visible(&mp, xfb_mr.memoryTypeBits),
|
||||
};
|
||||
VkDeviceMemory xfb_mem;
|
||||
VK_CHECK(vkAllocateMemory(dev, &xfb_mai, NULL, &xfb_mem));
|
||||
VK_CHECK(vkBindBufferMemory(dev, xfb_buf, xfb_mem, 0));
|
||||
|
||||
/* Pre-fill with sentinel so we can detect "GPU never wrote" vs "wrong write". */
|
||||
void *mapped = NULL;
|
||||
VK_CHECK(vkMapMemory(dev, xfb_mem, 0, VK_WHOLE_SIZE, 0, &mapped));
|
||||
uint32_t *u32 = (uint32_t *)mapped;
|
||||
for (uint32_t i = 0; i < XFB_BUFFER_BYTES / 4; i++) u32[i] = 0xDEADBEEFu;
|
||||
|
||||
/* ---- pipeline (vertex stage only, raster-discard, no color attachment) ---- */
|
||||
STEP("vkCreatePipelineLayout + vert shader");
|
||||
VkPipelineLayoutCreateInfo plci = {
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
|
||||
};
|
||||
VkPipelineLayout pl;
|
||||
VK_CHECK(vkCreatePipelineLayout(dev, &plci, NULL, &pl));
|
||||
|
||||
size_t spv_bytes = 0;
|
||||
uint32_t *spv = read_spv(VSPV_PATH, &spv_bytes);
|
||||
VkShaderModuleCreateInfo smci = {
|
||||
.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
|
||||
.codeSize = spv_bytes, .pCode = spv,
|
||||
};
|
||||
VkShaderModule vsm;
|
||||
VK_CHECK(vkCreateShaderModule(dev, &smci, NULL, &vsm));
|
||||
free(spv);
|
||||
|
||||
VkPipelineShaderStageCreateInfo stages[1] = {
|
||||
{ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
|
||||
.stage = VK_SHADER_STAGE_VERTEX_BIT, .module = vsm, .pName = "main" },
|
||||
};
|
||||
VkPipelineVertexInputStateCreateInfo vi = {
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
|
||||
};
|
||||
VkPipelineInputAssemblyStateCreateInfo ia = {
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
|
||||
.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
|
||||
};
|
||||
VkViewport vp_dummy = { 0, 0, 1, 1, 0.0f, 1.0f };
|
||||
VkRect2D sc_dummy = {{0,0}, {1,1}};
|
||||
VkPipelineViewportStateCreateInfo vp = {
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
|
||||
.viewportCount = 1, .pViewports = &vp_dummy,
|
||||
.scissorCount = 1, .pScissors = &sc_dummy,
|
||||
};
|
||||
VkPipelineRasterizationStateCreateInfo rs = {
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
|
||||
.rasterizerDiscardEnable = VK_TRUE, /* THE point — no rasterization */
|
||||
.polygonMode = VK_POLYGON_MODE_FILL,
|
||||
.cullMode = VK_CULL_MODE_NONE,
|
||||
.lineWidth = 1.0f,
|
||||
};
|
||||
VkPipelineMultisampleStateCreateInfo ms = {
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
|
||||
.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
|
||||
};
|
||||
VkPipelineRenderingCreateInfoKHR pri = {
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO_KHR,
|
||||
.colorAttachmentCount = 0, /* No color attachment with raster discard. */
|
||||
};
|
||||
VkGraphicsPipelineCreateInfo gpci = {
|
||||
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
|
||||
.pNext = &pri,
|
||||
.stageCount = 1, .pStages = stages,
|
||||
.pVertexInputState = &vi,
|
||||
.pInputAssemblyState = &ia,
|
||||
.pViewportState = &vp,
|
||||
.pRasterizationState = &rs,
|
||||
.pMultisampleState = &ms,
|
||||
.layout = pl,
|
||||
};
|
||||
STEP("vkCreateGraphicsPipelines (raster-discard + XFB-output VS)");
|
||||
VkPipeline pipe;
|
||||
VK_CHECK(vkCreateGraphicsPipelines(dev, VK_NULL_HANDLE, 1, &gpci, NULL, &pipe));
|
||||
|
||||
/* ---- command buffer ---- */
|
||||
VkCommandPoolCreateInfo cpoolci = {
|
||||
.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
|
||||
.queueFamilyIndex = qfam,
|
||||
};
|
||||
VkCommandPool cpool;
|
||||
VK_CHECK(vkCreateCommandPool(dev, &cpoolci, NULL, &cpool));
|
||||
VkCommandBufferAllocateInfo cbai = {
|
||||
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
|
||||
.commandPool = cpool, .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
|
||||
.commandBufferCount = 1,
|
||||
};
|
||||
VkCommandBuffer cb;
|
||||
VK_CHECK(vkAllocateCommandBuffers(dev, &cbai, &cb));
|
||||
|
||||
STEP("record (bind XFB buffer + begin XFB + draw + end XFB)");
|
||||
VkCommandBufferBeginInfo cbbi = {
|
||||
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
|
||||
.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
|
||||
};
|
||||
VK_CHECK(vkBeginCommandBuffer(cb, &cbbi));
|
||||
|
||||
/* Bind XFB buffer to slot 0 */
|
||||
VkDeviceSize xfb_offset = 0, xfb_size = XFB_BUFFER_BYTES;
|
||||
pBindXfb(cb, 0, 1, &xfb_buf, &xfb_offset, &xfb_size);
|
||||
|
||||
/* Dynamic rendering with NO color attachments (raster-discard).
|
||||
* Render-area is required by the spec to be > 0 even if discarded;
|
||||
* use 1x1. */
|
||||
VkRenderingInfoKHR ri = {
|
||||
.sType = VK_STRUCTURE_TYPE_RENDERING_INFO_KHR,
|
||||
.renderArea = {{0,0}, {1,1}},
|
||||
.layerCount = 1,
|
||||
.colorAttachmentCount = 0,
|
||||
};
|
||||
pBeginRendering(cb, &ri);
|
||||
|
||||
vkCmdBindPipeline(cb, VK_PIPELINE_BIND_POINT_GRAPHICS, pipe);
|
||||
pBeginXfb(cb, 0, 0, NULL, NULL);
|
||||
vkCmdDraw(cb, VERTEX_COUNT, 1, 0, 0);
|
||||
pEndXfb(cb, 0, 0, NULL, NULL);
|
||||
|
||||
pEndRendering(cb);
|
||||
|
||||
/* Sync XFB writes for host read. */
|
||||
VkBufferMemoryBarrier bb = {
|
||||
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
|
||||
.srcAccessMask = VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT,
|
||||
.dstAccessMask = VK_ACCESS_HOST_READ_BIT,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.buffer = xfb_buf, .offset = 0, .size = VK_WHOLE_SIZE,
|
||||
};
|
||||
vkCmdPipelineBarrier(cb,
|
||||
VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT,
|
||||
VK_PIPELINE_STAGE_HOST_BIT,
|
||||
0, 0, NULL, 1, &bb, 0, NULL);
|
||||
|
||||
VK_CHECK(vkEndCommandBuffer(cb));
|
||||
|
||||
/* ---- submit ---- */
|
||||
VkFenceCreateInfo fci = { .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO };
|
||||
VkFence fence;
|
||||
VK_CHECK(vkCreateFence(dev, &fci, NULL, &fence));
|
||||
VkSubmitInfo si = {
|
||||
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
|
||||
.commandBufferCount = 1, .pCommandBuffers = &cb,
|
||||
};
|
||||
STEP("submit + wait (10s)");
|
||||
VK_CHECK(vkQueueSubmit(queue, 1, &si, fence));
|
||||
VkResult wr = vkWaitForFences(dev, 1, &fence, VK_TRUE, 10ULL * 1000 * 1000 * 1000);
|
||||
if (wr != VK_SUCCESS) {
|
||||
fprintf(stderr, "[fail] vkWaitForFences => %d\n", wr); return 7;
|
||||
}
|
||||
|
||||
/* ---- verify ---- */
|
||||
STEP("readback + verify");
|
||||
VkMappedMemoryRange mmr = {
|
||||
.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
|
||||
.memory = xfb_mem, .offset = 0, .size = VK_WHOLE_SIZE,
|
||||
};
|
||||
vkInvalidateMappedMemoryRanges(dev, 1, &mmr);
|
||||
|
||||
/* Expected: each vec4 = (vertex_id, 0, 4660.0, 51966.0) as float32 */
|
||||
int mismatches = 0;
|
||||
float *floats = (float *)mapped;
|
||||
for (uint32_t v = 0; v < VERTEX_COUNT; v++) {
|
||||
float got[4] = { floats[v*4 + 0], floats[v*4 + 1], floats[v*4 + 2], floats[v*4 + 3] };
|
||||
float want[4] = { (float)v, 0.0f, (float)0x1234, (float)0xcafe };
|
||||
for (int c = 0; c < 4; c++) {
|
||||
if (got[c] != want[c]) {
|
||||
fprintf(stderr, "[diff] vertex %u comp %d: got=%f want=%f\n",
|
||||
v, c, got[c], want[c]);
|
||||
mismatches++;
|
||||
}
|
||||
}
|
||||
fprintf(stderr, "[info] vertex %u: (%f, %f, %f, %f)\n",
|
||||
v, got[0], got[1], got[2], got[3]);
|
||||
}
|
||||
|
||||
/* ---- teardown ---- */
|
||||
vkUnmapMemory(dev, xfb_mem);
|
||||
vkDestroyFence(dev, fence, NULL);
|
||||
vkDestroyCommandPool(dev, cpool, NULL);
|
||||
vkDestroyPipeline(dev, pipe, NULL);
|
||||
vkDestroyShaderModule(dev, vsm, NULL);
|
||||
vkDestroyPipelineLayout(dev, pl, NULL);
|
||||
vkDestroyBuffer(dev, xfb_buf, NULL);
|
||||
vkFreeMemory(dev, xfb_mem, NULL);
|
||||
vkDestroyDevice(dev, NULL);
|
||||
vkDestroyInstance(inst, NULL);
|
||||
free(phys); free(qfp);
|
||||
|
||||
if (mismatches == 0) {
|
||||
fprintf(stderr, "[PASS] PanVk-Bifrost transform feedback: 3 vertices captured correctly.\n");
|
||||
return 0;
|
||||
} else {
|
||||
fprintf(stderr, "[FAIL] %d mismatches across 3 vertices.\n", mismatches);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,24 @@
|
||||
#version 450
|
||||
|
||||
// iter13 XFB probe vertex shader.
|
||||
// Writes a known pattern per vertex into transform feedback buffer 0.
|
||||
// Each vertex emits one vec4: (vertex_id, instance_id, 0x1234, 0xcafe).
|
||||
// With a 3-vertex single-instance draw + buffer offset 0,
|
||||
// expected capture (LE float32 array of vec4s):
|
||||
// vertex 0: 0.0, 0.0, 4660.0, 51966.0
|
||||
// vertex 1: 1.0, 0.0, 4660.0, 51966.0
|
||||
// vertex 2: 2.0, 0.0, 4660.0, 51966.0
|
||||
|
||||
layout(xfb_buffer = 0, xfb_offset = 0, xfb_stride = 16, location = 0) out vec4 captured;
|
||||
|
||||
void main() {
    // Both probes create their pipeline with rasterizerDiscardEnable=VK_TRUE,
    // so nothing consumes the position; a fixed value is written regardless.
    gl_Position = vec4(0.0, 0.0, 0.0, 1.0);

    // Captured pattern: (vertex index, instance index, 0x1234, 0xcafe) as floats.
    float vertex_id   = float(gl_VertexIndex);
    float instance_id = float(gl_InstanceIndex);
    float marker_lo   = float(0x1234);
    float marker_hi   = float(0xcafe);

    captured = vec4(vertex_id, instance_id, marker_lo, marker_hi);
}
|
||||
@@ -0,0 +1,266 @@
|
||||
/*
|
||||
* iter13 Janet-CRITICAL regression: XFB-capable pipeline used WITHOUT
|
||||
 * vkCmdBeginTransformFeedbackEXT must NOT fault the GPU.
|
||||
*
|
||||
* Same pipeline shape as probe_xfb.c, but the draw is not wrapped in
|
||||
* Begin/End XFB and no XFB buffer is bound. The vertex shader still
|
||||
* emits a store_global instruction (xfb_address[0] is read from sysval).
|
||||
*
|
||||
* With the memory-sink fix (xfb_address defaults to PAN_SHADER_OOB_ADDRESS
|
||||
* = 0x8000_0000_0000_0000), the store is silently discarded by the MMU.
|
||||
* Without that fix, the store goes to address 0 → page fault → GPU job
|
||||
* failure.
|
||||
*
|
||||
* Pass criterion: vkQueueSubmit + vkWaitForFences returns VK_SUCCESS
|
||||
* (no DEVICE_LOST). No buffer to read back — we only care that the GPU
|
||||
* survives the draw.
|
||||
*/
|
||||
|
||||
#include <errno.h>
|
||||
#include <stddef.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include <vulkan/vulkan.h>
|
||||
|
||||
#define VSPV_PATH "probe_xfb.vert.spv"
|
||||
|
||||
/* Print a "[step] <name>" progress line to stderr and flush stderr right away.
 * `name` has to be a string literal: it is concatenated into the fprintf
 * format string, so any '%' inside it would be interpreted as a conversion. */
#define STEP(name) do { fprintf(stderr, "[step] " name "\n"); fflush(stderr); } while (0)
|
||||
|
||||
/*
 * Evaluate the Vulkan call `call` exactly once. Any result other than
 * VK_SUCCESS is reported on stderr (stringified call, numeric VkResult,
 * file and line) and terminates the process with exit status 2.
 */
#define VK_CHECK(call)                                               \
    do {                                                             \
        VkResult vk_check_res_ = (call);                             \
        if (vk_check_res_ == VK_SUCCESS)                             \
            break;                                                   \
        fprintf(stderr, "[fail] " #call " => %d at %s:%d\n",         \
                (int)vk_check_res_, __FILE__, __LINE__);             \
        exit(2);                                                     \
    } while (0)
|
||||
|
||||
/*
 * Load a whole SPIR-V binary into a freshly malloc'ed buffer.
 *
 * path:      file to read; opened in binary mode.
 * out_bytes: receives the size of the file in bytes.
 *
 * Returns the buffer holding the file contents. The caller owns it and
 * releases it with free().
 *
 * Every failure (open, seek/tell, empty or non-word-sized file, allocation,
 * short read) prints a "[fail]" line to stderr and exits with status 3, so
 * this function never returns NULL.
 */
static uint32_t *read_spv(const char *path, size_t *out_bytes)
{
    FILE *f = fopen(path, "rb");
    if (!f) { fprintf(stderr, "[fail] open %s: %s\n", path, strerror(errno)); exit(3); }

    /* Determine the file size; ftell() reports failure as -1. */
    long n = -1;
    if (fseek(f, 0, SEEK_END) == 0)
        n = ftell(f);
    if (n < 0 || fseek(f, 0, SEEK_SET) != 0) {
        fprintf(stderr, "[fail] size %s: %s\n", path, strerror(errno));
        fclose(f);
        exit(3);
    }

    /* SPIR-V is a stream of 32-bit words: reject empty or ragged files here
     * instead of handing a bogus codeSize to vkCreateShaderModule. */
    if (n == 0 || ((size_t)n % sizeof(uint32_t)) != 0) {
        fprintf(stderr, "[fail] %s: size %ld is not a non-zero multiple of 4\n", path, n);
        fclose(f);
        exit(3);
    }

    uint32_t *buf = malloc((size_t)n);
    if (!buf) {
        fprintf(stderr, "[fail] malloc %ld bytes for %s\n", n, path);
        fclose(f);
        exit(3);
    }

    if (fread(buf, 1, (size_t)n, f) != (size_t)n) {
        fprintf(stderr, "[fail] short read on %s\n", path);
        free(buf);
        fclose(f);
        exit(3);
    }
    fclose(f);

    *out_bytes = (size_t)n;
    return buf;
}
|
||||
|
||||
int main(void)
|
||||
{
|
||||
STEP("vkCreateInstance");
|
||||
VkApplicationInfo app = {
|
||||
.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO,
|
||||
.pApplicationName = "panvk-bifrost iter13 XFB no-draw probe",
|
||||
.apiVersion = VK_API_VERSION_1_0,
|
||||
};
|
||||
const char *inst_exts[] = { "VK_KHR_get_physical_device_properties2" };
|
||||
VkInstanceCreateInfo ici = {
|
||||
.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
|
||||
.pApplicationInfo = &app,
|
||||
.enabledExtensionCount = 1,
|
||||
.ppEnabledExtensionNames = inst_exts,
|
||||
};
|
||||
VkInstance inst;
|
||||
VK_CHECK(vkCreateInstance(&ici, NULL, &inst));
|
||||
|
||||
uint32_t n_phys = 0;
|
||||
VK_CHECK(vkEnumeratePhysicalDevices(inst, &n_phys, NULL));
|
||||
VkPhysicalDevice *phys = calloc(n_phys, sizeof(*phys));
|
||||
VK_CHECK(vkEnumeratePhysicalDevices(inst, &n_phys, phys));
|
||||
VkPhysicalDevice gpu = phys[0];
|
||||
|
||||
uint32_t n_qf = 0;
|
||||
vkGetPhysicalDeviceQueueFamilyProperties(gpu, &n_qf, NULL);
|
||||
VkQueueFamilyProperties *qfp = calloc(n_qf, sizeof(*qfp));
|
||||
vkGetPhysicalDeviceQueueFamilyProperties(gpu, &n_qf, qfp);
|
||||
uint32_t qfam = UINT32_MAX;
|
||||
for (uint32_t i = 0; i < n_qf; i++) {
|
||||
if (qfp[i].queueFlags & VK_QUEUE_GRAPHICS_BIT) { qfam = i; break; }
|
||||
}
|
||||
|
||||
STEP("vkCreateDevice (+XFB feature enabled + dynamic_rendering)");
|
||||
const char *dev_exts[] = {
|
||||
"VK_KHR_multiview", "VK_KHR_maintenance2",
|
||||
"VK_KHR_create_renderpass2", "VK_KHR_depth_stencil_resolve",
|
||||
"VK_KHR_dynamic_rendering",
|
||||
"VK_EXT_transform_feedback",
|
||||
};
|
||||
VkPhysicalDeviceTransformFeedbackFeaturesEXT enable_xfb = {
|
||||
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT,
|
||||
.transformFeedback = VK_TRUE,
|
||||
.geometryStreams = VK_FALSE,
|
||||
};
|
||||
VkPhysicalDeviceDynamicRenderingFeaturesKHR dyn_feat = {
|
||||
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_FEATURES_KHR,
|
||||
.pNext = &enable_xfb,
|
||||
.dynamicRendering = VK_TRUE,
|
||||
};
|
||||
float qprio = 1.0f;
|
||||
VkDeviceQueueCreateInfo qci = {
|
||||
.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
|
||||
.queueFamilyIndex = qfam, .queueCount = 1, .pQueuePriorities = &qprio,
|
||||
};
|
||||
VkDeviceCreateInfo dci = {
|
||||
.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
|
||||
.pNext = &dyn_feat,
|
||||
.queueCreateInfoCount = 1, .pQueueCreateInfos = &qci,
|
||||
.enabledExtensionCount = sizeof(dev_exts)/sizeof(dev_exts[0]),
|
||||
.ppEnabledExtensionNames = dev_exts,
|
||||
};
|
||||
VkDevice dev;
|
||||
VK_CHECK(vkCreateDevice(gpu, &dci, NULL, &dev));
|
||||
|
||||
VkQueue queue;
|
||||
vkGetDeviceQueue(dev, qfam, 0, &queue);
|
||||
|
||||
PFN_vkCmdBeginRenderingKHR pBeginRendering =
|
||||
(PFN_vkCmdBeginRenderingKHR)vkGetDeviceProcAddr(dev, "vkCmdBeginRenderingKHR");
|
||||
PFN_vkCmdEndRenderingKHR pEndRendering =
|
||||
(PFN_vkCmdEndRenderingKHR)vkGetDeviceProcAddr(dev, "vkCmdEndRenderingKHR");
|
||||
|
||||
/* Same XFB-bearing vertex shader as probe_xfb — its SPIR-V has the
|
||||
* xfb_buffer / xfb_offset decorations on `captured`. PanVk's driver
|
||||
* will run pan_nir_lower_xfb on it, producing nir_store_global to
|
||||
* vs.xfb_address[0]. We rely on the driver setting that sysval to
|
||||
* PAN_SHADER_OOB_ADDRESS when xfb is inactive. */
|
||||
STEP("vkCreateGraphicsPipelines (XFB-capable VS, no XFB buffer bound)");
|
||||
VkPipelineLayoutCreateInfo plci = {
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
|
||||
};
|
||||
VkPipelineLayout pl;
|
||||
VK_CHECK(vkCreatePipelineLayout(dev, &plci, NULL, &pl));
|
||||
|
||||
size_t spv_bytes = 0;
|
||||
uint32_t *spv = read_spv(VSPV_PATH, &spv_bytes);
|
||||
VkShaderModuleCreateInfo smci = {
|
||||
.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
|
||||
.codeSize = spv_bytes, .pCode = spv,
|
||||
};
|
||||
VkShaderModule vsm;
|
||||
VK_CHECK(vkCreateShaderModule(dev, &smci, NULL, &vsm));
|
||||
free(spv);
|
||||
|
||||
VkPipelineShaderStageCreateInfo stages[1] = {
|
||||
{ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
|
||||
.stage = VK_SHADER_STAGE_VERTEX_BIT, .module = vsm, .pName = "main" },
|
||||
};
|
||||
VkPipelineVertexInputStateCreateInfo vi = {
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
|
||||
};
|
||||
VkPipelineInputAssemblyStateCreateInfo ia = {
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
|
||||
.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
|
||||
};
|
||||
VkViewport vp_dummy = { 0, 0, 1, 1, 0.0f, 1.0f };
|
||||
VkRect2D sc_dummy = {{0,0}, {1,1}};
|
||||
VkPipelineViewportStateCreateInfo vp = {
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
|
||||
.viewportCount = 1, .pViewports = &vp_dummy,
|
||||
.scissorCount = 1, .pScissors = &sc_dummy,
|
||||
};
|
||||
VkPipelineRasterizationStateCreateInfo rs = {
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
|
||||
.rasterizerDiscardEnable = VK_TRUE,
|
||||
.polygonMode = VK_POLYGON_MODE_FILL,
|
||||
.cullMode = VK_CULL_MODE_NONE,
|
||||
.lineWidth = 1.0f,
|
||||
};
|
||||
VkPipelineMultisampleStateCreateInfo ms = {
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
|
||||
.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
|
||||
};
|
||||
VkPipelineRenderingCreateInfoKHR pri = {
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO_KHR,
|
||||
.colorAttachmentCount = 0,
|
||||
};
|
||||
VkGraphicsPipelineCreateInfo gpci = {
|
||||
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
|
||||
.pNext = &pri,
|
||||
.stageCount = 1, .pStages = stages,
|
||||
.pVertexInputState = &vi,
|
||||
.pInputAssemblyState = &ia,
|
||||
.pViewportState = &vp,
|
||||
.pRasterizationState = &rs,
|
||||
.pMultisampleState = &ms,
|
||||
.layout = pl,
|
||||
};
|
||||
VkPipeline pipe;
|
||||
VK_CHECK(vkCreateGraphicsPipelines(dev, VK_NULL_HANDLE, 1, &gpci, NULL, &pipe));
|
||||
|
||||
VkCommandPoolCreateInfo cpoolci = {
|
||||
.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
|
||||
.queueFamilyIndex = qfam,
|
||||
};
|
||||
VkCommandPool cpool;
|
||||
VK_CHECK(vkCreateCommandPool(dev, &cpoolci, NULL, &cpool));
|
||||
VkCommandBufferAllocateInfo cbai = {
|
||||
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
|
||||
.commandPool = cpool, .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
|
||||
.commandBufferCount = 1,
|
||||
};
|
||||
VkCommandBuffer cb;
|
||||
VK_CHECK(vkAllocateCommandBuffers(dev, &cbai, &cb));
|
||||
|
||||
STEP("record (draw WITHOUT XFB Begin/End; no buffer bound)");
|
||||
VkCommandBufferBeginInfo cbbi = {
|
||||
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
|
||||
.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
|
||||
};
|
||||
VK_CHECK(vkBeginCommandBuffer(cb, &cbbi));
|
||||
|
||||
VkRenderingInfoKHR ri = {
|
||||
.sType = VK_STRUCTURE_TYPE_RENDERING_INFO_KHR,
|
||||
.renderArea = {{0,0}, {1,1}},
|
||||
.layerCount = 1,
|
||||
.colorAttachmentCount = 0,
|
||||
};
|
||||
pBeginRendering(cb, &ri);
|
||||
|
||||
vkCmdBindPipeline(cb, VK_PIPELINE_BIND_POINT_GRAPHICS, pipe);
|
||||
/* No vkCmdBindTransformFeedbackBuffersEXT.
|
||||
* No vkCmdBeginTransformFeedbackEXT.
|
||||
* Just draw — the XFB store in the shader must be silently discarded. */
|
||||
vkCmdDraw(cb, 3, 1, 0, 0);
|
||||
|
||||
pEndRendering(cb);
|
||||
|
||||
VK_CHECK(vkEndCommandBuffer(cb));
|
||||
|
||||
VkFenceCreateInfo fci = { .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO };
|
||||
VkFence fence;
|
||||
VK_CHECK(vkCreateFence(dev, &fci, NULL, &fence));
|
||||
VkSubmitInfo si = {
|
||||
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
|
||||
.commandBufferCount = 1, .pCommandBuffers = &cb,
|
||||
};
|
||||
STEP("submit + wait (10s) — expect VK_SUCCESS, not DEVICE_LOST");
|
||||
VK_CHECK(vkQueueSubmit(queue, 1, &si, fence));
|
||||
VkResult wr = vkWaitForFences(dev, 1, &fence, VK_TRUE, 10ULL * 1000 * 1000 * 1000);
|
||||
if (wr == VK_ERROR_DEVICE_LOST) {
|
||||
fprintf(stderr, "[FAIL] DEVICE_LOST — the XFB store-global probably faulted "
|
||||
"(memory-sink sentinel not applied).\n");
|
||||
return 1;
|
||||
}
|
||||
if (wr != VK_SUCCESS) {
|
||||
fprintf(stderr, "[FAIL] vkWaitForFences => %d\n", wr);
|
||||
return 2;
|
||||
}
|
||||
|
||||
vkDestroyFence(dev, fence, NULL);
|
||||
vkDestroyCommandPool(dev, cpool, NULL);
|
||||
vkDestroyPipeline(dev, pipe, NULL);
|
||||
vkDestroyShaderModule(dev, vsm, NULL);
|
||||
vkDestroyPipelineLayout(dev, pl, NULL);
|
||||
vkDestroyDevice(dev, NULL);
|
||||
vkDestroyInstance(inst, NULL);
|
||||
free(phys); free(qfp);
|
||||
|
||||
fprintf(stderr, "[PASS] XFB-capable pipeline survives non-XFB draw — memory-sink active.\n");
|
||||
return 0;
|
||||
}
|
||||
Reference in New Issue
Block a user