initial seed: retrofit campaign lineage from local working trees
panvk-bifrost campaigns (r1..r4 Vulkan compositor + r5.video1 Vulkan
video decode) shipped before this repo existed; the deliverable
patches live in marfrit-packages, but the reasoning chain, phase docs,
and source-state evidence lived only in local working trees on the
development host.
This retrofit imports:
- mesa-panvk-bifrost/ — r1..r4 era phase docs (iter1..iter18)
(libmali stub blobs at iter18/blob/ excluded
— 109MB of RE artifacts replaced with a README
pointer)
- mesa-panvk-bifrost-video/ — sibling campaign phase docs + probe
- evidence/ — frozen .tgz source snapshots at each milestone
(basis for the 0005 patch diff generation)
Future iterations should branch off here from day one, so each iter is
a commit rather than a snapshot. See [[feedback-session-local-process-pins]]
for the process drift this retrofit closes.
Total: 1.9 MB across 124 files.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,484 @@
|
||||
/*
|
||||
* Copyright © 2021 Collabora Ltd.
|
||||
*
|
||||
* Derived from tu_cmd_buffer.c which is:
|
||||
* Copyright © 2016 Red Hat.
|
||||
* Copyright © 2016 Bas Nieuwenhuizen
|
||||
* Copyright © 2015 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "genxml/gen_macros.h"
|
||||
|
||||
#include "panvk_buffer.h"
|
||||
#include "panvk_cmd_alloc.h"
|
||||
#include "panvk_cmd_buffer.h"
|
||||
#include "panvk_cmd_desc_state.h"
|
||||
#include "panvk_cmd_draw.h"
|
||||
#include "panvk_cmd_fb_preload.h"
|
||||
#include "panvk_cmd_pool.h"
|
||||
#include "panvk_cmd_push_constant.h"
|
||||
#include "panvk_device.h"
|
||||
#include "panvk_entrypoints.h"
|
||||
#include "panvk_instance.h"
|
||||
#include "panvk_meta.h"
|
||||
#include "panvk_physical_device.h"
|
||||
#include "panvk_priv_bo.h"
|
||||
|
||||
#include "pan_desc.h"
|
||||
#include "pan_encoder.h"
|
||||
#include "pan_props.h"
|
||||
#include "pan_samples.h"
|
||||
|
||||
#include "vk_descriptor_update_template.h"
|
||||
#include "vk_format.h"
|
||||
|
||||
static VkResult
|
||||
panvk_cmd_prepare_fragment_job(struct panvk_cmd_buffer *cmdbuf, uint64_t fbd)
|
||||
{
|
||||
const struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info;
|
||||
struct panvk_batch *batch = cmdbuf->cur_batch;
|
||||
struct pan_ptr job_ptr = panvk_cmd_alloc_desc(cmdbuf, FRAGMENT_JOB);
|
||||
|
||||
if (!job_ptr.gpu)
|
||||
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
|
||||
|
||||
GENX(pan_emit_fragment_job_payload)(fbinfo, fbd, job_ptr.cpu);
|
||||
|
||||
pan_section_pack(job_ptr.cpu, FRAGMENT_JOB, HEADER, header) {
|
||||
header.type = MALI_JOB_TYPE_FRAGMENT;
|
||||
header.index = 1;
|
||||
}
|
||||
|
||||
pan_jc_add_job(&batch->frag_jc, MALI_JOB_TYPE_FRAGMENT, false, false, 0, 0,
|
||||
&job_ptr, false);
|
||||
util_dynarray_append(&batch->jobs, job_ptr.cpu);
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
void
|
||||
panvk_per_arch(cmd_close_batch)(struct panvk_cmd_buffer *cmdbuf)
|
||||
{
|
||||
struct panvk_batch *batch = cmdbuf->cur_batch;
|
||||
|
||||
if (!batch)
|
||||
return;
|
||||
|
||||
struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info;
|
||||
|
||||
assert(batch);
|
||||
|
||||
if (!batch->fb.desc.gpu && !batch->vtc_jc.first_job) {
|
||||
if (util_dynarray_num_elements(&batch->event_ops,
|
||||
struct panvk_cmd_event_op) == 0) {
|
||||
/* Content-less batch, let's drop it */
|
||||
vk_free(&cmdbuf->vk.pool->alloc, batch);
|
||||
} else {
|
||||
/* Batch has no jobs but is needed for synchronization, let's add a
|
||||
* NULL job so the SUBMIT ioctl doesn't choke on it.
|
||||
*/
|
||||
struct pan_ptr ptr = panvk_cmd_alloc_desc(cmdbuf, JOB_HEADER);
|
||||
|
||||
if (ptr.gpu) {
|
||||
util_dynarray_append(&batch->jobs, ptr.cpu);
|
||||
pan_jc_add_job(&batch->vtc_jc, MALI_JOB_TYPE_NULL, false, false, 0,
|
||||
0, &ptr, false);
|
||||
}
|
||||
|
||||
list_addtail(&batch->node, &cmdbuf->batches);
|
||||
}
|
||||
cmdbuf->cur_batch = NULL;
|
||||
return;
|
||||
}
|
||||
|
||||
struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
|
||||
struct panvk_physical_device *phys_dev =
|
||||
to_panvk_physical_device(dev->vk.physical);
|
||||
|
||||
list_addtail(&batch->node, &cmdbuf->batches);
|
||||
|
||||
if (batch->tlsinfo.tls.size) {
|
||||
unsigned thread_tls_alloc =
|
||||
pan_query_thread_tls_alloc(&phys_dev->kmod.dev->props);
|
||||
unsigned core_id_range;
|
||||
|
||||
pan_query_core_count(&phys_dev->kmod.dev->props, &core_id_range);
|
||||
|
||||
unsigned size = pan_get_total_stack_size(batch->tlsinfo.tls.size,
|
||||
thread_tls_alloc, core_id_range);
|
||||
batch->tlsinfo.tls.ptr =
|
||||
panvk_cmd_alloc_dev_mem(cmdbuf, tls, size, 4096).gpu;
|
||||
}
|
||||
|
||||
if (batch->tlsinfo.wls.size) {
|
||||
assert(batch->wls_total_size);
|
||||
batch->tlsinfo.wls.ptr =
|
||||
panvk_cmd_alloc_dev_mem(cmdbuf, tls, batch->wls_total_size, 4096).gpu;
|
||||
}
|
||||
|
||||
if (batch->tls.cpu)
|
||||
GENX(pan_emit_tls)(&batch->tlsinfo, batch->tls.cpu);
|
||||
|
||||
if (batch->fb.desc.cpu) {
|
||||
panvk_per_arch(cmd_select_tile_size)(cmdbuf);
|
||||
|
||||
/* At this point, we should know sample count and the tile size should have
|
||||
* been calculated */
|
||||
assert(fbinfo->nr_samples > 0 && fbinfo->tile_size > 0);
|
||||
|
||||
fbinfo->sample_positions =
|
||||
dev->sample_positions->addr.dev +
|
||||
pan_sample_positions_offset(pan_sample_pattern(fbinfo->nr_samples));
|
||||
fbinfo->first_provoking_vertex =
|
||||
cmdbuf->state.gfx.render.first_provoking_vertex != U_TRISTATE_NO;
|
||||
|
||||
VkResult result = panvk_per_arch(cmd_fb_preload)(cmdbuf, fbinfo);
|
||||
if (result != VK_SUCCESS)
|
||||
return;
|
||||
|
||||
uint32_t view_mask = cmdbuf->state.gfx.render.view_mask;
|
||||
assert(view_mask == 0 || util_bitcount(view_mask) <= batch->fb.layer_count);
|
||||
uint32_t enabled_layer_count = view_mask ?
|
||||
util_bitcount(view_mask) :
|
||||
batch->fb.layer_count;
|
||||
|
||||
for (uint32_t i = 0; i < enabled_layer_count; i++) {
|
||||
uint32_t layer_id = (view_mask != 0) ? u_bit_scan(&view_mask) : i;
|
||||
VkResult result;
|
||||
|
||||
uint64_t fbd = batch->fb.desc.gpu + (batch->fb.desc_stride * layer_id);
|
||||
|
||||
result = panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf, layer_id);
|
||||
if (result != VK_SUCCESS)
|
||||
break;
|
||||
|
||||
fbd |= GENX(pan_emit_fbd)(
|
||||
&cmdbuf->state.gfx.render.fb.info, layer_id, &batch->tlsinfo,
|
||||
&batch->tiler.ctx,
|
||||
batch->fb.desc.cpu + (batch->fb.desc_stride * layer_id));
|
||||
|
||||
result = panvk_cmd_prepare_fragment_job(cmdbuf, fbd);
|
||||
if (result != VK_SUCCESS)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
cmdbuf->cur_batch = NULL;
|
||||
}
|
||||
|
||||
VkResult
|
||||
panvk_per_arch(cmd_alloc_fb_desc)(struct panvk_cmd_buffer *cmdbuf)
|
||||
{
|
||||
struct panvk_batch *batch = cmdbuf->cur_batch;
|
||||
|
||||
if (batch->fb.desc.gpu)
|
||||
return VK_SUCCESS;
|
||||
|
||||
const struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info;
|
||||
bool has_zs_ext = fbinfo->zs.view.zs || fbinfo->zs.view.s;
|
||||
batch->fb.layer_count = cmdbuf->state.gfx.render.layer_count;
|
||||
unsigned fbd_size = pan_size(FRAMEBUFFER);
|
||||
|
||||
if (has_zs_ext)
|
||||
fbd_size = ALIGN_POT(fbd_size, pan_alignment(ZS_CRC_EXTENSION)) +
|
||||
pan_size(ZS_CRC_EXTENSION);
|
||||
|
||||
fbd_size = ALIGN_POT(fbd_size, pan_alignment(RENDER_TARGET)) +
|
||||
(MAX2(fbinfo->rt_count, 1) * pan_size(RENDER_TARGET));
|
||||
|
||||
batch->fb.bo_count = cmdbuf->state.gfx.render.fb.bo_count;
|
||||
memcpy(batch->fb.bos, cmdbuf->state.gfx.render.fb.bos,
|
||||
batch->fb.bo_count * sizeof(batch->fb.bos[0]));
|
||||
|
||||
batch->fb.desc =
|
||||
panvk_cmd_alloc_dev_mem(cmdbuf, desc, fbd_size * batch->fb.layer_count,
|
||||
pan_alignment(FRAMEBUFFER));
|
||||
batch->fb.desc_stride = fbd_size;
|
||||
|
||||
memset(&cmdbuf->state.gfx.render.fb.info.bifrost.pre_post.dcds, 0,
|
||||
sizeof(cmdbuf->state.gfx.render.fb.info.bifrost.pre_post.dcds));
|
||||
|
||||
return batch->fb.desc.gpu ? VK_SUCCESS : VK_ERROR_OUT_OF_DEVICE_MEMORY;
|
||||
}
|
||||
|
||||
VkResult
|
||||
panvk_per_arch(cmd_alloc_tls_desc)(struct panvk_cmd_buffer *cmdbuf, bool gfx)
|
||||
{
|
||||
struct panvk_batch *batch = cmdbuf->cur_batch;
|
||||
|
||||
assert(batch);
|
||||
if (!batch->tls.gpu) {
|
||||
batch->tls = panvk_cmd_alloc_desc(cmdbuf, LOCAL_STORAGE);
|
||||
if (!batch->tls.gpu)
|
||||
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
|
||||
}
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
VkResult
|
||||
panvk_per_arch(cmd_prepare_tiler_context)(struct panvk_cmd_buffer *cmdbuf,
|
||||
uint32_t layer_idx)
|
||||
{
|
||||
struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
|
||||
struct panvk_physical_device *phys_dev =
|
||||
to_panvk_physical_device(cmdbuf->vk.base.device->physical);
|
||||
struct panvk_batch *batch = cmdbuf->cur_batch;
|
||||
uint64_t tiler_desc;
|
||||
|
||||
if (batch->tiler.ctx_descs.gpu) {
|
||||
tiler_desc =
|
||||
batch->tiler.ctx_descs.gpu + (pan_size(TILER_CONTEXT) * layer_idx);
|
||||
goto out_set_layer_ctx;
|
||||
}
|
||||
|
||||
const struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info;
|
||||
uint32_t layer_count = cmdbuf->state.gfx.render.layer_count;
|
||||
batch->tiler.heap_desc = panvk_cmd_alloc_desc(cmdbuf, TILER_HEAP);
|
||||
batch->tiler.ctx_descs =
|
||||
panvk_cmd_alloc_desc_array(cmdbuf, layer_count, TILER_CONTEXT);
|
||||
if (!batch->tiler.heap_desc.gpu || !batch->tiler.ctx_descs.gpu)
|
||||
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
|
||||
|
||||
tiler_desc =
|
||||
batch->tiler.ctx_descs.gpu + (pan_size(TILER_CONTEXT) * layer_idx);
|
||||
|
||||
pan_pack(&batch->tiler.heap_templ, TILER_HEAP, cfg) {
|
||||
cfg.size = pan_kmod_bo_size(dev->tiler_heap->bo);
|
||||
cfg.base = dev->tiler_heap->addr.dev;
|
||||
cfg.bottom = dev->tiler_heap->addr.dev;
|
||||
cfg.top = cfg.base + cfg.size;
|
||||
}
|
||||
|
||||
pan_pack(&batch->tiler.ctx_templ, TILER_CONTEXT, cfg) {
|
||||
cfg.hierarchy_mask = panvk_select_tiler_hierarchy_mask(
|
||||
phys_dev, &cmdbuf->state.gfx, pan_kmod_bo_size(dev->tiler_heap->bo));
|
||||
cfg.fb_width = fbinfo->width;
|
||||
cfg.fb_height = fbinfo->height;
|
||||
cfg.heap = batch->tiler.heap_desc.gpu;
|
||||
cfg.sample_pattern = pan_sample_pattern(fbinfo->nr_samples);
|
||||
}
|
||||
|
||||
memcpy(batch->tiler.heap_desc.cpu, &batch->tiler.heap_templ,
|
||||
sizeof(batch->tiler.heap_templ));
|
||||
|
||||
struct mali_tiler_context_packed *ctxs = batch->tiler.ctx_descs.cpu;
|
||||
|
||||
assert(layer_count > 0);
|
||||
for (uint32_t i = 0; i < layer_count; i++) {
|
||||
STATIC_ASSERT(
|
||||
!(pan_size(TILER_CONTEXT) & (pan_alignment(TILER_CONTEXT) - 1)));
|
||||
|
||||
memcpy(&ctxs[i], &batch->tiler.ctx_templ, sizeof(*ctxs));
|
||||
}
|
||||
|
||||
out_set_layer_ctx:
|
||||
if (PAN_ARCH >= 9)
|
||||
batch->tiler.ctx.valhall.desc = tiler_desc;
|
||||
else
|
||||
batch->tiler.ctx.bifrost.desc = tiler_desc;
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
struct panvk_batch *
|
||||
panvk_per_arch(cmd_open_batch)(struct panvk_cmd_buffer *cmdbuf)
|
||||
{
|
||||
assert(!cmdbuf->cur_batch);
|
||||
cmdbuf->cur_batch =
|
||||
vk_zalloc(&cmdbuf->vk.pool->alloc, sizeof(*cmdbuf->cur_batch), 8,
|
||||
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
|
||||
cmdbuf->cur_batch->jobs = UTIL_DYNARRAY_INIT;
|
||||
cmdbuf->cur_batch->event_ops = UTIL_DYNARRAY_INIT;
|
||||
assert(cmdbuf->cur_batch);
|
||||
return cmdbuf->cur_batch;
|
||||
}
|
||||
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
panvk_per_arch(EndCommandBuffer)(VkCommandBuffer commandBuffer)
|
||||
{
|
||||
VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
|
||||
|
||||
panvk_per_arch(cmd_close_batch)(cmdbuf);
|
||||
|
||||
panvk_pool_flush_maps(&cmdbuf->desc_pool);
|
||||
|
||||
return vk_command_buffer_end(&cmdbuf->vk);
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
panvk_per_arch(CmdPipelineBarrier2)(VkCommandBuffer commandBuffer,
|
||||
const VkDependencyInfo *pDependencyInfo)
|
||||
{
|
||||
VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
|
||||
|
||||
/* Caches are flushed/invalidated at batch boundaries for now, nothing to do
|
||||
* for memory barriers assuming we implement barriers with the creation of a
|
||||
* new batch.
|
||||
* FIXME: We can probably do better with a CacheFlush job that has the
|
||||
* barrier flag set to true.
|
||||
*/
|
||||
if (cmdbuf->cur_batch) {
|
||||
bool preload_fb =
|
||||
cmdbuf->cur_batch && cmdbuf->cur_batch->vtc_jc.first_tiler;
|
||||
|
||||
panvk_per_arch(cmd_close_batch)(cmdbuf);
|
||||
|
||||
if (preload_fb)
|
||||
panvk_per_arch(cmd_preload_fb_after_batch_split)(cmdbuf);
|
||||
|
||||
panvk_per_arch(cmd_open_batch)(cmdbuf);
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < pDependencyInfo->imageMemoryBarrierCount; i++) {
|
||||
const VkImageMemoryBarrier2 *barrier = &pDependencyInfo->pImageMemoryBarriers[i];
|
||||
|
||||
panvk_per_arch(cmd_transition_image_layout)(commandBuffer, barrier);
|
||||
}
|
||||
|
||||
/* If we had any layout transition dispatches, the batch will be closed at
|
||||
* this point, therefore establishing the sync between itself and the
|
||||
* commands that follow.
|
||||
*/
|
||||
}
|
||||
|
||||
static void
|
||||
panvk_reset_cmdbuf(struct vk_command_buffer *vk_cmdbuf,
|
||||
VkCommandBufferResetFlags flags)
|
||||
{
|
||||
struct panvk_cmd_buffer *cmdbuf =
|
||||
container_of(vk_cmdbuf, struct panvk_cmd_buffer, vk);
|
||||
|
||||
vk_command_buffer_reset(&cmdbuf->vk);
|
||||
|
||||
list_for_each_entry_safe(struct panvk_batch, batch, &cmdbuf->batches, node) {
|
||||
list_del(&batch->node);
|
||||
util_dynarray_fini(&batch->jobs);
|
||||
util_dynarray_fini(&batch->event_ops);
|
||||
|
||||
vk_free(&cmdbuf->vk.pool->alloc, batch);
|
||||
}
|
||||
|
||||
panvk_pool_reset(&cmdbuf->desc_pool);
|
||||
panvk_pool_reset(&cmdbuf->tls_pool);
|
||||
panvk_pool_reset(&cmdbuf->varying_pool);
|
||||
panvk_cmd_buffer_obj_list_reset(cmdbuf, push_sets);
|
||||
|
||||
memset(&cmdbuf->state, 0, sizeof(cmdbuf->state));
|
||||
}
|
||||
|
||||
static void
|
||||
panvk_destroy_cmdbuf(struct vk_command_buffer *vk_cmdbuf)
|
||||
{
|
||||
struct panvk_cmd_buffer *cmdbuf =
|
||||
container_of(vk_cmdbuf, struct panvk_cmd_buffer, vk);
|
||||
struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
|
||||
|
||||
list_for_each_entry_safe(struct panvk_batch, batch, &cmdbuf->batches, node) {
|
||||
list_del(&batch->node);
|
||||
util_dynarray_fini(&batch->jobs);
|
||||
util_dynarray_fini(&batch->event_ops);
|
||||
|
||||
vk_free(&cmdbuf->vk.pool->alloc, batch);
|
||||
}
|
||||
|
||||
panvk_pool_cleanup(&cmdbuf->desc_pool);
|
||||
panvk_pool_cleanup(&cmdbuf->tls_pool);
|
||||
panvk_pool_cleanup(&cmdbuf->varying_pool);
|
||||
panvk_cmd_buffer_obj_list_cleanup(cmdbuf, push_sets);
|
||||
vk_command_buffer_finish(&cmdbuf->vk);
|
||||
vk_free(&dev->vk.alloc, cmdbuf);
|
||||
}
|
||||
|
||||
static VkResult
|
||||
panvk_create_cmdbuf(struct vk_command_pool *vk_pool, VkCommandBufferLevel level,
|
||||
struct vk_command_buffer **cmdbuf_out)
|
||||
{
|
||||
struct panvk_device *device =
|
||||
container_of(vk_pool->base.device, struct panvk_device, vk);
|
||||
struct panvk_cmd_pool *pool =
|
||||
container_of(vk_pool, struct panvk_cmd_pool, vk);
|
||||
struct panvk_cmd_buffer *cmdbuf;
|
||||
|
||||
cmdbuf = vk_zalloc(&device->vk.alloc, sizeof(*cmdbuf), 8,
|
||||
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
|
||||
if (!cmdbuf)
|
||||
return panvk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
VkResult result = vk_command_buffer_init(
|
||||
&pool->vk, &cmdbuf->vk, &panvk_per_arch(cmd_buffer_ops), level);
|
||||
if (result != VK_SUCCESS) {
|
||||
vk_free(&device->vk.alloc, cmdbuf);
|
||||
return result;
|
||||
}
|
||||
|
||||
panvk_cmd_buffer_obj_list_init(cmdbuf, push_sets);
|
||||
cmdbuf->vk.dynamic_graphics_state.vi = &cmdbuf->state.gfx.dynamic.vi;
|
||||
cmdbuf->vk.dynamic_graphics_state.ms.sample_locations =
|
||||
&cmdbuf->state.gfx.dynamic.sl;
|
||||
|
||||
struct panvk_pool_properties desc_pool_props = {
|
||||
.create_flags =
|
||||
panvk_device_adjust_bo_flags(device, PAN_KMOD_BO_FLAG_WB_MMAP),
|
||||
.slab_size = 64 * 1024,
|
||||
.label = "Command buffer descriptor pool",
|
||||
.prealloc = true,
|
||||
.owns_bos = true,
|
||||
.needs_locking = false,
|
||||
};
|
||||
panvk_pool_init(&cmdbuf->desc_pool, device, &pool->desc_bo_pool, NULL,
|
||||
&desc_pool_props);
|
||||
|
||||
struct panvk_pool_properties tls_pool_props = {
|
||||
.create_flags =
|
||||
panvk_device_adjust_bo_flags(device, PAN_KMOD_BO_FLAG_NO_MMAP),
|
||||
.slab_size = 64 * 1024,
|
||||
.label = "TLS pool",
|
||||
.prealloc = false,
|
||||
.owns_bos = true,
|
||||
.needs_locking = false,
|
||||
};
|
||||
panvk_pool_init(&cmdbuf->tls_pool, device, &pool->tls_bo_pool, &pool->tls_big_bo_pool,
|
||||
&tls_pool_props);
|
||||
|
||||
struct panvk_pool_properties var_pool_props = {
|
||||
.create_flags =
|
||||
panvk_device_adjust_bo_flags(device, PAN_KMOD_BO_FLAG_NO_MMAP),
|
||||
.slab_size = 64 * 1024,
|
||||
.label = "Varying pool",
|
||||
.prealloc = false,
|
||||
.owns_bos = true,
|
||||
.needs_locking = false,
|
||||
};
|
||||
panvk_pool_init(&cmdbuf->varying_pool, device, &pool->varying_bo_pool, NULL,
|
||||
&var_pool_props);
|
||||
|
||||
list_inithead(&cmdbuf->batches);
|
||||
*cmdbuf_out = &cmdbuf->vk;
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
const struct vk_command_buffer_ops panvk_per_arch(cmd_buffer_ops) = {
|
||||
.create = panvk_create_cmdbuf,
|
||||
.reset = panvk_reset_cmdbuf,
|
||||
.destroy = panvk_destroy_cmdbuf,
|
||||
};
|
||||
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
panvk_per_arch(BeginCommandBuffer)(VkCommandBuffer commandBuffer,
|
||||
const VkCommandBufferBeginInfo *pBeginInfo)
|
||||
{
|
||||
VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
|
||||
|
||||
vk_command_buffer_begin(&cmdbuf->vk, pBeginInfo);
|
||||
|
||||
#if PAN_ARCH < 9
|
||||
/* iter13: clear XFB state on Begin so a reused command buffer does not
|
||||
* inherit stale xfb.buffer_count / xfb.active / xfb.buffers[] from a
|
||||
* prior recording. */
|
||||
memset(&cmdbuf->state.gfx.xfb, 0, sizeof(cmdbuf->state.gfx.xfb));
|
||||
#endif
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,111 @@
|
||||
/*
|
||||
* Copyright © 2026 mfritsche / claude-noether
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* iter13: VK_EXT_transform_feedback command handlers for the JM
|
||||
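* architecture path (Bifrost v6/v7 + Valhall-JM v9). NOTE(review): the meson.build in this commit lists jm_archs = [6, 7] and does not build arch 9 — confirm whether v9 is actually covered.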
|
||||
*
|
||||
* The runtime contract:
|
||||
* - vkCmdBindTransformFeedbackBuffersEXT: stash (gpu_addr, offset, size)
|
||||
* for each slot into cmdbuf->state.gfx.xfb.buffers[].
|
||||
* - vkCmdBeginTransformFeedbackEXT: set cmdbuf->state.gfx.xfb.active = true.
|
||||
* No explicit dirty marking is done; the handler relies on the per-draw sysval update detecting the change so the next draw re-emits vs.xfb_address[].
|
||||
* - vkCmdEndTransformFeedbackEXT: set active = false.
|
||||
*
|
||||
* Counter buffers (firstCounterBuffer/counterBufferCount/pCounterBuffers/
|
||||
* pCounterBufferOffsets) are accepted by API but ignored — v1 doesn't
|
||||
* support pause/resume. transformFeedbackDraw is advertised as false.
|
||||
*
|
||||
* Per-draw integration: jm/panvk_vX_cmd_draw.c reads cmdbuf->state.gfx.xfb
|
||||
* and populates vs.xfb_address[i] for shader use. The pan_nir_lower_xfb
|
||||
* pass in panvk_vX_shader.c emits nir_load_xfb_address(i) which lowers
|
||||
* (via panvk_vX_shader.c sysval handler) to a load from the per-draw
|
||||
* sysval push area.
|
||||
*/
|
||||
|
||||
#include "vk_log.h"
|
||||
#include "util/log.h"
|
||||
|
||||
#include "panvk_cmd_buffer.h"
|
||||
#include "panvk_cmd_draw.h"
|
||||
#include "panvk_buffer.h"
|
||||
#include "panvk_entrypoints.h"
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
panvk_per_arch(CmdBindTransformFeedbackBuffersEXT)(
|
||||
VkCommandBuffer commandBuffer,
|
||||
uint32_t firstBinding,
|
||||
uint32_t bindingCount,
|
||||
const VkBuffer *pBuffers,
|
||||
const VkDeviceSize *pOffsets,
|
||||
const VkDeviceSize *pSizes)
|
||||
{
|
||||
VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
|
||||
struct panvk_cmd_graphics_state *gfx = &cmdbuf->state.gfx;
|
||||
|
||||
for (uint32_t i = 0; i < bindingCount; i++) {
|
||||
uint32_t slot = firstBinding + i;
|
||||
if (slot >= 4)
|
||||
continue;
|
||||
|
||||
VK_FROM_HANDLE(panvk_buffer, buf, pBuffers[i]);
|
||||
gfx->xfb.buffers[slot].addr = panvk_buffer_gpu_ptr(buf, 0);
|
||||
gfx->xfb.buffers[slot].offset = pOffsets[i];
|
||||
gfx->xfb.buffers[slot].size =
|
||||
(pSizes != NULL && pSizes[i] != VK_WHOLE_SIZE)
|
||||
? pSizes[i]
|
||||
: (buf->vk.size - pOffsets[i]);
|
||||
}
|
||||
|
||||
if (firstBinding + bindingCount > gfx->xfb.buffer_count)
|
||||
gfx->xfb.buffer_count = firstBinding + bindingCount;
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
panvk_per_arch(CmdBeginTransformFeedbackEXT)(
|
||||
VkCommandBuffer commandBuffer,
|
||||
uint32_t firstCounterBuffer,
|
||||
uint32_t counterBufferCount,
|
||||
const VkBuffer *pCounterBuffers,
|
||||
const VkDeviceSize *pCounterBufferOffsets)
|
||||
{
|
||||
VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
|
||||
struct panvk_cmd_graphics_state *gfx = &cmdbuf->state.gfx;
|
||||
|
||||
/* Counter buffers ignored in v1 — see VkPhysicalDeviceTransformFeedback
|
||||
* PropertiesEXT.transformFeedbackDraw = false in panvk_vX_physical_device.c.
|
||||
* App is spec-compliant if it does not pass counter buffers (which our
|
||||
* features advertisement allows), but warn loudly if it does so we do not
|
||||
* silently produce wrong capture state. */
|
||||
(void)firstCounterBuffer;
|
||||
(void)pCounterBufferOffsets;
|
||||
if (counterBufferCount > 0 && pCounterBuffers != NULL) {
|
||||
mesa_logw("panvk: CmdBeginTransformFeedbackEXT: counter buffers not "
|
||||
"implemented (transformFeedbackDraw=false); XFB resume will "
|
||||
"restart at buffer offset 0");
|
||||
}
|
||||
|
||||
gfx->xfb.active = true;
|
||||
/* Per-draw set_gfx_sysval picks up the change automatically — no
|
||||
* explicit dirty marking required (set_gfx_sysval uses memcmp +
|
||||
* BITSET to detect state diffs and re-emit sysvals). */
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
panvk_per_arch(CmdEndTransformFeedbackEXT)(
|
||||
VkCommandBuffer commandBuffer,
|
||||
uint32_t firstCounterBuffer,
|
||||
uint32_t counterBufferCount,
|
||||
const VkBuffer *pCounterBuffers,
|
||||
const VkDeviceSize *pCounterBufferOffsets)
|
||||
{
|
||||
VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
|
||||
struct panvk_cmd_graphics_state *gfx = &cmdbuf->state.gfx;
|
||||
|
||||
(void)firstCounterBuffer;
|
||||
(void)counterBufferCount;
|
||||
(void)pCounterBuffers;
|
||||
(void)pCounterBufferOffsets;
|
||||
|
||||
gfx->xfb.active = false;
|
||||
}
|
||||
@@ -0,0 +1,275 @@
|
||||
# Copyright © 2021 Collabora Ltd.
|
||||
#
|
||||
# Derived from the freedreno driver which is:
|
||||
# Copyright © 2017 Intel Corporation
|
||||
# SPDX-License-Identifier: MIT
|
||||
|
||||
panvk_entrypoints = custom_target(
|
||||
'panvk_entrypoints.[ch]',
|
||||
input : [vk_entrypoints_gen, vk_api_xml],
|
||||
output : ['panvk_entrypoints.h', 'panvk_entrypoints.c'],
|
||||
command : [
|
||||
prog_python, '@INPUT0@', '--xml', '@INPUT1@', '--proto', '--weak',
|
||||
'--out-h', '@OUTPUT0@', '--out-c', '@OUTPUT1@', '--prefix', 'panvk',
|
||||
'--device-prefix', 'panvk_v6', '--device-prefix', 'panvk_v7',
|
||||
'--device-prefix', 'panvk_v9', '--device-prefix', 'panvk_v10',
|
||||
'--device-prefix', 'panvk_v12', '--device-prefix', 'panvk_v13',
|
||||
'--beta', with_vulkan_beta.to_string()
|
||||
],
|
||||
depend_files : vk_entrypoints_gen_depend_files,
|
||||
)
|
||||
|
||||
panvk_tracepoints = custom_target(
|
||||
'panvk_tracepoints.[ch]',
|
||||
input: 'panvk_tracepoints.py',
|
||||
output: ['panvk_tracepoints.h',
|
||||
'panvk_tracepoints_perfetto.h',
|
||||
'panvk_tracepoints.c'],
|
||||
command: [
|
||||
prog_python, '@INPUT@',
|
||||
'--import-path', join_paths(dir_source_root, 'src/util/perf/'),
|
||||
'--utrace-hdr', '@OUTPUT0@',
|
||||
'--perfetto-hdr', '@OUTPUT1@',
|
||||
'--utrace-src', '@OUTPUT2@',
|
||||
],
|
||||
depend_files: u_trace_py,
|
||||
)
|
||||
|
||||
libpanvk_files = files(
|
||||
'panvk_buffer.c',
|
||||
'panvk_cmd_pool.c',
|
||||
'panvk_device_memory.c',
|
||||
'panvk_host_copy.c',
|
||||
'panvk_image.c',
|
||||
'panvk_instance.c',
|
||||
'panvk_mempool.c',
|
||||
'panvk_physical_device.c',
|
||||
'panvk_priv_bo.c',
|
||||
'panvk_sparse.c',
|
||||
'panvk_utrace.c',
|
||||
'panvk_wsi.c',
|
||||
)
|
||||
libpanvk_files += [sha1_h]
|
||||
|
||||
panvk_deps = []
|
||||
panvk_flags = []
|
||||
panvk_per_arch_libs = []
|
||||
|
||||
bifrost_archs = [6, 7]
|
||||
bifrost_inc_dir = ['bifrost']
|
||||
bifrost_files = [
|
||||
'bifrost/panvk_vX_meta_desc_copy.c',
|
||||
]
|
||||
|
||||
valhall_archs = [9, 10]
|
||||
valhall_inc_dir = ['valhall']
|
||||
valhall_files = []
|
||||
|
||||
fifthgen_archs = [12, 13]
|
||||
fifthgen_inc_dir = ['fifthgen']
|
||||
fifthgen_files = []
|
||||
|
||||
# Architectures built with the JM sources below; the per-arch loop adds
# jm_inc_dir and jm_files when `arch in jm_archs`.
jm_archs = [6, 7]
|
||||
jm_inc_dir = ['jm']
|
||||
jm_files = [
|
||||
'jm/panvk_vX_bind_queue.c',
|
||||
'jm/panvk_vX_cmd_xfb.c', # iter13: VK_EXT_transform_feedback command handlers
|
||||
'jm/panvk_vX_cmd_buffer.c',
|
||||
'jm/panvk_vX_cmd_dispatch.c',
|
||||
'jm/panvk_vX_cmd_draw.c',
|
||||
'jm/panvk_vX_cmd_event.c',
|
||||
'jm/panvk_vX_cmd_query.c',
|
||||
'jm/panvk_vX_cmd_precomp.c',
|
||||
'jm/panvk_vX_event.c',
|
||||
'jm/panvk_vX_gpu_queue.c',
|
||||
]
|
||||
|
||||
csf_archs = [10, 12, 13]
|
||||
csf_inc_dir = ['csf']
|
||||
csf_files = [
|
||||
'csf/panvk_vX_bind_queue.c',
|
||||
'csf/panvk_vX_cmd_buffer.c',
|
||||
'csf/panvk_vX_cmd_dispatch.c',
|
||||
'csf/panvk_vX_cmd_draw.c',
|
||||
'csf/panvk_vX_cmd_event.c',
|
||||
'csf/panvk_vX_cmd_query.c',
|
||||
'csf/panvk_vX_cmd_precomp.c',
|
||||
'csf/panvk_vX_event.c',
|
||||
'csf/panvk_vX_exception_handler.c',
|
||||
'csf/panvk_vX_gpu_queue.c',
|
||||
'csf/panvk_vX_instr.c',
|
||||
'csf/panvk_vX_utrace.c',
|
||||
]
|
||||
|
||||
common_per_arch_files = [
|
||||
panvk_entrypoints[0],
|
||||
panvk_tracepoints[0],
|
||||
'panvk_vX_blend.c',
|
||||
'panvk_vX_buffer_view.c',
|
||||
'panvk_vX_cmd_fb_preload.c',
|
||||
'panvk_vX_cmd_desc_state.c',
|
||||
'panvk_vX_cmd_dispatch.c',
|
||||
'panvk_vX_cmd_draw.c',
|
||||
'panvk_vX_cmd_meta.c',
|
||||
'panvk_vX_cmd_push_constant.c',
|
||||
'panvk_vX_descriptor_set.c',
|
||||
'panvk_vX_descriptor_set_layout.c',
|
||||
'panvk_vX_device.c',
|
||||
'panvk_vX_physical_device.c',
|
||||
'panvk_vX_precomp_cache.c',
|
||||
'panvk_vX_query_pool.c',
|
||||
'panvk_vX_image_view.c',
|
||||
'panvk_vX_nir_lower_descriptors.c',
|
||||
'panvk_vX_nir_lower_input_attachment_loads.c',
|
||||
'panvk_vX_sampler.c',
|
||||
'panvk_vX_shader.c',
|
||||
sha1_h,
|
||||
]
|
||||
|
||||
foreach arch : [6, 7, 10, 12, 13]
|
||||
per_arch_files = common_per_arch_files
|
||||
inc_panvk_per_arch = []
|
||||
|
||||
if arch in bifrost_archs
|
||||
inc_panvk_per_arch += bifrost_inc_dir
|
||||
per_arch_files += bifrost_files
|
||||
elif arch in valhall_archs
|
||||
inc_panvk_per_arch += valhall_inc_dir
|
||||
per_arch_files += valhall_files
|
||||
elif arch in fifthgen_archs
|
||||
inc_panvk_per_arch += fifthgen_inc_dir
|
||||
per_arch_files += fifthgen_files
|
||||
endif
|
||||
|
||||
if arch in jm_archs
|
||||
inc_panvk_per_arch += jm_inc_dir
|
||||
per_arch_files += jm_files
|
||||
elif arch in csf_archs
|
||||
inc_panvk_per_arch += csf_inc_dir
|
||||
per_arch_files += csf_files
|
||||
endif
|
||||
|
||||
panvk_per_arch_libs += static_library(
|
||||
'panvk_v@0@'.format(arch),
|
||||
per_arch_files,
|
||||
include_directories : [
|
||||
inc_include,
|
||||
inc_src,
|
||||
inc_panfrost,
|
||||
inc_panvk_per_arch,
|
||||
],
|
||||
dependencies : [
|
||||
idep_nir_headers,
|
||||
idep_pan_packers,
|
||||
idep_vulkan_util_headers,
|
||||
idep_vulkan_runtime_headers,
|
||||
idep_vulkan_wsi_headers,
|
||||
idep_mesautil,
|
||||
dep_libdrm,
|
||||
dep_valgrind,
|
||||
idep_libpan_per_arch[arch.to_string()],
|
||||
],
|
||||
c_args : [no_override_init_args, panvk_flags, '-DPAN_ARCH=@0@'.format(arch)],
|
||||
gnu_symbol_visibility : 'hidden',
|
||||
)
|
||||
endforeach
|
||||
|
||||
if with_perfetto
|
||||
panvk_deps += dep_perfetto
|
||||
libpanvk_files += ['panvk_utrace_perfetto.cc']
|
||||
endif
|
||||
|
||||
if with_platform_wayland
|
||||
panvk_deps += dep_wayland_client
|
||||
endif
|
||||
|
||||
if with_platform_android
|
||||
libpanvk_files += files('panvk_android.c')
|
||||
endif
|
||||
|
||||
libvulkan_panfrost = shared_library(
|
||||
'vulkan_panfrost',
|
||||
[libpanvk_files, panvk_entrypoints, panvk_tracepoints],
|
||||
include_directories : [
|
||||
inc_include,
|
||||
inc_src,
|
||||
inc_panfrost,
|
||||
],
|
||||
link_whole : [panvk_per_arch_libs],
|
||||
link_with : [
|
||||
libpanfrost_shared,
|
||||
libpanfrost_decode,
|
||||
libpanfrost_lib,
|
||||
libpanfrost_compiler,
|
||||
],
|
||||
dependencies : [
|
||||
dep_dl,
|
||||
dep_elf,
|
||||
dep_libdrm,
|
||||
dep_m,
|
||||
dep_thread,
|
||||
dep_valgrind,
|
||||
idep_nir,
|
||||
idep_pan_packers,
|
||||
panvk_deps,
|
||||
idep_vulkan_util,
|
||||
idep_vulkan_runtime,
|
||||
idep_vulkan_wsi,
|
||||
idep_mesautil,
|
||||
],
|
||||
c_args : [no_override_init_args, panvk_flags],
|
||||
link_args : [vulkan_icd_link_args, ld_args_bsymbolic, ld_args_gc_sections, ld_args_build_id],
|
||||
gnu_symbol_visibility : 'hidden',
|
||||
install : true,
|
||||
)
|
||||
|
||||
if with_symbols_check
|
||||
test(
|
||||
'panvk symbols check',
|
||||
symbols_check,
|
||||
args : [
|
||||
'--lib', libvulkan_panfrost,
|
||||
'--symbols-file', vulkan_icd_symbols,
|
||||
symbols_check_args,
|
||||
],
|
||||
suite : ['panfrost'],
|
||||
)
|
||||
endif
|
||||
|
||||
icd_file_name = libname_prefix + 'vulkan_panfrost.' + libname_suffix
|
||||
|
||||
panfrost_icd = custom_target(
|
||||
'panfrost_icd',
|
||||
input : [vk_icd_gen, vk_api_xml],
|
||||
output : 'panfrost_icd.' + vulkan_manifest_suffix,
|
||||
command : [
|
||||
prog_python, '@INPUT0@',
|
||||
'--api-version', '1.4', '--xml', '@INPUT1@',
|
||||
'--sizeof-pointer', sizeof_pointer,
|
||||
'--icd-lib-path', vulkan_icd_lib_path,
|
||||
'--icd-filename', icd_file_name,
|
||||
'--out', '@OUTPUT@',
|
||||
],
|
||||
build_by_default : true,
|
||||
install_dir : with_vulkan_icd_dir,
|
||||
install_tag : 'runtime',
|
||||
install : true,
|
||||
)
|
||||
|
||||
_dev_icdname = 'panfrost_devenv_icd.@0@.json'.format(host_machine.cpu())
|
||||
_dev_icd = custom_target(
|
||||
'panfrost_devenv_icd',
|
||||
input : [vk_icd_gen, vk_api_xml],
|
||||
output : _dev_icdname,
|
||||
command : [
|
||||
prog_python, '@INPUT0@',
|
||||
'--api-version', '1.4', '--xml', '@INPUT1@',
|
||||
'--sizeof-pointer', sizeof_pointer,
|
||||
'--icd-lib-path', meson.current_build_dir(),
|
||||
'--icd-filename', icd_file_name,
|
||||
'--out', '@OUTPUT@',
|
||||
],
|
||||
build_by_default : true,
|
||||
)
|
||||
|
||||
devenv.append('VK_DRIVER_FILES', _dev_icd.full_path())
|
||||
@@ -0,0 +1,501 @@
|
||||
/*
|
||||
* Copyright © 2024 Collabora Ltd.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#ifndef PANVK_CMD_DRAW_H
|
||||
#define PANVK_CMD_DRAW_H
|
||||
|
||||
#ifndef PAN_ARCH
|
||||
#error "PAN_ARCH must be defined"
|
||||
#endif
|
||||
|
||||
#include "panvk_blend.h"
|
||||
#include "panvk_cmd_desc_state.h"
|
||||
#include "panvk_cmd_query.h"
|
||||
#include "panvk_entrypoints.h"
|
||||
#include "panvk_image.h"
|
||||
#include "panvk_image_view.h"
|
||||
#include "panvk_physical_device.h"
|
||||
#include "panvk_shader.h"
|
||||
|
||||
#include "vk_command_buffer.h"
|
||||
#include "vk_format.h"
|
||||
#include "util/u_tristate.h"
|
||||
|
||||
#include "pan_props.h"
|
||||
|
||||
#define MAX_VBS 16
|
||||
|
||||
struct panvk_cmd_buffer;
|
||||
|
||||
/* Address/size pair describing one attribute buffer binding. */
struct panvk_attrib_buf {
   uint64_t address;
   unsigned int size;
};
|
||||
|
||||
struct panvk_resolve_attachment {
|
||||
VkResolveModeFlagBits mode;
|
||||
struct panvk_image_view *dst_iview;
|
||||
};
|
||||
|
||||
struct panvk_rendering_state {
|
||||
VkRenderingFlags flags;
|
||||
uint32_t layer_count;
|
||||
uint32_t view_mask;
|
||||
enum u_tristate first_provoking_vertex;
|
||||
|
||||
enum vk_rp_attachment_flags bound_attachments;
|
||||
struct {
|
||||
struct panvk_image_view *iviews[MAX_RTS];
|
||||
/* If non-null, preload_iviews[i] overrides iviews[i] for preloads. */
|
||||
struct panvk_image_view *preload_iviews[MAX_RTS];
|
||||
VkFormat fmts[MAX_RTS];
|
||||
uint8_t samples[MAX_RTS];
|
||||
struct panvk_resolve_attachment resolve[MAX_RTS];
|
||||
} color_attachments;
|
||||
|
||||
struct pan_image_view zs_pview;
|
||||
struct pan_image_view s_pview;
|
||||
|
||||
struct {
|
||||
struct panvk_image_view *iview;
|
||||
/* If non-null, preload_iview overrides iview for preloads. */
|
||||
struct panvk_image_view *preload_iview;
|
||||
VkFormat fmt;
|
||||
struct panvk_resolve_attachment resolve;
|
||||
} z_attachment, s_attachment;
|
||||
|
||||
struct {
|
||||
struct pan_fb_info info;
|
||||
bool crc_valid[MAX_RTS];
|
||||
|
||||
/* nr_samples to be used before framebuffer / tiler descriptor are emitted */
|
||||
uint32_t nr_samples;
|
||||
|
||||
#if PAN_ARCH < 9
|
||||
uint32_t bo_count;
|
||||
struct pan_kmod_bo *bos[(MAX_RTS * PANVK_MAX_PLANES) + 2];
|
||||
#endif
|
||||
} fb;
|
||||
|
||||
#if PAN_ARCH >= 10
|
||||
struct pan_ptr fbds;
|
||||
uint64_t tiler;
|
||||
|
||||
/* When a secondary command buffer has to flush draws, it disturbs the
|
||||
* inherited context, and the primary command buffer needs to know. */
|
||||
bool invalidate_inherited_ctx;
|
||||
|
||||
/* True if the last render pass was suspended. */
|
||||
bool suspended;
|
||||
|
||||
/* Blocks that can patch to flip the provoking vertex mode if we need to
|
||||
* emit FBDs/TDs before we know which mode the application is using */
|
||||
struct cs_maybe *maybe_set_tds_provoking_vertex;
|
||||
struct cs_maybe *maybe_set_fbds_provoking_vertex;
|
||||
|
||||
struct {
|
||||
/* != 0 if the render pass contains one or more occlusion queries to
|
||||
* signal. */
|
||||
uint64_t chain;
|
||||
|
||||
/* Point to the syncobj of the last occlusion query that was passed
|
||||
* to a draw. */
|
||||
uint64_t last;
|
||||
} oq;
|
||||
#endif
|
||||
};
|
||||
|
||||
/* Bit indices into the graphics state `dirty` bitset. The last enumerator is
 * the number of bits and sizes that bitset. */
enum panvk_cmd_graphics_dirty_state {
   PANVK_CMD_GRAPHICS_DIRTY_VS = 0,
   PANVK_CMD_GRAPHICS_DIRTY_FS,
   PANVK_CMD_GRAPHICS_DIRTY_VB,
   PANVK_CMD_GRAPHICS_DIRTY_IB,
   PANVK_CMD_GRAPHICS_DIRTY_OQ,
   PANVK_CMD_GRAPHICS_DIRTY_DESC_STATE,
   PANVK_CMD_GRAPHICS_DIRTY_RENDER_STATE,
   PANVK_CMD_GRAPHICS_DIRTY_VS_PUSH_UNIFORMS,
   PANVK_CMD_GRAPHICS_DIRTY_FS_PUSH_UNIFORMS,

   /* Keep last */
   PANVK_CMD_GRAPHICS_DIRTY_STATE_COUNT,
};
|
||||
|
||||
struct panvk_cmd_graphics_state {
|
||||
struct panvk_descriptor_state desc_state;
|
||||
|
||||
struct {
|
||||
struct vk_vertex_input_state vi;
|
||||
struct vk_sample_locations_state sl;
|
||||
} dynamic;
|
||||
|
||||
struct panvk_occlusion_query_state occlusion_query;
|
||||
#if PAN_ARCH >= 10
|
||||
struct panvk_prims_generated_query_state prims_generated_query;
|
||||
#endif
|
||||
struct panvk_graphics_sysvals sysvals;
|
||||
|
||||
#if PAN_ARCH < 9
|
||||
/* iter13: VK_EXT_transform_feedback state (JM-class only for now). */
|
||||
struct {
|
||||
bool active;
|
||||
uint32_t buffer_count;
|
||||
struct {
|
||||
uint64_t addr;
|
||||
uint64_t offset;
|
||||
uint64_t size;
|
||||
} buffers[4];
|
||||
} xfb;
|
||||
#endif
|
||||
|
||||
#if PAN_ARCH < 9
|
||||
struct panvk_shader_link link;
|
||||
#endif
|
||||
|
||||
struct {
|
||||
const struct panvk_shader *shader;
|
||||
struct panvk_shader_desc_state desc;
|
||||
uint64_t blend_descs[MAX_RTS];
|
||||
uint64_t push_uniforms;
|
||||
bool required;
|
||||
#if PAN_ARCH < 9
|
||||
uint64_t rsd;
|
||||
#endif
|
||||
} fs;
|
||||
|
||||
struct {
|
||||
const struct panvk_shader *shader;
|
||||
struct panvk_shader_desc_state desc;
|
||||
uint64_t push_uniforms;
|
||||
#if PAN_ARCH < 9
|
||||
uint64_t attribs;
|
||||
uint64_t attrib_bufs;
|
||||
uint64_t indirect_attribs_infos;
|
||||
uint64_t indirect_attrib_bufs_infos;
|
||||
uint64_t indirect_varying_bufs_infos;
|
||||
bool previous_draw_was_indirect;
|
||||
#endif
|
||||
} vs;
|
||||
|
||||
struct {
|
||||
struct panvk_attrib_buf bufs[MAX_VBS];
|
||||
unsigned count;
|
||||
} vb;
|
||||
|
||||
#if PAN_ARCH >= 10
|
||||
struct {
|
||||
uint32_t attribs_changing_on_base_instance;
|
||||
} vi;
|
||||
#endif
|
||||
|
||||
/* Index buffer */
|
||||
struct {
|
||||
uint64_t dev_addr;
|
||||
uint64_t size;
|
||||
uint8_t index_size;
|
||||
} ib;
|
||||
|
||||
struct {
|
||||
struct panvk_blend_info info;
|
||||
} cb;
|
||||
|
||||
struct panvk_rendering_state render;
|
||||
|
||||
bool vk_meta;
|
||||
|
||||
#if PAN_ARCH < 9
|
||||
uint64_t vpd;
|
||||
#endif
|
||||
|
||||
#if PAN_ARCH >= 10
|
||||
uint64_t tsd;
|
||||
#endif
|
||||
|
||||
BITSET_DECLARE(dirty, PANVK_CMD_GRAPHICS_DIRTY_STATE_COUNT);
|
||||
};
|
||||
|
||||
/* Test the MESA_VK_DYNAMIC_<name_> bit of the command buffer's
 * vk.dynamic_graphics_state dirty bitset. */
#define dyn_gfx_state_dirty(cmdbuf_, name_)                                    \
   BITSET_TEST((cmdbuf_)->vk.dynamic_graphics_state.dirty,                     \
               MESA_VK_DYNAMIC_##name_)

/* Test the PANVK_CMD_GRAPHICS_DIRTY_<name_> bit of state.gfx.dirty. */
#define gfx_state_dirty(cmdbuf_, name_)                                        \
   BITSET_TEST((cmdbuf_)->state.gfx.dirty, PANVK_CMD_GRAPHICS_DIRTY_##name_)

/* Set the PANVK_CMD_GRAPHICS_DIRTY_<name_> bit of state.gfx.dirty. */
#define gfx_state_set_dirty(cmdbuf_, name_)                                    \
   BITSET_SET((cmdbuf_)->state.gfx.dirty, PANVK_CMD_GRAPHICS_DIRTY_##name_)

/* Clear every bit of state.gfx.dirty. */
#define gfx_state_clear_all_dirty(cmdbuf_)                                     \
   BITSET_ZERO((cmdbuf_)->state.gfx.dirty)

/* Set every bit of state.gfx.dirty. */
#define gfx_state_set_all_dirty(cmdbuf_)                                       \
   BITSET_ONES((cmdbuf_)->state.gfx.dirty)
|
||||
|
||||
/* Store val_ into the graphics sysval member name_ of cmdbuf_. When the
 * stored bytes actually change, the FAU words covering that member are
 * flagged in the dirty_ bitset; otherwise nothing is written. */
#define set_gfx_sysval(cmdbuf_, dirty_, name_, val_)                           \
   do {                                                                        \
      struct panvk_graphics_sysvals new_sysvals_;                              \
      new_sysvals_.name_ = val_;                                               \
      if (memcmp(&(cmdbuf_)->state.gfx.sysvals.name_, &new_sysvals_.name_,     \
                 sizeof(new_sysvals_.name_))) {                                \
         (cmdbuf_)->state.gfx.sysvals.name_ = new_sysvals_.name_;              \
         BITSET_SET_RANGE(dirty_, sysval_fau_start(graphics, name_),           \
                          sysval_fau_end(graphics, name_));                    \
      }                                                                        \
   } while (0)
|
||||
|
||||
#if PAN_ARCH >= 10
/* Device-level draw context, only declared for PAN_ARCH >= 10. Set up and
 * torn down by the device_draw_context_init/cleanup entry points declared
 * later in this header. */
struct panvk_device_draw_context {
   struct panvk_priv_bo *fns_bo;
   uint64_t fn_set_fbds_provoking_vertex_stride;
};
#endif
|
||||
|
||||
static inline void
|
||||
panvk_depth_range(const struct panvk_cmd_graphics_state *state,
|
||||
const struct vk_viewport_state *vp,
|
||||
float *z_min, float *z_max)
|
||||
{
|
||||
float a = vp->depth_clip_negative_one_to_one ?
|
||||
state->sysvals.viewport.offset.z - state->sysvals.viewport.scale.z :
|
||||
state->sysvals.viewport.offset.z;
|
||||
float b = state->sysvals.viewport.offset.z + state->sysvals.viewport.scale.z;
|
||||
*z_min = MIN2(a, b);
|
||||
*z_max = MAX2(a, b);
|
||||
}
|
||||
|
||||
static inline uint32_t
|
||||
panvk_select_tiler_hierarchy_mask(const struct panvk_physical_device *phys_dev,
|
||||
const struct panvk_cmd_graphics_state *state,
|
||||
unsigned bin_ptr_mem_budget)
|
||||
{
|
||||
struct pan_tiler_features tiler_features =
|
||||
pan_query_tiler_features(&phys_dev->kmod.dev->props);
|
||||
|
||||
uint32_t hierarchy_mask = GENX(pan_select_tiler_hierarchy_mask)(
|
||||
state->render.fb.info.width, state->render.fb.info.height,
|
||||
tiler_features.max_levels, state->render.fb.info.tile_size,
|
||||
bin_ptr_mem_budget);
|
||||
|
||||
return hierarchy_mask;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
fs_required(const struct panvk_cmd_graphics_state *state,
|
||||
const struct vk_dynamic_graphics_state *dyn_state)
|
||||
{
|
||||
const struct panvk_shader_variant *fs =
|
||||
panvk_shader_only_variant(state->fs.shader);
|
||||
const struct pan_shader_info *fs_info = fs ? &fs->info : NULL;
|
||||
const struct vk_color_blend_state *cb = &dyn_state->cb;
|
||||
const struct vk_rasterization_state *rs = &dyn_state->rs;
|
||||
|
||||
if (rs->rasterizer_discard_enable || !fs_info)
|
||||
return false;
|
||||
|
||||
/* If we generally have side effects */
|
||||
if (fs_info->fs.sidefx)
|
||||
return true;
|
||||
|
||||
/* If colour is written we need to execute */
|
||||
for (unsigned i = 0; i < cb->attachment_count; ++i) {
|
||||
if ((cb->color_write_enables & BITFIELD_BIT(i)) &&
|
||||
cb->attachments[i].write_mask)
|
||||
return true;
|
||||
}
|
||||
|
||||
/* If alpha-to-coverage is enabled, we need to run the fragment shader even
|
||||
* if we don't have a color attachment, so depth/stencil updates can be
|
||||
* discarded if alpha, and thus coverage, is 0. */
|
||||
if (dyn_state->ms.alpha_to_coverage_enable)
|
||||
return true;
|
||||
|
||||
/* If the sample mask is updated, we need to run the fragment shader,
|
||||
* otherwise the fixed-function depth/stencil results will apply to all
|
||||
* samples. */
|
||||
if (fs_info->outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK))
|
||||
return true;
|
||||
|
||||
/* If depth is written and not implied we need to execute.
|
||||
* TODO: Predicate on Z/S writes being enabled */
|
||||
return (fs_info->fs.writes_depth || fs_info->fs.writes_stencil);
|
||||
}
|
||||
|
||||
static inline bool
|
||||
cached_fs_required(ASSERTED const struct panvk_cmd_graphics_state *state,
|
||||
ASSERTED const struct vk_dynamic_graphics_state *dyn_state,
|
||||
bool cached_value)
|
||||
{
|
||||
/* Make sure the cached value was properly initialized. */
|
||||
assert(fs_required(state, dyn_state) == cached_value);
|
||||
return cached_value;
|
||||
}
|
||||
|
||||
/* Fragment shader to use for the next draw: state.gfx.fs.shader when the
 * cached state.gfx.fs.required flag is true, NULL otherwise. */
#define get_fs(cmdbuf_)                                                        \
   (cached_fs_required(&(cmdbuf_)->state.gfx,                                  \
                       &(cmdbuf_)->vk.dynamic_graphics_state,                  \
                       (cmdbuf_)->state.gfx.fs.required)                       \
       ? (cmdbuf_)->state.gfx.fs.shader                                        \
       : NULL)
|
||||
|
||||
/* Anything that might change the value returned by get_fs() makes users of the
 * fragment shader dirty, because not using the fragment shader (when
 * fs_required() returns false) impacts various other things, like VS -> FS
 * linking in the JM backend, or the update of the fragment shader pointer in
 * the CSF backend. Call gfx_state_dirty(cmdbuf, FS) if you only care about
 * fragment shader updates.
 *
 * The checks are applied to the command buffer passed as __cmdbuf; the macro
 * does not depend on the caller having a variable named `cmdbuf`. */
#define fs_user_dirty(__cmdbuf)                                                \
   (gfx_state_dirty(__cmdbuf, FS) ||                                           \
    dyn_gfx_state_dirty(__cmdbuf, RS_RASTERIZER_DISCARD_ENABLE) ||             \
    dyn_gfx_state_dirty(__cmdbuf, CB_ATTACHMENT_COUNT) ||                      \
    dyn_gfx_state_dirty(__cmdbuf, CB_COLOR_WRITE_ENABLES) ||                   \
    dyn_gfx_state_dirty(__cmdbuf, CB_WRITE_MASKS) ||                           \
    dyn_gfx_state_dirty(__cmdbuf, MS_ALPHA_TO_COVERAGE_ENABLE))
|
||||
|
||||
/* Reset dirty tracking once a draw has been recorded. Both the dynamic
 * graphics state dirty bits and the gfx dirty bitset are cleared. If the draw
 * did not use the fragment shader (get_fs() differs from the bound shader),
 * the FS bit is set again, and so is FS_PUSH_UNIFORMS when it was dirty
 * before the clear. */
#define clear_dirty_after_draw(cmdbuf_)                                        \
   do {                                                                        \
      const bool fs_unused_ =                                                  \
         get_fs(cmdbuf_) != (cmdbuf_)->state.gfx.fs.shader;                    \
      const bool fs_push_pending_ =                                            \
         fs_unused_ && gfx_state_dirty(cmdbuf_, FS_PUSH_UNIFORMS);             \
                                                                               \
      vk_dynamic_graphics_state_clear_dirty(                                   \
         &(cmdbuf_)->vk.dynamic_graphics_state);                               \
      gfx_state_clear_all_dirty(cmdbuf_);                                      \
                                                                               \
      if (fs_unused_)                                                          \
         gfx_state_set_dirty(cmdbuf_, FS);                                     \
      if (fs_push_pending_)                                                    \
         gfx_state_set_dirty(cmdbuf_, FS_PUSH_UNIFORMS);                       \
   } while (0)
|
||||
|
||||
|
||||
#if PAN_ARCH >= 10
|
||||
VkResult
|
||||
panvk_per_arch(device_draw_context_init)(struct panvk_device *dev);
|
||||
|
||||
void
|
||||
panvk_per_arch(device_draw_context_cleanup)(struct panvk_device *dev);
|
||||
#endif
|
||||
|
||||
void
|
||||
panvk_per_arch(cmd_init_render_state)(struct panvk_cmd_buffer *cmdbuf,
|
||||
const VkRenderingInfo *pRenderingInfo);
|
||||
|
||||
void
|
||||
panvk_per_arch(cmd_force_fb_preload)(struct panvk_cmd_buffer *cmdbuf,
|
||||
const VkRenderingInfo *render_info);
|
||||
|
||||
void
|
||||
panvk_per_arch(cmd_preload_render_area_border)(struct panvk_cmd_buffer *cmdbuf,
|
||||
const VkRenderingInfo *render_info);
|
||||
|
||||
void panvk_per_arch(cmd_select_tile_size)(struct panvk_cmd_buffer *cmdbuf);
|
||||
|
||||
/* Parameters of one draw, passed to cmd_prepare_draw_sysvals(). */
struct panvk_draw_info {
   /* Index buffer parameters. */
   struct {
      uint32_t size;
      uint32_t offset;
   } index;

   /* Vertex range. */
   struct {
#if PAN_ARCH < 9
      int32_t raw_offset;
#endif
      int32_t base;
      uint32_t count;
   } vertex;

   /* Instance range. */
   struct {
      int32_t base;
      uint32_t count;
   } instance;

   /* Indirect draw parameters. */
   struct {
      uint64_t buffer_dev_addr;
      uint64_t count_buffer_dev_addr;
      uint32_t draw_count;
      uint32_t stride;
   } indirect;

#if PAN_ARCH < 9
   uint32_t layer_id;
#endif
};
|
||||
|
||||
void
|
||||
panvk_per_arch(cmd_prepare_draw_sysvals)(struct panvk_cmd_buffer *cmdbuf,
|
||||
const struct panvk_draw_info *info);
|
||||
|
||||
static inline uint32_t
|
||||
color_attachment_written_mask(
|
||||
const struct panvk_shader_variant *fs,
|
||||
const struct vk_color_attachment_location_state *cal)
|
||||
{
|
||||
uint32_t written_by_shader =
|
||||
(fs->info.outputs_written >> FRAG_RESULT_DATA0) & BITFIELD_MASK(8);
|
||||
uint32_t catt_written_mask = 0;
|
||||
|
||||
for (uint32_t i = 0; i < MAX_RTS; i++) {
|
||||
if (cal->color_map[i] == MESA_VK_ATTACHMENT_UNUSED)
|
||||
continue;
|
||||
|
||||
uint32_t shader_rt = cal->color_map[i];
|
||||
|
||||
if (written_by_shader & BITFIELD_BIT(shader_rt))
|
||||
catt_written_mask |= BITFIELD_BIT(i);
|
||||
}
|
||||
|
||||
return catt_written_mask;
|
||||
}
|
||||
|
||||
static inline uint32_t
|
||||
color_attachment_read_mask(const struct panvk_shader_variant *fs,
|
||||
const struct vk_input_attachment_location_state *ial,
|
||||
uint8_t color_attachment_mask)
|
||||
{
|
||||
uint32_t color_attachment_count =
|
||||
ial->color_attachment_count == MESA_VK_COLOR_ATTACHMENT_COUNT_UNKNOWN
|
||||
? util_last_bit(color_attachment_mask)
|
||||
: ial->color_attachment_count;
|
||||
uint32_t catt_read_mask = 0;
|
||||
|
||||
for (uint32_t i = 0; i < color_attachment_count; i++) {
|
||||
if (ial->color_map[i] == MESA_VK_ATTACHMENT_UNUSED)
|
||||
continue;
|
||||
|
||||
uint32_t catt_idx = ial->color_map[i] + 1;
|
||||
if (fs->fs.input_attachment_read & BITFIELD_BIT(catt_idx)) {
|
||||
assert(color_attachment_mask & BITFIELD_BIT(i));
|
||||
catt_read_mask |= BITFIELD_BIT(i);
|
||||
}
|
||||
}
|
||||
|
||||
return catt_read_mask;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
z_attachment_read(const struct panvk_shader_variant *fs,
|
||||
const struct vk_input_attachment_location_state *ial)
|
||||
{
|
||||
uint32_t depth_mask = ial->depth_att == MESA_VK_ATTACHMENT_NO_INDEX
|
||||
? BITFIELD_BIT(0)
|
||||
: ial->depth_att != MESA_VK_ATTACHMENT_UNUSED
|
||||
? BITFIELD_BIT(ial->depth_att + 1)
|
||||
: 0;
|
||||
return depth_mask & fs->fs.input_attachment_read;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
s_attachment_read(const struct panvk_shader_variant *fs,
|
||||
const struct vk_input_attachment_location_state *ial)
|
||||
{
|
||||
uint32_t stencil_mask = ial->stencil_att == MESA_VK_ATTACHMENT_NO_INDEX
|
||||
? BITFIELD_BIT(0)
|
||||
: ial->stencil_att != MESA_VK_ATTACHMENT_UNUSED
|
||||
? BITFIELD_BIT(ial->stencil_att + 1)
|
||||
: 0;
|
||||
|
||||
return stencil_mask & fs->fs.input_attachment_read;
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,572 @@
|
||||
/*
|
||||
* Copyright © 2021 Collabora Ltd.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#ifndef PANVK_SHADER_H
|
||||
#define PANVK_SHADER_H
|
||||
|
||||
#ifndef PAN_ARCH
|
||||
#error "PAN_ARCH must be defined"
|
||||
#endif
|
||||
|
||||
#include "compiler/pan_compiler.h"
|
||||
|
||||
#include "pan_desc.h"
|
||||
#include "pan_earlyzs.h"
|
||||
|
||||
#include "panvk_cmd_push_constant.h"
|
||||
#include "panvk_descriptor_set.h"
|
||||
#include "panvk_macros.h"
|
||||
#include "panvk_mempool.h"
|
||||
|
||||
#include "vk_pipeline_layout.h"
|
||||
|
||||
#include "vk_shader.h"
|
||||
|
||||
extern const struct vk_device_shader_ops panvk_per_arch(device_shader_ops);
|
||||
|
||||
#define MAX_RTS 8
|
||||
#define MAX_VS_ATTRIBS 16
|
||||
|
||||
/* These two per-stage limits equal the per-set limits on every architecture. */
#define MAX_PER_STAGE_UNIFORM_BUFFERS   MAX_PER_SET_UNIFORM_BUFFERS
#define MAX_PER_STAGE_INPUT_ATTACHMENTS MAX_PER_SET_INPUT_ATTACHMENTS

#if PAN_ARCH < 9

/* The MAX_PER_SET values could theoretically be used here too (except for
 * UBOs, which are really limited to 256 on the shader side), but Bifrost has
 * to copy some tables around, which costs extra memory and processing, so
 * smaller limits are picked. */
#define MAX_PER_STAGE_SAMPLED_IMAGES  256
#define MAX_PER_STAGE_SAMPLERS        128
#define MAX_PER_STAGE_STORAGE_BUFFERS 64
#define MAX_PER_STAGE_STORAGE_IMAGES  32

#else

#define MAX_PER_STAGE_SAMPLED_IMAGES  MAX_PER_SET_SAMPLED_IMAGES
#define MAX_PER_STAGE_SAMPLERS        MAX_PER_SET_SAMPLERS
#define MAX_PER_STAGE_STORAGE_BUFFERS MAX_PER_SET_STORAGE_BUFFERS
#define MAX_PER_STAGE_STORAGE_IMAGES  MAX_PER_SET_STORAGE_IMAGES

#endif

/* Sum of all the per-stage limits above. */
#define MAX_PER_STAGE_RESOURCES (                                              \
   MAX_PER_STAGE_SAMPLED_IMAGES + MAX_PER_STAGE_SAMPLERS +                     \
   MAX_PER_STAGE_UNIFORM_BUFFERS + MAX_PER_STAGE_STORAGE_BUFFERS +             \
   MAX_PER_STAGE_STORAGE_IMAGES + MAX_PER_STAGE_INPUT_ATTACHMENTS)
|
||||
|
||||
struct nir_shader;
|
||||
struct pan_blend_state;
|
||||
struct panvk_device;
|
||||
|
||||
/* Identifiers of the varying buffers. PANVK_VARY_BUF_MAX is the number of
 * buffers and sizes panvk_shader_link::buf_strides. */
enum panvk_varying_buf_id {
   PANVK_VARY_BUF_GENERAL = 0,
   PANVK_VARY_BUF_POSITION,
   PANVK_VARY_BUF_PSIZ,

   /* Keep last */
   PANVK_VARY_BUF_MAX,
};
|
||||
|
||||
#if PAN_ARCH < 9
|
||||
enum panvk_desc_table_id {
|
||||
PANVK_DESC_TABLE_USER = 0,
|
||||
PANVK_DESC_TABLE_CS_DYN_SSBOS = MAX_SETS,
|
||||
PANVK_DESC_TABLE_COMPUTE_COUNT = PANVK_DESC_TABLE_CS_DYN_SSBOS + 1,
|
||||
PANVK_DESC_TABLE_VS_DYN_SSBOS = MAX_SETS,
|
||||
PANVK_DESC_TABLE_FS_DYN_SSBOS = MAX_SETS + 1,
|
||||
PANVK_DESC_TABLE_GFX_COUNT = PANVK_DESC_TABLE_FS_DYN_SSBOS + 1,
|
||||
};
|
||||
#endif
|
||||
|
||||
#define PANVK_COLOR_ATTACHMENT(x) (x)
|
||||
#define PANVK_ZS_ATTACHMENT 255
|
||||
|
||||
/* One entry of the input attachment map stored in the graphics sysvals
 * (the `iam` array). */
struct panvk_input_attachment_info {
   uint32_t target;
   uint32_t conversion;
};
|
||||
|
||||
/* One attachment per color (MAX_RTS of them), one for depth, one for stencil,
 * and the last one for the attachment without an InputAttachmentIndex
 * attribute. Derived from MAX_RTS so the map size follows the render target
 * count instead of duplicating it as a literal. */
#define INPUT_ATTACHMENT_MAP_SIZE (MAX_RTS + 3)
|
||||
|
||||
/* Size in bytes of one FAU word. */
#define FAU_WORD_SIZE sizeof(uint64_t)

/* uint64_t whose declaration is forced to 8-byte alignment. */
#define aligned_u64 __attribute__((aligned(sizeof(uint64_t)))) uint64_t

/* System values which are common to both graphics and compute. These are
 * always at the same offset in both graphics and compute allowing us to
 * compile the shader without knowing which queue it will be dispatched on.
 */
struct panvk_common_sysvals_inner {
   /* Address of sysval/push constant buffer used for indirect loads */
   aligned_u64 push_uniforms;

   /* Address of the printf buffer */
   aligned_u64 printf_buffer_address;
} __attribute__((aligned(FAU_WORD_SIZE)));

/* Reference layout: 16 bytes of padding followed by the common sysvals. The
 * graphics and compute sysval structs are asserted to place `common` at the
 * same offset as this struct does. */
struct panvk_common_sysvals {
   uint32_t _pad[4];
   struct panvk_common_sysvals_inner common;
} __attribute__((aligned(FAU_WORD_SIZE)));

static_assert((offsetof(struct panvk_common_sysvals, common) %
               FAU_WORD_SIZE) == 0,
              "panvk_common_sysvals::common must be 8-byte aligned");
static_assert((sizeof(struct panvk_common_sysvals_inner) %
               FAU_WORD_SIZE) == 0,
              "sizeof(struct panvk_common_sysvals_inner) must be a multiple "
              "of 8 bytes");
|
||||
|
||||
#define SYSVALS_COMMON_START \
|
||||
(offsetof(struct panvk_common_sysvals, common) / FAU_WORD_SIZE)
|
||||
|
||||
#define SYSVALS_COMMON_COUNT \
|
||||
(sizeof(struct panvk_common_sysvals_inner) / FAU_WORD_SIZE)
|
||||
|
||||
#define SYSVALS_COMMON_END (SYSVALS_COMMON_START + SYSVALS_COMMON_COUNT)
|
||||
|
||||
struct panvk_graphics_sysvals {
|
||||
/* Blend constants MUST come first because their position cannot depend on
|
||||
* the FAU packing of the fragment shader.
|
||||
*/
|
||||
struct {
|
||||
float constants[4];
|
||||
} blend;
|
||||
|
||||
/* This must be at the same offset for both compute and graphics */
|
||||
struct panvk_common_sysvals_inner common;
|
||||
|
||||
struct {
|
||||
struct {
|
||||
float x, y, z;
|
||||
} scale, offset;
|
||||
} viewport;
|
||||
|
||||
struct {
|
||||
#if PAN_ARCH < 9
|
||||
int32_t raw_vertex_offset;
|
||||
uint32_t num_vertices; /* iter13: XFB needs per-draw vertex count */
|
||||
/* aligned_u64 attribute below inserts the 4-byte alignment gap
|
||||
* after num_vertices automatically — no explicit pad needed. */
|
||||
aligned_u64 xfb_address[4]; /* iter13: 4 transform feedback buffer base addresses */
|
||||
#endif
|
||||
int32_t first_vertex;
|
||||
int32_t base_instance;
|
||||
uint32_t noperspective_varyings;
|
||||
} vs;
|
||||
|
||||
struct {
|
||||
aligned_u64 blend_descs[MAX_RTS];
|
||||
} fs;
|
||||
|
||||
struct panvk_input_attachment_info iam[INPUT_ATTACHMENT_MAP_SIZE];
|
||||
|
||||
#if PAN_ARCH < 9
|
||||
/* gl_Layer on Bifrost is a bit of hack. We have to issue one draw per
|
||||
* layer, and filter primitives at the VS level.
|
||||
*/
|
||||
int32_t layer_id;
|
||||
|
||||
struct {
|
||||
aligned_u64 sets[PANVK_DESC_TABLE_GFX_COUNT];
|
||||
} desc;
|
||||
#endif
|
||||
} __attribute__((aligned(FAU_WORD_SIZE)));
|
||||
|
||||
static_assert(offsetof(struct panvk_graphics_sysvals, blend) == 0,
|
||||
"panvk_graphics_sysvals::blend must be at the start");
|
||||
static_assert(offsetof(struct panvk_graphics_sysvals, common) ==
|
||||
offsetof(struct panvk_common_sysvals, common),
|
||||
"Common sysvals must be at the same offset everywhere");
|
||||
static_assert((sizeof(struct panvk_graphics_sysvals) % FAU_WORD_SIZE) == 0,
|
||||
"struct panvk_graphics_sysvals must be 8-byte aligned");
|
||||
#if PAN_ARCH < 9
|
||||
static_assert((offsetof(struct panvk_graphics_sysvals, desc) % FAU_WORD_SIZE) ==
|
||||
0,
|
||||
"panvk_graphics_sysvals::desc must be 8-byte aligned");
|
||||
#endif
|
||||
|
||||
struct panvk_compute_sysvals {
|
||||
struct {
|
||||
uint32_t x, y, z;
|
||||
} base;
|
||||
|
||||
uint32_t _pad;
|
||||
|
||||
/* This must be at the same offset for both compute and graphics */
|
||||
struct panvk_common_sysvals_inner common;
|
||||
|
||||
struct {
|
||||
uint32_t x, y, z;
|
||||
} num_work_groups;
|
||||
struct {
|
||||
uint32_t x, y, z;
|
||||
} local_group_size;
|
||||
|
||||
#if PAN_ARCH < 9
|
||||
struct {
|
||||
aligned_u64 sets[PANVK_DESC_TABLE_COMPUTE_COUNT];
|
||||
} desc;
|
||||
#endif
|
||||
} __attribute__((aligned(FAU_WORD_SIZE)));
|
||||
|
||||
static_assert(offsetof(struct panvk_compute_sysvals, common) ==
|
||||
offsetof(struct panvk_common_sysvals, common),
|
||||
"Common sysvals must be at the same offset everywhere");
|
||||
static_assert((sizeof(struct panvk_compute_sysvals) % FAU_WORD_SIZE) == 0,
|
||||
"struct panvk_compute_sysvals must be 8-byte aligned");
|
||||
#if PAN_ARCH < 9
|
||||
static_assert((offsetof(struct panvk_compute_sysvals, desc) % FAU_WORD_SIZE) ==
|
||||
0,
|
||||
"panvk_compute_sysvals::desc must be 8-byte aligned");
|
||||
#endif
|
||||
|
||||
/* This is not the final offset in the push constant buffer (AKA FAU), but
|
||||
* just a magic offset we use before packing push constants so we can easily
|
||||
* identify the type of push constant (driver sysvals vs user push constants).
|
||||
*/
|
||||
#define SYSVALS_PUSH_CONST_BASE MAX_PUSH_CONSTANTS_SIZE
|
||||
|
||||
/* Size in bytes of a sysval member, per sysval struct. */
#define common_sysval_size(__name)                                             \
   sizeof(((struct panvk_common_sysvals *)NULL)->common.__name)

#define graphics_sysval_size(__name)                                           \
   sizeof(((struct panvk_graphics_sysvals *)NULL)->__name)

#define compute_sysval_size(__name)                                            \
   sizeof(((struct panvk_compute_sysvals *)NULL)->__name)

/* Dispatch on __ptype (common, graphics or compute). */
#define sysval_size(__ptype, __name) __ptype##_sysval_size(__name)

/* Byte offset of a sysval member, per sysval struct. */
#define common_sysval_offset(__name)                                           \
   offsetof(struct panvk_common_sysvals, common.__name)

#define graphics_sysval_offset(__name)                                         \
   offsetof(struct panvk_graphics_sysvals, __name)

#define compute_sysval_offset(__name)                                          \
   offsetof(struct panvk_compute_sysvals, __name)

/* Dispatch on __ptype (common, graphics or compute). */
#define sysval_offset(__ptype, __name) __ptype##_sysval_offset(__name)

/* Size in bytes of one element of an array sysval. */
#define sysval_entry_size(__ptype, __name)                                     \
   sizeof(((struct panvk_##__ptype##_sysvals *)NULL)->__name[0])

/* Byte offset of element __idx of an array sysval. __idx is parenthesized so
 * that compound arguments such as `i + 1` are multiplied as a whole. */
#define sysval_entry_offset(__ptype, __name, __idx)                            \
   (sysval_offset(__ptype, __name) +                                           \
    (sysval_entry_size(__ptype, __name) * (__idx)))

/* Index of the first FAU word covered by a sysval. */
#define sysval_fau_start(__ptype, __name)                                      \
   (sysval_offset(__ptype, __name) / FAU_WORD_SIZE)

/* Index of the last FAU word covered by a sysval (inclusive). */
#define sysval_fau_end(__ptype, __name)                                        \
   ((sysval_offset(__ptype, __name) + sysval_size(__ptype, __name) - 1) /      \
    FAU_WORD_SIZE)

/* Index of the first FAU word covered by element __idx of an array sysval. */
#define sysval_fau_entry_start(__ptype, __name, __idx)                         \
   (sysval_entry_offset(__ptype, __name, __idx) / FAU_WORD_SIZE)

/* Index of the last FAU word covered by element __idx of an array sysval
 * (inclusive): the word holding the byte just before element __idx + 1. */
#define sysval_fau_entry_end(__ptype, __name, __idx)                           \
   ((sysval_entry_offset(__ptype, __name, (__idx) + 1) - 1) / FAU_WORD_SIZE)
|
||||
|
||||
/* Translate a byte offset into its position after FAU packing: the FAU word
 * index is replaced by the BITSET_PREFIX_SUM of the shader's
 * fau.used_<kind_> bitset up to that word, and the byte offset inside the
 * word is kept. */
#define shader_remapped_fau_offset(shader_, kind_, offset_)                    \
   (((offset_) % FAU_WORD_SIZE) +                                              \
    (FAU_WORD_SIZE * BITSET_PREFIX_SUM((shader_)->fau.used_##kind_,            \
                                       (offset_) / FAU_WORD_SIZE)))

/* Remapped offset of a sysval byte offset. */
#define shader_remapped_sysval_offset(shader_, offset_)                        \
   shader_remapped_fau_offset(shader_, sysvals, offset_)

/* Remapped offset of a push constant byte offset; push constants are placed
 * after the fau.sysval_count sysval words. */
#define shader_remapped_push_const_offset(shader_, offset_)                    \
   (((shader_)->fau.sysval_count * FAU_WORD_SIZE) +                            \
    shader_remapped_fau_offset(shader_, push_consts, offset_))
|
||||
|
||||
/* Mark every FAU word covered by a named sysval as used by the shader. */
#define shader_use_sysval(shader_, ptype_, name_)                              \
   BITSET_SET_RANGE((shader_)->fau.used_sysvals,                               \
                    sysval_fau_start(ptype_, name_),                           \
                    sysval_fau_end(ptype_, name_))

/* Test the FAU words covered by a named sysval in fau.used_sysvals. */
#define shader_uses_sysval(shader_, ptype_, name_)                             \
   BITSET_TEST_RANGE((shader_)->fau.used_sysvals,                              \
                     sysval_fau_start(ptype_, name_),                          \
                     sysval_fau_end(ptype_, name_))

/* Same test, restricted to element idx_ of an array sysval. */
#define shader_uses_sysval_entry(shader_, ptype_, name_, idx_)                 \
   BITSET_TEST_RANGE((shader_)->fau.used_sysvals,                              \
                     sysval_fau_entry_start(ptype_, name_, idx_),              \
                     sysval_fau_entry_end(ptype_, name_, idx_))

/* Mark the FAU words covering range_ bytes starting at byte base_ as used
 * sysvals. */
#define shader_use_sysval_range(shader_, base_, range_)                        \
   BITSET_SET_RANGE((shader_)->fau.used_sysvals, (base_) / FAU_WORD_SIZE,      \
                    ((base_) + (range_) - 1) / FAU_WORD_SIZE)

/* Mark the FAU words covering range_ bytes starting at byte base_ as used
 * push constants. */
#define shader_use_push_const_range(shader_, base_, range_)                    \
   BITSET_SET_RANGE((shader_)->fau.used_push_consts,                           \
                    (base_) / FAU_WORD_SIZE,                                   \
                    ((base_) + (range_) - 1) / FAU_WORD_SIZE)
|
||||
|
||||
/* Emit a nir_load_push_constant for a whole sysval. The component count is
 * the sysval size divided by the component size, the offset source is the
 * sysval's byte offset, and the base is SYSVALS_PUSH_CONST_BASE. */
#define load_sysval(b_, ptype_, bitsz_, name_)                                 \
   nir_load_push_constant(                                                     \
      b_, sysval_size(ptype_, name_) / ((bitsz_) / 8), bitsz_,                 \
      nir_imm_int(b_, sysval_offset(ptype_, name_)),                           \
      .base = SYSVALS_PUSH_CONST_BASE)

/* Emit a nir_load_push_constant for one element of an array sysval, chosen
 * by the dynamic index dyn_idx_. The base points at the start of the array
 * and the range covers the whole array. */
#define load_sysval_entry(b_, ptype_, bitsz_, name_, dyn_idx_)                 \
   nir_load_push_constant(                                                     \
      b_, sysval_entry_size(ptype_, name_) / ((bitsz_) / 8), bitsz_,           \
      nir_imul_imm(b_, dyn_idx_, sysval_entry_size(ptype_, name_)),            \
      .base = SYSVALS_PUSH_CONST_BASE + sysval_offset(ptype_, name_),          \
      .range = sysval_size(ptype_, name_))
|
||||
|
||||
#if PAN_ARCH < 9
/* Descriptor table types, only declared for PAN_ARCH < 9.
 * PANVK_BIFROST_DESC_TABLE_COUNT is the number of valid types. */
enum panvk_bifrost_desc_table_type {
   PANVK_BIFROST_DESC_TABLE_INVALID = -1,

   /* UBO is encoded on 8 bytes */
   PANVK_BIFROST_DESC_TABLE_UBO = 0,

   /* Images use a <3DAttributeBuffer,Attribute> pair, each stored in a
    * separate table. */
   PANVK_BIFROST_DESC_TABLE_IMG,

   /* Texture and sampler are encoded on 32 bytes */
   PANVK_BIFROST_DESC_TABLE_TEXTURE,
   PANVK_BIFROST_DESC_TABLE_SAMPLER,

   /* Keep last */
   PANVK_BIFROST_DESC_TABLE_COUNT,
};
#endif
|
||||
|
||||
/* A copy-descriptor handle packs the table in the top 4 bits and the index
 * in the low 28 bits. Both arguments are parenthesized so that compound
 * expressions are shifted/OR-ed as a whole. */
#define COPY_DESC_HANDLE(table, idx) (((table) << 28) | (idx))

/* Low 28 bits of a handle: the index. */
#define COPY_DESC_HANDLE_EXTRACT_INDEX(handle) ((handle) & BITFIELD_MASK(28))

/* Bits 28 and up of a handle: the table. */
#define COPY_DESC_HANDLE_EXTRACT_TABLE(handle) ((handle) >> 28)
|
||||
|
||||
#define MAX_COMPUTE_SYSVAL_FAUS \
|
||||
(sizeof(struct panvk_compute_sysvals) / FAU_WORD_SIZE)
|
||||
#define MAX_GFX_SYSVAL_FAUS \
|
||||
(sizeof(struct panvk_graphics_sysvals) / FAU_WORD_SIZE)
|
||||
#define MAX_SYSVAL_FAUS MAX2(MAX_COMPUTE_SYSVAL_FAUS, MAX_GFX_SYSVAL_FAUS)
|
||||
#define MAX_PUSH_CONST_FAUS (MAX_PUSH_CONSTANTS_SIZE / FAU_WORD_SIZE)
|
||||
|
||||
struct panvk_shader_fau_info {
|
||||
BITSET_DECLARE(used_sysvals, MAX_SYSVAL_FAUS);
|
||||
BITSET_DECLARE(used_push_consts, MAX_PUSH_CONST_FAUS);
|
||||
uint32_t sysval_count;
|
||||
uint32_t total_count;
|
||||
};
|
||||
|
||||
struct panvk_shader_desc_info {
|
||||
uint32_t used_set_mask;
|
||||
|
||||
#if PAN_ARCH < 9
|
||||
struct {
|
||||
uint32_t map[MAX_DYNAMIC_UNIFORM_BUFFERS];
|
||||
uint32_t count;
|
||||
} dyn_ubos;
|
||||
struct {
|
||||
uint32_t map[MAX_DYNAMIC_STORAGE_BUFFERS];
|
||||
uint32_t count;
|
||||
} dyn_ssbos;
|
||||
struct {
|
||||
struct panvk_priv_mem map;
|
||||
uint32_t count[PANVK_BIFROST_DESC_TABLE_COUNT];
|
||||
} others;
|
||||
#else
|
||||
struct {
|
||||
uint32_t map[MAX_DYNAMIC_BUFFERS];
|
||||
uint32_t count;
|
||||
} dyn_bufs;
|
||||
uint32_t fs_varying_attr_desc_count;
|
||||
#endif
|
||||
};
|
||||
|
||||
struct panvk_shader_variant {
|
||||
struct pan_shader_info info;
|
||||
|
||||
union {
|
||||
struct {
|
||||
struct pan_compute_dim local_size;
|
||||
} cs;
|
||||
|
||||
struct {
|
||||
struct pan_earlyzs_lut earlyzs_lut;
|
||||
uint32_t input_attachment_read;
|
||||
} fs;
|
||||
};
|
||||
|
||||
struct panvk_shader_desc_info desc_info;
|
||||
|
||||
struct panvk_shader_fau_info fau;
|
||||
|
||||
const void *bin_ptr;
|
||||
uint32_t bin_size;
|
||||
bool own_bin;
|
||||
|
||||
struct panvk_priv_mem code_mem;
|
||||
|
||||
#if PAN_ARCH < 9
|
||||
struct panvk_priv_mem rsd;
|
||||
#else
|
||||
union {
|
||||
struct panvk_priv_mem spd;
|
||||
struct {
|
||||
#if PAN_ARCH < 12
|
||||
struct panvk_priv_mem pos_points;
|
||||
struct panvk_priv_mem pos_triangles;
|
||||
struct panvk_priv_mem var;
|
||||
#else
|
||||
struct panvk_priv_mem all_points;
|
||||
struct panvk_priv_mem all_triangles;
|
||||
#endif
|
||||
} spds;
|
||||
};
|
||||
#endif
|
||||
|
||||
const char *nir_str;
|
||||
const char *asm_str;
|
||||
};
|
||||
|
||||
/* Vertex shader variants. PANVK_VS_VARIANTS is the number of variants. */
enum panvk_vs_variant {
   /* Hardware vertex shader, when next stage is fragment */
   PANVK_VS_VARIANT_HW = 0,

   /* Keep last */
   PANVK_VS_VARIANTS,
};
|
||||
|
||||
struct panvk_shader {
|
||||
struct vk_shader vk;
|
||||
|
||||
struct panvk_shader_variant variants[];
|
||||
};
|
||||
|
||||
static inline unsigned
|
||||
panvk_shader_num_variants(mesa_shader_stage stage)
|
||||
{
|
||||
if (stage == MESA_SHADER_VERTEX)
|
||||
return PANVK_VS_VARIANTS;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static const char *panvk_vs_shader_variant_name[] = {
|
||||
[PANVK_VS_VARIANT_HW] = NULL,
|
||||
};
|
||||
|
||||
static const char *
|
||||
panvk_shader_variant_name(const struct panvk_shader *shader,
|
||||
struct panvk_shader_variant *variant)
|
||||
{
|
||||
unsigned i = variant - shader->variants;
|
||||
assert(i < panvk_shader_num_variants(shader->vk.stage));
|
||||
|
||||
if (shader->vk.stage == MESA_SHADER_VERTEX) {
|
||||
assert(i < ARRAY_SIZE(panvk_vs_shader_variant_name));
|
||||
return panvk_vs_shader_variant_name[i];
|
||||
}
|
||||
|
||||
assert(panvk_shader_num_variants(shader->vk.stage) == 1);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static const struct panvk_shader_variant *
|
||||
panvk_shader_only_variant(const struct panvk_shader *shader)
|
||||
{
|
||||
if (!shader)
|
||||
return NULL;
|
||||
|
||||
assert(panvk_shader_num_variants(shader->vk.stage) == 1);
|
||||
return &shader->variants[0];
|
||||
}
|
||||
|
||||
static const struct panvk_shader_variant *
|
||||
panvk_shader_hw_variant(const struct panvk_shader *shader)
|
||||
{
|
||||
if (!shader)
|
||||
return NULL;
|
||||
|
||||
return &shader->variants[0];
|
||||
}
|
||||
|
||||
static inline uint64_t
|
||||
panvk_shader_variant_get_dev_addr(const struct panvk_shader_variant *shader)
|
||||
{
|
||||
return shader != NULL ? panvk_priv_mem_dev_addr(shader->code_mem) : 0;
|
||||
}
|
||||
|
||||
/* Loop over every variant of a shader. The second argument names the
 * iterator, declared by the macro as a struct panvk_shader_variant pointer
 * scoped to the loop. */
#define panvk_shader_foreach_variant(shader_, var_)                            \
   for (struct panvk_shader_variant *var_ = (shader_)->variants;               \
        var_ < (shader_)->variants +                                           \
                  panvk_shader_num_variants((shader_)->vk.stage);              \
        ++var_)
|
||||
|
||||
#if PAN_ARCH < 9
|
||||
struct panvk_shader_link {
|
||||
struct {
|
||||
struct panvk_priv_mem attribs;
|
||||
} vs, fs;
|
||||
unsigned buf_strides[PANVK_VARY_BUF_MAX];
|
||||
};
|
||||
|
||||
VkResult panvk_per_arch(link_shaders)(struct panvk_pool *desc_pool,
|
||||
const struct panvk_shader_variant *vs,
|
||||
const struct panvk_shader_variant *fs,
|
||||
struct panvk_shader_link *link);
|
||||
|
||||
static inline void
|
||||
panvk_shader_link_cleanup(struct panvk_shader_link *link)
|
||||
{
|
||||
panvk_pool_free_mem(&link->vs.attribs);
|
||||
panvk_pool_free_mem(&link->fs.attribs);
|
||||
}
|
||||
#endif
|
||||
|
||||
bool panvk_per_arch(nir_lower_input_attachment_loads)(
|
||||
nir_shader *nir,
|
||||
const struct vk_graphics_pipeline_state *state,
|
||||
uint32_t *input_attachment_read_out);
|
||||
|
||||
void panvk_per_arch(nir_lower_descriptors)(
|
||||
nir_shader *nir, struct panvk_device *dev,
|
||||
const struct vk_pipeline_robustness_state *rs, uint32_t set_layout_count,
|
||||
struct vk_descriptor_set_layout *const *set_layouts,
|
||||
const struct vk_graphics_pipeline_state *state,
|
||||
struct panvk_shader_desc_info *desc_info);
|
||||
|
||||
/* This a stripped-down version of panvk_shader for internal shaders that
|
||||
* are managed by vk_meta (blend and preload shaders). Those don't need the
|
||||
* complexity inherent to user provided shaders as they're not exposed. */
|
||||
struct panvk_internal_shader {
|
||||
struct vk_shader vk;
|
||||
struct pan_shader_info info;
|
||||
struct panvk_priv_mem code_mem;
|
||||
|
||||
#if PAN_ARCH < 9
|
||||
struct panvk_priv_mem rsd;
|
||||
#else
|
||||
struct panvk_priv_mem spd;
|
||||
#endif
|
||||
};
|
||||
|
||||
VK_DEFINE_NONDISP_HANDLE_CASTS(panvk_internal_shader, vk.base, VkShaderEXT,
|
||||
VK_OBJECT_TYPE_SHADER_EXT)
|
||||
|
||||
void panvk_per_arch(compiler_lock)(void);
|
||||
void panvk_per_arch(compiler_unlock)(void);
|
||||
|
||||
VkResult panvk_per_arch(create_internal_shader)(
|
||||
struct panvk_device *dev, nir_shader *nir,
|
||||
struct pan_compile_inputs *compiler_inputs,
|
||||
struct panvk_internal_shader **shader_out);
|
||||
|
||||
VkResult panvk_per_arch(create_shader_from_binary)(
|
||||
struct panvk_device *dev, const struct pan_shader_info *info,
|
||||
struct pan_compute_dim local_size, const void *bin_ptr, size_t bin_size,
|
||||
struct panvk_shader **shader_out);
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,956 @@
|
||||
/*
|
||||
* Copyright © 2024 Collabora Ltd.
|
||||
* Copyright © 2024 Arm Ltd.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "panvk_buffer.h"
|
||||
#include "panvk_cmd_buffer.h"
|
||||
#include "panvk_device_memory.h"
|
||||
#include "panvk_entrypoints.h"
|
||||
|
||||
#include "pan_desc.h"
|
||||
#include "pan_compiler.h" /* PAN_SHADER_OOB_ADDRESS */
|
||||
#include "pan_util.h"
|
||||
|
||||
static void
|
||||
att_set_clear_preload(const VkRenderingAttachmentInfo *att, bool *clear, bool *preload)
|
||||
{
|
||||
switch (att->loadOp) {
|
||||
case VK_ATTACHMENT_LOAD_OP_CLEAR:
|
||||
*clear = true;
|
||||
break;
|
||||
case VK_ATTACHMENT_LOAD_OP_LOAD:
|
||||
*preload = true;
|
||||
break;
|
||||
case VK_ATTACHMENT_LOAD_OP_NONE:
|
||||
case VK_ATTACHMENT_LOAD_OP_DONT_CARE:
|
||||
/* This is a very frustrating corner case. From the spec:
|
||||
*
|
||||
* VK_ATTACHMENT_STORE_OP_NONE specifies the contents within the
|
||||
* render area are not accessed by the store operation as long as
|
||||
* no values are written to the attachment during the render pass.
|
||||
*
|
||||
* With VK_ATTACHMENT_LOAD_OP_DONT_CARE + VK_ATTACHMENT_STORE_OP_NONE,
|
||||
* we need to preserve the contents throughout partial renders. The
|
||||
* easiest way to do that is forcing a preload, so that partial stores
|
||||
* for unused attachments will be no-op'd by writing existing contents.
|
||||
*
|
||||
* TODO: disable preload when we have clean_pixel_write_enable = false
|
||||
* as an optimization
|
||||
*/
|
||||
*preload |= att->storeOp == VK_ATTACHMENT_STORE_OP_NONE;
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE("Unsupported loadOp");
|
||||
}
|
||||
}
|
||||
|
||||
static struct panvk_image_view *
|
||||
get_ms2ss_image_view(struct panvk_image_view *iview, uint32_t nr_samples)
|
||||
{
|
||||
assert(nr_samples >= 2 && nr_samples <= 16);
|
||||
assert(iview->pview.nr_samples == 1);
|
||||
assert(iview->vk.image->create_flags &
|
||||
VK_IMAGE_CREATE_MULTISAMPLED_RENDER_TO_SINGLE_SAMPLED_BIT_EXT);
|
||||
|
||||
/* sample count 2 is at index 0, 4 at 1, .. */
|
||||
uint32_t vidx = 0;
|
||||
switch (nr_samples) {
|
||||
case VK_SAMPLE_COUNT_2_BIT:
|
||||
vidx = 0;
|
||||
break;
|
||||
case VK_SAMPLE_COUNT_4_BIT:
|
||||
vidx = 1;
|
||||
break;
|
||||
case VK_SAMPLE_COUNT_8_BIT:
|
||||
vidx = 2;
|
||||
break;
|
||||
case VK_SAMPLE_COUNT_16_BIT:
|
||||
vidx = 3;
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE("unhandled sample count");
|
||||
}
|
||||
assert(iview->ms_views[vidx] != VK_NULL_HANDLE);
|
||||
|
||||
struct panvk_image_view *res =
|
||||
panvk_image_view_from_handle(iview->ms_views[vidx]);
|
||||
|
||||
assert(res->pview.nr_samples == nr_samples);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
static void
|
||||
render_state_set_color_attachment(struct panvk_cmd_buffer *cmdbuf,
|
||||
const VkRenderingAttachmentInfo *att,
|
||||
uint32_t index)
|
||||
{
|
||||
struct panvk_physical_device *phys_dev =
|
||||
to_panvk_physical_device(cmdbuf->vk.base.device->physical);
|
||||
struct panvk_cmd_graphics_state *state = &cmdbuf->state.gfx;
|
||||
struct pan_fb_info *fbinfo = &state->render.fb.info;
|
||||
VK_FROM_HANDLE(panvk_image_view, iview, att->imageView);
|
||||
|
||||
struct panvk_image_view *iview_ss = NULL;
|
||||
const bool ms2ss = cmdbuf->state.gfx.render.fb.nr_samples > 1 &&
|
||||
iview->pview.nr_samples == 1;
|
||||
|
||||
if (ms2ss) {
|
||||
iview_ss = iview;
|
||||
iview =
|
||||
get_ms2ss_image_view(iview, cmdbuf->state.gfx.render.fb.nr_samples);
|
||||
}
|
||||
|
||||
struct panvk_image *img =
|
||||
container_of(iview->vk.image, struct panvk_image, vk);
|
||||
|
||||
state->render.bound_attachments |= MESA_VK_RP_ATTACHMENT_COLOR_BIT(index);
|
||||
state->render.color_attachments.iviews[index] = iview;
|
||||
state->render.color_attachments.preload_iviews[index] =
|
||||
ms2ss ? iview_ss : NULL;
|
||||
state->render.color_attachments.fmts[index] = iview->vk.format;
|
||||
state->render.color_attachments.samples[index] = img->vk.samples;
|
||||
|
||||
#if PAN_ARCH < 9
|
||||
for (uint8_t p = 0; p < ARRAY_SIZE(iview->pview.planes); p++) {
|
||||
struct pan_image_plane_ref pref =
|
||||
pan_image_view_get_plane(&iview->pview, p);
|
||||
|
||||
if (!pref.image)
|
||||
continue;
|
||||
|
||||
assert(pref.plane_idx < ARRAY_SIZE(img->planes));
|
||||
assert(img->planes[pref.plane_idx].mem->bo != NULL);
|
||||
state->render.fb.bos[state->render.fb.bo_count++] =
|
||||
img->planes[pref.plane_idx].mem->bo;
|
||||
}
|
||||
#endif
|
||||
|
||||
fbinfo->rts[index].view = &iview->pview;
|
||||
fbinfo->rts[index].crc_valid = &state->render.fb.crc_valid[index];
|
||||
state->render.fb.nr_samples =
|
||||
MAX2(state->render.fb.nr_samples,
|
||||
pan_image_view_get_nr_samples(&iview->pview));
|
||||
|
||||
if (att->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) {
|
||||
enum pipe_format fmt = vk_format_to_pipe_format(iview->vk.format);
|
||||
union pipe_color_union *col =
|
||||
(union pipe_color_union *)&att->clearValue.color;
|
||||
pan_pack_color(phys_dev->formats.blendable,
|
||||
fbinfo->rts[index].clear_value, col, fmt, false);
|
||||
}
|
||||
|
||||
att_set_clear_preload(att, &fbinfo->rts[index].clear,
|
||||
&fbinfo->rts[index].preload);
|
||||
|
||||
if (att->resolveMode != VK_RESOLVE_MODE_NONE) {
|
||||
struct panvk_resolve_attachment *resolve_info =
|
||||
&state->render.color_attachments.resolve[index];
|
||||
VK_FROM_HANDLE(panvk_image_view, resolve_iview, att->resolveImageView);
|
||||
|
||||
/* VUID-VkRenderingAttachmentInfo-imageView-06862 and
|
||||
* VUID-VkRenderingAttachmentInfo-imageView-06863:
|
||||
* If resolveMode != NONE, then
|
||||
* resolveView == NULL iff. multisampledRenderToSingleSampledEnable */
|
||||
assert(ms2ss == (resolve_iview == NULL));
|
||||
|
||||
resolve_info->mode = att->resolveMode;
|
||||
if (!ms2ss) {
|
||||
resolve_info->dst_iview = resolve_iview;
|
||||
} else {
|
||||
assert(iview_ss);
|
||||
resolve_info->dst_iview = iview_ss;
|
||||
assert(resolve_info->dst_iview->pview.nr_samples == 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
render_state_set_z_attachment(struct panvk_cmd_buffer *cmdbuf,
|
||||
const VkRenderingAttachmentInfo *att)
|
||||
{
|
||||
struct panvk_cmd_graphics_state *state = &cmdbuf->state.gfx;
|
||||
struct pan_fb_info *fbinfo = &state->render.fb.info;
|
||||
VK_FROM_HANDLE(panvk_image_view, iview, att->imageView);
|
||||
|
||||
struct panvk_image_view *iview_ss = NULL;
|
||||
const bool ms2ss = cmdbuf->state.gfx.render.fb.nr_samples > 1 &&
|
||||
iview->pview.nr_samples == 1;
|
||||
|
||||
if (ms2ss) {
|
||||
iview_ss = iview;
|
||||
iview =
|
||||
get_ms2ss_image_view(iview, cmdbuf->state.gfx.render.fb.nr_samples);
|
||||
}
|
||||
|
||||
struct panvk_image *img =
|
||||
container_of(iview->vk.image, struct panvk_image, vk);
|
||||
|
||||
#if PAN_ARCH < 9
|
||||
/* Depth plane always comes first. */
|
||||
state->render.fb.bos[state->render.fb.bo_count++] = img->planes[0].mem->bo;
|
||||
#endif
|
||||
|
||||
state->render.z_attachment.fmt = iview->vk.format;
|
||||
state->render.bound_attachments |= MESA_VK_RP_ATTACHMENT_DEPTH_BIT;
|
||||
|
||||
state->render.zs_pview = iview->pview;
|
||||
fbinfo->zs.view.zs = &state->render.zs_pview;
|
||||
|
||||
/* Fixup view format when the image is multiplanar. */
|
||||
if (panvk_image_is_planar_depth_stencil(img))
|
||||
state->render.zs_pview.format = panvk_image_depth_only_pfmt(img);
|
||||
|
||||
state->render.zs_pview.planes[0] = (struct pan_image_plane_ref){
|
||||
.image = &img->planes[0].image,
|
||||
.plane_idx = 0,
|
||||
};
|
||||
state->render.zs_pview.planes[1] = (struct pan_image_plane_ref){0};
|
||||
state->render.fb.nr_samples =
|
||||
MAX2(state->render.fb.nr_samples,
|
||||
pan_image_view_get_nr_samples(&iview->pview));
|
||||
state->render.z_attachment.iview = iview;
|
||||
state->render.z_attachment.preload_iview = ms2ss ? iview_ss : NULL;
|
||||
|
||||
/* D24S8 is a single plane format where the depth/stencil are interleaved.
|
||||
* If we touch the depth component, we need to make sure the stencil
|
||||
* component is preserved, hence the preload, and the view format adjusment.
|
||||
*/
|
||||
if (panvk_image_is_interleaved_depth_stencil(img)) {
|
||||
fbinfo->zs.preload.s = true;
|
||||
cmdbuf->state.gfx.render.zs_pview.format =
|
||||
img->planes[0].image.props.format;
|
||||
} else {
|
||||
state->render.zs_pview.format = panvk_image_depth_only_pfmt(img);
|
||||
}
|
||||
|
||||
if (att->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR)
|
||||
fbinfo->zs.clear_value.depth = att->clearValue.depthStencil.depth;
|
||||
|
||||
att_set_clear_preload(att, &fbinfo->zs.clear.z, &fbinfo->zs.preload.z);
|
||||
|
||||
if (att->resolveMode != VK_RESOLVE_MODE_NONE) {
|
||||
struct panvk_resolve_attachment *resolve_info =
|
||||
&state->render.z_attachment.resolve;
|
||||
VK_FROM_HANDLE(panvk_image_view, resolve_iview, att->resolveImageView);
|
||||
|
||||
resolve_info->mode = att->resolveMode;
|
||||
if (!ms2ss) {
|
||||
resolve_info->dst_iview = resolve_iview;
|
||||
} else {
|
||||
assert(iview_ss);
|
||||
resolve_info->dst_iview = iview_ss;
|
||||
assert(resolve_info->dst_iview->pview.nr_samples == 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
render_state_set_s_attachment(struct panvk_cmd_buffer *cmdbuf,
|
||||
const VkRenderingAttachmentInfo *att)
|
||||
{
|
||||
struct panvk_cmd_graphics_state *state = &cmdbuf->state.gfx;
|
||||
struct pan_fb_info *fbinfo = &state->render.fb.info;
|
||||
VK_FROM_HANDLE(panvk_image_view, iview, att->imageView);
|
||||
|
||||
struct panvk_image_view *iview_ss = NULL;
|
||||
const bool ms2ss = cmdbuf->state.gfx.render.fb.nr_samples > 1 &&
|
||||
iview->pview.nr_samples == 1;
|
||||
|
||||
if (ms2ss) {
|
||||
iview_ss = iview;
|
||||
iview =
|
||||
get_ms2ss_image_view(iview, cmdbuf->state.gfx.render.fb.nr_samples);
|
||||
}
|
||||
|
||||
struct panvk_image *img =
|
||||
container_of(iview->vk.image, struct panvk_image, vk);
|
||||
|
||||
#if PAN_ARCH < 9
|
||||
/* The stencil plane is always last. */
|
||||
state->render.fb.bos[state->render.fb.bo_count++] =
|
||||
img->planes[img->plane_count - 1].mem->bo;
|
||||
#endif
|
||||
|
||||
state->render.s_attachment.fmt = iview->vk.format;
|
||||
state->render.bound_attachments |= MESA_VK_RP_ATTACHMENT_STENCIL_BIT;
|
||||
|
||||
state->render.s_pview = iview->pview;
|
||||
fbinfo->zs.view.s = &state->render.s_pview;
|
||||
|
||||
if (panvk_image_is_planar_depth_stencil(img)) {
|
||||
state->render.s_pview.format = panvk_image_stencil_only_pfmt(img);
|
||||
state->render.s_pview.planes[0] = (struct pan_image_plane_ref){0};
|
||||
state->render.s_pview.planes[1] = (struct pan_image_plane_ref){
|
||||
.image = &img->planes[1].image,
|
||||
.plane_idx = 0,
|
||||
};
|
||||
} else {
|
||||
state->render.s_pview.format = panvk_image_stencil_only_pfmt(img);
|
||||
state->render.s_pview.planes[0] = (struct pan_image_plane_ref){
|
||||
.image = &img->planes[0].image,
|
||||
.plane_idx = 0,
|
||||
};
|
||||
state->render.s_pview.planes[1] = (struct pan_image_plane_ref){0};
|
||||
}
|
||||
|
||||
state->render.fb.nr_samples =
|
||||
MAX2(state->render.fb.nr_samples,
|
||||
pan_image_view_get_nr_samples(&iview->pview));
|
||||
state->render.s_attachment.iview = iview;
|
||||
state->render.s_attachment.preload_iview = ms2ss ? iview_ss : NULL;
|
||||
|
||||
/* If the depth and stencil attachments point to the same image,
|
||||
* and the format is D24S8, we can combine them in a single view
|
||||
* addressing both components.
|
||||
*/
|
||||
if (state->render.s_pview.format == PIPE_FORMAT_X24S8_UINT &&
|
||||
state->render.z_attachment.iview &&
|
||||
state->render.z_attachment.iview->vk.image == iview->vk.image) {
|
||||
state->render.zs_pview.format = PIPE_FORMAT_Z24_UNORM_S8_UINT;
|
||||
fbinfo->zs.preload.s = false;
|
||||
fbinfo->zs.view.s = NULL;
|
||||
|
||||
/* If there was no depth attachment, and the image format is D24S8,
|
||||
* we use the depth+stencil slot, so we can benefit from AFBC, which
|
||||
* is not supported on the stencil-only slot on Bifrost.
|
||||
*/
|
||||
} else if (img->vk.format == VK_FORMAT_D24_UNORM_S8_UINT &&
|
||||
state->render.s_pview.format == PIPE_FORMAT_X24S8_UINT &&
|
||||
fbinfo->zs.view.zs == NULL) {
|
||||
fbinfo->zs.view.zs = &state->render.s_pview;
|
||||
state->render.s_pview.format = PIPE_FORMAT_Z24_UNORM_S8_UINT;
|
||||
fbinfo->zs.preload.z = true;
|
||||
fbinfo->zs.view.s = NULL;
|
||||
}
|
||||
|
||||
if (att->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR)
|
||||
fbinfo->zs.clear_value.stencil = att->clearValue.depthStencil.stencil;
|
||||
|
||||
att_set_clear_preload(att, &fbinfo->zs.clear.s, &fbinfo->zs.preload.s);
|
||||
|
||||
if (att->resolveMode != VK_RESOLVE_MODE_NONE) {
|
||||
struct panvk_resolve_attachment *resolve_info =
|
||||
&state->render.s_attachment.resolve;
|
||||
VK_FROM_HANDLE(panvk_image_view, resolve_iview, att->resolveImageView);
|
||||
|
||||
resolve_info->mode = att->resolveMode;
|
||||
if (!ms2ss) {
|
||||
resolve_info->dst_iview = resolve_iview;
|
||||
} else {
|
||||
assert(iview_ss);
|
||||
resolve_info->dst_iview = iview_ss;
|
||||
assert(resolve_info->dst_iview->pview.nr_samples == 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
panvk_per_arch(cmd_init_render_state)(struct panvk_cmd_buffer *cmdbuf,
|
||||
const VkRenderingInfo *pRenderingInfo)
|
||||
{
|
||||
struct panvk_physical_device *phys_dev =
|
||||
to_panvk_physical_device(cmdbuf->vk.base.device->physical);
|
||||
struct panvk_cmd_graphics_state *state = &cmdbuf->state.gfx;
|
||||
struct pan_fb_info *fbinfo = &state->render.fb.info;
|
||||
uint32_t att_width = UINT32_MAX, att_height = UINT32_MAX;
|
||||
|
||||
state->render.flags = pRenderingInfo->flags;
|
||||
|
||||
BITSET_SET(state->dirty, PANVK_CMD_GRAPHICS_DIRTY_RENDER_STATE);
|
||||
|
||||
#if PAN_ARCH < 9
|
||||
state->render.fb.bo_count = 0;
|
||||
memset(state->render.fb.bos, 0, sizeof(state->render.fb.bos));
|
||||
#endif
|
||||
|
||||
state->render.first_provoking_vertex = U_TRISTATE_UNSET;
|
||||
#if PAN_ARCH >= 10
|
||||
state->render.maybe_set_tds_provoking_vertex = NULL;
|
||||
state->render.maybe_set_fbds_provoking_vertex = NULL;
|
||||
#endif
|
||||
memset(state->render.fb.crc_valid, 0, sizeof(state->render.fb.crc_valid));
|
||||
memset(&state->render.color_attachments, 0,
|
||||
sizeof(state->render.color_attachments));
|
||||
memset(&state->render.z_attachment, 0, sizeof(state->render.z_attachment));
|
||||
memset(&state->render.s_attachment, 0, sizeof(state->render.s_attachment));
|
||||
state->render.bound_attachments = 0;
|
||||
|
||||
const VkMultisampledRenderToSingleSampledInfoEXT *ms2ss_info =
|
||||
vk_find_struct_const(pRenderingInfo,
|
||||
MULTISAMPLED_RENDER_TO_SINGLE_SAMPLED_INFO_EXT);
|
||||
const bool ms2ss = ms2ss_info
|
||||
? ms2ss_info->multisampledRenderToSingleSampledEnable
|
||||
: VK_FALSE;
|
||||
|
||||
cmdbuf->state.gfx.render.layer_count = pRenderingInfo->viewMask ?
|
||||
util_last_bit(pRenderingInfo->viewMask) :
|
||||
pRenderingInfo->layerCount;
|
||||
cmdbuf->state.gfx.render.view_mask = pRenderingInfo->viewMask;
|
||||
*fbinfo = (struct pan_fb_info){
|
||||
.tile_buf_budget = pan_query_optimal_tib_size(PAN_ARCH, phys_dev->model),
|
||||
.z_tile_buf_budget = pan_query_optimal_z_tib_size(PAN_ARCH, phys_dev->model),
|
||||
.nr_samples = 0,
|
||||
.rt_count = pRenderingInfo->colorAttachmentCount,
|
||||
};
|
||||
/* In case ms2ss is enabled, use the provided sample count.
|
||||
* All attachments need to have sample count == 1 or the provided value.
|
||||
* But, if all attachments have 1, we would end up choosing the wrong value
|
||||
* if we don't set it here already. */
|
||||
cmdbuf->state.gfx.render.fb.nr_samples =
|
||||
ms2ss ? ms2ss_info->rasterizationSamples : 1;
|
||||
|
||||
assert(pRenderingInfo->colorAttachmentCount <= ARRAY_SIZE(fbinfo->rts));
|
||||
|
||||
for (uint32_t i = 0; i < pRenderingInfo->colorAttachmentCount; i++) {
|
||||
const VkRenderingAttachmentInfo *att =
|
||||
&pRenderingInfo->pColorAttachments[i];
|
||||
VK_FROM_HANDLE(panvk_image_view, iview, att->imageView);
|
||||
|
||||
if (!iview)
|
||||
continue;
|
||||
|
||||
render_state_set_color_attachment(cmdbuf, att, i);
|
||||
att_width = MIN2(iview->vk.extent.width, att_width);
|
||||
att_height = MIN2(iview->vk.extent.height, att_height);
|
||||
}
|
||||
|
||||
if (pRenderingInfo->pDepthAttachment &&
|
||||
pRenderingInfo->pDepthAttachment->imageView != VK_NULL_HANDLE) {
|
||||
const VkRenderingAttachmentInfo *att = pRenderingInfo->pDepthAttachment;
|
||||
VK_FROM_HANDLE(panvk_image_view, iview, att->imageView);
|
||||
|
||||
if (iview) {
|
||||
assert(iview->vk.image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT);
|
||||
render_state_set_z_attachment(cmdbuf, att);
|
||||
att_width = MIN2(iview->vk.extent.width, att_width);
|
||||
att_height = MIN2(iview->vk.extent.height, att_height);
|
||||
}
|
||||
}
|
||||
|
||||
if (pRenderingInfo->pStencilAttachment &&
|
||||
pRenderingInfo->pStencilAttachment->imageView != VK_NULL_HANDLE) {
|
||||
const VkRenderingAttachmentInfo *att = pRenderingInfo->pStencilAttachment;
|
||||
VK_FROM_HANDLE(panvk_image_view, iview, att->imageView);
|
||||
|
||||
if (iview) {
|
||||
assert(iview->vk.image->aspects & VK_IMAGE_ASPECT_STENCIL_BIT);
|
||||
render_state_set_s_attachment(cmdbuf, att);
|
||||
att_width = MIN2(iview->vk.extent.width, att_width);
|
||||
att_height = MIN2(iview->vk.extent.height, att_height);
|
||||
}
|
||||
}
|
||||
|
||||
fbinfo->draw_extent.minx = pRenderingInfo->renderArea.offset.x;
|
||||
fbinfo->draw_extent.maxx = pRenderingInfo->renderArea.offset.x +
|
||||
pRenderingInfo->renderArea.extent.width - 1;
|
||||
fbinfo->draw_extent.miny = pRenderingInfo->renderArea.offset.y;
|
||||
fbinfo->draw_extent.maxy = pRenderingInfo->renderArea.offset.y +
|
||||
pRenderingInfo->renderArea.extent.height - 1;
|
||||
|
||||
fbinfo->frame_bounding_box = fbinfo->draw_extent;
|
||||
|
||||
if (state->render.bound_attachments) {
|
||||
fbinfo->width = att_width;
|
||||
fbinfo->height = att_height;
|
||||
} else {
|
||||
fbinfo->width = fbinfo->draw_extent.maxx + 1;
|
||||
fbinfo->height = fbinfo->draw_extent.maxy + 1;
|
||||
}
|
||||
|
||||
assert(fbinfo->width && fbinfo->height);
|
||||
}
|
||||
|
||||
void
|
||||
panvk_per_arch(cmd_select_tile_size)(struct panvk_cmd_buffer *cmdbuf)
|
||||
{
|
||||
struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info;
|
||||
|
||||
/* In case we never emitted tiler/framebuffer descriptors, we emit the
|
||||
* current sample count and compute tile size */
|
||||
if (fbinfo->nr_samples == 0) {
|
||||
fbinfo->nr_samples = cmdbuf->state.gfx.render.fb.nr_samples;
|
||||
GENX(pan_select_tile_size)(fbinfo);
|
||||
|
||||
#if PAN_ARCH != 6
|
||||
if (fbinfo->cbuf_allocation > fbinfo->tile_buf_budget) {
|
||||
vk_perf(VK_LOG_OBJS(&cmdbuf->vk.base),
|
||||
"Using too much tile-memory, disabling pipelining");
|
||||
}
|
||||
#endif
|
||||
} else {
|
||||
/* In case we already emitted tiler/framebuffer descriptors, we ensure
|
||||
* that the sample count didn't change (this should never happen) */
|
||||
assert(fbinfo->nr_samples == cmdbuf->state.gfx.render.fb.nr_samples);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
panvk_per_arch(cmd_force_fb_preload)(struct panvk_cmd_buffer *cmdbuf,
|
||||
const VkRenderingInfo *render_info)
|
||||
{
|
||||
/* We force preloading for all active attachments when the render area is
|
||||
* unaligned or when a barrier flushes prior draw calls in the middle of a
|
||||
* render pass. The two cases can be distinguished by whether a
|
||||
* render_info is provided.
|
||||
*
|
||||
* When the render area is unaligned, we force preloading to preserve
|
||||
* contents falling outside of the render area. We also make sure the
|
||||
* initial attachment clears are performed.
|
||||
*/
|
||||
struct panvk_cmd_graphics_state *state = &cmdbuf->state.gfx;
|
||||
struct pan_fb_info *fbinfo = &state->render.fb.info;
|
||||
VkClearAttachment clear_atts[MAX_RTS + 2];
|
||||
uint32_t clear_att_count = 0;
|
||||
|
||||
if (!state->render.bound_attachments)
|
||||
return;
|
||||
|
||||
for (unsigned i = 0; i < fbinfo->rt_count; i++) {
|
||||
if (!fbinfo->rts[i].view)
|
||||
continue;
|
||||
|
||||
fbinfo->rts[i].preload = true;
|
||||
|
||||
if (fbinfo->rts[i].clear) {
|
||||
if (render_info) {
|
||||
const VkRenderingAttachmentInfo *att =
|
||||
&render_info->pColorAttachments[i];
|
||||
|
||||
clear_atts[clear_att_count++] = (VkClearAttachment){
|
||||
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
|
||||
.colorAttachment = i,
|
||||
.clearValue = att->clearValue,
|
||||
};
|
||||
}
|
||||
fbinfo->rts[i].clear = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (fbinfo->zs.view.zs) {
|
||||
fbinfo->zs.preload.z = true;
|
||||
|
||||
if (fbinfo->zs.clear.z) {
|
||||
if (render_info) {
|
||||
const VkRenderingAttachmentInfo *att =
|
||||
render_info->pDepthAttachment;
|
||||
|
||||
clear_atts[clear_att_count++] = (VkClearAttachment){
|
||||
.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT,
|
||||
.clearValue = att->clearValue,
|
||||
};
|
||||
}
|
||||
fbinfo->zs.clear.z = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (fbinfo->zs.view.s ||
|
||||
(fbinfo->zs.view.zs &&
|
||||
util_format_is_depth_and_stencil(fbinfo->zs.view.zs->format))) {
|
||||
fbinfo->zs.preload.s = true;
|
||||
|
||||
if (fbinfo->zs.clear.s) {
|
||||
if (render_info) {
|
||||
const VkRenderingAttachmentInfo *att =
|
||||
render_info->pStencilAttachment;
|
||||
|
||||
clear_atts[clear_att_count++] = (VkClearAttachment){
|
||||
.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT,
|
||||
.clearValue = att->clearValue,
|
||||
};
|
||||
}
|
||||
|
||||
fbinfo->zs.clear.s = false;
|
||||
}
|
||||
}
|
||||
|
||||
#if PAN_ARCH >= 10
|
||||
/* insert a barrier for preload */
|
||||
const VkMemoryBarrier2 mem_barrier = {
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2,
|
||||
.srcStageMask = VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT |
|
||||
VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT |
|
||||
VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
|
||||
.srcAccessMask = VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT |
|
||||
VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
|
||||
.dstStageMask = VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT,
|
||||
.dstAccessMask = VK_ACCESS_2_SHADER_SAMPLED_READ_BIT,
|
||||
};
|
||||
const VkDependencyInfo dep_info = {
|
||||
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
|
||||
.memoryBarrierCount = 1,
|
||||
.pMemoryBarriers = &mem_barrier,
|
||||
};
|
||||
panvk_per_arch(CmdPipelineBarrier2)(panvk_cmd_buffer_to_handle(cmdbuf),
|
||||
&dep_info);
|
||||
#endif
|
||||
|
||||
if (clear_att_count && render_info) {
|
||||
VkClearRect clear_rect = {
|
||||
.rect = render_info->renderArea,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = render_info->viewMask ? 1 : render_info->layerCount,
|
||||
};
|
||||
|
||||
panvk_per_arch(CmdClearAttachments)(panvk_cmd_buffer_to_handle(cmdbuf),
|
||||
clear_att_count, clear_atts, 1,
|
||||
&clear_rect);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
panvk_per_arch(cmd_preload_render_area_border)(
|
||||
struct panvk_cmd_buffer *cmdbuf, const VkRenderingInfo *render_info)
|
||||
{
|
||||
const unsigned meta_tile_size = pan_meta_tile_size(PAN_ARCH);
|
||||
struct panvk_cmd_graphics_state *state = &cmdbuf->state.gfx;
|
||||
struct pan_fb_info *fbinfo = &state->render.fb.info;
|
||||
|
||||
bool render_area_is_aligned =
|
||||
((fbinfo->draw_extent.minx | fbinfo->draw_extent.miny) %
|
||||
meta_tile_size) == 0 &&
|
||||
(fbinfo->draw_extent.maxx + 1 == fbinfo->width ||
|
||||
(fbinfo->draw_extent.maxx % meta_tile_size) == (meta_tile_size - 1)) &&
|
||||
(fbinfo->draw_extent.maxy + 1 == fbinfo->height ||
|
||||
(fbinfo->draw_extent.maxy % meta_tile_size) == (meta_tile_size - 1));
|
||||
|
||||
/* If the render area is aligned on the meta tile size, we're good. */
|
||||
if (!render_area_is_aligned)
|
||||
panvk_per_arch(cmd_force_fb_preload)(cmdbuf, render_info);
|
||||
}
|
||||
|
||||
static void
|
||||
prepare_iam_sysvals(struct panvk_cmd_buffer *cmdbuf, BITSET_WORD *dirty_sysvals)
|
||||
{
|
||||
const struct vk_input_attachment_location_state *ial =
|
||||
&cmdbuf->vk.dynamic_graphics_state.ial;
|
||||
struct panvk_input_attachment_info iam[INPUT_ATTACHMENT_MAP_SIZE];
|
||||
uint32_t catt_count =
|
||||
ial->color_attachment_count == MESA_VK_COLOR_ATTACHMENT_COUNT_UNKNOWN
|
||||
? MAX_RTS
|
||||
: ial->color_attachment_count;
|
||||
|
||||
memset(iam, ~0, sizeof(iam));
|
||||
|
||||
assert(catt_count <= MAX_RTS);
|
||||
|
||||
for (uint32_t i = 0; i < catt_count; i++) {
|
||||
if (ial->color_map[i] == MESA_VK_ATTACHMENT_UNUSED ||
|
||||
!(cmdbuf->state.gfx.render.bound_attachments &
|
||||
MESA_VK_RP_ATTACHMENT_COLOR_BIT(i)))
|
||||
continue;
|
||||
|
||||
VkFormat fmt = cmdbuf->state.gfx.render.color_attachments.fmts[i];
|
||||
enum pipe_format pfmt = vk_format_to_pipe_format(fmt);
|
||||
struct mali_internal_conversion_packed conv;
|
||||
uint32_t ia_idx = ial->color_map[i] + 1;
|
||||
assert(ia_idx < ARRAY_SIZE(iam));
|
||||
|
||||
iam[ia_idx].target = PANVK_COLOR_ATTACHMENT(i);
|
||||
|
||||
pan_pack(&conv, INTERNAL_CONVERSION, cfg) {
|
||||
cfg.memory_format =
|
||||
GENX(pan_dithered_format_from_pipe_format)(pfmt, false);
|
||||
#if PAN_ARCH < 9
|
||||
cfg.register_format =
|
||||
vk_format_is_uint(fmt) ? MALI_REGISTER_FILE_FORMAT_U32
|
||||
: vk_format_is_sint(fmt) ? MALI_REGISTER_FILE_FORMAT_I32
|
||||
: MALI_REGISTER_FILE_FORMAT_F32;
|
||||
#endif
|
||||
}
|
||||
|
||||
iam[ia_idx].conversion = conv.opaque[0];
|
||||
}
|
||||
|
||||
if (ial->depth_att != MESA_VK_ATTACHMENT_UNUSED) {
|
||||
uint32_t ia_idx =
|
||||
ial->depth_att == MESA_VK_ATTACHMENT_NO_INDEX ? 0 : ial->depth_att + 1;
|
||||
|
||||
assert(ia_idx < ARRAY_SIZE(iam));
|
||||
iam[ia_idx].target = PANVK_ZS_ATTACHMENT;
|
||||
|
||||
#if PAN_ARCH < 9
|
||||
/* On v7, we need to pass the depth format around. If we use a conversion
|
||||
* of zero, like we do on v9+, the GPU reports an INVALID_INSTR_ENC. */
|
||||
VkFormat fmt = cmdbuf->state.gfx.render.z_attachment.fmt;
|
||||
enum pipe_format pfmt = vk_format_to_pipe_format(fmt);
|
||||
struct mali_internal_conversion_packed conv;
|
||||
|
||||
pan_pack(&conv, INTERNAL_CONVERSION, cfg) {
|
||||
cfg.register_format = MALI_REGISTER_FILE_FORMAT_F32;
|
||||
cfg.memory_format =
|
||||
GENX(pan_dithered_format_from_pipe_format)(pfmt, false);
|
||||
}
|
||||
iam[ia_idx].conversion = conv.opaque[0];
|
||||
#endif
|
||||
}
|
||||
|
||||
if (ial->stencil_att != MESA_VK_ATTACHMENT_UNUSED) {
|
||||
uint32_t ia_idx =
|
||||
ial->stencil_att == MESA_VK_ATTACHMENT_NO_INDEX ? 0 : ial->stencil_att + 1;
|
||||
|
||||
assert(ia_idx < ARRAY_SIZE(iam));
|
||||
iam[ia_idx].target = PANVK_ZS_ATTACHMENT;
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < ARRAY_SIZE(iam); i++)
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals, iam[i], iam[i]);
|
||||
}
|
||||
|
||||
/* Empirically chosen: this is the value that gets
 * dEQP-VK.draw.renderpass.inverted_depth_ranges.nodepthclamp_deltazero
 * passing.
 */
#define MIN_DEPTH_CLIP_RANGE 37.7E-06f
|
||||
|
||||
void
|
||||
panvk_per_arch(cmd_prepare_draw_sysvals)(struct panvk_cmd_buffer *cmdbuf,
|
||||
const struct panvk_draw_info *info)
|
||||
{
|
||||
struct vk_color_blend_state *cb = &cmdbuf->vk.dynamic_graphics_state.cb;
|
||||
const struct panvk_shader_variant *fs =
|
||||
panvk_shader_only_variant(get_fs(cmdbuf));
|
||||
uint32_t noperspective_varyings = fs ? fs->info.varyings.noperspective : 0;
|
||||
BITSET_DECLARE(dirty_sysvals, MAX_SYSVAL_FAUS) = {0};
|
||||
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals, vs.noperspective_varyings,
|
||||
noperspective_varyings);
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals, vs.first_vertex, info->vertex.base);
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals, vs.base_instance, info->instance.base);
|
||||
|
||||
#if PAN_ARCH < 9
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals, vs.raw_vertex_offset,
|
||||
info->vertex.raw_offset);
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals, layer_id, info->layer_id);
|
||||
|
||||
/* iter13: VK_EXT_transform_feedback sysvals — always set (per draw),
|
||||
* reflect bound XFB state. set_gfx_sysval is a no-op if value unchanged. */
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals, vs.num_vertices, info->vertex.count);
|
||||
{
|
||||
const struct panvk_cmd_graphics_state *_gfx = &cmdbuf->state.gfx;
|
||||
/* iter13: default each XFB buffer address to PAN_SHADER_OOB_ADDRESS
|
||||
* (= 1<<63). This is the Panfrost-Gallium memory-sink idiom — the
|
||||
* Bifrost MMU silently discards stores to this address, so a pipeline
|
||||
* with XFB outputs used in a non-XFB draw (or in an XFB draw with
|
||||
* fewer bound buffers than the shader declares) is safe instead of
|
||||
* faulting. See gallium/drivers/panfrost/pan_cmdstream.c PAN_SYSVAL_XFB. */
|
||||
uint64_t _xa0 = PAN_SHADER_OOB_ADDRESS, _xa1 = PAN_SHADER_OOB_ADDRESS,
|
||||
_xa2 = PAN_SHADER_OOB_ADDRESS, _xa3 = PAN_SHADER_OOB_ADDRESS;
|
||||
if (_gfx->xfb.active) {
|
||||
if (_gfx->xfb.buffer_count > 0 && _gfx->xfb.buffers[0].addr)
|
||||
_xa0 = _gfx->xfb.buffers[0].addr + _gfx->xfb.buffers[0].offset;
|
||||
if (_gfx->xfb.buffer_count > 1 && _gfx->xfb.buffers[1].addr)
|
||||
_xa1 = _gfx->xfb.buffers[1].addr + _gfx->xfb.buffers[1].offset;
|
||||
if (_gfx->xfb.buffer_count > 2 && _gfx->xfb.buffers[2].addr)
|
||||
_xa2 = _gfx->xfb.buffers[2].addr + _gfx->xfb.buffers[2].offset;
|
||||
if (_gfx->xfb.buffer_count > 3 && _gfx->xfb.buffers[3].addr)
|
||||
_xa3 = _gfx->xfb.buffers[3].addr + _gfx->xfb.buffers[3].offset;
|
||||
}
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals, vs.xfb_address[0], _xa0);
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals, vs.xfb_address[1], _xa1);
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals, vs.xfb_address[2], _xa2);
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals, vs.xfb_address[3], _xa3);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (dyn_gfx_state_dirty(cmdbuf, CB_BLEND_CONSTANTS)) {
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(cb->blend_constants); i++) {
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals, blend.constants[i],
|
||||
cb->blend_constants[i]);
|
||||
}
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < MAX_RTS; i++) {
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals, fs.blend_descs[i],
|
||||
cmdbuf->state.gfx.fs.blend_descs[i]);
|
||||
}
|
||||
|
||||
if (dyn_gfx_state_dirty(cmdbuf, VP_VIEWPORTS) ||
|
||||
dyn_gfx_state_dirty(cmdbuf, VP_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE) ||
|
||||
dyn_gfx_state_dirty(cmdbuf, RS_DEPTH_CLIP_ENABLE) ||
|
||||
dyn_gfx_state_dirty(cmdbuf, RS_DEPTH_CLAMP_ENABLE)) {
|
||||
const struct vk_rasterization_state *rs =
|
||||
&cmdbuf->vk.dynamic_graphics_state.rs;
|
||||
const struct vk_viewport_state *vp =
|
||||
&cmdbuf->vk.dynamic_graphics_state.vp;
|
||||
const VkViewport *viewport = &vp->viewports[0];
|
||||
|
||||
/* Doing the viewport transform in the vertex shader and then depth
|
||||
* clipping with the viewport depth range gets a similar result to
|
||||
* clipping in clip-space, but loses precision when the viewport depth
|
||||
* range is very small. When minDepth == maxDepth, this completely
|
||||
* flattens the clip-space depth and results in never clipping.
|
||||
*
|
||||
* To work around this, set a lower limit on depth range when clipping is
|
||||
* enabled. This results in slightly incorrect fragment depth values, and
|
||||
* doesn't help with the precision loss, but at least clipping isn't
|
||||
* completely broken.
|
||||
*/
|
||||
float z_min = viewport->minDepth;
|
||||
float z_max = viewport->maxDepth;
|
||||
if (vk_rasterization_state_depth_clip_enable(rs) &&
|
||||
fabsf(z_max - z_min) < MIN_DEPTH_CLIP_RANGE) {
|
||||
float z_sign = z_min <= z_max ? 1.0f : -1.0f;
|
||||
|
||||
float z_center = 0.5f * (z_max + z_min);
|
||||
/* Bump offset off-center if necessary, to not go out of range */
|
||||
z_center = CLAMP(z_center, 0.5f * MIN_DEPTH_CLIP_RANGE,
|
||||
1.0f - 0.5f * MIN_DEPTH_CLIP_RANGE);
|
||||
|
||||
z_min = z_center - 0.5f * z_sign * MIN_DEPTH_CLIP_RANGE;
|
||||
z_max = z_center + 0.5f * z_sign * MIN_DEPTH_CLIP_RANGE;
|
||||
}
|
||||
|
||||
/* Upload the viewport scale. Defined as (px/2, py/2, pz) at the start of
|
||||
* section 24.5 ("Controlling the Viewport") of the Vulkan spec. At the
|
||||
* end of the section, the spec defines:
|
||||
*
|
||||
* px = width
|
||||
* py = height
|
||||
* pz = maxDepth - minDepth if negativeOneToOne is false
|
||||
* pz = (maxDepth - minDepth) / 2 if negativeOneToOne is true
|
||||
*/
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals, viewport.scale.x,
|
||||
0.5f * viewport->width);
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals, viewport.scale.y,
|
||||
0.5f * viewport->height);
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals, viewport.scale.z,
|
||||
vp->depth_clip_negative_one_to_one ?
|
||||
0.5f * (z_max - z_min) : z_max - z_min);
|
||||
|
||||
/* Upload the viewport offset. Defined as (ox, oy, oz) at the start of
|
||||
* section 24.5 ("Controlling the Viewport") of the Vulkan spec. At the
|
||||
* end of the section, the spec defines:
|
||||
*
|
||||
* ox = x + width/2
|
||||
* oy = y + height/2
|
||||
* oz = minDepth if negativeOneToOne is false
|
||||
* oz = (maxDepth + minDepth) / 2 if negativeOneToOne is true
|
||||
*/
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals, viewport.offset.x,
|
||||
(0.5f * viewport->width) + viewport->x);
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals, viewport.offset.y,
|
||||
(0.5f * viewport->height) + viewport->y);
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals, viewport.offset.z,
|
||||
vp->depth_clip_negative_one_to_one ?
|
||||
0.5f * (z_min + z_max) : z_min);
|
||||
|
||||
}
|
||||
|
||||
if (dyn_gfx_state_dirty(cmdbuf, INPUT_ATTACHMENT_MAP))
|
||||
prepare_iam_sysvals(cmdbuf, dirty_sysvals);
|
||||
|
||||
const struct panvk_shader_variant *vs =
|
||||
panvk_shader_hw_variant(cmdbuf->state.gfx.vs.shader);
|
||||
|
||||
#if PAN_ARCH < 9
|
||||
struct panvk_descriptor_state *desc_state = &cmdbuf->state.gfx.desc_state;
|
||||
struct panvk_shader_desc_state *vs_desc_state = &cmdbuf->state.gfx.vs.desc;
|
||||
struct panvk_shader_desc_state *fs_desc_state = &cmdbuf->state.gfx.fs.desc;
|
||||
|
||||
if (gfx_state_dirty(cmdbuf, DESC_STATE) || gfx_state_dirty(cmdbuf, VS)) {
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals,
|
||||
desc.sets[PANVK_DESC_TABLE_VS_DYN_SSBOS],
|
||||
vs_desc_state->dyn_ssbos);
|
||||
}
|
||||
|
||||
if (gfx_state_dirty(cmdbuf, DESC_STATE) || gfx_state_dirty(cmdbuf, FS)) {
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals,
|
||||
desc.sets[PANVK_DESC_TABLE_FS_DYN_SSBOS],
|
||||
fs_desc_state->dyn_ssbos);
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < MAX_SETS; i++) {
|
||||
uint32_t used_set_mask =
|
||||
vs->desc_info.used_set_mask | (fs ? fs->desc_info.used_set_mask : 0);
|
||||
|
||||
if (used_set_mask & BITFIELD_BIT(i)) {
|
||||
set_gfx_sysval(cmdbuf, dirty_sysvals, desc.sets[i],
|
||||
desc_state->sets[i]->descs.dev);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/* We mask the dirty sysvals by the shader usage, and only flag
|
||||
* the push uniforms dirty if those intersect. */
|
||||
BITSET_DECLARE(dirty_shader_sysvals, MAX_SYSVAL_FAUS);
|
||||
BITSET_AND(dirty_shader_sysvals, dirty_sysvals, vs->fau.used_sysvals);
|
||||
if (!BITSET_IS_EMPTY(dirty_shader_sysvals))
|
||||
gfx_state_set_dirty(cmdbuf, VS_PUSH_UNIFORMS);
|
||||
|
||||
if (fs) {
|
||||
BITSET_AND(dirty_shader_sysvals, dirty_sysvals, fs->fau.used_sysvals);
|
||||
|
||||
/* If blend constants are not read by the blend shader, we can consider
|
||||
* they are not read at all, so clear the dirty bits to avoid re-emitting
|
||||
* FAUs when we can. */
|
||||
if (!cmdbuf->state.gfx.cb.info.shader_loads_blend_const)
|
||||
BITSET_CLEAR_COUNT(dirty_shader_sysvals, 0, 4);
|
||||
|
||||
if (!BITSET_IS_EMPTY(dirty_shader_sysvals))
|
||||
gfx_state_set_dirty(cmdbuf, FS_PUSH_UNIFORMS);
|
||||
}
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
panvk_per_arch(CmdBindVertexBuffers2)(VkCommandBuffer commandBuffer,
|
||||
uint32_t firstBinding,
|
||||
uint32_t bindingCount,
|
||||
const VkBuffer *pBuffers,
|
||||
const VkDeviceSize *pOffsets,
|
||||
const VkDeviceSize *pSizes,
|
||||
const VkDeviceSize *pStrides)
|
||||
{
|
||||
VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
|
||||
|
||||
assert(firstBinding + bindingCount <= MAX_VBS);
|
||||
|
||||
if (pStrides) {
|
||||
vk_cmd_set_vertex_binding_strides(&cmdbuf->vk, firstBinding,
|
||||
bindingCount, pStrides);
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < bindingCount; i++) {
|
||||
VK_FROM_HANDLE(panvk_buffer, buffer, pBuffers[i]);
|
||||
|
||||
if (buffer) {
|
||||
cmdbuf->state.gfx.vb.bufs[firstBinding + i].address =
|
||||
panvk_buffer_gpu_ptr(buffer, pOffsets[i]);
|
||||
cmdbuf->state.gfx.vb.bufs[firstBinding + i].size = panvk_buffer_range(
|
||||
buffer, pOffsets[i], pSizes ? pSizes[i] : VK_WHOLE_SIZE);
|
||||
} else {
|
||||
cmdbuf->state.gfx.vb.bufs[firstBinding + i].address = 0;
|
||||
cmdbuf->state.gfx.vb.bufs[firstBinding + i].size = 0;
|
||||
}
|
||||
}
|
||||
|
||||
cmdbuf->state.gfx.vb.count =
|
||||
MAX2(cmdbuf->state.gfx.vb.count, firstBinding + bindingCount);
|
||||
gfx_state_set_dirty(cmdbuf, VB);
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
panvk_per_arch(CmdBindIndexBuffer2)(VkCommandBuffer commandBuffer,
|
||||
VkBuffer buffer, VkDeviceSize offset,
|
||||
VkDeviceSize size, VkIndexType indexType)
|
||||
{
|
||||
VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
|
||||
VK_FROM_HANDLE(panvk_buffer, buf, buffer);
|
||||
|
||||
if (buf) {
|
||||
cmdbuf->state.gfx.ib.size = panvk_buffer_range(buf, offset, size);
|
||||
assert(cmdbuf->state.gfx.ib.size <= UINT32_MAX);
|
||||
cmdbuf->state.gfx.ib.dev_addr = panvk_buffer_gpu_ptr(buf, offset);
|
||||
} else {
|
||||
cmdbuf->state.gfx.ib.size = 0;
|
||||
/* In case of NullDescriptors, we need to set a non-NULL address and rely
|
||||
* on out-of-bounds behavior against the zero size of the buffer. Note
|
||||
* that this only works for v10+, as v9 does not have a way to specify the
|
||||
* index buffer size. */
|
||||
cmdbuf->state.gfx.ib.dev_addr = PAN_ARCH >= 10 ? 0x1000 : 0;
|
||||
}
|
||||
cmdbuf->state.gfx.ib.index_size = vk_index_type_to_bytes(indexType);
|
||||
|
||||
gfx_state_set_dirty(cmdbuf, IB);
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user