Files
panvk-bifrost/mesa-panvk-bifrost/iter13/applied_state/panvk_cmd_draw.h
T
marfrit a4e7d8ab90 initial seed: retrofit campaign lineage from local working trees
panvk-bifrost campaigns (r1..r4 Vulkan compositor + r5.video1 Vulkan
video decode) shipped before this repo existed; the deliverable
patches live in marfrit-packages, but the reasoning chain, phase docs,
and source-state evidence lived only in local working trees on the
development host.

This retrofit imports:
- mesa-panvk-bifrost/   — r1..r4 era phase docs (iter1..iter18)
                          (libmali stub blobs at iter18/blob/ excluded
                          — 109MB of RE artifacts replaced with a README
                          pointer)
- mesa-panvk-bifrost-video/ — sibling campaign phase docs + probe
- evidence/             — frozen .tgz source snapshots at each milestone
                          (basis for the 0005 patch diff generation)

Future iterations should branch off here from day one, so each iter is
a commit rather than a snapshot. See [[feedback-session-local-process-pins]]
for the process drift this retrofit closes.

Total: 1.9 MB across 124 files.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-23 05:25:37 +02:00

502 lines
16 KiB
C

/*
* Copyright © 2024 Collabora Ltd.
* SPDX-License-Identifier: MIT
*/
#ifndef PANVK_CMD_DRAW_H
#define PANVK_CMD_DRAW_H
#ifndef PAN_ARCH
#error "PAN_ARCH must be defined"
#endif
#include "panvk_blend.h"
#include "panvk_cmd_desc_state.h"
#include "panvk_cmd_query.h"
#include "panvk_entrypoints.h"
#include "panvk_image.h"
#include "panvk_image_view.h"
#include "panvk_physical_device.h"
#include "panvk_shader.h"
#include "vk_command_buffer.h"
#include "vk_format.h"
#include "util/u_tristate.h"
#include "pan_props.h"
#define MAX_VBS 16
struct panvk_cmd_buffer;
struct panvk_attrib_buf {
uint64_t address;
unsigned size;
};
struct panvk_resolve_attachment {
VkResolveModeFlagBits mode;
struct panvk_image_view *dst_iview;
};
struct panvk_rendering_state {
VkRenderingFlags flags;
uint32_t layer_count;
uint32_t view_mask;
enum u_tristate first_provoking_vertex;
enum vk_rp_attachment_flags bound_attachments;
struct {
struct panvk_image_view *iviews[MAX_RTS];
/* If non-null, preload_iviews[i] overrides iviews[i] for preloads. */
struct panvk_image_view *preload_iviews[MAX_RTS];
VkFormat fmts[MAX_RTS];
uint8_t samples[MAX_RTS];
struct panvk_resolve_attachment resolve[MAX_RTS];
} color_attachments;
struct pan_image_view zs_pview;
struct pan_image_view s_pview;
struct {
struct panvk_image_view *iview;
/* If non-null, preload_iview overrides iview for preloads. */
struct panvk_image_view *preload_iview;
VkFormat fmt;
struct panvk_resolve_attachment resolve;
} z_attachment, s_attachment;
struct {
struct pan_fb_info info;
bool crc_valid[MAX_RTS];
/* nr_samples to be used before framebuffer / tiler descriptor are emitted */
uint32_t nr_samples;
#if PAN_ARCH < 9
uint32_t bo_count;
struct pan_kmod_bo *bos[(MAX_RTS * PANVK_MAX_PLANES) + 2];
#endif
} fb;
#if PAN_ARCH >= 10
struct pan_ptr fbds;
uint64_t tiler;
/* When a secondary command buffer has to flush draws, it disturbs the
* inherited context, and the primary command buffer needs to know. */
bool invalidate_inherited_ctx;
/* True if the last render pass was suspended. */
bool suspended;
/* Blocks that can patch to flip the provoking vertex mode if we need to
* emit FBDs/TDs before we know which mode the application is using */
struct cs_maybe *maybe_set_tds_provoking_vertex;
struct cs_maybe *maybe_set_fbds_provoking_vertex;
struct {
/* != 0 if the render pass contains one or more occlusion queries to
* signal. */
uint64_t chain;
/* Point to the syncobj of the last occlusion query that was passed
* to a draw. */
uint64_t last;
} oq;
#endif
};
enum panvk_cmd_graphics_dirty_state {
PANVK_CMD_GRAPHICS_DIRTY_VS,
PANVK_CMD_GRAPHICS_DIRTY_FS,
PANVK_CMD_GRAPHICS_DIRTY_VB,
PANVK_CMD_GRAPHICS_DIRTY_IB,
PANVK_CMD_GRAPHICS_DIRTY_OQ,
PANVK_CMD_GRAPHICS_DIRTY_DESC_STATE,
PANVK_CMD_GRAPHICS_DIRTY_RENDER_STATE,
PANVK_CMD_GRAPHICS_DIRTY_VS_PUSH_UNIFORMS,
PANVK_CMD_GRAPHICS_DIRTY_FS_PUSH_UNIFORMS,
PANVK_CMD_GRAPHICS_DIRTY_STATE_COUNT,
};
struct panvk_cmd_graphics_state {
struct panvk_descriptor_state desc_state;
struct {
struct vk_vertex_input_state vi;
struct vk_sample_locations_state sl;
} dynamic;
struct panvk_occlusion_query_state occlusion_query;
#if PAN_ARCH >= 10
struct panvk_prims_generated_query_state prims_generated_query;
#endif
struct panvk_graphics_sysvals sysvals;
#if PAN_ARCH < 9
/* iter13: VK_EXT_transform_feedback state (JM-class only for now). */
struct {
bool active;
uint32_t buffer_count;
struct {
uint64_t addr;
uint64_t offset;
uint64_t size;
} buffers[4];
} xfb;
#endif
#if PAN_ARCH < 9
struct panvk_shader_link link;
#endif
struct {
const struct panvk_shader *shader;
struct panvk_shader_desc_state desc;
uint64_t blend_descs[MAX_RTS];
uint64_t push_uniforms;
bool required;
#if PAN_ARCH < 9
uint64_t rsd;
#endif
} fs;
struct {
const struct panvk_shader *shader;
struct panvk_shader_desc_state desc;
uint64_t push_uniforms;
#if PAN_ARCH < 9
uint64_t attribs;
uint64_t attrib_bufs;
uint64_t indirect_attribs_infos;
uint64_t indirect_attrib_bufs_infos;
uint64_t indirect_varying_bufs_infos;
bool previous_draw_was_indirect;
#endif
} vs;
struct {
struct panvk_attrib_buf bufs[MAX_VBS];
unsigned count;
} vb;
#if PAN_ARCH >= 10
struct {
uint32_t attribs_changing_on_base_instance;
} vi;
#endif
/* Index buffer */
struct {
uint64_t dev_addr;
uint64_t size;
uint8_t index_size;
} ib;
struct {
struct panvk_blend_info info;
} cb;
struct panvk_rendering_state render;
bool vk_meta;
#if PAN_ARCH < 9
uint64_t vpd;
#endif
#if PAN_ARCH >= 10
uint64_t tsd;
#endif
BITSET_DECLARE(dirty, PANVK_CMD_GRAPHICS_DIRTY_STATE_COUNT);
};
#define dyn_gfx_state_dirty(__cmdbuf, __name) \
BITSET_TEST((__cmdbuf)->vk.dynamic_graphics_state.dirty, \
MESA_VK_DYNAMIC_##__name)
#define gfx_state_dirty(__cmdbuf, __name) \
BITSET_TEST((__cmdbuf)->state.gfx.dirty, PANVK_CMD_GRAPHICS_DIRTY_##__name)
#define gfx_state_set_dirty(__cmdbuf, __name) \
BITSET_SET((__cmdbuf)->state.gfx.dirty, PANVK_CMD_GRAPHICS_DIRTY_##__name)
#define gfx_state_clear_all_dirty(__cmdbuf) \
BITSET_ZERO((__cmdbuf)->state.gfx.dirty)
#define gfx_state_set_all_dirty(__cmdbuf) \
BITSET_ONES((__cmdbuf)->state.gfx.dirty)
#define set_gfx_sysval(__cmdbuf, __dirty, __name, __val) \
do { \
struct panvk_graphics_sysvals __new_sysval; \
__new_sysval.__name = __val; \
if (memcmp(&(__cmdbuf)->state.gfx.sysvals.__name, &__new_sysval.__name, \
sizeof(__new_sysval.__name))) { \
(__cmdbuf)->state.gfx.sysvals.__name = __new_sysval.__name; \
BITSET_SET_RANGE(__dirty, sysval_fau_start(graphics, __name), \
sysval_fau_end(graphics, __name)); \
} \
} while (0)
#if PAN_ARCH >= 10
struct panvk_device_draw_context {
struct panvk_priv_bo *fns_bo;
uint64_t fn_set_fbds_provoking_vertex_stride;
};
#endif
static inline void
panvk_depth_range(const struct panvk_cmd_graphics_state *state,
const struct vk_viewport_state *vp,
float *z_min, float *z_max)
{
float a = vp->depth_clip_negative_one_to_one ?
state->sysvals.viewport.offset.z - state->sysvals.viewport.scale.z :
state->sysvals.viewport.offset.z;
float b = state->sysvals.viewport.offset.z + state->sysvals.viewport.scale.z;
*z_min = MIN2(a, b);
*z_max = MAX2(a, b);
}
static inline uint32_t
panvk_select_tiler_hierarchy_mask(const struct panvk_physical_device *phys_dev,
const struct panvk_cmd_graphics_state *state,
unsigned bin_ptr_mem_budget)
{
struct pan_tiler_features tiler_features =
pan_query_tiler_features(&phys_dev->kmod.dev->props);
uint32_t hierarchy_mask = GENX(pan_select_tiler_hierarchy_mask)(
state->render.fb.info.width, state->render.fb.info.height,
tiler_features.max_levels, state->render.fb.info.tile_size,
bin_ptr_mem_budget);
return hierarchy_mask;
}
static inline bool
fs_required(const struct panvk_cmd_graphics_state *state,
const struct vk_dynamic_graphics_state *dyn_state)
{
const struct panvk_shader_variant *fs =
panvk_shader_only_variant(state->fs.shader);
const struct pan_shader_info *fs_info = fs ? &fs->info : NULL;
const struct vk_color_blend_state *cb = &dyn_state->cb;
const struct vk_rasterization_state *rs = &dyn_state->rs;
if (rs->rasterizer_discard_enable || !fs_info)
return false;
/* If we generally have side effects */
if (fs_info->fs.sidefx)
return true;
/* If colour is written we need to execute */
for (unsigned i = 0; i < cb->attachment_count; ++i) {
if ((cb->color_write_enables & BITFIELD_BIT(i)) &&
cb->attachments[i].write_mask)
return true;
}
/* If alpha-to-coverage is enabled, we need to run the fragment shader even
* if we don't have a color attachment, so depth/stencil updates can be
* discarded if alpha, and thus coverage, is 0. */
if (dyn_state->ms.alpha_to_coverage_enable)
return true;
/* If the sample mask is updated, we need to run the fragment shader,
* otherwise the fixed-function depth/stencil results will apply to all
* samples. */
if (fs_info->outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK))
return true;
/* If depth is written and not implied we need to execute.
* TODO: Predicate on Z/S writes being enabled */
return (fs_info->fs.writes_depth || fs_info->fs.writes_stencil);
}
static inline bool
cached_fs_required(ASSERTED const struct panvk_cmd_graphics_state *state,
ASSERTED const struct vk_dynamic_graphics_state *dyn_state,
bool cached_value)
{
/* Make sure the cached value was properly initialized. */
assert(fs_required(state, dyn_state) == cached_value);
return cached_value;
}
#define get_fs(__cmdbuf) \
(cached_fs_required(&(__cmdbuf)->state.gfx, \
&(__cmdbuf)->vk.dynamic_graphics_state, \
(__cmdbuf)->state.gfx.fs.required) \
? (__cmdbuf)->state.gfx.fs.shader \
: NULL)
/* Anything that might change the value returned by get_fs() makes users of the
* fragment shader dirty, because not using the fragment shader (when
* fs_required() returns false) impacts various other things, like VS -> FS
* linking in the JM backend, or the update of the fragment shader pointer in
* the CSF backend. Call gfx_state_dirty(cmdbuf, FS) if you only care about
* fragment shader updates. */
#define fs_user_dirty(__cmdbuf) \
(gfx_state_dirty(cmdbuf, FS) || \
dyn_gfx_state_dirty(cmdbuf, RS_RASTERIZER_DISCARD_ENABLE) || \
dyn_gfx_state_dirty(cmdbuf, CB_ATTACHMENT_COUNT) || \
dyn_gfx_state_dirty(cmdbuf, CB_COLOR_WRITE_ENABLES) || \
dyn_gfx_state_dirty(cmdbuf, CB_WRITE_MASKS) || \
dyn_gfx_state_dirty(cmdbuf, MS_ALPHA_TO_COVERAGE_ENABLE))
/* After a draw, all dirty flags are cleared except the FS dirty flag which
* needs to be set again if the draw didn't use the fragment shader. */
#define clear_dirty_after_draw(__cmdbuf) \
do { \
bool __set_fs_dirty = \
(__cmdbuf)->state.gfx.fs.shader != get_fs(__cmdbuf); \
bool __set_fs_push_dirty = \
__set_fs_dirty && gfx_state_dirty(__cmdbuf, FS_PUSH_UNIFORMS); \
vk_dynamic_graphics_state_clear_dirty( \
&(__cmdbuf)->vk.dynamic_graphics_state); \
gfx_state_clear_all_dirty(__cmdbuf); \
if (__set_fs_dirty) \
gfx_state_set_dirty(__cmdbuf, FS); \
if (__set_fs_push_dirty) \
gfx_state_set_dirty(__cmdbuf, FS_PUSH_UNIFORMS); \
} while (0)
#if PAN_ARCH >= 10
VkResult
panvk_per_arch(device_draw_context_init)(struct panvk_device *dev);
void
panvk_per_arch(device_draw_context_cleanup)(struct panvk_device *dev);
#endif
void
panvk_per_arch(cmd_init_render_state)(struct panvk_cmd_buffer *cmdbuf,
const VkRenderingInfo *pRenderingInfo);
void
panvk_per_arch(cmd_force_fb_preload)(struct panvk_cmd_buffer *cmdbuf,
const VkRenderingInfo *render_info);
void
panvk_per_arch(cmd_preload_render_area_border)(struct panvk_cmd_buffer *cmdbuf,
const VkRenderingInfo *render_info);
void panvk_per_arch(cmd_select_tile_size)(struct panvk_cmd_buffer *cmdbuf);
struct panvk_draw_info {
struct {
uint32_t size;
uint32_t offset;
} index;
struct {
#if PAN_ARCH < 9
int32_t raw_offset;
#endif
int32_t base;
uint32_t count;
} vertex;
struct {
int32_t base;
uint32_t count;
} instance;
struct {
uint64_t buffer_dev_addr;
uint64_t count_buffer_dev_addr;
uint32_t draw_count;
uint32_t stride;
} indirect;
#if PAN_ARCH < 9
uint32_t layer_id;
#endif
};
void
panvk_per_arch(cmd_prepare_draw_sysvals)(struct panvk_cmd_buffer *cmdbuf,
const struct panvk_draw_info *info);
static inline uint32_t
color_attachment_written_mask(
const struct panvk_shader_variant *fs,
const struct vk_color_attachment_location_state *cal)
{
uint32_t written_by_shader =
(fs->info.outputs_written >> FRAG_RESULT_DATA0) & BITFIELD_MASK(8);
uint32_t catt_written_mask = 0;
for (uint32_t i = 0; i < MAX_RTS; i++) {
if (cal->color_map[i] == MESA_VK_ATTACHMENT_UNUSED)
continue;
uint32_t shader_rt = cal->color_map[i];
if (written_by_shader & BITFIELD_BIT(shader_rt))
catt_written_mask |= BITFIELD_BIT(i);
}
return catt_written_mask;
}
static inline uint32_t
color_attachment_read_mask(const struct panvk_shader_variant *fs,
const struct vk_input_attachment_location_state *ial,
uint8_t color_attachment_mask)
{
uint32_t color_attachment_count =
ial->color_attachment_count == MESA_VK_COLOR_ATTACHMENT_COUNT_UNKNOWN
? util_last_bit(color_attachment_mask)
: ial->color_attachment_count;
uint32_t catt_read_mask = 0;
for (uint32_t i = 0; i < color_attachment_count; i++) {
if (ial->color_map[i] == MESA_VK_ATTACHMENT_UNUSED)
continue;
uint32_t catt_idx = ial->color_map[i] + 1;
if (fs->fs.input_attachment_read & BITFIELD_BIT(catt_idx)) {
assert(color_attachment_mask & BITFIELD_BIT(i));
catt_read_mask |= BITFIELD_BIT(i);
}
}
return catt_read_mask;
}
static inline bool
z_attachment_read(const struct panvk_shader_variant *fs,
const struct vk_input_attachment_location_state *ial)
{
uint32_t depth_mask = ial->depth_att == MESA_VK_ATTACHMENT_NO_INDEX
? BITFIELD_BIT(0)
: ial->depth_att != MESA_VK_ATTACHMENT_UNUSED
? BITFIELD_BIT(ial->depth_att + 1)
: 0;
return depth_mask & fs->fs.input_attachment_read;
}
static inline bool
s_attachment_read(const struct panvk_shader_variant *fs,
const struct vk_input_attachment_location_state *ial)
{
uint32_t stencil_mask = ial->stencil_att == MESA_VK_ATTACHMENT_NO_INDEX
? BITFIELD_BIT(0)
: ial->stencil_att != MESA_VK_ATTACHMENT_UNUSED
? BITFIELD_BIT(ial->stencil_att + 1)
: 0;
return stencil_mask & fs->fs.input_attachment_read;
}
#endif