a4e7d8ab90
panvk-bifrost campaigns (r1..r4 Vulkan compositor + r5.video1 Vulkan
video decode) shipped before this repo existed; the deliverable
patches live in marfrit-packages, but the reasoning chain, phase docs,
and source-state evidence lived only in local working trees on the
development host.
This retrofit imports:
- mesa-panvk-bifrost/ — r1..r4 era phase docs (iter1..iter18). The libmali
  stub blobs at iter18/blob/ are excluded: 109MB of RE artifacts, replaced
  with a README pointer.
- mesa-panvk-bifrost-video/ — sibling campaign phase docs + probe
- evidence/ — frozen .tgz source snapshots at each milestone (basis for the
  0005 patch diff generation)
Future iterations should branch off here from day one, so each iter is
a commit rather than a snapshot. See [[feedback-session-local-process-pins]]
for the process drift this retrofit closes.
Total: 1.9 MB across 124 files.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
502 lines
16 KiB
C
502 lines
16 KiB
C
/*
|
|
* Copyright © 2024 Collabora Ltd.
|
|
* SPDX-License-Identifier: MIT
|
|
*/
|
|
|
|
#ifndef PANVK_CMD_DRAW_H
|
|
#define PANVK_CMD_DRAW_H
|
|
|
|
#ifndef PAN_ARCH
|
|
#error "PAN_ARCH must be defined"
|
|
#endif
|
|
|
|
#include "panvk_blend.h"
|
|
#include "panvk_cmd_desc_state.h"
|
|
#include "panvk_cmd_query.h"
|
|
#include "panvk_entrypoints.h"
|
|
#include "panvk_image.h"
|
|
#include "panvk_image_view.h"
|
|
#include "panvk_physical_device.h"
|
|
#include "panvk_shader.h"
|
|
|
|
#include "vk_command_buffer.h"
|
|
#include "vk_format.h"
|
|
#include "util/u_tristate.h"
|
|
|
|
#include "pan_props.h"
|
|
|
|
/* Number of vertex-buffer slots tracked in panvk_cmd_graphics_state::vb. */
#define MAX_VBS 16
|
|
|
|
/* Forward declaration: the prototypes in this header only take pointers to
 * the command buffer. */
struct panvk_cmd_buffer;
|
|
|
|
/* Address/size pair for one vertex buffer slot (element type of
 * panvk_cmd_graphics_state::vb.bufs[]). */
struct panvk_attrib_buf {
   uint64_t address;
   unsigned size; /* presumably in bytes -- TODO confirm against the setter */
};
|
|
|
|
struct panvk_resolve_attachment {
|
|
VkResolveModeFlagBits mode;
|
|
struct panvk_image_view *dst_iview;
|
|
};
|
|
|
|
struct panvk_rendering_state {
|
|
VkRenderingFlags flags;
|
|
uint32_t layer_count;
|
|
uint32_t view_mask;
|
|
enum u_tristate first_provoking_vertex;
|
|
|
|
enum vk_rp_attachment_flags bound_attachments;
|
|
struct {
|
|
struct panvk_image_view *iviews[MAX_RTS];
|
|
/* If non-null, preload_iviews[i] overrides iviews[i] for preloads. */
|
|
struct panvk_image_view *preload_iviews[MAX_RTS];
|
|
VkFormat fmts[MAX_RTS];
|
|
uint8_t samples[MAX_RTS];
|
|
struct panvk_resolve_attachment resolve[MAX_RTS];
|
|
} color_attachments;
|
|
|
|
struct pan_image_view zs_pview;
|
|
struct pan_image_view s_pview;
|
|
|
|
struct {
|
|
struct panvk_image_view *iview;
|
|
/* If non-null, preload_iview overrides iview for preloads. */
|
|
struct panvk_image_view *preload_iview;
|
|
VkFormat fmt;
|
|
struct panvk_resolve_attachment resolve;
|
|
} z_attachment, s_attachment;
|
|
|
|
struct {
|
|
struct pan_fb_info info;
|
|
bool crc_valid[MAX_RTS];
|
|
|
|
/* nr_samples to be used before framebuffer / tiler descriptor are emitted */
|
|
uint32_t nr_samples;
|
|
|
|
#if PAN_ARCH < 9
|
|
uint32_t bo_count;
|
|
struct pan_kmod_bo *bos[(MAX_RTS * PANVK_MAX_PLANES) + 2];
|
|
#endif
|
|
} fb;
|
|
|
|
#if PAN_ARCH >= 10
|
|
struct pan_ptr fbds;
|
|
uint64_t tiler;
|
|
|
|
/* When a secondary command buffer has to flush draws, it disturbs the
|
|
* inherited context, and the primary command buffer needs to know. */
|
|
bool invalidate_inherited_ctx;
|
|
|
|
/* True if the last render pass was suspended. */
|
|
bool suspended;
|
|
|
|
/* Blocks that can patch to flip the provoking vertex mode if we need to
|
|
* emit FBDs/TDs before we know which mode the application is using */
|
|
struct cs_maybe *maybe_set_tds_provoking_vertex;
|
|
struct cs_maybe *maybe_set_fbds_provoking_vertex;
|
|
|
|
struct {
|
|
/* != 0 if the render pass contains one or more occlusion queries to
|
|
* signal. */
|
|
uint64_t chain;
|
|
|
|
/* Point to the syncobj of the last occlusion query that was passed
|
|
* to a draw. */
|
|
uint64_t last;
|
|
} oq;
|
|
#endif
|
|
};
|
|
|
|
/*
 * Driver-side graphics dirty bits. Each enumerator is a bit index into
 * panvk_cmd_graphics_state::dirty and is referenced by its suffix through
 * the gfx_state_dirty() / gfx_state_set_dirty() macros.
 */
enum panvk_cmd_graphics_dirty_state {
   PANVK_CMD_GRAPHICS_DIRTY_VS,
   PANVK_CMD_GRAPHICS_DIRTY_FS,
   PANVK_CMD_GRAPHICS_DIRTY_VB,
   PANVK_CMD_GRAPHICS_DIRTY_IB,
   PANVK_CMD_GRAPHICS_DIRTY_OQ,
   PANVK_CMD_GRAPHICS_DIRTY_DESC_STATE,
   PANVK_CMD_GRAPHICS_DIRTY_RENDER_STATE,
   PANVK_CMD_GRAPHICS_DIRTY_VS_PUSH_UNIFORMS,
   PANVK_CMD_GRAPHICS_DIRTY_FS_PUSH_UNIFORMS,

   /* Number of bits above; sizes the `dirty` bitset, so keep it last. */
   PANVK_CMD_GRAPHICS_DIRTY_STATE_COUNT,
};
|
|
|
|
struct panvk_cmd_graphics_state {
|
|
struct panvk_descriptor_state desc_state;
|
|
|
|
struct {
|
|
struct vk_vertex_input_state vi;
|
|
struct vk_sample_locations_state sl;
|
|
} dynamic;
|
|
|
|
struct panvk_occlusion_query_state occlusion_query;
|
|
#if PAN_ARCH >= 10
|
|
struct panvk_prims_generated_query_state prims_generated_query;
|
|
#endif
|
|
struct panvk_graphics_sysvals sysvals;
|
|
|
|
#if PAN_ARCH < 9
|
|
/* iter13: VK_EXT_transform_feedback state (JM-class only for now). */
|
|
struct {
|
|
bool active;
|
|
uint32_t buffer_count;
|
|
struct {
|
|
uint64_t addr;
|
|
uint64_t offset;
|
|
uint64_t size;
|
|
} buffers[4];
|
|
} xfb;
|
|
#endif
|
|
|
|
#if PAN_ARCH < 9
|
|
struct panvk_shader_link link;
|
|
#endif
|
|
|
|
struct {
|
|
const struct panvk_shader *shader;
|
|
struct panvk_shader_desc_state desc;
|
|
uint64_t blend_descs[MAX_RTS];
|
|
uint64_t push_uniforms;
|
|
bool required;
|
|
#if PAN_ARCH < 9
|
|
uint64_t rsd;
|
|
#endif
|
|
} fs;
|
|
|
|
struct {
|
|
const struct panvk_shader *shader;
|
|
struct panvk_shader_desc_state desc;
|
|
uint64_t push_uniforms;
|
|
#if PAN_ARCH < 9
|
|
uint64_t attribs;
|
|
uint64_t attrib_bufs;
|
|
uint64_t indirect_attribs_infos;
|
|
uint64_t indirect_attrib_bufs_infos;
|
|
uint64_t indirect_varying_bufs_infos;
|
|
bool previous_draw_was_indirect;
|
|
#endif
|
|
} vs;
|
|
|
|
struct {
|
|
struct panvk_attrib_buf bufs[MAX_VBS];
|
|
unsigned count;
|
|
} vb;
|
|
|
|
#if PAN_ARCH >= 10
|
|
struct {
|
|
uint32_t attribs_changing_on_base_instance;
|
|
} vi;
|
|
#endif
|
|
|
|
/* Index buffer */
|
|
struct {
|
|
uint64_t dev_addr;
|
|
uint64_t size;
|
|
uint8_t index_size;
|
|
} ib;
|
|
|
|
struct {
|
|
struct panvk_blend_info info;
|
|
} cb;
|
|
|
|
struct panvk_rendering_state render;
|
|
|
|
bool vk_meta;
|
|
|
|
#if PAN_ARCH < 9
|
|
uint64_t vpd;
|
|
#endif
|
|
|
|
#if PAN_ARCH >= 10
|
|
uint64_t tsd;
|
|
#endif
|
|
|
|
BITSET_DECLARE(dirty, PANVK_CMD_GRAPHICS_DIRTY_STATE_COUNT);
|
|
};
|
|
|
|
/* True if MESA_VK_DYNAMIC_<__name> is flagged in the dirty bitset of the
 * command buffer's common dynamic graphics state. */
#define dyn_gfx_state_dirty(__cmdbuf, __name)                                  \
   BITSET_TEST((__cmdbuf)->vk.dynamic_graphics_state.dirty,                    \
               MESA_VK_DYNAMIC_##__name)

/* True if PANVK_CMD_GRAPHICS_DIRTY_<__name> is flagged in the driver-side
 * graphics dirty bitset. */
#define gfx_state_dirty(__cmdbuf, __name)                                      \
   BITSET_TEST((__cmdbuf)->state.gfx.dirty, PANVK_CMD_GRAPHICS_DIRTY_##__name)

/* Flag PANVK_CMD_GRAPHICS_DIRTY_<__name> in the driver-side bitset. */
#define gfx_state_set_dirty(__cmdbuf, __name)                                  \
   BITSET_SET((__cmdbuf)->state.gfx.dirty, PANVK_CMD_GRAPHICS_DIRTY_##__name)

/* Clear every driver-side graphics dirty bit. */
#define gfx_state_clear_all_dirty(__cmdbuf)                                    \
   BITSET_ZERO((__cmdbuf)->state.gfx.dirty)

/* Flag every driver-side graphics dirty bit. */
#define gfx_state_set_all_dirty(__cmdbuf)                                      \
   BITSET_ONES((__cmdbuf)->state.gfx.dirty)
|
|
|
|
/*
 * Store __val into the graphics sysval member designated by __name (a member
 * path inside struct panvk_graphics_sysvals) and, only when the stored bytes
 * actually change, mark the range
 * [sysval_fau_start(graphics, __name), sysval_fau_end(graphics, __name)]
 * in the __dirty bitset.
 *
 * The value is first staged in a scratch panvk_graphics_sysvals so that it is
 * converted to the member's type before the byte-wise comparison.
 */
#define set_gfx_sysval(__cmdbuf, __dirty, __name, __val)                       \
   do {                                                                        \
      struct panvk_graphics_sysvals __staged;                                  \
      __staged.__name = __val;                                                 \
      if (memcmp(&(__cmdbuf)->state.gfx.sysvals.__name, &__staged.__name,      \
                 sizeof(__staged.__name)) != 0) {                              \
         (__cmdbuf)->state.gfx.sysvals.__name = __staged.__name;               \
         BITSET_SET_RANGE(__dirty, sysval_fau_start(graphics, __name),         \
                          sysval_fau_end(graphics, __name));                   \
      }                                                                        \
   } while (0)
|
|
|
|
#if PAN_ARCH >= 10
/* Device-level draw context; only declared for PAN_ARCH >= 10. */
struct panvk_device_draw_context {
   /* NOTE(review): presumably the BO holding shared helper functions such
    * as the "set FBDs provoking vertex" one -- confirm in the init code. */
   struct panvk_priv_bo *fns_bo;
   uint64_t fn_set_fbds_provoking_vertex_stride;
};
#endif
|
|
|
|
static inline void
|
|
panvk_depth_range(const struct panvk_cmd_graphics_state *state,
|
|
const struct vk_viewport_state *vp,
|
|
float *z_min, float *z_max)
|
|
{
|
|
float a = vp->depth_clip_negative_one_to_one ?
|
|
state->sysvals.viewport.offset.z - state->sysvals.viewport.scale.z :
|
|
state->sysvals.viewport.offset.z;
|
|
float b = state->sysvals.viewport.offset.z + state->sysvals.viewport.scale.z;
|
|
*z_min = MIN2(a, b);
|
|
*z_max = MAX2(a, b);
|
|
}
|
|
|
|
static inline uint32_t
|
|
panvk_select_tiler_hierarchy_mask(const struct panvk_physical_device *phys_dev,
|
|
const struct panvk_cmd_graphics_state *state,
|
|
unsigned bin_ptr_mem_budget)
|
|
{
|
|
struct pan_tiler_features tiler_features =
|
|
pan_query_tiler_features(&phys_dev->kmod.dev->props);
|
|
|
|
uint32_t hierarchy_mask = GENX(pan_select_tiler_hierarchy_mask)(
|
|
state->render.fb.info.width, state->render.fb.info.height,
|
|
tiler_features.max_levels, state->render.fb.info.tile_size,
|
|
bin_ptr_mem_budget);
|
|
|
|
return hierarchy_mask;
|
|
}
|
|
|
|
static inline bool
|
|
fs_required(const struct panvk_cmd_graphics_state *state,
|
|
const struct vk_dynamic_graphics_state *dyn_state)
|
|
{
|
|
const struct panvk_shader_variant *fs =
|
|
panvk_shader_only_variant(state->fs.shader);
|
|
const struct pan_shader_info *fs_info = fs ? &fs->info : NULL;
|
|
const struct vk_color_blend_state *cb = &dyn_state->cb;
|
|
const struct vk_rasterization_state *rs = &dyn_state->rs;
|
|
|
|
if (rs->rasterizer_discard_enable || !fs_info)
|
|
return false;
|
|
|
|
/* If we generally have side effects */
|
|
if (fs_info->fs.sidefx)
|
|
return true;
|
|
|
|
/* If colour is written we need to execute */
|
|
for (unsigned i = 0; i < cb->attachment_count; ++i) {
|
|
if ((cb->color_write_enables & BITFIELD_BIT(i)) &&
|
|
cb->attachments[i].write_mask)
|
|
return true;
|
|
}
|
|
|
|
/* If alpha-to-coverage is enabled, we need to run the fragment shader even
|
|
* if we don't have a color attachment, so depth/stencil updates can be
|
|
* discarded if alpha, and thus coverage, is 0. */
|
|
if (dyn_state->ms.alpha_to_coverage_enable)
|
|
return true;
|
|
|
|
/* If the sample mask is updated, we need to run the fragment shader,
|
|
* otherwise the fixed-function depth/stencil results will apply to all
|
|
* samples. */
|
|
if (fs_info->outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK))
|
|
return true;
|
|
|
|
/* If depth is written and not implied we need to execute.
|
|
* TODO: Predicate on Z/S writes being enabled */
|
|
return (fs_info->fs.writes_depth || fs_info->fs.writes_stencil);
|
|
}
|
|
|
|
static inline bool
|
|
cached_fs_required(ASSERTED const struct panvk_cmd_graphics_state *state,
|
|
ASSERTED const struct vk_dynamic_graphics_state *dyn_state,
|
|
bool cached_value)
|
|
{
|
|
/* Make sure the cached value was properly initialized. */
|
|
assert(fs_required(state, dyn_state) == cached_value);
|
|
return cached_value;
|
|
}
|
|
|
|
/* Fragment shader to use for the draw: the bound shader when the cached
 * fs.required flag says it has to run, NULL otherwise. */
#define get_fs(__cmdbuf)                                                       \
   (!cached_fs_required(&(__cmdbuf)->state.gfx,                                \
                        &(__cmdbuf)->vk.dynamic_graphics_state,                \
                        (__cmdbuf)->state.gfx.fs.required)                     \
       ? NULL                                                                  \
       : (__cmdbuf)->state.gfx.fs.shader)
|
|
|
|
/* Anything that might change the value returned by get_fs() makes users of
 * the fragment shader dirty, because not using the fragment shader (when
 * fs_required() returns false) impacts various other things, like VS -> FS
 * linking in the JM backend, or the update of the fragment shader pointer in
 * the CSF backend. Call gfx_state_dirty(cmdbuf, FS) if you only care about
 * fragment shader updates.
 *
 * Every test goes through the __cmdbuf argument, so the macro works with any
 * command buffer expression and does not depend on a local variable that
 * happens to be called `cmdbuf`. */
#define fs_user_dirty(__cmdbuf)                                                \
   (gfx_state_dirty(__cmdbuf, FS) ||                                           \
    dyn_gfx_state_dirty(__cmdbuf, RS_RASTERIZER_DISCARD_ENABLE) ||             \
    dyn_gfx_state_dirty(__cmdbuf, CB_ATTACHMENT_COUNT) ||                      \
    dyn_gfx_state_dirty(__cmdbuf, CB_COLOR_WRITE_ENABLES) ||                   \
    dyn_gfx_state_dirty(__cmdbuf, CB_WRITE_MASKS) ||                           \
    dyn_gfx_state_dirty(__cmdbuf, MS_ALPHA_TO_COVERAGE_ENABLE))
|
|
|
|
/* Clear all dirty state once a draw has been emitted. The one exception is
 * a draw that did not use the bound fragment shader (get_fs() returned
 * something other than fs.shader): the FS dirty bit is then raised again,
 * and so is FS_PUSH_UNIFORMS if it was dirty before the clear. */
#define clear_dirty_after_draw(__cmdbuf)                                       \
   do {                                                                        \
      const bool __fs_skipped =                                                \
         get_fs(__cmdbuf) != (__cmdbuf)->state.gfx.fs.shader;                  \
      const bool __keep_fs_push =                                              \
         __fs_skipped && gfx_state_dirty(__cmdbuf, FS_PUSH_UNIFORMS);          \
                                                                               \
      vk_dynamic_graphics_state_clear_dirty(                                   \
         &(__cmdbuf)->vk.dynamic_graphics_state);                              \
      gfx_state_clear_all_dirty(__cmdbuf);                                     \
                                                                               \
      if (__fs_skipped) {                                                      \
         gfx_state_set_dirty(__cmdbuf, FS);                                    \
         if (__keep_fs_push) {                                                 \
            gfx_state_set_dirty(__cmdbuf, FS_PUSH_UNIFORMS);                   \
         }                                                                     \
      }                                                                        \
   } while (0)
|
|
|
|
|
|
#if PAN_ARCH >= 10
/* Per-arch entry points for the device draw context (only declared for
 * PAN_ARCH >= 10). Judging by the names they create and destroy the
 * panvk_device_draw_context -- the definitions are not in this header. */
VkResult panvk_per_arch(device_draw_context_init)(struct panvk_device *dev);

void panvk_per_arch(device_draw_context_cleanup)(struct panvk_device *dev);
#endif
|
|
|
|
void
|
|
panvk_per_arch(cmd_init_render_state)(struct panvk_cmd_buffer *cmdbuf,
|
|
const VkRenderingInfo *pRenderingInfo);
|
|
|
|
void
|
|
panvk_per_arch(cmd_force_fb_preload)(struct panvk_cmd_buffer *cmdbuf,
|
|
const VkRenderingInfo *render_info);
|
|
|
|
void
|
|
panvk_per_arch(cmd_preload_render_area_border)(struct panvk_cmd_buffer *cmdbuf,
|
|
const VkRenderingInfo *render_info);
|
|
|
|
void panvk_per_arch(cmd_select_tile_size)(struct panvk_cmd_buffer *cmdbuf);
|
|
|
|
/*
 * Parameters of one draw, passed to cmd_prepare_draw_sysvals(). The field
 * groups follow the Vulkan draw arguments (index, vertex, instance, indirect);
 * a few members only exist for PAN_ARCH < 9.
 */
struct panvk_draw_info {
   /* Index parameters. */
   struct {
      uint32_t size;
      uint32_t offset;
   } index;

   /* Vertex range. */
   struct {
#if PAN_ARCH < 9
      int32_t raw_offset;
#endif
      int32_t base;
      uint32_t count;
   } vertex;

   /* Instance range. */
   struct {
      int32_t base;
      uint32_t count;
   } instance;

   /* Indirect draw parameters. */
   struct {
      uint64_t buffer_dev_addr;
      uint64_t count_buffer_dev_addr;
      uint32_t draw_count;
      uint32_t stride;
   } indirect;

#if PAN_ARCH < 9
   uint32_t layer_id;
#endif
};
|
|
|
|
void
|
|
panvk_per_arch(cmd_prepare_draw_sysvals)(struct panvk_cmd_buffer *cmdbuf,
|
|
const struct panvk_draw_info *info);
|
|
|
|
static inline uint32_t
|
|
color_attachment_written_mask(
|
|
const struct panvk_shader_variant *fs,
|
|
const struct vk_color_attachment_location_state *cal)
|
|
{
|
|
uint32_t written_by_shader =
|
|
(fs->info.outputs_written >> FRAG_RESULT_DATA0) & BITFIELD_MASK(8);
|
|
uint32_t catt_written_mask = 0;
|
|
|
|
for (uint32_t i = 0; i < MAX_RTS; i++) {
|
|
if (cal->color_map[i] == MESA_VK_ATTACHMENT_UNUSED)
|
|
continue;
|
|
|
|
uint32_t shader_rt = cal->color_map[i];
|
|
|
|
if (written_by_shader & BITFIELD_BIT(shader_rt))
|
|
catt_written_mask |= BITFIELD_BIT(i);
|
|
}
|
|
|
|
return catt_written_mask;
|
|
}
|
|
|
|
static inline uint32_t
|
|
color_attachment_read_mask(const struct panvk_shader_variant *fs,
|
|
const struct vk_input_attachment_location_state *ial,
|
|
uint8_t color_attachment_mask)
|
|
{
|
|
uint32_t color_attachment_count =
|
|
ial->color_attachment_count == MESA_VK_COLOR_ATTACHMENT_COUNT_UNKNOWN
|
|
? util_last_bit(color_attachment_mask)
|
|
: ial->color_attachment_count;
|
|
uint32_t catt_read_mask = 0;
|
|
|
|
for (uint32_t i = 0; i < color_attachment_count; i++) {
|
|
if (ial->color_map[i] == MESA_VK_ATTACHMENT_UNUSED)
|
|
continue;
|
|
|
|
uint32_t catt_idx = ial->color_map[i] + 1;
|
|
if (fs->fs.input_attachment_read & BITFIELD_BIT(catt_idx)) {
|
|
assert(color_attachment_mask & BITFIELD_BIT(i));
|
|
catt_read_mask |= BITFIELD_BIT(i);
|
|
}
|
|
}
|
|
|
|
return catt_read_mask;
|
|
}
|
|
|
|
static inline bool
|
|
z_attachment_read(const struct panvk_shader_variant *fs,
|
|
const struct vk_input_attachment_location_state *ial)
|
|
{
|
|
uint32_t depth_mask = ial->depth_att == MESA_VK_ATTACHMENT_NO_INDEX
|
|
? BITFIELD_BIT(0)
|
|
: ial->depth_att != MESA_VK_ATTACHMENT_UNUSED
|
|
? BITFIELD_BIT(ial->depth_att + 1)
|
|
: 0;
|
|
return depth_mask & fs->fs.input_attachment_read;
|
|
}
|
|
|
|
static inline bool
|
|
s_attachment_read(const struct panvk_shader_variant *fs,
|
|
const struct vk_input_attachment_location_state *ial)
|
|
{
|
|
uint32_t stencil_mask = ial->stencil_att == MESA_VK_ATTACHMENT_NO_INDEX
|
|
? BITFIELD_BIT(0)
|
|
: ial->stencil_att != MESA_VK_ATTACHMENT_UNUSED
|
|
? BITFIELD_BIT(ial->stencil_att + 1)
|
|
: 0;
|
|
|
|
return stencil_mask & fs->fs.input_attachment_read;
|
|
}
|
|
|
|
#endif
|