a4e7d8ab90
panvk-bifrost campaigns (r1..r4 Vulkan compositor + r5.video1 Vulkan
video decode) shipped before this repo existed; the deliverable
patches live in marfrit-packages, but the reasoning chain, phase docs,
and source-state evidence lived only in local working trees on the
development host.
This retrofit imports:
- mesa-panvk-bifrost/ — r1..r4 era phase docs (iter1..iter18)
(libmali stub blobs at iter18/blob/ excluded
— 109MB of RE artifacts replaced with a README
pointer)
- mesa-panvk-bifrost-video/ — sibling campaign phase docs + probe
- evidence/ — frozen .tgz source snapshots at each milestone
(basis for the 0005 patch diff generation)
Future iterations should branch off here from day one, so each iter is
a commit rather than a snapshot. See [[feedback-session-local-process-pins]]
for the process drift this retrofit closes.
Total: 1.9 MB across 124 files.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
1995 lines
72 KiB
C
1995 lines
72 KiB
C
/*
|
|
* Copyright © 2024 Collabora Ltd.
|
|
*
|
|
* Derived from tu_cmd_buffer.c which is:
|
|
* Copyright © 2016 Red Hat.
|
|
* Copyright © 2016 Bas Nieuwenhuizen
|
|
* Copyright © 2015 Intel Corporation
|
|
*
|
|
* SPDX-License-Identifier: MIT
|
|
*/
|
|
|
|
#include "genxml/gen_macros.h"
|
|
|
|
#include "panvk_buffer.h"
|
|
#include "panvk_cmd_alloc.h"
|
|
#include "panvk_cmd_buffer.h"
|
|
#include "panvk_cmd_desc_state.h"
|
|
#include "panvk_cmd_draw.h"
|
|
#include "panvk_cmd_meta.h"
|
|
#include "panvk_cmd_precomp.h"
|
|
#include "panvk_device.h"
|
|
#include "panvk_entrypoints.h"
|
|
#include "panvk_image.h"
|
|
#include "panvk_image_view.h"
|
|
#include "panvk_instance.h"
|
|
#include "panvk_meta.h"
|
|
#include "panvk_priv_bo.h"
|
|
#include "panvk_shader.h"
|
|
|
|
#include "draw_helper.h"
|
|
#include "pan_desc.h"
|
|
#include "pan_earlyzs.h"
|
|
#include "pan_encoder.h"
|
|
#include "pan_format.h"
|
|
#include "pan_jc.h"
|
|
#include "pan_props.h"
|
|
#include "pan_shader.h"
|
|
|
|
#include "vk_format.h"
|
|
#include "vk_meta.h"
|
|
#include "vk_pipeline_layout.h"
|
|
|
|
struct panvk_draw_data {
|
|
struct panvk_draw_info info;
|
|
unsigned vertex_range;
|
|
unsigned padded_vertex_count;
|
|
struct mali_invocation_packed invocation;
|
|
struct {
|
|
uint64_t varyings;
|
|
uint64_t attributes;
|
|
uint64_t attribute_bufs;
|
|
} vs;
|
|
struct {
|
|
uint64_t rsd;
|
|
uint64_t varyings;
|
|
} fs;
|
|
uint64_t varying_bufs;
|
|
uint64_t position;
|
|
union {
|
|
uint64_t psiz;
|
|
float line_width;
|
|
};
|
|
uint64_t tls;
|
|
uint64_t fb;
|
|
const struct pan_tiler_context *tiler_ctx;
|
|
uint64_t viewport;
|
|
struct {
|
|
struct pan_ptr vertex_copy_desc;
|
|
struct pan_ptr frag_copy_desc;
|
|
union {
|
|
struct {
|
|
struct pan_ptr vertex;
|
|
struct pan_ptr tiler;
|
|
};
|
|
struct pan_ptr idvs;
|
|
};
|
|
} jobs;
|
|
struct {
|
|
uint64_t attribs;
|
|
uint64_t attrib_bufs;
|
|
uint64_t varying_bufs;
|
|
} indirect_info;
|
|
};
|
|
|
|
static bool
|
|
is_indirect_draw(const struct panvk_draw_data *draw)
|
|
{
|
|
return draw->info.indirect.buffer_dev_addr != 0 ||
|
|
draw->info.index.size != 0;
|
|
}
|
|
|
|
static bool
|
|
has_depth_att(struct panvk_cmd_buffer *cmdbuf)
|
|
{
|
|
return (cmdbuf->state.gfx.render.bound_attachments &
|
|
MESA_VK_RP_ATTACHMENT_DEPTH_BIT) != 0;
|
|
}
|
|
|
|
static bool
|
|
has_stencil_att(struct panvk_cmd_buffer *cmdbuf)
|
|
{
|
|
return (cmdbuf->state.gfx.render.bound_attachments &
|
|
MESA_VK_RP_ATTACHMENT_STENCIL_BIT) != 0;
|
|
}
|
|
|
|
static bool
|
|
writes_depth(struct panvk_cmd_buffer *cmdbuf)
|
|
{
|
|
const struct vk_depth_stencil_state *ds =
|
|
&cmdbuf->vk.dynamic_graphics_state.ds;
|
|
|
|
return has_depth_att(cmdbuf) && ds->depth.test_enable &&
|
|
ds->depth.write_enable && ds->depth.compare_op != VK_COMPARE_OP_NEVER;
|
|
}
|
|
|
|
static bool
|
|
writes_stencil(struct panvk_cmd_buffer *cmdbuf)
|
|
{
|
|
const struct vk_depth_stencil_state *ds =
|
|
&cmdbuf->vk.dynamic_graphics_state.ds;
|
|
|
|
return has_stencil_att(cmdbuf) && ds->stencil.test_enable &&
|
|
((ds->stencil.front.write_mask &&
|
|
(ds->stencil.front.op.fail != VK_STENCIL_OP_KEEP ||
|
|
ds->stencil.front.op.pass != VK_STENCIL_OP_KEEP ||
|
|
ds->stencil.front.op.depth_fail != VK_STENCIL_OP_KEEP)) ||
|
|
(ds->stencil.back.write_mask &&
|
|
(ds->stencil.back.op.fail != VK_STENCIL_OP_KEEP ||
|
|
ds->stencil.back.op.pass != VK_STENCIL_OP_KEEP ||
|
|
ds->stencil.back.op.depth_fail != VK_STENCIL_OP_KEEP)));
|
|
}
|
|
|
|
static bool
|
|
ds_test_always_passes(struct panvk_cmd_buffer *cmdbuf)
|
|
{
|
|
const struct vk_depth_stencil_state *ds =
|
|
&cmdbuf->vk.dynamic_graphics_state.ds;
|
|
|
|
if (!has_depth_att(cmdbuf))
|
|
return true;
|
|
|
|
if (ds->depth.test_enable && ds->depth.compare_op != VK_COMPARE_OP_ALWAYS)
|
|
return false;
|
|
|
|
if (ds->stencil.test_enable &&
|
|
(ds->stencil.front.op.compare != VK_COMPARE_OP_ALWAYS ||
|
|
ds->stencil.back.op.compare != VK_COMPARE_OP_ALWAYS))
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
static inline enum mali_func
|
|
translate_compare_func(VkCompareOp comp)
|
|
{
|
|
STATIC_ASSERT(VK_COMPARE_OP_NEVER == (VkCompareOp)MALI_FUNC_NEVER);
|
|
STATIC_ASSERT(VK_COMPARE_OP_LESS == (VkCompareOp)MALI_FUNC_LESS);
|
|
STATIC_ASSERT(VK_COMPARE_OP_EQUAL == (VkCompareOp)MALI_FUNC_EQUAL);
|
|
STATIC_ASSERT(VK_COMPARE_OP_LESS_OR_EQUAL == (VkCompareOp)MALI_FUNC_LEQUAL);
|
|
STATIC_ASSERT(VK_COMPARE_OP_GREATER == (VkCompareOp)MALI_FUNC_GREATER);
|
|
STATIC_ASSERT(VK_COMPARE_OP_NOT_EQUAL == (VkCompareOp)MALI_FUNC_NOT_EQUAL);
|
|
STATIC_ASSERT(VK_COMPARE_OP_GREATER_OR_EQUAL ==
|
|
(VkCompareOp)MALI_FUNC_GEQUAL);
|
|
STATIC_ASSERT(VK_COMPARE_OP_ALWAYS == (VkCompareOp)MALI_FUNC_ALWAYS);
|
|
|
|
return (enum mali_func)comp;
|
|
}
|
|
|
|
static enum mali_stencil_op
|
|
translate_stencil_op(VkStencilOp in)
|
|
{
|
|
switch (in) {
|
|
case VK_STENCIL_OP_KEEP:
|
|
return MALI_STENCIL_OP_KEEP;
|
|
case VK_STENCIL_OP_ZERO:
|
|
return MALI_STENCIL_OP_ZERO;
|
|
case VK_STENCIL_OP_REPLACE:
|
|
return MALI_STENCIL_OP_REPLACE;
|
|
case VK_STENCIL_OP_INCREMENT_AND_CLAMP:
|
|
return MALI_STENCIL_OP_INCR_SAT;
|
|
case VK_STENCIL_OP_DECREMENT_AND_CLAMP:
|
|
return MALI_STENCIL_OP_DECR_SAT;
|
|
case VK_STENCIL_OP_INCREMENT_AND_WRAP:
|
|
return MALI_STENCIL_OP_INCR_WRAP;
|
|
case VK_STENCIL_OP_DECREMENT_AND_WRAP:
|
|
return MALI_STENCIL_OP_DECR_WRAP;
|
|
case VK_STENCIL_OP_INVERT:
|
|
return MALI_STENCIL_OP_INVERT;
|
|
default:
|
|
UNREACHABLE("Invalid stencil op");
|
|
}
|
|
}
|
|
|
|
static VkResult
|
|
panvk_draw_prepare_fs_rsd(struct panvk_cmd_buffer *cmdbuf,
|
|
struct panvk_draw_data *draw)
|
|
{
|
|
bool dirty = dyn_gfx_state_dirty(cmdbuf, RS_RASTERIZER_DISCARD_ENABLE) ||
|
|
dyn_gfx_state_dirty(cmdbuf, RS_DEPTH_CLAMP_ENABLE) ||
|
|
dyn_gfx_state_dirty(cmdbuf, RS_DEPTH_CLIP_ENABLE) ||
|
|
dyn_gfx_state_dirty(cmdbuf, RS_DEPTH_BIAS_ENABLE) ||
|
|
dyn_gfx_state_dirty(cmdbuf, RS_DEPTH_BIAS_FACTORS) ||
|
|
dyn_gfx_state_dirty(cmdbuf, RS_LINE_MODE) ||
|
|
/* line mode needs primitive topology */
|
|
dyn_gfx_state_dirty(cmdbuf, IA_PRIMITIVE_TOPOLOGY) ||
|
|
dyn_gfx_state_dirty(cmdbuf, CB_LOGIC_OP_ENABLE) ||
|
|
dyn_gfx_state_dirty(cmdbuf, CB_LOGIC_OP) ||
|
|
dyn_gfx_state_dirty(cmdbuf, CB_ATTACHMENT_COUNT) ||
|
|
dyn_gfx_state_dirty(cmdbuf, CB_COLOR_WRITE_ENABLES) ||
|
|
dyn_gfx_state_dirty(cmdbuf, CB_BLEND_ENABLES) ||
|
|
dyn_gfx_state_dirty(cmdbuf, CB_BLEND_EQUATIONS) ||
|
|
dyn_gfx_state_dirty(cmdbuf, CB_WRITE_MASKS) ||
|
|
dyn_gfx_state_dirty(cmdbuf, CB_BLEND_CONSTANTS) ||
|
|
dyn_gfx_state_dirty(cmdbuf, COLOR_ATTACHMENT_MAP) ||
|
|
dyn_gfx_state_dirty(cmdbuf, DS_DEPTH_TEST_ENABLE) ||
|
|
dyn_gfx_state_dirty(cmdbuf, DS_DEPTH_WRITE_ENABLE) ||
|
|
dyn_gfx_state_dirty(cmdbuf, DS_DEPTH_COMPARE_OP) ||
|
|
dyn_gfx_state_dirty(cmdbuf, DS_DEPTH_COMPARE_OP) ||
|
|
dyn_gfx_state_dirty(cmdbuf, DS_STENCIL_TEST_ENABLE) ||
|
|
dyn_gfx_state_dirty(cmdbuf, DS_STENCIL_OP) ||
|
|
dyn_gfx_state_dirty(cmdbuf, DS_STENCIL_COMPARE_MASK) ||
|
|
dyn_gfx_state_dirty(cmdbuf, DS_STENCIL_WRITE_MASK) ||
|
|
dyn_gfx_state_dirty(cmdbuf, DS_STENCIL_REFERENCE) ||
|
|
dyn_gfx_state_dirty(cmdbuf, MS_RASTERIZATION_SAMPLES) ||
|
|
dyn_gfx_state_dirty(cmdbuf, MS_SAMPLE_MASK) ||
|
|
dyn_gfx_state_dirty(cmdbuf, MS_ALPHA_TO_COVERAGE_ENABLE) ||
|
|
dyn_gfx_state_dirty(cmdbuf, MS_ALPHA_TO_ONE_ENABLE) ||
|
|
gfx_state_dirty(cmdbuf, FS) || gfx_state_dirty(cmdbuf, OQ) ||
|
|
gfx_state_dirty(cmdbuf, RENDER_STATE);
|
|
|
|
if (!dirty) {
|
|
draw->fs.rsd = cmdbuf->state.gfx.fs.rsd;
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
const struct vk_dynamic_graphics_state *dyns =
|
|
&cmdbuf->vk.dynamic_graphics_state;
|
|
const struct vk_rasterization_state *rs = &dyns->rs;
|
|
const struct vk_depth_stencil_state *ds = &dyns->ds;
|
|
const struct vk_input_assembly_state *ia = &dyns->ia;
|
|
const struct panvk_shader_variant *fs =
|
|
panvk_shader_only_variant(get_fs(cmdbuf));
|
|
const struct pan_shader_info *fs_info = fs ? &fs->info : NULL;
|
|
uint32_t bd_count = MAX2(cmdbuf->state.gfx.render.fb.info.rt_count, 1);
|
|
bool test_s = has_stencil_att(cmdbuf) && ds->stencil.test_enable;
|
|
bool test_z = has_depth_att(cmdbuf) && ds->depth.test_enable;
|
|
bool writes_z = writes_depth(cmdbuf);
|
|
bool writes_s = writes_stencil(cmdbuf);
|
|
|
|
bool msaa = dyns->ms.rasterization_samples > 1;
|
|
if ((ia->primitive_topology == VK_PRIMITIVE_TOPOLOGY_LINE_LIST ||
|
|
ia->primitive_topology == VK_PRIMITIVE_TOPOLOGY_LINE_STRIP) &&
|
|
rs->line.mode == VK_LINE_RASTERIZATION_MODE_BRESENHAM) {
|
|
/* we need to disable MSAA when rendering bresenham lines.
|
|
*
|
|
* From the Vulkan spec:
|
|
* "When Bresenham lines are being rasterized, sample locations may
|
|
* all be treated as being at the pixel center (this may affect
|
|
* attribute and depth interpolation).""
|
|
*/
|
|
msaa = false;
|
|
}
|
|
|
|
struct pan_ptr ptr = panvk_cmd_alloc_desc_aggregate(
|
|
cmdbuf, PAN_DESC(RENDERER_STATE), PAN_DESC_ARRAY(bd_count, BLEND));
|
|
if (!ptr.gpu)
|
|
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
|
|
|
|
struct mali_renderer_state_packed *rsd = ptr.cpu;
|
|
struct mali_blend_packed *bds = ptr.cpu + pan_size(RENDERER_STATE);
|
|
struct panvk_blend_info *binfo = &cmdbuf->state.gfx.cb.info;
|
|
|
|
uint64_t fs_code = panvk_shader_variant_get_dev_addr(fs);
|
|
|
|
if (fs_info != NULL) {
|
|
panvk_per_arch(blend_emit_descs)(cmdbuf, bds);
|
|
} else {
|
|
for (unsigned i = 0; i < bd_count; i++) {
|
|
pan_pack(&bds[i], BLEND, cfg) {
|
|
cfg.enable = false;
|
|
cfg.internal.mode = MALI_BLEND_MODE_OFF;
|
|
}
|
|
}
|
|
}
|
|
|
|
pan_pack(rsd, RENDERER_STATE, cfg) {
|
|
bool alpha_to_coverage = dyns->ms.alpha_to_coverage_enable;
|
|
|
|
if (fs) {
|
|
pan_shader_prepare_rsd(fs_info, fs_code, &cfg);
|
|
|
|
uint8_t rt_mask = cmdbuf->state.gfx.render.bound_attachments &
|
|
MESA_VK_RP_ATTACHMENT_ANY_COLOR_BITS;
|
|
uint8_t rt_written = color_attachment_written_mask(
|
|
fs, &cmdbuf->vk.dynamic_graphics_state.cal);
|
|
uint8_t rt_read = color_attachment_read_mask(fs, &dyns->ial, rt_mask);
|
|
enum pan_earlyzs_zs_tilebuf_read zs_read =
|
|
(z_attachment_read(fs, &dyns->ial) ||
|
|
s_attachment_read(fs, &dyns->ial))
|
|
? PAN_EARLYZS_ZS_TILEBUF_READ_NO_OPT
|
|
: PAN_EARLYZS_ZS_TILEBUF_NOT_READ;
|
|
|
|
cfg.properties.allow_forward_pixel_to_kill =
|
|
fs_info->fs.can_fpk && !(rt_mask & ~rt_written) &&
|
|
!(rt_read & rt_written) && !alpha_to_coverage &&
|
|
!binfo->any_dest_read;
|
|
|
|
bool writes_zs = writes_z || writes_s;
|
|
bool zs_always_passes = ds_test_always_passes(cmdbuf);
|
|
bool oq = cmdbuf->state.gfx.occlusion_query.mode !=
|
|
MALI_OCCLUSION_MODE_DISABLED;
|
|
|
|
struct pan_earlyzs_state earlyzs =
|
|
pan_earlyzs_get(fs->fs.earlyzs_lut, writes_zs || oq,
|
|
alpha_to_coverage, zs_always_passes, zs_read);
|
|
|
|
/* early ZS check for FPK is performed by HW on v7+ */
|
|
cfg.properties.allow_forward_pixel_to_be_killed =
|
|
!fs->info.writes_global &&
|
|
((PAN_ARCH > 6) || earlyzs.kill != MALI_PIXEL_KILL_FORCE_LATE);
|
|
|
|
cfg.properties.pixel_kill_operation = earlyzs.kill;
|
|
cfg.properties.zs_update_operation = earlyzs.update;
|
|
cfg.multisample_misc.evaluate_per_sample =
|
|
(fs->info.fs.sample_shading && dyns->ms.rasterization_samples > 1);
|
|
} else {
|
|
cfg.properties.depth_source = MALI_DEPTH_SOURCE_FIXED_FUNCTION;
|
|
cfg.properties.allow_forward_pixel_to_kill = true;
|
|
cfg.properties.allow_forward_pixel_to_be_killed = true;
|
|
cfg.properties.zs_update_operation = MALI_PIXEL_KILL_FORCE_EARLY;
|
|
}
|
|
|
|
cfg.multisample_misc.multisample_enable = msaa;
|
|
cfg.multisample_misc.sample_mask =
|
|
msaa ? dyns->ms.sample_mask : UINT16_MAX;
|
|
|
|
cfg.multisample_misc.depth_function =
|
|
test_z ? translate_compare_func(ds->depth.compare_op)
|
|
: MALI_FUNC_ALWAYS;
|
|
|
|
cfg.multisample_misc.depth_write_mask = writes_z;
|
|
cfg.multisample_misc.fixed_function_near_discard =
|
|
cfg.multisample_misc.fixed_function_far_discard =
|
|
vk_rasterization_state_depth_clip_enable(rs);
|
|
cfg.multisample_misc.fixed_function_depth_range_fixed =
|
|
!rs->depth_clamp_enable;
|
|
cfg.multisample_misc.shader_depth_range_fixed = true;
|
|
|
|
cfg.stencil_mask_misc.stencil_enable = test_s;
|
|
cfg.stencil_mask_misc.alpha_to_coverage = alpha_to_coverage;
|
|
cfg.stencil_mask_misc.alpha_test_compare_function = MALI_FUNC_ALWAYS;
|
|
cfg.stencil_mask_misc.front_facing_depth_bias = rs->depth_bias.enable;
|
|
cfg.stencil_mask_misc.back_facing_depth_bias = rs->depth_bias.enable;
|
|
|
|
if (rs->line.mode == VK_LINE_RASTERIZATION_MODE_BRESENHAM)
|
|
cfg.stencil_mask_misc.aligned_line_ends = true;
|
|
|
|
cfg.depth_units = rs->depth_bias.constant_factor;
|
|
cfg.depth_factor = rs->depth_bias.slope_factor;
|
|
cfg.depth_bias_clamp = rs->depth_bias.clamp;
|
|
|
|
cfg.stencil_front.mask = ds->stencil.front.compare_mask;
|
|
cfg.stencil_back.mask = ds->stencil.back.compare_mask;
|
|
|
|
cfg.stencil_mask_misc.stencil_mask_front = ds->stencil.front.write_mask;
|
|
cfg.stencil_mask_misc.stencil_mask_back = ds->stencil.back.write_mask;
|
|
|
|
cfg.stencil_front.reference_value = ds->stencil.front.reference;
|
|
cfg.stencil_back.reference_value = ds->stencil.back.reference;
|
|
|
|
if (test_s) {
|
|
cfg.stencil_front.compare_function =
|
|
translate_compare_func(ds->stencil.front.op.compare);
|
|
cfg.stencil_front.stencil_fail =
|
|
translate_stencil_op(ds->stencil.front.op.fail);
|
|
cfg.stencil_front.depth_fail =
|
|
translate_stencil_op(ds->stencil.front.op.depth_fail);
|
|
cfg.stencil_front.depth_pass =
|
|
translate_stencil_op(ds->stencil.front.op.pass);
|
|
cfg.stencil_back.compare_function =
|
|
translate_compare_func(ds->stencil.back.op.compare);
|
|
cfg.stencil_back.stencil_fail =
|
|
translate_stencil_op(ds->stencil.back.op.fail);
|
|
cfg.stencil_back.depth_fail =
|
|
translate_stencil_op(ds->stencil.back.op.depth_fail);
|
|
cfg.stencil_back.depth_pass =
|
|
translate_stencil_op(ds->stencil.back.op.pass);
|
|
}
|
|
}
|
|
|
|
cmdbuf->state.gfx.fs.rsd = ptr.gpu;
|
|
draw->fs.rsd = cmdbuf->state.gfx.fs.rsd;
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
static VkResult
|
|
panvk_draw_prepare_tiler_context(struct panvk_cmd_buffer *cmdbuf,
|
|
struct panvk_draw_data *draw)
|
|
{
|
|
struct panvk_batch *batch = cmdbuf->cur_batch;
|
|
VkResult result =
|
|
panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf, draw->info.layer_id);
|
|
if (result != VK_SUCCESS)
|
|
return result;
|
|
|
|
draw->tiler_ctx = &batch->tiler.ctx;
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
static VkResult
|
|
panvk_draw_prepare_varyings(struct panvk_cmd_buffer *cmdbuf,
|
|
struct panvk_draw_data *draw)
|
|
{
|
|
struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
|
|
const struct panvk_shader_variant *vs =
|
|
panvk_shader_hw_variant(cmdbuf->state.gfx.vs.shader);
|
|
const struct panvk_shader_link *link = &cmdbuf->state.gfx.link;
|
|
struct pan_ptr bufs = panvk_cmd_alloc_desc_array(
|
|
cmdbuf, PANVK_VARY_BUF_MAX + 1, ATTRIBUTE_BUFFER);
|
|
if (!bufs.gpu)
|
|
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
|
|
|
|
struct mali_attribute_buffer_packed *buf_descs = bufs.cpu;
|
|
const struct vk_input_assembly_state *ia =
|
|
&cmdbuf->vk.dynamic_graphics_state.ia;
|
|
bool writes_point_size =
|
|
vs->info.vs.writes_point_size &&
|
|
ia->primitive_topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
|
|
uint64_t psiz_buf = 0;
|
|
|
|
if (is_indirect_draw(draw) &&
|
|
!cmdbuf->state.gfx.vs.indirect_varying_bufs_infos) {
|
|
struct pan_ptr bufs_info_storage = panvk_cmd_alloc_dev_mem(
|
|
cmdbuf, desc, sizeof(struct libpan_draw_helper_varying_buf_info), 8);
|
|
|
|
if (!bufs_info_storage.gpu)
|
|
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
|
|
|
|
cmdbuf->state.gfx.vs.indirect_varying_bufs_infos = bufs_info_storage.gpu;
|
|
|
|
struct libpan_draw_helper_varying_buf_info *vary_bufs_info =
|
|
bufs_info_storage.cpu;
|
|
vary_bufs_info->address = dev->indirect_varying_buffer->addr.dev;
|
|
vary_bufs_info->size = PANVK_JM_MAX_PER_VTX_ATTRIBUTES_INDIRECT_SIZE *
|
|
PANVK_JM_MAX_VERTICES_INDIRECT;
|
|
vary_bufs_info->offset = 0;
|
|
}
|
|
|
|
for (unsigned i = 0; i < PANVK_VARY_BUF_MAX; i++) {
|
|
uint32_t buf_size;
|
|
uint64_t buf_addr;
|
|
if (is_indirect_draw(draw)) {
|
|
buf_addr = dev->indirect_varying_buffer->addr.dev;
|
|
buf_size = 0;
|
|
} else {
|
|
buf_size = draw->padded_vertex_count * draw->info.instance.count *
|
|
link->buf_strides[i];
|
|
buf_addr =
|
|
buf_size
|
|
? panvk_cmd_alloc_dev_mem(cmdbuf, varying, buf_size, 64).gpu
|
|
: 0;
|
|
|
|
if (buf_size && !buf_addr)
|
|
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
|
|
}
|
|
|
|
pan_pack(&buf_descs[i], ATTRIBUTE_BUFFER, cfg) {
|
|
cfg.stride = link->buf_strides[i];
|
|
cfg.size = buf_size;
|
|
cfg.pointer = buf_addr;
|
|
}
|
|
|
|
if (i == PANVK_VARY_BUF_POSITION)
|
|
draw->position = buf_addr;
|
|
|
|
if (i == PANVK_VARY_BUF_PSIZ)
|
|
psiz_buf = buf_addr;
|
|
}
|
|
|
|
/* We need an empty entry to stop prefetching on Bifrost */
|
|
memset(bufs.cpu + (pan_size(ATTRIBUTE_BUFFER) * PANVK_VARY_BUF_MAX), 0,
|
|
pan_size(ATTRIBUTE_BUFFER));
|
|
|
|
if (writes_point_size)
|
|
draw->psiz = psiz_buf;
|
|
else if (ia->primitive_topology == VK_PRIMITIVE_TOPOLOGY_LINE_LIST ||
|
|
ia->primitive_topology == VK_PRIMITIVE_TOPOLOGY_LINE_STRIP)
|
|
draw->line_width = cmdbuf->vk.dynamic_graphics_state.rs.line.width;
|
|
else
|
|
draw->line_width = 1.0f;
|
|
|
|
draw->varying_bufs = bufs.gpu;
|
|
draw->indirect_info.varying_bufs =
|
|
cmdbuf->state.gfx.vs.indirect_varying_bufs_infos;
|
|
draw->vs.varyings = panvk_priv_mem_dev_addr(link->vs.attribs);
|
|
draw->fs.varyings = panvk_priv_mem_dev_addr(link->fs.attribs);
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
static void
|
|
panvk_draw_emit_attrib_buf(
|
|
const struct panvk_draw_data *draw,
|
|
const struct vk_vertex_binding_state *buf_info, uint32_t stride,
|
|
const struct panvk_attrib_buf *buf,
|
|
struct mali_attribute_buffer_packed *desc,
|
|
struct libpan_draw_helper_attrib_buf_info *helper_buf_info)
|
|
{
|
|
uint64_t addr = buf->address & ~63ULL;
|
|
unsigned size = buf->size + (buf->address & 63);
|
|
unsigned divisor = draw->padded_vertex_count * buf_info->divisor;
|
|
bool per_instance = buf_info->input_rate == VK_VERTEX_INPUT_RATE_INSTANCE;
|
|
struct mali_attribute_buffer_packed *buf_ext = &desc[1];
|
|
|
|
/* In case of indirect draw, the descriptor will be patched at runtime */
|
|
if (helper_buf_info != NULL) {
|
|
pan_pack(desc, ATTRIBUTE_BUFFER, cfg) {
|
|
cfg.type = MALI_ATTRIBUTE_TYPE_1D;
|
|
cfg.pointer = addr;
|
|
cfg.size = size;
|
|
}
|
|
|
|
helper_buf_info->divisor = buf_info->divisor;
|
|
helper_buf_info->stride = stride;
|
|
helper_buf_info->per_instance = per_instance;
|
|
} else if (draw->info.instance.count <= 1) {
|
|
pan_pack(desc, ATTRIBUTE_BUFFER, cfg) {
|
|
cfg.type = MALI_ATTRIBUTE_TYPE_1D;
|
|
cfg.stride = per_instance ? 0 : stride;
|
|
cfg.pointer = addr;
|
|
cfg.size = size;
|
|
}
|
|
} else if (!per_instance) {
|
|
pan_pack(desc, ATTRIBUTE_BUFFER, cfg) {
|
|
cfg.type = MALI_ATTRIBUTE_TYPE_1D_MODULUS;
|
|
cfg.divisor = draw->padded_vertex_count;
|
|
cfg.stride = stride;
|
|
cfg.pointer = addr;
|
|
cfg.size = size;
|
|
}
|
|
} else if (!divisor) {
|
|
/* instance_divisor == 0 means all instances share the same value.
|
|
* Make it a 1D array with a zero stride.
|
|
*/
|
|
pan_pack(desc, ATTRIBUTE_BUFFER, cfg) {
|
|
cfg.type = MALI_ATTRIBUTE_TYPE_1D;
|
|
cfg.stride = 0;
|
|
cfg.pointer = addr;
|
|
cfg.size = size;
|
|
}
|
|
} else if (util_is_power_of_two_or_zero(divisor)) {
|
|
pan_pack(desc, ATTRIBUTE_BUFFER, cfg) {
|
|
cfg.type = MALI_ATTRIBUTE_TYPE_1D_POT_DIVISOR;
|
|
cfg.stride = stride;
|
|
cfg.pointer = addr;
|
|
cfg.size = size;
|
|
cfg.divisor_r = __builtin_ctz(divisor);
|
|
}
|
|
} else {
|
|
unsigned divisor_r = 0, divisor_e = 0;
|
|
unsigned divisor_d =
|
|
pan_compute_npot_divisor(divisor, &divisor_r, &divisor_e);
|
|
pan_pack(desc, ATTRIBUTE_BUFFER, cfg) {
|
|
cfg.type = MALI_ATTRIBUTE_TYPE_1D_NPOT_DIVISOR;
|
|
cfg.stride = stride;
|
|
cfg.pointer = addr;
|
|
cfg.size = size;
|
|
cfg.divisor_r = divisor_r;
|
|
cfg.divisor_e = divisor_e;
|
|
}
|
|
|
|
pan_cast_and_pack(buf_ext, ATTRIBUTE_BUFFER_CONTINUATION_NPOT, cfg) {
|
|
cfg.divisor_numerator = divisor_d;
|
|
cfg.divisor = buf_info->divisor;
|
|
}
|
|
|
|
buf_ext = NULL;
|
|
}
|
|
|
|
/* If the buffer extension wasn't used, memset(0) */
|
|
if (buf_ext)
|
|
memset(buf_ext, 0, pan_size(ATTRIBUTE_BUFFER));
|
|
}
|
|
|
|
static void
|
|
panvk_draw_emit_attrib(const struct panvk_draw_data *draw,
|
|
const struct vk_vertex_attribute_state *attrib_info,
|
|
const struct vk_vertex_binding_state *buf_info,
|
|
const struct panvk_attrib_buf *buf,
|
|
struct mali_attribute_packed *desc,
|
|
struct libpan_draw_helper_attrib_info *helper_attrib_info)
|
|
{
|
|
bool per_instance = buf_info->input_rate == VK_VERTEX_INPUT_RATE_INSTANCE;
|
|
enum pipe_format f = vk_format_to_pipe_format(attrib_info->format);
|
|
unsigned buf_idx = attrib_info->binding;
|
|
|
|
pan_pack(desc, ATTRIBUTE, cfg) {
|
|
cfg.buffer_index = buf_idx * 2;
|
|
cfg.offset_enable = true;
|
|
cfg.format = GENX(pan_format_from_pipe_format)(f)->hw;
|
|
|
|
uint32_t offset = attrib_info->offset + (buf->address & 63);
|
|
|
|
/* In case of indirect draw, the descriptor will be patched at runtime */
|
|
if (helper_attrib_info != NULL) {
|
|
helper_attrib_info->base_offset = offset;
|
|
helper_attrib_info->stride = per_instance ? buf_info->stride : 0;
|
|
} else {
|
|
cfg.offset = offset;
|
|
if (per_instance)
|
|
cfg.offset += draw->info.instance.base * buf_info->stride;
|
|
}
|
|
}
|
|
}
|
|
|
|
static VkResult
|
|
panvk_draw_prepare_vs_attribs(struct panvk_cmd_buffer *cmdbuf,
|
|
struct panvk_draw_data *draw)
|
|
{
|
|
const struct panvk_shader_variant *vs =
|
|
panvk_shader_hw_variant(cmdbuf->state.gfx.vs.shader);
|
|
const struct vk_dynamic_graphics_state *dyns =
|
|
&cmdbuf->vk.dynamic_graphics_state;
|
|
const struct vk_vertex_input_state *vi = dyns->vi;
|
|
unsigned num_imgs = vs->desc_info.others.count[PANVK_BIFROST_DESC_TABLE_IMG];
|
|
unsigned num_vs_attribs = util_last_bit(vi->attributes_valid);
|
|
unsigned num_vbs = util_last_bit(vi->bindings_valid);
|
|
unsigned attrib_count =
|
|
num_imgs ? MAX_VS_ATTRIBS + num_imgs : num_vs_attribs;
|
|
bool dirty =
|
|
dyn_gfx_state_dirty(cmdbuf, VI) ||
|
|
dyn_gfx_state_dirty(cmdbuf, VI_BINDINGS_VALID) ||
|
|
dyn_gfx_state_dirty(cmdbuf, VI_BINDING_STRIDES) ||
|
|
gfx_state_dirty(cmdbuf, VB) || gfx_state_dirty(cmdbuf, DESC_STATE) ||
|
|
is_indirect_draw(draw) != cmdbuf->state.gfx.vs.previous_draw_was_indirect;
|
|
|
|
if (!dirty)
|
|
return VK_SUCCESS;
|
|
|
|
unsigned attrib_buf_count = (num_vbs + num_imgs) * 2;
|
|
struct pan_ptr bufs = panvk_cmd_alloc_desc_array(
|
|
cmdbuf, attrib_buf_count + 1, ATTRIBUTE_BUFFER);
|
|
struct mali_attribute_buffer_packed *attrib_buf_descs = bufs.cpu;
|
|
struct pan_ptr attribs =
|
|
panvk_cmd_alloc_desc_array(cmdbuf, attrib_count, ATTRIBUTE);
|
|
struct mali_attribute_packed *attrib_descs = attribs.cpu;
|
|
|
|
if (!bufs.gpu || (attrib_count && !attribs.gpu))
|
|
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
|
|
|
|
struct libpan_draw_helper_attrib_buf_info *bufs_infos = NULL;
|
|
struct libpan_draw_helper_attrib_info *attribs_infos = NULL;
|
|
|
|
if (is_indirect_draw(draw)) {
|
|
struct pan_ptr bufs_infos_storage = panvk_cmd_alloc_dev_mem(
|
|
cmdbuf, desc,
|
|
num_vbs * sizeof(struct libpan_draw_helper_attrib_buf_info), 8);
|
|
struct pan_ptr attribs_infos_storage = panvk_cmd_alloc_dev_mem(
|
|
cmdbuf, desc,
|
|
num_vs_attribs * sizeof(struct libpan_draw_helper_attrib_info), 8);
|
|
|
|
if (!bufs_infos_storage.gpu ||
|
|
(num_vs_attribs && !attribs_infos_storage.gpu))
|
|
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
|
|
|
|
cmdbuf->state.gfx.vs.indirect_attrib_bufs_infos = bufs_infos_storage.gpu;
|
|
cmdbuf->state.gfx.vs.indirect_attribs_infos = attribs_infos_storage.gpu;
|
|
bufs_infos = bufs_infos_storage.cpu;
|
|
attribs_infos = attribs_infos_storage.cpu;
|
|
}
|
|
|
|
for (unsigned i = 0; i < num_vbs; i++) {
|
|
if (vi->bindings_valid & BITFIELD_BIT(i)) {
|
|
struct libpan_draw_helper_attrib_buf_info *helper_buf_info =
|
|
bufs_infos ? &bufs_infos[i] : NULL;
|
|
panvk_draw_emit_attrib_buf(draw, &vi->bindings[i],
|
|
dyns->vi_binding_strides[i],
|
|
&cmdbuf->state.gfx.vb.bufs[i],
|
|
&attrib_buf_descs[i * 2], helper_buf_info);
|
|
} else {
|
|
memset(&attrib_buf_descs[i * 2], 0, sizeof(*attrib_buf_descs) * 2);
|
|
}
|
|
}
|
|
|
|
for (unsigned i = 0; i < num_vs_attribs; i++) {
|
|
if (vi->attributes_valid & BITFIELD_BIT(i)) {
|
|
unsigned buf_idx = vi->attributes[i].binding;
|
|
struct libpan_draw_helper_attrib_info *helper_attrib_info =
|
|
attribs_infos ? &attribs_infos[i] : NULL;
|
|
panvk_draw_emit_attrib(draw, &vi->attributes[i],
|
|
&vi->bindings[buf_idx],
|
|
&cmdbuf->state.gfx.vb.bufs[buf_idx],
|
|
&attrib_descs[i], helper_attrib_info);
|
|
} else {
|
|
memset(&attrib_descs[i], 0, sizeof(attrib_descs[0]));
|
|
}
|
|
}
|
|
|
|
/* A NULL entry is needed to stop prefecting on Bifrost */
|
|
memset(bufs.cpu + (pan_size(ATTRIBUTE_BUFFER) * attrib_buf_count), 0,
|
|
pan_size(ATTRIBUTE_BUFFER));
|
|
|
|
cmdbuf->state.gfx.vs.attrib_bufs = bufs.gpu;
|
|
cmdbuf->state.gfx.vs.attribs = attribs.gpu;
|
|
|
|
if (num_imgs) {
|
|
cmdbuf->state.gfx.vs.desc.img_attrib_table =
|
|
attribs.gpu + (MAX_VS_ATTRIBS * pan_size(ATTRIBUTE));
|
|
cmdbuf->state.gfx.vs.desc.tables[PANVK_BIFROST_DESC_TABLE_IMG] =
|
|
bufs.gpu + (num_vbs * pan_size(ATTRIBUTE_BUFFER) * 2);
|
|
}
|
|
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
static void
|
|
panvk_draw_prepare_attributes(struct panvk_cmd_buffer *cmdbuf,
|
|
struct panvk_draw_data *draw)
|
|
{
|
|
panvk_draw_prepare_vs_attribs(cmdbuf, draw);
|
|
draw->vs.attributes = cmdbuf->state.gfx.vs.attribs;
|
|
draw->vs.attribute_bufs = cmdbuf->state.gfx.vs.attrib_bufs;
|
|
draw->indirect_info.attribs = cmdbuf->state.gfx.vs.indirect_attribs_infos;
|
|
draw->indirect_info.attrib_bufs =
|
|
cmdbuf->state.gfx.vs.indirect_attrib_bufs_infos;
|
|
}
|
|
|
|
static void
|
|
panvk_emit_viewport(struct panvk_cmd_buffer *cmdbuf,
|
|
struct mali_viewport_packed *vpd)
|
|
{
|
|
const struct vk_viewport_state *vp = &cmdbuf->vk.dynamic_graphics_state.vp;
|
|
|
|
if (vp->viewport_count < 1)
|
|
return;
|
|
|
|
const VkViewport *viewport = &vp->viewports[0];
|
|
const VkRect2D *scissor = &vp->scissors[0];
|
|
float minz, maxz;
|
|
panvk_depth_range(&cmdbuf->state.gfx, &cmdbuf->vk.dynamic_graphics_state.vp,
|
|
&minz, &maxz);
|
|
|
|
/* The spec says "width must be greater than 0.0" */
|
|
assert(viewport->width >= 0);
|
|
int minx = (int)viewport->x;
|
|
int maxx = (int)(viewport->x + viewport->width);
|
|
|
|
/* Viewport height can be negative */
|
|
int miny = MIN2((int)viewport->y, (int)(viewport->y + viewport->height));
|
|
int maxy = MAX2((int)viewport->y, (int)(viewport->y + viewport->height));
|
|
|
|
assert(scissor->offset.x >= 0 && scissor->offset.y >= 0);
|
|
minx = MAX2(scissor->offset.x, minx);
|
|
miny = MAX2(scissor->offset.y, miny);
|
|
maxx = MIN2(scissor->offset.x + scissor->extent.width, maxx);
|
|
maxy = MIN2(scissor->offset.y + scissor->extent.height, maxy);
|
|
|
|
/* Make sure we don't end up with a max < min when width/height is 0 */
|
|
maxx = maxx > minx ? maxx - 1 : maxx;
|
|
maxy = maxy > miny ? maxy - 1 : maxy;
|
|
|
|
/* Clamp viewport scissor to valid range */
|
|
minx = CLAMP(minx, 0, UINT16_MAX);
|
|
maxx = CLAMP(maxx, 0, UINT16_MAX);
|
|
miny = CLAMP(miny, 0, UINT16_MAX);
|
|
maxy = CLAMP(maxy, 0, UINT16_MAX);
|
|
|
|
pan_pack(vpd, VIEWPORT, cfg) {
|
|
cfg.scissor_minimum_x = minx;
|
|
cfg.scissor_minimum_y = miny;
|
|
cfg.scissor_maximum_x = maxx;
|
|
cfg.scissor_maximum_y = maxy;
|
|
cfg.minimum_z = minz;
|
|
cfg.maximum_z = maxz;
|
|
}
|
|
}
|
|
|
|
static VkResult
|
|
panvk_draw_prepare_viewport(struct panvk_cmd_buffer *cmdbuf,
|
|
struct panvk_draw_data *draw)
|
|
{
|
|
/* When rasterizerDiscardEnable is active, it is allowed to have viewport and
|
|
* scissor disabled.
|
|
* As a result, we define an empty one.
|
|
*/
|
|
if (!cmdbuf->state.gfx.vpd || dyn_gfx_state_dirty(cmdbuf, VP_VIEWPORTS) ||
|
|
dyn_gfx_state_dirty(cmdbuf, VP_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE) ||
|
|
dyn_gfx_state_dirty(cmdbuf, VP_SCISSORS) ||
|
|
dyn_gfx_state_dirty(cmdbuf, RS_DEPTH_CLIP_ENABLE) ||
|
|
dyn_gfx_state_dirty(cmdbuf, RS_DEPTH_CLAMP_ENABLE)) {
|
|
struct pan_ptr vp = panvk_cmd_alloc_desc(cmdbuf, VIEWPORT);
|
|
if (!vp.gpu)
|
|
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
|
|
|
|
panvk_emit_viewport(cmdbuf, vp.cpu);
|
|
cmdbuf->state.gfx.vpd = vp.gpu;
|
|
}
|
|
|
|
draw->viewport = cmdbuf->state.gfx.vpd;
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
static void
|
|
panvk_emit_vertex_dcd(struct panvk_cmd_buffer *cmdbuf,
|
|
const struct panvk_draw_data *draw,
|
|
struct mali_draw_packed *dcd)
|
|
{
|
|
const struct panvk_shader_variant *vs =
|
|
panvk_shader_hw_variant(cmdbuf->state.gfx.vs.shader);
|
|
const struct panvk_shader_desc_state *vs_desc_state =
|
|
&cmdbuf->state.gfx.vs.desc;
|
|
|
|
pan_pack(dcd, DRAW, cfg) {
|
|
cfg.state = panvk_priv_mem_dev_addr(vs->rsd);
|
|
cfg.attributes = draw->vs.attributes;
|
|
cfg.attribute_buffers = draw->vs.attribute_bufs;
|
|
cfg.varyings = draw->vs.varyings;
|
|
cfg.varying_buffers = draw->varying_bufs;
|
|
cfg.thread_storage = draw->tls;
|
|
|
|
/* In case of indirect draw, the descriptor will be patched at runtime */
|
|
if (!is_indirect_draw(draw)) {
|
|
cfg.offset_start = draw->info.vertex.raw_offset;
|
|
cfg.instance_size =
|
|
draw->info.instance.count > 1 ? draw->padded_vertex_count : 1;
|
|
}
|
|
|
|
cfg.uniform_buffers = vs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_UBO];
|
|
cfg.push_uniforms = cmdbuf->state.gfx.vs.push_uniforms;
|
|
cfg.textures = vs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_TEXTURE];
|
|
cfg.samplers = vs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_SAMPLER];
|
|
}
|
|
}
|
|
|
|
static VkResult
|
|
panvk_draw_prepare_vertex_job(struct panvk_cmd_buffer *cmdbuf,
|
|
struct panvk_draw_data *draw)
|
|
{
|
|
struct panvk_batch *batch = cmdbuf->cur_batch;
|
|
struct pan_ptr ptr = panvk_cmd_alloc_desc(cmdbuf, COMPUTE_JOB);
|
|
if (!ptr.gpu)
|
|
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
|
|
|
|
util_dynarray_append(&batch->jobs, ptr.cpu);
|
|
draw->jobs.vertex = ptr;
|
|
|
|
memcpy(pan_section_ptr(ptr.cpu, COMPUTE_JOB, INVOCATION), &draw->invocation,
|
|
pan_size(INVOCATION));
|
|
|
|
pan_section_pack(ptr.cpu, COMPUTE_JOB, PARAMETERS, cfg) {
|
|
cfg.job_task_split = 5;
|
|
}
|
|
|
|
panvk_emit_vertex_dcd(cmdbuf, draw,
|
|
pan_section_ptr(ptr.cpu, COMPUTE_JOB, DRAW));
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
static enum mali_draw_mode
|
|
translate_prim_topology(VkPrimitiveTopology in)
|
|
{
|
|
/* Test VK_PRIMITIVE_TOPOLOGY_META_RECT_LIST_MESA separately, as it's not
|
|
* part of the VkPrimitiveTopology enum.
|
|
*/
|
|
if (in == VK_PRIMITIVE_TOPOLOGY_META_RECT_LIST_MESA)
|
|
return MALI_DRAW_MODE_TRIANGLES;
|
|
|
|
switch (in) {
|
|
case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
|
|
return MALI_DRAW_MODE_POINTS;
|
|
case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
|
|
return MALI_DRAW_MODE_LINES;
|
|
case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
|
|
return MALI_DRAW_MODE_LINE_STRIP;
|
|
case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
|
|
return MALI_DRAW_MODE_TRIANGLES;
|
|
case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
|
|
return MALI_DRAW_MODE_TRIANGLE_STRIP;
|
|
case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
|
|
return MALI_DRAW_MODE_TRIANGLE_FAN;
|
|
case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
|
|
case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
|
|
case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
|
|
case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
|
|
case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
|
|
default:
|
|
UNREACHABLE("Invalid primitive type");
|
|
}
|
|
}
|
|
|
|
static void
|
|
panvk_emit_tiler_primitive(struct panvk_cmd_buffer *cmdbuf,
|
|
const struct panvk_draw_data *draw,
|
|
struct mali_primitive_packed *prim)
|
|
{
|
|
const struct panvk_shader_variant *vs =
|
|
panvk_shader_hw_variant(cmdbuf->state.gfx.vs.shader);
|
|
const struct panvk_shader_variant *fs =
|
|
panvk_shader_only_variant(get_fs(cmdbuf));
|
|
const struct vk_dynamic_graphics_state *dyns =
|
|
&cmdbuf->vk.dynamic_graphics_state;
|
|
const struct vk_input_assembly_state *ia = &dyns->ia;
|
|
const struct vk_rasterization_state *rs = &dyns->rs;
|
|
bool writes_point_size =
|
|
vs->info.vs.writes_point_size &&
|
|
ia->primitive_topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
|
|
bool secondary_shader = vs->info.vs.secondary_enable && fs != NULL;
|
|
assert(!(vs->info.outputs_written & VARYING_BIT_PRIMITIVE_ID));
|
|
bool fs_reads_primitive_id = fs ? fs->info.fs.reads_primitive_id : false;
|
|
|
|
pan_pack(prim, PRIMITIVE, cfg) {
|
|
cfg.draw_mode = translate_prim_topology(ia->primitive_topology);
|
|
if (writes_point_size)
|
|
cfg.point_size_array_format = MALI_POINT_SIZE_ARRAY_FORMAT_FP16;
|
|
cfg.primitive_index_enable = fs_reads_primitive_id;
|
|
cfg.primitive_index_writeback = fs_reads_primitive_id;
|
|
|
|
cfg.first_provoking_vertex =
|
|
cmdbuf->state.gfx.render.first_provoking_vertex != U_TRISTATE_NO;
|
|
|
|
if (ia->primitive_restart_enable)
|
|
cfg.primitive_restart = MALI_PRIMITIVE_RESTART_IMPLICIT;
|
|
cfg.job_task_split = 6;
|
|
|
|
if (draw->info.index.size) {
|
|
switch (draw->info.index.size) {
|
|
case 4:
|
|
cfg.index_type = MALI_INDEX_TYPE_UINT32;
|
|
break;
|
|
case 2:
|
|
cfg.index_type = MALI_INDEX_TYPE_UINT16;
|
|
break;
|
|
case 1:
|
|
cfg.index_type = MALI_INDEX_TYPE_UINT8;
|
|
break;
|
|
default:
|
|
UNREACHABLE("Invalid index size");
|
|
}
|
|
}
|
|
|
|
/* In case of indirect draw, the descriptor will be patched at runtime */
|
|
cfg.index_count = is_indirect_draw(draw) ? 1 : draw->info.vertex.count;
|
|
|
|
cfg.low_depth_cull = cfg.high_depth_cull =
|
|
vk_rasterization_state_depth_clip_enable(rs);
|
|
|
|
cfg.secondary_shader = secondary_shader;
|
|
}
|
|
}
|
|
|
|
static void
|
|
panvk_emit_tiler_primitive_size(struct panvk_cmd_buffer *cmdbuf,
|
|
const struct panvk_draw_data *draw,
|
|
struct mali_primitive_size_packed *primsz)
|
|
{
|
|
const struct panvk_shader_variant *vs =
|
|
panvk_shader_hw_variant(cmdbuf->state.gfx.vs.shader);
|
|
const struct vk_input_assembly_state *ia =
|
|
&cmdbuf->vk.dynamic_graphics_state.ia;
|
|
bool writes_point_size =
|
|
vs->info.vs.writes_point_size &&
|
|
ia->primitive_topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
|
|
|
|
pan_pack(primsz, PRIMITIVE_SIZE, cfg) {
|
|
if (writes_point_size) {
|
|
cfg.size_array = draw->psiz;
|
|
} else {
|
|
cfg.fixed_sized = draw->line_width;
|
|
}
|
|
}
|
|
}
|
|
|
|
static uint32_t
|
|
primitive_vertex_count(enum mali_draw_mode in)
|
|
{
|
|
switch (in) {
|
|
case MALI_DRAW_MODE_POINTS:
|
|
return 1;
|
|
case MALI_DRAW_MODE_LINES:
|
|
case MALI_DRAW_MODE_LINE_STRIP:
|
|
return 2;
|
|
case MALI_DRAW_MODE_TRIANGLES:
|
|
case MALI_DRAW_MODE_TRIANGLE_STRIP:
|
|
case MALI_DRAW_MODE_TRIANGLE_FAN:
|
|
return 3;
|
|
default:
|
|
UNREACHABLE("Invalid draw mode");
|
|
}
|
|
}
|
|
|
|
static void
|
|
panvk_emit_tiler_dcd(struct panvk_cmd_buffer *cmdbuf,
|
|
const struct panvk_draw_data *draw,
|
|
struct mali_draw_packed *dcd)
|
|
{
|
|
struct panvk_shader_desc_state *fs_desc_state = &cmdbuf->state.gfx.fs.desc;
|
|
const struct vk_rasterization_state *rs =
|
|
&cmdbuf->vk.dynamic_graphics_state.rs;
|
|
const struct vk_input_assembly_state *ia =
|
|
&cmdbuf->vk.dynamic_graphics_state.ia;
|
|
|
|
pan_pack(dcd, DRAW, cfg) {
|
|
cfg.front_face_ccw = rs->front_face == VK_FRONT_FACE_COUNTER_CLOCKWISE;
|
|
cfg.cull_front_face = (rs->cull_mode & VK_CULL_MODE_FRONT_BIT) != 0;
|
|
cfg.cull_back_face = (rs->cull_mode & VK_CULL_MODE_BACK_BIT) != 0;
|
|
cfg.position = draw->position;
|
|
cfg.state = draw->fs.rsd;
|
|
cfg.attributes = fs_desc_state->img_attrib_table;
|
|
cfg.attribute_buffers =
|
|
fs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_IMG];
|
|
cfg.viewport = draw->viewport;
|
|
cfg.varyings = draw->fs.varyings;
|
|
cfg.varying_buffers = cfg.varyings ? draw->varying_bufs : 0;
|
|
cfg.thread_storage = draw->tls;
|
|
|
|
/* For all primitives but lines DRAW.flat_shading_vertex must
|
|
* be set to 0 and the provoking vertex is selected with the
|
|
* PRIMITIVE.first_provoking_vertex field.
|
|
*/
|
|
if (ia->primitive_topology == VK_PRIMITIVE_TOPOLOGY_LINE_LIST ||
|
|
ia->primitive_topology == VK_PRIMITIVE_TOPOLOGY_LINE_STRIP)
|
|
cfg.flat_shading_vertex = true;
|
|
|
|
/* In case of indirect draw, the descriptor will be patched at runtime */
|
|
if (!is_indirect_draw(draw)) {
|
|
cfg.offset_start = draw->info.vertex.raw_offset;
|
|
cfg.instance_size =
|
|
draw->info.instance.count > 1 ? draw->padded_vertex_count : 1;
|
|
uint32_t primitives_per_instance =
|
|
DIV_ROUND_UP(draw->padded_vertex_count,
|
|
primitive_vertex_count(
|
|
translate_prim_topology(ia->primitive_topology)));
|
|
/* instance_primitive_size has the same restrictions as
|
|
* padded_vertex_count, so we can use pan_padded_vertex_count here. */
|
|
cfg.instance_primitive_size =
|
|
pan_padded_vertex_count(primitives_per_instance);
|
|
}
|
|
|
|
cfg.uniform_buffers = fs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_UBO];
|
|
cfg.push_uniforms = cmdbuf->state.gfx.fs.push_uniforms;
|
|
cfg.textures = fs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_TEXTURE];
|
|
cfg.samplers = fs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_SAMPLER];
|
|
|
|
cfg.occlusion_query = cmdbuf->state.gfx.occlusion_query.mode;
|
|
cfg.occlusion = cmdbuf->state.gfx.occlusion_query.ptr;
|
|
}
|
|
}
|
|
|
|
static void
|
|
set_provoking_vertex_mode(struct panvk_cmd_buffer *cmdbuf,
|
|
enum u_tristate first_provoking_vertex)
|
|
{
|
|
struct panvk_cmd_graphics_state *state = &cmdbuf->state.gfx;
|
|
|
|
if (first_provoking_vertex != U_TRISTATE_UNSET) {
|
|
/* If this is not the first draw, first_provoking_vertex should match
|
|
* the one from the previous draws. Unfortunately, we can't check it
|
|
* when the render pass is inherited. */
|
|
assert(state->render.first_provoking_vertex == U_TRISTATE_UNSET ||
|
|
state->render.first_provoking_vertex == first_provoking_vertex);
|
|
state->render.first_provoking_vertex = first_provoking_vertex;
|
|
}
|
|
|
|
/* Once we emit the first FBDs/TDs, we need to commit to a state. If we
|
|
* choose the wrong one, we will fail the assert when the next application
|
|
* draw happens (with a different state). Use PROVOKING_VERTEX_MODE_FIRST
|
|
* because it's the vulkan default, and so likely to be right more often.
|
|
*
|
|
* TODO: handle this case better */
|
|
if (state->render.first_provoking_vertex == U_TRISTATE_UNSET)
|
|
state->render.first_provoking_vertex = U_TRISTATE_YES;
|
|
}
|
|
|
|
static VkResult
|
|
panvk_draw_prepare_tiler_job(struct panvk_cmd_buffer *cmdbuf,
|
|
struct panvk_draw_data *draw)
|
|
{
|
|
struct panvk_batch *batch = cmdbuf->cur_batch;
|
|
const struct panvk_shader_variant *fs =
|
|
panvk_shader_only_variant(cmdbuf->state.gfx.fs.shader);
|
|
struct panvk_shader_desc_state *fs_desc_state = &cmdbuf->state.gfx.fs.desc;
|
|
struct pan_ptr ptr;
|
|
VkResult result = panvk_per_arch(meta_get_copy_desc_job)(
|
|
cmdbuf, fs, &cmdbuf->state.gfx.desc_state, fs_desc_state, 0, &ptr);
|
|
|
|
if (result != VK_SUCCESS)
|
|
return result;
|
|
|
|
if (ptr.cpu)
|
|
util_dynarray_append(&batch->jobs, ptr.cpu);
|
|
|
|
draw->jobs.frag_copy_desc = ptr;
|
|
|
|
ptr = panvk_cmd_alloc_desc(cmdbuf, TILER_JOB);
|
|
util_dynarray_append(&batch->jobs, ptr.cpu);
|
|
draw->jobs.tiler = ptr;
|
|
|
|
memcpy(pan_section_ptr(ptr.cpu, TILER_JOB, INVOCATION), &draw->invocation,
|
|
pan_size(INVOCATION));
|
|
|
|
panvk_emit_tiler_primitive(cmdbuf, draw,
|
|
pan_section_ptr(ptr.cpu, TILER_JOB, PRIMITIVE));
|
|
|
|
panvk_emit_tiler_primitive_size(
|
|
cmdbuf, draw, pan_section_ptr(ptr.cpu, TILER_JOB, PRIMITIVE_SIZE));
|
|
|
|
panvk_emit_tiler_dcd(cmdbuf, draw,
|
|
pan_section_ptr(ptr.cpu, TILER_JOB, DRAW));
|
|
|
|
pan_section_pack(ptr.cpu, TILER_JOB, TILER, cfg) {
|
|
cfg.address = PAN_ARCH >= 9 ? draw->tiler_ctx->valhall.desc
|
|
: draw->tiler_ctx->bifrost.desc;
|
|
}
|
|
|
|
pan_section_pack(ptr.cpu, TILER_JOB, PADDING, padding)
|
|
;
|
|
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
static VkResult
|
|
panvk_draw_prepare_idvs_job(struct panvk_cmd_buffer *cmdbuf,
|
|
struct panvk_draw_data *draw)
|
|
{
|
|
struct panvk_batch *batch = cmdbuf->cur_batch;
|
|
struct pan_ptr ptr = panvk_cmd_alloc_desc(cmdbuf, INDEXED_VERTEX_JOB);
|
|
if (!ptr.gpu)
|
|
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
|
|
|
|
util_dynarray_append(&batch->jobs, ptr.cpu);
|
|
draw->jobs.idvs = ptr;
|
|
|
|
memcpy(pan_section_ptr(ptr.cpu, INDEXED_VERTEX_JOB, INVOCATION),
|
|
&draw->invocation, pan_size(INVOCATION));
|
|
|
|
panvk_emit_tiler_primitive(
|
|
cmdbuf, draw, pan_section_ptr(ptr.cpu, INDEXED_VERTEX_JOB, PRIMITIVE));
|
|
|
|
panvk_emit_tiler_primitive_size(
|
|
cmdbuf, draw,
|
|
pan_section_ptr(ptr.cpu, INDEXED_VERTEX_JOB, PRIMITIVE_SIZE));
|
|
|
|
pan_section_pack(ptr.cpu, INDEXED_VERTEX_JOB, TILER, cfg) {
|
|
cfg.address = PAN_ARCH >= 9 ? draw->tiler_ctx->valhall.desc
|
|
: draw->tiler_ctx->bifrost.desc;
|
|
}
|
|
|
|
pan_section_pack(ptr.cpu, INDEXED_VERTEX_JOB, PADDING, _) {
|
|
}
|
|
|
|
panvk_emit_tiler_dcd(
|
|
cmdbuf, draw,
|
|
pan_section_ptr(ptr.cpu, INDEXED_VERTEX_JOB, FRAGMENT_DRAW));
|
|
|
|
panvk_emit_vertex_dcd(
|
|
cmdbuf, draw, pan_section_ptr(ptr.cpu, INDEXED_VERTEX_JOB, VERTEX_DRAW));
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
static VkResult
|
|
panvk_draw_prepare_vs_copy_desc_job(struct panvk_cmd_buffer *cmdbuf,
|
|
struct panvk_draw_data *draw)
|
|
{
|
|
struct panvk_batch *batch = cmdbuf->cur_batch;
|
|
const struct panvk_shader_variant *vs =
|
|
panvk_shader_hw_variant(cmdbuf->state.gfx.vs.shader);
|
|
const struct panvk_shader_desc_state *vs_desc_state =
|
|
&cmdbuf->state.gfx.vs.desc;
|
|
const struct vk_vertex_input_state *vi =
|
|
cmdbuf->vk.dynamic_graphics_state.vi;
|
|
unsigned num_vbs = util_last_bit(vi->bindings_valid);
|
|
struct pan_ptr ptr;
|
|
VkResult result = panvk_per_arch(meta_get_copy_desc_job)(
|
|
cmdbuf, vs, &cmdbuf->state.gfx.desc_state, vs_desc_state,
|
|
num_vbs * pan_size(ATTRIBUTE_BUFFER) * 2, &ptr);
|
|
if (result != VK_SUCCESS)
|
|
return result;
|
|
|
|
if (ptr.cpu) {
|
|
util_dynarray_append(&batch->jobs, ptr.cpu);
|
|
}
|
|
|
|
draw->jobs.vertex_copy_desc = ptr;
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
static VkResult
|
|
panvk_draw_prepare_fs_copy_desc_job(struct panvk_cmd_buffer *cmdbuf,
|
|
struct panvk_draw_data *draw)
|
|
{
|
|
const struct panvk_shader_variant *fs =
|
|
panvk_shader_only_variant(cmdbuf->state.gfx.fs.shader);
|
|
struct panvk_shader_desc_state *fs_desc_state = &cmdbuf->state.gfx.fs.desc;
|
|
struct panvk_batch *batch = cmdbuf->cur_batch;
|
|
struct pan_ptr ptr;
|
|
VkResult result = panvk_per_arch(meta_get_copy_desc_job)(
|
|
cmdbuf, fs, &cmdbuf->state.gfx.desc_state, fs_desc_state, 0, &ptr);
|
|
|
|
if (result != VK_SUCCESS)
|
|
return result;
|
|
|
|
if (ptr.cpu) {
|
|
util_dynarray_append(&batch->jobs, ptr.cpu);
|
|
}
|
|
|
|
draw->jobs.frag_copy_desc = ptr;
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
void
|
|
panvk_per_arch(cmd_preload_fb_after_batch_split)(struct panvk_cmd_buffer *cmdbuf)
|
|
{
|
|
for (unsigned i = 0; i < cmdbuf->state.gfx.render.fb.info.rt_count; i++) {
|
|
if (cmdbuf->state.gfx.render.fb.info.rts[i].view) {
|
|
cmdbuf->state.gfx.render.fb.info.rts[i].clear = false;
|
|
cmdbuf->state.gfx.render.fb.info.rts[i].preload = true;
|
|
}
|
|
}
|
|
|
|
if (cmdbuf->state.gfx.render.fb.info.zs.view.zs) {
|
|
cmdbuf->state.gfx.render.fb.info.zs.clear.z = false;
|
|
cmdbuf->state.gfx.render.fb.info.zs.preload.z = true;
|
|
}
|
|
|
|
if (cmdbuf->state.gfx.render.fb.info.zs.view.s ||
|
|
(cmdbuf->state.gfx.render.fb.info.zs.view.zs &&
|
|
util_format_is_depth_and_stencil(
|
|
cmdbuf->state.gfx.render.fb.info.zs.view.zs->format))) {
|
|
cmdbuf->state.gfx.render.fb.info.zs.clear.s = false;
|
|
cmdbuf->state.gfx.render.fb.info.zs.preload.s = true;
|
|
}
|
|
}
|
|
|
|
static VkResult
|
|
panvk_cmd_prepare_draw_link_shaders(struct panvk_cmd_buffer *cmd)
|
|
{
|
|
struct panvk_cmd_graphics_state *gfx = &cmd->state.gfx;
|
|
|
|
if (!gfx_state_dirty(cmd, VS) && !gfx_state_dirty(cmd, FS))
|
|
return VK_SUCCESS;
|
|
|
|
const struct panvk_shader_variant *vs =
|
|
panvk_shader_hw_variant(cmd->state.gfx.vs.shader);
|
|
const struct panvk_shader_variant *fs =
|
|
panvk_shader_only_variant(get_fs(cmd));
|
|
|
|
VkResult result =
|
|
panvk_per_arch(link_shaders)(&cmd->desc_pool, vs, fs, &gfx->link);
|
|
if (result != VK_SUCCESS) {
|
|
vk_command_buffer_set_error(&cmd->vk, result);
|
|
return result;
|
|
}
|
|
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
static VkResult
|
|
prepare_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_data *draw)
|
|
{
|
|
struct panvk_batch *batch = cmdbuf->cur_batch;
|
|
const struct panvk_shader_variant *vs =
|
|
panvk_shader_hw_variant(cmdbuf->state.gfx.vs.shader);
|
|
struct panvk_shader_desc_state *vs_desc_state = &cmdbuf->state.gfx.vs.desc;
|
|
struct panvk_shader_desc_state *fs_desc_state = &cmdbuf->state.gfx.fs.desc;
|
|
struct panvk_descriptor_state *desc_state = &cmdbuf->state.gfx.desc_state;
|
|
const struct vk_rasterization_state *rs =
|
|
&cmdbuf->vk.dynamic_graphics_state.rs;
|
|
VkResult result;
|
|
const struct panvk_shader_variant *fs =
|
|
panvk_shader_only_variant(get_fs(cmdbuf));
|
|
|
|
/* There are only 16 bits in the descriptor for the job ID. Each job has a
|
|
* pilot shader dealing with descriptor copies, and we need one
|
|
* <vertex,tiler> pair per draw.
|
|
*/
|
|
if (batch->vtc_jc.job_index + (4 * cmdbuf->state.gfx.render.layer_count) >=
|
|
UINT16_MAX) {
|
|
panvk_per_arch(cmd_close_batch)(cmdbuf);
|
|
panvk_per_arch(cmd_preload_fb_after_batch_split)(cmdbuf);
|
|
batch = panvk_per_arch(cmd_open_batch)(cmdbuf);
|
|
}
|
|
|
|
if (fs_user_dirty(cmdbuf)) {
|
|
result = panvk_cmd_prepare_draw_link_shaders(cmdbuf);
|
|
if (result != VK_SUCCESS)
|
|
return result;
|
|
}
|
|
|
|
if (cmdbuf->state.gfx.vk_meta) {
|
|
/* vk_meta doesn't care about the provoking vertex mode, we should use
|
|
* the same mode that the application uses. */
|
|
set_provoking_vertex_mode(cmdbuf, U_TRISTATE_UNSET);
|
|
} else {
|
|
enum u_tristate first_provoking_vertex = u_tristate_make(
|
|
cmdbuf->vk.dynamic_graphics_state.rs.provoking_vertex ==
|
|
VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT);
|
|
set_provoking_vertex_mode(cmdbuf, first_provoking_vertex);
|
|
}
|
|
|
|
if (!rs->rasterizer_discard_enable) {
|
|
const struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info;
|
|
uint32_t *nr_samples = &cmdbuf->state.gfx.render.fb.nr_samples;
|
|
uint32_t rasterization_samples =
|
|
cmdbuf->vk.dynamic_graphics_state.ms.rasterization_samples;
|
|
|
|
/* If there's no attachment, and the FB descriptor hasn't been allocated
|
|
* yet, we patch nr_samples to match rasterization_samples, otherwise, we
|
|
* make sure those two numbers match. */
|
|
if (!batch->fb.desc.gpu && !cmdbuf->state.gfx.render.bound_attachments) {
|
|
assert(rasterization_samples > 0);
|
|
*nr_samples = rasterization_samples;
|
|
} else {
|
|
assert(rasterization_samples == *nr_samples);
|
|
}
|
|
|
|
/* In case we already emitted tiler/framebuffer descriptors, we ensure
|
|
* that the sample count didn't change
|
|
* XXX: This currently can happen in case we resume a render pass with no
|
|
* attachements and without any draw as the FBD is emitted when suspending.
|
|
*/
|
|
assert(fbinfo->nr_samples == 0 ||
|
|
fbinfo->nr_samples == cmdbuf->state.gfx.render.fb.nr_samples);
|
|
|
|
result = panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf);
|
|
if (result != VK_SUCCESS)
|
|
return result;
|
|
}
|
|
|
|
panvk_per_arch(cmd_select_tile_size)(cmdbuf);
|
|
|
|
result = panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, true);
|
|
if (result != VK_SUCCESS)
|
|
return result;
|
|
|
|
uint32_t used_set_mask =
|
|
vs->desc_info.used_set_mask | (fs ? fs->desc_info.used_set_mask : 0);
|
|
|
|
if (gfx_state_dirty(cmdbuf, DESC_STATE) || gfx_state_dirty(cmdbuf, VS) ||
|
|
gfx_state_dirty(cmdbuf, FS)) {
|
|
result = panvk_per_arch(cmd_prepare_push_descs)(cmdbuf, desc_state,
|
|
used_set_mask);
|
|
if (result != VK_SUCCESS)
|
|
return result;
|
|
}
|
|
|
|
if (gfx_state_dirty(cmdbuf, DESC_STATE) || gfx_state_dirty(cmdbuf, VS)) {
|
|
result = panvk_per_arch(cmd_prepare_shader_desc_tables)(
|
|
cmdbuf, desc_state, vs, vs_desc_state);
|
|
if (result != VK_SUCCESS)
|
|
return result;
|
|
}
|
|
|
|
/* No need to setup the FS desc tables if the FS is not executed. */
|
|
if (fs &&
|
|
(gfx_state_dirty(cmdbuf, DESC_STATE) || gfx_state_dirty(cmdbuf, FS))) {
|
|
result = panvk_per_arch(cmd_prepare_shader_desc_tables)(
|
|
cmdbuf, desc_state, fs, fs_desc_state);
|
|
if (result != VK_SUCCESS)
|
|
return result;
|
|
|
|
result = panvk_draw_prepare_fs_copy_desc_job(cmdbuf, draw);
|
|
if (result != VK_SUCCESS)
|
|
return result;
|
|
}
|
|
|
|
panvk_draw_prepare_attributes(cmdbuf, draw);
|
|
|
|
if (gfx_state_dirty(cmdbuf, DESC_STATE) || gfx_state_dirty(cmdbuf, VS))
|
|
panvk_draw_prepare_vs_copy_desc_job(cmdbuf, draw);
|
|
|
|
draw->tls = batch->tls.gpu;
|
|
draw->fb = batch->fb.desc.gpu;
|
|
|
|
result = panvk_draw_prepare_fs_rsd(cmdbuf, draw);
|
|
if (result != VK_SUCCESS)
|
|
return result;
|
|
|
|
batch->tlsinfo.tls.size = MAX3(vs->info.tls_size, fs ? fs->info.tls_size : 0,
|
|
batch->tlsinfo.tls.size);
|
|
|
|
if (gfx_state_dirty(cmdbuf, DESC_STATE) || gfx_state_dirty(cmdbuf, VS)) {
|
|
VkResult result = panvk_per_arch(cmd_prepare_dyn_ssbos)(
|
|
cmdbuf, desc_state, vs, vs_desc_state);
|
|
if (result != VK_SUCCESS)
|
|
return result;
|
|
}
|
|
|
|
if (gfx_state_dirty(cmdbuf, DESC_STATE) || gfx_state_dirty(cmdbuf, FS)) {
|
|
VkResult result = panvk_per_arch(cmd_prepare_dyn_ssbos)(
|
|
cmdbuf, desc_state, fs, fs_desc_state);
|
|
if (result != VK_SUCCESS)
|
|
return result;
|
|
}
|
|
|
|
panvk_per_arch(cmd_prepare_draw_sysvals)(cmdbuf, &draw->info);
|
|
|
|
/* Viewport emission requires up-to-date {scale,offset}.z for min/max Z,
|
|
* so we need to call it after calling cmd_prepare_draw_sysvals(), but
|
|
* viewports are the same for all layers, so we only emit when layer_id=0.
|
|
*/
|
|
result = panvk_draw_prepare_viewport(cmdbuf, draw);
|
|
if (result != VK_SUCCESS)
|
|
return result;
|
|
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
static void
|
|
panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_data *draw)
|
|
{
|
|
const struct panvk_shader_variant *vs = panvk_shader_hw_variant(cmdbuf->state.gfx.vs.shader);
|
|
VkResult result;
|
|
|
|
/* If there's no vertex shader, we can skip the draw. */
|
|
if (!panvk_priv_mem_check_alloc(vs->rsd))
|
|
return;
|
|
|
|
/* Needs to be done before get_fs() is called because it depends on
|
|
* fs.required being initialized. */
|
|
cmdbuf->state.gfx.fs.required =
|
|
fs_required(&cmdbuf->state.gfx, &cmdbuf->vk.dynamic_graphics_state);
|
|
|
|
result = prepare_draw(cmdbuf, draw);
|
|
if (result != VK_SUCCESS)
|
|
return;
|
|
|
|
pan_pack_work_groups_compute(&draw->invocation, 1, draw->vertex_range,
|
|
draw->info.instance.count, 1, 1, 1, true,
|
|
false);
|
|
|
|
struct panvk_batch *batch = cmdbuf->cur_batch;
|
|
|
|
unsigned copy_desc_job_id =
|
|
draw->jobs.vertex_copy_desc.gpu
|
|
? pan_jc_add_job(&batch->vtc_jc, MALI_JOB_TYPE_COMPUTE, false, false,
|
|
0, 0, &draw->jobs.vertex_copy_desc, false)
|
|
: 0;
|
|
|
|
if (draw->jobs.frag_copy_desc.gpu) {
|
|
/* We don't need to add frag_copy_desc as a dependency because the
|
|
* tiler job doesn't execute the fragment shader, the fragment job
|
|
* will, and the tiler/fragment synchronization happens at the batch
|
|
* level. */
|
|
pan_jc_add_job(&batch->vtc_jc, MALI_JOB_TYPE_COMPUTE, false, false, 0, 0,
|
|
&draw->jobs.frag_copy_desc, false);
|
|
}
|
|
|
|
uint32_t view_mask = cmdbuf->state.gfx.render.view_mask;
|
|
assert(view_mask == 0 || util_bitcount(view_mask) <= batch->fb.layer_count);
|
|
uint32_t enabled_layer_count = view_mask
|
|
? util_bitcount(view_mask)
|
|
: cmdbuf->state.gfx.render.layer_count;
|
|
const struct panvk_shader_variant *fs = panvk_shader_only_variant(get_fs(cmdbuf));
|
|
|
|
for (uint32_t i = 0; i < enabled_layer_count; i++) {
|
|
result = panvk_draw_prepare_varyings(cmdbuf, draw);
|
|
if (result != VK_SUCCESS)
|
|
return;
|
|
|
|
draw->info.layer_id = (view_mask != 0) ? u_bit_scan(&view_mask) : i;
|
|
if (draw->info.layer_id > 0) {
|
|
cmdbuf->state.gfx.sysvals.layer_id = draw->info.layer_id;
|
|
gfx_state_set_dirty(cmdbuf, FS_PUSH_UNIFORMS);
|
|
}
|
|
|
|
result = panvk_per_arch(cmd_prepare_push_uniforms)(
|
|
cmdbuf, vs, 1);
|
|
if (result != VK_SUCCESS)
|
|
return;
|
|
|
|
if (fs) {
|
|
result = panvk_per_arch(cmd_prepare_push_uniforms)(
|
|
cmdbuf, fs, 1);
|
|
if (result != VK_SUCCESS)
|
|
return;
|
|
}
|
|
|
|
result = panvk_draw_prepare_tiler_context(cmdbuf, draw);
|
|
if (result != VK_SUCCESS)
|
|
return;
|
|
|
|
if (vs->info.vs.idvs) {
|
|
result = panvk_draw_prepare_idvs_job(cmdbuf, draw);
|
|
if (result != VK_SUCCESS)
|
|
return;
|
|
|
|
pan_jc_add_job(&batch->vtc_jc, MALI_JOB_TYPE_INDEXED_VERTEX, false,
|
|
false, 0, copy_desc_job_id, &draw->jobs.idvs, false);
|
|
} else {
|
|
result = panvk_draw_prepare_vertex_job(cmdbuf, draw);
|
|
if (result != VK_SUCCESS)
|
|
return;
|
|
|
|
unsigned vjob_id =
|
|
pan_jc_add_job(&batch->vtc_jc, MALI_JOB_TYPE_VERTEX, false, false,
|
|
0, copy_desc_job_id, &draw->jobs.vertex, false);
|
|
|
|
bool needs_tiling =
|
|
!cmdbuf->vk.dynamic_graphics_state.rs.rasterizer_discard_enable ||
|
|
cmdbuf->state.gfx.occlusion_query.mode !=
|
|
MALI_OCCLUSION_MODE_DISABLED;
|
|
|
|
if (needs_tiling) {
|
|
panvk_draw_prepare_tiler_job(cmdbuf, draw);
|
|
pan_jc_add_job(&batch->vtc_jc, MALI_JOB_TYPE_TILER, false, false,
|
|
vjob_id, 0, &draw->jobs.tiler, false);
|
|
}
|
|
}
|
|
}
|
|
|
|
clear_dirty_after_draw(cmdbuf);
|
|
cmdbuf->state.gfx.vs.previous_draw_was_indirect = false;
|
|
}
|
|
|
|
static void
|
|
panvk_cmd_draw_indirect(struct panvk_cmd_buffer *cmdbuf,
|
|
struct panvk_draw_data *draw)
|
|
{
|
|
const struct panvk_shader_variant *vs = panvk_shader_hw_variant(cmdbuf->state.gfx.vs.shader);
|
|
VkResult result;
|
|
|
|
/* If there's no vertex shader, we can skip the draw. */
|
|
if (!panvk_priv_mem_check_alloc(vs->rsd))
|
|
return;
|
|
|
|
/* Needs to be done before get_fs() is called because it depends on
|
|
* fs.required being initialized. */
|
|
cmdbuf->state.gfx.fs.required =
|
|
fs_required(&cmdbuf->state.gfx, &cmdbuf->vk.dynamic_graphics_state);
|
|
|
|
result = prepare_draw(cmdbuf, draw);
|
|
if (result != VK_SUCCESS)
|
|
return;
|
|
|
|
struct panvk_batch *batch = cmdbuf->cur_batch;
|
|
const struct vk_input_assembly_state *ia =
|
|
&cmdbuf->vk.dynamic_graphics_state.ia;
|
|
const struct vk_vertex_input_state *vi =
|
|
cmdbuf->vk.dynamic_graphics_state.vi;
|
|
|
|
unsigned copy_desc_job_id =
|
|
draw->jobs.vertex_copy_desc.gpu
|
|
? pan_jc_add_job(&batch->vtc_jc, MALI_JOB_TYPE_COMPUTE, false, false,
|
|
0, 0, &draw->jobs.vertex_copy_desc, false)
|
|
: 0;
|
|
|
|
if (draw->jobs.frag_copy_desc.gpu) {
|
|
/* We don't need to add frag_copy_desc as a dependency because the
|
|
* tiler job doesn't execute the fragment shader, the fragment job
|
|
* will, and the tiler/fragment synchronization happens at the batch
|
|
* level. */
|
|
pan_jc_add_job(&batch->vtc_jc, MALI_JOB_TYPE_COMPUTE, false, false, 0, 0,
|
|
&draw->jobs.frag_copy_desc, false);
|
|
}
|
|
|
|
uint32_t view_mask = cmdbuf->state.gfx.render.view_mask;
|
|
assert(view_mask == 0 || util_bitcount(view_mask) <= batch->fb.layer_count);
|
|
uint32_t enabled_layer_count = view_mask
|
|
? util_bitcount(view_mask)
|
|
: cmdbuf->state.gfx.render.layer_count;
|
|
const struct panvk_shader_variant *fs = panvk_shader_only_variant(get_fs(cmdbuf));
|
|
|
|
struct panvk_precomp_ctx precomp_ctx = panvk_per_arch(precomp_cs)(cmdbuf);
|
|
uint64_t index_min_max_res_ptr = 0;
|
|
uint32_t job_before_indirect_helper = copy_desc_job_id;
|
|
if (draw->info.index.size) {
|
|
index_min_max_res_ptr =
|
|
panvk_cmd_alloc_dev_mem(
|
|
cmdbuf, desc,
|
|
sizeof(struct libpan_draw_helper_index_min_max_result), 8)
|
|
.gpu;
|
|
const struct panlib_draw_index_minmax_search_helper_args args = {
|
|
.index_buffer_ptr = cmdbuf->state.gfx.ib.dev_addr,
|
|
.cmd = draw->info.indirect.buffer_dev_addr,
|
|
.min_ptr =
|
|
index_min_max_res_ptr +
|
|
offsetof(struct libpan_draw_helper_index_min_max_result, min),
|
|
.max_ptr =
|
|
index_min_max_res_ptr +
|
|
offsetof(struct libpan_draw_helper_index_min_max_result, max),
|
|
};
|
|
|
|
struct libpan_draw_helper_index_min_max_result val = {
|
|
.min = ((uint64_t)1 << (draw->info.index.size * 8)) - 1,
|
|
.max = 0,
|
|
};
|
|
uint64_t *raw_val = (uint64_t *)&val;
|
|
|
|
struct pan_ptr write_job =
|
|
pan_pool_alloc_desc(&cmdbuf->desc_pool.base, WRITE_VALUE_JOB);
|
|
|
|
pan_section_pack(write_job.cpu, WRITE_VALUE_JOB, PAYLOAD, payload) {
|
|
payload.type = MALI_WRITE_VALUE_TYPE_IMMEDIATE_64;
|
|
payload.address = index_min_max_res_ptr;
|
|
payload.immediate_value = *raw_val;
|
|
};
|
|
|
|
unsigned write_job_id =
|
|
pan_jc_add_job(&batch->vtc_jc, MALI_JOB_TYPE_WRITE_VALUE, false, false,
|
|
0, copy_desc_job_id, &write_job, false);
|
|
util_dynarray_append(&batch->jobs, write_job.cpu);
|
|
|
|
uint32_t index_count = cmdbuf->state.gfx.ib.size / draw->info.index.size;
|
|
uint32_t wg_count = DIV_ROUND_UP(index_count, 65536);
|
|
assert(wg_count <= 65536);
|
|
|
|
panlib_draw_index_minmax_search_helper_struct(
|
|
&precomp_ctx, panlib_1d_with_jm_deps(wg_count, 0, write_job_id),
|
|
PANLIB_BARRIER_NONE, args, util_logbase2(draw->info.index.size),
|
|
ia->primitive_restart_enable);
|
|
job_before_indirect_helper = batch->vtc_jc.job_index;
|
|
}
|
|
|
|
for (uint32_t i = 0; i < enabled_layer_count; i++) {
|
|
/* Force a new push uniform block to be allocated */
|
|
gfx_state_set_dirty(cmdbuf, VS_PUSH_UNIFORMS);
|
|
|
|
result = panvk_draw_prepare_varyings(cmdbuf, draw);
|
|
if (result != VK_SUCCESS)
|
|
return;
|
|
|
|
draw->info.layer_id = (view_mask != 0) ? u_bit_scan(&view_mask) : i;
|
|
if (draw->info.layer_id > 0) {
|
|
cmdbuf->state.gfx.sysvals.layer_id = draw->info.layer_id;
|
|
gfx_state_set_dirty(cmdbuf, FS_PUSH_UNIFORMS);
|
|
}
|
|
|
|
result = panvk_per_arch(cmd_prepare_push_uniforms)(
|
|
cmdbuf, vs, 1);
|
|
if (result != VK_SUCCESS)
|
|
return;
|
|
|
|
if (fs) {
|
|
result = panvk_per_arch(cmd_prepare_push_uniforms)(
|
|
cmdbuf, fs, 1);
|
|
if (result != VK_SUCCESS)
|
|
return;
|
|
}
|
|
|
|
result = panvk_draw_prepare_tiler_context(cmdbuf, draw);
|
|
if (result != VK_SUCCESS)
|
|
return;
|
|
|
|
if (vs->info.vs.idvs) {
|
|
result = panvk_draw_prepare_idvs_job(cmdbuf, draw);
|
|
|
|
if (result != VK_SUCCESS)
|
|
return;
|
|
} else {
|
|
result = panvk_draw_prepare_vertex_job(cmdbuf, draw);
|
|
|
|
if (result != VK_SUCCESS)
|
|
return;
|
|
|
|
bool needs_tiling =
|
|
!cmdbuf->vk.dynamic_graphics_state.rs.rasterizer_discard_enable ||
|
|
cmdbuf->state.gfx.occlusion_query.mode !=
|
|
MALI_OCCLUSION_MODE_DISABLED;
|
|
|
|
if (needs_tiling) {
|
|
result = panvk_draw_prepare_tiler_job(cmdbuf, draw);
|
|
|
|
if (result != VK_SUCCESS)
|
|
return;
|
|
}
|
|
}
|
|
|
|
assert(draw->info.indirect.buffer_dev_addr != 0 || draw->info.index.size);
|
|
|
|
uint32_t attrib_bufs_valid = vi->bindings_valid;
|
|
uint32_t attribs_valid = vi->attributes_valid;
|
|
uint64_t first_vertex_sysval = 0x8ull << 60;
|
|
uint64_t first_instance_sysval = 0x8ull << 60;
|
|
uint64_t raw_vertex_offset_sysval = 0x8ull << 60;
|
|
if (shader_uses_sysval(vs, graphics, vs.first_vertex)) {
|
|
first_vertex_sysval = cmdbuf->state.gfx.vs.push_uniforms +
|
|
shader_remapped_sysval_offset(
|
|
vs, sysval_offset(graphics, vs.first_vertex));
|
|
}
|
|
|
|
if (shader_uses_sysval(vs, graphics, vs.base_instance)) {
|
|
first_instance_sysval =
|
|
cmdbuf->state.gfx.vs.push_uniforms +
|
|
shader_remapped_sysval_offset(
|
|
vs, sysval_offset(graphics, vs.base_instance));
|
|
}
|
|
|
|
if (shader_uses_sysval(vs, graphics, vs.raw_vertex_offset)) {
|
|
raw_vertex_offset_sysval =
|
|
cmdbuf->state.gfx.vs.push_uniforms +
|
|
shader_remapped_sysval_offset(
|
|
vs, sysval_offset(graphics, vs.raw_vertex_offset));
|
|
}
|
|
|
|
enum panlib_barrier indirect_barrier =
|
|
PANLIB_BARRIER_JM_SUPPRESS_PREFETCH;
|
|
struct panlib_precomp_grid indirect_grid =
|
|
panlib_1d_with_jm_deps(1, 0, job_before_indirect_helper);
|
|
|
|
if (draw->info.indirect.buffer_dev_addr != 0 && draw->info.index.size) {
|
|
const struct panlib_draw_indexed_indirect_helper_args args = {
|
|
.cmd = draw->info.indirect.buffer_dev_addr,
|
|
.index_buffer_ptr = cmdbuf->state.gfx.ib.dev_addr,
|
|
.index_min_max_res = index_min_max_res_ptr,
|
|
.index_size = draw->info.index.size,
|
|
.primitive_vertex_count = primitive_vertex_count(
|
|
translate_prim_topology(ia->primitive_topology)),
|
|
.varying_bufs_descs = draw->varying_bufs,
|
|
.varying_bufs_info = draw->indirect_info.varying_bufs,
|
|
.attrib_bufs_descs = draw->vs.attribute_bufs,
|
|
.attrib_bufs_infos = draw->indirect_info.attrib_bufs,
|
|
.attrib_bufs_valid = attrib_bufs_valid,
|
|
.attribs_valid = attribs_valid,
|
|
.attribs_descs = draw->vs.attributes,
|
|
.attribs_infos = draw->indirect_info.attribs,
|
|
.first_vertex_sysval = first_vertex_sysval,
|
|
.first_instance_sysval = first_instance_sysval,
|
|
.raw_vertex_offset_sysval = raw_vertex_offset_sysval,
|
|
.idvs_job = vs->info.vs.idvs ? draw->jobs.idvs.gpu : 0,
|
|
.vertex_job = draw->jobs.vertex.gpu,
|
|
.tiler_job = draw->jobs.tiler.gpu,
|
|
};
|
|
panlib_draw_indexed_indirect_helper_struct(&precomp_ctx, indirect_grid,
|
|
indirect_barrier, args);
|
|
} else if (draw->info.indirect.buffer_dev_addr != 0) {
|
|
const struct panlib_draw_indirect_helper_args args = {
|
|
.cmd = draw->info.indirect.buffer_dev_addr,
|
|
.primitive_vertex_count = primitive_vertex_count(
|
|
translate_prim_topology(ia->primitive_topology)),
|
|
.varying_bufs_descs = draw->varying_bufs,
|
|
.varying_bufs_info = draw->indirect_info.varying_bufs,
|
|
.attrib_bufs_descs = draw->vs.attribute_bufs,
|
|
.attrib_bufs_infos = draw->indirect_info.attrib_bufs,
|
|
.attrib_bufs_valid = attrib_bufs_valid,
|
|
.attribs_valid = attribs_valid,
|
|
.attribs_descs = draw->vs.attributes,
|
|
.attribs_infos = draw->indirect_info.attribs,
|
|
.first_vertex_sysval = first_vertex_sysval,
|
|
.first_instance_sysval = first_instance_sysval,
|
|
.raw_vertex_offset_sysval = raw_vertex_offset_sysval,
|
|
.idvs_job = vs->info.vs.idvs ? draw->jobs.idvs.gpu : 0,
|
|
.vertex_job = draw->jobs.vertex.gpu,
|
|
.tiler_job = draw->jobs.tiler.gpu,
|
|
};
|
|
panlib_draw_indirect_helper_struct(&precomp_ctx, indirect_grid,
|
|
indirect_barrier, args);
|
|
} else {
|
|
assert(false && "Invalid indirect draw");
|
|
}
|
|
|
|
/* Grab the index of the indirect helper job */
|
|
uint32_t prev_job = batch->vtc_jc.job_index;
|
|
|
|
if (vs->info.vs.idvs) {
|
|
pan_jc_add_job(&batch->vtc_jc, MALI_JOB_TYPE_INDEXED_VERTEX, false,
|
|
false, 0, prev_job, &draw->jobs.idvs, false);
|
|
} else {
|
|
unsigned vjob_id =
|
|
pan_jc_add_job(&batch->vtc_jc, MALI_JOB_TYPE_VERTEX, false, true, 0,
|
|
prev_job, &draw->jobs.vertex, false);
|
|
|
|
if (draw->jobs.tiler.gpu != 0) {
|
|
pan_jc_add_job(&batch->vtc_jc, MALI_JOB_TYPE_TILER, false, false,
|
|
vjob_id, 0, &draw->jobs.tiler, false);
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* We split every ~1024 indirect draw.
|
|
* This is here for multiple reasons:
|
|
* - The indirect varying buffer offset need to be reset at some point to
|
|
* avoid going outside of bounds.
|
|
* - It is possible to always end up with timeouts for batches with 4k draws
|
|
* (see "dEQP-VK.api.command_buffers.many_indirect_draws_on_secondary") At
|
|
* the same time, because of how TLS works on Mali, we should not split too
|
|
* much as this will cause the TLS budget to go crazy.
|
|
*/
|
|
if (batch->vtc_jc.job_index > (5 * 1024)) {
|
|
bool preload_fb =
|
|
cmdbuf->cur_batch && cmdbuf->cur_batch->vtc_jc.first_tiler;
|
|
|
|
panvk_per_arch(cmd_close_batch)(cmdbuf);
|
|
|
|
if (preload_fb)
|
|
panvk_per_arch(cmd_preload_fb_after_batch_split)(cmdbuf);
|
|
|
|
batch = panvk_per_arch(cmd_open_batch)(cmdbuf);
|
|
cmdbuf->state.gfx.vs.indirect_varying_bufs_infos = 0;
|
|
}
|
|
|
|
clear_dirty_after_draw(cmdbuf);
|
|
cmdbuf->state.gfx.vs.previous_draw_was_indirect = true;
|
|
}
|
|
|
|
static unsigned
|
|
padded_vertex_count(struct panvk_cmd_buffer *cmdbuf, uint32_t vertex_count,
|
|
uint32_t instance_count)
|
|
{
|
|
if (instance_count == 1)
|
|
return vertex_count;
|
|
|
|
const struct panvk_shader_variant *vs =
|
|
panvk_shader_hw_variant(cmdbuf->state.gfx.vs.shader);
|
|
bool idvs = vs->info.vs.idvs;
|
|
|
|
/* Index-Driven Vertex Shading requires different instances to
|
|
* have different cache lines for position results. Each vertex
|
|
* position is 16 bytes and the Mali cache line is 64 bytes, so
|
|
* the instance count must be aligned to 4 vertices.
|
|
*/
|
|
if (idvs)
|
|
vertex_count = ALIGN_POT(vertex_count, 4);
|
|
|
|
return pan_padded_vertex_count(vertex_count);
|
|
}
|
|
|
|
VKAPI_ATTR void VKAPI_CALL
|
|
panvk_per_arch(CmdDraw)(VkCommandBuffer commandBuffer, uint32_t vertexCount,
|
|
uint32_t instanceCount, uint32_t firstVertex,
|
|
uint32_t firstInstance)
|
|
{
|
|
VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
|
|
|
|
if (instanceCount == 0 || vertexCount == 0)
|
|
return;
|
|
|
|
/* gl_BaseVertexARB is a signed integer, and it should expose the value of
|
|
* firstVertex in a non-indexed draw. */
|
|
assert(firstVertex < INT32_MAX);
|
|
|
|
/* gl_BaseInstance is a signed integer, and it should expose the value of
|
|
* firstInstnace. */
|
|
assert(firstInstance < INT32_MAX);
|
|
|
|
struct panvk_draw_data draw = {
|
|
.info = {
|
|
.vertex.base = firstVertex,
|
|
.vertex.raw_offset = firstVertex,
|
|
.vertex.count = vertexCount,
|
|
.instance.base = firstInstance,
|
|
.instance.count = instanceCount,
|
|
},
|
|
.vertex_range = vertexCount,
|
|
.padded_vertex_count =
|
|
padded_vertex_count(cmdbuf, vertexCount, instanceCount),
|
|
};
|
|
|
|
panvk_cmd_draw(cmdbuf, &draw);
|
|
}
|
|
|
|
VKAPI_ATTR void VKAPI_CALL
|
|
panvk_per_arch(CmdDrawIndexed)(VkCommandBuffer commandBuffer,
|
|
uint32_t indexCount, uint32_t instanceCount,
|
|
uint32_t firstIndex, int32_t vertexOffset,
|
|
uint32_t firstInstance)
|
|
{
|
|
VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
|
|
|
|
if (instanceCount == 0 || indexCount == 0)
|
|
return;
|
|
|
|
/* gl_BaseInstance is a signed integer, and it should expose the value of
|
|
* firstInstnace. */
|
|
assert(firstInstance < INT32_MAX);
|
|
|
|
struct pan_ptr indirect_index_alloc = panvk_cmd_alloc_dev_mem(
|
|
cmdbuf, desc, sizeof(struct VkDrawIndexedIndirectCommand), 8);
|
|
|
|
struct VkDrawIndexedIndirectCommand *indirect_index_alloc_ptr =
|
|
indirect_index_alloc.cpu;
|
|
|
|
*indirect_index_alloc_ptr = (struct VkDrawIndexedIndirectCommand){
|
|
.indexCount = indexCount,
|
|
.instanceCount = instanceCount,
|
|
.firstIndex = firstIndex,
|
|
.vertexOffset = vertexOffset,
|
|
.firstInstance = firstInstance,
|
|
};
|
|
|
|
struct panvk_draw_data draw = {
|
|
.info = {
|
|
.index.size = cmdbuf->state.gfx.ib.index_size,
|
|
.indirect.buffer_dev_addr = indirect_index_alloc.gpu,
|
|
.indirect.draw_count = 1,
|
|
.indirect.stride = 0,
|
|
},
|
|
};
|
|
|
|
panvk_cmd_draw_indirect(cmdbuf, &draw);
|
|
}
|
|
|
|
VKAPI_ATTR void VKAPI_CALL
|
|
panvk_per_arch(CmdDrawIndirect)(VkCommandBuffer commandBuffer, VkBuffer _buffer,
|
|
VkDeviceSize offset, uint32_t drawCount,
|
|
uint32_t stride)
|
|
{
|
|
VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
|
|
VK_FROM_HANDLE(panvk_buffer, buffer, _buffer);
|
|
|
|
if (drawCount == 0)
|
|
return;
|
|
|
|
/* We cannot support arbitrary draw count on JM */
|
|
assert(drawCount == 1);
|
|
|
|
struct panvk_draw_data draw = {
|
|
.info = {
|
|
.indirect.buffer_dev_addr = panvk_buffer_gpu_ptr(buffer, offset),
|
|
.indirect.draw_count = drawCount,
|
|
.indirect.stride = stride,
|
|
},
|
|
};
|
|
|
|
panvk_cmd_draw_indirect(cmdbuf, &draw);
|
|
}
|
|
|
|
VKAPI_ATTR void VKAPI_CALL
|
|
panvk_per_arch(CmdDrawIndexedIndirect)(VkCommandBuffer commandBuffer,
|
|
VkBuffer _buffer, VkDeviceSize offset,
|
|
uint32_t drawCount, uint32_t stride)
|
|
{
|
|
VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
|
|
VK_FROM_HANDLE(panvk_buffer, buffer, _buffer);
|
|
|
|
if (drawCount == 0)
|
|
return;
|
|
|
|
/* We cannot support arbitrary draw count on JM */
|
|
assert(drawCount == 1);
|
|
|
|
struct panvk_draw_data draw = {
|
|
.info = {
|
|
.index.size = cmdbuf->state.gfx.ib.index_size,
|
|
.indirect.buffer_dev_addr = panvk_buffer_gpu_ptr(buffer, offset),
|
|
.indirect.draw_count = drawCount,
|
|
.indirect.stride = stride,
|
|
},
|
|
};
|
|
|
|
panvk_cmd_draw_indirect(cmdbuf, &draw);
|
|
}
|
|
|
|
VKAPI_ATTR void VKAPI_CALL
|
|
panvk_per_arch(CmdBeginRendering)(VkCommandBuffer commandBuffer,
|
|
const VkRenderingInfo *pRenderingInfo)
|
|
{
|
|
VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
|
|
struct panvk_cmd_graphics_state *state = &cmdbuf->state.gfx;
|
|
bool resuming = pRenderingInfo->flags & VK_RENDERING_RESUMING_BIT;
|
|
|
|
/* When resuming from a suspended pass, the state should be unchanged. */
|
|
if (resuming && cmdbuf->cur_batch) {
|
|
state->render.flags = pRenderingInfo->flags;
|
|
} else {
|
|
/* If we're not resuming, cur_batch should be NULL. However, this
|
|
* currently isn't true because of how events are implemented.
|
|
*
|
|
* XXX: Rewrite events to not close and open batch and add an assert here.
|
|
*/
|
|
if (cmdbuf->cur_batch)
|
|
panvk_per_arch(cmd_close_batch)(cmdbuf);
|
|
|
|
panvk_per_arch(cmd_init_render_state)(cmdbuf, pRenderingInfo);
|
|
|
|
if (resuming)
|
|
panvk_per_arch(cmd_preload_fb_after_batch_split)(cmdbuf);
|
|
}
|
|
|
|
if (!cmdbuf->cur_batch)
|
|
panvk_per_arch(cmd_open_batch)(cmdbuf);
|
|
|
|
if (!resuming)
|
|
panvk_per_arch(cmd_preload_render_area_border)(cmdbuf, pRenderingInfo);
|
|
}
|
|
|
|
VKAPI_ATTR void VKAPI_CALL
|
|
panvk_per_arch(CmdEndRendering)(VkCommandBuffer commandBuffer)
|
|
{
|
|
VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
|
|
|
|
if (!(cmdbuf->state.gfx.render.flags & VK_RENDERING_SUSPENDING_BIT)) {
|
|
struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info;
|
|
bool clear = fbinfo->zs.clear.z | fbinfo->zs.clear.s;
|
|
for (unsigned i = 0; i < fbinfo->rt_count; i++)
|
|
clear |= fbinfo->rts[i].clear;
|
|
|
|
if (clear)
|
|
panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf);
|
|
|
|
panvk_per_arch(cmd_close_batch)(cmdbuf);
|
|
cmdbuf->cur_batch = NULL;
|
|
panvk_per_arch(cmd_meta_resolve_attachments)(cmdbuf);
|
|
}
|
|
}
|