a4e7d8ab90
panvk-bifrost campaigns (r1..r4 Vulkan compositor + r5.video1 Vulkan
video decode) shipped before this repo existed; the deliverable
patches live in marfrit-packages, but the reasoning chain, phase docs,
and source-state evidence lived only in local working trees on the
development host.
This retrofit imports:
- mesa-panvk-bifrost/ — r1..r4 era phase docs (iter1..iter18)
(libmali stub blobs at iter18/blob/ excluded
— 109MB of RE artifacts replaced with a README
pointer)
- mesa-panvk-bifrost-video/ — sibling campaign phase docs + probe
- evidence/ — frozen .tgz source snapshots at each milestone
(basis for the 0005 patch diff generation)
Future iterations should branch off here from day one, so each iter is
a commit rather than a snapshot. See [[feedback-session-local-process-pins]]
for the process drift this retrofit closes.
Total: 1.9 MB across 124 files.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
487 lines
18 KiB
C
487 lines
18 KiB
C
/*
 * Copyright © 2026 mfritsche / claude-noether
 * SPDX-License-Identifier: MIT
 *
 * iter17: panvk-specific replacement for pan_nir_lower_xfb that handles
 * primitive decomposition for transform_feedback on non-LIST topologies
 * (TRIANGLE_STRIP/FAN, LINE_STRIP, *_WITH_ADJACENCY).
 *
 * Approach: emit a topology dispatch at the start of each store_output
 * lowering. The shader reads vs.xfb_topology sysval at runtime and branches
 * into per-topology emission logic. For each affected topology, the lowered
 * code emits guarded conditional stores — one per primitive this vertex
 * contributes to, computing the output buffer position via primitive index
 * and slot within the decomposed primitive.
 *
 * For LIST topologies (POINT/LINE/TRIANGLE LIST), takes a fast path that
 * matches iter13's single-store behavior.
 *
 * For TRIANGLE_FAN, the central vertex (v=0) contributes to ALL primitives
 * as slot 2 — handled via a NIR loop bounded by num_vertices.
 *
 * See ~/src/panvk-bifrost/iter17/phase{0,1,2}_*.md for full design context.
 */
|
|
|
|
#include "panvk_macros.h"
|
|
|
|
#if PAN_ARCH < 9
|
|
|
|
#include "panvk_shader.h"
|
|
|
|
#include "compiler/nir/nir_builder.h"
|
|
#include "pan_nir.h"
|
|
|
|
#include <vulkan/vulkan_core.h>
|
|
|
|
/* ----- Address arithmetic ----- */
|
|
|
|
static nir_def *
|
|
xfb_store_addr(nir_builder *b, nir_def *buf, nir_def *out_idx,
|
|
uint16_t stride, uint16_t offset_bytes)
|
|
{
|
|
nir_def *byte_off = nir_iadd_imm(b,
|
|
nir_imul_imm(b, out_idx, stride), offset_bytes);
|
|
return nir_iadd(b, buf, nir_u2u64(b, byte_off));
|
|
}
|
|
|
|
static void
|
|
emit_list_store(nir_builder *b, nir_def *buf, nir_def *output_count,
|
|
nir_def *instance_id, nir_def *raw_vid, nir_def *value,
|
|
uint16_t stride, uint16_t offset_bytes)
|
|
{
|
|
nir_def *out_idx = nir_iadd(b,
|
|
nir_imul(b, instance_id, output_count), raw_vid);
|
|
nir_def *addr = xfb_store_addr(b, buf, out_idx, stride, offset_bytes);
|
|
nir_store_global(b, value, addr);
|
|
}
|
|
|
|
static void
|
|
emit_prim_store(nir_builder *b, nir_def *buf, nir_def *output_count,
|
|
nir_def *instance_id, nir_def *eligible,
|
|
nir_def *prim_idx, nir_def *slot,
|
|
uint32_t verts_per_prim,
|
|
nir_def *value, uint16_t stride, uint16_t offset_bytes)
|
|
{
|
|
nir_push_if(b, eligible);
|
|
{
|
|
nir_def *out_idx = nir_iadd(b,
|
|
nir_imul(b, instance_id, output_count),
|
|
nir_iadd(b, nir_imul_imm(b, prim_idx, verts_per_prim), slot));
|
|
nir_def *addr = xfb_store_addr(b, buf, out_idx, stride, offset_bytes);
|
|
nir_store_global(b, value, addr);
|
|
}
|
|
nir_pop_if(b, NULL);
|
|
}
|
|
|
|
/* ----- Per-topology emission ----- */
|
|
|
|
/* TRIANGLE_STRIP: vertex v contributes to prims v, v-1, v-2 (per eligibility). */
|
|
static void
|
|
emit_tri_strip(nir_builder *b, nir_def *v, nir_def *N,
|
|
nir_def *buf, nir_def *output_count, nir_def *instance_id,
|
|
nir_def *value, uint16_t stride, uint16_t offset_bytes)
|
|
{
|
|
nir_def *Nm2 = nir_iadd_imm(b, N, -2);
|
|
nir_def *Nm1 = nir_iadd_imm(b, N, -1);
|
|
|
|
/* Prim v, slot 0: v < N-2 */
|
|
emit_prim_store(b, buf, output_count, instance_id,
|
|
nir_ult(b, v, Nm2),
|
|
v, nir_imm_int(b, 0), 3, value, stride, offset_bytes);
|
|
|
|
/* Prim v-1, slot = 1 if prim even else 2: 1 <= v < N-1 */
|
|
{
|
|
nir_def *prim = nir_iadd_imm(b, v, -1);
|
|
nir_def *parity = nir_iand_imm(b, prim, 1u);
|
|
nir_def *slot = nir_iadd_imm(b, parity, 1);
|
|
nir_def *eligible = nir_iand(b,
|
|
nir_uge(b, v, nir_imm_int(b, 1)),
|
|
nir_ult(b, v, Nm1));
|
|
emit_prim_store(b, buf, output_count, instance_id, eligible,
|
|
prim, slot, 3, value, stride, offset_bytes);
|
|
}
|
|
|
|
/* Prim v-2, slot = 2 if prim even else 1: 2 <= v < N */
|
|
{
|
|
nir_def *prim = nir_iadd_imm(b, v, -2);
|
|
nir_def *parity = nir_iand_imm(b, prim, 1u);
|
|
nir_def *slot = nir_isub(b, nir_imm_int(b, 2), parity);
|
|
nir_def *eligible = nir_iand(b,
|
|
nir_uge(b, v, nir_imm_int(b, 2)),
|
|
nir_ult(b, v, N));
|
|
emit_prim_store(b, buf, output_count, instance_id, eligible,
|
|
prim, slot, 3, value, stride, offset_bytes);
|
|
}
|
|
}
|
|
|
|
/* LINE_STRIP: vertex v contributes to prim v slot 0 + prim v-1 slot 1. */
|
|
static void
|
|
emit_line_strip(nir_builder *b, nir_def *v, nir_def *N,
|
|
nir_def *buf, nir_def *output_count, nir_def *instance_id,
|
|
nir_def *value, uint16_t stride, uint16_t offset_bytes)
|
|
{
|
|
nir_def *Nm1 = nir_iadd_imm(b, N, -1);
|
|
|
|
/* Prim v, slot 0: v < N-1 */
|
|
emit_prim_store(b, buf, output_count, instance_id,
|
|
nir_ult(b, v, Nm1),
|
|
v, nir_imm_int(b, 0), 2, value, stride, offset_bytes);
|
|
|
|
/* Prim v-1, slot 1: 1 <= v < N */
|
|
{
|
|
nir_def *prim = nir_iadd_imm(b, v, -1);
|
|
nir_def *eligible = nir_iand(b,
|
|
nir_uge(b, v, nir_imm_int(b, 1)),
|
|
nir_ult(b, v, N));
|
|
emit_prim_store(b, buf, output_count, instance_id, eligible,
|
|
prim, nir_imm_int(b, 1), 2, value, stride, offset_bytes);
|
|
}
|
|
}
|
|
|
|
/* TRIANGLE_FAN: prim p emits {p+1, p+2, 0}.
|
|
* vertex v=0: contributes to ALL prims as slot 2 (loop required)
|
|
* vertex v>=1: contributes to prim v-1 as slot 0 (if 1 <= v <= N-2)
|
|
* vertex v>=2: contributes to prim v-2 as slot 1 (if 2 <= v <= N-1)
|
|
*/
|
|
static void
|
|
emit_tri_fan(nir_builder *b, nir_def *v, nir_def *N,
|
|
nir_def *buf, nir_def *output_count, nir_def *instance_id,
|
|
nir_def *value, uint16_t stride, uint16_t offset_bytes)
|
|
{
|
|
nir_def *Nm1 = nir_iadd_imm(b, N, -1);
|
|
nir_def *Nm2 = nir_iadd_imm(b, N, -2);
|
|
|
|
/* Prim v-1, slot 0: 1 <= v < N-1 */
|
|
{
|
|
nir_def *prim = nir_iadd_imm(b, v, -1);
|
|
nir_def *eligible = nir_iand(b,
|
|
nir_uge(b, v, nir_imm_int(b, 1)),
|
|
nir_ult(b, v, Nm1));
|
|
emit_prim_store(b, buf, output_count, instance_id, eligible,
|
|
prim, nir_imm_int(b, 0), 3, value, stride, offset_bytes);
|
|
}
|
|
|
|
/* Prim v-2, slot 1: 2 <= v < N */
|
|
{
|
|
nir_def *prim = nir_iadd_imm(b, v, -2);
|
|
nir_def *eligible = nir_iand(b,
|
|
nir_uge(b, v, nir_imm_int(b, 2)),
|
|
nir_ult(b, v, N));
|
|
emit_prim_store(b, buf, output_count, instance_id, eligible,
|
|
prim, nir_imm_int(b, 1), 3, value, stride, offset_bytes);
|
|
}
|
|
|
|
/* Central vertex (v == 0): loop over all prims, write to slot 2. */
|
|
nir_push_if(b, nir_ieq_imm(b, v, 0));
|
|
{
|
|
nir_variable *p_var = nir_local_variable_create(b->impl,
|
|
glsl_uint_type(), "fan_p");
|
|
nir_store_var(b, p_var, nir_imm_int(b, 0), 0x1);
|
|
nir_push_loop(b);
|
|
{
|
|
nir_def *p = nir_load_var(b, p_var);
|
|
nir_push_if(b, nir_uge(b, p, Nm2));
|
|
{
|
|
nir_jump(b, nir_jump_break);
|
|
}
|
|
nir_pop_if(b, NULL);
|
|
|
|
nir_def *out_idx = nir_iadd(b,
|
|
nir_imul(b, instance_id, output_count),
|
|
nir_iadd_imm(b, nir_imul_imm(b, p, 3), 2));
|
|
nir_def *addr = xfb_store_addr(b, buf, out_idx, stride, offset_bytes);
|
|
nir_store_global(b, value, addr);
|
|
|
|
nir_store_var(b, p_var, nir_iadd_imm(b, p, 1), 0x1);
|
|
}
|
|
nir_pop_loop(b, NULL);
|
|
}
|
|
nir_pop_if(b, NULL);
|
|
}
|
|
|
|
/* LINE_LIST_WITH_ADJACENCY: 4-vertex groups [4i..4i+3]; output {4i+1, 4i+2}.
|
|
* v contributes if v%4 == 1: prim v/4 slot 0
|
|
* v contributes if v%4 == 2: prim v/4 slot 1
|
|
*/
|
|
static void
|
|
emit_line_list_adj(nir_builder *b, nir_def *v, nir_def *N,
|
|
nir_def *buf, nir_def *output_count, nir_def *instance_id,
|
|
nir_def *value, uint16_t stride, uint16_t offset_bytes)
|
|
{
|
|
(void)N; /* eligibility is mod-based, not range-based */
|
|
nir_def *vmod4 = nir_iand_imm(b, v, 3u);
|
|
nir_def *prim = nir_ushr_imm(b, v, 2); /* v / 4 */
|
|
|
|
emit_prim_store(b, buf, output_count, instance_id,
|
|
nir_ieq_imm(b, vmod4, 1),
|
|
prim, nir_imm_int(b, 0), 2, value, stride, offset_bytes);
|
|
|
|
emit_prim_store(b, buf, output_count, instance_id,
|
|
nir_ieq_imm(b, vmod4, 2),
|
|
prim, nir_imm_int(b, 1), 2, value, stride, offset_bytes);
|
|
}
|
|
|
|
/* LINE_STRIP_WITH_ADJACENCY: prim p emits {p+1, p+2}.
|
|
* v contributes to prim v-1 slot 0 (1 <= v <= N-2)
|
|
* v contributes to prim v-2 slot 1 (2 <= v <= N-1)
|
|
*/
|
|
static void
|
|
emit_line_strip_adj(nir_builder *b, nir_def *v, nir_def *N,
|
|
nir_def *buf, nir_def *output_count, nir_def *instance_id,
|
|
nir_def *value, uint16_t stride, uint16_t offset_bytes)
|
|
{
|
|
nir_def *Nm1 = nir_iadd_imm(b, N, -1);
|
|
nir_def *Nm2 = nir_iadd_imm(b, N, -2);
|
|
|
|
/* Prim v-1, slot 0: 1 <= v <= N-2 ⇔ v >= 1 AND v <= N-2 ⇔ v >= 1 AND v < N-1 */
|
|
{
|
|
nir_def *prim = nir_iadd_imm(b, v, -1);
|
|
nir_def *eligible = nir_iand(b,
|
|
nir_uge(b, v, nir_imm_int(b, 1)),
|
|
nir_ult(b, v, Nm1));
|
|
(void)Nm2;
|
|
emit_prim_store(b, buf, output_count, instance_id, eligible,
|
|
prim, nir_imm_int(b, 0), 2, value, stride, offset_bytes);
|
|
}
|
|
|
|
/* Prim v-2, slot 1: 2 <= v <= N-1 ⇔ v >= 2 AND v < N */
|
|
{
|
|
nir_def *prim = nir_iadd_imm(b, v, -2);
|
|
nir_def *eligible = nir_iand(b,
|
|
nir_uge(b, v, nir_imm_int(b, 2)),
|
|
nir_ult(b, v, N));
|
|
emit_prim_store(b, buf, output_count, instance_id, eligible,
|
|
prim, nir_imm_int(b, 1), 2, value, stride, offset_bytes);
|
|
}
|
|
}
|
|
|
|
/* TRIANGLE_LIST_WITH_ADJACENCY: 6-vertex groups; output {6i, 6i+2, 6i+4}.
|
|
* v contributes if v%6 == 0: prim v/6 slot 0
|
|
* v contributes if v%6 == 2: prim v/6 slot 1
|
|
* v contributes if v%6 == 4: prim v/6 slot 2
|
|
*/
|
|
static void
|
|
emit_tri_list_adj(nir_builder *b, nir_def *v, nir_def *N,
|
|
nir_def *buf, nir_def *output_count, nir_def *instance_id,
|
|
nir_def *value, uint16_t stride, uint16_t offset_bytes)
|
|
{
|
|
(void)N;
|
|
nir_def *vmod6 = nir_umod_imm(b, v, 6);
|
|
nir_def *prim = nir_udiv_imm(b, v, 6);
|
|
|
|
for (uint32_t slot = 0; slot < 3; slot++) {
|
|
emit_prim_store(b, buf, output_count, instance_id,
|
|
nir_ieq_imm(b, vmod6, slot * 2),
|
|
prim, nir_imm_int(b, slot), 3, value, stride, offset_bytes);
|
|
}
|
|
}
|
|
|
|
/* TRIANGLE_STRIP_WITH_ADJACENCY: prim i emits:
|
|
* even i: {2i, 2i+2, 2i+4} (slots 0, 1, 2 ← input indices 2i, 2i+2, 2i+4)
|
|
* odd i: {2i, 2i+4, 2i+2} (slots 0, 1, 2 ← input indices 2i, 2i+4, 2i+2)
|
|
*
|
|
* Only EVEN input vertices contribute (since all output indices are 2*something).
|
|
* For even input v:
|
|
* prim v/2 slot 0 (always, if v/2 < N/2-2)
|
|
* prim (v-2)/2 slot 1 if (v-2)/2 even, slot 2 if odd (when v >= 2)
|
|
* prim (v-4)/2 slot 2 if (v-4)/2 even, slot 1 if odd (when v >= 4)
|
|
*/
|
|
static void
|
|
emit_tri_strip_adj(nir_builder *b, nir_def *v, nir_def *N,
|
|
nir_def *buf, nir_def *output_count, nir_def *instance_id,
|
|
nir_def *value, uint16_t stride, uint16_t offset_bytes)
|
|
{
|
|
/* Bail for odd input vertices — they never contribute. */
|
|
nir_def *v_is_even = nir_ieq_imm(b, nir_iand_imm(b, v, 1u), 0);
|
|
nir_push_if(b, v_is_even);
|
|
{
|
|
nir_def *N_half = nir_ushr_imm(b, N, 1);
|
|
nir_def *max_prim = nir_iadd_imm(b, N_half, -2); /* N/2 - 2 */
|
|
nir_def *v_half = nir_ushr_imm(b, v, 1);
|
|
|
|
/* Prim v/2 slot 0: v/2 < N/2 - 2 */
|
|
emit_prim_store(b, buf, output_count, instance_id,
|
|
nir_ult(b, v_half, max_prim),
|
|
v_half, nir_imm_int(b, 0), 3, value, stride, offset_bytes);
|
|
|
|
/* Prim (v-2)/2 = v/2 - 1: v >= 2 AND prim < N/2-2 */
|
|
{
|
|
nir_def *prim = nir_iadd_imm(b, v_half, -1);
|
|
nir_def *parity = nir_iand_imm(b, prim, 1u);
|
|
nir_def *slot = nir_iadd_imm(b, parity, 1); /* even→1, odd→2 */
|
|
nir_def *eligible = nir_iand(b,
|
|
nir_uge(b, v, nir_imm_int(b, 2)),
|
|
nir_ult(b, prim, max_prim));
|
|
emit_prim_store(b, buf, output_count, instance_id, eligible,
|
|
prim, slot, 3, value, stride, offset_bytes);
|
|
}
|
|
|
|
/* Prim (v-4)/2 = v/2 - 2: v >= 4 AND prim < N/2-2 */
|
|
{
|
|
nir_def *prim = nir_iadd_imm(b, v_half, -2);
|
|
nir_def *parity = nir_iand_imm(b, prim, 1u);
|
|
nir_def *slot = nir_isub(b, nir_imm_int(b, 2), parity); /* even→2, odd→1 */
|
|
nir_def *eligible = nir_iand(b,
|
|
nir_uge(b, v, nir_imm_int(b, 4)),
|
|
nir_ult(b, prim, max_prim));
|
|
emit_prim_store(b, buf, output_count, instance_id, eligible,
|
|
prim, slot, 3, value, stride, offset_bytes);
|
|
}
|
|
}
|
|
nir_pop_if(b, NULL);
|
|
}
|
|
|
|
/* ----- Main lowering: per store_output XFB channel ----- */
|
|
|
|
static void
|
|
lower_xfb_output_iter17(nir_builder *b, nir_intrinsic_instr *intr,
|
|
unsigned channel_idx, unsigned num_components,
|
|
unsigned buffer, unsigned offset_words)
|
|
{
|
|
assert(buffer < MAX_XFB_BUFFERS);
|
|
assert(nir_intrinsic_component(intr) == 0);
|
|
|
|
uint16_t stride = b->shader->info.xfb_stride[buffer] * 4;
|
|
assert(stride != 0);
|
|
uint16_t offset_bytes = offset_words * 4;
|
|
|
|
BITSET_SET(b->shader->info.system_values_read, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE);
|
|
BITSET_SET(b->shader->info.system_values_read, SYSTEM_VALUE_INSTANCE_ID);
|
|
|
|
nir_def *topology = load_sysval(b, graphics, 32, vs.xfb_topology);
|
|
nir_def *out_count = load_sysval(b, graphics, 32, vs.xfb_output_count);
|
|
nir_def *N = nir_load_num_vertices(b);
|
|
nir_def *v = nir_load_raw_vertex_id_pan(b);
|
|
nir_def *instance = nir_load_instance_id(b);
|
|
nir_def *buf = nir_load_xfb_address(b, 64, .base = buffer);
|
|
|
|
nir_def *src = intr->src[0].ssa;
|
|
nir_component_mask_t mask = nir_component_mask(num_components);
|
|
nir_def *value = nir_channels(b, src, mask << channel_idx);
|
|
|
|
/* Topology dispatch ladder. LIST first (fast path). */
|
|
nir_push_if(b, nir_ieq_imm(b, topology, PANVK_XFB_TOPO_LIST));
|
|
{
|
|
emit_list_store(b, buf, out_count, instance, v, value,
|
|
stride, offset_bytes);
|
|
}
|
|
nir_push_else(b, NULL);
|
|
{
|
|
/* iter17 Janet Finding 3: gate all non-LIST emission on
|
|
* output_count > 0. For degenerate input counts (N < min required
|
|
* for the topology), output_count is 0 and we must emit NO stores
|
|
* — otherwise N-2 / N-3 / etc. arithmetic underflows in the
|
|
* eligibility predicates and we falsely fire stores. */
|
|
nir_push_if(b, nir_ult(b, nir_imm_int(b, 0), out_count));
|
|
{
|
|
nir_push_if(b, nir_ieq_imm(b, topology, PANVK_XFB_TOPO_TRI_STRIP));
|
|
{
|
|
emit_tri_strip(b, v, N, buf, out_count, instance, value,
|
|
stride, offset_bytes);
|
|
}
|
|
nir_push_else(b, NULL);
|
|
{
|
|
nir_push_if(b, nir_ieq_imm(b, topology, PANVK_XFB_TOPO_LINE_STRIP));
|
|
{
|
|
emit_line_strip(b, v, N, buf, out_count, instance, value,
|
|
stride, offset_bytes);
|
|
}
|
|
nir_push_else(b, NULL);
|
|
{
|
|
nir_push_if(b, nir_ieq_imm(b, topology, PANVK_XFB_TOPO_TRI_FAN));
|
|
{
|
|
emit_tri_fan(b, v, N, buf, out_count, instance, value,
|
|
stride, offset_bytes);
|
|
}
|
|
nir_push_else(b, NULL);
|
|
{
|
|
nir_push_if(b, nir_ieq_imm(b, topology, PANVK_XFB_TOPO_LINE_LIST_ADJ));
|
|
{
|
|
emit_line_list_adj(b, v, N, buf, out_count, instance, value,
|
|
stride, offset_bytes);
|
|
}
|
|
nir_push_else(b, NULL);
|
|
{
|
|
nir_push_if(b, nir_ieq_imm(b, topology, PANVK_XFB_TOPO_LINE_STRIP_ADJ));
|
|
{
|
|
emit_line_strip_adj(b, v, N, buf, out_count, instance, value,
|
|
stride, offset_bytes);
|
|
}
|
|
nir_push_else(b, NULL);
|
|
{
|
|
nir_push_if(b, nir_ieq_imm(b, topology, PANVK_XFB_TOPO_TRI_LIST_ADJ));
|
|
{
|
|
emit_tri_list_adj(b, v, N, buf, out_count, instance, value,
|
|
stride, offset_bytes);
|
|
}
|
|
nir_push_else(b, NULL);
|
|
{
|
|
/* TRI_STRIP_ADJ — last case */
|
|
emit_tri_strip_adj(b, v, N, buf, out_count, instance, value,
|
|
stride, offset_bytes);
|
|
}
|
|
nir_pop_if(b, NULL);
|
|
}
|
|
nir_pop_if(b, NULL);
|
|
}
|
|
nir_pop_if(b, NULL);
|
|
}
|
|
nir_pop_if(b, NULL);
|
|
}
|
|
nir_pop_if(b, NULL);
|
|
}
|
|
nir_pop_if(b, NULL);
|
|
}
|
|
nir_pop_if(b, NULL); /* Janet Finding 3: close output_count > 0 guard */
|
|
}
|
|
nir_pop_if(b, NULL);
|
|
}
|
|
|
|
/* Mirror of pan_nir_lower_xfb's lower_xfb: load_vertex_id rewrite +
|
|
* dispatch store_output through our topology-aware emission. */
|
|
static bool
|
|
lower_xfb_iter17(nir_builder *b, nir_intrinsic_instr *intr,
|
|
UNUSED void *data)
|
|
{
|
|
if (intr->intrinsic == nir_intrinsic_load_vertex_id) {
|
|
b->cursor = nir_instr_remove(&intr->instr);
|
|
nir_def *repl = nir_iadd(b, nir_load_raw_vertex_id_pan(b),
|
|
nir_load_raw_vertex_offset_pan(b));
|
|
nir_def_rewrite_uses(&intr->def, repl);
|
|
return true;
|
|
}
|
|
|
|
if (intr->intrinsic != nir_intrinsic_store_output)
|
|
return false;
|
|
|
|
bool progress = false;
|
|
b->cursor = nir_before_instr(&intr->instr);
|
|
|
|
/* io_xfb has only out[0,1]; the other 2 channels are in io_xfb2.
|
|
* Outer loop selects which annotation; inner picks which channel. */
|
|
for (unsigned i = 0; i < 2; ++i) {
|
|
nir_io_xfb xfb = i ? nir_intrinsic_io_xfb2(intr)
|
|
: nir_intrinsic_io_xfb(intr);
|
|
for (unsigned j = 0; j < 2; ++j) {
|
|
if (!xfb.out[j].num_components)
|
|
continue;
|
|
lower_xfb_output_iter17(b, intr, i * 2 + j, xfb.out[j].num_components,
|
|
xfb.out[j].buffer, xfb.out[j].offset);
|
|
progress = true;
|
|
}
|
|
}
|
|
|
|
if (progress)
|
|
nir_instr_remove(&intr->instr);
|
|
return progress;
|
|
}
|
|
|
|
bool
|
|
panvk_per_arch(nir_lower_xfb)(nir_shader *nir)
|
|
{
|
|
return nir_shader_intrinsics_pass(
|
|
nir, lower_xfb_iter17, nir_metadata_control_flow, NULL);
|
|
}
|
|
|
|
#endif /* PAN_ARCH < 9 */
|