initial seed: retrofit campaign lineage from local working trees
panvk-bifrost campaigns (r1..r4 Vulkan compositor + r5.video1 Vulkan
video decode) shipped before this repo existed; the deliverable
patches live in marfrit-packages, but the reasoning chain, phase docs,
and source-state evidence lived only in local working trees on the
development host.
This retrofit imports:
- mesa-panvk-bifrost/ — r1..r4 era phase docs (iter1..iter18)
(libmali stub blobs at iter18/blob/ excluded
— 109MB of RE artifacts replaced with a README
pointer)
- mesa-panvk-bifrost-video/ — sibling campaign phase docs + probe
- evidence/ — frozen .tgz source snapshots at each milestone
(basis for the 0005 patch diff generation)
Future iterations should branch off here from day one, so each iter is
a commit rather than a snapshot. See [[feedback-session-local-process-pins]]
for the process drift this retrofit closes.
Total: 1.9 MB across 124 files.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,504 @@
|
||||
/*
|
||||
* iter16 winding-order regression probe for PanVk-Bifrost.
|
||||
*
|
||||
* Phase 3 of iter16. The 162 CTS dEQP-VK.transform_feedback.simple.winding_*
|
||||
* failures (catalogued in iter15) all share the same root cause: iter13's
|
||||
* pan_nir_lower_xfb captures one entry per VS invocation, which for non-LIST
|
||||
* topologies gives ONE OUTPUT PER INPUT VERTEX. The Vulkan spec requires
|
||||
* primitive-decomposed capture: an N-vertex triangle strip must produce
|
||||
* 3*(N-2) captured entries with the right per-primitive winding order.
|
||||
*
|
||||
* This probe exercises the canonical case: triangle strip with 8 input
|
||||
* vertices, expecting 18 captured entries arranged as 6 triangles. The
|
||||
* verifier accepts any rotation within each primitive (per CTS's rule)
|
||||
* but enforces the winding direction.
|
||||
*
|
||||
* Pre-iter16 behavior (current iter13/r3 driver): captured count = 8
|
||||
* → PROBE FAILS (under-capture).
|
||||
* Post-iter16 behavior: captured count = 18 in decomposed order
|
||||
* → PROBE PASSES.
|
||||
*
|
||||
* Parameterized so we can add LINE_STRIP, TRIANGLE_FAN, *_ADJACENCY tests
|
||||
* as the fix expands in Phase 4. For now, only TRIANGLE_STRIP is wired up.
|
||||
*/
|
||||
|
||||
#include <errno.h>
|
||||
#include <stddef.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include <vulkan/vulkan.h>
|
||||
|
||||
/* Compiled vertex shader, resolved relative to the working directory. */
#define VSPV_PATH "probe_winding.vert.spv"

/* Progress marker on stderr; `name` must be a string literal because it is
 * pasted into the format string. Flushed so it survives a later crash. */
#define STEP(name)                                   \
    do {                                             \
        fprintf(stderr, "[step] " name "\n");        \
        fflush(stderr);                              \
    } while (0)

/* Evaluate a Vulkan call exactly once; on anything other than VK_SUCCESS
 * report the call text, the numeric result and the call site, then
 * terminate the process with exit status 2. */
#define VK_CHECK(call)                                                  \
    do {                                                                \
        const VkResult vk_check_rc_ = (call);                           \
        if (vk_check_rc_ == VK_SUCCESS)                                 \
            break;                                                      \
        fprintf(stderr, "[fail] " #call " => %d at %s:%d\n",            \
                (int)vk_check_rc_, __FILE__, __LINE__);                 \
        exit(2);                                                        \
    } while (0)
|
||||
|
||||
/* ---- Per-topology expected-output helper (mirrors CTS) ---- */
|
||||
|
||||
/*
|
||||
* For input vertex count N and topology T, returns the decomposed primitive
|
||||
* count and per-primitive vertex layout. CTS test logic uses identical lambdas
|
||||
* in vktTransformFeedbackSimpleTests.cpp around line 1241.
|
||||
*/
|
||||
struct topo_decomp {
|
||||
VkPrimitiveTopology topology;
|
||||
const char *name;
|
||||
uint32_t verts_per_prim;
|
||||
uint32_t (*prim_count)(uint32_t input_count);
|
||||
/* Fills out[verts_per_prim] with the input-vertex-IDs that should appear
|
||||
* in primitive prim_idx (in CTS winding order; rotations are accepted at
|
||||
* verify time). */
|
||||
void (*expected)(uint32_t prim_idx, uint32_t *out);
|
||||
};
|
||||
|
||||
/* TRIANGLE_STRIP: 3*(N-2) outputs.
|
||||
* Even prim i: {i, i+1, i+2}
|
||||
* Odd prim i: {i, i+2, i+1}
|
||||
*/
|
||||
/* Triangles produced by an n-vertex triangle strip: n - 2, or none when
 * there are fewer than two vertices. */
static uint32_t prim_count_tri_strip(uint32_t n) {
    if (n < 2)
        return 0;
    return n - 2;
}
|
||||
/* Reference vertex IDs for triangle i of a strip. Odd-numbered triangles
 * swap their last two vertices so every triangle keeps the same winding. */
static void expected_tri_strip(uint32_t i, uint32_t *out) {
    const int odd = (i % 2u) != 0u;

    out[0] = i;
    if (odd) {
        out[1] = i + 2;
        out[2] = i + 1;
    } else {
        out[1] = i + 1;
        out[2] = i + 2;
    }
}
|
||||
|
||||
/* LINE_STRIP: 2*(N-1) outputs. Each prim i: {i, i+1} */
|
||||
/* Line segments produced by an n-vertex line strip: n - 1, or none for an
 * empty draw. */
static uint32_t prim_count_line_strip(uint32_t n) {
    if (n < 1)
        return 0;
    return n - 1;
}
|
||||
/* Reference vertex IDs for segment i of a line strip: it joins vertex i
 * to its successor. */
static void expected_line_strip(uint32_t i, uint32_t *out) {
    uint32_t *const first = &out[0];
    uint32_t *const second = &out[1];

    *first = i;
    *second = i + 1u;
}
|
||||
|
||||
/* TRIANGLE_FAN: 3*(N-2) outputs. Each prim i: {i+1, i+2, 0} */
|
||||
/* Triangles produced by an n-vertex triangle fan: n - 2, or none when
 * there are fewer than two vertices. */
static uint32_t prim_count_tri_fan(uint32_t n) {
    if (n < 2)
        return 0;
    return n - 2;
}
|
||||
/* Reference vertex IDs for triangle i of a fan: two consecutive rim
 * vertices followed by the shared hub vertex 0. */
static void expected_tri_fan(uint32_t i, uint32_t *out) {
    const uint32_t rim = i + 1u;

    out[0] = rim;
    out[1] = i + 2u;
    out[2] = 0u;
}
|
||||
|
||||
static const struct topo_decomp TOPO_TESTS[] = {
|
||||
{ VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, "TRIANGLE_STRIP", 3,
|
||||
prim_count_tri_strip, expected_tri_strip },
|
||||
{ VK_PRIMITIVE_TOPOLOGY_LINE_STRIP, "LINE_STRIP", 2,
|
||||
prim_count_line_strip, expected_line_strip },
|
||||
{ VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN, "TRIANGLE_FAN", 3,
|
||||
prim_count_tri_fan, expected_tri_fan },
|
||||
};
|
||||
#define NUM_TOPO_TESTS (sizeof(TOPO_TESTS) / sizeof(TOPO_TESTS[0]))
|
||||
|
||||
/* ---- Vulkan plumbing ---- */
|
||||
|
||||
/*
 * Load a SPIR-V binary from disk.
 *
 * path      - file to read (opened in binary mode).
 * out_bytes - receives the size of the file in bytes.
 *
 * Returns a malloc'd buffer holding the whole file; the caller owns it and
 * must free() it. Any failure (open, seek, size query, allocation, short
 * read, or a size that is zero or not a multiple of 4 and therefore cannot
 * be a SPIR-V word stream) is reported on stderr and ends the process with
 * exit status 3, so the function never returns NULL.
 */
static uint32_t *read_spv(const char *path, size_t *out_bytes) {
    FILE *f = fopen(path, "rb");
    if (!f) {
        fprintf(stderr, "[fail] open %s: %s\n", path, strerror(errno));
        exit(3);
    }

    /* Determine the file size by seeking to the end and back. */
    if (fseek(f, 0, SEEK_END) != 0) {
        fprintf(stderr, "[fail] seek %s: %s\n", path, strerror(errno));
        exit(3);
    }
    long n = ftell(f);
    if (n < 0) {
        fprintf(stderr, "[fail] tell %s: %s\n", path, strerror(errno));
        exit(3);
    }
    if (fseek(f, 0, SEEK_SET) != 0) {
        fprintf(stderr, "[fail] rewind %s: %s\n", path, strerror(errno));
        exit(3);
    }

    /* The buffer is consumed as uint32_t words, so a ragged or empty file
     * is rejected here rather than handed to the driver. */
    if (n == 0 || (n % (long)sizeof(uint32_t)) != 0) {
        fprintf(stderr, "[fail] %s: size %ld is not a non-zero multiple of 4\n",
                path, n);
        exit(3);
    }

    uint32_t *buf = malloc((size_t)n);
    if (!buf) {
        fprintf(stderr, "[fail] %s: cannot allocate %ld bytes\n", path, n);
        exit(3);
    }

    size_t got = fread(buf, 1, (size_t)n, f);
    if (got != (size_t)n) {
        fprintf(stderr, "[fail] read %s: got %zu of %ld bytes\n", path, got, n);
        exit(3);
    }
    fclose(f);

    *out_bytes = (size_t)n;
    return buf;
}
|
||||
|
||||
static uint32_t pick_host_visible(const VkPhysicalDeviceMemoryProperties *mp,
|
||||
uint32_t type_bits) {
|
||||
VkMemoryPropertyFlags want =
|
||||
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
|
||||
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
|
||||
for (uint32_t i = 0; i < mp->memoryTypeCount; i++) {
|
||||
if ((type_bits & (1u << i)) &&
|
||||
(mp->memoryTypes[i].propertyFlags & want) == want) return i;
|
||||
}
|
||||
fprintf(stderr, "[fail] no HOST_VISIBLE+COHERENT memtype\n"); exit(4);
|
||||
}
|
||||
|
||||
/* ---- Verifier (rotation-aware, mirrors CTS verifyVertexDataWithWinding) ---- */
|
||||
|
||||
/* Returns 1 if got[verts_per_prim] is a rotation of ref[verts_per_prim], 0 else. */
|
||||
/*
 * Test whether got[0..vpp) equals ref[0..vpp) under some cyclic shift.
 * Only rotations are tried, never reversals, so the winding direction has
 * to agree. Returns 1 on a match and 0 otherwise (including vpp == 0).
 */
static int rotations_match(const uint32_t *ref, const uint32_t *got, uint32_t vpp) {
    uint32_t shift = 0;

    while (shift < vpp) {
        /* Count how many leading entries agree for this shift. */
        uint32_t matched = 0;
        while (matched < vpp && got[matched] == ref[(shift + matched) % vpp])
            matched++;

        if (matched == vpp)
            return 1;
        shift++;
    }
    return 0;
}
|
||||
|
||||
/* Returns number of mismatched primitives. Prints details for each mismatch. */
|
||||
static int verify_winding(const struct topo_decomp *t, uint32_t input_count,
|
||||
const uint32_t *got, uint32_t got_count) {
|
||||
uint32_t expected_prims = t->prim_count(input_count);
|
||||
uint32_t expected_count = expected_prims * t->verts_per_prim;
|
||||
if (got_count != expected_count) {
|
||||
fprintf(stderr, "[diff] %s: captured count %u, expected %u "
|
||||
"(%u prims × %u verts)\n",
|
||||
t->name, got_count, expected_count,
|
||||
expected_prims, t->verts_per_prim);
|
||||
return -1;
|
||||
}
|
||||
int mismatches = 0;
|
||||
for (uint32_t p = 0; p < expected_prims; p++) {
|
||||
uint32_t ref[8] = {0};
|
||||
t->expected(p, ref);
|
||||
const uint32_t *prim_got = got + p * t->verts_per_prim;
|
||||
if (!rotations_match(ref, prim_got, t->verts_per_prim)) {
|
||||
fprintf(stderr, "[diff] %s prim %u: expected rotation of {",
|
||||
t->name, p);
|
||||
for (uint32_t v = 0; v < t->verts_per_prim; v++)
|
||||
fprintf(stderr, "%s%u", v ? "," : "", ref[v]);
|
||||
fprintf(stderr, "} got {");
|
||||
for (uint32_t v = 0; v < t->verts_per_prim; v++)
|
||||
fprintf(stderr, "%s%u", v ? "," : "", prim_got[v]);
|
||||
fprintf(stderr, "}\n");
|
||||
mismatches++;
|
||||
}
|
||||
}
|
||||
return mismatches;
|
||||
}
|
||||
|
||||
/* ---- Per-topology test ---- */
|
||||
|
||||
static int run_one_topology(VkDevice dev, VkQueue queue, uint32_t qfam,
|
||||
VkRenderPass dummy_rp,
|
||||
PFN_vkCmdBindTransformFeedbackBuffersEXT pBindXfb,
|
||||
PFN_vkCmdBeginTransformFeedbackEXT pBeginXfb,
|
||||
PFN_vkCmdEndTransformFeedbackEXT pEndXfb,
|
||||
PFN_vkCmdBeginRenderingKHR pBeginRendering,
|
||||
PFN_vkCmdEndRenderingKHR pEndRendering,
|
||||
VkPhysicalDeviceMemoryProperties *mp,
|
||||
VkShaderModule vsm,
|
||||
const struct topo_decomp *t,
|
||||
uint32_t input_count) {
|
||||
/* Capacity: expected_prims × verts_per_prim × 4. Pad to 64 entries
|
||||
* (256 bytes) so iter13's under-capture is visible (sentinel-filled tail). */
|
||||
const uint32_t buf_words = 64;
|
||||
const VkDeviceSize buf_bytes = buf_words * sizeof(uint32_t);
|
||||
|
||||
fprintf(stderr, "\n=== %s with %u input verts ===\n", t->name, input_count);
|
||||
|
||||
/* XFB capture buffer */
|
||||
VkBufferCreateInfo bci = {
|
||||
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
|
||||
.size = buf_bytes,
|
||||
.usage = VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT |
|
||||
VK_BUFFER_USAGE_TRANSFER_DST_BIT,
|
||||
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
|
||||
};
|
||||
VkBuffer xfb_buf;
|
||||
VK_CHECK(vkCreateBuffer(dev, &bci, NULL, &xfb_buf));
|
||||
|
||||
VkMemoryRequirements mr;
|
||||
vkGetBufferMemoryRequirements(dev, xfb_buf, &mr);
|
||||
VkMemoryAllocateInfo mai = {
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
|
||||
.allocationSize = mr.size,
|
||||
.memoryTypeIndex = pick_host_visible(mp, mr.memoryTypeBits),
|
||||
};
|
||||
VkDeviceMemory xfb_mem;
|
||||
VK_CHECK(vkAllocateMemory(dev, &mai, NULL, &xfb_mem));
|
||||
VK_CHECK(vkBindBufferMemory(dev, xfb_buf, xfb_mem, 0));
|
||||
void *mapped;
|
||||
VK_CHECK(vkMapMemory(dev, xfb_mem, 0, VK_WHOLE_SIZE, 0, &mapped));
|
||||
/* Sentinel-fill so we can distinguish "captured 0xDEADBEEF" from
|
||||
* "GPU didn't write here" — under-capture leaves the tail at sentinel. */
|
||||
uint32_t *u32 = (uint32_t *)mapped;
|
||||
for (uint32_t i = 0; i < buf_words; i++) u32[i] = 0xDEADBEEFu;
|
||||
|
||||
/* Pipeline */
|
||||
VkPipelineLayoutCreateInfo plci = {
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
|
||||
};
|
||||
VkPipelineLayout pl;
|
||||
VK_CHECK(vkCreatePipelineLayout(dev, &plci, NULL, &pl));
|
||||
|
||||
VkPipelineShaderStageCreateInfo stages[1] = {
|
||||
{ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
|
||||
.stage = VK_SHADER_STAGE_VERTEX_BIT, .module = vsm, .pName = "main" },
|
||||
};
|
||||
VkPipelineVertexInputStateCreateInfo vi = {
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
|
||||
};
|
||||
VkPipelineInputAssemblyStateCreateInfo ia = {
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
|
||||
.topology = t->topology,
|
||||
};
|
||||
VkViewport vp_dummy = { 0, 0, 1, 1, 0.0f, 1.0f };
|
||||
VkRect2D sc_dummy = {{0,0}, {1,1}};
|
||||
VkPipelineViewportStateCreateInfo vp = {
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
|
||||
.viewportCount = 1, .pViewports = &vp_dummy,
|
||||
.scissorCount = 1, .pScissors = &sc_dummy,
|
||||
};
|
||||
VkPipelineRasterizationStateCreateInfo rs = {
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
|
||||
.rasterizerDiscardEnable = VK_TRUE,
|
||||
.polygonMode = VK_POLYGON_MODE_FILL,
|
||||
.cullMode = VK_CULL_MODE_NONE,
|
||||
.lineWidth = 1.0f,
|
||||
};
|
||||
VkPipelineMultisampleStateCreateInfo ms = {
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
|
||||
.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
|
||||
};
|
||||
VkPipelineRenderingCreateInfoKHR pri = {
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO_KHR,
|
||||
.colorAttachmentCount = 0,
|
||||
};
|
||||
VkGraphicsPipelineCreateInfo gpci = {
|
||||
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
|
||||
.pNext = &pri,
|
||||
.stageCount = 1, .pStages = stages,
|
||||
.pVertexInputState = &vi,
|
||||
.pInputAssemblyState = &ia,
|
||||
.pViewportState = &vp,
|
||||
.pRasterizationState = &rs,
|
||||
.pMultisampleState = &ms,
|
||||
.layout = pl,
|
||||
};
|
||||
VkPipeline pipe;
|
||||
VK_CHECK(vkCreateGraphicsPipelines(dev, VK_NULL_HANDLE, 1, &gpci, NULL, &pipe));
|
||||
|
||||
/* Command buffer */
|
||||
VkCommandPoolCreateInfo cpoolci = {
|
||||
.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
|
||||
.queueFamilyIndex = qfam,
|
||||
};
|
||||
VkCommandPool cpool;
|
||||
VK_CHECK(vkCreateCommandPool(dev, &cpoolci, NULL, &cpool));
|
||||
VkCommandBufferAllocateInfo cbai = {
|
||||
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
|
||||
.commandPool = cpool, .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
|
||||
.commandBufferCount = 1,
|
||||
};
|
||||
VkCommandBuffer cb;
|
||||
VK_CHECK(vkAllocateCommandBuffers(dev, &cbai, &cb));
|
||||
|
||||
VkCommandBufferBeginInfo cbbi = {
|
||||
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
|
||||
.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
|
||||
};
|
||||
VK_CHECK(vkBeginCommandBuffer(cb, &cbbi));
|
||||
|
||||
VkDeviceSize xfb_off = 0, xfb_size = buf_bytes;
|
||||
pBindXfb(cb, 0, 1, &xfb_buf, &xfb_off, &xfb_size);
|
||||
|
||||
VkRenderingInfoKHR ri = {
|
||||
.sType = VK_STRUCTURE_TYPE_RENDERING_INFO_KHR,
|
||||
.renderArea = {{0,0}, {1,1}},
|
||||
.layerCount = 1,
|
||||
.colorAttachmentCount = 0,
|
||||
};
|
||||
pBeginRendering(cb, &ri);
|
||||
vkCmdBindPipeline(cb, VK_PIPELINE_BIND_POINT_GRAPHICS, pipe);
|
||||
pBeginXfb(cb, 0, 0, NULL, NULL);
|
||||
vkCmdDraw(cb, input_count, 1, 0, 0);
|
||||
pEndXfb(cb, 0, 0, NULL, NULL);
|
||||
pEndRendering(cb);
|
||||
|
||||
VkBufferMemoryBarrier bb = {
|
||||
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
|
||||
.srcAccessMask = VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT,
|
||||
.dstAccessMask = VK_ACCESS_HOST_READ_BIT,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.buffer = xfb_buf, .offset = 0, .size = VK_WHOLE_SIZE,
|
||||
};
|
||||
vkCmdPipelineBarrier(cb,
|
||||
VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT,
|
||||
VK_PIPELINE_STAGE_HOST_BIT,
|
||||
0, 0, NULL, 1, &bb, 0, NULL);
|
||||
VK_CHECK(vkEndCommandBuffer(cb));
|
||||
|
||||
/* Submit + wait */
|
||||
VkFenceCreateInfo fci = { .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO };
|
||||
VkFence fence;
|
||||
VK_CHECK(vkCreateFence(dev, &fci, NULL, &fence));
|
||||
VkSubmitInfo si = {
|
||||
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
|
||||
.commandBufferCount = 1, .pCommandBuffers = &cb,
|
||||
};
|
||||
VK_CHECK(vkQueueSubmit(queue, 1, &si, fence));
|
||||
VkResult wr = vkWaitForFences(dev, 1, &fence, VK_TRUE, 10ULL * 1000 * 1000 * 1000);
|
||||
if (wr != VK_SUCCESS) {
|
||||
fprintf(stderr, "[fail] %s: vkWaitForFences => %d\n", t->name, wr);
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Read back: count contiguous non-sentinel words from offset 0. */
|
||||
uint32_t captured_count = 0;
|
||||
while (captured_count < buf_words && u32[captured_count] != 0xDEADBEEFu)
|
||||
captured_count++;
|
||||
|
||||
fprintf(stderr, "[info] %s: captured %u entries (sentinel-stopped)\n",
|
||||
t->name, captured_count);
|
||||
/* Print first few for debugging */
|
||||
if (captured_count > 0) {
|
||||
fprintf(stderr, "[info] first 8: ");
|
||||
for (uint32_t i = 0; i < captured_count && i < 8; i++)
|
||||
fprintf(stderr, "%u%s", u32[i], (i + 1 < 8 && i + 1 < captured_count) ? "," : "");
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
|
||||
int mismatches = verify_winding(t, input_count, u32, captured_count);
|
||||
|
||||
/* Teardown */
|
||||
vkUnmapMemory(dev, xfb_mem);
|
||||
vkDestroyFence(dev, fence, NULL);
|
||||
vkDestroyCommandPool(dev, cpool, NULL);
|
||||
vkDestroyPipeline(dev, pipe, NULL);
|
||||
vkDestroyPipelineLayout(dev, pl, NULL);
|
||||
vkDestroyBuffer(dev, xfb_buf, NULL);
|
||||
vkFreeMemory(dev, xfb_mem, NULL);
|
||||
(void)dummy_rp;
|
||||
|
||||
return mismatches;
|
||||
}
|
||||
|
||||
/* ---- main: bring up Vulkan, run all topology tests ---- */
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
/* Optional CLI: limit to one topology by name */
|
||||
const char *only = NULL;
|
||||
if (argc > 1) only = argv[1];
|
||||
|
||||
STEP("vkCreateInstance");
|
||||
VkApplicationInfo app = {
|
||||
.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO,
|
||||
.pApplicationName = "panvk-bifrost iter16 winding probe",
|
||||
.apiVersion = VK_API_VERSION_1_0,
|
||||
};
|
||||
const char *inst_exts[] = { "VK_KHR_get_physical_device_properties2" };
|
||||
VkInstanceCreateInfo ici = {
|
||||
.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
|
||||
.pApplicationInfo = &app,
|
||||
.enabledExtensionCount = 1,
|
||||
.ppEnabledExtensionNames = inst_exts,
|
||||
};
|
||||
VkInstance inst;
|
||||
VK_CHECK(vkCreateInstance(&ici, NULL, &inst));
|
||||
|
||||
uint32_t n_phys = 0;
|
||||
VK_CHECK(vkEnumeratePhysicalDevices(inst, &n_phys, NULL));
|
||||
VkPhysicalDevice *phys = calloc(n_phys, sizeof(*phys));
|
||||
VK_CHECK(vkEnumeratePhysicalDevices(inst, &n_phys, phys));
|
||||
VkPhysicalDevice gpu = phys[0];
|
||||
VkPhysicalDeviceMemoryProperties mp;
|
||||
vkGetPhysicalDeviceMemoryProperties(gpu, &mp);
|
||||
|
||||
uint32_t n_qf = 0;
|
||||
vkGetPhysicalDeviceQueueFamilyProperties(gpu, &n_qf, NULL);
|
||||
VkQueueFamilyProperties *qfp = calloc(n_qf, sizeof(*qfp));
|
||||
vkGetPhysicalDeviceQueueFamilyProperties(gpu, &n_qf, qfp);
|
||||
uint32_t qfam = UINT32_MAX;
|
||||
for (uint32_t i = 0; i < n_qf; i++)
|
||||
if (qfp[i].queueFlags & VK_QUEUE_GRAPHICS_BIT) { qfam = i; break; }
|
||||
|
||||
STEP("vkCreateDevice");
|
||||
const char *dev_exts[] = {
|
||||
"VK_KHR_multiview", "VK_KHR_maintenance2",
|
||||
"VK_KHR_create_renderpass2", "VK_KHR_depth_stencil_resolve",
|
||||
"VK_KHR_dynamic_rendering",
|
||||
"VK_EXT_transform_feedback",
|
||||
};
|
||||
VkPhysicalDeviceTransformFeedbackFeaturesEXT enable_xfb = {
|
||||
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT,
|
||||
.transformFeedback = VK_TRUE,
|
||||
.geometryStreams = VK_FALSE,
|
||||
};
|
||||
VkPhysicalDeviceDynamicRenderingFeaturesKHR dyn_feat = {
|
||||
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_FEATURES_KHR,
|
||||
.pNext = &enable_xfb,
|
||||
.dynamicRendering = VK_TRUE,
|
||||
};
|
||||
float qprio = 1.0f;
|
||||
VkDeviceQueueCreateInfo qci = {
|
||||
.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
|
||||
.queueFamilyIndex = qfam, .queueCount = 1, .pQueuePriorities = &qprio,
|
||||
};
|
||||
VkDeviceCreateInfo dci = {
|
||||
.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
|
||||
.pNext = &dyn_feat,
|
||||
.queueCreateInfoCount = 1, .pQueueCreateInfos = &qci,
|
||||
.enabledExtensionCount = sizeof(dev_exts)/sizeof(dev_exts[0]),
|
||||
.ppEnabledExtensionNames = dev_exts,
|
||||
};
|
||||
VkDevice dev;
|
||||
VK_CHECK(vkCreateDevice(gpu, &dci, NULL, &dev));
|
||||
VkQueue queue;
|
||||
vkGetDeviceQueue(dev, qfam, 0, &queue);
|
||||
|
||||
PFN_vkCmdBindTransformFeedbackBuffersEXT pBindXfb =
|
||||
(PFN_vkCmdBindTransformFeedbackBuffersEXT)vkGetDeviceProcAddr(
|
||||
dev, "vkCmdBindTransformFeedbackBuffersEXT");
|
||||
PFN_vkCmdBeginTransformFeedbackEXT pBeginXfb =
|
||||
(PFN_vkCmdBeginTransformFeedbackEXT)vkGetDeviceProcAddr(
|
||||
dev, "vkCmdBeginTransformFeedbackEXT");
|
||||
PFN_vkCmdEndTransformFeedbackEXT pEndXfb =
|
||||
(PFN_vkCmdEndTransformFeedbackEXT)vkGetDeviceProcAddr(
|
||||
dev, "vkCmdEndTransformFeedbackEXT");
|
||||
PFN_vkCmdBeginRenderingKHR pBeginRendering =
|
||||
(PFN_vkCmdBeginRenderingKHR)vkGetDeviceProcAddr(dev, "vkCmdBeginRenderingKHR");
|
||||
PFN_vkCmdEndRenderingKHR pEndRendering =
|
||||
(PFN_vkCmdEndRenderingKHR)vkGetDeviceProcAddr(dev, "vkCmdEndRenderingKHR");
|
||||
|
||||
/* Shader (shared across topology iterations) */
|
||||
size_t spv_bytes = 0;
|
||||
uint32_t *spv = read_spv(VSPV_PATH, &spv_bytes);
|
||||
VkShaderModuleCreateInfo smci = {
|
||||
.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
|
||||
.codeSize = spv_bytes, .pCode = spv,
|
||||
};
|
||||
VkShaderModule vsm;
|
||||
VK_CHECK(vkCreateShaderModule(dev, &smci, NULL, &vsm));
|
||||
free(spv);
|
||||
|
||||
/* Run each topology test */
|
||||
int total_fail = 0;
|
||||
int total_tested = 0;
|
||||
for (size_t i = 0; i < NUM_TOPO_TESTS; i++) {
|
||||
const struct topo_decomp *t = &TOPO_TESTS[i];
|
||||
if (only && strcmp(only, t->name) != 0) continue;
|
||||
total_tested++;
|
||||
int rc = run_one_topology(dev, queue, qfam, VK_NULL_HANDLE,
|
||||
pBindXfb, pBeginXfb, pEndXfb,
|
||||
pBeginRendering, pEndRendering,
|
||||
&mp, vsm, t, 8u);
|
||||
if (rc != 0) {
|
||||
total_fail++;
|
||||
fprintf(stderr, "[FAIL] %s: %d mismatch(es)\n", t->name, rc);
|
||||
} else {
|
||||
fprintf(stderr, "[PASS] %s\n", t->name);
|
||||
}
|
||||
}
|
||||
|
||||
vkDestroyShaderModule(dev, vsm, NULL);
|
||||
vkDestroyDevice(dev, NULL);
|
||||
vkDestroyInstance(inst, NULL);
|
||||
free(phys); free(qfp);
|
||||
|
||||
fprintf(stderr, "\n=== SUMMARY: %d/%d topology tests passed ===\n",
|
||||
total_tested - total_fail, total_tested);
|
||||
return total_fail == 0 ? 0 : 1;
|
||||
}
|
||||
Reference in New Issue
Block a user