/* * iter2 minimal Vulkan image-clear probe for panvk-bifrost campaign. * * Goal: exercise the image / layout-transition / transfer-op path on PanVk- * Bifrost (PineTab2 / Mali-G52 r1 MC1). Bridges from iter1 (compute) toward * iter3 (graphics) by adding only image-side machinery. * * Pipeline: * 1. Create 4x4 R8G8B8A8_UNORM image, optimal tiling, TRANSFER_DST|TRANSFER_SRC. * 2. Allocate device-local memory, bind. * 3. Create 64-byte staging buffer (TRANSFER_DST, host-visible), pre-fill 0xDEADBEEF. * 4. Record cmd buffer: * a. ImageBarrier UNDEFINED -> TRANSFER_DST_OPTIMAL * b. vkCmdClearColorImage -> color 0x11223344 (R=0x11 G=0x22 B=0x33 A=0x44) * c. ImageBarrier TRANSFER_DST_OPTIMAL -> TRANSFER_SRC_OPTIMAL * d. vkCmdCopyImageToBuffer 4x4 RGBA8 -> staging buffer * e. MemoryBarrier TRANSFER_WRITE -> HOST_READ * 5. Submit + fence-wait. * 6. Invalidate + readback: verify all 16 pixels = 0x44332211 (little-endian RGBA8). * * Pure Vulkan 1.0 core. No instance/device extensions requested. */ #include #include #include #include #include #include #define IMG_W 4 #define IMG_H 4 #define PIXELS (IMG_W * IMG_H) #define BYTES_PER_PIXEL 4 #define BUFFER_BYTES (PIXELS * BYTES_PER_PIXEL) /* 64 */ /* Clear color: R=0x11 G=0x22 B=0x33 A=0x44 → LE uint32 readback = 0x44332211. */ #define CLEAR_R 0x11u #define CLEAR_G 0x22u #define CLEAR_B 0x33u #define CLEAR_A 0x44u #define EXPECTED_PIXEL ((CLEAR_A << 24) | (CLEAR_B << 16) | (CLEAR_G << 8) | CLEAR_R) #define STEP(name) do { fprintf(stderr, "[step] " name "\n"); fflush(stderr); } while (0) #define VK_CHECK(call) do { \ VkResult _r = (call); \ if (_r != VK_SUCCESS) { \ fprintf(stderr, "[fail] " #call " => %d at %s:%d\n", \ (int)_r, __FILE__, __LINE__); \ exit(2); \ } \ } while (0) static uint32_t pick_memtype(const VkPhysicalDeviceMemoryProperties *mp, uint32_t type_bits, VkMemoryPropertyFlags want) { /* Exact match first. */ for (uint32_t i = 0; i < mp->memoryTypeCount; i++) { if ((type_bits & (1u << i)) && (mp->memoryTypes[i].propertyFlags & want) == want) return i; } fprintf(stderr, "[fail] no memory type matches type_bits=0x%x want=0x%x\n", type_bits, want); exit(4); } static uint32_t pick_host_visible(const VkPhysicalDeviceMemoryProperties *mp, uint32_t type_bits) { /* Prefer DEVICE_LOCAL|HOST_VISIBLE|HOST_COHERENT, else any HOST_VISIBLE. */ VkMemoryPropertyFlags pref = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; for (uint32_t i = 0; i < mp->memoryTypeCount; i++) { if ((type_bits & (1u << i)) && (mp->memoryTypes[i].propertyFlags & pref) == pref) return i; } for (uint32_t i = 0; i < mp->memoryTypeCount; i++) { if ((type_bits & (1u << i)) && (mp->memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) return i; } fprintf(stderr, "[fail] no HOST_VISIBLE memory type matches type_bits=0x%x\n", type_bits); exit(4); } static void image_barrier(VkCommandBuffer cb, VkImage img, VkImageLayout old_layout, VkImageLayout new_layout, VkAccessFlags src_access, VkAccessFlags dst_access, VkPipelineStageFlags src_stage, VkPipelineStageFlags dst_stage) { VkImageMemoryBarrier ib = { .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, .srcAccessMask = src_access, .dstAccessMask = dst_access, .oldLayout = old_layout, .newLayout = new_layout, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .image = img, .subresourceRange = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .baseMipLevel = 0, .levelCount = 1, .baseArrayLayer = 0, .layerCount = 1, }, }; vkCmdPipelineBarrier(cb, src_stage, dst_stage, 0, 0, NULL, 0, NULL, 1, &ib); } int main(void) { /* ---- instance ---------------------------------------------------------- */ STEP("vkCreateInstance"); VkApplicationInfo app = { .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, .pApplicationName = "panvk-bifrost iter2 image-clear probe", .apiVersion = VK_API_VERSION_1_0, }; VkInstanceCreateInfo ici = { .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, .pApplicationInfo = &app, }; VkInstance inst; VK_CHECK(vkCreateInstance(&ici, NULL, &inst)); /* ---- physical device + properties ------------------------------------- */ STEP("vkEnumeratePhysicalDevices"); uint32_t n_phys = 0; VK_CHECK(vkEnumeratePhysicalDevices(inst, &n_phys, NULL)); if (n_phys == 0) { fprintf(stderr, "[fail] no physical devices\n"); return 5; } VkPhysicalDevice *phys = calloc(n_phys, sizeof(*phys)); VK_CHECK(vkEnumeratePhysicalDevices(inst, &n_phys, phys)); VkPhysicalDevice gpu = phys[0]; VkPhysicalDeviceProperties pp; vkGetPhysicalDeviceProperties(gpu, &pp); fprintf(stderr, "[info] gpu='%s' apiVersion=%u.%u.%u\n", pp.deviceName, VK_VERSION_MAJOR(pp.apiVersion), VK_VERSION_MINOR(pp.apiVersion), VK_VERSION_PATCH(pp.apiVersion)); /* Sanity-check that R8G8B8A8_UNORM supports the ops we need. */ VkFormatProperties fmt_props; vkGetPhysicalDeviceFormatProperties(gpu, VK_FORMAT_R8G8B8A8_UNORM, &fmt_props); fprintf(stderr, "[info] R8G8B8A8_UNORM optimalTilingFeatures=0x%x\n", fmt_props.optimalTilingFeatures); if (!(fmt_props.optimalTilingFeatures & VK_FORMAT_FEATURE_TRANSFER_DST_BIT) || !(fmt_props.optimalTilingFeatures & VK_FORMAT_FEATURE_TRANSFER_SRC_BIT)) { fprintf(stderr, "[fail] R8G8B8A8_UNORM lacks TRANSFER_SRC|DST in optimal tiling\n"); return 9; } VkPhysicalDeviceMemoryProperties mp; vkGetPhysicalDeviceMemoryProperties(gpu, &mp); /* ---- queue family ----------------------------------------------------- */ uint32_t n_qf = 0; vkGetPhysicalDeviceQueueFamilyProperties(gpu, &n_qf, NULL); VkQueueFamilyProperties *qfp = calloc(n_qf, sizeof(*qfp)); vkGetPhysicalDeviceQueueFamilyProperties(gpu, &n_qf, qfp); uint32_t qfam = UINT32_MAX; for (uint32_t i = 0; i < n_qf; i++) { if (qfp[i].queueFlags & VK_QUEUE_TRANSFER_BIT) { qfam = i; break; } } if (qfam == UINT32_MAX) { fprintf(stderr, "[fail] no transfer queue family\n"); return 6; } /* ---- device ----------------------------------------------------------- */ STEP("vkCreateDevice"); float qprio = 1.0f; VkDeviceQueueCreateInfo qci = { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, .queueFamilyIndex = qfam, .queueCount = 1, .pQueuePriorities = &qprio, }; VkDeviceCreateInfo dci = { .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, .queueCreateInfoCount = 1, .pQueueCreateInfos = &qci, }; VkDevice dev; VK_CHECK(vkCreateDevice(gpu, &dci, NULL, &dev)); VkQueue queue; vkGetDeviceQueue(dev, qfam, 0, &queue); /* ---- image ----------------------------------------------------------- */ STEP("vkCreateImage (4x4 R8G8B8A8_UNORM optimal-tiled)"); VkImageCreateInfo iciImg = { .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, .imageType = VK_IMAGE_TYPE_2D, .format = VK_FORMAT_R8G8B8A8_UNORM, .extent = { IMG_W, IMG_H, 1 }, .mipLevels = 1, .arrayLayers = 1, .samples = VK_SAMPLE_COUNT_1_BIT, .tiling = VK_IMAGE_TILING_OPTIMAL, .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, }; VkImage img; VK_CHECK(vkCreateImage(dev, &iciImg, NULL, &img)); VkMemoryRequirements imr; vkGetImageMemoryRequirements(dev, img, &imr); fprintf(stderr, "[info] image memReq size=%llu alignment=%llu typeBits=0x%x\n", (unsigned long long)imr.size, (unsigned long long)imr.alignment, imr.memoryTypeBits); STEP("vkAllocateMemory + vkBindImageMemory (device-local)"); VkMemoryAllocateInfo imai = { .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, .allocationSize = imr.size, .memoryTypeIndex = pick_memtype(&mp, imr.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT), }; VkDeviceMemory img_mem; VK_CHECK(vkAllocateMemory(dev, &imai, NULL, &img_mem)); VK_CHECK(vkBindImageMemory(dev, img, img_mem, 0)); /* ---- staging buffer -------------------------------------------------- */ STEP("vkCreateBuffer (staging, host-visible)"); VkBufferCreateInfo bci = { .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .size = BUFFER_BYTES, .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, }; VkBuffer buf; VK_CHECK(vkCreateBuffer(dev, &bci, NULL, &buf)); VkMemoryRequirements bmr; vkGetBufferMemoryRequirements(dev, buf, &bmr); VkMemoryAllocateInfo bmai = { .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, .allocationSize = bmr.size, .memoryTypeIndex = pick_host_visible(&mp, bmr.memoryTypeBits), }; VkDeviceMemory buf_mem; VK_CHECK(vkAllocateMemory(dev, &bmai, NULL, &buf_mem)); VK_CHECK(vkBindBufferMemory(dev, buf, buf_mem, 0)); /* Pre-fill staging with 0xDEADBEEF sentinel. */ void *mapped = NULL; VK_CHECK(vkMapMemory(dev, buf_mem, 0, VK_WHOLE_SIZE, 0, &mapped)); uint32_t *u32 = (uint32_t *)mapped; for (uint32_t i = 0; i < PIXELS; i++) u32[i] = 0xDEADBEEFu; /* ---- command buffer --------------------------------------------------- */ VkCommandPoolCreateInfo cpoolci = { .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, .queueFamilyIndex = qfam, }; VkCommandPool cpool; VK_CHECK(vkCreateCommandPool(dev, &cpoolci, NULL, &cpool)); VkCommandBufferAllocateInfo cbai = { .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, .commandPool = cpool, .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, .commandBufferCount = 1, }; VkCommandBuffer cb; VK_CHECK(vkAllocateCommandBuffers(dev, &cbai, &cb)); STEP("vkBeginCommandBuffer + record image clear + copy"); VkCommandBufferBeginInfo cbbi = { .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, }; VK_CHECK(vkBeginCommandBuffer(cb, &cbbi)); /* UNDEFINED → TRANSFER_DST_OPTIMAL */ image_barrier(cb, img, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 0, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT); /* Clear */ VkClearColorValue clear = {{ (float)CLEAR_R / 255.0f, (float)CLEAR_G / 255.0f, (float)CLEAR_B / 255.0f, (float)CLEAR_A / 255.0f, }}; VkImageSubresourceRange range = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .baseMipLevel = 0, .levelCount = 1, .baseArrayLayer = 0, .layerCount = 1, }; vkCmdClearColorImage(cb, img, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clear, 1, &range); /* TRANSFER_DST_OPTIMAL → TRANSFER_SRC_OPTIMAL */ image_barrier(cb, img, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT); /* Copy image → buffer */ VkBufferImageCopy region = { .bufferOffset = 0, .bufferRowLength = 0, .bufferImageHeight = 0, .imageSubresource = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .mipLevel = 0, .baseArrayLayer = 0, .layerCount = 1, }, .imageOffset = { 0, 0, 0 }, .imageExtent = { IMG_W, IMG_H, 1 }, }; vkCmdCopyImageToBuffer(cb, img, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buf, 1, ®ion); /* Buffer transfer-write → host-read */ VkBufferMemoryBarrier bb = { .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, .dstAccessMask = VK_ACCESS_HOST_READ_BIT, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .buffer = buf, .offset = 0, .size = VK_WHOLE_SIZE, }; vkCmdPipelineBarrier(cb, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0, 0, NULL, 1, &bb, 0, NULL); VK_CHECK(vkEndCommandBuffer(cb)); /* ---- submit + wait --------------------------------------------------- */ VkFenceCreateInfo fci = { .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO }; VkFence fence; VK_CHECK(vkCreateFence(dev, &fci, NULL, &fence)); STEP("vkQueueSubmit + vkWaitForFences (5s timeout)"); VkSubmitInfo si = { .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, .commandBufferCount = 1, .pCommandBuffers = &cb, }; VK_CHECK(vkQueueSubmit(queue, 1, &si, fence)); VkResult wr = vkWaitForFences(dev, 1, &fence, VK_TRUE, 5ULL * 1000 * 1000 * 1000); if (wr == VK_TIMEOUT) { fprintf(stderr, "[fail] fence TIMEOUT (5s)\n"); return 7; } if (wr != VK_SUCCESS) { fprintf(stderr, "[fail] vkWaitForFences => %d\n", wr); return 8; } /* ---- readback + verify ----------------------------------------------- */ STEP("vkInvalidateMappedMemoryRanges + readback"); VkMappedMemoryRange mmr = { .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, .memory = buf_mem, .offset = 0, .size = VK_WHOLE_SIZE, }; vkInvalidateMappedMemoryRanges(dev, 1, &mmr); int mismatches = 0; for (uint32_t i = 0; i < PIXELS; i++) { if (u32[i] != EXPECTED_PIXEL) { if (mismatches < 8) { fprintf(stderr, "[diff] pixel[%u] = 0x%08x (expected 0x%08x)\n", i, u32[i], EXPECTED_PIXEL); } mismatches++; } } fprintf(stderr, "[info] expected pixel = 0x%08x (R=0x%02x G=0x%02x B=0x%02x A=0x%02x)\n", EXPECTED_PIXEL, CLEAR_R, CLEAR_G, CLEAR_B, CLEAR_A); fprintf(stderr, "[info] mismatches = %d / %d\n", mismatches, PIXELS); /* Dump full buffer in case of failure for debugging. */ if (mismatches) { fprintf(stderr, "[dump] buffer contents (uint32 LE):\n"); for (uint32_t row = 0; row < IMG_H; row++) { fprintf(stderr, "[dump] "); for (uint32_t col = 0; col < IMG_W; col++) { fprintf(stderr, "0x%08x ", u32[row * IMG_W + col]); } fprintf(stderr, "\n"); } } /* ---- teardown -------------------------------------------------------- */ vkUnmapMemory(dev, buf_mem); vkDestroyFence(dev, fence, NULL); vkDestroyCommandPool(dev, cpool, NULL); vkDestroyBuffer(dev, buf, NULL); vkFreeMemory(dev, buf_mem, NULL); vkDestroyImage(dev, img, NULL); vkFreeMemory(dev, img_mem, NULL); vkDestroyDevice(dev, NULL); vkDestroyInstance(inst, NULL); free(phys); free(qfp); if (mismatches == 0) { fprintf(stderr, "[PASS] PanVk-Bifrost image clear+copy: all 16 pixels match.\n"); return 0; } else { fprintf(stderr, "[FAIL] %d / %d pixels mismatched.\n", mismatches, PIXELS); return 1; } }