forked from marfrit/marfrit-packages
7ecbcb3c1b
The original 0005 patch was generated from the pre-Phase-5-review source snapshot (phase5_review_input_2026-05-21.tgz), missing the four load-bearing review fixes that landed in the post-review snapshot: - probe_hantro gate on KHR_video_* extension advertisement - per-session ts_counter (was process-global static) - panvk_v4l2_session_finish full unwind (munmap + STREAMOFF + REQBUFS=0) - MIN2(rb.count, 18) clamp on num_*_buffers Run #162 (job 17032) failed in prepare() because the PKGBUILD sanity check 'grep -q "KHR_video_queue = PAN_ARCH < 9 && panvk_v4l2_probe_hantro()"' didn't match the actual patched output (which still had the pre-review 'KHR_video_queue = PAN_ARCH < 9,'). This patch (regenerated from phase5_post_review_2026-05-21.tgz) carries all four review fixes. Validated locally: vanilla mesa-26.0.6 + r1..r4 + this patch reproduces prepare()-OK byte-for-byte. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2473 lines
98 KiB
Diff
2473 lines
98 KiB
Diff
diff -urN a/src/panfrost/vulkan/jm/panvk_cmd_buffer.h b/src/panfrost/vulkan/jm/panvk_cmd_buffer.h
|
|
--- a/src/panfrost/vulkan/jm/panvk_cmd_buffer.h 2026-05-21 22:46:57.477785029 +0200
|
|
+++ b/src/panfrost/vulkan/jm/panvk_cmd_buffer.h 2026-05-22 10:17:41.214043265 +0200
|
|
@@ -88,8 +88,18 @@
|
|
struct panvk_cmd_compute_state compute;
|
|
struct panvk_push_constant_state push_constants;
|
|
} state;
|
|
+
|
|
+ /* iter1: panvk-bifrost-video — current bound video session + params
|
|
+ * scoped by vkCmdBeginVideoCodingKHR..vkCmdEndVideoCodingKHR. */
|
|
+ struct {
|
|
+ struct panvk_video_session *vs;
|
|
+ struct vk_video_session_parameters *params;
|
|
+ } video;
|
|
};
|
|
|
|
+struct panvk_video_session;
|
|
+struct vk_video_session_parameters;
|
|
+
|
|
VK_DEFINE_HANDLE_CASTS(panvk_cmd_buffer, vk.base, VkCommandBuffer,
|
|
VK_OBJECT_TYPE_COMMAND_BUFFER)
|
|
|
|
diff -urN a/src/panfrost/vulkan/meson.build b/src/panfrost/vulkan/meson.build
|
|
--- a/src/panfrost/vulkan/meson.build 2026-05-21 22:46:59.277811484 +0200
|
|
+++ b/src/panfrost/vulkan/meson.build 2026-05-22 10:17:41.214043265 +0200
|
|
@@ -41,6 +41,10 @@
|
|
'panvk_device_memory.c',
|
|
'panvk_host_copy.c',
|
|
'panvk_image.c',
|
|
+ 'panvk_video_decode.c',
|
|
+ 'panvk_v4l2.c',
|
|
+ 'panvk_v4l2_h264.c',
|
|
+ 'panvk_v4l2_h264_slice_header.c',
|
|
'panvk_instance.c',
|
|
'panvk_mempool.c',
|
|
'panvk_physical_device.c',
|
|
diff -urN a/src/panfrost/vulkan/panvk_buffer.c b/src/panfrost/vulkan/panvk_buffer.c
|
|
--- a/src/panfrost/vulkan/panvk_buffer.c 2026-05-21 22:46:57.485785147 +0200
|
|
+++ b/src/panfrost/vulkan/panvk_buffer.c 2026-05-22 10:17:41.214043265 +0200
|
|
@@ -88,6 +88,8 @@
|
|
*bind_status->pResult = VK_SUCCESS;
|
|
|
|
buffer->vk.device_address = mem->addr.dev + pBindInfos[i].memoryOffset;
|
|
+ buffer->mem = mem;
|
|
+ buffer->mem_offset = pBindInfos[i].memoryOffset;
|
|
}
|
|
return VK_SUCCESS;
|
|
}
|
|
diff -urN a/src/panfrost/vulkan/panvk_buffer.h b/src/panfrost/vulkan/panvk_buffer.h
|
|
--- a/src/panfrost/vulkan/panvk_buffer.h 2026-05-21 22:46:57.485785147 +0200
|
|
+++ b/src/panfrost/vulkan/panvk_buffer.h 2026-05-22 10:17:41.214043265 +0200
|
|
@@ -14,8 +14,14 @@
|
|
|
|
struct panvk_priv_bo;
|
|
|
|
+struct panvk_device_memory;
|
|
struct panvk_buffer {
|
|
struct vk_buffer vk;
|
|
+
|
|
+ /* iter1: panvk-bifrost-video — bound memory tracking for dmabuf export.
|
|
+ * Set in panvk_BindBufferMemory. NULL until bound. */
|
|
+ struct panvk_device_memory *mem;
|
|
+ uint64_t mem_offset;
|
|
};
|
|
|
|
VK_DEFINE_NONDISP_HANDLE_CASTS(panvk_buffer, vk.base, VkBuffer,
|
|
diff -urN a/src/panfrost/vulkan/panvk_device.h b/src/panfrost/vulkan/panvk_device.h
|
|
--- a/src/panfrost/vulkan/panvk_device.h 2026-05-21 22:46:57.489785206 +0200
|
|
+++ b/src/panfrost/vulkan/panvk_device.h 2026-05-22 10:17:41.214043265 +0200
|
|
@@ -45,6 +45,8 @@
|
|
enum panvk_queue_family {
|
|
PANVK_QUEUE_FAMILY_GPU,
|
|
PANVK_QUEUE_FAMILY_BIND,
|
|
+ /* iter1: video decode via V4L2-stateless hantro (PAN_ARCH < 9, runtime-gated). */
|
|
+ PANVK_QUEUE_FAMILY_VIDEO_DECODE,
|
|
PANVK_QUEUE_FAMILY_COUNT,
|
|
};
|
|
|
|
@@ -97,6 +99,23 @@
|
|
|
|
struct panvk_device_queue_family queue_families[PANVK_QUEUE_FAMILY_COUNT];
|
|
|
|
+ /* iter1: Phase 1 simplification — device-level active video session +
|
|
+ * params, set by Cmd{Begin,End}VideoCodingKHR. Single-session Phase 1
|
|
+ * scope; per-cmdbuf state-tracking lives in Phase >>1 once per-arch
|
|
+ * cmd_buffer access from arch-agnostic source is wired. */
|
|
+ struct {
|
|
+ simple_mtx_t lock;
|
|
+ struct panvk_video_session *vs;
|
|
+ struct vk_video_session_parameters *params;
|
|
+ } active_video;
|
|
+ /* iter1: Vulkan-visible queue family index ↔ panvk_qfi enum mapping.
|
|
+ * Needed because hideable families create gaps between the enum slot
|
|
+ * and the position the Vulkan loader sees. Populated at vkCreateDevice
|
|
+ * from pCreateInfo->pQueueCreateInfos[].queueFamilyIndex by querying
|
|
+ * physical-device queue family properties (which is what was advertised). */
|
|
+ uint8_t vulkan_to_panvk_qfi[PANVK_QUEUE_FAMILY_COUNT];
|
|
+ uint8_t num_vulkan_qfi;
|
|
+
|
|
struct panvk_precomp_cache *precomp_cache;
|
|
|
|
struct {
|
|
diff -urN a/src/panfrost/vulkan/panvk_physical_device.c b/src/panfrost/vulkan/panvk_physical_device.c
|
|
--- a/src/panfrost/vulkan/panvk_physical_device.c 2026-05-21 22:46:57.497785323 +0200
|
|
+++ b/src/panfrost/vulkan/panvk_physical_device.c 2026-05-22 10:17:41.214043265 +0200
|
|
@@ -577,12 +577,22 @@
|
|
.queueFlags = VK_QUEUE_SPARSE_BINDING_BIT,
|
|
.queueCount = 1,
|
|
},
|
|
+ [PANVK_QUEUE_FAMILY_VIDEO_DECODE] = {
|
|
+ /* iter1: video decode + transfer (Vulkan spec requires VIDEO families
|
|
+ * to also advertise TRANSFER for bitstream-buffer copies). */
|
|
+ .queueFlags = VK_QUEUE_VIDEO_DECODE_BIT_KHR | VK_QUEUE_TRANSFER_BIT,
|
|
+ .queueCount = 1,
|
|
+ .minImageTransferGranularity = {1, 1, 1},
|
|
+ },
|
|
};
|
|
|
|
for (uint32_t family = 0; family < ARRAY_SIZE(qfamily_props); family++) {
|
|
if (family == PANVK_QUEUE_FAMILY_BIND &&
|
|
!physical_device->vk.supported_features.sparseBinding)
|
|
- break;
|
|
+ continue; /* iter1: was break, but a later family (VIDEO_DECODE) must be reachable */
|
|
+ if (family == PANVK_QUEUE_FAMILY_VIDEO_DECODE &&
|
|
+ !physical_device->vk.supported_extensions.KHR_video_queue)
|
|
+ continue;
|
|
|
|
vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p) {
|
|
p->queueFamilyProperties = qfamily_props[family];
|
|
@@ -591,6 +601,16 @@
|
|
vk_find_struct(p->pNext, QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES_KHR);
|
|
if (prio)
|
|
panvk_fill_global_priority(physical_device, family, prio);
|
|
+
|
|
+ /* iter1: VK_KHR_video_queue advertises codec ops per family. */
|
|
+ VkQueueFamilyVideoPropertiesKHR *vid =
|
|
+ vk_find_struct(p->pNext, QUEUE_FAMILY_VIDEO_PROPERTIES_KHR);
|
|
+ if (vid) {
|
|
+ vid->videoCodecOperations = 0;
|
|
+ if (family == PANVK_QUEUE_FAMILY_VIDEO_DECODE)
|
|
+ vid->videoCodecOperations |=
|
|
+ VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR;
|
|
+ }
|
|
}
|
|
}
|
|
}
|
|
@@ -1558,3 +1578,87 @@
|
|
.compatibleHandleTypes = handle_types,
|
|
};
|
|
}
|
|
+
|
|
+/* panvk-bifrost-video Phase 4 commit 2:
|
|
+ * Per-physical-device video capability + format reporting. */
|
|
+
|
|
+#include "vk_video.h"
|
|
+
|
|
+VKAPI_ATTR VkResult VKAPI_CALL
+panvk_GetPhysicalDeviceVideoCapabilitiesKHR(
+   VkPhysicalDevice physicalDevice,
+   const VkVideoProfileInfoKHR *pVideoProfile,
+   VkVideoCapabilitiesKHR *pCapabilities)
+{
+   /* iter1: only H.264 decode is handled; every other codec operation is
+    * rejected before pCapabilities is written. */
+   const bool is_h264_decode =
+      pVideoProfile->videoCodecOperation ==
+      VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR;
+   if (!is_h264_decode)
+      return VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR;
+
+   pCapabilities->flags = 0;
+
+   /* Bitstream buffer alignment and picture access granularity. */
+   pCapabilities->minBitstreamBufferOffsetAlignment = 16;
+   pCapabilities->minBitstreamBufferSizeAlignment = 16;
+   pCapabilities->pictureAccessGranularity =
+      (VkExtent2D){ .width = 16, .height = 16 };
+
+   /* RK3566 hantro max H.264 4Kp30 in spec; constrain to 1080p safe baseline
+    * until Commit 6 queries real V4L2 format-size limits. */
+   pCapabilities->minCodedExtent = (VkExtent2D){ .width = 16, .height = 16 };
+   pCapabilities->maxCodedExtent =
+      (VkExtent2D){ .width = 1920, .height = 1088 };
+   pCapabilities->maxDpbSlots = 16;
+   pCapabilities->maxActiveReferencePictures = 16;
+
+   /* Report the H.264 decode std header name/version. strcpy() writes the
+    * terminating NUL, so no separate pre-clear of the name is needed. */
+   VkExtensionProperties *std_header = &pCapabilities->stdHeaderVersion;
+   strcpy(std_header->extensionName,
+          VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_EXTENSION_NAME);
+   std_header->specVersion =
+      VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_SPEC_VERSION;
+
+   /* Optional chained output: generic decode capabilities.
+    * Hantro outputs to a CAPTURE buffer separate from the DPB; expose
+    * DISTINCT mode. (COINCIDE would be the GPU-engine-DPB-as-output mode.) */
+   VkVideoDecodeCapabilitiesKHR *decode_caps =
+      vk_find_struct(pCapabilities->pNext, VIDEO_DECODE_CAPABILITIES_KHR);
+   if (decode_caps != NULL)
+      decode_caps->flags =
+         VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_DISTINCT_BIT_KHR;
+
+   /* Optional chained output: H.264-specific decode capabilities. */
+   VkVideoDecodeH264CapabilitiesKHR *avc_caps =
+      vk_find_struct(pCapabilities->pNext, VIDEO_DECODE_H264_CAPABILITIES_KHR);
+   if (avc_caps != NULL) {
+      avc_caps->maxLevelIdc = STD_VIDEO_H264_LEVEL_IDC_4_2;
+      avc_caps->fieldOffsetGranularity = (VkOffset2D){ .x = 0, .y = 0 };
+   }
+
+   return VK_SUCCESS;
+}
|
|
+
|
|
+VKAPI_ATTR VkResult VKAPI_CALL
+panvk_GetPhysicalDeviceVideoFormatPropertiesKHR(
+   VkPhysicalDevice physicalDevice,
+   const VkPhysicalDeviceVideoFormatInfoKHR *pVideoFormatInfo,
+   uint32_t *pVideoFormatPropertyCount,
+   VkVideoFormatPropertiesKHR *pVideoFormatProperties)
+{
+   /* Usage bits this entry can report; the caller's requested usage is
+    * masked against this set. */
+   const VkImageUsageFlags reportable_usage =
+      VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR |
+      VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR |
+      VK_IMAGE_USAGE_SAMPLED_BIT |
+      VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
+
+   VK_OUTARRAY_MAKE_TYPED(VkVideoFormatPropertiesKHR, out,
+                          pVideoFormatProperties, pVideoFormatPropertyCount);
+
+   /* iter1: NV12 (8:8:0 4:2:0 2-plane) is the only format hantro emits, so
+    * exactly one entry is appended. */
+   vk_outarray_append_typed(VkVideoFormatPropertiesKHR, &out, entry) {
+      entry->format = VK_FORMAT_G8_B8R8_2PLANE_420_UNORM;
+      entry->imageType = VK_IMAGE_TYPE_2D;
+      entry->imageTiling = VK_IMAGE_TILING_OPTIMAL;
+      entry->imageUsageFlags = pVideoFormatInfo->imageUsage & reportable_usage;
+      entry->componentMapping = (VkComponentMapping){
+         .r = VK_COMPONENT_SWIZZLE_IDENTITY,
+         .g = VK_COMPONENT_SWIZZLE_IDENTITY,
+         .b = VK_COMPONENT_SWIZZLE_IDENTITY,
+         .a = VK_COMPONENT_SWIZZLE_IDENTITY,
+      };
+   }
+
+   return vk_outarray_status(&out);
+}
|
|
diff -urN a/src/panfrost/vulkan/panvk_v4l2.c b/src/panfrost/vulkan/panvk_v4l2.c
|
|
--- a/src/panfrost/vulkan/panvk_v4l2.c 1970-01-01 01:00:00.000000000 +0100
|
|
+++ b/src/panfrost/vulkan/panvk_v4l2.c 2026-05-22 10:17:41.214043265 +0200
|
|
@@ -0,0 +1,615 @@
|
|
+/*
|
|
+ * panvk-bifrost-video Phase 4 commit 3:
|
|
+ *
|
|
+ * V4L2-stateless hantro VPU bridge for panvk video decode sessions.
|
|
+ *
|
|
+ * Mirrors the libva-v4l2-request-fourier probe + per-session-init
|
|
+ * pattern (proven on RK3566 hantro at 1.16x realtime).
|
|
+ *
|
|
+ * SPDX-License-Identifier: MIT
|
|
+ */
|
|
+
|
|
+#include "panvk_video_decode.h"
|
|
+#include "panvk_device.h"
|
|
+
|
|
+#include "util/macros.h"
|
|
+#include "vk_alloc.h"
|
|
+#include "vk_log.h"
|
|
+
|
|
+#include <errno.h>
|
|
+#include <fcntl.h>
|
|
+#include <poll.h>
|
|
+#include <stdio.h>
|
|
+#include <string.h>
|
|
+#include <sys/ioctl.h>
|
|
+#include <sys/mman.h>
|
|
+#include <unistd.h>
|
|
+
|
|
+#include <linux/media.h>
|
|
+#include <linux/videodev2.h>
|
|
+#include <linux/v4l2-controls.h>
|
|
+
|
|
+/* Phase 2 D9: hard-coded paths first; topology-based enumeration is a
|
|
+ * later iter (libva-v4l2-request-fourier has the full version). */
|
|
+#define PANVK_V4L2_VIDEO_NODE "/dev/video1"
|
|
+#define PANVK_V4L2_MEDIA_NODE "/dev/media0"
|
|
+
|
|
+/* Phase 1 max bitstream buffer: BBB peak ~2.4 MB/frame, 4MB is comfortable. */
|
|
+#define PANVK_V4L2_SOURCE_SIZE_MAX (4 * 1024 * 1024)
|
|
+
|
|
+/* Phase 2 D3: request_fd pool size = max_dpb_slots + 2 = 18.
|
|
+ * 16 DPB slots + current frame + safety margin. */
|
|
+#define PANVK_V4L2_REQUEST_FD_COUNT 18
|
|
+
|
|
+/* Probe for the decoder: open PANVK_V4L2_VIDEO_NODE, issue VIDIOC_QUERYCAP
+ * and report whether the driver or card string starts with "hantro".
+ * The node is closed again before returning; false is returned when the
+ * node cannot be opened or the query fails. */
+bool
+panvk_v4l2_probe_hantro(void)
+{
+   struct v4l2_capability info = { 0 };
+
+   const int node = open(PANVK_V4L2_VIDEO_NODE, O_RDWR | O_NONBLOCK);
+   if (node < 0)
+      return false;
+
+   const int query_rc = ioctl(node, VIDIOC_QUERYCAP, &info);
+   close(node);
+   if (query_rc < 0)
+      return false;
+
+   /* Hantro VPU on RK3566/RK3568 reports card = "hantro-vpu" or
+    * driver = "hantro-vpu". Accept either field matching. */
+   if (strncmp((const char *) info.driver, "hantro", 6) == 0)
+      return true;
+   return strncmp((const char *) info.card, "hantro", 6) == 0;
+}
|
|
+
|
|
+/* Report whether the device behind video_fd advertises
+ * V4L2_CAP_VIDEO_M2M_MPLANE (multi-planar buffer types).
+ * When the driver sets V4L2_CAP_DEVICE_CAPS the per-node device_caps field
+ * is consulted, otherwise the global capabilities field. Returns false if
+ * VIDIOC_QUERYCAP fails. */
+static bool
+v4l2_device_is_mplane(int video_fd)
+{
+   struct v4l2_capability info = { 0 };
+
+   if (ioctl(video_fd, VIDIOC_QUERYCAP, &info) < 0)
+      return false;
+
+   uint32_t effective = info.capabilities;
+   if (info.capabilities & V4L2_CAP_DEVICE_CAPS)
+      effective = info.device_caps;
+
+   return (effective & V4L2_CAP_VIDEO_M2M_MPLANE) != 0;
+}
|
|
+
|
|
+/* Open the two device nodes one video session needs:
+ *   vs->video_fd  <- PANVK_V4L2_VIDEO_NODE (buffer queues + controls)
+ *   vs->media_fd  <- PANVK_V4L2_MEDIA_NODE (source of request fds)
+ *
+ * Returns 0 on success or -errno of the failing open(). If the media node
+ * cannot be opened the already-open video fd is closed and reset to -1.
+ *
+ * errno is captured immediately after the failing open(): both the logging
+ * call and close() may overwrite it before the return statement runs. */
+static int
+v4l2_open_fds(struct panvk_video_session *vs)
+{
+   vs->video_fd = open(PANVK_V4L2_VIDEO_NODE, O_RDWR | O_NONBLOCK);
+   if (vs->video_fd < 0) {
+      const int err = errno;
+      mesa_loge("panvk_v4l2: open video failed: %s", strerror(err));
+      return -err;
+   }
+
+   vs->media_fd = open(PANVK_V4L2_MEDIA_NODE, O_RDWR | O_NONBLOCK);
+   if (vs->media_fd < 0) {
+      const int err = errno;
+      mesa_loge("panvk_v4l2: open media failed: %s", strerror(err));
+      close(vs->video_fd);
+      vs->video_fd = -1;
+      return -err;
+   }
+
+   return 0;
+}
|
|
+
|
|
+/* Set OUTPUT (input bitstream) format to H264_SLICE and CAPTURE (output
+ * picture) format to NV12, both at the caller-supplied width/height.
+ *
+ * Single- vs multi-planar buffer types are chosen at runtime via
+ * V4L2_CAP_VIDEO_M2M_MPLANE; the result is cached in vs->mplane. (Original
+ * note: hantro on RK3568 is multi-planar, rkvdec on RK3399 single-planar.)
+ *
+ * After each successful VIDIOC_S_FMT the format struct as returned by the
+ * driver is copied into vs->fmt_output / vs->fmt_capture.
+ *
+ * Returns 0 on success or -errno of the failing VIDIOC_S_FMT. errno is
+ * saved before logging because the log call may overwrite it. */
+static int
+v4l2_negotiate_formats(struct panvk_video_session *vs,
+                       uint32_t width, uint32_t height)
+{
+   const bool mplane = v4l2_device_is_mplane(vs->video_fd);
+   vs->mplane = mplane;
+
+   struct v4l2_format f;
+
+   /* OUTPUT — H.264 stateless bitstream */
+   memset(&f, 0, sizeof(f));
+   if (mplane) {
+      f.type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE;
+      f.fmt.pix_mp.width = width;
+      f.fmt.pix_mp.height = height;
+      f.fmt.pix_mp.pixelformat = V4L2_PIX_FMT_H264_SLICE;
+      f.fmt.pix_mp.plane_fmt[0].sizeimage = PANVK_V4L2_SOURCE_SIZE_MAX;
+      f.fmt.pix_mp.num_planes = 1;
+   } else {
+      f.type = V4L2_BUF_TYPE_VIDEO_OUTPUT;
+      f.fmt.pix.width = width;
+      f.fmt.pix.height = height;
+      f.fmt.pix.pixelformat = V4L2_PIX_FMT_H264_SLICE;
+      f.fmt.pix.sizeimage = PANVK_V4L2_SOURCE_SIZE_MAX;
+   }
+   if (ioctl(vs->video_fd, VIDIOC_S_FMT, &f) < 0) {
+      const int err = errno;
+      mesa_loge("panvk_v4l2: S_FMT OUTPUT (H264_SLICE, mplane=%d) failed: %s",
+                mplane, strerror(err));
+      return -err;
+   }
+   memcpy(&vs->fmt_output, &f, sizeof(f));
+
+   /* CAPTURE — NV12 decoded frames */
+   memset(&f, 0, sizeof(f));
+   if (mplane) {
+      f.type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE;
+      f.fmt.pix_mp.width = width;
+      f.fmt.pix_mp.height = height;
+      f.fmt.pix_mp.pixelformat = V4L2_PIX_FMT_NV12;
+      f.fmt.pix_mp.num_planes = 1;
+   } else {
+      f.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
+      f.fmt.pix.width = width;
+      f.fmt.pix.height = height;
+      f.fmt.pix.pixelformat = V4L2_PIX_FMT_NV12;
+   }
+   if (ioctl(vs->video_fd, VIDIOC_S_FMT, &f) < 0) {
+      const int err = errno;
+      mesa_loge("panvk_v4l2: S_FMT CAPTURE (NV12, mplane=%d) failed: %s",
+                mplane, strerror(err));
+      return -err;
+   }
+   memcpy(&vs->fmt_capture, &f, sizeof(f));
+
+   return 0;
+}
|
|
+
|
|
+/* REQBUFS to register N buffers on each queue. Phase 1: minimal counts to
|
|
+ * exercise the path; full pipelining is a later iter.
|
|
+ *
|
|
+ * Commit 7c: BOTH OUTPUT + CAPTURE use MMAP (mirrors libva-v4l2-request-fourier
|
|
+ * working pattern exactly). Bitstream copied in CPU-side from VkBuffer host
|
|
+ * map. Decoded frame copied out CPU-side to VkImage (7d). Validates the
|
|
+ * IOC_QUEUE path without dma_buf-side variables. */
|
|
+#define PANVK_V4L2_CAPTURE_COUNT 18
|
|
+
|
|
+/* Request MMAP buffers on the OUTPUT and CAPTURE queues and record how many
+ * the driver granted in vs->num_output_buffers / vs->num_capture_buffers
+ * (each clamped to 18); the round-robin cursors are reset to 0.
+ *
+ * Returns 0 on success or -errno of the failing VIDIOC_REQBUFS. errno is
+ * saved before logging because the log call may overwrite it. */
+static int
+v4l2_reqbufs(struct panvk_video_session *vs)
+{
+   struct v4l2_requestbuffers rb;
+
+   /* OUTPUT: MMAP (kernel-allocated, mmap'd to CPU for bitstream copy-in). */
+   memset(&rb, 0, sizeof(rb));
+   rb.count = PANVK_V4L2_REQUEST_FD_COUNT;
+   rb.type = vs->mplane ? V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE
+                        : V4L2_BUF_TYPE_VIDEO_OUTPUT;
+   rb.memory = V4L2_MEMORY_MMAP;
+   if (ioctl(vs->video_fd, VIDIOC_REQBUFS, &rb) < 0) {
+      const int err = errno;
+      mesa_loge("panvk_v4l2: REQBUFS OUTPUT failed: %s", strerror(err));
+      return -err;
+   }
+   /* REQBUFS may round up the count above the request — clamp to our
+    * fixed-size mmap arrays (Phase 5 review: prevents output_map OOB). */
+   vs->num_output_buffers = MIN2(rb.count, 18);
+   vs->output_next = 0;
+
+   /* CAPTURE: MMAP — kernel-allocated, mmap to CPU for copy-out path. */
+   memset(&rb, 0, sizeof(rb));
+   rb.count = PANVK_V4L2_CAPTURE_COUNT;
+   rb.type = vs->mplane ? V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE
+                        : V4L2_BUF_TYPE_VIDEO_CAPTURE;
+   rb.memory = V4L2_MEMORY_MMAP;
+   if (ioctl(vs->video_fd, VIDIOC_REQBUFS, &rb) < 0) {
+      const int err = errno;
+      mesa_loge("panvk_v4l2: REQBUFS CAPTURE failed: %s", strerror(err));
+      return -err;
+   }
+   /* Same clamp as for OUTPUT. */
+   vs->num_capture_buffers = MIN2(rb.count, 18);
+   vs->capture_next = 0;
+
+   return 0;
+}
|
|
+
|
|
+/* Allocate the request_fd pool via MEDIA_IOC_REQUEST_ALLOC.
+ *
+ * The fd array (PANVK_V4L2_REQUEST_FD_COUNT entries) is allocated from
+ * dev->alloc and every slot starts at -1. The `alloc` parameter is not used
+ * by this function.
+ *
+ * vs->num_request_fds is published BEFORE the allocation loop: on a mid-loop
+ * failure panvk_v4l2_session_finish() iterates up to num_request_fds and
+ * closes every slot that is >= 0, so the request fds obtained so far are
+ * released instead of leaked. Slots never filled stay at -1 and are skipped.
+ *
+ * Returns 0 on success, -ENOMEM if the array allocation fails, or -errno of
+ * the failing ioctl (saved before logging, which may overwrite errno). */
+static int
+v4l2_alloc_request_pool(struct panvk_video_session *vs,
+                        const VkAllocationCallbacks *alloc,
+                        struct vk_device *dev)
+{
+   vs->request_fds = vk_alloc(&dev->alloc,
+                              sizeof(int) * PANVK_V4L2_REQUEST_FD_COUNT, 8,
+                              VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+   if (!vs->request_fds)
+      return -ENOMEM;
+
+   for (unsigned i = 0; i < PANVK_V4L2_REQUEST_FD_COUNT; i++)
+      vs->request_fds[i] = -1;
+
+   /* Make the whole (currently all -1) pool visible to the unwind path. */
+   vs->num_request_fds = PANVK_V4L2_REQUEST_FD_COUNT;
+   vs->request_fd_next = 0;
+
+   for (unsigned i = 0; i < PANVK_V4L2_REQUEST_FD_COUNT; i++) {
+      int rfd = -1;
+      if (ioctl(vs->media_fd, MEDIA_IOC_REQUEST_ALLOC, &rfd) < 0) {
+         const int err = errno;
+         mesa_loge("panvk_v4l2: MEDIA_IOC_REQUEST_ALLOC [%u] failed: %s",
+                   i, strerror(err));
+         return -err;
+      }
+      vs->request_fds[i] = rfd;
+   }
+
+   return 0;
+}
|
|
+
|
|
+/* QUERYBUF + mmap CAPTURE buffers — NV12-decoded frame source.
+ *
+ * For each CAPTURE buffer index (bounded by vs->num_capture_buffers and 18)
+ * the buffer's length/offset are queried and the buffer is mapped read-only;
+ * the mapping and its size are stored in vs->capture_map[] /
+ * vs->capture_map_size[]. Only plane 0 is mapped in the multi-planar case.
+ *
+ * Returns 0 on success or -errno of the failing QUERYBUF/mmap. errno is
+ * saved before logging because the log call may overwrite it. Mappings made
+ * before a failure are left in place for the caller's unwind. */
+static int
+v4l2_mmap_capture_buffers(struct panvk_video_session *vs)
+{
+   for (unsigned i = 0; i < vs->num_capture_buffers && i < 18; i++) {
+      struct v4l2_buffer qb = { 0 };
+      struct v4l2_plane planes[VIDEO_MAX_PLANES] = { 0 };
+      qb.type = vs->mplane ? V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE
+                           : V4L2_BUF_TYPE_VIDEO_CAPTURE;
+      qb.memory = V4L2_MEMORY_MMAP;
+      qb.index = i;
+      if (vs->mplane) {
+         qb.length = 1;
+         qb.m.planes = planes;
+      }
+      if (ioctl(vs->video_fd, VIDIOC_QUERYBUF, &qb) < 0) {
+         const int err = errno;
+         mesa_loge("panvk_v4l2: QUERYBUF CAPTURE[%u] failed: %s",
+                   i, strerror(err));
+         return -err;
+      }
+
+      uint32_t length = vs->mplane ? planes[0].length : qb.length;
+      uint32_t offset = vs->mplane ? planes[0].m.mem_offset : qb.m.offset;
+      void *p = mmap(NULL, length, PROT_READ, MAP_SHARED,
+                     vs->video_fd, offset);
+      if (p == MAP_FAILED) {
+         const int err = errno;
+         mesa_loge("panvk_v4l2: mmap CAPTURE[%u] failed: %s",
+                   i, strerror(err));
+         return -err;
+      }
+      vs->capture_map[i] = p;
+      vs->capture_map_size[i] = length;
+   }
+   return 0;
+}
|
|
+
|
|
+/* QUERYBUF + mmap each OUTPUT buffer — bitstream-copy-in destination.
+ *
+ * For each of the vs->num_output_buffers OUTPUT buffers the length/offset
+ * are queried and the buffer is mapped read/write; the mapping and its size
+ * are stored in vs->output_map[] / vs->output_map_size[]. Only plane 0 is
+ * mapped in the multi-planar case.
+ *
+ * Returns 0 on success or -errno of the failing QUERYBUF/mmap. errno is
+ * saved before logging because the log call may overwrite it. Mappings made
+ * before a failure are left in place for the caller's unwind. */
+static int
+v4l2_mmap_output_buffers(struct panvk_video_session *vs)
+{
+   for (unsigned i = 0; i < vs->num_output_buffers; i++) {
+      struct v4l2_buffer qb = { 0 };
+      struct v4l2_plane planes[VIDEO_MAX_PLANES] = { 0 };
+      qb.type = vs->mplane ? V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE
+                           : V4L2_BUF_TYPE_VIDEO_OUTPUT;
+      qb.memory = V4L2_MEMORY_MMAP;
+      qb.index = i;
+      if (vs->mplane) {
+         qb.length = 1;
+         qb.m.planes = planes;
+      }
+      if (ioctl(vs->video_fd, VIDIOC_QUERYBUF, &qb) < 0) {
+         const int err = errno;
+         mesa_loge("panvk_v4l2: QUERYBUF OUTPUT[%u] failed: %s",
+                   i, strerror(err));
+         return -err;
+      }
+
+      uint32_t length = vs->mplane ? planes[0].length : qb.length;
+      uint32_t offset = vs->mplane ? planes[0].m.mem_offset : qb.m.offset;
+      void *p = mmap(NULL, length, PROT_READ | PROT_WRITE, MAP_SHARED,
+                     vs->video_fd, offset);
+      if (p == MAP_FAILED) {
+         const int err = errno;
+         mesa_loge("panvk_v4l2: mmap OUTPUT[%u] failed: %s",
+                   i, strerror(err));
+         return -err;
+      }
+      vs->output_map[i] = p;
+      vs->output_map_size[i] = length;
+   }
+   return 0;
+}
|
|
+
|
|
+/* STREAMON both queues (OUTPUT first, then CAPTURE). Must happen after
+ * REQBUFS, before first QBUF.
+ *
+ * Returns 0 on success or -errno of the failing VIDIOC_STREAMON. errno is
+ * saved before logging because the log call may overwrite it. */
+static int
+v4l2_streamon(struct panvk_video_session *vs)
+{
+   enum v4l2_buf_type t;
+
+   t = vs->mplane ? V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE
+                  : V4L2_BUF_TYPE_VIDEO_OUTPUT;
+   if (ioctl(vs->video_fd, VIDIOC_STREAMON, &t) < 0) {
+      const int err = errno;
+      mesa_loge("panvk_v4l2: STREAMON OUTPUT failed: %s", strerror(err));
+      return -err;
+   }
+
+   t = vs->mplane ? V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE
+                  : V4L2_BUF_TYPE_VIDEO_CAPTURE;
+   if (ioctl(vs->video_fd, VIDIOC_STREAMON, &t) < 0) {
+      const int err = errno;
+      mesa_loge("panvk_v4l2: STREAMON CAPTURE failed: %s", strerror(err));
+      return -err;
+   }
+
+   return 0;
+}
|
|
+
|
|
+/* Apply the two device-level H.264 controls — DECODE_MODE = FRAME_BASED and
+ * START_CODE = ANNEX_B — in one VIDIOC_S_EXT_CTRLS call that is not bound to
+ * any request (mirrors libva-v4l2-request-fourier). Called before STREAMON.
+ * Best effort: the ioctl result is deliberately discarded because some
+ * backing drivers only accept their defaults. */
+static void
+v4l2_set_device_h264_controls(struct panvk_video_session *vs)
+{
+   struct v4l2_ext_control mode_ctrls[2] = {
+      {
+         .id = V4L2_CID_STATELESS_H264_DECODE_MODE,
+         .value = V4L2_STATELESS_H264_DECODE_MODE_FRAME_BASED,
+      },
+      {
+         .id = V4L2_CID_STATELESS_H264_START_CODE,
+         .value = V4L2_STATELESS_H264_START_CODE_ANNEX_B,
+      },
+   };
+   struct v4l2_ext_controls set = {
+      .count = 2,
+      .controls = mode_ctrls,
+   };
+
+   /* intentionally ignoring rc — device-level probes; some drivers reject. */
+   (void) ioctl(vs->video_fd, VIDIOC_S_EXT_CTRLS, &set);
+}
|
|
+
|
|
+/* Bring up the V4L2 side of one video session. Steps, in order:
+ *   open fds -> negotiate formats -> REQBUFS -> request-fd pool ->
+ *   device-level H.264 controls (non-fatal) -> mmap OUTPUT -> mmap CAPTURE
+ *   -> STREAMON.
+ * Returns 0 on success. If opening the fds fails its error is returned
+ * directly; any later failure unwinds through panvk_v4l2_session_finish()
+ * and returns that step's error code. */
+int
+panvk_v4l2_session_init(struct panvk_video_session *vs,
+                        struct vk_device *vk_dev,
+                        const VkAllocationCallbacks *alloc,
+                        uint32_t width, uint32_t height)
+{
+   int rc;
+
+   /* Nothing to unwind yet if the nodes cannot be opened. */
+   rc = v4l2_open_fds(vs);
+   if (rc != 0)
+      return rc;
+
+   if ((rc = v4l2_negotiate_formats(vs, width, height)) != 0)
+      goto fail;
+
+   if ((rc = v4l2_reqbufs(vs)) != 0)
+      goto fail;
+
+   if ((rc = v4l2_alloc_request_pool(vs, alloc, vk_dev)) != 0)
+      goto fail;
+
+   /* Set device-level H.264 mode controls (non-fatal). */
+   v4l2_set_device_h264_controls(vs);
+
+   /* mmap OUTPUT buffers for bitstream copy-in. */
+   if ((rc = v4l2_mmap_output_buffers(vs)) != 0)
+      goto fail;
+
+   /* mmap CAPTURE buffers for NV12 frame readback (Phase 1 verification). */
+   if ((rc = v4l2_mmap_capture_buffers(vs)) != 0)
+      goto fail;
+
+   /* Stream on both queues. */
+   if ((rc = v4l2_streamon(vs)) != 0)
+      goto fail;
+
+   return 0;
+
+fail:
+   panvk_v4l2_session_finish(vs, vk_dev, alloc);
+   return rc;
+}
|
|
+
|
|
+/* Allocate one V4L2 OUTPUT-queue buffer index for this submit; round-robin
|
|
+ * through the request_fd pool. Returns the request_fd to use.
|
|
+ *
|
|
+ * REINIT contract: a freshly-allocated request is in QUEUEABLE state; after
|
|
+ * MEDIA_REQUEST_IOC_QUEUE + dequeue it's in COMPLETE state and S_EXT_CTRLS
|
|
+ * on it returns EBUSY. MEDIA_REQUEST_IOC_REINIT puts it back in QUEUEABLE.
|
|
+ * We track per-fd "ever been queued" so the very-first use skips REINIT
|
|
+ * (which returns EBUSY on never-queued requests). */
|
|
+static int
+v4l2_pick_request_fd(struct panvk_video_session *vs)
+{
+   const uint32_t slot = vs->request_fd_next;
+   const int request = vs->request_fds[slot];
+
+   /* Only a slot that has been handed out before is re-initialised; a
+    * REINIT failure is logged and the fd is returned regardless. */
+   const bool handed_out_before = vs->request_fd_used[slot];
+   if (handed_out_before && ioctl(request, MEDIA_REQUEST_IOC_REINIT) < 0) {
+      mesa_loge("panvk_v4l2: MEDIA_REQUEST_IOC_REINIT rfd=%d failed: %s",
+                request, strerror(errno));
+   }
+
+   /* Mark the slot used and advance the round-robin cursor. */
+   vs->request_fd_used[slot] = true;
+   vs->request_fd_next = (slot + 1) % vs->num_request_fds;
+
+   return request;
+}
|
|
+
|
|
+/* The 14-step ioctl dance for one decode op (Phase 2 D7).
+ * Operates synchronously at record time per Phase 1 D8 lock: the call
+ * queues one request and waits (up to 200 ms) for it to complete.
+ *
+ * Commit 7c MMAP-side: src_bitstream is a CPU pointer (NOT a dma_buf fd).
+ * It is copied into the mmap'd OUTPUT buffer at the next round-robin index.
+ *
+ * Parameters:
+ *   vs                   session set up by panvk_v4l2_session_init()
+ *   sps/pps/scaling/dec  control payloads attached to the request
+ *   src_bitstream        src_bytes bytes of bitstream to decode
+ *   dst_dmabuf_fd_unused ignored
+ *   qbuf_ts              OUTPUT buffer timestamp; split into tv_sec/tv_usec
+ *                        as a nanosecond count
+ *
+ * Returns 0 on success; -ENOSPC if the bitstream does not fit the OUTPUT
+ * buffer; -ETIMEDOUT if poll() times out or fails; -EIO if the CAPTURE
+ * buffer is flagged with an error; otherwise -errno of the failing ioctl.
+ * errno is saved right after each failing ioctl because the log call may
+ * overwrite it before it is returned. */
+int
+panvk_v4l2_submit_h264_decode(struct panvk_video_session *vs,
+                              const struct v4l2_ctrl_h264_sps *sps,
+                              const struct v4l2_ctrl_h264_pps *pps,
+                              const struct v4l2_ctrl_h264_scaling_matrix *scaling,
+                              const struct v4l2_ctrl_h264_decode_params *dec,
+                              const void *src_bitstream, uint32_t src_bytes,
+                              int dst_dmabuf_fd_unused,
+                              uint64_t qbuf_ts)
+{
+   int rfd = v4l2_pick_request_fd(vs);
+   const bool mp = vs->mplane;
+
+   /* Pick an OUTPUT buffer index + copy bitstream in. The cursor advances
+    * even when the size check below rejects the submit. */
+   const uint32_t out_idx = vs->output_next;
+   vs->output_next = (vs->output_next + 1) % vs->num_output_buffers;
+   if (src_bytes > vs->output_map_size[out_idx]) {
+      mesa_loge("panvk_v4l2: bitstream %u > buffer %u",
+                src_bytes, vs->output_map_size[out_idx]);
+      return -ENOSPC;
+   }
+   memcpy(vs->output_map[out_idx], src_bitstream, src_bytes);
+
+   /* 1-7: build extended controls batch with request_fd binding */
+   struct v4l2_ext_control ctrls[4] = { 0 };
+   ctrls[0].id = V4L2_CID_STATELESS_H264_SPS;
+   ctrls[0].ptr = (void *) sps;
+   ctrls[0].size = sizeof(*sps);
+   ctrls[1].id = V4L2_CID_STATELESS_H264_PPS;
+   ctrls[1].ptr = (void *) pps;
+   ctrls[1].size = sizeof(*pps);
+   ctrls[2].id = V4L2_CID_STATELESS_H264_DECODE_PARAMS;
+   ctrls[2].ptr = (void *) dec;
+   ctrls[2].size = sizeof(*dec);
+   ctrls[3].id = V4L2_CID_STATELESS_H264_SCALING_MATRIX;
+   ctrls[3].ptr = (void *) scaling;
+   ctrls[3].size = sizeof(*scaling);
+
+   struct v4l2_ext_controls batch = { 0 };
+   batch.controls = ctrls;
+   batch.count = 4;
+   batch.which = V4L2_CTRL_WHICH_REQUEST_VAL;
+   batch.request_fd = rfd;
+
+   if (ioctl(vs->video_fd, VIDIOC_S_EXT_CTRLS, &batch) < 0) {
+      const int err = errno;
+      mesa_loge("panvk_v4l2: S_EXT_CTRLS request_fd=%d failed: %s "
+                "(error_idx=%u/%u)",
+                rfd, strerror(err), batch.error_idx, batch.count);
+      return -err;
+   }
+
+   /* 8: QBUF OUTPUT (bitstream input) — MMAP, index out_idx. */
+   struct v4l2_buffer qb = { 0 };
+   struct v4l2_plane planes[VIDEO_MAX_PLANES] = { 0 };
+   qb.memory = V4L2_MEMORY_MMAP;
+   qb.index = out_idx;
+   qb.flags = V4L2_BUF_FLAG_REQUEST_FD;
+   qb.request_fd = rfd;
+   qb.timestamp.tv_sec = (uint32_t)(qbuf_ts / 1000000000ULL);
+   qb.timestamp.tv_usec = (uint32_t)((qbuf_ts / 1000ULL) % 1000000ULL);
+   if (mp) {
+      qb.type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE;
+      qb.length = 1;
+      qb.m.planes = planes;
+      planes[0].bytesused = src_bytes;
+   } else {
+      qb.type = V4L2_BUF_TYPE_VIDEO_OUTPUT;
+      qb.bytesused = src_bytes;
+   }
+   if (ioctl(vs->video_fd, VIDIOC_QBUF, &qb) < 0) {
+      const int err = errno;
+      mesa_loge("panvk_v4l2: QBUF OUTPUT (mmap idx=%u) failed: %s",
+                out_idx, strerror(err));
+      return -err;
+   }
+
+   /* 9: QBUF CAPTURE (output frame) — MMAP-backed kernel-allocated buffer.
+    * dst_dmabuf_fd is ignored in 7c; copy-out to VkImage is 7d. */
+   const uint32_t cap_idx = vs->capture_next;
+   vs->capture_next = (vs->capture_next + 1) % vs->num_capture_buffers;
+   memset(&qb, 0, sizeof(qb));
+   memset(&planes, 0, sizeof(planes));
+   qb.memory = V4L2_MEMORY_MMAP;
+   qb.index = cap_idx;
+   if (mp) {
+      qb.type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE;
+      qb.length = 1;
+      qb.m.planes = planes;
+   } else {
+      qb.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
+   }
+   if (ioctl(vs->video_fd, VIDIOC_QBUF, &qb) < 0) {
+      const int err = errno;
+      mesa_loge("panvk_v4l2: QBUF CAPTURE (mmap idx=%u) failed: %s",
+                cap_idx, strerror(err));
+      return -err;
+   }
+
+   /* 10: MEDIA_REQUEST_IOC_QUEUE */
+   if (ioctl(rfd, MEDIA_REQUEST_IOC_QUEUE) < 0) {
+      const int err = errno;
+      mesa_loge("panvk_v4l2: REQUEST_IOC_QUEUE failed: %s", strerror(err));
+      return -err;
+   }
+
+   /* 11: poll(rfd, POLLPRI) — 200ms timeout per Phase 2 D7. Timeout and
+    * poll failure are both reported to the caller as -ETIMEDOUT. */
+   struct pollfd pfd = { .fd = rfd, .events = POLLPRI };
+   int pr = poll(&pfd, 1, 200);
+   if (pr <= 0) {
+      mesa_loge("panvk_v4l2: poll request_fd timeout/err pr=%d errno=%d",
+                pr, errno);
+      return -ETIMEDOUT;
+   }
+
+   /* 12: DQBUF OUTPUT — must match the memory type used at QBUF (MMAP, not
+    * DMABUF). With the wrong memory type the kernel rejects the DQBUF and
+    * the OUTPUT slot stays in flight, which leaks request_fd resources
+    * (mostly cosmetic for Phase 1 single-decode tests, but breaks the
+    * pipelined case). */
+   memset(&qb, 0, sizeof(qb));
+   memset(&planes, 0, sizeof(planes));
+   qb.memory = V4L2_MEMORY_MMAP;
+   qb.type = mp ? V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE : V4L2_BUF_TYPE_VIDEO_OUTPUT;
+   if (mp) {
+      qb.length = 1;
+      qb.m.planes = planes;
+   }
+   if (ioctl(vs->video_fd, VIDIOC_DQBUF, &qb) < 0) {
+      mesa_loge("panvk_v4l2: DQBUF OUTPUT failed: %s", strerror(errno));
+      /* non-fatal — capture might still have completed */
+   }
+
+   /* 13: DQBUF CAPTURE — MMAP, kernel-allocated. */
+   memset(&qb, 0, sizeof(qb));
+   memset(&planes, 0, sizeof(planes));
+   qb.memory = V4L2_MEMORY_MMAP;
+   qb.type = mp ? V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE : V4L2_BUF_TYPE_VIDEO_CAPTURE;
+   if (mp) {
+      qb.length = 1;
+      qb.m.planes = planes;
+   }
+   if (ioctl(vs->video_fd, VIDIOC_DQBUF, &qb) < 0) {
+      const int err = errno;
+      mesa_loge("panvk_v4l2: DQBUF CAPTURE failed: %s", strerror(err));
+      return -err;
+   }
+   if (qb.flags & V4L2_BUF_FLAG_ERROR) {
+      mesa_loge("panvk_v4l2: CAPTURE buffer flagged ERROR");
+      return -EIO;
+   }
+
+   return 0;
+}
|
|
+
|
|
+/* Tear down the V4L2 side of a session. Every step is guarded by the state
+ * it releases, so the function may be called on a partially-initialised
+ * session (panvk_v4l2_session_init() jumps here on failure). Order:
+ * munmap all mappings, STREAMOFF + REQBUFS(0) on both queues, close the
+ * request fds and free their array, close video_fd, close media_fd. */
+void
+panvk_v4l2_session_finish(struct panvk_video_session *vs,
+                          struct vk_device *vk_dev,
+                          const VkAllocationCallbacks *alloc)
+{
+   /* Drop CPU mappings first; slots still NULL (mmap loop never reached
+    * them) are skipped. */
+   for (unsigned slot = 0; slot < 18; slot++) {
+      if (vs->capture_map[slot] != NULL) {
+         munmap(vs->capture_map[slot], vs->capture_map_size[slot]);
+         vs->capture_map[slot] = NULL;
+         vs->capture_map_size[slot] = 0;
+      }
+      if (vs->output_map[slot] != NULL) {
+         munmap(vs->output_map[slot], vs->output_map_size[slot]);
+         vs->output_map[slot] = NULL;
+         vs->output_map_size[slot] = 0;
+      }
+   }
+
+   if (vs->video_fd >= 0) {
+      const enum v4l2_buf_type out_queue =
+         vs->mplane ? V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE
+                    : V4L2_BUF_TYPE_VIDEO_OUTPUT;
+      const enum v4l2_buf_type cap_queue =
+         vs->mplane ? V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE
+                    : V4L2_BUF_TYPE_VIDEO_CAPTURE;
+
+      /* STREAMOFF (safe to call even if STREAMON never ran — kernel
+       * returns EINVAL which we ignore). */
+      enum v4l2_buf_type queue = out_queue;
+      (void) ioctl(vs->video_fd, VIDIOC_STREAMOFF, &queue);
+      queue = cap_queue;
+      (void) ioctl(vs->video_fd, VIDIOC_STREAMOFF, &queue);
+
+      /* Release the kernel buffer queues via REQBUFS count=0. The same
+       * request struct is reused for both queues; only .type changes. */
+      struct v4l2_requestbuffers release;
+      memset(&release, 0, sizeof(release));
+      release.memory = V4L2_MEMORY_MMAP;
+      release.type = out_queue;
+      (void) ioctl(vs->video_fd, VIDIOC_REQBUFS, &release);
+      release.type = cap_queue;
+      (void) ioctl(vs->video_fd, VIDIOC_REQBUFS, &release);
+   }
+
+   if (vs->request_fds != NULL) {
+      /* Close every request fd that was actually allocated (>= 0). */
+      for (unsigned slot = 0; slot < vs->num_request_fds; slot++) {
+         if (vs->request_fds[slot] < 0)
+            continue;
+         close(vs->request_fds[slot]);
+      }
+      vk_free(&vk_dev->alloc, vs->request_fds);
+      vs->request_fds = NULL;
+      vs->num_request_fds = 0;
+   }
+
+   if (vs->video_fd >= 0) {
+      close(vs->video_fd);
+      vs->video_fd = -1;
+   }
+
+   if (vs->media_fd >= 0) {
+      close(vs->media_fd);
+      vs->media_fd = -1;
+   }
+}
|
|
diff -urN a/src/panfrost/vulkan/panvk_v4l2_h264.c b/src/panfrost/vulkan/panvk_v4l2_h264.c
|
|
--- a/src/panfrost/vulkan/panvk_v4l2_h264.c 1970-01-01 01:00:00.000000000 +0100
|
|
+++ b/src/panfrost/vulkan/panvk_v4l2_h264.c 2026-05-22 10:17:41.214043265 +0200
|
|
@@ -0,0 +1,478 @@
|
|
+/*
|
|
+ * panvk-bifrost-video Phase 4: Vulkan StdVideo H.264 → V4L2 stateless H.264
|
|
+ * control-struct translation.
|
|
+ *
|
|
+ * This file is the protocol-translation bridge that lets a Vulkan video
|
|
+ * decode session (VkVideoDecodeH264PictureInfoKHR + StdVideoH264*) drive a
|
|
+ * V4L2 stateless H.264 decoder (the hantro VPU on RK3566/RK3568 via
|
|
+ * /dev/video1) by filling the four FRAME_BASED controls:
|
|
+ *
|
|
+ * - V4L2_CID_STATELESS_H264_SPS → struct v4l2_ctrl_h264_sps
|
|
+ * - V4L2_CID_STATELESS_H264_PPS → struct v4l2_ctrl_h264_pps
|
|
+ * - V4L2_CID_STATELESS_H264_SCALING_MATRIX → struct v4l2_ctrl_h264_scaling_matrix
|
|
+ * - V4L2_CID_STATELESS_H264_DECODE_PARAMS → struct v4l2_ctrl_h264_decode_params
|
|
+ *
|
|
+ * The ioctl-side (VIDIOC_S_EXT_CTRLS on a request_fd) is the caller's
|
|
+ * responsibility — see panvk_v4l2.c. This file is pure data-shape
|
|
+ * translation; no syscalls, no GPU/shader work.
|
|
+ *
|
|
+ * Cross-references:
|
|
+ * - V4L2 UAPI structs and field semantics:
|
|
+ * /usr/include/linux/v4l2-controls.h
|
|
+ * Documentation/userspace-api/media/v4l/ext-ctrls-codec-stateless.rst
|
|
+ * - Vulkan StdVideo H.264 structs:
|
|
+ * include/vk_video/vulkan_video_codec_h264std.h
|
|
+ * include/vk_video/vulkan_video_codec_h264std_decode.h
|
|
+ * include/vulkan/vulkan_core.h (VkVideoDecodeH264*KHR)
|
|
+ * - VAAPI→V4L2 reference impl (semantically equivalent, different carrier):
|
|
+ * libva-v4l2-request-fourier/src/h264.c
|
|
+ *
|
|
+ * Why every flag is mapped by name, not by bit position:
|
|
+ * StdVideoH264SpsFlags packs its flags as 1-bit bitfield members in a
|
|
+ * compiler-defined layout. V4L2_H264_SPS_FLAG_* are explicit
|
|
+ * bit-position #defines. The flag *names* match the H.264 spec —
|
|
+ * flag-by-flag mapping is mechanical — but the *bit positions* don't
|
|
+ * match between the two. Each flag is therefore translated by its
|
|
+ * spec name, never by raw bitmask copy.
|
|
+ *
|
|
+ * SPDX-License-Identifier: MIT
|
|
+ */
|
|
+
|
|
+#include "panvk_video_decode.h"
|
|
+
|
|
+#include <string.h>
|
|
+
|
|
+#include <vk_video/vulkan_video_codec_h264std.h>
|
|
+#include <vk_video/vulkan_video_codec_h264std_decode.h>
|
|
+#include <vulkan/vulkan_core.h>
|
|
+
|
|
+#include <linux/videodev2.h>
|
|
+#include <linux/v4l2-controls.h>
|
|
+
|
|
+/* ------------------------------------------------------------------ */
|
|
+/* SPS */
|
|
+/* ------------------------------------------------------------------ */
|
|
+
|
|
+/*
|
|
+ * Translate StdVideoH264SequenceParameterSet → struct v4l2_ctrl_h264_sps.
|
|
+ *
|
|
+ * profile_idc: StdVideoH264ProfileIdc is the literal H.264 profile_idc value
|
|
+ * (BASELINE=66, MAIN=77, HIGH=100, …) so a direct cast is correct.
|
|
+ *
|
|
+ * level_idc: StdVideoH264LevelIdc is an *enum index* (1_0=0, 1_1=1, …, 6_2=18),
|
|
+ * NOT the spec-encoded level_idc byte (which V4L2 expects in units of
|
|
+ * level*10: Level 4.1 → 41, Level 5.1 → 51, etc). We must encode.
|
|
+ *
|
|
+ * pic_order_cnt_type: StdVideoH264PocType enum values (0/1/2) match the spec
|
|
+ * directly; cast is safe.
|
|
+ *
|
|
+ * constraint_set_flags: V4L2 packs constraint_set{0..5}_flag into a single
|
|
+ * __u8 (V4L2_H264_SPS_CONSTRAINT_SETN_FLAG = 0x01..0x20). The StdVideo flag
|
|
+ * bitfields hold each one separately.
|
|
+ */
|
|
+void
|
|
+panvk_v4l2_h264_std_to_ctrl_sps(const StdVideoH264SequenceParameterSet *in,
|
|
+ struct v4l2_ctrl_h264_sps *out)
|
|
+{
|
|
+ memset(out, 0, sizeof(*out));
|
|
+
|
|
+ /* StdVideoH264LevelIdc → level_idc byte (level * 10). */
|
|
+ static const __u8 level_idc_lut[] = {
|
|
+ [STD_VIDEO_H264_LEVEL_IDC_1_0] = 10,
|
|
+ [STD_VIDEO_H264_LEVEL_IDC_1_1] = 11,
|
|
+ [STD_VIDEO_H264_LEVEL_IDC_1_2] = 12,
|
|
+ [STD_VIDEO_H264_LEVEL_IDC_1_3] = 13,
|
|
+ [STD_VIDEO_H264_LEVEL_IDC_2_0] = 20,
|
|
+ [STD_VIDEO_H264_LEVEL_IDC_2_1] = 21,
|
|
+ [STD_VIDEO_H264_LEVEL_IDC_2_2] = 22,
|
|
+ [STD_VIDEO_H264_LEVEL_IDC_3_0] = 30,
|
|
+ [STD_VIDEO_H264_LEVEL_IDC_3_1] = 31,
|
|
+ [STD_VIDEO_H264_LEVEL_IDC_3_2] = 32,
|
|
+ [STD_VIDEO_H264_LEVEL_IDC_4_0] = 40,
|
|
+ [STD_VIDEO_H264_LEVEL_IDC_4_1] = 41,
|
|
+ [STD_VIDEO_H264_LEVEL_IDC_4_2] = 42,
|
|
+ [STD_VIDEO_H264_LEVEL_IDC_5_0] = 50,
|
|
+ [STD_VIDEO_H264_LEVEL_IDC_5_1] = 51,
|
|
+ [STD_VIDEO_H264_LEVEL_IDC_5_2] = 52,
|
|
+ [STD_VIDEO_H264_LEVEL_IDC_6_0] = 60,
|
|
+ [STD_VIDEO_H264_LEVEL_IDC_6_1] = 61,
|
|
+ [STD_VIDEO_H264_LEVEL_IDC_6_2] = 62,
|
|
+ };
|
|
+
|
|
+ out->profile_idc = (__u8) in->profile_idc;
|
|
+ if ((unsigned) in->level_idc < sizeof(level_idc_lut) / sizeof(level_idc_lut[0]))
|
|
+ out->level_idc = level_idc_lut[in->level_idc];
|
|
+ else
|
|
+ out->level_idc = 0;
|
|
+ out->seq_parameter_set_id = in->seq_parameter_set_id;
|
|
+ out->chroma_format_idc = (__u8) in->chroma_format_idc;
|
|
+ out->bit_depth_luma_minus8 = in->bit_depth_luma_minus8;
|
|
+ out->bit_depth_chroma_minus8 = in->bit_depth_chroma_minus8;
|
|
+ out->log2_max_frame_num_minus4 = in->log2_max_frame_num_minus4;
|
|
+ out->pic_order_cnt_type = (__u8) in->pic_order_cnt_type;
|
|
+ out->log2_max_pic_order_cnt_lsb_minus4 = in->log2_max_pic_order_cnt_lsb_minus4;
|
|
+ out->max_num_ref_frames = in->max_num_ref_frames;
|
|
+ out->num_ref_frames_in_pic_order_cnt_cycle =
|
|
+ in->num_ref_frames_in_pic_order_cnt_cycle;
|
|
+
|
|
+ out->offset_for_non_ref_pic = in->offset_for_non_ref_pic;
|
|
+ out->offset_for_top_to_bottom_field = in->offset_for_top_to_bottom_field;
|
|
+
|
|
+ /* offset_for_ref_frame[]: StdVideo passes via pOffsetForRefFrame pointer
|
|
+ * sized num_ref_frames_in_pic_order_cnt_cycle. V4L2 has a 255-entry fixed
|
|
+ * array. Copy in-bounds entries. */
|
|
+ if (in->pOffsetForRefFrame != NULL) {
|
|
+ unsigned n = in->num_ref_frames_in_pic_order_cnt_cycle;
|
|
+ if (n > 255) n = 255;
|
|
+ for (unsigned i = 0; i < n; i++)
|
|
+ out->offset_for_ref_frame[i] = in->pOffsetForRefFrame[i];
|
|
+ }
|
|
+
|
|
+ out->pic_width_in_mbs_minus1 = (__u16) in->pic_width_in_mbs_minus1;
|
|
+ out->pic_height_in_map_units_minus1 = (__u16) in->pic_height_in_map_units_minus1;
|
|
+
|
|
+ /* Constraint set flags — V4L2 packs into __u8 constraint_set_flags. */
|
|
+ __u8 cs = 0;
|
|
+ if (in->flags.constraint_set0_flag) cs |= V4L2_H264_SPS_CONSTRAINT_SET0_FLAG;
|
|
+ if (in->flags.constraint_set1_flag) cs |= V4L2_H264_SPS_CONSTRAINT_SET1_FLAG;
|
|
+ if (in->flags.constraint_set2_flag) cs |= V4L2_H264_SPS_CONSTRAINT_SET2_FLAG;
|
|
+ if (in->flags.constraint_set3_flag) cs |= V4L2_H264_SPS_CONSTRAINT_SET3_FLAG;
|
|
+ if (in->flags.constraint_set4_flag) cs |= V4L2_H264_SPS_CONSTRAINT_SET4_FLAG;
|
|
+ if (in->flags.constraint_set5_flag) cs |= V4L2_H264_SPS_CONSTRAINT_SET5_FLAG;
|
|
+ out->constraint_set_flags = cs;
|
|
+
|
|
+ /* Plain SPS flags — translated by spec name, NOT by bit position. */
|
|
+ __u32 f = 0;
|
|
+ if (in->flags.separate_colour_plane_flag)
|
|
+ f |= V4L2_H264_SPS_FLAG_SEPARATE_COLOUR_PLANE;
|
|
+ if (in->flags.qpprime_y_zero_transform_bypass_flag)
|
|
+ f |= V4L2_H264_SPS_FLAG_QPPRIME_Y_ZERO_TRANSFORM_BYPASS;
|
|
+ if (in->flags.delta_pic_order_always_zero_flag)
|
|
+ f |= V4L2_H264_SPS_FLAG_DELTA_PIC_ORDER_ALWAYS_ZERO;
|
|
+ if (in->flags.gaps_in_frame_num_value_allowed_flag)
|
|
+ f |= V4L2_H264_SPS_FLAG_GAPS_IN_FRAME_NUM_VALUE_ALLOWED;
|
|
+ if (in->flags.frame_mbs_only_flag)
|
|
+ f |= V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY;
|
|
+ if (in->flags.mb_adaptive_frame_field_flag)
|
|
+ f |= V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD;
|
|
+ if (in->flags.direct_8x8_inference_flag)
|
|
+ f |= V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE;
|
|
+ out->flags = f;
|
|
+ /*
|
|
+ * StdVideoH264SpsFlags also has: frame_cropping_flag,
|
|
+ * seq_scaling_matrix_present_flag, vui_parameters_present_flag.
|
|
+ * V4L2 has no equivalent SPS flags for these — frame cropping is
|
|
+ * communicated via S_FMT cropping rectangles, scaling matrix presence is
|
|
+ * carried in PPS's V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT, and VUI is
|
|
+ * not exposed at all. Intentionally dropped.
|
|
+ */
|
|
+}
|
|
+
|
|
+/* ------------------------------------------------------------------ */
|
|
+/* PPS */
|
|
+/* ------------------------------------------------------------------ */
|
|
+
|
|
+/*
|
|
+ * Translate StdVideoH264PictureParameterSet → struct v4l2_ctrl_h264_pps.
|
|
+ *
|
|
+ * num_slice_groups_minus1: not in StdVideoH264PictureParameterSet at all
|
|
+ * (Vulkan H.264 video core profile excludes FMO). Set to 0 (one slice
|
|
+ * group, the only value FMO-free decoders accept).
|
|
+ *
|
|
+ * V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT: per kernel doc this should be
|
|
+ * set when a non-flat matrix applies to the picture. We translate it from
|
|
+ * StdVideoH264PpsFlags::pic_scaling_matrix_present_flag — note however
|
|
+ * that the libva-v4l2-request-fourier reference always sets this flag
|
|
+ * together with a default-flat matrix, because hantro's set_params reads
|
|
+ * the flag to drive G1_REG_DEC_CTRL2_TYPE1_QUANT_E. Callers may want to
|
|
+ * set this flag unconditionally for hantro; we don't here because the
|
|
+ * spec mapping is the cleaner default.
|
|
+ */
|
|
+void
|
|
+panvk_v4l2_h264_std_to_ctrl_pps(const StdVideoH264PictureParameterSet *in,
|
|
+ struct v4l2_ctrl_h264_pps *out)
|
|
+{
|
|
+ memset(out, 0, sizeof(*out));
|
|
+
|
|
+ out->pic_parameter_set_id = in->pic_parameter_set_id;
|
|
+ out->seq_parameter_set_id = in->seq_parameter_set_id;
|
|
+ out->num_slice_groups_minus1 = 0; /* not exposed by StdVideo; H.264
|
|
+ core profile assumes 1 group. */
|
|
+ out->num_ref_idx_l0_default_active_minus1 = in->num_ref_idx_l0_default_active_minus1;
|
|
+ out->num_ref_idx_l1_default_active_minus1 = in->num_ref_idx_l1_default_active_minus1;
|
|
+ out->weighted_bipred_idc = (__u8) in->weighted_bipred_idc;
|
|
+ out->pic_init_qp_minus26 = in->pic_init_qp_minus26;
|
|
+ out->pic_init_qs_minus26 = in->pic_init_qs_minus26;
|
|
+ out->chroma_qp_index_offset = in->chroma_qp_index_offset;
|
|
+ out->second_chroma_qp_index_offset = in->second_chroma_qp_index_offset;
|
|
+
|
|
+ __u16 f = 0;
|
|
+ if (in->flags.entropy_coding_mode_flag)
|
|
+ f |= V4L2_H264_PPS_FLAG_ENTROPY_CODING_MODE;
|
|
+ if (in->flags.bottom_field_pic_order_in_frame_present_flag)
|
|
+ f |= V4L2_H264_PPS_FLAG_BOTTOM_FIELD_PIC_ORDER_IN_FRAME_PRESENT;
|
|
+ if (in->flags.weighted_pred_flag)
|
|
+ f |= V4L2_H264_PPS_FLAG_WEIGHTED_PRED;
|
|
+ if (in->flags.deblocking_filter_control_present_flag)
|
|
+ f |= V4L2_H264_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT;
|
|
+ if (in->flags.constrained_intra_pred_flag)
|
|
+ f |= V4L2_H264_PPS_FLAG_CONSTRAINED_INTRA_PRED;
|
|
+ if (in->flags.redundant_pic_cnt_present_flag)
|
|
+ f |= V4L2_H264_PPS_FLAG_REDUNDANT_PIC_CNT_PRESENT;
|
|
+ if (in->flags.transform_8x8_mode_flag)
|
|
+ f |= V4L2_H264_PPS_FLAG_TRANSFORM_8X8_MODE;
|
|
+ /*
|
|
+ * V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT: set UNCONDITIONALLY.
|
|
+ *
|
|
+ * Hantro VPU2 (rockchip_vpu2_hw_h264_dec.c) and G1 both gate
|
|
+ * scaling_list ingestion on this flag (assemble_scaling_list in
|
|
+ * hantro_h264.c:215 short-circuits if clear, leaving the priv-table
|
|
+ * scaling region zero — dequant then computes 0 * quant = 0 pixels).
|
|
+ * libva-v4l2-request-fourier sets the flag together with the spec-
|
|
+ * default flat-16 matrix for this exact reason (h264.c:484 lineage).
|
|
+ * Validated empirically 2026-05-21: with flag clear hantro produces
|
|
+ * all-zero Y plane; with flag set + flat matrix it decodes BBB.
|
|
+ *
|
|
+ * Vulkan-side: the Std flag tracks the bitstream's pic_scaling_matrix
|
|
+ * _present_flag — useful for software decoders but irrelevant to the
|
|
+ * hantro hardware path. Always-on is safe here because the caller
|
|
+ * pairs this PPS with panvk_v4l2_h264_default_flat_scaling_matrix()
|
|
+ * (whose flat-16 values are themselves the H.264 §7.4.2.1.1.1
|
|
+ * fall-back when no explicit list is signalled).
|
|
+ */
|
|
+ f |= V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT;
|
|
+ out->flags = f;
|
|
+}
|
|
+
|
|
+/* ------------------------------------------------------------------ */
|
|
+/* SCALING_MATRIX */
|
|
+/* ------------------------------------------------------------------ */
|
|
+
|
|
+/*
|
|
+ * Translate StdVideoH264ScalingLists → struct v4l2_ctrl_h264_scaling_matrix.
|
|
+ *
|
|
+ * StdVideoH264ScalingLists.ScalingList4x4[6][16]: 6 lists in raster order,
|
|
+ * indices 0..5 = Intra Y, Intra Cb, Intra Cr, Inter Y, Inter Cb, Inter Cr.
|
|
+ * V4L2's scaling_list_4x4[6][16] expects the SAME order (per kernel doc
|
|
+ * ext-ctrls-codec-stateless.rst). → straight memcpy.
|
|
+ *
|
|
+ * StdVideoH264ScalingLists.ScalingList8x8[6][64]: 6 lists in raster order
|
|
+ * per the H.264 spec table 7-2 ordering: Intra Y, Inter Y, Intra Cb,
|
|
+ * Inter Cb, Intra Cr, Inter Cr. V4L2's scaling_list_8x8[6][64] uses the
|
|
+ * SAME order per kernel doc. → straight memcpy.
|
|
+ *
|
|
+ * IMPORTANT — libva-v4l2-request-fourier's h264_va_matrix_to_v4l2 (h264.c
|
|
+ * line 544) does an unusual 8x8 remap (VAMatrix[0]→[0], VAMatrix[1]→[3])
|
|
+ * because VAIQMatrixBufferH264 only carries 2 of the 6 8x8 lists (Intra Y
|
|
+ * and Inter Y, for YUV420 streams). That's a libva *carrier* limitation,
|
|
+ * not a V4L2 ordering quirk — we do NOT replicate it here. Vulkan
|
|
+ * provides all 6 lists in spec order; we copy them straight.
|
|
+ *
|
|
+ * If a caller has no explicit lists, pass NULL — the caller writes a flat-16
|
|
+ * default (the H.264 §7.4.2.1.1.1 Flat_4x4_16 / Flat_8x8_16 defaults) into
|
|
+ * the struct directly, see panvk_v4l2_h264_default_flat_scaling_matrix.
|
|
+ */
|
|
+void
|
|
+panvk_v4l2_h264_std_to_ctrl_scaling_matrix(const StdVideoH264ScalingLists *in,
|
|
+ struct v4l2_ctrl_h264_scaling_matrix *out)
|
|
+{
|
|
+ memset(out, 0, sizeof(*out));
|
|
+ if (in == NULL)
|
|
+ return;
|
|
+
|
|
+ /* Both sides use [6][16] / [6][64] in identical spec-table-7-2 order. */
|
|
+ memcpy(out->scaling_list_4x4, in->ScalingList4x4,
|
|
+ sizeof(out->scaling_list_4x4));
|
|
+ memcpy(out->scaling_list_8x8, in->ScalingList8x8,
|
|
+ sizeof(out->scaling_list_8x8));
|
|
+}
|
|
+
|
|
+/* Spec-default flat scaling matrix (every element = 16). Use when neither
|
|
+ * SPS::seq_scaling_matrix_present_flag nor PPS::pic_scaling_matrix_present_flag
|
|
+ * is set. The H.264 spec §7.4.2.1.1.1 defines Flat_4x4_16 and Flat_8x8_16
|
|
+ * as the fall-back; the kernel doc recommends always submitting the
|
|
+ * SCALING_MATRIX control with these defaults when explicit lists are
|
|
+ * absent (drivers like hantro G1 read it unconditionally). */
|
|
+void
|
|
+panvk_v4l2_h264_default_flat_scaling_matrix(
|
|
+ struct v4l2_ctrl_h264_scaling_matrix *out)
|
|
+{
|
|
+ memset(out->scaling_list_4x4, 16, sizeof(out->scaling_list_4x4));
|
|
+ memset(out->scaling_list_8x8, 16, sizeof(out->scaling_list_8x8));
|
|
+}
|
|
+
|
|
+/* ------------------------------------------------------------------ */
|
|
+/* DECODE_PARAMS */
|
|
+/* ------------------------------------------------------------------ */
|
|
+
|
|
+/*
|
|
+ * Build v4l2_ctrl_h264_decode_params from Vulkan picture info.
|
|
+ *
|
|
+ * The caller supplies:
|
|
+ * vs — panvk_video_session; vs->dpb[slot].reference_ts is the
|
|
+ * V4L2 timestamp (v4l2_buffer.timestamp converted via
|
|
+ * v4l2_timeval_to_ns) of the previously-decoded CAPTURE
|
|
+ * buffer associated with that DPB slot index.
|
|
+ * pic_info — VkVideoDecodeH264PictureInfoKHR for the frame being
|
|
+ * decoded; pStdPictureInfo carries the per-pic fields.
|
|
+ * active_pps — the StdVideoH264PictureParameterSet bound for this
|
|
+ * decode; only needed if num_slice_groups_minus1 > 0 (FMO)
|
|
+ * which Vulkan core profile excludes. Currently unused.
|
|
+ * dst_dpb_slot — output (this-frame) DPB slot, supplied for symmetry; the
|
|
+ * current-frame fields go into the top-level
|
|
+ * v4l2_ctrl_h264_decode_params (NOT into dpb[]). The dpb[]
|
|
+ * array carries reference frames only.
|
|
+ * ref_slots — array of active reference DPB slots from the
|
|
+ * VkVideoDecodeInfoKHR::pReferenceSlots. Each entry's
|
|
+ * slotIndex selects vs->dpb[idx].reference_ts; the
|
|
+ * per-slot StdVideoDecodeH264ReferenceInfo is reachable
|
|
+ * via the VkVideoDecodeH264DpbSlotInfoKHR chained on
|
|
+ * pNext (caller has already resolved it — see helper
|
|
+ * below).
|
|
+ * num_ref_slots — count of entries in ref_slots[].
|
|
+ * output_ts — V4L2 reference_ts assigned to the CAPTURE buffer for
|
|
+ * *this* frame; recorded into the SETUP slot mapping by
|
|
+ * the caller, not used in this struct itself.
|
|
+ *
|
|
+ * Fields the caller MUST populate post-translation:
|
|
+ * - dec_ref_pic_marking_bit_size
|
|
+ * - pic_order_cnt_bit_size
|
|
+ * - pic_order_cnt_lsb / delta_pic_order_cnt_bottom / delta_pic_order_cnt0 /
|
|
+ * delta_pic_order_cnt1 / idr_pic_id
|
|
+ * These come from the slice header bit-level parse (Vulkan doesn't
|
|
+ * forward them in StdVideoDecodeH264PictureInfo). The hantro G1 reads
|
|
+ * them from registers; without them the decoder produces zeros.
|
|
+ * See libva-v4l2-request-fourier h264.c:394-449 for the parse contract.
|
|
+ *
|
|
+ * - slice_group_change_cycle: from slice header, only meaningful when
|
|
+ * num_slice_groups_minus1 > 0 (not in Vulkan core profile).
|
|
+ */
|
|
+
|
|
+/* Helper: extract StdVideoDecodeH264ReferenceInfo from a VkVideoReferenceSlotInfoKHR
|
|
+ * pNext chain. Returns NULL if the chain doesn't include
|
|
+ * VkVideoDecodeH264DpbSlotInfoKHR. */
|
|
+static const StdVideoDecodeH264ReferenceInfo *
|
|
+ref_info_from_slot(const VkVideoReferenceSlotInfoKHR *slot)
|
|
+{
|
|
+ const VkBaseInStructure *p = (const VkBaseInStructure *) slot->pNext;
|
|
+ while (p != NULL) {
|
|
+ if (p->sType == VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_DPB_SLOT_INFO_KHR) {
|
|
+ const VkVideoDecodeH264DpbSlotInfoKHR *dpb =
|
|
+ (const VkVideoDecodeH264DpbSlotInfoKHR *) p;
|
|
+ return dpb->pStdReferenceInfo;
|
|
+ }
|
|
+ p = p->pNext;
|
|
+ }
|
|
+ return NULL;
|
|
+}
|
|
+
|
|
+void
|
|
+panvk_v4l2_h264_build_decode_params(
|
|
+ const struct panvk_video_session *vs,
|
|
+ const VkVideoDecodeH264PictureInfoKHR *pic_info,
|
|
+ const StdVideoH264PictureParameterSet *active_pps,
|
|
+ uint32_t dst_dpb_slot,
|
|
+ const VkVideoReferenceSlotInfoKHR *ref_slots,
|
|
+ uint32_t num_ref_slots,
|
|
+ uint64_t output_ts,
|
|
+ struct v4l2_ctrl_h264_decode_params *out)
|
|
+{
|
|
+ (void) active_pps; /* FMO-only; not in Vulkan core profile. */
|
|
+ (void) dst_dpb_slot; /* Caller records output_ts → vs->dpb[slot] post-decode. */
|
|
+ (void) output_ts; /* Same. */
|
|
+
|
|
+ memset(out, 0, sizeof(*out));
|
|
+
|
|
+ const StdVideoDecodeH264PictureInfo *spic = pic_info->pStdPictureInfo;
|
|
+
|
|
+ /* Current-frame top-level fields. */
|
|
+ out->frame_num = spic->frame_num;
|
|
+ out->idr_pic_id = spic->idr_pic_id; /* may be overwritten by
|
|
+ slice-header parse. */
|
|
+ out->top_field_order_cnt = spic->PicOrderCnt[STD_VIDEO_DECODE_H264_FIELD_ORDER_COUNT_TOP];
|
|
+ out->bottom_field_order_cnt= spic->PicOrderCnt[STD_VIDEO_DECODE_H264_FIELD_ORDER_COUNT_BOTTOM];
|
|
+
|
|
+ /* nal_ref_idc: not in StdVideoDecodeH264PictureInfo. The caller derives
|
|
+ * it from the first byte of the slice NAL (high 2 bits after the
|
|
+ * forbidden-zero-bit). Hantro reads it via DECODE_PARAMS, so this
|
|
+ * SHOULD be set post-call. For non-reference frames the H.264 spec
|
|
+ * mandates nal_ref_idc == 0; we leave the field at zero and the caller
|
|
+ * patches in the parsed value. */
|
|
+ out->nal_ref_idc = 0;
|
|
+
|
|
+ __u32 f = 0;
|
|
+ if (spic->flags.IdrPicFlag)
|
|
+ f |= V4L2_H264_DECODE_PARAM_FLAG_IDR_PIC;
|
|
+ if (spic->flags.field_pic_flag)
|
|
+ f |= V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC;
|
|
+ if (spic->flags.bottom_field_flag)
|
|
+ f |= V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD;
|
|
+ /* PFRAME/BFRAME flags are slice-type-derived and not 1:1 with Vulkan
|
|
+ * picture info (slice type is per-slice, not per-pic). Leave clear;
|
|
+ * the slice_header parse path or higher-level caller can OR them in if
|
|
+ * an FRAME_BASED driver needs them. Hantro G1 does not. */
|
|
+ out->flags = f;
|
|
+
|
|
+ /* DPB array. One entry per active reference slot. */
|
|
+ for (uint32_t i = 0; i < num_ref_slots && i < V4L2_H264_NUM_DPB_ENTRIES; i++) {
|
|
+ const VkVideoReferenceSlotInfoKHR *slot = &ref_slots[i];
|
|
+ struct v4l2_h264_dpb_entry *dpb = &out->dpb[i];
|
|
+
|
|
+ if (slot->slotIndex < 0)
|
|
+ continue; /* "no reference" sentinel; entry stays zeroed (invalid). */
|
|
+
|
|
+ const uint32_t idx = (uint32_t) slot->slotIndex;
|
|
+ if (idx >= 16 || !vs->dpb[idx].valid)
|
|
+ continue;
|
|
+
|
|
+ const StdVideoDecodeH264ReferenceInfo *rinfo = ref_info_from_slot(slot);
|
|
+
|
|
+ dpb->reference_ts = vs->dpb[idx].reference_ts;
|
|
+
|
|
+ if (rinfo != NULL) {
|
|
+ dpb->frame_num = rinfo->FrameNum;
|
|
+ /* pic_num: for short-term refs this is FrameNumWrap (H.264 §8.2.4.1);
|
|
+ * for long-term refs it's LongTermPicNum (§8.2.4.2). StdVideo
|
|
+ * doesn't separate the two — FrameNum holds whichever applies for
|
|
+ * the kind of reference. The kernel reflist builder uses pic_num
|
|
+ * only for short-term ordering; we feed FrameNum straight through
|
|
+ * and rely on V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM to disambiguate.
|
|
+ *
|
|
+ * NOTE: FrameNumWrap requires knowing max_frame_num and the
|
|
+ * current frame's frame_num to wrap. Vulkan-side callers that
|
|
+ * want spec-perfect pic_num for short-term refs should override
|
|
+ * this field after calling. The hantro driver ignores pic_num
|
|
+ * (uses reference_ts) so the wrap is empirically not load-bearing
|
|
+ * on RK3566/RK3568.
|
|
+ */
|
|
+ dpb->pic_num = rinfo->FrameNum;
|
|
+ dpb->top_field_order_cnt = rinfo->PicOrderCnt[STD_VIDEO_DECODE_H264_FIELD_ORDER_COUNT_TOP];
|
|
+ dpb->bottom_field_order_cnt = rinfo->PicOrderCnt[STD_VIDEO_DECODE_H264_FIELD_ORDER_COUNT_BOTTOM];
|
|
+
|
|
+ __u32 dflags = V4L2_H264_DPB_ENTRY_FLAG_VALID | V4L2_H264_DPB_ENTRY_FLAG_ACTIVE;
|
|
+ if (rinfo->flags.used_for_long_term_reference)
|
|
+ dflags |= V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM;
|
|
+ /* FIELD flag indicates a single-field-coded reference; both
|
|
+ * top_field_flag and bottom_field_flag in StdVideo mean the entry
|
|
+ * represents only that field. */
|
|
+ if (rinfo->flags.top_field_flag || rinfo->flags.bottom_field_flag)
|
|
+ dflags |= V4L2_H264_DPB_ENTRY_FLAG_FIELD;
|
|
+ dpb->flags = dflags;
|
|
+
|
|
+ /* fields: per kernel doc, valid values are V4L2_H264_{TOP,BOTTOM,FRAME}_REF.
|
|
+ * For frame-coded refs we use FRAME_REF (TOP|BOTTOM). The kernel
|
|
+ * reflist builder skips entries with fields == 0 — see hantro and
|
|
+ * the v4l2_h264_init_reflist_builder helper. */
|
|
+ if (rinfo->flags.top_field_flag && !rinfo->flags.bottom_field_flag)
|
|
+ dpb->fields = V4L2_H264_TOP_FIELD_REF;
|
|
+ else if (rinfo->flags.bottom_field_flag && !rinfo->flags.top_field_flag)
|
|
+ dpb->fields = V4L2_H264_BOTTOM_FIELD_REF;
|
|
+ else
|
|
+ dpb->fields = V4L2_H264_FRAME_REF;
|
|
+ } else {
|
|
+ /* No StdVideoDecodeH264ReferenceInfo chained: minimal fallback. */
|
|
+ dpb->flags = V4L2_H264_DPB_ENTRY_FLAG_VALID;
|
|
+ dpb->fields = V4L2_H264_FRAME_REF;
|
|
+ }
|
|
+ }
|
|
+}
|
|
diff -urN a/src/panfrost/vulkan/panvk_v4l2_h264_slice_header.c b/src/panfrost/vulkan/panvk_v4l2_h264_slice_header.c
|
|
--- a/src/panfrost/vulkan/panvk_v4l2_h264_slice_header.c 1970-01-01 01:00:00.000000000 +0100
|
|
+++ b/src/panfrost/vulkan/panvk_v4l2_h264_slice_header.c 2026-05-22 10:17:41.214043265 +0200
|
|
@@ -0,0 +1,314 @@
|
|
+/*
|
|
+ * H.264 slice header bit-parser implementation.
|
|
+ *
|
|
+ * Verbatim port of libva-v4l2-request-fourier src/h264_slice_header.c
|
|
+ * with the public symbol renamed to panvk_v4l2_h264_parse_slice_header()
|
|
+ * and the type names prefixed for Mesa namespace hygiene. See
|
|
+ * panvk_v4l2_h264_slice_header.h for context.
|
|
+ *
|
|
+ * SPDX-License-Identifier: MIT
|
|
+ */
|
|
+
|
|
+#include "panvk_v4l2_h264_slice_header.h"
|
|
+
|
|
+#include <errno.h>
|
|
+#include <string.h>
|
|
+
|
|
/*
 * Minimal MSB-first bit reader over an (already unescaped) RBSP buffer.
 *
 * `error` is sticky: once set, the reader has hit the end of the buffer or
 * a malformed code, and every value returned since then must be discarded.
 */
struct br {
   const uint8_t *data;
   size_t length; /* bytes */
   size_t bit_pos;
   bool error;
};

/*
 * Read `n` bits, most significant first, and return them right-aligned.
 *
 * On running out of data sets b->error and returns 0; bits consumed before
 * the end was reached stay consumed.  Callers in this file pass n <= 32;
 * for larger n the earliest bits are shifted out of the 32-bit result.
 */
static uint32_t br_read_u(struct br *b, unsigned n)
{
   uint32_t v = 0;

   while (n--) {
      if (b->bit_pos >= b->length * 8) {
         b->error = true;
         return 0;
      }
      v = (v << 1) | ((b->data[b->bit_pos >> 3] >>
                       (7 - (b->bit_pos & 7))) & 1u);
      b->bit_pos++;
   }
   return v;
}

/*
 * Read one unsigned Exp-Golomb code, ue(v): `zeros` leading 0 bits, a 1
 * bit, then `zeros` suffix bits; value = 2^zeros - 1 + suffix.
 *
 * Returns 0 with b->error set when the buffer ends inside the code or when
 * the prefix has 32 or more zero bits.  The latter cannot be represented
 * in 32 bits (and `1u << 32` would be undefined), so it is reported as an
 * error instead of being silently decoded as 0, which would leave the
 * reader mid-code and let the caller keep parsing garbage.
 */
static uint32_t br_read_ue(struct br *b)
{
   unsigned zeros = 0;

   while (br_read_u(b, 1) == 0) {
      if (b->error)
         return 0;
      if (++zeros >= 32) {
         b->error = true;
         return 0;
      }
   }
   if (zeros == 0)
      return 0;
   return (1u << zeros) - 1u + br_read_u(b, zeros);
}

/*
 * Read one signed Exp-Golomb code, se(v): code number k maps to
 * (-1)^(k+1) * ceil(k / 2), i.e. 0, +1, -1, +2, -2, ...
 * Error reporting is inherited from br_read_ue().
 */
static int32_t br_read_se(struct br *b)
{
   uint32_t v = br_read_ue(b);

   if (v & 1u)
      return (int32_t)((v + 1u) >> 1);
   return -(int32_t)(v >> 1);
}
|
|
+
|
|
/* Upper bound on how many NAL payload bytes are examined for the slice
 * header; also the size callers must give the unescape output buffer. */
#define PANVK_H264_SLICE_HEADER_SCAN_BYTES 64

/*
 * Copy the first min(in_len, PANVK_H264_SLICE_HEADER_SCAN_BYTES) bytes of
 * `in` to `out`, dropping every emulation-prevention byte (a 0x03 that
 * follows two or more consecutive 0x00 bytes).  Returns the number of
 * bytes written, which never exceeds the number of bytes examined.
 */
static size_t rbsp_unescape(uint8_t *out, const uint8_t *in, size_t in_len)
{
   size_t scan_len = PANVK_H264_SLICE_HEADER_SCAN_BYTES;
   size_t written = 0;
   size_t pos = 0;
   int zeros_seen = 0;

   if (in_len < scan_len)
      scan_len = in_len;

   while (pos < scan_len) {
      const uint8_t byte = in[pos++];

      if (byte == 0x03 && zeros_seen >= 2) {
         /* Emulation-prevention byte: swallow it, restart the zero run. */
         zeros_seen = 0;
         continue;
      }

      out[written++] = byte;
      if (byte == 0x00)
         zeros_seen++;
      else
         zeros_seen = 0;
   }

   return written;
}
|
|
+
|
|
+static void skip_ref_pic_list_modification(struct br *b, uint32_t slice_type)
|
|
+{
|
|
+ uint32_t st_mod5 = slice_type % 5;
|
|
+
|
|
+ if (st_mod5 != 2 && st_mod5 != 4) {
|
|
+ uint32_t flag = br_read_u(b, 1);
|
|
+ if (flag) {
|
|
+ uint32_t mod_idc;
|
|
+ do {
|
|
+ mod_idc = br_read_ue(b);
|
|
+ if (mod_idc == 0 || mod_idc == 1)
|
|
+ br_read_ue(b);
|
|
+ else if (mod_idc == 2)
|
|
+ br_read_ue(b);
|
|
+ if (b->error)
|
|
+ return;
|
|
+ } while (mod_idc != 3);
|
|
+ }
|
|
+ }
|
|
+ if (st_mod5 == 1) {
|
|
+ uint32_t flag = br_read_u(b, 1);
|
|
+ if (flag) {
|
|
+ uint32_t mod_idc;
|
|
+ do {
|
|
+ mod_idc = br_read_ue(b);
|
|
+ if (mod_idc == 0 || mod_idc == 1)
|
|
+ br_read_ue(b);
|
|
+ else if (mod_idc == 2)
|
|
+ br_read_ue(b);
|
|
+ if (b->error)
|
|
+ return;
|
|
+ } while (mod_idc != 3);
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+static void skip_pred_weight_table(struct br *b,
|
|
+ uint32_t slice_type,
|
|
+ uint8_t chroma_format_idc,
|
|
+ uint8_t bit_depth_luma_minus8,
|
|
+ uint8_t bit_depth_chroma_minus8,
|
|
+ uint32_t num_ref_idx_l0_active_minus1,
|
|
+ uint32_t num_ref_idx_l1_active_minus1)
|
|
+{
|
|
+ uint32_t i, j;
|
|
+ uint32_t st_mod5 = slice_type % 5;
|
|
+
|
|
+ (void)bit_depth_luma_minus8;
|
|
+ (void)bit_depth_chroma_minus8;
|
|
+
|
|
+ br_read_ue(b); /* luma_log2_weight_denom */
|
|
+ if (chroma_format_idc != 0)
|
|
+ br_read_ue(b); /* chroma_log2_weight_denom */
|
|
+
|
|
+ for (i = 0; i <= num_ref_idx_l0_active_minus1 && !b->error; i++) {
|
|
+ uint32_t luma_weight_l0_flag = br_read_u(b, 1);
|
|
+ if (luma_weight_l0_flag) {
|
|
+ br_read_se(b);
|
|
+ br_read_se(b);
|
|
+ }
|
|
+ if (chroma_format_idc != 0) {
|
|
+ uint32_t chroma_weight_l0_flag = br_read_u(b, 1);
|
|
+ if (chroma_weight_l0_flag) {
|
|
+ for (j = 0; j < 2; j++) {
|
|
+ br_read_se(b);
|
|
+ br_read_se(b);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if (st_mod5 == 1) {
|
|
+ for (i = 0; i <= num_ref_idx_l1_active_minus1 && !b->error; i++) {
|
|
+ uint32_t luma_weight_l1_flag = br_read_u(b, 1);
|
|
+ if (luma_weight_l1_flag) {
|
|
+ br_read_se(b);
|
|
+ br_read_se(b);
|
|
+ }
|
|
+ if (chroma_format_idc != 0) {
|
|
+ uint32_t chroma_weight_l1_flag = br_read_u(b, 1);
|
|
+ if (chroma_weight_l1_flag) {
|
|
+ for (j = 0; j < 2; j++) {
|
|
+ br_read_se(b);
|
|
+ br_read_se(b);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+int panvk_v4l2_h264_parse_slice_header(
|
|
+ const uint8_t *nal_payload,
|
|
+ size_t nal_payload_length,
|
|
+ const struct panvk_v4l2_h264_slice_header_context *ctx,
|
|
+ struct panvk_v4l2_h264_slice_header_info *out)
|
|
+{
|
|
+ uint8_t unescaped[PANVK_H264_SLICE_HEADER_SCAN_BYTES];
|
|
+ size_t unescaped_len;
|
|
+ struct br b = { 0 };
|
|
+ bool idr_pic_flag = (ctx->nal_unit_type == 5);
|
|
+ uint32_t slice_type;
|
|
+ uint32_t num_ref_idx_l0_active_minus1;
|
|
+ uint32_t num_ref_idx_l1_active_minus1;
|
|
+ size_t pic_order_cnt_start;
|
|
+ size_t pic_order_cnt_end;
|
|
+ size_t dec_ref_pic_marking_start;
|
|
+ size_t dec_ref_pic_marking_end;
|
|
+ bool field_pic_flag = false;
|
|
+
|
|
+ memset(out, 0, sizeof(*out));
|
|
+
|
|
+ if (!nal_payload || nal_payload_length == 0)
|
|
+ return -EINVAL;
|
|
+
|
|
+ unescaped_len = rbsp_unescape(unescaped, nal_payload, nal_payload_length);
|
|
+ if (unescaped_len < 2)
|
|
+ return -EINVAL;
|
|
+
|
|
+ b.data = unescaped;
|
|
+ b.length = unescaped_len;
|
|
+ b.bit_pos = 0;
|
|
+ b.error = false;
|
|
+
|
|
+ out->first_mb_in_slice = br_read_ue(&b);
|
|
+ slice_type = br_read_ue(&b);
|
|
+ out->slice_type = slice_type;
|
|
+ out->pic_parameter_set_id = br_read_ue(&b);
|
|
+
|
|
+ if (ctx->separate_colour_plane_flag)
|
|
+ (void)br_read_u(&b, 2);
|
|
+
|
|
+ out->frame_num = br_read_u(&b, ctx->log2_max_frame_num_minus4 + 4u);
|
|
+
|
|
+ if (!ctx->frame_mbs_only_flag) {
|
|
+ field_pic_flag = (br_read_u(&b, 1) != 0);
|
|
+ if (field_pic_flag)
|
|
+ (void)br_read_u(&b, 1);
|
|
+ }
|
|
+
|
|
+ if (idr_pic_flag)
|
|
+ out->idr_pic_id = (uint16_t)br_read_ue(&b);
|
|
+
|
|
+ pic_order_cnt_start = b.bit_pos;
|
|
+ if (ctx->pic_order_cnt_type == 0) {
|
|
+ out->pic_order_cnt_lsb = (uint16_t)br_read_u(
|
|
+ &b, ctx->log2_max_pic_order_cnt_lsb_minus4 + 4u);
|
|
+ if (ctx->bottom_field_pic_order_in_frame_present_flag && !field_pic_flag)
|
|
+ out->delta_pic_order_cnt_bottom = br_read_se(&b);
|
|
+ } else if (ctx->pic_order_cnt_type == 1 &&
|
|
+ !ctx->delta_pic_order_always_zero_flag) {
|
|
+ out->delta_pic_order_cnt0 = br_read_se(&b);
|
|
+ if (ctx->bottom_field_pic_order_in_frame_present_flag && !field_pic_flag)
|
|
+ out->delta_pic_order_cnt1 = br_read_se(&b);
|
|
+ }
|
|
+ pic_order_cnt_end = b.bit_pos;
|
|
+ out->pic_order_cnt_bit_size =
|
|
+ (uint32_t)(pic_order_cnt_end - pic_order_cnt_start);
|
|
+
|
|
+ if (ctx->redundant_pic_cnt_present_flag)
|
|
+ (void)br_read_ue(&b);
|
|
+
|
|
+ if (slice_type % 5 == 1)
|
|
+ (void)br_read_u(&b, 1);
|
|
+
|
|
+ num_ref_idx_l0_active_minus1 = ctx->num_ref_idx_l0_default_active_minus1;
|
|
+ num_ref_idx_l1_active_minus1 = ctx->num_ref_idx_l1_default_active_minus1;
|
|
+
|
|
+ {
|
|
+ uint32_t st = slice_type % 5;
|
|
+ if (st == 0 || st == 3 || st == 1) {
|
|
+ uint32_t override = br_read_u(&b, 1);
|
|
+ if (override) {
|
|
+ num_ref_idx_l0_active_minus1 = br_read_ue(&b);
|
|
+ if (st == 1)
|
|
+ num_ref_idx_l1_active_minus1 = br_read_ue(&b);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ skip_ref_pic_list_modification(&b, slice_type);
|
|
+ if (b.error)
|
|
+ return -EIO;
|
|
+
|
|
+ {
|
|
+ uint32_t st = slice_type % 5;
|
|
+ bool do_pwt =
|
|
+ (ctx->weighted_pred_flag && (st == 0 || st == 3)) ||
|
|
+ (ctx->weighted_bipred_idc == 1 && st == 1);
|
|
+ if (do_pwt) {
|
|
+ skip_pred_weight_table(&b, slice_type,
|
|
+ ctx->chroma_format_idc,
|
|
+ ctx->bit_depth_luma_minus8,
|
|
+ ctx->bit_depth_chroma_minus8,
|
|
+ num_ref_idx_l0_active_minus1,
|
|
+ num_ref_idx_l1_active_minus1);
|
|
+ if (b.error)
|
|
+ return -EIO;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ dec_ref_pic_marking_start = b.bit_pos;
|
|
+ if (ctx->nal_ref_idc != 0) {
|
|
+ if (idr_pic_flag) {
|
|
+ (void)br_read_u(&b, 1);
|
|
+ (void)br_read_u(&b, 1);
|
|
+ } else {
|
|
+ uint32_t adaptive = br_read_u(&b, 1);
|
|
+ if (adaptive) {
|
|
+ uint32_t mmco;
|
|
+ do {
|
|
+ mmco = br_read_ue(&b);
|
|
+ if (mmco == 1 || mmco == 3)
|
|
+ br_read_ue(&b);
|
|
+ if (mmco == 2)
|
|
+ br_read_ue(&b);
|
|
+ if (mmco == 3 || mmco == 6)
|
|
+ br_read_ue(&b);
|
|
+ if (mmco == 4)
|
|
+ br_read_ue(&b);
|
|
+ if (b.error)
|
|
+ return -EIO;
|
|
+ } while (mmco != 0);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ dec_ref_pic_marking_end = b.bit_pos;
|
|
+ out->dec_ref_pic_marking_bit_size =
|
|
+ (uint32_t)(dec_ref_pic_marking_end - dec_ref_pic_marking_start);
|
|
+
|
|
+ if (b.error)
|
|
+ return -EIO;
|
|
+
|
|
+ return 0;
|
|
+}
|
|
diff -urN a/src/panfrost/vulkan/panvk_v4l2_h264_slice_header.h b/src/panfrost/vulkan/panvk_v4l2_h264_slice_header.h
|
|
--- a/src/panfrost/vulkan/panvk_v4l2_h264_slice_header.h 1970-01-01 01:00:00.000000000 +0100
|
|
+++ b/src/panfrost/vulkan/panvk_v4l2_h264_slice_header.h 2026-05-22 10:17:41.214043265 +0200
|
|
@@ -0,0 +1,94 @@
|
|
+/*
|
|
+ * H.264 slice header bit-parser for panvk-bifrost-video / V4L2 stateless
|
|
+ * H.264 decode (hantro G1 on RK3566/RK3568 Mali-Bifrost SBCs).
|
|
+ *
|
|
+ * Extracts the slice-header bit-position and value fields that
|
|
+ * V4L2_CID_STATELESS_H264_DECODE_PARAMS requires (idr_pic_id,
|
|
+ * pic_order_cnt_lsb, delta_pic_order_cnt_*, pic_order_cnt_bit_size,
|
|
+ * dec_ref_pic_marking_bit_size). Vulkan's StdVideoDecodeH264PictureInfo
|
|
+ * does not carry these — they live only in the bitstream's slice_header()
|
|
+ * syntax. Hantro G1 (drivers/media/platform/verisilicon/
|
|
+ * hantro_g1_h264_dec.c::set_params) writes the bit_size fields directly
|
|
+ * into MMIO registers G1_REG_DEC_CTRL5_REFPIC_MK_LEN and
|
|
+ * G1_REG_DEC_CTRL6_POC_LENGTH; with zeros the hardware bitstream parser
|
|
+ * walks past zero bits, lands on garbage, decodes nothing.
|
|
+ *
|
|
+ * Spec reference: ITU-T Rec. H.264 (08/2024) §7.3.3 slice_header
|
|
+ * and §7.3.3.1 ref_pic_list_modification, §7.3.3.2 pred_weight_table,
|
|
+ * §7.3.3.3 dec_ref_pic_marking.
|
|
+ *
|
|
+ * Cross-reference (proven working on hantro G1): libva-v4l2-request-fourier
|
|
+ * src/h264_slice_header.{c,h}. This file is a verbatim port with the
|
|
+ * function renamed from h264_parse_slice_header() to
|
|
+ * panvk_v4l2_h264_parse_slice_header() for namespace hygiene inside Mesa.
|
|
+ *
|
|
+ * SPDX-License-Identifier: MIT
|
|
+ */
|
|
+
|
|
+#ifndef PANVK_V4L2_H264_SLICE_HEADER_H
|
|
+#define PANVK_V4L2_H264_SLICE_HEADER_H
|
|
+
|
|
+#include <stdbool.h>
|
|
+#include <stddef.h>
|
|
+#include <stdint.h>
|
|
+
|
|
+struct panvk_v4l2_h264_slice_header_context { /* Parser inputs: parameter-set and NAL-header values needed to walk slice_header(). */
|
|
+ /* From SPS (the active SPS at slice-time). */
|
|
+ bool separate_colour_plane_flag;
|
|
+ uint8_t log2_max_frame_num_minus4;
|
|
+ bool frame_mbs_only_flag;
|
|
+ uint8_t pic_order_cnt_type;
|
|
+ uint8_t log2_max_pic_order_cnt_lsb_minus4;
|
|
+ bool delta_pic_order_always_zero_flag;
|
|
+
|
|
+ /* From PPS (the active PPS at slice-time), plus three SPS fields used with them by pred_weight_table skipping. */
|
|
+ bool bottom_field_pic_order_in_frame_present_flag;
|
|
+ bool redundant_pic_cnt_present_flag;
|
|
+ bool weighted_pred_flag;
|
|
+ uint8_t weighted_bipred_idc;
|
|
+ uint8_t num_ref_idx_l0_default_active_minus1;
|
|
+ uint8_t num_ref_idx_l1_default_active_minus1;
|
|
+ uint8_t chroma_format_idc; /* SPS value (the decode entrypoint fills it from the SPS) */
|
|
+ uint8_t bit_depth_luma_minus8; /* SPS value */
|
|
+ uint8_t bit_depth_chroma_minus8; /* SPS value */
|
|
+
|
|
+ /* From the NAL unit header (already extracted by the caller). */
|
|
+ uint8_t nal_unit_type; /* low 5 bits of the NAL header byte */
|
|
+ uint8_t nal_ref_idc; /* bits 5..6 of the NAL header byte; 0 skips dec_ref_pic_marking() */
|
|
+};
|
|
+
|
|
+struct panvk_v4l2_h264_slice_header_info { /* Parser outputs; the first seven fields are copied into v4l2_ctrl_h264_decode_params by the decode entrypoint. */
|
|
+ uint16_t idr_pic_id;
|
|
+ uint16_t pic_order_cnt_lsb;
|
|
+ int32_t delta_pic_order_cnt_bottom;
|
|
+ int32_t delta_pic_order_cnt0;
|
|
+ int32_t delta_pic_order_cnt1;
|
|
+ uint32_t pic_order_cnt_bit_size;
|
|
+ uint32_t dec_ref_pic_marking_bit_size; /* bit position after dec_ref_pic_marking() minus bit position before it */
|
|
+
|
|
+ /* Diagnostic — useful for cross-checking pre-parsed vs bitstream values. */
|
|
+ uint32_t first_mb_in_slice;
|
|
+ uint32_t slice_type;
|
|
+ uint32_t pic_parameter_set_id;
|
|
+ uint32_t frame_num;
|
|
+};
|
|
+
|
|
+/*
|
|
+ * Parse slice_header() up to dec_ref_pic_marking() (inclusive) of the
|
|
+ * H.264 RBSP slice_layer_without_partitioning_rbsp() syntax, extracting
|
|
+ * the V4L2 DECODE_PARAMS fields. Returns 0 on success, negative
|
|
+ * errno-shaped value on parse failure.
|
|
+ *
|
|
+ * @nal_payload: pointer to the byte AFTER the NAL header byte
|
|
+ * (i.e. start of the RBSP proper; caller has already
|
|
+ * skipped any ANNEX_B start code and the 1-byte
|
|
+ * nal_unit_header). Will be RBSP-unescaped internally.
|
|
+ * @nal_payload_length: bytes available at @nal_payload.
|
|
+ */
|
|
+int panvk_v4l2_h264_parse_slice_header(
|
|
+ const uint8_t *nal_payload,
|
|
+ size_t nal_payload_length,
|
|
+ const struct panvk_v4l2_h264_slice_header_context *ctx,
|
|
+ struct panvk_v4l2_h264_slice_header_info *out);
|
|
+
|
|
+#endif /* PANVK_V4L2_H264_SLICE_HEADER_H */
|
|
diff -urN a/src/panfrost/vulkan/panvk_video_decode.c b/src/panfrost/vulkan/panvk_video_decode.c
|
|
--- a/src/panfrost/vulkan/panvk_video_decode.c 1970-01-01 01:00:00.000000000 +0100
|
|
+++ b/src/panfrost/vulkan/panvk_video_decode.c 2026-05-22 10:17:41.214043265 +0200
|
|
@@ -0,0 +1,380 @@
|
|
+/*
|
|
+ * panvk-bifrost-video: Vulkan video decode entrypoints (H.264).
|
|
+ *
|
|
+ * Drives the V4L2 stateless hantro VPU backend (panvk_v4l2.c) from
|
|
+ * Vulkan vkCmdDecodeVideoKHR. Decode is synchronous at record time —
|
|
+ * the full V4L2 ioctl dance runs to completion inside the command-
|
|
+ * recording call before returning to the application. The queue-side
|
|
+ * `driver_submit` is a no-op signal-everything (see panvk_vX_device.c).
|
|
+ *
|
|
+ * Phase 1 simplifications worth knowing about:
|
|
+ *
|
|
+ * - Cmd-buffer state lives at the DEVICE level (`active_video`) under
|
|
+ * a single mutex, NOT per-cmd-buffer. Concurrent video sessions on
|
|
+ * the same device clobber each other. Sufficient for current single-
|
|
+ * session consumers (mpv-fourier, ffmpeg-vulkan-h264, vk-video-
|
|
+ * samples). Spec-compliant multi-session is a Phase >>1 follow-up.
|
|
+ *
|
|
+ * - Source bitstream is read via `src_buf->mem->addr.host`, i.e. the
|
|
+ * bound VkDeviceMemory's CPU mapping. Works because panvk-bifrost
|
|
+ * only exposes HOST_VISIBLE memory types; an app that bound the
|
|
+ * bitstream buffer to non-HOST_VISIBLE memory would get a logged
|
|
+ * error and a silent decode skip (CmdDecodeVideoKHR is void, so we
|
|
+ * have no clean error-return path). VkPhysicalDeviceVideo*
|
|
+ * constraints would be the right place to make this contractual.
|
|
+ *
|
|
+ * - Requires `PAN_I_WANT_A_BROKEN_VULKAN_DRIVER=1` (mesa-upstream gate
|
|
+ * on panvk-on-Bifrost which is not conformant).
|
|
+ *
|
|
+ * SPDX-License-Identifier: MIT
|
|
+ */
|
|
+
|
|
+#include "panvk_video_decode.h"
|
|
+#include "panvk_v4l2_h264_slice_header.h"
|
|
+#include "panvk_buffer.h"
|
|
+#include "panvk_device.h"
|
|
+#include "panvk_device_memory.h"
|
|
+#include "panvk_entrypoints.h"
|
|
+#include "panvk_image.h"
|
|
+
|
|
+#include "vk_image.h"
|
|
+
|
|
+#include "vk_alloc.h"
|
|
+#include "vk_command_buffer.h"
|
|
+#include "vk_log.h"
|
|
+#include "vk_video.h"
|
|
+
|
|
+#include "util/macros.h"
|
|
+
|
|
+#include "kmod/pan_kmod.h"
|
|
+
|
|
+#include <linux/videodev2.h>
|
|
+#include <linux/v4l2-controls.h>
|
|
+#include <unistd.h>
|
|
+
|
|
+VKAPI_ATTR VkResult VKAPI_CALL
|
|
+panvk_CreateVideoSessionKHR(VkDevice _device,
|
|
+ const VkVideoSessionCreateInfoKHR *pCreateInfo,
|
|
+ const VkAllocationCallbacks *pAllocator,
|
|
+ VkVideoSessionKHR *pVideoSession)
|
|
+{ /* Allocates a zeroed session, initialises the common vk_video_session base, then brings up the V4L2 backend sized to maxCodedExtent. */
|
|
+ VK_FROM_HANDLE(panvk_device, device, _device);
|
|
+
|
|
+ struct panvk_video_session *vs =
|
|
+ vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*vs), 8,
|
|
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
|
|
+ if (!vs)
|
|
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
|
+
|
|
+ VkResult r = vk_video_session_init(&device->vk, &vs->vk, pCreateInfo);
|
|
+ if (r != VK_SUCCESS) {
|
|
+ vk_free2(&device->vk.alloc, pAllocator, vs);
|
|
+ return r;
|
|
+ }
|
|
+
|
|
+ vs->video_fd = -1; /* -1 marks "not opened" until the V4L2 init below runs */
|
|
+ vs->media_fd = -1;
|
|
+ vs->slice_based = false;
|
|
+
|
|
+ int v4l2_rc = panvk_v4l2_session_init(vs, &device->vk, pAllocator,
|
|
+ pCreateInfo->maxCodedExtent.width,
|
|
+ pCreateInfo->maxCodedExtent.height);
|
|
+ if (v4l2_rc) { /* unwind in reverse order of setup */
|
|
+ mesa_loge("panvk_video: V4L2 session init failed rc=%d", v4l2_rc);
|
|
+ vk_video_session_finish(&vs->vk);
|
|
+ vk_free2(&device->vk.alloc, pAllocator, vs);
|
|
+ return vk_error(device, VK_ERROR_INITIALIZATION_FAILED); /* the V4L2 rc is only logged, not propagated */
|
|
+ }
|
|
+
|
|
+ *pVideoSession = panvk_video_session_to_handle(vs);
|
|
+ return VK_SUCCESS;
|
|
+}
|
|
+
|
|
+VKAPI_ATTR void VKAPI_CALL
|
|
+panvk_DestroyVideoSessionKHR(VkDevice _device,
|
|
+ VkVideoSessionKHR videoSession,
|
|
+ const VkAllocationCallbacks *pAllocator)
|
|
+{ /* Detaches the session from the device-level active state, tears down the V4L2 backend, then frees it. */
|
|
+ VK_FROM_HANDLE(panvk_device, device, _device);
|
|
+ VK_FROM_HANDLE(panvk_video_session, vs, videoSession);
|
|
+ if (!vs) return; /* null handle: nothing to destroy */
|
|
+
|
|
+ /* Clear device-level active state if it was this session, so later Cmd* calls cannot pick up a freed pointer. */
|
|
+ simple_mtx_lock(&device->active_video.lock);
|
|
+ if (device->active_video.vs == vs) {
|
|
+ device->active_video.vs = NULL;
|
|
+ device->active_video.params = NULL;
|
|
+ }
|
|
+ simple_mtx_unlock(&device->active_video.lock);
|
|
+
|
|
+ panvk_v4l2_session_finish(vs, &device->vk, pAllocator); /* reverse order of creation: V4L2 state first, */
|
|
+ vk_video_session_finish(&vs->vk); /* then the common base, */
|
|
+ vk_free2(&device->vk.alloc, pAllocator, vs); /* then the allocation itself */
|
|
+}
|
|
+
|
|
+VKAPI_ATTR VkResult VKAPI_CALL
|
|
+panvk_GetVideoSessionMemoryRequirementsKHR(
|
|
+ VkDevice device,
|
|
+ VkVideoSessionKHR videoSession,
|
|
+ uint32_t *pMemoryRequirementsCount,
|
|
+ VkVideoSessionMemoryRequirementsKHR *pMemoryRequirements)
|
|
+{ /* Always reports zero memory requirements; pMemoryRequirements is never written. */
|
|
+ *pMemoryRequirementsCount = 0;
|
|
+ return VK_SUCCESS;
|
|
+}
|
|
+
|
|
+VKAPI_ATTR VkResult VKAPI_CALL
|
|
+panvk_BindVideoSessionMemoryKHR(
|
|
+ VkDevice device,
|
|
+ VkVideoSessionKHR videoSession,
|
|
+ uint32_t bindSessionMemoryInfoCount,
|
|
+ const VkBindVideoSessionMemoryInfoKHR *pBindSessionMemoryInfos)
|
|
+{ /* No-op: all arguments are ignored (the requirements query above reports zero bindings). */
|
|
+ return VK_SUCCESS;
|
|
+}
|
|
+
|
|
+/* Helper: returns the panvk_device owning a VkCommandBuffer, reached through the common vk_command_buffer base object. */
|
|
+static struct panvk_device *
|
|
+cmdbuf_to_device(VkCommandBuffer commandBuffer)
|
|
+{
|
|
+ VK_FROM_HANDLE(vk_command_buffer, vk_cmdbuf, commandBuffer);
|
|
+ return to_panvk_device(vk_cmdbuf->base.device);
|
|
+}
|
|
+
|
|
+VKAPI_ATTR void VKAPI_CALL
|
|
+panvk_CmdBeginVideoCodingKHR(VkCommandBuffer commandBuffer,
|
|
+ const VkVideoBeginCodingInfoKHR *pBeginInfo)
|
|
+{ /* Publishes the session (and optional parameters object) as the device-wide active video state, at record time. */
|
|
+ struct panvk_device *device = cmdbuf_to_device(commandBuffer);
|
|
+ VK_FROM_HANDLE(panvk_video_session, vs, pBeginInfo->videoSession);
|
|
+
|
|
+ simple_mtx_lock(&device->active_video.lock);
|
|
+ device->active_video.vs = vs; /* overwrites any session another command buffer set earlier */
|
|
+ if (pBeginInfo->videoSessionParameters != VK_NULL_HANDLE) {
|
|
+ VK_FROM_HANDLE(vk_video_session_parameters, params,
|
|
+ pBeginInfo->videoSessionParameters);
|
|
+ device->active_video.params = params;
|
|
+ } else {
|
|
+ device->active_video.params = NULL; /* CmdDecodeVideoKHR will then log an error and skip */
|
|
+ }
|
|
+ simple_mtx_unlock(&device->active_video.lock);
|
|
+}
|
|
+
|
|
+VKAPI_ATTR void VKAPI_CALL
|
|
+panvk_CmdEndVideoCodingKHR(VkCommandBuffer commandBuffer,
|
|
+ const VkVideoEndCodingInfoKHR *pEndCodingInfo)
|
|
+{ /* Clears the device-wide active video state unconditionally; pEndCodingInfo is not read. */
|
|
+ struct panvk_device *device = cmdbuf_to_device(commandBuffer);
|
|
+ simple_mtx_lock(&device->active_video.lock);
|
|
+ device->active_video.vs = NULL;
|
|
+ device->active_video.params = NULL;
|
|
+ simple_mtx_unlock(&device->active_video.lock);
|
|
+}
|
|
+
|
|
+VKAPI_ATTR void VKAPI_CALL
|
|
+panvk_CmdControlVideoCodingKHR(VkCommandBuffer commandBuffer,
|
|
+ const VkVideoCodingControlInfoKHR *pCodingControlInfo)
|
|
+{ /* Handles only the RESET control flag: invalidates every DPB slot of the active session. Other flags are ignored. */
|
|
+ struct panvk_device *device = cmdbuf_to_device(commandBuffer);
|
|
+ simple_mtx_lock(&device->active_video.lock);
|
|
+ if (device->active_video.vs &&
|
|
+ (pCodingControlInfo->flags & VK_VIDEO_CODING_CONTROL_RESET_BIT_KHR)) {
|
|
+ for (unsigned i = 0; i < 16; i++) /* 16 = size of panvk_video_session::dpb[] */
|
|
+ device->active_video.vs->dpb[i].valid = false;
|
|
+ }
|
|
+ simple_mtx_unlock(&device->active_video.lock);
|
|
+}
|
|
+
|
|
+VKAPI_ATTR void VKAPI_CALL
|
|
+panvk_CmdDecodeVideoKHR(VkCommandBuffer commandBuffer,
|
|
+ const VkVideoDecodeInfoKHR *pDecodeInfo)
|
|
+{ /* Records AND executes one H.264 picture decode: Std -> V4L2 control translation, slice-header parse for the fields Vulkan omits, synchronous V4L2 submit, DPB bookkeeping. Returns void, so every failure is logged and the decode is skipped. */
|
|
+ struct panvk_device *device = cmdbuf_to_device(commandBuffer);
|
|
+
|
|
+ simple_mtx_lock(&device->active_video.lock);
|
|
+ struct panvk_video_session *vs = device->active_video.vs;
|
|
+ struct vk_video_session_parameters *params = device->active_video.params;
|
|
+ simple_mtx_unlock(&device->active_video.lock);
|
|
+
|
|
+ if (!vs || !params) {
|
|
+ mesa_loge("panvk_video: CmdDecodeVideoKHR outside Begin/End scope");
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ const VkVideoDecodeH264PictureInfoKHR *h264_pi =
|
|
+ vk_find_struct_const(pDecodeInfo->pNext,
|
|
+ VIDEO_DECODE_H264_PICTURE_INFO_KHR);
|
|
+ if (!h264_pi || !h264_pi->pStdPictureInfo) {
|
|
+ mesa_loge("panvk_video: missing H.264 picture info");
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ const StdVideoH264SequenceParameterSet *sps =
|
|
+ vk_video_find_h264_dec_std_sps(params,
|
|
+ h264_pi->pStdPictureInfo->seq_parameter_set_id);
|
|
+ const StdVideoH264PictureParameterSet *pps =
|
|
+ vk_video_find_h264_dec_std_pps(params,
|
|
+ h264_pi->pStdPictureInfo->pic_parameter_set_id);
|
|
+ if (!sps || !pps) {
|
|
+ mesa_loge("panvk_video: SPS or PPS lookup failed");
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ /* Translate Std → V4L2 control structs. */
|
|
+ struct v4l2_ctrl_h264_sps c_sps;
|
|
+ struct v4l2_ctrl_h264_pps c_pps;
|
|
+ struct v4l2_ctrl_h264_scaling_matrix c_scaling;
|
|
+ struct v4l2_ctrl_h264_decode_params c_dec;
|
|
+
|
|
+ panvk_v4l2_h264_std_to_ctrl_sps(sps, &c_sps);
|
|
+ panvk_v4l2_h264_std_to_ctrl_pps(pps, &c_pps);
|
|
+ panvk_v4l2_h264_default_flat_scaling_matrix(&c_scaling);
|
|
+
|
|
+ /*
|
|
+ * output_ts: V4L2 buffer-identity stamp. Must round-trip cleanly through
|
|
+ * (tv_sec, tv_usec) at QBUF time, because hantro's reflist builder
|
|
+ * matches dpb[i].reference_ts against the kernel-side CAPTURE timestamp
|
|
+ * (which is the OUTPUT-QBUF timestamp re-derived via v4l2_timeval_to_ns:
|
|
+ * `tv_sec * 1e9 + tv_usec * 1e3`). Sub-microsecond bits are dropped, so
|
|
+ * any high-resolution stamp (e.g. a 64-bit pointer cast) makes the
|
|
+ * lookup miss and P/B frames decode against zero references. Use a
|
|
+ * per-session monotonic counter in microseconds (i.e. * 1000 ns) so
|
|
+ * concurrent sessions sharing /dev/video1 don't collide on stamp.
|
|
+ */
|
|
+ const uint64_t output_ts = ((uint64_t)++vs->ts_counter) * 1000ULL;
|
|
+ uint32_t dst_dpb_slot = pDecodeInfo->pSetupReferenceSlot
|
|
+ ? (uint32_t) pDecodeInfo->pSetupReferenceSlot->slotIndex : 0u; /* 0 is only a placeholder when no setup slot is given */
|
|
+
|
|
+ panvk_v4l2_h264_build_decode_params(vs, h264_pi, pps,
|
|
+ dst_dpb_slot,
|
|
+ pDecodeInfo->pReferenceSlots,
|
|
+ pDecodeInfo->referenceSlotCount,
|
|
+ output_ts, &c_dec);
|
|
+
|
|
+ /* Resolve source bitstream CPU pointer via panvk_buffer.mem.addr.host. */
|
|
+ VK_FROM_HANDLE(panvk_buffer, src_buf, pDecodeInfo->srcBuffer);
|
|
+ if (!src_buf || !src_buf->mem || !src_buf->mem->addr.host) {
|
|
+ mesa_loge("panvk_video: src buffer has no host map");
|
|
+ return;
|
|
+ }
|
|
+ const void *src_bitstream =
|
|
+ (const uint8_t *) src_buf->mem->addr.host +
|
|
+ src_buf->mem_offset + pDecodeInfo->srcBufferOffset;
|
|
+
|
|
+ /*
|
|
+ * Slice-header bit-level parse — recovers the DECODE_PARAMS fields
|
|
+ * that StdVideoDecodeH264PictureInfo doesn't carry: idr_pic_id,
|
|
+ * pic_order_cnt_lsb, delta_pic_order_cnt_*, pic_order_cnt_bit_size,
|
|
+ * dec_ref_pic_marking_bit_size, and nal_ref_idc. Hantro G1 writes the
|
|
+ * bit_size fields directly into MMIO registers G1_REG_DEC_CTRL5/CTRL6;
|
|
+ * with zeros the hardware bitstream parser walks past zero bits, lands
|
|
+ * on garbage, and decodes all-zero pixels — observed empirically as the
|
|
+ * "Y plane all zeros" symptom that closed the prior Commit 7e.
|
|
+ *
|
|
+ * Cross-reference (proven fix on hantro): libva-v4l2-request-fourier
|
|
+ * src/h264.c:394-449. The panvk_v4l2_h264_slice_header.{c,h} parser
|
|
+ * is a verbatim port with namespace renames.
|
|
+ *
|
|
+ * Expects ANNEX_B start-code-prefixed VCL NAL at *src_bitstream*. We
|
|
+ * skip the 3- or 4-byte start code then the 1-byte NAL header.
|
|
+ */
|
|
+ {
|
|
+ const uint8_t *bs = (const uint8_t *) src_bitstream;
|
|
+ uint32_t bs_len = pDecodeInfo->srcBufferRange;
|
|
+ uint32_t off = 0;
|
|
+ /* Skip ANNEX_B start code (0x00 00 01 or 0x00 00 00 01). */
|
|
+ if (bs_len >= 4 && bs[0] == 0 && bs[1] == 0 && bs[2] == 0 && bs[3] == 1)
|
|
+ off = 4;
|
|
+ else if (bs_len >= 3 && bs[0] == 0 && bs[1] == 0 && bs[2] == 1)
|
|
+ off = 3;
|
|
+
|
|
+ if (bs_len > off + 1) {
|
|
+ uint8_t nal_hdr = bs[off];
|
|
+ uint8_t nal_ref_idc = (nal_hdr >> 5) & 0x3;
|
|
+ uint8_t nal_unit_type = nal_hdr & 0x1f;
|
|
+
|
|
+ const struct panvk_v4l2_h264_slice_header_context sh_ctx = {
|
|
+ .separate_colour_plane_flag =
|
|
+ (sps->flags.separate_colour_plane_flag != 0),
|
|
+ .log2_max_frame_num_minus4 = sps->log2_max_frame_num_minus4,
|
|
+ .frame_mbs_only_flag = (sps->flags.frame_mbs_only_flag != 0),
|
|
+ .pic_order_cnt_type = (uint8_t) sps->pic_order_cnt_type,
|
|
+ .log2_max_pic_order_cnt_lsb_minus4 =
|
|
+ sps->log2_max_pic_order_cnt_lsb_minus4,
|
|
+ .delta_pic_order_always_zero_flag =
|
|
+ (sps->flags.delta_pic_order_always_zero_flag != 0),
|
|
+ .bottom_field_pic_order_in_frame_present_flag =
|
|
+ (pps->flags.bottom_field_pic_order_in_frame_present_flag != 0),
|
|
+ .redundant_pic_cnt_present_flag =
|
|
+ (pps->flags.redundant_pic_cnt_present_flag != 0),
|
|
+ .weighted_pred_flag =
|
|
+ (pps->flags.weighted_pred_flag != 0),
|
|
+ .weighted_bipred_idc = (uint8_t) pps->weighted_bipred_idc,
|
|
+ .num_ref_idx_l0_default_active_minus1 =
|
|
+ pps->num_ref_idx_l0_default_active_minus1,
|
|
+ .num_ref_idx_l1_default_active_minus1 =
|
|
+ pps->num_ref_idx_l1_default_active_minus1,
|
|
+ .chroma_format_idc = (uint8_t) sps->chroma_format_idc,
|
|
+ .bit_depth_luma_minus8 = sps->bit_depth_luma_minus8,
|
|
+ .bit_depth_chroma_minus8 = sps->bit_depth_chroma_minus8,
|
|
+ .nal_unit_type = nal_unit_type,
|
|
+ .nal_ref_idc = nal_ref_idc,
|
|
+ };
|
|
+ struct panvk_v4l2_h264_slice_header_info sh = { 0 };
|
|
+ const uint8_t *nal_payload = bs + off + 1; /* past NAL header byte */
|
|
+ uint32_t nal_payload_len = bs_len - (off + 1);
|
|
+
|
|
+ int sh_rc = panvk_v4l2_h264_parse_slice_header(
|
|
+ nal_payload, nal_payload_len, &sh_ctx, &sh);
|
|
+ if (sh_rc == 0) {
|
|
+ c_dec.idr_pic_id = sh.idr_pic_id;
|
|
+ c_dec.pic_order_cnt_lsb = sh.pic_order_cnt_lsb;
|
|
+ c_dec.delta_pic_order_cnt_bottom = sh.delta_pic_order_cnt_bottom;
|
|
+ c_dec.delta_pic_order_cnt0 = sh.delta_pic_order_cnt0;
|
|
+ c_dec.delta_pic_order_cnt1 = sh.delta_pic_order_cnt1;
|
|
+ c_dec.pic_order_cnt_bit_size = sh.pic_order_cnt_bit_size;
|
|
+ c_dec.dec_ref_pic_marking_bit_size =
|
|
+ sh.dec_ref_pic_marking_bit_size;
|
|
+ c_dec.nal_ref_idc = nal_ref_idc;
|
|
+ /*
|
|
+ * IDR_PIC flag: Vulkan's StdVideoDecodeH264PictureInfo.flags.
|
|
+ * IdrPicFlag is application-supplied and the vk-video-samples
|
|
+ * parser leaves it zero. Recover it from nal_unit_type (==5 is
|
|
+ * IDR per H.264 §7.4.1). Without this flag set, hantro's
|
|
+ * VDPU_REG_IDR_PIC_E stays clear and the hardware treats the
|
|
+ * frame as P/B, hunts for references it doesn't have, and
|
|
+ * writes zero output.
|
|
+ */
|
|
+ if (nal_unit_type == 5)
|
|
+ c_dec.flags |= V4L2_H264_DECODE_PARAM_FLAG_IDR_PIC;
|
|
+ } else {
|
|
+ mesa_loge("panvk_video: slice_header parse FAILED rc=%d "
|
|
+ "(payload_len=%u) — DECODE_PARAMS bit_size fields "
|
|
+ "left zero, hantro will produce zeros",
|
|
+ sh_rc, nal_payload_len);
|
|
+ }
|
|
+ } else {
|
|
+ mesa_loge("panvk_video: bitstream too short for NAL header "
|
|
+ "(bs_len=%u off=%u)", bs_len, off);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /* Run the 14-step ioctl dance synchronously. CPU-copy variant for Phase 1. */
|
|
+ int rc = panvk_v4l2_submit_h264_decode(vs, &c_sps, &c_pps, &c_scaling,
|
|
+ &c_dec,
|
|
+ src_bitstream,
|
|
+ pDecodeInfo->srcBufferRange,
|
|
+ -1, /* dst unused (MMAP CAPTURE) */
|
|
+ output_ts);
|
|
+
|
|
+ if (rc) {
|
|
+ mesa_loge("panvk_video: decode submit failed rc=%d", rc);
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ /* Update DPB tracking, but only when the app supplied a setup slot: the 0 fallback above must not overwrite the picture tracked in dpb[0]. */
|
|
+ if (pDecodeInfo->pSetupReferenceSlot && dst_dpb_slot < 16) {
|
|
+ vs->dpb[dst_dpb_slot].valid = true;
|
|
+ vs->dpb[dst_dpb_slot].reference_ts = output_ts;
|
|
+ }
|
|
+}
|
|
diff -urN a/src/panfrost/vulkan/panvk_video_decode.h b/src/panfrost/vulkan/panvk_video_decode.h
|
|
--- a/src/panfrost/vulkan/panvk_video_decode.h 1970-01-01 01:00:00.000000000 +0100
|
|
+++ b/src/panfrost/vulkan/panvk_video_decode.h 2026-05-22 10:17:41.214043265 +0200
|
|
@@ -0,0 +1,124 @@
|
|
+/*
|
|
+ * panvk-bifrost-video: per-session decode state and V4L2 backend API (Phase 4 commit 3: extended for V4L2 state).
|
|
+ *
|
|
+ * SPDX-License-Identifier: MIT
|
|
+ */
|
|
+
|
|
+#ifndef PANVK_VIDEO_DECODE_H
|
|
+#define PANVK_VIDEO_DECODE_H
|
|
+
|
|
+#include "vk_video.h"
|
|
+#include "vk_object.h"
|
|
+
|
|
+#include <linux/videodev2.h>
|
|
+
|
|
+/* Forward decls */
|
|
+struct panvk_device;
|
|
+struct vk_device;
|
|
+
|
|
+/* iter1: per-session state. Wraps vk_video_session for spec-mandated fields and adds the V4L2 backend state of one decode session. */
|
|
+struct panvk_video_session {
|
|
+ struct vk_video_session vk;
|
|
+
|
|
+ /* V4L2 fds — opened in Commit 3 (per-session). -1 means not opened. */
|
|
+ int video_fd;
|
|
+ int media_fd;
|
|
+
|
|
+ /* Negotiated formats per OUTPUT / CAPTURE queue */
|
|
+ struct v4l2_format fmt_output;
|
|
+ struct v4l2_format fmt_capture;
|
|
+
|
|
+ /* Request fd pool. PANVK_V4L2_REQUEST_FD_COUNT entries.
|
|
+ * Size of request_fd_used[] is bounded by the same compile-time max;
|
|
+ * keep them coupled to avoid silent overflow if the pool grows. */
|
|
+#define PANVK_VIDEO_REQUEST_FD_MAX 32
|
|
+ int *request_fds;
|
|
+ bool request_fd_used[PANVK_VIDEO_REQUEST_FD_MAX];
|
|
+ unsigned num_request_fds;
|
|
+ uint32_t request_fd_next; /* round-robin index */
|
|
+
|
|
+ /* Per-session V4L2 buffer-identity counter. Multiplied by 1000 ns at
|
|
+ * QBUF time so the stamp round-trips losslessly through (tv_sec,
|
|
+ * tv_usec) — hantro's reflist builder matches dpb[i].reference_ts
|
|
+ * against the kernel-side OUTPUT timestamp. Per-session (not process-
|
|
+ * global) so concurrent sessions sharing /dev/video1 don't collide. */
|
|
+ uint32_t ts_counter; /* pre-incremented per decode, so the first stamp is 1000 ns */
|
|
+
|
|
+ /* DPB slotIndex → V4L2 reference_ts mapping (Phase 1 D5) */
|
|
+ struct {
|
|
+ bool valid; /* cleared for all slots by a RESET coding-control command */
|
|
+ uint64_t reference_ts; /* output_ts of the decode that filled this slot */
|
|
+ } dpb[16];
|
|
+
|
|
+ /* Phase 1 lock — FRAME_BASED only. */
|
|
+ bool slice_based; /* set to false at session creation */
|
|
+
|
|
+ /* Multi-planar V4L2 buffer type? Detected at session init via
|
|
+ * V4L2_CAP_VIDEO_M2M_MPLANE. Hantro: true. rkvdec on rk3399: false. */
|
|
+ bool mplane;
|
|
+
|
|
+ /* iter1 commit 7c: V4L2 buffer counts + round-robin indices.
|
|
+ * Both queues use MMAP; bitstream copied CPU-side from VkBuffer host map. */
|
|
+ uint32_t num_output_buffers; /* NOTE(review): must stay <= 18, the size of the map arrays below; confirm the clamp in panvk_v4l2.c */
|
|
+ uint32_t output_next;
|
|
+ void *output_map[18]; /* mmap'd OUTPUT buffer CPU pointers */
|
|
+ uint32_t output_map_size[18];
|
|
+ uint32_t num_capture_buffers; /* NOTE(review): same <= 18 bound as num_output_buffers */
|
|
+ uint32_t capture_next;
|
|
+ void *capture_map[18]; /* mmap'd CAPTURE buffer CPU pointers */
|
|
+ uint32_t capture_map_size[18];
|
|
+};
|
|
+
|
|
+VK_DEFINE_NONDISP_HANDLE_CASTS(panvk_video_session, vk.base, VkVideoSessionKHR,
|
|
+ VK_OBJECT_TYPE_VIDEO_SESSION_KHR)
|
|
+
|
|
+/* panvk_v4l2.c API */
|
|
+bool panvk_v4l2_probe_hantro(void);
|
|
+int panvk_v4l2_session_init(struct panvk_video_session *vs,
|
|
+ struct vk_device *vk_dev,
|
|
+ const VkAllocationCallbacks *alloc,
|
|
+ uint32_t width, uint32_t height);
|
|
+void panvk_v4l2_session_finish(struct panvk_video_session *vs,
|
|
+ struct vk_device *vk_dev,
|
|
+ const VkAllocationCallbacks *alloc);
|
|
+
|
|
+/* 14-step ioctl dance for one H.264 frame. */
|
|
+struct v4l2_ctrl_h264_sps;
|
|
+struct v4l2_ctrl_h264_pps;
|
|
+struct v4l2_ctrl_h264_scaling_matrix;
|
|
+struct v4l2_ctrl_h264_decode_params;
|
|
+
|
|
+int panvk_v4l2_submit_h264_decode(
|
|
+ struct panvk_video_session *vs,
|
|
+ const struct v4l2_ctrl_h264_sps *sps,
|
|
+ const struct v4l2_ctrl_h264_pps *pps,
|
|
+ const struct v4l2_ctrl_h264_scaling_matrix *scaling,
|
|
+ const struct v4l2_ctrl_h264_decode_params *dec,
|
|
+ const void *src_bitstream, uint32_t src_bytes,
|
|
+ int dst_dmabuf_fd_unused,
|
|
+ uint64_t qbuf_ts);
|
|
+
|
|
+/* panvk_v4l2_h264.c — Std → V4L2 control translation API (signatures
|
|
+ * use full types; consumers must include vk_video headers before this). */
|
|
+void panvk_v4l2_h264_std_to_ctrl_sps(
|
|
+ const StdVideoH264SequenceParameterSet *in,
|
|
+ struct v4l2_ctrl_h264_sps *out);
|
|
+void panvk_v4l2_h264_std_to_ctrl_pps(
|
|
+ const StdVideoH264PictureParameterSet *in,
|
|
+ struct v4l2_ctrl_h264_pps *out);
|
|
+void panvk_v4l2_h264_std_to_ctrl_scaling_matrix(
|
|
+ const StdVideoH264ScalingLists *in,
|
|
+ struct v4l2_ctrl_h264_scaling_matrix *out);
|
|
+void panvk_v4l2_h264_default_flat_scaling_matrix(
|
|
+ struct v4l2_ctrl_h264_scaling_matrix *out);
|
|
+void panvk_v4l2_h264_build_decode_params(
|
|
+ const struct panvk_video_session *vs,
|
|
+ const VkVideoDecodeH264PictureInfoKHR *pic_info,
|
|
+ const StdVideoH264PictureParameterSet *active_pps,
|
|
+ uint32_t dst_dpb_slot,
|
|
+ const VkVideoReferenceSlotInfoKHR *ref_slots,
|
|
+ uint32_t num_ref_slots,
|
|
+ uint64_t output_ts,
|
|
+ struct v4l2_ctrl_h264_decode_params *out);
|
|
+
|
|
+#endif /* PANVK_VIDEO_DECODE_H */
|
|
diff -urN a/src/panfrost/vulkan/panvk_vX_device.c b/src/panfrost/vulkan/panvk_vX_device.c
|
|
--- a/src/panfrost/vulkan/panvk_vX_device.c 2026-05-21 22:46:57.505785441 +0200
|
|
+++ b/src/panfrost/vulkan/panvk_vX_device.c 2026-05-22 10:17:41.214043265 +0200
|
|
@@ -203,6 +203,27 @@
|
|
}
|
|
}
|
|
|
|
+/* iter1: translate Vulkan-visible queueFamilyIndex to panvk enum, skipping families this physical device does not expose.
|
|
+ * Returns PANVK_QUEUE_FAMILY_COUNT on invalid input. */
|
|
+static inline enum panvk_queue_family
|
|
+panvk_per_arch(vulkan_qfi_to_panvk)(struct panvk_physical_device *physical_device,
|
|
+ uint32_t vulkan_qfi)
|
|
+{
|
|
+ uint32_t pos = 0; /* Vulkan-visible index of the next exposed family */
|
|
+ for (uint32_t i = 0; i < PANVK_QUEUE_FAMILY_COUNT; i++) {
|
|
+ if (i == PANVK_QUEUE_FAMILY_BIND &&
|
|
+ !physical_device->vk.supported_features.sparseBinding)
|
|
+ continue; /* bind family is hidden without sparseBinding */
|
|
+ if (i == PANVK_QUEUE_FAMILY_VIDEO_DECODE &&
|
|
+ !physical_device->vk.supported_extensions.KHR_video_queue)
|
|
+ continue; /* video family is hidden without KHR_video_queue */
|
|
+ if (pos == vulkan_qfi)
|
|
+ return (enum panvk_queue_family) i;
|
|
+ pos++;
|
|
+ }
|
|
+ return PANVK_QUEUE_FAMILY_COUNT; /* vulkan_qfi is past the last exposed family */
|
|
+}
|
|
+
|
|
static VkResult
|
|
check_global_priority(const struct panvk_physical_device *phys_dev,
|
|
const VkDeviceQueueCreateInfo *create_info)
|
|
@@ -215,7 +236,10 @@
|
|
priority_info ? priority_info->globalPriority
|
|
: VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR;
|
|
|
|
- switch (create_info->queueFamilyIndex) {
|
|
+ const enum panvk_queue_family panvk_qfi =
|
|
+ panvk_per_arch(vulkan_qfi_to_panvk)(
|
|
+ (struct panvk_physical_device *) phys_dev, create_info->queueFamilyIndex);
|
|
+ switch (panvk_qfi) {
|
|
case PANVK_QUEUE_FAMILY_GPU: {
|
|
enum pan_kmod_group_allow_priority_flags requested_prio =
|
|
global_priority_to_group_allow_priority_flag(priority);
|
|
@@ -242,6 +266,12 @@
|
|
return VK_ERROR_NOT_PERMITTED_KHR;
|
|
}
|
|
|
|
+ case PANVK_QUEUE_FAMILY_VIDEO_DECODE:
|
|
+ /* iter1: only MEDIUM priority for now */
|
|
+ return priority == VK_QUEUE_GLOBAL_PRIORITY_MEDIUM
|
|
+ ? VK_SUCCESS
|
|
+ : VK_ERROR_NOT_PERMITTED_KHR;
|
|
+
|
|
default:
|
|
UNREACHABLE("Unknown queue family");
|
|
}
|
|
@@ -250,11 +280,20 @@
|
|
static VkResult
|
|
panvk_queue_check_status(struct vk_queue *queue)
|
|
{
|
|
- switch (queue->queue_family_index) {
|
|
+ struct panvk_device *dev =
|
|
+ container_of(queue->base.device, struct panvk_device, vk);
|
|
+ struct panvk_physical_device *pdev =
|
|
+ to_panvk_physical_device(dev->vk.physical);
|
|
+ const enum panvk_queue_family panvk_qfi =
|
|
+ panvk_per_arch(vulkan_qfi_to_panvk)(pdev, queue->queue_family_index);
|
|
+ switch (panvk_qfi) {
|
|
case PANVK_QUEUE_FAMILY_GPU:
|
|
return panvk_per_arch(gpu_queue_check_status)(queue);
|
|
case PANVK_QUEUE_FAMILY_BIND:
|
|
return panvk_per_arch(bind_queue_check_status)(queue);
|
|
+ case PANVK_QUEUE_FAMILY_VIDEO_DECODE:
|
|
+ /* iter1: stub — commit 4 implements real status check. */
|
|
+ return VK_SUCCESS;
|
|
default:
|
|
UNREACHABLE("Unknown queue family");
|
|
}
|
|
@@ -297,18 +336,52 @@
|
|
}
|
|
|
|
 static VkResult
|
|
+panvk_video_queue_submit_noop(struct vk_queue *queue,
|
|
+ struct vk_queue_submit *submit)
|
|
+{ /* driver_submit hook of the video-decode queue: dispatches nothing, only signals the submit's sync objects. */
|
|
+ /* All decode work was done synchronously in vkCmdDecodeVideoKHR; the
|
|
+ * queue-side submit only has to satisfy the Vulkan fence/semaphore
|
|
+ * contract by signaling everything. Waits are guaranteed satisfied by
|
|
+ * the time the runtime calls us. */
|
|
+ return vk_sync_signal_many(queue->base.device, submit->signal_count,
|
|
+ submit->signals);
|
|
+}
|
|
+
|
|
+static VkResult
|
|
panvk_queue_create(struct panvk_device *dev,
|
|
const VkDeviceQueueCreateInfo *create_info,
|
|
uint32_t queue_idx,
|
|
struct vk_queue **out_queue)
|
|
{
|
|
- switch (create_info->queueFamilyIndex) {
|
|
+ struct panvk_physical_device *pdev =
|
|
+ to_panvk_physical_device(dev->vk.physical);
|
|
+ const enum panvk_queue_family panvk_qfi =
|
|
+ panvk_per_arch(vulkan_qfi_to_panvk)(pdev, create_info->queueFamilyIndex);
|
|
+ switch (panvk_qfi) {
|
|
case PANVK_QUEUE_FAMILY_GPU:
|
|
return panvk_per_arch(create_gpu_queue)(
|
|
dev, create_info, queue_idx, out_queue);
|
|
case PANVK_QUEUE_FAMILY_BIND:
|
|
return panvk_per_arch(create_bind_queue)(
|
|
dev, create_info, queue_idx, out_queue);
|
|
+ case PANVK_QUEUE_FAMILY_VIDEO_DECODE: {
|
|
+ /* Decode work is fully synchronous at record time (CmdDecodeVideoKHR
|
|
+ * drives the V4L2 14-step dance to completion). At submit time there
|
|
+ * is nothing left to dispatch, so we honor the Vulkan contract by
|
|
+ * just signaling everything. */
|
|
+ struct vk_queue *vkq = vk_zalloc(&dev->vk.alloc, sizeof(*vkq), 8,
|
|
+ VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
|
|
+ if (!vkq)
|
|
+ return panvk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
|
|
+ VkResult vqr = vk_queue_init(vkq, &dev->vk, create_info, queue_idx);
|
|
+ if (vqr != VK_SUCCESS) {
|
|
+ vk_free(&dev->vk.alloc, vkq);
|
|
+ return panvk_error(dev, vqr);
|
|
+ }
|
|
+ vkq->driver_submit = panvk_video_queue_submit_noop;
|
|
+ *out_queue = vkq;
|
|
+ return VK_SUCCESS;
|
|
+ }
|
|
default:
|
|
return panvk_error(dev, VK_ERROR_INITIALIZATION_FAILED);
|
|
}
|
|
@@ -317,13 +390,26 @@
|
|
static void
|
|
panvk_queue_destroy(struct vk_queue *queue)
|
|
{
|
|
- switch (queue->queue_family_index) {
|
|
+ struct panvk_device *dev =
|
|
+ container_of(queue->base.device, struct panvk_device, vk);
|
|
+ struct panvk_physical_device *pdev =
|
|
+ to_panvk_physical_device(dev->vk.physical);
|
|
+ const enum panvk_queue_family panvk_qfi =
|
|
+ panvk_per_arch(vulkan_qfi_to_panvk)(pdev, queue->queue_family_index);
|
|
+ switch (panvk_qfi) {
|
|
case PANVK_QUEUE_FAMILY_GPU:
|
|
panvk_per_arch(destroy_gpu_queue)(queue);
|
|
break;
|
|
case PANVK_QUEUE_FAMILY_BIND:
|
|
panvk_per_arch(destroy_bind_queue)(queue);
|
|
break;
|
|
+ case PANVK_QUEUE_FAMILY_VIDEO_DECODE: {
|
|
+ struct panvk_device *dev =
|
|
+ container_of(queue->base.device, struct panvk_device, vk);
|
|
+ vk_queue_finish(queue);
|
|
+ vk_free(&dev->vk.alloc, queue);
|
|
+ break;
|
|
+ }
|
|
default:
|
|
UNREACHABLE("Unknown queue family");
|
|
}
|
|
@@ -511,6 +597,7 @@
|
|
vk_device_set_drm_fd(&device->vk, device->kmod.dev->fd);
|
|
|
|
|
|
+ simple_mtx_init(&device->active_video.lock, mtx_plain);
|
|
result = panvk_precomp_init(device);
|
|
if (result != VK_SUCCESS)
|
|
goto err_free_priv_bos;
|
|
@@ -542,7 +629,13 @@
|
|
if (result != VK_SUCCESS)
|
|
goto err_finish_queues;
|
|
|
|
- uint32_t qfi = queue_create->queueFamilyIndex;
|
|
+ uint32_t vulkan_qfi = queue_create->queueFamilyIndex;
|
|
+ enum panvk_queue_family qfi =
|
|
+ panvk_per_arch(vulkan_qfi_to_panvk)(physical_device, vulkan_qfi);
|
|
+ if (qfi >= PANVK_QUEUE_FAMILY_COUNT) {
|
|
+ result = panvk_error(device, VK_ERROR_INITIALIZATION_FAILED);
|
|
+ goto err_finish_queues;
|
|
+ }
|
|
struct panvk_device_queue_family *qf = &device->queue_families[qfi];
|
|
|
|
qf->queues =
|
|
diff -urN a/src/panfrost/vulkan/panvk_vX_physical_device.c b/src/panfrost/vulkan/panvk_vX_physical_device.c
|
|
--- a/src/panfrost/vulkan/panvk_vX_physical_device.c 2026-05-21 22:46:59.273811425 +0200
|
|
+++ b/src/panfrost/vulkan/panvk_vX_physical_device.c 2026-05-22 10:17:41.214043265 +0200
|
|
@@ -12,6 +12,7 @@
|
|
#include <sys/sysmacros.h>
|
|
|
|
#include "git_sha1.h"
|
|
+#include "panvk_video_decode.h"
|
|
|
|
#include "vk_android.h"
|
|
#include "vk_device.h"
|
|
@@ -170,6 +171,14 @@
|
|
.EXT_queue_family_foreign = true,
|
|
.EXT_robustness2 = true,
|
|
.EXT_transform_feedback = PAN_ARCH < 9, /* iter13: JM-class only for now */
|
|
+ /* Video extensions are advertised only when (a) we're on a Bifrost
|
|
+ * arch (PAN_ARCH < 9) AND (b) a hantro VPU is reachable on the
|
|
+ * expected V4L2 nodes — otherwise CreateVideoSessionKHR would
|
|
+ * succeed at the panvk layer and then fail at v4l2_open_fds, giving
|
|
+ * the app a misleading capability claim. */
|
|
+ .KHR_video_queue = PAN_ARCH < 9 && panvk_v4l2_probe_hantro(),
|
|
+ .KHR_video_decode_queue = PAN_ARCH < 9 && panvk_v4l2_probe_hantro(),
|
|
+ .KHR_video_decode_h264 = PAN_ARCH < 9 && panvk_v4l2_probe_hantro(),
|
|
.EXT_sampler_filter_minmax = PAN_ARCH >= 10,
|
|
.EXT_scalar_block_layout = true,
|
|
.EXT_separate_stencil_usage = true,
|