e64bb0852d
Iteration 2 Fix 2: branch on bytesperline alignment when setting the drm_format_modifier in RequestExportSurfaceHandle. For non-64-byte-aligned pitches (e.g. 864 for 864-wide videos), report DRM_FORMAT_MOD_INVALID instead of DRM_FORMAT_MOD_NONE (LINEAR explicit). Mesa's WSI rejects LINEAR buffers that aren't scanout-aligned with 'WSI pitch not properly aligned'; MOD_INVALID tells the importer to treat as texture-only, which is the correct behavior for buffers that don't meet scanout alignment requirements. Diagnosis from operator's mozilla.org session in iteration 1 close: 864-wide intro videos triggered the WSI alignment error and Firefox fell back to SW for those videos. Sonnet Phase 5 review endorsed the conditional approach over a universal MOD_INVALID change to preserve LINEAR semantics for already-aligned content (avoids unnecessary perf cost on the common 1920-wide case). Verification path (Phase 7 of iteration 2): Firefox loads mozilla.org main page; check no MESA WSI errors in stderr; operator confirms intro videos engage HW decode (or at least don't fall back). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
819 lines
28 KiB
C
819 lines
28 KiB
C
/*
|
|
* Copyright (C) 2007 Intel Corporation
|
|
* Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com>
|
|
* Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the
|
|
* "Software"), to deal in the Software without restriction, including
|
|
* without limitation the rights to use, copy, modify, merge, publish,
|
|
* distribute, sub license, and/or sell copies of the Software, and to
|
|
* permit persons to whom the Software is furnished to do so, subject to
|
|
* the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice (including the
|
|
* next paragraph) shall be included in all copies or substantial portions
|
|
* of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
|
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
|
|
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
|
|
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
*/
|
|
|
|
#include "request.h"
|
|
#include "surface.h"
|
|
|
|
#include <assert.h>
|
|
#include <errno.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <unistd.h>
|
|
#include <fcntl.h>
|
|
|
|
#include <sys/ioctl.h>
|
|
#include <sys/mman.h>
|
|
|
|
#include <va/va_drmcommon.h>
|
|
#include <drm_fourcc.h>
|
|
#include <linux/videodev2.h>
|
|
|
|
#include "media.h"
|
|
#include "utils.h"
|
|
#include "v4l2.h"
|
|
#include "video.h"
|
|
|
|
/*
 * Per-process cache of the OUTPUT format we've set. The previous
 * SET_FORMAT_OF_OUTPUT_ONCE pattern was a latent bug (Sonnet Phase 5
 * review finding 7.3): mpv probes with small surfaces (e.g. 128x128)
 * before requesting the real resolution (e.g. 1920x1088). The
 * once-only set kept the OUTPUT format — and consequently the
 * kernel-derived CAPTURE format — pinned to the probe size. A subsequent
 * v4l2_get_format on CAPTURE then returned the small format, the
 * VADRMPRIMESurfaceDescriptor was filled with width=1920 height=1088
 * but pitch=128 offset=16384, and Mesa rejected the import with
 * "WSI pitch too small." That manifested as the solid-blue render in
 * mpv vaapi mode and the SW fallback in Firefox after frame 0.
 *
 * Fix: track (width, height) and re-set the OUTPUT format whenever
 * the resolution changes. Re-setting requires REQBUFS(0) on both
 * queues first because S_FMT after CREATE_BUFS is rejected by V4L2;
 * we tear the buffers down and let the next allocation cycle recreate
 * them at the new resolution.
 */
|
|
/* Width of the last OUTPUT format that was set; 0 means none has been set. */
static unsigned int LAST_OUTPUT_WIDTH;
/* Height of the last OUTPUT format that was set; 0 means none has been set. */
static unsigned int LAST_OUTPUT_HEIGHT;
|
|
|
|
void surface_reset_format_cache(void)
|
|
{
|
|
LAST_OUTPUT_WIDTH = 0;
|
|
LAST_OUTPUT_HEIGHT = 0;
|
|
}
|
|
|
|
VAStatus RequestCreateSurfaces2(VADriverContextP context, unsigned int format,
|
|
unsigned int width, unsigned int height,
|
|
VASurfaceID *surfaces_ids,
|
|
unsigned int surfaces_count,
|
|
VASurfaceAttrib *attributes,
|
|
unsigned int attributes_count)
|
|
{
|
|
struct request_data *driver_data = context->pDriverData;
|
|
struct object_surface *surface_object;
|
|
struct video_format *video_format = NULL;
|
|
unsigned int destination_sizes[VIDEO_MAX_PLANES];
|
|
unsigned int destination_bytesperlines[VIDEO_MAX_PLANES];
|
|
unsigned int destination_planes_count;
|
|
unsigned int format_width, format_height;
|
|
unsigned int capture_type;
|
|
unsigned int index_base;
|
|
unsigned int index;
|
|
unsigned int i, j;
|
|
VASurfaceID id;
|
|
bool found;
|
|
int rc;
|
|
|
|
/*
|
|
* Set the OUTPUT format on (re)allocation when the resolution
|
|
* differs from the last set value. Without this, mpv's small
|
|
* probe surfaces (128x128) pin the CAPTURE format and the
|
|
* subsequent real-resolution surface ends up with wrong pitch
|
|
* in the export descriptor — causing Mesa to reject the
|
|
* DMA-BUF import. Detail in the LAST_OUTPUT_WIDTH/HEIGHT
|
|
* comment block at the top of this file.
|
|
*
|
|
* TODO: this is still not a clean architecture — v4l2_set_format
|
|
* after CREATE_BUFS requires REQBUFS(0) first (kernel returns
|
|
* EBUSY otherwise). For mpv's pattern (probe with small, then
|
|
* allocate big) the small probe surfaces have not been streamed
|
|
* yet, so REQBUFS(0) on them works. For consumers that legitimately
|
|
* stream multiple resolutions in sequence, we'd need to STREAMOFF
|
|
* + REQBUFS(0) + new S_FMT + new CREATE_BUFS — that's a context-
|
|
* level redesign for the next iteration.
|
|
*/
|
|
unsigned int pixelformat = V4L2_PIX_FMT_H264_SLICE;
|
|
unsigned int output_type = v4l2_type_video_output(true);
|
|
|
|
if (LAST_OUTPUT_WIDTH != width || LAST_OUTPUT_HEIGHT != height) {
|
|
/*
|
|
* If we've previously allocated buffers at a different
|
|
* resolution, tear them down on BOTH queues before re-setting
|
|
* the OUTPUT format. S_FMT is rejected by V4L2 while buffers
|
|
* exist; hantro derives CAPTURE format from OUTPUT format, so
|
|
* leftover CAPTURE buffers from the prior resolution would
|
|
* also block the implicit format change. Sonnet Phase 5
|
|
* review (iter2 9.1) flagged this as a missing REQBUFS(0)
|
|
* gap on the CAPTURE side of the resolution-change path.
|
|
*/
|
|
if (LAST_OUTPUT_WIDTH != 0) {
|
|
(void)v4l2_request_buffers(driver_data->video_fd,
|
|
output_type, 0);
|
|
(void)v4l2_request_buffers(driver_data->video_fd,
|
|
v4l2_type_video_capture(true), 0);
|
|
}
|
|
|
|
rc = v4l2_set_format(driver_data->video_fd, output_type, pixelformat,
|
|
width, height);
|
|
if (rc < 0)
|
|
return VA_STATUS_ERROR_OPERATION_FAILED;
|
|
|
|
LAST_OUTPUT_WIDTH = width;
|
|
LAST_OUTPUT_HEIGHT = height;
|
|
}
|
|
|
|
if (format != VA_RT_FORMAT_YUV420)
|
|
return VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT;
|
|
|
|
if (!driver_data->video_format) {
|
|
found = v4l2_find_format(driver_data->video_fd,
|
|
V4L2_BUF_TYPE_VIDEO_CAPTURE,
|
|
V4L2_PIX_FMT_SUNXI_TILED_NV12);
|
|
if (found)
|
|
video_format = video_format_find(V4L2_PIX_FMT_SUNXI_TILED_NV12);
|
|
|
|
found = v4l2_find_format(driver_data->video_fd,
|
|
V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE,
|
|
V4L2_PIX_FMT_NV12);
|
|
if (found)
|
|
video_format = video_format_find(V4L2_PIX_FMT_NV12);
|
|
|
|
if (video_format == NULL)
|
|
return VA_STATUS_ERROR_OPERATION_FAILED;
|
|
|
|
driver_data->video_format = video_format;
|
|
|
|
capture_type = v4l2_type_video_capture(video_format->v4l2_mplane);
|
|
|
|
/*
|
|
* Do not VIDIOC_S_FMT on the CAPTURE queue. The hantro
|
|
* stateless decoder derives the CAPTURE format from the
|
|
* SPS attached to the OUTPUT request; explicitly setting
|
|
* it here can put the driver into an inconsistent state.
|
|
* GStreamer's v4l2slh264dec only G_FMTs CAPTURE (see
|
|
* gst-plugins-bad/sys/v4l2codecs/gstv4l2decoder.c::
|
|
* gst_v4l2_decoder_negotiate_src_format), and that
|
|
* variant produces correct decoded NV12 on the same
|
|
* hardware where this driver currently emits zeros.
|
|
*
|
|
* v4l2_get_format() below queries the driver's current
|
|
* state and gives us the bytesperline/sizes we need.
|
|
*/
|
|
} else {
|
|
video_format = driver_data->video_format;
|
|
capture_type = v4l2_type_video_capture(video_format->v4l2_mplane);
|
|
}
|
|
|
|
rc = v4l2_get_format(driver_data->video_fd, capture_type, &format_width,
|
|
&format_height, destination_bytesperlines,
|
|
destination_sizes, NULL);
|
|
if (rc < 0)
|
|
return VA_STATUS_ERROR_OPERATION_FAILED;
|
|
|
|
destination_planes_count = video_format->planes_count;
|
|
|
|
/*
|
|
* DEBUG INSTRUMENTATION (surface-export diagnosis 2026-05-04):
|
|
* dump what v4l2_get_format returned. Sonnet's Phase 5 review
|
|
* hypothesis #4 was that format_height might be 1080 (stream-
|
|
* signaled) vs 1088 (MB-aligned), causing UV offset to land
|
|
* 15360 bytes early. Earlier ftrace shows hantro returns
|
|
* height=1088 — but verify in-driver to be sure.
|
|
*/
|
|
request_log("CreateSurfaces2: surf_width=%u surf_height=%u "
|
|
"fmt_width=%u fmt_height=%u bytesperline[0]=%u "
|
|
"sizes[0]=%u sizes[1]=%u planes_count=%u "
|
|
"v4l2_buffers_count=%u\n",
|
|
width, height, format_width, format_height,
|
|
destination_bytesperlines[0],
|
|
destination_sizes[0], destination_sizes[1],
|
|
destination_planes_count, video_format->v4l2_buffers_count);
|
|
|
|
rc = v4l2_create_buffers(driver_data->video_fd, capture_type,
|
|
surfaces_count, &index_base);
|
|
if (rc < 0)
|
|
return VA_STATUS_ERROR_ALLOCATION_FAILED;
|
|
|
|
for (i = 0; i < surfaces_count; i++) {
|
|
index = index_base + i;
|
|
|
|
id = object_heap_allocate(&driver_data->surface_heap);
|
|
surface_object = SURFACE(driver_data, id);
|
|
if (surface_object == NULL)
|
|
return VA_STATUS_ERROR_ALLOCATION_FAILED;
|
|
|
|
rc = v4l2_query_buffer(driver_data->video_fd, capture_type,
|
|
index,
|
|
surface_object->destination_map_lengths,
|
|
surface_object->destination_map_offsets,
|
|
video_format->v4l2_buffers_count);
|
|
if (rc < 0)
|
|
return VA_STATUS_ERROR_ALLOCATION_FAILED;
|
|
|
|
for (j = 0; j < video_format->v4l2_buffers_count; j++) {
|
|
surface_object->destination_map[j] =
|
|
mmap(NULL,
|
|
surface_object->destination_map_lengths[j],
|
|
PROT_READ | PROT_WRITE, MAP_SHARED,
|
|
driver_data->video_fd,
|
|
surface_object->destination_map_offsets[j]);
|
|
|
|
if (surface_object->destination_map[j] == MAP_FAILED)
|
|
return VA_STATUS_ERROR_ALLOCATION_FAILED;
|
|
}
|
|
|
|
/*
|
|
* FIXME: Handle this per-pixelformat, trying to generalize it
|
|
* is not a reasonable approach. The final description should be
|
|
* in terms of (logical) planes.
|
|
*/
|
|
|
|
if (video_format->v4l2_buffers_count == 1) {
|
|
destination_sizes[0] = destination_bytesperlines[0] *
|
|
format_height;
|
|
|
|
for (j = 1; j < destination_planes_count; j++)
|
|
destination_sizes[j] = destination_sizes[0] / 2;
|
|
|
|
for (j = 0; j < destination_planes_count; j++) {
|
|
surface_object->destination_offsets[j] =
|
|
j > 0 ? destination_sizes[j - 1] : 0;
|
|
surface_object->destination_data[j] =
|
|
((unsigned char *)surface_object->destination_map[0] +
|
|
surface_object->destination_offsets[j]);
|
|
surface_object->destination_sizes[j] =
|
|
destination_sizes[j];
|
|
surface_object->destination_bytesperlines[j] =
|
|
destination_bytesperlines[0];
|
|
}
|
|
} else if (video_format->v4l2_buffers_count == destination_planes_count) {
|
|
for (j = 0; j < destination_planes_count; j++) {
|
|
surface_object->destination_offsets[j] = 0;
|
|
surface_object->destination_data[j] =
|
|
surface_object->destination_map[j];
|
|
surface_object->destination_sizes[j] =
|
|
destination_sizes[j];
|
|
surface_object->destination_bytesperlines[j] =
|
|
destination_bytesperlines[j];
|
|
}
|
|
} else {
|
|
return VA_STATUS_ERROR_ALLOCATION_FAILED;
|
|
}
|
|
|
|
surface_object->status = VASurfaceReady;
|
|
surface_object->width = width;
|
|
surface_object->height = height;
|
|
|
|
surface_object->source_index = 0;
|
|
surface_object->source_data = NULL;
|
|
surface_object->source_size = 0;
|
|
|
|
surface_object->destination_index = index;
|
|
|
|
surface_object->destination_planes_count =
|
|
destination_planes_count;
|
|
surface_object->destination_buffers_count =
|
|
video_format->v4l2_buffers_count;
|
|
|
|
memset(&surface_object->params, 0,
|
|
sizeof(surface_object->params));
|
|
surface_object->slices_count = 0;
|
|
surface_object->slices_size = 0;
|
|
|
|
surface_object->request_fd = -1;
|
|
|
|
surfaces_ids[i] = id;
|
|
}
|
|
|
|
return VA_STATUS_SUCCESS;
|
|
}
|
|
|
|
VAStatus RequestCreateSurfaces(VADriverContextP context, int width, int height,
|
|
int format, int surfaces_count,
|
|
VASurfaceID *surfaces_ids)
|
|
{
|
|
return RequestCreateSurfaces2(context, format, width, height,
|
|
surfaces_ids, surfaces_count, NULL, 0);
|
|
}
|
|
|
|
VAStatus RequestDestroySurfaces(VADriverContextP context,
|
|
VASurfaceID *surfaces_ids, int surfaces_count)
|
|
{
|
|
struct request_data *driver_data = context->pDriverData;
|
|
struct object_surface *surface_object;
|
|
unsigned int i, j;
|
|
|
|
for (i = 0; i < surfaces_count; i++) {
|
|
surface_object = SURFACE(driver_data, surfaces_ids[i]);
|
|
if (surface_object == NULL)
|
|
return VA_STATUS_ERROR_INVALID_SURFACE;
|
|
|
|
/*
|
|
* source_* are now transient borrows from request_pool, not
|
|
* surface-owned mappings; the pool owns the underlying mmap.
|
|
* Nothing to free here.
|
|
*/
|
|
|
|
for (j = 0; j < surface_object->destination_buffers_count; j++)
|
|
if (surface_object->destination_map[j] != NULL &&
|
|
surface_object->destination_map_lengths[j] > 0)
|
|
munmap(surface_object->destination_map[j],
|
|
surface_object->destination_map_lengths[j]);
|
|
|
|
if (surface_object->request_fd > 0)
|
|
close(surface_object->request_fd);
|
|
|
|
object_heap_free(&driver_data->surface_heap,
|
|
(struct object_base *)surface_object);
|
|
}
|
|
|
|
return VA_STATUS_SUCCESS;
|
|
}
|
|
|
|
VAStatus RequestSyncSurface(VADriverContextP context, VASurfaceID surface_id)
|
|
{
|
|
request_log("ENTER RequestSyncSurface(surf=%u)\n", surface_id);
|
|
|
|
struct request_data *driver_data = context->pDriverData;
|
|
struct object_surface *surface_object;
|
|
VAStatus status;
|
|
struct video_format *video_format;
|
|
unsigned int output_type, capture_type;
|
|
int request_fd = -1;
|
|
int rc;
|
|
|
|
video_format = driver_data->video_format;
|
|
if (video_format == NULL) {
|
|
status = VA_STATUS_ERROR_OPERATION_FAILED;
|
|
goto error;
|
|
}
|
|
|
|
output_type = v4l2_type_video_output(video_format->v4l2_mplane);
|
|
capture_type = v4l2_type_video_capture(video_format->v4l2_mplane);
|
|
|
|
surface_object = SURFACE(driver_data, surface_id);
|
|
if (surface_object == NULL) {
|
|
status = VA_STATUS_ERROR_INVALID_SURFACE;
|
|
goto error;
|
|
}
|
|
|
|
if (surface_object->status != VASurfaceRendering) {
|
|
status = VA_STATUS_SUCCESS;
|
|
request_log(" RequestSyncSurface(surf=%u) early-exit, status=%d\n",
|
|
surface_id, surface_object->status);
|
|
goto complete;
|
|
}
|
|
|
|
request_fd = surface_object->request_fd;
|
|
if (request_fd < 0) {
|
|
status = VA_STATUS_ERROR_OPERATION_FAILED;
|
|
goto error;
|
|
}
|
|
|
|
rc = media_request_queue(request_fd);
|
|
if (rc < 0) {
|
|
status = VA_STATUS_ERROR_OPERATION_FAILED;
|
|
goto error;
|
|
}
|
|
|
|
rc = media_request_wait_completion(request_fd);
|
|
if (rc < 0) {
|
|
status = VA_STATUS_ERROR_OPERATION_FAILED;
|
|
goto error;
|
|
}
|
|
|
|
rc = media_request_reinit(request_fd);
|
|
if (rc < 0) {
|
|
status = VA_STATUS_ERROR_OPERATION_FAILED;
|
|
goto error;
|
|
}
|
|
|
|
rc = v4l2_dequeue_buffer(driver_data->video_fd, -1, output_type,
|
|
surface_object->source_index, 1);
|
|
if (rc < 0) {
|
|
status = VA_STATUS_ERROR_OPERATION_FAILED;
|
|
goto error;
|
|
}
|
|
|
|
/*
|
|
* OUTPUT buffer is back from the kernel: return its pool slot
|
|
* for reuse and clear the surface's transient borrow handle.
|
|
*/
|
|
request_pool_release(&driver_data->output_pool,
|
|
surface_object->source_index);
|
|
surface_object->source_data = NULL;
|
|
surface_object->source_size = 0;
|
|
|
|
rc = v4l2_dequeue_buffer(driver_data->video_fd, -1, capture_type,
|
|
surface_object->destination_index,
|
|
surface_object->destination_buffers_count);
|
|
if (rc < 0) {
|
|
status = VA_STATUS_ERROR_OPERATION_FAILED;
|
|
goto error;
|
|
}
|
|
|
|
/*
|
|
* DEBUG INSTRUMENTATION (0010): hex-dump first 32 bytes of the
|
|
* decoded CAPTURE Y-plane after DQBUF, plus a 32-byte luma
|
|
* variance signal (max - min) so we can tell at a glance whether
|
|
* the bytes are real decoded pixels or a uniform fill.
|
|
*
|
|
* Cache hygiene fix (2026-05-04): without msync(MS_SYNC|
|
|
* MS_INVALIDATE) the previous version of this dump consistently
|
|
* showed the patch-0011 sentinel (0xab) even when the kernel had
|
|
* DMA-overwritten the buffer with real pixels — userspace's
|
|
* cached mmap line for the first 32 bytes contained the sentinel
|
|
* write and was never invalidated by DQBUF alone on this hantro
|
|
* CMA-backed config. Caused half a day of mistaken "kernel never
|
|
* wrote the buffer" diagnosis (see libva-multiplanar campaign
|
|
* phase0_evidence/2026-05-04-kernel-trace/findings.md). The
|
|
* msync() forces the kernel to invalidate userspace cache for
|
|
* this region so the read goes to physical memory.
|
|
*/
|
|
{
|
|
unsigned char *p =
|
|
(unsigned char *)surface_object->destination_map[0];
|
|
char hex[32 * 3 + 1] = { 0 };
|
|
unsigned int i;
|
|
if (p == NULL) {
|
|
request_log("CAPTURE[idx=%u, plane0]: (NULL)\n",
|
|
surface_object->destination_index);
|
|
} else {
|
|
unsigned char ymin = 0xff, ymax = 0;
|
|
(void)msync(p, 32, MS_SYNC | MS_INVALIDATE);
|
|
for (i = 0; i < 32; i++) {
|
|
unsigned char y = p[i];
|
|
snprintf(hex + i * 3, 4, " %02x", y);
|
|
if (y < ymin) ymin = y;
|
|
if (y > ymax) ymax = y;
|
|
}
|
|
request_log("CAPTURE[idx=%u, plane0]:%s "
|
|
"(ymin=0x%02x ymax=0x%02x var=%u)\n",
|
|
surface_object->destination_index,
|
|
hex, ymin, ymax,
|
|
(unsigned)(ymax - ymin));
|
|
}
|
|
}
|
|
|
|
surface_object->status = VASurfaceDisplaying;
|
|
|
|
status = VA_STATUS_SUCCESS;
|
|
goto complete;
|
|
|
|
error:
|
|
if (request_fd >= 0) {
|
|
close(request_fd);
|
|
surface_object->request_fd = -1;
|
|
}
|
|
|
|
complete:
|
|
request_log(" RequestSyncSurface(surf=%u) RETURN status=%d\n",
|
|
surface_id, status);
|
|
return status;
|
|
}
|
|
|
|
VAStatus RequestQuerySurfaceAttributes(VADriverContextP context,
|
|
VAConfigID config,
|
|
VASurfaceAttrib *attributes,
|
|
unsigned int *attributes_count)
|
|
{
|
|
request_log("ENTER RequestQuerySurfaceAttributes(config=%u, attrs_buf=%p)\n",
|
|
config, (void *)attributes);
|
|
|
|
struct request_data *driver_data = context->pDriverData;
|
|
VASurfaceAttrib *attributes_list;
|
|
unsigned int attributes_list_size = V4L2_REQUEST_MAX_CONFIG_ATTRIBUTES *
|
|
sizeof(*attributes);
|
|
int memory_types;
|
|
unsigned int i = 0;
|
|
|
|
attributes_list = malloc(attributes_list_size);
|
|
memset(attributes_list, 0, attributes_list_size);
|
|
|
|
attributes_list[i].type = VASurfaceAttribPixelFormat;
|
|
attributes_list[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
|
|
attributes_list[i].value.type = VAGenericValueTypeInteger;
|
|
attributes_list[i].value.value.i = VA_FOURCC_NV12;
|
|
i++;
|
|
|
|
attributes_list[i].type = VASurfaceAttribMinWidth;
|
|
attributes_list[i].flags = VA_SURFACE_ATTRIB_GETTABLE;
|
|
attributes_list[i].value.type = VAGenericValueTypeInteger;
|
|
attributes_list[i].value.value.i = 32;
|
|
i++;
|
|
|
|
attributes_list[i].type = VASurfaceAttribMaxWidth;
|
|
attributes_list[i].flags = VA_SURFACE_ATTRIB_GETTABLE;
|
|
attributes_list[i].value.type = VAGenericValueTypeInteger;
|
|
attributes_list[i].value.value.i = 2048;
|
|
i++;
|
|
|
|
attributes_list[i].type = VASurfaceAttribMinHeight;
|
|
attributes_list[i].flags = VA_SURFACE_ATTRIB_GETTABLE;
|
|
attributes_list[i].value.type = VAGenericValueTypeInteger;
|
|
attributes_list[i].value.value.i = 32;
|
|
i++;
|
|
|
|
attributes_list[i].type = VASurfaceAttribMaxHeight;
|
|
attributes_list[i].flags = VA_SURFACE_ATTRIB_GETTABLE;
|
|
attributes_list[i].value.type = VAGenericValueTypeInteger;
|
|
attributes_list[i].value.value.i = 2048;
|
|
i++;
|
|
|
|
attributes_list[i].type = VASurfaceAttribMemoryType;
|
|
attributes_list[i].flags = VA_SURFACE_ATTRIB_GETTABLE |
|
|
VA_SURFACE_ATTRIB_SETTABLE;
|
|
attributes_list[i].value.type = VAGenericValueTypeInteger;
|
|
|
|
memory_types = VA_SURFACE_ATTRIB_MEM_TYPE_VA |
|
|
VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2;
|
|
|
|
/*
|
|
* First version of DRM prime export does not handle modifiers,
|
|
* that are required for supporting the tiled output format.
|
|
*/
|
|
|
|
if (video_format_is_linear(driver_data->video_format))
|
|
memory_types |= VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME;
|
|
|
|
attributes_list[i].value.value.i = memory_types;
|
|
i++;
|
|
|
|
attributes_list_size = i * sizeof(*attributes);
|
|
|
|
if (attributes != NULL)
|
|
memcpy(attributes, attributes_list, attributes_list_size);
|
|
|
|
free(attributes_list);
|
|
|
|
*attributes_count = i;
|
|
|
|
return VA_STATUS_SUCCESS;
|
|
}
|
|
|
|
VAStatus RequestQuerySurfaceStatus(VADriverContextP context,
|
|
VASurfaceID surface_id,
|
|
VASurfaceStatus *status)
|
|
{
|
|
struct request_data *driver_data = context->pDriverData;
|
|
struct object_surface *surface_object;
|
|
|
|
request_log("ENTER RequestQuerySurfaceStatus(surf=%u)\n", surface_id);
|
|
|
|
surface_object = SURFACE(driver_data, surface_id);
|
|
if (surface_object == NULL)
|
|
return VA_STATUS_ERROR_INVALID_SURFACE;
|
|
|
|
*status = surface_object->status;
|
|
|
|
request_log(" -> status=%d (Ready=%d Rendering=%d Displaying=%d)\n",
|
|
*status, VASurfaceReady, VASurfaceRendering,
|
|
VASurfaceDisplaying);
|
|
|
|
return VA_STATUS_SUCCESS;
|
|
}
|
|
|
|
/*
 * Presenting a surface onto a drawable is not supported by this driver;
 * every argument is ignored and VA_STATUS_ERROR_UNIMPLEMENTED is returned.
 */
VAStatus RequestPutSurface(VADriverContextP context, VASurfaceID surface_id,
			   void *draw, short src_x, short src_y,
			   unsigned short src_width, unsigned short src_height,
			   short dst_x, short dst_y, unsigned short dst_width,
			   unsigned short dst_height, VARectangle *cliprects,
			   unsigned int cliprects_count, unsigned int flags)
{
	(void)context;
	(void)surface_id;
	(void)draw;
	(void)src_x;
	(void)src_y;
	(void)src_width;
	(void)src_height;
	(void)dst_x;
	(void)dst_y;
	(void)dst_width;
	(void)dst_height;
	(void)cliprects;
	(void)cliprects_count;
	(void)flags;

	return VA_STATUS_ERROR_UNIMPLEMENTED;
}
|
|
|
|
/*
 * Direct surface locking is not supported by this driver; none of the
 * output parameters are written and VA_STATUS_ERROR_UNIMPLEMENTED is
 * returned.
 */
VAStatus RequestLockSurface(VADriverContextP context, VASurfaceID surface_id,
			    unsigned int *fourcc, unsigned int *luma_stride,
			    unsigned int *chroma_u_stride,
			    unsigned int *chroma_v_stride,
			    unsigned int *luma_offset,
			    unsigned int *chroma_u_offset,
			    unsigned int *chroma_v_offset,
			    unsigned int *buffer_name, void **buffer)
{
	(void)context;
	(void)surface_id;
	(void)fourcc;
	(void)luma_stride;
	(void)chroma_u_stride;
	(void)chroma_v_stride;
	(void)luma_offset;
	(void)chroma_u_offset;
	(void)chroma_v_offset;
	(void)buffer_name;
	(void)buffer;

	return VA_STATUS_ERROR_UNIMPLEMENTED;
}
|
|
|
|
/*
 * Counterpart of RequestLockSurface(); likewise unsupported, so it always
 * returns VA_STATUS_ERROR_UNIMPLEMENTED.
 */
VAStatus RequestUnlockSurface(VADriverContextP context, VASurfaceID surface_id)
{
	(void)context;
	(void)surface_id;

	return VA_STATUS_ERROR_UNIMPLEMENTED;
}
|
|
|
|
VAStatus RequestExportSurfaceHandle(VADriverContextP context,
|
|
VASurfaceID surface_id, uint32_t mem_type,
|
|
uint32_t flags, void *descriptor)
|
|
{
|
|
struct request_data *driver_data = context->pDriverData;
|
|
VADRMPRIMESurfaceDescriptor *surface_descriptor = descriptor;
|
|
struct object_surface *surface_object;
|
|
struct video_format *video_format;
|
|
int *export_fds = NULL;
|
|
unsigned int export_fds_count;
|
|
unsigned int planes_count;
|
|
unsigned int capture_type;
|
|
unsigned int size;
|
|
unsigned int i;
|
|
VAStatus status;
|
|
int rc;
|
|
|
|
video_format = driver_data->video_format;
|
|
if (video_format == NULL)
|
|
return VA_STATUS_ERROR_OPERATION_FAILED;
|
|
|
|
if (mem_type != VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2)
|
|
return VA_STATUS_ERROR_UNSUPPORTED_MEMORY_TYPE;
|
|
|
|
surface_object = SURFACE(driver_data, surface_id);
|
|
if (surface_object == NULL)
|
|
return VA_STATUS_ERROR_INVALID_SURFACE;
|
|
|
|
export_fds_count = surface_object->destination_buffers_count;
|
|
export_fds = malloc(export_fds_count * sizeof(*export_fds));
|
|
|
|
capture_type = v4l2_type_video_capture(video_format->v4l2_mplane);
|
|
|
|
rc = v4l2_export_buffer(driver_data->video_fd, capture_type,
|
|
surface_object->destination_index, O_RDONLY,
|
|
export_fds, export_fds_count);
|
|
if (rc < 0) {
|
|
status = VA_STATUS_ERROR_OPERATION_FAILED;
|
|
goto error;
|
|
}
|
|
|
|
planes_count = surface_object->destination_planes_count;
|
|
|
|
surface_descriptor->fourcc = VA_FOURCC_NV12;
|
|
surface_descriptor->width = surface_object->width;
|
|
surface_descriptor->height = surface_object->height;
|
|
surface_descriptor->num_objects = export_fds_count;
|
|
|
|
size = 0;
|
|
|
|
if (export_fds_count == 1)
|
|
for (i = 0; i < planes_count; i++)
|
|
size += surface_object->destination_sizes[i];
|
|
|
|
/*
|
|
* Iteration 2 Fix 2: choose drm_format_modifier conditionally on
|
|
* pitch alignment. Mesa's WSI / Panfrost compositor path rejects
|
|
* DRM_FORMAT_MOD_NONE (= LINEAR explicit) buffers whose pitch isn't
|
|
* GPU-aligned (typically 64+ bytes for Mali). For 1920-wide content
|
|
* the pitch is 1920 (64-aligned, fine); for 864-wide content the
|
|
* pitch is 864 (only 16-aligned), Mesa rejects with "WSI pitch not
|
|
* properly aligned" and Firefox falls back to SW.
|
|
*
|
|
* Setting DRM_FORMAT_MOD_INVALID tells the importer "modifier
|
|
* unknown, treat as implicit / texture-only" — Firefox's
|
|
* DMABufSurface.cpp:1920 explicitly omits modifier attribs from
|
|
* eglCreateImage when the value is MOD_INVALID, bypassing Mesa's
|
|
* scanout-alignment check. The buffer is then texture-imported
|
|
* (small perf cost) instead of WSI scanout-imported, which is
|
|
* the correct behavior for a buffer that doesn't meet scanout
|
|
* alignment requirements.
|
|
*
|
|
* We branch on pitch alignment to preserve LINEAR semantics for
|
|
* already-aligned content (avoids unnecessary perf cost on the
|
|
* common 1920-wide case).
|
|
*
|
|
* Sonnet Phase 5 review (iter2 question 4) endorsed this
|
|
* conditional approach over a universal MOD_INVALID change.
|
|
*/
|
|
for (i = 0; i < export_fds_count; i++) {
|
|
uint64_t modifier = video_format->drm_modifier;
|
|
unsigned int bytesperline =
|
|
surface_object->destination_bytesperlines[0];
|
|
if (bytesperline & 63) /* not 64-byte aligned */
|
|
modifier = DRM_FORMAT_MOD_INVALID;
|
|
surface_descriptor->objects[i].drm_format_modifier = modifier;
|
|
surface_descriptor->objects[i].fd = export_fds[i];
|
|
surface_descriptor->objects[i].size = export_fds_count == 1 ?
|
|
size :
|
|
surface_object->destination_sizes[i];
|
|
}
|
|
|
|
/*
|
|
* Layer construction depends on the consumer's request flags
|
|
* (VA_EXPORT_SURFACE_*_LAYERS):
|
|
*
|
|
* COMPOSED_LAYERS (default, mpv): one layer carrying both
|
|
* Y and UV planes (drm_format=NV12, num_planes=2). Mesa
|
|
* imports as a single NV12 EGLImage.
|
|
*
|
|
* SEPARATE_LAYERS (Firefox 150 RDD): two layers, Y as a
|
|
* single-plane R8 layer, UV as a single-plane GR88 layer.
|
|
* Firefox's GetVAAPISurfaceDescriptor passes
|
|
* VA_EXPORT_SURFACE_SEPARATE_LAYERS so its DMABufSurfaceYUV
|
|
* import code can address Y and UV planes independently.
|
|
* Without this branch, Firefox parsed our COMPOSED layout
|
|
* as if it were SEPARATE, found bogus layer-1 data, and
|
|
* silently fell back to FFmpeg(FFVPX) software decode.
|
|
*
|
|
* The earlier path 0001 mplane port assumed a single COMPOSED
|
|
* shape — fine for mpv but breaks any consumer requesting
|
|
* SEPARATE. Honor the flag.
|
|
*/
|
|
if ((flags & VA_EXPORT_SURFACE_SEPARATE_LAYERS) && planes_count == 2) {
|
|
surface_descriptor->num_layers = 2;
|
|
|
|
/* Layer 0: Y plane as DRM_FORMAT_R8 (1 byte/pixel luma). */
|
|
surface_descriptor->layers[0].drm_format = DRM_FORMAT_R8;
|
|
surface_descriptor->layers[0].num_planes = 1;
|
|
surface_descriptor->layers[0].object_index[0] =
|
|
export_fds_count == 1 ? 0 : 0;
|
|
surface_descriptor->layers[0].offset[0] =
|
|
surface_object->destination_offsets[0];
|
|
surface_descriptor->layers[0].pitch[0] =
|
|
surface_object->destination_bytesperlines[0];
|
|
|
|
/* Layer 1: UV plane as DRM_FORMAT_GR88 (interleaved
|
|
* U+V, 2 bytes/pixel chroma at half resolution). */
|
|
surface_descriptor->layers[1].drm_format = DRM_FORMAT_GR88;
|
|
surface_descriptor->layers[1].num_planes = 1;
|
|
surface_descriptor->layers[1].object_index[0] =
|
|
export_fds_count == 1 ? 0 : 1;
|
|
surface_descriptor->layers[1].offset[0] =
|
|
surface_object->destination_offsets[1];
|
|
surface_descriptor->layers[1].pitch[0] =
|
|
surface_object->destination_bytesperlines[1];
|
|
} else {
|
|
/* COMPOSED_LAYERS / default: one layer with all planes. */
|
|
surface_descriptor->num_layers = 1;
|
|
surface_descriptor->layers[0].drm_format = video_format->drm_format;
|
|
surface_descriptor->layers[0].num_planes = planes_count;
|
|
|
|
for (i = 0; i < planes_count; i++) {
|
|
surface_descriptor->layers[0].object_index[i] =
|
|
export_fds_count == 1 ? 0 : i;
|
|
surface_descriptor->layers[0].offset[i] =
|
|
surface_object->destination_offsets[i];
|
|
surface_descriptor->layers[0].pitch[i] =
|
|
surface_object->destination_bytesperlines[i];
|
|
}
|
|
}
|
|
|
|
/*
|
|
* DEBUG INSTRUMENTATION (surface-export diagnosis 2026-05-04):
|
|
* dump the full descriptor so we can compare against what mpv
|
|
* reports importing via --msg-level=vd=v --msg-level=vo=v.
|
|
* Phase 5 review identified DMA-BUF surface export as the
|
|
* likely root cause of the solid-blue render in mpv vaapi mode.
|
|
*/
|
|
request_log("ExportSurfaceHandle: surf=%u fd[0]=%d fourcc=0x%x "
|
|
"w=%u h=%u num_objects=%u num_layers=%u "
|
|
"obj[0].size=%u drm_fmt=0x%x drm_mod=0x%llx num_planes=%u "
|
|
"p[0].off=%u pitch=%u p[1].off=%u pitch=%u\n",
|
|
surface_id,
|
|
export_fds_count > 0 ? export_fds[0] : -1,
|
|
surface_descriptor->fourcc,
|
|
surface_descriptor->width,
|
|
surface_descriptor->height,
|
|
surface_descriptor->num_objects,
|
|
surface_descriptor->num_layers,
|
|
surface_descriptor->objects[0].size,
|
|
surface_descriptor->layers[0].drm_format,
|
|
(unsigned long long)surface_descriptor->objects[0].drm_format_modifier,
|
|
surface_descriptor->layers[0].num_planes,
|
|
surface_descriptor->layers[0].offset[0],
|
|
surface_descriptor->layers[0].pitch[0],
|
|
planes_count > 1 ? surface_descriptor->layers[0].offset[1] : 0,
|
|
planes_count > 1 ? surface_descriptor->layers[0].pitch[1] : 0);
|
|
|
|
status = VA_STATUS_SUCCESS;
|
|
goto complete;
|
|
|
|
error:
|
|
for (i = 0; i < export_fds_count; i++)
|
|
if (export_fds[i] >= 0)
|
|
close(export_fds[i]);
|
|
|
|
complete:
|
|
if (export_fds != NULL)
|
|
free(export_fds);
|
|
|
|
return status;
|
|
}
|