19acc76da4
Pre-iter2 each VA surface was permanently 1:1 bound to one V4L2 CAPTURE
buffer. mpv reusing a surface for a new decode while the compositor still
held an EXPBUF'd dma_buf fd to the prior frame caused the kernel to
write fresh decode output into the same physical memory the compositor
was reading -- visible as stutter / back-and-forth swap on
mpv --hwdec=vaapi --vo=gpu playback.
Architecture:
- New cap_pool abstraction (cap_pool.{h,c}) owns N CAPTURE buffers
(N = max(surfaces_count, MIN_CAP_POOL=24)) with per-slot state
{FREE, IN_DECODE, DECODED, EXPORTED} guarded by pthread_mutex_t.
- Surfaces no longer own buffers; each vaBeginPicture acquires the
oldest FREE slot (LRU), binds it for the decode cycle, and the slot
cycles IN_DECODE -> DECODED (post-DQBUF) -> EXPORTED (post-EXPBUF).
- Slot is released on next BeginPicture for the same surface or on
vaDestroySurfaces.
Limitations (Sonnet Phase 5 review iter2 9.x, deferred to iter3+):
- Option-A is a statistical mitigation, not a complete fix: the race
window narrows to the case "pool exhausted, force-recycle of the oldest
EXPORTED slot." For typical mpv 16-surface playback with
MIN_CAP_POOL=24 the fallback never fires.
- Multi-context concurrent use not addressed (one V4L2 device, multiple
cap_pools -- iter3 scope).
Other call sites updated:
- picture.c::BeginPicture acquires + binds, releasing prior slot if any.
- surface.c::SyncSurface marks slot DECODED after DQBUF.
- surface.c::ExportSurfaceHandle marks slot EXPORTED, retaining OUR
EXPBUF fd for force-recycle close().
- surface.c::DestroySurfaces releases via surface_unbind_slot;
cap_pool owns the mmaps now.
- surface.c::CreateSurfaces2 destroys the pool in the resolution-change
path before REQBUFS(0) (otherwise v4l2_index values would be stale after
Fix 1's REQBUFS).
- context.c::DestroyContext invokes cap_pool_destroy.
- image.c::DeriveImage skips copy_surface_to_image when current_slot is
NULL (ffmpeg av_hwframe_ctx_init probes derive on undecoded surfaces).
Verified: mpv vaapi-copy 200 frames bbb_1080p30, 0 drops, LRU visibly
recycling slot indices, real luma gradient. mpv vaapi --vo=gpu
operator-inspection follows.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
270 lines
8.7 KiB
C
270 lines
8.7 KiB
C
/*
|
|
* Copyright (C) 2007 Intel Corporation
|
|
* Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com>
|
|
* Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the
|
|
* "Software"), to deal in the Software without restriction, including
|
|
* without limitation the rights to use, copy, modify, merge, publish,
|
|
* distribute, sub license, and/or sell copies of the Software, and to
|
|
* permit persons to whom the Software is furnished to do so, subject to
|
|
* the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice (including the
|
|
* next paragraph) shall be included in all copies or substantial portions
|
|
* of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
|
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
|
|
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
|
|
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
*/
|
|
|
|
#include "context.h"
|
|
#include "config.h"
|
|
#include "request.h"
|
|
#include "surface.h"
|
|
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
#include <assert.h>
|
|
|
|
#include <sys/ioctl.h>
|
|
#include <sys/mman.h>
|
|
|
|
#include <linux/videodev2.h>
|
|
|
|
#include <mpeg2-ctrls.h>
|
|
#include <hevc-ctrls.h>
|
|
|
|
#include "utils.h"
|
|
#include "v4l2.h"
|
|
|
|
#include "autoconfig.h"
|
|
|
|
VAStatus RequestCreateContext(VADriverContextP context, VAConfigID config_id,
|
|
int picture_width, int picture_height, int flags,
|
|
VASurfaceID *surfaces_ids, int surfaces_count,
|
|
VAContextID *context_id)
|
|
{
|
|
struct request_data *driver_data = context->pDriverData;
|
|
struct object_config *config_object;
|
|
struct object_context *context_object = NULL;
|
|
struct video_format *video_format;
|
|
VASurfaceID *ids = NULL;
|
|
VAContextID id;
|
|
VAStatus status;
|
|
unsigned int output_type, capture_type;
|
|
int rc;
|
|
|
|
video_format = driver_data->video_format;
|
|
if (video_format == NULL)
|
|
return VA_STATUS_ERROR_OPERATION_FAILED;
|
|
|
|
output_type = v4l2_type_video_output(video_format->v4l2_mplane);
|
|
capture_type = v4l2_type_video_capture(video_format->v4l2_mplane);
|
|
|
|
config_object = CONFIG(driver_data, config_id);
|
|
if (config_object == NULL) {
|
|
status = VA_STATUS_ERROR_INVALID_CONFIG;
|
|
goto error;
|
|
}
|
|
|
|
id = object_heap_allocate(&driver_data->context_heap);
|
|
context_object = CONTEXT(driver_data, id);
|
|
if (context_object == NULL) {
|
|
status = VA_STATUS_ERROR_ALLOCATION_FAILED;
|
|
goto error;
|
|
}
|
|
memset(&context_object->dpb, 0, sizeof(context_object->dpb));
|
|
|
|
/*
|
|
* Initialize the OUTPUT (bitstream-input) buffer pool. Sized by
|
|
* codec pipeline depth (4 H.264 frames in flight is sufficient
|
|
* for current hantro/rkvdec scheduling); independent of caller-
|
|
* supplied surfaces_count. Pool is owned by driver_data so it
|
|
* outlives any single context destroy/recreate cycle.
|
|
*
|
|
* This replaces the prior per-surface OUTPUT loop, which (a)
|
|
* created an empty queue when surfaces_count==0 (ffmpeg vaapi-
|
|
* copy path) and (b) only populated surface->source_* for
|
|
* surfaces present at vaCreateContext time, NULL-derefing on
|
|
* surfaces created later.
|
|
*/
|
|
rc = request_pool_init(&driver_data->output_pool,
|
|
driver_data->video_fd, output_type, 4);
|
|
if (rc < 0) {
|
|
status = VA_STATUS_ERROR_ALLOCATION_FAILED;
|
|
goto error;
|
|
}
|
|
|
|
/*
|
|
* The surface_ids array has been allocated by the caller and
|
|
* we don't have any indication wrt its life time. Let's make sure
|
|
* its life span is under our control.
|
|
*/
|
|
if (surfaces_count > 0) {
|
|
ids = malloc(surfaces_count * sizeof(VASurfaceID));
|
|
if (ids == NULL) {
|
|
status = VA_STATUS_ERROR_ALLOCATION_FAILED;
|
|
goto error;
|
|
}
|
|
|
|
memcpy(ids, surfaces_ids,
|
|
surfaces_count * sizeof(VASurfaceID));
|
|
}
|
|
|
|
/*
|
|
* Stateless H.264 device-wide controls. The kernel V4L2 stateless
|
|
* framework requires DECODE_MODE and START_CODE be set on the
|
|
* device fd (request_fd=-1) before VIDIOC_STREAMON; per-request
|
|
* controls (SPS/PPS/etc.) attached to a request_fd come later.
|
|
*
|
|
* hantro-vpu (RK3568) accepts only DECODE_MODE_FRAME_BASED.
|
|
* START_CODE_ANNEX_B preserves leading 0x00000001 in the slice
|
|
* payload that h264.c assembles. Errors here are not fatal: not
|
|
* every backing driver supports both controls (e.g. cedrus may
|
|
* default to SLICE_BASED without exposing DECODE_MODE).
|
|
*/
|
|
{
|
|
struct v4l2_ext_control dev_ctrls[2] = {
|
|
{
|
|
.id = V4L2_CID_STATELESS_H264_DECODE_MODE,
|
|
.value = V4L2_STATELESS_H264_DECODE_MODE_FRAME_BASED,
|
|
},
|
|
{
|
|
.id = V4L2_CID_STATELESS_H264_START_CODE,
|
|
.value = V4L2_STATELESS_H264_START_CODE_ANNEX_B,
|
|
},
|
|
};
|
|
(void)v4l2_set_controls(driver_data->video_fd, -1,
|
|
dev_ctrls, 2);
|
|
}
|
|
|
|
/*
|
|
* Mirror the ANNEX_B start-code mode set on the device above
|
|
* into context_object->h264_start_code so picture.c::
|
|
* codec_store_buffer prepends 0x00 0x00 0x01 to each slice
|
|
* payload it copies into the OUTPUT buffer. Without this, the
|
|
* kernel — which we just told to expect ANNEX_B — sees a raw
|
|
* NAL stream with no start codes, fails to find slice
|
|
* boundaries, and emits a zeroed CAPTURE buffer (visually a
|
|
* flat dark-green frame).
|
|
*
|
|
* h264_get_controls() exists for this purpose but is never
|
|
* called in the current code path; the planned probe-then-set
|
|
* commit will replace this hardcoded assignment with a runtime
|
|
* read of the kernel's accepted START_CODE value.
|
|
*/
|
|
context_object->h264_start_code = true;
|
|
|
|
rc = v4l2_set_stream(driver_data->video_fd, output_type, true);
|
|
if (rc < 0) {
|
|
status = VA_STATUS_ERROR_OPERATION_FAILED;
|
|
goto error;
|
|
}
|
|
|
|
rc = v4l2_set_stream(driver_data->video_fd, capture_type, true);
|
|
if (rc < 0) {
|
|
status = VA_STATUS_ERROR_OPERATION_FAILED;
|
|
goto error;
|
|
}
|
|
|
|
context_object->config_id = config_id;
|
|
context_object->render_surface_id = VA_INVALID_ID;
|
|
context_object->surfaces_ids = ids;
|
|
context_object->surfaces_count = surfaces_count;
|
|
context_object->picture_width = picture_width;
|
|
context_object->picture_height = picture_height;
|
|
context_object->flags = flags;
|
|
|
|
*context_id = id;
|
|
|
|
status = VA_STATUS_SUCCESS;
|
|
goto complete;
|
|
|
|
error:
|
|
if (ids != NULL)
|
|
free(ids);
|
|
|
|
if (context_object != NULL)
|
|
object_heap_free(&driver_data->context_heap,
|
|
(struct object_base *)context_object);
|
|
|
|
complete:
|
|
return status;
|
|
}
|
|
|
|
/*
 * Destroy a decoding context.
 *
 * Stops streaming on the OUTPUT and CAPTURE queues, destroys the surfaces
 * recorded in the context, releases the context object, frees the OUTPUT
 * buffers with REQBUFS(0), tears down the CAPTURE pool and resets the
 * surface format cache.
 *
 * Failures before the context object is released return immediately and
 * leave the context in place. Once the context object has been freed the
 * remaining teardown always runs to completion; a REQBUFS(0) failure on
 * OUTPUT is reported through the return value only after the CAPTURE pool
 * and format cache have been dealt with.
 *
 * Returns:
 *   VA_STATUS_SUCCESS                  everything was torn down
 *   VA_STATUS_ERROR_INVALID_CONTEXT    context_id does not resolve
 *   VA_STATUS_ERROR_OPERATION_FAILED   no video format selected, or
 *                                      STREAMOFF, surface destruction or
 *                                      REQBUFS(0) failed
 */
VAStatus RequestDestroyContext(VADriverContextP context, VAContextID context_id)
{
	struct request_data *driver_data = context->pDriverData;
	struct object_context *context_object;
	struct video_format *video_format;
	unsigned int output_type, capture_type;
	VAStatus status;
	int rc;

	video_format = driver_data->video_format;
	if (video_format == NULL)
		return VA_STATUS_ERROR_OPERATION_FAILED;

	output_type = v4l2_type_video_output(video_format->v4l2_mplane);
	capture_type = v4l2_type_video_capture(video_format->v4l2_mplane);

	context_object = CONTEXT(driver_data, context_id);
	if (context_object == NULL)
		return VA_STATUS_ERROR_INVALID_CONTEXT;

	rc = v4l2_set_stream(driver_data->video_fd, output_type, false);
	if (rc < 0)
		return VA_STATUS_ERROR_OPERATION_FAILED;

	rc = v4l2_set_stream(driver_data->video_fd, capture_type, false);
	if (rc < 0)
		return VA_STATUS_ERROR_OPERATION_FAILED;

	/* Buffers liberation */

	status = RequestDestroySurfaces(context, context_object->surfaces_ids,
					context_object->surfaces_count);
	if (status != VA_STATUS_SUCCESS)
		return VA_STATUS_ERROR_OPERATION_FAILED;

	free(context_object->surfaces_ids);

	object_heap_free(&driver_data->context_heap,
			 (struct object_base *)context_object);

	/*
	 * Point of no return: the context object is gone, so the caller
	 * cannot retry this call. From here on every remaining teardown
	 * step runs regardless of earlier failures, and the first error is
	 * reported at the end.
	 */
	status = VA_STATUS_SUCCESS;

	rc = v4l2_request_buffers(driver_data->video_fd, output_type, 0);
	if (rc < 0)
		status = VA_STATUS_ERROR_OPERATION_FAILED;

	/*
	 * Iter2 Fix 3: cap_pool owns the CAPTURE buffers' mmaps + any
	 * outstanding our_export_fds. Tear it down (which also issues
	 * REQBUFS(0) on CAPTURE), so the next CreateSurfaces2 cycle sees
	 * a clean slate and rebuilds the pool at the new resolution.
	 */
	cap_pool_destroy(&driver_data->capture_pool, driver_data->video_fd,
			 capture_type);

	/*
	 * Iteration 2 Fix 1: the kernel CAPTURE format state is no longer
	 * guaranteed after the dual REQBUFS(0). Invalidate the
	 * LAST_OUTPUT_WIDTH/HEIGHT cache so the next CreateSurfaces2 will
	 * unconditionally re-S_FMT on OUTPUT. Without this, multi-video
	 * Firefox sessions on mozilla.org corrupted the next session's
	 * CAPTURE format query (kernel returned 48x48 instead of the
	 * cached "already 1920x1088"); the exported descriptor encoded
	 * wrong pitch/offset.
	 */
	surface_reset_format_cache();

	return status;
}
|