Files
libva-v4l2-request-fourier/src/context.c
T
test0r 19acc76da4 iter2 Fix 3: decoupled CAPTURE buffer pool with LRU recycling
Pre-iter2 each VA surface was permanently 1:1 bound to one V4L2 CAPTURE
buffer. mpv reusing a surface for a new decode while the compositor still
held an EXPBUF'd dma_buf fd to the prior frame caused the kernel to
write fresh decode output into the same physical memory the compositor
was reading -- visible as stutter / back-and-forth swap on
mpv --hwdec=vaapi --vo=gpu playback.

Architecture:
- New cap_pool abstraction (cap_pool.{h,c}) owns N CAPTURE buffers
  (N = max(surfaces_count, MIN_CAP_POOL=24)) with per-slot state
  {FREE, IN_DECODE, DECODED, EXPORTED} guarded by pthread_mutex_t.
- Surfaces no longer own buffers; each vaBeginPicture acquires the
  oldest FREE slot (LRU), binds it for the decode cycle, and the slot
  cycles IN_DECODE -> DECODED (post-DQBUF) -> EXPORTED (post-EXPBUF).
- Slot is released on next BeginPicture for the same surface or on
  vaDestroySurfaces.

Limitations (Sonnet Phase 5 review iter2 9.x, deferred to iter3+):
- Option A is a statistical mitigation, not a complete fix: the race
  window narrows to the case where the pool is exhausted and the oldest
  EXPORTED slot is force-recycled. For typical mpv 16-surface playback
  with MIN_CAP_POOL=24 the fallback never fires.
- Multi-context concurrent use not addressed (one V4L2 device, multiple
  cap_pools -- iter3 scope).

Other call sites updated:
- picture.c::BeginPicture acquires + binds, releasing prior slot if any.
- surface.c::SyncSurface marks slot DECODED after DQBUF.
- surface.c::ExportSurfaceHandle marks slot EXPORTED, retaining OUR
  EXPBUF fd for force-recycle close().
- surface.c::DestroySurfaces releases via surface_unbind_slot;
  cap_pool owns the mmaps now.
- surface.c::CreateSurfaces2 destroys the pool in the resolution-change
  path before REQBUFS(0) (else stale v4l2_index after Fix 1's REQBUFS).
- context.c::DestroyContext invokes cap_pool_destroy.
- image.c::DeriveImage skips copy_surface_to_image when current_slot is
  NULL (ffmpeg av_hwframe_ctx_init probes derive on undecoded surfaces).

Verified: mpv vaapi-copy 200 frames bbb_1080p30, 0 drops, LRU visibly
recycling slot indices, real luma gradient. mpv vaapi --vo=gpu
operator-inspection follows.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-04 22:03:31 +00:00

270 lines
8.7 KiB
C

/*
* Copyright (C) 2007 Intel Corporation
* Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com>
* Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "context.h"
#include "config.h"
#include "request.h"
#include "surface.h"
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/videodev2.h>
#include <mpeg2-ctrls.h>
#include <hevc-ctrls.h>
#include "utils.h"
#include "v4l2.h"
#include "autoconfig.h"
/*
 * Create a decoding context.
 *
 * Allocates a context object from the driver's context heap, initialises
 * the OUTPUT buffer pool, takes a private copy of the caller's surface ID
 * array, sets the device-wide H.264 controls and starts streaming on both
 * the OUTPUT and CAPTURE queues.
 *
 * On success the new ID is stored in *context_id and VA_STATUS_SUCCESS is
 * returned. On failure the context object and the copied ID array are
 * released, any queue this call already started is stopped again, and an
 * error status is returned.
 */
VAStatus RequestCreateContext(VADriverContextP context, VAConfigID config_id,
			      int picture_width, int picture_height, int flags,
			      VASurfaceID *surfaces_ids, int surfaces_count,
			      VAContextID *context_id)
{
	struct request_data *driver_data = context->pDriverData;
	struct object_config *config_object;
	struct object_context *context_object = NULL;
	struct video_format *video_format;
	VASurfaceID *ids = NULL;
	VAContextID id;
	VAStatus status;
	unsigned int output_type, capture_type;
	int rc;

	video_format = driver_data->video_format;
	if (video_format == NULL)
		return VA_STATUS_ERROR_OPERATION_FAILED;

	output_type = v4l2_type_video_output(video_format->v4l2_mplane);
	capture_type = v4l2_type_video_capture(video_format->v4l2_mplane);

	config_object = CONFIG(driver_data, config_id);
	if (config_object == NULL) {
		status = VA_STATUS_ERROR_INVALID_CONFIG;
		goto error;
	}

	id = object_heap_allocate(&driver_data->context_heap);
	context_object = CONTEXT(driver_data, id);
	if (context_object == NULL) {
		status = VA_STATUS_ERROR_ALLOCATION_FAILED;
		goto error;
	}

	memset(&context_object->dpb, 0, sizeof(context_object->dpb));

	/*
	 * Initialize the OUTPUT (bitstream-input) buffer pool. Sized by
	 * codec pipeline depth (4 H.264 frames in flight is sufficient
	 * for current hantro/rkvdec scheduling); independent of caller-
	 * supplied surfaces_count. Pool is owned by driver_data so it
	 * outlives any single context destroy/recreate cycle.
	 *
	 * This replaces the prior per-surface OUTPUT loop, which (a)
	 * created an empty queue when surfaces_count==0 (ffmpeg vaapi-
	 * copy path) and (b) only populated surface->source_* for
	 * surfaces present at vaCreateContext time, NULL-derefing on
	 * surfaces created later.
	 *
	 * NOTE(review): the pool is left initialised on the error paths
	 * below; confirm request_pool_init tolerates being called again
	 * on the next vaCreateContext.
	 */
	rc = request_pool_init(&driver_data->output_pool,
			       driver_data->video_fd, output_type, 4);
	if (rc < 0) {
		status = VA_STATUS_ERROR_ALLOCATION_FAILED;
		goto error;
	}

	/*
	 * The surface_ids array has been allocated by the caller and
	 * we don't have any indication wrt its life time. Let's make sure
	 * its life span is under our control.
	 */
	if (surfaces_count > 0) {
		ids = malloc(surfaces_count * sizeof(*ids));
		if (ids == NULL) {
			status = VA_STATUS_ERROR_ALLOCATION_FAILED;
			goto error;
		}

		memcpy(ids, surfaces_ids, surfaces_count * sizeof(*ids));
	}

	/*
	 * Stateless H.264 device-wide controls. The kernel V4L2 stateless
	 * framework requires DECODE_MODE and START_CODE be set on the
	 * device fd (request_fd=-1) before VIDIOC_STREAMON; per-request
	 * controls (SPS/PPS/etc.) attached to a request_fd come later.
	 *
	 * hantro-vpu (RK3568) accepts only DECODE_MODE_FRAME_BASED.
	 * START_CODE_ANNEX_B preserves leading 0x00000001 in the slice
	 * payload that h264.c assembles. Errors here are not fatal: not
	 * every backing driver supports both controls (e.g. cedrus may
	 * default to SLICE_BASED without exposing DECODE_MODE).
	 */
	{
		struct v4l2_ext_control dev_ctrls[2] = {
			{
				.id = V4L2_CID_STATELESS_H264_DECODE_MODE,
				.value = V4L2_STATELESS_H264_DECODE_MODE_FRAME_BASED,
			},
			{
				.id = V4L2_CID_STATELESS_H264_START_CODE,
				.value = V4L2_STATELESS_H264_START_CODE_ANNEX_B,
			},
		};

		(void)v4l2_set_controls(driver_data->video_fd, -1,
					dev_ctrls, 2);
	}

	/*
	 * Mirror the ANNEX_B start-code mode set on the device above
	 * into context_object->h264_start_code so picture.c::
	 * codec_store_buffer prepends 0x00 0x00 0x01 to each slice
	 * payload it copies into the OUTPUT buffer. Without this, the
	 * kernel — which we just told to expect ANNEX_B — sees a raw
	 * NAL stream with no start codes, fails to find slice
	 * boundaries, and emits a zeroed CAPTURE buffer (visually a
	 * flat dark-green frame).
	 *
	 * h264_get_controls() exists for this purpose but is never
	 * called in the current code path; the planned probe-then-set
	 * commit will replace this hardcoded assignment with a runtime
	 * read of the kernel's accepted START_CODE value.
	 */
	context_object->h264_start_code = true;

	rc = v4l2_set_stream(driver_data->video_fd, output_type, true);
	if (rc < 0) {
		status = VA_STATUS_ERROR_OPERATION_FAILED;
		goto error;
	}

	rc = v4l2_set_stream(driver_data->video_fd, capture_type, true);
	if (rc < 0) {
		/*
		 * Roll back the OUTPUT STREAMON issued just above so a
		 * failed create does not leave that queue streaming with
		 * no context attached. Best effort: the original failure
		 * is what gets reported.
		 */
		(void)v4l2_set_stream(driver_data->video_fd, output_type,
				      false);
		status = VA_STATUS_ERROR_OPERATION_FAILED;
		goto error;
	}

	context_object->config_id = config_id;
	context_object->render_surface_id = VA_INVALID_ID;
	context_object->surfaces_ids = ids;
	context_object->surfaces_count = surfaces_count;
	context_object->picture_width = picture_width;
	context_object->picture_height = picture_height;
	context_object->flags = flags;

	*context_id = id;

	status = VA_STATUS_SUCCESS;
	goto complete;

error:
	/* free(NULL) is a no-op, so no guard is needed. */
	free(ids);

	if (context_object != NULL)
		object_heap_free(&driver_data->context_heap,
				 (struct object_base *)context_object);

complete:
	return status;
}
/*
 * Destroy a decoding context.
 *
 * Stops streaming on the OUTPUT and CAPTURE queues, destroys the surfaces
 * recorded at context creation, frees the context object, releases the
 * OUTPUT buffers, tears down the CAPTURE pool and invalidates the surface
 * format cache.
 *
 * Failures before the context object is freed return immediately and leave
 * the context in place. Once the context object has been freed, teardown
 * always runs to completion; a failure in that phase is reported through
 * the return value only.
 */
VAStatus RequestDestroyContext(VADriverContextP context, VAContextID context_id)
{
	struct request_data *driver_data = context->pDriverData;
	struct object_context *context_object;
	struct video_format *video_format;
	unsigned int output_type, capture_type;
	VAStatus status;
	int rc;

	video_format = driver_data->video_format;
	if (video_format == NULL)
		return VA_STATUS_ERROR_OPERATION_FAILED;

	output_type = v4l2_type_video_output(video_format->v4l2_mplane);
	capture_type = v4l2_type_video_capture(video_format->v4l2_mplane);

	context_object = CONTEXT(driver_data, context_id);
	if (context_object == NULL)
		return VA_STATUS_ERROR_INVALID_CONTEXT;

	rc = v4l2_set_stream(driver_data->video_fd, output_type, false);
	if (rc < 0)
		return VA_STATUS_ERROR_OPERATION_FAILED;

	rc = v4l2_set_stream(driver_data->video_fd, capture_type, false);
	if (rc < 0)
		return VA_STATUS_ERROR_OPERATION_FAILED;

	/* Buffers liberation */
	status = RequestDestroySurfaces(context, context_object->surfaces_ids,
					context_object->surfaces_count);
	if (status != VA_STATUS_SUCCESS)
		return VA_STATUS_ERROR_OPERATION_FAILED;

	free(context_object->surfaces_ids);
	object_heap_free(&driver_data->context_heap,
			 (struct object_base *)context_object);

	/*
	 * The context object is gone from here on, so the caller cannot
	 * retry this call. Record a REQBUFS(0) failure instead of returning
	 * early: the CAPTURE pool teardown and the format-cache reset below
	 * must still happen.
	 */
	status = VA_STATUS_SUCCESS;

	rc = v4l2_request_buffers(driver_data->video_fd, output_type, 0);
	if (rc < 0)
		status = VA_STATUS_ERROR_OPERATION_FAILED;

	/*
	 * Iter2 Fix 3: cap_pool owns the CAPTURE buffers' mmaps + any
	 * outstanding our_export_fds. Tear it down (which also issues
	 * REQBUFS(0) on CAPTURE), so the next CreateSurfaces2 cycle sees
	 * a clean slate and rebuilds the pool at the new resolution.
	 */
	cap_pool_destroy(&driver_data->capture_pool, driver_data->video_fd,
			 capture_type);

	/*
	 * Iteration 2 Fix 1: the kernel CAPTURE format state is no longer
	 * guaranteed after the dual REQBUFS(0). Invalidate the
	 * LAST_OUTPUT_WIDTH/HEIGHT cache so the next CreateSurfaces2 will
	 * unconditionally re-S_FMT on OUTPUT. Without this, multi-video
	 * Firefox sessions on mozilla.org corrupted the next session's
	 * CAPTURE format query (kernel returned 48x48 instead of the
	 * cached "already 1920x1088"); the exported descriptor encoded
	 * wrong pitch/offset.
	 */
	surface_reset_format_cache();

	return status;
}