forked from marfrit/libva-v4l2-request-fourier
a09c03c154
iter4 (385dee1) replaced the original media_request_reinit pattern with
close+media_request_alloc per frame to escape an EINVAL on S_EXT_CTRLS that
turned out to be a DPB-payload bug (74d8dd1, FFmpeg V4L2_H264_FRAME_REF
semantics). The per-frame close+alloc model worked for mpv vaapi-copy
(single-surface recycle) but raced under Firefox 150's MediaSource pipeline
(multi-surface rotation): fd=30 got reused via lowest-free-fd allocation
faster than the kernel-side per-buffer state-machine could tear down the
prior request, producing intermittent VIDIOC_QBUF EINVAL on OUTPUT after
1..53 successful frames.

Phase 2 telemetry confirmed:
- DQBUF returned the index we passed (no FIFO mismatch)
- SPS/PPS/DECODE_PARAMS/SCALING_MATRIX byte-identical between mpv and
  Firefox first 64 bytes
- Pool size bump 4 -> 16 only delayed the failure (62 frames)
- Different OUTPUT slot indices failed across runs (race signature)

Fix: each OUTPUT pool slot owns a permanent request_fd allocated once at
request_pool_init and REINIT'd between uses in RequestSyncSurface. 1:1
slot-to-fd binding eliminates cross-slot fd reuse entirely. Pool stays
driver-wide (multi-context safe per iter5 Track E); slots cycle through 16
distinct fds in round-robin acquire.
Files:
- request_pool.h: add request_fd field to slot struct; init signature takes
  media_fd
- request_pool.c: alloc per-slot fd at init, close at destroy
- context.c: pass driver_data->media_fd; pool size 4 -> 16
- picture.c: BeginPicture binds slot->request_fd to surface; EndPicture's
  per-frame media_request_alloc removed
- surface.c: RequestSyncSurface uses media_request_reinit instead of
  close+alloc; DestroySurfaces close removed (slot owns fd); error path close
  removed; surface_object NULL-init for the -Wmaybe-uninitialized warning fix

Empirical verification (clean build sha ebe396d5..., no diagnostic
instrumentation):
- Firefox 150 + bbb_1080p30_h264.mp4 + LIBVA_DRIVER_NAME=v4l2_request +
  sandbox enabled: 35s+ playback, zero "Unable to queue buffer" /
  "Unable to set control(s)", lsof shows RDD process holds /dev/video1 +
  /dev/media0 throughout. Driver stderr: only the single
  cap_pool_init: 24 slots ready line.
- mpv vaapi-copy 50 frames: zero errors,
  "Using hardware decoding (vaapi-copy)" - no regression vs iter5-end driver.

Pool-size bump diagnostic (Phase 5 sonnet design review feedback): 4 -> 16
alone took 1->62 frames, far short of the 30s success criterion (~900 frames
at 30fps). REINIT discipline is the actual fix; pool 16 is comfortable
headroom over typical H.264 MaxDpbFrames.

Phase 5 sonnet code review: APPROVE-WITH-CHANGES (one comment attribution
corrected: cleanup runs at RequestTerminate, not RequestDestroyContext, since
the pool is driver-wide).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
404 lines
12 KiB
C
404 lines
12 KiB
C
/*
|
|
* Copyright (C) 2007 Intel Corporation
|
|
* Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com>
|
|
* Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the
|
|
* "Software"), to deal in the Software without restriction, including
|
|
* without limitation the rights to use, copy, modify, merge, publish,
|
|
* distribute, sub license, and/or sell copies of the Software, and to
|
|
* permit persons to whom the Software is furnished to do so, subject to
|
|
* the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice (including the
|
|
* next paragraph) shall be included in all copies or substantial portions
|
|
* of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
|
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
|
|
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
|
|
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
*/
|
|
|
|
#include "picture.h"
|
|
#include "buffer.h"
|
|
#include "config.h"
|
|
#include "context.h"
|
|
#include "request.h"
|
|
#include "surface.h"
|
|
|
|
#include "h264.h"
|
|
#include "h265.h"
|
|
#include "mpeg2.h"
|
|
|
|
#include <assert.h>
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
|
|
#include <errno.h>
|
|
|
|
#include <sys/ioctl.h>
|
|
|
|
#include <linux/videodev2.h>
|
|
|
|
#include "media.h"
|
|
#include "utils.h"
|
|
#include "v4l2.h"
|
|
|
|
#include "autoconfig.h"
|
|
|
|
static VAStatus codec_store_buffer(struct request_data *driver_data,
|
|
struct object_context *context,
|
|
VAProfile profile,
|
|
struct object_surface *surface_object,
|
|
struct object_buffer *buffer_object)
|
|
{
|
|
switch (buffer_object->type) {
|
|
case VASliceDataBufferType:
|
|
/*
|
|
* Since there is no guarantee that the allocation
|
|
* order is the same as the submission order (via
|
|
* RenderPicture), we can't use a V4L2 buffer directly
|
|
* and have to copy from a regular buffer.
|
|
*/
|
|
if (context->h264_start_code) {
|
|
static const char start_code[3] = { 0x00, 0x00, 0x01 };
|
|
|
|
memcpy(surface_object->source_data +
|
|
surface_object->slices_size,
|
|
start_code, sizeof(start_code));
|
|
surface_object->slices_size += sizeof(start_code);
|
|
}
|
|
memcpy(surface_object->source_data +
|
|
surface_object->slices_size,
|
|
buffer_object->data,
|
|
buffer_object->size * buffer_object->count);
|
|
surface_object->slices_size +=
|
|
buffer_object->size * buffer_object->count;
|
|
surface_object->slices_count++;
|
|
break;
|
|
|
|
case VAPictureParameterBufferType:
|
|
switch (profile) {
|
|
case VAProfileMPEG2Simple:
|
|
case VAProfileMPEG2Main:
|
|
memcpy(&surface_object->params.mpeg2.picture,
|
|
buffer_object->data,
|
|
sizeof(surface_object->params.mpeg2.picture));
|
|
break;
|
|
|
|
case VAProfileH264Main:
|
|
case VAProfileH264High:
|
|
case VAProfileH264ConstrainedBaseline:
|
|
case VAProfileH264MultiviewHigh:
|
|
case VAProfileH264StereoHigh:
|
|
memcpy(&surface_object->params.h264.picture,
|
|
buffer_object->data,
|
|
sizeof(surface_object->params.h264.picture));
|
|
break;
|
|
|
|
case VAProfileHEVCMain:
|
|
memcpy(&surface_object->params.h265.picture,
|
|
buffer_object->data,
|
|
sizeof(surface_object->params.h265.picture));
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
break;
|
|
|
|
case VASliceParameterBufferType:
|
|
switch (profile) {
|
|
case VAProfileH264Main:
|
|
case VAProfileH264High:
|
|
case VAProfileH264ConstrainedBaseline:
|
|
case VAProfileH264MultiviewHigh:
|
|
case VAProfileH264StereoHigh:
|
|
memcpy(&surface_object->params.h264.slice,
|
|
buffer_object->data,
|
|
sizeof(surface_object->params.h264.slice));
|
|
break;
|
|
|
|
case VAProfileHEVCMain:
|
|
memcpy(&surface_object->params.h265.slice,
|
|
buffer_object->data,
|
|
sizeof(surface_object->params.h265.slice));
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
break;
|
|
|
|
case VAIQMatrixBufferType:
|
|
switch (profile) {
|
|
case VAProfileMPEG2Simple:
|
|
case VAProfileMPEG2Main:
|
|
memcpy(&surface_object->params.mpeg2.iqmatrix,
|
|
buffer_object->data,
|
|
sizeof(surface_object->params.mpeg2.iqmatrix));
|
|
surface_object->params.mpeg2.iqmatrix_set = true;
|
|
break;
|
|
|
|
case VAProfileH264Main:
|
|
case VAProfileH264High:
|
|
case VAProfileH264ConstrainedBaseline:
|
|
case VAProfileH264MultiviewHigh:
|
|
case VAProfileH264StereoHigh:
|
|
memcpy(&surface_object->params.h264.matrix,
|
|
buffer_object->data,
|
|
sizeof(surface_object->params.h264.matrix));
|
|
surface_object->params.h264.matrix_set = true;
|
|
break;
|
|
|
|
case VAProfileHEVCMain:
|
|
memcpy(&surface_object->params.h265.iqmatrix,
|
|
buffer_object->data,
|
|
sizeof(surface_object->params.h265.iqmatrix));
|
|
surface_object->params.h265.iqmatrix_set = true;
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
|
|
return VA_STATUS_SUCCESS;
|
|
}
|
|
|
|
static VAStatus codec_set_controls(struct request_data *driver_data,
|
|
struct object_context *context,
|
|
VAProfile profile,
|
|
struct object_surface *surface_object)
|
|
{
|
|
int rc;
|
|
|
|
switch (profile) {
|
|
case VAProfileMPEG2Simple:
|
|
case VAProfileMPEG2Main:
|
|
rc = mpeg2_set_controls(driver_data, context, surface_object);
|
|
if (rc < 0)
|
|
return VA_STATUS_ERROR_OPERATION_FAILED;
|
|
break;
|
|
|
|
case VAProfileH264Main:
|
|
case VAProfileH264High:
|
|
case VAProfileH264ConstrainedBaseline:
|
|
case VAProfileH264MultiviewHigh:
|
|
case VAProfileH264StereoHigh:
|
|
rc = h264_set_controls(driver_data, context, profile,
|
|
surface_object);
|
|
if (rc < 0)
|
|
return VA_STATUS_ERROR_OPERATION_FAILED;
|
|
break;
|
|
|
|
case VAProfileHEVCMain:
|
|
/* Fourier-local: HEVC stripped, no HW support on RK3566. */
|
|
return VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
|
|
|
|
default:
|
|
return VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
|
|
}
|
|
|
|
return VA_STATUS_SUCCESS;
|
|
}
|
|
|
|
VAStatus RequestBeginPicture(VADriverContextP context, VAContextID context_id,
|
|
VASurfaceID surface_id)
|
|
{
|
|
struct request_data *driver_data = context->pDriverData;
|
|
struct object_context *context_object;
|
|
struct object_surface *surface_object;
|
|
struct request_pool_slot *slot;
|
|
int slot_index;
|
|
|
|
|
|
context_object = CONTEXT(driver_data, context_id);
|
|
if (context_object == NULL)
|
|
return VA_STATUS_ERROR_INVALID_CONTEXT;
|
|
|
|
surface_object = SURFACE(driver_data, surface_id);
|
|
if (surface_object == NULL)
|
|
return VA_STATUS_ERROR_INVALID_SURFACE;
|
|
|
|
if (surface_object->status == VASurfaceRendering)
|
|
RequestSyncSurface(context, surface_id);
|
|
|
|
/*
|
|
* Iter2 Fix 3: acquire a CAPTURE-pool slot for this decode cycle.
|
|
* If the surface still holds a slot from a prior decode (DECODED
|
|
* or EXPORTED — the consumer is done with it by definition since
|
|
* we got back to BeginPicture for the same surface), release it
|
|
* first. The new slot is bound and its V4L2 index + mmap pointers
|
|
* are mirrored into surface_object->destination_* so the existing
|
|
* QBUF/DQBUF/EXPBUF code paths see no behavioral change.
|
|
*/
|
|
if (surface_object->current_slot != NULL)
|
|
surface_unbind_slot(driver_data, surface_object);
|
|
{
|
|
struct cap_pool_slot *cap_slot =
|
|
cap_pool_acquire(&driver_data->capture_pool, surface_id);
|
|
if (cap_slot == NULL)
|
|
return VA_STATUS_ERROR_ALLOCATION_FAILED;
|
|
surface_bind_slot(surface_object, cap_slot);
|
|
}
|
|
|
|
/*
|
|
* Borrow an OUTPUT (bitstream-input) slot from the driver-wide
|
|
* pool for the duration of this Begin/Render/End cycle. The
|
|
* surface's source_* fields hold the borrow's mmap pointer/size/
|
|
* V4L2 buffer index until RequestSyncSurface releases it after
|
|
* VIDIOC_DQBUF.
|
|
*/
|
|
slot_index = request_pool_acquire(&driver_data->output_pool);
|
|
if (slot_index < 0)
|
|
return VA_STATUS_ERROR_ALLOCATION_FAILED;
|
|
|
|
slot = request_pool_slot(&driver_data->output_pool,
|
|
(unsigned int)slot_index);
|
|
if (slot == NULL) {
|
|
request_pool_release(&driver_data->output_pool,
|
|
(unsigned int)slot_index);
|
|
return VA_STATUS_ERROR_ALLOCATION_FAILED;
|
|
}
|
|
|
|
surface_object->source_index = slot->index;
|
|
surface_object->source_data = slot->data;
|
|
surface_object->source_size = slot->size;
|
|
/*
|
|
* iter6: bind the slot's permanent request_fd to this surface for the
|
|
* duration of the decode cycle. Replaces the iter4 close+alloc-per-
|
|
* frame model. The fd is REINIT'd (not closed) at RequestSyncSurface,
|
|
* so the kernel-side request object is reset in place — no fd-reuse
|
|
* race with another slot's pending decode.
|
|
*/
|
|
surface_object->request_fd = slot->request_fd;
|
|
surface_object->slices_size = 0;
|
|
surface_object->slices_count = 0;
|
|
surface_object->params.h264.matrix_set = false;
|
|
|
|
surface_object->status = VASurfaceRendering;
|
|
context_object->render_surface_id = surface_id;
|
|
|
|
return VA_STATUS_SUCCESS;
|
|
}
|
|
|
|
VAStatus RequestRenderPicture(VADriverContextP context, VAContextID context_id,
|
|
VABufferID *buffers_ids, int buffers_count)
|
|
{
|
|
struct request_data *driver_data = context->pDriverData;
|
|
struct object_context *context_object;
|
|
struct object_config *config_object;
|
|
struct object_surface *surface_object;
|
|
struct object_buffer *buffer_object;
|
|
int rc;
|
|
int i;
|
|
|
|
context_object = CONTEXT(driver_data, context_id);
|
|
if (context_object == NULL)
|
|
return VA_STATUS_ERROR_INVALID_CONTEXT;
|
|
|
|
config_object = CONFIG(driver_data, context_object->config_id);
|
|
if (config_object == NULL)
|
|
return VA_STATUS_ERROR_INVALID_CONFIG;
|
|
|
|
surface_object =
|
|
SURFACE(driver_data, context_object->render_surface_id);
|
|
if (surface_object == NULL)
|
|
return VA_STATUS_ERROR_INVALID_SURFACE;
|
|
|
|
for (i = 0; i < buffers_count; i++) {
|
|
buffer_object = BUFFER(driver_data, buffers_ids[i]);
|
|
if (buffer_object == NULL)
|
|
return VA_STATUS_ERROR_INVALID_BUFFER;
|
|
|
|
rc = codec_store_buffer(driver_data, context_object,
|
|
config_object->profile,
|
|
surface_object, buffer_object);
|
|
if (rc != VA_STATUS_SUCCESS)
|
|
return rc;
|
|
}
|
|
|
|
return VA_STATUS_SUCCESS;
|
|
}
|
|
|
|
/*
 * Submit the frame accumulated since BeginPicture and wait for it.
 *
 * Sequence, in this exact order:
 *   1. stamp the surface with the current time of day;
 *   2. take the request_fd that BeginPicture stored on the surface;
 *   3. set the codec controls via codec_set_controls();
 *   4. queue the CAPTURE buffer (no request fd, no timestamp);
 *   5. queue the OUTPUT buffer carrying slices_size bytes, with the
 *      request fd and the timestamp;
 *   6. clear slices_size and call RequestSyncSurface() on the surface;
 *   7. on success, mark the context as having no render surface.
 *
 * Returns VA_STATUS_ERROR_OPERATION_FAILED when no video format is set,
 * the surface has no valid request_fd, or either queue call fails;
 * VA_STATUS_ERROR_INVALID_CONTEXT / _CONFIG / _SURFACE for failed lookups;
 * otherwise whatever codec_set_controls() or RequestSyncSurface() report.
 * render_surface_id is only reset on the fully successful path.
 */
VAStatus RequestEndPicture(VADriverContextP context, VAContextID context_id)
{
	struct request_data *driver_data = context->pDriverData;
	struct video_format *video_format = driver_data->video_format;
	struct object_context *context_object;
	struct object_config *config_object;
	struct object_surface *surface_object;
	unsigned int output_type;
	unsigned int capture_type;
	VAStatus status;
	int request_fd;
	int rc;

	if (!video_format)
		return VA_STATUS_ERROR_OPERATION_FAILED;

	output_type = v4l2_type_video_output(video_format->v4l2_mplane);
	capture_type = v4l2_type_video_capture(video_format->v4l2_mplane);

	context_object = CONTEXT(driver_data, context_id);
	if (!context_object)
		return VA_STATUS_ERROR_INVALID_CONTEXT;

	config_object = CONFIG(driver_data, context_object->config_id);
	if (!config_object)
		return VA_STATUS_ERROR_INVALID_CONFIG;

	surface_object = SURFACE(driver_data,
				 context_object->render_surface_id);
	if (!surface_object)
		return VA_STATUS_ERROR_INVALID_SURFACE;

	gettimeofday(&surface_object->timestamp, NULL);

	/*
	 * iter6: the request fd comes from the surface, where BeginPicture
	 * stored the OUTPUT pool slot's fd. Nothing is allocated per frame
	 * here any more.
	 */
	request_fd = surface_object->request_fd;
	if (request_fd < 0)
		return VA_STATUS_ERROR_OPERATION_FAILED;

	status = codec_set_controls(driver_data, context_object,
				    config_object->profile, surface_object);
	if (status != VA_STATUS_SUCCESS)
		return status;

	/* CAPTURE side first: queued without a request fd or timestamp. */
	rc = v4l2_queue_buffer(driver_data->video_fd, -1, capture_type, NULL,
			       surface_object->destination_index, 0,
			       surface_object->destination_buffers_count);
	if (rc < 0)
		return VA_STATUS_ERROR_OPERATION_FAILED;

	/* OUTPUT side: the bitstream gathered during RenderPicture. */
	rc = v4l2_queue_buffer(driver_data->video_fd, request_fd, output_type,
			       &surface_object->timestamp,
			       surface_object->source_index,
			       surface_object->slices_size, 1);
	if (rc < 0)
		return VA_STATUS_ERROR_OPERATION_FAILED;

	surface_object->slices_size = 0;

	status = RequestSyncSurface(context,
				    context_object->render_surface_id);
	if (status != VA_STATUS_SUCCESS)
		return status;

	context_object->render_surface_id = VA_INVALID_ID;

	return VA_STATUS_SUCCESS;
}
|