Files
libva-v4l2-request-fourier/src/picture.c
T
claude-noether 522fb6daa5 iter14 α-16: env-gated OUTPUT bitstream byte dump pre-QBUF
LIBVA_V4L2_DUMP_OUTPUT=<dir> writes source_data[0..slices_size] to
<dir>/output_p<profile>_s<surface>_t<ts>.bin immediately before
v4l2_queue_buffer OUTPUT. Discriminates whether libva writes the
correct H.264/HEVC bitstream bytes (same as kdirect/input file).

Off by default. Wrapped in static-cache env check.

iter11+12+13 confirmed Bug 4/5 are not in S_EXT_CTRLS payload, not
in kernel substrate (RFC v2), not in CPU cache visibility (α-17 sync
ioctl works but inert). The remaining libva-side surface is the
actual bitstream bytes the kernel reads.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-14 08:19:29 +00:00

557 lines
17 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/*
* Copyright (C) 2007 Intel Corporation
* Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com>
* Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "picture.h"
#include "buffer.h"
#include "config.h"
#include "context.h"
#include "request.h"
#include "surface.h"
#include "h264.h"
#include "h265.h"
#include "mpeg2.h"
#include "vp8.h"
#include "vp9.h"
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <sys/ioctl.h>
#include <linux/videodev2.h>
#include "media.h"
#include "utils.h"
#include "v4l2.h"
#include "autoconfig.h"
/*
 * Copy the payload of one VA buffer (submitted through RenderPicture) into
 * the per-surface staging state that is consumed when the picture ends.
 *
 * - Slice data is appended to the surface's OUTPUT bitstream buffer
 *   (source_data), preceded by a 3-byte start code when
 *   context->h264_start_code is set.
 * - Picture, slice, IQ-matrix and probability parameter buffers are copied
 *   into the member of surface_object->params selected by profile.
 * - Buffer types or profiles without a matching case are silently ignored.
 *
 * Returns VA_STATUS_SUCCESS, or VA_STATUS_ERROR_OPERATION_FAILED when the
 * slice data (plus optional start code) does not fit in the space left in
 * source_data. In that case nothing is written and the slice counters are
 * left untouched.
 */
static VAStatus codec_store_buffer(struct request_data *driver_data,
				   struct object_context *context,
				   VAProfile profile,
				   struct object_surface *surface_object,
				   struct object_buffer *buffer_object)
{
	switch (buffer_object->type) {
	case VASliceDataBufferType: {
		/*
		 * Since there is no guarantee that the allocation
		 * order is the same as the submission order (via
		 * RenderPicture), we can't use a V4L2 buffer directly
		 * and have to copy from a regular buffer.
		 */
		static const char start_code[3] = { 0x00, 0x00, 0x01 };
		size_t capacity = surface_object->source_size;
		size_t used = surface_object->slices_size;
		size_t prefix_size =
			context->h264_start_code ? sizeof(start_code) : 0;
		size_t payload_size =
			(size_t)buffer_object->size * buffer_object->count;

		/* Reject a size * count product that wrapped around. */
		if (buffer_object->count != 0 &&
		    payload_size / buffer_object->count != buffer_object->size)
			return VA_STATUS_ERROR_OPERATION_FAILED;

		/*
		 * Bounds check written with subtractions only, so that none
		 * of the intermediate sums can overflow.
		 */
		if (surface_object->source_data == NULL || used > capacity ||
		    prefix_size > capacity - used ||
		    payload_size > capacity - used - prefix_size)
			return VA_STATUS_ERROR_OPERATION_FAILED;

		if (prefix_size != 0) {
			memcpy(surface_object->source_data +
				       surface_object->slices_size,
			       start_code, sizeof(start_code));
			surface_object->slices_size += sizeof(start_code);
		}

		memcpy(surface_object->source_data +
			       surface_object->slices_size,
		       buffer_object->data, payload_size);
		surface_object->slices_size += payload_size;
		surface_object->slices_count++;
		break;
	}

	case VAPictureParameterBufferType:
		switch (profile) {
		case VAProfileMPEG2Simple:
		case VAProfileMPEG2Main:
			memcpy(&surface_object->params.mpeg2.picture,
			       buffer_object->data,
			       sizeof(surface_object->params.mpeg2.picture));
			break;

		case VAProfileH264Main:
		case VAProfileH264High:
		case VAProfileH264ConstrainedBaseline:
		case VAProfileH264MultiviewHigh:
		case VAProfileH264StereoHigh:
			memcpy(&surface_object->params.h264.picture,
			       buffer_object->data,
			       sizeof(surface_object->params.h264.picture));
			break;

		case VAProfileHEVCMain:
			memcpy(&surface_object->params.h265.picture,
			       buffer_object->data,
			       sizeof(surface_object->params.h265.picture));
			break;

		case VAProfileVP8Version0_3:
			memcpy(&surface_object->params.vp8.picture,
			       buffer_object->data,
			       sizeof(surface_object->params.vp8.picture));
			break;

		case VAProfileVP9Profile0:
			memcpy(&surface_object->params.vp9.picture,
			       buffer_object->data,
			       sizeof(surface_object->params.vp9.picture));
			break;

		default:
			break;
		}
		break;

	case VASliceParameterBufferType:
		switch (profile) {
		case VAProfileH264Main:
		case VAProfileH264High:
		case VAProfileH264ConstrainedBaseline:
		case VAProfileH264MultiviewHigh:
		case VAProfileH264StereoHigh:
			memcpy(&surface_object->params.h264.slice,
			       buffer_object->data,
			       sizeof(surface_object->params.h264.slice));
			break;

		case VAProfileHEVCMain: {
			unsigned int n = surface_object->params.h265.num_slices;

			/*
			 * Slice parameters beyond HEVC_MAX_SLICES_PER_FRAME
			 * are not recorded in the per-slice array; only the
			 * .slice mirror below is updated for them.
			 */
			if (n < HEVC_MAX_SLICES_PER_FRAME) {
				memcpy(&surface_object->params.h265.slices[n],
				       buffer_object->data,
				       sizeof(VASliceParameterBufferHEVC));
				surface_object->params.h265.num_slices = n + 1;
			}

			/* Keep .slice mirror populated as last-slice ref for
			 * h265_fill_pps which reads dependent_slice_segment_flag */
			memcpy(&surface_object->params.h265.slice,
			       buffer_object->data,
			       sizeof(surface_object->params.h265.slice));
			break;
		}

		case VAProfileVP8Version0_3:
			memcpy(&surface_object->params.vp8.slice,
			       buffer_object->data,
			       sizeof(surface_object->params.vp8.slice));
			break;

		case VAProfileVP9Profile0:
			memcpy(&surface_object->params.vp9.slice,
			       buffer_object->data,
			       sizeof(surface_object->params.vp9.slice));
			break;

		default:
			break;
		}
		break;

	case VAIQMatrixBufferType:
		/* Each copy also raises the matching *_set flag. */
		switch (profile) {
		case VAProfileMPEG2Simple:
		case VAProfileMPEG2Main:
			memcpy(&surface_object->params.mpeg2.iqmatrix,
			       buffer_object->data,
			       sizeof(surface_object->params.mpeg2.iqmatrix));
			surface_object->params.mpeg2.iqmatrix_set = true;
			break;

		case VAProfileH264Main:
		case VAProfileH264High:
		case VAProfileH264ConstrainedBaseline:
		case VAProfileH264MultiviewHigh:
		case VAProfileH264StereoHigh:
			memcpy(&surface_object->params.h264.matrix,
			       buffer_object->data,
			       sizeof(surface_object->params.h264.matrix));
			surface_object->params.h264.matrix_set = true;
			break;

		case VAProfileHEVCMain:
			memcpy(&surface_object->params.h265.iqmatrix,
			       buffer_object->data,
			       sizeof(surface_object->params.h265.iqmatrix));
			surface_object->params.h265.iqmatrix_set = true;
			break;

		case VAProfileVP8Version0_3:
			memcpy(&surface_object->params.vp8.iqmatrix,
			       buffer_object->data,
			       sizeof(surface_object->params.vp8.iqmatrix));
			surface_object->params.vp8.iqmatrix_set = true;
			break;

		default:
			break;
		}
		break;

	case VAProbabilityBufferType:
		switch (profile) {
		case VAProfileVP8Version0_3:
			memcpy(&surface_object->params.vp8.probability,
			       buffer_object->data,
			       sizeof(surface_object->params.vp8.probability));
			surface_object->params.vp8.probability_set = true;
			break;

		default:
			break;
		}
		break;

	default:
		break;
	}

	return VA_STATUS_SUCCESS;
}
/*
 * Dispatch to the codec-specific *_set_controls() routine matching profile.
 *
 * Returns VA_STATUS_ERROR_UNSUPPORTED_PROFILE when no codec handles the
 * profile, VA_STATUS_ERROR_OPERATION_FAILED when the codec routine returns a
 * negative value, and VA_STATUS_SUCCESS otherwise.
 */
static VAStatus codec_set_controls(struct request_data *driver_data,
				   struct object_context *context,
				   VAProfile profile,
				   struct object_surface *surface_object)
{
	int ret;

	switch (profile) {
	case VAProfileMPEG2Simple:
	case VAProfileMPEG2Main:
		ret = mpeg2_set_controls(driver_data, context, surface_object);
		break;

	case VAProfileH264Main:
	case VAProfileH264High:
	case VAProfileH264ConstrainedBaseline:
	case VAProfileH264MultiviewHigh:
	case VAProfileH264StereoHigh:
		/* H.264 is the only codec routine that also takes the profile. */
		ret = h264_set_controls(driver_data, context, profile,
					surface_object);
		break;

	case VAProfileHEVCMain:
		ret = h265_set_controls(driver_data, context, surface_object);
		break;

	case VAProfileVP8Version0_3:
		ret = vp8_set_controls(driver_data, context, surface_object);
		break;

	case VAProfileVP9Profile0:
		ret = vp9_set_controls(driver_data, context, surface_object);
		break;

	default:
		return VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
	}

	/* Every codec routine reports failure with a negative return. */
	if (ret < 0)
		return VA_STATUS_ERROR_OPERATION_FAILED;

	return VA_STATUS_SUCCESS;
}
/*
 * Start a decode cycle on surface_id for the given context.
 *
 * Binds a CAPTURE-pool slot and borrows an OUTPUT-pool (bitstream) slot for
 * the surface, resets the per-picture accumulation state, marks the surface
 * as VASurfaceRendering and records it as the context's render target.
 *
 * Returns VA_STATUS_ERROR_INVALID_CONTEXT or VA_STATUS_ERROR_INVALID_SURFACE
 * when the respective ID does not resolve, VA_STATUS_ERROR_ALLOCATION_FAILED
 * when either pool has no slot to hand out, and VA_STATUS_SUCCESS otherwise.
 * When the OUTPUT slot cannot be obtained, the CAPTURE slot bound earlier in
 * this call is unbound again before returning.
 */
VAStatus RequestBeginPicture(VADriverContextP context, VAContextID context_id,
			     VASurfaceID surface_id)
{
	struct request_data *driver_data = context->pDriverData;
	struct object_context *context_object;
	struct object_surface *surface_object;
	struct cap_pool_slot *cap_slot;
	struct request_pool_slot *slot;
	int slot_index;

	context_object = CONTEXT(driver_data, context_id);
	if (context_object == NULL)
		return VA_STATUS_ERROR_INVALID_CONTEXT;

	surface_object = SURFACE(driver_data, surface_id);
	if (surface_object == NULL)
		return VA_STATUS_ERROR_INVALID_SURFACE;

	/* Best effort: the sync result is intentionally not checked here. */
	if (surface_object->status == VASurfaceRendering)
		RequestSyncSurface(context, surface_id);

	/*
	 * Iter2 Fix 3: acquire a CAPTURE-pool slot for this decode cycle.
	 * If the surface still holds a slot from a prior decode (DECODED
	 * or EXPORTED — the consumer is done with it by definition since
	 * we got back to BeginPicture for the same surface), release it
	 * first. The new slot is bound and its V4L2 index + mmap pointers
	 * are mirrored into surface_object->destination_* so the existing
	 * QBUF/DQBUF/EXPBUF code paths see no behavioral change.
	 */
	if (surface_object->current_slot != NULL)
		surface_unbind_slot(driver_data, surface_object);

	cap_slot = cap_pool_acquire(&driver_data->capture_pool, surface_id);
	if (cap_slot == NULL)
		return VA_STATUS_ERROR_ALLOCATION_FAILED;

	surface_bind_slot(surface_object, cap_slot);

	/*
	 * iter8 Phase 7 IMP-1 experiment: env-gated CAPTURE buffer
	 * pre-zero. LIBVA_V4L2_ZERO_CAPTURE=1 wipes the slot's mmap'd
	 * region before kernel decode. Discriminates "kernel writes
	 * partial then aborts" from "kernel writes nothing and we
	 * see stale residue."
	 */
	{
		/* The environment is read once and cached for the process. */
		static const char *zero_env = NULL;
		static bool zero_env_checked = false;

		if (!zero_env_checked) {
			zero_env = getenv("LIBVA_V4L2_ZERO_CAPTURE");
			zero_env_checked = true;
		}

		if (zero_env != NULL && zero_env[0] == '1') {
			unsigned int b;

			for (b = 0; b < cap_slot->buffers_count; b++) {
				if (cap_slot->map[b] != NULL)
					memset(cap_slot->map[b], 0,
					       cap_slot->map_lengths[b]);
			}
		}
	}

	/*
	 * Borrow an OUTPUT (bitstream-input) slot from the driver-wide
	 * pool for the duration of this Begin/Render/End cycle. The
	 * surface's source_* fields hold the borrow's mmap pointer/size/
	 * V4L2 buffer index until RequestSyncSurface releases it after
	 * VIDIOC_DQBUF.
	 */
	slot_index = request_pool_acquire(&driver_data->output_pool);
	if (slot_index < 0)
		goto error_unbind;

	slot = request_pool_slot(&driver_data->output_pool,
				 (unsigned int)slot_index);
	if (slot == NULL) {
		request_pool_release(&driver_data->output_pool,
				     (unsigned int)slot_index);
		goto error_unbind;
	}

	surface_object->source_index = slot->index;
	surface_object->source_data = slot->data;
	surface_object->source_size = slot->size;

	/*
	 * iter6: bind the slot's permanent request_fd to this surface for the
	 * duration of the decode cycle. Replaces the iter4 close+alloc-per-
	 * frame model. The fd is REINIT'd (not closed) at RequestSyncSurface,
	 * so the kernel-side request object is reset in place — no fd-reuse
	 * race with another slot's pending decode.
	 */
	surface_object->request_fd = slot->request_fd;

	/* Reset the per-picture state accumulated by RenderPicture. */
	surface_object->slices_size = 0;
	surface_object->slices_count = 0;
	surface_object->params.h264.matrix_set = false;
	surface_object->params.h265.num_slices = 0;
	surface_object->params.vp8.iqmatrix_set = false;
	surface_object->params.vp8.probability_set = false;
	/*
	 * NOTE(review): mpeg2.iqmatrix_set and h265.iqmatrix_set are not
	 * reset here although RenderPicture sets them — confirm whether
	 * that is intentional.
	 */

	surface_object->status = VASurfaceRendering;
	context_object->render_surface_id = surface_id;

	return VA_STATUS_SUCCESS;

error_unbind:
	/*
	 * No OUTPUT slot is available, so this decode cycle cannot start.
	 * Give back the CAPTURE slot bound above instead of leaving it held
	 * by a surface that never enters VASurfaceRendering.
	 */
	if (surface_object->current_slot != NULL)
		surface_unbind_slot(driver_data, surface_object);

	return VA_STATUS_ERROR_ALLOCATION_FAILED;
}
/*
 * Feed a batch of VA buffers into the picture currently being assembled on
 * the context's render surface.
 *
 * Each buffer ID is resolved in order and handed to codec_store_buffer()
 * together with the profile of the context's config. Processing stops at the
 * first unresolved buffer (VA_STATUS_ERROR_INVALID_BUFFER) or at the first
 * non-success status from codec_store_buffer(), which is returned as-is.
 *
 * Returns VA_STATUS_ERROR_INVALID_CONTEXT, VA_STATUS_ERROR_INVALID_CONFIG or
 * VA_STATUS_ERROR_INVALID_SURFACE when the respective object lookup fails,
 * and VA_STATUS_SUCCESS when every buffer was stored.
 */
VAStatus RequestRenderPicture(VADriverContextP context, VAContextID context_id,
			      VABufferID *buffers_ids, int buffers_count)
{
	struct request_data *driver_data = context->pDriverData;
	struct object_context *context_object;
	struct object_config *config_object;
	struct object_surface *surface_object;
	int index;

	context_object = CONTEXT(driver_data, context_id);
	if (context_object == NULL)
		return VA_STATUS_ERROR_INVALID_CONTEXT;

	config_object = CONFIG(driver_data, context_object->config_id);
	if (config_object == NULL)
		return VA_STATUS_ERROR_INVALID_CONFIG;

	/* The target surface is the one recorded by RequestBeginPicture. */
	surface_object =
		SURFACE(driver_data, context_object->render_surface_id);
	if (surface_object == NULL)
		return VA_STATUS_ERROR_INVALID_SURFACE;

	for (index = 0; index < buffers_count; index++) {
		struct object_buffer *buffer_object =
			BUFFER(driver_data, buffers_ids[index]);
		VAStatus status;

		if (buffer_object == NULL)
			return VA_STATUS_ERROR_INVALID_BUFFER;

		status = codec_store_buffer(driver_data, context_object,
					    config_object->profile,
					    surface_object, buffer_object);
		if (status != VA_STATUS_SUCCESS)
			return status;
	}

	return VA_STATUS_SUCCESS;
}
VAStatus RequestEndPicture(VADriverContextP context, VAContextID context_id)
{
struct request_data *driver_data = context->pDriverData;
struct object_context *context_object;
struct object_config *config_object;
struct object_surface *surface_object;
struct video_format *video_format;
unsigned int output_type, capture_type;
int request_fd;
VAStatus status;
int rc;
video_format = driver_data->video_format;
if (video_format == NULL)
return VA_STATUS_ERROR_OPERATION_FAILED;
output_type = v4l2_type_video_output(video_format->v4l2_mplane);
capture_type = v4l2_type_video_capture(video_format->v4l2_mplane);
context_object = CONTEXT(driver_data, context_id);
if (context_object == NULL)
return VA_STATUS_ERROR_INVALID_CONTEXT;
config_object = CONFIG(driver_data, context_object->config_id);
if (config_object == NULL)
return VA_STATUS_ERROR_INVALID_CONFIG;
surface_object =
SURFACE(driver_data, context_object->render_surface_id);
if (surface_object == NULL)
return VA_STATUS_ERROR_INVALID_SURFACE;
/*
* iter9 α-7: monotonic per-context counter instead of gettimeofday,
* so DPB.reference_ts / OUTPUT QBUF ts stay small (matches
* ffmpeg-v4l2request's pattern). gettimeofday's giant ns may or may
* not be load-bearing for rkvdec's reference resolution — Phase 5
* reviewer flagged this as low-probability (VP9/MPEG-2 use the same
* pattern and PASS), but this is the only remaining wire-byte diff.
*/
context_object->timestamp_counter++;
surface_object->timestamp.tv_sec =
(time_t)(context_object->timestamp_counter / 1000000);
surface_object->timestamp.tv_usec =
(suseconds_t)(context_object->timestamp_counter % 1000000);
/*
* iter6: request_fd was bound to the surface in BeginPicture from
* the OUTPUT pool slot's permanent fd. Per-frame allocation is gone.
*/
request_fd = surface_object->request_fd;
if (request_fd < 0)
return VA_STATUS_ERROR_OPERATION_FAILED;
rc = codec_set_controls(driver_data, context_object,
config_object->profile, surface_object);
if (rc != VA_STATUS_SUCCESS)
return rc;
/*
* iter14 α-16: env-gated dump of OUTPUT bitstream bytes immediately
* before QBUF. LIBVA_V4L2_DUMP_OUTPUT=<dir> writes source_data[0..
* slices_size] to <dir>/output_<profile>_<surface>_<frame>.bin.
* Discriminates whether libva writes the same H.264/HEVC slice bytes
* as kdirect — if YES, Bug 4/5 are not in the OUTPUT-side; if NO,
* narrow to which slice-write path produces the divergence.
*
* Off by default; no behavior change when env unset.
*/
{
static const char *dump_env = NULL;
static bool dump_env_checked = false;
if (!dump_env_checked) {
dump_env = getenv("LIBVA_V4L2_DUMP_OUTPUT");
dump_env_checked = true;
}
if (dump_env != NULL && dump_env[0] != '\0' &&
surface_object->source_data != NULL &&
surface_object->slices_size > 0) {
char path[256];
snprintf(path, sizeof(path),
"%s/output_p%d_s%u_t%llu.bin",
dump_env, (int)config_object->profile,
(unsigned int)surface_object->base.id,
(unsigned long long)context_object->timestamp_counter);
FILE *fp = fopen(path, "wb");
if (fp != NULL) {
size_t w = fwrite(surface_object->source_data,
1, surface_object->slices_size,
fp);
request_log("α-16: dumped %zu bytes to %s "
"(slices_count=%u)\n",
w, path,
surface_object->slices_count);
fclose(fp);
} else {
request_log("α-16: fopen(%s) failed: %s\n",
path, strerror(errno));
}
}
}
rc = v4l2_queue_buffer(driver_data->video_fd, -1, capture_type, NULL,
surface_object->destination_index, 0,
surface_object->destination_buffers_count);
if (rc < 0)
return VA_STATUS_ERROR_OPERATION_FAILED;
rc = v4l2_queue_buffer(driver_data->video_fd, request_fd, output_type,
&surface_object->timestamp,
surface_object->source_index,
surface_object->slices_size, 1);
if (rc < 0)
return VA_STATUS_ERROR_OPERATION_FAILED;
surface_object->slices_size = 0;
status = RequestSyncSurface(context, context_object->render_surface_id);
if (status != VA_STATUS_SUCCESS)
return status;
context_object->render_surface_id = VA_INVALID_ID;
return VA_STATUS_SUCCESS;
}