Files
libva-v4l2-request-fourier/src/picture.c
T
claude-noether bfcb286031 picture: bounds-check codec_store_buffer slice writes against source_size
surface_object->source_data points at an OUTPUT-pool mmap of fixed
size source_size, negotiated by v4l2_query_buffer at request_pool_init
time (kernel sizeimage at S_FMT).  codec_store_buffer's
VASliceDataBufferType branch appended to it at three sites (H.264 Annex-B
start code, VP8 uncompressed-header pad, slice payload) without
consulting that capacity — a stream-level resolution upshift would walk
past the mmap and SIGSEGV inside the memcpy (mpv --hwdec=vaapi-copy on
the daedalus path, issue #13) or corrupt adjacent heap (Firefox RDD).

Add a check at each append site that fails the RenderPicture call with
VA_STATUS_ERROR_ALLOCATION_FAILED when slices_size+payload exceeds
source_size, and logs the over-budget request for postmortem.
libavcodec recreates the surface at the new dimensions on the next
BeginPicture, so a refused upshift slice is recoverable.

Doesn't address the root cause (surfaces should be re-created on
resolution change, or source_data should be grown on demand) but
removes the memory-safety hazard while the larger refactor waits.

Closes marfrit/libva-v4l2-request-fourier#13.
2026-05-21 12:14:48 +02:00

660 lines
21 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/*
* Copyright (C) 2007 Intel Corporation
* Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com>
* Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "picture.h"
#include "buffer.h"
#include "config.h"
#include "context.h"
#include "request.h"
#include "surface.h"
#include "h264.h"
#include "h265.h"
#include "mpeg2.h"
#include "vp8.h"
#include "vp9.h"
#include "av1.h"
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <sys/ioctl.h>
#include <linux/videodev2.h>
#include "media.h"
#include "utils.h"
#include "v4l2.h"
#include "autoconfig.h"
/*
 * Saturating size_t addition: returns a + b, or the largest size_t value
 * when the mathematical sum does not fit.
 */
static size_t codec_size_add_sat(size_t a, size_t b)
{
	const size_t size_max = (size_t)-1;

	return b > size_max - a ? size_max : a + b;
}

/*
 * Saturating size_t multiplication: returns a * b, or the largest size_t
 * value when the mathematical product does not fit.
 */
static size_t codec_size_mul_sat(size_t a, size_t b)
{
	const size_t size_max = (size_t)-1;

	if (a != 0 && b > size_max / a)
		return size_max;

	return a * b;
}

/*
 * Store one VA buffer submitted through RenderPicture into the per-surface
 * decode state.
 *
 * Slice data is appended to the surface's OUTPUT (bitstream) buffer;
 * picture/slice/IQ-matrix/probability parameter buffers are copied into the
 * codec-specific member of surface_object->params selected by @profile.
 * Buffer types or profiles that are not handled are silently ignored.
 *
 * @driver_data:    driver instance (not used by this function)
 * @context:        decoding context; only h264_start_code is read
 * @profile:        VA profile of the active configuration
 * @surface_object: render target whose source_data/params are filled in
 * @buffer_object:  the VA buffer to store
 *
 * Returns VA_STATUS_SUCCESS, or VA_STATUS_ERROR_ALLOCATION_FAILED when the
 * slice data (plus any prefix) does not fit in the OUTPUT buffer. In the
 * failure case nothing is written and slices_size is left unchanged.
 */
static VAStatus codec_store_buffer(struct request_data *driver_data,
				   struct object_context *context,
				   VAProfile profile,
				   struct object_surface *surface_object,
				   struct object_buffer *buffer_object)
{
	switch (buffer_object->type) {
	case VASliceDataBufferType: {
		/*
		 * Since there is no guarantee that the allocation
		 * order is the same as the submission order (via
		 * RenderPicture), we can't use a V4L2 buffer directly
		 * and have to copy from a regular buffer.
		 *
		 * Bounds check (issue #13): surface_object->source_data
		 * points at an OUTPUT-pool mmap of fixed size source_size,
		 * negotiated at S_FMT time. A stream-level resolution
		 * upshift can produce a slice larger than this allocation.
		 *
		 * All sizes are computed with saturating arithmetic so the
		 * comparison against source_size cannot be defeated by
		 * wrap-around (a saturated value always exceeds the length
		 * of a real mapping), and every check runs BEFORE the first
		 * byte is written, so a refused slice leaves the bitstream
		 * accumulated so far untouched instead of ending in a
		 * dangling start code or header pad.
		 */
		static const char start_code[3] = { 0x00, 0x00, 0x01 };
		const size_t cap = surface_object->source_size;
		size_t start_code_size = 0;
		size_t header_size = 0;
		size_t payload;
		size_t need;
		unsigned char *dst;

		if (context->h264_start_code)
			start_code_size = sizeof(start_code);

		/*
		 * iter33 α-30: VP8 OUTPUT buffer needs the uncompressed
		 * frame header that ffmpeg-vaapi stripped before submitting
		 * VASliceData. Hantro's vp8_dec_run reads OUTPUT[0..N] with
		 * an assumed offset of 10 bytes (keyframe) or 3 bytes
		 * (interframe) before the first_partition data — see
		 * rockchip_vpu2_hw_vp8_dec.c:349.
		 *
		 * ffmpeg-vaapi (vaapi_vp8.c:191-192) strips
		 *   header_size = 3 + 7 * s->keyframe
		 * before submitting the slice data, so libva needs to
		 * pre-pad the OUTPUT with that many bytes. Hantro only
		 * uses these bytes for offset arithmetic, not parsing,
		 * so zero-filled placeholder is sufficient.
		 *
		 * ffmpeg-v4l2request (kdirect path) does NOT strip the
		 * header, hence its OUTPUT is byte-equal to SW reference
		 * and decode works correctly. This is the only material
		 * difference between the two front-ends for VP8.
		 *
		 * key_frame in VAAPI's pic_fields.bits is INVERTED:
		 *   0 → keyframe, 1 → interframe.
		 */
		if (profile == VAProfileVP8Version0_3 &&
		    surface_object->params.vp8.iqmatrix_set /* picture parsed by now */)
			header_size =
				surface_object->params.vp8.picture.pic_fields.bits.key_frame == 0 ?
				10 : 3;

		payload = codec_size_mul_sat((size_t)buffer_object->size,
					     (size_t)buffer_object->count);

		/* Running total after the optional H.264 start code. */
		need = codec_size_add_sat((size_t)surface_object->slices_size,
					  start_code_size);
		if (start_code_size != 0 && need > cap) {
			request_log("codec_store_buffer: H.264 start code would overflow OUTPUT buffer (%zu > %zu) — resolution upshift mid-stream?\n",
				    need, cap);
			return VA_STATUS_ERROR_ALLOCATION_FAILED;
		}

		/* Running total after the optional VP8 header pad. */
		need = codec_size_add_sat(need, header_size);
		if (header_size != 0 && need > cap) {
			request_log("codec_store_buffer: VP8 header pad would overflow OUTPUT buffer (%zu > %zu)\n",
				    need, cap);
			return VA_STATUS_ERROR_ALLOCATION_FAILED;
		}

		/* Running total after the slice payload itself. */
		need = codec_size_add_sat(need, payload);
		if (need > cap) {
			request_log("codec_store_buffer: slice payload would overflow OUTPUT buffer (%zu > %zu) — resolution upshift mid-stream?\n",
				    need, cap);
			return VA_STATUS_ERROR_ALLOCATION_FAILED;
		}

		/* Everything fits: emit prefix(es) and payload back to back. */
		dst = (unsigned char *)surface_object->source_data +
		      surface_object->slices_size;

		if (start_code_size != 0) {
			memcpy(dst, start_code, start_code_size);
			dst += start_code_size;
		}

		if (header_size != 0) {
			memset(dst, 0, header_size);
			dst += header_size;
		}

		if (payload != 0)
			memcpy(dst, buffer_object->data, payload);

		surface_object->slices_size +=
			start_code_size + header_size + payload;
		surface_object->slices_count++;
		break;
	}

	case VAPictureParameterBufferType:
		switch (profile) {
		case VAProfileMPEG2Simple:
		case VAProfileMPEG2Main:
			memcpy(&surface_object->params.mpeg2.picture,
			       buffer_object->data,
			       sizeof(surface_object->params.mpeg2.picture));
			break;

		case VAProfileH264Main:
		case VAProfileH264High:
		case VAProfileH264ConstrainedBaseline:
		case VAProfileH264MultiviewHigh:
		case VAProfileH264StereoHigh:
		case VAProfileH264High10:
			memcpy(&surface_object->params.h264.picture,
			       buffer_object->data,
			       sizeof(surface_object->params.h264.picture));
			break;

		case VAProfileHEVCMain:
		case VAProfileHEVCMain10:
			memcpy(&surface_object->params.h265.picture,
			       buffer_object->data,
			       sizeof(surface_object->params.h265.picture));
			break;

		case VAProfileVP8Version0_3:
			memcpy(&surface_object->params.vp8.picture,
			       buffer_object->data,
			       sizeof(surface_object->params.vp8.picture));
			break;

		case VAProfileVP9Profile0:
			memcpy(&surface_object->params.vp9.picture,
			       buffer_object->data,
			       sizeof(surface_object->params.vp9.picture));
			break;

		case VAProfileAV1Profile0:
			memcpy(&surface_object->params.av1.picture,
			       buffer_object->data,
			       sizeof(surface_object->params.av1.picture));
			break;

		default:
			break;
		}
		break;

	case VASliceParameterBufferType:
		switch (profile) {
		case VAProfileH264Main:
		case VAProfileH264High:
		case VAProfileH264ConstrainedBaseline:
		case VAProfileH264MultiviewHigh:
		case VAProfileH264StereoHigh:
		case VAProfileH264High10:
			memcpy(&surface_object->params.h264.slice,
			       buffer_object->data,
			       sizeof(surface_object->params.h264.slice));
			break;

		case VAProfileHEVCMain:
		case VAProfileHEVCMain10: {
			unsigned int n = surface_object->params.h265.num_slices;

			/*
			 * Slices beyond HEVC_MAX_SLICES_PER_FRAME are not
			 * recorded in the per-slice array; only the
			 * last-slice mirror below is still updated.
			 */
			if (n < HEVC_MAX_SLICES_PER_FRAME) {
				memcpy(&surface_object->params.h265.slices[n],
				       buffer_object->data,
				       sizeof(VASliceParameterBufferHEVC));
				surface_object->params.h265.num_slices = n + 1;
			}

			/* Keep .slice mirror populated as last-slice ref for
			 * h265_fill_pps which reads dependent_slice_segment_flag */
			memcpy(&surface_object->params.h265.slice,
			       buffer_object->data,
			       sizeof(surface_object->params.h265.slice));
			break;
		}

		case VAProfileVP8Version0_3:
			memcpy(&surface_object->params.vp8.slice,
			       buffer_object->data,
			       sizeof(surface_object->params.vp8.slice));
			break;

		case VAProfileVP9Profile0:
			memcpy(&surface_object->params.vp9.slice,
			       buffer_object->data,
			       sizeof(surface_object->params.vp9.slice));
			break;

		default:
			break;
		}
		break;

	case VAIQMatrixBufferType:
		switch (profile) {
		case VAProfileMPEG2Simple:
		case VAProfileMPEG2Main:
			memcpy(&surface_object->params.mpeg2.iqmatrix,
			       buffer_object->data,
			       sizeof(surface_object->params.mpeg2.iqmatrix));
			surface_object->params.mpeg2.iqmatrix_set = true;
			break;

		case VAProfileH264Main:
		case VAProfileH264High:
		case VAProfileH264ConstrainedBaseline:
		case VAProfileH264MultiviewHigh:
		case VAProfileH264StereoHigh:
		case VAProfileH264High10:
			memcpy(&surface_object->params.h264.matrix,
			       buffer_object->data,
			       sizeof(surface_object->params.h264.matrix));
			surface_object->params.h264.matrix_set = true;
			break;

		case VAProfileHEVCMain:
		case VAProfileHEVCMain10:
			memcpy(&surface_object->params.h265.iqmatrix,
			       buffer_object->data,
			       sizeof(surface_object->params.h265.iqmatrix));
			surface_object->params.h265.iqmatrix_set = true;
			break;

		case VAProfileVP8Version0_3:
			memcpy(&surface_object->params.vp8.iqmatrix,
			       buffer_object->data,
			       sizeof(surface_object->params.vp8.iqmatrix));
			surface_object->params.vp8.iqmatrix_set = true;
			break;

		default:
			break;
		}
		break;

	case VAProbabilityBufferType:
		switch (profile) {
		case VAProfileVP8Version0_3:
			memcpy(&surface_object->params.vp8.probability,
			       buffer_object->data,
			       sizeof(surface_object->params.vp8.probability));
			surface_object->params.vp8.probability_set = true;
			break;

		default:
			break;
		}
		break;

	default:
		break;
	}

	return VA_STATUS_SUCCESS;
}
/*
 * Program the codec-specific V4L2 controls for the picture accumulated on
 * @surface_object by dispatching to the per-codec *_set_controls() helper
 * that matches @profile.
 *
 * Returns VA_STATUS_SUCCESS when the helper reports a non-negative result,
 * VA_STATUS_ERROR_OPERATION_FAILED when it reports a negative one, and
 * VA_STATUS_ERROR_UNSUPPORTED_PROFILE for a profile without a helper.
 */
static VAStatus codec_set_controls(struct request_data *driver_data,
				   struct object_context *context,
				   VAProfile profile,
				   struct object_surface *surface_object)
{
	int result;

	switch (profile) {
	case VAProfileMPEG2Simple:
	case VAProfileMPEG2Main:
		result = mpeg2_set_controls(driver_data, context,
					    surface_object);
		break;

	case VAProfileH264Main:
	case VAProfileH264High:
	case VAProfileH264ConstrainedBaseline:
	case VAProfileH264MultiviewHigh:
	case VAProfileH264StereoHigh:
	case VAProfileH264High10:
		/* H.264 is the only helper that also takes the profile. */
		result = h264_set_controls(driver_data, context, profile,
					   surface_object);
		break;

	case VAProfileHEVCMain:
	case VAProfileHEVCMain10:
		result = h265_set_controls(driver_data, context,
					   surface_object);
		break;

	case VAProfileVP8Version0_3:
		result = vp8_set_controls(driver_data, context,
					  surface_object);
		break;

	case VAProfileVP9Profile0:
		result = vp9_set_controls(driver_data, context,
					  surface_object);
		break;

	case VAProfileAV1Profile0:
		/*
		 * Fills V4L2_CID_STATELESS_AV1_SEQUENCE from
		 * VAPictureParameterBufferAV1. The daedalus_v4l2 daemon
		 * (issue #11 daemon track) rebuilds an OBU_SEQUENCE_HEADER
		 * from this control and prepends it to the slice bitstream
		 * before passing it to libavcodec/libdav1d, which cannot
		 * otherwise parse the sequence-header-stripped OUTPUT
		 * buffer that ffmpeg-vaapi delivers.
		 *
		 * On the RK3588 vpu981 hardware path the control is
		 * harmless: that driver parses the OBU stream itself and
		 * ignores the payload, so no per-decoder gating is needed.
		 */
		result = av1_set_controls(driver_data, context,
					  surface_object);
		break;

	default:
		return VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
	}

	/* Every helper uses the same convention: negative means failure. */
	return result < 0 ? VA_STATUS_ERROR_OPERATION_FAILED :
			    VA_STATUS_SUCCESS;
}
/*
 * Start a decode cycle that renders into @surface_id.
 *
 * Borrows one OUTPUT (bitstream) slot and one CAPTURE slot for the surface,
 * resets the per-picture accumulation state and records the surface as the
 * context's current render target.
 *
 * Returns VA_STATUS_ERROR_INVALID_CONTEXT / VA_STATUS_ERROR_INVALID_SURFACE
 * for unknown ids and VA_STATUS_ERROR_ALLOCATION_FAILED when either pool
 * cannot provide a slot. On an allocation failure no slot acquired by this
 * call is left attached to the surface.
 */
VAStatus RequestBeginPicture(VADriverContextP context, VAContextID context_id,
			     VASurfaceID surface_id)
{
	struct request_data *driver_data = context->pDriverData;
	struct object_context *context_object;
	struct object_surface *surface_object;
	struct request_pool_slot *slot;
	struct cap_pool_slot *cap_slot;
	int slot_index;

	context_object = CONTEXT(driver_data, context_id);
	if (context_object == NULL)
		return VA_STATUS_ERROR_INVALID_CONTEXT;

	surface_object = SURFACE(driver_data, surface_id);
	if (surface_object == NULL)
		return VA_STATUS_ERROR_INVALID_SURFACE;

	/*
	 * A surface still marked as rendering is synced before reuse; the
	 * result of that sync is intentionally not checked here.
	 */
	if (surface_object->status == VASurfaceRendering)
		RequestSyncSurface(context, surface_id);

	/*
	 * Iter2 Fix 3: if the surface still holds a CAPTURE slot from a
	 * prior decode (DECODED or EXPORTED — the consumer is done with it
	 * by definition since we got back to BeginPicture for the same
	 * surface), release it before acquiring a new one.
	 */
	if (surface_object->current_slot != NULL)
		surface_unbind_slot(driver_data, surface_object);

	/*
	 * Borrow an OUTPUT (bitstream-input) slot from the driver-wide
	 * pool for the duration of this Begin/Render/End cycle. The
	 * surface's source_* fields hold the borrow's mmap pointer/size/
	 * V4L2 buffer index until RequestSyncSurface releases it after
	 * VIDIOC_DQBUF.
	 *
	 * This is done before the CAPTURE slot is bound so that every
	 * failure path below can undo the acquisition with a plain
	 * request_pool_release() and never leaves a freshly bound CAPTURE
	 * slot on a surface that did not enter the rendering state.
	 */
	slot_index = request_pool_acquire(&driver_data->output_pool);
	if (slot_index < 0)
		return VA_STATUS_ERROR_ALLOCATION_FAILED;

	slot = request_pool_slot(&driver_data->output_pool,
				 (unsigned int)slot_index);
	if (slot == NULL)
		goto err_release_output;

	/*
	 * Acquire a CAPTURE-pool slot for this decode cycle. The new slot
	 * is bound and its V4L2 index + mmap pointers are mirrored into
	 * surface_object->destination_* so the existing QBUF/DQBUF/EXPBUF
	 * code paths see no behavioral change.
	 */
	cap_slot = cap_pool_acquire(&driver_data->capture_pool, surface_id);
	if (cap_slot == NULL)
		goto err_release_output;

	surface_bind_slot(surface_object, cap_slot);

	/*
	 * iter8 Phase 7 IMP-1 experiment: env-gated CAPTURE buffer
	 * pre-zero. LIBVA_V4L2_ZERO_CAPTURE=1 wipes the slot's mmap'd
	 * region before kernel decode. Discriminates "kernel writes
	 * partial then aborts" from "kernel writes nothing and we
	 * see stale residue."
	 */
	{
		/* The environment is consulted once and then cached. */
		static const char *zero_env = NULL;
		static bool zero_env_checked = false;

		if (!zero_env_checked) {
			zero_env = getenv("LIBVA_V4L2_ZERO_CAPTURE");
			zero_env_checked = true;
		}

		if (zero_env != NULL && zero_env[0] == '1') {
			unsigned int b;

			for (b = 0; b < cap_slot->buffers_count; b++)
				if (cap_slot->map[b] != NULL)
					memset(cap_slot->map[b], 0,
					       cap_slot->map_lengths[b]);
		}
	}

	surface_object->source_index = slot->index;
	surface_object->source_data = slot->data;
	surface_object->source_size = slot->size;

	/*
	 * iter6: bind the slot's permanent request_fd to this surface for the
	 * duration of the decode cycle. Replaces the iter4 close+alloc-per-
	 * frame model. The fd is REINIT'd (not closed) at RequestSyncSurface,
	 * so the kernel-side request object is reset in place — no fd-reuse
	 * race with another slot's pending decode.
	 */
	surface_object->request_fd = slot->request_fd;

	/* Fresh picture: nothing accumulated yet. */
	surface_object->slices_size = 0;
	surface_object->slices_count = 0;
	surface_object->params.h264.matrix_set = false;
	surface_object->params.h265.num_slices = 0;
	surface_object->params.vp8.iqmatrix_set = false;
	surface_object->params.vp8.probability_set = false;

	surface_object->status = VASurfaceRendering;
	context_object->render_surface_id = surface_id;

	return VA_STATUS_SUCCESS;

err_release_output:
	request_pool_release(&driver_data->output_pool,
			     (unsigned int)slot_index);
	return VA_STATUS_ERROR_ALLOCATION_FAILED;
}
/*
 * Feed @buffers_count VA buffers, in order, into the surface selected by
 * the preceding BeginPicture on this context.
 *
 * Each buffer is handed to codec_store_buffer(); processing stops at the
 * first unknown buffer id (VA_STATUS_ERROR_INVALID_BUFFER) or at the first
 * buffer the store step rejects, whose status is returned unchanged.
 * Buffers stored before the failing one are not rolled back.
 */
VAStatus RequestRenderPicture(VADriverContextP context, VAContextID context_id,
			      VABufferID *buffers_ids, int buffers_count)
{
	struct request_data *driver_data = context->pDriverData;
	struct object_context *ctx;
	struct object_config *cfg;
	struct object_surface *target;

	ctx = CONTEXT(driver_data, context_id);
	if (ctx == NULL)
		return VA_STATUS_ERROR_INVALID_CONTEXT;

	cfg = CONFIG(driver_data, ctx->config_id);
	if (cfg == NULL)
		return VA_STATUS_ERROR_INVALID_CONFIG;

	/* The render target was recorded on the context by BeginPicture. */
	target = SURFACE(driver_data, ctx->render_surface_id);
	if (target == NULL)
		return VA_STATUS_ERROR_INVALID_SURFACE;

	for (int idx = 0; idx < buffers_count; idx++) {
		struct object_buffer *buf = BUFFER(driver_data,
						   buffers_ids[idx]);
		VAStatus stored;

		if (buf == NULL)
			return VA_STATUS_ERROR_INVALID_BUFFER;

		stored = codec_store_buffer(driver_data, ctx, cfg->profile,
					    target, buf);
		if (stored != VA_STATUS_SUCCESS)
			return stored;
	}

	return VA_STATUS_SUCCESS;
}
VAStatus RequestEndPicture(VADriverContextP context, VAContextID context_id)
{
struct request_data *driver_data = context->pDriverData;
struct object_context *context_object;
struct object_config *config_object;
struct object_surface *surface_object;
struct video_format *video_format;
unsigned int output_type, capture_type;
int request_fd;
VAStatus status;
int rc;
video_format = driver_data->video_format;
if (video_format == NULL)
return VA_STATUS_ERROR_OPERATION_FAILED;
output_type = v4l2_type_video_output(video_format->v4l2_mplane);
capture_type = v4l2_type_video_capture(video_format->v4l2_mplane);
context_object = CONTEXT(driver_data, context_id);
if (context_object == NULL)
return VA_STATUS_ERROR_INVALID_CONTEXT;
config_object = CONFIG(driver_data, context_object->config_id);
if (config_object == NULL)
return VA_STATUS_ERROR_INVALID_CONFIG;
surface_object =
SURFACE(driver_data, context_object->render_surface_id);
if (surface_object == NULL)
return VA_STATUS_ERROR_INVALID_SURFACE;
/*
* iter9 α-7: monotonic per-context counter instead of gettimeofday,
* so DPB.reference_ts / OUTPUT QBUF ts stay small (matches
* ffmpeg-v4l2request's pattern). Confirmed in iter30 sweep
* (1×, 1000×, 1000000× multipliers all produce identical output);
* the counter scheme works on both rkvdec and hantro vb2_find_buffer.
*/
context_object->timestamp_counter++;
surface_object->timestamp.tv_sec =
(time_t)(context_object->timestamp_counter / 1000000);
surface_object->timestamp.tv_usec =
(suseconds_t)(context_object->timestamp_counter % 1000000);
/*
* iter6: request_fd was bound to the surface in BeginPicture from
* the OUTPUT pool slot's permanent fd. Per-frame allocation is gone.
*/
request_fd = surface_object->request_fd;
if (request_fd < 0)
return VA_STATUS_ERROR_OPERATION_FAILED;
rc = codec_set_controls(driver_data, context_object,
config_object->profile, surface_object);
if (rc != VA_STATUS_SUCCESS)
return rc;
/*
* iter14 α-16: env-gated dump of OUTPUT bitstream bytes immediately
* before QBUF. LIBVA_V4L2_DUMP_OUTPUT=<dir> writes source_data[0..
* slices_size] to <dir>/output_<profile>_<surface>_<frame>.bin.
* Discriminates whether libva writes the same H.264/HEVC slice bytes
* as kdirect — if YES, Bug 4/5 are not in the OUTPUT-side; if NO,
* narrow to which slice-write path produces the divergence.
*
* Off by default; no behavior change when env unset.
*/
{
static const char *dump_env = NULL;
static bool dump_env_checked = false;
if (!dump_env_checked) {
dump_env = getenv("LIBVA_V4L2_DUMP_OUTPUT");
dump_env_checked = true;
}
if (dump_env != NULL && dump_env[0] != '\0' &&
surface_object->source_data != NULL &&
surface_object->slices_size > 0) {
char path[256];
snprintf(path, sizeof(path),
"%s/output_p%d_s%u_t%llu.bin",
dump_env, (int)config_object->profile,
(unsigned int)surface_object->base.id,
(unsigned long long)context_object->timestamp_counter);
FILE *fp = fopen(path, "wb");
if (fp != NULL) {
size_t w = fwrite(surface_object->source_data,
1, surface_object->slices_size,
fp);
request_log("α-16: dumped %zu bytes to %s "
"(slices_count=%u)\n",
w, path,
surface_object->slices_count);
fclose(fp);
} else {
request_log("α-16: fopen(%s) failed: %s\n",
path, strerror(errno));
}
}
}
rc = v4l2_queue_buffer(driver_data->video_fd, -1, capture_type, NULL,
surface_object->destination_index, 0,
surface_object->destination_buffers_count);
if (rc < 0)
return VA_STATUS_ERROR_OPERATION_FAILED;
rc = v4l2_queue_buffer(driver_data->video_fd, request_fd, output_type,
&surface_object->timestamp,
surface_object->source_index,
surface_object->slices_size, 1);
if (rc < 0)
return VA_STATUS_ERROR_OPERATION_FAILED;
surface_object->slices_size = 0;
status = RequestSyncSurface(context, context_object->render_surface_id);
if (status != VA_STATUS_SUCCESS)
return status;
context_object->render_surface_id = VA_INVALID_ID;
return VA_STATUS_SUCCESS;
}