Files
libva-v4l2-request-fourier/src/context.c
T
claude-noether 3760a70006 iter15 α-19: explicit VIDIOC_S_FMT on CAPTURE side for rkvdec correctness
Phase 3 ioctl-sequence diff: kdirect (ffmpeg-v4l2request) S_FMTs CAPTURE
with NV12 + dimensions after S_FMT OUTPUT, BEFORE CREATE_BUFS. libva's
old code only G_FMTs CAPTURE (per iter5b-β's hantro-targeted comment
that explicit S_FMT puts hantro into an inconsistent state).

For rkvdec on RK3399 the absence of explicit S_FMT CAPTURE doesn't
commit the chosen NV12 format properly. rkvdec HEVC + H.264 silently
produce zero / garbage CAPTURE output — Bug 4 + Bug 5 root cause.

Now: S_FMT OUTPUT → S_FMT CAPTURE → G_FMT CAPTURE. Failure of S_FMT
CAPTURE is non-fatal: fall back to G_FMT (preserves the iter5b-β
hantro path).

Future iter to gate this on driver_kind explicitly per
feedback_per_driver_kludge_gating.md. For now, always-on is safe
because kdirect proves S_FMT CAPTURE works on both rkvdec AND hantro.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-14 08:33:18 +00:00

546 lines
19 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/*
* Copyright (C) 2007 Intel Corporation
* Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com>
* Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "context.h"
#include "config.h"
#include "request.h"
#include "surface.h"
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/videodev2.h>
#include <hevc-ctrls.h>
#include "utils.h"
#include "v4l2.h"
#include "autoconfig.h"
/*
 * RequestCreateContext() - set up a decode session on the V4L2 device.
 *
 * Commits the OUTPUT and CAPTURE formats, initializes the CAPTURE and OUTPUT
 * buffer pools, pushes the device-wide H.264/HEVC controls, starts streaming
 * on both queues and registers a context object whose id is stored in
 * @context_id.
 *
 * @context:        VA driver context; pDriverData is the request_data.
 * @config_id:      config supplying the profile and OUTPUT pixel format.
 * @picture_width,
 * @picture_height: dimensions passed to S_FMT on both queues.
 * @flags:          stored verbatim in the context object.
 * @surfaces_ids,
 * @surfaces_count: caller-owned array; a private copy is kept in the context.
 * @context_id:     out parameter, written only on success.
 *
 * Returns VA_STATUS_SUCCESS, or
 *   VA_STATUS_ERROR_INVALID_CONFIG       unknown @config_id,
 *   VA_STATUS_ERROR_UNSUPPORTED_PROFILE  config carries no pixel format,
 *   VA_STATUS_ERROR_ALLOCATION_FAILED    pool, context or id-array allocation,
 *   VA_STATUS_ERROR_OPERATION_FAILED     format probe or V4L2 call failed.
 *
 * On failure every piece of device/driver state that THIS call created
 * (streaming OUTPUT queue, OUTPUT pool, CAPTURE pool, geometry cache) is
 * unwound again, in the same order RequestDestroyContext uses, so that the
 * next CreateContext does not pick up pools sized for a session that never
 * came up. State that already existed on entry is left alone.
 */
VAStatus RequestCreateContext(VADriverContextP context, VAConfigID config_id,
			      int picture_width, int picture_height, int flags,
			      VASurfaceID *surfaces_ids, int surfaces_count,
			      VAContextID *context_id)
{
	struct request_data *driver_data = context->pDriverData;
	struct object_config *config_object;
	struct object_context *context_object = NULL;
	struct video_format *video_format;
	/* Zeroed so planes the G_FMT helper does not fill are not read as garbage. */
	unsigned int destination_sizes[VIDEO_MAX_PLANES] = { 0 };
	unsigned int destination_bytesperlines[VIDEO_MAX_PLANES] = { 0 };
	unsigned int destination_planes_count;
	unsigned int format_width, format_height;
	unsigned int pixelformat;
	VASurfaceID *ids = NULL;
	VAContextID id;
	VAStatus status;
	unsigned int output_type = 0, capture_type = 0;
	unsigned int j;
	bool found;
	/* Ownership flags consulted by the error path only. */
	bool owns_capture_pool = false;
	bool owns_output_pool = false;
	bool output_streaming = false;
	int rc;

	/*
	 * iter5b-β: CreateContext owns the V4L2 OUTPUT-side device-format
	 * lifecycle (S_FMT, CAPTURE-format probe, cap_pool_init, per-surface
	 * destination_* fill). config_id is known here, so the right OUTPUT
	 * pixel format can be derived from the bound profile, and STREAMON
	 * only happens at the end of this function, so the queues are not
	 * yet streaming when S_FMT is issued.
	 *
	 * DestroyContext is the per-session teardown site: it STREAMOFFs
	 * both queues, destroys both pools and REQBUFS(0)s, leaving the
	 * device in a clean slate for the next CreateContext.
	 */
	config_object = CONFIG(driver_data, config_id);
	if (config_object == NULL) {
		status = VA_STATUS_ERROR_INVALID_CONFIG;
		goto error;
	}

	pixelformat = config_object->pixelformat;
	if (pixelformat == 0) {
		/*
		 * Defensive: CreateConfig is expected to reject unhandled
		 * profiles, so this should not trigger in practice.
		 */
		status = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
		goto error;
	}

	/*
	 * Probe the CAPTURE-side V4L2 format once and cache it in
	 * driver_data; the cached pointer is reused by later CreateContext
	 * calls. Linear NV12 (multi-planar API) is probed last and therefore
	 * wins over sunxi tiled NV12 when both are available.
	 */
	if (!driver_data->video_format) {
		video_format = NULL;

		found = v4l2_find_format(driver_data->video_fd,
					 V4L2_BUF_TYPE_VIDEO_CAPTURE,
					 V4L2_PIX_FMT_SUNXI_TILED_NV12);
		if (found)
			video_format = video_format_find(V4L2_PIX_FMT_SUNXI_TILED_NV12);

		found = v4l2_find_format(driver_data->video_fd,
					 V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE,
					 V4L2_PIX_FMT_NV12);
		if (found)
			video_format = video_format_find(V4L2_PIX_FMT_NV12);

		if (video_format == NULL) {
			status = VA_STATUS_ERROR_OPERATION_FAILED;
			goto error;
		}

		driver_data->video_format = video_format;
	}

	video_format = driver_data->video_format;
	output_type = v4l2_type_video_output(video_format->v4l2_mplane);
	capture_type = v4l2_type_video_capture(video_format->v4l2_mplane);

	/*
	 * Commit the OUTPUT pixel format, derived from the bound profile.
	 * picture_width/picture_height are the kernel-facing dimensions for
	 * this decode session. (A hardcoded H264_SLICE here previously made
	 * hantro and rkvdec produce all-zero CAPTURE output for the other
	 * codecs.)
	 */
	rc = v4l2_set_format(driver_data->video_fd, output_type, pixelformat,
			     picture_width, picture_height);
	if (rc < 0) {
		status = VA_STATUS_ERROR_OPERATION_FAILED;
		goto error;
	}

	/*
	 * iter15 α-19: explicit S_FMT on CAPTURE.
	 *
	 * The reference ffmpeg-v4l2request path issues S_FMT(CAPTURE) after
	 * S_FMT(OUTPUT) and before CREATE_BUFS. Without it rkvdec on RK3399
	 * was observed not to commit NV12 properly and to decode HEVC/H.264
	 * into zero/garbage frames. The call is always-on for now; gating it
	 * per driver kind is a planned follow-up.
	 *
	 * Failure is deliberately non-fatal: the G_FMT below then reports
	 * whatever format the driver derived on its own, which is the
	 * behaviour the earlier hantro-only code relied on.
	 *
	 * NOTE(review): NV12 is requested even when the cached video_format
	 * is the sunxi tiled variant — confirm the driver adjusts the
	 * request rather than ending up with a mismatched CAPTURE format.
	 */
	rc = v4l2_set_format(driver_data->video_fd, capture_type,
			     V4L2_PIX_FMT_NV12, picture_width,
			     picture_height);
	if (rc < 0)
		request_log("iter15 α-19: S_FMT CAPTURE failed (continuing): %s\n",
			    strerror(errno));

	/* Read back the CAPTURE geometry the driver actually settled on. */
	rc = v4l2_get_format(driver_data->video_fd, capture_type, &format_width,
			     &format_height, destination_bytesperlines,
			     destination_sizes, NULL);
	if (rc < 0) {
		status = VA_STATUS_ERROR_OPERATION_FAILED;
		goto error;
	}

	destination_planes_count = video_format->planes_count;

	/*
	 * Initialize the CAPTURE buffer pool with
	 * max(surfaces_count, MIN_CAP_POOL) slots; the floor gives buffer
	 * recycling some headroom when the caller passes few or no surfaces.
	 * The pool's `initialized` flag is expected to be cleared by
	 * cap_pool_destroy, so a later CreateContext re-inits at its own
	 * resolution.
	 */
	if (!driver_data->capture_pool.initialized) {
		unsigned int pool_count = surfaces_count > MIN_CAP_POOL ?
					  surfaces_count : MIN_CAP_POOL;

		rc = cap_pool_init(&driver_data->capture_pool,
				   driver_data->video_fd, capture_type,
				   pool_count,
				   video_format->v4l2_buffers_count);
		if (rc < 0) {
			status = VA_STATUS_ERROR_ALLOCATION_FAILED;
			goto error;
		}

		owns_capture_pool = true;
	}

	/*
	 * Single-buffer formats: derive per-plane sizes from the first
	 * plane's stride; every further plane is half the first plane's
	 * size. Multi-buffer formats keep the sizes G_FMT reported.
	 */
	if (video_format->v4l2_buffers_count == 1) {
		destination_sizes[0] = destination_bytesperlines[0] *
				       format_height;

		for (j = 1; j < destination_planes_count; j++)
			destination_sizes[j] = destination_sizes[0] / 2;
	}

	/*
	 * Cache the format-uniform CAPTURE geometry in driver_data so that
	 * surfaces created AFTER this call (late surface allocation, e.g.
	 * the ffmpeg vaapi-copy flow) can be filled lazily via
	 * surface_fill_format_uniform().
	 */
	driver_data->fmt_planes_count = destination_planes_count;
	driver_data->fmt_buffers_count = video_format->v4l2_buffers_count;
	driver_data->fmt_format_height = format_height;
	for (j = 0; j < destination_planes_count; j++) {
		driver_data->fmt_sizes[j] = destination_sizes[j];
		driver_data->fmt_bytesperlines[j] =
			destination_bytesperlines[j];
	}
	driver_data->fmt_valid = true;

	/*
	 * Fill every surface that already exists. The whole surface heap is
	 * walked rather than surfaces_ids[], because callers may pass
	 * surfaces_count == 0 while their surfaces are already in the heap.
	 */
	{
		struct object_surface *surface_iter;
		int heap_iter;

		surface_iter = (struct object_surface *)
			object_heap_first(&driver_data->surface_heap,
					  &heap_iter);
		while (surface_iter != NULL) {
			surface_fill_format_uniform(driver_data, surface_iter);
			surface_iter = (struct object_surface *)
				object_heap_next(&driver_data->surface_heap,
						 &heap_iter);
		}
	}

	id = object_heap_allocate(&driver_data->context_heap);
	context_object = CONTEXT(driver_data, id);
	if (context_object == NULL) {
		status = VA_STATUS_ERROR_ALLOCATION_FAILED;
		goto error;
	}

	memset(&context_object->dpb, 0, sizeof(context_object->dpb));
	context_object->timestamp_counter = 0; /* iter9 α-7 */

	/*
	 * Initialize the OUTPUT (bitstream-input) buffer pool. It is owned
	 * by driver_data and sized independently of surfaces_count, so it
	 * also works for callers that pass no surfaces at CreateContext time
	 * or create surfaces later. 16 slots equals the H.264 MaxDpbFrames
	 * ceiling, which leaves room for consumers that pipeline decode
	 * requests.
	 *
	 * Remember whether the pool pre-existed so the error path only tears
	 * down a pool this call brought up.
	 */
	owns_output_pool = !driver_data->output_pool.initialized;
	rc = request_pool_init(&driver_data->output_pool,
			       driver_data->video_fd, driver_data->media_fd,
			       output_type, 16);
	if (rc < 0) {
		status = VA_STATUS_ERROR_ALLOCATION_FAILED;
		goto error;
	}

	/*
	 * The surfaces_ids array belongs to the caller and its lifetime is
	 * unknown; keep a private copy under our control.
	 */
	if (surfaces_count > 0) {
		ids = malloc(surfaces_count * sizeof(VASurfaceID));
		if (ids == NULL) {
			status = VA_STATUS_ERROR_ALLOCATION_FAILED;
			goto error;
		}

		memcpy(ids, surfaces_ids,
		       surfaces_count * sizeof(VASurfaceID));
	}

	/*
	 * Stateless H.264 device-wide controls, set on the device fd
	 * (request_fd = -1) before STREAMON; per-request controls follow
	 * later on a request fd. FRAME_BASED is what hantro accepts;
	 * ANNEX_B matches the start codes kept in the slice payload.
	 * Best effort: not every backing driver exposes both controls, so
	 * the result is intentionally ignored.
	 */
	{
		struct v4l2_ext_control dev_ctrls[2] = {
			{
				.id = V4L2_CID_STATELESS_H264_DECODE_MODE,
				.value = V4L2_STATELESS_H264_DECODE_MODE_FRAME_BASED,
			},
			{
				.id = V4L2_CID_STATELESS_H264_START_CODE,
				.value = V4L2_STATELESS_H264_START_CODE_ANNEX_B,
			},
		};

		(void)v4l2_set_controls(driver_data->video_fd, -1,
					dev_ctrls, 2);
	}

	/*
	 * HEVC device-wide controls, same best-effort pattern. Kept as a
	 * separate batch so that a device without HEVC controls fails only
	 * this call and does not invalidate the H.264 batch above.
	 */
	{
		struct v4l2_ext_control hevc_dev_ctrls[2] = {
			{
				.id = V4L2_CID_STATELESS_HEVC_DECODE_MODE,
				.value = V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED,
			},
			{
				.id = V4L2_CID_STATELESS_HEVC_START_CODE,
				.value = V4L2_STATELESS_HEVC_START_CODE_ANNEX_B,
			},
		};

		(void)v4l2_set_controls(driver_data->video_fd, -1,
					hevc_dev_ctrls, 2);
	}

	/*
	 * Mirror the ANNEX_B mode requested above into the context so the
	 * slice-copy code prepends a start code to each slice payload.
	 * That only makes sense for H.264 and HEVC; MPEG-2, VP8 and VP9
	 * bitstreams carry no start codes and a prepended one corrupts the
	 * frame header, so every other profile gets false.
	 *
	 * The value is hardcoded rather than read back from the kernel's
	 * accepted START_CODE control.
	 */
	switch (config_object->profile) {
	case VAProfileH264Main:
	case VAProfileH264High:
	case VAProfileH264ConstrainedBaseline:
	case VAProfileH264MultiviewHigh:
	case VAProfileH264StereoHigh:
	case VAProfileHEVCMain:
		context_object->h264_start_code = true;
		break;
	default:
		context_object->h264_start_code = false;
		break;
	}

	rc = v4l2_set_stream(driver_data->video_fd, output_type, true);
	if (rc < 0) {
		status = VA_STATUS_ERROR_OPERATION_FAILED;
		goto error;
	}
	output_streaming = true;

	rc = v4l2_set_stream(driver_data->video_fd, capture_type, true);
	if (rc < 0) {
		status = VA_STATUS_ERROR_OPERATION_FAILED;
		goto error;
	}

	context_object->config_id = config_id;
	context_object->render_surface_id = VA_INVALID_ID;
	context_object->surfaces_ids = ids;
	context_object->surfaces_count = surfaces_count;
	context_object->picture_width = picture_width;
	context_object->picture_height = picture_height;
	context_object->flags = flags;

	*context_id = id;

	return VA_STATUS_SUCCESS;

error:
	/*
	 * Unwind in DestroyContext order: STREAMOFF, OUTPUT pool + its
	 * kernel buffers, CAPTURE pool, geometry cache. Only state created
	 * by this call is touched; failures here are ignored because the
	 * original error in `status` is the one worth reporting.
	 */
	if (owns_output_pool) {
		if (output_streaming)
			(void)v4l2_set_stream(driver_data->video_fd,
					      output_type, false);

		if (driver_data->output_pool.initialized)
			request_pool_destroy(&driver_data->output_pool);

		(void)v4l2_request_buffers(driver_data->video_fd,
					   output_type, 0);
	}

	if (owns_capture_pool) {
		cap_pool_destroy(&driver_data->capture_pool,
				 driver_data->video_fd, capture_type);
		/* The cached geometry described the session that just failed. */
		driver_data->fmt_valid = false;
	}

	free(ids);

	if (context_object != NULL)
		object_heap_free(&driver_data->context_heap,
				 (struct object_base *)context_object);

	return status;
}
/*
 * RequestDestroyContext() - tear down a decode session.
 *
 * Stops streaming on both queues, destroys the context's surfaces, frees the
 * context object, then releases the OUTPUT pool, the OUTPUT kernel buffers
 * and the CAPTURE pool, and invalidates the cached CAPTURE geometry.
 *
 * Returns VA_STATUS_SUCCESS, VA_STATUS_ERROR_INVALID_CONTEXT for an unknown
 * @context_id, or VA_STATUS_ERROR_OPERATION_FAILED when no CAPTURE format
 * was ever probed or a V4L2 / surface teardown step fails.
 *
 * Failures before the context object is freed return immediately and leave
 * the context in place. Once the context object has been freed the id is
 * gone for good, so the remaining teardown always runs to completion and a
 * failure is only reported through the return value.
 */
VAStatus RequestDestroyContext(VADriverContextP context, VAContextID context_id)
{
	struct request_data *driver_data = context->pDriverData;
	struct object_context *context_object;
	struct video_format *video_format;
	unsigned int output_type, capture_type;
	VAStatus status;
	int rc;

	video_format = driver_data->video_format;
	if (video_format == NULL)
		return VA_STATUS_ERROR_OPERATION_FAILED;

	output_type = v4l2_type_video_output(video_format->v4l2_mplane);
	capture_type = v4l2_type_video_capture(video_format->v4l2_mplane);

	context_object = CONTEXT(driver_data, context_id);
	if (context_object == NULL)
		return VA_STATUS_ERROR_INVALID_CONTEXT;

	rc = v4l2_set_stream(driver_data->video_fd, output_type, false);
	if (rc < 0)
		return VA_STATUS_ERROR_OPERATION_FAILED;

	rc = v4l2_set_stream(driver_data->video_fd, capture_type, false);
	if (rc < 0)
		return VA_STATUS_ERROR_OPERATION_FAILED;

	/* Release the surfaces that were bound at CreateContext time. */
	status = RequestDestroySurfaces(context, context_object->surfaces_ids,
					context_object->surfaces_count);
	if (status != VA_STATUS_SUCCESS)
		return VA_STATUS_ERROR_OPERATION_FAILED;

	free(context_object->surfaces_ids);

	object_heap_free(&driver_data->context_heap,
			 (struct object_base *)context_object);

	/*
	 * Point of no return: the context no longer exists, so from here on
	 * every step runs regardless of earlier failures. Bailing out early
	 * would leave an initialized CAPTURE pool and a valid geometry cache
	 * behind for a session that is already gone.
	 */
	status = VA_STATUS_SUCCESS;

	/*
	 * Tear down the OUTPUT pool BEFORE REQBUFS(0) frees the kernel-side
	 * buffers. If the pool stayed marked initialized, the next
	 * CreateContext's request_pool_init would return without creating
	 * buffers and later QBUFs would reference indices that no longer
	 * exist.
	 */
	if (driver_data->output_pool.initialized)
		request_pool_destroy(&driver_data->output_pool);

	rc = v4l2_request_buffers(driver_data->video_fd, output_type, 0);
	if (rc < 0)
		status = VA_STATUS_ERROR_OPERATION_FAILED;

	/*
	 * The CAPTURE pool owns the CAPTURE-side buffers; destroying it
	 * gives the next CreateContext a clean slate to re-init at its own
	 * resolution.
	 */
	cap_pool_destroy(&driver_data->capture_pool, driver_data->video_fd,
			 capture_type);

	/*
	 * driver_data->video_format intentionally survives: it is a cached
	 * probe result and the next CreateContext skips the probe when it
	 * is set.
	 *
	 * The format-uniform geometry cache, however, is invalidated so that
	 * a CreateSurfaces2 call arriving before the next CreateContext does
	 * not lazy-fill surfaces with geometry from this torn-down session.
	 */
	driver_data->fmt_valid = false;

	return status;
}