/* libva-v4l2-request-fourier/src/context.c */

/*
 * Copyright (C) 2007 Intel Corporation
 * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com>
 * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "context.h"
#include "config.h"
#include "request.h"
#include "surface.h"

#include <errno.h>
#include <stdlib.h>
#include <string.h>

#include <assert.h>

#include <sys/ioctl.h>
#include <sys/mman.h>

#include <linux/videodev2.h>

#include <hevc-ctrls.h>

#include "utils.h"
#include "v4l2.h"

#include "autoconfig.h"

VAStatus RequestCreateContext(VADriverContextP context, VAConfigID config_id,
			      int picture_width, int picture_height, int flags,
			      VASurfaceID *surfaces_ids, int surfaces_count,
			      VAContextID *context_id)
{
	struct request_data *driver_data = context->pDriverData;
	struct object_config *config_object;
	struct object_context *context_object = NULL;
	struct video_format *video_format;
	unsigned int destination_sizes[VIDEO_MAX_PLANES];
	unsigned int destination_bytesperlines[VIDEO_MAX_PLANES];
	unsigned int destination_planes_count;
	unsigned int format_width, format_height;
	unsigned int pixelformat;
	VASurfaceID *ids = NULL;
	VAContextID id;
	VAStatus status;
	unsigned int output_type, capture_type;
	unsigned int j;
	bool found;
	int rc;

	/*
	 * iter5b-β: CreateContext owns the V4L2 OUTPUT-side device-format
	 * lifecycle (S_FMT, CAPTURE-format probe, cap_pool_init, per-surface
	 * destination_* fill). Pre-β these lived in CreateSurfaces2 with a
	 * resolution-change gate; β moves them here because (a) config_id
	 * is known so the right OUTPUT pixel format can be derived from
	 * the bound profile, and (b) STREAMON happens at the end of this
	 * function, so the queue is never streaming when we do S_FMT.
	 *
	 * DestroyContext is the only per-session teardown site under β
	 * (no in-CreateSurfaces2 teardown branch). It STREAMOFFs both
	 * queues, calls request_pool_destroy + cap_pool_destroy, and
	 * REQBUFS(0) — leaving the V4L2 device in a clean slate for the
	 * next CreateContext.
	 */
	config_object = CONFIG(driver_data, config_id);
	if (config_object == NULL) {
		status = VA_STATUS_ERROR_INVALID_CONFIG;
		goto error;
	}

	pixelformat = config_object->pixelformat;
	if (pixelformat == 0) {
		/*
		 * Defensive: CreateConfig rejects unhandled profiles, so
		 * pixelformat is always non-zero by the time we get here.
		 * Belt-and-suspenders.
		 */
		status = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
		goto error;
	}

	/*
	 * Probe the CAPTURE-side V4L2 format. video_format is a static
	 * pointer into video.c's formats[]; it stays valid for the life of
	 * the driver_data and is cached across CreateContext cycles. The
	 * probe doesn't require any prior S_FMT — v4l2_find_format
	 * enumerates the device's supported formats directly.
	 */
	if (!driver_data->video_format) {
		video_format = NULL;
		found = v4l2_find_format(driver_data->video_fd,
					 V4L2_BUF_TYPE_VIDEO_CAPTURE,
					 V4L2_PIX_FMT_SUNXI_TILED_NV12);
		if (found)
			video_format = video_format_find(V4L2_PIX_FMT_SUNXI_TILED_NV12);

		found = v4l2_find_format(driver_data->video_fd,
					 V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE,
					 V4L2_PIX_FMT_NV12);
		if (found)
			video_format = video_format_find(V4L2_PIX_FMT_NV12);

		if (video_format == NULL) {
			status = VA_STATUS_ERROR_OPERATION_FAILED;
			goto error;
		}

		driver_data->video_format = video_format;
	}
	video_format = driver_data->video_format;

	output_type = v4l2_type_video_output(video_format->v4l2_mplane);
	capture_type = v4l2_type_video_capture(video_format->v4l2_mplane);

	/*
	 * Commit the OUTPUT pixel format. picture_width/picture_height
	 * are the kernel-facing dimensions for this decode session. With
	 * profile-derived pixelformat, hantro's CAPTURE-format derivation
	 * dispatches to the right codec_mode (pre-β hardcoded H264_SLICE
	 * meant hantro silently substituted MPEG2_DECODER for HEVC/VP8/VP9
	 * → all-zero CAPTURE; rkvdec silently dropped HEVC/VP9 → same
	 * outcome).
	 */
	rc = v4l2_set_format(driver_data->video_fd, output_type, pixelformat,
			     picture_width, picture_height);
	if (rc < 0) {
		status = VA_STATUS_ERROR_OPERATION_FAILED;
		goto error;
	}

	/*
	 * iter15 α-19: explicit S_FMT on CAPTURE for rkvdec.
	 *
	 * Original iter5b-β comment: "Do NOT VIDIOC_S_FMT on CAPTURE — hantro
	 * reads the SPS from OUTPUT to set CAPTURE shape internally."
	 *
	 * Empirical finding at iter15 Phase 3 (2026-05-14): kdirect (ffmpeg-
	 * v4l2request) does S_FMT on CAPTURE side after S_FMT(OUTPUT),
	 * then CREATE_BUFS for CAPTURE. libva's old G_FMT-only path skipped
	 * the S_FMT call. For hantro this was deliberate (works); for rkvdec
	 * (HEVC + H.264 + VP9 on RK3399) the absence of explicit S_FMT puts
	 * the driver into a state where it does NOT commit the chosen NV12
	 * pixel format properly — and the resulting decode silently writes
	 * garbage or zero for HEVC + H.264 (Bug 4 + Bug 5).
	 *
	 * Per [[feedback-per-driver-kludge-gating]]: this driver-specific
	 * difference should be gated on driver_kind. For now use a single
	 * always-on S_FMT call as the safe move: kdirect proves S_FMT
	 * CAPTURE works on both hantro AND rkvdec (it's the reference path).
	 * The iter5b-β comment is preserved-but-amended below.
	 *
	 * Sequence: S_FMT OUTPUT (above) → S_FMT CAPTURE (this) → G_FMT
	 * CAPTURE (sanity read-back, matches what S_FMT committed).
	 */
	{
		unsigned int capture_pixelformat = V4L2_PIX_FMT_NV12;
		rc = v4l2_set_format(driver_data->video_fd, capture_type,
				     capture_pixelformat, picture_width,
				     picture_height);
		if (rc < 0) {
			/* Non-fatal: if the kernel rejects S_FMT CAPTURE (some
			 * older hantro variants), fall through to G_FMT. */
			request_log("iter15 α-19: S_FMT CAPTURE failed (continuing): %s\n",
				    strerror(errno));
		}
	}

	rc = v4l2_get_format(driver_data->video_fd, capture_type, &format_width,
			     &format_height, destination_bytesperlines,
			     destination_sizes, NULL);
	if (rc < 0) {
		status = VA_STATUS_ERROR_OPERATION_FAILED;
		goto error;
	}

	destination_planes_count = video_format->planes_count;

	/*
	 * Initialize the CAPTURE buffer pool (cap_pool). Pool size =
	 * max(surfaces_count, MIN_CAP_POOL). The headroom gives LRU
	 * recycling enough margin to never reuse a buffer within the
	 * consumer's compositor-hold window for typical playback
	 * patterns. cap_pool_init does the V4L2 CREATE_BUFS + per-slot
	 * mmap.
	 *
	 * `pool->initialized` is reset to false by cap_pool_destroy in
	 * DestroyContext; subsequent CreateContext re-inits at the new
	 * resolution.
	 */
	if (!driver_data->capture_pool.initialized) {
		unsigned int pool_count = surfaces_count > MIN_CAP_POOL ?
					  surfaces_count : MIN_CAP_POOL;
		rc = cap_pool_init(&driver_data->capture_pool,
				   driver_data->video_fd, capture_type,
				   pool_count, video_format->v4l2_buffers_count);
		if (rc < 0) {
			status = VA_STATUS_ERROR_ALLOCATION_FAILED;
			goto error;
		}
	}

	/*
	 * Compute format-uniform destination_* values. Same for all
	 * surfaces of this format; written once per surface, never
	 * changed by BeginPicture's slot acquisition.
	 */
	if (video_format->v4l2_buffers_count == 1) {
		destination_sizes[0] = destination_bytesperlines[0] *
				       format_height;
		for (j = 1; j < destination_planes_count; j++)
			destination_sizes[j] = destination_sizes[0] / 2;
	}

	/*
	 * iter5b-β Commit D: cache the format-uniform CAPTURE geometry
	 * in driver_data. CreateSurfaces2 calls AFTER this CreateContext
	 * (ffmpeg vaapi-copy late-surface-allocation case) will lazy-fill
	 * via surface_fill_format_uniform(); the surface_heap walk below
	 * fills surfaces that pre-existed when CreateContext fired.
	 */
	driver_data->fmt_planes_count = destination_planes_count;
	driver_data->fmt_buffers_count = video_format->v4l2_buffers_count;
	driver_data->fmt_format_height = format_height;
	for (j = 0; j < destination_planes_count; j++) {
		driver_data->fmt_sizes[j] = destination_sizes[j];
		driver_data->fmt_bytesperlines[j] =
			destination_bytesperlines[j];
	}
	driver_data->fmt_valid = true;

	/*
	 * Walk the surface_heap (not just surfaces_ids[]) to populate
	 * destination_* on every existing surface. Pre-Commit-D we walked
	 * surfaces_ids[], which is empty for ffmpeg vaapi-copy consumers
	 * that call vaCreateContext with surfaces_count=0 — those surfaces
	 * exist in the heap but aren't in the param array. Walking the
	 * heap catches both flows. Late-created surfaces (after this
	 * CreateContext) fill via surface_fill_format_uniform in
	 * CreateSurfaces2's per-surface init.
	 */
	{
		struct object_surface *surface_iter;
		int heap_iter;

		surface_iter = (struct object_surface *)
			object_heap_first(&driver_data->surface_heap,
					  &heap_iter);
		while (surface_iter != NULL) {
			surface_fill_format_uniform(driver_data, surface_iter);
			surface_iter = (struct object_surface *)
				object_heap_next(&driver_data->surface_heap,
						 &heap_iter);
		}
	}

	id = object_heap_allocate(&driver_data->context_heap);
	context_object = CONTEXT(driver_data, id);
	if (context_object == NULL) {
		status = VA_STATUS_ERROR_ALLOCATION_FAILED;
		goto error;
	}
	memset(&context_object->dpb, 0, sizeof(context_object->dpb));
	context_object->timestamp_counter = 0;	/* iter9 α-7 */

	/*
	 * Initialize the OUTPUT (bitstream-input) buffer pool. Sized by
	 * codec pipeline depth (4 H.264 frames in flight is sufficient
	 * for current hantro/rkvdec scheduling); independent of caller-
	 * supplied surfaces_count. Pool is owned by driver_data so it
	 * outlives any single context destroy/recreate cycle.
	 *
	 * This replaces the prior per-surface OUTPUT loop, which (a)
	 * created an empty queue when surfaces_count==0 (ffmpeg vaapi-
	 * copy path) and (b) only populated surface->source_* for
	 * surfaces present at vaCreateContext time, NULL-derefing on
	 * surfaces created later.
	 */
	/*
	 * iter6: pool size 16 gives comfortable headroom over typical H.264
	 * MaxDpbFrames (16) for any consumer that pipelines decode requests.
	 * Each slot owns its own request_fd (REINIT'd per use).
	 */
	rc = request_pool_init(&driver_data->output_pool,
			       driver_data->video_fd, driver_data->media_fd,
			       output_type, 16);
	if (rc < 0) {
		status = VA_STATUS_ERROR_ALLOCATION_FAILED;
		goto error;
	}

	/*
	 * The surface_ids array has been allocated by the caller and
	 * we don't have any indication wrt its life time. Let's make sure
	 * its life span is under our control.
	 */
	if (surfaces_count > 0) {
		ids = malloc(surfaces_count * sizeof(VASurfaceID));
		if (ids == NULL) {
			status = VA_STATUS_ERROR_ALLOCATION_FAILED;
			goto error;
		}

		memcpy(ids, surfaces_ids,
		       surfaces_count * sizeof(VASurfaceID));
	}

	/*
	 * Stateless H.264 device-wide controls. The kernel V4L2 stateless
	 * framework requires DECODE_MODE and START_CODE be set on the
	 * device fd (request_fd=-1) before VIDIOC_STREAMON; per-request
	 * controls (SPS/PPS/etc.) attached to a request_fd come later.
	 *
	 * hantro-vpu via rockchip,rk3568-vpu DT compatible (covers RK3568
	 * and RK3566 — PineTab2 silicon — since they're close enough)
	 * accepts only DECODE_MODE_FRAME_BASED.
	 * START_CODE_ANNEX_B preserves leading 0x00000001 in the slice
	 * payload that h264.c assembles. Errors here are not fatal: not
	 * every backing driver supports both controls (e.g. cedrus may
	 * default to SLICE_BASED without exposing DECODE_MODE).
	 */
	{
		struct v4l2_ext_control dev_ctrls[2] = {
			{
				.id = V4L2_CID_STATELESS_H264_DECODE_MODE,
				.value = V4L2_STATELESS_H264_DECODE_MODE_FRAME_BASED,
			},
			{
				.id = V4L2_CID_STATELESS_H264_START_CODE,
				.value = V4L2_STATELESS_H264_START_CODE_ANNEX_B,
			},
		};
		(void)v4l2_set_controls(driver_data->video_fd, -1,
					dev_ctrls, 2);
	}

	/*
	 * iter2: HEVC device-wide controls. Same best-effort pattern as
	 * H.264 above — separate batched call so a kernel that does not
	 * advertise HEVC controls (e.g. hantro-vpu-dec on RK3568/RK3399)
	 * silently fails on this batch without invalidating the H.264
	 * batch. rkvdec on RK3399 advertises HEVC and accepts FRAME_BASED
	 * + ANNEX_B (only supported menu values per Phase 0 v4l2_inventory).
	 */
	{
		struct v4l2_ext_control hevc_dev_ctrls[2] = {
			{
				.id = V4L2_CID_STATELESS_HEVC_DECODE_MODE,
				.value = V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED,
			},
			{
				.id = V4L2_CID_STATELESS_HEVC_START_CODE,
				.value = V4L2_STATELESS_HEVC_START_CODE_ANNEX_B,
			},
		};
		(void)v4l2_set_controls(driver_data->video_fd, -1,
					hevc_dev_ctrls, 2);
	}

	/*
	 * Mirror the ANNEX_B start-code mode set on the device above
	 * into context_object->h264_start_code so picture.c::
	 * codec_store_buffer prepends 0x00 0x00 0x01 to each slice
	 * payload it copies into the OUTPUT buffer. Without this, the
	 * kernel — which we just told to expect ANNEX_B — sees a raw
	 * NAL stream with no start codes, fails to find slice
	 * boundaries, and emits a zeroed CAPTURE buffer (visually a
	 * flat dark-green frame).
	 *
	 * iter4 fix: this start-code prepend is ANNEX-B-specific and
	 * applies to H.264 and HEVC ONLY. MPEG-2, VP8, and VP9 use raw
	 * frame bitstreams without start codes — prepending 0x00 0x00 0x01
	 * to a VP9 uncompressed header produces a frame_marker mismatch
	 * (kernel reads 0x00 instead of 0x10), the rkvdec driver silently
	 * fails to find a valid frame, and the CAPTURE slot stays at its
	 * cap_pool init pattern (a dim 0x4c green). Phase 7 verification
	 * caught this for VP9; iter1+iter3 transitive proof masked it for
	 * MPEG-2/VP8 because those iters compared payload bytes, not
	 * decoded pixels.
	 *
	 * h264_get_controls() exists for this purpose but is never
	 * called in the current code path; the planned probe-then-set
	 * commit will replace this hardcoded assignment with a runtime
	 * read of the kernel's accepted START_CODE value.
	 */
	switch (config_object->profile) {
	case VAProfileH264Main:
	case VAProfileH264High:
	case VAProfileH264ConstrainedBaseline:
	case VAProfileH264MultiviewHigh:
	case VAProfileH264StereoHigh:
	case VAProfileHEVCMain:
		context_object->h264_start_code = true;
		break;
	default:
		context_object->h264_start_code = false;
		break;
	}

	rc = v4l2_set_stream(driver_data->video_fd, output_type, true);
	if (rc < 0) {
		status = VA_STATUS_ERROR_OPERATION_FAILED;
		goto error;
	}

	rc = v4l2_set_stream(driver_data->video_fd, capture_type, true);
	if (rc < 0) {
		status = VA_STATUS_ERROR_OPERATION_FAILED;
		goto error;
	}

	context_object->config_id = config_id;
	context_object->render_surface_id = VA_INVALID_ID;
	context_object->surfaces_ids = ids;
	context_object->surfaces_count = surfaces_count;
	context_object->picture_width = picture_width;
	context_object->picture_height = picture_height;
	context_object->flags = flags;

	*context_id = id;

	status = VA_STATUS_SUCCESS;
	goto complete;

error:
	if (ids != NULL)
		free(ids);

	if (context_object != NULL)
		object_heap_free(&driver_data->context_heap,
				 (struct object_base *)context_object);

complete:
	return status;
}

VAStatus RequestDestroyContext(VADriverContextP context, VAContextID context_id)
{
	struct request_data *driver_data = context->pDriverData;
	struct object_context *context_object;
	struct video_format *video_format;
	unsigned int output_type, capture_type;
	VAStatus status;
	int rc;

	video_format = driver_data->video_format;
	if (video_format == NULL)
		return VA_STATUS_ERROR_OPERATION_FAILED;

	output_type = v4l2_type_video_output(video_format->v4l2_mplane);
	capture_type = v4l2_type_video_capture(video_format->v4l2_mplane);

	context_object = CONTEXT(driver_data, context_id);
	if (context_object == NULL)
		return VA_STATUS_ERROR_INVALID_CONTEXT;

	rc = v4l2_set_stream(driver_data->video_fd, output_type, false);
	if (rc < 0)
		return VA_STATUS_ERROR_OPERATION_FAILED;

	rc = v4l2_set_stream(driver_data->video_fd, capture_type, false);
	if (rc < 0)
		return VA_STATUS_ERROR_OPERATION_FAILED;

	/* Buffers liberation */

	status = RequestDestroySurfaces(context, context_object->surfaces_ids,
					context_object->surfaces_count);
	if (status != VA_STATUS_SUCCESS)
		return VA_STATUS_ERROR_OPERATION_FAILED;

	free(context_object->surfaces_ids);

	object_heap_free(&driver_data->context_heap,
			 (struct object_base *)context_object);

	/*
	 * iter5b-β: tear down the OUTPUT pool (mmap unmaps) BEFORE
	 * REQBUFS(0) frees the kernel-side buffers. Pre-β this was done
	 * only by surface.c's resolution-change branch — which β removed.
	 * Without this here, the next CreateContext's request_pool_init
	 * sees pool->initialized=true with stale slot pointers, returns
	 * 0 without re-CREATE_BUFS, and the next QBUF EINVALs because
	 * the slots reference buffer indices that no longer exist
	 * (Phase 5 v2 review CRIT-2).
	 */
	if (driver_data->output_pool.initialized)
		request_pool_destroy(&driver_data->output_pool);

	rc = v4l2_request_buffers(driver_data->video_fd, output_type, 0);
	if (rc < 0)
		return VA_STATUS_ERROR_OPERATION_FAILED;

	/*
	 * Iter2 Fix 3 (still relevant under β): cap_pool owns the
	 * CAPTURE buffers' mmaps + any outstanding our_export_fds. Tear
	 * it down (which also issues REQBUFS(0) on CAPTURE), so the next
	 * CreateContext cycle sees a clean slate.
	 */
	cap_pool_destroy(&driver_data->capture_pool, driver_data->video_fd,
			 capture_type);

	/*
	 * iter5b-β: driver_data->video_format is a static-ref pointer
	 * into video.c's formats[]; it stays valid for the life of the
	 * driver_data and intentionally survives DestroyContext cycles.
	 * The next CreateContext's `if (!driver_data->video_format)`
	 * guard skips the probe — correct, because the device's CAPTURE
	 * format menu doesn't change.
	 *
	 * The pre-β surface_reset_format_cache() call here is removed:
	 * β doesn't have a last_output_{width,height,pixelformat} cache
	 * (those fields are deleted). Each CreateContext is a fresh
	 * S_FMT(OUTPUT) cycle.
	 *
	 * Commit D: invalidate the format-uniform cache so a CreateSurfaces2
	 * call between DestroyContext and the next CreateContext doesn't
	 * lazy-fill with stale geometry from the now-torn-down session.
	 * The next CreateContext re-populates the cache.
	 */
	driver_data->fmt_valid = false;

	return VA_STATUS_SUCCESS;
}