/*
 * Copyright (C) 2007 Intel Corporation
 * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com>
 * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "request.h"
#include "surface.h"

#include <assert.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>

#include <sys/ioctl.h>
#include <sys/mman.h>

#include <va/va_drmcommon.h>
#include <drm_fourcc.h>
#include <linux/videodev2.h>

#include "media.h"
#include "utils.h"
#include "v4l2.h"
#include "video.h"

/*
 * Per-process cache of the OUTPUT format we've set. The previous
 * SET_FORMAT_OF_OUTPUT_ONCE pattern was a latent bug (Sonnet Phase 5
 * review finding 7.3): mpv probes with small surfaces (e.g. 128x128)
 * before requesting the real resolution (e.g. 1920x1088). The
 * once-only set kept the OUTPUT — and consequently the kernel-derived
 * CAPTURE — format pinned to the probe size. Subsequent
 * v4l2_get_format on CAPTURE then returned the small format, the
 * VADRMPRIMESurfaceDescriptor was filled with width=1920 height=1088
 * but pitch=128 offset=16384, and Mesa rejected the import with
 * "WSI pitch too small." That manifested as the solid-blue render in
 * mpv vaapi mode and the SW fallback in Firefox after frame 0.
 *
 * Fix: track (width, height) and re-set the OUTPUT format whenever
 * the resolution changes. Re-setting requires REQBUFS(0) on both
 * queues first because S_FMT after CREATE_BUFS is rejected by V4L2;
 * we tear down and let the next allocation cycle recreate buffers
 * at the new resolution.
 */
/* Dimensions of the last OUTPUT format set; 0x0 means "nothing set yet". */
static unsigned int LAST_OUTPUT_WIDTH;
static unsigned int LAST_OUTPUT_HEIGHT;

/*
 * Forget the cached OUTPUT resolution so that the next
 * RequestCreateSurfaces2 call sets the OUTPUT format again.
 */
void surface_reset_format_cache(void)
{
	LAST_OUTPUT_WIDTH = LAST_OUTPUT_HEIGHT = 0;
}

/*
 * Iter2 Fix 3 helpers: attach / detach a cap_pool_slot to / from an
 * object_surface. Binding happens in BeginPicture (acquire + bind),
 * unbinding in DestroySurfaces.
 *
 * surface_bind_slot mirrors the slot into the surface's destination_*
 * fields so that the existing consumers of those fields (the QBUF in
 * picture.c::EndPicture, the EXPBUF in ExportSurfaceHandle, the
 * mmap-read in copy_surface_to_image) keep working unchanged.
 *
 * Only the per-slot fields are written here. The format-uniform fields
 * (destination_planes_count, destination_sizes, destination_offsets,
 * destination_bytesperlines) were filled in by CreateSurfaces2 and are
 * only read.
 */
void surface_bind_slot(struct object_surface *surface_object,
		       struct cap_pool_slot *slot)
{
	unsigned int buffer;
	unsigned int plane;

	surface_object->current_slot = slot;
	surface_object->destination_index = slot->v4l2_index;
	surface_object->destination_buffers_count = slot->buffers_count;

	/* Copy the slot's per-buffer mapping bookkeeping. */
	for (buffer = 0; buffer < slot->buffers_count; buffer++) {
		surface_object->destination_map[buffer] = slot->map[buffer];
		surface_object->destination_map_lengths[buffer] =
			slot->map_lengths[buffer];
		surface_object->destination_map_offsets[buffer] =
			slot->map_offsets[buffer];
	}

	/*
	 * destination_data[] holds the per-plane CPU pointer used by
	 * copy_surface_to_image. With a single buffer (the common MPLANE
	 * NV12 case) every plane lives inside slot->map[0], at the offset
	 * recorded in destination_offsets[]; otherwise each plane uses the
	 * mapping with the same index.
	 */
	for (plane = 0; plane < surface_object->destination_planes_count;
	     plane++) {
		if (slot->buffers_count == 1)
			surface_object->destination_data[plane] =
				(unsigned char *)slot->map[0] +
				surface_object->destination_offsets[plane];
		else
			surface_object->destination_data[plane] =
				slot->map[plane];
	}
}

/*
 * Release the capture-pool slot currently bound to surface_object, if any,
 * and clear the binding. A surface without a bound slot is left untouched.
 */
void surface_unbind_slot(struct request_data *driver_data,
			 struct object_surface *surface_object)
{
	if (surface_object->current_slot != NULL) {
		cap_pool_release(&driver_data->capture_pool,
				 surface_object->current_slot);
		surface_object->current_slot = NULL;
	}
}

/*
 * Create surfaces_count surfaces of width x height and store their IDs in
 * surfaces_ids[].
 *
 * Only VA_RT_FORMAT_YUV420 is accepted; the attributes / attributes_count
 * arguments are not consulted. When the requested resolution differs from
 * the last one set, existing buffers are torn down and the OUTPUT format
 * is set again. The CAPTURE layout (bytesperline, plane sizes) is then
 * read back from the driver and copied into every new surface; the
 * CAPTURE buffer itself is only attached later, via surface_bind_slot.
 *
 * Returns VA_STATUS_SUCCESS, VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT,
 * VA_STATUS_ERROR_OPERATION_FAILED (V4L2 format negotiation failed) or
 * VA_STATUS_ERROR_ALLOCATION_FAILED (pool or surface allocation failed,
 * or the format's buffer layout is not one this function can describe).
 *
 * Known limitation: if allocating surface N fails, surfaces 0..N-1 created
 * by the same call are not released here.
 */
VAStatus RequestCreateSurfaces2(VADriverContextP context, unsigned int format,
				unsigned int width, unsigned int height,
				VASurfaceID *surfaces_ids,
				unsigned int surfaces_count,
				VASurfaceAttrib *attributes,
				unsigned int attributes_count)
{
	struct request_data *driver_data = context->pDriverData;
	struct object_surface *surface_object;
	struct video_format *video_format = NULL;
	/*
	 * Zero-initialized: the debug log below prints sizes[1] before this
	 * function computes it, so it must never hold an indeterminate value.
	 */
	unsigned int destination_sizes[VIDEO_MAX_PLANES] = { 0 };
	unsigned int destination_bytesperlines[VIDEO_MAX_PLANES] = { 0 };
	unsigned int destination_planes_count;
	unsigned int format_width, format_height;
	unsigned int pixelformat = V4L2_PIX_FMT_H264_SLICE;
	unsigned int output_type;
	unsigned int capture_type;
	unsigned int i, j;
	VASurfaceID id;
	bool found;
	int rc;

	/*
	 * Reject unsupported render-target formats before touching any
	 * device state: the resolution-change path below destroys buffers
	 * and must not run for a request that is going to fail anyway.
	 */
	if (format != VA_RT_FORMAT_YUV420)
		return VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT;

	/*
	 * Set the OUTPUT format on (re)allocation when the resolution
	 * differs from the last set value. Without this, mpv's small
	 * probe surfaces (128x128) pin the CAPTURE format and the
	 * subsequent real-resolution surface ends up with wrong pitch
	 * in the export descriptor — causing Mesa to reject the
	 * DMA-BUF import. Detail in the LAST_OUTPUT_WIDTH/HEIGHT
	 * comment block at the top of this file.
	 *
	 * TODO: this is still not a clean architecture — v4l2_set_format
	 * after CREATE_BUFS requires REQBUFS(0) first (kernel returns
	 * EBUSY otherwise). For mpv's pattern (probe with small, then
	 * allocate big) the small probe surfaces have not been streamed
	 * yet, so REQBUFS(0) on them works. For consumers that legitimately
	 * stream multiple resolutions in sequence, we'd need to STREAMOFF
	 * + REQBUFS(0) + new S_FMT + new CREATE_BUFS — that's a context-
	 * level redesign for the next iteration.
	 */
	output_type = v4l2_type_video_output(true);

	if (LAST_OUTPUT_WIDTH != width || LAST_OUTPUT_HEIGHT != height) {
		/*
		 * If we've previously allocated buffers at a different
		 * resolution, tear them down on BOTH queues before re-setting
		 * the OUTPUT format. S_FMT is rejected by V4L2 while buffers
		 * exist; hantro derives CAPTURE format from OUTPUT format, so
		 * leftover CAPTURE buffers from the prior resolution would
		 * also block the implicit format change. Sonnet Phase 5
		 * review (iter2 9.1) flagged this as a missing REQBUFS(0)
		 * gap on the CAPTURE side of the resolution-change path.
		 *
		 * Iter2 Fix 3 corollary: cap_pool owns the CAPTURE buffers'
		 * mmaps and slot states. Destroy it (which issues REQBUFS(0)
		 * on capture) before the format change so the pool can be
		 * rebuilt further down at the new resolution. Without this,
		 * pool->initialized stays true, cap_pool_init below is
		 * skipped, and the slots' v4l2_index fields point to dead
		 * buffers from the prior resolution.
		 */
		if (LAST_OUTPUT_WIDTH != 0) {
			if (driver_data->capture_pool.initialized)
				cap_pool_destroy(&driver_data->capture_pool,
						 driver_data->video_fd,
						 v4l2_type_video_capture(true));
			else
				(void)v4l2_request_buffers(driver_data->video_fd,
							   v4l2_type_video_capture(true), 0);
			(void)v4l2_request_buffers(driver_data->video_fd,
						   output_type, 0);
		}

		rc = v4l2_set_format(driver_data->video_fd, output_type, pixelformat,
				     width, height);
		if (rc < 0)
			return VA_STATUS_ERROR_OPERATION_FAILED;

		LAST_OUTPUT_WIDTH = width;
		LAST_OUTPUT_HEIGHT = height;
	}

	if (!driver_data->video_format) {
		/*
		 * First call: pick the CAPTURE pixel format. Both candidates
		 * are probed and the later match wins, so linear NV12 on the
		 * MPLANE queue is preferred over Sunxi tiled NV12.
		 */
		found = v4l2_find_format(driver_data->video_fd,
					 V4L2_BUF_TYPE_VIDEO_CAPTURE,
					 V4L2_PIX_FMT_SUNXI_TILED_NV12);
		if (found)
			video_format = video_format_find(V4L2_PIX_FMT_SUNXI_TILED_NV12);

		found = v4l2_find_format(driver_data->video_fd,
					 V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE,
					 V4L2_PIX_FMT_NV12);
		if (found)
			video_format = video_format_find(V4L2_PIX_FMT_NV12);

		if (video_format == NULL)
			return VA_STATUS_ERROR_OPERATION_FAILED;

		driver_data->video_format = video_format;

		capture_type = v4l2_type_video_capture(video_format->v4l2_mplane);

		/*
		 * Do not VIDIOC_S_FMT on the CAPTURE queue. The hantro
		 * stateless decoder derives the CAPTURE format from the
		 * SPS attached to the OUTPUT request; explicitly setting
		 * it here can put the driver into an inconsistent state.
		 * GStreamer's v4l2slh264dec only G_FMTs CAPTURE (see
		 * gst-plugins-bad/sys/v4l2codecs/gstv4l2decoder.c::
		 * gst_v4l2_decoder_negotiate_src_format), and that
		 * variant produces correct decoded NV12 on the same
		 * hardware where this driver currently emits zeros.
		 *
		 * v4l2_get_format() below queries the driver's current
		 * state and gives us the bytesperline/sizes we need.
		 */
	} else {
		video_format = driver_data->video_format;
		capture_type = v4l2_type_video_capture(video_format->v4l2_mplane);
	}

	rc = v4l2_get_format(driver_data->video_fd, capture_type, &format_width,
			     &format_height, destination_bytesperlines,
			     destination_sizes, NULL);
	if (rc < 0)
		return VA_STATUS_ERROR_OPERATION_FAILED;

	destination_planes_count = video_format->planes_count;

	/*
	 * Only two buffer layouts can be described below: all planes in one
	 * buffer, or one buffer per plane. Check this once, before any pool
	 * or surface object is allocated, so that a bad layout cannot leave
	 * a half-initialized surface behind.
	 */
	if (video_format->v4l2_buffers_count != 1 &&
	    video_format->v4l2_buffers_count != destination_planes_count)
		return VA_STATUS_ERROR_ALLOCATION_FAILED;

	/*
	 * DEBUG INSTRUMENTATION (surface-export diagnosis 2026-05-04):
	 * dump what v4l2_get_format returned. Sonnet's Phase 5 review
	 * hypothesis #4 was that format_height might be 1080 (stream-
	 * signaled) vs 1088 (MB-aligned), causing UV offset to land
	 * 15360 bytes early. Earlier ftrace shows hantro returns
	 * height=1088 — but verify in-driver to be sure.
	 */
	request_log("CreateSurfaces2: surf_width=%u surf_height=%u "
		    "fmt_width=%u fmt_height=%u bytesperline[0]=%u "
		    "sizes[0]=%u sizes[1]=%u planes_count=%u "
		    "v4l2_buffers_count=%u\n",
		    width, height, format_width, format_height,
		    destination_bytesperlines[0],
		    destination_sizes[0], destination_sizes[1],
		    destination_planes_count, video_format->v4l2_buffers_count);

	/*
	 * Iter2 Fix 3: initialize the CAPTURE buffer pool on first call.
	 * Pool size = max(surfaces_count, MIN_CAP_POOL); the +headroom
	 * gives LRU recycling enough margin to never reuse a buffer
	 * within the consumer's compositor-hold window for typical
	 * playback patterns.
	 *
	 * If the pool already exists from a prior CreateSurfaces2 (e.g.
	 * mpv probe surfaces vs. real-resolution surfaces), it stays —
	 * but if the resolution changed (Fix 1's REQBUFS(0) on CAPTURE
	 * fired before this point), the pool was destroyed and we
	 * rebuild here.
	 */
	if (!driver_data->capture_pool.initialized) {
		unsigned int pool_count = surfaces_count > MIN_CAP_POOL ?
					  surfaces_count : MIN_CAP_POOL;
		rc = cap_pool_init(&driver_data->capture_pool,
				   driver_data->video_fd, capture_type,
				   pool_count, video_format->v4l2_buffers_count);
		if (rc < 0)
			return VA_STATUS_ERROR_ALLOCATION_FAILED;
	}

	/*
	 * Compute format-uniform destination_* values (sizes, offsets,
	 * bytesperlines, planes_count). These are the same for all
	 * surfaces of this format, set once per surface here, never
	 * changed by BeginPicture's slot acquisition.
	 *
	 * Single-buffer layout: plane 0 spans bytesperline * height bytes
	 * and every further plane is given half of that.
	 */
	if (video_format->v4l2_buffers_count == 1) {
		destination_sizes[0] = destination_bytesperlines[0] *
				       format_height;
		for (j = 1; j < destination_planes_count; j++)
			destination_sizes[j] = destination_sizes[0] / 2;
	}

	for (i = 0; i < surfaces_count; i++) {
		id = object_heap_allocate(&driver_data->surface_heap);
		surface_object = SURFACE(driver_data, id);
		if (surface_object == NULL)
			return VA_STATUS_ERROR_ALLOCATION_FAILED;

		surface_object->current_slot = NULL;	/* iter2 Fix 3 */
		surface_object->destination_index = 0;	/* set on bind */
		surface_object->destination_planes_count = destination_planes_count;
		surface_object->destination_buffers_count =
			video_format->v4l2_buffers_count;

		if (video_format->v4l2_buffers_count == 1) {
			/*
			 * All planes share one buffer and one pitch; each
			 * plane starts where the previous plane's size ends.
			 */
			for (j = 0; j < destination_planes_count; j++) {
				surface_object->destination_offsets[j] =
					j > 0 ? destination_sizes[j - 1] : 0;
				surface_object->destination_sizes[j] =
					destination_sizes[j];
				surface_object->destination_bytesperlines[j] =
					destination_bytesperlines[0];
			}
		} else {
			/*
			 * One buffer per plane (layout validated above):
			 * every plane starts at offset 0 of its own buffer.
			 */
			for (j = 0; j < destination_planes_count; j++) {
				surface_object->destination_offsets[j] = 0;
				surface_object->destination_sizes[j] =
					destination_sizes[j];
				surface_object->destination_bytesperlines[j] =
					destination_bytesperlines[j];
			}
		}

		surface_object->status = VASurfaceReady;
		surface_object->width = width;
		surface_object->height = height;

		surface_object->source_index = 0;
		surface_object->source_data = NULL;
		surface_object->source_size = 0;

		memset(&surface_object->params, 0,
		       sizeof(surface_object->params));
		surface_object->slices_count = 0;
		surface_object->slices_size = 0;

		/* -1 marks "no media request attached". */
		surface_object->request_fd = -1;

		surfaces_ids[i] = id;
	}

	return VA_STATUS_SUCCESS;
}

/*
 * Legacy vaCreateSurfaces entry point: forwards to RequestCreateSurfaces2
 * with no surface attributes. The signed width, height, format and count
 * are converted to the unsigned parameters of RequestCreateSurfaces2
 * without any range check.
 */
VAStatus RequestCreateSurfaces(VADriverContextP context, int width, int height,
			       int format, int surfaces_count,
			       VASurfaceID *surfaces_ids)
{
	const unsigned int rt_format = format;
	const unsigned int surface_width = width;
	const unsigned int surface_height = height;
	const unsigned int count = surfaces_count;

	return RequestCreateSurfaces2(context, rt_format, surface_width,
				      surface_height, surfaces_ids, count,
				      NULL, 0);
}

/*
 * Destroy the surfaces_count surfaces listed in surfaces_ids[].
 *
 * For each surface: release its capture-pool slot (if bound), close its
 * media request fd (if open) and return the object to the surface heap.
 *
 * Returns VA_STATUS_ERROR_INVALID_SURFACE at the first ID that does not
 * resolve to a surface; surfaces before it have already been destroyed,
 * surfaces after it are left untouched. A non-positive surfaces_count
 * destroys nothing and returns VA_STATUS_SUCCESS.
 */
VAStatus RequestDestroySurfaces(VADriverContextP context,
				VASurfaceID *surfaces_ids, int surfaces_count)
{
	struct request_data *driver_data = context->pDriverData;
	struct object_surface *surface_object;
	/*
	 * Signed on purpose: comparing an unsigned index against a negative
	 * surfaces_count would turn the bound into a huge unsigned value.
	 */
	int i;

	for (i = 0; i < surfaces_count; i++) {
		surface_object = SURFACE(driver_data, surfaces_ids[i]);
		if (surface_object == NULL)
			return VA_STATUS_ERROR_INVALID_SURFACE;

		/*
		 * source_* are now transient borrows from request_pool, not
		 * surface-owned mappings; the pool owns the underlying mmap.
		 * Nothing to free here.
		 *
		 * Iter2 Fix 3: destination_* mappings are owned by cap_pool;
		 * surface_unbind_slot returns the slot to FREE (closing OUR
		 * EXPBUF fd if any). Pool-owned mmaps are freed at
		 * cap_pool_destroy time (RequestDestroyContext).
		 */
		surface_unbind_slot(driver_data, surface_object);

		/*
		 * request_fd uses -1 as the "no request" marker, so every
		 * non-negative value is a live descriptor, 0 included.
		 */
		if (surface_object->request_fd >= 0) {
			close(surface_object->request_fd);
			surface_object->request_fd = -1;
		}

		object_heap_free(&driver_data->surface_heap,
				 (struct object_base *)surface_object);
	}

	return VA_STATUS_SUCCESS;
}

/*
 * Block until decoding into surface_id has finished.
 *
 * A surface that is not in VASurfaceRendering state has nothing pending
 * and yields VA_STATUS_SUCCESS immediately. Otherwise the surface's media
 * request is queued and waited for, the request fd is closed, the OUTPUT
 * and CAPTURE buffers are dequeued, the OUTPUT pool slot is released, the
 * capture slot is marked decoded and the surface becomes
 * VASurfaceDisplaying.
 *
 * Returns VA_STATUS_ERROR_INVALID_SURFACE for an unknown ID and
 * VA_STATUS_ERROR_OPERATION_FAILED when no video format has been
 * negotiated, the surface has no request fd, or any request / dequeue
 * step fails. On failure a still-open request fd is closed.
 */
VAStatus RequestSyncSurface(VADriverContextP context, VASurfaceID surface_id)
{
	struct request_data *driver_data = context->pDriverData;
	struct object_surface *surface_object = NULL;
	VAStatus status;
	struct video_format *video_format;
	unsigned int output_type, capture_type;
	/* Local copy of the request fd; >= 0 only while this function still has to close it. */
	int request_fd = -1;
	int rc;

	video_format = driver_data->video_format;
	if (video_format == NULL) {
		status = VA_STATUS_ERROR_OPERATION_FAILED;
		goto error;
	}

	output_type = v4l2_type_video_output(video_format->v4l2_mplane);
	capture_type = v4l2_type_video_capture(video_format->v4l2_mplane);

	surface_object = SURFACE(driver_data, surface_id);
	if (surface_object == NULL) {
		status = VA_STATUS_ERROR_INVALID_SURFACE;
		goto error;
	}

	if (surface_object->status != VASurfaceRendering) {
		status = VA_STATUS_SUCCESS;
		goto complete;
	}

	request_fd = surface_object->request_fd;
	if (request_fd < 0) {
		status = VA_STATUS_ERROR_OPERATION_FAILED;
		goto error;
	}

	rc = media_request_queue(request_fd);
	if (rc < 0) {
		status = VA_STATUS_ERROR_OPERATION_FAILED;
		goto error;
	}

	rc = media_request_wait_completion(request_fd);
	if (rc < 0) {
		status = VA_STATUS_ERROR_OPERATION_FAILED;
		goto error;
	}

	/*
	 * iter4: instead of REINITing for reuse, close the request_fd here
	 * and force the next BeginPicture to allocate a fresh one. The cached
	 * request_fd path was hitting EINVAL on S_EXT_CTRLS for some surface
	 * recycle patterns (4 individual TRY_EXT_CTRLS on the same fd all
	 * fail with EINVAL — the fd state is bad even though queue+wait+reinit
	 * appeared successful). Allocating fresh per frame is unambiguous and
	 * sidesteps any state-lifecycle issue. Tradeoff: ~one extra ioctl per
	 * frame (MEDIA_IOC_REQUEST_ALLOC + close), negligible cost.
	 *
	 * The local copy is invalidated as well: the dequeue steps below can
	 * still jump to the error label, which must not close this descriptor
	 * number a second time (it may already belong to someone else).
	 */
	close(request_fd);
	request_fd = -1;
	surface_object->request_fd = -1;

	rc = v4l2_dequeue_buffer(driver_data->video_fd, -1, output_type,
				 surface_object->source_index, 1);
	if (rc < 0) {
		status = VA_STATUS_ERROR_OPERATION_FAILED;
		goto error;
	}

	/*
	 * OUTPUT buffer is back from the kernel: return its pool slot
	 * for reuse and clear the surface's transient borrow handle.
	 */
	request_pool_release(&driver_data->output_pool,
			     surface_object->source_index);
	surface_object->source_data = NULL;
	surface_object->source_size = 0;

	rc = v4l2_dequeue_buffer(driver_data->video_fd, -1, capture_type,
				 surface_object->destination_index,
				 surface_object->destination_buffers_count);
	if (rc < 0) {
		status = VA_STATUS_ERROR_OPERATION_FAILED;
		goto error;
	}

	/*
	 * Iter2 Fix 3: CAPTURE buffer is back from the kernel with valid
	 * pixel content. Transition the slot IN_DECODE → DECODED. The slot
	 * stays bound to this surface until either ExportSurfaceHandle
	 * (→ EXPORTED), the next BeginPicture for this surface (slot is
	 * released first), or DestroySurfaces (release).
	 */
	if (surface_object->current_slot != NULL)
		cap_pool_mark_decoded(&driver_data->capture_pool,
				      surface_object->current_slot);

	surface_object->status = VASurfaceDisplaying;

	status = VA_STATUS_SUCCESS;
	goto complete;

error:
	/*
	 * request_fd is only non-negative here when it was read from a valid
	 * surface and has not been closed yet, so surface_object is non-NULL.
	 */
	if (request_fd >= 0) {
		close(request_fd);
		surface_object->request_fd = -1;
	}

complete:
	return status;
}

/*
 * Report the surface attributes supported by this driver.
 *
 * Six attributes are produced: pixel format (NV12), min/max width and
 * height (32..2048) and the supported memory types. The config argument
 * is not consulted. When attributes is NULL only the count is reported;
 * otherwise the entries are copied into attributes, which the caller must
 * have sized for at least that many elements (the incoming value of
 * *attributes_count is not checked).
 *
 * Always returns VA_STATUS_SUCCESS and stores the number of attributes in
 * *attributes_count.
 */
VAStatus RequestQuerySurfaceAttributes(VADriverContextP context,
				       VAConfigID config,
				       VASurfaceAttrib *attributes,
				       unsigned int *attributes_count)
{
	struct request_data *driver_data = context->pDriverData;
	/*
	 * Scratch list with automatic storage: same element count as the
	 * previous heap buffer, but no allocation that can fail and be
	 * dereferenced unchecked.
	 */
	VASurfaceAttrib attributes_list[V4L2_REQUEST_MAX_CONFIG_ATTRIBUTES];
	int memory_types;
	unsigned int i = 0;

	memset(attributes_list, 0, sizeof(attributes_list));

	attributes_list[i].type = VASurfaceAttribPixelFormat;
	attributes_list[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
	attributes_list[i].value.type = VAGenericValueTypeInteger;
	attributes_list[i].value.value.i = VA_FOURCC_NV12;
	i++;

	attributes_list[i].type = VASurfaceAttribMinWidth;
	attributes_list[i].flags = VA_SURFACE_ATTRIB_GETTABLE;
	attributes_list[i].value.type = VAGenericValueTypeInteger;
	attributes_list[i].value.value.i = 32;
	i++;

	attributes_list[i].type = VASurfaceAttribMaxWidth;
	attributes_list[i].flags = VA_SURFACE_ATTRIB_GETTABLE;
	attributes_list[i].value.type = VAGenericValueTypeInteger;
	attributes_list[i].value.value.i = 2048;
	i++;

	attributes_list[i].type = VASurfaceAttribMinHeight;
	attributes_list[i].flags = VA_SURFACE_ATTRIB_GETTABLE;
	attributes_list[i].value.type = VAGenericValueTypeInteger;
	attributes_list[i].value.value.i = 32;
	i++;

	attributes_list[i].type = VASurfaceAttribMaxHeight;
	attributes_list[i].flags = VA_SURFACE_ATTRIB_GETTABLE;
	attributes_list[i].value.type = VAGenericValueTypeInteger;
	attributes_list[i].value.value.i = 2048;
	i++;

	attributes_list[i].type = VASurfaceAttribMemoryType;
	attributes_list[i].flags = VA_SURFACE_ATTRIB_GETTABLE |
				   VA_SURFACE_ATTRIB_SETTABLE;
	attributes_list[i].value.type = VAGenericValueTypeInteger;

	memory_types = VA_SURFACE_ATTRIB_MEM_TYPE_VA |
		VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2;

	/*
	 * First version of DRM prime export does not handle modifiers,
	 * that are required for supporting the tiled output format.
	 */
	if (video_format_is_linear(driver_data->video_format))
		memory_types |= VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME;

	attributes_list[i].value.value.i = memory_types;
	i++;

	/* A NULL attributes pointer is a count-only query. */
	if (attributes != NULL)
		memcpy(attributes, attributes_list, i * sizeof(*attributes));

	*attributes_count = i;

	return VA_STATUS_SUCCESS;
}

/*
 * Store the current status of surface_id in *status and log it.
 *
 * Returns VA_STATUS_ERROR_INVALID_SURFACE (leaving *status untouched) when
 * the ID does not resolve to a surface, VA_STATUS_SUCCESS otherwise.
 */
VAStatus RequestQuerySurfaceStatus(VADriverContextP context,
				   VASurfaceID surface_id,
				   VASurfaceStatus *status)
{
	struct request_data *driver_data = context->pDriverData;
	struct object_surface *surface_object =
		SURFACE(driver_data, surface_id);

	if (surface_object != NULL) {
		*status = surface_object->status;

		request_log("  -> status=%d (Ready=%d Rendering=%d Displaying=%d)\n",
			    *status, VASurfaceReady, VASurfaceRendering,
			    VASurfaceDisplaying);

		return VA_STATUS_SUCCESS;
	}

	return VA_STATUS_ERROR_INVALID_SURFACE;
}

/*
 * vaPutSurface entry point. Not implemented by this driver: every
 * argument is ignored and VA_STATUS_ERROR_UNIMPLEMENTED is returned.
 */
VAStatus RequestPutSurface(VADriverContextP context, VASurfaceID surface_id,
			   void *draw, short src_x, short src_y,
			   unsigned short src_width, unsigned short src_height,
			   short dst_x, short dst_y, unsigned short dst_width,
			   unsigned short dst_height, VARectangle *cliprects,
			   unsigned int cliprects_count, unsigned int flags)
{
	(void)context;
	(void)surface_id;
	(void)draw;
	(void)src_x;
	(void)src_y;
	(void)src_width;
	(void)src_height;
	(void)dst_x;
	(void)dst_y;
	(void)dst_width;
	(void)dst_height;
	(void)cliprects;
	(void)cliprects_count;
	(void)flags;

	return VA_STATUS_ERROR_UNIMPLEMENTED;
}

/*
 * vaLockSurface entry point. Not implemented by this driver: none of the
 * output parameters is written and VA_STATUS_ERROR_UNIMPLEMENTED is
 * returned.
 */
VAStatus RequestLockSurface(VADriverContextP context, VASurfaceID surface_id,
			    unsigned int *fourcc, unsigned int *luma_stride,
			    unsigned int *chroma_u_stride,
			    unsigned int *chroma_v_stride,
			    unsigned int *luma_offset,
			    unsigned int *chroma_u_offset,
			    unsigned int *chroma_v_offset,
			    unsigned int *buffer_name, void **buffer)
{
	(void)context;
	(void)surface_id;
	(void)fourcc;
	(void)luma_stride;
	(void)chroma_u_stride;
	(void)chroma_v_stride;
	(void)luma_offset;
	(void)chroma_u_offset;
	(void)chroma_v_offset;
	(void)buffer_name;
	(void)buffer;

	return VA_STATUS_ERROR_UNIMPLEMENTED;
}

/*
 * vaUnlockSurface entry point. Not implemented by this driver: always
 * returns VA_STATUS_ERROR_UNIMPLEMENTED.
 */
VAStatus RequestUnlockSurface(VADriverContextP context, VASurfaceID surface_id)
{
	(void)context;
	(void)surface_id;

	return VA_STATUS_ERROR_UNIMPLEMENTED;
}

/*
 * Export the CAPTURE buffer(s) behind surface_id as DMA-BUF fds and
 * describe them in *descriptor (a VADRMPRIMESurfaceDescriptor).
 *
 * Only VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2 is supported. The buffers
 * are always exported O_RDONLY. Of the flags argument only
 * VA_EXPORT_SURFACE_SEPARATE_LAYERS is examined: with it (and a two-plane
 * format) the Y and UV planes are described as two single-plane layers
 * (R8 / GR88); otherwise a single layer carries all planes.
 *
 * Returns VA_STATUS_SUCCESS, VA_STATUS_ERROR_UNSUPPORTED_MEMORY_TYPE,
 * VA_STATUS_ERROR_INVALID_SURFACE, or VA_STATUS_ERROR_OPERATION_FAILED
 * (no video format negotiated yet, surface without buffers, out of memory
 * for the fd array, or the V4L2 export failed). On failure every fd that
 * was obtained is closed and the descriptor is not filled in.
 */
VAStatus RequestExportSurfaceHandle(VADriverContextP context,
				    VASurfaceID surface_id, uint32_t mem_type,
				    uint32_t flags, void *descriptor)
{
	struct request_data *driver_data = context->pDriverData;
	VADRMPRIMESurfaceDescriptor *surface_descriptor = descriptor;
	struct object_surface *surface_object;
	struct video_format *video_format;
	int *export_fds = NULL;
	unsigned int export_fds_count;
	unsigned int planes_count;
	unsigned int capture_type;
	unsigned int size;
	unsigned int i;
	VAStatus status;
	int rc;

	video_format = driver_data->video_format;
	if (video_format == NULL)
		return VA_STATUS_ERROR_OPERATION_FAILED;

	if (mem_type != VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2)
		return VA_STATUS_ERROR_UNSUPPORTED_MEMORY_TYPE;

	surface_object = SURFACE(driver_data, surface_id);
	if (surface_object == NULL)
		return VA_STATUS_ERROR_INVALID_SURFACE;

	/* Nothing can be exported from a surface that has no buffers. */
	export_fds_count = surface_object->destination_buffers_count;
	if (export_fds_count == 0)
		return VA_STATUS_ERROR_OPERATION_FAILED;

	export_fds = malloc(export_fds_count * sizeof(*export_fds));
	if (export_fds == NULL)
		return VA_STATUS_ERROR_OPERATION_FAILED;

	/*
	 * Mark every entry as "no fd" up front: the error path closes all
	 * entries >= 0 and must never see indeterminate values if the export
	 * fails before filling the array.
	 */
	for (i = 0; i < export_fds_count; i++)
		export_fds[i] = -1;

	capture_type = v4l2_type_video_capture(video_format->v4l2_mplane);

	rc = v4l2_export_buffer(driver_data->video_fd, capture_type,
				surface_object->destination_index, O_RDONLY,
				export_fds, export_fds_count);
	if (rc < 0) {
		status = VA_STATUS_ERROR_OPERATION_FAILED;
		goto error;
	}

	/*
	 * Iter2 Fix 3: pool now owns OUR copy of the EXPBUF'd fd. The
	 * consumer receives a dup'd / equivalent fd via the descriptor.
	 * Slot transitions DECODED → EXPORTED; it will be force-recyclable
	 * by LRU when the pool is exhausted, but FREE slots are always
	 * preferred.
	 *
	 * NOTE(review): this function performs no dup(); the very same
	 * export_fds[0] value is passed to cap_pool_mark_exported() here and
	 * stored in the descriptor below. Confirm that
	 * cap_pool_mark_exported() duplicates the fd — otherwise the pool and
	 * the consumer would both end up closing one descriptor.
	 */
	if (surface_object->current_slot != NULL)
		cap_pool_mark_exported(&driver_data->capture_pool,
				       surface_object->current_slot,
				       export_fds[0]);

	planes_count = surface_object->destination_planes_count;

	surface_descriptor->fourcc = VA_FOURCC_NV12;
	surface_descriptor->width = surface_object->width;
	surface_descriptor->height = surface_object->height;
	surface_descriptor->num_objects = export_fds_count;

	/* With a single buffer, the object spans the sum of all plane sizes. */
	size = 0;

	if (export_fds_count == 1)
		for (i = 0; i < planes_count; i++)
			size += surface_object->destination_sizes[i];

	/*
	 * Iteration 2 Fix 2: choose drm_format_modifier conditionally on
	 * pitch alignment. Mesa's WSI / Panfrost compositor path rejects
	 * DRM_FORMAT_MOD_NONE (= LINEAR explicit) buffers whose pitch isn't
	 * GPU-aligned (typically 64+ bytes for Mali). For 1920-wide content
	 * the pitch is 1920 (64-aligned, fine); for 864-wide content the
	 * pitch is 864 (only 16-aligned), Mesa rejects with "WSI pitch not
	 * properly aligned" and Firefox falls back to SW.
	 *
	 * Setting DRM_FORMAT_MOD_INVALID tells the importer "modifier
	 * unknown, treat as implicit / texture-only" — Firefox's
	 * DMABufSurface.cpp:1920 explicitly omits modifier attribs from
	 * eglCreateImage when the value is MOD_INVALID, bypassing Mesa's
	 * scanout-alignment check. The buffer is then texture-imported
	 * (small perf cost) instead of WSI scanout-imported, which is
	 * the correct behavior for a buffer that doesn't meet scanout
	 * alignment requirements.
	 *
	 * We branch on pitch alignment to preserve LINEAR semantics for
	 * already-aligned content (avoids unnecessary perf cost on the
	 * common 1920-wide case).
	 *
	 * Sonnet Phase 5 review (iter2 question 4) endorsed this
	 * conditional approach over a universal MOD_INVALID change.
	 */
	for (i = 0; i < export_fds_count; i++) {
		uint64_t modifier = video_format->drm_modifier;
		unsigned int bytesperline =
			surface_object->destination_bytesperlines[0];
		if (bytesperline & 63) /* not 64-byte aligned */
			modifier = DRM_FORMAT_MOD_INVALID;
		surface_descriptor->objects[i].drm_format_modifier = modifier;
		surface_descriptor->objects[i].fd = export_fds[i];
		surface_descriptor->objects[i].size = export_fds_count == 1 ?
						      size :
						      surface_object->destination_sizes[i];
	}

	/*
	 * Layer construction depends on the consumer's request flags
	 * (VA_EXPORT_SURFACE_*_LAYERS):
	 *
	 *   COMPOSED_LAYERS (default, mpv): one layer carrying both
	 *   Y and UV planes (drm_format=NV12, num_planes=2). Mesa
	 *   imports as a single NV12 EGLImage.
	 *
	 *   SEPARATE_LAYERS (Firefox 150 RDD): two layers, Y as a
	 *   single-plane R8 layer, UV as a single-plane GR88 layer.
	 *   Firefox's GetVAAPISurfaceDescriptor passes
	 *   VA_EXPORT_SURFACE_SEPARATE_LAYERS so its DMABufSurfaceYUV
	 *   import code can address Y and UV planes independently.
	 *   Without this branch, Firefox parsed our COMPOSED layout
	 *   as if it were SEPARATE, found bogus layer-1 data, and
	 *   silently fell back to FFmpeg(FFVPX) software decode.
	 *
	 * The earlier path 0001 mplane port assumed a single COMPOSED
	 * shape — fine for mpv but breaks any consumer requesting
	 * SEPARATE. Honor the flag.
	 */
	if ((flags & VA_EXPORT_SURFACE_SEPARATE_LAYERS) && planes_count == 2) {
		surface_descriptor->num_layers = 2;

		/*
		 * Layer 0: Y plane as DRM_FORMAT_R8 (1 byte/pixel luma).
		 * The Y plane always lives in the first exported object.
		 */
		surface_descriptor->layers[0].drm_format = DRM_FORMAT_R8;
		surface_descriptor->layers[0].num_planes = 1;
		surface_descriptor->layers[0].object_index[0] = 0;
		surface_descriptor->layers[0].offset[0] =
			surface_object->destination_offsets[0];
		surface_descriptor->layers[0].pitch[0] =
			surface_object->destination_bytesperlines[0];

		/* Layer 1: UV plane as DRM_FORMAT_GR88 (interleaved
		 * U+V, 2 bytes/pixel chroma at half resolution). */
		surface_descriptor->layers[1].drm_format = DRM_FORMAT_GR88;
		surface_descriptor->layers[1].num_planes = 1;
		surface_descriptor->layers[1].object_index[0] =
			export_fds_count == 1 ? 0 : 1;
		surface_descriptor->layers[1].offset[0] =
			surface_object->destination_offsets[1];
		surface_descriptor->layers[1].pitch[0] =
			surface_object->destination_bytesperlines[1];
	} else {
		/* COMPOSED_LAYERS / default: one layer with all planes. */
		surface_descriptor->num_layers = 1;
		surface_descriptor->layers[0].drm_format = video_format->drm_format;
		surface_descriptor->layers[0].num_planes = planes_count;

		for (i = 0; i < planes_count; i++) {
			surface_descriptor->layers[0].object_index[i] =
				export_fds_count == 1 ? 0 : i;
			surface_descriptor->layers[0].offset[i] =
				surface_object->destination_offsets[i];
			surface_descriptor->layers[0].pitch[i] =
				surface_object->destination_bytesperlines[i];
		}
	}

	/*
	 * DEBUG INSTRUMENTATION (surface-export diagnosis 2026-05-04):
	 * dump the full descriptor so we can compare against what mpv
	 * reports importing via --msg-level=vd=v --msg-level=vo=v.
	 * Phase 5 review identified DMA-BUF surface export as the
	 * likely root cause of the solid-blue render in mpv vaapi mode.
	 *
	 * The p[1] fields are only read when layer 0 really carries a second
	 * plane; in SEPARATE_LAYERS mode this function never wrote them.
	 */
	request_log("ExportSurfaceHandle: surf=%u fd[0]=%d fourcc=0x%x "
		    "w=%u h=%u num_objects=%u num_layers=%u "
		    "obj[0].size=%u drm_fmt=0x%x drm_mod=0x%llx num_planes=%u "
		    "p[0].off=%u pitch=%u  p[1].off=%u pitch=%u\n",
		    surface_id,
		    export_fds[0],
		    surface_descriptor->fourcc,
		    surface_descriptor->width,
		    surface_descriptor->height,
		    surface_descriptor->num_objects,
		    surface_descriptor->num_layers,
		    surface_descriptor->objects[0].size,
		    surface_descriptor->layers[0].drm_format,
		    (unsigned long long)surface_descriptor->objects[0].drm_format_modifier,
		    surface_descriptor->layers[0].num_planes,
		    surface_descriptor->layers[0].offset[0],
		    surface_descriptor->layers[0].pitch[0],
		    surface_descriptor->layers[0].num_planes > 1 ?
			    surface_descriptor->layers[0].offset[1] : 0,
		    surface_descriptor->layers[0].num_planes > 1 ?
			    surface_descriptor->layers[0].pitch[1] : 0);

	status = VA_STATUS_SUCCESS;
	goto complete;

error:
	/* Close whatever the failed export managed to hand out. */
	for (i = 0; i < export_fds_count; i++)
		if (export_fds[i] >= 0)
			close(export_fds[i]);

complete:
	/* Only the fd array is freed; on success the fds themselves live on. */
	free(export_fds);

	return status;
}