/*
 * Copyright (C) 2007 Intel Corporation
 * Copyright (C) 2016 Florent Revest
 * Copyright (C) 2018 Paul Kocialkowski
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "image.h"
#include "buffer.h"
#include "request.h"
#include "surface.h"
#include "video.h"

/*
 * NOTE(review): the system header names below were reconstructed from the
 * symbols this file uses (memset/memcpy, O_RDONLY, close, ioctl,
 * struct dma_buf_sync); confirm the list against the build.
 */
#include <assert.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

#include <sys/ioctl.h>

#include <linux/dma-buf.h>

#include "tiled_yuv.h"
#include "utils.h"
#include "v4l2.h"

/*
 * Create a VAImage (and its backing VAImageBufferType buffer) whose layout
 * is derived from the current V4L2 capture format.
 *
 * The buffer size is the sum of the per-buffer sizes reported by
 * v4l2_get_format(); pitches and offsets are computed assuming NV12.
 *
 * Returns VA_STATUS_SUCCESS on success, VA_STATUS_ERROR_OPERATION_FAILED when
 * no video format is set, the V4L2 format query fails or the plane count does
 * not fit the image description, VA_STATUS_ERROR_ALLOCATION_FAILED when the
 * image object cannot be allocated, or the status of RequestCreateBuffer().
 */
VAStatus RequestCreateImage(VADriverContextP context, VAImageFormat *format,
			    int width, int height, VAImage *image)
{
	struct request_data *driver_data = context->pDriverData;
	unsigned int destination_sizes[VIDEO_MAX_PLANES];
	unsigned int destination_bytesperlines[VIDEO_MAX_PLANES];
	unsigned int destination_planes_count;
	unsigned int planes_count;
	unsigned int format_width, format_height;
	unsigned int size;
	unsigned int offset;
	unsigned int capture_type;
	struct video_format *video_format;
	struct object_image *image_object;
	VABufferID buffer_id;
	VAImageID id;
	VAStatus status;
	unsigned int i;
	int rc;

	video_format = driver_data->video_format;
	if (video_format == NULL)
		return VA_STATUS_ERROR_OPERATION_FAILED;

	capture_type = v4l2_type_video_capture(video_format->v4l2_mplane);

	/*
	 * FIXME: This should be replaced by per-pixelformat handling to
	 * determine the logical plane offsets and sizes;
	 */
	rc = v4l2_get_format(driver_data->video_fd, capture_type,
			     &format_width, &format_height,
			     destination_bytesperlines, destination_sizes,
			     &planes_count);
	if (rc < 0)
		return VA_STATUS_ERROR_OPERATION_FAILED;

	destination_planes_count = video_format->planes_count;

	/*
	 * The logical plane count indexes both the local arrays and the
	 * fixed-size pitches/offsets arrays of VAImage: refuse anything that
	 * would overflow either of them.
	 */
	if (destination_planes_count > VIDEO_MAX_PLANES ||
	    destination_planes_count >
		    sizeof(image->pitches) / sizeof(image->pitches[0]))
		return VA_STATUS_ERROR_OPERATION_FAILED;

	size = 0;

	/* The size returned by V4L2 covers buffers, not logical planes. */
	for (i = 0; i < planes_count; i++)
		size += destination_sizes[i];

	/* Here we calculate the sizes assuming NV12. */

	destination_sizes[0] = destination_bytesperlines[0] * format_height;

	for (i = 1; i < destination_planes_count; i++) {
		destination_bytesperlines[i] = destination_bytesperlines[0];
		destination_sizes[i] = destination_sizes[0] / 2;
	}

	id = object_heap_allocate(&driver_data->image_heap);
	image_object = IMAGE(driver_data, id);
	if (image_object == NULL)
		return VA_STATUS_ERROR_ALLOCATION_FAILED;

	status = RequestCreateBuffer(context, 0, VAImageBufferType, size, 1,
				     NULL, &buffer_id);
	if (status != VA_STATUS_SUCCESS) {
		object_heap_free(&driver_data->image_heap,
				 (struct object_base *)image_object);
		return status;
	}

	memset(image, 0, sizeof(*image));

	image->format = *format;
	image->width = width;
	image->height = height;
	image->buf = buffer_id;
	image->image_id = id;

	image->num_planes = destination_planes_count;
	image->data_size = size;

	/*
	 * Each plane starts where the previous one ends, so the offset is the
	 * running sum of the preceding plane sizes (identical to the size of
	 * plane 0 for the second plane of NV12).
	 */
	offset = 0;
	for (i = 0; i < image->num_planes; i++) {
		image->pitches[i] = destination_bytesperlines[i];
		image->offsets[i] = offset;
		offset += destination_sizes[i];
	}

	image_object->image = *image;

	return VA_STATUS_SUCCESS;
}

/*
 * Destroy an image: release its backing buffer, then its heap object.
 *
 * Returns VA_STATUS_ERROR_INVALID_IMAGE for an unknown image id, or the
 * status of RequestDestroyBuffer() if releasing the buffer fails (the image
 * object is kept in that case).
 */
VAStatus RequestDestroyImage(VADriverContextP context, VAImageID image_id)
{
	struct request_data *driver_data = context->pDriverData;
	struct object_image *image_object;
	VAStatus status;

	image_object = IMAGE(driver_data, image_id);
	if (image_object == NULL)
		return VA_STATUS_ERROR_INVALID_IMAGE;

	status = RequestDestroyBuffer(context, image_object->image.buf);
	if (status != VA_STATUS_SUCCESS)
		return status;

	object_heap_free(&driver_data->image_heap,
			 (struct object_base *)image_object);

	return VA_STATUS_SUCCESS;
}

/*
 * Copy the decoded planes of a surface into the buffer backing an image.
 *
 * Returns VA_STATUS_ERROR_INVALID_BUFFER when the image buffer is unknown,
 * VA_STATUS_ERROR_OPERATION_FAILED when a source or destination pointer is
 * NULL, VA_STATUS_SUCCESS otherwise. Any dma-buf fd exported for cache
 * synchronisation is released on every path past the export.
 */
static VAStatus copy_surface_to_image (struct request_data *driver_data,
				       struct object_surface *surface_object,
				       VAImage *image)
{
	struct object_buffer *buffer_object;
	VAStatus status = VA_STATUS_SUCCESS;
	unsigned int i;
	int sync_fds[VIDEO_MAX_PLANES];
	unsigned int n_sync_fds = 0;

	buffer_object = BUFFER(driver_data, image->buf);
	if (buffer_object == NULL)
		return VA_STATUS_ERROR_INVALID_BUFFER;

	for (i = 0; i < VIDEO_MAX_PLANES; i++)
		sync_fds[i] = -1;

	/*
	 * iter13 α-17: explicit cache sync around the CAPTURE buffer read.
	 *
	 * The CAPTURE buffer is V4L2_MEMORY_MMAP and was mapped at
	 * cap_pool_init time with cached attributes. Kernel decode writes to
	 * the buffer via DMA, which doesn't propagate to the CPU's cache
	 * observer for that virtual mapping. Reading from
	 * surface_object->destination_data[] without an explicit cache
	 * invalidation returns stale data — observed empirically as Bug 4
	 * (H.264 partial-fill) and Bug 5 (HEVC all-zero) when libva went
	 * through the SAME readback path that kdirect ffmpeg-v4l2request +
	 * DRM_PRIME-mmap successfully reads (kdirect's drm-prime mmap
	 * implicitly handles sync).
	 *
	 * DMA_BUF_IOCTL_SYNC(START | READ) makes the CPU mapping coherent
	 * with the producing engine's writes; END releases the sync.
	 * Per V4L2 + dma-buf spec, this is the userspace contract for
	 * cached-mmap'd buffers (Tomasz Figa, linaro-mm-sig 2024-07-11).
	 *
	 * Requires a dma-buf fd: get one via VIDIOC_EXPBUF, sync, close.
	 * Per-call cost is one ioctl pair + one fd open/close per plane.
	 * Could be optimised by caching the EXPBUF fd on the cap_pool slot,
	 * but doing it just-in-time keeps the lifecycle uncomplicated. The
	 * EXPBUF fd's dup count doesn't affect the V4L2 buffer's underlying
	 * pages; closing the fd is a no-op on memory.
	 *
	 * If EXPBUF fails (e.g., consumer-held EXPBUF prevents a second one
	 * — only true for hantro G1 oddity), we skip the sync silently. The
	 * existing pre-iter13 behavior is preserved on the error path.
	 */
	if (surface_object->current_slot != NULL &&
	    driver_data->video_format != NULL) {
		unsigned int capture_type = v4l2_type_video_capture(
			driver_data->video_format->v4l2_mplane);

		if (v4l2_export_buffer(driver_data->video_fd, capture_type,
				       surface_object->destination_index,
				       O_RDONLY, sync_fds,
				       surface_object->destination_buffers_count) >= 0) {
			n_sync_fds = surface_object->destination_buffers_count;

			for (i = 0; i < n_sync_fds; i++) {
				struct dma_buf_sync s = {
					.flags = DMA_BUF_SYNC_START |
						 DMA_BUF_SYNC_READ,
				};

				/* failure is non-fatal: we continue with the read */
				(void)ioctl(sync_fds[i], DMA_BUF_IOCTL_SYNC, &s);
			}
		}
	}

	/*
	 * AV1 film_grain: when this surface is the display surface of a
	 * decode (current_display_picture != current_frame with apply_grain=1),
	 * its slot is NULL because BeginPicture only fired on the decode
	 * surface. Follow the back-link set in av1_set_controls and borrow
	 * the decode surface's destination_data + sizes for the copy.
	 *
	 * NOTE(review): the cache sync above only runs when this surface has
	 * its own slot, so planes borrowed here appear to be read without a
	 * DMA_BUF_IOCTL_SYNC — confirm whether the linked decode surface needs
	 * the same treatment.
	 */
	if (surface_object->current_slot == NULL &&
	    surface_object->linked_decode_surface_id != VA_INVALID_SURFACE) {
		struct object_surface *decode_surface =
			SURFACE(driver_data,
				surface_object->linked_decode_surface_id);

		if (decode_surface != NULL &&
		    decode_surface->current_slot != NULL) {
			/* Mirror the fields we read below. The surface heap
			 * pointer is stable for the surface's lifetime; we
			 * only need destination_data + destination_sizes +
			 * destination_planes_count from it. */
			surface_object->destination_planes_count =
				decode_surface->destination_planes_count;

			for (i = 0; i < decode_surface->destination_planes_count; i++) {
				surface_object->destination_data[i] =
					decode_surface->destination_data[i];
				surface_object->destination_sizes[i] =
					decode_surface->destination_sizes[i];
			}
		}
	}

	for (i = 0; i < surface_object->destination_planes_count; i++) {
		/* AV1 Phase 3 diag: surface NULL-deref hunt. */
		if (buffer_object->data == NULL ||
		    surface_object->destination_data[i] == NULL) {
			request_log("copy_surface_to_image NULL i=%u "
				    "buf_data=%p dest_data=%p dest_size=%u "
				    "planes=%u slot=%p linked=0x%x\n",
				    i, (void *)buffer_object->data,
				    (void *)surface_object->destination_data[i],
				    surface_object->destination_sizes[i],
				    surface_object->destination_planes_count,
				    (void *)surface_object->current_slot,
				    surface_object->linked_decode_surface_id);

			/*
			 * Do not return from here: the exported dma-buf fds
			 * still have to be synced and closed below.
			 */
			status = VA_STATUS_ERROR_OPERATION_FAILED;
			goto release_sync;
		}

#ifdef __arm__
		if (!video_format_is_linear(driver_data->video_format))
			tiled_to_planar(surface_object->destination_data[i],
					buffer_object->data + image->offsets[i],
					image->pitches[i], image->width,
					i == 0 ? image->height :
						 image->height / 2);
		else {
#endif
			memcpy(buffer_object->data + image->offsets[i],
			       surface_object->destination_data[i],
			       surface_object->destination_sizes[i]);
#ifdef __arm__
		}
#endif
	}

release_sync:
	/* iter13 α-17: release cache sync. END pairs with each START. */
	for (i = 0; i < n_sync_fds; i++) {
		struct dma_buf_sync s = {
			.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_READ,
		};

		/* Slots the export left untouched still hold -1. */
		if (sync_fds[i] < 0)
			continue;

		(void)ioctl(sync_fds[i], DMA_BUF_IOCTL_SYNC, &s);
		close(sync_fds[i]);
	}

	return status;
}

/*
 * Create an NV12 image sized like the surface and, when the surface has a
 * CAPTURE slot bound, fill it with the surface contents. The image buffer
 * records the surface it was derived from.
 *
 * A surface in VASurfaceRendering state is synchronised first. On failure
 * after the image was created, the image is destroyed again and its ids in
 * *image are reset to VA_INVALID_ID.
 */
VAStatus RequestDeriveImage(VADriverContextP context, VASurfaceID surface_id,
			    VAImage *image)
{
	struct request_data *driver_data = context->pDriverData;
	struct object_surface *surface_object;
	struct object_buffer *buffer_object;
	VAImageFormat format;
	VAStatus status;

	surface_object = SURFACE(driver_data, surface_id);
	if (surface_object == NULL)
		return VA_STATUS_ERROR_INVALID_SURFACE;

	if (surface_object->status == VASurfaceRendering) {
		status = RequestSyncSurface(context, surface_id);
		if (status != VA_STATUS_SUCCESS)
			return status;
	}

	/* Fully populate VAImageFormat to match QueryImageFormats output. */
	memset(&format, 0, sizeof(format));
	format.fourcc = VA_FOURCC_NV12;
	format.byte_order = VA_LSB_FIRST;
	format.bits_per_pixel = 12;

	status = RequestCreateImage(context, &format, surface_object->width,
				    surface_object->height, image);
	if (status != VA_STATUS_SUCCESS)
		return status;

	/*
	 * Iter2 Fix 3: skip the surface→image copy when no CAPTURE slot is
	 * bound. ffmpeg's av_hwframe_ctx_init probes vaDeriveImage on a
	 * never-decoded surface to learn the format; it doesn't read the
	 * data. With the cap_pool decoupling, destination_data[] is NULL
	 * until BeginPicture binds a slot — copying from a NULL source
	 * crashed in memcpy. The image's buffer remains zero-initialized;
	 * subsequent post-decode DeriveImage on the same surface (after
	 * BeginPicture has bound a slot) does the real copy.
	 */
	if (surface_object->current_slot != NULL) {
		status = copy_surface_to_image (driver_data, surface_object,
						image);
		if (status != VA_STATUS_SUCCESS)
			goto error_image;
	}

	buffer_object = BUFFER(driver_data, image->buf);
	if (buffer_object == NULL) {
		status = VA_STATUS_ERROR_INVALID_BUFFER;
		goto error_image;
	}

	surface_object->status = VASurfaceReady;
	buffer_object->derived_surface_id = surface_id;

	return VA_STATUS_SUCCESS;

error_image:
	/*
	 * The image and its buffer were created above and the caller only
	 * sees the error: release them here (best effort) so they do not
	 * leak, and invalidate the ids handed back.
	 */
	(void)RequestDestroyImage(context, image->image_id);
	image->image_id = VA_INVALID_ID;
	image->buf = VA_INVALID_ID;

	return status;
}

/*
 * Report the supported image formats: a single NV12 entry written to
 * formats[0], with *formats_count set to 1.
 */
VAStatus RequestQueryImageFormats(VADriverContextP context,
				  VAImageFormat *formats, int *formats_count)
{
	/*
	 * Populate the VAImageFormat fully per VAAPI spec for NV12 —
	 * not just .fourcc. Consumers (FFmpeg's hwcontext_vaapi, mpv,
	 * Firefox) read .byte_order and .bits_per_pixel; leaving them
	 * uninitialized inherits whatever caller-stack garbage is in
	 * the buffer and produces non-deterministic behavior. Reference:
	 * Mesa's gallium/frontends/va/image.c::vlVaQueryImageFormats and
	 * intel-vaapi-driver's i965_drv_video.c — both publish NV12
	 * with byte_order=VA_LSB_FIRST and bits_per_pixel=12.
	 *
	 * For YUV formats, depth/red_mask/green_mask/blue_mask/alpha_mask
	 * are not meaningful (those describe RGB bit layouts); leave them
	 * zeroed via memset before populating.
	 */
	memset(&formats[0], 0, sizeof(formats[0]));
	formats[0].fourcc = VA_FOURCC_NV12;
	formats[0].byte_order = VA_LSB_FIRST;
	formats[0].bits_per_pixel = 12;
	*formats_count = 1;

	return VA_STATUS_SUCCESS;
}

/* Image palettes are not supported: always VA_STATUS_ERROR_UNIMPLEMENTED. */
VAStatus RequestSetImagePalette(VADriverContextP context, VAImageID image_id,
				unsigned char *palette)
{
	return VA_STATUS_ERROR_UNIMPLEMENTED;
}

/*
 * Copy a whole surface into an existing image.
 *
 * Only the full-image case is handled: a non-zero origin or a size that
 * differs from the image dimensions yields VA_STATUS_ERROR_UNIMPLEMENTED.
 */
VAStatus RequestGetImage(VADriverContextP context, VASurfaceID surface_id,
			 int x, int y, unsigned int width, unsigned int height,
			 VAImageID image_id)
{
	struct request_data *driver_data = context->pDriverData;
	struct object_surface *surface_object;
	struct object_image *image_object;
	VAImage *image;

	surface_object = SURFACE(driver_data, surface_id);
	if (surface_object == NULL)
		return VA_STATUS_ERROR_INVALID_SURFACE;

	image_object = IMAGE(driver_data, image_id);
	if (image_object == NULL)
		return VA_STATUS_ERROR_INVALID_IMAGE;

	image = &image_object->image;
	if (x != 0 || y != 0 || width != image->width ||
	    height != image->height)
		return VA_STATUS_ERROR_UNIMPLEMENTED;

	return copy_surface_to_image (driver_data, surface_object, image);
}

/* Uploading an image to a surface is not supported. */
VAStatus RequestPutImage(VADriverContextP context, VASurfaceID surface_id,
			 VAImageID image, int src_x, int src_y,
			 unsigned int src_width, unsigned int src_height,
			 int dst_x, int dst_y, unsigned int dst_width,
			 unsigned int dst_height)
{
	return VA_STATUS_ERROR_UNIMPLEMENTED;
}