/*
 * Copyright (C) 2007 Intel Corporation
 * Copyright (C) 2016 Florent Revest
 * Copyright (C) 2018 Paul Kocialkowski
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "picture.h"
#include "buffer.h"
#include "config.h"
#include "context.h"
#include "request.h"
#include "surface.h"

#include "h264.h"
#include "h265.h"
#include "mpeg2.h"
#include "vp8.h"
#include "vp9.h"

#include <assert.h>
#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/time.h>
#include <sys/types.h>

#include <linux/videodev2.h>

#include "media.h"
#include "utils.h"
#include "v4l2.h"

#include "autoconfig.h"

/*
 * iter28b DIAG (env-gated): trim trailing N bytes from VAAPI's slice_data
 * buffer for HEVC. Tests the hypothesis that ffmpeg-vaapi inflates
 * slice_data_size vs ffmpeg-v4l2request. Set LIBVA_HEVC_TRIM_TRAILING=N to
 * trim. For BBB frame 2 with libva 5549 vs kdirect 5509: try N=40.
 *
 * Returns slice_bytes unchanged for non-HEVC profiles, when the variable is
 * unset, or when the requested trim is not smaller than slice_bytes.
 */
static size_t slice_data_trimmed_size(VAProfile profile, size_t slice_bytes)
{
	const char *trim_env;
	unsigned long trim;

	if (profile != VAProfileHEVCMain)
		return slice_bytes;

	trim_env = getenv("LIBVA_HEVC_TRIM_TRAILING");
	if (trim_env == NULL)
		return slice_bytes;

	trim = strtoul(trim_env, NULL, 0);
	if (trim < slice_bytes)
		slice_bytes -= trim;

	return slice_bytes;
}

/*
 * Append one slice data buffer (optionally preceded by a 00 00 01 start
 * code when context->h264_start_code is set) to the surface's bitstream
 * area and account for it in slices_size / slices_count.
 *
 * Since there is no guarantee that the allocation order is the same as the
 * submission order (via RenderPicture), we can't use a V4L2 buffer directly
 * and have to copy from a regular buffer.
 *
 * The copy is refused with VA_STATUS_ERROR_INVALID_BUFFER, before anything
 * is written, when either pointer is NULL, when size * count overflows, or
 * when the data would not fit in the source_size bytes that BeginPicture
 * recorded for the borrowed slot.
 */
static VAStatus codec_store_slice_data(struct object_context *context,
				       VAProfile profile,
				       struct object_surface *surface_object,
				       struct object_buffer *buffer_object)
{
	static const char start_code[3] = { 0x00, 0x00, 0x01 };
	size_t element_size = buffer_object->size;
	size_t element_count = buffer_object->count;
	size_t capacity = surface_object->source_size;
	size_t used = surface_object->slices_size;
	size_t available;
	size_t slice_bytes;
	unsigned char *destination;

	if (surface_object->source_data == NULL || buffer_object->data == NULL)
		return VA_STATUS_ERROR_INVALID_BUFFER;

	/* Widen before multiplying so the product cannot wrap silently. */
	if (element_count != 0 && element_size > SIZE_MAX / element_count)
		return VA_STATUS_ERROR_INVALID_BUFFER;

	slice_bytes = slice_data_trimmed_size(profile,
					      element_size * element_count);

	/* Validate the whole write up front so a failure leaves no residue. */
	if (used > capacity)
		return VA_STATUS_ERROR_INVALID_BUFFER;

	available = capacity - used;

	if (context->h264_start_code) {
		if (available < sizeof(start_code))
			return VA_STATUS_ERROR_INVALID_BUFFER;

		available -= sizeof(start_code);
	}

	if (slice_bytes > available)
		return VA_STATUS_ERROR_INVALID_BUFFER;

	destination = (unsigned char *)surface_object->source_data + used;

	if (context->h264_start_code) {
		memcpy(destination, start_code, sizeof(start_code));
		destination += sizeof(start_code);
		used += sizeof(start_code);
	}

	memcpy(destination, buffer_object->data, slice_bytes);
	used += slice_bytes;

	surface_object->slices_size = used;
	surface_object->slices_count++;

	return VA_STATUS_SUCCESS;
}

/*
 * Copy a picture parameter buffer into the per-codec slot of the surface.
 * Profiles without a picture parameter slot are ignored.
 *
 * NOTE(review): this and the other parameter copies below read
 * sizeof(destination) bytes from the client buffer without comparing against
 * buffer_object->size * count — confirm that buffer creation guarantees the
 * buffer is at least that large.
 */
static void codec_store_picture_params(VAProfile profile,
				       struct object_surface *surface_object,
				       const void *data)
{
	switch (profile) {
	case VAProfileMPEG2Simple:
	case VAProfileMPEG2Main:
		memcpy(&surface_object->params.mpeg2.picture, data,
		       sizeof(surface_object->params.mpeg2.picture));
		break;

	case VAProfileH264Main:
	case VAProfileH264High:
	case VAProfileH264ConstrainedBaseline:
	case VAProfileH264MultiviewHigh:
	case VAProfileH264StereoHigh:
		memcpy(&surface_object->params.h264.picture, data,
		       sizeof(surface_object->params.h264.picture));
		break;

	case VAProfileHEVCMain:
		memcpy(&surface_object->params.h265.picture, data,
		       sizeof(surface_object->params.h265.picture));
		break;

	case VAProfileVP8Version0_3:
		memcpy(&surface_object->params.vp8.picture, data,
		       sizeof(surface_object->params.vp8.picture));
		break;

	case VAProfileVP9Profile0:
		memcpy(&surface_object->params.vp9.picture, data,
		       sizeof(surface_object->params.vp9.picture));
		break;

	default:
		break;
	}
}

/*
 * Copy a slice parameter buffer into the per-codec slot of the surface.
 *
 * For HEVC every slice is appended to params.h265.slices[] until
 * HEVC_MAX_SLICES_PER_FRAME entries are stored (further slices are not
 * appended), and the single .slice mirror always receives the latest one.
 */
static void codec_store_slice_params(VAProfile profile,
				     struct object_surface *surface_object,
				     const void *data)
{
	switch (profile) {
	case VAProfileH264Main:
	case VAProfileH264High:
	case VAProfileH264ConstrainedBaseline:
	case VAProfileH264MultiviewHigh:
	case VAProfileH264StereoHigh:
		memcpy(&surface_object->params.h264.slice, data,
		       sizeof(surface_object->params.h264.slice));
		break;

	case VAProfileHEVCMain: {
		unsigned int n = surface_object->params.h265.num_slices;

		if (n < HEVC_MAX_SLICES_PER_FRAME) {
			memcpy(&surface_object->params.h265.slices[n], data,
			       sizeof(VASliceParameterBufferHEVC));
			surface_object->params.h265.num_slices = n + 1;
		}

		/*
		 * Keep .slice mirror populated as last-slice ref for
		 * h265_fill_pps which reads dependent_slice_segment_flag
		 */
		memcpy(&surface_object->params.h265.slice, data,
		       sizeof(surface_object->params.h265.slice));
		break;
	}

	case VAProfileVP8Version0_3:
		memcpy(&surface_object->params.vp8.slice, data,
		       sizeof(surface_object->params.vp8.slice));
		break;

	case VAProfileVP9Profile0:
		memcpy(&surface_object->params.vp9.slice, data,
		       sizeof(surface_object->params.vp9.slice));
		break;

	default:
		break;
	}
}

/*
 * Copy an inverse-quantisation matrix buffer into the per-codec slot of the
 * surface and raise the matching "*_set" flag.
 */
static void codec_store_iqmatrix(VAProfile profile,
				 struct object_surface *surface_object,
				 const void *data)
{
	switch (profile) {
	case VAProfileMPEG2Simple:
	case VAProfileMPEG2Main:
		memcpy(&surface_object->params.mpeg2.iqmatrix, data,
		       sizeof(surface_object->params.mpeg2.iqmatrix));
		surface_object->params.mpeg2.iqmatrix_set = true;
		break;

	case VAProfileH264Main:
	case VAProfileH264High:
	case VAProfileH264ConstrainedBaseline:
	case VAProfileH264MultiviewHigh:
	case VAProfileH264StereoHigh:
		memcpy(&surface_object->params.h264.matrix, data,
		       sizeof(surface_object->params.h264.matrix));
		surface_object->params.h264.matrix_set = true;
		break;

	case VAProfileHEVCMain:
		memcpy(&surface_object->params.h265.iqmatrix, data,
		       sizeof(surface_object->params.h265.iqmatrix));
		surface_object->params.h265.iqmatrix_set = true;
		break;

	case VAProfileVP8Version0_3:
		memcpy(&surface_object->params.vp8.iqmatrix, data,
		       sizeof(surface_object->params.vp8.iqmatrix));
		surface_object->params.vp8.iqmatrix_set = true;
		break;

	default:
		break;
	}
}

/*
 * Copy a probability buffer into the surface; only VP8 has a slot for it.
 */
static void codec_store_probability(VAProfile profile,
				    struct object_surface *surface_object,
				    const void *data)
{
	switch (profile) {
	case VAProfileVP8Version0_3:
		memcpy(&surface_object->params.vp8.probability, data,
		       sizeof(surface_object->params.vp8.probability));
		surface_object->params.vp8.probability_set = true;
		break;

	default:
		break;
	}
}

/*
 * Stash the contents of one VA buffer on the surface being rendered,
 * dispatching on the buffer type and then on the codec profile.
 *
 * driver_data is unused here; it is kept for signature stability.
 *
 * Buffer types that are not handled are accepted and ignored. For handled
 * types, VA_STATUS_ERROR_INVALID_BUFFER is returned when the buffer has no
 * data or (slice data only) when it does not fit in the bitstream area;
 * otherwise VA_STATUS_SUCCESS.
 */
static VAStatus codec_store_buffer(struct request_data *driver_data,
				   struct object_context *context,
				   VAProfile profile,
				   struct object_surface *surface_object,
				   struct object_buffer *buffer_object)
{
	(void)driver_data;

	switch (buffer_object->type) {
	case VASliceDataBufferType:
		return codec_store_slice_data(context, profile, surface_object,
					      buffer_object);

	case VAPictureParameterBufferType:
		if (buffer_object->data == NULL)
			return VA_STATUS_ERROR_INVALID_BUFFER;

		codec_store_picture_params(profile, surface_object,
					   buffer_object->data);
		break;

	case VASliceParameterBufferType:
		if (buffer_object->data == NULL)
			return VA_STATUS_ERROR_INVALID_BUFFER;

		codec_store_slice_params(profile, surface_object,
					 buffer_object->data);
		break;

	case VAIQMatrixBufferType:
		if (buffer_object->data == NULL)
			return VA_STATUS_ERROR_INVALID_BUFFER;

		codec_store_iqmatrix(profile, surface_object,
				     buffer_object->data);
		break;

	case VAProbabilityBufferType:
		if (buffer_object->data == NULL)
			return VA_STATUS_ERROR_INVALID_BUFFER;

		codec_store_probability(profile, surface_object,
					buffer_object->data);
		break;

	default:
		break;
	}

	return VA_STATUS_SUCCESS;
}

/*
 * Hand the stored parameters to the codec-specific *_set_controls routine
 * selected by the profile.
 *
 * Returns VA_STATUS_ERROR_UNSUPPORTED_PROFILE for a profile with no codec
 * backend here, VA_STATUS_ERROR_OPERATION_FAILED when the backend reports a
 * negative result, VA_STATUS_SUCCESS otherwise.
 */
static VAStatus codec_set_controls(struct request_data *driver_data,
				   struct object_context *context,
				   VAProfile profile,
				   struct object_surface *surface_object)
{
	int rc;

	switch (profile) {
	case VAProfileMPEG2Simple:
	case VAProfileMPEG2Main:
		rc = mpeg2_set_controls(driver_data, context, surface_object);
		break;

	case VAProfileH264Main:
	case VAProfileH264High:
	case VAProfileH264ConstrainedBaseline:
	case VAProfileH264MultiviewHigh:
	case VAProfileH264StereoHigh:
		rc = h264_set_controls(driver_data, context, profile,
				       surface_object);
		break;

	case VAProfileHEVCMain:
		rc = h265_set_controls(driver_data, context, surface_object);
		break;

	case VAProfileVP8Version0_3:
		rc = vp8_set_controls(driver_data, context, surface_object);
		break;

	case VAProfileVP9Profile0:
		rc = vp9_set_controls(driver_data, context, surface_object);
		break;

	default:
		return VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
	}

	if (rc < 0)
		return VA_STATUS_ERROR_OPERATION_FAILED;

	return VA_STATUS_SUCCESS;
}

/*
 * iter8 Phase 7 IMP-1 experiment: env-gated CAPTURE buffer pre-zero.
 * LIBVA_V4L2_ZERO_CAPTURE=1 wipes the slot's mmap'd region before kernel
 * decode. Discriminates "kernel writes partial then aborts" from "kernel
 * writes nothing and we see stale residue."
 *
 * The environment is read once and the result cached in function-local
 * statics; nothing happens unless the value starts with '1'.
 */
static void capture_slot_zero_if_requested(struct cap_pool_slot *cap_slot)
{
	static const char *zero_env = NULL;
	static bool zero_env_checked = false;
	unsigned int b;

	if (!zero_env_checked) {
		zero_env = getenv("LIBVA_V4L2_ZERO_CAPTURE");
		zero_env_checked = true;
	}

	if (zero_env == NULL || zero_env[0] != '1')
		return;

	for (b = 0; b < cap_slot->buffers_count; b++) {
		if (cap_slot->map[b] != NULL)
			memset(cap_slot->map[b], 0, cap_slot->map_lengths[b]);
	}
}

/*
 * Start a decode cycle for surface_id on the given context.
 *
 * Finishes any decode still pending on the surface, drops the CAPTURE slot
 * left over from a previous cycle, then acquires one OUTPUT (bitstream) slot
 * and one CAPTURE slot, resets the per-frame accumulation state and marks
 * the surface as the context's render target.
 *
 * Returns VA_STATUS_ERROR_INVALID_CONTEXT / _INVALID_SURFACE for unknown
 * ids and VA_STATUS_ERROR_ALLOCATION_FAILED when either pool has no slot to
 * give. On an allocation failure no slot remains held by the surface.
 */
VAStatus RequestBeginPicture(VADriverContextP context, VAContextID context_id,
			     VASurfaceID surface_id)
{
	struct request_data *driver_data = context->pDriverData;
	struct object_context *context_object;
	struct object_surface *surface_object;
	struct request_pool_slot *slot;
	struct cap_pool_slot *cap_slot;
	int slot_index;

	context_object = CONTEXT(driver_data, context_id);
	if (context_object == NULL)
		return VA_STATUS_ERROR_INVALID_CONTEXT;

	surface_object = SURFACE(driver_data, surface_id);
	if (surface_object == NULL)
		return VA_STATUS_ERROR_INVALID_SURFACE;

	if (surface_object->status == VASurfaceRendering)
		RequestSyncSurface(context, surface_id);

	/*
	 * Iter2 Fix 3: acquire a CAPTURE-pool slot for this decode cycle.
	 * If the surface still holds a slot from a prior decode (DECODED
	 * or EXPORTED — the consumer is done with it by definition since
	 * we got back to BeginPicture for the same surface), release it
	 * first. The new slot is bound and its V4L2 index + mmap pointers
	 * are mirrored into surface_object->destination_* so the existing
	 * QBUF/DQBUF/EXPBUF code paths see no behavioral change.
	 */
	if (surface_object->current_slot != NULL)
		surface_unbind_slot(driver_data, surface_object);

	/*
	 * Borrow an OUTPUT (bitstream-input) slot from the driver-wide
	 * pool for the duration of this Begin/Render/End cycle. The
	 * surface's source_* fields hold the borrow's mmap pointer/size/
	 * V4L2 buffer index until RequestSyncSurface releases it after
	 * VIDIOC_DQBUF.
	 *
	 * This happens before the CAPTURE slot is bound so that a failure
	 * in either acquisition can be undone with request_pool_release
	 * alone and never leaves a CAPTURE slot attached to the surface.
	 */
	slot_index = request_pool_acquire(&driver_data->output_pool);
	if (slot_index < 0)
		return VA_STATUS_ERROR_ALLOCATION_FAILED;

	slot = request_pool_slot(&driver_data->output_pool,
				 (unsigned int)slot_index);
	if (slot == NULL)
		goto error_release_output;

	cap_slot = cap_pool_acquire(&driver_data->capture_pool, surface_id);
	if (cap_slot == NULL)
		goto error_release_output;

	surface_bind_slot(surface_object, cap_slot);
	capture_slot_zero_if_requested(cap_slot);

	surface_object->source_index = slot->index;
	surface_object->source_data = slot->data;
	surface_object->source_size = slot->size;

	/*
	 * iter6: bind the slot's permanent request_fd to this surface for the
	 * duration of the decode cycle. Replaces the iter4 close+alloc-per-
	 * frame model. The fd is REINIT'd (not closed) at RequestSyncSurface,
	 * so the kernel-side request object is reset in place — no fd-reuse
	 * race with another slot's pending decode.
	 */
	surface_object->request_fd = slot->request_fd;

	/*
	 * Reset per-frame accumulation state.
	 * NOTE(review): mpeg2.iqmatrix_set and h265.iqmatrix_set are raised in
	 * codec_store_buffer but not cleared here — confirm that is intended.
	 */
	surface_object->slices_size = 0;
	surface_object->slices_count = 0;
	surface_object->params.h264.matrix_set = false;
	surface_object->params.h265.num_slices = 0;
	surface_object->params.vp8.iqmatrix_set = false;
	surface_object->params.vp8.probability_set = false;

	surface_object->status = VASurfaceRendering;
	context_object->render_surface_id = surface_id;

	return VA_STATUS_SUCCESS;

error_release_output:
	request_pool_release(&driver_data->output_pool,
			     (unsigned int)slot_index);
	return VA_STATUS_ERROR_ALLOCATION_FAILED;
}

/*
 * Store each of the given VA buffers on the context's current render
 * surface, in order, stopping at the first failure.
 *
 * Returns VA_STATUS_ERROR_INVALID_CONTEXT / _INVALID_CONFIG /
 * _INVALID_SURFACE when the corresponding object cannot be resolved,
 * VA_STATUS_ERROR_INVALID_BUFFER for a missing id array or an unknown
 * buffer id, or whatever codec_store_buffer reports.
 */
VAStatus RequestRenderPicture(VADriverContextP context, VAContextID context_id,
			      VABufferID *buffers_ids, int buffers_count)
{
	struct request_data *driver_data = context->pDriverData;
	struct object_context *context_object;
	struct object_config *config_object;
	struct object_surface *surface_object;
	struct object_buffer *buffer_object;
	VAStatus status;
	int i;

	context_object = CONTEXT(driver_data, context_id);
	if (context_object == NULL)
		return VA_STATUS_ERROR_INVALID_CONTEXT;

	config_object = CONFIG(driver_data, context_object->config_id);
	if (config_object == NULL)
		return VA_STATUS_ERROR_INVALID_CONFIG;

	surface_object =
		SURFACE(driver_data, context_object->render_surface_id);
	if (surface_object == NULL)
		return VA_STATUS_ERROR_INVALID_SURFACE;

	if (buffers_count > 0 && buffers_ids == NULL)
		return VA_STATUS_ERROR_INVALID_BUFFER;

	for (i = 0; i < buffers_count; i++) {
		buffer_object = BUFFER(driver_data, buffers_ids[i]);
		if (buffer_object == NULL)
			return VA_STATUS_ERROR_INVALID_BUFFER;

		status = codec_store_buffer(driver_data, context_object,
					    config_object->profile,
					    surface_object, buffer_object);
		if (status != VA_STATUS_SUCCESS)
			return status;
	}

	return VA_STATUS_SUCCESS;
}

/*
 * iter14 α-16: env-gated dump of OUTPUT bitstream bytes immediately
 * before QBUF. LIBVA_V4L2_DUMP_OUTPUT=<dir> writes
 * source_data[0..slices_size] to
 * <dir>/output_p<profile>_s<surface id>_t<timestamp counter>.bin.
 * Discriminates whether libva writes the same H.264/HEVC slice bytes
 * as kdirect — if YES, Bug 4/5 are not in the OUTPUT-side; if NO,
 * narrow to which slice-write path produces the divergence.
 *
 * Off by default; no behavior change when env unset. The environment is
 * read once and cached. A path that does not fit the local buffer is
 * reported and skipped instead of being opened truncated.
 */
static void dump_output_bitstream(VAProfile profile,
				  struct object_context *context_object,
				  struct object_surface *surface_object)
{
	static const char *dump_env = NULL;
	static bool dump_env_checked = false;
	char path[256];
	size_t written;
	int length;
	FILE *fp;

	if (!dump_env_checked) {
		dump_env = getenv("LIBVA_V4L2_DUMP_OUTPUT");
		dump_env_checked = true;
	}

	if (dump_env == NULL || dump_env[0] == '\0')
		return;

	if (surface_object->source_data == NULL ||
	    surface_object->slices_size == 0)
		return;

	length = snprintf(path, sizeof(path), "%s/output_p%d_s%u_t%llu.bin",
			  dump_env, (int)profile,
			  (unsigned int)surface_object->base.id,
			  (unsigned long long)context_object->timestamp_counter);
	if (length < 0 || (size_t)length >= sizeof(path)) {
		request_log("α-16: dump path under %s does not fit in %zu "
			    "bytes, skipping\n",
			    dump_env, sizeof(path));
		return;
	}

	fp = fopen(path, "wb");
	if (fp == NULL) {
		request_log("α-16: fopen(%s) failed: %s\n", path,
			    strerror(errno));
		return;
	}

	written = fwrite(surface_object->source_data, 1,
			 surface_object->slices_size, fp);
	request_log("α-16: dumped %zu bytes to %s "
		    "(slices_count=%u)\n",
		    written, path, surface_object->slices_count);
	fclose(fp);
}

/*
 * Finish the decode cycle on the context's current render surface: stamp
 * the frame, program the codec controls, queue the CAPTURE and OUTPUT
 * buffers, then wait for completion through RequestSyncSurface.
 *
 * Returns VA_STATUS_ERROR_OPERATION_FAILED when no video format is
 * configured, when the surface has no request fd, or when a buffer cannot
 * be queued; lookup failures and codec/sync errors are passed through.
 *
 * NOTE(review): the failure returns after the surface lookup leave
 * render_surface_id and the surface status untouched; the BeginPicture
 * comments say the OUTPUT slot is released in RequestSyncSurface — confirm
 * that these paths do not strand it.
 */
VAStatus RequestEndPicture(VADriverContextP context, VAContextID context_id)
{
	struct request_data *driver_data = context->pDriverData;
	struct object_context *context_object;
	struct object_config *config_object;
	struct object_surface *surface_object;
	struct video_format *video_format;
	unsigned int output_type, capture_type;
	int request_fd;
	VAStatus status;
	int rc;

	video_format = driver_data->video_format;
	if (video_format == NULL)
		return VA_STATUS_ERROR_OPERATION_FAILED;

	output_type = v4l2_type_video_output(video_format->v4l2_mplane);
	capture_type = v4l2_type_video_capture(video_format->v4l2_mplane);

	context_object = CONTEXT(driver_data, context_id);
	if (context_object == NULL)
		return VA_STATUS_ERROR_INVALID_CONTEXT;

	config_object = CONFIG(driver_data, context_object->config_id);
	if (config_object == NULL)
		return VA_STATUS_ERROR_INVALID_CONFIG;

	surface_object =
		SURFACE(driver_data, context_object->render_surface_id);
	if (surface_object == NULL)
		return VA_STATUS_ERROR_INVALID_SURFACE;

	/*
	 * iter9 α-7: monotonic per-context counter instead of gettimeofday,
	 * so DPB.reference_ts / OUTPUT QBUF ts stay small (matches
	 * ffmpeg-v4l2request's pattern). gettimeofday's giant ns may or may
	 * not be load-bearing for rkvdec's reference resolution — Phase 5
	 * reviewer flagged this as low-probability (VP9/MPEG-2 use the same
	 * pattern and PASS), but this is the only remaining wire-byte diff.
	 *
	 * The counter is split as if it were a microsecond count.
	 */
	context_object->timestamp_counter++;
	surface_object->timestamp.tv_sec =
		(time_t)(context_object->timestamp_counter / 1000000);
	surface_object->timestamp.tv_usec =
		(suseconds_t)(context_object->timestamp_counter % 1000000);

	/*
	 * iter6: request_fd was bound to the surface in BeginPicture from
	 * the OUTPUT pool slot's permanent fd. Per-frame allocation is gone.
	 */
	request_fd = surface_object->request_fd;
	if (request_fd < 0)
		return VA_STATUS_ERROR_OPERATION_FAILED;

	status = codec_set_controls(driver_data, context_object,
				    config_object->profile, surface_object);
	if (status != VA_STATUS_SUCCESS)
		return status;

	dump_output_bitstream(config_object->profile, context_object,
			      surface_object);

	/* The CAPTURE buffer is queued outside of the request (fd -1). */
	rc = v4l2_queue_buffer(driver_data->video_fd, -1, capture_type, NULL,
			       surface_object->destination_index, 0,
			       surface_object->destination_buffers_count);
	if (rc < 0)
		return VA_STATUS_ERROR_OPERATION_FAILED;

	rc = v4l2_queue_buffer(driver_data->video_fd, request_fd, output_type,
			       &surface_object->timestamp,
			       surface_object->source_index,
			       surface_object->slices_size, 1);
	if (rc < 0)
		return VA_STATUS_ERROR_OPERATION_FAILED;

	surface_object->slices_size = 0;

	status = RequestSyncSurface(context,
				    context_object->render_surface_id);
	if (status != VA_STATUS_SUCCESS)
		return status;

	context_object->render_surface_id = VA_INVALID_ID;

	return VA_STATUS_SUCCESS;
}