forked from marfrit/libva-v4l2-request-fourier
841f616e74
VAAPI signals "explicit scaling lists are present in the bitstream"
implicitly: the consumer (ffmpeg-vaapi, mpv, etc.) sends a
VAIQMatrixBufferH264 alongside RenderPicture iff
sps_scaling_matrix_present_flag || pps_scaling_matrix_present_flag.
When the bitstream uses default (flat) scaling, no IQMatrixBuffer
arrives and the in-tree h264.matrix struct stays zero-initialised.
fourier's existing codec_store_buffer for MPEG2 and HEVC tracks this
via a per-surface iqmatrix_set boolean (surface.h::mpeg2.iqmatrix_set,
h265.iqmatrix_set) — the H.264 path was missing the equivalent flag,
so set_controls always submitted the scaling matrix, including the
zero-initialised case.
Symptom on hantro-vpu RK3568: when TRANSFORM_8X8_MODE is enabled in
PPS, the kernel multiplies all 8x8 DCT coefficients by the zeroed
scaling_list_8x8, producing a zeroed CAPTURE buffer despite a
successful decode round-trip (no V4L2_BUF_FLAG_ERROR,
bytesused=3655712 reported).
An earlier draft of this patch unconditionally omitted SCALING_MATRIX in
FRAME_BASED mode. That happens to be correct for the test corpus (bbb has
no explicit scaling lists), but it is the wrong predicate: the kernel-side
gating is by "matrix supplied vs. not supplied", not by decode mode.
Streams that signal explicit scaling lists must submit SCALING_MATRIX in
either mode.
Contract verification (audit_0008_decode_params_2026-05-01.md +
hantro_h264.c::assemble_scaling_list): the kernel uses the supplied
matrix when SCALING_MATRIX is in the control batch and falls back
to spec-defined defaults when absent. Mode-independent.
This patch:
- surface.h: adds bool matrix_set to params.h264, mirroring
mpeg2.iqmatrix_set / h265.iqmatrix_set.
- picture.c codec_store_buffer (H.264 VAIQMatrixBufferType case):
sets matrix_set = true when the buffer arrives.
- picture.c RequestBeginPicture: resets matrix_set = false at the
start of each Begin/Render/End cycle.
- h264.c h264_set_controls: builds the controls[] array
incrementally; SPS/PPS/DECODE_PARAMS always; SCALING_MATRIX iff
matrix_set; SLICE_PARAMS only in SLICE_BASED; PRED_WEIGHTS only
when both SLICE_BASED and V4L2_H264_CTRL_PRED_WEIGHTS_REQUIRED.
The pre-existing FRAME_BASED-omits-SLICE_PARAMS rule is preserved —
kernel doc ext-ctrls-codec-stateless.rst:752: "When this mode is
selected, the V4L2_CID_STATELESS_H264_SLICE_PARAMS control shall
not be set."
Cross-reference: kernel UAPI section
ext-ctrls-codec-stateless.rst V4L2_CID_STATELESS_H264_SCALING_MATRIX
(matrix supplied iff explicit scaling lists in bitstream) and
hantro_h264.c::assemble_scaling_list (consumes supplied matrix or
falls back to defaults).
Signed-off-by: Markus Fritsche <fritsche.markus@gmail.com>
416 lines
12 KiB
C
416 lines
12 KiB
C
/*
|
|
* Copyright (C) 2007 Intel Corporation
|
|
* Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com>
|
|
* Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the
|
|
* "Software"), to deal in the Software without restriction, including
|
|
* without limitation the rights to use, copy, modify, merge, publish,
|
|
* distribute, sub license, and/or sell copies of the Software, and to
|
|
* permit persons to whom the Software is furnished to do so, subject to
|
|
* the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice (including the
|
|
* next paragraph) shall be included in all copies or substantial portions
|
|
* of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
|
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
|
|
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
|
|
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
*/
|
|
|
|
#include "picture.h"
|
|
#include "buffer.h"
|
|
#include "config.h"
|
|
#include "context.h"
|
|
#include "request.h"
|
|
#include "surface.h"
|
|
|
|
#include "h264.h"
|
|
#include "h265.h"
|
|
#include "mpeg2.h"
|
|
|
|
#include <assert.h>
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
|
|
#include <errno.h>
|
|
|
|
#include <sys/ioctl.h>
|
|
|
|
#include <linux/videodev2.h>
|
|
|
|
#include "media.h"
|
|
#include "utils.h"
|
|
#include "v4l2.h"
|
|
|
|
#include "autoconfig.h"
|
|
|
|
static VAStatus codec_store_buffer(struct request_data *driver_data,
|
|
struct object_context *context,
|
|
VAProfile profile,
|
|
struct object_surface *surface_object,
|
|
struct object_buffer *buffer_object)
|
|
{
|
|
switch (buffer_object->type) {
|
|
case VASliceDataBufferType:
|
|
/*
|
|
* Since there is no guarantee that the allocation
|
|
* order is the same as the submission order (via
|
|
* RenderPicture), we can't use a V4L2 buffer directly
|
|
* and have to copy from a regular buffer.
|
|
*/
|
|
if (context->h264_start_code) {
|
|
static const char start_code[3] = { 0x00, 0x00, 0x01 };
|
|
|
|
memcpy(surface_object->source_data +
|
|
surface_object->slices_size,
|
|
start_code, sizeof(start_code));
|
|
surface_object->slices_size += sizeof(start_code);
|
|
}
|
|
memcpy(surface_object->source_data +
|
|
surface_object->slices_size,
|
|
buffer_object->data,
|
|
buffer_object->size * buffer_object->count);
|
|
surface_object->slices_size +=
|
|
buffer_object->size * buffer_object->count;
|
|
surface_object->slices_count++;
|
|
break;
|
|
|
|
case VAPictureParameterBufferType:
|
|
switch (profile) {
|
|
case VAProfileMPEG2Simple:
|
|
case VAProfileMPEG2Main:
|
|
memcpy(&surface_object->params.mpeg2.picture,
|
|
buffer_object->data,
|
|
sizeof(surface_object->params.mpeg2.picture));
|
|
break;
|
|
|
|
case VAProfileH264Main:
|
|
case VAProfileH264High:
|
|
case VAProfileH264ConstrainedBaseline:
|
|
case VAProfileH264MultiviewHigh:
|
|
case VAProfileH264StereoHigh:
|
|
memcpy(&surface_object->params.h264.picture,
|
|
buffer_object->data,
|
|
sizeof(surface_object->params.h264.picture));
|
|
break;
|
|
|
|
case VAProfileHEVCMain:
|
|
memcpy(&surface_object->params.h265.picture,
|
|
buffer_object->data,
|
|
sizeof(surface_object->params.h265.picture));
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
break;
|
|
|
|
case VASliceParameterBufferType:
|
|
switch (profile) {
|
|
case VAProfileH264Main:
|
|
case VAProfileH264High:
|
|
case VAProfileH264ConstrainedBaseline:
|
|
case VAProfileH264MultiviewHigh:
|
|
case VAProfileH264StereoHigh:
|
|
memcpy(&surface_object->params.h264.slice,
|
|
buffer_object->data,
|
|
sizeof(surface_object->params.h264.slice));
|
|
break;
|
|
|
|
case VAProfileHEVCMain:
|
|
memcpy(&surface_object->params.h265.slice,
|
|
buffer_object->data,
|
|
sizeof(surface_object->params.h265.slice));
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
break;
|
|
|
|
case VAIQMatrixBufferType:
|
|
switch (profile) {
|
|
case VAProfileMPEG2Simple:
|
|
case VAProfileMPEG2Main:
|
|
memcpy(&surface_object->params.mpeg2.iqmatrix,
|
|
buffer_object->data,
|
|
sizeof(surface_object->params.mpeg2.iqmatrix));
|
|
surface_object->params.mpeg2.iqmatrix_set = true;
|
|
break;
|
|
|
|
case VAProfileH264Main:
|
|
case VAProfileH264High:
|
|
case VAProfileH264ConstrainedBaseline:
|
|
case VAProfileH264MultiviewHigh:
|
|
case VAProfileH264StereoHigh:
|
|
memcpy(&surface_object->params.h264.matrix,
|
|
buffer_object->data,
|
|
sizeof(surface_object->params.h264.matrix));
|
|
surface_object->params.h264.matrix_set = true;
|
|
break;
|
|
|
|
case VAProfileHEVCMain:
|
|
memcpy(&surface_object->params.h265.iqmatrix,
|
|
buffer_object->data,
|
|
sizeof(surface_object->params.h265.iqmatrix));
|
|
surface_object->params.h265.iqmatrix_set = true;
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
|
|
return VA_STATUS_SUCCESS;
|
|
}
|
|
|
|
static VAStatus codec_set_controls(struct request_data *driver_data,
|
|
struct object_context *context,
|
|
VAProfile profile,
|
|
struct object_surface *surface_object)
|
|
{
|
|
int rc;
|
|
|
|
switch (profile) {
|
|
case VAProfileMPEG2Simple:
|
|
case VAProfileMPEG2Main:
|
|
rc = mpeg2_set_controls(driver_data, context, surface_object);
|
|
if (rc < 0)
|
|
return VA_STATUS_ERROR_OPERATION_FAILED;
|
|
break;
|
|
|
|
case VAProfileH264Main:
|
|
case VAProfileH264High:
|
|
case VAProfileH264ConstrainedBaseline:
|
|
case VAProfileH264MultiviewHigh:
|
|
case VAProfileH264StereoHigh:
|
|
rc = h264_set_controls(driver_data, context, profile,
|
|
surface_object);
|
|
if (rc < 0)
|
|
return VA_STATUS_ERROR_OPERATION_FAILED;
|
|
break;
|
|
|
|
case VAProfileHEVCMain:
|
|
/* Fourier-local: HEVC stripped, no HW support on RK3566. */
|
|
return VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
|
|
|
|
default:
|
|
return VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
|
|
}
|
|
|
|
return VA_STATUS_SUCCESS;
|
|
}
|
|
|
|
VAStatus RequestBeginPicture(VADriverContextP context, VAContextID context_id,
|
|
VASurfaceID surface_id)
|
|
{
|
|
struct request_data *driver_data = context->pDriverData;
|
|
struct object_context *context_object;
|
|
struct object_surface *surface_object;
|
|
struct request_pool_slot *slot;
|
|
int slot_index;
|
|
|
|
context_object = CONTEXT(driver_data, context_id);
|
|
if (context_object == NULL)
|
|
return VA_STATUS_ERROR_INVALID_CONTEXT;
|
|
|
|
surface_object = SURFACE(driver_data, surface_id);
|
|
if (surface_object == NULL)
|
|
return VA_STATUS_ERROR_INVALID_SURFACE;
|
|
|
|
if (surface_object->status == VASurfaceRendering)
|
|
RequestSyncSurface(context, surface_id);
|
|
|
|
/*
|
|
* Borrow an OUTPUT (bitstream-input) slot from the driver-wide
|
|
* pool for the duration of this Begin/Render/End cycle. The
|
|
* surface's source_* fields hold the borrow's mmap pointer/size/
|
|
* V4L2 buffer index until RequestSyncSurface releases it after
|
|
* VIDIOC_DQBUF.
|
|
*/
|
|
slot_index = request_pool_acquire(&driver_data->output_pool);
|
|
if (slot_index < 0)
|
|
return VA_STATUS_ERROR_ALLOCATION_FAILED;
|
|
|
|
slot = request_pool_slot(&driver_data->output_pool,
|
|
(unsigned int)slot_index);
|
|
if (slot == NULL) {
|
|
request_pool_release(&driver_data->output_pool,
|
|
(unsigned int)slot_index);
|
|
return VA_STATUS_ERROR_ALLOCATION_FAILED;
|
|
}
|
|
|
|
surface_object->source_index = slot->index;
|
|
surface_object->source_data = slot->data;
|
|
surface_object->source_size = slot->size;
|
|
surface_object->slices_size = 0;
|
|
surface_object->slices_count = 0;
|
|
surface_object->params.h264.matrix_set = false;
|
|
|
|
surface_object->status = VASurfaceRendering;
|
|
context_object->render_surface_id = surface_id;
|
|
|
|
return VA_STATUS_SUCCESS;
|
|
}
|
|
|
|
VAStatus RequestRenderPicture(VADriverContextP context, VAContextID context_id,
|
|
VABufferID *buffers_ids, int buffers_count)
|
|
{
|
|
struct request_data *driver_data = context->pDriverData;
|
|
struct object_context *context_object;
|
|
struct object_config *config_object;
|
|
struct object_surface *surface_object;
|
|
struct object_buffer *buffer_object;
|
|
int rc;
|
|
int i;
|
|
|
|
context_object = CONTEXT(driver_data, context_id);
|
|
if (context_object == NULL)
|
|
return VA_STATUS_ERROR_INVALID_CONTEXT;
|
|
|
|
config_object = CONFIG(driver_data, context_object->config_id);
|
|
if (config_object == NULL)
|
|
return VA_STATUS_ERROR_INVALID_CONFIG;
|
|
|
|
surface_object =
|
|
SURFACE(driver_data, context_object->render_surface_id);
|
|
if (surface_object == NULL)
|
|
return VA_STATUS_ERROR_INVALID_SURFACE;
|
|
|
|
for (i = 0; i < buffers_count; i++) {
|
|
buffer_object = BUFFER(driver_data, buffers_ids[i]);
|
|
if (buffer_object == NULL)
|
|
return VA_STATUS_ERROR_INVALID_BUFFER;
|
|
|
|
rc = codec_store_buffer(driver_data, context_object,
|
|
config_object->profile,
|
|
surface_object, buffer_object);
|
|
if (rc != VA_STATUS_SUCCESS)
|
|
return rc;
|
|
}
|
|
|
|
return VA_STATUS_SUCCESS;
|
|
}
|
|
|
|
VAStatus RequestEndPicture(VADriverContextP context, VAContextID context_id)
|
|
{
|
|
struct request_data *driver_data = context->pDriverData;
|
|
struct object_context *context_object;
|
|
struct object_config *config_object;
|
|
struct object_surface *surface_object;
|
|
struct video_format *video_format;
|
|
unsigned int output_type, capture_type;
|
|
int request_fd;
|
|
VAStatus status;
|
|
int rc;
|
|
|
|
video_format = driver_data->video_format;
|
|
if (video_format == NULL)
|
|
return VA_STATUS_ERROR_OPERATION_FAILED;
|
|
|
|
output_type = v4l2_type_video_output(video_format->v4l2_mplane);
|
|
capture_type = v4l2_type_video_capture(video_format->v4l2_mplane);
|
|
|
|
context_object = CONTEXT(driver_data, context_id);
|
|
if (context_object == NULL)
|
|
return VA_STATUS_ERROR_INVALID_CONTEXT;
|
|
|
|
config_object = CONFIG(driver_data, context_object->config_id);
|
|
if (config_object == NULL)
|
|
return VA_STATUS_ERROR_INVALID_CONFIG;
|
|
|
|
surface_object =
|
|
SURFACE(driver_data, context_object->render_surface_id);
|
|
if (surface_object == NULL)
|
|
return VA_STATUS_ERROR_INVALID_SURFACE;
|
|
|
|
gettimeofday(&surface_object->timestamp, NULL);
|
|
|
|
request_fd = surface_object->request_fd;
|
|
if (request_fd < 0) {
|
|
request_fd = media_request_alloc(driver_data->media_fd);
|
|
if (request_fd < 0)
|
|
return VA_STATUS_ERROR_OPERATION_FAILED;
|
|
|
|
surface_object->request_fd = request_fd;
|
|
}
|
|
|
|
rc = codec_set_controls(driver_data, context_object,
|
|
config_object->profile, surface_object);
|
|
if (rc != VA_STATUS_SUCCESS)
|
|
return rc;
|
|
|
|
/*
|
|
* DEBUG INSTRUMENTATION (0011): write a sentinel pattern into
|
|
* the CAPTURE buffer's first 32 bytes BEFORE QBUF. If after
|
|
* DQBUF the sentinel survives (per surface.c hex dump), the
|
|
* kernel never wrote to this buffer. If the sentinel is gone
|
|
* (replaced by zeros), the kernel did write but our CPU read
|
|
* sees stale-cached data — cache-coherency issue.
|
|
*/
|
|
{
|
|
unsigned char *p = (unsigned char *)
|
|
surface_object->destination_map[0];
|
|
if (p != NULL) {
|
|
unsigned int i;
|
|
for (i = 0; i < 32; i++)
|
|
p[i] = 0xab;
|
|
}
|
|
}
|
|
|
|
rc = v4l2_queue_buffer(driver_data->video_fd, -1, capture_type, NULL,
|
|
surface_object->destination_index, 0,
|
|
surface_object->destination_buffers_count);
|
|
if (rc < 0)
|
|
return VA_STATUS_ERROR_OPERATION_FAILED;
|
|
|
|
/*
|
|
* DEBUG INSTRUMENTATION (0010): hex-dump first 32 bytes of the
|
|
* OUTPUT buffer at the moment we hand it to the kernel. Helps
|
|
* pin down whether our bitstream prepend logic is correct.
|
|
* For a valid ANNEX_B IDR slice the dump should start
|
|
* 00 00 01 65 ... (00 00 01 = start code; 0x65 = nal_ref_idc=3,
|
|
* nal_unit_type=5 = IDR slice). Removed once Step 1 decode is
|
|
* verified working.
|
|
*/
|
|
{
|
|
const unsigned char *p = surface_object->source_data;
|
|
char hex[32 * 3 + 1] = { 0 };
|
|
unsigned int i, n = surface_object->slices_size < 32 ?
|
|
surface_object->slices_size : 32;
|
|
for (i = 0; i < n; i++)
|
|
snprintf(hex + i * 3, 4, " %02x", p[i]);
|
|
request_log("OUTPUT[idx=%u, len=%u]:%s\n",
|
|
surface_object->source_index,
|
|
surface_object->slices_size, hex);
|
|
}
|
|
|
|
rc = v4l2_queue_buffer(driver_data->video_fd, request_fd, output_type,
|
|
&surface_object->timestamp,
|
|
surface_object->source_index,
|
|
surface_object->slices_size, 1);
|
|
if (rc < 0)
|
|
return VA_STATUS_ERROR_OPERATION_FAILED;
|
|
|
|
surface_object->slices_size = 0;
|
|
|
|
status = RequestSyncSurface(context, context_object->render_surface_id);
|
|
if (status != VA_STATUS_SUCCESS)
|
|
return status;
|
|
|
|
context_object->render_surface_id = VA_INVALID_ID;
|
|
|
|
return VA_STATUS_SUCCESS;
|
|
}
|