Files
libva-v4l2-request-fourier/src/picture.c
T
test0r 841f616e74 h264: gate SCALING_MATRIX submission on VAIQMatrixBuffer presence
VAAPI signals "explicit scaling lists are present in the bitstream"
implicitly: the consumer (ffmpeg-vaapi, mpv, etc.) sends a
VAIQMatrixBufferH264 alongside RenderPicture iff
sps_scaling_matrix_present_flag || pps_scaling_matrix_present_flag.
When the bitstream uses default (flat) scaling, no IQMatrixBuffer
arrives and the in-tree h264.matrix struct stays zero-initialised.

fourier's existing codec_store_buffer for MPEG2 and HEVC tracks this
via a per-surface iqmatrix_set boolean (surface.h::mpeg2.iqmatrix_set,
h265.iqmatrix_set) — the H.264 path was missing the equivalent flag,
so set_controls always submitted the scaling matrix, including the
zero-initialised case.

Symptom on hantro-vpu RK3568: when TRANSFORM_8X8_MODE is enabled in
PPS, the kernel multiplies all 8x8 DCT coefficients by the zeroed
scaling_list_8x8, producing a zeroed CAPTURE buffer despite a
successful decode round-trip (no V4L2_BUF_FLAG_ERROR,
bytesused=3655712 reported).

Earlier draft of this patch unconditionally omitted SCALING_MATRIX in
FRAME_BASED. That's corpus-correct (bbb has no explicit scaling
lists) but the wrong predicate: the kernel-side gating is by
"matrix-supplied vs. not," not by decode mode. Streams that signal
explicit scaling lists must submit SCALING_MATRIX in either mode.

Contract verification (audit_0008_decode_params_2026-05-01.md +
hantro_h264.c::assemble_scaling_list): the kernel uses the supplied
matrix when SCALING_MATRIX is in the control batch and falls back
to spec-defined defaults when absent. Mode-independent.

This patch:
  - surface.h: adds bool matrix_set to params.h264, mirroring
    mpeg2.iqmatrix_set / h265.iqmatrix_set.
  - picture.c codec_store_buffer (H.264 VAIQMatrixBufferType case):
    sets matrix_set = true when the buffer arrives.
  - picture.c RequestBeginPicture: resets matrix_set = false at the
    start of each Begin/Render/End cycle.
  - h264.c h264_set_controls: builds the controls[] array
    incrementally; SPS/PPS/DECODE_PARAMS always; SCALING_MATRIX iff
    matrix_set; SLICE_PARAMS only in SLICE_BASED; PRED_WEIGHTS only
    when both SLICE_BASED and V4L2_H264_CTRL_PRED_WEIGHTS_REQUIRED.

The pre-existing FRAME_BASED-omits-SLICE_PARAMS rule is preserved —
kernel doc ext-ctrls-codec-stateless.rst:752: "When this mode is
selected, the V4L2_CID_STATELESS_H264_SLICE_PARAMS control shall
not be set."

Cross-reference: kernel UAPI section
ext-ctrls-codec-stateless.rst V4L2_CID_STATELESS_H264_SCALING_MATRIX
(matrix supplied iff explicit scaling lists in bitstream) and
hantro_h264.c::assemble_scaling_list (consumes supplied matrix or
falls back to defaults).

Signed-off-by: Markus Fritsche <fritsche.markus@gmail.com>
2026-05-04 09:45:05 +00:00

416 lines
12 KiB
C

/*
* Copyright (C) 2007 Intel Corporation
* Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com>
* Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "picture.h"
#include "buffer.h"
#include "config.h"
#include "context.h"
#include "request.h"
#include "surface.h"
#include "h264.h"
#include "h265.h"
#include "mpeg2.h"
#include <assert.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <sys/ioctl.h>
#include <linux/videodev2.h>
#include "media.h"
#include "utils.h"
#include "v4l2.h"
#include "autoconfig.h"
static VAStatus codec_store_buffer(struct request_data *driver_data,
struct object_context *context,
VAProfile profile,
struct object_surface *surface_object,
struct object_buffer *buffer_object)
{
switch (buffer_object->type) {
case VASliceDataBufferType:
/*
* Since there is no guarantee that the allocation
* order is the same as the submission order (via
* RenderPicture), we can't use a V4L2 buffer directly
* and have to copy from a regular buffer.
*/
if (context->h264_start_code) {
static const char start_code[3] = { 0x00, 0x00, 0x01 };
memcpy(surface_object->source_data +
surface_object->slices_size,
start_code, sizeof(start_code));
surface_object->slices_size += sizeof(start_code);
}
memcpy(surface_object->source_data +
surface_object->slices_size,
buffer_object->data,
buffer_object->size * buffer_object->count);
surface_object->slices_size +=
buffer_object->size * buffer_object->count;
surface_object->slices_count++;
break;
case VAPictureParameterBufferType:
switch (profile) {
case VAProfileMPEG2Simple:
case VAProfileMPEG2Main:
memcpy(&surface_object->params.mpeg2.picture,
buffer_object->data,
sizeof(surface_object->params.mpeg2.picture));
break;
case VAProfileH264Main:
case VAProfileH264High:
case VAProfileH264ConstrainedBaseline:
case VAProfileH264MultiviewHigh:
case VAProfileH264StereoHigh:
memcpy(&surface_object->params.h264.picture,
buffer_object->data,
sizeof(surface_object->params.h264.picture));
break;
case VAProfileHEVCMain:
memcpy(&surface_object->params.h265.picture,
buffer_object->data,
sizeof(surface_object->params.h265.picture));
break;
default:
break;
}
break;
case VASliceParameterBufferType:
switch (profile) {
case VAProfileH264Main:
case VAProfileH264High:
case VAProfileH264ConstrainedBaseline:
case VAProfileH264MultiviewHigh:
case VAProfileH264StereoHigh:
memcpy(&surface_object->params.h264.slice,
buffer_object->data,
sizeof(surface_object->params.h264.slice));
break;
case VAProfileHEVCMain:
memcpy(&surface_object->params.h265.slice,
buffer_object->data,
sizeof(surface_object->params.h265.slice));
break;
default:
break;
}
break;
case VAIQMatrixBufferType:
switch (profile) {
case VAProfileMPEG2Simple:
case VAProfileMPEG2Main:
memcpy(&surface_object->params.mpeg2.iqmatrix,
buffer_object->data,
sizeof(surface_object->params.mpeg2.iqmatrix));
surface_object->params.mpeg2.iqmatrix_set = true;
break;
case VAProfileH264Main:
case VAProfileH264High:
case VAProfileH264ConstrainedBaseline:
case VAProfileH264MultiviewHigh:
case VAProfileH264StereoHigh:
memcpy(&surface_object->params.h264.matrix,
buffer_object->data,
sizeof(surface_object->params.h264.matrix));
surface_object->params.h264.matrix_set = true;
break;
case VAProfileHEVCMain:
memcpy(&surface_object->params.h265.iqmatrix,
buffer_object->data,
sizeof(surface_object->params.h265.iqmatrix));
surface_object->params.h265.iqmatrix_set = true;
break;
default:
break;
}
break;
default:
break;
}
return VA_STATUS_SUCCESS;
}
static VAStatus codec_set_controls(struct request_data *driver_data,
struct object_context *context,
VAProfile profile,
struct object_surface *surface_object)
{
int rc;
switch (profile) {
case VAProfileMPEG2Simple:
case VAProfileMPEG2Main:
rc = mpeg2_set_controls(driver_data, context, surface_object);
if (rc < 0)
return VA_STATUS_ERROR_OPERATION_FAILED;
break;
case VAProfileH264Main:
case VAProfileH264High:
case VAProfileH264ConstrainedBaseline:
case VAProfileH264MultiviewHigh:
case VAProfileH264StereoHigh:
rc = h264_set_controls(driver_data, context, profile,
surface_object);
if (rc < 0)
return VA_STATUS_ERROR_OPERATION_FAILED;
break;
case VAProfileHEVCMain:
/* Fourier-local: HEVC stripped, no HW support on RK3566. */
return VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
default:
return VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
}
return VA_STATUS_SUCCESS;
}
VAStatus RequestBeginPicture(VADriverContextP context, VAContextID context_id,
VASurfaceID surface_id)
{
struct request_data *driver_data = context->pDriverData;
struct object_context *context_object;
struct object_surface *surface_object;
struct request_pool_slot *slot;
int slot_index;
context_object = CONTEXT(driver_data, context_id);
if (context_object == NULL)
return VA_STATUS_ERROR_INVALID_CONTEXT;
surface_object = SURFACE(driver_data, surface_id);
if (surface_object == NULL)
return VA_STATUS_ERROR_INVALID_SURFACE;
if (surface_object->status == VASurfaceRendering)
RequestSyncSurface(context, surface_id);
/*
* Borrow an OUTPUT (bitstream-input) slot from the driver-wide
* pool for the duration of this Begin/Render/End cycle. The
* surface's source_* fields hold the borrow's mmap pointer/size/
* V4L2 buffer index until RequestSyncSurface releases it after
* VIDIOC_DQBUF.
*/
slot_index = request_pool_acquire(&driver_data->output_pool);
if (slot_index < 0)
return VA_STATUS_ERROR_ALLOCATION_FAILED;
slot = request_pool_slot(&driver_data->output_pool,
(unsigned int)slot_index);
if (slot == NULL) {
request_pool_release(&driver_data->output_pool,
(unsigned int)slot_index);
return VA_STATUS_ERROR_ALLOCATION_FAILED;
}
surface_object->source_index = slot->index;
surface_object->source_data = slot->data;
surface_object->source_size = slot->size;
surface_object->slices_size = 0;
surface_object->slices_count = 0;
surface_object->params.h264.matrix_set = false;
surface_object->status = VASurfaceRendering;
context_object->render_surface_id = surface_id;
return VA_STATUS_SUCCESS;
}
VAStatus RequestRenderPicture(VADriverContextP context, VAContextID context_id,
VABufferID *buffers_ids, int buffers_count)
{
struct request_data *driver_data = context->pDriverData;
struct object_context *context_object;
struct object_config *config_object;
struct object_surface *surface_object;
struct object_buffer *buffer_object;
int rc;
int i;
context_object = CONTEXT(driver_data, context_id);
if (context_object == NULL)
return VA_STATUS_ERROR_INVALID_CONTEXT;
config_object = CONFIG(driver_data, context_object->config_id);
if (config_object == NULL)
return VA_STATUS_ERROR_INVALID_CONFIG;
surface_object =
SURFACE(driver_data, context_object->render_surface_id);
if (surface_object == NULL)
return VA_STATUS_ERROR_INVALID_SURFACE;
for (i = 0; i < buffers_count; i++) {
buffer_object = BUFFER(driver_data, buffers_ids[i]);
if (buffer_object == NULL)
return VA_STATUS_ERROR_INVALID_BUFFER;
rc = codec_store_buffer(driver_data, context_object,
config_object->profile,
surface_object, buffer_object);
if (rc != VA_STATUS_SUCCESS)
return rc;
}
return VA_STATUS_SUCCESS;
}
VAStatus RequestEndPicture(VADriverContextP context, VAContextID context_id)
{
struct request_data *driver_data = context->pDriverData;
struct object_context *context_object;
struct object_config *config_object;
struct object_surface *surface_object;
struct video_format *video_format;
unsigned int output_type, capture_type;
int request_fd;
VAStatus status;
int rc;
video_format = driver_data->video_format;
if (video_format == NULL)
return VA_STATUS_ERROR_OPERATION_FAILED;
output_type = v4l2_type_video_output(video_format->v4l2_mplane);
capture_type = v4l2_type_video_capture(video_format->v4l2_mplane);
context_object = CONTEXT(driver_data, context_id);
if (context_object == NULL)
return VA_STATUS_ERROR_INVALID_CONTEXT;
config_object = CONFIG(driver_data, context_object->config_id);
if (config_object == NULL)
return VA_STATUS_ERROR_INVALID_CONFIG;
surface_object =
SURFACE(driver_data, context_object->render_surface_id);
if (surface_object == NULL)
return VA_STATUS_ERROR_INVALID_SURFACE;
gettimeofday(&surface_object->timestamp, NULL);
request_fd = surface_object->request_fd;
if (request_fd < 0) {
request_fd = media_request_alloc(driver_data->media_fd);
if (request_fd < 0)
return VA_STATUS_ERROR_OPERATION_FAILED;
surface_object->request_fd = request_fd;
}
rc = codec_set_controls(driver_data, context_object,
config_object->profile, surface_object);
if (rc != VA_STATUS_SUCCESS)
return rc;
/*
* DEBUG INSTRUMENTATION (0011): write a sentinel pattern into
* the CAPTURE buffer's first 32 bytes BEFORE QBUF. If after
* DQBUF the sentinel survives (per surface.c hex dump), the
* kernel never wrote to this buffer. If the sentinel is gone
* (replaced by zeros), the kernel did write but our CPU read
* sees stale-cached data — cache-coherency issue.
*/
{
unsigned char *p = (unsigned char *)
surface_object->destination_map[0];
if (p != NULL) {
unsigned int i;
for (i = 0; i < 32; i++)
p[i] = 0xab;
}
}
rc = v4l2_queue_buffer(driver_data->video_fd, -1, capture_type, NULL,
surface_object->destination_index, 0,
surface_object->destination_buffers_count);
if (rc < 0)
return VA_STATUS_ERROR_OPERATION_FAILED;
/*
* DEBUG INSTRUMENTATION (0010): hex-dump first 32 bytes of the
* OUTPUT buffer at the moment we hand it to the kernel. Helps
* pin down whether our bitstream prepend logic is correct.
* For a valid ANNEX_B IDR slice the dump should start
* 00 00 01 65 ... (00 00 01 = start code; 0x65 = nal_ref_idc=3,
* nal_unit_type=5 = IDR slice). Removed once Step 1 decode is
* verified working.
*/
{
const unsigned char *p = surface_object->source_data;
char hex[32 * 3 + 1] = { 0 };
unsigned int i, n = surface_object->slices_size < 32 ?
surface_object->slices_size : 32;
for (i = 0; i < n; i++)
snprintf(hex + i * 3, 4, " %02x", p[i]);
request_log("OUTPUT[idx=%u, len=%u]:%s\n",
surface_object->source_index,
surface_object->slices_size, hex);
}
rc = v4l2_queue_buffer(driver_data->video_fd, request_fd, output_type,
&surface_object->timestamp,
surface_object->source_index,
surface_object->slices_size, 1);
if (rc < 0)
return VA_STATUS_ERROR_OPERATION_FAILED;
surface_object->slices_size = 0;
status = RequestSyncSurface(context, context_object->render_surface_id);
if (status != VA_STATUS_SUCCESS)
return status;
context_object->render_surface_id = VA_INVALID_ID;
return VA_STATUS_SUCCESS;
}