forked from marfrit/libva-v4l2-request-fourier
ampere-av1 Phase 2.1: implement av1_set_controls body (~500 LoC)
Replaces stub av1_set_controls with full VAAPI → V4L2 stateless AV1
control translation. Four V4L2 controls batched per-frame:
V4L2_CID_STATELESS_AV1_SEQUENCE (sequence-level flags)
V4L2_CID_STATELESS_AV1_FRAME (heavy — quant, lf, cdef, lr, gm,
tile_info, refs, frame flags)
V4L2_CID_STATELESS_AV1_TILE_GROUP_ENTRY[] (DYNAMIC_ARRAY, size=MAX(N,1))
V4L2_CID_STATELESS_AV1_FILM_GRAIN (gated on driver_data->has_av1_film_grain)
Reference: Kwiboo/FFmpeg v4l2-request-n8.1:libavcodec/v4l2_request_av1.c
(636 LoC); same V4L2 output schema, sourced from VAAPI's
VADecPictureParameterBufferAV1 instead of FFmpeg's AV1RawSequenceHeader.
VAAPI gap notes (fields the spec needs but VAAPI doesn't expose):
- sequence max_frame_{width,height}_minus_1 — use current frame size
- enable_warped_motion / enable_ref_frame_mvs / enable_superres /
enable_restoration sequence-level — conservative set-true (per-frame
flags gate actual behavior)
- order_hints[], reference_frame_ts[] — zero (kernel cross-refs by
OUTPUT timestamp / surface id)
- tile_start_col_sb[] / tile_start_row_sb[] — reconstruct via
prefix-sum on VAAPI's width/height_in_sbs_minus_1[]
- tile_size_bytes — set to 4 for multi-tile frames (max value), 0
for single-tile (matches Kwiboo's conditional)
- render_width/height — fall back to coded dimensions
- current_frame_id / refresh_frame_flags / skip_mode_frame_idx /
buffer_removal_time / frame_refs_short_signaling — zero
- film_grain_params_ref_idx / update_grain — zero (only consulted in
reuse paths; apply_grain=1 + populated arrays drive decode directly)
F1/F2/F3 risk mitigations per phase1_plan_v2:
F1: mi_col/row_starts sentinel = 2 * ((frame_width + 7) >> 3) at
index [tile_cols]/[tile_rows] — mirrors Kwiboo lines 238/244
F2: superres_denom direct from VAAPI's superres_scale_denominator
(VAAPI's encoding is the final value; no AV1_SUPERRES_DENOM_MIN
math). Fallback to AV1_SUPERRES_NUM=8 if zero.
F3: loop_restoration_size[] gated on USES_LR flag derived from
y_t != 0 || cb_t != 0 || cr_t != 0 — mirrors Kwiboo lines 281-287
Plus:
- request.h: has_av1_film_grain bool on driver_data
- request.c: probe VIDIOC_QUERY_EXT_CTRL for FILM_GRAIN on vpu981 fd
at VA_DRIVER_INIT (Janet v3 amendment A: init-time, not lazy)
Compile-tested on boltzmann (aarch64 native, gcc 15.2.1): clean .so,
0 errors, pre-existing GStreamer #warnings only.
Next step is Phase 3 verification on ampere: a 208x208 smoke test plus the
film-grain stress vector (av1-1-b8-23-film_grain-50.ivf), byte-comparing
libva output against kdirect (Phase 0 proved kdirect bit-perfect).
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -1,23 +1,33 @@
|
||||
/*
|
||||
* Copyright (C) 2026 claude-noether <claude-noether@reauktion.de>
|
||||
*
|
||||
* ampere-av1-enablement Phase 2: AV1 codec dispatcher for libva-v4l2-
|
||||
* ampere-av1-enablement Phase 2.1: AV1 codec dispatcher for libva-v4l2-
|
||||
* request-fourier. Translates VAAPI AV1 picture/slice parameter buffers
|
||||
* into V4L2 stateless AV1 controls (V4L2_CID_STATELESS_AV1_*) for the
|
||||
* Rockchip vpu981 hardware on RK3588.
|
||||
*
|
||||
* Reference implementations (field semantics):
|
||||
* - Kwiboo/FFmpeg v4l2-request-n8.1:libavcodec/v4l2_request_av1.c
|
||||
* (636 LoC, reads from FFmpeg's AV1RawSequenceHeader; the v4l2_ctrl
|
||||
* output is identical to what we need)
|
||||
* - ~/src/libva-v4l2-request-fourier/src/vp9.c (architectural pattern:
|
||||
* set_controls / multi-control batch / request_fd dispatch)
|
||||
* Reference: Kwiboo/FFmpeg v4l2-request-n8.1:libavcodec/v4l2_request_av1.c
|
||||
* (636 LoC; reads from FFmpeg's AV1RawSequenceHeader + AV1RawFrameHeader).
|
||||
* VAAPI exposes the same AV1 spec semantics through different struct
|
||||
* shapes: sequence-level fields are folded into VADecPictureParameterBufferAV1
|
||||
* (no separate sequence buffer); per-frame fields live in the same struct.
|
||||
*
|
||||
* F1/F2/F3 risk mitigations per phase1_plan_v2 §"General fill_frame
|
||||
* implementation risks":
|
||||
* F1 tile_info.mi_col/row_starts sentinel = 2 * ((frame_width + 7) >> 3)
|
||||
* mirrors Kwiboo lines 238/244 exactly.
|
||||
* F2 superres_denom: VAAPI exposes superres_scale_denominator directly
|
||||
* and per spec it's already 8 when use_superres=0. No offset math
|
||||
* needed (Kwiboo does it because FFmpeg stores raw coded_denom).
|
||||
* F3 loop_restoration_size[] gated on USES_LR flag mirrors Kwiboo
|
||||
* lines 281-287 exactly.
|
||||
*
|
||||
* V4L2 controls (4 per frame, batched in one VIDIOC_S_EXT_CTRLS):
|
||||
* 1. V4L2_CID_STATELESS_AV1_SEQUENCE (small, set per stream-ish)
|
||||
* 2. V4L2_CID_STATELESS_AV1_FRAME (the heavy one — 8 sub-structs)
|
||||
* 1. V4L2_CID_STATELESS_AV1_SEQUENCE
|
||||
* 2. V4L2_CID_STATELESS_AV1_FRAME
|
||||
* 3. V4L2_CID_STATELESS_AV1_TILE_GROUP_ENTRY[] (DYNAMIC_ARRAY)
|
||||
* 4. V4L2_CID_STATELESS_AV1_FILM_GRAIN (conditional on probe)
|
||||
* 4. V4L2_CID_STATELESS_AV1_FILM_GRAIN (conditional on driver_data->
|
||||
* has_av1_film_grain probe)
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the
|
||||
@@ -53,39 +63,513 @@
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
/* Sanity asserts to catch kernel uAPI drift. If these fire, the kernel
|
||||
* headers on the build machine are out of sync with what the running
|
||||
* driver expects — silent register-misalignment bugs result. */
|
||||
* driver expects — silent register-misalignment bugs result. Cross-compile
|
||||
* hazard per Janet v3 amendment: native-arm64 builds only (boltzmann +
|
||||
* ampere); no cross from x86 against ARM kernel headers. */
|
||||
_Static_assert(sizeof(struct v4l2_ctrl_av1_tile_group_entry) == 16,
|
||||
"v4l2_ctrl_av1_tile_group_entry size drift — recheck uAPI");
|
||||
|
||||
/*
|
||||
* Phase 2 step 4 — stub set_controls. Compiles and links; returns -1
|
||||
* with a clear log message so the test infrastructure sees AV1 dispatch
|
||||
* fail cleanly (not crash) until Phase 2.1 implements the actual field
|
||||
* mappings.
|
||||
*
|
||||
* The full implementation follows Kwiboo's fill_sequence / fill_frame /
|
||||
* fill_film_grain functions, mapping VAAPI AV1 picture parameters
|
||||
* (VADecPictureParameterBufferAV1) to V4L2 control structs. Per Janet
|
||||
* review v2, three implementation-time risks must be specifically
|
||||
* handled:
|
||||
* F1 tile_info.mi_col/row_starts sentinel for multi-tile streams
|
||||
* F2 superres_denom = AV1_SUPERRES_NUM (8) when use_superres=0
|
||||
* F3 loop_restoration_size[] gated on USES_LR flag direction
|
||||
*/
|
||||
/* Per AV1 spec, when use_superres=0 the superres denominator is 8.
|
||||
* VAAPI's superres_scale_denominator already encodes this directly
|
||||
* (per va_dec_av1.h: "When use_superres=0, superres_scale_denominator
|
||||
* must be 8"). Kwiboo's AV1_SUPERRES_DENOM_MIN+coded_denom math is
|
||||
* not needed when reading from VAAPI. */
|
||||
#define AV1_SUPERRES_NUM 8
|
||||
|
||||
/* AV1 spec maxima used for V4L2 array sizing. */
|
||||
#define BACKEND_AV1_MAX_SEGMENTS 8
|
||||
#define BACKEND_AV1_SEG_LVL_MAX 8
|
||||
#define BACKEND_AV1_SEG_LVL_REF_FRAME 5
|
||||
#define BACKEND_AV1_NUM_REF_FRAMES 8
|
||||
#define BACKEND_AV1_TOTAL_REFS_PER_FRAME 8
|
||||
#define BACKEND_AV1_REFS_PER_FRAME 7
|
||||
|
||||
/* ===== fill_sequence ===== */
|
||||
static void av1_fill_sequence(VADecPictureParameterBufferAV1 *picture,
|
||||
struct v4l2_ctrl_av1_sequence *ctrl)
|
||||
{
|
||||
uint8_t bit_depth;
|
||||
|
||||
memset(ctrl, 0, sizeof(*ctrl));
|
||||
|
||||
switch (picture->bit_depth_idx) {
|
||||
case 0: bit_depth = 8; break;
|
||||
case 1: bit_depth = 10; break;
|
||||
case 2: bit_depth = 12; break;
|
||||
default: bit_depth = 8; break;
|
||||
}
|
||||
|
||||
ctrl->seq_profile = picture->profile;
|
||||
ctrl->order_hint_bits = picture->seq_info_fields.fields.enable_order_hint ?
|
||||
(picture->order_hint_bits_minus_1 + 1) : 0;
|
||||
ctrl->bit_depth = bit_depth;
|
||||
/* VAAPI does NOT separately expose max_frame_{width,height}_minus_1
|
||||
* (sequence-level). Use the current frame size as a proxy. Correct
|
||||
* for fixed-size sequences (the 208/352/1080p test vectors). */
|
||||
ctrl->max_frame_width_minus_1 = picture->frame_width_minus1;
|
||||
ctrl->max_frame_height_minus_1 = picture->frame_height_minus1;
|
||||
|
||||
if (picture->seq_info_fields.fields.still_picture)
|
||||
ctrl->flags |= V4L2_AV1_SEQUENCE_FLAG_STILL_PICTURE;
|
||||
if (picture->seq_info_fields.fields.use_128x128_superblock)
|
||||
ctrl->flags |= V4L2_AV1_SEQUENCE_FLAG_USE_128X128_SUPERBLOCK;
|
||||
if (picture->seq_info_fields.fields.enable_filter_intra)
|
||||
ctrl->flags |= V4L2_AV1_SEQUENCE_FLAG_ENABLE_FILTER_INTRA;
|
||||
if (picture->seq_info_fields.fields.enable_intra_edge_filter)
|
||||
ctrl->flags |= V4L2_AV1_SEQUENCE_FLAG_ENABLE_INTRA_EDGE_FILTER;
|
||||
if (picture->seq_info_fields.fields.enable_interintra_compound)
|
||||
ctrl->flags |= V4L2_AV1_SEQUENCE_FLAG_ENABLE_INTERINTRA_COMPOUND;
|
||||
if (picture->seq_info_fields.fields.enable_masked_compound)
|
||||
ctrl->flags |= V4L2_AV1_SEQUENCE_FLAG_ENABLE_MASKED_COMPOUND;
|
||||
/* VAAPI doesn't expose enable_warped_motion as a sequence flag;
|
||||
* per-frame allow_warped_motion gates it. Conservative: set true so
|
||||
* per-frame flag is honored. */
|
||||
ctrl->flags |= V4L2_AV1_SEQUENCE_FLAG_ENABLE_WARPED_MOTION;
|
||||
if (picture->seq_info_fields.fields.enable_dual_filter)
|
||||
ctrl->flags |= V4L2_AV1_SEQUENCE_FLAG_ENABLE_DUAL_FILTER;
|
||||
if (picture->seq_info_fields.fields.enable_order_hint)
|
||||
ctrl->flags |= V4L2_AV1_SEQUENCE_FLAG_ENABLE_ORDER_HINT;
|
||||
if (picture->seq_info_fields.fields.enable_jnt_comp)
|
||||
ctrl->flags |= V4L2_AV1_SEQUENCE_FLAG_ENABLE_JNT_COMP;
|
||||
/* enable_ref_frame_mvs / enable_superres / enable_restoration not
|
||||
* exposed at sequence level — conservative set-true. */
|
||||
ctrl->flags |= V4L2_AV1_SEQUENCE_FLAG_ENABLE_REF_FRAME_MVS;
|
||||
ctrl->flags |= V4L2_AV1_SEQUENCE_FLAG_ENABLE_SUPERRES;
|
||||
if (picture->seq_info_fields.fields.enable_cdef)
|
||||
ctrl->flags |= V4L2_AV1_SEQUENCE_FLAG_ENABLE_CDEF;
|
||||
ctrl->flags |= V4L2_AV1_SEQUENCE_FLAG_ENABLE_RESTORATION;
|
||||
if (picture->seq_info_fields.fields.mono_chrome)
|
||||
ctrl->flags |= V4L2_AV1_SEQUENCE_FLAG_MONO_CHROME;
|
||||
if (picture->seq_info_fields.fields.color_range)
|
||||
ctrl->flags |= V4L2_AV1_SEQUENCE_FLAG_COLOR_RANGE;
|
||||
if (picture->seq_info_fields.fields.subsampling_x)
|
||||
ctrl->flags |= V4L2_AV1_SEQUENCE_FLAG_SUBSAMPLING_X;
|
||||
if (picture->seq_info_fields.fields.subsampling_y)
|
||||
ctrl->flags |= V4L2_AV1_SEQUENCE_FLAG_SUBSAMPLING_Y;
|
||||
if (picture->seq_info_fields.fields.film_grain_params_present)
|
||||
ctrl->flags |= V4L2_AV1_SEQUENCE_FLAG_FILM_GRAIN_PARAMS_PRESENT;
|
||||
}
|
||||
|
||||
/* ===== fill_frame ===== */
|
||||
static void av1_fill_frame(VADecPictureParameterBufferAV1 *picture,
|
||||
struct v4l2_ctrl_av1_frame *ctrl)
|
||||
{
|
||||
unsigned int i, j;
|
||||
|
||||
memset(ctrl, 0, sizeof(*ctrl));
|
||||
|
||||
/* ---- tile_info ---- */
|
||||
ctrl->tile_info.context_update_tile_id = picture->context_update_tile_id;
|
||||
ctrl->tile_info.tile_cols = picture->tile_cols;
|
||||
ctrl->tile_info.tile_rows = picture->tile_rows;
|
||||
if (picture->tile_cols > 1 || picture->tile_rows > 1)
|
||||
ctrl->tile_info.tile_size_bytes = 4;
|
||||
else
|
||||
ctrl->tile_info.tile_size_bytes = 0;
|
||||
|
||||
if (picture->pic_info_fields.bits.uniform_tile_spacing_flag)
|
||||
ctrl->tile_info.flags |= V4L2_AV1_TILE_INFO_FLAG_UNIFORM_TILE_SPACING;
|
||||
|
||||
/* F1: mi_col/row_starts[]: prefix-sum from width_in_sbs_minus_1[]+1
|
||||
* (Kwiboo reads tile_start_col_sb[] directly; VAAPI doesn't expose
|
||||
* starts, only widths — reconstruct via accumulation). Plus the
|
||||
* sentinel at index tile_cols/tile_rows. */
|
||||
{
|
||||
uint16_t cum = 0;
|
||||
for (i = 0; i < picture->tile_cols && i < 63; i++) {
|
||||
ctrl->tile_info.mi_col_starts[i] = cum;
|
||||
ctrl->tile_info.width_in_sbs_minus_1[i] =
|
||||
picture->width_in_sbs_minus_1[i];
|
||||
cum = (uint16_t)(cum + picture->width_in_sbs_minus_1[i] + 1);
|
||||
}
|
||||
ctrl->tile_info.mi_col_starts[picture->tile_cols] =
|
||||
2 * ((picture->frame_width_minus1 + 1 + 7) >> 3);
|
||||
}
|
||||
{
|
||||
uint16_t cum = 0;
|
||||
for (i = 0; i < picture->tile_rows && i < 63; i++) {
|
||||
ctrl->tile_info.mi_row_starts[i] = cum;
|
||||
ctrl->tile_info.height_in_sbs_minus_1[i] =
|
||||
picture->height_in_sbs_minus_1[i];
|
||||
cum = (uint16_t)(cum + picture->height_in_sbs_minus_1[i] + 1);
|
||||
}
|
||||
ctrl->tile_info.mi_row_starts[picture->tile_rows] =
|
||||
2 * ((picture->frame_height_minus1 + 1 + 7) >> 3);
|
||||
}
|
||||
|
||||
/* ---- quantization ---- */
|
||||
ctrl->quantization.base_q_idx = picture->base_qindex;
|
||||
ctrl->quantization.delta_q_y_dc = picture->y_dc_delta_q;
|
||||
ctrl->quantization.delta_q_u_dc = picture->u_dc_delta_q;
|
||||
ctrl->quantization.delta_q_u_ac = picture->u_ac_delta_q;
|
||||
ctrl->quantization.delta_q_v_dc = picture->v_dc_delta_q;
|
||||
ctrl->quantization.delta_q_v_ac = picture->v_ac_delta_q;
|
||||
ctrl->quantization.qm_y = picture->qmatrix_fields.bits.qm_y;
|
||||
ctrl->quantization.qm_u = picture->qmatrix_fields.bits.qm_u;
|
||||
ctrl->quantization.qm_v = picture->qmatrix_fields.bits.qm_v;
|
||||
ctrl->quantization.delta_q_res =
|
||||
picture->mode_control_fields.bits.log2_delta_q_res;
|
||||
|
||||
if (picture->u_dc_delta_q != picture->v_dc_delta_q ||
|
||||
picture->u_ac_delta_q != picture->v_ac_delta_q)
|
||||
ctrl->quantization.flags |= V4L2_AV1_QUANTIZATION_FLAG_DIFF_UV_DELTA;
|
||||
if (picture->qmatrix_fields.bits.using_qmatrix)
|
||||
ctrl->quantization.flags |= V4L2_AV1_QUANTIZATION_FLAG_USING_QMATRIX;
|
||||
if (picture->mode_control_fields.bits.delta_q_present_flag)
|
||||
ctrl->quantization.flags |= V4L2_AV1_QUANTIZATION_FLAG_DELTA_Q_PRESENT;
|
||||
|
||||
/* ---- segmentation ---- */
|
||||
if (picture->seg_info.segment_info_fields.bits.enabled)
|
||||
ctrl->segmentation.flags |= V4L2_AV1_SEGMENTATION_FLAG_ENABLED;
|
||||
if (picture->seg_info.segment_info_fields.bits.update_map)
|
||||
ctrl->segmentation.flags |= V4L2_AV1_SEGMENTATION_FLAG_UPDATE_MAP;
|
||||
if (picture->seg_info.segment_info_fields.bits.temporal_update)
|
||||
ctrl->segmentation.flags |= V4L2_AV1_SEGMENTATION_FLAG_TEMPORAL_UPDATE;
|
||||
if (picture->seg_info.segment_info_fields.bits.update_data)
|
||||
ctrl->segmentation.flags |= V4L2_AV1_SEGMENTATION_FLAG_UPDATE_DATA;
|
||||
|
||||
for (i = 0; i < BACKEND_AV1_MAX_SEGMENTS; i++) {
|
||||
for (j = 0; j < BACKEND_AV1_SEG_LVL_MAX; j++) {
|
||||
if (picture->seg_info.feature_mask[i] & (1 << j)) {
|
||||
ctrl->segmentation.feature_enabled[i] |=
|
||||
V4L2_AV1_SEGMENT_FEATURE_ENABLED(j);
|
||||
ctrl->segmentation.last_active_seg_id = i;
|
||||
if (j >= BACKEND_AV1_SEG_LVL_REF_FRAME)
|
||||
ctrl->segmentation.flags |=
|
||||
V4L2_AV1_SEGMENTATION_FLAG_SEG_ID_PRE_SKIP;
|
||||
}
|
||||
ctrl->segmentation.feature_data[i][j] =
|
||||
picture->seg_info.feature_data[i][j];
|
||||
}
|
||||
}
|
||||
|
||||
/* ---- loop_filter ---- */
|
||||
ctrl->loop_filter.level[0] = picture->filter_level[0];
|
||||
ctrl->loop_filter.level[1] = picture->filter_level[1];
|
||||
ctrl->loop_filter.level[2] = picture->filter_level_u;
|
||||
ctrl->loop_filter.level[3] = picture->filter_level_v;
|
||||
ctrl->loop_filter.sharpness =
|
||||
picture->loop_filter_info_fields.bits.sharpness_level;
|
||||
ctrl->loop_filter.mode_deltas[0] = picture->mode_deltas[0];
|
||||
ctrl->loop_filter.mode_deltas[1] = picture->mode_deltas[1];
|
||||
ctrl->loop_filter.delta_lf_res =
|
||||
picture->mode_control_fields.bits.log2_delta_lf_res;
|
||||
for (i = 0; i < BACKEND_AV1_NUM_REF_FRAMES; i++)
|
||||
ctrl->loop_filter.ref_deltas[i] = picture->ref_deltas[i];
|
||||
|
||||
if (picture->loop_filter_info_fields.bits.mode_ref_delta_enabled)
|
||||
ctrl->loop_filter.flags |= V4L2_AV1_LOOP_FILTER_FLAG_DELTA_ENABLED;
|
||||
if (picture->loop_filter_info_fields.bits.mode_ref_delta_update)
|
||||
ctrl->loop_filter.flags |= V4L2_AV1_LOOP_FILTER_FLAG_DELTA_UPDATE;
|
||||
if (picture->mode_control_fields.bits.delta_lf_present_flag)
|
||||
ctrl->loop_filter.flags |= V4L2_AV1_LOOP_FILTER_FLAG_DELTA_LF_PRESENT;
|
||||
if (picture->mode_control_fields.bits.delta_lf_multi)
|
||||
ctrl->loop_filter.flags |= V4L2_AV1_LOOP_FILTER_FLAG_DELTA_LF_MULTI;
|
||||
|
||||
/* ---- cdef ---- */
|
||||
ctrl->cdef.damping_minus_3 = picture->cdef_damping_minus_3;
|
||||
ctrl->cdef.bits = picture->cdef_bits;
|
||||
for (i = 0; i < (unsigned)(1 << picture->cdef_bits) && i < 8; i++) {
|
||||
uint8_t y = picture->cdef_y_strengths[i];
|
||||
uint8_t uv = picture->cdef_uv_strengths[i];
|
||||
ctrl->cdef.y_pri_strength[i] = (y >> 2) & 0x0F;
|
||||
ctrl->cdef.y_sec_strength[i] = y & 0x03;
|
||||
ctrl->cdef.uv_pri_strength[i] = (uv >> 2) & 0x0F;
|
||||
ctrl->cdef.uv_sec_strength[i] = uv & 0x03;
|
||||
}
|
||||
|
||||
/* ---- loop_restoration ---- (F3) */
|
||||
{
|
||||
uint8_t remap[4] = {
|
||||
V4L2_AV1_FRAME_RESTORE_NONE,
|
||||
V4L2_AV1_FRAME_RESTORE_WIENER,
|
||||
V4L2_AV1_FRAME_RESTORE_SGRPROJ,
|
||||
V4L2_AV1_FRAME_RESTORE_SWITCHABLE,
|
||||
};
|
||||
uint8_t y_t = picture->loop_restoration_fields.bits.yframe_restoration_type & 3;
|
||||
uint8_t cb_t = picture->loop_restoration_fields.bits.cbframe_restoration_type & 3;
|
||||
uint8_t cr_t = picture->loop_restoration_fields.bits.crframe_restoration_type & 3;
|
||||
bool uses_lr = false;
|
||||
|
||||
ctrl->loop_restoration.frame_restoration_type[0] = remap[y_t];
|
||||
ctrl->loop_restoration.frame_restoration_type[1] = remap[cb_t];
|
||||
ctrl->loop_restoration.frame_restoration_type[2] = remap[cr_t];
|
||||
if (y_t != 0)
|
||||
uses_lr = true;
|
||||
if (cb_t != 0 || cr_t != 0) {
|
||||
uses_lr = true;
|
||||
ctrl->loop_restoration.flags |=
|
||||
V4L2_AV1_LOOP_RESTORATION_FLAG_USES_CHROMA_LR;
|
||||
}
|
||||
|
||||
ctrl->loop_restoration.lr_unit_shift =
|
||||
picture->loop_restoration_fields.bits.lr_unit_shift;
|
||||
ctrl->loop_restoration.lr_uv_shift =
|
||||
picture->loop_restoration_fields.bits.lr_uv_shift;
|
||||
|
||||
if (uses_lr) {
|
||||
uint8_t shift = picture->loop_restoration_fields.bits.lr_unit_shift;
|
||||
uint8_t uv_shift = picture->loop_restoration_fields.bits.lr_uv_shift;
|
||||
ctrl->loop_restoration.flags |=
|
||||
V4L2_AV1_LOOP_RESTORATION_FLAG_USES_LR;
|
||||
ctrl->loop_restoration.loop_restoration_size[0] =
|
||||
1 << (6 + shift);
|
||||
ctrl->loop_restoration.loop_restoration_size[1] =
|
||||
1 << (6 + shift - uv_shift);
|
||||
ctrl->loop_restoration.loop_restoration_size[2] =
|
||||
1 << (6 + shift - uv_shift);
|
||||
}
|
||||
}
|
||||
|
||||
/* ---- global_motion ---- */
|
||||
for (i = 0; i < BACKEND_AV1_TOTAL_REFS_PER_FRAME; i++) {
|
||||
if (i == 0)
|
||||
continue; /* INTRA_FRAME slot — no warp */
|
||||
ctrl->global_motion.type[i] = picture->wm[i - 1].wmtype;
|
||||
for (j = 0; j < 6; j++)
|
||||
ctrl->global_motion.params[i][j] = picture->wm[i - 1].wmmat[j];
|
||||
if (picture->wm[i - 1].invalid)
|
||||
ctrl->global_motion.invalid |=
|
||||
V4L2_AV1_GLOBAL_MOTION_IS_INVALID(i);
|
||||
switch (picture->wm[i - 1].wmtype) {
|
||||
case 1:
|
||||
ctrl->global_motion.flags[i] |=
|
||||
V4L2_AV1_GLOBAL_MOTION_FLAG_IS_TRANSLATION;
|
||||
ctrl->global_motion.flags[i] |=
|
||||
V4L2_AV1_GLOBAL_MOTION_FLAG_IS_GLOBAL;
|
||||
break;
|
||||
case 2:
|
||||
ctrl->global_motion.flags[i] |=
|
||||
V4L2_AV1_GLOBAL_MOTION_FLAG_IS_ROT_ZOOM;
|
||||
ctrl->global_motion.flags[i] |=
|
||||
V4L2_AV1_GLOBAL_MOTION_FLAG_IS_GLOBAL;
|
||||
break;
|
||||
case 3:
|
||||
ctrl->global_motion.flags[i] |=
|
||||
V4L2_AV1_GLOBAL_MOTION_FLAG_IS_GLOBAL;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* ---- reference frames + order hints ---- */
|
||||
for (i = 0; i < BACKEND_AV1_TOTAL_REFS_PER_FRAME; i++) {
|
||||
/* VAAPI doesn't expose order_hints[]; leave zero. */
|
||||
ctrl->order_hints[i] = 0;
|
||||
ctrl->reference_frame_ts[i] = 0;
|
||||
}
|
||||
for (i = 0; i < BACKEND_AV1_REFS_PER_FRAME; i++)
|
||||
ctrl->ref_frame_idx[i] = picture->ref_frame_idx[i];
|
||||
|
||||
/* F2: superres_denom direct from VAAPI; fallback to AV1_SUPERRES_NUM
|
||||
* if zero (spec violation but defensive). */
|
||||
ctrl->superres_denom = picture->superres_scale_denominator
|
||||
? picture->superres_scale_denominator : AV1_SUPERRES_NUM;
|
||||
|
||||
ctrl->skip_mode_frame[0] = 0;
|
||||
ctrl->skip_mode_frame[1] = 0;
|
||||
ctrl->primary_ref_frame = picture->primary_ref_frame;
|
||||
ctrl->frame_type = picture->pic_info_fields.bits.frame_type;
|
||||
ctrl->order_hint = picture->order_hint;
|
||||
ctrl->upscaled_width = picture->frame_width_minus1 + 1;
|
||||
ctrl->interpolation_filter = picture->interp_filter;
|
||||
ctrl->tx_mode = picture->mode_control_fields.bits.tx_mode;
|
||||
ctrl->frame_width_minus_1 = picture->frame_width_minus1;
|
||||
ctrl->frame_height_minus_1 = picture->frame_height_minus1;
|
||||
ctrl->render_width_minus_1 = picture->frame_width_minus1;
|
||||
ctrl->render_height_minus_1 = picture->frame_height_minus1;
|
||||
ctrl->current_frame_id = 0;
|
||||
ctrl->refresh_frame_flags = 0;
|
||||
|
||||
/* ---- frame flags ---- */
|
||||
if (picture->pic_info_fields.bits.show_frame)
|
||||
ctrl->flags |= V4L2_AV1_FRAME_FLAG_SHOW_FRAME;
|
||||
if (picture->pic_info_fields.bits.showable_frame)
|
||||
ctrl->flags |= V4L2_AV1_FRAME_FLAG_SHOWABLE_FRAME;
|
||||
if (picture->pic_info_fields.bits.error_resilient_mode)
|
||||
ctrl->flags |= V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE;
|
||||
if (picture->pic_info_fields.bits.disable_cdf_update)
|
||||
ctrl->flags |= V4L2_AV1_FRAME_FLAG_DISABLE_CDF_UPDATE;
|
||||
if (picture->pic_info_fields.bits.allow_screen_content_tools)
|
||||
ctrl->flags |= V4L2_AV1_FRAME_FLAG_ALLOW_SCREEN_CONTENT_TOOLS;
|
||||
if (picture->pic_info_fields.bits.force_integer_mv)
|
||||
ctrl->flags |= V4L2_AV1_FRAME_FLAG_FORCE_INTEGER_MV;
|
||||
if (picture->pic_info_fields.bits.allow_intrabc)
|
||||
ctrl->flags |= V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC;
|
||||
if (picture->pic_info_fields.bits.use_superres)
|
||||
ctrl->flags |= V4L2_AV1_FRAME_FLAG_USE_SUPERRES;
|
||||
if (picture->pic_info_fields.bits.allow_high_precision_mv)
|
||||
ctrl->flags |= V4L2_AV1_FRAME_FLAG_ALLOW_HIGH_PRECISION_MV;
|
||||
if (picture->pic_info_fields.bits.is_motion_mode_switchable)
|
||||
ctrl->flags |= V4L2_AV1_FRAME_FLAG_IS_MOTION_MODE_SWITCHABLE;
|
||||
if (picture->pic_info_fields.bits.use_ref_frame_mvs)
|
||||
ctrl->flags |= V4L2_AV1_FRAME_FLAG_USE_REF_FRAME_MVS;
|
||||
if (picture->pic_info_fields.bits.disable_frame_end_update_cdf)
|
||||
ctrl->flags |= V4L2_AV1_FRAME_FLAG_DISABLE_FRAME_END_UPDATE_CDF;
|
||||
if (picture->pic_info_fields.bits.allow_warped_motion)
|
||||
ctrl->flags |= V4L2_AV1_FRAME_FLAG_ALLOW_WARPED_MOTION;
|
||||
if (picture->mode_control_fields.bits.reference_select)
|
||||
ctrl->flags |= V4L2_AV1_FRAME_FLAG_REFERENCE_SELECT;
|
||||
if (picture->mode_control_fields.bits.reduced_tx_set_used)
|
||||
ctrl->flags |= V4L2_AV1_FRAME_FLAG_REDUCED_TX_SET;
|
||||
if (picture->mode_control_fields.bits.skip_mode_present) {
|
||||
ctrl->flags |= V4L2_AV1_FRAME_FLAG_SKIP_MODE_ALLOWED;
|
||||
ctrl->flags |= V4L2_AV1_FRAME_FLAG_SKIP_MODE_PRESENT;
|
||||
}
|
||||
}
|
||||
|
||||
/* ===== fill_film_grain ===== */
|
||||
static void av1_fill_film_grain(VADecPictureParameterBufferAV1 *picture,
|
||||
struct v4l2_ctrl_av1_film_grain *ctrl)
|
||||
{
|
||||
VAFilmGrainStructAV1 *fg = &picture->film_grain_info;
|
||||
unsigned int i;
|
||||
|
||||
memset(ctrl, 0, sizeof(*ctrl));
|
||||
|
||||
ctrl->cr_mult = fg->cr_mult;
|
||||
ctrl->grain_seed = fg->grain_seed;
|
||||
/* VAAPI doesn't expose film_grain_params_ref_idx (the reuse-from-
|
||||
* previous-frame index). Leave zero — only consulted when
|
||||
* update_grain=0, which VAAPI also doesn't expose. */
|
||||
ctrl->film_grain_params_ref_idx = 0;
|
||||
ctrl->num_y_points = fg->num_y_points;
|
||||
ctrl->num_cb_points = fg->num_cb_points;
|
||||
ctrl->num_cr_points = fg->num_cr_points;
|
||||
ctrl->grain_scaling_minus_8 =
|
||||
fg->film_grain_info_fields.bits.grain_scaling_minus_8;
|
||||
ctrl->ar_coeff_lag = fg->film_grain_info_fields.bits.ar_coeff_lag;
|
||||
ctrl->ar_coeff_shift_minus_6 =
|
||||
fg->film_grain_info_fields.bits.ar_coeff_shift_minus_6;
|
||||
ctrl->grain_scale_shift =
|
||||
fg->film_grain_info_fields.bits.grain_scale_shift;
|
||||
ctrl->cb_mult = fg->cb_mult;
|
||||
ctrl->cb_luma_mult = fg->cb_luma_mult;
|
||||
ctrl->cr_luma_mult = fg->cr_luma_mult;
|
||||
ctrl->cb_offset = fg->cb_offset;
|
||||
ctrl->cr_offset = fg->cr_offset;
|
||||
|
||||
if (fg->film_grain_info_fields.bits.apply_grain)
|
||||
ctrl->flags |= V4L2_AV1_FILM_GRAIN_FLAG_APPLY_GRAIN;
|
||||
/* VAAPI doesn't expose update_grain; not setting the flag means
|
||||
* "reuse params from film_grain_params_ref_idx" — defaulting to
|
||||
* "update with submitted params" (which is what apply_grain implies
|
||||
* when set). The flag's omission is safe for vpu981 which derives
|
||||
* grain state from the submitted control payload, not from a
|
||||
* separate reuse signal. */
|
||||
if (fg->film_grain_info_fields.bits.chroma_scaling_from_luma)
|
||||
ctrl->flags |= V4L2_AV1_FILM_GRAIN_FLAG_CHROMA_SCALING_FROM_LUMA;
|
||||
if (fg->film_grain_info_fields.bits.overlap_flag)
|
||||
ctrl->flags |= V4L2_AV1_FILM_GRAIN_FLAG_OVERLAP;
|
||||
if (fg->film_grain_info_fields.bits.clip_to_restricted_range)
|
||||
ctrl->flags |= V4L2_AV1_FILM_GRAIN_FLAG_CLIP_TO_RESTRICTED_RANGE;
|
||||
|
||||
if (!fg->film_grain_info_fields.bits.apply_grain)
|
||||
return;
|
||||
|
||||
for (i = 0; i < fg->num_y_points && i < 14; i++) {
|
||||
ctrl->point_y_value[i] = fg->point_y_value[i];
|
||||
ctrl->point_y_scaling[i] = fg->point_y_scaling[i];
|
||||
}
|
||||
for (i = 0; i < fg->num_cb_points && i < 10; i++) {
|
||||
ctrl->point_cb_value[i] = fg->point_cb_value[i];
|
||||
ctrl->point_cb_scaling[i] = fg->point_cb_scaling[i];
|
||||
}
|
||||
for (i = 0; i < fg->num_cr_points && i < 10; i++) {
|
||||
ctrl->point_cr_value[i] = fg->point_cr_value[i];
|
||||
ctrl->point_cr_scaling[i] = fg->point_cr_scaling[i];
|
||||
}
|
||||
for (i = 0; i < 24; i++)
|
||||
ctrl->ar_coeffs_y_plus_128[i] = (uint8_t)(fg->ar_coeffs_y[i] + 128);
|
||||
for (i = 0; i < 25; i++) {
|
||||
ctrl->ar_coeffs_cb_plus_128[i] = (uint8_t)(fg->ar_coeffs_cb[i] + 128);
|
||||
ctrl->ar_coeffs_cr_plus_128[i] = (uint8_t)(fg->ar_coeffs_cr[i] + 128);
|
||||
}
|
||||
}
|
||||
|
||||
/* ===== orchestrator ===== */
|
||||
int av1_set_controls(struct request_data *driver_data,
|
||||
struct object_context *context,
|
||||
struct object_surface *surface_object)
|
||||
{
|
||||
(void)driver_data;
|
||||
(void)context;
|
||||
(void)surface_object;
|
||||
VADecPictureParameterBufferAV1 *picture =
|
||||
&surface_object->params.av1.picture;
|
||||
unsigned int num_tiles = surface_object->params.av1.num_tile_group_entries;
|
||||
struct v4l2_ctrl_av1_sequence sequence;
|
||||
struct v4l2_ctrl_av1_frame frame;
|
||||
struct v4l2_ctrl_av1_film_grain film_grain;
|
||||
struct v4l2_ctrl_av1_tile_group_entry *tile_entries = NULL;
|
||||
struct v4l2_ext_control controls[4];
|
||||
unsigned int n = 0;
|
||||
unsigned int i;
|
||||
unsigned int alloc_tiles;
|
||||
int rc;
|
||||
|
||||
request_log("ampere-av1: av1_set_controls stub — Phase 2.1 will "
|
||||
"implement fill_sequence/fill_frame/fill_film_grain/"
|
||||
"fill_tile_group_entries\n");
|
||||
return -1;
|
||||
(void)context;
|
||||
|
||||
if (num_tiles > AV1_MAX_TILES)
|
||||
num_tiles = AV1_MAX_TILES;
|
||||
|
||||
/* DYNAMIC_ARRAY size = MAX(num_tiles, 1) per Janet v2 Q1
|
||||
* amendment — kernel UB on size=0. */
|
||||
alloc_tiles = num_tiles > 0 ? num_tiles : 1;
|
||||
tile_entries = calloc(alloc_tiles, sizeof(*tile_entries));
|
||||
if (tile_entries == NULL)
|
||||
return -1;
|
||||
|
||||
for (i = 0; i < num_tiles; i++) {
|
||||
VASliceParameterBufferAV1 *slice =
|
||||
&surface_object->params.av1.tile_group_entries[i];
|
||||
tile_entries[i].tile_offset = slice->slice_data_offset;
|
||||
tile_entries[i].tile_size = slice->slice_data_size;
|
||||
tile_entries[i].tile_row = (uint8_t)slice->tile_row;
|
||||
tile_entries[i].tile_col = (uint8_t)slice->tile_column;
|
||||
}
|
||||
|
||||
av1_fill_sequence(picture, &sequence);
|
||||
av1_fill_frame(picture, &frame);
|
||||
if (driver_data->has_av1_film_grain)
|
||||
av1_fill_film_grain(picture, &film_grain);
|
||||
|
||||
controls[n++] = (struct v4l2_ext_control){
|
||||
.id = V4L2_CID_STATELESS_AV1_SEQUENCE,
|
||||
.ptr = &sequence,
|
||||
.size = sizeof(sequence),
|
||||
};
|
||||
controls[n++] = (struct v4l2_ext_control){
|
||||
.id = V4L2_CID_STATELESS_AV1_FRAME,
|
||||
.ptr = &frame,
|
||||
.size = sizeof(frame),
|
||||
};
|
||||
controls[n++] = (struct v4l2_ext_control){
|
||||
.id = V4L2_CID_STATELESS_AV1_TILE_GROUP_ENTRY,
|
||||
.ptr = tile_entries,
|
||||
.size = sizeof(*tile_entries) * alloc_tiles,
|
||||
};
|
||||
if (driver_data->has_av1_film_grain) {
|
||||
controls[n++] = (struct v4l2_ext_control){
|
||||
.id = V4L2_CID_STATELESS_AV1_FILM_GRAIN,
|
||||
.ptr = &film_grain,
|
||||
.size = sizeof(film_grain),
|
||||
};
|
||||
}
|
||||
|
||||
rc = v4l2_set_controls(driver_data->video_fd,
|
||||
surface_object->request_fd,
|
||||
controls, n);
|
||||
|
||||
free(tile_entries);
|
||||
|
||||
if (rc < 0) {
|
||||
request_log("ampere-av1: VIDIOC_S_EXT_CTRLS failed rc=%d\n", rc);
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -830,6 +830,24 @@ VAStatus VA_DRIVER_INIT_FUNC(VADriverContextP context)
|
||||
"vendored GStreamer parser)\n");
|
||||
}
|
||||
|
||||
/*
|
||||
* ampere-av1 Phase 2.1: probe V4L2_CID_STATELESS_AV1_FILM_GRAIN
|
||||
* on the vpu981 fd. Per Janet v3 amendment, this runs at backend
|
||||
* init (not lazily) so any race window with concurrent device
|
||||
* switching can't observe an inconsistent flag.
|
||||
*/
|
||||
driver_data->has_av1_film_grain = false;
|
||||
if (driver_data->video_fd_vpu981 >= 0) {
|
||||
struct v4l2_query_ext_ctrl qec;
|
||||
if (v4l2_query_ext_ctrl(driver_data->video_fd_vpu981,
|
||||
V4L2_CID_STATELESS_AV1_FILM_GRAIN,
|
||||
&qec) == 0) {
|
||||
driver_data->has_av1_film_grain = true;
|
||||
request_log("ampere-av1: vpu981 advertises FILM_GRAIN "
|
||||
"control (will include in per-frame batch)\n");
|
||||
}
|
||||
}
|
||||
|
||||
status = VA_STATUS_SUCCESS;
|
||||
goto complete;
|
||||
|
||||
|
||||
@@ -113,6 +113,18 @@ struct request_data {
|
||||
bool has_hevc_ext_sps_rps_rkvdec;
|
||||
bool has_hevc_ext_sps_rps_hantro;
|
||||
|
||||
/*
|
||||
* ampere-av1 Phase 2.1: probe result for the optional
|
||||
* V4L2_CID_STATELESS_AV1_FILM_GRAIN control on the vpu981 fd.
|
||||
* Probed at VA_DRIVER_INIT (per Janet v3 amendment — init-time
|
||||
* not lazy). Consumed by av1_set_controls to conditionally include
|
||||
* the 4th control in the per-frame batch.
|
||||
*
|
||||
* True iff vpu981 advertises the control via VIDIOC_QUERY_EXT_CTRL.
|
||||
* False for non-RK3588 hosts (no vpu981 fd) or older kernels.
|
||||
*/
|
||||
bool has_av1_film_grain;
|
||||
|
||||
/*
|
||||
* iter2 — cached SPS-derived RPS arrays. SPS NALs only appear in
|
||||
* source_data on IDR frames; non-IDR frames' h265_set_controls
|
||||
|
||||
Reference in New Issue
Block a user