ampere iter2: HEVC EXT_SPS_ST_RPS / _LT_RPS dynamic-array submission (VDPU381)
Fixes the rkvdec_hevc_prepare_hw_st_rps out-of-bounds kernel OOPS that blocked HEVC decode on ampere (RK3588) per marfrit/libva-v4l2-request-fourier#3 and ampere-fourier iter1 close. Mechanism (Phase 5 amendment to issue body): The new EXT_SPS controls are registered as V4L2_CTRL_FLAG_DYNAMIC_ARRAY in vdpu38x_hevc_ctrl_descs (rkvdec.c:279/284) with cfg.dims = { 65 }. The v4l2-ctrl framework init-allocates 1 zeroed element (ctrls-core.c:2116). When num_short_term_ref_pic_sets > 1, rkvdec_hevc_prepare_hw_st_rps (rkvdec-hevc-common.c:393-405) iterates idx 0..N-1 and overruns the 1-element kernel allocation. Submitting an N-element dynamic-array control via S_EXT_CTRLS extends the framework allocation. Userspace fix: - VIDIOC_QUERY_EXT_CTRL probe at first HEVC CreateContext sets driver_data->has_ext_sps_rps (true on VDPU381/383, false on legacy RK3399 — control unregistered there, so fresnel iter38 5/5 + iter39 sub-profile paths are byte-identical to pre-iter2). - When set, h265_set_controls appends EXT_SPS_ST_RPS + _LT_RPS as calloc'd zero arrays, sized by VAAPI's count fields and capped at H.265 §7.4.3.2 spec maxima (ST 64, LT 32). Min 1 (kernel rejects 0). - Free post-S_EXT_CTRLS. Decode correctness scope: VAAPI does NOT expose per-set st_ref_pic_set syntax elements (delta_idx_minus1, delta_rps_sign, etc.) — confirmed in va_dec_hevc.h. All-zero entries give empty inter-pred RPS per set, which is correct for IDR-only streams and incorrect for streams with inter-pred RPS dependence. iter2 acceptance: stop the OOPS. Decode-correctness for inter-RPS content is a known follow-up requiring either bitstream-snoop or SPS-passthrough via a new VAAPI extension. Files: - include/hevc-ctrls.h: #ifndef-guarded fallback definitions for V4L2_CID_STATELESS_HEVC_EXT_SPS_{ST,LT}_RPS + structs (ampere host is on linux-api-headers 6.19-1; the new CIDs land in 7.0). 
- src/request.h: driver_data->has_ext_sps_rps (persists for driver lifetime; gated solely by HEVC code path so cross-codec leakage is impossible). - src/context.c: probe at HEVC CreateContext via v4l2_query_ext_ctrl. - src/h265.c: controls[5] → controls[7]; #include <hevc-ctrls.h> (replaces <linux/v4l2-controls.h>) for forward UAPI compatibility. Compile-tested on boltzmann (aarch64 native, gcc 15.2.1): clean .so, 0 new warnings. Fresnel cross-device safety: legacy RK3399 rkvdec_ctrl table omits the CIDs; probe returns false; new code path never executes. iter39 sub-profile work (commits 662f887 + 8746690) is preserved in-tree; iter2 is a forward-compatible additive change. Refs: marfrit/libva-v4l2-request-fourier#3 ampere-fourier/iter1_close.md HEVC blocker ampere-fourier/iter2_phase0_findings.md Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
+34
-1
@@ -2,8 +2,41 @@
|
||||
/* Fourier-local override: HEVC controls are upstream since linux-media
|
||||
* 6.6+, so defer to the kernel's linux/v4l2-controls.h instead of
|
||||
* duplicating the struct definitions (duplication causes redefinition
|
||||
* errors on newer linux-api-headers). */
|
||||
* errors on newer linux-api-headers).
|
||||
*
|
||||
* iter2 (ampere-fourier, 2026-05-17): the new EXT_SPS_ST_RPS / _LT_RPS
|
||||
* controls landed in linux-media v8 series (linux-mmind-v7.0 has them)
|
||||
* but linux-api-headers <7.0 doesn't. Ship guarded fallback definitions
|
||||
* so the backend builds on older Arch hosts. Newer headers no-op these.
|
||||
*/
|
||||
#ifndef _LIBVA_V4L2_REQUEST_HEVC_CTRLS_H
|
||||
#define _LIBVA_V4L2_REQUEST_HEVC_CTRLS_H
|
||||
#include <linux/v4l2-controls.h>
|
||||
|
||||
#ifndef V4L2_CID_STATELESS_HEVC_EXT_SPS_ST_RPS
|
||||
#define V4L2_CID_STATELESS_HEVC_EXT_SPS_ST_RPS (V4L2_CID_CODEC_STATELESS_BASE + 408)
|
||||
#define V4L2_CID_STATELESS_HEVC_EXT_SPS_LT_RPS (V4L2_CID_CODEC_STATELESS_BASE + 409)
|
||||
|
||||
#define V4L2_HEVC_EXT_SPS_ST_RPS_FLAG_INTER_REF_PIC_SET_PRED 0x1
|
||||
#define V4L2_HEVC_EXT_SPS_LT_RPS_FLAG_USED_LT 0x1
|
||||
|
||||
/*
 * Fallback definition of one element of the
 * V4L2_CID_STATELESS_HEVC_EXT_SPS_ST_RPS dynamic-array control. It is only
 * compiled when the installed kernel headers do not already define that
 * control ID (see the enclosing #ifndef).
 *
 * Field names follow the H.265 st_ref_pic_set() syntax elements.
 * NOTE(review): the field order and widths presumably have to match the
 * kernel UAPI struct byte-for-byte -- confirm against the kernel header
 * before changing anything here.
 */
struct v4l2_ctrl_hevc_ext_sps_st_rps {
|
||||
__u8 delta_idx_minus1;
|
||||
__u8 delta_rps_sign;
|
||||
__u8 num_negative_pics;
|
||||
__u8 num_positive_pics;
|
||||
/* NOTE(review): the next two look like per-picture bitmasks -- confirm. */
__u32 used_by_curr_pic;
|
||||
__u32 use_delta_flag;
|
||||
__u16 abs_delta_rps_minus1;
|
||||
__u16 delta_poc_s0_minus1[16];
|
||||
__u16 delta_poc_s1_minus1[16];
|
||||
/* Presumably a mask of V4L2_HEVC_EXT_SPS_ST_RPS_FLAG_* bits -- confirm. */
__u16 flags;
|
||||
};
|
||||
|
||||
/*
 * Fallback definition of one element of the
 * V4L2_CID_STATELESS_HEVC_EXT_SPS_LT_RPS dynamic-array control. It is only
 * compiled when the installed kernel headers lack the EXT_SPS_ST_RPS control
 * ID (both structs share the same enclosing #ifndef).
 *
 * NOTE(review): the layout presumably has to match the kernel UAPI struct
 * exactly -- confirm against the kernel header.
 */
struct v4l2_ctrl_hevc_ext_sps_lt_rps {
|
||||
__u16 lt_ref_pic_poc_lsb_sps;
|
||||
/* Presumably a mask of V4L2_HEVC_EXT_SPS_LT_RPS_FLAG_* bits -- confirm. */
__u16 flags;
|
||||
};
|
||||
#endif /* V4L2_CID_STATELESS_HEVC_EXT_SPS_ST_RPS */
|
||||
|
||||
#endif
|
||||
|
||||
@@ -339,6 +339,28 @@ VAStatus RequestCreateContext(VADriverContextP context, VAConfigID config_id,
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* iter2 (ampere-fourier): probe for HEVC EXT_SPS_ST_RPS dynamic-array
|
||||
* control. Present on VDPU381/383 (RK3588), absent on legacy RK3399
|
||||
* rkvdec. Result drives h265_set_controls to append empty-but-allocated
|
||||
* EXT_SPS_ST_RPS + _LT_RPS controls — required to stop a kernel
|
||||
* out-of-bounds OOPS in rkvdec_hevc_prepare_hw_st_rps when the
|
||||
* framework's dynamic-array init (1 zeroed element) is undersized for
|
||||
* streams with num_short_term_ref_pic_sets > 1.
|
||||
*
|
||||
* Probed at each HEVC CreateContext until found; the flag then persists
|
||||
* (gating is HEVC-only so cross-codec leakage cannot occur).
|
||||
*/
|
||||
if ((config_object->profile == VAProfileHEVCMain ||
|
||||
config_object->profile == VAProfileHEVCMain10) &&
|
||||
!driver_data->has_ext_sps_rps) {
|
||||
struct v4l2_query_ext_ctrl qec;
|
||||
if (v4l2_query_ext_ctrl(driver_data->video_fd,
|
||||
V4L2_CID_STATELESS_HEVC_EXT_SPS_ST_RPS,
|
||||
&qec) == 0)
|
||||
driver_data->has_ext_sps_rps = true;
|
||||
}
|
||||
|
||||
destination_planes_count = video_format->planes_count;
|
||||
|
||||
/*
|
||||
|
||||
+65
-2
@@ -77,7 +77,9 @@
|
||||
#include <sys/mman.h>
|
||||
|
||||
#include <linux/videodev2.h>
|
||||
#include <linux/v4l2-controls.h>
|
||||
/* iter2: use local hevc-ctrls shim — adds EXT_SPS_ST_RPS / _LT_RPS
|
||||
* fallback defs guarded by #ifndef when linux-api-headers < 7.0. */
|
||||
#include <hevc-ctrls.h>
|
||||
|
||||
#include "utils.h"
|
||||
#include "v4l2.h"
|
||||
@@ -599,7 +601,9 @@ int h265_set_controls(struct request_data *driver_data,
|
||||
struct v4l2_ctrl_hevc_scaling_matrix scaling_matrix;
|
||||
struct v4l2_ctrl_hevc_slice_params *slice_params_array = NULL;
|
||||
|
||||
struct v4l2_ext_control controls[5];
|
||||
struct v4l2_ext_control controls[7];
|
||||
struct v4l2_ctrl_hevc_ext_sps_st_rps *ext_st_rps = NULL;
|
||||
struct v4l2_ctrl_hevc_ext_sps_lt_rps *ext_lt_rps = NULL;
|
||||
unsigned int n = 0;
|
||||
unsigned int i;
|
||||
unsigned int prefix_bytes;
|
||||
@@ -690,11 +694,70 @@ int h265_set_controls(struct request_data *driver_data,
|
||||
.size = sizeof(decode_params),
|
||||
};
|
||||
|
||||
/*
|
||||
* iter2 (ampere-fourier): on VDPU381/383 (has_ext_sps_rps probed at
|
||||
* CreateContext via VIDIOC_QUERY_EXT_CTRL), submit EXT_SPS_ST_RPS +
|
||||
* EXT_SPS_LT_RPS as zero-initialized dynamic arrays sized by the
|
||||
* VAAPI SPS counts (capped at H.265 §7.4.3.2 spec maxima: ST 64, LT
|
||||
* 32). The kernel registered these dynamic-array controls with
|
||||
* cfg.dims = { 65 } (vdpu38x_hevc_ctrl_descs); the framework
|
||||
* init-allocates 1 zeroed element. When num_short_term_ref_pic_sets
|
||||
* > 1, rkvdec_hevc_prepare_hw_st_rps iterates idx 0..N-1 and the
|
||||
* single-element kernel allocation triggers an out-of-bounds OOPS.
|
||||
* Submitting an N-element control extends the framework allocation.
|
||||
*
|
||||
* Decode correctness for non-IDR content: VAAPI does NOT expose the
|
||||
* per-set st_ref_pic_set syntax elements (delta_idx_minus1 etc.) —
|
||||
* confirmed in va_dec_hevc.h. Submission of all-zero entries gives
|
||||
* empty inter-pred RPS per set, which is correct for IDR-only
|
||||
* streams and incorrect for streams with inter-pred RPS dependence.
|
||||
* iter2 acceptance: stop the OOPS. Decode-correctness for inter-RPS
|
||||
* streams is a known follow-up.
|
||||
*/
|
||||
if (driver_data->has_ext_sps_rps) {
|
||||
unsigned int n_st = picture->num_short_term_ref_pic_sets;
|
||||
unsigned int n_lt = picture->num_long_term_ref_pic_sps;
|
||||
if (n_st > 64)
|
||||
n_st = 64;
|
||||
if (n_lt > 32)
|
||||
n_lt = 32;
|
||||
if (n_st == 0)
|
||||
n_st = 1; /* dynamic-array minimum; kernel rejects 0 */
|
||||
if (n_lt == 0)
|
||||
n_lt = 1;
|
||||
|
||||
ext_st_rps = calloc(n_st,
|
||||
sizeof(struct v4l2_ctrl_hevc_ext_sps_st_rps));
|
||||
ext_lt_rps = calloc(n_lt,
|
||||
sizeof(struct v4l2_ctrl_hevc_ext_sps_lt_rps));
|
||||
if (ext_st_rps == NULL || ext_lt_rps == NULL) {
|
||||
free(slice_params_array);
|
||||
free(ext_st_rps);
|
||||
free(ext_lt_rps);
|
||||
return VA_STATUS_ERROR_ALLOCATION_FAILED;
|
||||
}
|
||||
|
||||
controls[n++] = (struct v4l2_ext_control){
|
||||
.id = V4L2_CID_STATELESS_HEVC_EXT_SPS_ST_RPS,
|
||||
.ptr = ext_st_rps,
|
||||
.size = n_st *
|
||||
sizeof(struct v4l2_ctrl_hevc_ext_sps_st_rps),
|
||||
};
|
||||
controls[n++] = (struct v4l2_ext_control){
|
||||
.id = V4L2_CID_STATELESS_HEVC_EXT_SPS_LT_RPS,
|
||||
.ptr = ext_lt_rps,
|
||||
.size = n_lt *
|
||||
sizeof(struct v4l2_ctrl_hevc_ext_sps_lt_rps),
|
||||
};
|
||||
}
|
||||
|
||||
rc = v4l2_set_controls(driver_data->video_fd,
|
||||
surface_object->request_fd,
|
||||
controls, n);
|
||||
|
||||
free(slice_params_array);
|
||||
free(ext_st_rps);
|
||||
free(ext_lt_rps);
|
||||
|
||||
if (rc < 0)
|
||||
return VA_STATUS_ERROR_OPERATION_FAILED;
|
||||
|
||||
@@ -144,6 +144,20 @@ struct request_data {
|
||||
* Reset to false at DestroyContext.
|
||||
*/
|
||||
bool is_10bit;
|
||||
|
||||
/*
|
||||
* iter2 (ampere-fourier): rkvdec on this host exposes the
|
||||
* V4L2_CID_STATELESS_HEVC_EXT_SPS_ST_RPS dynamic-array control
|
||||
* (VDPU381/383 path). When true, h265_set_controls appends
|
||||
* EXT_SPS_ST_RPS + EXT_SPS_LT_RPS (zero-initialized arrays sized
|
||||
* by VAAPI's count fields, capped at H.265 spec maxima 64/32).
|
||||
* Required to prevent rkvdec_hevc_prepare_hw_st_rps out-of-bounds
|
||||
* kernel OOPS when num_short_term_ref_pic_sets > 1.
|
||||
* Probed at first HEVC CreateContext via VIDIOC_QUERY_EXT_CTRL.
|
||||
* Persists for driver lifetime (gated solely by HEVC-only code
|
||||
* path, so cross-codec leakage cannot occur).
|
||||
*/
|
||||
bool has_ext_sps_rps;
|
||||
};
|
||||
|
||||
VAStatus VA_DRIVER_INIT_FUNC(VADriverContextP context);
|
||||
|
||||
Reference in New Issue
Block a user