From f61f7363806f19ababfe646c30acfd9ceebf9d81 Mon Sep 17 00:00:00 2001 From: claude-noether Date: Sun, 17 May 2026 09:34:58 +0000 Subject: [PATCH] ampere iter2: HEVC EXT_SPS_ST_RPS / _LT_RPS dynamic-array submission (VDPU381) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes the rkvdec_hevc_prepare_hw_st_rps out-of-bounds kernel OOPS that blocked HEVC decode on ampere (RK3588) per marfrit/libva-v4l2-request-fourier#3 and ampere-fourier iter1 close. Mechanism (Phase 5 amendment to issue body): The new EXT_SPS controls are registered as V4L2_CTRL_FLAG_DYNAMIC_ARRAY in vdpu38x_hevc_ctrl_descs (rkvdec.c:279/284) with cfg.dims = { 65 }. The v4l2-ctrl framework init-allocates 1 zeroed element (ctrls-core.c:2116). When num_short_term_ref_pic_sets > 1, rkvdec_hevc_prepare_hw_st_rps (rkvdec-hevc-common.c:393-405) iterates idx 0..N-1 and overruns the 1-element kernel allocation. Submitting an N-element dynamic-array control via S_EXT_CTRLS extends the framework allocation. Userspace fix: - VIDIOC_QUERY_EXT_CTRL probe at first HEVC CreateContext sets driver_data->has_ext_sps_rps (true on VDPU381/383, false on legacy RK3399 — control unregistered there, so fresnel iter38 5/5 + iter39 sub-profile paths are byte-identical to pre-iter2). - When set, h265_set_controls appends EXT_SPS_ST_RPS + _LT_RPS as calloc'd zero arrays, sized by VAAPI's count fields and capped at H.265 §7.4.3.2 spec maxima (ST 64, LT 32). Min 1 (kernel rejects 0). - Free post-S_EXT_CTRLS. Decode correctness scope: VAAPI does NOT expose per-set st_ref_pic_set syntax elements (delta_idx_minus1, delta_rps_sign, etc.) — confirmed in va_dec_hevc.h. All-zero entries give empty inter-pred RPS per set, which is correct for IDR-only streams and incorrect for streams with inter-pred RPS dependence. iter2 acceptance: stop the OOPS. Decode-correctness for inter-RPS content is a known follow-up requiring either bitstream-snoop or SPS-passthrough via a new VAAPI extension. 
Files: - include/hevc-ctrls.h: #ifndef-guarded fallback definitions for V4L2_CID_STATELESS_HEVC_EXT_SPS_{ST,LT}_RPS + structs (ampere host is on linux-api-headers 6.19-1; the new CIDs land in 7.0). - src/request.h: driver_data->has_ext_sps_rps (persists for driver lifetime; gated solely by HEVC code path so cross-codec leakage is impossible). - src/context.c: probe at HEVC CreateContext via v4l2_query_ext_ctrl. - src/h265.c: controls[5] → controls[7]; #include of the local hevc-ctrls.h shim (replaces the previous direct header include) for forward UAPI compatibility. Compile-tested on boltzmann (aarch64 native, gcc 15.2.1): clean .so, 0 new warnings. Fresnel cross-device safety: legacy RK3399 rkvdec_ctrl table omits the CIDs; probe returns false; new code path never executes. iter39 sub-profile work (commits 662f887 + 8746690) is preserved in-tree; iter2 is a forward-compatible additive change. Refs: marfrit/libva-v4l2-request-fourier#3 ampere-fourier/iter1_close.md HEVC blocker ampere-fourier/iter2_phase0_findings.md Co-Authored-By: Claude Opus 4.7 --- include/hevc-ctrls.h | 35 ++++++++++++++++++++++- src/context.c | 22 +++++++++++++++ src/h265.c | 67 ++++++++++++++++++++++++++++++++++++++++++-- src/request.h | 14 +++++++++ 4 files changed, 135 insertions(+), 3 deletions(-) diff --git a/include/hevc-ctrls.h b/include/hevc-ctrls.h index d802692..5cb02f9 100644 --- a/include/hevc-ctrls.h +++ b/include/hevc-ctrls.h @@ -2,8 +2,41 @@ /* Fourier-local override: HEVC controls are upstream since linux-media * 6.6+, so defer to the kernel's linux/v4l2-controls.h instead of * duplicating the struct definitions (duplication causes redefinition - * errors on newer linux-api-headers). */ + * errors on newer linux-api-headers). + * + * iter2 (ampere-fourier, 2026-05-17): the new EXT_SPS_ST_RPS / _LT_RPS + * controls landed in linux-media v8 series (linux-mmind-v7.0 has them) + * but linux-api-headers <7.0 doesn't. Ship guarded fallback definitions + * so the backend builds on older Arch hosts. Newer headers no-op these. 
+ */ #ifndef _LIBVA_V4L2_REQUEST_HEVC_CTRLS_H #define _LIBVA_V4L2_REQUEST_HEVC_CTRLS_H #include + +#ifndef V4L2_CID_STATELESS_HEVC_EXT_SPS_ST_RPS +#define V4L2_CID_STATELESS_HEVC_EXT_SPS_ST_RPS (V4L2_CID_CODEC_STATELESS_BASE + 408) +#define V4L2_CID_STATELESS_HEVC_EXT_SPS_LT_RPS (V4L2_CID_CODEC_STATELESS_BASE + 409) + +#define V4L2_HEVC_EXT_SPS_ST_RPS_FLAG_INTER_REF_PIC_SET_PRED 0x1 +#define V4L2_HEVC_EXT_SPS_LT_RPS_FLAG_USED_LT 0x1 + +struct v4l2_ctrl_hevc_ext_sps_st_rps { + __u8 delta_idx_minus1; + __u8 delta_rps_sign; + __u8 num_negative_pics; + __u8 num_positive_pics; + __u32 used_by_curr_pic; + __u32 use_delta_flag; + __u16 abs_delta_rps_minus1; + __u16 delta_poc_s0_minus1[16]; + __u16 delta_poc_s1_minus1[16]; + __u16 flags; +}; + +struct v4l2_ctrl_hevc_ext_sps_lt_rps { + __u16 lt_ref_pic_poc_lsb_sps; + __u16 flags; +}; +#endif /* V4L2_CID_STATELESS_HEVC_EXT_SPS_ST_RPS */ + #endif diff --git a/src/context.c b/src/context.c index 4785399..101d6aa 100644 --- a/src/context.c +++ b/src/context.c @@ -339,6 +339,28 @@ VAStatus RequestCreateContext(VADriverContextP context, VAConfigID config_id, } } + /* + * iter2 (ampere-fourier): probe for HEVC EXT_SPS_ST_RPS dynamic-array + * control. Present on VDPU381/383 (RK3588), absent on legacy RK3399 + * rkvdec. Result drives h265_set_controls to append empty-but-allocated + * EXT_SPS_ST_RPS + _LT_RPS controls — required to stop a kernel + * out-of-bounds OOPS in rkvdec_hevc_prepare_hw_st_rps when the + * framework's dynamic-array init (1 zeroed element) is undersized for + * streams with num_short_term_ref_pic_sets > 1. + * + * Probed once per driver lifetime; flag persists (gating is HEVC-only + * so cross-codec leakage cannot occur). 
+ */ + if ((config_object->profile == VAProfileHEVCMain || + config_object->profile == VAProfileHEVCMain10) && + !driver_data->has_ext_sps_rps) { + struct v4l2_query_ext_ctrl qec; + if (v4l2_query_ext_ctrl(driver_data->video_fd, + V4L2_CID_STATELESS_HEVC_EXT_SPS_ST_RPS, + &qec) == 0) + driver_data->has_ext_sps_rps = true; + } + destination_planes_count = video_format->planes_count; /* diff --git a/src/h265.c b/src/h265.c index 59f06bb..83a4254 100644 --- a/src/h265.c +++ b/src/h265.c @@ -77,7 +77,9 @@ #include #include -#include +/* iter2: use local hevc-ctrls shim — adds EXT_SPS_ST_RPS / _LT_RPS + * fallback defs guarded by #ifndef when linux-api-headers < 7.0. */ +#include #include "utils.h" #include "v4l2.h" @@ -599,7 +601,9 @@ int h265_set_controls(struct request_data *driver_data, struct v4l2_ctrl_hevc_scaling_matrix scaling_matrix; struct v4l2_ctrl_hevc_slice_params *slice_params_array = NULL; - struct v4l2_ext_control controls[5]; + struct v4l2_ext_control controls[7]; + struct v4l2_ctrl_hevc_ext_sps_st_rps *ext_st_rps = NULL; + struct v4l2_ctrl_hevc_ext_sps_lt_rps *ext_lt_rps = NULL; unsigned int n = 0; unsigned int i; unsigned int prefix_bytes; @@ -690,11 +694,70 @@ int h265_set_controls(struct request_data *driver_data, .size = sizeof(decode_params), }; + /* + * iter2 (ampere-fourier): on VDPU381/383 (has_ext_sps_rps probed at + * CreateContext via VIDIOC_QUERY_EXT_CTRL), submit EXT_SPS_ST_RPS + + * EXT_SPS_LT_RPS as zero-initialized dynamic arrays sized by the + * VAAPI SPS counts (capped at H.265 §7.4.3.2 spec maxima: ST 64, LT + * 32). The kernel registered these dynamic-array controls with + * cfg.dims = { 65 } (vdpu38x_hevc_ctrl_descs); the framework + * init-allocates 1 zeroed element. When num_short_term_ref_pic_sets + * > 1, rkvdec_hevc_prepare_hw_st_rps iterates idx 0..N-1 and the + * single-element kernel allocation triggers an out-of-bounds OOPS. + * Submitting an N-element control extends the framework allocation. 
+ * + * Decode correctness for non-IDR content: VAAPI does NOT expose the + * per-set st_ref_pic_set syntax elements (delta_idx_minus1 etc.) — + * confirmed in va_dec_hevc.h. Submission of all-zero entries gives + * empty inter-pred RPS per set, which is correct for IDR-only + * streams and incorrect for streams with inter-pred RPS dependence. + * iter2 acceptance: stop the OOPS. Decode-correctness for inter-RPS + * streams is a known follow-up. + */ + if (driver_data->has_ext_sps_rps) { + unsigned int n_st = picture->num_short_term_ref_pic_sets; + unsigned int n_lt = picture->num_long_term_ref_pic_sps; + if (n_st > 64) + n_st = 64; + if (n_lt > 32) + n_lt = 32; + if (n_st == 0) + n_st = 1; /* dynamic-array minimum; kernel rejects 0 */ + if (n_lt == 0) + n_lt = 1; + + ext_st_rps = calloc(n_st, + sizeof(struct v4l2_ctrl_hevc_ext_sps_st_rps)); + ext_lt_rps = calloc(n_lt, + sizeof(struct v4l2_ctrl_hevc_ext_sps_lt_rps)); + if (ext_st_rps == NULL || ext_lt_rps == NULL) { + free(slice_params_array); + free(ext_st_rps); + free(ext_lt_rps); + return VA_STATUS_ERROR_ALLOCATION_FAILED; + } + + controls[n++] = (struct v4l2_ext_control){ + .id = V4L2_CID_STATELESS_HEVC_EXT_SPS_ST_RPS, + .ptr = ext_st_rps, + .size = n_st * + sizeof(struct v4l2_ctrl_hevc_ext_sps_st_rps), + }; + controls[n++] = (struct v4l2_ext_control){ + .id = V4L2_CID_STATELESS_HEVC_EXT_SPS_LT_RPS, + .ptr = ext_lt_rps, + .size = n_lt * + sizeof(struct v4l2_ctrl_hevc_ext_sps_lt_rps), + }; + } + rc = v4l2_set_controls(driver_data->video_fd, surface_object->request_fd, controls, n); free(slice_params_array); + free(ext_st_rps); + free(ext_lt_rps); if (rc < 0) return VA_STATUS_ERROR_OPERATION_FAILED; diff --git a/src/request.h b/src/request.h index 9bd92ca..5ab8be1 100644 --- a/src/request.h +++ b/src/request.h @@ -144,6 +144,20 @@ struct request_data { * Reset to false at DestroyContext. 
*/ bool is_10bit; + + /* + * iter2 (ampere-fourier): rkvdec on this host exposes the + * V4L2_CID_STATELESS_HEVC_EXT_SPS_ST_RPS dynamic-array control + * (VDPU381/383 path). When true, h265_set_controls appends + * EXT_SPS_ST_RPS + EXT_SPS_LT_RPS (zero-initialized arrays sized + * by VAAPI's count fields, capped at H.265 spec maxima 64/32). + * Required to prevent rkvdec_hevc_prepare_hw_st_rps out-of-bounds + * kernel OOPS when num_short_term_ref_pic_sets > 1. + * Probed at first HEVC CreateContext via VIDIOC_QUERY_EXT_CTRL. + * Persists for driver lifetime (gated solely by HEVC-only code + * path, so cross-codec leakage cannot occur). + */ + bool has_ext_sps_rps; }; VAStatus VA_DRIVER_INIT_FUNC(VADriverContextP context);