diff --git a/src/h265.c b/src/h265.c index 59f06bb..ce094d4 100644 --- a/src/h265.c +++ b/src/h265.c @@ -70,6 +70,7 @@ #include "surface.h" #include +#include #include #include @@ -79,6 +80,9 @@ #include #include +#include "hevc-ctrls/v4l2-hevc-ext-controls.h" +#include "h265_parser/gst/codecparsers/gsth265parser.h" + #include "utils.h" #include "v4l2.h" @@ -582,6 +586,177 @@ static void h265_fill_scaling_matrix(VAIQMatrixBufferHEVC *iqmatrix, } /* ===== Clause 1: orchestrator — batched 5-control submission ===== */ +/* + * iter2 (ampere-kernel-decoders) — parse the HEVC SPS NAL out of the + * decode-time bitstream buffer (when present — typically only on IDR + * frames) via the vendored GStreamer 1.28.2 H.265 parser, map the + * resulting GstH265ShortTermRefPicSet + GstH265ShortTermRefPicSetExt + * arrays into V4L2_CID_STATELESS_HEVC_EXT_SPS_{ST,LT}_RPS struct + * arrays, and cache them on driver_data for reuse by subsequent + * non-IDR frames whose source_data buffer doesn't carry the SPS. + * + * Why: Linux 7.0 VDPU381/383 rkvdec requires the kernel-side RPS + * arrays to be populated; userspace VAAPI doesn't expose this data + * via VAPictureParameterBufferHEVC (only the COUNTS). Mirrors + * GStreamer's gst_v4l2_codec_h265_dec_fill_ext_sps_rps shape + * (gst-plugins-bad/sys/v4l2codecs/gstv4l2codech265dec.c, merged in + * GStreamer 1.28 via MR !10820). + * + * Returns 0 when the cache is usable on exit: either an SPS was parsed + * and mapped by this call, or no SPS was parsed (none found, or the + * parser rejected it) but a previously cached set is still valid. + * + * Returns -ENODATA when no SPS was parsed and the cache is NOT yet + * valid (e.g. first frame of a stream where IDR happens to lack + * embedded SPS), and -ENOMEM on allocation failure; a failed array + * allocation leaves the cache invalid because the old arrays are + * freed first. Caller decides what to do (typically: skip the + * controls submission and let the kernel hit its early-return path; if the kernel still OOPSes that's the F1 falsifier and we loop back to Phase 0). 
+ */ +static int h265_populate_ext_sps_rps_cache(struct request_data *driver_data, + struct object_surface *surface_object) +{ + const guint8 *src = surface_object->source_data; + gsize src_size = surface_object->slices_size; + GstH265Parser *parser; + GstH265NalUnit nalu; + GstH265SPS sps; + GstH265SPSEXT sps_ext; + GstH265ParserResult pr; + int err = -ENODATA; + + parser = gst_h265_parser_new(); + if (parser == NULL) + return -ENOMEM; + + /* Walk source_data for NAL units; first NAL with type==33 (SPS, + * GST_H265_NAL_SPS) is what we parse, and the walk stops there whether or not it parses. + * Annex-B start codes (3- or 4-byte) are located by gst_h265_parser_identify_nalu. */ + gsize offset = 0; + while (offset < src_size) { + pr = gst_h265_parser_identify_nalu(parser, src, offset, src_size, + &nalu); + if (pr != GST_H265_PARSER_OK && pr != GST_H265_PARSER_NO_NAL_END) + break; + + if (nalu.type == GST_H265_NAL_SPS) { + /* + * gst_h265_parser_parse_sps_ext fills both the base + * SPS and the extended-RPS SPSEXT struct. The plain + * gst_h265_parser_parse_sps only fills the base — + * its internally-parsed sps_ext is discarded (see + * gsth265parser.c:2050+ where the function calls + * parse_sps_ext with a LOCAL sps_ext variable). We + * need the EXT data for the V4L2 EXT_SPS_*_RPS + * controls, so call the _ext variant directly. + */ + memset(&sps, 0, sizeof(sps)); + memset(&sps_ext, 0, sizeof(sps_ext)); + pr = gst_h265_parser_parse_sps_ext(parser, &nalu, + &sps, &sps_ext, TRUE); + if (pr != GST_H265_PARSER_OK) + break; + + /* Allocate the V4L2 struct arrays sized by the + * parser's reported counts; free any previous + * cache (and mark it invalid) before overwriting. 
*/ + free(driver_data->hevc_rps_cache_st); + driver_data->hevc_rps_cache_st = NULL; + free(driver_data->hevc_rps_cache_lt); + driver_data->hevc_rps_cache_lt = NULL; + driver_data->hevc_rps_cache_valid = false; + + driver_data->hevc_rps_cache_st_count = + sps.num_short_term_ref_pic_sets; + driver_data->hevc_rps_cache_lt_count = + sps.num_long_term_ref_pics_sps; + + if (driver_data->hevc_rps_cache_st_count > 0) { + driver_data->hevc_rps_cache_st = calloc( + driver_data->hevc_rps_cache_st_count, + sizeof(struct v4l2_ctrl_hevc_ext_sps_st_rps)); + if (driver_data->hevc_rps_cache_st == NULL) { + err = -ENOMEM; + break; + } + for (unsigned int i = 0; + i < driver_data->hevc_rps_cache_st_count; + i++) { + struct v4l2_ctrl_hevc_ext_sps_st_rps *dst = + &driver_data->hevc_rps_cache_st[i]; + const GstH265ShortTermRefPicSet *st = + &sps.short_term_ref_pic_set[i]; + const GstH265ShortTermRefPicSetExt *ste = + &sps_ext.short_term_ref_pic_set_ext[i]; + + if (st->inter_ref_pic_set_prediction_flag) + dst->flags |= + V4L2_HEVC_EXT_SPS_ST_RPS_FLAG_INTER_REF_PIC_SET_PRED; + dst->delta_idx_minus1 = st->delta_idx_minus1; + dst->delta_rps_sign = st->delta_rps_sign; + dst->abs_delta_rps_minus1 = st->abs_delta_rps_minus1; + dst->num_negative_pics = st->NumNegativePics; + dst->num_positive_pics = st->NumPositivePics; + + /* GStreamer's ShortTermRefPicSetExt + * carries the per-RPS-entry use_delta / + * used_by_curr_pic / delta_poc_s0/s1 + * arrays (added GStreamer 1.28 alongside the V4L2 controls). + * Flag entry j becomes bit j of the V4L2 masks; all 16 slots are copied. 
*/ + for (unsigned int j = 0; j < 16; j++) { + if (ste->used_by_curr_pic_flag[j]) + dst->used_by_curr_pic |= (1u << j); + if (ste->use_delta_flag[j]) + dst->use_delta_flag |= (1u << j); + dst->delta_poc_s0_minus1[j] = + ste->delta_poc_s0_minus1[j]; + dst->delta_poc_s1_minus1[j] = + ste->delta_poc_s1_minus1[j]; + } + } + } + + if (driver_data->hevc_rps_cache_lt_count > 0) { + driver_data->hevc_rps_cache_lt = calloc( + driver_data->hevc_rps_cache_lt_count, + sizeof(struct v4l2_ctrl_hevc_ext_sps_lt_rps)); + if (driver_data->hevc_rps_cache_lt == NULL) { + err = -ENOMEM; + break; + } + for (unsigned int i = 0; + i < driver_data->hevc_rps_cache_lt_count; + i++) { + struct v4l2_ctrl_hevc_ext_sps_lt_rps *dst = + &driver_data->hevc_rps_cache_lt[i]; + dst->lt_ref_pic_poc_lsb_sps = + sps.lt_ref_pic_poc_lsb_sps[i]; + if (sps.used_by_curr_pic_lt_sps_flag[i]) + dst->flags |= + V4L2_HEVC_EXT_SPS_LT_RPS_FLAG_USED_LT; + } + } + + driver_data->hevc_rps_cache_valid = true; + err = 0; + break; + } + + offset = nalu.offset + nalu.size; + } + + gst_h265_parser_free(parser); + + /* If no SPS was parsed from this frame's source_data (absent, or + * parsing failed and err is still -ENODATA) but we have a cached + * valid RPS from a prior frame — the non-IDR common case — report + * success so the caller submits the cached arrays. 
*/ + if (err == -ENODATA && driver_data->hevc_rps_cache_valid) + err = 0; + + return err; +} + int h265_set_controls(struct request_data *driver_data, struct object_context *context_object, struct object_surface *surface_object) @@ -599,7 +774,7 @@ int h265_set_controls(struct request_data *driver_data, struct v4l2_ctrl_hevc_scaling_matrix scaling_matrix; struct v4l2_ctrl_hevc_slice_params *slice_params_array = NULL; - struct v4l2_ext_control controls[5]; + struct v4l2_ext_control controls[7]; unsigned int n = 0; unsigned int i; unsigned int prefix_bytes; @@ -690,6 +865,45 @@ int h265_set_controls(struct request_data *driver_data, .size = sizeof(decode_params), }; + /* + * iter2 (ampere-kernel-decoders): VDPU381/383 rkvdec on Linux + * 7.0+ requires the EXT_SPS_{ST,LT}_RPS controls populated with + * parser-derived data. RK3399 rkvdec (linux 6.x or 7.x pre- + * VDPU381 bindings) doesn't have these CIDs; probe at init time + * (request.c::probe_hevc_ext_sps_rps_controls) gates this block. + * + * Per feedback_per_driver_kludge_gating, the gate is the + * rkvdec-specific probe flag, which keeps the per-driver intent readable. + * NOTE(review): no separate driver-kind test is visible in this condition — confirm. 
+ */ + if (driver_data->has_hevc_ext_sps_rps_rkvdec) { + int err = h265_populate_ext_sps_rps_cache(driver_data, + surface_object); + if (err == 0 && driver_data->hevc_rps_cache_valid) { + if (driver_data->hevc_rps_cache_st_count > 0) { + controls[n++] = (struct v4l2_ext_control){ + .id = V4L2_CID_STATELESS_HEVC_EXT_SPS_ST_RPS, + .ptr = driver_data->hevc_rps_cache_st, + .size = sizeof(struct v4l2_ctrl_hevc_ext_sps_st_rps) * + driver_data->hevc_rps_cache_st_count, + }; + } + if (driver_data->hevc_rps_cache_lt_count > 0) { + controls[n++] = (struct v4l2_ext_control){ + .id = V4L2_CID_STATELESS_HEVC_EXT_SPS_LT_RPS, + .ptr = driver_data->hevc_rps_cache_lt, + .size = sizeof(struct v4l2_ctrl_hevc_ext_sps_lt_rps) * + driver_data->hevc_rps_cache_lt_count, + }; + } + } + /* Any non-zero err (-ENODATA with no valid cache, e.g. the + * first-ever frame happens to lack an SPS NAL, or -ENOMEM) is + * not propagated: we just DON'T submit the new controls. The kernel's early-return-on-NULL path in + * rkvdec_hevc_prepare_hw_st_rps should fire and prevent + * the OOPS — Phase 7 verifies this matches the prediction. */ + } + rc = v4l2_set_controls(driver_data->video_fd, surface_object->request_fd, controls, n); diff --git a/src/request.h b/src/request.h index e2b4c86..2d67e2f 100644 --- a/src/request.h +++ b/src/request.h @@ -38,6 +38,8 @@ #include +#include "hevc-ctrls/v4l2-hevc-ext-controls.h" + #define V4L2_REQUEST_STR_VENDOR "v4l2-request" #define V4L2_REQUEST_MAX_PROFILES 13 @@ -97,6 +99,29 @@ struct request_data { bool has_hevc_ext_sps_rps_rkvdec; bool has_hevc_ext_sps_rps_hantro; + /* + * iter2 — cached SPS-derived RPS arrays. SPS NALs only appear in + * source_data on IDR frames; non-IDR frames' h265_set_controls + * reuse the cached arrays so we don't submit zero-filled RPS to + * the kernel (which would re-trigger the OOPS the iter2 fix is + * designed to prevent). 
Single-slot cache (not keyed by sps_id; the last SPS parsed wins) — + * adequate for the BBB / typical-stream case; multi-SPS streams + * would need expanding to a [16] cache keyed by sps_id. 
+ * + * The cache stores the post-mapped V4L2 control struct arrays + * (not the intermediate GstH265SPS) so request.h doesn't need + * to know about the vendored GStreamer parser types — only the + * V4L2 UAPI structs from hevc-ctrls/v4l2-hevc-ext-controls.h + * included above. + * + * Allocated and replaced by h265.c. NOTE(review): the free at RequestTerminate is not visible in this patch view — confirm. + */ + struct v4l2_ctrl_hevc_ext_sps_st_rps *hevc_rps_cache_st; + unsigned int hevc_rps_cache_st_count; + struct v4l2_ctrl_hevc_ext_sps_lt_rps *hevc_rps_cache_lt; + unsigned int hevc_rps_cache_lt_count; + bool hevc_rps_cache_valid; + struct video_format *video_format; /*