From cf8cd9d2be50727f38662fb1a8a553a1ddcbc3a4 Mon Sep 17 00:00:00 2001 From: claude-noether Date: Sun, 17 May 2026 17:00:52 +0000 Subject: [PATCH] h265: cap pred-weight + ref-list loops at VAAPI source size (15) V4L2_HEVC_DPB_ENTRIES_NUM_MAX is 16, but VASliceParameterBufferHEVC::RefPicList is [2][15] and the eight delta_*_weight_lX / luma_offset_lX / delta_chroma_weight_lX / ChromaOffsetLX arrays are all [15]. Iterating the per-slot copy loops to 16 over-reads the VAAPI source by one element. The bug was always there but hidden under -O3 (meson's default buildtype=release): GCC unrolled the inner loop and dead-folded the out-of-bounds load. Under -O2 (Arch makepkg CFLAGS) the canonical vectorised loop ran and produced a real SEGV at v4l2_request_drv_video.so + 0xb3a4 inside h265_fill_slice_params, breaking HEVC immediately after the package install on fresnel (iter38 5/5 baseline dropped to 4/5). Define a local VA_HEVC_REF_LIST_LEN (15) and use it as the cap for the four offending loops. RefPicList and pred_weight_table copies now respect the source bound; V4L2 destination still has 16 slots, the upper one stays at memset-zero which is correct. Verified locally: -O2 build + package re-install restores HEVC to bit-exact PASS vs kdirect (sha 108f925bb6cbb6c9). iter38 5/5 baseline restored. Co-Authored-By: Claude Opus 4.7 --- src/h265.c | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/src/h265.c b/src/h265.c index ce094d4..7cd4880 100644 --- a/src/h265.c +++ b/src/h265.c @@ -83,6 +83,18 @@ #include "hevc-ctrls/v4l2-hevc-ext-controls.h" #include "h265_parser/gst/codecparsers/gsth265parser.h" +/* + * VAAPI source arrays for HEVC ref/weight tables are sized 15 + * (VASliceParameterBufferHEVC::RefPicList[2][15], + * delta_luma_weight_l0[15], luma_offset_l0[15], etc. — see + * /usr/include/va/va_dec_hevc.h). V4L2_HEVC_DPB_ENTRIES_NUM_MAX + * is 16; iterating to that bound over-reads the VAAPI source by + * one element. 
Hidden by -O3 unrolling but manifests as a SEGV + * under -O2 vectorisation (regression discovered in package + * builds 2026-05-17). Cap all per-ref/weight loops at this value. + */ +#define VA_HEVC_REF_LIST_LEN 15 + #include "utils.h" #include "v4l2.h" @@ -465,13 +477,21 @@ static void h265_fill_slice_params(VAPictureParameterBufferHEVC *picture, /* Q2: slice_segment_addr from VAAPI (was missing in old h265.c). */ slice_params->slice_segment_addr = slice->slice_segment_address; - /* Ref index arrays (DPB indices). For I-slices both are unused. */ - for (i = 0; i < V4L2_HEVC_DPB_ENTRIES_NUM_MAX && + /* + * Ref index arrays (DPB indices). For I-slices both are unused. + * + * Cap iteration at VAAPI source size (15) — V4L2_HEVC_DPB_ENTRIES_NUM_MAX + * is 16, but VASliceParameterBufferHEVC::RefPicList is RefPicList[2][15]. + * Iterating to 16 reads one past the source array; with -O2 GCC vectorises + * the copy and the over-read produces a real SEGV (manifested in package + * builds with Arch makepkg CFLAGS; plain -O3 release builds hid it). + */ + for (i = 0; i < VA_HEVC_REF_LIST_LEN && slice_type != V4L2_HEVC_SLICE_TYPE_I; i++) { if (i < (slice->num_ref_idx_l0_active_minus1 + 1U)) slice_params->ref_idx_l0[i] = slice->RefPicList[0][i]; } - for (i = 0; i < V4L2_HEVC_DPB_ENTRIES_NUM_MAX && + for (i = 0; i < VA_HEVC_REF_LIST_LEN && slice_type == V4L2_HEVC_SLICE_TYPE_B; i++) { if (i < (slice->num_ref_idx_l1_active_minus1 + 1U)) slice_params->ref_idx_l1[i] = slice->RefPicList[1][i]; @@ -503,7 +523,9 @@ static void h265_fill_slice_params(VAPictureParameterBufferHEVC *picture, slice_params->pred_weight_table.delta_chroma_log2_weight_denom = slice->delta_chroma_log2_weight_denom; - for (i = 0; i < V4L2_HEVC_DPB_ENTRIES_NUM_MAX && + /* Pred weight tables — cap at VAAPI source array size (15), same + * reason as the RefPicList loops above.
*/ + for (i = 0; i < VA_HEVC_REF_LIST_LEN && slice_type != V4L2_HEVC_SLICE_TYPE_I; i++) { slice_params->pred_weight_table.delta_luma_weight_l0[i] = slice->delta_luma_weight_l0[i]; @@ -516,7 +538,7 @@ static void h265_fill_slice_params(VAPictureParameterBufferHEVC *picture, slice->ChromaOffsetL0[i][j]; } } - for (i = 0; i < V4L2_HEVC_DPB_ENTRIES_NUM_MAX && + for (i = 0; i < VA_HEVC_REF_LIST_LEN && slice_type == V4L2_HEVC_SLICE_TYPE_B; i++) { slice_params->pred_weight_table.delta_luma_weight_l1[i] = slice->delta_luma_weight_l1[i];