h265: cap pred-weight + ref-list loops at VAAPI source size (15)

V4L2_HEVC_DPB_ENTRIES_NUM_MAX is 16, but
VASliceParameterBufferHEVC::RefPicList is [2][15], and the eight
delta_luma_weight_lX / luma_offset_lX / delta_chroma_weight_lX /
ChromaOffsetLX arrays (X = 0, 1) all have a first dimension of 15.
Running the per-slot copy loops up to 16 therefore reads one element
past the end of each VAAPI source array.

The bug was always there but hidden under -O3 (meson's default
buildtype=release): GCC unrolled the inner loop and dead-folded
the out-of-bounds load. Under -O2 (Arch makepkg CFLAGS) the
canonical vectorised loop ran and produced a real SEGV at
v4l2_request_drv_video.so + 0xb3a4 inside h265_fill_slice_params,
breaking HEVC immediately after the package install on fresnel
(iter38 5/5 baseline dropped to 4/5).

Define a local VA_HEVC_REF_LIST_LEN (15) and use it as the cap
for the four offending loops. The RefPicList and pred_weight_table
copies now respect the source bound. The V4L2 destination arrays
still have 16 slots; the last slot is never written and keeps its
memset-zero value, which is correct.

Verified locally: -O2 build + package re-install restores HEVC
to bit-exact PASS vs kdirect (sha 108f925bb6cbb6c9). iter38 5/5
baseline restored.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-17 17:00:52 +00:00
parent c9f32aff49
commit cf8cd9d2be
+27 -5
View File
@@ -83,6 +83,18 @@
#include "hevc-ctrls/v4l2-hevc-ext-controls.h" #include "hevc-ctrls/v4l2-hevc-ext-controls.h"
#include "h265_parser/gst/codecparsers/gsth265parser.h" #include "h265_parser/gst/codecparsers/gsth265parser.h"
/*
 * Number of entries in the VAAPI HEVC per-reference source arrays.
 *
 * VASliceParameterBufferHEVC::RefPicList is [2][15], and
 * delta_luma_weight_l0[15], luma_offset_l0[15], etc. are sized the
 * same way — see /usr/include/va/va_dec_hevc.h.
 * V4L2_HEVC_DPB_ENTRIES_NUM_MAX is 16, so iterating to that bound
 * over-reads the VAAPI source by one element. The over-read was
 * hidden by -O3 unrolling but manifested as a SEGV under -O2
 * vectorisation (regression discovered in package builds
 * 2026-05-17). Every loop that copies from these per-ref/weight
 * arrays must be capped at this value.
 */
#define VA_HEVC_REF_LIST_LEN 15
#include "utils.h" #include "utils.h"
#include "v4l2.h" #include "v4l2.h"
@@ -465,13 +477,21 @@ static void h265_fill_slice_params(VAPictureParameterBufferHEVC *picture,
/* Q2: slice_segment_addr from VAAPI (was missing in old h265.c). */ /* Q2: slice_segment_addr from VAAPI (was missing in old h265.c). */
slice_params->slice_segment_addr = slice->slice_segment_address; slice_params->slice_segment_addr = slice->slice_segment_address;
/* Ref index arrays (DPB indices). For I-slices both are unused. */ /*
for (i = 0; i < V4L2_HEVC_DPB_ENTRIES_NUM_MAX && * Ref index arrays (DPB indices). For I-slices both are unused.
*
* Cap iteration at VAAPI source size (15) — V4L2_HEVC_DPB_ENTRIES_NUM_MAX
* is 16, but VASliceParameterBufferHEVC::RefPicList is RefPicList[2][15].
* Iterating to 16 reads one past the source array; with -O2 GCC vectorises
* the copy and the over-read produces a real SEGV (manifested in package
* builds with Arch makepkg CFLAGS, plain -O3 release builds hid it).
*/
for (i = 0; i < VA_HEVC_REF_LIST_LEN &&
slice_type != V4L2_HEVC_SLICE_TYPE_I; i++) { slice_type != V4L2_HEVC_SLICE_TYPE_I; i++) {
if (i < (slice->num_ref_idx_l0_active_minus1 + 1U)) if (i < (slice->num_ref_idx_l0_active_minus1 + 1U))
slice_params->ref_idx_l0[i] = slice->RefPicList[0][i]; slice_params->ref_idx_l0[i] = slice->RefPicList[0][i];
} }
for (i = 0; i < V4L2_HEVC_DPB_ENTRIES_NUM_MAX && for (i = 0; i < VA_HEVC_REF_LIST_LEN &&
slice_type == V4L2_HEVC_SLICE_TYPE_B; i++) { slice_type == V4L2_HEVC_SLICE_TYPE_B; i++) {
if (i < (slice->num_ref_idx_l1_active_minus1 + 1U)) if (i < (slice->num_ref_idx_l1_active_minus1 + 1U))
slice_params->ref_idx_l1[i] = slice->RefPicList[1][i]; slice_params->ref_idx_l1[i] = slice->RefPicList[1][i];
@@ -503,7 +523,9 @@ static void h265_fill_slice_params(VAPictureParameterBufferHEVC *picture,
slice_params->pred_weight_table.delta_chroma_log2_weight_denom = slice_params->pred_weight_table.delta_chroma_log2_weight_denom =
slice->delta_chroma_log2_weight_denom; slice->delta_chroma_log2_weight_denom;
for (i = 0; i < V4L2_HEVC_DPB_ENTRIES_NUM_MAX && /* Pred weight tables — cap at VAAPI source array size (15), same
* reason as the RefPicList loops above. */
for (i = 0; i < VA_HEVC_REF_LIST_LEN &&
slice_type != V4L2_HEVC_SLICE_TYPE_I; i++) { slice_type != V4L2_HEVC_SLICE_TYPE_I; i++) {
slice_params->pred_weight_table.delta_luma_weight_l0[i] = slice_params->pred_weight_table.delta_luma_weight_l0[i] =
slice->delta_luma_weight_l0[i]; slice->delta_luma_weight_l0[i];
@@ -516,7 +538,7 @@ static void h265_fill_slice_params(VAPictureParameterBufferHEVC *picture,
slice->ChromaOffsetL0[i][j]; slice->ChromaOffsetL0[i][j];
} }
} }
for (i = 0; i < V4L2_HEVC_DPB_ENTRIES_NUM_MAX && for (i = 0; i < VA_HEVC_REF_LIST_LEN &&
slice_type == V4L2_HEVC_SLICE_TYPE_B; i++) { slice_type == V4L2_HEVC_SLICE_TYPE_B; i++) {
slice_params->pred_weight_table.delta_luma_weight_l1[i] = slice_params->pred_weight_table.delta_luma_weight_l1[i] =
slice->delta_luma_weight_l1[i]; slice->delta_luma_weight_l1[i];