From 5fb7e36955c35742027359c0032d241c5c1fffa6 Mon Sep 17 00:00:00 2001
From: claude-noether
Date: Sun, 17 May 2026 10:28:32 +0000
Subject: [PATCH] ampere-av1 Phase 3 fix: wire reference_frame_ts[] from VAAPI ref_frame_map[]
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Phase 2.1 first hardware test on ampere passed frame 1 (IDR) bit-exact
vs kdirect, but frame 2 (inter) diverged starting at byte 64897.

Root cause: reference_frame_ts[] was left at zero — the kernel can't
cross-reference prior CAPTURE buffers without timestamps.

Fix: in av1_set_controls (which has driver_data), iterate VAAPI's
ref_frame_map[8] (VASurfaceIDs), look up each via
SURFACE(driver_data, ref_id), and pull
v4l2_timeval_to_ns(&ref_surface->timestamp) into the V4L2 ctrl.
VA_INVALID_SURFACE entries stay at calloc-zero. Mirrors the
vp9.c:614-628 pattern scaled to AV1's 8 ref slots.

surface_object->timestamp itself is populated in picture.c::EndPicture
from context_object->timestamp_counter at QBUF time on the OUTPUT
buffer — already in place from iter1 baseline.

Verification on ampere (/tmp/test_av1.ivf 208x208, 2 frames):
  Frame 1 + 2 libva sha 029ee72c214b37c1 == kdirect 029ee72c214b37c1
  → 100% byte-identical; kdirect was Phase 0-verified bit-perfect.

order_hints[] is still zero — VAAPI doesn't expose per-ref POC; observed
not to be load-bearing on the 208x208 smoke vector. Multi-tile +
film_grain stress vectors are next (av1-1-b8-23-film_grain-50.ivf).

Co-Authored-By: Claude Opus 4.7 --- src/av1.c | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/src/av1.c b/src/av1.c index 5b2970c..964c035 100644 --- a/src/av1.c +++ b/src/av1.c @@ -53,11 +53,14 @@ #include "av1.h" #include "context.h" +#include "object_heap.h" #include "request.h" #include "surface.h" #include "utils.h" #include "v4l2.h" +#include + #include #include @@ -359,11 +362,13 @@ static void av1_fill_frame(VADecPictureParameterBufferAV1 *picture, } /* ---- reference frames + order hints ---- */ - for (i = 0; i < BACKEND_AV1_TOTAL_REFS_PER_FRAME; i++) { - /* VAAPI doesn't expose order_hints[]; leave zero. */ + /* reference_frame_ts[] is filled by the orchestrator (av1_set_controls) + * which has driver_data for the SURFACE() lookup. order_hints[] not + * exposed per-ref by VAAPI — leave zero. ref_frame_idx[7] is the + * index map from spec-defined ref slots (LAST..ALTREF) into + * ref_frame_map[8] (the surface IDs). */ + for (i = 0; i < BACKEND_AV1_TOTAL_REFS_PER_FRAME; i++) ctrl->order_hints[i] = 0; - ctrl->reference_frame_ts[i] = 0; - } for (i = 0; i < BACKEND_AV1_REFS_PER_FRAME; i++) ctrl->ref_frame_idx[i] = picture->ref_frame_idx[i]; @@ -534,6 +539,29 @@ int av1_set_controls(struct request_data *driver_data, av1_fill_sequence(picture, &sequence); av1_fill_frame(picture, &frame); + + /* + * Phase 2.1 + frame-2 divergence fix: wire reference_frame_ts[]. + * VAAPI exposes ref_frame_map[8] as VASurfaceIDs; the kernel needs + * v4l2-style timestamps to cross-reference the corresponding + * CAPTURE buffers (set on the OUTPUT buffer at QBUF time per + * picture.c::EndPicture, via surface_object->timestamp). Mirrors + * the vp9.c:614-628 pattern, scaled to AV1's 8 ref slots. + * + * VA_INVALID_SURFACE entries stay at the calloc'd zero timestamp + * (kernel reads zero, doesn't try to dereference). 
+ */ + for (i = 0; i < BACKEND_AV1_TOTAL_REFS_PER_FRAME; i++) { + VASurfaceID ref_id = picture->ref_frame_map[i]; + struct object_surface *ref_surface; + if (ref_id == VA_INVALID_SURFACE) + continue; + ref_surface = SURFACE(driver_data, ref_id); + if (ref_surface) + frame.reference_frame_ts[i] = + v4l2_timeval_to_ns(&ref_surface->timestamp); + } + if (driver_data->has_av1_film_grain) av1_fill_film_grain(picture, &film_grain);