diff --git a/src/av1.c b/src/av1.c index 0148567..f559b5d 100644 --- a/src/av1.c +++ b/src/av1.c @@ -460,13 +460,19 @@ static void av1_fill_film_grain(VADecPictureParameterBufferAV1 *picture, ctrl->cb_offset = fg->cb_offset; ctrl->cr_offset = fg->cr_offset; - if (fg->film_grain_info_fields.bits.apply_grain) + if (fg->film_grain_info_fields.bits.apply_grain) { ctrl->flags |= V4L2_AV1_FILM_GRAIN_FLAG_APPLY_GRAIN; - /* VAAPI doesn't expose update_grain; not setting it means - * "reuse from film_grain_params_ref_idx (=0)". A previous test - * with UPDATE_GRAIN=1 unconditional triggered a segfault on the - * 352x288 film_grain-50 fixture — keep update_grain implicit-zero - * here and revisit during Phase 3 deeper investigation. */ + /* kdirect strace diff confirmed: V4L2_AV1_FILM_GRAIN_FLAG_ + * UPDATE_GRAIN must be set when apply_grain=1 (kdirect's + * flags byte is 0x0B = APPLY|UPDATE|...). VAAPI's + * VAFilmGrainStructAV1 doesn't expose update_grain + * separately. Default to UPDATE=1 (use submitted params, + * not reuse from non-existent prior film_grain ref). The + * earlier segfault we saw with this flag was unmasked by + * the link-NULL deref (now fixed via linked_decode_surface); + * not caused by UPDATE_GRAIN itself. */ + ctrl->flags |= V4L2_AV1_FILM_GRAIN_FLAG_UPDATE_GRAIN; + } if (fg->film_grain_info_fields.bits.chroma_scaling_from_luma) ctrl->flags |= V4L2_AV1_FILM_GRAIN_FLAG_CHROMA_SCALING_FROM_LUMA; if (fg->film_grain_info_fields.bits.overlap_flag) @@ -517,6 +523,24 @@ int av1_set_controls(struct request_data *driver_data, (void)context; + /* + * AV1 film_grain link: when apply_grain=1, ffmpeg-vaapi allocates a + * separate display surface (current_display_picture) from the decode + * surface (current_frame). vpu981 HW applies grain inline to the + * decode CAPTURE buffer, so the consumable data is in current_frame's + * slot. ffmpeg then calls vaGetImage on current_display_picture which + * has no slot bound. Link the display surface back to the decode + * surface so copy_surface_to_image can borrow destination_data[]. + */ + if (picture->current_display_picture != VA_INVALID_SURFACE && + picture->current_display_picture != picture->current_frame) { + struct object_surface *display_surface = + SURFACE(driver_data, picture->current_display_picture); + if (display_surface != NULL) + display_surface->linked_decode_surface_id = + picture->current_frame; + } + if (num_tiles > AV1_MAX_TILES) num_tiles = AV1_MAX_TILES; @@ -553,12 +577,27 @@ int av1_set_controls(struct request_data *driver_data, for (i = 0; i < BACKEND_AV1_TOTAL_REFS_PER_FRAME; i++) { VASurfaceID ref_id = picture->ref_frame_map[i]; struct object_surface *ref_surface; + uint64_t ts; if (ref_id == VA_INVALID_SURFACE) continue; ref_surface = SURFACE(driver_data, ref_id); - if (ref_surface) - frame.reference_frame_ts[i] = - v4l2_timeval_to_ns(&ref_surface->timestamp); + if (ref_surface == NULL) + continue; + ts = v4l2_timeval_to_ns(&ref_surface->timestamp); + /* AV1 film_grain: if the referenced surface is a display + * surface (linked to a decode surface), its own timestamp + * may be zero (never bound for OUTPUT QBUF). Follow the + * link to find the decode surface that carries the real + * timestamp. */ + if (ts == 0 && + ref_surface->linked_decode_surface_id != VA_INVALID_SURFACE) { + struct object_surface *dec = + SURFACE(driver_data, + ref_surface->linked_decode_surface_id); + if (dec != NULL) + ts = v4l2_timeval_to_ns(&dec->timestamp); + } + frame.reference_frame_ts[i] = ts; } if (driver_data->has_av1_film_grain) diff --git a/src/image.c b/src/image.c index 4acb662..97a57ed 100644 --- a/src/image.c +++ b/src/image.c @@ -216,7 +216,50 @@ static VAStatus copy_surface_to_image (struct request_data *driver_data, } } + /* + * AV1 film_grain: when this surface is the display surface of a + * decode (current_display_picture != current_frame with apply_grain=1), + * its slot is NULL because BeginPicture only fired on the decode + * surface. Follow the back-link set in av1_set_controls and borrow + * the decode surface's destination_data + sizes for the copy. + */ + if (surface_object->current_slot == NULL && + surface_object->linked_decode_surface_id != VA_INVALID_SURFACE) { + struct object_surface *decode_surface = + SURFACE(driver_data, + surface_object->linked_decode_surface_id); + if (decode_surface != NULL && + decode_surface->current_slot != NULL) { + /* Mirror the fields we read below. The surface heap + * pointer is stable for the surface's lifetime; we + * only need destination_data + destination_sizes + + * destination_planes_count from it. */ + surface_object->destination_planes_count = + decode_surface->destination_planes_count; + for (i = 0; i < decode_surface->destination_planes_count; i++) { + surface_object->destination_data[i] = + decode_surface->destination_data[i]; + surface_object->destination_sizes[i] = + decode_surface->destination_sizes[i]; + } + } + } + for (i = 0; i < surface_object->destination_planes_count; i++) { + /* AV1 Phase 3 diag: surface NULL-deref hunt. */ + if (buffer_object->data == NULL || + surface_object->destination_data[i] == NULL) { + request_log("copy_surface_to_image NULL i=%u " + "buf_data=%p dest_data=%p dest_size=%u " + "planes=%u slot=%p linked=0x%x\n", + i, (void *)buffer_object->data, + (void *)surface_object->destination_data[i], + surface_object->destination_sizes[i], + surface_object->destination_planes_count, + (void *)surface_object->current_slot, + surface_object->linked_decode_surface_id); + return VA_STATUS_ERROR_OPERATION_FAILED; + } #ifdef __arm__ if (!video_format_is_linear(driver_data->video_format)) tiled_to_planar(surface_object->destination_data[i], diff --git a/src/picture.c b/src/picture.c index ebc5dac..013371a 100644 --- a/src/picture.c +++ b/src/picture.c @@ -361,6 +361,12 @@ VAStatus RequestBeginPicture(VADriverContextP context, VAContextID context_id, if (surface_object == NULL) return VA_STATUS_ERROR_INVALID_SURFACE; + /* AV1 Phase 3 diag */ + request_log("BeginPicture id=0x%x prev_slot=%p status=%d\n", + surface_object->base.id, + (void *)surface_object->current_slot, + surface_object->status); + if (surface_object->status == VASurfaceRendering) RequestSyncSurface(context, surface_id); diff --git a/src/surface.c b/src/surface.c index 341637c..09298bf 100644 --- a/src/surface.c +++ b/src/surface.c @@ -111,6 +111,13 @@ void surface_unbind_slot(struct request_data *driver_data, { if (surface_object->current_slot == NULL) return; + /* AV1 Phase 3 diag: log every unbind with surface id + slot idx + * + status — confirms whether BeginPicture rebind is racing the + * consumer's vaGetImage on the previous frame. */ + request_log("surface_unbind_slot id=0x%x status=%d slot_idx=%u\n", + surface_object->base.id, + surface_object->status, + surface_object->current_slot->v4l2_index); cap_pool_release(&driver_data->capture_pool, surface_object->current_slot); surface_object->current_slot = NULL; } @@ -192,6 +199,7 @@ VAStatus RequestCreateSurfaces2(VADriverContextP context, unsigned int format, return VA_STATUS_ERROR_ALLOCATION_FAILED; surface_object->current_slot = NULL; /* iter2 Fix 3 */ + surface_object->linked_decode_surface_id = VA_INVALID_SURFACE; surface_object->destination_index = 0; /* set on bind */ surface_object->destination_planes_count = 0; /* set at CreateContext */ surface_object->destination_buffers_count = 0; /* set at CreateContext */ diff --git a/src/surface.h b/src/surface.h index f7b5386..d47caa7 100644 --- a/src/surface.h +++ b/src/surface.h @@ -89,6 +89,23 @@ struct object_surface { struct timeval timestamp; + /* + * AV1 Phase 3: for streams with apply_grain=1, VAAPI's + * VADecPictureParameterBufferAV1 carries current_display_picture + * (display-time surface) separate from current_frame (decode + * target). vpu981 HW applies grain inline to the decode CAPTURE + * buffer, so the decoded data lives in current_frame's slot — but + * ffmpeg calls vaGetImage on current_display_picture which has no + * slot bound. linked_decode_surface_id, set in av1_set_controls + * on the display surface, points to the decode surface so + * copy_surface_to_image can borrow its destination_data[]. + * + * VA_INVALID_SURFACE = no link (the common case: 8-bit codecs, + * AV1 with apply_grain=0, AV1 frames where cur_frame == + * cur_display). + */ + VASurfaceID linked_decode_surface_id; + union { struct { VAPictureParameterBufferMPEG2 picture;