From ac891a01fa6df320b236cb6f15edb9e4b4a1c906 Mon Sep 17 00:00:00 2001 From: Markus Fritsche Date: Mon, 4 May 2026 17:32:12 +0000 Subject: [PATCH] surface: honor VA_EXPORT_SURFACE_SEPARATE_LAYERS in vaExportSurfaceHandle MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Firefox 150's RDD calls vaExportSurfaceHandle with the VA_EXPORT_SURFACE_SEPARATE_LAYERS flag (per FFmpegVideoDecoder.cpp GetVAAPISurfaceDescriptor at the libva-VAAPI export site). With that flag, libva consumers expect 2 separate layers — Y as DRM_FORMAT_R8, UV as DRM_FORMAT_GR88, each with num_planes=1 — not the COMPOSED single-layer-with-2-planes shape we always returned regardless of flags. Our previous code ignored the flag parameter and always built the COMPOSED descriptor. mpv works with that because mpv passes the default (COMPOSED) flag and the shape matches. Firefox's DMABufSurfaceYUV import code parsed our COMPOSED descriptor as if it were SEPARATE, found bogus layer-1 data, and silently fell back to FFmpeg(FFVPX) software decode after frame 0. Fix: branch on the flag and build the appropriate descriptor. flags & VA_EXPORT_SURFACE_SEPARATE_LAYERS (only when planes_count == 2): num_layers=2 layers[0] = Y as DRM_FORMAT_R8, num_planes=1 layers[1] = UV as DRM_FORMAT_GR88, num_planes=1 default (COMPOSED, including unflagged, and any surface that does not have exactly two planes): num_layers=1, drm_format=DRM_FORMAT_NV12, num_planes=2 (existing behavior, preserved for mpv et al.) For the single-fd case (hantro NV12 backed by one CMA buffer), both layers reference object_index=0, each with its own offset and pitch (the pitch is 1920 for both at 1920x1088). Diagnosed via Firefox source dive (mozilla/gecko-dev master, dom/media/platforms/ffmpeg/FFmpegVideoDecoder.cpp:1638) — the explicit flag in the export call was the discriminator between mpv's success and Firefox's silent SW fallback. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- src/surface.c | 66 +++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 56 insertions(+), 10 deletions(-) diff --git a/src/surface.c b/src/surface.c index 432a46a..58ca9cb 100644 --- a/src/surface.c +++ b/src/surface.c @@ -672,18 +672,64 @@ VAStatus RequestExportSurfaceHandle(VADriverContextP context, surface_object->destination_sizes[i]; } - surface_descriptor->num_layers = 1; + /* + * Layer construction depends on the consumer's request flags + * (VA_EXPORT_SURFACE_*_LAYERS): + * + * COMPOSED_LAYERS (default, mpv): one layer carrying both + * Y and UV planes (drm_format=NV12, num_planes=2). Mesa + * imports as a single NV12 EGLImage. + * + * SEPARATE_LAYERS (Firefox 150 RDD): two layers, Y as a + * single-plane R8 layer, UV as a single-plane GR88 layer. + * Firefox's GetVAAPISurfaceDescriptor passes + * VA_EXPORT_SURFACE_SEPARATE_LAYERS so its DMABufSurfaceYUV + * import code can address Y and UV planes independently. + * Without this branch, Firefox parsed our COMPOSED layout + * as if it were SEPARATE, found bogus layer-1 data, and + * silently fell back to FFmpeg(FFVPX) software decode. + * + * The earlier patch 0001 mplane port assumed a single COMPOSED + * shape — fine for mpv but breaks any consumer requesting + * SEPARATE. Honor the flag. + */ + if ((flags & VA_EXPORT_SURFACE_SEPARATE_LAYERS) && planes_count == 2) { + surface_descriptor->num_layers = 2; - surface_descriptor->layers[0].drm_format = video_format->drm_format; - surface_descriptor->layers[0].num_planes = planes_count; + /* Layer 0: Y plane as DRM_FORMAT_R8 (1 byte/pixel luma). */ + surface_descriptor->layers[0].drm_format = DRM_FORMAT_R8; + surface_descriptor->layers[0].num_planes = 1; + surface_descriptor->layers[0].object_index[0] = + export_fds_count == 1 ? 
0 : 0; + surface_descriptor->layers[0].offset[0] = + surface_object->destination_offsets[0]; + surface_descriptor->layers[0].pitch[0] = + surface_object->destination_bytesperlines[0]; - for (i = 0; i < planes_count; i++) { - surface_descriptor->layers[0].object_index[i] = - export_fds_count == 1 ? 0 : i; - surface_descriptor->layers[0].offset[i] = - surface_object->destination_offsets[i]; - surface_descriptor->layers[0].pitch[i] = - surface_object->destination_bytesperlines[i]; + /* Layer 1: UV plane as DRM_FORMAT_GR88 (interleaved + * U+V, 2 bytes/pixel chroma at half resolution). */ + surface_descriptor->layers[1].drm_format = DRM_FORMAT_GR88; + surface_descriptor->layers[1].num_planes = 1; + surface_descriptor->layers[1].object_index[0] = + export_fds_count == 1 ? 0 : 1; + surface_descriptor->layers[1].offset[0] = + surface_object->destination_offsets[1]; + surface_descriptor->layers[1].pitch[0] = + surface_object->destination_bytesperlines[1]; + } else { + /* COMPOSED_LAYERS / default: one layer with all planes. */ + surface_descriptor->num_layers = 1; + surface_descriptor->layers[0].drm_format = video_format->drm_format; + surface_descriptor->layers[0].num_planes = planes_count; + + for (i = 0; i < planes_count; i++) { + surface_descriptor->layers[0].object_index[i] = + export_fds_count == 1 ? 0 : i; + surface_descriptor->layers[0].offset[i] = + surface_object->destination_offsets[i]; + surface_descriptor->layers[0].pitch[i] = + surface_object->destination_bytesperlines[i]; + } } /*