/* * Copyright (C) 2007 Intel Corporation * Copyright (C) 2016 Florent Revest * Copyright (C) 2018 Paul Kocialkowski * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "context.h" #include "config.h" #include "request.h" #include "surface.h" #include #include #include #include #include #include #include #include #include "nv15.h" /* iter40: fallback V4L2_PIX_FMT_NV15 define for Pi 5 * Debian headers that ship NC12 but not NV15. */ #include "nv12_col128.h" /* iter40: NC12 detile primitive + UV offset helper */ #include "utils.h" #include "v4l2.h" #include "autoconfig.h" VAStatus RequestCreateContext(VADriverContextP context, VAConfigID config_id, int picture_width, int picture_height, int flags, VASurfaceID *surfaces_ids, int surfaces_count, VAContextID *context_id) { struct request_data *driver_data = context->pDriverData; struct object_config *config_object; struct object_context *context_object = NULL; struct video_format *video_format; unsigned int destination_sizes[VIDEO_MAX_PLANES]; unsigned int destination_bytesperlines[VIDEO_MAX_PLANES]; unsigned int destination_planes_count; unsigned int format_width, format_height; unsigned int pixelformat; VASurfaceID *ids = NULL; VAContextID id; VAStatus status; unsigned int output_type, capture_type; unsigned int j; bool found; int rc; /* * iter5b-β: CreateContext owns the V4L2 OUTPUT-side device-format * lifecycle (S_FMT, CAPTURE-format probe, cap_pool_init, per-surface * destination_* fill). Pre-β these lived in CreateSurfaces2 with a * resolution-change gate; β moves them here because (a) config_id * is known so the right OUTPUT pixel format can be derived from * the bound profile, and (b) STREAMON happens at the end of this * function, so the queue is never streaming when we do S_FMT. * * DestroyContext is the only per-session teardown site under β * (no in-CreateSurfaces2 teardown branch). It STREAMOFFs both * queues, calls request_pool_destroy + cap_pool_destroy, and * REQBUFS(0) — leaving the V4L2 device in a clean slate for the * next CreateContext. */ config_object = CONFIG(driver_data, config_id); if (config_object == NULL) { status = VA_STATUS_ERROR_INVALID_CONFIG; goto error; } pixelformat = config_object->pixelformat; if (pixelformat == 0) { /* * Defensive: CreateConfig rejects unhandled profiles, so * pixelformat is always non-zero by the time we get here. * Belt-and-suspenders. */ status = VA_STATUS_ERROR_UNSUPPORTED_PROFILE; goto error; } /* * Probe the CAPTURE-side V4L2 format. video_format is a static * pointer into video.c's formats[]; it stays valid for the life of * the driver_data and is cached across CreateContext cycles. The * probe doesn't require any prior S_FMT — v4l2_find_format * enumerates the device's supported formats directly. * * iter39: choose NV15 (10-bit packed) for Hi10P / Main10 profiles, * NV12 (8-bit) otherwise. If the cached video_format doesn't match * the profile's bit-depth requirement, invalidate and re-probe — * sibling pattern to iter38's device-switch invalidation in * request_switch_device_for_profile(). */ { bool want_10bit = (config_object->profile == VAProfileH264High10 || config_object->profile == VAProfileHEVCMain10); bool is_rpi = (driver_data->video_fd == driver_data->video_fd_rpi_hevc_dec); /* * iter40: per-fd preferred pixelformat. rpi-hevc-dec exposes * NC12 (8-bit) / NC30 (10-bit), not NV12 / NV15. */ unsigned int want_pixfmt; if (is_rpi) want_pixfmt = want_10bit ? V4L2_PIX_FMT_NV12_10_COL128 : V4L2_PIX_FMT_NV12_COL128; else want_pixfmt = want_10bit ? V4L2_PIX_FMT_NV15 : V4L2_PIX_FMT_NV12; if (driver_data->video_format && driver_data->video_format->v4l2_format != want_pixfmt && driver_data->video_format->v4l2_format != V4L2_PIX_FMT_SUNXI_TILED_NV12) driver_data->video_format = NULL; } if (!driver_data->video_format) { bool want_10bit = (config_object->profile == VAProfileH264High10 || config_object->profile == VAProfileHEVCMain10); bool is_rpi = (driver_data->video_fd == driver_data->video_fd_rpi_hevc_dec); video_format = NULL; if (is_rpi) { /* * iter40: rpi-hevc-dec CAPTURE is NC12 (8-bit SAND * 128-pixel-wide column tile) or NC30 (10-bit variant). * Direct map; the kernel exposes BOTH formats in * VIDIOC_ENUM_FMT(CAPTURE_MPLANE) without a pre-SPS * step (verified Phase 0 strace), so find_format would * also succeed — skip it for symmetry with the NV15 * iter39 branch below. */ video_format = video_format_find( want_10bit ? V4L2_PIX_FMT_NV12_10_COL128 : V4L2_PIX_FMT_NV12_COL128); } else if (!want_10bit) { found = v4l2_find_format(driver_data->video_fd, V4L2_BUF_TYPE_VIDEO_CAPTURE, V4L2_PIX_FMT_SUNXI_TILED_NV12); if (found) video_format = video_format_find(V4L2_PIX_FMT_SUNXI_TILED_NV12); found = v4l2_find_format(driver_data->video_fd, V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE, V4L2_PIX_FMT_NV12); if (found) video_format = video_format_find(V4L2_PIX_FMT_NV12); } else { /* * iter39 fresnel fix: rkvdec only advertises NV15 in * VIDIOC_ENUM_FMT(CAPTURE) AFTER S_FMT(OUTPUT) + * S_EXT_CTRLS(SPS) resolve image_fmt to 420_10BIT. * Before that, only NV12 is enumerated. Pre-finding * NV15 always fails. Skip the find_format check and * directly map to our NV15 video_format entry; the * later S_FMT(CAPTURE) commits the actual NV15 mode * once the synthetic SPS sets bit_depth_luma_minus8=2. */ video_format = video_format_find(V4L2_PIX_FMT_NV15); } if (video_format == NULL) { status = VA_STATUS_ERROR_OPERATION_FAILED; goto error; } driver_data->video_format = video_format; } video_format = driver_data->video_format; /* iter39: session-wide flag drives image.c reporting + unpack. */ driver_data->is_10bit = (config_object->profile == VAProfileH264High10 || config_object->profile == VAProfileHEVCMain10); output_type = v4l2_type_video_output(video_format->v4l2_mplane); capture_type = v4l2_type_video_capture(video_format->v4l2_mplane); /* * Commit the OUTPUT pixel format. picture_width/picture_height * are the kernel-facing dimensions for this decode session. With * profile-derived pixelformat, hantro's CAPTURE-format derivation * dispatches to the right codec_mode (pre-β hardcoded H264_SLICE * meant hantro silently substituted MPEG2_DECODER for HEVC/VP8/VP9 * → all-zero CAPTURE; rkvdec silently dropped HEVC/VP9 → same * outcome). */ rc = v4l2_set_format(driver_data->video_fd, output_type, pixelformat, picture_width, picture_height); if (rc < 0) { status = VA_STATUS_ERROR_OPERATION_FAILED; goto error; } /* * iter15 α-19: explicit S_FMT on CAPTURE for rkvdec. * * Original iter5b-β comment: "Do NOT VIDIOC_S_FMT on CAPTURE — hantro * reads the SPS from OUTPUT to set CAPTURE shape internally." * * Empirical finding at iter15 Phase 3 (2026-05-14): kdirect (ffmpeg- * v4l2request) does S_FMT on CAPTURE side after S_FMT(OUTPUT), * then CREATE_BUFS for CAPTURE. libva's old G_FMT-only path skipped * the S_FMT call. For hantro this was deliberate (works); for rkvdec * (HEVC + H.264 + VP9 on RK3399) the absence of explicit S_FMT puts * the driver into a state where it does NOT commit the chosen NV12 * pixel format properly — and the resulting decode silently writes * garbage or zero for HEVC + H.264 (Bug 4 + Bug 5). * * Per [[feedback-per-driver-kludge-gating]]: this driver-specific * difference should be gated on driver_kind. For now use a single * always-on S_FMT call as the safe move: kdirect proves S_FMT * CAPTURE works on both hantro AND rkvdec (it's the reference path). * The iter5b-β comment is preserved-but-amended below. * * Sequence: S_FMT OUTPUT (above) → S_FMT CAPTURE (this) → G_FMT * CAPTURE (sanity read-back, matches what S_FMT committed). */ { /* * iter40: take the CAPTURE pixelformat from the resolved * video_format slot — that's per-fd, per-bit-depth correct. * rkvdec 8-bit → NV12 * rkvdec 10-bit → NV15 * hantro 8-bit → NV12 * rpi-hevc-dec → NC12 (V4L2_PIX_FMT_NV12_COL128) * Pre-iter40 this was hardcoded NV12/NV15 — the rpi-hevc-dec * fd would then have S_FMT(NV12) issued, and the kernel * "helpfully" substituted V4L2_PIX_FMT_NV12MT_COL128 (the * MULTI-PLANE-NON-CONTIGUOUS variant) instead of the * SINGLE-PLANE NC12 we wanted, breaking cap_pool QUERYBUF * downstream (Phase 7 iter40 first-run discovery). */ unsigned int capture_pixelformat = driver_data->video_format->v4l2_format; rc = v4l2_set_format(driver_data->video_fd, capture_type, capture_pixelformat, picture_width, picture_height); if (rc < 0) { /* Non-fatal: if the kernel rejects S_FMT CAPTURE (some * older hantro variants), fall through to G_FMT. */ request_log("iter15 α-19: S_FMT CAPTURE failed (continuing): %s\n", strerror(errno)); } } rc = v4l2_get_format(driver_data->video_fd, capture_type, &format_width, &format_height, destination_bytesperlines, destination_sizes, NULL); if (rc < 0) { status = VA_STATUS_ERROR_OPERATION_FAILED; goto error; } /* * iter25 α-25: synthetic-SPS injection to pre-seed ctx->image_fmt * before CAPTURE buffer allocation. * * Root cause (iter17→iter24 kernel-printk chain): rkvdec_s_ctrl for * HEVC_SPS / H264_SPS calls get_image_fmt() and, if the resolved * image_fmt differs from the cached ctx->image_fmt (default * RKVDEC_IMG_FMT_ANY), tries to reset the CAPTURE format. The reset * returns -EBUSY when vb2_is_busy(CAPTURE_queue) — i.e. any CAPTURE * buffer is allocated. * * libva (iter5b-β CAPTURE pool) pre-allocates 24 CAPTURE buffers * via cap_pool_init below — before any per-frame S_EXT_CTRLS * arrives. So the first real HEVC_SPS at decode time fails with * -EBUSY in try_or_set_cluster, breaks v4l2_ctrl_request_setup's * outer loop, and leaves ctx->ctrl_hdl[SPS..DECODE_PARAMS] at all- * zero contents. rkvdec_hevc_run reads zero, hardware sees w=0 * h=0, decoded CAPTURE is all-zero (Bug 5 + Bug 4). * * Fix: while CAPTURE is still empty (before cap_pool_init), inject * a synthetic SPS containing the profile's chroma + bit_depth so * rkvdec_s_ctrl resolves image_fmt and updates ctx->image_fmt * before vb2_is_busy can return true. From then on, per-frame * SPS submissions with matching profile parameters see * image_fmt_changed=false → skip reset → commit succeeds. * * Gated by config->profile: only HEVC and H.264 paths set * get_image_fmt in their rkvdec coded_fmt_desc->ops; VP9 / MPEG-2 / * VP8 are unaffected (rkvdec_s_ctrl returns 0 immediately when * get_image_fmt is NULL, or those codecs are routed to hantro). * * Failure is best-effort: if the kernel returns -EBUSY/-EINVAL here * (e.g. driver doesn't expose the control on this DT path), we fall * through and may still hit the original bug for that codec — but * the device-init DECODE_MODE + START_CODE block below ALSO uses * void-cast best-effort, so this is consistent with prior pattern. */ /* * iter40 (Phase 5 review F6): the synthetic-SPS pre-seed is an * rkvdec-specific quirk fix (the -EBUSY-on-CAPTURE-busy bug in * rkvdec_s_ctrl). rpi-hevc-dec does NOT need it and uses a * different submission ordering (Phase 0 strace: S_FMT_OUTPUT → * REQBUFS_OUTPUT → S_FMT_CAPTURE → CREATE_BUFS_CAPTURE → STREAMON, * with per-frame SPS via S_EXT_CTRLS class=0xf010000). Sending a * stale dummy SPS at context-init time would leave rpi-hevc-dec's * internal state on the dummy until the first real per-frame SPS * arrives — exact behavior unknown but a known divergence from * kdirect. * * Skip pre-seed when the active fd is rpi-hevc-dec. rkvdec / * hantro paths unchanged. */ if (driver_data->video_fd != driver_data->video_fd_rpi_hevc_dec) { /* * iter39: 10-bit profiles set bit_depth_luma_minus8 = 2 in * the synthetic SPS so rkvdec's get_image_fmt resolves to * RKVDEC_IMG_FMT_420_10BIT (per rkvdec-h264-common.c:196 + * rkvdec-hevc-common.c:467). Image_fmt resolution depends * only on bit_depth_luma_minus8 and chroma_format_idc; * profile_idc is ignored for image_fmt and v4l2_ctrl_hevc_sps * has no profile_idc field at all. */ bool ten = driver_data->is_10bit; switch (config_object->profile) { case VAProfileHEVCMain: case VAProfileHEVCMain10: { struct v4l2_ctrl_hevc_sps dummy_sps; struct v4l2_ext_control dummy_ctrl; memset(&dummy_sps, 0, sizeof(dummy_sps)); dummy_sps.chroma_format_idc = 1; /* 4:2:0 */ dummy_sps.bit_depth_luma_minus8 = ten ? 2 : 0; dummy_sps.bit_depth_chroma_minus8 = ten ? 2 : 0; dummy_sps.pic_width_in_luma_samples = picture_width; dummy_sps.pic_height_in_luma_samples = picture_height; dummy_ctrl.id = V4L2_CID_STATELESS_HEVC_SPS; dummy_ctrl.ptr = &dummy_sps; dummy_ctrl.size = sizeof(dummy_sps); (void)v4l2_set_controls(driver_data->video_fd, -1, &dummy_ctrl, 1); break; } case VAProfileH264Main: case VAProfileH264High: case VAProfileH264ConstrainedBaseline: case VAProfileH264MultiviewHigh: case VAProfileH264StereoHigh: case VAProfileH264High10: { struct v4l2_ctrl_h264_sps dummy_sps; struct v4l2_ext_control dummy_ctrl; memset(&dummy_sps, 0, sizeof(dummy_sps)); dummy_sps.chroma_format_idc = 1; /* 4:2:0 */ dummy_sps.bit_depth_luma_minus8 = ten ? 2 : 0; dummy_sps.bit_depth_chroma_minus8 = ten ? 2 : 0; dummy_sps.pic_width_in_mbs_minus1 = (picture_width + 15) / 16 - 1; dummy_sps.pic_height_in_map_units_minus1 = (picture_height + 15) / 16 - 1; dummy_sps.profile_idc = ten ? 110 : 100; /* High10 : High */ dummy_sps.level_idc = 41; /* * FRAME_MBS_ONLY required: rkvdec_h264_validate_sps * doubles height for non-frame-mbs-only streams to * compute frame-height from field-height. Without * this flag, dummy with (height_in_map_units+1)*16 * = 1088 doubles to 2176 > coded_fmt 1080 → -EINVAL. */ dummy_sps.flags = V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY; dummy_ctrl.id = V4L2_CID_STATELESS_H264_SPS; dummy_ctrl.ptr = &dummy_sps; dummy_ctrl.size = sizeof(dummy_sps); (void)v4l2_set_controls(driver_data->video_fd, -1, &dummy_ctrl, 1); break; } default: break; } } /* iter40: end of pre-seed-skip-on-rpi-hevc-dec guard */ destination_planes_count = video_format->planes_count; /* * Initialize the CAPTURE buffer pool (cap_pool). Pool size = * max(surfaces_count, MIN_CAP_POOL). The headroom gives LRU * recycling enough margin to never reuse a buffer within the * consumer's compositor-hold window for typical playback * patterns. cap_pool_init does the V4L2 CREATE_BUFS + per-slot * mmap. * * `pool->initialized` is reset to false by cap_pool_destroy in * DestroyContext; subsequent CreateContext re-inits at the new * resolution. */ if (!driver_data->capture_pool.initialized) { unsigned int pool_count = surfaces_count > MIN_CAP_POOL ? surfaces_count : MIN_CAP_POOL; rc = cap_pool_init(&driver_data->capture_pool, driver_data->video_fd, capture_type, pool_count, video_format->v4l2_buffers_count); if (rc < 0) { status = VA_STATUS_ERROR_ALLOCATION_FAILED; goto error; } } /* * Compute format-uniform destination_* values. Same for all * surfaces of this format; written once per surface, never * changed by BeginPicture's slot acquisition. */ if (video_format->v4l2_buffers_count == 1) { if (video_format->v4l2_format == V4L2_PIX_FMT_NV12_COL128) { /* * iter40: NC12 SAND layout: Y plane size is * NUM_COLUMNS * TILE_W * ALIGN(height, 8) (= linear * NV12 Y for column-aligned widths), UV plane is half. * The kernel-reported destination_bytesperlines[0] is * the COLUMN stride (ALIGN(height,8)*3/2), not the * linear Y stride — using it × format_height gives the * wrong intra-buffer UV offset (destination_offsets[1] * derives from destination_sizes[0] in * surface_fill_format_uniform). * * Use format_width/format_height (kernel-returned from * G_FMT) not picture_width/height (caller request), * because the kernel applies its own ALIGN rules; the * UV plane location is keyed off the kernel layout. */ unsigned int uv_off = nv12_col128_uv_plane_offset( format_width, format_height); destination_sizes[0] = uv_off; for (j = 1; j < destination_planes_count; j++) destination_sizes[j] = uv_off / 2; request_log("iter40: NC12 sizes pic=%ux%u fmt=%ux%u bpl=%u uv_off=%u sizeimage(kernel)=%u\n", picture_width, picture_height, format_width, format_height, destination_bytesperlines[0], uv_off, destination_bytesperlines[0] * format_height); } else { destination_sizes[0] = destination_bytesperlines[0] * format_height; for (j = 1; j < destination_planes_count; j++) destination_sizes[j] = destination_sizes[0] / 2; } } /* * iter5b-β Commit D: cache the format-uniform CAPTURE geometry * in driver_data. CreateSurfaces2 calls AFTER this CreateContext * (ffmpeg vaapi-copy late-surface-allocation case) will lazy-fill * via surface_fill_format_uniform(); the surface_heap walk below * fills surfaces that pre-existed when CreateContext fired. */ driver_data->fmt_planes_count = destination_planes_count; driver_data->fmt_buffers_count = video_format->v4l2_buffers_count; driver_data->fmt_format_height = format_height; for (j = 0; j < destination_planes_count; j++) { driver_data->fmt_sizes[j] = destination_sizes[j]; driver_data->fmt_bytesperlines[j] = destination_bytesperlines[j]; } driver_data->fmt_valid = true; /* * Walk the surface_heap (not just surfaces_ids[]) to populate * destination_* on every existing surface. Pre-Commit-D we walked * surfaces_ids[], which is empty for ffmpeg vaapi-copy consumers * that call vaCreateContext with surfaces_count=0 — those surfaces * exist in the heap but aren't in the param array. Walking the * heap catches both flows. Late-created surfaces (after this * CreateContext) fill via surface_fill_format_uniform in * CreateSurfaces2's per-surface init. */ { struct object_surface *surface_iter; int heap_iter; surface_iter = (struct object_surface *) object_heap_first(&driver_data->surface_heap, &heap_iter); while (surface_iter != NULL) { surface_fill_format_uniform(driver_data, surface_iter); surface_iter = (struct object_surface *) object_heap_next(&driver_data->surface_heap, &heap_iter); } } id = object_heap_allocate(&driver_data->context_heap); context_object = CONTEXT(driver_data, id); if (context_object == NULL) { status = VA_STATUS_ERROR_ALLOCATION_FAILED; goto error; } memset(&context_object->dpb, 0, sizeof(context_object->dpb)); context_object->timestamp_counter = 0; /* iter9 α-7 */ /* * Initialize the OUTPUT (bitstream-input) buffer pool. Sized by * codec pipeline depth (4 H.264 frames in flight is sufficient * for current hantro/rkvdec scheduling); independent of caller- * supplied surfaces_count. Pool is owned by driver_data so it * outlives any single context destroy/recreate cycle. * * This replaces the prior per-surface OUTPUT loop, which (a) * created an empty queue when surfaces_count==0 (ffmpeg vaapi- * copy path) and (b) only populated surface->source_* for * surfaces present at vaCreateContext time, NULL-derefing on * surfaces created later. */ /* * iter6: pool size 16 gives comfortable headroom over typical H.264 * MaxDpbFrames (16) for any consumer that pipelines decode requests. * Each slot owns its own request_fd (REINIT'd per use). */ rc = request_pool_init(&driver_data->output_pool, driver_data->video_fd, driver_data->media_fd, output_type, 16); if (rc < 0) { status = VA_STATUS_ERROR_ALLOCATION_FAILED; goto error; } /* * The surface_ids array has been allocated by the caller and * we don't have any indication wrt its life time. Let's make sure * its life span is under our control. */ if (surfaces_count > 0) { ids = malloc(surfaces_count * sizeof(VASurfaceID)); if (ids == NULL) { status = VA_STATUS_ERROR_ALLOCATION_FAILED; goto error; } memcpy(ids, surfaces_ids, surfaces_count * sizeof(VASurfaceID)); } /* * Stateless H.264 device-wide controls. The kernel V4L2 stateless * framework requires DECODE_MODE and START_CODE be set on the * device fd (request_fd=-1) before VIDIOC_STREAMON; per-request * controls (SPS/PPS/etc.) attached to a request_fd come later. * * hantro-vpu via rockchip,rk3568-vpu DT compatible (covers RK3568 * and RK3566 — PineTab2 silicon — since they're close enough) * accepts only DECODE_MODE_FRAME_BASED. * START_CODE_ANNEX_B preserves leading 0x00000001 in the slice * payload that h264.c assembles. Errors here are not fatal: not * every backing driver supports both controls (e.g. cedrus may * default to SLICE_BASED without exposing DECODE_MODE). */ { struct v4l2_ext_control dev_ctrls[2] = { { .id = V4L2_CID_STATELESS_H264_DECODE_MODE, .value = V4L2_STATELESS_H264_DECODE_MODE_FRAME_BASED, }, { .id = V4L2_CID_STATELESS_H264_START_CODE, .value = V4L2_STATELESS_H264_START_CODE_ANNEX_B, }, }; (void)v4l2_set_controls(driver_data->video_fd, -1, dev_ctrls, 2); } /* * iter2: HEVC device-wide controls. Same best-effort pattern as * H.264 above — separate batched call so a kernel that does not * advertise HEVC controls (e.g. hantro-vpu-dec on RK3568/RK3399) * silently fails on this batch without invalidating the H.264 * batch. rkvdec on RK3399 advertises HEVC and accepts FRAME_BASED * + ANNEX_B (only supported menu values per Phase 0 v4l2_inventory). */ { /* * iter40: per-driver HEVC start_code menu value. rkvdec / * hantro path uses ANNEX_B + start-code-prepended payload. * rpi-hevc-dec uses NONE — confirmed empirically Phase 7 * (any other mode → V4L2_BUF_FLAG_ERROR on every CAPTURE * DQBUF, all-zero output). kdirect's strace also shows * start_code=0 on rpi-hevc-dec. Both are accepted by the * driver's QUERY_EXT_CTRL menu (min=0 max=1), but only NONE * actually drives correct decode on the Pi. */ bool is_rpi = (driver_data->video_fd == driver_data->video_fd_rpi_hevc_dec); struct v4l2_ext_control hevc_dev_ctrls[2] = { { .id = V4L2_CID_STATELESS_HEVC_DECODE_MODE, .value = V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED, }, { .id = V4L2_CID_STATELESS_HEVC_START_CODE, .value = is_rpi ? 0 /* V4L2_STATELESS_HEVC_START_CODE_NONE */ : V4L2_STATELESS_HEVC_START_CODE_ANNEX_B, }, }; (void)v4l2_set_controls(driver_data->video_fd, -1, hevc_dev_ctrls, 2); } /* * Mirror the ANNEX_B start-code mode set on the device above * into context_object->h264_start_code so picture.c:: * codec_store_buffer prepends 0x00 0x00 0x01 to each slice * payload it copies into the OUTPUT buffer. Without this, the * kernel — which we just told to expect ANNEX_B — sees a raw * NAL stream with no start codes, fails to find slice * boundaries, and emits a zeroed CAPTURE buffer (visually a * flat dark-green frame). * * iter4 fix: this start-code prepend is ANNEX-B-specific and * applies to H.264 and HEVC ONLY. MPEG-2, VP8, and VP9 use raw * frame bitstreams without start codes — prepending 0x00 0x00 0x01 * to a VP9 uncompressed header produces a frame_marker mismatch * (kernel reads 0x00 instead of 0x10), the rkvdec driver silently * fails to find a valid frame, and the CAPTURE slot stays at its * cap_pool init pattern (a dim 0x4c green). Phase 7 verification * caught this for VP9; iter1+iter3 transitive proof masked it for * MPEG-2/VP8 because those iters compared payload bytes, not * decoded pixels. * * h264_get_controls() exists for this purpose but is never * called in the current code path; the planned probe-then-set * commit will replace this hardcoded assignment with a runtime * read of the kernel's accepted START_CODE value. */ { bool is_rpi = (driver_data->video_fd == driver_data->video_fd_rpi_hevc_dec); switch (config_object->profile) { case VAProfileH264Main: case VAProfileH264High: case VAProfileH264ConstrainedBaseline: case VAProfileH264MultiviewHigh: case VAProfileH264StereoHigh: context_object->h264_start_code = true; break; case VAProfileHEVCMain: /* iter40: rpi-hevc-dec rejects start-code-prepended * payload (DQBUF error flag on every CAPTURE buffer). * Gate to match the per-driver START_CODE menu value * set above: NONE on rpi → no prepend; ANNEX_B on * rkvdec → prepend. */ context_object->h264_start_code = !is_rpi; break; default: context_object->h264_start_code = false; break; } } rc = v4l2_set_stream(driver_data->video_fd, output_type, true); if (rc < 0) { status = VA_STATUS_ERROR_OPERATION_FAILED; goto error; } rc = v4l2_set_stream(driver_data->video_fd, capture_type, true); if (rc < 0) { status = VA_STATUS_ERROR_OPERATION_FAILED; goto error; } context_object->config_id = config_id; context_object->render_surface_id = VA_INVALID_ID; context_object->surfaces_ids = ids; context_object->surfaces_count = surfaces_count; context_object->picture_width = picture_width; context_object->picture_height = picture_height; context_object->flags = flags; *context_id = id; status = VA_STATUS_SUCCESS; goto complete; error: if (ids != NULL) free(ids); if (context_object != NULL) object_heap_free(&driver_data->context_heap, (struct object_base *)context_object); complete: return status; } VAStatus RequestDestroyContext(VADriverContextP context, VAContextID context_id) { struct request_data *driver_data = context->pDriverData; struct object_context *context_object; struct video_format *video_format; unsigned int output_type, capture_type; VAStatus status; int rc; video_format = driver_data->video_format; if (video_format == NULL) return VA_STATUS_ERROR_OPERATION_FAILED; output_type = v4l2_type_video_output(video_format->v4l2_mplane); capture_type = v4l2_type_video_capture(video_format->v4l2_mplane); context_object = CONTEXT(driver_data, context_id); if (context_object == NULL) return VA_STATUS_ERROR_INVALID_CONTEXT; rc = v4l2_set_stream(driver_data->video_fd, output_type, false); if (rc < 0) return VA_STATUS_ERROR_OPERATION_FAILED; rc = v4l2_set_stream(driver_data->video_fd, capture_type, false); if (rc < 0) return VA_STATUS_ERROR_OPERATION_FAILED; /* Buffers liberation */ status = RequestDestroySurfaces(context, context_object->surfaces_ids, context_object->surfaces_count); if (status != VA_STATUS_SUCCESS) return VA_STATUS_ERROR_OPERATION_FAILED; free(context_object->surfaces_ids); object_heap_free(&driver_data->context_heap, (struct object_base *)context_object); /* * iter5b-β: tear down the OUTPUT pool (mmap unmaps) BEFORE * REQBUFS(0) frees the kernel-side buffers. Pre-β this was done * only by surface.c's resolution-change branch — which β removed. * Without this here, the next CreateContext's request_pool_init * sees pool->initialized=true with stale slot pointers, returns * 0 without re-CREATE_BUFS, and the next QBUF EINVALs because * the slots reference buffer indices that no longer exist * (Phase 5 v2 review CRIT-2). */ if (driver_data->output_pool.initialized) request_pool_destroy(&driver_data->output_pool); rc = v4l2_request_buffers(driver_data->video_fd, output_type, 0); if (rc < 0) return VA_STATUS_ERROR_OPERATION_FAILED; /* * Iter2 Fix 3 (still relevant under β): cap_pool owns the * CAPTURE buffers' mmaps + any outstanding our_export_fds. Tear * it down (which also issues REQBUFS(0) on CAPTURE), so the next * CreateContext cycle sees a clean slate. */ cap_pool_destroy(&driver_data->capture_pool, driver_data->video_fd, capture_type); /* * iter5b-β: driver_data->video_format is a static-ref pointer * into video.c's formats[]; it stays valid for the life of the * driver_data and intentionally survives DestroyContext cycles. * The next CreateContext's `if (!driver_data->video_format)` * guard skips the probe — correct, because the device's CAPTURE * format menu doesn't change. * * The pre-β surface_reset_format_cache() call here is removed: * β doesn't have a last_output_{width,height,pixelformat} cache * (those fields are deleted). Each CreateContext is a fresh * S_FMT(OUTPUT) cycle. * * Commit D: invalidate the format-uniform cache so a CreateSurfaces2 * call between DestroyContext and the next CreateContext doesn't * lazy-fill with stale geometry from the now-torn-down session. * The next CreateContext re-populates the cache. */ driver_data->fmt_valid = false; /* iter39: clear 10-bit session flag — next CreateContext re-sets. */ driver_data->is_10bit = false; return VA_STATUS_SUCCESS; }