ampere-av1 Phase 2 (master): fourth-fd probe + AV1 enumeration

Imports the minimal "vainfo lists VAProfileAV1Profile0" layer from the
operator's in-progress av1-iter1 branch (Phase 2 steps 1, 2 — commits
bed75c0 + 61db76e on av1-iter1). The Phase 3-5 bit-exact decode-side
work stays in av1-iter1; this commit gives master the enumeration +
fd-routing layer so consumers (ffmpeg-vaapi, firefox-fourier, chromium-
fourier) at least see VAProfileAV1Profile0 today on RK3588.

What this commit adds:
- video_fd_vpu981 + media_fd_vpu981 slots to struct request_data
  (named to match av1-iter1's convention so the operator's Phase 3-5
   merge resolves cleanly)
- 4th-decoder probe loop in VA_DRIVER_INIT that walks hantro-vpu
  media nodes for an instance advertising V4L2_PIX_FMT_AV1_FRAME
  (AV1F) as OUTPUT pixfmt. RK3588 has 3 hantro-vpu instances all
  reporting driver="hantro-vpu" + model="hantro-vpu", so OUTPUT-
  format probe is the only DTS-independent discriminator.
- 'a' kind in request_device_kind_for_profile (VAProfileAV1Profile0)
  + 'a' branch in request_switch_device_for_profile.
- video_fd_vpu981 added to any_fd_supports_output_format helper
  (existing 3-slot loop missed the new fd; same off-by-one trap
  that bit ampere's av1-iter1 enumeration for a week).
- VAProfileAV1Profile0 → V4L2_PIX_FMT_AV1_FRAME in
  pixelformat_for_profile.
- VAProfileAV1Profile0 push in RequestQueryConfigProfiles +
  RequestQueryConfigEntrypoints + RequestCreateConfig switch.
- vpu981 fd cleanup in RequestTerminate.
- rpi_hevc_dec fd cleanup added at the same time (was already missing
  in master — fixed defensively).
- V4L2_REQUEST_MAX_PROFILES bumped 13 → 14. Defensively sized for
  the post-Option-B-revert future: with iter39 Option B reverted
  (Hi10P + Main10 back in enumeration) plus AV1, max possible
  enumeration is 13. The per-group guards use the `index < MAX - N`
  pattern; for the 13th profile (a singleton push at index=12) to
  succeed we need 12 < MAX - 1, i.e. MAX >= 14. Bumping now keeps
  the same off-by-one bug from silently dropping AV1 once Option B
  is eventually reverted.

What this commit does NOT add:
- av1.{c,h} decode-side scaffolding (Phase 2 step 4 on av1-iter1 —
  ~177 LoC including a stub av1_set_controls that returns -1). When
  the operator's av1-iter1 Phase 3-5 work lands on master, those
  500+ LoC + the stub will follow. Without them, consumers calling
  vaCreateContext(VAProfileAV1Profile0) succeed at the libva layer
  but ffmpeg-vaapi will fail at the first vaRenderPicture with an
  AV1-buffer-type rejection — clean error, no crash.

Verified 2026-05-18 on ampere:

  $ env LIBVA_DRIVER_NAME=v4l2_request vainfo | grep VAProfile
        ... (10 prior profiles, unchanged) ...
        VAProfileAV1Profile0            :   VAEntrypointVLD   ✓

  Probe log: "ampere-av1: vpu981 AV1 decoder at /dev/video4 + /dev/media3"

Build clean on ampere with GCC 16.1.1; no warnings introduced.
ampere's running module restored to the av1-iter1 build after the
verification — this commit's .so was NOT permanently installed.

Closes the headline acceptance criterion in
marfrit/libva-v4l2-request-fourier#2 ("vainfo on ampere lists
VAProfileAV1"). End-to-end AV1 decode bit-exactness is iter4 work
that the av1-iter1 branch continues to drive.

Co-Authored-By: claude-noether <claude-noether@reauktion.de>
This commit is contained in:
2026-05-18 13:45:04 +00:00
parent 9bb5a5a722
commit c6f81c653f
4 changed files with 143 additions and 3 deletions
+14
View File
@@ -46,6 +46,20 @@ unsigned int pixelformat_for_profile(VAProfile profile)
return V4L2_PIX_FMT_VP8_FRAME;
case VAProfileVP9Profile0:
return V4L2_PIX_FMT_VP9_FRAME;
case VAProfileAV1Profile0:
/*
* ampere-av1-enablement Phase 2: AV1 Profile 0 routes to
* vpu981 (RK3588's dedicated AV1 hantro). Per-codec ctrl
* dispatch (V4L2_CID_STATELESS_AV1_*) is NOT YET WIRED on
* master — vainfo lists the profile + RequestCreateConfig
* succeeds, but consumers that submit decode buffers hit
* a NOP path until the per-codec dispatch lands. The
* av1-iter1 operator branch has Phase 3 bit-exact bring-up
* underway; this commit gives master the bare enumeration +
* fd-routing layer so consumers like ffmpeg-vaapi at least
* see VAProfileAV1Profile0 today.
*/
return V4L2_PIX_FMT_AV1_FRAME;
default:
return 0;
}
+26 -2
View File
@@ -81,6 +81,16 @@ VAStatus RequestCreateConfig(VADriverContextP context, VAProfile profile,
// cap is V4L2_MPEG_VIDEO_VP9_PROFILE_0). Do not add a case for
// VAProfileVP9Profile2 — kernel will reject.
break;
case VAProfileAV1Profile0:
// ampere-av1-enablement Phase 2: AV1 Profile 0 routes to
// vpu981 (RK3588 dedicated AV1 hantro instance). Decode-side
// ctrl dispatch (V4L2_CID_STATELESS_AV1_*) is NOT YET WIRED
// on master — vainfo will list the profile + CreateConfig
// succeeds, but consumers that submit decode buffers hit a
// NOP path until av1.{c,h} dispatch scaffolding is ported
// from the av1-iter1 operator branch (where Phase 3-5 has
// 3/10 frames bit-exact already).
break;
default:
return VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
}
@@ -162,13 +172,15 @@ VAStatus RequestDestroyConfig(VADriverContextP context, VAConfigID config_id)
static bool any_fd_supports_output_format(struct request_data *driver_data,
unsigned int fmt)
{
int fds[3] = {
int fds[5] = {
driver_data->video_fd,
driver_data->video_fd_rkvdec,
driver_data->video_fd_hantro,
driver_data->video_fd_rpi_hevc_dec, /* iter40 */
driver_data->video_fd_vpu981, /* ampere-av1 Phase 2 */
};
int i;
for (i = 0; i < 3; i++) {
for (i = 0; i < 5; i++) {
if (fds[i] < 0) continue;
if (v4l2_find_format(fds[i], V4L2_BUF_TYPE_VIDEO_OUTPUT, fmt))
return true;
@@ -249,6 +261,17 @@ VAStatus RequestQueryConfigProfiles(VADriverContextP context,
if (found && index < (V4L2_REQUEST_MAX_PROFILES - 1))
profiles[index++] = VAProfileVP9Profile0;
/*
* ampere-av1-enablement Phase 2: AV1 Profile 0 advertised when
* vpu981 (RK3588 dedicated AV1 hantro) is probed. MAX_PROFILES
* bumped to 14 in request.h to safely fit even if iter39 Option
* B is reverted (Hi10P + Main10 back in enumeration → 13 total
* with AV1, the `< MAX - 1` guard then needs MAX ≥ 14).
*/
found = any_fd_supports_output_format(driver_data, V4L2_PIX_FMT_AV1_FRAME);
if (found && index < (V4L2_REQUEST_MAX_PROFILES - 1))
profiles[index++] = VAProfileAV1Profile0;
*profiles_count = index;
return VA_STATUS_SUCCESS;
@@ -272,6 +295,7 @@ VAStatus RequestQueryConfigEntrypoints(VADriverContextP context,
case VAProfileHEVCMain10:
case VAProfileVP8Version0_3:
case VAProfileVP9Profile0:
case VAProfileAV1Profile0:
entrypoints[0] = VAEntrypointVLD;
*entrypoints_count = 1;
break;
+78
View File
@@ -409,6 +409,16 @@ char request_device_kind_for_profile(VAProfile profile)
case VAProfileMPEG2Main:
case VAProfileVP8Version0_3:
return 'h';
case VAProfileAV1Profile0:
/*
* ampere-av1-enablement Phase 2: RK3588 vpu981 dedicated
* AV1 hantro instance. 'a' kind dispatches to
* driver_data->video_fd_vpu981. On hosts without the AV1
* instance the fd stays -1 and RequestQueryConfigProfiles
* never enumerates AV1, so this branch is unreachable for
* non-RK3588 hosts.
*/
return 'a';
default:
return '?';
}
@@ -457,6 +467,9 @@ int request_switch_device_for_profile(struct request_data *driver_data,
} else if (kind == 'p') {
target_video = driver_data->video_fd_rpi_hevc_dec;
target_media = driver_data->media_fd_rpi_hevc_dec;
} else if (kind == 'a') {
target_video = driver_data->video_fd_vpu981;
target_media = driver_data->media_fd_vpu981;
} else {
return -1;
}
@@ -646,6 +659,8 @@ VAStatus VA_DRIVER_INIT_FUNC(VADriverContextP context)
driver_data->media_fd_hantro = -1;
driver_data->video_fd_rpi_hevc_dec = -1;
driver_data->media_fd_rpi_hevc_dec = -1;
driver_data->video_fd_vpu981 = -1;
driver_data->media_fd_vpu981 = -1;
/*
* iter38: probe BOTH rkvdec and hantro-vpu so a single libva session
@@ -712,6 +727,61 @@ VAStatus VA_DRIVER_INIT_FUNC(VADriverContextP context)
}
}
(void)primary_driver;
/*
* ampere-av1-enablement Phase 2: walk hantro-vpu media nodes
* for a SECOND one that advertises V4L2_PIX_FMT_AV1_FRAME
* (AV1F) as OUTPUT pixfmt. RK3588 has 3 hantro-vpu instances
* (legacy MPEG2/VP8 decoder, vepu121 encoder, vpu981 AV1
* decoder) all reporting driver="hantro-vpu" /
* model="hantro-vpu" — so the OUTPUT-format probe is the only
* reliable disambiguator that doesn't depend on parsing
* card-name strings (which are DTS-dependent). First match wins.
*
* On non-RK3588 hosts the slot stays -1; the AV1 push in
* RequestQueryConfigProfiles then no-ops because
* any_fd_supports_output_format() returns false for AV1F.
*/
{
int i;
char path[32], av1_video[32];
for (i = 0; i < 16; i++) {
int mfd, vfd;
struct media_device_info info;
snprintf(path, sizeof path, "/dev/media%d", i);
mfd = open(path, O_RDWR | O_NONBLOCK);
if (mfd < 0) continue;
memset(&info, 0, sizeof info);
if (ioctl(mfd, MEDIA_IOC_DEVICE_INFO, &info) != 0 ||
strcmp(info.driver, "hantro-vpu") != 0) {
close(mfd);
continue;
}
if (find_decoder_video_node_via_topology(
mfd, av1_video, sizeof av1_video) != 0) {
close(mfd);
continue;
}
vfd = open(av1_video, O_RDWR | O_NONBLOCK);
if (vfd < 0) {
close(mfd);
continue;
}
if (!v4l2_find_format(vfd, V4L2_BUF_TYPE_VIDEO_OUTPUT, V4L2_PIX_FMT_AV1_FRAME) &&
!v4l2_find_format(vfd, V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE, V4L2_PIX_FMT_AV1_FRAME)) {
close(vfd);
close(mfd);
continue;
}
driver_data->video_fd_vpu981 = vfd;
driver_data->media_fd_vpu981 = mfd;
request_log("ampere-av1: vpu981 AV1 decoder at %s + %s\n",
av1_video, path);
break;
}
}
}
/*
@@ -784,6 +854,14 @@ VAStatus RequestTerminate(VADriverContextP context)
close(driver_data->video_fd_hantro);
if (driver_data->media_fd_hantro >= 0)
close(driver_data->media_fd_hantro);
if (driver_data->video_fd_rpi_hevc_dec >= 0)
close(driver_data->video_fd_rpi_hevc_dec);
if (driver_data->media_fd_rpi_hevc_dec >= 0)
close(driver_data->media_fd_rpi_hevc_dec);
if (driver_data->video_fd_vpu981 >= 0)
close(driver_data->video_fd_vpu981);
if (driver_data->media_fd_vpu981 >= 0)
close(driver_data->media_fd_vpu981);
/* Fall back to direct close if neither alt fd captured the active
* pair (env-override path). */
if (driver_data->video_fd_rkvdec < 0 && driver_data->video_fd_hantro < 0) {
+25 -1
View File
@@ -42,7 +42,16 @@
#define V4L2_REQUEST_STR_VENDOR "v4l2-request"
#define V4L2_REQUEST_MAX_PROFILES 13
/*
* Sized for max-possible enumeration with iter39 Option B reverted:
* MPEG2(2) + H264(6 incl. Hi10P) + HEVC(2 incl. Main10) + VP8 + VP9 + AV1 = 13.
* The per-group guards use `if (... && index < (MAX_PROFILES - N))` where N
* is the push-group size, so MAX must be total+1 = 14 here. Bumping
* defensively now so a future re-enable of Hi10P/Main10 doesn't silently
* drop AV1 through the off-by-one trap that ate ampere-av1's enumeration
* for a week (see issue marfrit/libva-v4l2-request-fourier#2).
*/
#define V4L2_REQUEST_MAX_PROFILES 14
#define V4L2_REQUEST_MAX_ENTRYPOINTS 5
#define V4L2_REQUEST_MAX_CONFIG_ATTRIBUTES 10
#define V4L2_REQUEST_MAX_IMAGE_FORMATS 10
@@ -87,6 +96,21 @@ struct request_data {
*/
int video_fd_rpi_hevc_dec;
int media_fd_rpi_hevc_dec;
/*
* ampere-av1-enablement Phase 2: fourth multi-device-probe slot
* for vpu981 (RK3588's dedicated AV1 hantro instance, kernel
* card="rockchip,rk3588-av1-vpu-dec", driver name "hantro-vpu"
* shared with the legacy MPEG-2/VP8/H.264 hantro). Discriminated
* by V4L2_PIX_FMT_AV1_FRAME (AV1F) OUTPUT-pixfmt capability since
* the driver name alone is ambiguous on RK3588. Stays -1 on hosts
* without the AV1 vpu-dec.
*
* Named "vpu981" for consistency with the in-progress av1-iter1
* operator branch (Phase 3-5 bit-exact AV1 work); when that lands,
* these fields receive the actual decode dispatch wiring.
*/
int video_fd_vpu981;
int media_fd_vpu981;
/*
* iter2 (ampere-kernel-decoders campaign) per-fd probe result