/* SPDX-License-Identifier: BSD-2-Clause */ /* * test_m2m_stream — multi-frame V4L2 m2m streaming verification. * * Drives a complete VP9 IVF file through /dev/video0: * 1. parse IVF (per-frame size+data) * 2. open + S_FMT both queues * 3. REQBUFS N buffers each * 4. Loop: QBUF OUTPUT[i % N] (mmap + copy), DQBUF OUTPUT, * DQBUF CAPTURE → dump NV12 to file * 5. STREAMOFF, close * * Concatenates all decoded frames into one big NV12 dump; the * caller compares against a reference `ffmpeg -pix_fmt nv12 -f * rawvideo` dump for the same input. * * Usage: * test_m2m_stream [w] [h] [codec] * defaults: w=320 h=240 codec=vp9 */ #include #include #include #include #include #include #include #include #include #include #include #include #include #define V4L2_DEV "/dev/video0" #define POLL_TIMEOUT_MS 5000 #define NUM_OUTPUT_BUFS 4 #define NUM_CAPTURE_BUFS 4 static void die(const char *msg) { perror(msg); exit(1); } static uint64_t now_us(void) { struct timespec ts; clock_gettime(CLOCK_MONOTONIC, &ts); return (uint64_t) ts.tv_sec * 1000000ull + (uint64_t) (ts.tv_nsec / 1000ull); } static int cmp_u64(const void *a, const void *b) { uint64_t va = *(const uint64_t *) a, vb = *(const uint64_t *) b; return (va > vb) - (va < vb); } struct ivf_frame { uint8_t *data; uint32_t size; }; /* * Parse an Annex-B H.264 stream into ACCESS UNITS. An access * unit contains zero or more non-VCL NALs (SPS/PPS/SEI/AUD) * followed by one VCL NAL (slice). Submitting NALs individually * confuses FFmpeg's H.264 decoder — it needs SPS+PPS plus a * complete slice to produce a frame. We accumulate NALs in a * pending buffer; when we see a VCL NAL (type 1 or 5) we flush * (pending + that VCL NAL) as one access unit. * * Width/height aren't carried in the Annex-B framing; caller * must supply them via the [w] [h] command-line args. */ static int find_next_startcode(const uint8_t *d, size_t off, size_t len) { while (off + 3 <= len) { if (d[off] == 0 && d[off + 1] == 0) { if (d[off + 2] == 1) return (int) off; if (off + 4 <= len && d[off + 2] == 0 && d[off + 3] == 1) return (int) off; } off++; } return -1; } /* * Given a NAL chunk (starts with 0x000001 or 0x00000001), * return the H.264 NAL unit type (byte after the start code, * masked with 0x1F). */ static int h264_nal_type(const uint8_t *nal, size_t sz) { size_t off; if (sz < 4) return -1; /* skip the 3- or 4-byte start code */ if (nal[2] == 1) off = 3; else if (sz >= 5 && nal[2] == 0 && nal[3] == 1) off = 4; else return -1; if (off >= sz) return -1; return nal[off] & 0x1F; } static struct ivf_frame *parse_annexb(const char *path, int *out_count) { uint8_t *buf; struct stat st; int fd; ssize_t n; int count = 0, cap = 16; struct ivf_frame *frames; int off, next; uint8_t *pending = NULL; size_t pending_len = 0; fd = open(path, O_RDONLY); if (fd < 0) die("open annex-b"); if (fstat(fd, &st) < 0) die("fstat"); buf = malloc(st.st_size); if (!buf) die("malloc annex-b"); n = read(fd, buf, st.st_size); if (n != st.st_size) die("read annex-b"); close(fd); frames = malloc(cap * sizeof(*frames)); if (!frames) die("malloc frames"); off = find_next_startcode(buf, 0, (size_t) st.st_size); if (off < 0) { fprintf(stderr, "no Annex-B start code in %s\n", path); exit(1); } while (off < st.st_size) { size_t start = (size_t) off; size_t end, sz; int nal_type; next = find_next_startcode(buf, start + 3, (size_t) st.st_size); end = (next < 0) ? (size_t) st.st_size : (size_t) next; sz = end - start; nal_type = h264_nal_type(buf + start, sz); /* Append this NAL to the pending access unit. */ pending = realloc(pending, pending_len + sz); if (!pending) die("realloc pending au"); memcpy(pending + pending_len, buf + start, sz); pending_len += sz; /* VCL NAL types 1 (non-IDR slice) and 5 (IDR slice) * close the access unit. */ if (nal_type == 1 || nal_type == 5) { if (count >= cap) { cap *= 2; frames = realloc(frames, cap * sizeof(*frames)); if (!frames) die("realloc frames"); } frames[count].size = (uint32_t) pending_len; frames[count].data = pending; count++; pending = NULL; pending_len = 0; } off = (next < 0) ? (int) st.st_size : next; } free(pending); free(buf); *out_count = count; return frames; } /* Parse an IVF file into a vector of frames (caller frees). */ static struct ivf_frame *parse_ivf(const char *path, int *out_count, uint32_t *out_w, uint32_t *out_h) { uint8_t *buf; struct stat st; int fd; ssize_t n; size_t off = 32; int count = 0, cap = 16; struct ivf_frame *frames; fd = open(path, O_RDONLY); if (fd < 0) die("open ivf"); if (fstat(fd, &st) < 0) die("fstat"); buf = malloc(st.st_size); if (!buf) die("malloc ivf"); n = read(fd, buf, st.st_size); if (n != st.st_size) die("read ivf"); close(fd); if (memcmp(buf, "DKIF", 4)) { fprintf(stderr, "not IVF\n"); exit(1); } *out_w = buf[12] | (buf[13] << 8); *out_h = buf[14] | (buf[15] << 8); frames = malloc(cap * sizeof(*frames)); if (!frames) die("malloc frames"); while (off + 12 <= (size_t) st.st_size) { uint32_t sz = buf[off] | (buf[off + 1] << 8) | (buf[off + 2] << 16) | (buf[off + 3] << 24); off += 12; if (off + sz > (size_t) st.st_size) { fprintf(stderr, "truncated frame at %zu\n", off); break; } if (count >= cap) { cap *= 2; frames = realloc(frames, cap * sizeof(*frames)); if (!frames) die("realloc frames"); } frames[count].size = sz; frames[count].data = malloc(sz); if (!frames[count].data) die("malloc frame"); memcpy(frames[count].data, buf + off, sz); off += sz; count++; } free(buf); *out_count = count; return frames; } static void free_frames(struct ivf_frame *f, int n) { int i; for (i = 0; i < n; i++) free(f[i].data); free(f); } int main(int argc, char **argv) { const char *ivf_path, *out_path; uint32_t override_w = 0, override_h = 0; uint32_t output_fourcc = V4L2_PIX_FMT_VP9_FRAME; uint32_t capture_fourcc = V4L2_PIX_FMT_NV12M; int capture_num_planes = 2; uint32_t w, h; int fd, frame_count; struct ivf_frame *frames; struct v4l2_format fmt; struct v4l2_requestbuffers reqbuf; struct v4l2_buffer buf; struct v4l2_plane planes[2]; enum v4l2_buf_type t; void *out_maps[NUM_OUTPUT_BUFS]; size_t out_map_size = 0; void *cap_y[NUM_CAPTURE_BUFS], *cap_uv[NUM_CAPTURE_BUFS]; size_t cap_y_size = 0, cap_uv_size = 0; FILE *of; int i, decoded = 0; uint64_t *per_frame_us = NULL; uint64_t total_start, total_us; if (argc < 3) { fprintf(stderr, "usage: %s [w] [h] [codec]\n" " codec: vp9 | av1 | h264 (default vp9)\n", argv[0]); return 2; } ivf_path = argv[1]; out_path = argv[2]; if (argc >= 5) { override_w = (uint32_t) atoi(argv[3]); override_h = (uint32_t) atoi(argv[4]); } if (argc >= 6) { const char *cn = argv[5]; if (!strcmp(cn, "vp9")) output_fourcc = V4L2_PIX_FMT_VP9_FRAME; else if (!strcmp(cn, "av1")) output_fourcc = V4L2_PIX_FMT_AV1_FRAME; else if (!strcmp(cn, "h264")) output_fourcc = V4L2_PIX_FMT_H264_SLICE; else { fprintf(stderr, "unknown codec %s\n", cn); return 2; } } if (argc >= 7) { const char *cf = argv[6]; if (!strcmp(cf, "nv12m")) { capture_fourcc = V4L2_PIX_FMT_NV12M; capture_num_planes = 2; } else if (!strcmp(cf, "p010")) { capture_fourcc = V4L2_PIX_FMT_P010; capture_num_planes = 1; } else { fprintf(stderr, "unknown capture format %s\n", cf); return 2; } } /* * Format detection: IVF starts with 'DKIF' magic; anything * else is treated as Annex-B (H.264 NAL stream). Width/ * height come from the IVF header for IVF, or must be * provided as CLI args for Annex-B. */ { uint8_t hdr4[4] = { 0 }; int hfd = open(ivf_path, O_RDONLY); if (hfd < 0) die("open input"); if (read(hfd, hdr4, 4) != 4) die("read header"); close(hfd); if (!memcmp(hdr4, "DKIF", 4)) { frames = parse_ivf(ivf_path, &frame_count, &w, &h); } else { if (!override_w || !override_h) { fprintf(stderr, "non-IVF input: explicit [w] [h] required\n"); return 2; } w = override_w; h = override_h; frames = parse_annexb(ivf_path, &frame_count); } } if (override_w) w = override_w; if (override_h) h = override_h; printf("parsed %d frames, %ux%u\n", frame_count, w, h); fd = open(V4L2_DEV, O_RDWR); if (fd < 0) die("open " V4L2_DEV); /* S_FMT OUTPUT */ memset(&fmt, 0, sizeof(fmt)); fmt.type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE; fmt.fmt.pix_mp.width = w; fmt.fmt.pix_mp.height = h; fmt.fmt.pix_mp.pixelformat = output_fourcc; if (ioctl(fd, VIDIOC_S_FMT, &fmt) < 0) die("S_FMT OUTPUT"); /* S_FMT CAPTURE */ memset(&fmt, 0, sizeof(fmt)); fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE; fmt.fmt.pix_mp.width = w; fmt.fmt.pix_mp.height = h; fmt.fmt.pix_mp.pixelformat = capture_fourcc; if (ioctl(fd, VIDIOC_S_FMT, &fmt) < 0) die("S_FMT CAPTURE"); cap_y_size = fmt.fmt.pix_mp.plane_fmt[0].sizeimage; cap_uv_size = capture_num_planes > 1 ? fmt.fmt.pix_mp.plane_fmt[1].sizeimage : 0; printf("CAPTURE fmt=%c%c%c%c planes=%u sizeimage=[%zu,%zu]\n", capture_fourcc & 0xff, (capture_fourcc >> 8) & 0xff, (capture_fourcc >> 16) & 0xff, (capture_fourcc >> 24) & 0xff, fmt.fmt.pix_mp.num_planes, cap_y_size, cap_uv_size); /* REQBUFS OUTPUT + mmap each */ memset(&reqbuf, 0, sizeof(reqbuf)); reqbuf.type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE; reqbuf.memory = V4L2_MEMORY_MMAP; reqbuf.count = NUM_OUTPUT_BUFS; if (ioctl(fd, VIDIOC_REQBUFS, &reqbuf) < 0) die("REQBUFS OUTPUT"); printf("OUTPUT reqbufs -> %u\n", reqbuf.count); for (i = 0; i < NUM_OUTPUT_BUFS; i++) { memset(&buf, 0, sizeof(buf)); memset(planes, 0, sizeof(planes)); buf.type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE; buf.memory = V4L2_MEMORY_MMAP; buf.index = i; buf.m.planes = planes; buf.length = 1; if (ioctl(fd, VIDIOC_QUERYBUF, &buf) < 0) die("QUERYBUF OUTPUT"); out_map_size = planes[0].length; out_maps[i] = mmap(NULL, planes[0].length, PROT_READ | PROT_WRITE, MAP_SHARED, fd, planes[0].m.mem_offset); if (out_maps[i] == MAP_FAILED) die("mmap OUTPUT"); } /* REQBUFS CAPTURE + mmap each */ memset(&reqbuf, 0, sizeof(reqbuf)); reqbuf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE; reqbuf.memory = V4L2_MEMORY_MMAP; reqbuf.count = NUM_CAPTURE_BUFS; if (ioctl(fd, VIDIOC_REQBUFS, &reqbuf) < 0) die("REQBUFS CAPTURE"); printf("CAPTURE reqbufs -> %u\n", reqbuf.count); for (i = 0; i < NUM_CAPTURE_BUFS; i++) { memset(&buf, 0, sizeof(buf)); memset(planes, 0, sizeof(planes)); buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE; buf.memory = V4L2_MEMORY_MMAP; buf.index = i; buf.m.planes = planes; buf.length = capture_num_planes; if (ioctl(fd, VIDIOC_QUERYBUF, &buf) < 0) die("QUERYBUF CAPTURE"); cap_y[i] = mmap(NULL, planes[0].length, PROT_READ, MAP_SHARED, fd, planes[0].m.mem_offset); if (cap_y[i] == MAP_FAILED) die("mmap CAPTURE Y"); if (capture_num_planes > 1) { cap_uv[i] = mmap(NULL, planes[1].length, PROT_READ, MAP_SHARED, fd, planes[1].m.mem_offset); if (cap_uv[i] == MAP_FAILED) die("mmap CAPTURE UV"); } else { cap_uv[i] = NULL; } /* QBUF all capture buffers up front */ memset(&buf, 0, sizeof(buf)); memset(planes, 0, sizeof(planes)); buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE; buf.memory = V4L2_MEMORY_MMAP; buf.index = i; buf.m.planes = planes; buf.length = capture_num_planes; if (ioctl(fd, VIDIOC_QBUF, &buf) < 0) die("QBUF CAPTURE init"); } t = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE; if (ioctl(fd, VIDIOC_STREAMON, &t) < 0) die("STREAMON OUTPUT"); t = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE; if (ioctl(fd, VIDIOC_STREAMON, &t) < 0) die("STREAMON CAPTURE"); printf("STREAMON both\n"); of = fopen(out_path, "wb"); if (!of) die("fopen out"); per_frame_us = calloc((size_t) frame_count, sizeof(*per_frame_us)); if (!per_frame_us) die("calloc per_frame_us"); total_start = now_us(); /* Feed one bitstream frame at a time; serialise DQBUF after each. */ for (i = 0; i < frame_count; i++) { int idx = i % NUM_OUTPUT_BUFS; struct pollfd p = { .fd = fd, .events = POLLIN | POLLOUT }; size_t y_actual, uv_actual; int cap_idx; uint64_t frame_start = now_us(); if (frames[i].size > out_map_size) { fprintf(stderr, "frame %d too big: %u > %zu\n", i, frames[i].size, out_map_size); break; } memcpy(out_maps[idx], frames[i].data, frames[i].size); memset(&buf, 0, sizeof(buf)); memset(planes, 0, sizeof(planes)); buf.type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE; buf.memory = V4L2_MEMORY_MMAP; buf.index = idx; buf.m.planes = planes; buf.length = 1; planes[0].bytesused = frames[i].size; if (ioctl(fd, VIDIOC_QBUF, &buf) < 0) die("QBUF OUTPUT"); if (poll(&p, 1, POLL_TIMEOUT_MS) <= 0) die("poll"); /* DQBUF OUTPUT (returns the buffer to userspace pool) */ memset(&buf, 0, sizeof(buf)); memset(planes, 0, sizeof(planes)); buf.type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE; buf.memory = V4L2_MEMORY_MMAP; buf.m.planes = planes; buf.length = 1; if (ioctl(fd, VIDIOC_DQBUF, &buf) < 0) die("DQBUF OUTPUT"); /* DQBUF CAPTURE */ memset(&buf, 0, sizeof(buf)); memset(planes, 0, sizeof(planes)); buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE; buf.memory = V4L2_MEMORY_MMAP; buf.m.planes = planes; buf.length = capture_num_planes; if (ioctl(fd, VIDIOC_DQBUF, &buf) < 0) die("DQBUF CAPTURE"); cap_idx = buf.index; if (buf.flags & V4L2_BUF_FLAG_ERROR) { fprintf(stderr, " frame %d CAPTURE ERROR\n", i); break; } y_actual = planes[0].bytesused ? planes[0].bytesused : cap_y_size; uv_actual = (capture_num_planes > 1 && planes[1].bytesused) ? planes[1].bytesused : cap_uv_size; fwrite(cap_y[cap_idx], 1, y_actual, of); if (capture_num_planes > 1 && cap_uv[cap_idx]) fwrite(cap_uv[cap_idx], 1, uv_actual, of); per_frame_us[decoded] = now_us() - frame_start; decoded++; /* Recycle the CAPTURE buffer */ memset(&buf, 0, sizeof(buf)); memset(planes, 0, sizeof(planes)); buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE; buf.memory = V4L2_MEMORY_MMAP; buf.index = cap_idx; buf.m.planes = planes; buf.length = capture_num_planes; if (ioctl(fd, VIDIOC_QBUF, &buf) < 0) die("QBUF CAPTURE recycle"); } total_us = now_us() - total_start; fclose(of); printf("decoded %d / %d frames to %s\n", decoded, frame_count, out_path); if (decoded > 0) { uint64_t *sorted = malloc(decoded * sizeof(*sorted)); uint64_t sum = 0; double mean_us, fps; int i; memcpy(sorted, per_frame_us, decoded * sizeof(*sorted)); qsort(sorted, decoded, sizeof(*sorted), cmp_u64); for (i = 0; i < decoded; i++) sum += per_frame_us[i]; mean_us = (double) sum / (double) decoded; fps = 1e6 * (double) decoded / (double) total_us; printf("perf: mean=%.0fus p50=%luus p99=%luus min=%luus max=%luus | wall=%lums fps=%.1f\n", mean_us, (unsigned long) sorted[decoded / 2], (unsigned long) sorted[(decoded * 99) / 100], (unsigned long) sorted[0], (unsigned long) sorted[decoded - 1], (unsigned long) (total_us / 1000), fps); free(sorted); } free(per_frame_us); t = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE; ioctl(fd, VIDIOC_STREAMOFF, &t); t = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE; ioctl(fd, VIDIOC_STREAMOFF, &t); close(fd); free_frames(frames, frame_count); return decoded == frame_count ? 0 : 1; }