daemon: per-frame decode_us + periodic stats (#11 step 1) #15

Merged
marfrit merged 1 commits from noether/daemon-decode-stats into main 2026-05-21 18:26:50 +00:00
+111 -3
View File
@@ -10,12 +10,55 @@
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <linux/videodev2.h>
#include <libavcodec/avcodec.h>
#include <libavutil/pixfmt.h>
/*
* Per-codec running stats — daedalus-v4l2#11 step 1. Establishes
* baseline observability before any daedalus-fourier kernel
* substitution lands, so we can see what each substitution actually
* shifted. Per-frame `decoder: OK` line now carries decode_us; a
* "decoder stats" summary line lands every DAEDALUS_STATS_EVERY OK
* frames with throughput + per-frame budget aggregates.
*
* Counters are static (process-local) and unsynchronised — the
* daemon's chardev event loop is single-threaded, so no atomics or
* locking needed. Reset when codec_id changes (different stream).
*/
/* Number of counted (successfully decoded) frames between "decoder stats" summary lines. */
#define DAEDALUS_STATS_EVERY 60u
/*
 * Running decode statistics for the stream currently being decoded.
 *
 * NOTE(review): the visible update code resets these only when the codec
 * id changes, not after a summary is logged, so each summary appears to be
 * cumulative since the last codec change rather than a 60-frame window —
 * confirm that is intended.
 */
struct daedalus_decode_stats {
/* Codec the counters belong to; a request carrying a different id resets them. */
uint32_t codec_id;
/* Frames counted since the last reset. */
uint64_t frames;
/* Sum of the per-frame send_packet + receive_frame times, in nanoseconds. */
uint64_t total_decode_ns;
/* Sum of the bitstream lengths of the counted requests, in bytes. */
uint64_t total_bitstream_bytes;
/* Sum over counted frames of ceil(width / 16) * ceil(height / 16). */
uint64_t total_mbs; /* derived from frame WxH; H.264-style 16x16 */
/* CLOCK_MONOTONIC timestamp captured when the counters were last reset. */
struct timespec window_start;
};
/*
 * The single stats instance for this translation unit. As a file-scope
 * static it starts out zero-filled.
 *
 * NOTE(review): because codec_id starts at 0, a first stream whose codec id
 * is also 0 would bypass the reset branch and leave window_start at zero —
 * confirm that no DAEDALUS_CODEC_* value is 0.
 */
static struct daedalus_decode_stats g_stats;
/*
 * Elapsed time from *a to *b, in nanoseconds.
 *
 * The seconds and nanoseconds differences are each converted to uint64_t
 * and summed with modular arithmetic, so a negative nanoseconds difference
 * (b->tv_nsec < a->tv_nsec) still yields the right total as long as b is
 * not earlier than a. If b precedes a, the result wraps modulo 2^64.
 */
static inline uint64_t timespec_delta_ns(const struct timespec *a,
                                         const struct timespec *b)
{
    const uint64_t ns_per_sec = 1000000000ull;
    uint64_t whole_part = (uint64_t)(b->tv_sec - a->tv_sec) * ns_per_sec;
    uint64_t frac_part = (uint64_t)(b->tv_nsec - a->tv_nsec);

    return whole_part + frac_part;
}
/*
 * Short printable tag for a DAEDALUS_CODEC_* id.
 *
 * Returns a pointer to a string literal: "vp9", "av1" or "h264" for the
 * three recognised ids, and "?" for anything else. The caller must not
 * free or modify the result.
 */
static const char *codec_id_name(uint32_t cid)
{
    const char *label = "?";

    switch (cid) {
    case DAEDALUS_CODEC_VP9:
        label = "vp9";
        break;
    case DAEDALUS_CODEC_AV1:
        label = "av1";
        break;
    case DAEDALUS_CODEC_H264:
        label = "h264";
        break;
    default:
        /* Unknown id: keep the "?" placeholder. */
        break;
    }

    return label;
}
/*
* FNV-1a 32-bit hash. Used as a compact digest of the decoded
* frame's YUV planes so the kernel can verify "the daemon produced
@@ -467,6 +510,17 @@ int daedalus_decoder_run_request(struct daedalus_decoder *dec,
dec->pkt->size = (int) req->bitstream_len;
}
/*
* Time send_packet+receive_frame for the per-frame `decoder: OK`
* line + the periodic stats summary. Includes only the
* libavcodec round-trip — not the bitstream packing, SPS/PPS
* synth, or pack-to-planes work (those are accounted for
* separately in the request's overall handle time).
*/
struct timespec t_decode_start, t_decode_end;
uint64_t decode_ns = 0;
clock_gettime(CLOCK_MONOTONIC, &t_decode_start);
rc = fm->avcodec_send_packet(ctx, dec->pkt);
if (rc < 0) {
log_err("decoder: avcodec_send_packet failed: %d", rc);
@@ -476,8 +530,11 @@ int daedalus_decoder_run_request(struct daedalus_decoder *dec,
fm->av_frame_unref(dec->frame);
rc = fm->avcodec_receive_frame(ctx, dec->frame);
clock_gettime(CLOCK_MONOTONIC, &t_decode_end);
decode_ns = timespec_delta_ns(&t_decode_start, &t_decode_end);
if (rc == AVERROR(EAGAIN) || rc == AVERROR_EOF) {
log_debug("decoder: no frame ready yet (rc=%d)", rc);
log_debug("decoder: no frame ready yet (rc=%d, %lu us)",
rc, (unsigned long)(decode_ns / 1000));
resp->status = DAEDALUS_DECODE_NO_FRAME;
goto out;
}
@@ -584,10 +641,61 @@ int daedalus_decoder_run_request(struct daedalus_decoder *dec,
fr->format, req->capture_pix_fmt);
}
log_info("decoder: OK %dx%d fmt=%d (%s) fnv1a=0x%08x luma=%u chroma=%u",
log_info("decoder: OK %dx%d fmt=%d (%s) fnv1a=0x%08x luma=%u chroma=%u decode_us=%lu",
fr->width, fr->height, fr->format,
desc ? desc->name : "?",
h, luma_len, chroma_len);
h, luma_len, chroma_len,
(unsigned long)(decode_ns / 1000));
/*
* Periodic stats summary (every DAEDALUS_STATS_EVERY frames).
* Reset window on codec change. Gives observable baseline
* for the daedalus-v4l2#11 substitution arc: fps, average
* decode_us, MB/s throughput, bitstream B/MB. Compare
* against daedalus-fourier README's per-kernel NEON
* baselines (e.g. H.264 IDCT 4x4 = 175 Mblock/s) to gauge
* which substitutions are worth pursuing.
*/
if (g_stats.codec_id != req->codec_id) {
g_stats.codec_id = req->codec_id;
g_stats.frames = 0;
g_stats.total_decode_ns = 0;
g_stats.total_bitstream_bytes = 0;
g_stats.total_mbs = 0;
clock_gettime(CLOCK_MONOTONIC, &g_stats.window_start);
}
g_stats.frames++;
g_stats.total_decode_ns += decode_ns;
g_stats.total_bitstream_bytes += req->bitstream_len;
g_stats.total_mbs += (uint64_t)((fr->width + 15) / 16) *
(uint64_t)((fr->height + 15) / 16);
if (g_stats.frames % DAEDALUS_STATS_EVERY == 0) {
struct timespec t_now;
clock_gettime(CLOCK_MONOTONIC, &t_now);
uint64_t window_ns =
timespec_delta_ns(&g_stats.window_start, &t_now);
double window_s = (double)window_ns / 1e9;
double fps = window_s > 0 ?
(double)g_stats.frames / window_s : 0.0;
double avg_decode_us = g_stats.frames > 0 ?
(double)g_stats.total_decode_ns /
(double)g_stats.frames / 1000.0 : 0.0;
double mb_per_s = window_s > 0 ?
(double)g_stats.total_mbs / window_s : 0.0;
double bs_b_per_mb = g_stats.total_mbs > 0 ?
(double)g_stats.total_bitstream_bytes /
(double)g_stats.total_mbs : 0.0;
log_info("decoder stats: codec=%s "
"frames=%llu window=%.2fs fps=%.2f "
"avg_decode_us=%.1f mbs_per_s=%.0f "
"bs_b_per_mb=%.2f",
codec_id_name(g_stats.codec_id),
(unsigned long long)g_stats.frames,
window_s, fps, avg_decode_us,
mb_per_s, bs_b_per_mb);
}
}
fm->av_frame_unref(dec->frame);