diff --git a/daemon/src/decoder.c b/daemon/src/decoder.c index a43d6ab..10f70f5 100644 --- a/daemon/src/decoder.c +++ b/daemon/src/decoder.c @@ -10,12 +10,55 @@ #include #include #include +#include #include #include #include +/* + * Per-codec running stats — daedalus-v4l2#11 step 1. Establishes + * baseline observability before any daedalus-fourier kernel + * substitution lands, so we can see what each substitution actually + * shifted. Per-frame `decoder: OK` line now carries decode_us; a + * "decoder stats" summary line lands every DAEDALUS_STATS_EVERY OK + * frames with throughput + per-frame budget aggregates. + * + * Counters are static (process-local) and unsynchronised — the + * daemon's chardev event loop is single-threaded, so no atomics or + * locking needed. Reset when codec_id changes (different stream). + */ +#define DAEDALUS_STATS_EVERY 60u + +struct daedalus_decode_stats { + uint32_t codec_id; + uint64_t frames; + uint64_t total_decode_ns; + uint64_t total_bitstream_bytes; + uint64_t total_mbs; /* derived from frame WxH; H.264-style 16x16 */ + struct timespec window_start; +}; + +static struct daedalus_decode_stats g_stats; + +static inline uint64_t timespec_delta_ns(const struct timespec *a, + const struct timespec *b) +{ + return (uint64_t)(b->tv_sec - a->tv_sec) * 1000000000ull + + (uint64_t)(b->tv_nsec - a->tv_nsec); +} + +static const char *codec_id_name(uint32_t cid) +{ + switch (cid) { + case DAEDALUS_CODEC_VP9: return "vp9"; + case DAEDALUS_CODEC_AV1: return "av1"; + case DAEDALUS_CODEC_H264: return "h264"; + default: return "?"; + } +} + /* * FNV-1a 32-bit hash. Used as a compact digest of the decoded * frame's YUV planes so the kernel can verify "the daemon produced @@ -467,6 +510,17 @@ int daedalus_decoder_run_request(struct daedalus_decoder *dec, dec->pkt->size = (int) req->bitstream_len; } + /* + * Time send_packet+receive_frame for the per-frame `decoder: OK` + * line + the periodic stats summary. Includes only the + * libavcodec round-trip — not the bitstream packing, SPS/PPS + * synth, or pack-to-planes work (those are accounted for + * separately in the request's overall handle time). + */ + struct timespec t_decode_start, t_decode_end; + uint64_t decode_ns = 0; + clock_gettime(CLOCK_MONOTONIC, &t_decode_start); + rc = fm->avcodec_send_packet(ctx, dec->pkt); if (rc < 0) { log_err("decoder: avcodec_send_packet failed: %d", rc); @@ -476,8 +530,11 @@ int daedalus_decoder_run_request(struct daedalus_decoder *dec, fm->av_frame_unref(dec->frame); rc = fm->avcodec_receive_frame(ctx, dec->frame); + clock_gettime(CLOCK_MONOTONIC, &t_decode_end); + decode_ns = timespec_delta_ns(&t_decode_start, &t_decode_end); if (rc == AVERROR(EAGAIN) || rc == AVERROR_EOF) { - log_debug("decoder: no frame ready yet (rc=%d)", rc); + log_debug("decoder: no frame ready yet (rc=%d, %lu us)", + rc, (unsigned long)(decode_ns / 1000)); resp->status = DAEDALUS_DECODE_NO_FRAME; goto out; } @@ -584,10 +641,61 @@ int daedalus_decoder_run_request(struct daedalus_decoder *dec, fr->format, req->capture_pix_fmt); } - log_info("decoder: OK %dx%d fmt=%d (%s) fnv1a=0x%08x luma=%u chroma=%u", + log_info("decoder: OK %dx%d fmt=%d (%s) fnv1a=0x%08x luma=%u chroma=%u decode_us=%lu", fr->width, fr->height, fr->format, desc ? desc->name : "?", - h, luma_len, chroma_len); + h, luma_len, chroma_len, + (unsigned long)(decode_ns / 1000)); + + /* + * Periodic stats summary (every DAEDALUS_STATS_EVERY frames). + * Reset window on codec change. Gives observable baseline + * for the daedalus-v4l2#11 substitution arc: fps, average + * decode_us, MB/s throughput, bitstream B/MB. Compare + * against daedalus-fourier README's per-kernel NEON + * baselines (e.g. H.264 IDCT 4x4 = 175 Mblock/s) to gauge + * which substitutions are worth pursuing. + */ + if (g_stats.codec_id != req->codec_id) { + g_stats.codec_id = req->codec_id; + g_stats.frames = 0; + g_stats.total_decode_ns = 0; + g_stats.total_bitstream_bytes = 0; + g_stats.total_mbs = 0; + clock_gettime(CLOCK_MONOTONIC, &g_stats.window_start); + } + g_stats.frames++; + g_stats.total_decode_ns += decode_ns; + g_stats.total_bitstream_bytes += req->bitstream_len; + g_stats.total_mbs += (uint64_t)((fr->width + 15) / 16) * + (uint64_t)((fr->height + 15) / 16); + + if (g_stats.frames % DAEDALUS_STATS_EVERY == 0) { + struct timespec t_now; + clock_gettime(CLOCK_MONOTONIC, &t_now); + uint64_t window_ns = + timespec_delta_ns(&g_stats.window_start, &t_now); + double window_s = (double)window_ns / 1e9; + double fps = window_s > 0 ? + (double)g_stats.frames / window_s : 0.0; + double avg_decode_us = g_stats.frames > 0 ? + (double)g_stats.total_decode_ns / + (double)g_stats.frames / 1000.0 : 0.0; + double mb_per_s = window_s > 0 ? + (double)g_stats.total_mbs / window_s : 0.0; + double bs_b_per_mb = g_stats.total_mbs > 0 ? + (double)g_stats.total_bitstream_bytes / + (double)g_stats.total_mbs : 0.0; + + log_info("decoder stats: codec=%s " + "frames=%llu window=%.2fs fps=%.2f " + "avg_decode_us=%.1f mbs_per_s=%.0f " + "bs_b_per_mb=%.2f", + codec_id_name(g_stats.codec_id), + (unsigned long long)g_stats.frames, + window_s, fps, avg_decode_us, + mb_per_s, bs_b_per_mb); + } } fm->av_frame_unref(dec->frame);