daemon: per-frame decode_us + periodic stats (#11 step 1) #15
+111
-3
@@ -10,12 +10,55 @@
|
||||
#include <errno.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
|
||||
#include <linux/videodev2.h>
|
||||
|
||||
#include <libavcodec/avcodec.h>
|
||||
#include <libavutil/pixfmt.h>
|
||||
|
||||
/*
|
||||
* Per-codec running stats — daedalus-v4l2#11 step 1. Establishes
|
||||
* baseline observability before any daedalus-fourier kernel
|
||||
* substitution lands, so we can see what each substitution actually
|
||||
* shifted. Per-frame `decoder: OK` line now carries decode_us; a
|
||||
* "decoder stats" summary line lands every DAEDALUS_STATS_EVERY OK
|
||||
* frames with throughput + per-frame budget aggregates.
|
||||
*
|
||||
* Counters are static (process-local) and unsynchronised — the
|
||||
* daemon's chardev event loop is single-threaded, so no atomics or
|
||||
* locking needed. Reset when codec_id changes (different stream).
|
||||
*/
|
||||
#define DAEDALUS_STATS_EVERY 60u
|
||||
|
||||
struct daedalus_decode_stats {
|
||||
uint32_t codec_id;
|
||||
uint64_t frames;
|
||||
uint64_t total_decode_ns;
|
||||
uint64_t total_bitstream_bytes;
|
||||
uint64_t total_mbs; /* derived from frame WxH; H.264-style 16x16 */
|
||||
struct timespec window_start;
|
||||
};
|
||||
|
||||
static struct daedalus_decode_stats g_stats;
|
||||
|
||||
static inline uint64_t timespec_delta_ns(const struct timespec *a,
|
||||
const struct timespec *b)
|
||||
{
|
||||
return (uint64_t)(b->tv_sec - a->tv_sec) * 1000000000ull +
|
||||
(uint64_t)(b->tv_nsec - a->tv_nsec);
|
||||
}
|
||||
|
||||
static const char *codec_id_name(uint32_t cid)
|
||||
{
|
||||
switch (cid) {
|
||||
case DAEDALUS_CODEC_VP9: return "vp9";
|
||||
case DAEDALUS_CODEC_AV1: return "av1";
|
||||
case DAEDALUS_CODEC_H264: return "h264";
|
||||
default: return "?";
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* FNV-1a 32-bit hash. Used as a compact digest of the decoded
|
||||
* frame's YUV planes so the kernel can verify "the daemon produced
|
||||
@@ -467,6 +510,17 @@ int daedalus_decoder_run_request(struct daedalus_decoder *dec,
|
||||
dec->pkt->size = (int) req->bitstream_len;
|
||||
}
|
||||
|
||||
/*
|
||||
* Time send_packet+receive_frame for the per-frame `decoder: OK`
|
||||
* line + the periodic stats summary. Includes only the
|
||||
* libavcodec round-trip — not the bitstream packing, SPS/PPS
|
||||
* synth, or pack-to-planes work (those are accounted for
|
||||
* separately in the request's overall handle time).
|
||||
*/
|
||||
struct timespec t_decode_start, t_decode_end;
|
||||
uint64_t decode_ns = 0;
|
||||
clock_gettime(CLOCK_MONOTONIC, &t_decode_start);
|
||||
|
||||
rc = fm->avcodec_send_packet(ctx, dec->pkt);
|
||||
if (rc < 0) {
|
||||
log_err("decoder: avcodec_send_packet failed: %d", rc);
|
||||
@@ -476,8 +530,11 @@ int daedalus_decoder_run_request(struct daedalus_decoder *dec,
|
||||
|
||||
fm->av_frame_unref(dec->frame);
|
||||
rc = fm->avcodec_receive_frame(ctx, dec->frame);
|
||||
clock_gettime(CLOCK_MONOTONIC, &t_decode_end);
|
||||
decode_ns = timespec_delta_ns(&t_decode_start, &t_decode_end);
|
||||
if (rc == AVERROR(EAGAIN) || rc == AVERROR_EOF) {
|
||||
log_debug("decoder: no frame ready yet (rc=%d)", rc);
|
||||
log_debug("decoder: no frame ready yet (rc=%d, %lu us)",
|
||||
rc, (unsigned long)(decode_ns / 1000));
|
||||
resp->status = DAEDALUS_DECODE_NO_FRAME;
|
||||
goto out;
|
||||
}
|
||||
@@ -584,10 +641,61 @@ int daedalus_decoder_run_request(struct daedalus_decoder *dec,
|
||||
fr->format, req->capture_pix_fmt);
|
||||
}
|
||||
|
||||
log_info("decoder: OK %dx%d fmt=%d (%s) fnv1a=0x%08x luma=%u chroma=%u",
|
||||
log_info("decoder: OK %dx%d fmt=%d (%s) fnv1a=0x%08x luma=%u chroma=%u decode_us=%lu",
|
||||
fr->width, fr->height, fr->format,
|
||||
desc ? desc->name : "?",
|
||||
h, luma_len, chroma_len);
|
||||
h, luma_len, chroma_len,
|
||||
(unsigned long)(decode_ns / 1000));
|
||||
|
||||
/*
|
||||
* Periodic stats summary (every DAEDALUS_STATS_EVERY frames).
|
||||
* Reset window on codec change. Gives observable baseline
|
||||
* for the daedalus-v4l2#11 substitution arc: fps, average
|
||||
* decode_us, MB/s throughput, bitstream B/MB. Compare
|
||||
* against daedalus-fourier README's per-kernel NEON
|
||||
* baselines (e.g. H.264 IDCT 4x4 = 175 Mblock/s) to gauge
|
||||
* which substitutions are worth pursuing.
|
||||
*/
|
||||
if (g_stats.codec_id != req->codec_id) {
|
||||
g_stats.codec_id = req->codec_id;
|
||||
g_stats.frames = 0;
|
||||
g_stats.total_decode_ns = 0;
|
||||
g_stats.total_bitstream_bytes = 0;
|
||||
g_stats.total_mbs = 0;
|
||||
clock_gettime(CLOCK_MONOTONIC, &g_stats.window_start);
|
||||
}
|
||||
g_stats.frames++;
|
||||
g_stats.total_decode_ns += decode_ns;
|
||||
g_stats.total_bitstream_bytes += req->bitstream_len;
|
||||
g_stats.total_mbs += (uint64_t)((fr->width + 15) / 16) *
|
||||
(uint64_t)((fr->height + 15) / 16);
|
||||
|
||||
if (g_stats.frames % DAEDALUS_STATS_EVERY == 0) {
|
||||
struct timespec t_now;
|
||||
clock_gettime(CLOCK_MONOTONIC, &t_now);
|
||||
uint64_t window_ns =
|
||||
timespec_delta_ns(&g_stats.window_start, &t_now);
|
||||
double window_s = (double)window_ns / 1e9;
|
||||
double fps = window_s > 0 ?
|
||||
(double)g_stats.frames / window_s : 0.0;
|
||||
double avg_decode_us = g_stats.frames > 0 ?
|
||||
(double)g_stats.total_decode_ns /
|
||||
(double)g_stats.frames / 1000.0 : 0.0;
|
||||
double mb_per_s = window_s > 0 ?
|
||||
(double)g_stats.total_mbs / window_s : 0.0;
|
||||
double bs_b_per_mb = g_stats.total_mbs > 0 ?
|
||||
(double)g_stats.total_bitstream_bytes /
|
||||
(double)g_stats.total_mbs : 0.0;
|
||||
|
||||
log_info("decoder stats: codec=%s "
|
||||
"frames=%llu window=%.2fs fps=%.2f "
|
||||
"avg_decode_us=%.1f mbs_per_s=%.0f "
|
||||
"bs_b_per_mb=%.2f",
|
||||
codec_id_name(g_stats.codec_id),
|
||||
(unsigned long long)g_stats.frames,
|
||||
window_s, fps, avg_decode_us,
|
||||
mb_per_s, bs_b_per_mb);
|
||||
}
|
||||
}
|
||||
|
||||
fm->av_frame_unref(dec->frame);
|
||||
|
||||
Reference in New Issue
Block a user