phase1: bench_flush_frame substrate selector + IDCT-layer CPU vs QPU data #10
@@ -12,9 +12,19 @@
|
|||||||
* NOT a ctest — produces wall-time numbers, doesn't pass/fail.
|
* NOT a ctest — produces wall-time numbers, doesn't pass/fail.
|
||||||
* Invoke manually after a build:
|
* Invoke manually after a build:
|
||||||
*
|
*
|
||||||
* ./build/bench_flush_frame [width] [height] [iters] [warmup]
|
* ./build/bench_flush_frame [width] [height] [iters] [warmup] [substrate]
|
||||||
*
|
*
|
||||||
* Defaults: 1920 1088 100 5
|
* Defaults: 1920 1088 100 5 auto
|
||||||
|
*
|
||||||
|
* The [substrate] argument selects the dispatch path:
|
||||||
|
* auto — recipe table picks (V3D7 when available, else NEON)
|
||||||
|
* cpu — force NEON path
|
||||||
|
* qpu — force V3D7 path (fails on hosts without it)
|
||||||
|
*
|
||||||
|
* Run both cpu and qpu to quantify the substrate gap. The "QPU is default
|
||||||
|
* substrate" decree (2026-05-23, feedback_qpu_is_default_substrate.md)
|
||||||
|
* is a policy claim; this bench is how we measure whether the policy
|
||||||
|
* pays off for the IDCT layer specifically.
|
||||||
*
|
*
|
||||||
* The first `warmup` iterations are excluded from the timing
|
* The first `warmup` iterations are excluded from the timing
|
||||||
* average because the daedalus-fourier shader pool needs to
|
* average because the daedalus-fourier shader pool needs to
|
||||||
@@ -70,6 +80,18 @@ int main(int argc, char **argv)
|
|||||||
int iters = argc > 3 ? atoi(argv[3]) : 100;
|
int iters = argc > 3 ? atoi(argv[3]) : 100;
|
||||||
int warmup = argc > 4 ? atoi(argv[4]) : 5;
|
int warmup = argc > 4 ? atoi(argv[4]) : 5;
|
||||||
|
|
||||||
|
daedalus_decoder_substrate sub = DAEDALUS_DECODER_SUBSTRATE_AUTO;
|
||||||
|
const char *sub_name = "auto";
|
||||||
|
if (argc > 5) {
|
||||||
|
if (!strcmp(argv[5], "cpu")) { sub = DAEDALUS_DECODER_SUBSTRATE_CPU; sub_name = "cpu"; }
|
||||||
|
else if (!strcmp(argv[5], "qpu")) { sub = DAEDALUS_DECODER_SUBSTRATE_QPU; sub_name = "qpu"; }
|
||||||
|
else if (!strcmp(argv[5], "auto")) { /* default */ }
|
||||||
|
else {
|
||||||
|
fprintf(stderr, "unknown substrate '%s' (want auto/cpu/qpu)\n", argv[5]);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (warmup >= iters) {
|
if (warmup >= iters) {
|
||||||
fprintf(stderr, "warmup (%d) must be < iters (%d)\n", warmup, iters);
|
fprintf(stderr, "warmup (%d) must be < iters (%d)\n", warmup, iters);
|
||||||
return 1;
|
return 1;
|
||||||
@@ -78,14 +100,18 @@ int main(int argc, char **argv)
|
|||||||
int mb_w = width / 16;
|
int mb_w = width / 16;
|
||||||
int mb_h = height / 16;
|
int mb_h = height / 16;
|
||||||
int n_mbs = mb_w * mb_h;
|
int n_mbs = mb_w * mb_h;
|
||||||
printf("bench_flush_frame: %dx%d (%d MBs), %d iters (%d warmup)\n",
|
printf("bench_flush_frame: %dx%d (%d MBs), %d iters (%d warmup), substrate=%s\n",
|
||||||
width, height, n_mbs, iters, warmup);
|
width, height, n_mbs, iters, warmup, sub_name);
|
||||||
|
|
||||||
daedalus_decoder *dec = daedalus_decoder_create(width, height);
|
daedalus_decoder *dec = daedalus_decoder_create(width, height);
|
||||||
if (!dec) {
|
if (!dec) {
|
||||||
fprintf(stderr, "SKIP: ctx create failed (Vulkan / V3D7 unavailable)\n");
|
fprintf(stderr, "SKIP: ctx create failed (Vulkan / V3D7 unavailable)\n");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
if (daedalus_decoder_set_substrate(dec, sub) != 0) {
|
||||||
|
fprintf(stderr, "set_substrate(%s) failed\n", sub_name);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
printf("ctx has_qpu=%d\n", daedalus_decoder_has_qpu(dec));
|
printf("ctx has_qpu=%d\n", daedalus_decoder_has_qpu(dec));
|
||||||
|
|
||||||
/* Pre-generate per-MB random coeffs once. We re-append the same
|
/* Pre-generate per-MB random coeffs once. We re-append the same
|
||||||
|
|||||||
Reference in New Issue
Block a user