/* Generic harness for single-stepping benchmark functions under GDB. * Copies the raw bytes of funcNN.bin into an RWX buffer and calls through * a function pointer. GDB stepi from the call site drops you right into the * target function's first instruction. No QEMU needed — boltzmann is aarch64. * * Build: run `make` in this dir. * Run: * ./gdb_debug.elf 1 — memset * ./gdb_debug.elf 2 — memcpy32 * ./gdb_debug.elf 3 — magic_memset (will SIGSEGV unless 0x1fe000 is mapped) * ./gdb_debug.elf 4 — train_phy_block; mmaps a synthetic PHY block at * FAKE_PHY_BASE pre-populated with "training-done" * responses so all 4 polls exit on first iteration. * ./gdb_debug.elf 4 stuck * — train_phy_block but with MMIO left at zero so the * polls would loop forever (interrupt with Ctrl+C). * Useful for confirming v3fb trampolines time out * cleanly when applied to a patched func_04.bin. * * Under GDB: see README.md. */ #include #include #include #include #include extern uint8_t _binary_func_01_bin_start[], _binary_func_01_bin_end[]; extern uint8_t _binary_func_02_bin_start[], _binary_func_02_bin_end[]; extern uint8_t _binary_func_03_bin_start[], _binary_func_03_bin_end[]; extern uint8_t _binary_func_04_bin_start[], _binary_func_04_bin_end[]; typedef void (*f1_t)(void *, uint8_t, uint64_t); typedef void (*f2_t)(uint32_t *, const uint32_t *, uint64_t); typedef void (*f3_t)(void); typedef void (*f4_t)(uint64_t /* ctx pointer */); static void *rwx_copy(const void *src, size_t len) { void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (p == MAP_FAILED) { perror("mmap"); exit(1); } memcpy(p, src, len); __builtin___clear_cache(p, (char *)p + len); return p; } /* For function 4 (train_phy_block) we need a synthetic PHY block. * The function does: base = *(u64 *)(ctx + 0xb8); base += 0x8000; ... * So we need (a) a ctx struct with a valid base pointer at +0xb8, * (b) a PHY block at base + 0x8000 with the right register layout. * * We pick FAKE_PHY_BASE so PHY block (at +0x8000) lands somewhere mappable. * 0x40000000 is well outside libc / stack / heap on aarch64 Linux. */ #define FAKE_PHY_BASE 0x40000000UL /* requested via mmap MAP_FIXED */ #define FAKE_PHY_LEN 0x10000 /* 64 KiB = enough for [+0x8000..+0x8200] */ #define PHY_CTL_OFF 0x110 #define PHY_STAT_A_OFF 0x118 #define PHY_STAT_B_OFF 0x120 #define PHY_CFG_A_OFF 0x154 #define PHY_CFG_B_OFF 0x160 #define PHY_HANDSHAKE_OFF 0x184 struct phy_ctx { uint8_t pad[0xB8]; uint64_t base; /* lives at offset 0xB8 */ }; static struct phy_ctx ctx; static void prep_synthetic_phy(int let_polls_pass) { void *m = mmap((void *)FAKE_PHY_BASE, FAKE_PHY_LEN, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); if (m == MAP_FAILED) { perror("mmap synthetic PHY"); exit(1); } memset(m, 0, FAKE_PHY_LEN); ctx.base = FAKE_PHY_BASE; /* ctx->base read by func at +0xB8 */ volatile uint8_t *phy = (volatile uint8_t *)(FAKE_PHY_BASE + 0x8000); if (let_polls_pass) { /* Pre-populate the registers the function polls so each LDR * sees a "done" value on the first iteration. */ *(volatile uint32_t *)(phy + PHY_STAT_A_OFF) = 0xF0000001U; /* bits[31:28] non-zero */ *(volatile uint32_t *)(phy + PHY_STAT_B_OFF) = 0xF0000001U; *(volatile uint32_t *)(phy + PHY_HANDSHAKE_OFF) = 0x00000003U; /* bits[1:0] non-zero */ } printf("synthetic PHY mapped at 0x%lx, polls = %s\n", (unsigned long)m, let_polls_pass ? "PASS" : "STUCK (will loop)"); } static void __attribute__((noinline)) call_func(void (*fn)(void), int which, int variant) { switch (which) { case 1: { char buf[64] = {0}; printf("pre: buf[10]=0x%02x\n", (uint8_t)buf[10]); ((f1_t)fn)(buf, 0xAB, 16); printf("post: buf[10]=0x%02x (expect 0xab)\n", (uint8_t)buf[10]); break; } case 2: { uint32_t dst[8] = {0}, src[8]; for (int i = 0; i < 8; i++) src[i] = 0xDEAD0000U | i; ((f2_t)fn)(dst, src, sizeof dst); printf("dst[3]=0x%08x (expect 0xdead0003)\n", dst[3]); break; } case 3: printf("calling magic_memset — SIGSEGVs on LDR of 0x1fe004 in user mode.\n"); ((f3_t)fn)(); break; case 4: { prep_synthetic_phy(variant); printf("calling train_phy_block(ctx)\n"); ((f4_t)fn)((uint64_t)&ctx); printf("train_phy_block returned successfully.\n"); volatile uint8_t *phy = (volatile uint8_t *)(FAKE_PHY_BASE + 0x8000); printf("post: CTL=0x%08x CFG_A=0x%08x CFG_B=0x%08x\n", *(volatile uint32_t *)(phy + PHY_CTL_OFF), *(volatile uint32_t *)(phy + PHY_CFG_A_OFF), *(volatile uint32_t *)(phy + PHY_CFG_B_OFF)); break; } } } int main(int argc, char **argv) { if (argc < 2) { fprintf(stderr, "usage: %s {1|2|3|4} [stuck]\n", argv[0]); return 2; } int which = atoi(argv[1]); int variant = (argc >= 3 && strcmp(argv[2], "stuck") == 0) ? 0 : 1; void (*fn)(void); switch (which) { case 1: fn = rwx_copy(_binary_func_01_bin_start, _binary_func_01_bin_end - _binary_func_01_bin_start); break; case 2: fn = rwx_copy(_binary_func_02_bin_start, _binary_func_02_bin_end - _binary_func_02_bin_start); break; case 3: fn = rwx_copy(_binary_func_03_bin_start, _binary_func_03_bin_end - _binary_func_03_bin_start); break; case 4: fn = rwx_copy(_binary_func_04_bin_start, _binary_func_04_bin_end - _binary_func_04_bin_start); break; default: fprintf(stderr, "unknown index %d\n", which); return 2; } printf("function %d loaded at %p\n", which, fn); call_func(fn, which, variant); return 0; }