benchmark/: three-way RE-tool comparison + first real C-lift
Three small functions plus one real poll-site function, extracted from the v1.19 conservative blob, with
ground-truth C and per-tool (Ghidra / retdec / decomp.me) docs:
01_memset — byte memset, 28 B
02_memcpy32 — word-aligned memcpy, 36 B
03_magic_memset — magic check + tail-call to memset, 40 B
04_train_phy_block — first real poll-site function (104 B, 26 insts),
contains poll sites 12-15
Results in RESULTS.md:
- Ghidra: A on all four. Auto-decompile is close to final.
- retdec: A on #3, F on #1 and #2 (no register-arg inference on raw),
C on #4 (mistakes the `& 0xF0000000` mask test for `< 0x10000000`).
GRIND_LOG.md (in 04_train_phy_block/) records the matching-decomp
iteration: 116-byte candidate.c at -Os vs vendor 104 bytes = 89.7%
size match on first real iteration. Remaining gap is GCC's choice of
`cmp w, w_const; b.ls` over vendor's `tst w, #imm; b.eq` for the
mask tests.
gdb_debug/ holds a native-aarch64 GDB single-stepper for the three
benchmark functions — boltzmann smoke test passed (memset:
buf[10] 0x00→0xab).
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,26 @@
|
||||
BENCH := $(abspath ..)
|
||||
|
||||
.PHONY: all clean
|
||||
all: gdb_debug.elf
|
||||
|
||||
# Wrap each benchmark function's raw bytes into an .o with predictable
|
||||
# symbols _binary_func_NN_bin_{start,end}, regardless of the cwd-dependent
|
||||
# symbol names that `ld -b binary` generates.
#
# Usage: $(eval $(call WRAP_BIN,<stem>,<benchmark-dir>))
#   $1 = output stem; the rule builds $1.o and the final symbols are
#        _binary_$1_bin_start / _binary_$1_bin_end
#   $2 = directory under $(BENCH) that holds func.bin
#
# Steps of the generated recipe:
#   1. copy func.bin into the current directory as $1.bin
#   2. `ld -r -b binary` turns $1.bin into a relocatable object ($1.o.raw)
#   3. delete the temporary $1.bin
#   4. `nm | awk` builds --redefine-sym arguments for any symbol ending in
#      _func_bin_start / _func_bin_end, and objcopy writes the final $1.o
#   5. delete the intermediate $1.o.raw
#
# Escaping: the text passes through $(call) and then $(eval), so `$$` reaches
# the rule as a single `$` (automatic variables) and `$$$$` reaches the shell
# as a single `$` (awk's end-of-line anchor and field reference).
#
# NOTE(review): the file handed to ld is $1.bin (e.g. func_01.bin), so the awk
# patterns /_func_bin_start$/ and /_func_bin_end$/ look like they would only
# match if the input were named func.bin — confirm whether the objcopy rename
# ever fires or whether ld already emits the wanted names directly.
|
||||
define WRAP_BIN
|
||||
$1.o: $(BENCH)/$2/func.bin
|
||||
cp $$< $1.bin
|
||||
ld -r -b binary -o $$@.raw $1.bin
|
||||
rm -f $1.bin
|
||||
objcopy $$$$(nm $$@.raw | awk '/_func_bin_start$$$$/{printf " --redefine-sym %s=_binary_$1_bin_start",$$$$3} /_func_bin_end$$$$/{printf " --redefine-sym %s=_binary_$1_bin_end",$$$$3}') $$@.raw $$@
|
||||
rm -f $$@.raw
|
||||
endef
|
||||
|
||||
$(eval $(call WRAP_BIN,func_01,01_memset))
|
||||
$(eval $(call WRAP_BIN,func_02,02_memcpy32))
|
||||
$(eval $(call WRAP_BIN,func_03,03_magic_memset))
|
||||
|
||||
# Link the C harness together with the three wrapped function blobs.
# -O0 -g: build the harness unoptimized and with debug info.
gdb_debug.elf: harness.c func_01.o func_02.o func_03.o
|
||||
gcc -O0 -g -Wall -o $@ $^
|
||||
|
||||
# Remove the harness executable, the wrapped func_*.o objects, and any *.bin
# file left in this directory (e.g. a copied blob from an interrupted build).
clean:
|
||||
rm -f gdb_debug.elf func_*.o *.bin
|
||||
@@ -0,0 +1,72 @@
|
||||
# gdb_debug — single-step the benchmark functions under GDB
|
||||
|
||||
Wraps each of `01_memset` / `02_memcpy32` / `03_magic_memset` in a
|
||||
C harness, copies the raw bytes into an RWX buffer, and calls through
|
||||
a function pointer. GDB attached to the harness lets you step every
|
||||
machine instruction of the real blob code — **no QEMU needed because
|
||||
boltzmann (and ampere, ohm, hertz) are natively aarch64.**
|
||||
|
||||
## Build
|
||||
|
||||
```
|
||||
make # builds ./gdb_debug.elf natively on aarch64
|
||||
```
|
||||
|
||||
To cross-build for an x86 host (e.g. oppenheimer, running under qemu-user),
edit the Makefile: replace `gcc` with `aarch64-linux-gnu-gcc` and `ld` with
`aarch64-linux-gnu-ld`. Then launch the harness under
`qemu-aarch64-static -g 1234 ./gdb_debug.elf 1` and attach `gdb-multiarch`
to `:1234`.
|
||||
|
||||
## Run under GDB
|
||||
|
||||
```
|
||||
gdb ./gdb_debug.elf
|
||||
(gdb) set pagination off
|
||||
(gdb) layout split # TUI: source / asm / regs split
|
||||
(gdb) break call_func # the dispatcher — one breakpoint catches all three
|
||||
(gdb) run 1 # 1=memset 2=memcpy32 3=magic_memset
|
||||
(gdb) stepi # one machine instruction
|
||||
(gdb) info reg # full register dump
|
||||
(gdb) x/8i $pc # peek 8 upcoming instructions
|
||||
(gdb) display/i $pc # auto-show next instruction on every stop
|
||||
(gdb) x/16bx $x0 # hex-dump 16 bytes from what X0 points at
|
||||
```
|
||||
|
||||
## What to look for
|
||||
|
||||
### Function 1 (memset)
|
||||
After `MOV X3, #0`, each iteration: `CMP X2, X3` → `B.NE` → `STRB W1, [X0, X3]`
|
||||
→ `ADD X3, X3, #1` → back. Watch `$x3` advance, inspect `x/16bx $x0` to see
|
||||
the buffer filling with `0xAB`.
|
||||
|
||||
### Function 2 (memcpy32)
|
||||
First instruction is the alignment mask: `AND X2, X2, #0xfffffffc`.
|
||||
Set a watchpoint on `$x2` to catch the mask, then step the loop to watch
|
||||
4-byte transfers: `LDR W4, [X1, X3]` ; `STR W4, [X0, X3]` ; `ADD X3, X3, #4`.
|
||||
|
||||
### Function 3 (magic_memset)
|
||||
Will **SIGSEGV** on `LDR W2, [X0, #4]` because `X0 = 0x1fe000` is unmapped
|
||||
in user mode. That crash **is** the verification — it proves the function
|
||||
really does target that absolute address. To execute the full path, add
|
||||
before `call_func`:
|
||||
|
||||
```c
|
||||
mmap((void*)0x1fe000, 4096, PROT_READ|PROT_WRITE,
|
||||
MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0);
|
||||
*(uint32_t*)0x1fe004 = 0x54410001;
|
||||
```
|
||||
|
||||
Then the magic check passes and GDB steps into the tail-call to memset.
|
||||
|
||||
## Why this scaffold beats `ddr_emu2` for verifying trampolines
|
||||
|
||||
`ddr_emu2` dies at PC=0x10a80 in the emulator because it can't model an
|
||||
MMIO register — blind spot for us. Native GDB on an aarch64 host runs the
|
||||
*actual* CPU with full instruction fidelity; the limit becomes "can we
|
||||
fake the MMIO responses?" rather than "does the emulator know this
|
||||
instruction?". For compute-only code (functions 1 and 2), zero prep
|
||||
needed. For MMIO-touching code, `mmap(MAP_FIXED)` + a signal handler
|
||||
stub can serve as a synthetic PHY — **that's the path to single-stepping
|
||||
a patched trampoline through the real ISA with fake hardware replies**,
|
||||
which is exactly what the next round of v3fb verification would need.
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Executable
BIN
Binary file not shown.
@@ -0,0 +1,74 @@
|
||||
/* Generic harness for single-stepping one of the benchmark functions under GDB.
|
||||
* Copies the raw bytes of funcNN.bin into an RWX buffer and calls through
|
||||
* a function pointer. GDB stepi from the call site drops you right into the
|
||||
* target function's first instruction. No QEMU needed — boltzmann is aarch64.
|
||||
*
|
||||
* Build: run `make` in this dir (native aarch64 only, for now).
|
||||
* Run: ./gdb_debug.elf {1|2|3} (1=memset 2=memcpy32 3=magic_memset)
|
||||
*
|
||||
* Under GDB: see README.md.
|
||||
*/
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/mman.h>
|
||||
|
||||
extern uint8_t _binary_func_01_bin_start[], _binary_func_01_bin_end[];
|
||||
extern uint8_t _binary_func_02_bin_start[], _binary_func_02_bin_end[];
|
||||
extern uint8_t _binary_func_03_bin_start[], _binary_func_03_bin_end[];
|
||||
|
||||
typedef void (*f1_t)(void *, uint8_t, uint64_t);
|
||||
typedef void (*f2_t)(uint32_t *, const uint32_t *, uint64_t);
|
||||
typedef void (*f3_t)(void);
|
||||
|
||||
/* Copy `len` bytes of machine code from `src` into a fresh anonymous mapping
 * that is readable, writable and executable, and return the mapping's address.
 *
 * The mapping is at least 4096 bytes and grows to `len` for larger blobs, so
 * the copy can never run past the end of the mapping (the previous fixed
 * 4096-byte mapping overflowed for any blob larger than that).
 *
 * On mmap failure this prints the error via perror() and exits with status 1,
 * so the return value is always a valid pointer. The mapping is never
 * unmapped by this function.
 */
static void *rwx_copy(const void *src, size_t len) {
    enum { MIN_MAP_LEN = 4096 };
    size_t map_len = len > (size_t)MIN_MAP_LEN ? len : (size_t)MIN_MAP_LEN;

    void *p = mmap(NULL, map_len, PROT_READ | PROT_WRITE | PROT_EXEC,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (p == MAP_FAILED) { perror("mmap"); exit(1); }

    memcpy(p, src, len);
    /* The bytes were written as data; flush so instruction fetch sees them. */
    __builtin___clear_cache((char *)p, (char *)p + len);
    return p;
}
|
||||
|
||||
static void __attribute__((noinline))
|
||||
call_func(void (*fn)(void), int which) {
|
||||
switch (which) {
|
||||
case 1: {
|
||||
char buf[64] = {0};
|
||||
printf("pre: buf[10]=0x%02x\n", (uint8_t)buf[10]);
|
||||
((f1_t)fn)(buf, 0xAB, 16);
|
||||
printf("post: buf[10]=0x%02x (expect 0xab)\n", (uint8_t)buf[10]);
|
||||
break;
|
||||
}
|
||||
case 2: {
|
||||
uint32_t dst[8] = {0}, src[8];
|
||||
for (int i = 0; i < 8; i++) src[i] = 0xDEAD0000U | i;
|
||||
((f2_t)fn)(dst, src, sizeof dst);
|
||||
printf("dst[3]=0x%08x (expect 0xdead0003)\n", dst[3]);
|
||||
break;
|
||||
}
|
||||
case 3:
|
||||
printf("calling magic_memset — SIGSEGVs on LDR of 0x1fe004 in user mode.\n");
|
||||
((f3_t)fn)();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
if (argc != 2) { fprintf(stderr, "usage: %s {1|2|3}\n", argv[0]); return 2; }
|
||||
int which = atoi(argv[1]);
|
||||
void (*fn)(void);
|
||||
switch (which) {
|
||||
case 1: fn = rwx_copy(_binary_func_01_bin_start,
|
||||
_binary_func_01_bin_end - _binary_func_01_bin_start); break;
|
||||
case 2: fn = rwx_copy(_binary_func_02_bin_start,
|
||||
_binary_func_02_bin_end - _binary_func_02_bin_start); break;
|
||||
case 3: fn = rwx_copy(_binary_func_03_bin_start,
|
||||
_binary_func_03_bin_end - _binary_func_03_bin_start); break;
|
||||
default: fprintf(stderr, "unknown index %d\n", which); return 2;
|
||||
}
|
||||
printf("function %d loaded at %p\n", which, fn);
|
||||
call_func(fn, which);
|
||||
return 0;
|
||||
}
|
||||
Reference in New Issue
Block a user