/*
 * Best matching candidate so far for FUN_0000d328.
 *
 * Compile: gcc -Os -ffreestanding -nostdlib -c candidate.c -o candidate.o
 *
 * Last recorded score (taken BEFORE the [base+0x184] polls were narrowed
 * to 32-bit loads; re-measure): 116 bytes vs vendor 104 bytes
 * (88% size match, 12 bytes / 3 insts over).
 *
 * Remaining gap vs vendor, as recorded at that score:
 *   - GCC emits `cmp w, w_loaded_const ; b.ls` for `(x & 0xF0000000) == 0`
 *     instead of vendor's `tst w, #0xF0000000 ; b.eq`. The vendor form
 *     avoids materializing the mask in a register, saving 4 bytes per
 *     loop, twice = 8 bytes.
 *   - GCC emitted `add x1, x0, #0x200 ; ldur x2, [x1, #-124]` for the
 *     `[base+0x184]` accesses inside the handshake loop, vs vendor's
 *     direct `ldr w1, [x0, #0x184]`, costing ~4 bytes. That sequence came
 *     from reading the register as a 64-bit value; the reads below are now
 *     32-bit to match the vendor load width. Expected to close this part
 *     of the gap -- confirm by rebuilding.
 *
 * Next iterations to try:
 *   1. Inline-asm for the mask-tests to force TST encoding.
 *   2. `__builtin_expect((x & 0xF0000000) != 0, 0)` to hint loop direction.
 *   3. Alternative compilers: clang, ARMCC (the latter is what Rockchip
 *      almost certainly used; need to source it).
 */

typedef volatile unsigned int u32v;
typedef volatile unsigned long u64v; /* no longer used by train_phy_block */

/*
 * train_phy_block - run the write/poll sequence on the register window
 * located 0x8000 bytes past the address stored at ctx + 0xb8.
 *
 * ctx: address of a context object; the word at ctx + 0xb8 is read as the
 *      base address of the register block.
 *
 * Sequence (all accesses are 32-bit volatile):
 *   1. write 0xf000f000 to +0x110
 *   2. spin until any of bits 31:28 of +0x118 is set
 *   3. spin until any of bits 31:28 of +0x120 is set
 *   4. write 0x30003 to +0x160, then to +0x154
 *   5. spin until any of bits 1:0 of +0x184 is set
 *   6. write 0x30000 to +0x154
 *   7. spin until bits 1:0 of +0x184 are both clear
 *   8. write 0x30000 to +0x160, then 0xf0000000 to +0x110
 *
 * NOTE(review): the upper-half/lower-half value pattern (0x30003 / 0x30000)
 * looks like write-enable-mask style registers -- confirm against the
 * hardware documentation.
 *
 * The polls have no timeout: the function does not return until the
 * hardware reports the expected bits.
 */
void train_phy_block(unsigned long ctx)
{
    unsigned char *phy =
        (unsigned char *)(*(unsigned long *)(ctx + 0xb8) + 0x8000);

    *(u32v *)(phy + 0x110) = 0xf000f000u;

    while ((*(u32v *)(phy + 0x118) & 0xf0000000u) == 0u) {
        /* spin */
    }
    while ((*(u32v *)(phy + 0x120) & 0xf0000000u) == 0u) {
        /* spin */
    }

    *(u32v *)(phy + 0x160) = 0x30003u;
    *(u32v *)(phy + 0x154) = 0x30003u;

    /*
     * +0x184 is only 4-byte aligned, so it must be read as a 32-bit word:
     * an 8-byte load here would be misaligned (assuming the base is at
     * least 8-byte aligned) and would also touch the word at +0x188.
     * The vendor code uses a 32-bit `ldr w1, [x0, #0x184]`.
     */
    while ((*(u32v *)(phy + 0x184) & 3u) == 0u) {
        /* spin */
    }

    *(u32v *)(phy + 0x154) = 0x30000u;

    while ((*(u32v *)(phy + 0x184) & 3u) != 0u) {
        /* spin */
    }

    *(u32v *)(phy + 0x160) = 0x30000u;
    *(u32v *)(phy + 0x110) = 0xf0000000u;
}