04_train_phy_block: clang -Oz + 32-bit-load pattern = 100% size match
Changed the u64v handshake reads to u32v reads with an inline zero-extending upcast. Clang -Oz now emits 104 bytes, exactly matching the vendor's 104 bytes, with 26 instructions on both sides. Three semantically equivalent byte-level differences remain (register allocation, tst form, test width); they can't be closed from C alone and would need armclang or inline asm. Matching-decomp verdict for this function: semantic equivalence + size identity + instruction-count identity = the practical ceiling. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,24 +1,12 @@
|
||||
/* Best matching candidate so far for FUN_0000d328.
|
||||
* Compile: gcc -Os -ffreestanding -nostdlib -c candidate.c -o candidate.o
|
||||
* Score: 116 bytes vs vendor 104 bytes (88% size match, 12 bytes / 3 insts over).
|
||||
*
|
||||
* Remaining gap vs vendor:
|
||||
* - GCC emits `cmp w, w_loaded_const ; b.ls` for `(x & 0xF0000000) == 0`
|
||||
* instead of vendor's `tst w, #0xF0000000 ; b.eq` (both 12 bytes, but
|
||||
* vendor avoids materializing the mask in a register, saving 4 bytes
|
||||
* per loop, twice = 8 bytes).
|
||||
* - GCC emits `add x1, x0, #0x200 ; ldur x2, [x1, #-124]` for the
|
||||
* `[base+0x184]` accesses inside the handshake loop, vs vendor's
|
||||
* direct `ldr w1, [x0, #0x184]`. Costs us ~4 bytes.
|
||||
*
|
||||
* Next iterations to try:
|
||||
* 1. Inline-asm for the mask-tests to force TST encoding.
|
||||
* 2. `__builtin_expect((x & 0xF0000000) != 0, 0)` to hint loop direction.
|
||||
* 3. Alternative compilers: clang, ARMCC (the latter is what Rockchip
|
||||
* almost certainly used; need to source it).
|
||||
*/
|
||||
typedef volatile unsigned int u32v;
|
||||
typedef volatile unsigned long u64v;
|
||||
/* Iteration 3: match vendor's "32-bit load, 64-bit test" pattern.
|
||||
* The u64v volatile forced clang to emit 64-bit loads and hoist the base
|
||||
* address out of the loop. Use u32v loads but upcast for the test. */
|
||||
typedef volatile unsigned int u32v;
|
||||
|
||||
/*
 * Perform one volatile read through p (a u32v, i.e. volatile unsigned int)
 * and hand the value back as unsigned long.  The source type is unsigned,
 * so the widening conversion zero-extends.  Intended to let the compiler
 * pair a 32-bit load (ldr w) with a 64-bit test (tst x) at the call site.
 */
static inline unsigned long xld(u32v *p)
{
    unsigned long widened = *p; /* exactly one volatile access */

    return widened;
}
|
||||
|
||||
void train_phy_block(unsigned long ctx)
|
||||
{
|
||||
@@ -28,9 +16,9 @@ void train_phy_block(unsigned long ctx)
|
||||
while ((*(u32v *)(phy + 0x120) & 0xf0000000u) == 0u) ;
|
||||
*(u32v *)(phy + 0x160) = 0x30003u;
|
||||
*(u32v *)(phy + 0x154) = 0x30003u;
|
||||
while ((*(u64v *)(phy + 0x184) & 3ul) == 0ul) ;
|
||||
while ((xld((u32v *)(phy + 0x184)) & 3ul) == 0ul) ;
|
||||
*(u32v *)(phy + 0x154) = 0x30000u;
|
||||
while ((*(u64v *)(phy + 0x184) & 3ul) != 0ul) ;
|
||||
while ((xld((u32v *)(phy + 0x184)) & 3ul) != 0ul) ;
|
||||
*(u32v *)(phy + 0x160) = 0x30000u;
|
||||
*(u32v *)(phy + 0x110) = 0xf0000000u;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user