benchmark/05_prep_freq_change: second poll-site function, reference-C only
FUN_0000d10c @ 0xd10c (49 insts) contains poll site 11. Semantically decoded as a PHY-side prologue for frequency-change handshake: saves current state of one PHY CTL + four secondary-table entries, waits for PHY firmware to reach state 1 (idle). Matching-decomp iteration deferred vs the clean first lift (d328) — d10c's two-base-pointer csel pattern plus parity-dependent offset chain gives GCC too much register-allocation freedom. Getting to >=90% byte-match would be an afternoon of iteration; time better spent expanding pre-UART coverage breadth. Poll-site coverage so far: d328: sites 12, 13, 14, 15 (C candidate at 89.7% size match) d10c: site 11 (reference C only, no matching iteration) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,47 @@
|
||||
# GRIND_LOG — FUN_0000d10c (second poll-site function)
|
||||
|
||||
**Status: reference.c only.** Matching-decomp iteration deferred — the
|
||||
two-base-pointer csel pattern plus 4 save-and-modify blocks with
|
||||
parity-dependent offsets is too much register-allocation freedom to
|
||||
reach ≥90% byte-match in a single session.
|
||||
|
||||
## What's here
|
||||
|
||||
- `func.bin` — raw 196-byte slice, offset 0xd10c..0xd1cf in v1.19
|
||||
- `func.s` — objdumped GNU asm, absolute addresses preserved
|
||||
- `reference.c` — best-effort ground-truth C; semantics inferred from
|
||||
the assembly. Several constants (0x30, 0x4c, 0x1F0000, 0x2FFF0FFF,
|
||||
0x40000000) remain unnamed pending an AI-Ghidra rename pass.
|
||||
|
||||
## Why the deferral
|
||||
|
||||
This function is the PHY-side prologue of a frequency-change handshake.
|
||||
Compared to FUN_0000d328 (the first lift we did — clean linear PHY-poke
|
||||
sequence):
|
||||
|
||||
| axis | d328 (done) | d10c (this one) |
|
||||
|---|---|---|
|
||||
| instructions | 26 | 49 |
|
||||
| base pointers used | 1 | 2 (ch_base + sec_table) |
|
||||
| csel / conditional moves | 0 | 1 (picks sec_table by ch > 1) |
|
||||
| register-offset (indexed) addressing | 0 | 8 (`[x0, x6]`, `[x3, x5]` etc.; no shift applied) |
|
||||
| offset arithmetic tricks | offset add once | ubfiz + mul + add chains |
|
||||
|
||||
GCC has too many equally-valid ways to arrange `csel` plus multi-base
|
||||
address generation. Without ARMCC, byte-match would plateau at ~70%
|
||||
after naive iteration. With objdiff + AI-Ghidra naming + a dedicated
|
||||
afternoon, a good effort would push to 85-90%.
|
||||
|
||||
## Best next step for this file
|
||||
|
||||
1. AI-Ghidra rename pass (Hermes-2-Pro, GhidrAssist agentic mode) on
|
||||
the constants. Likely auto-names some via tool-call.
|
||||
2. Compile reference.c with `-Os` and count matching insts vs func.s
|
||||
line-by-line — baseline score.
|
||||
3. Iterate with `restrict`-qualified ptrs, `__builtin_expect` hints,
|
||||
or inline-asm for the csel.
|
||||
|
||||
**Productive for now: move on.** We have poll sites 12-15 covered (via
|
||||
d328) and poll site 11 semantically documented. When UART arrives, the
|
||||
equivalent C source exists for the high-value sites — that's what
|
||||
actually matters.
|
||||
Binary file not shown.
@@ -0,0 +1,56 @@
|
||||
|
||||
func.bin: file format binary
|
||||
|
||||
|
||||
Disassembly of section .data:
|
||||
|
||||
000000000000d10c <.data>:
|
||||
d10c: 2a0103e4 mov w4, w1
|
||||
d110: 7100043f cmp w1, #0x1
|
||||
d114: d37be886 lsl x6, x4, #5
|
||||
d118: a94c8c07 ldp x7, x3, [x0, #200]
|
||||
d11c: f8666805 ldr x5, [x0, x6]
|
||||
d120: 8b060046 add x6, x2, x6
|
||||
d124: 9a878063 csel x3, x3, x7, hi // hi = pmore
|
||||
d128: 914040a0 add x0, x5, #0x10, lsl #12
|
||||
d12c: b9418005 ldr w5, [x0, #384]
|
||||
d130: b90238c5 str w5, [x6, #568]
|
||||
d134: 529fa006 mov w6, #0xfd00 // #64768
|
||||
d138: 72bfff66 movk w6, #0xfffb, lsl #16
|
||||
d13c: b9418005 ldr w5, [x0, #384]
|
||||
d140: 0a0600a5 and w5, w5, w6
|
||||
d144: b9018005 str w5, [x0, #384]
|
||||
d148: b9401405 ldr w5, [x0, #20]
|
||||
d14c: 120008a5 and w5, w5, #0x7
|
||||
d150: 710004bf cmp w5, #0x1
|
||||
d154: 54ffffa1 b.ne 0xd148 // b.any
|
||||
d158: 12000021 and w1, w1, #0x1
|
||||
d15c: 52800600 mov w0, #0x30 // #48
|
||||
d160: 8b041442 add x2, x2, x4, lsl #5
|
||||
d164: 1b007c20 mul w0, w1, w0
|
||||
d168: 11013021 add w1, w1, #0x4c
|
||||
d16c: d37e1c21 ubfiz x1, x1, #2, #8
|
||||
d170: 11006005 add w5, w0, #0x18
|
||||
d174: b8656865 ldr w5, [x3, x5]
|
||||
d178: 12003ca4 and w4, w5, #0xffff
|
||||
d17c: b9024444 str w4, [x2, #580]
|
||||
d180: 11001004 add w4, w0, #0x4
|
||||
d184: 11009000 add w0, w0, #0x24
|
||||
d188: b8646865 ldr w5, [x3, x4]
|
||||
d18c: 120010a5 and w5, w5, #0x1f
|
||||
d190: b9024045 str w5, [x2, #576]
|
||||
d194: 52a003e5 mov w5, #0x1f0000 // #2031616
|
||||
d198: b8246865 str w5, [x3, x4]
|
||||
d19c: 5285ffe5 mov w5, #0x2fff // #12287
|
||||
d1a0: b8606864 ldr w4, [x3, x0]
|
||||
d1a4: 0a050084 and w4, w4, w5
|
||||
d1a8: b9024844 str w4, [x2, #584]
|
||||
d1ac: 5281ffe4 mov w4, #0xfff // #4095
|
||||
d1b0: 72a5ffe4 movk w4, #0x2fff, lsl #16
|
||||
d1b4: b8206864 str w4, [x3, x0]
|
||||
d1b8: b8616860 ldr w0, [x3, x1]
|
||||
d1bc: 12120000 and w0, w0, #0x4000
|
||||
d1c0: b9024c40 str w0, [x2, #588]
|
||||
d1c4: 52a80000 mov w0, #0x40000000 // #1073741824
|
||||
d1c8: b8216860 str w0, [x3, x1]
|
||||
d1cc: d65f03c0 ret
|
||||
@@ -0,0 +1,106 @@
|
||||
/* Best-effort ground truth for FUN_0000d10c @ 0xd10c (196 bytes / 49 insts).
|
||||
* Contains poll site 11 (the sole site in this function).
|
||||
*
|
||||
* Harder to lift than FUN_0000d328 — uses two base pointers (channel-switched
|
||||
 * via csel) and several straight-line save-and-modify sequences on PHY registers indexed
|
||||
* through a secondary table pointer. Represents the "prepare-for-freq-change"
|
||||
* family of functions.
|
||||
*
|
||||
* Signature: void prep_freq_change(struct ctx *ctx, uint32_t ch, void *save_area);
|
||||
* X0 = ctx (opaque)
|
||||
* W1 = channel index (0..3)
|
||||
* X2 = save_area where original register values are cached for later restore
|
||||
*
|
||||
* Structure of ctx (guessed from offsets):
|
||||
 *   ctx + ch*32 (byte offset) : 64-bit channel base pointer, one per 32-byte slot
|
||||
* ctx[0xc8], ctx[0xd0] : two "secondary register table" base pointers
|
||||
* (one picked based on ch > 1)
|
||||
*
|
||||
* Behaviour:
|
||||
* 1. Look up the PHY base pointer for channel `ch` from ctx[ch*32].
|
||||
* 2. Select one of two secondary tables based on whether `ch > 1`.
|
||||
 *   3. Read+save PHY @ +0x10180; AND with 0xFFFBFD00 (i.e. clear bits 0x000402FF), write back.
|
||||
* 4. Poll site 11: wait for PHY +0x10014 [2:0] == 1.
|
||||
* 5. For a fixed set of secondary-table offsets (depend on ch parity):
|
||||
* save old value (masked), write a "disable" value.
|
||||
* 6. Return. The restore step happens in a sibling function.
|
||||
*
|
||||
* This is the PHY-side half of a frequency-scaling handshake: pause
|
||||
* training, record current state, clear active masks, wait for the
|
||||
* PHY firmware to reach "state 1" (idle?), then prime the hardware
|
||||
* for the new clock.
|
||||
*
|
||||
* NOTE: matching-decomp iteration on this function is deferred — it
|
||||
* will take several hours to reach ≥90% byte-match because GCC's
|
||||
* register allocation for the two-base-pointer csel pattern differs
|
||||
* substantially from the vendor's. The AI-Ghidra rename pass
|
||||
* (GhidrAssist + Hermes-2-Pro on dirac) may help identify the
|
||||
* semantic names for the offset constants (0x4c, 0x30, 0x1F0000,
|
||||
* 0x2FFF0FFF, etc.) before we continue here.
|
||||
*/
|
||||
#include <stdint.h>
|
||||
|
||||
struct ctx;   /* opaque; the layout used below is inferred from the offsets in func.s */

/* Byte offsets into *ctx (all fields are read as 64-bit values). */
#define CTX_CH_STRIDE      32U     /* lsl x6, x4, #5: one base pointer per 32-byte slot */
#define CTX_SEC_TABLE_LO   0xC8U   /* secondary table used when ch <= 1 */
#define CTX_SEC_TABLE_HI   0xD0U   /* secondary table used when ch >  1 */

#define PHY_OFF_BASE       0x10000
#define PHY_CTL_180        0x180   /* absolute: +0x10180 */
#define PHY_STATE_014      0x14    /* absolute: +0x10014 */

/* Value ANDed into PHY_CTL_180: the bits that are KEPT. The bits actually
 * cleared are the complement, 0x000402FF. */
#define PHY_CTL_KEEP_MASK  0xFFFBFD00U
#define PHY_CTL_CLR_MASK   PHY_CTL_KEEP_MASK   /* old, misleading name kept as an alias */
#define PHY_STATE_MASK     7U
#define PHY_STATE_IDLE     1U      /* meaning of state 1 is a guess ("idle?") */

/* Per-channel save-area slots (byte offsets from save_area + ch*32). */
#define SAVE_PHY_CTL       0x238U
#define SAVE_SEC_04        0x240U
#define SAVE_SEC_18        0x244U
#define SAVE_SEC_24        0x248U
#define SAVE_SEC_4C        0x24CU

/* Secondary-table geometry; semantic names are still unknown. */
#define SEC_PARITY_STRIDE  0x30U   /* byte stride between the even/odd channel banks */
#define SEC_WORD_4C        0x4CU   /* 32-bit word index; the odd channel uses 0x4D */

/* 32-bit volatile load from byte offset `o` relative to `b`. */
static inline uint32_t rd(volatile uint8_t *b, unsigned o) {
    return *(volatile uint32_t *)(b + o);
}

/* 32-bit volatile store of `v` to byte offset `o` relative to `b`. */
static inline void wr(volatile uint8_t *b, unsigned o, uint32_t v) {
    *(volatile uint32_t *)(b + o) = v;
}

/*
 * Reference C for FUN_0000d10c.
 *
 *   ctx        opaque context; holds the per-channel base at byte offset
 *              ch*32 and two secondary-table pointers at 0xC8 / 0xD0.
 *   ch         channel index; bit 0 selects the bank inside the secondary
 *              table, ch > 1 selects which secondary table is used.
 *   save_area  receives the captured register values at
 *              save_area + ch*32 + 0x238 .. 0x24C.
 *
 * Spins (no timeout) until bits [2:0] of PHY +0x10014 read back as 1.
 * All register and save-area accesses are 32-bit, in the same order as
 * the disassembly.
 */
void prep_freq_change(struct ctx *ctx, uint32_t ch, uint8_t *save_area)
{
    const uint8_t *ctx_bytes = (const uint8_t *)ctx;

    /* Widen before scaling: the assembly zero-extends ch and shifts in 64 bits. */
    const uintptr_t slot = (uintptr_t)ch * CTX_CH_STRIDE;

    /* Both table pointers are loaded (ldp) and one is picked with csel. */
    const uint64_t sec_lo   = *(const uint64_t *)(ctx_bytes + CTX_SEC_TABLE_LO);
    const uint64_t sec_hi   = *(const uint64_t *)(ctx_bytes + CTX_SEC_TABLE_HI);
    const uint64_t phy_base = *(const uint64_t *)(ctx_bytes + slot);

    volatile uint8_t *sec  = (volatile uint8_t *)(uintptr_t)((ch > 1U) ? sec_hi : sec_lo);
    volatile uint8_t *phy  = (volatile uint8_t *)(uintptr_t)(phy_base + PHY_OFF_BASE);
    volatile uint8_t *save = save_area + slot;

    /* Cache PHY CTL, then re-read it and write back only the kept bits. */
    wr(save, SAVE_PHY_CTL, rd(phy, PHY_CTL_180));
    wr(phy, PHY_CTL_180, rd(phy, PHY_CTL_180) & PHY_CTL_KEEP_MASK);

    /* Poll site 11: wait for the PHY state field to read 1. */
    while ((rd(phy, PHY_STATE_014) & PHY_STATE_MASK) != PHY_STATE_IDLE) {
        /* spin */
    }

    /* Bank offsets depend on channel parity. */
    const unsigned parity = ch & 1U;
    const unsigned o0 = parity * SEC_PARITY_STRIDE;
    /* ubfiz x1, x1, #2, #8: keep the low 8 bits, then scale to a byte offset. */
    const unsigned o1 = ((parity + SEC_WORD_4C) & 0xFFU) << 2;

    /* Block A: save bits [15:0] of sec[o0+0x18]; the register is not written. */
    wr(save, SAVE_SEC_18, rd(sec, o0 + 0x18U) & 0xFFFFU);

    /* Block B: save bits [4:0] of sec[o0+0x04], then write 0x001F0000. */
    wr(save, SAVE_SEC_04, rd(sec, o0 + 0x04U) & 0x1FU);
    wr(sec, o0 + 0x04U, 0x001F0000U);

    /* Block C: save bits [11:0] and bit 13 (mask 0x2FFF) of sec[o0+0x24],
     * then write 0x2FFF0FFF. */
    wr(save, SAVE_SEC_24, rd(sec, o0 + 0x24U) & 0x2FFFU);
    wr(sec, o0 + 0x24U, 0x2FFF0FFFU);

    /* Block D: save bit 14 of sec[o1], then write 0x40000000. */
    wr(save, SAVE_SEC_4C, rd(sec, o1) & 0x4000U);
    wr(sec, o1, 0x40000000U);
}
|
||||
Reference in New Issue
Block a user