v3 patcher: full-body trampolines + site bisection subsets
Root cause of counted_v2 brick identified:
v2 copied only ONE non-load body instruction into each trampoline (the
first one after the LDR). For poll patterns of the form
LDR Wx, [Xbase, #off]
AND Wx, Wx, #mask ; no flag update
CMP Wx, #expected ; sets flags
B.cond .retry
— 9 of the 16 sites in v1.19 have this shape — the final CMP was silently
dropped. The trampoline's B.inv_cond tested whatever flags happened to be
set before entry, producing effectively random branch decisions once
under the trampoline. Result: boot crashes before the UART banner,
observed as 'power LED off' brick.
Fix in v3: copy the ENTIRE loop body (LDR + all intermediate instructions,
in original order) into each trampoline. Size is now 4*(N+6) where N is
body length (28 bytes for body=2, 36 for body=3).
Also in v3:
- --sites subset flag for bisection (all/early/mid/late/none/index list)
- decode_sites.py helper that tries to identify which MMIO register each
site polls (best effort — the materialized_base scanner is naive and
picks up stale MOVZ targets, but cluster grouping by blob offset is
reliable and sufficient for bisection)
Site clusters in v1.19:
0..7 early (0x07b78..0x07f08): SGRF + PHY firmware state machine
8..10 mid (0x09124..0x0aaf8): DfiStatus / training start
11..15 late (0x0d154..0x0d378): UctWriteProt / CalBusy / late
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
+194
@@ -0,0 +1,194 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Decode each poll site: which base register + offset is being polled?
|
||||
|
||||
For each site find_poll_loops() reports, we pull:
|
||||
- the load instruction (LDR Wn, [Xbase, #off])
|
||||
- the base register number
|
||||
- the immediate offset
|
||||
- backward-scan for the MOV/MOVK/MOVZ sequence that materialised Xbase
|
||||
- print a register-friendly label using the PHY register map from
|
||||
BUG_ANALYSIS.md.
|
||||
"""
|
||||
|
||||
import struct
|
||||
import sys
|
||||
|
||||
|
||||
# MMIO block base addresses, taken from BUG_ANALYSIS / rk3588_ddr.h.
KNOWN_BASES = {0xFE050000: 'SGRF_DDR'}
# The per-channel PHY bases are guesses: four channels spaced 0x01000000 apart.
KNOWN_BASES.update(
    (0xF2000000 + channel * 0x01000000, f'DDR{channel}_PHY')
    for channel in range(4)
)

# Register offsets inside one PHY block, from the BUG_ANALYSIS register table.
# NOTE(review): the 0x10xxx entries are larger than anything a single LDR
# unsigned-immediate can encode (imm12 scaled by 4 or 8), so they can only
# match if the lookup is fed something other than the raw LDR immediate.
PHY_OFFSETS = {
    offset: name
    for name, offset in (
        ('CalBusy', 0x0684),
        ('DfiStatus', 0x0A24),
        ('MicroReset', 0x10080),
        ('MicroContMuxSel', 0x10090),
        ('UctWriteProtShadow', 0x10514),
    )
}

# SGRF_DDR registers from BUG_ANALYSIS, keyed by absolute MMIO address
# (SGRF_DDR base + register offset).
SGRF_ABS_OFFSETS = {
    0xFE050000 + 0x0054: 'SGRF_DDR_CON21',
    0xFE050000 + 0x00E0: 'SGRF_DDR_STATUS',
}
|
||||
|
||||
|
||||
def decode_ldr_unsigned_imm(w):
    """Decode an AArch64 LDR (immediate, unsigned offset) instruction word.

    Encoding: size[31:30] | 111001 | 01 | imm12[21:10] | Rn[9:5] | Rt[4:0],
    where size 0b10 is the 32-bit (W) form and 0b11 the 64-bit (X) form.
    The byte offset is imm12 scaled by the access size (4 or 8).

    Returns (rt, rn, imm_bytes, width_bits), or None when `w` is not one of
    these two LDR forms.
    """
    # Compare all ten opcode bits [31:22]. A mask that drops bit 30 folds the
    # X form onto the W form, which mis-reports the width and scales the
    # offset by 4 instead of 8.
    opcode = w & 0xFFC00000
    if opcode == 0xB9400000:
        width = 32
    elif opcode == 0xF9400000:
        width = 64
    else:
        return None

    imm12 = (w >> 10) & 0xFFF
    rn = (w >> 5) & 0x1F
    rt = w & 0x1F
    return (rt, rn, imm12 * (width // 8), width)
|
||||
|
||||
|
||||
def decode_movz(w):
    """Decode an AArch64 MOVZ (move wide with zero) instruction word.

    Encoding: sf[31] | 10 | 100101 | hw[22:21] | imm16[20:5] | Rd[4:0].
    sf selects the 32-bit (W) or 64-bit (X) destination; hw is the index of
    the 16-bit halfword that receives imm16.

    Returns ('MOVZ', rd, imm16, hw, width_bits), or None when `w` is not a
    valid MOVZ.
    """
    # Include the sf bit in the comparison; masking it out makes every X-form
    # MOVZ look like a W-form one.
    opcode = w & 0xFF800000
    if opcode == 0x52800000:
        width = 32
    elif opcode == 0xD2800000:
        width = 64
    else:
        return None

    hw = (w >> 21) & 0x3
    if width == 32 and hw > 1:
        # A W register only has halfwords 0 and 1; this encoding is
        # unallocated, so the word is data or some other instruction.
        return None

    imm16 = (w >> 5) & 0xFFFF
    rd = w & 0x1F
    return ('MOVZ', rd, imm16, hw, width)
|
||||
|
||||
|
||||
def decode_movk(w):
    """Decode an AArch64 MOVK (move wide with keep) instruction word.

    Encoding: sf[31] | 11 | 100101 | hw[22:21] | imm16[20:5] | Rd[4:0].
    sf selects the 32-bit (W) or 64-bit (X) destination; hw is the index of
    the 16-bit halfword that imm16 replaces, leaving the others untouched.

    Returns ('MOVK', rd, imm16, hw, width_bits), or None when `w` is not a
    valid MOVK.
    """
    # Include the sf bit in the comparison; masking it out makes every X-form
    # MOVK look like a W-form one.
    opcode = w & 0xFF800000
    if opcode == 0x72800000:
        width = 32
    elif opcode == 0xF2800000:
        width = 64
    else:
        return None

    hw = (w >> 21) & 0x3
    if width == 32 and hw > 1:
        # A W register only has halfwords 0 and 1; this encoding is
        # unallocated, so the word is data or some other instruction.
        return None

    imm16 = (w >> 5) & 0xFFFF
    rd = w & 0x1F
    return ('MOVK', rd, imm16, hw, width)
|
||||
|
||||
|
||||
def materialized_base(blob, load_offset, rn, window=64):
    """Reconstruct the immediate that a MOVZ/MOVK sequence put into register Rn.

    Scans backward from the instruction just before `load_offset`, looking at
    up to `window` instructions (never before the start of `blob`). Only
    MOVZ and MOVK with destination register number `rn` are considered; any
    other instruction that writes Rn is not recognised, so the result is a
    best-effort guess.

    Because the scan runs from the load towards older code, the first write
    seen for a halfword is the one still live at the load, and it wins. A
    MOVZ zeroes every other halfword and therefore starts the sequence, so
    the scan stops there instead of folding in older, stale MOVZ/MOVK values.

    Returns (addr, hits): `hits` is the number of MOVZ/MOVK instructions that
    belong to the sequence, and `addr` is the reconstructed value, or None
    when no such instruction was found. If the window holds MOVKs but no
    MOVZ, `addr` contains only the halfwords that were seen.
    """
    addr = 0
    hits = 0
    filled = set()  # halfword indices already fixed by a later instruction

    oldest = max(0, load_offset - window * 4)
    for pos in range(load_offset - 4, oldest - 1, -4):
        w = struct.unpack_from('<I', blob, pos)[0]
        mk = decode_movk(w) or decode_movz(w)
        if not mk:
            continue
        op, rd, imm16, hw, _width = mk
        if rd != rn:
            continue

        hits += 1
        if hw not in filled:
            addr |= imm16 << (hw * 16)
            filled.add(hw)
        if op == 'MOVZ':
            # Everything older than the MOVZ is overwritten by it.
            break

    return (addr if hits else None, hits)
|
||||
|
||||
|
||||
def find_poll_loops(blob):
    """Find short backward B.cond loops whose body contains an LDR.

    Every 4-byte word except those in the last 12 bytes of `blob` is tested
    as a B.cond. A candidate is kept when it branches backward by 1 to 4
    instructions and one of the words between the branch target and the
    branch is an LDR (immediate, unsigned offset), 32- or 64-bit. If several
    words match, the one closest to the branch is recorded.

    Returns a list of dicts with keys 'idx' (position in the list),
    'branch_offset', 'cond' (4-bit condition code), 'load_inst' and
    'load_offset'.
    """
    bcond_mask, bcond_bits = 0xFF000010, 0x54000000
    # Bit 30 is masked out, so one comparison covers both W and X loads.
    ldr_mask, ldr_bits = 0xBFC00000, 0xB9400000

    def word_at(pos):
        return struct.unpack_from('<I', blob, pos)[0]

    found = []
    for branch_pos in range(0, len(blob) - 12, 4):
        insn = word_at(branch_pos)
        if (insn & bcond_mask) != bcond_bits:
            continue

        imm19 = (insn >> 5) & 0x7FFFF
        if imm19 < 0x40000:
            continue  # sign bit clear: forward branch
        rel = (imm19 - 0x80000) * 4  # sign-extend, then words -> bytes
        if rel < -16 or rel > -4:
            continue

        # NOTE(review): for a branch in the first few words of the blob the
        # target is negative; struct.unpack_from appears to treat that as
        # relative to the end of the buffer - confirm this is acceptable.
        load_word = load_pos = None
        for pos in range(branch_pos + rel, branch_pos, 4):
            candidate = word_at(pos)
            if (candidate & ldr_mask) == ldr_bits:
                load_word, load_pos = candidate, pos

        if load_word is None:
            continue

        found.append({
            'idx': len(found),
            'branch_offset': branch_pos,
            'cond': insn & 0xF,
            'load_inst': load_word,
            'load_offset': load_pos,
        })
    return found
|
||||
|
||||
|
||||
def classify(base, off):
    """Describe the MMIO target addressed by `base + off` as a string.

    Lookup order:
      1. `base` is None                      -> '?'
      2. absolute address in SGRF_ABS_OFFSETS -> that register name
      3. `base` in KNOWN_BASES               -> '<block>.<register>', falling
         back to '+0x<off>' when the offset is not in PHY_OFFSETS
      4. unknown base, offset in PHY_OFFSETS -> '???@0x<base>+<name>(0x<off>)'
      5. otherwise                           -> the absolute address in hex
    """
    if base is None:
        return '?'

    target = base + off
    if target in SGRF_ABS_OFFSETS:
        return SGRF_ABS_OFFSETS[target]

    if base in KNOWN_BASES:
        block_name = KNOWN_BASES[base]
        reg_name = PHY_OFFSETS.get(off, f'+0x{off:x}')
        return f'{block_name}.{reg_name}'

    if off not in PHY_OFFSETS:
        return f'0x{target:08x}'

    # The base is unrecognised, but the offset matches a known PHY register.
    return f'???@0x{base:08x}+{PHY_OFFSETS[off]}(0x{off:x})'
|
||||
|
||||
|
||||
# AArch64 condition mnemonics, indexed by the 4-bit cond field of B.cond.
COND_NAMES = 'EQ NE CS CC MI PL VS VC HI LS GE LT GT LE AL NV'.split()
|
||||
|
||||
|
||||
def main():
    """Print one table row per poll site found in the blob named by argv[1].

    Each row shows the site index, the offset of the B.cond, its condition,
    the reconstructed base address (or 'indirect' when no MOVZ/MOVK for the
    base register was found), the LDR offset and the classified target.
    Exits with a usage message when no path is given.
    """
    if len(sys.argv) < 2:
        sys.exit('usage: decode_sites.py <blob>')

    path = sys.argv[1]
    with open(path, 'rb') as f:
        blob = bytearray(f.read())

    sites = find_poll_loops(blob)
    print(f"{'#':>2} {'site':<7} {'br.cond':<8} {'base':<10} {'off':>6} target")
    print('-' * 72)
    for s in sites:
        dec = decode_ldr_unsigned_imm(s['load_inst'])
        if dec is None:
            print(f"{s['idx']:>2} 0x{s['branch_offset']:05x} B.{COND_NAMES[s['cond']]:<6} ???? — unusual LDR form")
            continue
        rt, rn, off, sz = dec
        # A wider window than the default trades more stale matches for a
        # better chance of reaching the MOVZ that started the sequence.
        base, hits = materialized_base(blob, s['load_offset'], rn, window=128)
        base_str = f'0x{base:08x}' if base is not None else 'indirect'
        label = classify(base, off)
        print(f"{s['idx']:>2} 0x{s['branch_offset']:05x} B.{COND_NAMES[s['cond']]:<6} {base_str:<10} 0x{off:04x} {label} (X{rn}→W{rt}, {sz}b, {hits} mov)")


if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user