v3 patcher: full-body trampolines + site bisection subsets
Root cause of counted_v2 brick identified:
v2 copied only ONE non-load body instruction into each trampoline (picks
the first after the LDR). For poll patterns of form
LDR Wx, [Xbase, #off]
AND Wx, Wx, #mask ; no flag update
CMP Wx, #expected ; sets flags
B.cond .retry
— 9 of the 16 sites in v1.19 have this shape — the final CMP was silently
dropped. The trampoline's B.inv_cond tested whatever flags happened to be
set before entry, producing effectively random branch decisions once
under the trampoline. Result: boot crashes before the UART banner,
observed as 'power LED off' brick.
Fix in v3: copy the ENTIRE loop body (LDR + all intermediate instructions,
in original order) into each trampoline. Size is now 4*(N+6) where N is
body length (32 bytes for body=2, 36 for body=3).
Also in v3:
- --sites subset flag for bisection (all/early/mid/late/none/index list)
- decode_sites.py helper that tries to identify which MMIO register each
site polls (best effort — the materialized_base scanner is naive and
picks up stale MOVZ targets, but cluster grouping by blob offset is
reliable and sufficient for bisection)
Site clusters in v1.19:
0..7 early (0x07b78..0x07f08): SGRF + PHY firmware state machine
8..10 mid (0x09124..0x0aaf8): DfiStatus / training start
11..15 late (0x0d154..0x0d378): UctWriteProt / CalBusy / late
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Executable
+254
@@ -0,0 +1,254 @@
|
||||
#!/usr/bin/env python3
|
||||
"""RK3588 DDR Blob Patcher v3 — counted-loop trampolines with SITE SUBSET.
|
||||
|
||||
v2 BUG identified 2026-04-15: the original patcher copied at most ONE
|
||||
body instruction beyond the LDR into the trampoline. For poll patterns of
|
||||
form `LDR; AND; CMP; B.cond` (5 of the 16 sites), the CMP was dropped.
|
||||
The trampoline's `B.inv_cond` then tested stale flags → random branch →
|
||||
brick.
|
||||
|
||||
v3 fix: copy the **full** original loop body (LDR + every intermediate
|
||||
instruction, in original order) into the trampoline, so the last
|
||||
flag-setting instruction matches the original loop.
|
||||
|
||||
New trampoline layout (N = len(body) — usually 2 or 3):
|
||||
+0: MOV W16, #TIMEOUT
|
||||
+4 .. +4N: <body[0] .. body[N-1]> ; LDR + all test instructions
|
||||
+4N+4: B.inv_cond .done
|
||||
+4N+8: SUBS W16, W16, #1
|
||||
+4N+12: B.NE .retry (back to +4)
|
||||
+4N+16: B return_addr ; timeout path
|
||||
.done:
|
||||
+4N+20: B return_addr ; success path
|
||||
|
||||
Size per trampoline: 4 * (N + 6) bytes. With max body of 3 instructions
that's 4*9 = 36 bytes (v2 used 32); 2-inst bodies take 32 bytes.
|
||||
|
||||
Site subset flags:
|
||||
--sites all | early | mid | late | none
|
||||
--sites 0,3,5-7 # explicit index list
|
||||
|
||||
Site index is stable (determined by find_poll_loops' ascending-offset
|
||||
ordering). As of rk3588_ddr_v1.19:
|
||||
0..7 EARLY cluster (0x07b78..0x07f08) — SGRF + PHY firmware fsm
|
||||
8..10 MID cluster (0x09124..0x0aaf8) — DfiStatus / training start
|
||||
11..15 LATE cluster (0x0d154..0x0d378) — UctWriteProt / CalBusy / late
|
||||
"""
|
||||
|
||||
import struct
|
||||
import sys
|
||||
import argparse
|
||||
|
||||
|
||||
# Number of poll attempts a trampoline makes before it gives up and falls
# through to the instruction after the original loop.
TIMEOUT_ITERATIONS = 1 << 14

# Register number used for the trampoline's down-counter (W16).
COUNTER_REG = 16

# A64 condition mnemonics, indexed by the 4-bit cond field of B.cond.
COND_NAMES = [
    'EQ', 'NE', 'CS', 'CC', 'MI', 'PL', 'VS', 'VC',
    'HI', 'LS', 'GE', 'LT', 'GT', 'LE', 'AL', 'NV',
]

# Named site-index groups accepted by --sites. The ranges are the clusters
# listed in the module docstring for rk3588_ddr_v1.19.
CLUSTERS = {
    'early': [0, 1, 2, 3, 4, 5, 6, 7],
    'mid': [8, 9, 10],
    'late': [11, 12, 13, 14, 15],
    'all': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
    'none': [],
}
|
||||
|
||||
|
||||
def encode_movz(rd, imm16, shift=0):
    """Encode a 32-bit ``MOVZ W<rd>, #imm16, LSL #shift`` instruction word.

    Args:
        rd: Destination register number.
        imm16: Immediate placed in bits [20:5]; not range-checked.
        shift: Left shift in bits; ``shift // 16`` goes into the hw field.

    Returns:
        The instruction word as an int.
    """
    word = 0x52800000  # MOVZ (32-bit) with every operand field zero
    word |= (shift // 16) << 21  # hw field
    word |= imm16 << 5
    word |= rd
    return word
|
||||
|
||||
def encode_subs_imm(rd, rn, imm12):
    """Encode a 32-bit ``SUBS W<rd>, W<rn>, #imm12`` instruction word.

    Args:
        rd: Destination register number (bits [4:0]).
        rn: Source register number (bits [9:5]).
        imm12: Immediate placed in bits [21:10]; not range-checked.

    Returns:
        The instruction word as an int.
    """
    opcode = 0x71000000  # SUBS (immediate, 32-bit, no shift)
    operands = (imm12 << 10) | (rn << 5) | rd
    return opcode | operands
|
||||
|
||||
def encode_b(from_offset, to_offset):
    """Encode an unconditional ``B`` from one blob offset to another.

    Args:
        from_offset: Byte offset of the branch instruction itself.
        to_offset: Byte offset of the branch target.

    Returns:
        The instruction word as an int. The word distance is truncated to
        26 bits (two's complement) without a range check.
    """
    word_distance = (to_offset - from_offset) // 4
    # Masking yields the two's-complement imm26 for backward branches too.
    imm26 = word_distance & 0x3FFFFFF
    return 0x14000000 | imm26
|
||||
|
||||
def encode_bcond(cond, from_offset, to_offset):
    """Encode a conditional ``B.<cond>`` between two blob offsets.

    Args:
        cond: 4-bit condition code (index into COND_NAMES).
        from_offset: Byte offset of the branch instruction itself.
        to_offset: Byte offset of the branch target.

    Returns:
        The instruction word as an int. The word distance is truncated to
        19 bits (two's complement) without a range check.
    """
    word_distance = (to_offset - from_offset) // 4
    return 0x54000000 | ((word_distance & 0x7FFFF) << 5) | cond
|
||||
|
||||
def encode_bne(from_offset, to_offset):
    """Encode ``B.NE`` from ``from_offset`` to ``to_offset`` (byte offsets)."""
    ne = 0b0001  # condition code for NE
    return encode_bcond(ne, from_offset, to_offset)
|
||||
|
||||
def invert_cond(cond):
    """Return the opposite condition code by flipping bit 0.

    Condition codes are laid out in complementary pairs (EQ/NE, CS/CC, ...),
    so toggling the low bit yields the inverse test. Codes 14/15 (AL/NV) are
    simply swapped; that pair is not a meaningful inverse.
    """
    low_bit = 1
    return cond ^ low_bit
|
||||
|
||||
|
||||
def find_poll_loops(blob):
    """Find tight poll loops that end in a short backward ``B.cond``.

    A site is a ``B.cond`` whose target lies 1 to 4 instructions before it
    and whose loop body (target up to, but excluding, the branch) contains
    at least one ``LDR`` (immediate, unsigned offset; 32- or 64-bit).

    Args:
        blob: Bytes-like object holding little-endian A64 code.

    Returns:
        A list of dicts in ascending ``branch_offset`` order, each with:
        ``idx`` (position in the list), ``branch_offset`` (byte offset of
        the B.cond), ``cond`` (4-bit condition code) and ``body`` (list of
        instruction words from the loop start up to the branch).
    """
    sites = []
    # NOTE(review): the scan stops 12 bytes before the end of the blob, so a
    # B.cond in the last three words is never considered. Left unchanged so
    # that site indices stay identical to earlier runs.
    for i in range(0, len(blob) - 12, 4):
        inst = struct.unpack_from('<I', blob, i)[0]
        # B.cond has bits [31:24] == 0x54 and bit 4 == 0.
        if (inst & 0xFF000010) != 0x54000000:
            continue
        imm19 = (inst >> 5) & 0x7FFFF
        if not (imm19 & 0x40000):
            continue  # sign bit clear: forward branch, not a loop
        offset = (imm19 - 0x80000) * 4  # sign-extend imm19, scale to bytes
        if not (-16 <= offset <= -4):
            continue

        loop_start = i + offset
        if loop_start < 0:
            # Target lies before the start of the blob. struct.unpack_from
            # treats a negative offset as "from the end", which would read
            # unrelated words and could fabricate a site.
            continue
        cond = inst & 0xF

        # Body = every instruction from loop_start up to (but excluding) the branch.
        body = [struct.unpack_from('<I', blob, j)[0]
                for j in range(loop_start, i, 4)]

        # The mask clears bit 30 (the 32/64-bit size bit), so one comparison
        # matches both LDR Wt and LDR Xt in the unsigned-offset form.
        if not any((w & 0xBFC00000) == 0xB9400000 for w in body):
            continue  # require at least one LDR — filters out pure cmp/b.ne spin loops

        sites.append({
            'idx': len(sites),
            'branch_offset': i,
            'cond': cond,
            'body': body,
        })
    return sites
|
||||
|
||||
|
||||
def build_trampoline(site, tramp_offset, return_offset):
    """Assemble the counted-retry trampoline for one poll site.

    Layout, with N = len(site['body']) and offsets relative to tramp_offset:
        +0            MOVZ W16, #TIMEOUT_ITERATIONS
        +4 .. +4N     the original loop body, unchanged and in order
        +4(N+1)       B.<inverted cond> .done
        +4(N+2)       SUBS W16, W16, #1
        +4(N+3)       B.NE back to +4
        +4(N+4)       B return_offset   (counter exhausted)
        +4(N+5) .done B return_offset   (poll condition satisfied)

    Args:
        site: Site dict from find_poll_loops (uses 'body' and 'cond').
        tramp_offset: Blob offset where the trampoline will be placed.
        return_offset: Blob offset to branch back to when the loop ends.

    Returns:
        List of N + 6 instruction words.
    """
    body = site['body']
    n = len(body)

    def at(slot):
        # Blob offset of the slot-th instruction of this trampoline.
        return tramp_offset + 4 * slot

    done = at(n + 5)
    exit_when_satisfied = encode_bcond(invert_cond(site['cond']), at(n + 1), done)

    return [
        encode_movz(COUNTER_REG, TIMEOUT_ITERATIONS),
        *body,
        exit_when_satisfied,
        encode_subs_imm(COUNTER_REG, COUNTER_REG, 1),
        encode_bne(at(n + 3), at(1)),        # retry from the first body word
        encode_b(at(n + 4), return_offset),  # timeout fallthrough
        encode_b(done, return_offset),       # .done
    ]
|
||||
|
||||
|
||||
def parse_sites(spec, max_index):
    """Turn a ``--sites`` specification into a sorted list of site indices.

    The spec is a comma-separated list whose items are a cluster name from
    CLUSTERS ('all', 'early', 'mid', 'late', 'none'), a single index ('3'),
    or an inclusive range ('5-7'). Whitespace around items and empty items
    are ignored.

    Args:
        spec: The specification string.
        max_index: Highest valid site index; anything outside
            ``0..max_index`` is dropped.

    Returns:
        A new sorted list of unique indices. Cluster names go through the
        same filtering as explicit indices, and the shared CLUSTERS lists
        are never handed out by reference.

    Raises:
        ValueError: If an item is neither a cluster name, an integer, nor
            an ``a-b`` range of integers.
    """
    selected = set()
    for part in spec.split(','):
        part = part.strip()
        if not part:
            continue
        if part in CLUSTERS:
            selected.update(CLUSTERS[part])
            continue
        try:
            if '-' in part:
                lo, hi = part.split('-', 1)
                selected.update(range(int(lo), int(hi) + 1))
            else:
                selected.add(int(part))
        except ValueError:
            raise ValueError(f'invalid site specification: {part!r}') from None
    return sorted(i for i in selected if 0 <= i <= max_index)
|
||||
|
||||
|
||||
def patch_blob(inpath, outpath, site_indices):
    """Patch the selected poll sites of a blob and write the result.

    For each selected site a trampoline (see build_trampoline) is appended
    to the end of the blob and the site's B.cond is overwritten with an
    unconditional B to that trampoline. The trampoline returns to the
    instruction following the original branch.

    Args:
        inpath: Path of the input blob.
        outpath: Path the patched blob is written to.
        site_indices: Iterable of indices into find_poll_loops' result.
            Out-of-range (including negative) and repeated indices are
            ignored.

    Returns:
        ``(patched_count, final_blob_size)``. When no poll loops are found
        nothing is written and ``(0, input_size)`` is returned; when no
        valid site is selected the unmodified blob is written.
    """
    with open(inpath, 'rb') as f:
        blob = bytearray(f.read())

    orig_size = len(blob)
    all_sites = find_poll_loops(blob)
    if not all_sites:
        print('No poll loops found!', file=sys.stderr)
        return 0, len(blob)

    # Keep the caller's order but drop invalid and duplicate indices: a
    # negative index would silently address a site from the end of the list,
    # and a repeated one would emit two trampolines for the same branch.
    site_indices = list(dict.fromkeys(
        i for i in site_indices if 0 <= i < len(all_sites)))
    if not site_indices:
        print('No sites selected — writing unmodified blob.')
        with open(outpath, 'wb') as f:
            f.write(blob)
        return 0, len(blob)

    # A64 instructions must sit on 4-byte boundaries and encode_b floors the
    # byte distance to whole words, so pad first if the blob length is not
    # already a multiple of 4 (no-op for aligned blobs).
    blob.extend(bytes(-len(blob) % 4))

    patches = []
    for idx in site_indices:
        site = all_sites[idx]
        branch_offset = site['branch_offset']
        # Trampolines are packed back to back; each one's size follows from
        # the code build_trampoline actually returns.
        tramp_offset = len(blob)
        tramp_code = build_trampoline(site, tramp_offset, branch_offset + 4)
        blob.extend(struct.pack(f'<{len(tramp_code)}I', *tramp_code))
        # Redirect the original B.cond into the trampoline.
        struct.pack_into('<I', blob, branch_offset,
                         encode_b(branch_offset, tramp_offset))
        patches.append({
            'idx': idx,
            'addr': branch_offset,
            'cond': COND_NAMES[site['cond']],
            'body_len': len(site['body']),
            'trampoline': tramp_offset,
        })

    with open(outpath, 'wb') as f:
        f.write(blob)

    total_sites = len(all_sites)
    print(f'Patched {len(patches)}/{total_sites} poll sites (full-body trampolines):')
    print(f'  Timeout: {TIMEOUT_ITERATIONS} iter (~{TIMEOUT_ITERATIONS*10/1800:.0f}us @1.8GHz)')
    for p in patches:
        print(f'  site {p["idx"]:2d}: 0x{p["addr"]:05x}  B.{p["cond"]:<2s}  body={p["body_len"]}  → tramp@0x{p["trampoline"]:05x}')
    patched = {p['idx'] for p in patches}
    unpatched = [i for i in range(total_sites) if i not in patched]
    if unpatched:
        print(f'  UNPATCHED (stock): {unpatched}')
    print(f'\nBinary: {orig_size} → {len(blob)} bytes (+{len(blob)-orig_size})')
    print(f'Output: {outpath}')
    return len(patches), len(blob)
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: parse arguments and patch the blob.

    The default output path is the input path with ``_v3`` inserted before
    a trailing ``.bin``; if the input does not end in ``.bin``,
    ``_v3.bin`` is appended. The default therefore never equals the input
    path, so the input blob is not overwritten by accident.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument('--sites', default='all',
                    help="Sites to patch: 'all', 'early', 'mid', 'late', 'none',"
                         " or comma/hyphen list (e.g. '0,3,5-7'). Default: all.")
    ap.add_argument('input', help='Input DDR blob')
    ap.add_argument('output', nargs='?',
                    help='Output path (default: <input>_v3.bin)')
    args = ap.parse_args()

    suffix = '.bin'
    if args.output:
        outpath = args.output
    elif args.input.endswith(suffix):
        # Only touch the trailing extension, not every '.bin' in the path.
        outpath = args.input[:-len(suffix)] + '_v3' + suffix
    else:
        outpath = args.input + '_v3' + suffix

    # Scan once up front so index lists can be clamped to the sites that
    # actually exist in this blob.
    with open(args.input, 'rb') as f:
        all_sites = find_poll_loops(bytearray(f.read()))
    max_idx = len(all_sites) - 1 if all_sites else 0
    try:
        indices = parse_sites(args.sites, max_idx)
    except ValueError as exc:
        # Report a malformed --sites value as a usage error, not a traceback.
        ap.error(f'--sites: {exc}')

    patch_blob(args.input, outpath, indices)


if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user