cd4d01fd69
Each poll loop branches to an appended trampoline that: - Initializes w18 = 0x20000 (128K iterations) - Copies the original loop body (LDR + condition check) - Decrements w18, retries until timeout - Falls through on timeout (no hang) QEMU verified: original stuck at 0x10350, trampoline progresses through all polls. Blob grows from 76704 to 78068 bytes (+1364 bytes trampoline section). NOT YET TESTED ON REAL HARDWARE - the NOP approach bricked the GenBook. This counted approach preserves the poll loops with a safety timeout. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
223 lines
8.3 KiB
Python
223 lines
8.3 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
RK3588 DDR Blob Patcher v5 - Assembly trampolines with counted loops.
|
|
|
|
Appends a trampoline section AFTER the entire blob. Each poll loop's
|
|
backward branch is replaced with a B to its trampoline. The trampoline
|
|
does a counted loop and branches back.
|
|
|
|
The blob is loaded at a fixed address (0x0) by BL2. The data section
|
|
references are absolute addresses within the blob. By appending (not
|
|
inserting), we don't shift any existing code or data.
|
|
|
|
The only known risk is that the BL2 loader may check the blob size and
reject larger blobs. Testing with QEMU will verify the control flow.
|
|
"""
|
|
import struct, sys, os, hashlib
|
|
|
|
# Retry budget loaded into w18 at the top of every trampoline.
# build_trampoline() materialises it with a MOV of the low 16 bits plus a
# MOVK of the next 16 bits, so the value must fit in 32 bits.
# NOTE(review): the wall-clock figure below is the author's estimate; the
# real duration depends on how long each polled load takes - confirm on HW.
TIMEOUT = 0x20000 # 128K iterations (~1ms at 2GHz)
|
|
|
|
def encode_b(src, dst):
    """Encode an AArch64 unconditional branch ``B`` from *src* to *dst*.

    Args:
        src: Byte address of the branch instruction itself.
        dst: Byte address of the branch target.

    Returns:
        The 32-bit instruction word: opcode 0x14000000 with the signed
        word displacement in the low 26 bits.

    Raises:
        ValueError: If the displacement is not a multiple of 4 or does not
            fit the signed 26-bit immediate. Previously such values were
            silently masked, producing a branch to the wrong address.
    """
    delta = dst - src
    if delta % 4:
        raise ValueError(
            f"B displacement {delta:#x} is not 4-byte aligned")
    off = delta >> 2
    if not -(1 << 25) <= off < (1 << 25):
        raise ValueError(
            f"B target out of range: {src:#x} -> {dst:#x}")
    return 0x14000000 | (off & 0x3FFFFFF)
|
|
|
|
def encode_b_cond(src, dst, cond):
    """Encode an AArch64 conditional branch ``B.cond`` from *src* to *dst*.

    Args:
        src: Byte address of the branch instruction itself.
        dst: Byte address of the branch target.
        cond: 4-bit condition code placed in the low bits of the word.

    Returns:
        The 32-bit instruction word: opcode 0x54000000, the signed word
        displacement in bits 5..23, and *cond* in bits 0..3.

    Raises:
        ValueError: If the displacement is not a multiple of 4 or does not
            fit the signed 19-bit immediate. Previously such values were
            silently masked, producing a branch to the wrong address.
    """
    delta = dst - src
    if delta % 4:
        raise ValueError(
            f"B.cond displacement {delta:#x} is not 4-byte aligned")
    off = delta >> 2
    if not -(1 << 18) <= off < (1 << 18):
        raise ValueError(
            f"B.cond target out of range: {src:#x} -> {dst:#x}")
    return 0x54000000 | ((off & 0x7FFFF) << 5) | cond
|
|
|
|
def encode_tbz(src, dst, bit, rt):
    """Encode an AArch64 ``TBZ`` (test bit and branch if zero).

    Args:
        src: Byte address of the TBZ instruction itself.
        dst: Byte address of the branch target.
        bit: Bit number to test; bit 5 of this value goes to instruction
            bit 31 and the low five bits to instruction bits 19..23.
        rt: Register number placed in the low bits of the word.

    Returns:
        The 32-bit instruction word based on opcode 0x36000000.

    Raises:
        ValueError: If the displacement is not a multiple of 4 or does not
            fit the signed 14-bit immediate (+/-32 KiB). Previously such
            values were silently masked, producing a branch to the wrong
            address.
    """
    delta = dst - src
    if delta % 4:
        raise ValueError(
            f"TBZ displacement {delta:#x} is not 4-byte aligned")
    off = delta >> 2
    if not -(1 << 13) <= off < (1 << 13):
        raise ValueError(
            f"TBZ target out of range: {src:#x} -> {dst:#x}")
    b5 = (bit >> 5) & 1
    b40 = bit & 0x1F
    return (0x36000000 | (b5 << 31) | (b40 << 19) |
            ((off & 0x3FFF) << 5) | rt)
|
|
|
|
def encode_tbnz(src, dst, bit, rt):
    """Encode an AArch64 ``TBNZ`` (test bit and branch if non-zero).

    Args:
        src: Byte address of the TBNZ instruction itself.
        dst: Byte address of the branch target.
        bit: Bit number to test; bit 5 of this value goes to instruction
            bit 31 and the low five bits to instruction bits 19..23.
        rt: Register number placed in the low bits of the word.

    Returns:
        The 32-bit instruction word based on opcode 0x37000000.

    Raises:
        ValueError: If the displacement is not a multiple of 4 or does not
            fit the signed 14-bit immediate (+/-32 KiB). Previously such
            values were silently masked, producing a branch to the wrong
            address.
    """
    delta = dst - src
    if delta % 4:
        raise ValueError(
            f"TBNZ displacement {delta:#x} is not 4-byte aligned")
    off = delta >> 2
    if not -(1 << 13) <= off < (1 << 13):
        raise ValueError(
            f"TBNZ target out of range: {src:#x} -> {dst:#x}")
    b5 = (bit >> 5) & 1
    b40 = bit & 0x1F
    return (0x37000000 | (b5 << 31) | (b40 << 19) |
            ((off & 0x3FFF) << 5) | rt)
|
|
|
|
def encode_cbz(src, dst, rt, is64=False):
    """Encode an AArch64 ``CBZ`` (compare and branch if zero).

    Args:
        src: Byte address of the CBZ instruction itself.
        dst: Byte address of the branch target.
        rt: Register number placed in the low bits of the word.
        is64: Select the 0xB4000000 opcode instead of 0x34000000.

    Returns:
        The 32-bit instruction word with the signed word displacement in
        bits 5..23.

    Raises:
        ValueError: If the displacement is not a multiple of 4 or does not
            fit the signed 19-bit immediate. Previously such values were
            silently masked, producing a branch to the wrong address.
    """
    delta = dst - src
    if delta % 4:
        raise ValueError(
            f"CBZ displacement {delta:#x} is not 4-byte aligned")
    off = delta >> 2
    if not -(1 << 18) <= off < (1 << 18):
        raise ValueError(
            f"CBZ target out of range: {src:#x} -> {dst:#x}")
    op = 0xB4000000 if is64 else 0x34000000
    return op | ((off & 0x7FFFF) << 5) | rt
|
|
|
|
def encode_cbnz(src, dst, rt, is64=False):
    """Encode an AArch64 ``CBNZ`` (compare and branch if non-zero).

    Args:
        src: Byte address of the CBNZ instruction itself.
        dst: Byte address of the branch target.
        rt: Register number placed in the low bits of the word.
        is64: Select the 0xB5000000 opcode instead of 0x35000000.

    Returns:
        The 32-bit instruction word with the signed word displacement in
        bits 5..23.

    Raises:
        ValueError: If the displacement is not a multiple of 4 or does not
            fit the signed 19-bit immediate. Previously such values were
            silently masked, producing a branch to the wrong address.
    """
    delta = dst - src
    if delta % 4:
        raise ValueError(
            f"CBNZ displacement {delta:#x} is not 4-byte aligned")
    off = delta >> 2
    if not -(1 << 18) <= off < (1 << 18):
        raise ValueError(
            f"CBNZ target out of range: {src:#x} -> {dst:#x}")
    op = 0xB5000000 if is64 else 0x35000000
    return op | ((off & 0x7FFFF) << 5) | rt
|
|
|
|
def find_polls(blob):
    """Scan *blob* for short backward-branching loops that contain a load.

    Every 4-byte-aligned word (except the last 12 bytes of the buffer) is
    decoded as a little-endian instruction. A word is reported when it is a
    backward B.cond, TBZ/TBNZ or CBZ/CBNZ whose target lies 1-4 (B.cond) or
    1-3 (the others) instructions earlier, and at least one instruction
    between the target and the branch matches one of the load opcode
    patterns 0xB9400000, 0xF9400000 or 0xB9800000 (mask 0xFFC00000).

    Args:
        blob: Bytes-like object holding the code to scan.

    Returns:
        A list of tuples ``(ptype, branch_addr, loop_start, body, extra,
        inst)`` in ascending ``branch_addr`` order, where ``ptype`` is one of
        'Bcond', 'TBZ', 'TBNZ', 'CBZ', 'CBNZ'; ``body`` is the list of
        instruction words from ``loop_start`` up to (not including) the
        branch; ``extra`` is the condition code for 'Bcond', ``(bit, rt)``
        for TBZ/TBNZ and ``(rt, is64)`` for CBZ/CBNZ; ``inst`` is the branch
        word itself.
    """
    load_patterns = (0xB9400000, 0xF9400000, 0xB9800000)

    def word_at(addr):
        # Read one little-endian instruction word.
        return struct.unpack_from('<I', blob, addr)[0]

    def backward_offset(imm, width):
        # Sign-extend a *width*-bit word displacement; None if it is forward.
        if not imm & (1 << (width - 1)):
            return None
        return (imm - (1 << width)) * 4

    def loop_body(branch_at, offset, max_back):
        # Return (loop_start, body words) for a qualifying loop, else None.
        if offset is None or not -max_back <= offset <= -4:
            return None
        start = branch_at + offset
        if start < 0:
            # A negative start would make struct read from the END of the
            # buffer and report a loop that does not exist.
            return None
        words = [word_at(a) for a in range(start, branch_at, 4)]
        if not any((w & 0xFFC00000) in load_patterns for w in words):
            return None
        return start, words

    polls = []
    for i in range(0, len(blob) - 12, 4):
        inst = word_at(i)
        op = (inst >> 24) & 0xFF

        # B.cond backward
        if (inst & 0xFF000010) == 0x54000000:
            hit = loop_body(i, backward_offset((inst >> 5) & 0x7FFFF, 19), 16)
            if hit is not None:
                ls, body = hit
                polls.append(('Bcond', i, ls, body, inst & 0xF, inst))

        # TBZ/TBNZ backward. Bit 31 of the word is b5 of the tested bit
        # number, so it is masked off before comparing the opcode byte;
        # otherwise tests of bits 32..63 (0xB6/0xB7) are never detected.
        tb_op = op & 0x7F
        if tb_op in (0x36, 0x37):
            hit = loop_body(i, backward_offset((inst >> 5) & 0x3FFF, 14), 12)
            if hit is not None:
                ls, body = hit
                bit = ((inst >> 19) & 0x1F) | ((inst >> 26) & 0x20)
                rt = inst & 0x1F
                polls.append(('TBZ' if tb_op == 0x36 else 'TBNZ',
                              i, ls, body, (bit, rt), inst))

        # CBZ/CBNZ backward
        if op in (0x34, 0x35, 0xB4, 0xB5):
            hit = loop_body(i, backward_offset((inst >> 5) & 0x7FFFF, 19), 12)
            if hit is not None:
                ls, body = hit
                rt = inst & 0x1F
                is64 = op >= 0xB4
                polls.append(('CBZ' if op in (0x34, 0xB4) else 'CBNZ',
                              i, ls, body, (rt, is64), inst))
    return polls
|
|
|
|
def build_trampoline(ptype, branch_addr, loop_start, body_instrs,
                     extra, orig_inst, tramp_addr):
    """Build the trampoline instruction words for one poll loop.

    Emitted layout (one 32-bit word per line)::

            MOV   w18, #lo16(TIMEOUT)
            MOVK  w18, #hi16(TIMEOUT), LSL #16    ; only if TIMEOUT > 0xFFFF
        retry:
            <copy of body_instrs>
            <exit test: leave to return_addr once the loop condition ends>
            SUBS  w18, w18, #1
            B.NE  retry
            B     return_addr                     ; timeout fallthrough

    The exit test is normally a single inverted branch straight to
    ``return_addr``. When ``return_addr`` is outside that branch's reach
    (TBZ/TBNZ only span +/-32 KiB, and the trampoline sits after the end of
    the blob), a range-safe pair is emitted instead: the original-sense
    test hops over an unconditional ``B return_addr``.

    Args:
        ptype: 'Bcond', 'TBZ', 'TBNZ', 'CBZ' or 'CBNZ'.
        branch_addr: Address of the original backward branch; execution
            resumes at ``branch_addr + 4``.
        loop_start: Address of the first loop instruction (unused here).
        body_instrs: Instruction words copied verbatim into the trampoline.
            NOTE(review): words are copied without relocation, so a
            PC-relative instruction in the body would misbehave - confirm
            none occur in the target blob.
        extra: Condition code for 'Bcond', ``(bit, rt)`` for TBZ/TBNZ,
            ``(rt, is64)`` for CBZ/CBNZ.
        orig_inst: The original branch word (unused here).
        tramp_addr: Address at which the returned words will be placed.

    Returns:
        List of 32-bit instruction words.

    Raises:
        ValueError: If *ptype* is not recognised or TIMEOUT cannot be
            loaded by the MOV/MOVK pair (must be 1..0xFFFFFFFF).
    """
    if ptype not in ('Bcond', 'TBZ', 'TBNZ', 'CBZ', 'CBNZ'):
        # Without an exit test the trampoline would always spin to timeout.
        raise ValueError(f"unknown poll type: {ptype!r}")
    if not 0 < TIMEOUT <= 0xFFFFFFFF:
        raise ValueError(f"TIMEOUT {TIMEOUT:#x} does not fit MOV/MOVK w18")

    return_addr = branch_addr + 4  # instruction after the original branch

    # MOV w18, #(TIMEOUT & 0xFFFF)
    instrs = [0x52800012 | ((TIMEOUT & 0xFFFF) << 5)]
    # MOVK w18, #(TIMEOUT >> 16), LSL #16 (if needed)
    if TIMEOUT > 0xFFFF:
        instrs.append(0x72A00012 | (((TIMEOUT >> 16) & 0xFFFF) << 5))

    retry_index = len(instrs)  # index of the first copied body instruction
    instrs.extend(body_instrs)

    # Pick the encoders for "leave the loop" (inverted test) and "stay in
    # the loop" (original test), plus the byte reach of that branch form.
    if ptype == 'Bcond':
        cond = extra
        # NOTE(review): cond ^ 1 is only a true inverse for codes 0x0-0xD;
        # 0xE/0xF both mean "always" - confirm no such loop is matched.
        leave = lambda s, d: encode_b_cond(s, d, cond ^ 1)
        stay = lambda s, d: encode_b_cond(s, d, cond)
        reach = 1 << 20
    elif ptype in ('TBZ', 'TBNZ'):
        bit, rt = extra
        tbz = lambda s, d: encode_tbz(s, d, bit, rt)
        tbnz = lambda s, d: encode_tbnz(s, d, bit, rt)
        # TBZ loops while bit=0 -> leave on TBNZ, and vice versa.
        leave, stay = (tbnz, tbz) if ptype == 'TBZ' else (tbz, tbnz)
        reach = 1 << 15
    else:
        rt, is64 = extra
        cbz = lambda s, d: encode_cbz(s, d, rt, is64)
        cbnz = lambda s, d: encode_cbnz(s, d, rt, is64)
        leave, stay = (cbnz, cbz) if ptype == 'CBZ' else (cbz, cbnz)
        reach = 1 << 20

    cur = tramp_addr + len(instrs) * 4
    if -reach <= return_addr - cur < reach:
        instrs.append(leave(cur, return_addr))
    else:
        # Target too far for the short form: keep looping by skipping the
        # following B, otherwise fall into it and leave.
        instrs.append(stay(cur, cur + 8))
        instrs.append(encode_b(cur + 4, return_addr))

    # SUBS w18, w18, #1
    instrs.append(0x71000652)

    # B.NE back to the first body instruction in the trampoline
    cur = tramp_addr + len(instrs) * 4
    instrs.append(encode_b_cond(cur, tramp_addr + retry_index * 4, 1))

    # B return_addr (timeout fallthrough)
    cur = tramp_addr + len(instrs) * 4
    instrs.append(encode_b(cur, return_addr))

    return instrs
|
|
|
|
def patch(inpath, outpath):
    """Patch the blob at *inpath* and write the result to *outpath*.

    Steps: read the blob, locate poll loops with find_polls(), zero-pad the
    blob to a 16-byte boundary, then for each poll append its trampoline
    (build_trampoline()) and overwrite the original backward branch with an
    unconditional B to that trampoline. A summary and truncated SHA-256
    digests of the original and patched images are printed.

    Args:
        inpath: Path of the blob to read.
        outpath: Path the patched blob is written to.
    """
    with open(inpath, 'rb') as f:
        original = f.read()
    blob = bytearray(original)
    orig_size = len(blob)

    polls = find_polls(blob)
    print(f"Found {len(polls)} poll loops in {orig_size} byte blob")

    # Append trampoline section, aligned to 16 bytes
    blob.extend(bytes(-len(blob) % 16))
    tramp_base = len(blob)

    patched = 0
    for ptype, branch_addr, loop_start, body, extra, orig_inst in polls:
        tramp_addr = len(blob)
        t_instrs = build_trampoline(ptype, branch_addr, loop_start,
                                    body, extra, orig_inst, tramp_addr)

        # Append trampoline
        blob += struct.pack(f'<{len(t_instrs)}I', *t_instrs)

        # Replace original backward branch with B to trampoline
        struct.pack_into('<I', blob, branch_addr,
                         encode_b(branch_addr, tramp_addr))
        patched += 1

        print(f" 0x{branch_addr:05x} {ptype:>5s} -> trampoline @ 0x{tramp_addr:05x} "
              f"({len(t_instrs)} instrs, {len(body)} body)")

    tramp_size = len(blob) - tramp_base
    print(f"\nTrampoline section: {tramp_size} bytes @ 0x{tramp_base:x}")
    print(f"Patched: {patched}/{len(polls)} polls")
    print(f"Timeout: {TIMEOUT} iterations per poll")
    print(f"Blob: {orig_size} -> {len(blob)} bytes (+{len(blob)-orig_size})")

    with open(outpath, 'wb') as f:
        f.write(blob)

    # Hash the bytes read at the start rather than re-opening inpath: the
    # re-read leaked a file handle and, when inpath == outpath, reported the
    # patched image's digest as the original's.
    oh = hashlib.sha256(original).hexdigest()[:16]
    ph = hashlib.sha256(blob).hexdigest()[:16]
    print(f"\nSHA256 orig: {oh}")
    print(f"SHA256 patched: {ph}")
|
|
|
|
if __name__ == '__main__':
    # Usage: script [input_blob [output_blob]]
    # Either positional argument may be omitted; the built-in path is used.
    cli_args = sys.argv[1:]
    default_paths = (
        '/opt/rkbin/bin/rk35/rk3588_ddr_lp4_2112MHz_lp5_2400MHz_v1.19.bin',
        '/opt/work/rk3588_ddr_v1.19_trampoline.bin',
    )
    source_path = cli_args[0] if cli_args else default_paths[0]
    target_path = cli_args[1] if len(cli_args) >= 2 else default_paths[1]
    patch(source_path, target_path)
|