Files
test0r cd4d01fd69 Add trampoline patcher v5 - counted loop timeouts for all 45 polls
Each poll loop branches to an appended trampoline that:
- Initializes w18 = 0x20000 (128K iterations)
- Copies the original loop body (LDR + condition check)
- Decrements w18, retries until timeout
- Falls through on timeout (no hang)

QEMU verified: original stuck at 0x10350, trampoline progresses through all polls.
Blob grows from 76704 to 78068 bytes (+1364 bytes trampoline section).

NOT YET TESTED ON REAL HARDWARE - the NOP approach bricked the GenBook.
This counted approach preserves the poll loops with a safety timeout.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-03 23:10:13 +02:00

223 lines
8.3 KiB
Python

#!/usr/bin/env python3
"""
RK3588 DDR Blob Patcher v5 - Assembly trampolines with counted loops.
Appends a trampoline section AFTER the entire blob. Each poll loop's
backward branch is replaced with a B to its trampoline. The trampoline
does a counted loop and branches back.
The blob is loaded at a fixed address (0x0) by BL2. The data section
references are absolute addresses within the blob. By appending (not
inserting), we don't shift any existing code or data.
The only risk: if the BL2 loader checks the blob size and rejects larger
blobs. Testing with QEMU will verify the control flow.
"""
import struct, sys, os, hashlib
# Iteration budget that every trampoline loads into w18 before it starts
# polling: 0x20000 = 131072 (128 Ki) passes through the copied loop body.
# NOTE(review): the original estimate was "~1ms at 2GHz"; the real duration
# depends on how long each copied load takes - confirm on hardware.
TIMEOUT = 0x20000
def encode_b(src, dst):
    """Encode an AArch64 unconditional branch ``B`` from *src* to *dst*.

    Args:
        src: Byte address of the branch instruction itself.
        dst: Byte address of the branch target.

    Returns:
        The 32-bit instruction word: opcode 0x14000000 with the signed
        word offset in bits [25:0].

    Raises:
        ValueError: If the displacement is not a multiple of 4 or does not
            fit the signed 26-bit word-offset field (+/-128 MiB).
    """
    delta = dst - src
    if delta % 4:
        raise ValueError(
            f"B 0x{src:x} -> 0x{dst:x}: displacement is not 4-byte aligned")
    off = delta >> 2
    # Masking an out-of-range offset would silently encode a branch to a
    # different address, so refuse instead of truncating.
    if not -(1 << 25) <= off < (1 << 25):
        raise ValueError(
            f"B 0x{src:x} -> 0x{dst:x}: target out of range (+/-128 MiB)")
    return 0x14000000 | (off & 0x3FFFFFF)
def encode_b_cond(src, dst, cond):
    """Encode an AArch64 conditional branch ``B.cond`` from *src* to *dst*.

    Args:
        src: Byte address of the branch instruction itself.
        dst: Byte address of the branch target.
        cond: 4-bit condition code placed in bits [3:0] (e.g. 0 = EQ, 1 = NE).

    Returns:
        The 32-bit instruction word: opcode 0x54000000 with the signed
        word offset in bits [23:5].

    Raises:
        ValueError: If the displacement is not a multiple of 4 or does not
            fit the signed 19-bit word-offset field (+/-1 MiB).
    """
    delta = dst - src
    if delta % 4:
        raise ValueError(
            f"B.cond 0x{src:x} -> 0x{dst:x}: displacement is not 4-byte aligned")
    off = delta >> 2
    # Refuse rather than truncate: a masked offset branches somewhere else.
    if not -(1 << 18) <= off < (1 << 18):
        raise ValueError(
            f"B.cond 0x{src:x} -> 0x{dst:x}: target out of range (+/-1 MiB)")
    return 0x54000000 | ((off & 0x7FFFF) << 5) | cond
def encode_tbz(src, dst, bit, rt):
    """Encode an AArch64 ``TBZ`` (test bit and branch if zero).

    Args:
        src: Byte address of the branch instruction itself.
        dst: Byte address of the branch target.
        bit: Number of the bit to test; bit 5 of this value goes to
            instruction bit 31, the low five bits to bits [23:19].
        rt: Number of the register to test, placed in bits [4:0].

    Returns:
        The 32-bit instruction word with the signed word offset in
        bits [18:5].

    Raises:
        ValueError: If the displacement is not a multiple of 4 or does not
            fit the signed 14-bit word-offset field (+/-32 KiB).
    """
    delta = dst - src
    if delta % 4:
        raise ValueError(
            f"TBZ 0x{src:x} -> 0x{dst:x}: displacement is not 4-byte aligned")
    off = delta >> 2
    # TBZ only reaches +/-32 KiB. Code appended after a larger image can
    # easily be further away; masking the offset would then branch to an
    # unrelated address, so fail loudly instead.
    if not -(1 << 13) <= off < (1 << 13):
        raise ValueError(
            f"TBZ 0x{src:x} -> 0x{dst:x}: target out of range (+/-32 KiB)")
    b5 = (bit >> 5) & 1
    b40 = bit & 0x1F
    return (0x36000000 | (b5 << 31) | (b40 << 19) |
            ((off & 0x3FFF) << 5) | rt)
def encode_tbnz(src, dst, bit, rt):
    """Encode an AArch64 ``TBNZ`` (test bit and branch if non-zero).

    Args:
        src: Byte address of the branch instruction itself.
        dst: Byte address of the branch target.
        bit: Number of the bit to test; bit 5 of this value goes to
            instruction bit 31, the low five bits to bits [23:19].
        rt: Number of the register to test, placed in bits [4:0].

    Returns:
        The 32-bit instruction word with the signed word offset in
        bits [18:5].

    Raises:
        ValueError: If the displacement is not a multiple of 4 or does not
            fit the signed 14-bit word-offset field (+/-32 KiB).
    """
    delta = dst - src
    if delta % 4:
        raise ValueError(
            f"TBNZ 0x{src:x} -> 0x{dst:x}: displacement is not 4-byte aligned")
    off = delta >> 2
    # TBNZ only reaches +/-32 KiB; refuse rather than emit a truncated
    # offset that would branch to an unrelated address.
    if not -(1 << 13) <= off < (1 << 13):
        raise ValueError(
            f"TBNZ 0x{src:x} -> 0x{dst:x}: target out of range (+/-32 KiB)")
    b5 = (bit >> 5) & 1
    b40 = bit & 0x1F
    return (0x37000000 | (b5 << 31) | (b40 << 19) |
            ((off & 0x3FFF) << 5) | rt)
def encode_cbz(src, dst, rt, is64=False):
    """Encode an AArch64 ``CBZ`` (compare and branch if zero).

    Args:
        src: Byte address of the branch instruction itself.
        dst: Byte address of the branch target.
        rt: Number of the register to compare, placed in bits [4:0].
        is64: Select the 64-bit form (opcode 0xB4000000) instead of the
            32-bit form (opcode 0x34000000).

    Returns:
        The 32-bit instruction word with the signed word offset in
        bits [23:5].

    Raises:
        ValueError: If the displacement is not a multiple of 4 or does not
            fit the signed 19-bit word-offset field (+/-1 MiB).
    """
    delta = dst - src
    if delta % 4:
        raise ValueError(
            f"CBZ 0x{src:x} -> 0x{dst:x}: displacement is not 4-byte aligned")
    off = delta >> 2
    # Refuse rather than truncate: a masked offset branches somewhere else.
    if not -(1 << 18) <= off < (1 << 18):
        raise ValueError(
            f"CBZ 0x{src:x} -> 0x{dst:x}: target out of range (+/-1 MiB)")
    op = 0xB4000000 if is64 else 0x34000000
    return op | ((off & 0x7FFFF) << 5) | rt
def encode_cbnz(src, dst, rt, is64=False):
    """Encode an AArch64 ``CBNZ`` (compare and branch if non-zero).

    Args:
        src: Byte address of the branch instruction itself.
        dst: Byte address of the branch target.
        rt: Number of the register to compare, placed in bits [4:0].
        is64: Select the 64-bit form (opcode 0xB5000000) instead of the
            32-bit form (opcode 0x35000000).

    Returns:
        The 32-bit instruction word with the signed word offset in
        bits [23:5].

    Raises:
        ValueError: If the displacement is not a multiple of 4 or does not
            fit the signed 19-bit word-offset field (+/-1 MiB).
    """
    delta = dst - src
    if delta % 4:
        raise ValueError(
            f"CBNZ 0x{src:x} -> 0x{dst:x}: displacement is not 4-byte aligned")
    off = delta >> 2
    # Refuse rather than truncate: a masked offset branches somewhere else.
    if not -(1 << 18) <= off < (1 << 18):
        raise ValueError(
            f"CBNZ 0x{src:x} -> 0x{dst:x}: target out of range (+/-1 MiB)")
    op = 0xB5000000 if is64 else 0x35000000
    return op | ((off & 0x7FFFF) << 5) | rt
# Top-ten-bit patterns that mark an instruction as a register load for the
# purpose of poll detection (A64 unsigned-offset forms):
# 0xB9400000 = LDR Wt, 0xF9400000 = LDR Xt, 0xB9800000 = LDRSW.
_POLL_LOAD_OPCODES = (0xB9400000, 0xF9400000, 0xB9800000)


def _backward_branch_offset(inst, width):
    """Return the byte offset of a backward branch, or None if it is forward.

    The immediate is the signed *width*-bit word offset stored at bit 5 of
    *inst* (19 bits for B.cond/CBZ/CBNZ, 14 bits for TBZ/TBNZ).
    """
    imm = (inst >> 5) & ((1 << width) - 1)
    if not imm & (1 << (width - 1)):
        return None
    return (imm - (1 << width)) * 4


def _poll_loop_body(blob, branch_addr, offset):
    """Return ``(loop_start, body_words)`` if the loop contains a load.

    Returns None when the loop would start before offset 0 of the blob or
    when none of its instructions matches ``_POLL_LOAD_OPCODES``.
    """
    loop_start = branch_addr + offset
    # struct.unpack_from accepts negative offsets and counts them from the
    # END of the buffer, so a branch that points before the blob must be
    # rejected explicitly or it would be matched against unrelated bytes.
    if loop_start < 0:
        return None
    body = [struct.unpack_from('<I', blob, addr)[0]
            for addr in range(loop_start, branch_addr, 4)]
    if not any((word & 0xFFC00000) in _POLL_LOAD_OPCODES for word in body):
        return None
    return loop_start, body


def find_polls(blob):
    """Locate short backward-branching poll loops in an AArch64 code blob.

    Every 4-byte little-endian word is examined, except the last three
    words of the blob (the scan stops at ``len(blob) - 12``). A word is
    reported when it is a backward B.cond (reaching back 4..16 bytes) or a
    backward TBZ/TBNZ/CBZ/CBNZ (reaching back 4..12 bytes) and at least one
    instruction between the branch target and the branch is a load.

    Args:
        blob: Bytes-like object holding the machine code.

    Returns:
        A list of tuples ``(ptype, branch_addr, loop_start, body, extra,
        inst)`` in ascending address order, where

        * ``ptype`` is 'Bcond', 'TBZ', 'TBNZ', 'CBZ' or 'CBNZ';
        * ``branch_addr`` is the byte offset of the branch;
        * ``loop_start`` is the byte offset of the branch target;
        * ``body`` is the list of instruction words in
          ``[loop_start, branch_addr)``;
        * ``extra`` is the condition code (Bcond), ``(bit, rt)``
          (TBZ/TBNZ) or ``(rt, is64)`` (CBZ/CBNZ);
        * ``inst`` is the original branch instruction word.
    """
    polls = []
    for i in range(0, len(blob) - 12, 4):
        inst = struct.unpack_from('<I', blob, i)[0]
        op = (inst >> 24) & 0xFF
        if (inst & 0xFF000010) == 0x54000000:
            ptype, width, max_back = 'Bcond', 19, 16
            extra = inst & 0xF
        elif (op & 0x7F) in (0x36, 0x37):
            # Bit 31 is b5, the top bit of the tested bit number, so it is
            # masked out of the opcode compare; otherwise tests of bits
            # 32..63 (top byte 0xB6/0xB7) would never be recognised.
            ptype = 'TBZ' if (op & 0x7F) == 0x36 else 'TBNZ'
            width, max_back = 14, 12
            bit = ((inst >> 19) & 0x1F) | ((inst >> 26) & 0x20)
            extra = (bit, inst & 0x1F)
        elif op in (0x34, 0x35, 0xB4, 0xB5):
            ptype = 'CBZ' if op in (0x34, 0xB4) else 'CBNZ'
            width, max_back = 19, 12
            extra = (inst & 0x1F, op >= 0xB4)
        else:
            continue

        offset = _backward_branch_offset(inst, width)
        if offset is None or not -max_back <= offset <= -4:
            continue
        found = _poll_loop_body(blob, i, offset)
        if found is None:
            continue
        loop_start, body = found
        polls.append((ptype, i, loop_start, body, extra, inst))
    return polls
def build_trampoline(ptype, branch_addr, loop_start, body_instrs,
                     extra, orig_inst, tramp_addr):
    """Build the counted-loop trampoline for one poll loop.

    Emitted layout (one 32-bit word per line)::

            MOV   w18, #(TIMEOUT & 0xFFFF)
            MOVK  w18, #(TIMEOUT >> 16), LSL #16   ; only if TIMEOUT > 0xFFFF
        retry:
            <copy of body_instrs>
            <original loop branch> again           ; still waiting
            B     return_addr                      ; condition met
        again:
            SUBS  w18, w18, #1
            B.NE  retry
            B     return_addr                      ; iteration budget used up

    The exit is an unconditional ``B`` guarded by the ORIGINAL loop
    condition (which hops 8 bytes over it) instead of an inverted
    conditional branch straight to ``return_addr``. TBZ/TBNZ reach only
    +/-32 KiB and B.cond/CBZ/CBNZ only +/-1 MiB, while the trampoline
    section sits after the whole blob, so a direct conditional exit can be
    out of range for poll loops early in the image. Reusing the original
    condition also avoids inverting condition codes, which has no valid
    result for the "always" codes.

    Args:
        ptype: 'Bcond', 'TBZ', 'TBNZ', 'CBZ' or 'CBNZ'.
        branch_addr: Byte address of the original backward branch; the
            trampoline returns to ``branch_addr + 4``.
        loop_start: Unused; accepted for signature compatibility.
        body_instrs: Instruction words of the original loop body, copied
            verbatim into the trampoline.
        extra: Condition code for 'Bcond', ``(bit, rt)`` for TBZ/TBNZ,
            ``(rt, is64)`` for CBZ/CBNZ.
        orig_inst: Unused; accepted for signature compatibility.
        tramp_addr: Byte address at which the first returned word will be
            placed.

    Returns:
        List of 32-bit instruction words.

    Raises:
        ValueError: If *ptype* is not one of the supported kinds.

    NOTE(review): the trampoline overwrites w18 and, via SUBS, the NZCV
    flags; the body is copied without relocation, so it must not contain
    PC-relative instructions - confirm both against the target blob.
    """
    return_addr = branch_addr + 4  # instruction after the original branch

    # MOV w18, #(TIMEOUT & 0xFFFF)
    instrs = [0x52800012 | ((TIMEOUT & 0xFFFF) << 5)]
    # MOVK w18, #(TIMEOUT >> 16), LSL #16 (only when the high half is set)
    if TIMEOUT > 0xFFFF:
        instrs.append(0x72A00012 | (((TIMEOUT >> 16) & 0xFFFF) << 5))

    retry_addr = tramp_addr + len(instrs) * 4  # first copied body word
    instrs.extend(body_instrs)

    # Guard: while the original loop condition still holds, hop over the
    # exit branch (always +8 bytes, so always encodable).
    cur = tramp_addr + len(instrs) * 4
    again_addr = cur + 8
    if ptype == 'Bcond':
        instrs.append(encode_b_cond(cur, again_addr, extra))
    elif ptype == 'TBZ':
        bit, rt = extra
        instrs.append(encode_tbz(cur, again_addr, bit, rt))
    elif ptype == 'TBNZ':
        bit, rt = extra
        instrs.append(encode_tbnz(cur, again_addr, bit, rt))
    elif ptype == 'CBZ':
        rt, is64 = extra
        instrs.append(encode_cbz(cur, again_addr, rt, is64))
    elif ptype == 'CBNZ':
        rt, is64 = extra
        instrs.append(encode_cbnz(cur, again_addr, rt, is64))
    else:
        # Without a guard the trampoline would leave on the first pass.
        raise ValueError(f"unknown poll type: {ptype!r}")

    # Condition met: long-range exit back into the original code.
    instrs.append(encode_b(cur + 4, return_addr))

    # again: SUBS w18, w18, #1
    instrs.append(0x71000652)
    # B.NE retry - keep polling while the budget is not exhausted
    cur = tramp_addr + len(instrs) * 4
    instrs.append(encode_b_cond(cur, retry_addr, 1))
    # B return_addr - timeout: fall through instead of hanging
    cur = tramp_addr + len(instrs) * 4
    instrs.append(encode_b(cur, return_addr))
    return instrs
def patch(inpath, outpath):
    """Patch every detected poll loop in *inpath* and write *outpath*.

    The blob is zero-padded to a 16-byte boundary, one trampoline per poll
    loop is appended after it, and each loop's backward branch is replaced
    by an unconditional ``B`` to its trampoline. Progress, sizes and
    truncated SHA256 digests of the original and patched images are
    printed to stdout.

    Args:
        inpath: Path of the original blob.
        outpath: Path the patched blob is written to. It may be the same
            file as *inpath*.
    """
    with open(inpath, 'rb') as f:
        original = f.read()
    blob = bytearray(original)
    orig_size = len(blob)
    polls = find_polls(blob)
    print(f"Found {len(polls)} poll loops in {orig_size} byte blob")

    # Start the trampoline section on a 16-byte boundary.
    blob.extend(bytes(-len(blob) % 16))
    tramp_base = len(blob)

    patched = 0
    for ptype, branch_addr, loop_start, body, extra, orig_inst in polls:
        tramp_addr = len(blob)
        t_instrs = build_trampoline(ptype, branch_addr, loop_start,
                                    body, extra, orig_inst, tramp_addr)
        # Append the trampoline ...
        blob += struct.pack(f'<{len(t_instrs)}I', *t_instrs)
        # ... and redirect the original backward branch to it.
        struct.pack_into('<I', blob, branch_addr,
                         encode_b(branch_addr, tramp_addr))
        patched += 1
        print(f" 0x{branch_addr:05x} {ptype:>5s} -> trampoline @ 0x{tramp_addr:05x} "
              f"({len(t_instrs)} instrs, {len(body)} body)")

    tramp_size = len(blob) - tramp_base
    print(f"\nTrampoline section: {tramp_size} bytes @ 0x{tramp_base:x}")
    print(f"Patched: {patched}/{len(polls)} polls")
    print(f"Timeout: {TIMEOUT} iterations per poll")
    print(f"Blob: {orig_size} -> {len(blob)} bytes (+{len(blob)-orig_size})")

    with open(outpath, 'wb') as f:
        f.write(blob)

    # Hash the bytes read at the start rather than re-opening inpath: the
    # file may just have been overwritten (inpath == outpath), and the
    # extra open() was never closed.
    oh = hashlib.sha256(original).hexdigest()[:16]
    ph = hashlib.sha256(blob).hexdigest()[:16]
    print(f"\nSHA256 orig: {oh}")
    print(f"SHA256 patched: {ph}")
if __name__ == '__main__':
    # Usage: script [INPUT_BLOB [OUTPUT_BLOB]]
    # Each positional argument that is missing falls back to its default.
    if len(sys.argv) > 1:
        inp = sys.argv[1]
    else:
        inp = '/opt/rkbin/bin/rk35/rk3588_ddr_lp4_2112MHz_lp5_2400MHz_v1.19.bin'
    if len(sys.argv) > 2:
        out = sys.argv[2]
    else:
        out = '/opt/work/rk3588_ddr_v1.19_trampoline.bin'
    patch(inp, out)