#!/usr/bin/env python3
"""
RK3588 DDR Blob Patcher v5 - Assembly trampolines with counted loops.

Appends a trampoline section AFTER the entire blob. Each poll loop's
backward branch is replaced with a B to its trampoline. The trampoline
does a counted loop and branches back.

The blob is loaded at a fixed address (0x0) by BL2. The data section
references are absolute addresses within the blob. By appending (not
inserting), we don't shift any existing code or data.

The only risk: if the BL2 loader checks the blob size and rejects larger
blobs. Testing with QEMU will verify the control flow.
"""

# NOTE(review): this file was recovered from a copy whose line breaks were
# collapsed and in which some spans (roughly: everything between a '<' and
# the following '>') were lost.  Every place that had to be reconstructed
# rather than copied is marked with "NOTE(review)" below and should be
# compared against the upstream script before the output is trusted.

import hashlib
import os
import struct
import sys

# Iteration budget loaded into w18 by every trampoline.
# Original author's estimate: 128K iterations (~1ms at 2GHz).
TIMEOUT = 0x20000

# Fixed A64 encodings used by the trampolines (Rd = Rn = w18).
_MOVZ_W18 = 0x52800012        # MOVZ w18, #imm16
_MOVK_W18_LSL16 = 0x72A00012  # MOVK w18, #imm16, LSL #16
_SUBS_W18_1 = 0x71000652      # SUBS w18, w18, #1
_COND_NE = 1

# NOTE(review): the original load-detection predicate was lost.  It is
# reconstructed here as "LDR Wt, [Xn, #imm]" (32-bit, unsigned immediate
# offset).  Confirm against the upstream script; a different mask changes
# which loops are patched.
_LOAD_MASK = 0xFFC00000
_LOAD_VALUE = 0xB9400000


def _rel_words(src: int, dst: int, bits: int) -> int:
    """Return the word offset src -> dst as a `bits`-wide two's-complement field.

    Raises:
        ValueError: if the byte distance is not a multiple of 4 or the word
            offset does not fit in a signed `bits`-bit immediate.  The
            original code masked the value silently, which would emit a
            branch to the wrong address.
    """
    delta = dst - src
    if delta % 4:
        raise ValueError(f"branch 0x{src:x} -> 0x{dst:x} is not word-aligned")
    words = delta // 4
    bound = 1 << (bits - 1)
    if not -bound <= words < bound:
        raise ValueError(
            f"branch 0x{src:x} -> 0x{dst:x} does not fit a signed "
            f"{bits}-bit word offset"
        )
    return words & ((1 << bits) - 1)


def encode_b(src: int, dst: int) -> int:
    """Encode an unconditional `B` located at `src` that targets `dst`.

    Raises ValueError if the target is misaligned or outside the 26-bit
    word-offset range.
    """
    return 0x14000000 | _rel_words(src, dst, 26)


def encode_b_cond(src: int, dst: int, cond: int) -> int:
    """Encode `B.cond` at `src` targeting `dst`; `cond` is the 4-bit condition.

    Raises ValueError if the target is misaligned or outside the 19-bit
    word-offset range.
    """
    return 0x54000000 | (_rel_words(src, dst, 19) << 5) | cond


def _encode_test_branch(opcode: int, src: int, dst: int, bit: int, rt: int) -> int:
    """Shared field packing for TBZ/TBNZ (14-bit word offset)."""
    b5 = (bit >> 5) & 1      # top bit of the tested bit number -> bit 31
    b40 = bit & 0x1F         # low five bits of the bit number -> bits 23:19
    return opcode | (b5 << 31) | (b40 << 19) | (_rel_words(src, dst, 14) << 5) | rt


def encode_tbz(src: int, dst: int, bit: int, rt: int) -> int:
    """Encode `TBZ Rt, #bit, dst` located at `src`.

    Raises ValueError if the target is misaligned or outside the 14-bit
    word-offset range.
    """
    return _encode_test_branch(0x36000000, src, dst, bit, rt)


def encode_tbnz(src: int, dst: int, bit: int, rt: int) -> int:
    """Encode `TBNZ Rt, #bit, dst` located at `src`.

    Raises ValueError if the target is misaligned or outside the 14-bit
    word-offset range.
    """
    return _encode_test_branch(0x37000000, src, dst, bit, rt)


def encode_cbz(src: int, dst: int, rt: int, is64: bool = False) -> int:
    """Encode `CBZ Rt, dst` located at `src` (`is64` selects the X form).

    Raises ValueError if the target is misaligned or outside the 19-bit
    word-offset range.
    """
    op = 0xB4000000 if is64 else 0x34000000
    return op | (_rel_words(src, dst, 19) << 5) | rt


def encode_cbnz(src: int, dst: int, rt: int, is64: bool = False) -> int:
    """Encode `CBNZ Rt, dst` located at `src` (`is64` selects the X form).

    Raises ValueError if the target is misaligned or outside the 19-bit
    word-offset range.
    """
    op = 0xB5000000 if is64 else 0x35000000
    return op | (_rel_words(src, dst, 19) << 5) | rt


def _word(blob, offset: int) -> int:
    """Read one little-endian 32-bit instruction word from `blob`."""
    return struct.unpack_from('<I', blob, offset)[0]


def _backward_offset(imm: int, bits: int):
    """Return the byte offset of a negative `bits`-wide word immediate.

    Returns None for a forward (non-negative) immediate, because only
    backward branches can close a poll loop.
    """
    if not imm & (1 << (bits - 1)):
        return None
    return (imm - (1 << bits)) * 4


def _is_load(inst: int) -> bool:
    """Return True if `inst` looks like the register load of a poll loop.

    NOTE(review): reconstructed predicate, see _LOAD_MASK above.
    """
    return (inst & _LOAD_MASK) == _LOAD_VALUE


def find_polls(blob):
    """Scan `blob` for short backward-branching loops that contain a load.

    Every 4-byte-aligned word is decoded as a candidate loop-closing branch:

    * `B.cond` branching back 4..16 bytes -> ``'Bcond'``, extra = cond
    * `TBZ`/`TBNZ` (opcode byte 0x36/0x37) branching back 4..12 bytes
      -> ``'TBZ'``/``'TBNZ'``, extra = (bit, rt)
    * `CBZ`/`CBNZ` (32- or 64-bit) branching back 4..12 bytes
      -> ``'CBZ'``/``'CBNZ'``, extra = (rt, is64)

    A candidate is kept only if at least one instruction between the loop
    start and the branch is recognised by `_is_load`.

    Returns:
        A list of tuples ``(ptype, branch_addr, loop_start, body, extra,
        orig_inst)`` where `body` is the list of instruction words from
        `loop_start` up to (not including) the branch.

    NOTE(review): the opcode tests for B.cond and CBZ/CBNZ, the load
    predicate and the tuple construction were lost in the recovered source
    and are rebuilt from the surviving fragments and from how
    `build_trampoline` and `patch` consume the tuples.
    """
    polls = []
    # The "- 12" scan bound is kept exactly as in the recovered source.
    for i in range(0, len(blob) - 12, 4):
        inst = _word(blob, i)
        op = (inst >> 24) & 0xFF

        if (inst & 0xFF000010) == 0x54000000:
            ptype = 'Bcond'
            offset = _backward_offset((inst >> 5) & 0x7FFFF, 19)
            max_back = 16
            extra = inst & 0xF
        elif op in (0x36, 0x37):
            ptype = 'TBZ' if op == 0x36 else 'TBNZ'
            offset = _backward_offset((inst >> 5) & 0x3FFF, 14)
            max_back = 12
            bit = ((inst >> 19) & 0x1F) | ((inst >> 26) & 0x20)
            extra = (bit, inst & 0x1F)
        elif op in (0x34, 0x35, 0xB4, 0xB5):
            ptype = 'CBZ' if op in (0x34, 0xB4) else 'CBNZ'
            offset = _backward_offset((inst >> 5) & 0x7FFFF, 19)
            max_back = 12
            extra = (inst & 0x1F, op >= 0xB4)
        else:
            continue

        if offset is None or not -max_back <= offset <= -4:
            continue
        ls = i + offset
        if ls < 0:
            # A loop start before the blob is not a real loop; a negative
            # offset would also make unpack_from index from the end.
            continue

        body = [_word(blob, addr) for addr in range(ls, i, 4)]
        if any(_is_load(w) for w in body):
            polls.append((ptype, i, ls, body, extra, inst))
    return polls


def build_trampoline(ptype, branch_addr, loop_start, body_instrs, extra,
                     orig_inst, tramp_addr):
    """Build the counted-loop trampoline for one poll loop.

    Layout (one 32-bit word per line)::

              MOVZ w18, #(TIMEOUT & 0xFFFF)
              MOVK w18, #(TIMEOUT >> 16), LSL #16    ; only if TIMEOUT > 0xFFFF
        loop: <copy of the loop body>
              <inverted loop branch>  -> exit        ; poll condition satisfied
              SUBS w18, w18, #1
              B.NE loop
        exit: B return_addr                          ; also the timeout path

    The conditional exit targets the trampoline's own final `B` instead of
    `return_addr` directly.  The trampoline lives after the end of the blob,
    so a direct TBZ/TBNZ (+/-32 KiB reach) back into the blob could be out of
    range; the unconditional `B` has +/-128 MiB reach.

    Args:
        ptype: 'Bcond', 'TBZ', 'TBNZ', 'CBZ' or 'CBNZ'.
        branch_addr: blob offset of the original loop-closing branch.
        loop_start: blob offset of the loop's first instruction (unused here;
            kept for interface compatibility).
        body_instrs: instruction words of the loop body, copied verbatim.
            NOTE(review): PC-relative instructions in the body would not
            survive relocation; none are filtered out here.
        extra: cond for 'Bcond'; (bit, rt) for 'TBZ'/'TBNZ'; (rt, is64) for
            'CBZ'/'CBNZ'.
        orig_inst: original branch word (unused here; kept for interface
            compatibility).
        tramp_addr: blob offset at which the trampoline will be placed.

    Returns:
        List of 32-bit instruction words.

    Raises:
        ValueError: unknown `ptype`, TIMEOUT not representable in w18, or a
            branch that cannot be encoded.

    NOTE(review): w18 is used as the scratch counter and SUBS overwrites the
    condition flags on the timeout path; whether the blob relies on either
    cannot be seen from this script.
    """
    if not 0 < TIMEOUT <= 0xFFFFFFFF:
        raise ValueError(f"TIMEOUT {TIMEOUT:#x} does not fit in w18")

    # NOTE(review): reconstructed -- execution resumes at the instruction
    # following the original backward branch.
    return_addr = branch_addr + 4

    # NOTE(review): the MOVZ line was lost; the MOVK line below survived.
    instrs = [_MOVZ_W18 | ((TIMEOUT & 0xFFFF) << 5)]
    if TIMEOUT > 0xFFFF:
        instrs.append(_MOVK_W18_LSL16 | (((TIMEOUT >> 16) & 0xFFFF) << 5))

    loop_index = len(instrs)          # index of the first copied body word
    instrs.extend(body_instrs)

    # Three words follow the body before the exit stub:
    # conditional exit, SUBS, B.NE.
    check_addr = tramp_addr + len(instrs) * 4
    exit_addr = check_addr + 3 * 4

    if ptype == 'Bcond':
        # Loop ran while cond held -> leave when the inverted condition holds.
        instrs.append(encode_b_cond(check_addr, exit_addr, extra ^ 1))
    elif ptype == 'TBZ':
        bit, rt = extra
        # Looped while bit == 0 -> leave when bit == 1.
        instrs.append(encode_tbnz(check_addr, exit_addr, bit, rt))
    elif ptype == 'TBNZ':
        bit, rt = extra
        # Looped while bit == 1 -> leave when bit == 0.
        instrs.append(encode_tbz(check_addr, exit_addr, bit, rt))
    elif ptype == 'CBZ':
        rt, is64 = extra
        instrs.append(encode_cbnz(check_addr, exit_addr, rt, is64))
    elif ptype == 'CBNZ':
        rt, is64 = extra
        instrs.append(encode_cbz(check_addr, exit_addr, rt, is64))
    else:
        raise ValueError(f"unknown poll type: {ptype!r}")

    instrs.append(_SUBS_W18_1)

    # Retry the body while the counter has not reached zero.
    back_addr = tramp_addr + len(instrs) * 4
    loop_addr = tramp_addr + loop_index * 4
    instrs.append(encode_b_cond(back_addr, loop_addr, _COND_NE))

    # Exit stub: reached from the conditional exit and on timeout.
    instrs.append(encode_b(exit_addr, return_addr))
    return instrs


def patch(inpath, outpath):
    """Read the blob at `inpath`, append trampolines, write it to `outpath`.

    For every loop reported by `find_polls` a trampoline is appended to the
    (16-byte padded) blob and the loop's backward branch is overwritten with
    an unconditional `B` to that trampoline.  Progress and the first 16 hex
    digits of the SHA-256 of both images are printed.
    """
    with open(inpath, 'rb') as f:
        original = f.read()
    blob = bytearray(original)
    orig_size = len(blob)

    polls = find_polls(blob)
    print(f"Found {len(polls)} poll loops in {orig_size} byte blob")

    # Append trampoline section, aligned to 16 bytes.
    blob.extend(b'\0' * (-len(blob) % 16))
    tramp_base = len(blob)
    patched = 0

    for ptype, branch_addr, loop_start, body, extra, orig_inst in polls:
        tramp_addr = len(blob)
        t_instrs = build_trampoline(ptype, branch_addr, loop_start, body,
                                    extra, orig_inst, tramp_addr)

        # Append trampoline.
        blob += struct.pack(f'<{len(t_instrs)}I', *t_instrs)

        # NOTE(review): reconstructed from the module docstring -- replace
        # the loop's backward branch with a B to the trampoline.
        struct.pack_into('<I', blob, branch_addr,
                         encode_b(branch_addr, tramp_addr))
        patched += 1

        # NOTE(review): the start of this message was lost; only the part
        # from "5s} -> trampoline" onwards is original.
        print(f"  0x{branch_addr:05x} {ptype:>5s} -> trampoline @ 0x{tramp_addr:05x} "
              f"({len(t_instrs)} instrs, {len(body)} body)")

    tramp_size = len(blob) - tramp_base
    print(f"\nTrampoline section: {tramp_size} bytes @ 0x{tramp_base:x}")
    print(f"Patched: {patched}/{len(polls)} polls")
    print(f"Timeout: {TIMEOUT} iterations per poll")
    print(f"Blob: {orig_size} -> {len(blob)} bytes (+{len(blob)-orig_size})")

    with open(outpath, 'wb') as f:
        f.write(blob)

    # Hash the bytes already in memory instead of re-opening the input
    # (the original leaked that second file handle).
    oh = hashlib.sha256(original).hexdigest()[:16]
    ph = hashlib.sha256(blob).hexdigest()[:16]
    print(f"\nSHA256 orig: {oh}")
    print(f"SHA256 patched: {ph}")


if __name__ == '__main__':
    inp = sys.argv[1] if len(sys.argv) > 1 else '/opt/rkbin/bin/rk35/rk3588_ddr_lp4_2112MHz_lp5_2400MHz_v1.19.bin'
    out = sys.argv[2] if len(sys.argv) > 2 else '/opt/work/rk3588_ddr_v1.19_trampoline.bin'
    patch(inp, out)