#!/usr/bin/env python3
"""
RK3588 DDR Blob Patcher v5 - Assembly trampolines with counted loops.

Appends a trampoline section AFTER the entire blob. Each poll loop's
backward branch is replaced with a B to its trampoline. The trampoline
does a counted loop and branches back.

The blob is loaded at a fixed address (0x0) by BL2. The data section
references are absolute addresses within the blob. By appending (not
inserting), we don't shift any existing code or data.

The only risk: if the BL2 loader checks the blob size and rejects larger
blobs. Testing with QEMU will verify the control flow.
"""
import hashlib
import os
import struct
import sys

TIMEOUT = 0x20000  # 128K iterations (~1ms at 2GHz)


def _branch_offset(src, dst, bits):
    """Return the word offset ``(dst - src) / 4`` as a *bits*-wide field.

    The result is the two's-complement immediate, already masked to *bits*
    bits, ready to be shifted into an instruction word.

    Raises:
        ValueError: if the byte distance is not a multiple of 4, or the word
            offset does not fit in a signed *bits*-bit immediate.  Masking
            such an offset would silently encode a branch to a different
            address, so it is rejected instead.
    """
    delta = dst - src
    if delta % 4:
        raise ValueError(
            f"branch 0x{src:x} -> 0x{dst:x}: offset {delta} is not "
            f"word-aligned")
    words = delta // 4
    limit = 1 << (bits - 1)
    if not -limit <= words < limit:
        raise ValueError(
            f"branch 0x{src:x} -> 0x{dst:x}: offset {delta} does not fit "
            f"in a signed {bits}-bit word immediate")
    return words & ((1 << bits) - 1)


def _check_field(name, value, limit):
    """Return *value* if ``0 <= value < limit``; raise ValueError otherwise."""
    if not 0 <= value < limit:
        raise ValueError(f"{name}={value} out of range 0..{limit - 1}")
    return value


def _encode_test_branch(opcode, src, dst, bit, rt):
    """Encode TBZ/TBNZ: *opcode* | b5 (bit 31) | b40 (bits 23:19) | imm14 | Rt."""
    off = _branch_offset(src, dst, 14)
    _check_field("bit", bit, 64)
    _check_field("rt", rt, 32)
    b5 = bit >> 5
    b40 = bit & 0x1F
    return opcode | (b5 << 31) | (b40 << 19) | (off << 5) | rt


def _encode_compare_branch(opcode32, src, dst, rt, is64):
    """Encode CBZ/CBNZ: *opcode32* (sf in bit 31 when *is64*) | imm19 | Rt."""
    off = _branch_offset(src, dst, 19)
    _check_field("rt", rt, 32)
    opcode = opcode32 | 0x80000000 if is64 else opcode32
    return opcode | (off << 5) | rt


def encode_b(src, dst):
    """Encode an unconditional ``B`` located at *src* that jumps to *dst*.

    Both addresses are byte addresses.  The 26-bit word immediate reaches
    +/-128 MiB.

    Raises:
        ValueError: if *dst* is misaligned relative to *src* or out of range.
    """
    return 0x14000000 | _branch_offset(src, dst, 26)


def encode_b_cond(src, dst, cond):
    """Encode ``B.cond`` at *src* targeting *dst*.

    *cond* is the 4-bit condition code placed in bits 3:0.  The 19-bit word
    immediate reaches +/-1 MiB.

    Raises:
        ValueError: if the offset is misaligned / out of range, or *cond* is
            not in 0..15.
    """
    off = _branch_offset(src, dst, 19)
    return 0x54000000 | (off << 5) | _check_field("cond", cond, 16)


def encode_tbz(src, dst, bit, rt):
    """Encode ``TBZ`` at *src*: branch to *dst* if bit *bit* of register *rt* is 0.

    The 14-bit word immediate reaches only +/-32 KiB.

    Raises:
        ValueError: if the offset is misaligned / out of range, *bit* is not
            in 0..63, or *rt* is not in 0..31.
    """
    return _encode_test_branch(0x36000000, src, dst, bit, rt)


def encode_tbnz(src, dst, bit, rt):
    """Encode ``TBNZ`` at *src*: branch to *dst* if bit *bit* of register *rt* is 1.

    The 14-bit word immediate reaches only +/-32 KiB.

    Raises:
        ValueError: if the offset is misaligned / out of range, *bit* is not
            in 0..63, or *rt* is not in 0..31.
    """
    return _encode_test_branch(0x37000000, src, dst, bit, rt)


def encode_cbz(src, dst, rt, is64=False):
    """Encode ``CBZ`` at *src*: branch to *dst* if register *rt* is zero.

    *is64* selects the 64-bit (X) register form.  The 19-bit word immediate
    reaches +/-1 MiB.

    Raises:
        ValueError: if the offset is misaligned / out of range, or *rt* is
            not in 0..31.
    """
    return _encode_compare_branch(0x34000000, src, dst, rt, is64)


def encode_cbnz(src, dst, rt, is64=False):
    """Encode ``CBNZ`` at *src*: branch to *dst* if register *rt* is non-zero.

    *is64* selects the 64-bit (X) register form.  The 19-bit word immediate
    reaches +/-1 MiB.

    Raises:
        ValueError: if the offset is misaligned / out of range, or *rt* is
            not in 0..31.
    """
    return _encode_compare_branch(0x35000000, src, dst, rt, is64)


# NOTE(review): in the text this was recovered from, the header of
# `find_polls(blob)` (`polls = []`, then a scan
# `for i in range(0, len(blob) - 12, 4):`) starts at this point.  Its body
# continues on the following, extraction-damaged line and was left untouched.
inst = struct.unpack_from('> 5) & 0x7FFFF + if imm19 & 0x40000: + offset = -((~imm19 & 0x7FFFF) + 1) * 4 + if -16 <= offset <= -4: + ls = i + offset + has_load = any((struct.unpack_from('> 24) & 0xFF + if op in (0x36, 0x37): + imm14 = (inst >> 5) & 0x3FFF + if imm14 & 0x2000: + offset = -((~imm14 & 0x3FFF) + 1) * 4 + if -12 <= offset <= -4: + ls = i + offset + has_load = any((struct.unpack_from('> 19) & 0x1F) | ((inst >> 26) & 0x20) + rt = inst & 0x1F + n_body = (i - ls) // 4 + body = [struct.unpack_from('> 5) & 0x7FFFF + if imm19 & 0x40000: + offset = -((~imm19 & 0x7FFFF) + 1) * 4 + if -12 <= offset <= -4: + ls = i + offset + has_load = any((struct.unpack_from('= 0xB4 + n_body = (i - ls) // 4 + body = [struct.unpack_from('> 16), LSL #16 (if needed) + if TIMEOUT > 0xFFFF: + instrs.append(0x72A00012 | (((TIMEOUT >> 16) & 0xFFFF) << 5)) + + ldr_offset = len(instrs) # index of first body instruction + + # Copy loop body (LDR + any intermediate) + for instr in body_instrs: + instrs.append(instr) + + # Check: did condition become true? 
If so, exit to return_addr + cur = tramp_addr + len(instrs) * 4 + if ptype == 'Bcond': + cond = extra + inv = cond ^ 1 # invert: loop while cond → exit when !cond + instrs.append(encode_b_cond(cur, return_addr, inv)) + elif ptype == 'TBZ': + bit, rt = extra + # TBZ loops while bit=0 → exit when bit=1 (TBNZ) + instrs.append(encode_tbnz(cur, return_addr, bit, rt)) + elif ptype == 'TBNZ': + bit, rt = extra + # TBNZ loops while bit=1 → exit when bit=0 (TBZ) + instrs.append(encode_tbz(cur, return_addr, bit, rt)) + elif ptype == 'CBZ': + rt, is64 = extra + instrs.append(encode_cbnz(cur, return_addr, rt, is64)) + elif ptype == 'CBNZ': + rt, is64 = extra + instrs.append(encode_cbz(cur, return_addr, rt, is64)) + + # SUBS w18, w18, #1 + cur = tramp_addr + len(instrs) * 4 + instrs.append(0x71000652) + + # B.NE back to LDR in trampoline + cur = tramp_addr + len(instrs) * 4 + ldr_addr = tramp_addr + ldr_offset * 4 + instrs.append(encode_b_cond(cur, ldr_addr, 1)) # NE + + # B return_addr (timeout fallthrough) + cur = tramp_addr + len(instrs) * 4 + instrs.append(encode_b(cur, return_addr)) + + return instrs + +def patch(inpath, outpath): + with open(inpath, 'rb') as f: + blob = bytearray(f.read()) + orig_size = len(blob) + + polls = find_polls(blob) + print(f"Found {len(polls)} poll loops in {orig_size} byte blob") + + # Append trampoline section + # Align to 16 bytes + while len(blob) % 16: + blob.append(0) + tramp_base = len(blob) + + patched = 0 + for ptype, branch_addr, loop_start, body, extra, orig_inst in polls: + tramp_addr = len(blob) + t_instrs = build_trampoline(ptype, branch_addr, loop_start, + body, extra, orig_inst, tramp_addr) + + # Append trampoline + for inst in t_instrs: + blob += struct.pack('5s} -> trampoline @ 0x{tramp_addr:05x} " + f"({len(t_instrs)} instrs, {len(body)} body)") + + tramp_size = len(blob) - tramp_base + print(f"\nTrampoline section: {tramp_size} bytes @ 0x{tramp_base:x}") + print(f"Patched: {patched}/{len(polls)} polls") + print(f"Timeout: 
{TIMEOUT} iterations per poll") + print(f"Blob: {orig_size} -> {len(blob)} bytes (+{len(blob)-orig_size})") + + with open(outpath, 'wb') as f: + f.write(blob) + + oh = hashlib.sha256(open(inpath,'rb').read()).hexdigest()[:16] + ph = hashlib.sha256(blob).hexdigest()[:16] + print(f"\nSHA256 orig: {oh}") + print(f"SHA256 patched: {ph}") + +if __name__ == '__main__': + inp = sys.argv[1] if len(sys.argv) > 1 else '/opt/rkbin/bin/rk35/rk3588_ddr_lp4_2112MHz_lp5_2400MHz_v1.19.bin' + out = sys.argv[2] if len(sys.argv) > 2 else '/opt/work/rk3588_ddr_v1.19_trampoline.bin' + patch(inp, out) diff --git a/rk3588_ddr_v1.19_trampoline.bin b/rk3588_ddr_v1.19_trampoline.bin new file mode 100644 index 0000000..cd1ddb1 Binary files /dev/null and b/rk3588_ddr_v1.19_trampoline.bin differ