From 850251702172b8089d489553d3bcdf29f4abe0d5 Mon Sep 17 00:00:00 2001
From: Markus Fritsche <mfritsche@reauktion.de>
Date: Sat, 16 May 2026 23:29:56 +0000
Subject: [PATCH] context: tokenize_fn + per-turn _tokens cache (Phase 8 commit
 #2)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

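Foundation for accurate Context:estimate_tokens. When the optional
tokenize_fn is wired (Phase 8 commit #4 wires it from repl.lua),
estimate_tokens uses it with a per-turn cache, so each turn is
tokenized at most once; a repeat estimate costs one tokenize call
(the system prompt) plus a sum over the cached per-turn counts.
The char/4 path is unchanged when tokenize_fn is nil.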

Changes:

- Context.new accepts opts.tokenize_fn -> stored as self.tokenize_fn.

- Context:estimate_tokens:
    if tokenize_fn nil -> existing char/4 (no behavior change).
    if tokenize_fn set ->
      - tokenize self.system_prompt every call (dynamic per
        compose_background/project/summary; can't cache).
      - for each turn: if t._tokens nil -> compute + cache; else
        use cached. Turn content immutable after append (we never
        mutate stored turns) so cache never goes stale.

- :reset wipes self.turns, taking each turn's _tokens cache with it;
  new turns start with t._tokens == nil and are filled lazily on the
  first count.

8/8 unit cases verified:
  - char/4 path unchanged when no tokenize_fn
  - tokenize_fn called 1 + N times on first estimate (sys + N turns)
  - subsequent estimates fire only 1 tokenize call (sys; turns cached)
  - new turn fires +1 tokenize call on next estimate
  - :reset + fresh turn fires fresh tokenize call (cache died with turn)

No callers wire tokenize_fn yet — Phase 8 commit #4 lands the
repl.lua wiring (after commit #3 adds the enforce_budget extension
that's the real beneficiary of accuracy).

Regression: test_safety 87/87, test_router_model 31/31.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 context.lua | 29 +++++++++++++++++++++++++++--
 1 file changed, 27 insertions(+), 2 deletions(-)

diff --git a/context.lua b/context.lua
index ffa02e0..71058cc 100644
--- a/context.lua
+++ b/context.lua
@@ -72,6 +72,11 @@ function M.new(opts)
         -- warn_at_tokens. Both survive :reset (R8 parity).
         usage_totals         = {},
         cost_warn_state      = { dollars = false, tokens = false },
+        -- Phase 8 (docs/PHASE8.md): optional tokenize callback. When
+        -- set, Context:estimate_tokens uses it (with a per-turn cache
+        -- on turn._tokens for amortization). nil = char/4 fallback
+        -- (Phase 0 §8 — existing behavior, no change).
+        tokenize_fn          = opts.tokenize_fn,
     }, Context)
 end
 
@@ -356,9 +361,29 @@ function Context:enforce_budget()
     return evicted
 end
 
--- Coarse char/4 token estimate per §8. Phase 0 visibility only; accurate
--- tokenization is Q1 (target Phase 3).
+-- Phase 0 §8: char/4 heuristic. Phase 8 (Q1 resolved): when
+-- self.tokenize_fn is set, use it for accuracy. Per-turn _tokens
+-- cache amortizes after the first count.
+--
+-- system_prompt is recomposed each call (memory/project/summary
+-- blocks are dynamic), so it's not cached — one tokenize round-trip
+-- per call when tokenize_fn is active.
+--
+-- Turn content is immutable after append (see Context:append; we
+-- never mutate stored turns). The cache on t._tokens is therefore
+-- safe to live forever on the turn; it dies with the turn on :reset.
 function Context:estimate_tokens()
+    if self.tokenize_fn then
+        local n = self.tokenize_fn(self.system_prompt)
+        for _, t in ipairs(self.turns) do
+            if t._tokens == nil then
+                t._tokens = self.tokenize_fn(t.content)
+            end
+            n = n + t._tokens
+        end
+        return n
+    end
+    -- char/4 fallback (Phase 0 behavior, unchanged when tokenize_fn nil)
     local n = #self.system_prompt
     for _, t in ipairs(self.turns) do
         n = n + #t.content