diff --git a/context.lua b/context.lua index ffa02e0..71058cc 100644 --- a/context.lua +++ b/context.lua @@ -72,6 +72,11 @@ function M.new(opts) -- warn_at_tokens. Both survive :reset (R8 parity). usage_totals = {}, cost_warn_state = { dollars = false, tokens = false }, + -- Phase 8 (docs/PHASE8.md): optional tokenize callback. When + -- set, Context:estimate_tokens uses it (with a per-turn cache + -- on turn._tokens for amortization). nil = char/4 fallback + -- (Phase 0 §8 — existing behavior, no change). + tokenize_fn = opts.tokenize_fn, }, Context) end @@ -356,9 +361,29 @@ function Context:enforce_budget() return evicted end --- Coarse char/4 token estimate per §8. Phase 0 visibility only; accurate --- tokenization is Q1 (target Phase 3). +-- Phase 0 §8: char/4 heuristic. Phase 8 (Q1 resolved): when +-- self.tokenize_fn is set, use it for accuracy. Per-turn _tokens +-- cache amortizes after the first count. +-- +-- system_prompt is recomposed each call (memory/project/summary +-- blocks are dynamic), so it's not cached — one tokenize round-trip +-- per call when tokenize_fn is active. +-- +-- Turn content is immutable after append (see Context:append; we +-- never mutate stored turns). The cache on t._tokens is therefore +-- safe to live forever on the turn; it dies with the turn on :reset. function Context:estimate_tokens() + if self.tokenize_fn then + local n = self.tokenize_fn(self.system_prompt) + for _, t in ipairs(self.turns) do + if t._tokens == nil then + t._tokens = self.tokenize_fn(t.content) + end + n = n + t._tokens + end + return n + end + -- char/4 fallback (Phase 0 behavior, unchanged when tokenize_fn nil) local n = #self.system_prompt for _, t in ipairs(self.turns) do n = n + #t.content