From 94b7d869262ceff0e0e3da03b841049a1eb56ce4 Mon Sep 17 00:00:00 2001
From: Markus Fritsche <mfritsche@reauktion.de>
Date: Sat, 16 May 2026 23:31:40 +0000
Subject: [PATCH] repl: wire tokenize_fn + :cost detail estimate row (Phase 8
 commit #4)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Activates Phase 8 pillars 2+3+5 end-to-end and adds the R3-revised
:cost detail trailing line.

Changes:

- When cfg.tokenize.use_endpoint is true, ctx_opts.tokenize_fn is
  set to `function(text) return broker.token_count(active_cfg, text) end`
  before Context.new fires. R4: the closure body references
  active_cfg DIRECTLY as an upvalue. A Lua closure captures the
  variable itself, not the value it held when the closure was
  created, so the current active_cfg is read on every call and
  subsequent :model switches re-route to the new model's tokenizer
  automatically (verified by E2E: a :model cloud call after the
  fast call still produces a clean estimate row).

- :cost detail gains a trailing line per R3:
    estimated session ctx: <N> tokens; token_budget=<M> (X.Y% used)
  N comes from ctx:estimate_tokens(), i.e. the current in-memory
  snapshot. It is NOT meant to be compared against the accumulator
  sums above, which are cumulative across all calls and include
  evicted turns. M is ctx.token_budget (0 when unset, in which case
  the percentage is reported as 0.0). The line gives at-a-glance
  budget utilization.

E2E verified against live broker:
  - fast model call -> 168 tokens estimated (real BPE via /tokenize)
  - :model cloud + cloud call -> 178 tokens estimated (closure
    follows :model switch correctly per R4)
  - 21% / 22.3% budget utilization shown
  - Accumulator sums and estimate are intentionally different
    (sums are cumulative, estimate is the current snapshot); per
    R3 they are displayed as separate lines

Regression: test_safety 87/87, test_router_model 31/31, repl loads.

With this commit landed, Phase 8 is functionally complete; commit
#5 is config example + status bump.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 repl.lua | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)
diff --git a/repl.lua b/repl.lua
index 366cf13..a865dcf 100644
--- a/repl.lua
+++ b/repl.lua
@@ -362,6 +362,18 @@ function M.run(config)
     if config.context and config.context.summarize_on_evict then
         ctx_opts.summarize_fn = make_summarize_fn()
     end
+    -- Phase 8 (docs/PHASE8.md): when cfg.tokenize.use_endpoint is true,
+    -- wire a tokenize_fn so Context:estimate_tokens uses real counts
+    -- from <endpoint>/tokenize (broker.token_count handles per-endpoint
+    -- capability cache + char/4 fallback). R4: the closure body MUST
+    -- reference `active_cfg` directly as an upvalue (NOT capture by
+    -- value) so :model switches naturally re-route to the new model's
+    -- tokenizer. A5 verified Lua upvalue semantics resolve at call time.
+    if config.tokenize and config.tokenize.use_endpoint then
+        ctx_opts.tokenize_fn = function(text)
+            return broker.token_count(active_cfg, text)
+        end
+    end
     local ctx = Context.new(ctx_opts)
 
     -- Phase 2: MCP sessions. Populated from config.mcp.servers at startup
@@ -2095,6 +2107,16 @@ function M.run(config)
                     r.model, r.category, r.calls, r.prompt, r.completion, r.cost,
                     r.is_local and "  (local)" or ""))
             end
+            -- Phase 8 R3: trailing summary line — current ctx snapshot
+            -- (NOT meant to be compared with the accumulator sums above:
+            -- the accumulator carries cumulative totals across all calls,
+            -- including evicted turns, while estimate_tokens covers only
+            -- what is currently in memory). Shows budget use at a glance.
+            local est       = ctx:estimate_tokens()
+            local budget    = ctx.token_budget or 0
+            local pct       = (budget > 0) and (est * 100 / budget) or 0
+            renderer.status(("estimated session ctx: %d tokens; token_budget=%d (%.1f%% used)"):format(
+                est, budget, pct))
             return
         end
         renderer.status("usage: :cost [detail|reset]")