From 7ef2a6ed5c31a8489c56ca297ceac3e9c2099b71 Mon Sep 17 00:00:00 2001
From: Markus Fritsche
Date: Sat, 16 May 2026 23:29:17 +0000
Subject: [PATCH] broker: token_count + endpoint capability cache (Phase 8 commit #1)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Foundation for Phase 8 — accurate tokenization via /tokenize where
supported, char/4 fallback otherwise.

Changes:
- `M.token_count(model_cfg, text)`:
    Empty text -> 0.
    No endpoint -> char/4 immediately.
    Capability cache says false -> char/4.
    Otherwise -> POST `/tokenize` with `{content, model}`, 2s timeout.
    On 200 + parseable `{tokens=[...]}`: cache true, return #tokens.
    Anything else (non-200 / parse-fail / transport err / timeout):
    cache false, char/4.
- `_tokenize_capable` cache keyed by ENDPOINT ONLY per R6 — B1 confirmed
  /tokenize ignores the model field, so same-endpoint presets share one
  cache entry. If a future broker honors the model field, revisit.
- `M.tokenize_supported(model_cfg)`: returns nil/true/false for the
  cached state (introspection for tests + future :tokenize meta).
- `M._reset_tokenize_cache()`: test hook so the session-local cache
  doesn't leak between test runs sharing a LuaJIT VM.

Live verified against hossenfelder + a deliberately-broken endpoint:
- "hello world" -> 2 tokens (matches manual curl probe)
- 901-char text -> 201 real tokens vs 225 char/4 (24-token gap; real is
  LOWER here, opposite direction from the README probe where it was
  higher — confirms heuristic is inaccurate in both directions)
- Pre-probe: tokenize_supported() returns nil
- Post-probe: tokenize_supported() returns true (local) / false (broken)
- Broken endpoint second call: still char/4, no re-probe
- Empty / nil text edge cases handled

Regression: test_safety 87/87, test_router_model 31/31, repl loads.
Co-Authored-By: Claude Opus 4.7 (1M context)
---
 broker.lua | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 72 insertions(+)

diff --git a/broker.lua b/broker.lua
index a3e9e96..2527200 100644
--- a/broker.lua
+++ b/broker.lua
@@ -229,4 +229,76 @@ function M.chat(model_cfg, messages, opts)
   return table.concat(parts), captured_usage
 end
 
+-- ---------------------------------------------------------------- token_count (Phase 8)
+-- Returns an accurate token count by hitting /tokenize when
+-- the endpoint supports it; falls back to the Phase 0 §8 char/4
+-- heuristic otherwise. Per-endpoint capability cache (session-local;
+-- key per R6 is endpoint-only since B1 confirms /tokenize ignores the
+-- model field on the observed broker).
+--
+-- Never errors. Returns a non-negative integer.
+-- 2s timeout per call so a misbehaving endpoint can't stall the
+-- caller; first miss caches as unsupported for the session.
+local _tokenize_capable = {} -- [endpoint] = true | false (nil = unprobed)
+
+-- Phase 0 char/4 heuristic: byte length of `text` over four, rounded down.
+local function char4_estimate(text)
+  return math.floor(#text / 4)
+end
+
+-- One /tokenize round trip. Returns the number of entries in the reply's
+-- `tokens` array, or nil when the reply is not a 200 whose body decodes
+-- to a table carrying a `tokens` table. May raise if the json/curl layer
+-- raises (not visible here); M.token_count runs it under pcall.
+local function tokenize_remote(ep, model, text)
+  -- Parentheses keep only gsub's first result (the string, not the count).
+  local url = (ep:gsub("/+$", "")) .. "/tokenize"
+  local body = json.encode({ content = text, model = model })
+  local out, status = curl.post(url, body,
+    { "Content-Type: application/json" },
+    2000) -- 2s timeout per R5 risk row
+  if not (status == 200 and out) then return nil end
+  local doc = json.decode(out)
+  -- A scalar JSON reply (true, 42, ...) decodes to a non-table value;
+  -- indexing a number or boolean raises, so check before reading `tokens`.
+  if type(doc) ~= "table" then return nil end
+  local toks = doc.tokens
+  if type(toks) ~= "table" then return nil end
+  return #toks
+end
+
+-- @param model_cfg table|nil  reads `.endpoint` and `.model`
+-- @param text      string|nil nil/false is treated as ""
+-- @return integer  token count (>= 0)
+function M.token_count(model_cfg, text)
+  text = text or ""
+  if text == "" then return 0 end
+  local ep = model_cfg and model_cfg.endpoint
+  if not ep or _tokenize_capable[ep] == false then
+    return char4_estimate(text)
+  end
+  -- pcall upholds "never errors": a raising decode/transport call is
+  -- handled exactly like any other failed probe.
+  local ok, count = pcall(tokenize_remote, ep, model_cfg.model, text)
+  if ok and count then
+    _tokenize_capable[ep] = true
+    return count
+  end
+  _tokenize_capable[ep] = false
+  return char4_estimate(text)
+end
+
+-- Introspection: nil if endpoint un-probed; true/false for the cached
+-- capability.
+-- Used by tests and future :tokenize debug meta.
+function M.tokenize_supported(model_cfg)
+  local ep = model_cfg and model_cfg.endpoint or nil -- no cfg/endpoint -> nil
+  return ep and _tokenize_capable[ep] -- no cache entry for ep -> nil
+end
+
+-- Test hook: swap in a fresh cache between LuaJIT-VM-shared test runs.
+function M._reset_tokenize_cache()
+  _tokenize_capable = {}
+end
+
 return M