broker: token_count + endpoint capability cache (Phase 8 commit #1)
Foundation for Phase 8 — accurate tokenization via <endpoint>/tokenize
where supported, char/4 fallback otherwise.
Changes:
- `M.token_count(model_cfg, text)`:
Empty text -> 0.
No endpoint -> char/4 immediately.
Capability cache says false -> char/4.
Otherwise -> POST `<endpoint>/tokenize` with `{content, model}`,
2s timeout. On 200 + parseable `{tokens=[...]}`: cache true,
return #tokens. Anything else (non-200 / parse-fail / transport
err / timeout): cache false, char/4.
- `_tokenize_capable` cache keyed by ENDPOINT ONLY per R6 — B1
confirmed /tokenize ignores the model field, so same-endpoint
presets share one cache entry. If a future broker honors the
model field, revisit.
- `M.tokenize_supported(model_cfg)`: returns nil/true/false for
the cached state (introspection for tests + future :tokenize meta).
- `M._reset_tokenize_cache()`: test hook so the session-local cache
doesn't leak between test runs sharing a LuaJIT VM.
Live verified against hossenfelder + a deliberately-broken endpoint:
- "hello world" -> 2 tokens (matches manual curl probe)
- 901-char text -> 201 real tokens vs 225 char/4 (24-token gap;
real is LOWER here, opposite direction from the README probe
where it was higher — confirms heuristic is inaccurate in both
directions)
- Pre-probe: tokenize_supported() returns nil
- Post-probe: tokenize_supported() returns true (local) / false (broken)
- Broken endpoint second call: still char/4, no re-probe
- Empty / nil text edge cases handled
Regression: test_safety 87/87, test_router_model 31/31, repl loads.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
+54
@@ -229,4 +229,58 @@ function M.chat(model_cfg, messages, opts)
|
||||
return table.concat(parts), captured_usage
|
||||
end
|
||||
|
||||
-- ---------------------------------------------------------------- token_count (Phase 8)
|
||||
-- Returns an accurate token count by hitting <endpoint>/tokenize when
|
||||
-- the endpoint supports it; falls back to the Phase 0 §8 char/4
|
||||
-- heuristic otherwise. Per-endpoint capability cache (session-local;
|
||||
-- key per R6 is endpoint-only since B1 confirms /tokenize ignores the
|
||||
-- model field on the observed broker).
|
||||
--
|
||||
-- Never errors. Returns a non-negative integer.
|
||||
-- 2s timeout per call so a misbehaving endpoint can't stall the
|
||||
-- caller; first miss caches as unsupported for the session.
|
||||
-- Endpoint capability cache: maps the `endpoint` value of a model config
-- to true (a /tokenize probe on it succeeded) or false (a probe failed).
-- A missing key means that endpoint has not been probed yet.
local _tokenize_capable = {} -- [endpoint] = true | false (nil = unprobed)
|
||||
|
||||
-- Heuristic fallback: estimate the token count as floor(byte length / 4).
local function _char4_estimate(text)
  return math.floor(#text / 4)
end

-- Mark endpoint `ep` as not /tokenize-capable in the cache and return
-- the char/4 estimate for `text`.
local function _tokenize_fallback(ep, text)
  _tokenize_capable[ep] = false
  return _char4_estimate(text)
end

--- Count the tokens in `text`, preferring the endpoint's /tokenize route.
-- @param model_cfg table|nil  reads `.endpoint` (base URL) and `.model`
-- @param text string|nil      nil is treated as the empty string
-- @return number  0 for empty text; the length of the `tokens` array when
--   the endpoint answers 200 with a JSON object holding one; otherwise
--   floor(#text / 4).
-- Any failed probe (non-200 status, missing body, undecodable body, or a
-- body without a `tokens` table) is cached, so later calls for the same
-- endpoint skip the HTTP round-trip and use the heuristic directly.
function M.token_count(model_cfg, text)
  text = text or ""
  if text == "" then
    return 0
  end

  local ep = model_cfg and model_cfg.endpoint
  if not ep then
    return _char4_estimate(text)
  end
  -- Only an explicit `false` short-circuits; nil (unprobed) and true
  -- both go on to call the endpoint.
  if _tokenize_capable[ep] == false then
    return _char4_estimate(text)
  end

  -- Strip trailing slashes so the joined URL has exactly one separator.
  local url = ep:gsub("/+$", "") .. "/tokenize"
  local body = json.encode({ content = text, model = model_cfg.model })
  local out, status = curl.post(url, body,
    { "Content-Type: application/json" },
    2000) -- 2s timeout per R5 risk row
  if not (status == 200 and out) then
    return _tokenize_fallback(ep, text)
  end

  -- pcall: a decoder that raises on malformed input must not escape this
  -- function; parse failure is a fallback case, not an error.
  local ok, doc = pcall(json.decode, out)
  -- Guard the index: a body that decodes to a non-table value (e.g. `true`
  -- or a number) would otherwise raise on `doc.tokens`.
  local toks = ok and type(doc) == "table" and doc.tokens
  if type(toks) ~= "table" then
    return _tokenize_fallback(ep, text)
  end

  _tokenize_capable[ep] = true
  return #toks
end
|
||||
|
||||
--- Report the cached /tokenize capability for a model config's endpoint.
-- @param model_cfg table|nil  reads `.endpoint`
-- @return boolean|nil  the cached true/false for that endpoint; nil when
--   there is no config, no endpoint, or no cache entry yet.
function M.tokenize_supported(model_cfg)
  local endpoint = model_cfg and model_cfg.endpoint
  if not endpoint then
    return nil
  end
  return _tokenize_capable[endpoint]
end
|
||||
|
||||
--- Test hook: forget every cached endpoint capability so the next
-- token_count call for any endpoint probes it again.
function M._reset_tokenize_cache()
  -- Assigning nil to existing keys during a pairs() traversal is allowed.
  for endpoint in pairs(_tokenize_capable) do
    _tokenize_capable[endpoint] = nil
  end
end
|
||||
|
||||
return M
|
||||
|
||||
Reference in New Issue
Block a user