broker: token_count + endpoint capability cache (Phase 8 commit #1)

Foundation for Phase 8 — accurate tokenization via <endpoint>/tokenize where supported, char/4 fallback otherwise.

Changes:
- `M.token_count(model_cfg, text)`: Empty text -> 0. No endpoint -> char/4 immediately. Capability cache says false -> char/4. Otherwise -> POST `<endpoint>/tokenize` with `{content, model}`, 2s timeout. On 200 + parseable `{tokens=[...]}`: cache true, return #tokens. Anything else (non-200 / parse-fail / transport err / timeout): cache false, char/4.
- `_tokenize_capable` cache keyed by ENDPOINT ONLY per R6 — B1 confirmed /tokenize ignores the model field, so same-endpoint presets share one cache entry. If a future broker honors the model field, revisit.
- `M.tokenize_supported(model_cfg)`: returns nil/true/false for the cached state (introspection for tests + future :tokenize meta).
- `M._reset_tokenize_cache()`: test hook so the session-local cache doesn't leak between test runs sharing a LuaJIT VM.

Live verified against hossenfelder + a deliberately-broken endpoint:
- "hello world" -> 2 tokens (matches manual curl probe)
- 901-char text -> 201 real tokens vs 225 char/4 (24-token gap; real is LOWER here, opposite direction from the README probe where it was higher — confirms heuristic is inaccurate in both directions)
- Pre-probe: tokenize_supported() returns nil
- Post-probe: tokenize_supported() returns true (local) / false (broken)
- Broken endpoint second call: still char/4, no re-probe
- Empty / nil text edge cases handled

Regression: test_safety 87/87, test_router_model 31/31, repl loads.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-16 23:29:17 +00:00
parent 467e573d24
commit 7ef2a6ed5c
1 changed files with 54 additions and 0 deletions
@@ -229,4 +229,58 @@ function M.chat(model_cfg, messages, opts)
    return table.concat(parts), captured_usage
 end
 -- ---------------------------------------------------------------- token_count (Phase 8)
 -- Returns a token count for `text`, asking <endpoint>/tokenize when the
 -- endpoint supports it and falling back to the Phase 0 §8 char/4
 -- heuristic otherwise.
 --
 -- Capability is cached per endpoint for the session (key per R6 is
 -- endpoint-only since B1 confirms /tokenize ignores the model field on
 -- the observed broker). The first failed probe -- non-200, transport
 -- error, timeout, or a body without a `tokens` array -- caches the
 -- endpoint as unsupported, so it is not probed again this session.
 --
 -- Never errors: request encoding, the HTTP call and the JSON decode all
 -- run under pcall, so a raising encoder/transport/decoder degrades to
 -- the heuristic instead of propagating to the caller.
 --
 -- model_cfg: table with `endpoint` (string) and optional `model`. A nil
 --            or non-table model_cfg, or a missing/non-string endpoint,
 --            selects the heuristic without touching the cache.
 -- text:      string to count. nil/false count as empty; any other
 --            non-string value is stringified first.
 -- Returns a non-negative integer.
 local _tokenize_capable = {}    -- [endpoint] = true | false (nil = unprobed)

 local TOKENIZE_TIMEOUT_MS = 2000  -- 2s timeout per R5 risk row

 -- Phase 0 §8 fallback: roughly one token per four bytes, rounded down.
 local function _char4_estimate(text)
    return math.floor(#text / 4)
 end

 function M.token_count(model_cfg, text)
    text = text or ""
    if type(text) ~= "string" then text = tostring(text) end
    if text == "" then return 0 end

    local ep = type(model_cfg) == "table" and model_cfg.endpoint
    if type(ep) ~= "string" then
        return _char4_estimate(text)
    end
    if _tokenize_capable[ep] == false then
        return _char4_estimate(text)
    end

    -- A failure to encode the request is a local problem and says nothing
    -- about the endpoint, so fall back without caching a verdict.
    local enc_ok, body = pcall(json.encode,
        { content = text, model = model_cfg.model })
    if not enc_ok or type(body) ~= "string" then
        return _char4_estimate(text)
    end

    -- Parenthesised so only gsub's first result (the string) is concatenated.
    local url = (ep:gsub("/+$", "")) .. "/tokenize"
    local post_ok, out, status = pcall(curl.post, url, body,
        { "Content-Type: application/json" },
        TOKENIZE_TIMEOUT_MS)

    local toks
    if post_ok and status == 200 and out then
        -- Some decoders raise on malformed input and others return nil;
        -- pcall plus the type check below covers both conventions.
        local dec_ok, doc = pcall(json.decode, out)
        -- Valid JSON may decode to a scalar; only index real tables.
        if dec_ok and type(doc) == "table" then
            toks = doc.tokens
        end
    end

    if type(toks) ~= "table" then
        _tokenize_capable[ep] = false
        return _char4_estimate(text)
    end
    _tokenize_capable[ep] = true
    return #toks
 end
 -- Introspection helper for tests and a future :tokenize debug meta.
 -- Reports the cached capability for model_cfg's endpoint: true or false
 -- once a probe result has been stored, nil while the endpoint is still
 -- un-probed or when model_cfg is missing / has no endpoint.
 function M.tokenize_supported(model_cfg)
    local endpoint = model_cfg and model_cfg.endpoint
    if endpoint then
        return _tokenize_capable[endpoint]
    end
    return nil
 end
 -- Test hook: forget every cached probe result so test runs that share
 -- one LuaJIT VM start with all endpoints un-probed again.
 function M._reset_tokenize_cache()
    -- Assigning nil to existing keys during a pairs() traversal is allowed.
    for endpoint in pairs(_tokenize_capable) do
        _tokenize_capable[endpoint] = nil
    end
 end
 return M