diff --git a/config.lua b/config.lua index ede80da..0a5bb05 100644 --- a/config.lua +++ b/config.lua @@ -146,4 +146,49 @@ return { -- -- (cloud may have variable cost per session). -- summarizer_model = "fast", -- }, + + -- Phase 5 (docs/PHASE5.md): multi-model routing + cloud fallback + + -- summarize-on-evict. OFF by default — auto-routing can spend money + -- silently on the cloud preset; require explicit opt-in. + -- + -- routing = { + -- -- Enable auto-routing per request. When true, router.classify_model + -- -- inspects each prompt and may switch the model for THAT request + -- -- only (the :model selection is preserved across requests). + -- -- Default false. Toggle at runtime with :route on / :route off. + -- auto = true, + -- + -- -- Class → model mapping. nil = "keep current" (heuristic fires + -- -- but no override). Ships with reasoning = nil because mapping + -- -- "explain ..." prompts to a paid cloud model would spend money + -- -- silently — opt in by uncommenting the reasoning line below. + -- classes = { + -- code = "deep", -- code-like prompts to local deep + -- -- reasoning = "cloud", -- OPT-IN: "explain"/"why"/"how does" → paid + -- -- default = nil, -- keep active model + -- }, + -- + -- -- Single-hop retry on transport failure (HTTP 5xx, 408, + -- -- 404 model_not_found, DNS, connection refused, timeouts). + -- -- Retries against fallback_model once. Skipped if any text + -- -- has already streamed (no partial-output duplication). + -- -- Toggle at runtime with :fallback on / :fallback off. + -- fallback = false, -- default off (cost-safety) + -- fallback_model = "cloud", + -- }, + + -- ── Phase 5 context summarization on sliding-window eviction. + -- Set INSIDE the context = { ... 
} block above to enable: + -- context = { + -- max_turns = 40, + -- token_budget = 4096, + -- summarize_on_evict = true, + -- summarizer_model = "fast", -- model name in models{} + -- max_summary_chars = 2000, + -- }, + -- When summarize_on_evict is true, evicted turn pairs are fed to + -- summarizer_model and the result is stored on ctx.summary and appended to + -- the system prompt as [earlier conversation summary]. Summarization is suppressed + -- in Norris mode (R-C4 — planner stays on its goal). If the broker + -- fails, the turns are evicted silently as in Phase 0 (no crash). }