repl: routing + fallback + summarize_fn wiring (Phase 5 commit #3)
Phase 5 commit #3 per docs/PHASE5.md §3 / §11. Wires the Phase 5 machinery into the REPL.

make_summarize_fn():
  Returns a closure that maps (prior_summary, evicted_turns) onto a broker.chat call against cfg.context.summarizer_model (default "fast"). Three dispatch paths matching the R-B1 callback contract:
    evicted == nil → compress signal
    prior present  → additive ("extend the prior summary ...")
    prior nil      → first-time ("summarize the following turns")
  All use a system prompt enforcing "exactly one short paragraph", max_tokens=300, timeout_ms=30000. Broker failure returns nil so Context falls back to silent eviction. Renderer status is logged on failure for visibility.

Context construction:
  Builds ctx_opts as a fresh table (copies config.context to avoid mutating it) and adds summarize_fn ONLY when config.context.summarize_on_evict is truthy. Defaults stay OFF — Phase 4 regression coverage.

Fallback machinery:
  - FALLBACK_PATTERNS table with 7 transport-error signatures (HTTP 5xx, 408, 404-model_not_found, DNS, connection refused, "Timeout was reached", "Operation timed out")
  - fallback_reason(err) strips the "transport: " prefix and matches.
  - should_fallback(err) gates on cfg.routing.fallback.
  - call_broker(cfg, name, msgs, on_delta, opts) wraps broker.chat_stream:
    • tracks any_delta via a wrapped on_delta callback
    • retries ONCE against cfg.routing.fallback_model (default "cloud") when err matches AND no deltas arrived (N3: mid-stream failures aren't retried — partial text would duplicate)
    • emits a "[aish] local <name> failed (<reason>); retrying via <fb>" status before the retry call

ask_ai routing:
  - Routing decision taken ONCE on entry (R-C2). req_name/req_cfg locals carry the choice through every tool-sub-loop iteration.
  - active_name/active_cfg are NOT mutated — the user's :model selection survives the request.
  - When config.routing.auto is true, classify_model(text, config) is invoked. Non-nil model + non-active → swap req_cfg + status line.
  - broker.chat_stream call replaced with call_broker (fallback wrap).

Meta cmds:
  :route on/off        — toggle cfg.routing.auto at runtime
  :route classes       — show class → model mapping
  :route check <text>  — report classify_model result, with a "(routing currently disabled)" suffix when auto is off (N1)
  :fallback on/off     — toggle cfg.routing.fallback at runtime
  HELP updated with the four new commands.

Smoke-tested: aish boots, all four metas behave correctly, and classify_model returns the reasoning class for "Explain how MMAP works on Linux" (the model slot is nil because no classes are configured by default — N2 cost-safety).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -44,6 +44,10 @@ Meta commands:
|
||||
:memory clear forget all active items (confirms first)
|
||||
:memory inject reload memory.jsonl into ctx (after manual edits)
|
||||
:memory summarize LLM-extract candidate items from this session
|
||||
:route on/off toggle auto-routing per-request (heuristic in router.lua)
|
||||
:route classes show current class → model mapping
|
||||
:route check <text> report which class <text> would route to (debug)
|
||||
:fallback on/off toggle cloud retry when local transport fails
|
||||
:help this message
|
||||
]]
|
||||
|
||||
@@ -56,7 +60,65 @@ function M.run(config)
|
||||
.. "' not found in config.models")
|
||||
end
|
||||
|
||||
local ctx = Context.new(config.context or {})
|
||||
-- Phase 5: flatten evicted turns into a compact "role: content"
-- transcript for the summarizer prompt, one turn per line. Same shape
-- as :memory summarize uses.
--   turns: sequence of turn tables (fields read: role, content); nil is
--          treated as an empty list.
-- Newlines inside a turn collapse to spaces and each turn's content is
-- cut to its first 600 bytes. Returns the joined transcript ("" when
-- there is nothing to render).
local function render_evicted(turns)
  local lines = {}
  for idx, turn in ipairs(turns or {}) do
    -- Assigning to a single local keeps only gsub's first result (the text).
    local flat = (turn.content or ""):gsub("\n", " ")
    lines[idx] = string.format("%s: %s", turn.role, flat:sub(1, 600))
  end
  return table.concat(lines, "\n")
end
|
||||
|
||||
-- Phase 5: summarize_fn factory.
--
-- Resolves the summarizer model once (config.context.summarizer_model,
-- default "fast") and returns nil when that model is not present in
-- config.models. Otherwise returns a closure
--
--     summarize_fn(prior, evicted) -> summary | nil
--
-- that issues one broker.chat call and picks its prompt from the inputs:
--   evicted == nil      → compress the prior summary
--   prior is non-empty  → extend the prior summary with the evicted turns
--   otherwise           → first-time summary of the evicted turns
--
-- The closure returns exactly ONE value: the whitespace-trimmed reply,
-- or nil on any broker failure (the failure is status-logged) so that
-- Context falls back to silent eviction (Phase 0 behavior).
local function make_summarize_fn()
  local sum_name = (config.context and config.context.summarizer_model)
    or "fast"
  local sum_cfg = config.models[sum_name]
  if not sum_cfg then return nil end

  return function(prior, evicted)
    local body
    if evicted == nil then
      body = "Compress this prior summary into 2-3 sentences. "
        .. "Keep names, facts, decisions; drop chatter.\n\n"
        .. "Prior summary:\n" .. (prior or "")
    elseif prior and prior ~= "" then
      body = "Extend this prior summary with the new turns. "
        .. "Keep it 2-4 sentences. Preserve names, facts, decisions.\n\n"
        .. "Prior summary:\n" .. prior
        .. "\n\nNew turns:\n" .. render_evicted(evicted)
    else
      body = "Summarize the following conversation turns in "
        .. "2-3 sentences. Preserve names, facts, decisions.\n\n"
        .. render_evicted(evicted)
    end

    local reply, err = broker.chat(sum_cfg, {
      { role = "system", content =
          "Output exactly one short summary paragraph. "
          .. "No commentary, no markdown, no bullet lists." },
      { role = "user", content = body },
    }, { max_tokens = 300, timeout_ms = 30000 })

    if not reply then
      renderer.status("context summarize failed: " .. tostring(err))
      return nil
    end

    -- string.gsub returns (text, substitution_count). The outer
    -- parentheses truncate that to the text alone; without them the
    -- count leaks out as a second return value of the callback.
    return (reply:gsub("^%s+", ""):gsub("%s+$", ""))
  end
end
|
||||
|
||||
-- Assemble the options table handed to Context.new. config.context is
-- shallow-copied so the configuration table itself is never mutated;
-- summarize_fn is attached only when config.context.summarize_on_evict
-- is truthy, so summarization stays off unless explicitly enabled.
local ctx_opts = {}
do
  local ctx_cfg = config.context
  if ctx_cfg then
    for key, value in pairs(ctx_cfg) do
      ctx_opts[key] = value
    end
    if ctx_cfg.summarize_on_evict then
      -- make_summarize_fn() yields nil when the summarizer model is not
      -- in config.models; the assignment then leaves the option unset.
      ctx_opts.summarize_fn = make_summarize_fn()
    end
  end
end
local ctx = Context.new(ctx_opts)
|
||||
|
||||
-- Phase 2: MCP sessions. Populated from config.mcp.servers at startup
|
||||
-- (best-effort — failures are status-logged once, session absent from
|
||||
@@ -254,6 +316,58 @@ function M.run(config)
|
||||
end
|
||||
end
|
||||
|
||||
-- ── Phase 5: fallback eligibility per PHASE5.md §5 ──────────────────
-- Lua patterns for the transport failures that are eligible for a
-- fallback retry. They are tested against the err string as broker.lua
-- emits it, AFTER a leading "transport: " prefix has been stripped, so
-- the "^"-anchored entries anchor to the start of the stripped text.
local FALLBACK_PATTERNS = {
  "^HTTP 5%d%d",                  -- any HTTP 5xx status
  "^HTTP 404.*model_not_found",   -- 404 only when it reports model_not_found
  "^HTTP 408",                    -- request timeout
  "Couldn'?t resolve host",       -- DNS failure (apostrophe optional)
  "Connection refused",
  "Timeout was reached",
  "Operation timed out",
}

-- Return the portion of `err` that matched a fallback-eligible pattern,
-- or nil when the error is not eligible.
--   err: error value from the broker; anything that is not a string is
--        never eligible, because it cannot match a pattern.
-- Returns exactly one value: the matched text (used as the human-readable
-- reason in the retry status line) or nil.
local function fallback_reason(err)
  -- A non-string error matches no signature, so it must not be reported
  -- as a fallback reason (that would make every unknown failure retry).
  if type(err) ~= "string" then return nil end
  -- Single-target assignment keeps only gsub's first result.
  local stripped = err:gsub("^transport:%s*", "")
  for _, pat in ipairs(FALLBACK_PATTERNS) do
    local hit = stripped:match(pat)
    if hit then return hit end
  end
  return nil
end
|
||||
|
||||
-- Decide whether `err` may trigger a fallback retry. Falsy unless
-- config.routing exists, config.routing.fallback is truthy, and
-- fallback_reason(err) is non-nil. The falsy value returned is whichever
-- operand short-circuited (nil or false), as with an `and` chain.
local function should_fallback(err)
  local routing = config.routing
  if not routing then
    return routing
  end
  local enabled = routing.fallback
  if not enabled then
    return enabled
  end
  return fallback_reason(err) ~= nil
end
|
||||
|
||||
-- Wrap broker.chat_stream with the Phase 5 fallback-retry path.
--
--   model_cfg  : config.models entry to call first
--   model_name : its name (used only in the status line)
--   msgs       : message list passed through to the broker
--   on_delta   : function(kind, payload) streaming callback
--   opts       : passed through unchanged to both attempts
--
-- Returns what broker.chat_stream returns: a truthy value on success,
-- or a falsy value plus the error.
--
-- Retries ONCE against config.routing.fallback_model (default "cloud")
-- only when ALL of these hold:
--   (a) should_fallback(err) is truthy (fallback enabled and the error
--       matches a fallback-eligible pattern),
--   (b) no deltas have arrived yet (mid-stream failures aren't retried —
--       partial text would be duplicated),
--   (c) the fallback model exists in config.models, and
--   (d) the fallback model is not the model that just failed.
local function call_broker(model_cfg, model_name, msgs, on_delta, opts)
  local any_delta = false
  -- Record that streaming has started; this callback is shared by both
  -- attempts.
  local wrapped = function(kind, payload)
    any_delta = true
    return on_delta(kind, payload)
  end

  local ok, err = broker.chat_stream(model_cfg, msgs, wrapped, opts)
  if ok then return ok end
  if any_delta then return ok, err end          -- mid-stream — don't retry
  if not should_fallback(err) then return ok, err end

  local fb_name = (config.routing and config.routing.fallback_model)
    or "cloud"
  local fb_cfg = config.models[fb_name]
  if not fb_cfg then return ok, err end

  -- The failing model already IS the fallback target: a retry would
  -- resend the same request to the same endpoint and announce it with a
  -- misleading "retrying via <same model>" status. Surface the error.
  if fb_name == model_name or fb_cfg == model_cfg then
    return ok, err
  end

  renderer.status(("local %s failed (%s); retrying via %s")
    :format(model_name, fallback_reason(err), fb_name))
  return broker.chat_stream(fb_cfg, msgs, wrapped, opts)
end
|
||||
|
||||
-- Run a shell command, framing output and (per config.shell.capture_output)
|
||||
-- buffering it for the NEXT user turn — context.append_exec_output keeps
|
||||
-- a [exec output] block pending until ask_ai flushes it via append_user.
|
||||
@@ -291,6 +405,19 @@ function M.run(config)
|
||||
ctx:append_user(text)
|
||||
log_turn(ctx.turns[#ctx.turns])
|
||||
|
||||
-- Phase 5 R-C2: routing decision taken ONCE on entry to ask_ai.
|
||||
-- req_name/req_cfg are used for every iteration of the
|
||||
-- tool-sub-loop; active_name/active_cfg are NOT mutated so the
|
||||
-- user's :model selection survives the request.
|
||||
local req_name, req_cfg = active_name, active_cfg
|
||||
if config.routing and config.routing.auto then
|
||||
local routed, class = router.classify_model(text, config)
|
||||
if routed and config.models[routed] and routed ~= active_name then
|
||||
renderer.status(("routed to %s (%s class)"):format(routed, class))
|
||||
req_name, req_cfg = routed, config.models[routed]
|
||||
end
|
||||
end
|
||||
|
||||
local depth = 0
|
||||
local final_resp = ""
|
||||
local first_iteration = true
|
||||
@@ -298,7 +425,7 @@ function M.run(config)
|
||||
while true do
|
||||
local text_parts = {}
|
||||
local tool_calls_seen = {}
|
||||
local ok, err = broker.chat_stream(active_cfg, ctx:to_messages(),
|
||||
local ok, err = call_broker(req_cfg, req_name, ctx:to_messages(),
|
||||
function(kind, payload)
|
||||
if kind == "text" then
|
||||
text_parts[#text_parts + 1] = payload
|
||||
@@ -911,6 +1038,51 @@ function M.run(config)
|
||||
renderer.status("usage: :safety {patterns|check}")
|
||||
end
|
||||
end,
|
||||
-- :route {on|off|classes|check <text>} — runtime control of auto-routing.
--   on / off      set config.routing.auto
--   classes       print the class → model mapping, sorted by class name
--                 so the listing is stable between invocations
--   check <text>  report what router.classify_model returns for <text>,
--                 noting when auto-routing is currently disabled
-- Anything else prints the usage line. config.routing is created on
-- first use so later field accesses are safe.
route = function(args)
  local sub, sub_args = (args or ""):match("^%s*(%S*)%s*(.*)$")
  config.routing = config.routing or {}
  if sub == "on" then
    config.routing.auto = true
    renderer.status("auto-routing on")
  elseif sub == "off" then
    config.routing.auto = false
    renderer.status("auto-routing off")
  elseif sub == "classes" then
    local classes = config.routing.classes or {}
    if next(classes) == nil then
      renderer.status("(no classes configured)"); return
    end
    -- pairs() order is unspecified; collect and sort the keys first.
    local names = {}
    for k in pairs(classes) do names[#names + 1] = k end
    table.sort(names, function(a, b) return tostring(a) < tostring(b) end)
    for _, k in ipairs(names) do
      io.write((" %-10s → %s\n"):format(tostring(k), tostring(classes[k])))
    end
  elseif sub == "check" then
    local text = sub_args:match("^%s*(.-)%s*$")
    if not text or text == "" then
      renderer.status("usage: :route check <text>"); return
    end
    local m, class = router.classify_model(text, config)
    -- "" is truthy in Lua, so the and/or idiom is safe here.
    local extra = config.routing.auto and ""
      or " (routing currently disabled)"
    -- tostring() on both values: "%s" rejects nil on Lua 5.1.
    renderer.status(("class=%s model=%s%s"):format(
      tostring(class), tostring(m), extra))
  else
    renderer.status("usage: :route {on|off|classes|check}")
  end
end,
|
||||
-- :fallback {on|off} — toggle config.routing.fallback at runtime.
-- "on" also reports the retry target (config.routing.fallback_model,
-- default "cloud"); any other argument prints the usage line and leaves
-- the flag untouched. config.routing is created if it does not exist.
fallback = function(args)
  local mode = args:match("^%s*(%S*)")
  local routing = config.routing or {}
  config.routing = routing

  if mode ~= "on" and mode ~= "off" then
    renderer.status("usage: :fallback {on|off}")
    return
  end

  routing.fallback = (mode == "on")
  if routing.fallback then
    local target = routing.fallback_model or "cloud"
    renderer.status(("cloud fallback on (target: %s)"):format(target))
  else
    renderer.status("cloud fallback off")
  end
end,
|
||||
-- :help — write the HELP text to stdout.
help = function()
  io.write(HELP)
end,
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user