repl: routing + fallback + summarize_fn wiring (Phase 5 commit #3)

Phase 5 commit #3 per docs/PHASE5.md §3 / §11. Wires the Phase 5
machinery into the REPL.

make_summarize_fn():
  Returns a closure that maps (prior_summary, evicted_turns) onto a
  broker.chat call against cfg.context.summarizer_model (default
  "fast"). Three dispatch paths matching the R-B1 callback contract:
    evicted == nil      → compress signal
    prior present       → additive ("extend the prior summary ...")
    prior nil           → first-time ("summarize the following turns")
  All use a system prompt enforcing "exactly one short paragraph",
  max_tokens=300, timeout_ms=30000. Broker failure returns nil so
  Context falls back to silent eviction. Renderer status is logged
  on failure for visibility.

Context construction:
  Builds ctx_opts as a fresh table (a shallow copy of config.context,
  so the config itself is never mutated) and adds summarize_fn ONLY
  when config.context.summarize_on_evict is set (any truthy value).
  The default stays OFF, which preserves
  Phase 4 regression coverage.

Fallback machinery:
  - FALLBACK_PATTERNS table with 7 transport-error signatures
    (HTTP 5xx, 408, 404-model_not_found, DNS, connection refused,
    "Timeout was reached", "Operation timed out")
  - fallback_reason(err) strips the "transport: " prefix and matches.
  - should_fallback(err) gates on cfg.routing.fallback.
  - call_broker(cfg, name, msgs, on_delta, opts) wraps
    broker.chat_stream:
      • tracks any_delta via wrapped on_delta callback
      • retries ONCE against cfg.routing.fallback_model (default
        "cloud") when err matches AND no deltas arrived (N3:
        mid-stream failures aren't retried — partial text would
        duplicate)
      • emits "[aish] local <name> failed (<reason>); retrying via
        <fb>" status before the retry call

ask_ai routing:
  - Routing decision taken ONCE on entry (R-C2). req_name/req_cfg
    locals carry the choice through every tool-sub-loop iteration.
  - active_name/active_cfg are NOT mutated — user's :model selection
    survives the request.
  - When config.routing.auto is true, classify_model(text, config) is
    invoked. Non-nil model + non-active → swap req_cfg + status line.
  - broker.chat_stream call replaced with call_broker (fallback wrap).

Meta cmds:
  :route on/off           — toggle cfg.routing.auto at runtime
  :route classes          — show class → model mapping
  :route check <text>     — report classify_model result with
                            "(routing currently disabled)" suffix when
                            auto is off (N1)
  :fallback on/off        — toggle cfg.routing.fallback at runtime

HELP updated with the four new commands.

Smoke-tested: aish boots, all four metas behave correctly, classify_model
returns reasoning class for "Explain how MMAP works on Linux" (the model
slot is nil because no classes are configured by default — N2 cost-safety).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-13 11:31:14 +00:00
parent 03497b5eea
commit 40ea0b49b0
+174 -2
View File
@@ -44,6 +44,10 @@ Meta commands:
:memory clear forget all active items (confirms first)
:memory inject reload memory.jsonl into ctx (after manual edits)
:memory summarize LLM-extract candidate items from this session
:route on/off toggle auto-routing per-request (heuristic in router.lua)
:route classes show current class → model mapping
:route check <text> report which class <text> would route to (debug)
:fallback on/off toggle cloud retry when local transport fails
:help this message
]]
@@ -56,7 +60,65 @@ function M.run(config)
.. "' not found in config.models")
end
local ctx = Context.new(config.context or {})
-- Phase 5: render the evicted turns into a compact transcript for
-- the summarizer prompt. Same shape as :memory summarize uses.
--
-- Each turn becomes one "<role>: <content>" line; newlines inside a turn
-- are flattened to spaces and the content is capped at 600 bytes so one
-- long turn cannot dominate the prompt.
--
-- @param turns  sequence of turn tables ({ role = ..., content = ... });
--               nil is treated as an empty list.
-- @return string  the rendered lines joined with "\n" ("" for no turns).
local function render_evicted(turns)
    local parts = {}
    for _, t in ipairs(turns or {}) do
        local content = t.content
        if not content then
            content = ""
        elseif type(content) ~= "string" then
            -- Non-string payloads have no string methods, so calling
            -- :gsub on them would raise. Coerce scalars and use a
            -- placeholder for tables rather than aborting the render.
            content = type(content) == "table" and "[non-text content]"
                or tostring(content)
        end
        -- gsub returns (string, count); the assignment keeps the string only.
        local flat = content:gsub("\n", " ")
        -- tostring() keeps a missing role from raising in string.format
        -- on Lua 5.1 / LuaJIT, where %s requires a string argument.
        parts[#parts + 1] = ("%s: %s"):format(
            tostring(t.role), flat:sub(1, 600))
    end
    return table.concat(parts, "\n")
end
-- Phase 5: summarize_fn factory. Returns a closure that maps
-- (prior_summary, evicted_turns) onto a broker.chat call against
-- the configured summarizer model (config.context.summarizer_model,
-- default "fast").
--
-- @return function|nil  the summarize callback, or nil when the
--         summarizer model is not present in config.models (a status
--         line is logged so the misconfiguration is visible).
--
-- The callback picks its prompt from the argument shape:
--   evicted == nil          → compress the prior summary
--   prior is non-empty      → extend the prior summary with new turns
--   otherwise               → first-time summary of the evicted turns
-- It returns exactly one value: the whitespace-trimmed reply, or nil
-- when broker.chat fails (the failure is status-logged) so Context
-- falls back to silent eviction (Phase 0 behavior).
local function make_summarize_fn()
    local sum_name = (config.context and config.context.summarizer_model)
        or "fast"
    local sum_cfg = config.models[sum_name]
    if not sum_cfg then
        -- Without this the feature is silently disabled even though the
        -- caller asked for it.
        renderer.status(("context summarizer model '%s' not found in "
            .. "config.models; evictions will not be summarized")
            :format(tostring(sum_name)))
        return nil
    end
    return function(prior, evicted)
        local body
        if evicted == nil then
            body = "Compress this prior summary into 2-3 sentences. "
                .. "Keep names, facts, decisions; drop chatter.\n\n"
                .. "Prior summary:\n" .. (prior or "")
        elseif prior and prior ~= "" then
            body = "Extend this prior summary with the new turns. "
                .. "Keep it 2-4 sentences. Preserve names, facts, decisions.\n\n"
                .. "Prior summary:\n" .. prior
                .. "\n\nNew turns:\n" .. render_evicted(evicted)
        else
            body = "Summarize the following conversation turns in "
                .. "2-3 sentences. Preserve names, facts, decisions.\n\n"
                .. render_evicted(evicted)
        end
        local reply, err = broker.chat(sum_cfg, {
            { role = "system", content =
                "Output exactly one short summary paragraph. "
                .. "No commentary, no markdown, no bullet lists." },
            { role = "user", content = body },
        }, { max_tokens = 300, timeout_ms = 30000 })
        if not reply then
            renderer.status("context summarize failed: " .. tostring(err))
            return nil
        end
        -- The outer parentheses matter: string.gsub returns
        -- (result, substitution_count), and an unparenthesized
        -- `return s:gsub(...)` would hand the count to the caller as a
        -- spurious second return value.
        return (reply:gsub("^%s+", ""):gsub("%s+$", ""))
    end
end
-- Assemble the options table handed to Context.new. The entries of
-- config.context are copied into a fresh table so the config is left
-- untouched; summarize_fn is attached only when the
-- summarize_on_evict flag is set.
local ctx_opts = {}
do
    local ctx_cfg = config.context
    if ctx_cfg then
        for key, value in pairs(ctx_cfg) do
            ctx_opts[key] = value
        end
        if ctx_cfg.summarize_on_evict then
            -- make_summarize_fn() may return nil, in which case the
            -- field simply stays unset.
            ctx_opts.summarize_fn = make_summarize_fn()
        end
    end
end
local ctx = Context.new(ctx_opts)
-- Phase 2: MCP sessions. Populated from config.mcp.servers at startup
-- (best-effort — failures are status-logged once, session absent from
@@ -254,6 +316,58 @@ function M.run(config)
end
end
-- ── Phase 5: fallback eligibility per PHASE5.md §5 ──────────────────
-- Lua patterns identifying transport failures that qualify for a
-- fallback retry. They are tested against the error text AFTER a
-- leading "transport: " prefix has been removed, so the ^-anchored
-- HTTP patterns must not include that prefix.
local FALLBACK_PATTERNS = {
    "^HTTP 5%d%d",
    "^HTTP 404.*model_not_found",
    "^HTTP 408",
    "Couldn'?t resolve host",
    "Connection refused",
    "Timeout was reached",
    "Operation timed out",
}

-- Classify an error as fallback-eligible.
--
-- @param err  the error value reported by the broker call.
-- @return string|nil  the substring matched by the first pattern that
--         hits (used as the human-readable reason), "unknown error"
--         when err is not a string, or nil when nothing matches.
--
-- NOTE(review): a non-string err yields a non-nil reason and is
-- therefore treated as eligible by callers that test `~= nil` —
-- confirm this is intended.
local function fallback_reason(err)
    if type(err) ~= "string" then
        return "unknown error"
    end
    -- gsub returns (string, count); the assignment keeps the string only.
    local message = err:gsub("^transport:%s*", "")
    for i = 1, #FALLBACK_PATTERNS do
        local hit = message:match(FALLBACK_PATTERNS[i])
        if hit then
            return hit
        end
    end
    return nil
end
-- Decide whether a failed broker call may be retried on the fallback
-- model: config.routing.fallback must be set and the error must be
-- classified by fallback_reason.
--
-- @param err  the error value from the failed call.
-- @return  the falsy config value (nil/false) when routing or the
--          fallback flag is absent; otherwise true/false depending on
--          whether fallback_reason(err) produced a reason.
local function should_fallback(err)
    local routing = config.routing
    if not routing then
        return routing
    end
    local enabled = routing.fallback
    if not enabled then
        return enabled
    end
    return fallback_reason(err) ~= nil
end
-- broker.chat_stream with a single fallback retry (Phase 5).
--
-- The first attempt goes to model_cfg. If it fails, one retry is made
-- against config.routing.fallback_model (default "cloud") provided that
--   (a) no delta callback fired during the first attempt — a
--       mid-stream failure is not retried because the partial text
--       would be duplicated,
--   (b) should_fallback(err) accepts the error, and
--   (c) the fallback model exists in config.models.
-- A status line announces the retry before it starts.
--
-- @param model_cfg   model config for the first attempt
-- @param model_name  name of that model, used only in the status line
-- @param msgs        messages passed through to broker.chat_stream
-- @param on_delta    callback (kind, payload) for streamed deltas
-- @param opts        options passed through to broker.chat_stream
-- @return  on first-attempt success: its truthy result only;
--          on a non-retried failure: ok, err from the first attempt;
--          on retry: whatever the second broker.chat_stream returns.
local function call_broker(model_cfg, model_name, msgs, on_delta, opts)
    local saw_delta = false
    -- Same callback as on_delta, but records that streaming has begun.
    local function tracking_delta(kind, payload)
        saw_delta = true
        return on_delta(kind, payload)
    end

    local ok, err = broker.chat_stream(model_cfg, msgs, tracking_delta, opts)
    if ok then
        return ok
    end

    -- Resolve the fallback target only when a retry is permitted;
    -- fb_cfg stays nil otherwise and the original failure is returned.
    local fb_name, fb_cfg
    if not saw_delta and should_fallback(err) then
        fb_name = (config.routing and config.routing.fallback_model)
            or "cloud"
        fb_cfg = config.models[fb_name]
    end
    if not fb_cfg then
        return ok, err
    end

    local notice = ("local %s failed (%s); retrying via %s"):format(
        model_name, fallback_reason(err), fb_name)
    renderer.status(notice)
    return broker.chat_stream(fb_cfg, msgs, tracking_delta, opts)
end
-- Run a shell command, framing output and (per config.shell.capture_output)
-- buffering it for the NEXT user turn — context.append_exec_output keeps
-- a [exec output] block pending until ask_ai flushes it via append_user.
@@ -291,6 +405,19 @@ function M.run(config)
ctx:append_user(text)
log_turn(ctx.turns[#ctx.turns])
-- Phase 5 R-C2: pick the model for this request exactly once, up
-- front. req_name/req_cfg start as the active model and are swapped
-- only when auto-routing is on and the classifier names a different,
-- configured model. active_name/active_cfg are left alone so the
-- user's :model selection survives the request.
local req_name, req_cfg = active_name, active_cfg
if config.routing and config.routing.auto then
    local routed, class = router.classify_model(text, config)
    local routed_cfg = routed and config.models[routed]
    if routed_cfg and routed ~= active_name then
        renderer.status(("routed to %s (%s class)"):format(routed, class))
        req_name = routed
        req_cfg = routed_cfg
    end
end
local depth = 0
local final_resp = ""
local first_iteration = true
@@ -298,7 +425,7 @@ function M.run(config)
while true do
local text_parts = {}
local tool_calls_seen = {}
local ok, err = broker.chat_stream(active_cfg, ctx:to_messages(),
local ok, err = call_broker(req_cfg, req_name, ctx:to_messages(),
function(kind, payload)
if kind == "text" then
text_parts[#text_parts + 1] = payload
@@ -911,6 +1038,51 @@ function M.run(config)
renderer.status("usage: :safety {patterns|check}")
end
end,
-- :route {on|off|classes|check <text>}
--   on / off      set config.routing.auto and confirm via status line
--   classes       print the class → model mapping, sorted by class name
--   check <text>  report what router.classify_model returns for <text>,
--                 noting when auto-routing is currently disabled
-- Anything else prints the usage line. config.routing is created on
-- demand so the command works without a routing section in the config.
route = function(args)
    local sub, sub_args = args:match("^%s*(%S*)%s*(.*)$")
    config.routing = config.routing or {}
    if sub == "on" then
        config.routing.auto = true
        renderer.status("auto-routing on")
    elseif sub == "off" then
        config.routing.auto = false
        renderer.status("auto-routing off")
    elseif sub == "classes" then
        local classes = config.routing.classes or {}
        if next(classes) == nil then
            renderer.status("(no classes configured)"); return
        end
        -- pairs() visits keys in unspecified order; sort the names so
        -- the listing is stable from one invocation to the next.
        local names = {}
        for k in pairs(classes) do names[#names + 1] = k end
        table.sort(names, function(a, b)
            return tostring(a) < tostring(b)
        end)
        for _, k in ipairs(names) do
            io.write((" %-10s → %s\n"):format(
                tostring(k), tostring(classes[k])))
        end
    elseif sub == "check" then
        local text = sub_args:match("^%s*(.-)%s*$")
        if not text or text == "" then
            renderer.status("usage: :route check <text>"); return
        end
        local m, class = router.classify_model(text, config)
        local extra = config.routing.auto and ""
            or " (routing currently disabled)"
        -- Both values go through tostring(): %s raises on a nil
        -- argument under Lua 5.1 / LuaJIT.
        renderer.status(("class=%s model=%s%s"):format(
            tostring(class), tostring(m), extra))
    else
        renderer.status("usage: :route {on|off|classes|check}")
    end
end,
-- :fallback {on|off} — set config.routing.fallback at runtime and
-- confirm via a status line; any other argument prints the usage line.
-- config.routing is created on demand.
fallback = function(args)
    local verb = args:match("^%s*(%S*)")
    config.routing = config.routing or {}
    local routing = config.routing

    if verb ~= "on" and verb ~= "off" then
        renderer.status("usage: :fallback {on|off}")
        return
    end

    routing.fallback = (verb == "on")
    if verb == "off" then
        renderer.status("cloud fallback off")
        return
    end
    local target = routing.fallback_model or "cloud"
    renderer.status(("cloud fallback on (target: %s)"):format(target))
end,
-- :help — write the HELP text to stdout.
help = function()
    io.write(HELP)
end,
}