From 40ea0b49b02129ab287f8029667bef4d3165648e Mon Sep 17 00:00:00 2001
From: Markus Fritsche
Date: Wed, 13 May 2026 11:31:14 +0000
Subject: [PATCH] repl: routing + fallback + summarize_fn wiring (Phase 5 commit #3)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Phase 5 commit #3 per docs/PHASE5.md §3 / §11. Wires the Phase 5
machinery into the REPL.

make_summarize_fn():
  Returns a closure that maps (prior_summary, evicted_turns) onto a
  broker.chat call against cfg.context.summarizer_model (default
  "fast"). Three dispatch paths matching the R-B1 callback contract:
    evicted == nil  → compress signal
    prior present   → additive ("extend the prior summary ...")
    prior nil       → first-time ("summarize the following turns")
  All use a system prompt enforcing "exactly one short paragraph",
  max_tokens=300, timeout_ms=30000. Broker failure returns nil so
  Context falls back to silent eviction. Renderer status is logged
  on failure for visibility.

Context construction:
  Build ctx_opts as a fresh table (copies config.context to avoid
  mutating it), adds summarize_fn ONLY when
  config.context.summarize_on_evict == true. Defaults stay OFF —
  Phase 4 regression coverage.

Fallback machinery:
  - FALLBACK_PATTERNS table with 7 transport-error signatures
    (HTTP 5xx, 408, 404-model_not_found, DNS, connection refused,
    "Timeout was reached", "Operation timed out")
  - fallback_reason(err) strips the "transport: " prefix and matches.
  - should_fallback(err) gates on cfg.routing.fallback.
  - call_broker(cfg, name, msgs, on_delta, opts) wraps
    broker.chat_stream:
      • tracks any_delta via wrapped on_delta callback
      • retries ONCE against cfg.routing.fallback_model (default
        "cloud") when err matches AND no deltas arrived (N3:
        mid-stream failures aren't retried — partial text would
        duplicate)
      • emits "[aish] local <model> failed (<reason>); retrying via <fallback_model>"
        status before the retry call

ask_ai routing:
  - Routing decision taken ONCE on entry (R-C2).
    req_name/req_cfg locals carry the choice through every
    tool-sub-loop iteration.
  - active_name/active_cfg are NOT mutated — user's :model selection
    survives the request.
  - When config.routing.auto is true, classify_model(text, config) is
    invoked. Non-nil model + non-active → swap req_cfg + status line.
  - broker.chat_stream call replaced with call_broker (fallback wrap).

Meta cmds:
  :route on/off        — toggle cfg.routing.auto at runtime
  :route classes       — show class → model mapping
  :route check <text>  — report classify_model result with
                         "(routing currently disabled)" suffix when
                         auto is off (N1)
  :fallback on/off     — toggle cfg.routing.fallback at runtime

HELP updated with the four new commands.

Smoke-tested: aish boots, all four metas behave correctly,
classify_model returns reasoning class for "Explain how MMAP works on
Linux" (the model slot is nil because no classes are configured by
default — N2 cost-safety).

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 repl.lua | 176 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 174 insertions(+), 2 deletions(-)

diff --git a/repl.lua b/repl.lua
index b3add7d..6a07aa4 100644
--- a/repl.lua
+++ b/repl.lua
@@ -44,6 +44,10 @@ Meta commands:
   :memory clear          forget all active items (confirms first)
   :memory inject         reload memory.jsonl into ctx (after manual edits)
   :memory summarize      LLM-extract candidate items from this session
+  :route on/off          toggle auto-routing per-request (heuristic in router.lua)
+  :route classes         show current class → model mapping
+  :route check           report which class would route to (debug)
+  :fallback on/off       toggle cloud retry when local transport fails
   :help                  this message
 ]]
 
@@ -56,7 +60,65 @@ function M.run(config)
             .. "' not found in config.models")
     end
 
-    local ctx = Context.new(config.context or {})
+    -- Phase 5: render the evicted turns into a compact transcript for
+    -- the summarizer prompt. Same shape as :memory summarize uses.
+    -- Each turn becomes one "role: content" line: newlines in the content
+    -- are replaced by spaces and the content is cut to 600 bytes. A nil
+    -- `turns` renders as "". Role and non-string content are stringified
+    -- so an unusual turn (presumably tool-call payloads; verify against
+    -- Context) cannot abort summarization with a runtime error.
+    local function render_evicted(turns)
+        local parts = {}
+        for _, t in ipairs(turns or {}) do
+            local content = t.content or ""
+            if type(content) ~= "string" then content = tostring(content) end
+            parts[#parts + 1] = ("%s: %s"):format(
+                tostring(t.role), content:gsub("\n", " "):sub(1, 600))
+        end
+        return table.concat(parts, "\n")
+    end
+
+    -- Phase 5: summarize_fn factory. Returns a closure that maps
+    -- (prior_summary, evicted_turns) onto a broker.chat call against
+    -- the configured summarizer model (config.context.summarizer_model,
+    -- default "fast"). Returns nil, after a status line, when that model
+    -- is not present in config.models.
+    --
+    -- The closure picks one of three prompts:
+    --   evicted == nil      compress the prior summary
+    --   prior non-empty     extend the prior summary with the new turns
+    --   otherwise           first-time summary of the evicted turns
+    -- It returns exactly one value: the trimmed summary string, or nil
+    -- on any failure so Context falls back to silent eviction (Phase 0
+    -- behavior).
+    local function make_summarize_fn()
+        local sum_name = (config.context and config.context.summarizer_model)
+            or "fast"
+        local sum_cfg = config.models[sum_name]
+        if not sum_cfg then
+            -- Surface the misconfiguration instead of silently turning
+            -- summarize_on_evict into a no-op.
+            renderer.status("context summarizer model '" .. tostring(sum_name)
+                .. "' not found in config.models; summarization disabled")
+            return nil
+        end
+        return function(prior, evicted)
+            local body
+            if evicted == nil then
+                body = "Compress this prior summary into 2-3 sentences. "
+                    .. "Keep names, facts, decisions; drop chatter.\n\n"
+                    .. "Prior summary:\n" .. (prior or "")
+            elseif prior and prior ~= "" then
+                body = "Extend this prior summary with the new turns. "
+                    .. "Keep it 2-4 sentences. Preserve names, facts, decisions.\n\n"
+                    .. "Prior summary:\n" .. prior
+                    .. "\n\nNew turns:\n" .. render_evicted(evicted)
+            else
+                body = "Summarize the following conversation turns in "
+                    .. "2-3 sentences. Preserve names, facts, decisions.\n\n"
+                    .. render_evicted(evicted)
+            end
+            local reply, err = broker.chat(sum_cfg, {
+                { role = "system", content =
+                    "Output exactly one short summary paragraph. "
+                    .. "No commentary, no markdown, no bullet lists." },
+                { role = "user", content = body },
+            }, { max_tokens = 300, timeout_ms = 30000 })
+            if not reply then
+                renderer.status("context summarize failed: " .. tostring(err))
+                return nil
+            end
+            -- string.gsub returns (string, count). Binding to a single
+            -- local keeps the substitution count from leaking out of the
+            -- callback as a second return value.
+            local summary = reply:gsub("^%s+", ""):gsub("%s+$", "")
+            if summary == "" then
+                -- An all-whitespace reply carries no summary; report it
+                -- as a failure rather than handing back an empty string.
+                renderer.status("context summarize failed: empty reply")
+                return nil
+            end
+            return summary
+        end
+    end
+
+    -- Build Context with optional summarize_fn (gated by cfg flag).
+    -- Shallow copy: adding summarize_fn below must not mutate
+    -- config.context. make_summarize_fn() may return nil, in which case
+    -- no summarize_fn key is set and Context is built without one.
+    local ctx_opts = {}
+    if config.context then
+        for k, v in pairs(config.context) do ctx_opts[k] = v end
+    end
+    if config.context and config.context.summarize_on_evict then
+        ctx_opts.summarize_fn = make_summarize_fn()
+    end
+    local ctx = Context.new(ctx_opts)
 
     -- Phase 2: MCP sessions. Populated from config.mcp.servers at startup
     -- (best-effort — failures are status-logged once, session absent from
@@ -254,6 +316,58 @@ function M.run(config)
         end
     end
 
+    -- ── Phase 5: fallback eligibility per PHASE5.md §5 ──────────────────
+    -- All transport-failure patterns must match against the err string
+    -- as broker.lua emits it (with "transport: " prefix). The matcher
+    -- strips the prefix before testing.
+    local FALLBACK_PATTERNS = {
+        "^HTTP 5%d%d",
+        "^HTTP 404.*model_not_found",
+        "^HTTP 408",
+        "Couldn'?t resolve host",
+        "Connection refused",
+        "Timeout was reached",
+        "Operation timed out",
+    }
+
+    -- Returns the portion of err matched by the first hit in
+    -- FALLBACK_PATTERNS (e.g. "HTTP 503"), or nil when nothing matches.
+    -- A non-string err has no signature to inspect and therefore never
+    -- matches; it must not make a request fallback-eligible.
+    local function fallback_reason(err)
+        if type(err) ~= "string" then return nil end
+        -- gsub also returns a count; the single-name local drops it.
+        local stripped = err:gsub("^transport:%s*", "")
+        for _, pat in ipairs(FALLBACK_PATTERNS) do
+            local hit = stripped:match(pat)
+            if hit then return hit end
+        end
+        return nil
+    end
+
+    -- Truthy only when config.routing.fallback is enabled AND err carries
+    -- a fallback-eligible signature.
+    local function should_fallback(err)
+        return config.routing and config.routing.fallback
+            and fallback_reason(err) ~= nil
+    end
+
+    -- Wrap broker.chat_stream with the Phase 5 fallback-retry path.
+    -- Retries ONCE against cfg.routing.fallback_model (default "cloud")
+    -- when (a) cfg.routing.fallback is true, (b) err matches a
+    -- fallback-eligible pattern, AND (c) no deltas have arrived yet
+    -- (mid-stream failures aren't retried — partial text would be
+    -- duplicated).
+    local function call_broker(model_cfg, model_name, msgs, on_delta, opts)
+        -- Returns a truthy value when the first attempt succeeds. On
+        -- failure without retry it returns the falsy result plus err of
+        -- the first attempt; when a retry is made it returns whatever the
+        -- retry's broker.chat_stream call returns.
+        local any_delta = false
+        -- Every callback invocation, whatever its kind, counts as output
+        -- having reached the caller.
+        local wrapped = function(kind, payload)
+            any_delta = true
+            return on_delta(kind, payload)
+        end
+        local ok, err = broker.chat_stream(model_cfg, msgs, wrapped, opts)
+        if ok then return ok end
+        if any_delta then return ok, err end  -- mid-stream — don't retry
+        if not should_fallback(err) then return ok, err end
+        local fb_name = (config.routing and config.routing.fallback_model)
+            or "cloud"
+        local fb_cfg = config.models[fb_name]
+        if not fb_cfg then return ok, err end
+        -- The model that just failed IS the fallback target: retrying it
+        -- would repeat the same failing call and announce a misleading
+        -- "retrying via" line, so report the original error instead.
+        if fb_name == model_name or fb_cfg == model_cfg then
+            return ok, err
+        end
+        renderer.status(("local %s failed (%s); retrying via %s")
+            :format(model_name, fallback_reason(err), fb_name))
+        return broker.chat_stream(fb_cfg, msgs, wrapped, opts)
+    end
+
     -- Run a shell command, framing output and (per config.shell.capture_output)
     -- buffering it for the NEXT user turn — context.append_exec_output keeps
     -- a [exec output] block pending until ask_ai flushes it via append_user.
@@ -291,6 +405,19 @@ function M.run(config)
         ctx:append_user(text)
         log_turn(ctx.turns[#ctx.turns])
 
+        -- Phase 5 R-C2: routing decision taken ONCE on entry to ask_ai.
+        -- req_name/req_cfg are used for every iteration of the
+        -- tool-sub-loop; active_name/active_cfg are NOT mutated so the
+        -- user's :model selection survives the request.
+        local req_name, req_cfg = active_name, active_cfg
+        if config.routing and config.routing.auto then
+            local routed, class = router.classify_model(text, config)
+            -- Swap only when the classifier names a configured model that
+            -- differs from the user's active selection.
+            if routed and config.models[routed] and routed ~= active_name then
+                renderer.status(("routed to %s (%s class)"):format(
+                    routed, tostring(class)))
+                req_name, req_cfg = routed, config.models[routed]
+            end
+        end
+
         local depth = 0
         local final_resp = ""
         local first_iteration = true
@@ -298,7 +425,7 @@ function M.run(config)
         while true do
             local text_parts = {}
             local tool_calls_seen = {}
-            local ok, err = broker.chat_stream(active_cfg, ctx:to_messages(),
+            local ok, err = call_broker(req_cfg, req_name, ctx:to_messages(),
                 function(kind, payload)
                     if kind == "text" then
                         text_parts[#text_parts + 1] = payload
@@ -911,6 +1038,51 @@ function M.run(config)
                 renderer.status("usage: :safety {patterns|check}")
             end
         end,
+        -- :route {on|off|classes|check <text>}
+        --   on / off   set config.routing.auto
+        --   classes    list the class → model mapping, sorted by class
+        --   check      report what router.classify_model returns for <text>
+        route = function(args)
+            local sub, sub_args = args:match("^%s*(%S*)%s*(.*)$")
+            config.routing = config.routing or {}
+            if sub == "on" then
+                config.routing.auto = true
+                renderer.status("auto-routing on")
+            elseif sub == "off" then
+                config.routing.auto = false
+                renderer.status("auto-routing off")
+            elseif sub == "classes" then
+                local classes = config.routing.classes or {}
+                if next(classes) == nil then
+                    renderer.status("(no classes configured)"); return
+                end
+                -- pairs() order is unspecified; sort the keys so the
+                -- listing is the same on every invocation.
+                local names = {}
+                for k in pairs(classes) do names[#names + 1] = k end
+                table.sort(names, function(a, b)
+                    return tostring(a) < tostring(b)
+                end)
+                for _, k in ipairs(names) do
+                    io.write((" %-10s → %s\n"):format(
+                        tostring(k), tostring(classes[k])))
+                end
+            elseif sub == "check" then
+                local text = sub_args:match("^%s*(.-)%s*$")
+                if not text or text == "" then
+                    renderer.status("usage: :route check <text>"); return
+                end
+                local m, class = router.classify_model(text, config)
+                local extra = config.routing.auto and ""
+                    or " (routing currently disabled)"
+                -- Either result may be nil (no class → model mapping is
+                -- configured by default), so stringify both.
+                renderer.status(("class=%s model=%s%s"):format(
+                    tostring(class), tostring(m), extra))
+            else
+                renderer.status("usage: :route {on|off|classes|check}")
+            end
+        end,
+        -- :fallback {on|off} — set config.routing.fallback. Turning it on
+        -- also warns when the target model is missing from config.models,
+        -- because the retry path would then be skipped without notice.
+        fallback = function(args)
+            local sub = args:match("^%s*(%S*)")
+            config.routing = config.routing or {}
+            if sub == "on" then
+                config.routing.fallback = true
+                local target = config.routing.fallback_model or "cloud"
+                renderer.status(("cloud fallback on (target: %s)"):format(
+                    tostring(target)))
+                if not config.models[target] then
+                    renderer.status(("fallback model '%s' not found in "
+                        .. "config.models; retries will be skipped"):format(
+                        tostring(target)))
+                end
+            elseif sub == "off" then
+                config.routing.fallback = false
+                renderer.status("cloud fallback off")
+            else
+                renderer.status("usage: :fallback {on|off}")
+            end
+        end,
         help = function() io.write(HELP) end,
     }
 