-- repl.lua — Phase 5 excerpt (diff --git a/repl.lua b/repl.lua,
-- index b3add7d..6a07aa4): summarizing evicted context turns.
--
-- The same change extends the HELP text ("Meta commands:") so that it reads:
--   :memory clear       forget all active items (confirms first)
--   :memory inject      reload memory.jsonl into ctx (after manual edits)
--   :memory summarize   LLM-extract candidate items from this session
--   :route on/off       toggle auto-routing per-request (heuristic in router.lua)
--   :route classes      show current class → model mapping
--   :route check <text> report which class <text> would route to (debug)
--                       (NOTE(review): the "<text>" placeholder is reconstructed;
--                       it is missing from the extracted source — confirm)
--   :fallback on/off    toggle cloud retry when local transport fails
--   :help               this message
--
-- Everything below lives inside M.run(config), directly after the check that
-- ends in `.. "' not found in config.models")`. It replaces the former
-- one-liner `local ctx = Context.new(config.context or {})`.

-- Phase 5: render the evicted turns into a compact transcript for the
-- summarizer prompt. Same shape as :memory summarize uses.
--
-- turns: sequence of { role = ..., content = ... } tables (nil is treated as
--        an empty list).
-- Returns one "role: content" line per turn, joined with "\n". Newlines
-- inside a turn are flattened to spaces and each turn is cut to 600 bytes.
local function render_evicted(turns)
    local lines = {}
    for _, turn in ipairs(turns or {}) do
        local content = turn.content or ""
        -- Summarization is best-effort: a turn whose content is not a plain
        -- string must not abort the whole eviction with a runtime error.
        if type(content) ~= "string" then content = tostring(content) end
        -- tostring() on the role keeps "%s" safe on Lua 5.1 / LuaJIT, where
        -- string.format rejects nil.
        lines[#lines + 1] = ("%s: %s"):format(
            tostring(turn.role), content:gsub("\n", " "):sub(1, 600))
    end
    return table.concat(lines, "\n")
end

-- Phase 5: summarize_fn factory. Returns a closure that maps
-- (prior_summary, evicted_turns) onto a broker.chat call against the
-- configured summarizer model (config.context.summarizer_model, default
-- "fast"). Returns nil when that model is not present in config.models.
--
-- The closure itself returns the trimmed summary string, or nil on any
-- failure so Context falls back to silent eviction (Phase 0 behavior).
local function make_summarize_fn()
    local sum_name = (config.context and config.context.summarizer_model)
        or "fast"
    local sum_cfg = config.models[sum_name]
    if not sum_cfg then return nil end

    return function(prior, evicted)
        local body
        if evicted == nil then
            -- No new turns: only re-compress the existing summary.
            body = "Compress this prior summary into 2-3 sentences. "
                .. "Keep names, facts, decisions; drop chatter.\n\n"
                .. "Prior summary:\n" .. (prior or "")
        elseif prior and prior ~= "" then
            -- Fold the newly evicted turns into the running summary.
            body = "Extend this prior summary with the new turns. "
                .. "Keep it 2-4 sentences. Preserve names, facts, decisions.\n\n"
                .. "Prior summary:\n" .. prior
                .. "\n\nNew turns:\n" .. render_evicted(evicted)
        else
            -- First eviction: nothing to extend yet.
            body = "Summarize the following conversation turns in "
                .. "2-3 sentences. Preserve names, facts, decisions.\n\n"
                .. render_evicted(evicted)
        end

        local reply, err = broker.chat(sum_cfg, {
            { role = "system", content =
                "Output exactly one short summary paragraph. "
                .. "No commentary, no markdown, no bullet lists." },
            { role = "user", content = body },
        }, { max_tokens = 300, timeout_ms = 30000 })
        if not reply then
            renderer.status("context summarize failed: " .. tostring(err))
            return nil
        end
        -- Parenthesized on purpose: string.gsub returns (result, count), and
        -- a call in tail position would hand the count to the caller as a
        -- second return value. Only the trimmed summary is part of the
        -- contract.
        return (reply:gsub("^%s+", ""):gsub("%s+$", ""))
    end
end

-- Build Context with optional summarize_fn (gated by cfg flag). The options
-- are shallow-copied so config.context itself is never mutated.
local ctx_opts = {}
if config.context then
    for k, v in pairs(config.context) do ctx_opts[k] = v end
    if config.context.summarize_on_evict then
        ctx_opts.summarize_fn = make_summarize_fn()
    end
end
local ctx = Context.new(ctx_opts)

-- Phase 2: MCP sessions. Populated from config.mcp.servers at startup
-- (best-effort — failures are status-logged once, session absent from
-- … (comment and code continue in repl.lua, outside this excerpt)

-- ── Phase 5: fallback eligibility per PHASE5.md §5 ──────────────────
-- All transport-failure patterns must match against the err string
-- as broker.lua emits it (with "transport: " prefix). The matcher
-- strips the prefix before testing.
-- Lua patterns (not regexes) describing the transport failures that may be
-- retried on the fallback model. They are tested against the error text
-- after a leading "transport: " prefix has been removed, so "^" anchors
-- refer to the start of the stripped message.
local FALLBACK_PATTERNS = {
    "^HTTP 5%d%d",
    "^HTTP 404.*model_not_found",
    "^HTTP 408",
    "Couldn'?t resolve host",
    "Connection refused",
    "Timeout was reached",
    "Operation timed out",
}

-- Classify an error from broker.chat_stream.
--
-- err: the error value returned by the broker.
-- Returns the matched portion of the message (used as a short, printable
-- reason) when err is a string matching one of FALLBACK_PATTERNS, else nil.
-- A non-string err matches no pattern and therefore yields nil: a failure
-- that cannot be identified as a transport problem must not be treated as
-- fallback-eligible.
local function fallback_reason(err)
    if type(err) ~= "string" then return nil end
    -- Single-name local keeps only gsub's first result (the string).
    local stripped = err:gsub("^transport:%s*", "")
    for _, pat in ipairs(FALLBACK_PATTERNS) do
        local hit = stripped:match(pat)
        if hit then return hit end
    end
    return nil
end

-- True when fallback is switched on (config.routing.fallback) and err is a
-- fallback-eligible transport failure; false otherwise.
local function should_fallback(err)
    local routing = config.routing
    if not (routing and routing.fallback) then return false end
    return fallback_reason(err) ~= nil
end

-- Wrap broker.chat_stream with the Phase 5 fallback-retry path.
-- Retries ONCE against config.routing.fallback_model (default "cloud")
-- when (a) config.routing.fallback is true, (b) err matches a
-- fallback-eligible pattern, AND (c) no deltas have arrived yet
-- (mid-stream failures aren't retried — partial text would be
-- duplicated).
--
-- model_cfg / model_name: the model to try first and its display name.
-- msgs, on_delta, opts:   passed through to broker.chat_stream unchanged
--                         (on_delta is wrapped only to observe activity).
-- Returns broker.chat_stream's result: a truthy value on success, otherwise
-- the falsy result plus the error of the last attempt that was made.
local function call_broker(model_cfg, model_name, msgs, on_delta, opts)
    local any_delta = false
    local function wrapped(kind, payload)
        any_delta = true
        return on_delta(kind, payload)
    end

    local ok, err = broker.chat_stream(model_cfg, msgs, wrapped, opts)
    if ok then return ok end
    if any_delta then return ok, err end -- mid-stream — don't retry

    if not should_fallback(err) then return ok, err end

    local fb_name = (config.routing and config.routing.fallback_model)
        or "cloud"
    local fb_cfg = config.models[fb_name]
    if not fb_cfg then return ok, err end
    -- The request already ran against the fallback model: repeating it
    -- would not be a fallback, and the "local ... failed" notice below
    -- would be wrong.
    if fb_cfg == model_cfg or fb_name == model_name then return ok, err end

    renderer.status(("local %s failed (%s); retrying via %s")
        :format(model_name, fallback_reason(err), fb_name))
    return broker.chat_stream(fb_cfg, msgs, wrapped, opts)
end

-- Run a shell command, framing output and (per config.shell.capture_output)
-- buffering it for the NEXT user turn — context.append_exec_output keeps
-- a [exec output] block pending until ask_ai flushes it via append_user.
-- ── repl.lua, inside M.run: start of ask_ai, after the user turn is
--    appended and logged ─────────────────────────────────────────────
ctx:append_user(text)
log_turn(ctx.turns[#ctx.turns])

-- Phase 5 R-C2: routing decision taken ONCE on entry to ask_ai.
-- req_name/req_cfg are used for every iteration of the
-- tool-sub-loop; active_name/active_cfg are NOT mutated so the
-- user's :model selection survives the request.
local req_name, req_cfg = active_name, active_cfg
if config.routing and config.routing.auto then
    local routed, class = router.classify_model(text, config)
    -- Only switch when the router names a configured model that differs
    -- from the active one.
    if routed and config.models[routed] and routed ~= active_name then
        -- tostring(): "%s" rejects nil on Lua 5.1 / LuaJIT, and a status
        -- line must never abort the request.
        renderer.status(("routed to %s (%s class)")
            :format(routed, tostring(class)))
        req_name, req_cfg = routed, config.models[routed]
    end
end

local depth = 0
local final_resp = ""
local first_iteration = true

-- … further down, in the `while true do` tool sub-loop, the streaming call
-- now uses the per-request model and the fallback wrapper instead of
-- broker.chat_stream(active_cfg, …):
--
--     local ok, err = call_broker(req_cfg, req_name, ctx:to_messages(),
--         function(kind, payload)
--             if kind == "text" then
--                 text_parts[#text_parts + 1] = payload
--             … (callback continues outside this excerpt)

-- ── entries of the meta-command table. The table constructor opens outside
--    this excerpt; these entries follow the one whose usage line is
--    "usage: :safety {patterns|check}", and the constructor closes right
--    after `help`. ─────────────────────────────────────────────────────

-- :route {on|off|classes|check <text>}
--   on / off  set config.routing.auto
--   classes   print the configured class → model mapping, sorted by class
--             name so the listing is stable between runs
--   check     report the class/model router.classify_model picks for <text>
-- args: the text following ":route" (nil is treated as empty).
route = function(args)
    local sub, sub_args = (args or ""):match("^%s*(%S*)%s*(.*)$")
    config.routing = config.routing or {}
    local routing = config.routing

    if sub == "on" then
        routing.auto = true
        renderer.status("auto-routing on")
    elseif sub == "off" then
        routing.auto = false
        renderer.status("auto-routing off")
    elseif sub == "classes" then
        local classes = routing.classes or {}
        if next(classes) == nil then
            renderer.status("(no classes configured)")
            return
        end
        -- pairs() order is unspecified; collect and sort the keys first.
        local names = {}
        for name in pairs(classes) do names[#names + 1] = name end
        table.sort(names, function(a, b) return tostring(a) < tostring(b) end)
        for _, name in ipairs(names) do
            io.write((" %-10s → %s\n"):format(
                tostring(name), tostring(classes[name])))
        end
    elseif sub == "check" then
        local text = sub_args:match("^%s*(.-)%s*$")
        if not text or text == "" then
            renderer.status("usage: :route check <text>")
            return
        end
        local m, class = router.classify_model(text, config)
        local extra = routing.auto and ""
            or " (routing currently disabled)"
        -- Both values may be nil when the router has no opinion.
        renderer.status(("class=%s model=%s%s"):format(
            tostring(class), tostring(m), extra))
    else
        renderer.status("usage: :route {on|off|classes|check}")
    end
end,

-- :fallback {on|off} — set config.routing.fallback, the switch consulted
-- before a failed request is retried on config.routing.fallback_model
-- (default "cloud").
-- args: the text following ":fallback" (nil is treated as empty).
fallback = function(args)
    local sub = (args or ""):match("^%s*(%S*)")
    config.routing = config.routing or {}
    if sub == "on" then
        config.routing.fallback = true
        renderer.status(("cloud fallback on (target: %s)"):format(
            config.routing.fallback_model or "cloud"))
    elseif sub == "off" then
        config.routing.fallback = false
        renderer.status("cloud fallback off")
    else
        renderer.status("usage: :fallback {on|off}")
    end
end,

help = function() io.write(HELP) end,