From 76a8f970097652c7cad0de38efd6f80dec2d168f Mon Sep 17 00:00:00 2001 From: Markus Fritsche Date: Sun, 17 May 2026 08:21:25 +0000 Subject: [PATCH] repl: cloud preplanner + local executor split for Norris (closes #89) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 10 C4 — the orchestration commit. Splits Norris autonomous mode into a one-shot cloud preplan + per-step local executor flow, with graceful fall-back to single-model Norris when preplan is disabled or fails. run_norris additions (in order): 1. R4 fix: clear ctx.norris_active/_goal/_tasks at the TOP so a prior crashed Norris can't leak stale state into the new launch. 2. Preplan block (gated on cfg.norris.preplanner): - Look up the preplanner preset in cfg.models; warn + skip if absent. - Build a system prompt asking for TASK: lines (R1: %d via string.format — gsub("N", ...) would corrupt "No prose / commentary / numbering" to "16o prose"). - Scrub messages per the preplan model's redact policy; run broker.chat (non-streaming, per Q-PP2) with category "norris-preplan"; R7: respect pre_cfg.timeout_ms. - On success: rehydrate; record usage via _record_usage; extract_task_lines; cap to tasks_max; populate ctx.norris_tasks = { current = 1, list = parsed }. - On ANY failure (transport err / empty list / bogus preset): status log + leave ctx.norris_tasks nil → single-model fall-back. R3 design: NOT routed via call_broker; a fallback retry would silently swap planning models which is worse than a clean hard-fail. 3. Executor cfg resolution (independent of preplan per Q-PP1): cfg.norris.executor names a preset → executor_cfg = that cfg. Unset / missing preset → executor_cfg = active_cfg (existing :model-selection behavior). 4. Loop body: pass executor_cfg (not active_cfg) to safety.norris_step. After each "continue" result, advance ctx.norris_tasks.current. When current > #list, exit with synthesized status "tasks_complete" + reason "all N preplanned tasks executed". 5. Exit cleanup: clear ctx.norris_tasks alongside the existing norris_active/_goal clears so a re-launch starts fresh. renderer.norris_end gains "tasks_complete" as a non-error status (cyan, same as "done"). Distinct from "done" (executor said GOAL: complete) — executor exhausted the plan but didn't confirm goal, which is a clean exit, not an error. E2E verified (preplanner=fast, executor=fast on hossenfelder:8082): :norris print the date and the current uptime → preplanned 2 tasks via fast → ─ step 1/3 ─ Print the current date. → CMD: date → Sun May 17 ... → ─ step 2/3 ─ Print the current uptime. → CMD: uptime → ... up 1 day ... → NORRIS TASKS COMPLETE: all 2 preplanned tasks executed :cost detail correctly shows two rows for the same model: norris-preplan 1 calls, 95 / 12 tokens norris 1 calls, 364 / 9 tokens Fall-back verified: cfg.norris.preplanner = "doesnotexist" → "[aish] preplanner 'doesnotexist' is not in cfg.models; running single-model" → Norris runs as Phase 6. No-preplan path verified (no cfg.norris block): Norris runs exactly as Phase 6, no behavior change. Regression: 87/87 safety, 31/31 router_model, repl loads. Closes #89. Co-Authored-By: Claude Opus 4.7 (1M context) --- renderer.lua | 5 ++- repl.lua | 103 ++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 106 insertions(+), 2 deletions(-) diff --git a/renderer.lua b/renderer.lua index a3185ae..676d5aa 100644 --- a/renderer.lua +++ b/renderer.lua @@ -266,7 +266,10 @@ end -- Norris loop exit. status ∈ {"done", "aborted", "budget_exhausted", -- "stalled", "broker_error"}. function M.norris_end(status, reason) - local color = (status == "done") and A.cyan or A.red + -- Phase 10: "tasks_complete" is a success-ish exit (executor ran + -- through all preplanned tasks but didn't explicitly say GOAL: done). + local non_error = (status == "done") or (status == "tasks_complete") + local color = non_error and A.cyan or A.red local label = status:upper():gsub("_", " ") emit(A.bold, color, "─── NORRIS ", label, " ──", (" "):rep(math.max(0, 28 - #label)), diff --git a/repl.lua b/repl.lua index 657919f..8446f36 100644 --- a/repl.lua +++ b/repl.lua @@ -1308,14 +1308,100 @@ function M.run(config) end local function run_norris(goal) + -- Phase 10 R4: clear all Norris state at the TOP. If a prior + -- Norris session crashed (uncaught broker error) leaving + -- norris_active/_goal/_tasks stale, a fresh launch should + -- start clean. Cheaper than wrapping the driver in pcall. ctx.norris_active = true ctx.norris_goal = goal ctx.norris_consecutive_skips = 0 + ctx.norris_tasks = nil ctx:append_user(("[norris] %s"):format(goal)) log_turn(ctx.turns[#ctx.turns]) renderer.norris_begin(goal) + -- Phase 10 / #89: cloud preplanner emits a TASK list ONCE per + -- :norris launch when cfg.norris.preplanner names a model + -- preset. Cheap fall-back paths everywhere: any failure keeps + -- single-model Norris behavior intact. + if config.norris and config.norris.preplanner then + local pre_name = config.norris.preplanner + local pre_cfg = config.models and config.models[pre_name] + if not pre_cfg then + renderer.status(("preplanner '%s' is not in cfg.models; " + .. "running single-model"):format(pre_name)) + else + local sys = (config.norris.preplan_system) or [[ +You are a task decomposer. Given the user's goal, decompose it into a +sequence of single-step imperative TASKs. Output format: one TASK per +line, EXACTLY this shape: + + TASK: + +Output AT MOST %d tasks. No prose; no numbering; no commentary outside +the TASK: lines. +]] + -- R1: use %d/format; gsub("N", ...) would corrupt "No prose". + local tasks_max = config.norris.tasks_max or 16 + sys = string.format(sys, tasks_max) + + local msgs = scrub_messages({ + { role = "system", content = sys }, + { role = "user", content = goal }, + }, secrets_mode_for(pre_cfg)) + + local text, usage = broker.chat(pre_cfg, msgs, { + category = "norris-preplan", + max_tokens = 800, + -- R7: respect the configured per-model timeout. + timeout_ms = pre_cfg.timeout_ms or 60000, + }) + + if not text then + renderer.status(("preplan failed: %s; " + .. "running single-model"):format(tostring(usage))) + else + if secrets_session then + text = secrets_session:rehydrate(text) + end + if usage then + _record_usage(usage.model, usage.category, usage) + end + local parsed = executor.extract_task_lines(text) + if #parsed > tasks_max then + for i = #parsed, tasks_max + 1, -1 do parsed[i] = nil end + renderer.status(("preplan emitted >%d tasks; " + .. "truncated"):format(tasks_max)) + end + if #parsed == 0 then + renderer.status("preplan produced no TASK lines; " + .. "running single-model") + else + ctx.norris_tasks = { current = 1, list = parsed } + renderer.status(("preplanned %d tasks via %s") + :format(#parsed, pre_name)) + end + end + end + end + + -- Phase 10: resolve the EXECUTOR cfg independently of preplan + -- (Q-PP1: cfg.norris.executor applies even without preplanner). + -- Fall-back: unset OR not in cfg.models -> active_cfg (the user's + -- current :model selection — existing Phase 3 behavior). + local executor_cfg = active_cfg + if config.norris and config.norris.executor then + local exe_name = config.norris.executor + local exe_cfg = config.models and config.models[exe_name] + if exe_cfg then + executor_cfg = exe_cfg + else + renderer.status(("executor '%s' is not in cfg.models; " + .. "using active model"):format(exe_name)) + end + end + local helpers = { tools_schema = tools_schema, exec_cmd = norris_exec, @@ -1355,7 +1441,9 @@ function M.run(config) local step_n = 1 local final_status, final_reason while true do - local result = safety.norris_step(ctx, active_cfg, helpers, { + -- Phase 10: pass executor_cfg (resolved above) instead of + -- active_cfg; safety.norris_step signature unchanged. + local result = safety.norris_step(ctx, executor_cfg, helpers, { step_n = step_n, max_steps = max_norris_steps, cfg = config, @@ -1367,6 +1455,18 @@ function M.run(config) status_evictions(ctx:enforce_budget()) if result.status == "continue" then step_n = step_n + 1 + -- Phase 10: advance the task pointer after each + -- non-terminal step. When exhausted (current > #list), + -- exit with synthesized "tasks_complete" status. + if ctx.norris_tasks then + ctx.norris_tasks.current = ctx.norris_tasks.current + 1 + if ctx.norris_tasks.current > #ctx.norris_tasks.list then + final_status = "tasks_complete" + final_reason = string.format("all %d preplanned tasks executed", + #ctx.norris_tasks.list) + break + end + end else final_status, final_reason = result.status, result.reason break @@ -1375,6 +1475,7 @@ function M.run(config) ctx.norris_active = false ctx.norris_goal = nil + ctx.norris_tasks = nil -- Phase 10: clear on exit for clean re-launch renderer.norris_end(final_status, final_reason) end