b30212af0f
Closes the last two broker call sites that flow through safety.lua.
Together with commits #1-#3, all 7 broker call sites in aish now
attribute usage to the cost accumulator under the right category.
Changes:
safety.lua:
- llm_probe (the YES/NO destructive checker) — broker.chat call
gains opts.category = "probe". Captures (text, usage) via
(reply, second) and, when opts.on_usage is provided AND the
call succeeded, routes second through opts.on_usage(model,
category, payload). N4 signature chain: opts already flows
M.is_destructive -> llm_second_opinion -> llm_probe from #52's
work; opts.on_usage rides along naturally with no further
signature change.
- M.norris_step (Norris main broker round-trip):
* opts to broker.chat_stream gains category = "norris"
* probe_opts (passed to is_destructive inside the loop)
gains on_usage = helpers.on_usage so the LLM probe's
cost lands under "probe" too
* on_delta wrapper adds elseif kind == "usage" branch that
calls helpers.on_usage(payload.model, payload.category,
payload). Coexists cleanly with the existing text (rehydrator)
and tool_call branches.
repl.lua:
- Norris helpers table gains on_usage = _record_usage. The R5
central chokepoint (commit #3) does the warn-threshold check
AND ctx:add_usage atomically.
- :safety check meta's probe_opts always carries on_usage now
(independently of whether secrets_session is set). secrets-aware
scrub_msgs/rehydrate added conditionally as before.
E2E verified against live broker (safety.llm_model = "cloud"):
- :safety check ls -la /tmp -> 2 cloud probe calls
- "[aish] session cost $0.000128 has crossed warn_at_dollars=$0.000100"
- probe category visible in accumulator (would appear in :cost detail
once commit #5 ships the meta).
Regression: test_safety 87/87, test_router_model 31/31, repl loads.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
547 lines
25 KiB
Lua
547 lines
25 KiB
Lua
-- safety.lua — workflow safeguards for tool execution.
|
|
-- Phase 2: M.confirm_tool_call (per-call confirm gate + auto-approve policy).
|
|
-- Phase 3: M.is_destructive (static pattern + LLM second-opinion gate for
|
|
-- Norris autonomous mode) and M.norris_step (single-iteration
|
|
-- planning loop). See docs/PHASE2.md §6 and docs/PHASE3.md §4 / §5.
|
|
-- Issue #9: M.classify_command (allow/confirm/deny rule list — interactive
|
|
-- CMD: gate, supersedes the confirm_cmd boolean when configured).
|
|
|
|
local rl = require("ffi.readline")
|
|
local json = require("dkjson")
|
|
local broker = require("broker")
|
|
|
|
local M = {}
|
|
|
|
-- ---------------------------------------------------------------- classify_command
|
|
-- Walk config.permissions (allow / confirm / deny rule lists) against `cmd`
|
|
-- in priority order: deny > confirm > allow. First match in the chosen
|
|
-- category wins. Returns the verdict string and the matching pattern (for
|
|
-- status messages); falls back to the legacy confirm_cmd boolean when no
|
|
-- permissions table is configured. Default verdict when permissions is set
|
|
-- but no rule matches is "confirm" — per the issue body.
|
|
-- verdict ∈ "allow" | "confirm" | "deny"
|
|
-- Return the first entry of `rules` (a list of Lua patterns) that `cmd`
-- matches, or nil when `rules` is absent or no entry matches. Entries are
-- tried in list order, so earlier rules take precedence.
local function _match_any(cmd, rules)
    local hit = nil
    if rules then
        for _, pattern in ipairs(rules) do
            if cmd:find(pattern) then
                hit = pattern
                break
            end
        end
    end
    return hit
end
|
|
-- Classify `cmd` as "allow", "confirm" or "deny".
--
-- With cfg.permissions set, its rule lists are consulted in priority order
-- deny > confirm > allow; the first matching pattern decides, and a command
-- that matches nothing is "confirm". Without a permissions table the legacy
-- cfg.shell.confirm_cmd boolean decides between "confirm" and "allow".
--
-- Returns: verdict string, matching pattern (nil when no rule matched).
function M.classify_command(cmd, cfg)
    local permissions = cfg and cfg.permissions
    if not permissions then
        -- Legacy path: a single boolean gate.
        local legacy_confirm = cfg and cfg.shell and cfg.shell.confirm_cmd
        if legacy_confirm then
            return "confirm", nil
        end
        return "allow", nil
    end

    for _, verdict in ipairs({ "deny", "confirm", "allow" }) do
        local matched = _match_any(cmd, permissions[verdict])
        if matched then
            return verdict, matched
        end
    end
    return "confirm", nil
end
|
|
|
|
-- Format a tool call as `name({"path":"/tmp"})` for the confirm prompt.
-- The JSON-encoded arguments are capped at 80 characters (77 + "...") so
-- the prompt stays on one line; nil/empty args render as `name()`, and an
-- encoding failure renders as `name(...)`.
local function pretty_call(name, args)
    local rendered = ""
    local has_args = args and next(args)
    if has_args then
        local encoded_ok, text = pcall(json.encode, args)
        if not encoded_ok then
            rendered = "..."
        elseif #text > 80 then
            rendered = text:sub(1, 77) .. "..."
        else
            rendered = text
        end
    end
    return name .. "(" .. rendered .. ")"
end
|
|
|
|
-- Decide whether tool `name` may be called with `args`.
--
-- `cfg.mcp.auto_approve` is consulted first. Policy keys:
--   "<alias>__<tool>"  → exact-match auto-approve
--   "<alias>__*"       → whole-server auto-approve
-- Anything else falls back to a [y/N] readline prompt; only an answer
-- starting with "y"/"Y" approves (empty input or EOF rejects).
-- The separator switched from "." to "__" 2026-05-12 because Anthropic via
-- Bedrock rejects dots in tool names (regex ^[a-zA-Z0-9_-]{1,128}$).
--
-- Returns: boolean — true when the call is approved.
function M.confirm_tool_call(name, args, cfg)
    local auto_approve = (cfg and cfg.mcp and cfg.mcp.auto_approve) or {}

    -- Exact tool entry.
    if auto_approve[name] then
        return true
    end

    -- Whole-server wildcard: everything before the first "__" is the alias.
    local server_alias = name:match("^(.-)__")
    if server_alias and server_alias ~= "" then
        if auto_approve[server_alias .. "__*"] then
            return true
        end
    end

    -- No policy hit: ask interactively.
    local question = ("call '%s'? [y/N] "):format(pretty_call(name, args))
    local answer = rl.readline(question) or ""
    return answer:lower():sub(1, 1) == "y"
end
|
|
|
|
-- ---------------------------------------------------------------- is_destructive
-- Static destructive-command rules: the fast, deterministic first layer of
-- M.is_destructive. Patterns are Lua patterns (NOT regex). When `ci = true`
-- is set on a rule, the matcher lowercases the input before matching so
-- the rule matches case-insensitively (`DROP TABLE`, `drop table`, etc.).
-- See docs/PHASE3.md §5 for the rationale and the wrapper-bypass class
-- (R-B1) that the shell-wrapper entries at the top guard against.
--
-- Each rule is { pat = <Lua pattern>, reason = <label reported on a hit>,
-- ci = <optional boolean> }. Rules are tried in order; the first hit wins.

local DESTRUCTIVE_PATTERNS = {
    -- ── Shell wrappers (R-B1) — flag the wrapper itself; can't inspect
    --    the inner content safely without parsing the inner shell.
    --    Norris HALTs on these unconditionally; the user reads the inner
    --    before proceeding.
    { pat = "^%s*bash%s+%-l?c%s",        reason = "bash -c (wrapped shell)" },
    { pat = "^%s*sh%s+%-l?c%s",          reason = "sh -c (wrapped shell)" },
    { pat = "^%s*zsh%s+%-l?c%s",         reason = "zsh -c (wrapped shell)" },
    { pat = "^%s*eval%s",                reason = "eval (dynamic shell)" },
    { pat = "^%s*python3?%s+%-c%s",      reason = "python -c (inline script)" },
    { pat = "^%s*perl%s+%-e%s",          reason = "perl -e (inline script)" },
    { pat = "|%s*sh%s",                  reason = "pipe-to-sh" },
    { pat = "|%s*sh%s*$",                reason = "pipe-to-sh (eol)" },
    { pat = "|%s*bash%s",                reason = "pipe-to-bash" },
    { pat = "|%s*bash%s*$",              reason = "pipe-to-bash (eol)" },
    { pat = "xargs%s+.-rm",              reason = "xargs ... rm" },

    -- ── Filesystem destructive
    { pat = "rm%s+.-%-rf?",              reason = "rm -rf" },
    { pat = "rm%s+.-%-fr?",              reason = "rm -fr" },
    { pat = "find%s+.-%-delete",         reason = "find -delete" },
    { pat = "find%s+.-%-exec%s+rm",      reason = "find -exec rm" },
    { pat = ">%s*/dev/sd[a-z]",          reason = "write to raw disk" },
    { pat = "dd%s+.-of=/dev/",           reason = "dd to device" },
    { pat = "mkfs%.",                    reason = "mkfs (format)" },
    { pat = "shred%s",                   reason = "shred" },
    { pat = "wipefs%s",                  reason = "wipefs" },
    { pat = "truncate%s+.-%-s%s*0",      reason = "truncate to zero" },

    -- ── Version control destructive
    { pat = "git%s+push%s+.-%-%-force",  reason = "git push --force" },
    -- `%f[^%w]` is an end-of-word frontier: it matches before whitespace,
    -- punctuation AND at end of string. The earlier `%f[%s]` required a
    -- literal whitespace character after "-f", so a trailing flag
    -- ("git push origin main -f") slipped past the static layer.
    { pat = "git%s+push%s+.-%-f%f[^%w]", reason = "git push -f" },
    { pat = "git%s+reset%s+.-%-%-hard",  reason = "git reset --hard" },
    { pat = "git%s+clean%s+.-%-fd?",     reason = "git clean -fd" },
    { pat = "git%s+branch%s+.-%-D",      reason = "git branch -D" },

    -- ── Database / process
    -- ci=true rules use lowercase patterns; the matcher lowercases the
    -- input before testing. Don't use uppercase patterns with ci=true.
    { pat = "drop%s+table",              reason = "DROP TABLE",     ci = true },
    { pat = "drop%s+database",           reason = "DROP DATABASE",  ci = true },
    { pat = "truncate%s+table",          reason = "TRUNCATE TABLE", ci = true },
    -- pkill BEFORE kill so the more specific match wins (rules are tried
    -- in list order; first hit reports the reason).
    { pat = "pkill%s+%-9",               reason = "pkill -9" },
    -- kill -9 needs a word boundary so "pkill -9" doesn't match this rule's
    -- "kill" substring. %f[%w] is Lua's frontier pattern — matches a
    -- transition from non-word to word characters.
    { pat = "%f[%w]kill%s+%-9",          reason = "kill -9" },

    -- ── Network/permission
    { pat = "chmod%s+.-777",             reason = "chmod 777" },
    { pat = "chown%s+.-%s+/%s*$",        reason = "chown on root path" },
}
|
|
|
|
-- Run `cmd` through DESTRUCTIVE_PATTERNS in list order.
-- Returns (true, reason) for the first rule that matches and (false, nil)
-- when none does, or when `cmd` is not a non-empty string. Static-only:
-- the LLM probe is never invoked from here (M.is_destructive layers that
-- on top after calling this).
local function match_static(cmd)
    if type(cmd) ~= "string" or cmd == "" then
        return false, nil
    end

    local folded -- lowercase copy of cmd, built only if a ci rule is reached
    for _, rule in ipairs(DESTRUCTIVE_PATTERNS) do
        local subject
        if rule.ci then
            folded = folded or cmd:lower()
            subject = folded
        else
            subject = cmd
        end
        if subject:find(rule.pat) then
            return true, rule.reason
        end
    end
    return false, nil
end
|
|
|
|
-- ---------------------------------------------------------------- LLM probe
-- Session-scoped cache for the LLM second-opinion. Keyed by the normalized
-- (lowercased, whitespace-collapsed) command text. Mitigates Q23 latency
-- when the same command pattern recurs within a single Norris run.
local _llm_cache = {}

-- Build the cache key for `cmd`: lowercase it, collapse every whitespace
-- run to a single space, and trim both ends.
-- Returns exactly one value. string.gsub also returns a replacement count;
-- it is dropped here so it cannot leak into a caller's argument list.
local function normalize(cmd)
    local collapsed = cmd:lower():gsub("%s+", " ")
    local trimmed = collapsed:gsub("^%s+", ""):gsub("%s+$", "")
    return trimmed
end
|
|
|
|
-- Per-probe timeout. The probe must be quick — destructive detection has
-- to keep up with Norris's pace. We override the model's default timeout
-- (which can be 30+ min for deep/slow local models) with a tight cap.
local PROBE_TIMEOUT_MS = 15000

-- Ask `model_cfg` a YES/NO question about `cmd`.
--
-- Parameters:
--   model_cfg  broker model config used for the probe
--   system     system prompt that frames the YES/NO question
--   cmd        command text, sent as the user message
--   opts       optional table of callbacks:
--     .scrub_msgs(msgs, model_cfg) → msgs  scrub the outbound message pair
--                                          (Issue #52)
--     .rehydrate(text) → text              applied to the reply before it
--                                          is parsed
--     .on_usage(model, category, usage)    receives the usage payload of a
--                                          successful call (Phase 7)
--
-- Returns a verdict string (not a bool — the caller cares about
-- disagreement between probes):
--   "YES" / "NO"              the reply contains that word
--   "YES_FAILSAFE", err       broker.chat returned no reply
--   "YES_UNPARSEABLE", reply  the reply contains neither word
--
-- YES and NO are matched as whole words. A plain substring test read
-- "UNKNOWN", "CANNOT" or "NOT SURE" as a NO verdict; those replies now fall
-- through to the unparseable (fail-safe) branch.
local function llm_probe(model_cfg, system, cmd, opts)
    local msgs = {
        { role = "system", content = system },
        { role = "user",   content = cmd },
    }
    if opts and opts.scrub_msgs then
        msgs = opts.scrub_msgs(msgs, model_cfg)
    end

    -- Phase 7: category = "probe" tags the usage so probe spend can be
    -- reported separately. broker.chat returns (text, usage) on success;
    -- on failure `reply` is nil and `second` carries the error.
    local reply, second = broker.chat(model_cfg, msgs,
        { max_tokens = 4, timeout_ms = PROBE_TIMEOUT_MS, category = "probe" })
    if not reply then
        -- Broker failure → safe default: treat as YES (destructive).
        return "YES_FAILSAFE", second
    end

    -- Phase 7 (N4): hand the usage payload to the caller's accumulator.
    if second and opts and opts.on_usage then
        opts.on_usage(second.model, second.category, second)
    end

    if opts and opts.rehydrate then reply = opts.rehydrate(reply) end

    -- %f[%a] / %f[%A] are frontiers marking the start / end of a run of
    -- letters, so only the standalone words YES and NO count.
    local upper = reply:upper()
    if upper:find("%f[%a]YES%f[%A]") then return "YES" end
    if upper:find("%f[%a]NO%f[%A]") then return "NO" end

    -- Unparseable response → treat as YES (safe default).
    return "YES_UNPARSEABLE", reply
end
|
|
|
|
-- LLM second-opinion gate. Returns (is_destructive, reason):
--   true,  "LLM probe unavailable (no model in config)"  no probe model
--   true,  "LLM flagged as destructive"     first probe said YES
--   true,  "LLM probe failed (fail-safe)"   first probe got no reply
--   true,  "LLM unparseable (fail-safe)"    first probe reply was neither
--   true,  "LLM probe disagreement"         first NO, but the inverted
--                                           "is it safe?" probe was not YES
--   false, nil                              both probes agree it is safe
-- Every outcome except "probe unavailable" is cached per normalized
-- command for the session.
--
-- Model-selection precedence (per PHASE3.md §5):
--   1. cfg.safety.llm_model — explicit model-name from config (e.g. "cloud")
--   2. cfg.models.deep — local independent-class model (qwen3-30b)
--   3. cfg.models[cfg.default_model] — whatever the user runs by default
--      ("fast" when default_model is unset)
local function llm_second_opinion(cmd, cfg, opts)
    -- Resolve the probe model.
    local models = cfg and cfg.models
    local probe_cfg
    if models and cfg.safety and cfg.safety.llm_model then
        probe_cfg = models[cfg.safety.llm_model]
    end
    if not probe_cfg and models then
        probe_cfg = models.deep or models[cfg.default_model or "fast"]
    end
    if not probe_cfg then
        -- No probe model available → fail-safe YES
        return true, "LLM probe unavailable (no model in config)"
    end

    -- Session cache lookup.
    local key = normalize(cmd)
    local entry = _llm_cache[key]
    if entry ~= nil then
        if not entry[1] then
            return false, nil
        end
        return entry[1], entry[2]
    end

    -- Store the outcome under `key` and hand it straight back.
    local function remember(verdict, why)
        _llm_cache[key] = { verdict, why }
        return verdict, why
    end

    -- Probe 1: "Is this destructive?" — any YES-flavoured verdict halts.
    local halt_reasons = {
        YES             = "LLM flagged as destructive",
        YES_FAILSAFE    = "LLM probe failed (fail-safe)",
        YES_UNPARSEABLE = "LLM unparseable (fail-safe)",
    }
    local first = llm_probe(probe_cfg,
        "Detect destructive shell commands. Reply YES if running this "
        .. "would delete, overwrite, or irreversibly modify data, "
        .. "processes, or system state. Otherwise reply NO. Respond "
        .. "with only YES or NO.", cmd, opts)
    local halt_reason = halt_reasons[first]
    if halt_reason then
        return remember(true, halt_reason)
    end

    -- Probe 2 (NO → re-roll with inverted phrasing per R-B2): only an
    -- explicit YES ("it is safe") clears the command.
    local second = llm_probe(probe_cfg,
        "Reply YES or NO only. Is the following shell command SAFE to "
        .. "run autonomously without user review?", cmd, opts)
    if second ~= "YES" then
        -- Disagreement or fail-safe → HALT
        return remember(true, "LLM probe disagreement")
    end
    return remember(false, nil)
end
|
|
|
|
-- Main entry point. Returns (true, reason) if EITHER the static patterns
-- OR the LLM second-opinion flag the command; (false, nil) otherwise,
-- including when `cmd` is not a non-empty string. Used by the Norris loop.
--
-- `cfg` is the full aish config (carries cfg.safety.llm_second_opinion
-- and cfg.models for the probe model lookup). When cfg is nil, only the
-- static layer runs (handy for unit tests and tooling that wants the
-- fast deterministic gate without an LLM round-trip). The LLM layer can
-- also be switched off with cfg.safety.llm_second_opinion = false.
--
-- Issue #52: opts.scrub_msgs(messages, model_cfg) + opts.rehydrate(text)
-- callbacks let the LLM probe scrub the outbound cmd before sending and
-- rehydrate the YES/NO verdict before parsing. Both optional; absent
-- opts = no-op (backwards-compatible). `opts` is forwarded untouched to
-- the LLM layer.
function M.is_destructive(cmd, cfg, opts)
    if type(cmd) ~= "string" or cmd == "" then
        return false, nil
    end

    -- Layer 1: static patterns (fast, deterministic).
    local static_hit, static_reason = match_static(cmd)
    if static_hit then
        return true, static_reason
    end

    -- Layer 2: LLM second-opinion. Default ON when cfg is present; off
    -- when cfg is nil or explicitly disabled.
    local llm_enabled = cfg ~= nil
        and not (cfg.safety and cfg.safety.llm_second_opinion == false)
    if not llm_enabled then
        return false, nil
    end
    return llm_second_opinion(cmd, cfg, opts)
end
|
|
|
|
-- Internals exposed for the `:safety patterns` meta and for testing.
M._patterns     = DESTRUCTIVE_PATTERNS   -- the static rule table itself
M._match_static = match_static           -- testable in isolation

-- Drop every cached LLM verdict by swapping in a fresh cache table.
function M._reset_cache()
    _llm_cache = {}
end
|
|
|
|
-- ---------------------------------------------------------------- norris_step
-- One iteration of the Norris planning loop per PHASE3.md §4.
-- The driver in repl.lua calls this in a while loop, advancing on every
-- non-terminal status.
--
-- Inputs:
--   ctx        aish Context (read & written: turns + pending_exec_output;
--              also carries the norris_consecutive_skips counter)
--   model_cfg  the active broker model config (model_cfg.endpoint/.model/etc.)
--   helpers    table of injected dispatch helpers:
--     .tools_schema()            → tools array for opts.tools
--     .exec_cmd(cmd)             → run shell cmd; returns (out, exit_code)
--     .dispatch_tool(name, args) → run an MCP tool; returns (content, is_error)
--     .extract_cmd_lines(text)   → executor.extract_cmd_lines (passed in)
--     .halt(step_n, max_n, reason, action) → "proceed"|"skip"|"abort"
--     .render_step(n, max_n, descr)        (renderer.norris_step)
--     .render_tool_begin(name, args)       (renderer.tool_call_begin)
--     .render_tool_end(content, is_error)  (renderer.tool_call_end)
--     .render_exec_begin()                 (renderer.exec_begin)
--     .render_exec_end(code)               (renderer.exec_end)
--     .render_assistant_delta(chunk)       (renderer.assistant_delta)
--     .render_assistant_flush()            (renderer.assistant_flush)
--     .log_turn(turn)                      (session log append)
--     Optional (each is skipped when absent):
--     .scrub_msgs(msgs, model_cfg) → msgs  scrub outbound messages (Issue #52)
--     .streaming_rehydrator()   → object with :push(chunk) and :flush()
--     .rehydrate(text)          → text; forwarded to the LLM probe
--     .on_usage(model, category, payload)  usage accounting (Phase 7)
--   opts (optional; defaults apply when omitted):
--     .step_n     current step (1-based, default 1)
--     .max_steps  budget cap (default 8)
--     .cfg        full aish config (for is_destructive / auto_approve)
--
-- Returns: { status, reason } where status ∈ {
--   "continue"          — keep looping (driver bumps step_n)
--   "done"              — model emitted GOAL: complete
--   "aborted"           — user typed abort at a halt prompt
--   "stalled"           — model emitted nothing actionable
--   "budget_exhausted"  — step_n >= max_steps after this iteration
--   "broker_error"      — broker.chat_stream returned (nil, err)
-- }
function M.norris_step(ctx, model_cfg, helpers, opts)
    opts = opts or {} -- tolerate a missing opts table; defaults below apply
    local step_n    = opts.step_n or 1
    local max_steps = opts.max_steps or 8
    local cfg       = opts.cfg

    helpers.render_step(step_n, max_steps)

    -- (1) one broker round-trip — stream text + collect tool_calls.
    --
    -- Issue #52: when helpers.scrub_msgs is provided, scrub outbound
    -- per the active model's redact policy; when helpers.streaming_rehydrator
    -- is provided, wrap on_delta so the user sees rehydrated text AND
    -- text_parts accumulates rehydrated chunks (so any extracted CMD: /
    -- DELEGATE: lines downstream see plain values — matches ask_ai's
    -- contract in repl.lua).
    local msgs = ctx:to_messages()
    if helpers.scrub_msgs then msgs = helpers.scrub_msgs(msgs, model_cfg) end
    local rehydrator = helpers.streaming_rehydrator and helpers.streaming_rehydrator() or nil

    -- Phase 7: thread the on_usage callback into the LLM probe via
    -- probe_opts so destructive-check costs land in the accumulator
    -- under the "probe" category. probe_opts stays nil when none of the
    -- three optional helpers is wired.
    local probe_opts = nil
    if helpers.scrub_msgs or helpers.rehydrate or helpers.on_usage then
        probe_opts = {
            scrub_msgs = helpers.scrub_msgs,
            rehydrate  = helpers.rehydrate,
            on_usage   = helpers.on_usage,
        }
    end

    local text_parts = {}
    local tool_calls_seen = {}
    local ok, err = broker.chat_stream(model_cfg, msgs,
        function(kind, payload)
            if kind == "text" then
                local emit = rehydrator and rehydrator:push(payload) or payload
                if emit ~= "" then
                    text_parts[#text_parts + 1] = emit
                    helpers.render_assistant_delta(emit)
                end
            elseif kind == "tool_call" then
                tool_calls_seen[#tool_calls_seen + 1] = payload
            elseif kind == "usage" then
                -- Phase 7: route Norris's own broker usage to the
                -- accumulator via helpers.on_usage.
                if helpers.on_usage then
                    helpers.on_usage(payload.model, payload.category, payload)
                end
            end
        end,
        { tools = helpers.tools_schema(), category = "norris" })

    -- Drain whatever the rehydrator is still holding back.
    if rehydrator then
        local tail = rehydrator:flush()
        if tail ~= "" then
            text_parts[#text_parts + 1] = tail
            helpers.render_assistant_delta(tail)
        end
    end
    helpers.render_assistant_flush()

    if not ok then
        return { status = "broker_error", reason = tostring(err) }
    end

    local resp_text = table.concat(text_parts)

    -- (2) parse actions from response
    local cmd_lines = helpers.extract_cmd_lines(resp_text) or {}
    local goal_done = false
    for line in (resp_text .. "\n"):gmatch("([^\n]*)\n") do
        local trimmed = line:gsub("^%s+", ""):gsub("%s+$", "")
        if trimmed == "GOAL: complete" then
            goal_done = true; break
        end
    end

    local n_actions = #tool_calls_seen + #cmd_lines

    -- (3) record assistant turn (with optional tool_calls)
    if #tool_calls_seen > 0 then
        ctx:append({ role = "assistant", content = resp_text,
                     tool_calls = tool_calls_seen })
    else
        ctx:append({ role = "assistant", content = resp_text })
    end
    helpers.log_turn(ctx.turns[#ctx.turns])

    if n_actions == 0 and not goal_done then
        return { status = "stalled", reason = "no action emitted" }
    end

    -- (4) dispatch tool_calls first (structured route)
    for _, call in ipairs(tool_calls_seen) do
        local args_table = {}
        if call.arguments and call.arguments ~= "" then
            local decoded, _, derr = json.decode(call.arguments)
            if type(decoded) == "table" then
                args_table = decoded
            else
                -- Arguments unusable: either the JSON did not parse, or it
                -- parsed to a scalar (number/string/boolean), which must
                -- not reach dispatch_tool as the args table. Synthesize a
                -- tool turn so assistant/tool alternation is preserved.
                if decoded ~= nil then
                    derr = "expected a JSON object, got " .. type(decoded)
                end
                ctx:append({ role = "tool", tool_call_id = call.id,
                             content = "[aish] tool arguments not "
                                    .. "parseable as JSON: " .. tostring(derr) })
                helpers.log_turn(ctx.turns[#ctx.turns])
                goto continue_tool
            end
        end

        -- Probe destructive on the JSON-serialized call as a proxy.
        local call_repr = (call.name or "?") .. " " .. (call.arguments or "")
        local destr, reason = M.is_destructive(call_repr, cfg, probe_opts)

        local verdict
        if destr then
            verdict = helpers.halt(step_n, max_steps, reason or "destructive",
                                   call_repr)
        else
            -- Non-destructive tool_call: auto_approve OR halt for consent
            local policy = cfg and cfg.mcp and cfg.mcp.auto_approve or {}
            local alias = (call.name or ""):match("^(.-)__")
            local auto = policy[call.name]
                or (alias and alias ~= "" and policy[alias .. "__*"])
            if auto then
                verdict = "proceed"
            else
                verdict = helpers.halt(step_n, max_steps, "tool consent",
                                       call_repr)
            end
        end

        if verdict == "abort" then
            return { status = "aborted", reason = "user abort at halt" }
        elseif verdict == "skip" then
            ctx.norris_consecutive_skips = (ctx.norris_consecutive_skips or 0) + 1
            ctx:append({ role = "tool", tool_call_id = call.id,
                         content = "[aish] tool call skipped by user: "
                                .. (reason or "no reason") })
            helpers.log_turn(ctx.turns[#ctx.turns])
        else -- proceed
            ctx.norris_consecutive_skips = 0
            helpers.render_tool_begin(call.name, call.arguments)
            local content, is_error = helpers.dispatch_tool(call.name, args_table)
            helpers.render_tool_end(content, is_error)
            ctx:append({ role = "tool", tool_call_id = call.id,
                         content = content or "" })
            helpers.log_turn(ctx.turns[#ctx.turns])
        end
        ::continue_tool::
    end

    -- (5) dispatch CMD: lines (legacy route)
    for _, cmd in ipairs(cmd_lines) do
        local destr, reason = M.is_destructive(cmd, cfg, probe_opts)
        local verdict
        if destr then
            verdict = helpers.halt(step_n, max_steps, reason or "destructive",
                                   cmd)
        else
            verdict = "proceed" -- non-destructive CMD: runs without consent
                                -- in Norris (Norris user accepted autonomy)
        end

        if verdict == "abort" then
            return { status = "aborted", reason = "user abort at halt" }
        elseif verdict == "skip" then
            ctx.norris_consecutive_skips = (ctx.norris_consecutive_skips or 0) + 1
            -- CMD: skip → synthesize exec-output line so the model sees it
            ctx:append_exec_output("[aish] CMD skipped by user: "
                                   .. (reason or "no reason"))
        else -- proceed
            ctx.norris_consecutive_skips = 0
            helpers.render_exec_begin()
            local out, code = helpers.exec_cmd(cmd)
            helpers.render_exec_end(code)
            if cfg and cfg.shell and cfg.shell.capture_output then
                ctx:append_exec_output(out)
            end
        end
    end

    -- Skip-budget escalation: R-C1
    if (ctx.norris_consecutive_skips or 0) >= 3 then
        local verdict = helpers.halt(step_n, max_steps,
            ("%d consecutive user skips"):format(ctx.norris_consecutive_skips),
            "(repeated similar destructive proposals)")
        if verdict == "abort" then
            return { status = "aborted", reason = "user abort on skip-escalation" }
        end
        -- Any other answer: reset the counter and continue.
        ctx.norris_consecutive_skips = 0
    end

    -- (6) goal_done after dispatch
    if goal_done then
        return { status = "done", reason = "GOAL: complete" }
    end

    -- (7) budget
    if step_n >= max_steps then
        return { status = "budget_exhausted",
                 reason = ("%d step limit reached"):format(max_steps) }
    end

    return { status = "continue" }
end
|
|
|
|
return M
|