packaging: bin/aish wrapper + examples/config.lua + LICENSE for v0.1.0 release

main.lua now resolves package.path relative to its own script directory rather than cwd, so the packaged install at /usr/share/lua/5.1/aish/ finds its siblings regardless of where the user invokes aish from. Dev mode (luajit main.lua from the repo root) is preserved: arg[0] is "main.lua" with no "/" so the regex returns nil and _dir falls back to "./" — identical to the previous behavior. bin/aish is a POSIX-sh wrapper that execs luajit against $AISH_LIB/main.lua (default /usr/share/lua/5.1/aish). The AISH_LIB env override lets users point at a dev checkout without uninstalling the package. Wrapper emits distinct errors when AISH_LIB is missing or when luajit isn't on PATH so broken installs surface clearly instead of through a bare sh: not found. examples/config.lua is the canonical commented reference, shipped at /usr/share/doc/aish/examples/config.lua. Stripped of the two live MCP bearer tokens carried by the in-tree config.lua and switched to the auth_env env-var indirection form; mcp.servers entries are commented out so a copy-to-~/.config/aish/config.lua produces a working starting point on first uncomment. HOSSENFELDER URL flagged as maintainer-LAN. LICENSE: MIT, copyright 2026 Markus Fritsche. README updated to match. Sonnet review of the changeset (per feedback_reviews_use_sonnet.md + bugfix-process step 4): no blockers; the two Important findings (USAGE text still said "luajit main.lua", bin/aish didn't pre-check luajit) and one Nit (unredacted HOSSENFELDER URL) were folded in before commit. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
config: example for context.summarize_every_n_turns (#101)
2026-05-25 00:21:55 +02:00 · 2026-05-17 09:21:16 +00:00 · 2026-05-17 09:20:56 +00:00 · 2026-05-17 09:18:15 +00:00 · 2026-05-17 09:18:02 +00:00 · 2026-05-17 08:22:22 +00:00
42 changed files with 14905 additions and 127 deletions
@@ -0,0 +1,16 @@
 {
  "permissions": {
    "allow": [
      "mcp__boltzmann-tools__read_file",
      "Bash(ping *)",
      "mcp__hub-tools__remote_list_hosts",
      "mcp__hub-tools__remote_read_file",
      "Bash(dig *)",
      "mcp__ohm-tools__read_file",
      "mcp__nc-tools__read_file",
      "mcp__boltzmann-tools__list_dir",
      "mcp__riemann-tools__search_files",
      "mcp__ampere-tools__read_file"
    ]
  }
 }
@@ -14,6 +14,9 @@ memory.jsonl
 # Local config overrides (committed config.lua is the example/default)
 config.local.lua
 # Per-user Claude Code permission grants — settings.json is shared, .local is private
 .claude/settings.local.json
 # OS noise
 .DS_Store
 Thumbs.db
@@ -211,3 +211,27 @@ Token in the URL: `git push https://<user>:<token>@git.reauktion.de/marfrit/aish
 The user has marfrit-level credentials available via a separate channel
 if needed for repo-admin operations.
 ---
 ## 12. Contribution flow
 Default for direct work: **commit straight to `main`**. No PR, no issue
 gate. This is what "non-PR-flow repo" means in §11.
 Two opt-in carve-outs:
 - **Feature requests and bugs → Gitea issues** at
  `git.reauktion.de/marfrit/aish/issues`. Don't implement feature
  requests in-band; file the issue, let marfrit triage. Tag
  `architecture` for cross-phase concerns. (Bug-filing convention is
  fleet-wide per the `his` cheatsheet; this row extends it to features
  for aish specifically.)
 - **Review-required iteration → PR**. When the medium needs to be the
  diff (inline comments per finding, refinable wording), open a PR
  authored as `claude-<host>` and let marfrit review. Self-approval
  forbidden. PR #1 (`marfrit/aish#1`, 2026-05-10) set the precedent —
  the MCP phase-2 question batch surfaced by review of `013c625`.
 When in doubt whether something is a feature request vs. an in-band fix,
 ask. Cheaper than the alternatives.
@@ -0,0 +1,21 @@
 MIT License
 Copyright (c) 2026 Markus Fritsche
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:
 The above copyright notice and this permission notice shall be included in all
 copies or substantial portions of the Software.
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
@@ -111,7 +111,7 @@ Replace these with your own llama.cpp endpoints if you're not on that LAN.
 ## License
-Not yet selected. Default-private until decided.
+MIT — see [`LICENSE`](LICENSE).
 ## Project conventions
@@ -0,0 +1,23 @@
 #!/bin/sh
 # aish — AI-augmented conversational shell launcher.
 # Source of truth: git.reauktion.de/marfrit/aish
 #
 # Installed by the aish package at /usr/bin/aish; execs LuaJIT against
 # the packaged main.lua under $AISH_LIB (default /usr/share/lua/5.1/aish).
 #
 # Dev mode: AISH_LIB=$HOME/src/aish aish ...
 # Default the library directory unless the caller already set AISH_LIB.
 : "${AISH_LIB:=/usr/share/lua/5.1/aish}"
 # die MSG...: print each message on its own line to stderr, then exit 2.
 die() {
    for line in "$@"; do
        echo "$line" >&2
    done
    exit 2
 }
 # A missing/unreadable main.lua and a missing luajit get distinct messages
 # so a broken install is diagnosable from the error text alone.
 [ -r "$AISH_LIB/main.lua" ] || die \
    "aish: $AISH_LIB/main.lua not found." \
    "aish: set AISH_LIB to the directory containing main.lua."
 command -v luajit >/dev/null 2>&1 || die \
    "aish: luajit not found in PATH. Install luajit."
 # Replace this shell with LuaJIT, forwarding every argument untouched.
 exec luajit "$AISH_LIB/main.lua" "$@"
@@ -1,15 +1,294 @@
 -- broker.lua — llama.cpp HTTP client.
-- Phase 0: blocking POST via libcurl FFI; SSE streaming wired in Phase 1.
+-- Phase 0: blocking POST via ffi/curl + vendored dkjson.
-- See docs/PHASE0.md §6.
+-- Phase 1: streaming POST via ffi/curl.post_sse with an OpenAI-shape decoder
 -- on top. M.chat becomes a thin buffering wrapper around M.chat_stream so the
 -- one streaming path covers both incremental and sync callers.
 -- Phase 2: optional opts.tools array passed through to the request body
 -- (omitted entirely when nil/empty per §12 risk row 1). The chat_stream
 -- on_delta callback widens to (kind, payload) where kind is "text" or
 -- "tool_call"; tool_call deltas are accumulated by `index` (default 0 if
 -- absent per C2) and emitted as complete records on finish_reason "tool_calls".
 -- broker.lua does NOT depend on mcp.lua — the caller assembles opts.tools
 -- and passes it in. See docs/PHASE0.md §6, PHASE1.md §3, PHASE2.md §3 / §5.
 local curl = require("ffi.curl")
 local json = require("dkjson")
 local M = {}
-- Send a /v1/chat/completions request.
+local function build_headers(model_cfg)
--   model_cfg: entry from config.models (endpoint, model, temperature, [key_env])
+    local h = { "Content-Type: application/json" }
--   messages:  list of { role = ..., content = ... } including system prompt
+    if model_cfg.key_env then
-- Returns: assistant content string on success, (nil, errmsg) on failure.
+        local key = os.getenv(model_cfg.key_env)
-function M.chat(model_cfg, messages)
+        if key and key ~= "" then
-    error("broker.chat: not implemented (Phase 0 pending)")
+            h[#h + 1] = "Authorization: Bearer " .. key
        end
    end
    return h
 end
 -- Phase 7 (A3): build_request widens to take an opts table; previously
 -- positional (tools, max_tokens). Both internal call sites (chat_stream
 -- and M.chat-via-chat_stream) updated. opts fields:
 --   .tools         per Phase 2 (omitted from body when nil/empty)
 --   .max_tokens    per Phase 3 (omitted when nil)
 --   .include_usage Phase 7 — default true; sets stream_options.include_usage
 --                  in the request body (B1: required for local llama.cpp
 --                  to emit usage; no-op for cloud which emits anyway).
 -- Assemble everything needed for one /v1/chat/completions POST.
 --   model_cfg  entry from config.models; .endpoint and .model are mandatory,
 --              .temperature and .timeout_ms are optional
 --   messages   chat messages, passed through to the body unchanged
 --   stream     truthy to request a streamed response
 --   opts       optional table: .tools, .max_tokens, .include_usage, .grammar
 -- Returns url, json_body, headers, timeout_ms; or nil, errmsg when the
 -- model config is incomplete.
 local function build_request(model_cfg, messages, stream, opts)
    local cfg_ok = model_cfg and model_cfg.endpoint and model_cfg.model
    if not cfg_ok then
        return nil, "broker: model_cfg.endpoint and .model are required"
    end
    opts = opts or {}
    local is_stream = not not stream
    -- Drop any trailing slashes so the joined URL never contains "//".
    local base = model_cfg.endpoint:gsub("/+$", "")
    local url  = base .. "/v1/chat/completions"
    local req  = {
        model       = model_cfg.model,
        messages    = messages,
        stream      = is_stream,
        temperature = model_cfg.temperature or 0.2,
    }
    -- Some servers reject "tools": [] (PHASE2.md §12), so the field is only
    -- present when there is at least one tool to offer.
    local tools = opts.tools
    if tools and #tools > 0 then
        req.tools = tools
    end
    -- Phase 3 (A2): optional completion cap; left out entirely when nil so
    -- the model's own default applies.
    if opts.max_tokens then
        req.max_tokens = opts.max_tokens
    end
    -- Phase 7 (B1): streamed requests ask for a usage block unless the
    -- caller opts out with opts.include_usage = false.
    if is_stream and opts.include_usage ~= false then
        req.stream_options = { include_usage = true }
    end
    -- #88: GBNF grammar is forwarded verbatim when supplied.
    if opts.grammar then
        req.grammar = opts.grammar
    end
    local payload    = json.encode(req)
    local headers    = build_headers(model_cfg)
    local timeout_ms = model_cfg.timeout_ms or 60000
    return url, payload, headers, timeout_ms
 end
 -- Streaming /v1/chat/completions.
 -- Signature widens vs Phase 1: opts is optional and may carry .tools.
 -- Phase 7 adds .include_usage (default true) + .category (echoed into
 -- the emitted usage payload for caller-side accumulator tagging).
 -- on_delta is called as on_delta(kind, payload):
 --   on_delta("text",      content_string)  - per text chunk
 --   on_delta("tool_call", { id, name, arguments }) - once per completed
 --                       tool call (on finish_reason "tool_calls").
 --   on_delta("usage",     { prompt_tokens, completion_tokens,
 --                           total_tokens, cost, model, category })
 --                       - Phase 7: emitted once after the stream
 --                       completes successfully, IF the provider sent
 --                       a usage block. Skipped on transport / API
 --                       errors. model is model_cfg.model (caller-
 --                       stable per B4 + R2); cost is nil for
 --                       providers that don't emit it (local llama.cpp);
 --                       category is opts.category or "main".
 -- Returns:
 --   true                  stream ended cleanly
 --   nil, errmsg           transport / API failure
 function M.chat_stream(model_cfg, messages, on_delta, opts)
    opts = opts or {}
    local url, body, headers, timeout_ms =
        build_request(model_cfg, messages, true, opts)
    -- On a bad model_cfg build_request returns (nil, errmsg), so the error
    -- text arrives in the `body` slot.
    if not url then return nil, body end
    -- Phase 3: opts.timeout_ms overrides the model's default. Used by
    -- safety.is_destructive's LLM probe to cap YES/NO checks at ~15s even
    -- when the model's normal timeout is much higher (e.g. user's deep
    -- model has 1800000ms for long generations).
    if opts.timeout_ms then timeout_ms = opts.timeout_ms end
    local done = false
    local api_err
    -- Tool-call accumulator keyed by index. Each slot is filled across
    -- many deltas: id+name come on the opener, arguments arrives as
    -- character-fragment JSON-string chunks (PHASE2-baseline.md §4).
    local tc_by_index = {}
    local tc_index_order = {}  -- preserve emission order
    local index_absent_warned = false
    -- Phase 7: usage captured from the final SSE chunk (per B2 either
    -- on a non-empty-choices chunk with finish_reason — cloud, or on a
    -- choices=[] chunk before [DONE] — local). Emitted as
    -- on_delta("usage", ...) AFTER curl.post_sse returns (B5).
    local final_usage = nil
    -- Handles one SSE data payload. Once `done` is set (by [DONE] or by an
    -- error envelope) every later event is ignored.
    local function on_event(data)
        if done then return end
        if data == "[DONE]" then done = true; return end
        local doc = json.decode(data)
        -- Ignore unparseable events, and also payloads that are valid JSON
        -- but not an object (a bare number, boolean or string): indexing a
        -- number or boolean below would raise inside the SSE callback.
        if type(doc) ~= "table" then return end
        -- Some servers emit an SSE-framed error envelope at the start of the
        -- stream — surface it.
        if doc.error then
            local m = (type(doc.error) == "table" and doc.error.message)
                        or tostring(doc.error)
            api_err = m
            done = true
            return
        end
        -- N1: usage branch is INDEPENDENT of the choice/delta branches.
        -- Check unconditionally — local emits usage on choices=[] chunks
        -- where `choice` is nil; cloud emits with non-empty choices.
        -- R2: payload.model is the caller-stable model_cfg.model (upvar),
        -- so call_broker's fallback retry naturally credits the right
        -- model — wrapper callers key by payload.model.
        if doc.usage then
            final_usage = {
                prompt_tokens     = doc.usage.prompt_tokens or 0,
                completion_tokens = doc.usage.completion_tokens or 0,
                total_tokens      = doc.usage.total_tokens or 0,
                cost              = doc.usage.cost,           -- nil for local (R6 preserves nil)
                model             = model_cfg.model,          -- caller-stable per B4/R2
                category          = opts.category or "main",
            }
            -- Don't emit yet; fired after curl.post_sse returns.
        end
        local choice = doc.choices and doc.choices[1]
        local delta  = choice and choice.delta
        -- Text path (unchanged from Phase 1 semantics; kind widened).
        -- Empty-string chunks are not forwarded.
        local content = delta and delta.content
        if type(content) == "string" and #content > 0 then
            on_delta("text", content)
        end
        -- Tool-call accumulation (Phase 2).
        local tcs = delta and delta.tool_calls
        if type(tcs) == "table" then
            for _, tc in ipairs(tcs) do
                local idx = tc.index
                if idx == nil then
                    idx = 0
                    if not index_absent_warned then
                        index_absent_warned = true
                        -- One-shot debug status per stream; printed to
                        -- stderr so it doesn't interleave with renderer
                        -- stdout output.
                        io.stderr:write(
                            "[aish] broker: tool_calls[].index absent; assuming 0\n")
                    end
                end
                local slot = tc_by_index[idx]
                if not slot then
                    -- First delta for this index: open a slot and record
                    -- the index so emission follows first-seen order.
                    slot = { id = nil, name = nil, arguments = "" }
                    tc_by_index[idx] = slot
                    tc_index_order[#tc_index_order + 1] = idx
                end
                if tc.id   then slot.id   = tc.id   end
                if tc["function"] then
                    local fn = tc["function"]
                    if fn.name then slot.name = fn.name end
                    if fn.arguments then
                        -- Argument text arrives in fragments; concatenate.
                        slot.arguments = slot.arguments .. fn.arguments
                    end
                end
            end
        end
        -- On finish_reason "tool_calls", emit all accumulated calls and
        -- reset the accumulator.
        if choice and choice.finish_reason == "tool_calls" then
            for _, idx in ipairs(tc_index_order) do
                on_delta("tool_call", tc_by_index[idx])
            end
            tc_by_index    = {}
            tc_index_order = {}
        end
    end
    local ok, err = curl.post_sse(url, body, headers, on_event, timeout_ms)
    -- An in-stream API error takes precedence over the transport result.
    if api_err then return nil, "api: " .. api_err end
    if not ok    then return nil, "transport: " .. tostring(err) end
    -- Phase 7 (B5): emit captured usage AFTER stream completes, as the
    -- last event in stream order. Skipped on transport/api errors (the
    -- accumulator stays unchanged for the failed call).
    if final_usage then on_delta("usage", final_usage) end
    return true
 end
 -- Send a /v1/chat/completions request and return the full assistant text.
 -- Thin buffering wrapper over M.chat_stream — same path as the streaming
 -- consumer, so the broker keeps one HTTP shape (stream:true always).
 -- M.chat's external contract widens in Phase 7 (R1): now returns
 -- (text, usage). Existing callers that ignore the second value continue
 -- to work — Lua silently drops extra return values. Callers that want
 -- cost/usage tracking do `local r, u = broker.chat(...)` and route u
 -- to ctx:add_usage via the central _record_usage helper.
 -- Tool-call kinds are still silently ignored (no caller of M.chat
 -- passes opts.tools).
 -- Returns:
 --   text, usage                       on success (usage may be nil if
 --                                     the provider didn't emit one)
 --   nil, errmsg                       on transport / decode / API failure
 function M.chat(model_cfg, messages, opts)
    local chunks, usage = {}, nil
    -- Collector handed to chat_stream: text fragments are kept in arrival
    -- order, the usage record is remembered, and every other kind
    -- (e.g. "tool_call") is dropped.
    local function collect(kind, payload)
        if kind == "usage" then
            usage = payload
        elseif kind == "text" then
            chunks[#chunks + 1] = payload
        end
    end
    local ok, err = M.chat_stream(model_cfg, messages, collect, opts)
    if ok then
        -- usage stays nil when the stream never delivered a usage event.
        return table.concat(chunks), usage
    end
    return nil, err
 end
 -- ---------------------------------------------------------------- token_count (Phase 8)
 -- Returns an accurate token count by hitting <endpoint>/tokenize when
 -- the endpoint supports it; falls back to the Phase 0 §8 char/4
 -- heuristic otherwise. Per-endpoint capability cache (session-local;
 -- key per R6 is endpoint-only since B1 confirms /tokenize ignores the
 -- model field on the observed broker).
 --
 -- Never errors. Returns a non-negative integer.
 -- 2s timeout per call so a misbehaving endpoint can't stall the
 -- caller; first miss caches as unsupported for the session.
 local _tokenize_capable = {}    -- [endpoint] = true | false (nil = unprobed)
 function M.token_count(model_cfg, text)
    text = text or ""
    if text == "" then return 0 end
    -- Phase 0 char/4 heuristic; the answer whenever /tokenize can't be used.
    local estimate = math.floor(#text / 4)
    if not (model_cfg and model_cfg.endpoint) then
        return estimate
    end
    local ep = model_cfg.endpoint
    -- A cached `false` means an earlier probe of this endpoint failed:
    -- skip the HTTP round-trip for the rest of the session.
    if _tokenize_capable[ep] == false then
        return estimate
    end
    local url = ep:gsub("/+$", "") .. "/tokenize"
    local body = json.encode({ content = text, model = model_cfg.model })
    -- Reuse the chat header builder so an endpoint configured with key_env
    -- receives the same Authorization header on /tokenize as on chat
    -- requests. Previously only Content-Type was sent, so a key-protected
    -- endpoint would fail the probe and be cached as unsupported. Without
    -- key_env the header list is just Content-Type, as before.
    local out, status = curl.post(url, body,
        build_headers(model_cfg),
        2000)  -- 2s timeout per R5 risk row
    if not (status == 200 and out) then
        _tokenize_capable[ep] = false
        return estimate
    end
    local doc = json.decode(out)
    local toks = doc and doc.tokens
    -- Anything other than a `tokens` array counts as "not supported".
    if type(toks) ~= "table" then
        _tokenize_capable[ep] = false
        return estimate
    end
    _tokenize_capable[ep] = true
    return #toks
 end
 -- Introspection: nil if endpoint un-probed; true/false for the cached
 -- capability. Used by tests and future :tokenize debug meta.
 function M.tokenize_supported(model_cfg)
    -- No config or no endpoint: there is nothing to look up.
    local ep = model_cfg and model_cfg.endpoint
    if ep then
        -- true / false once probed by token_count, nil while un-probed.
        return _tokenize_capable[ep]
    end
    return nil
 end
 -- Test hook: reset the cache between LuaJIT-VM-shared test runs.
 function M._reset_tokenize_cache()
    -- Forget every probed endpoint so the next token_count call re-probes.
    -- Assigning nil to existing keys during a pairs() traversal is allowed.
    for endpoint in pairs(_tokenize_capable) do
        _tokenize_capable[endpoint] = nil
    end
 end
 return M
@@ -1,26 +1,45 @@
 -- config.lua — model registry, routing rules, user preferences.
 -- Loaded with dofile() at startup; returns a plain Lua table.
 -- See docs/PHASE0.md §10 for resolution order and full schema.
 --
 -- Per issue #12: hossenfelder is the canonical single-URL broker. It does
 -- model-aware routing server-side (local models on boltzmann; cloud routes
 -- through OpenRouter using its own bearer auth — no client-side key here).
 -- Discovery: GET http://hossenfelder.fritz.box:8082/v1/models.
 --
 -- Phase 9 (docs/PHASE9.md): a `.aish.lua` in/above your cwd (walking up
 -- to $HOME) overlays this user config. First encounter prompts to trust;
 -- sha256-pinned in ~/.aish/trusted-projects. Use it for repo-specific
 -- model presets, permissions, hooks, etc.
 --
 -- IMPORTANT: shallow merge. If your `.aish.lua` sets a top-level block
 -- (models, permissions, cost, shell, ...), it REPLACES the user's
 -- entire block — list every entry you want available OR omit the block
 -- to keep the user's. Inspect the merge via `:config show` at runtime.
 local HOSSENFELDER = "http://hossenfelder.fritz.box:8082"
 return {
    default_model = "fast",
    models = {
        fast = {
-            endpoint    = "http://dirac.fritz.box:8081",
+            endpoint    = HOSSENFELDER,
-            model       = "qwen-coder-7b-snappy-8k",
+            model       = "qwen2.5-coder-1.5b-q4_k_m.gguf",
            temperature = 0.2,
        },
        deep = {
-            endpoint    = "http://dirac.fritz.box:8080",
+            endpoint    = HOSSENFELDER,
-            model       = "qwen-coder-7b-32k",
+            -- 2026-05-13: qwen3-30b not loaded on hossenfelder right now;
            -- using deepseek-coder-v2-lite (16B MoE, ~2.4B active) for the
            -- time being. Restore qwen3-30b when the slot is back up.
            model       = "deepseek-coder-v2-lite",
            timeout_ms  = 300000,   -- 5 min; MoE inference is faster than dense 30B
            temperature = 0.1,
        },
        cloud = {
-            endpoint    = "https://hossenfelder.fritz.box:8082",
+            endpoint    = HOSSENFELDER,
            model       = "anthropic/claude-haiku-4.5",
            -- Hossenfelder forwards to OpenRouter using its own key from
            -- /etc/conf.d/llm-proxy on the LXC; no client-side key needed.
            temperature = 0.2,
        },
    },
@@ -33,6 +52,14 @@ return {
        },
        capture_output = true,    -- inject exec output into context
        confirm_cmd    = true,    -- prompt before executing CMD: suggestions
        -- Issue #10: prompt template. When set, replaces the default
        -- "[aish:<model>]> " prompt. Variables (substituted via {name}):
        --   {model}  {ctx_used}  {ctx_max}  {turn}
        --   {cwd}    {cwd_short} (cwd with $HOME -> ~)
        --   {last_status} (last exec exit code, empty if none yet)
        --   {mode}   (norris / plan / normal)
        -- prompt = "[{model} {ctx_used}/{ctx_max}t T{turn} {mode}] {cwd_short} > ",
    },
    context = {
@@ -43,4 +70,365 @@ return {
    history = {
        dir = (os.getenv("HOME") or ".") .. "/.local/share/aish",
    },
    -- Issue #3: pre/post CMD hooks. Optional shell scripts triggered around
    -- every CMD: execution. Each hook receives the command on stdin and
    -- AISH_CMD / AISH_TURN / AISH_CWD as env vars. Non-zero exit on pre_cmd
    -- aborts execution; post_cmd's exit code is ignored but its stdout is
    -- logged. Default off (no hooks). Uncomment to enable.
    -- hooks = {
    --     pre_cmd  = (os.getenv("HOME") or ".") .. "/.aish/hooks/pre-cmd",
    --     post_cmd = (os.getenv("HOME") or ".") .. "/.aish/hooks/post-cmd",
    -- },
    -- Issue #13: secret redaction. Vault is a separate file at ~/.aish/
    -- secrets.lua (mode 0600 enforced). When set, outbound broker messages
    -- are scrubbed: vault literals + autodetect heuristics (OpenAI sk-,
    -- OpenRouter sk-or-v1-, GitHub ghp_/gho_/ghs_, AWS AKIA, JWT eyJ...,
    -- SSH/GPG PRIVATE KEY headers) become $AISH_SECRET_NNN placeholders.
    -- The streamed reply is rehydrated before display so the user sees
    -- real values. Per-broker override via models[*].redact:
    --   "off"               -- no scrubbing (trusted local)
    --   "vault"             -- vault literals only
    --   "vault+autodetect"  -- + heuristics (default when vault loaded)
    --   "stealth"           -- + heuristics, opaque decoys, no rehydrate
    -- Default per-broker is the global config.secrets.default, falling
    -- back to "vault+autodetect" when vault loaded, else "off".
    -- secrets = {
    --     vault   = "~/.aish/secrets.lua",
    --     default = "vault+autodetect",  -- applies when models[*].redact is nil
    -- },
    -- Issue #8: background CMD (CMD&: marker). Requires history.dir set
    -- (logs land at <history.dir>/bg/<id>.log + .status sidecar). The
    -- feature is always-on once history.dir exists — no config flag — but
    -- only fires when the model emits "CMD&: " or the user runs :bg-spawn.
    -- Issue #9: permission policy DSL for AI-suggested CMD: lines. When set,
    -- supersedes shell.confirm_cmd. Patterns are Lua patterns (NOT regex)
    -- per substrate invariant §3 (no compiled extensions). Priority order:
    -- deny > confirm > allow; first match in the chosen category wins.
    -- Unmatched commands default to "confirm". Probe with :perms check <cmd>.
    -- permissions = {
    --     allow   = { "^ls%s", "^cat%s", "^git status", "^git diff" },
    --     confirm = { "^rm%s", "^git push", "^docker%s", "^sudo%s" },
    --     deny    = { "^ssh%s+root@", "^curl%s+http[^s]" },
    -- },
    -- Phase 2 (docs/PHASE2.md): MCP server registry + tool-call policy.
    -- The block is OFF by default — connect-at-startup happens only when
    -- `servers` is non-empty. Uncomment + adjust per your fleet.
    --
    -- mcp = {
    --     servers = {
    --         -- Each entry: alias = { url = "...", auth_token = "..." | auth_env = "..." }
    --         -- auth_token literal > auth_env env-var indirection > nil (no auth).
    --         -- Aliases become the namespace prefix on tool names sent to the model
    --         -- ("<alias>__<tool>" — e.g. "boltzmann__list_dir"). The separator is
    --         -- "__" (two underscores) because Anthropic via Bedrock validates tool
    --         -- names against ^[a-zA-Z0-9_-]{1,128}$ — dots are rejected.
    --         -- Aliases themselves must not contain "__".
    --         boltzmann = {
    --             url      = "http://boltzmann.fritz.box:8080/mcp",
    --             auth_env = "BOLTZMANN_MCP_TOKEN",
    --         },
    --         hertz = {
    --             url      = "http://hertz.fritz.box:8080/mcp",
    --             auth_env = "HERTZ_MCP_TOKEN",
    --         },
    --         broglie = {
    --             url = "http://broglie.fritz.box:8080/mcp",  -- LAN-only, no auth
    --         },
    --     },
    --
    --     -- Per-call confirm gate auto-approve policy.
    --     -- Key forms:
    --     --   "<alias>__<tool>" — auto-approve one specific tool
    --     --   "<alias>__*"      — auto-approve every tool on that server
    --     -- Anything not matched falls back to the [y/N] prompt.
    --     auto_approve = {
    --         ["boltzmann__read_file"]    = true,
    --         ["boltzmann__list_dir"]     = true,
    --         ["boltzmann__search_files"] = true,
    --         ["hertz__*"]                = true,   -- trust the hub fully
    --     },
    --
    --     -- Tool-call sub-loop budget per ask_ai turn. Hitting the cap surfaces
    --     -- a status and breaks; default 8 if absent.
    --     max_tool_depth = 8,
    -- },
    -- Phase 3 (docs/PHASE3.md): Chuck Norris autonomous mode + destructive-op
    -- heuristic. The block is OFF by default (sane defaults kick in when
    -- absent); uncomment to tune.
    --
    -- safety = {
    --     -- LLM second-opinion on commands the static patterns don't flag.
    --     -- Default true. Set false for static-only operation (faster, but
    --     -- misses novel destructive patterns the static list doesn't know
    --     -- about — bash -c content, custom destructive idioms, etc.).
    --     llm_second_opinion = true,
    --
    --     -- Which configured model to use for the YES/NO destructive probe.
    --     -- Precedence: this field → models.deep → models[default_model].
    --     -- R-B2: prefer an INDEPENDENT model class from the action-emitting
    --     -- model (avoids self-policing). Recommended values:
    --     --   "cloud"  — anthropic/claude-haiku-4.5 via openrouter. Fast and
    --     --              reliable. Costs money per probe (typical Norris
    --     --              session = 16 probes max, often cached).
    --     --   "deep"   — local large model (qwen3-30b on this fleet; deepseek-coder-v2-lite while that slot is down). Free
    --     --              but slow on RK3588 hardware (~1-3s per probe).
    --     --              Falls back here automatically if not set.
    --     --   "fast"   — same model as the action-emitter. NOT RECOMMENDED
    --     --              (circular trust); use only when no other option.
    --     llm_model = "cloud",
    --
    --     -- Norris planning-loop budget. Iterations of safety.norris_step.
    --     -- Each iteration is one broker round-trip + dispatch of actions.
    --     -- Default 8. Bump for long-running goals; cap low for testing.
    --     max_norris_steps = 8,
    -- },
    -- Phase 4 (docs/PHASE4.md): cross-session memory.jsonl + startup
    -- injection + :memory management surface. The block is OFF by
    -- default (no startup injection); uncomment to tune. Note that
    -- :remember / :memory list / :memory forget / :memory summarize
    -- all work without this block — they store to <history.dir>/
    -- memory.jsonl regardless. The block only configures the
    -- injection-into-system-prompt behavior at startup.
    --
    -- memory = {
    --     -- Cap on total characters injected at startup. ~2000 chars ≈
    --     -- 500 tokens. LRU-by-ts selection if your memory.jsonl has
    --     -- more recent items than fit. Older items remain in the
    --     -- file; only injection is bounded. Suppressed entirely in
    --     -- Norris mode (R-C1).
    --     inject_max_chars = 2000,
    --
    --     -- Which configured model to use for :memory summarize.
    --     -- Defaults to the active model when nil. Use "fast" for
    --     -- speed; "deep" or "cloud" for better extraction quality
    --     -- (cloud may have variable cost per session).
    --     summarizer_model = "fast",
    --
    --     -- #102: auto-summarize the session into memory.jsonl on :q.
    --     -- When true, shutdown_session runs the same distill flow as
    --     -- `:memory summarize`, non-interactively, and auto-adds the
    --     -- parsed candidates. Silent no-op for trivial sessions (turn
    --     -- count < min_turns_for_summary, default 5). pcall'd so a
    --     -- broker failure never blocks :q.
    --     auto_summarize_on_quit = true,
    --     min_turns_for_summary  = 5,
    --     summary_model          = "fast",  -- new alias; summarizer_model
    --                                       -- above is still honored for
    --                                       -- back-compat.
    -- },
    -- Phase 5 (docs/PHASE5.md): multi-model routing + cloud fallback +
    -- summarize-on-evict. OFF by default — auto-routing can spend money
    -- silently on the cloud preset; require explicit opt-in.
    --
    -- routing = {
    --     -- Enable auto-routing per request. When true, router.classify_model
    --     -- inspects each prompt and may switch the model for THAT request
    --     -- only (the :model selection is preserved across requests).
    --     -- Default false. Toggle at runtime with :route on / :route off.
    --     auto = true,
    --
    --     -- Class → model mapping. nil = "keep current" (heuristic fires
    --     -- but no override). Ships with reasoning = nil because mapping
    --     -- "explain ..." prompts to a paid cloud model would spend money
    --     -- silently — opt in by uncommenting the reasoning line below.
    --     classes = {
    --         code      = "deep",      -- code-like prompts to local deep
    --         -- reasoning = "cloud",  -- OPT-IN: "explain"/"why"/"how does" → paid
    --         -- default   = nil,      -- keep active model
    --     },
    --
    --     -- Single-hop retry on transport failure (HTTP 5xx, 408,
    --     -- 404 model_not_found, DNS, connection refused, timeouts).
    --     -- Retries against fallback_model once. Skipped if any text
    --     -- has already streamed (no partial-output duplication).
    --     -- Toggle at runtime with :fallback on / :fallback off.
    --     fallback       = false,           -- default off (cost-safety)
    --     fallback_model = "cloud",
    --
    --     -- Issue #86: per-class system_prompt override. When the
    --     -- classified request falls into a class with an entry here,
    --     -- the BASE system_prompt is REPLACED for that one request
    --     -- (dynamic blocks — [background], [project], [earlier
    --     -- summary], NORRIS suffix — still compose on top). Mostly
    --     -- useful for tightening small local models' instruction
    --     -- adherence. Default {} (no override).
    --     system_prompts = {
    --         code = [[You are a code assistant. Rules:
    -- 1. Output ONLY the requested code or command.
    -- 2. No prose explanation unless explicitly asked.
    -- 3. Wrap shell commands in CMD: prefix.
    -- 4. Max response: 200 tokens.]],
    --         default = [[You are a shell assistant.
    -- Output shell commands as: CMD: <command>
    -- Output answers as single short sentences.
    -- Do not ask clarifying questions.]],
    --         -- reasoning routes to cloud; no override usually needed
    --     },
    --
    --     -- Issue #88: per-class GBNF grammar passthrough. llama.cpp
    --     -- constrains the sampler to ONLY emit tokens matching the
    --     -- grammar — eliminates format drift on small models. Cloud
    --     -- (Anthropic/Bedrock) silently ignores the field, so default
    --     -- passthrough is safe; no per-model opt-out needed. A malformed
    --     -- grammar surfaces as a broker error at request time.
    --     grammars = {
    --         code    = [[root ::= "CMD: " [^\n]+ "\n"]],
    --         default = [[root ::= ("CMD: " [^\n]+ "\n") | [^\n]+ "\n"]],
    --     },
    -- },
    --
    -- Issue #88 (continued): for the safety LLM probe (YES/NO
    -- destructive classification), set safety.probe_grammar to force
    -- the probe model to emit exactly YES or NO. Eliminates the
    -- regex-match fallback for unparseable verdicts; small models
    -- become reliable enough to use as the probe.
    --
    -- safety = {
    --     llm_second_opinion = true,
    --     llm_model          = "fast",
    --     probe_grammar      = [[root ::= ("YES" | "NO")]],
    -- },
    -- ── Issue #87 (route-aware context compression).
    -- When a routed model preset has `local_compress = true`, each
    -- broker call against THAT preset gets a compressed view of
    -- ctx.turns: only the last `keep_turns` turns; any turn whose
    -- content exceeds `max_turn_chars` is tail-truncated. The full
    -- context lives on (visible via :history); compression is purely
    -- per-request for small models that effectively use a fraction
    -- of their advertised context window.
    --
    -- Set the per-model opt-in on models[<name>]:
    --     models.fast = { ..., local_compress = true }
    -- Defaults live under context.compress:
    --     context = {
    --         ...
    --         compress = { keep_turns = 2, max_turn_chars = 800 },
    --     }
    --
    -- Trade-off documented in the FR: tool turns lose information
    -- when tail-truncated. Acceptable for shell-output blocks (the
    -- tail is usually the relevant bit); known limitation for
    -- structured tool results. Disable per-model if it bites.
    -- ── Issue #89 / Phase 10: cloud preplanner → local executor split.
    -- When cfg.norris.preplanner names a model preset, :norris launch
    -- fires ONE broker.chat against that preset asking for a sequence
    -- of TASK: <imperative> lines. Parsed list (capped at tasks_max)
    -- becomes ctx.norris_tasks; the executor model (cfg.norris.executor,
    -- defaulting to the active :model selection) runs each task with
    -- the current task shown in the per-step header.
    --
    -- Goal: small fast local models are cheap per step but easily
    -- distracted on multi-step plans; cloud is capable at planning
    -- but expensive per step. Use cloud ONCE for the plan, local for
    -- every step. Falls back to single-model Norris (existing
    -- behavior) when preplanner unset / fails / produces no TASKs.
    --
    -- norris = {
    --     preplanner = "anthropic",   -- model name in cfg.models;
    --                                 -- this preset is called ONCE per
    --                                 -- :norris launch. Omit to run
    --                                 -- single-model (Phase 6 behavior).
    --     executor   = "fast",        -- model that runs each step.
    --                                 -- Omit to use the active :model.
    --     tasks_max  = 16,            -- cap on preplan list size.
    --     -- preplan_system = "...",  -- override the built-in prompt
    -- },
    --
    -- :cost detail separates norris-preplan and norris rows so you
    -- can see cloud planning cost vs local execution cost. The
    -- preplan call does NOT retry via fallback_model (a different
    -- model = a different decomposition; clean hard-fail to single-
    -- model is safer).
    -- ── Phase 5 context summarization on sliding-window eviction.
    -- Set INSIDE the context = { ... } block above to enable:
    --     context = {
    --         max_turns          = 40,
    --         token_budget       = 4096,
    --         summarize_on_evict = true,
    --         summarizer_model   = "fast",   -- model name in models{}
    --         max_summary_chars  = 2000,
    --
    --         -- #101 (proactive periodic summarization). When set,
    --         -- enforce_cadence fires every N appends (before
    --         -- enforce_budget) and folds turns OLDER than
    --         -- summarize_keep_recent into ctx.summary. Goal: keep the
    --         -- wire prompt tight from the start so small local models
    --         -- aren't fed near-budget context until eviction. Composes
    --         -- with summarize_on_evict (same summarize_fn closure;
    --         -- different trigger). Suppressed in Norris (R-C4 parity).
    --         summarize_every_n_turns = 10,    -- nil = disabled (default)
    --         summarize_keep_recent   = 4,
    --     },
    -- When summarize_on_evict is true, evicted turn pairs are fed to
    -- summarizer_model and the result lives on ctx.summary, appended to
    -- the system prompt as [earlier conversation summary]. Suppressed
    -- in Norris mode (R-C4 — planner stays on its goal). If broker
    -- fails, falls back to Phase 0 silent eviction (no crash).
    -- Phase 6 (docs/PHASE6.md): project file-tree context + :diff /
    -- :tree / :highlight metas. The :diff and :tree metas work without
    -- any config. The `project` block below only controls the
    -- AUTO-injection-at-startup behavior; manual `:tree` always works
    -- regardless. Uncomment to enable startup auto-inject.
    --
    -- project = {
    --     auto_tree      = true,   -- run `:tree` once at startup
    --     tree_depth     = 3,      -- depth filter for the scan (find fallback only;
    --                              -- git ls-files emits full repo-relative paths)
    --     tree_max_chars = 4096,   -- truncate the injected block above this
    -- },
    --
    -- :highlight has no config flag in v1 — toggled at runtime only.
    -- Requires the external `tree-sitter` CLI plus configured parser-
    -- directories with cloned + built `tree-sitter-<lang>` grammars
    -- (see `:highlight on` for the install hints).
    -- Phase 7 (docs/PHASE7.md): cost / usage observability. broker.lua
    -- captures `usage` (+ `cost` for cloud) from every chat/chat_stream
    -- call and routes via ctx:add_usage to a per-session accumulator.
    -- `:cost` / `:cost detail` / `:cost reset` surface the totals.
    -- The `cost` block below configures OPTIONAL warn thresholds —
    -- a single status line fires the first time the cumulative total
    -- crosses each threshold. Default off. Useful when paid cloud
    -- presets are in play so runaway-cost sessions get a nudge.
    --
    -- cost = {
    --     warn_at_dollars = 0.50,    -- one-shot warn when cumulative cost crosses this
    --     warn_at_tokens  = 100000,  -- one-shot warn when cumulative tokens cross this
    -- },
    --
    -- Both flags are independent (R4 — first-to-fire doesn't suppress
    -- the other); `:cost reset` re-arms both. Per-turn usage is also
    -- written to session/*.jsonl (assistant-turn `usage` field) for
    -- after-the-fact scripting; cross-session aggregation deferred
    -- to a future phase (Q-C2).
    -- Phase 8 (docs/PHASE8.md): accurate tokenization via the broker's
    -- /tokenize endpoint, replacing the Phase 0 §8 char/4 heuristic.
    -- Two consequences when use_endpoint=true:
    --   (1) Context:estimate_tokens hits <endpoint>/tokenize once per
    --       new turn (cached on the turn dict thereafter). Network
    --       cost is one round-trip (~30ms) per fresh turn; subsequent
    --       calls reuse the cache.
    --   (2) Context:enforce_budget actually ENFORCES token_budget now
    --       (previously only max_turns was checked). Sessions that
    --       fit under char/4 may evict earlier — raise token_budget
    --       to match your model's real context window if needed.
    -- Cloud endpoints (OpenRouter) don't expose /tokenize; capability
    -- cached as unsupported on first probe -> silent char/4 fallback.
    --
    -- tokenize = {
    --     use_endpoint = true,
    -- },
 }
@@ -1,28 +1,619 @@
 -- context.lua — in-memory conversation history + token budget.
-- Phase 0: ordered turn list, sliding window eviction.
+-- Phase 0: ordered turn list, sliding-window eviction by max_turns.
-- Tokenization is char/4 heuristic in Phase 0; accurate count is Phase 2.
+-- Tokenization is char/4 heuristic in Phase 0; accurate count is Phase 3 (Q1).
-- See docs/PHASE0.md §8.
+-- Phase 2 (added 2026-05-12): support for `role:"tool"` turns and assistant
 -- turns carrying `tool_calls = [...]`, plus a `use_tool_role` rendering
 -- toggle for the strict-chat-template fallback path (Q18).
 -- See docs/PHASE0.md §6, §8 and docs/PHASE2.md §3 / §5.
 local M = {}
-- Construct a Context table from config.context.
+-- The §6 default system prompt. The `CMD: ` (exact prefix, single space)
 -- contract is locked per §3 invariants — do not edit without amending PHASE0.
 -- Phase 2 appends ~4 lines about MCP tools per PHASE2.md §8 (hybrid:
 -- static frame here + dynamic tools list in the request body). The block
 -- is always present even when no MCP servers are configured — the cost
 -- is ~60 tokens and the model just sees instructions that don't apply.
 local DEFAULT_SYSTEM_PROMPT = [[
 You are aish, an AI-augmented shell assistant. You help the user execute shell
 commands, write and debug code, and re-engineer software. When suggesting shell
 commands, output them on a line beginning with exactly "CMD: " so aish can
 identify and optionally execute them. Be concise. Prefer concrete actions over
 explanations unless asked.
 You may have access to MCP tools — they appear in this request's `tools` field.
 Call a tool by emitting a tool_call; the result will be supplied in the next
 turn. Use tools for structured operations (file reads, queries, etc.) and
 `CMD:` lines for local shell commands. Prefer tools when available; fall back
 to `CMD:` for anything not exposed as a tool.]]
 -- Prototype table for context instances: M.new installs it as the
 -- metatable, and the self-referential __index makes method lookups on an
 -- instance resolve to the functions defined on Context.
 local Context = {}
 Context.__index = Context
 -- Build a new context instance (a table with Context as its metatable).
 --
 -- opts is optional; recognized fields and their fallbacks:
 --   system_prompt            string, else DEFAULT_SYSTEM_PROMPT
 --   max_turns                number, else 40
 --   token_budget             number, else 4096
 --   use_tool_role            boolean, else true (an explicit false is kept)
 --   summarize_fn             function or nil
 --   max_summary_chars        number, else 2000
 --   summarize_every_n_turns  number or nil (nil = cadence disabled)
 --   summarize_keep_recent    number, else 4
 --   tokenize_fn              function or nil
 -- All other fields start empty/nil and are filled in by callers later.
 function M.new(opts)
-    error("context.new: not implemented (Phase 0 pending)")
+    opts = opts or {}
    return setmetatable({
        system_prompt        = opts.system_prompt or DEFAULT_SYSTEM_PROMPT,
        turns                = {},
        pending_exec_output  = nil,   -- buffered until next user turn (§6)
        max_turns            = opts.max_turns    or 40,
        token_budget         = opts.token_budget or 4096,
        -- Phase 2: tool-role rendering toggle. true = emit OpenAI-standard
        -- role:"tool" messages from to_messages(); false = collapse
        -- assistant+tool_calls and tool turns into a single assistant text
        -- turn for chat templates that reject the role:"tool" shape.
        -- Default true per PHASE2.md §12 "Q18 default"; flip from caller.
        use_tool_role        = (opts.use_tool_role == nil) and true
                                                           or  opts.use_tool_role,
        -- Phase 5: summarize-on-evict. When set, enforce_budget calls
        -- summarize_fn(prior_summary, evicted_turns) -> string | nil
        -- and updates ctx.summary instead of silently dropping turns.
        -- Callback contract per PHASE5.md R-B1:
        --   (nil, [turns])  → first-time summarize
        --   (str, [turns])  → additive: extend prior summary with new turns
        --   (str, nil)      → compress: re-summarize the prior summary
        -- Returns nil → fall back to silent eviction (Phase 0 behavior).
        summarize_fn         = opts.summarize_fn,
        summary              = nil,   -- rolling summary string
        max_summary_chars    = opts.max_summary_chars or 2000,
        -- #101: proactive periodic summarization (cadence-triggered,
        -- in addition to Phase 5's eviction-triggered path). When
        -- summarize_every_n_turns is set AND summarize_fn is wired,
        -- enforce_cadence() folds turns older than the last
        -- summarize_keep_recent into ctx.summary every N appends.
        -- Goal: keep the wire prompt tight from the start so small
        -- local models aren't fed near-budget context until eviction
        -- forces a fold. nil = disabled (existing behavior).
        summarize_every_n_turns = opts.summarize_every_n_turns,
        summarize_keep_recent   = opts.summarize_keep_recent or 4,
        _turns_since_summarize  = 0,
        -- Phase 6 (#issue Phase 6 §6): project file-tree block, set by
        -- repl.lua via :tree meta or the cfg.project.auto_tree startup
        -- hook. nil = no block injected. Cached scan opts (depth /
        -- max_chars overrides) live on _project_opts for :tree refresh.
        project              = nil,
        _project_opts        = nil,
        -- Phase 7 (docs/PHASE7.md): cost/usage accumulator. Keyed as
        -- usage_totals[model_name][category] -> { prompt, completion,
        -- calls, cost, is_local }. is_local (R6) is a sticky flag
        -- set when ANY recorded usage for the slot had cost==nil
        -- (preserves local-vs-cloud-zero distinction for :cost detail
        -- annotation). cost_warn_state (R4) carries per-threshold
        -- one-shot flags so warn_at_dollars firing doesn't suppress
        -- warn_at_tokens. Both survive :reset (R8 parity).
        usage_totals         = {},
        cost_warn_state      = { dollars = false, tokens = false },
        -- Phase 8 (docs/PHASE8.md): optional tokenize callback. When
        -- set, Context:estimate_tokens uses it (with a per-turn cache
        -- on turn._tokens for amortization). nil = char/4 fallback
        -- (Phase 0 §8 — existing behavior, no change).
        tokenize_fn          = opts.tokenize_fn,
    }, Context)
 end
-- Append a turn { role = ..., content = ... }.
+-- Append a turn. Phase 2 widens what's valid:
-function M:append(turn)
+--   role="user"      content (string) required
-    error("context:append: not implemented (Phase 0 pending)")
+--   role="system"    content (string) required (callers shouldn't add system
 --                    turns directly; system prompt is stored separately and
 --                    prepended at to_messages time per §6)
 --   role="assistant" content may be empty IF tool_calls is non-empty;
 --                    otherwise content required
 --   role="tool"      tool_call_id required + content required; the preceding
 --                    stored turn must be an assistant turn with non-empty
 --                    tool_calls (debug assertion catches sub-loop bugs early
 --                    per PHASE2.md §3 row + N4 in review)
 function Context:append(turn)
    -- Validate the turn shape per role, store a normalized copy, and
    -- advance the cadence counter read by enforce_cadence.
    assert(type(turn) == "table" and turn.role,
           "context:append requires { role = ... }")
    local role = turn.role
    local entry = { role = role, content = turn.content or "" }
    local calls = turn.tool_calls
    if role == "assistant" and calls and #calls > 0 then
        -- Assistant turn that issues tool calls: content may be empty.
        entry.tool_calls = calls
    elseif role ~= "tool" then
        assert(turn.content, "context:append requires content for role=" .. role)
    else
        assert(turn.tool_call_id, "context:append role=tool requires tool_call_id")
        assert(turn.content, "context:append role=tool requires content")
        -- Find the nearest stored turn that is not itself a tool reply.
        -- Parallel tool_calls yield several tool turns in a row, so the
        -- anchor may sit more than one slot back; it has to be an
        -- assistant turn that actually issued tool_calls.
        local anchor
        for idx = #self.turns, 1, -1 do
            local candidate = self.turns[idx]
            if candidate.role ~= "tool" then
                anchor = candidate
                break
            end
        end
        assert(anchor and anchor.role == "assistant"
                      and anchor.tool_calls and #anchor.tool_calls > 0,
               "context:append role=tool must follow assistant with tool_calls "
               .. "(possibly via prior tool turns in the same sub-loop)")
        entry.tool_call_id = turn.tool_call_id
    end
    table.insert(self.turns, entry)
    -- #101: one more append since the last cadence fold.
    self._turns_since_summarize = (self._turns_since_summarize or 0) + 1
 end
-- Render messages array suitable for broker.chat (system prompt prepended).
+-- Buffer captured shell-exec output. Per §6 (post user-test fix), exec output
-function M:to_messages()
+-- is NOT appended as its own user turn — strict chat templates (e.g. mistral-
-    error("context:to_messages: not implemented (Phase 0 pending)")
+-- nemo's Jinja) reject the resulting user/user back-to-back. Instead it is
 -- held until the next user turn arrives, then prepended via :append_user.
 function Context:append_exec_output(out)
    -- Nil / false / empty output is ignored. Anything else is wrapped in
    -- an "[exec output]" block and queued on pending_exec_output; several
    -- blocks queued before the next user turn are newline-joined.
    if out and out ~= "" then
        local block = "[exec output]\n" .. out
        local queued = self.pending_exec_output
        self.pending_exec_output = queued and (queued .. "\n" .. block) or block
    end
 end
-- Apply max_turns eviction policy. Returns number of turns evicted.
+-- Append a user turn, flushing any pending exec output as a prefix. Use this
-function M:enforce_budget()
+-- (rather than raw :append) for any turn whose role is "user".
-    error("context:enforce_budget: not implemented (Phase 0 pending)")
+function Context:append_user(content)
    -- Fold any buffered exec output into this user turn (exec block first,
    -- one blank line, then the user's text) and clear the buffer so the
    -- output is emitted exactly once.
    if self.pending_exec_output then
        content = self.pending_exec_output .. "\n\n" .. content
        self.pending_exec_output = nil
    end
    -- Go through :append so role validation and the cadence counter bump
    -- happen in one place.
    self:append({ role = "user", content = content })
 end
 -- Compact JSON-ish rendering used by the fallback (use_tool_role=false) path
 -- to convert a tool_calls + tool-result pair into inline text. Not OpenAI-
 -- standard — only used when a strict chat template rejects role:"tool".
 local function inline_tool_call(call, result_content)
    -- Missing pieces degrade to placeholders: "?" for the tool name and
    -- the empty string for arguments / result.
    local tool_name   = call.name or "?"
    local args_text   = tostring(call.arguments or "")
    local result_text = tostring(result_content or "")
    return string.format("[tool: %s]\n%s\n[result]\n%s",
                         tool_name, args_text, result_text)
 end
 -- Render the messages array for broker.chat (system prompt prepended; turns
 -- in order). Phase 2 adds two emission modes:
 --
 --   use_tool_role = true (default): pass through OpenAI-standard
 --     {role:"assistant", content, tool_calls} and {role:"tool", tool_call_id,
 --     content} turns unchanged.
 --
 --   use_tool_role = false (fallback, Q18): collapse each
 --     assistant-with-tool_calls + its following role:"tool" turn(s) into a
 --     single assistant text turn carrying the synthesized "[tool: name]\n
 --     <args>\n[result]\n<content>" body. The role:"tool" turns and the
 --     tool_calls field are NOT emitted. Same logical alternation seen by the
 --     model (user → assistant → user → assistant), no strict-template breakage.
 --
 -- The system prompt is NOT stored in self.turns per §6.
 -- Phase 4: [background] block composer. Memory items from memory.jsonl
 -- are stored on self.memory_items (loaded by repl.lua at startup) and
 -- rendered as a dim-styled suffix on the system prompt. Suppressed when
 -- norris_active to avoid stacking large background contexts in
 -- per-iteration broker calls (R-C1 review fold-in). Cap honored via
 -- inject_max_chars argument from the caller (already truncated by repl).
 -- Render memory items as a "[background]" suffix for the system prompt.
 --   items: array of tables with optional `kind` and `content` fields,
 --          or nil.
 -- Returns "" for nil / empty input; otherwise two leading newlines, a
 -- header line, and one "- (kind) content" bullet per item.
 local function compose_background(items)
    if not items or #items == 0 then return "" end
    local lines = { "", "", "[background] (memory.jsonl; manage via :memory)" }
    for _, it in ipairs(items) do
        -- Flatten embedded newlines so each item stays on one bullet line.
        -- gsub returns (string, count); binding to a single local keeps
        -- only the string so the count is not forwarded to format as a
        -- stray extra argument.
        local flat = (it.content or ""):gsub("\n", " ")
        lines[#lines + 1] = ("- (%s) %s"):format(it.kind or "?", flat)
    end
    return table.concat(lines, "\n")
 end
 -- Phase 5 R-C4: summary block composer. Mirrors the [background]
 -- pattern; suppressed under Norris (callers already guard, but the
 -- function returns "" for empty input regardless).
 local function compose_summary(summary_text)
    -- Only a non-empty summary produces a block; nil / "" yields "".
    local has_text = summary_text and summary_text ~= ""
    if has_text then
        return "\n\n[earlier conversation summary]\n" .. summary_text
    end
    return ""
 end
 -- Phase 6: project file-tree composer. Inserted between [background]
 -- and [earlier summary] so the reading order is memory facts →
 -- project tree → earlier conversation → NORRIS suffix. Same Norris-
 -- suppression rule (callers gate via self.norris_active).
 local function compose_project(project_text)
    -- Emit the "[project]" block only when there is tree text to show.
    if project_text and project_text ~= "" then
        local header = "\n\n[project]\n"
        return header .. project_text
    end
    return ""
 end
 -- Phase 3: NORRIS MODE suffix appended to the system prompt when
 -- self.norris_active. Carries self.norris_goal so eviction of the
 -- user's "[norris] goal: ..." turn doesn't lose the anchor.
 -- This text is a string.format template: to_messages renders it with
 -- self.norris_goal as the only argument. Keep exactly one %s in it and
 -- write any literal percent sign as %% when editing.
 local NORRIS_SUFFIX_TEMPLATE = [[
 [NORRIS MODE] You are operating autonomously toward the following goal:
    %s
 Plan and execute step by step using CMD: lines (for shell) or tool_calls
 (when MCP tools are available). After each action, you will see its
 result in the next turn. Re-plan based on what you observe.
 When the goal is achieved, emit a single line:
    GOAL: complete
 on its own line, optionally followed by a brief summary.
 If the goal is unreachable or you need user input, emit:
    GOAL: blocked
 with a one-line reason.
 Avoid destructive operations unless the goal explicitly requires them.
 The user will be prompted to confirm destructive actions; expect their
 verdict in the next turn as a synthesized "[aish] ... skipped by user"
 message if they declined.]]
 -- Phase 10 / #89: optional task-hint block appended AFTER the NORRIS
 -- suffix when the cloud preplanner emitted a TASK list at :norris
 -- launch. self.norris_tasks shape: { current = 1, list = {...} }.
 -- Returns "" when no tasks (preplan disabled OR preplan failed OR
 -- list exhausted) — keeps the NORRIS suffix backward-compatible.
 local function compose_norris_task_hint(self)
    -- No preplanned list at all → nothing to append.
    local tasks = self.norris_tasks
    if not tasks or not tasks.list then return "" end
    local idx   = tasks.current
    local total = #tasks.list
    local step  = tasks.list[idx]
    -- A cursor past the end of the list means the plan is exhausted.
    if not step then return "" end
    return ("\n\nCurrent step %d/%d:\n    %s"):format(idx, total, step)
 end
 -- #87: route-aware context compression. Keeps the LAST keep_turns
 -- turns; tail-truncates any turn whose content exceeds max_turn_chars.
 -- Drops tool turns at the slice head (they'd be orphaned without
 -- their assistant-with-tool_calls anchor; strict chat templates
 -- reject the resulting tool-without-anchor shape). Returns a new
 -- list of turn-shaped tables; self.turns is NEVER mutated.
 -- Build a compressed, read-only view of `turns` for a single request.
 --   turns:      array of turn tables (role, content, optional tool_calls /
 --               tool_call_id)
 --   keep_turns: how many trailing turns to keep (defaults to 2 when nil)
 --   max_chars:  per-turn content cap; longer content keeps only its last
 --               max_chars characters. nil disables truncation.
 -- Returns a new array. Turns that fit are returned by reference;
 -- truncated turns are shallow copies. The input array and its turns are
 -- never modified.
 local function _compress_turns(turns, keep_turns, max_chars)
    local n = #turns
    local keep = keep_turns or 2
    -- Index of the first turn that survives the window.
    local start = math.max(1, n - keep + 1)
    -- Drop orphan tool turns at the head: their assistant anchor fell
    -- outside the window.
    while start <= n and turns[start].role == "tool" do
        start = start + 1
    end
    local out = {}
    for i = start, n do
        local t = turns[i]
        local c = t.content or ""
        if max_chars and #c > max_chars then
            -- Tail-truncate into a copy so the stored turn stays intact.
            out[#out + 1] = {
                role         = t.role,
                content      = c:sub(-max_chars),
                tool_calls   = t.tool_calls,
                tool_call_id = t.tool_call_id,
            }
        else
            out[#out + 1] = t  -- ref the existing turn; no copy needed
        end
    end
    return out
 end
 -- Render the message array for one broker call.
 --
 -- opts (optional table):
 --   system_prompt_override  replaces self.system_prompt for this render
 --                           only
 --   compress                { keep_turns, max_turn_chars }; when present
 --                           the turns are rendered from a compressed view
 --                           (defaults 2 / 800)
 --
 -- Returns an array whose first element is the composed system message,
 -- followed by the rendered turns in order. self.turns is not modified.
 function Context:to_messages(opts)
    -- Phase 10 (#86): per-call system_prompt_override. Replaces the
    -- BASE system_prompt for THIS render only (state unchanged); the
    -- dynamic blocks ([background], [project], [earlier summary],
    -- NORRIS suffix) still compose on top. Used by ask_ai's routing
    -- path when cfg.routing.system_prompts[class] is set — gives
    -- small local models tighter instructions while preserving
    -- ambient memory/project context.
    local sys_content = (opts and opts.system_prompt_override)
                        or self.system_prompt
    -- Phase 4 [background] memory block + Phase 6 [project] file-tree
    -- block + Phase 5 [earlier summary] block. All suppressed during
    -- Norris (R-C1 / R-C4 — avoid redundant tokens per planning
    -- iteration; planner stays focused on its goal anchor).
    if not self.norris_active then
        sys_content = sys_content .. compose_background(self.memory_items)
        sys_content = sys_content .. compose_project(self.project)
        sys_content = sys_content .. compose_summary(self.summary)
    end
    -- Phase 3 NORRIS MODE suffix. Last block so its instructions dominate.
    if self.norris_active and self.norris_goal then
        sys_content = sys_content
            .. string.format(NORRIS_SUFFIX_TEMPLATE, self.norris_goal)
            .. compose_norris_task_hint(self)
    end
    local msgs = { { role = "system", content = sys_content } }
    -- #87: route-aware compression. When opts.compress is set, swap
    -- the turn iterable for a truncated copy. self.turns unchanged
    -- (this is a per-render transformation; persistence + display
    -- via :history see the full context).
    local turns = self.turns
    if opts and opts.compress then
        turns = _compress_turns(self.turns,
            opts.compress.keep_turns or 2,
            opts.compress.max_turn_chars or 800)
    end
    if self.use_tool_role then
        for _, t in ipairs(turns) do
            local m = { role = t.role, content = t.content }
            if t.role == "assistant" and t.tool_calls then
                -- OpenAI shape wraps each call as
                -- {id, type:"function", function:{name, arguments}}.
                local oai = {}
                for i, c in ipairs(t.tool_calls) do
                    oai[i] = {
                        id   = c.id,
                        type = "function",
                        ["function"] = { name = c.name,
                                         arguments = c.arguments or "" },
                    }
                end
                m.tool_calls = oai
            elseif t.role == "tool" then
                m.tool_call_id = t.tool_call_id
            end
            msgs[#msgs + 1] = m
        end
        return msgs
    end
    -- Fallback path: walk turns, collapse asst-with-tool_calls + following
    -- tool turns into a single asst text turn. Merge consecutive assistant
    -- turns afterward so the trailing post-tool-result assistant text
    -- doesn't produce asst/asst back-to-back (which strict templates would
    -- also reject — same gotcha PHASE0.md §6 warned about for user/user).
    local function push_or_merge_assistant(content)
        local last = msgs[#msgs]
        if last and last.role == "assistant" then
            last.content = last.content .. "\n" .. content
        else
            msgs[#msgs + 1] = { role = "assistant", content = content }
        end
    end
    -- #87: same compressed `turns` view used by the fallback path.
    local i = 1
    while i <= #turns do
        local t = turns[i]
        if t.role == "assistant" and t.tool_calls then
            local parts = {}
            if t.content and t.content ~= "" then
                parts[#parts + 1] = t.content
            end
            -- Consume exactly the run of tool turns that follows this
            -- assistant turn, indexing results by tool_call_id. Advancing
            -- by the real run length (rather than by #tool_calls) means a
            -- missing result can never swallow the user/assistant turn
            -- that comes next, and out-of-order results still pair up.
            local results = {}
            local j = i + 1
            while turns[j] and turns[j].role == "tool" do
                local id = turns[j].tool_call_id
                if id ~= nil and results[id] == nil then
                    results[id] = turns[j].content
                end
                j = j + 1
            end
            for _, call in ipairs(t.tool_calls) do
                local result_text = ""
                if call.id ~= nil and results[call.id] ~= nil then
                    result_text = results[call.id]
                end
                parts[#parts + 1] = inline_tool_call(call, result_text)
            end
            push_or_merge_assistant(table.concat(parts, "\n"))
            i = j
        elseif t.role == "tool" then
            -- Orphan tool turn (no preceding asst-tool_calls captured it).
            -- Shouldn't happen given the :append assertion, but defensively
            -- drop it rather than emit a malformed message.
            i = i + 1
        elseif t.role == "assistant" then
            push_or_merge_assistant(t.content or "")
            i = i + 1
        else
            msgs[#msgs + 1] = { role = t.role, content = t.content }
            i = i + 1
        end
    end
    return msgs
 end
 -- #101: proactive periodic summarization. Fires every
 -- summarize_every_n_turns appends, folding turns older than the last
 -- summarize_keep_recent into ctx.summary via summarize_fn. Returns
 -- the number of turns folded (0 if disabled / not yet due / nothing
 -- to fold / Norris-mode / callback failed).
 --
 -- Norris suppression (Phase 5 R-C4 parity): the planner stays
 -- focused on its goal anchor — folding history mid-loop would
 -- change its perceived progress.
 --
 -- Orphan-tool guard: never fold an assistant-with-tool_calls turn
 -- without its matching role=tool turn(s). When the slice would end
 -- on such an assistant, peel back until it doesn't (the unfolded
 -- tail then becomes part of the live window — temporarily larger
 -- than summarize_keep_recent, but chat-template-legal).
 -- Cadence-triggered fold of old turns into self.summary.
 --
 -- Does nothing (returns 0) when Norris mode is active, when no
 -- summarize_fn or summarize_every_n_turns is configured, when fewer than
 -- summarize_every_n_turns appends have happened since the last fold,
 -- when there are no turns beyond the summarize_keep_recent tail, or when
 -- summarize_fn raises / returns a non-string / returns "".
 --
 -- On success the folded turns are removed from self.turns, the cadence
 -- counter is reset, and the number of folded turns is returned.
 function Context:enforce_cadence()
    if self.norris_active then return 0 end
    if not self.summarize_fn then return 0 end
    if not self.summarize_every_n_turns then return 0 end
    if (self._turns_since_summarize or 0) < self.summarize_every_n_turns then
        return 0
    end
    local keep = self.summarize_keep_recent or 4
    local n = #self.turns
    if n <= keep then return 0 end
    local fold_count = n - keep
    -- Orphan-tool guard: never separate an assistant-with-tool_calls turn
    -- from its tool replies. Shrink the fold slice while either
    --   (a) the first turn that would stay live is a tool reply, meaning
    --       the cut lands inside a tool run whose anchor would be folded
    --       away, or
    --   (b) the last folded turn is an assistant that issued tool_calls.
    -- The live window may end up larger than `keep`, but it stays
    -- chat-template-legal.
    while fold_count > 0 do
        local last = self.turns[fold_count]
        local first_kept = self.turns[fold_count + 1]
        local splits_tool_run = first_kept and first_kept.role == "tool"
        local dangling_calls = last and last.role == "assistant"
                               and last.tool_calls and #last.tool_calls > 0
        if splits_tool_run or dangling_calls then
            fold_count = fold_count - 1
        else
            break
        end
    end
    if fold_count == 0 then return 0 end
    local pair = {}
    for i = 1, fold_count do pair[i] = self.turns[i] end
    -- pcall: a failing summarizer must not propagate out of the caller.
    local ok, new_summary = pcall(self.summarize_fn, self.summary, pair)
    if not ok or type(new_summary) ~= "string" or new_summary == "" then
        return 0  -- failure: leave turns; eviction will handle them later
    end
    self.summary = new_summary
    if #self.summary > self.max_summary_chars then
        -- Over the cap: ask for a compression pass (turns argument nil).
        -- If that fails, the uncompressed summary is kept.
        local ok2, compressed = pcall(self.summarize_fn, self.summary, nil)
        if ok2 and type(compressed) == "string" and compressed ~= "" then
            self.summary = compressed
        end
    end
    for _ = 1, fold_count do table.remove(self.turns, 1) end
    self._turns_since_summarize = 0
    return fold_count
 end
 -- Budget enforcement: evict the oldest pair (user + assistant) for as
 -- long as the context exceeds max_turns OR token_budget (Phase 8
 -- pillar 5). Returns the total number of turns evicted. Rendering the
 -- §8 status line is left to the caller.
 --
 -- R2 guard: a system_prompt that alone exceeds token_budget keeps the
 -- OR condition true even with no turns left, which would spin forever
 -- calling table.remove on a 0-length list. The trailing
 -- `#self.turns > 0` check makes the loop exit once nothing is left to
 -- evict. Over-budget system_prompts (large [project] blocks, etc.)
 -- are then on the user to shrink via :tree off / :memory clear / etc.
 --
 -- NOTE(review): unlike enforce_cadence there is no orphan-tool guard
 -- here; a pair boundary can apparently separate an assistant-with-
 -- tool_calls turn from its role=tool results — confirm that is
 -- acceptable.
 function Context:enforce_budget()
     -- Same test, in the same evaluation order, as the original loop
     -- header: turn-count check first, token estimate only if needed,
     -- emptiness check last.
     local function over_limit()
         local too_many = #self.turns > self.max_turns
         if not too_many
            and not (self:estimate_tokens() > self.token_budget) then
             return false
         end
         return #self.turns > 0
     end

     local removed = 0
     while over_limit() do
         -- Slice handed to the summarizer: the head turn plus its
         -- partner when one exists.
         local slice = { self.turns[1] }
         if #self.turns >= 2 then slice[2] = self.turns[2] end

         -- Phase 5: let the summarize callback (if wired) absorb the
         -- slice into the rolling summary. Contract per R-B1:
         --   summarize_fn(prior_summary, evicted_turns) -> string | nil
         -- A nil return means silent eviction (Phase 0 behavior).
         local summarize = self.summarize_fn
         if summarize then
             local ok, text = pcall(summarize, self.summary, slice)
             if ok and type(text) == "string" and text ~= "" then
                 self.summary = text
                 -- R-C1: past the cap, compress in a second pass.
                 if #self.summary > self.max_summary_chars then
                     local ok2, shorter = pcall(summarize, self.summary, nil)
                     if ok2 and type(shorter) == "string" and shorter ~= "" then
                         self.summary = shorter
                     end
                 end
             end
         end

         -- Drop the head turn, then its partner (matches the Phase 0
         -- visible effect).
         table.remove(self.turns, 1)
         removed = removed + 1
         local remaining = #self.turns
         if remaining > 0
            and (remaining > self.max_turns or removed % 2 == 1) then
             table.remove(self.turns, 1)
             removed = removed + 1
         end
     end
     return removed
 end
 -- Phase 0 §8: char/4 heuristic. Phase 8 (Q1 resolved): when
 -- self.tokenize_fn is set, use it for accuracy. Per-turn _tokens
 -- cache amortizes after the first count.
 --
 -- system_prompt is recomposed each call (memory/project/summary
 -- blocks are dynamic), so it's not cached — one tokenize round-trip
 -- per call when tokenize_fn is active.
 --
 -- Turn content is immutable after append (see Context:append; we
 -- never mutate stored turns). The cache on t._tokens is therefore
 -- safe to live forever on the turn; it dies with the turn on :reset.
 --
 -- A turn may carry no content at all (the message builder in this
 -- file reads assistant content as `t.content or ""`), so a missing
 -- content is counted as the empty string instead of raising on
 -- `#nil` / handing nil to tokenize_fn.
 -- NOTE(review): tool_calls payloads on such turns are not counted
 -- here — confirm whether they should contribute to the estimate.
 --
 -- Returns the estimated token count of system_prompt + all turns.
 function Context:estimate_tokens()
     if self.tokenize_fn then
         local n = self.tokenize_fn(self.system_prompt)
         for _, t in ipairs(self.turns) do
             if t._tokens == nil then
                 t._tokens = self.tokenize_fn(t.content or "")
             end
             n = n + t._tokens
         end
         return n
     end
     -- char/4 fallback (Phase 0 behavior, unchanged when tokenize_fn nil)
     local n = #self.system_prompt
     for _, t in ipairs(self.turns) do
         n = n + #(t.content or "")
     end
     return math.floor(n / 4)
 end
 -- Phase 7: cost/usage accumulator helpers.
 --
 -- Context:add_usage(model_name, category, usage)
 --   Adds one call's usage to the (model, category) slot, creating the
 --   slot on first use. usage is the payload from broker.lua's
 --   on_delta("usage", ...): { prompt_tokens, completion_tokens,
 --   total_tokens, cost (nil for local per R6), model, category }.
 --   The slot is keyed by the model_name + category ARGUMENTS, not the
 --   payload fields, because the caller may want to normalize (e.g.,
 --   key by req_cfg alias rather than model_cfg.model).
 --   Defaults: model_name "?" and category "main" when omitted.
 function Context:add_usage(model_name, category, usage)
     if not model_name then model_name = "?" end
     if not category then category = "main" end
     if not self.usage_totals then self.usage_totals = {} end

     local per_model = self.usage_totals[model_name]
     if not per_model then per_model = {} end

     local slot = per_model[category]
     if not slot then
         slot = {
             prompt = 0, completion = 0, calls = 0, cost = 0,
             -- R6: sticky flag; set once any nil-cost usage lands here.
             is_local = false,
         }
     end

     slot.prompt     = slot.prompt     + (usage.prompt_tokens or 0)
     slot.completion = slot.completion + (usage.completion_tokens or 0)
     slot.calls      = slot.calls      + 1

     local cost = usage.cost
     if cost ~= nil then
         slot.cost = slot.cost + cost
     else
         slot.is_local = true   -- preserves local-vs-cloud-zero per R6
     end

     -- Publish only after the counters were updated.
     per_model[category] = slot
     self.usage_totals[model_name] = per_model
 end
 -- Returns the cost summed across every (model, category) slot; 0 when
 -- nothing has been recorded. Slots without a cost field count as 0.
 function Context:total_cost()
     local sum = 0
     local totals = self.usage_totals or {}
     for _, categories in pairs(totals) do
         for _, slot in pairs(categories) do
             sum = sum + (slot.cost or 0)
         end
     end
     return sum
 end
 -- Returns two values, (prompt_tokens, completion_tokens), each summed
 -- across all (model, category) slots. Missing counters count as 0.
 function Context:total_tokens()
     local prompt_sum, completion_sum = 0, 0
     local totals = self.usage_totals or {}
     for _, categories in pairs(totals) do
         for _, slot in pairs(categories) do
             prompt_sum     = prompt_sum     + (slot.prompt     or 0)
             completion_sum = completion_sum + (slot.completion or 0)
         end
     end
     return prompt_sum, completion_sum
 end
 -- :cost reset path — re-arm the per-threshold one-shot warn flags AND
 -- zero the usage accumulator.
 function Context:reset_usage()
     self.cost_warn_state = { tokens = false, dollars = false }
     self.usage_totals    = {}
 end
 -- User-driven conversation reset: drops the turns, the rolling
 -- summary, any buffered exec output and the Norris task list.
 function Context:reset()
     -- Phase 10 R6: norris_tasks is cleared defensively. :reset is
     -- unreachable mid-Norris (no readline prompt while the planner
     -- runs), but if a Norris session crashed leaving the field stale,
     -- :reset gives the user a clean recovery path.
     self.norris_tasks = nil
     self.summary = nil
     self.pending_exec_output = nil
     self.turns = {}
     -- R8 parity: usage_totals + cost_warn_state are deliberately left
     -- alone (matches memory_items + project — "ambient context
     -- survives a user-driven conversation reset"). Use :reset_usage
     -- to zero the cost meter explicitly.
 end
 return M
@@ -41,6 +41,14 @@ Phase 0 is the minimal working skeleton. It establishes the REPL loop, input dis
 | Shell execution | `io.popen` in Phase 0; `forkpty` via libc FFI from Phase 1 | `popen` sufficient for non-interactive commands; PTY required for vim, htop, etc. |
 | Session persistence | Deferred to Phase 1 | Phase 0 holds history in memory only |
 | Config format | Lua table (plain `.lua` file sourced at startup) | No parser dependency; native types; easily extended |
 | JSON encode/decode | dkjson 2.8 vendored under `vendor/dkjson.lua` | Pure Lua (preserves §3 "no compiled extensions" invariant); single-file vendor avoids `luarocks`; sourced from Debian's `lua-dkjson` package, originally from dkolf.de |
 **FFI loader fallback.** `ffi.load("readline")` and `ffi.load("curl")`
 look for the unversioned `lib<name>.so` symlink, which is only installed
 by the `-dev` package. Phase 0 loaders try the unversioned name first
 then fall back to versioned sonames (`readline.so.8`, `readline.so.7`,
 `curl.so.4`, `curl-gnutls.so.4`) so a runtime-only host (Debian/ALARM
 without `lib<name>-dev`) just works.
 ---
@@ -51,6 +59,7 @@ aish/
 ├── main.lua              # Entry point: arg parsing, config load, REPL start
 ├── repl.lua              # Readline loop, input dispatch, prompt rendering
 ├── broker.lua            # llama.cpp HTTP client; Phase 0: blocking POST
 ├── mcp.lua               # MCP JSON-RPC 2.0 client (Phase 2; added 2026-05-12)
 ├── router.lua            # Task classifier: shell / AI / meta
 ├── executor.lua          # Command execution; Phase 0: io.popen
 ├── context.lua           # In-memory conversation history, token budget
@@ -65,7 +74,7 @@ aish/
    └── libc.lua          # Shared: errno, signal, write, read, misc
 ```
-All modules are required explicitly from `main.lua`. No module autoloading. File names are stable across phases — later phases fill in bodies, not rename files.
+All modules are required explicitly from `main.lua`. No module autoloading. File names are stable across phases — later phases fill in bodies, not rename files. Adding new files is permitted and additive (e.g. `mcp.lua` was inserted at Phase 2 per docs/PHASE2.md §9); the rename prohibition is what keeps cross-phase wiring stable.
 ---
@@ -133,7 +142,9 @@ Each turn is stored in `context.lua` as:
 { role = "system" | "user" | "assistant", content = "..." }
 ```
-The system prompt is prepended on every request and is not stored as a history turn. Exec output injected into context uses role `"user"` with a prefix tag `[exec output]`.
+The system prompt is prepended on every request and is not stored as a history turn.
 **Exec output injection.** Captured shell-exec output is **not** appended as its own user turn — that produces user/user back-to-back, which strict chat templates (e.g. `mistral-nemo-instruct`'s Jinja) reject with `roles must alternate user/assistant/...`. Instead, exec output is buffered on the context and prepended to the **next** user turn with a `[exec output]` tag. Multiple shell calls between AI turns concatenate. `:reset` clears the buffer. The user-visible behavior is unchanged; only the role alternation seen by the broker differs.
 ### System prompt (Phase 0 default)
@@ -152,7 +163,7 @@ The `CMD:` prefix convention is the extraction contract between the model and `e
 ## 7. Execution Model (Phase 0)
 ```lua
-- executor.lua Phase 0
+-- executor.lua Phase 0 (illustrative — see note below)
 local function exec(cmd)
    local handle = io.popen(cmd .. " 2>&1", "r")
    local output = handle:read("*a")
@@ -161,11 +172,22 @@ local function exec(cmd)
 end
 ```
 **Superseded by Phase 1.** The §7 sketch was never quite accurate on
 LuaJIT 2.1 (which follows the Lua 5.1 ABI for `io.popen():close()` and
 returns only `true` — no exit status). The Phase 0 implementation worked
 around this with a sentinel-echo wrapper (`(cmd) 2>&1; echo
 __AISH_EXIT_<tag>__$?`) and parsed the status back out of stdout. Phase 1
 retired the workaround entirely: `executor.lua` now spawns the child via
 `forkpty` and recovers exit status via `waitpid(WEXITSTATUS)`. See
 docs/PHASE1.md §5 for the current PTY model.
 Output is captured and:
 1. Printed to the terminal
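 2. Buffered on the context and prepended to the next user turn with an `[exec output]` tag (never appended as a separate user turn — see **Exec output injection** above)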
-`cd` is intercepted before `popen` and handled via `posix.chdir` (libc FFI) so the working directory change persists across calls — `popen` forks a subprocess and `cd` inside it would otherwise be discarded.
+`cd` is intercepted before `popen` and handled via `libc.chdir` (FFI) so
 the working directory change persists across calls — `popen` forks a
 subprocess and `cd` inside it would otherwise be discarded.
 ---
@@ -269,11 +291,24 @@ return {
 ```
 Config path resolution order:
-1. `--config <path>` CLI argument
+1. `--config <path>` CLI argument (explicit; failure if not openable, no fallback)
 2. `$AISH_CONFIG` environment variable
 3. `~/.config/aish/config.lua`
 4. `./config.lua` (development fallback)
 Phase 9 adds a project-local overlay step AFTER the user config resolves:
 walks up from cwd looking for `.aish.lua` (stops at `$HOME` or `/`),
 prompts to trust on first encounter, sha256-pins the trust record, and
 shallow-merges the project's top-level keys onto the user config. See
 `docs/PHASE9.md`.
 **Cwd-relative module resolution.** Phase 0 prepends `./?.lua;./vendor/?.lua`
 to `package.path`, so `luajit main.lua` must be invoked with the repo
 root as cwd. Cwd-independent resolution (relative to the script's own
 directory) lands later — likely Phase 1 alongside the install path
 work, or whenever the first user reports trying `luajit ~/aish/main.lua`
 from somewhere else.
 ---
 ## 11. Planned Phase Sequence
@@ -287,6 +322,10 @@ Config path resolution order:
 | **4** | `memory.jsonl` summarization, startup context injection from memory, `:history` management, pruning |
 | **5** | Multi-model routing by task type, cloud fallback, context summarization via fast model on eviction |
 | **6** | Tree-sitter syntax highlighting hooks, diff-aware code injection, project-level context (file tree summary) |
 | **7** | Cost / usage observability: broker captures `usage` + `cost`; per-session accumulator on ctx; `:cost` reporter; optional warn thresholds |
 | **8** | Accurate tokenization: per-endpoint `/tokenize` probe (cached); `broker.token_count`; `Context:estimate_tokens` widened; `:cost detail` est-vs-actual annotation |
 | **9** | Project-local config overlay (`.aish.lua` walk-up from cwd to $HOME, sha256-pinned trust prompt, shallow merge over user config); `:config show` meta |
 | **10** | Cloud preplanner + local executor split for Norris (`cfg.norris.preplanner` emits TASK list once; `cfg.norris.executor` runs each step); `extract_task_lines`; `ctx.norris_tasks` anchor (survives eviction); cost category `"norris-preplan"` |
 ---
@@ -0,0 +1,271 @@
 # aish — Phase 1 Manifest
 **Project:** aish — AI-augmented conversational shell
 **Document:** Phase 1 Requirements, Architecture & Design Decisions
 **Status:** Formulate (pre-analysis)
 **Date:** 2026-05-10
 PHASE0.md is the locked substrate. This manifest specifies what Phase 1
 adds on top. Section numbers reference back to PHASE0.md when relevant.
 ---
 ## 1. Scope of Phase 1
 Four pillars per PHASE0.md §11:
 1. **SSE streaming** — assistant text arrives incrementally instead of as
   a complete block at end of `curl_easy_perform`. Reuses the Phase 0
   WRITEFUNCTION hook in `ffi/curl.lua`.
 2. **PTY-backed exec** via `forkpty` (libc FFI). Replaces Phase 0's
   `io.popen` so interactive commands (`vim`, `less`, `htop`) work and so
   the §7 sentinel-echo exit-code workaround can be retired in favor of
   `waitpid`.
 3. **Session persistence** — each session writes an append-only JSONL log
   under `<config.history.dir>/sessions/<utc>.jsonl`. Optional `:resume`
   loads a prior session's turns into context.
 4. **Readline custom bindings** — wire the rebinding API on `ffi/readline.lua`
   so subsequent phases can attach actions to key sequences. Phase 1 itself
   binds nothing user-visible; Norris (Phase 3) is the first consumer.
 **Phase 1 is done when:**
 - Assistant responses arrive token-by-token (visible streaming)
 - `vim` / `less` / `htop` work end-to-end via `$cmd` or `:exec cmd`
 - A session is written to `sessions/*.jsonl` and resumable across `luajit main.lua` invocations
 - The Phase 0 `executor.lua` sentinel hack is gone; PHASE0.md §7's sketch becomes accurate (waitpid surfaces the exit code)
 - `rl_bind_keyseq` is callable from Lua and known not to crash with a no-op handler bound to a reserved sequence
 ---
 ## 2. Technology Decisions (delta from Phase 0)
 | Decision | Choice | Rationale |
 |---|---|---|
 | Streaming transport | SSE over the existing libcurl easy interface | OpenAI-compat servers (llama.cpp, hossenfelder) emit `text/event-stream` when the request body has `stream: true`. The Phase 0 WRITEFUNCTION callback already receives incremental chunks; the only change is the parsing strategy. |
 | Streaming concurrency | Single blocking `curl_easy_perform`; the WRITEFUNCTION calls a Lua `on_delta` callback synchronously | With the easy interface, libcurl invokes the WRITEFUNCTION callback on the calling thread from inside `curl_easy_perform` — the same way Phase 0's WRITEFUNCTION already ran. No coroutines / no threads in Phase 1. |
 | PTY library | `forkpty(3)` from libutil (linked separately on glibc) | Standard, single-call setup of master/slave pair + fork + dup2. Avoids hand-rolling the openpty/grantpt/unlockpt/ptsname dance. |
 | Exec uniformity | All shell exec goes through PTY (no `io.popen` fallback) | One code path. Non-interactive cmds (`ls`) work fine on a PTY too. Avoids the per-cmd "is this interactive?" classifier. |
 | Exit code recovery | `waitpid(WEXITSTATUS)` from the PTY parent | The §7 sentinel-echo hack is retired. Same commit that lands PTY exec also amends PHASE0.md §7 to drop the LuaJIT-2.1 popen caveat. |
 | Session log format | Append-only JSONL (one turn per line) | Streaming-friendly; grep-able; robust to truncation; no parser dependency beyond the vendored dkjson. |
 | Session location | `<config.history.dir>/sessions/<UTC-iso8601>.jsonl` | Default `~/.local/share/aish/sessions/` per Phase 0 config. Per-session file → concurrent aish processes don't collide. |
 | Session save trigger | Auto-write on `:quit` AND explicit `:save` for mid-session checkpoint | Closes Q3 from PHASE0.md §13 with both. The auto path means kept-by-default; explicit path exists for users who want a checkpoint name. |
 | Readline bindings API | Bind via `rl_bind_keyseq` (GNU readline) — `M.bind(seq, lua_fn)` wrapper | Phase 1 ships the wiring; bound sequences with no consuming phase yet are reserved with a logged-status no-op. Phase 3+ replace handlers. |
 ---
 ## 3. Module Changes
 No new module file names beyond the §4 stubs already present (`ffi/pty.lua`,
 `history.lua`). All changes are growth of existing files.
 | File | Phase 0 | Phase 1 |
 |---|---|---|
 | `ffi/curl.lua` | Blocking POST; response captured into a Lua string | Add `M.post_sse(url, body, headers, on_event)`. `on_event(delta)` is called per parsed SSE `data:` line. The Phase 0 `M.post` stays for non-streaming consumers. |
 | `ffi/pty.lua` | Stub | Implement: `M.spawn(argv) -> handle`; handle exposes `:read()`, `:write(data)`, `:close()`, `:wait() -> exit_code`. Uses `forkpty` + `waitpid`. |
 | `ffi/libc.lua` | `chdir`, `errno`, `strerror` | Add `waitpid`, `WEXITSTATUS` (macro materialized in Lua), `read`, `write`, `close`, `kill`, `tcgetattr`/`tcsetattr` + `cfmakeraw` for raw-mode toggle on the controlling tty (required for single-key UIs to work — done-criteria #2), `poll` for stdin↔master multiplex in executor. |
 | `ffi/readline.lua` | `readline`, `add_history` | Add `rl_bind_keyseq` binding; expose `M.bind(seq, fn)`. |
 | `broker.lua` | `M.chat(cfg, msgs)` blocking | Add `M.chat_stream(cfg, msgs, on_delta)`. `M.chat` becomes a thin wrapper that buffers deltas. |
 | `executor.lua` | `popen` + sentinel exit-code recovery + `cd` interception + `CMD:` extract | Replace popen path with `pty.spawn`. The sentinel hack is deleted. `cd` interception unchanged (still routes through `libc.chdir`). `CMD:` extract unchanged. |
 | `repl.lua` | Blocking ask_ai → renderer.assistant | `chat_stream` with renderer.assistant_delta per chunk; closing flush highlights any completed `CMD:` lines. New meta: `:save`, `:resume <name>`, `:sessions`. |
 | `renderer.lua` | `assistant(text)` whole block | Add `assistant_delta(chunk)` and `assistant_flush()`. Streaming path emits raw chunks; flush re-highlights completed `CMD:` lines if needed. |
 | `history.lua` | Stub | Implement: `M.open(path) -> session`; `session:append(turn)`; `M.load(path) -> turns`; `M.list_sessions(dir) -> [{name, mtime, turns}]`. |
 | `config.lua` | history.dir set | Optional new fields: `session.autosave` (default true), `session.resume_on_start` (default false). |
 ---
 ## 4. SSE Streaming
 ### Request shape (delta from PHASE0 §6)
 ```
 POST /v1/chat/completions
 Content-Type: application/json
 {
  "model": "...",
  "messages": [...],
  "stream": true,
  "temperature": 0.2
 }
 ```
 ### Event format (per OpenAI / llama.cpp)
 ```
 data: {"choices":[{"delta":{"content":"Hel"}}]}
 data: {"choices":[{"delta":{"content":"lo"}}]}
 data: [DONE]
 ```
 Events are `\n\n`-terminated. `data: ` prefix carries either JSON or the
 literal `[DONE]` sentinel. SSE comments (lines starting with `:`) are
 ignored.
 ### Parser (in `ffi/curl.lua` post_sse)
 1. WRITEFUNCTION accumulates into a buffer.
 2. After each callback delivery, scan for `\n\n` event terminators.
 3. For each complete event:
   - Skip `:` comment lines.
   - Strip the `data: ` prefix.
   - If body is `[DONE]`, signal end.
   - Else `dkjson.decode(body)`, extract `choices[1].delta.content`, call `on_event(content)`.
 4. Carry incomplete tail of buffer into next callback.
 UTF-8 codepoint splits at chunk boundaries are tolerated because we hold
 delivery in the buffer until a full event is assembled before decoding.
 ### Renderer streaming
 `renderer.assistant_delta(chunk)` writes raw characters to stdout (no
 ANSI markup yet — the `CMD:` highlight depends on seeing a complete
 line). `renderer.assistant_flush()` is called after the SSE stream ends:
 it scans the accumulated stdout buffer (kept in renderer-local state) for
 completed `CMD:` lines and emits ANSI sequences after-the-fact via cursor
 manipulation. Open question Q12 below.
 ---
 ## 5. PTY Execution Model
 ```
 parent (aish)              child (cmd)
 ─────────────              ───────────
 forkpty()                       │
   │                            │
   ├─ master fd ───────┐        │
   │                   └────────┴── slave PTY (becomes child stdin/stdout/stderr)
   │
   ├─ poll / read master fd → renderer.exec_output_delta(chunk)
   ├─ write master fd ← user keystrokes (when interactive)
   │
   └─ waitpid() → exit_code = WEXITSTATUS(status)
 ```
 For Phase 1's interactive cmds (vim/less/htop), aish flips its own
 controlling tty to raw mode (`tcgetattr` + `tcsetattr` ICANON/ECHO off)
 while the child is running, and restores on exit. Ctrl-C sends `SIGINT`
 to the child via `kill(pid, SIGINT)` rather than the aish parent.
 Non-interactive cmds (`ls`, `git status`) run on the same path; the
 output is read from the master fd and rendered exactly as Phase 0's
 exec_output frame did. The fact that the tty is a PTY rather than a pipe
 does not change the visible UX for these.
 Exit code: `waitpid(pid, &status, 0); WEXITSTATUS(status)`. The §7
 sentinel-echo hack is gone. PHASE0.md §7's amendment ("LuaJIT 2.1
 popen-close caveat") becomes obsolete — same commit that lands the PTY
 work amends §7 again to drop the caveat.
 ---
 ## 6. Session Persistence
 ### Format
 Each session is one JSONL file. One turn per line:
 ```jsonl
 {"ts":"2026-05-10T19:00:01Z","role":"user","content":"list files"}
 {"ts":"2026-05-10T19:00:04Z","role":"assistant","content":"CMD: ls"}
 {"ts":"2026-05-10T19:00:05Z","role":"user","content":"[exec output]\n..."}
 ```
 The first line is special: `{"meta":{"started":"...","model":"fast","aish_version":"phase1"}}`.
 ### Lifecycle
 - On startup, `history.lua` opens `<config.history.dir>/sessions/<utc-iso8601>.jsonl` for append.
 - Every `ctx:append_user(...)` and assistant turn triggers a `session:append(turn)`.
 - `:quit` closes the file and flushes (auto-save default).
 - `:save [<name>]` renames the current session file to `<name>.jsonl` (or copies if user wants both auto + named).
 - `:resume <name>` reads a JSONL file, recreates a Context, swaps it in. Q15 below covers the warn/refuse semantics on a non-empty current context.
 - `:sessions` lists files in the dir with mtime + turn count.
 ### Recovery semantics
 Append-only JSONL means a partial last line (process killed mid-write)
 is recoverable: `history.load` skips lines that fail to JSON-parse and
 emits a warning. Phase 1 does not fsync after every line (overhead), so
 a crash may lose the most recent turn. Tracked as Q16 in §10.
 ---
 ## 7. Readline Custom Bindings
 Wire `rl_bind_keyseq` from libreadline:
 ```c
 int rl_bind_keyseq(const char *keyseq, rl_command_func_t *function);
 ```
 Lua wrapper:
 ```lua
 function M.bind(seq, fn)
    -- ffi.cast a closure that calls fn() and returns 0
    rl.rl_bind_keyseq(seq, fn_cast)
 end
 ```
 Phase 1 binds nothing user-visible. The reserved-key list is documented
 here so subsequent phases don't collide:
 | Sequence | Reserved for | Phase |
 |---|---|---|
 | `\C-n` | Norris autonomous mode toggle | 3 |
 | `\C-x\C-c` | Cancel running CMD: confirm prompt | 3 (deferred from Phase 1 — no consumer here) |
 Phase 1 binds `\C-n` to a no-op handler that emits a `[aish] Norris mode
 not yet implemented (Phase 3)` status, just to verify the wiring works.
 ---
 ## 8. Migration from Phase 0
 User-visible changes:
 - Assistant responses stream instead of arriving in a block.
 - All exec routes through PTY; `vim`/`less`/`htop` work.
 - A session log is written by default; `:reset` no longer loses the conversation forever (it's in the JSONL).
 Substrate (PHASE0.md §3) invariants are unchanged. The §6 broker
 contract grows (request body adds `stream: true`; response handling adds
 SSE) but the Phase 0 blocking shape stays callable. The §7 amendment
 about LuaJIT 2.1 popen-close gets retired in the same commit that lands
 PTY exec.
 ---
 ## 9. Out of Scope (Phase 1)
 Per PHASE0.md §11, these belong elsewhere:
 - Tool-calling / MCP (Phase 2)
 - Norris autonomous mode (Phase 3)
 - `memory.jsonl` summarization (Phase 4)
 - Multi-model routing / cloud fallback (Phase 5)
 - Tree-sitter syntax highlighting (Phase 6)
 Specifically out of Phase 1 scope despite proximity:
 - Any binding consumer beyond the no-op `\C-n` reserved key.
 - Streaming partial-tool-call deltas (Phase 2).
 - Session search / pruning beyond `:sessions` listing (Phase 4).
 ---
 ## 10. Open Questions
 | # | Question | Impact | Resolve by |
 |---|---|---|---|
 | Q11 | Hossenfelder-via-OpenRouter SSE: do all routed cloud models emit identical event shape, or do some flatten / re-frame? | broker.lua streaming parser robustness | Phase 7 (verify) |
 | Q12 | `CMD:` highlight on streaming output: highlight as the line completes (delayed render), or live-highlight starting at the `CMD: ` prefix detection? Cursor-positioning re-render trade-off. | renderer.lua | Phase 4 (plan) |
 | Q13 | TTY raw-mode restore on uncaught Lua error during PTY exec: SIGWINCH handler + on-exit hook, or accept that a crashed aish leaves a wrecked terminal? | executor + signal handling | Phase 4 (plan) |
 | Q14 | `\C-n` reserved binding: bind a no-op now (verifies wiring) or defer the entire binding API to Phase 3 (where Norris is the first real consumer)? | ffi/readline + repl scope | Phase 4 (plan) |
 | Q15 | `:resume <name>` into a non-empty current context: refuse with a warning, prompt-overwrite, or merge? | repl + history | Phase 4 (plan) |
 | Q16 | Session log fsync: per-line (safe, slow) or close-only (fast, lossy on crash)? Default Phase 1 = close-only; revisit if crash recovery becomes a real concern. | history.lua | Phase 1 default; tracked for Phase 4 if it bites |
 ---
 *End of Phase 1 Manifest — aish*
@@ -0,0 +1,391 @@
 # aish — Phase 10 Manifest
 **Project:** aish — AI-augmented conversational shell
 **Document:** Phase 10 Requirements, Architecture & Design Decisions
 **Status:** Formulate (pre-analyze)
 **Date:** 2026-05-17
 PHASE0 is the locked substrate; PHASE1-9 are layered on top. This
 manifest specifies what Phase 10 adds — **Cloud preplanner → local
 executor split** for Norris autonomous mode. Resolves Gitea issue #89.
 Today Norris runs entirely on ONE model: pick cloud (capable but slow
 per step + costs per step) OR local (fast + free per step but easily
 distracted on multi-step planning). Phase 10 splits the planning and
 execution roles: cloud emits a TASK list ONCE per Norris session;
 local model executes each task. Most tasks are simple shell ops the
 local model handles fine; cloud is used only at the planning layer
 that benefits from its reasoning.
 PHASE0 §11 amendment to add Phase 10 row lands in the same commit
 as this formulate doc.
 ---
 ## 1. Scope of Phase 10
 Four pillars:
 1. **Preplan call** — on `:norris <goal>` launch, if `cfg.norris.preplanner`
   names a configured model preset, fire ONE broker.chat call against
   that preset with a system-prompt asking for `TASK: <imperative>` lines.
   Parse them into a list; cap at `cfg.norris.tasks_max` (default 16).
   Stash the list + current index on ctx (separate from ctx.turns so
   eviction can't lose them — mirrors the ctx.norris_goal anchor).
 2. **Executor loop** — `safety.norris_step` already iterates per-step;
   extend its prompt to include the CURRENT task. Synthesize a user-
   turn-shaped `[task k/N] <task text>` block fed alongside the
   existing NORRIS suffix. When all tasks consumed (or executor signals
   GOAL: complete early), Norris exits.
 3. **Cost + secrets composition** — preplan call goes through the
   normal scrub_messages + on_delta usage callbacks. Category
   `"norris-preplan"`; executor steps keep `"norris"`. `:cost detail`
   surfaces both as separate rows.
 4. **Graceful fall-back** — if `cfg.norris.preplanner` is unset OR
   the preplan call fails (transport err, parse failure, empty list),
   Norris runs as today: single model handles both planning and
   execution via the existing in-loop reasoning. No regression for
   users without Phase 10 config.
 **Phase 10 is done when:**
 - `:norris find files larger than 10MB in /var/log and report sizes`
  launched with `cfg.norris.preplanner = "cloud"` + `cfg.norris.executor
  = "fast"`:
  1. Cloud emits a TASK list (e.g., `TASK: find /var/log -size +10M`;
     `TASK: stat -c "%n %s" <results>`; `TASK: format and report`).
  2. Terminal output: `[aish] preplanned 3 tasks via cloud`  (R8: was "Status:")
  3. Per-step execution by `fast`: each step shows the task it's
     working on; existing HALT protocol still gates destructive ops.
 - Without `cfg.norris.preplanner`, Norris behaves exactly as Phase 6
  (no regression for existing users).
 - Preplan failure (broken cloud endpoint) → status log + fall back
  to single-model Norris.
 - `:cost detail` after a Norris session shows BOTH
  `cloud / norris-preplan` (one row) and `<executor model> / norris`
  (one row).
 ---
 ## 2. Technology Decisions (delta from Phase 9)
 | Decision | Choice | Rationale |
 |---|---|---|
 | Preplan trigger | ONCE at `:norris <goal>` launch (run_norris in repl.lua) | One round-trip per Norris session keeps cost predictable. Re-planning mid-flight deferred to a future iteration. |
 | Preplan model selection | `cfg.norris.preplanner` (string; matches a key in cfg.models) | Same shape as `cfg.safety.llm_model`. Optional; absent = no split, existing behavior. |
 | Executor model selection | `cfg.norris.executor` (string; matches cfg.models key) | Optional; absent = active_cfg (the user's `:model` choice at launch — existing behavior). |
 | Preplan system prompt | Static template baked into safety.lua: "Decompose the goal into single-step imperative TASKs. Output format: TASK: <imperative sentence, max 80 chars>. Maximum N tasks." with N = cfg.norris.tasks_max | Predictable parse; small surface. Override via cfg.norris.preplan_system if user wants. |
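 | TASK line parsing | `^TASK:%s*(.+)$` per line, matched after trimming leading/trailing whitespace (so indented `TASK:` lines still parse, per §10 and the C1 test cases); filter empty | Same shape as the existing CMD: / DELEGATE: / CMD&: extractors in executor.lua. Trivially adapt extract_*_lines. |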
 | Task storage | `ctx.norris_tasks = { current = 1, list = {...} }` (NEW field, separate from ctx.turns) | Survives eviction (mirrors ctx.norris_goal anchor); cleared at Norris exit. |
 | Step-prompt synthesis | `safety.norris_step` reads `ctx.norris_tasks.list[current]` and surfaces the current task in the rendered messages (system block? or synth user turn?). Decision: system block — append a `Current step k/N` task hint AFTER the NORRIS suffix already in the system prompt (see §5). | Keeps user-turn alternation legal; NORRIS suffix already exists and is per-turn re-composed. |
 | Per-task advance | After `safety.norris_step` returns "continue", repl.lua's run_norris bumps `ctx.norris_tasks.current`. When current > #list, Norris exits with status "tasks_complete". | Same as the existing step counter; just tied to the task list now. |
 | Goal anchor + task layered together | Both visible in the NORRIS suffix: `goal:` line (existing) + `current task k/N:` line (new) | Planner-executor still sees the global goal AND the current focus. |
 | Preplan parse failure | Status log + fall back to single-model Norris (no tasks) | Robust; user can re-launch :norris if preplan was wonky. |
 | Preplan empty result | Same as parse failure — fall back | Robust. |
 | tasks_max cap | Default 16; cfg.norris.tasks_max overrides | Bounded blast radius; matches the existing max_norris_steps cap intent. |
 | Cost category | "norris-preplan" for the preplan call; "norris" for executor steps (existing) | `:cost detail` surfaces them as separate rows. |
 | Secrets/scrub | Preplan call goes through scrub_messages + rehydrate (matches all other broker calls in repl.lua) | No special-case. |
 | Norris HALT protocol | Unchanged — per executor step | Existing safety.is_destructive + halt-proceed/skip/abort still gates. |
 | Skip semantics | If user halts and skips at task k, advance to task k+1 (NOT re-try) | Predictable; user can :norris off + relaunch with refined goal if they need full re-plan. |
 ---
 ## 3. Module Changes
 | File | State after Phase 9 | Phase 10 changes |
 |---|---|---|
 | `repl.lua` | `run_norris(goal)` builds helpers, runs while loop calling safety.norris_step | Pre-loop: if `cfg.norris.preplanner` set, fire one broker.chat against that preset; parse TASK lines; set `ctx.norris_tasks`. Per-iteration: bump `ctx.norris_tasks.current` after each non-terminal result; exit "tasks_complete" when exhausted. |
 | `safety.lua` | norris_step composes the NORRIS suffix; uses model_cfg for broker call | Read `ctx.norris_tasks` if set; embed `[task k/N] <text>` into the suffix template OR pass via opts. Use `cfg.norris.executor` (resolved by repl.lua at run_norris launch) for the per-step broker call. |
 | `context.lua` | system prompt composition + ctx.norris_active/norris_goal/norris_consecutive_skips | Add `ctx.norris_tasks` field (table or nil); clear on :reset (matches norris_goal lifecycle). NORRIS_SUFFIX_TEMPLATE extended to optionally show current task. |
 | `executor.lua` | extract_cmd_lines, extract_cmd_bg_lines, extract_delegate_lines | Add `extract_task_lines(text)` — pure function. |
 | `config.lua` | Phase 9 .aish.lua header + existing example blocks | Add commented-out `norris = { preplanner = "cloud", executor = "fast", tasks_max = 16 }` block. |
 | `docs/PHASE0.md` | §11 lists phases 0-9 | Amendment: add Phase 10 row. |
 No new module files.
 ---
 ## 4. Pillar 1 — Preplan call
 ```lua
 -- repl.lua run_norris, pre-loop block:
 local tasks
 if config.norris and config.norris.preplanner then
    local pre_name = config.norris.preplanner
    local pre_cfg  = config.models and config.models[pre_name]
    if pre_cfg then
        local sys = (config.norris and config.norris.preplan_system) or [[
 You are a task decomposer. Given the user's goal, decompose it into a
 sequence of single-step imperative TASKs. Output format: one TASK per
 line, EXACTLY this shape:
  TASK: <imperative sentence, max 80 chars>
 Output AT MOST %d tasks. No prose; no numbering; no commentary outside
 the TASK: lines.
 ]]
        -- R1 fix: %d via string.format; gsub("N", ...) would corrupt
        -- "No prose / No commentary / No numbering" → "16o prose" etc.
        sys = string.format(sys, config.norris.tasks_max or 16)
        local msgs = scrub_messages({
            { role = "system", content = sys },
            { role = "user",   content = goal },
        }, secrets_mode_for(pre_cfg))
        local text, usage = broker.chat(pre_cfg, msgs,
            { category   = "norris-preplan",
              max_tokens = 800,
              -- R7 fix: respect the model's configured timeout
              timeout_ms = pre_cfg.timeout_ms or 60000 })
        if text then
            if secrets_session then text = secrets_session:rehydrate(text) end
            if usage then _record_usage(usage.model, usage.category, usage) end
            local parsed = executor.extract_task_lines(text)
            local cap = config.norris.tasks_max or 16
            if #parsed > cap then
                -- trim and warn
                for i = #parsed, cap + 1, -1 do parsed[i] = nil end
                renderer.status(("preplan emitted >%d tasks; truncated"):format(cap))
            end
            if #parsed > 0 then
                tasks = parsed
                renderer.status(("preplanned %d tasks via %s"):format(#tasks, pre_name))
            else
                renderer.status("preplan produced no TASK lines; running single-model")
            end
        else
            renderer.status("preplan failed: " .. tostring(usage)
                            .. "; running single-model")
        end
    end
 end
 if tasks then
    ctx.norris_tasks = { current = 1, list = tasks }
 end
 ```
 ---
 ## 5. Pillar 2 — Executor loop
 `safety.norris_step` extension: if `ctx.norris_tasks` is set, embed
 the current task into the system suffix. The existing while loop in
 `run_norris` already calls `norris_step` once per iteration; after
 each `result.status == "continue"`, bump
 `ctx.norris_tasks.current = ctx.norris_tasks.current + 1`. When
 `current > #ctx.norris_tasks.list`, the loop exits with a
 synthesized `"tasks_complete"` final status.
 System suffix extension (R2 fix — keep NORRIS_SUFFIX_TEMPLATE
 **unchanged**; append a task-hint block AFTER the existing format):
 ```lua
 -- New helper at module scope in context.lua, alongside NORRIS_SUFFIX_TEMPLATE:
 local function compose_norris_task_hint(self)
    if not (self.norris_tasks and self.norris_tasks.list) then return "" end
    local k = self.norris_tasks.current
    local n = #self.norris_tasks.list
    local task = self.norris_tasks.list[k]
    if not task then return "" end  -- exhausted → no hint
    return string.format(
        "\n\nCurrent step %d/%d:\n    %s", k, n, task)
 end
 -- In Context:to_messages, AFTER the existing string.format(NORRIS_SUFFIX...)
 -- block, append the hint:
 if self.norris_active and self.norris_goal then
    sys_content = sys_content
        .. string.format(NORRIS_SUFFIX_TEMPLATE, self.norris_goal)
        .. compose_norris_task_hint(self)
 end
 ```
 Also (R6 fix) defensive clear in `Context:reset()`:
 ```lua
 function Context:reset()
    self.turns = {}
    self.pending_exec_output = nil
    self.summary = nil
    self.norris_tasks = nil  -- R6: defensive; :reset is unreachable
                             --     mid-Norris but cheap to be safe.
 end
 ```
 ---
 ## 6. Pillar 3 — Cost + secrets composition
 Preplan call goes through the same `broker.chat` API as Phase 7 cost-
 accumulator wiring. `category = "norris-preplan"` tags it for
 `:cost detail` separation:
 ```
 [aish] session usage detail (total=$0.000119, 312/45 tokens):
  anthropic/claude-haiku-4.5  norris-preplan  1 calls,  180 / 35 tokens, $0.000099
  qwen-coder-7b-snappy-8k     norris          5 calls,  132 / 10 tokens, $0.000000  (local)
 [aish] estimated session ctx: 412 tokens; token_budget=4096 (10.1% used)
 ```
 Secrets scrub fires before broker.chat sees the messages; rehydrate
 on reply — same path as other call sites.
 ---
 ## 7. Pillar 4 — Graceful fall-back
 If `cfg.norris.preplanner` is unset → `tasks = nil` → Norris behaves
 as Phase 6 (single-model loop; existing semantics).
 If preplan call fails (transport err, parse failure, empty list) →
 status log + `tasks = nil` → same fall-back.
 If executor model lookup fails (`cfg.norris.executor` names a
 non-existent preset) → status log + use active_cfg (existing
 behavior). User can fix config and re-launch.
 If `:reset` is invoked → unreachable mid-Norris (no readline prompt
 while the planner is running). Out-of-Norris, `Context:reset()` now
 also clears `self.norris_tasks` as defensive coding (R6 fix).
 R4: `run_norris` clears `ctx.norris_active`/`ctx.norris_goal`/
 `ctx.norris_tasks` at the **top** of the function, BEFORE the preplan
 block. This guarantees a fresh launch starts clean even if a prior
 Norris session crashed with stale state. Cheaper than wrapping the
 whole driver in pcall.
 ---
 ## 8. UX Surface Summary
 | Config | Default | Effect |
 |---|---|---|
 | `cfg.norris.preplanner` | nil | Name of model preset for the preplan call; absent = no split |
 | `cfg.norris.executor` | nil (uses active model) | Name of model preset for per-step execution |
 | `cfg.norris.tasks_max` | 16 | Cap on TASK list size (parse-time trim) |
 | `cfg.norris.preplan_system` | (built-in template) | Override preplan system prompt |
 | Startup status | Behavior |
 |---|---|
 | (preplan unset) | nothing — existing single-model Norris |
 | (preplan success) | `[aish] preplanned N tasks via <preplanner>` |
 | (preplan failed) | `[aish] preplan failed: <reason>; running single-model` |
 | (preplan over cap) | `[aish] preplan emitted >N tasks; truncated` |
 No new meta commands in v1. Inspect via `:cost detail` (separate
 norris-preplan row) and the existing `:history` (preplan call + reply
 become assistant turns visible there).
 ---
 ## 9. Out of Scope (Phase 10)
 - **Mid-flight re-plan** — preplan fires ONCE per Norris launch.
  Re-plan based on per-step results would be a separate iteration;
  user can `:norris off` + re-launch with refined goal for v1.
 - **Adaptive task decomposition** — TASKs are fixed at launch; the
  executor doesn't get to refine them. v1 trusts the preplanner's
  parse.
 - **Multi-step task = sub-tasks** — flat list only. Nested TASK
  hierarchies are a future shape.
 - **Skip-then-retry** — skip at HALT advances to the next task; no
  retry mechanism. User re-launches if they need a retry.
 - **Per-task model selection** — single executor model for the whole
  session. Per-task routing (e.g. some tasks → cloud, some → local)
  is interesting but bigger surface; defer.
 - **Preplan-while-executing** — sequential: preplan first, THEN
  execute. Streaming overlap is a future optimization.
 ---
 ## 10. Risks
 | Risk | Mitigation |
 |---|---|
 | Preplan model emits malformed output (no `TASK:` lines, or wraps in markdown) | extract_task_lines tolerates leading whitespace + ignores non-TASK lines. If zero TASKs parsed, fall back to single-model. |
 | Preplanner cost surprises user (silent paid call on every :norris launch) | Phase 7 cost meter accounts it under `norris-preplan` category; warn_at_dollars still fires. Default = unset (no automatic cost). |
 | Task list is wrong / off-goal | Executor still has the global GOAL in the NORRIS suffix; can deviate per-step. Skip-budget per Phase 3 still escalates. User retains `:norris off` abort. |
 | Local executor can't actually do a planned step (model too weak) | Same as today's Norris-on-local case — model emits something useless; HALT prompt lets user skip or abort. Phase 10 doesn't fix this; preplan + execute split makes the failure mode more visible (you can SEE which TASK is stuck). |
 | ctx.norris_tasks survives across non-:reset session boundaries | Cleared at Norris exit (in run_norris's finally-equivalent) so re-launching Norris in same session starts fresh. |
 | Eviction during long Norris session removes preplan + first executor turns | Tasks stored on ctx (NOT in turns); survive eviction. Per Phase 3 R-C3 the goal anchor in the NORRIS suffix also survives. |
 | Preplan system prompt drift (user overrides badly) | Built-in fallback if cfg.norris.preplan_system absent; user override is opt-in. |
 | Anthropic cloud preplan emits "Here's my plan:\n1. ...\n2. ..." (markdown numbering) instead of TASK: lines | extract_task_lines uses strict `^TASK:` matcher; markdown lists are ignored. preplan_system explicitly demands the format. If real cloud models drift, document or refine prompt at impl time. |
 | R3: preplan call bypasses `call_broker` (Phase 5 fallback-retry wrapper) | **By design** — retrying the preplan against `fallback_model` would produce a different decomposition from a different model. That's not a recovery; it's a silent semantics change. Hard-fail to single-model Norris is the safer fallback. Documented here so a future maintainer doesn't "fix" it by wiring `call_broker` and surprise users. |
 ---
 ## 11. Open Questions — RESOLVED (analyze step)
 | # | Question | Resolution |
 |---|---|---|
 | Q-PP1 | `cfg.norris.executor` applies even without preplanner? | **YES.** Resolving the executor is independent of preplan. If `cfg.norris.executor` names a valid preset, `run_norris` uses it for `safety.norris_step` regardless of preplanner state. Preplanner unset + executor set = "always use cloud-haiku for Norris steps even though my interactive `:model` is qwen-coder". Useful split. |
 | Q-PP2 | Stream the preplan TASKs as they're emitted? | **NO (v1 = non-streaming).** Use `broker.chat` (non-streaming) for preplan. Preplan emits ~16 × ~10 tokens = ~160 tokens total; on cloud Haiku that's <2s. Print the full TASK list at completion (`[aish] preplanned N tasks via cloud`) rather than streaming letter-by-letter. Streaming adds latency variance + screen flicker for sub-2s win. Reconsider if real-world preplan latency exceeds 5s. |
 | Q-PP3 | Re-launch fires preplan again? | **YES, naturally.** Each `:norris <goal>` re-enters `run_norris`. The pre-loop preplan block runs (different goal → different decomposition). `ctx.norris_tasks` is overwritten. No special re-launch logic needed; falls out of lifecycle. |
 | Q-PP4 | Executor sees full goal AND current task? | **BOTH.** Goal anchor in NORRIS suffix (existing) + a NEW optional task-hint block appended right after. The executor planner can use the goal to detect off-track tasks and adjust its CMD: emission. |
 | Q-PP5 | `:norris` (no args) reports tasks state? | **No — out-of-scope v1.** Inside Norris there's no readline prompt; meta commands aren't reachable. After exit, `ctx.norris_tasks` is cleared. The renderer's per-step `[step k/N: <task>]` line is the user-facing readout. Re-consider if users ask for a "task plan preview before execution" mode. |
 | Q-PP6 | 1-task degenerate case? | **Run as normal, no special case.** Functionally identical to single-model Norris (executor sees goal + single TASK hint). Preplanner cost is the only delta. Acceptable. |
 **Additional findings from code reading:**
 - `safety.norris_step(ctx, model_cfg, ...)` takes `model_cfg` as a parameter. **Implication:** `run_norris` resolves the executor cfg ONCE pre-loop and passes it in every iteration. No signature change to safety.lua. The "executor" is just a different `model_cfg` than `active_cfg`.
 - `Context:reset()` does NOT touch `norris_goal`/`norris_active` (Norris state is owned by `run_norris`, set on entry + cleared on exit). `ctx.norris_tasks` follows the same ownership: created at preplan, cleared at `run_norris` exit. The one difference is the R6 fix: `Context:reset()` additionally sets `self.norris_tasks = nil` as a defensive clear (see §5), even though `:reset` is unreachable mid-Norris.
 - `NORRIS_SUFFIX_TEMPLATE` has one `%s` slot for goal. Don't change the template; **append** a `compose_norris_task_hint(self)` helper output AFTER the formatted suffix. Keeps the template stable; the hint block is additive.
 - Preplan call lives in `repl.lua` (not `safety.lua`) — keeps safety's invariant "single broker round-trip per call". Repl already orchestrates multi-call flows (Norris loop, secrets rehydration, routing); preplan is one more pre-loop hook.
 - The renderer needs a per-step prefix showing `[step k/N: <task>]`. `renderer.norris_step` currently takes `(n, max_n)`; extend to `(n, max_n, descr)` — descr was already in the signature per the helpers contract above (line 339 of safety.lua), but `run_norris` doesn't pass it today. Phase 10 wiring fills that gap.
 ---
 ## 11b. Plan — commit-by-commit roadmap (5 commits)
 | # | Commit subject | Files | Why this slice |
 |---|---|---|---|
 | 1 | `executor: extract_task_lines for Phase 10 preplan parsing` | executor.lua + inline test | Pure function; verifiable standalone. Locks the TASK: parse contract before the preplan call wires it. |
 | 2 | `context: norris_tasks anchor + task-hint composition` | context.lua + inline test | New field on Context. Adds `compose_norris_task_hint(self)`; appends after the NORRIS suffix. ctx.norris_tasks is nil by default → no regression. |
 | 3 | `safety: pass current task descr to render_step from norris_step` | safety.lua ONLY | One-line tweak in safety.lua to source `descr` from `ctx.norris_tasks` and pass to `helpers.render_step(step_n, max_steps, descr)`. **No repl.lua change in this commit** (R5 clarification). |
 | 4 | `repl: preplan + executor cfg resolution + tasks_max truncate (closes #89)` | repl.lua | The orchestration commit. Pre-loop preplan block; fall-back paths; executor cfg resolution (`active_cfg` vs `cfg.norris.executor`); `ctx.norris_tasks` lifecycle (clear-at-top per R4); pass executor_cfg to safety.norris_step instead of active_cfg. |
 | 5 | `phase10: config example + MEMORY index + project status` | config.lua, MEMORY.md, memory/project_phase_status.md | Documentation + persistent project state. Ships the user-visible config block. |
 Each commit must leave the tree in a state where `luajit main.lua` runs and existing tests pass; commits 1-3 ship behind a feature-unused-yet stance (nothing calls them), commit 4 lights them up, commit 5 documents.
 ### Per-commit verification
 - **C1**: 6 inline unit cases for `extract_task_lines`: empty input → {}, single TASK → {it}, mixed CMD+TASK → only TASKs, leading whitespace tolerated, blank lines ignored, > tasks_max → caller's job to cap (function itself just parses). test runs from repo root.
 - **C2**: 5 inline unit cases for `compose_norris_task_hint`: nil tasks → "", empty list → "", current=1 of 3 → contains "step 1/3", current > #list → "" (completed), full to_messages render with tasks shows hint in system content. self.turns + self.norris_tasks unmutated.
 - **C3**: safety_test snapshot still 87/87 (no behavior change for the no-tasks path). Manual run of single-model Norris to confirm no regression.
 - **C4**: E2E with cfg.norris.preplanner=cloud + executor=fast. Goal: `find files larger than 10MB in /var/log and report sizes`. Verify preplan emits 2-5 tasks; executor runs each. :cost detail shows two model rows. Fall-back E2E with preplanner pointing to bogus model → status log + normal Norris.
 - **C5**: visual inspection of config.lua. MEMORY.md + project_phase_status.md updated to "Phase 0-10 done".
 ### Resolved review tickets folded into the plan
 **Sonnet review 2026-05-17 — 2 blockers + 4 important + 2 nits. All accepted.**
 - **R1 (blocker)** `sys:gsub("N", ...)` would corrupt "No prose", "No commentary", "No numbering" → "16o prose". **Fix**: use `string.format` with `%d` in the template, replace the gsub call.
 - **R2 (blocker)** §5 pseudocode showed a 2-slot NORRIS_SUFFIX_TEMPLATE redesign, contradicting §11's "don't change the template; append helper output AFTER". **Fix**: §5 above now shows the helper-append approach matching §11.
 - **R3 (important)** Preplan call bypasses `call_broker` (Phase 5 fallback-retry wrapper). **Decision: intentional** — fallback for a preplan call would produce a different decomposition from a different model, which is actively undesirable. Documented in §10 Risks.
 - **R4 (important)** No pcall around `run_norris` → stale `ctx.norris_active`/`norris_goal`/`norris_tasks` on uncaught error. Pre-existing bug; Phase 10 adds one more leaky field. **Fix**: clear all three at the TOP of `run_norris` (before preplan) so a fresh launch always starts clean regardless of prior crash. Cheaper than full pcall wrap; sufficient for the stale-tasks vector.
 - **R5 (important)** C3 commit scope ambiguity. **Clarification**: C3's "tiny repl.lua wiring" is ONLY passing `descr` to `render_step`. Executor cfg resolution (active_cfg vs cfg.norris.executor) lands in C4 alongside the preplan block. Table updated.
 - **R6 (important)** `ctx.norris_tasks` lifecycle vs `Context:reset()`. **Fix**: add `self.norris_tasks = nil` to `Context:reset()` as defensive coding (one line, no regression). §7 amended to remove the contradictory "Document in §9" deferral.
 - **R7 (nit)** Hardcoded `timeout_ms = 60000` ignores `pre_cfg.timeout_ms`. **Fix**: `pre_cfg.timeout_ms or 60000` in §4 pseudocode.
 - **R8 (nit)** "Status:" label in §1 acceptance criterion could be misread as on-screen prefix. **Fix**: rename to "Terminal output:".
 - **R9-R11**: confirmations of clean composition with #87 (compression doesn't fire during Norris steps — correct), #86/#88 (both scoped to ask_ai; can't leak into preplan call site). No action.
 ---
 ## 12. Phase 10 → Phase 11+ Out-of-band
 Candidate follow-ups (non-binding):
 - **Phase 11**: cross-session cost rollup (Phase 7 §12 option 1 —
  long-deferred).
 - **Cost preflight enforcement** (Phase 7 §12 option 2 — also long-
  deferred; Phase 8's accurate counts are the prerequisite).
 - **Mid-flight Norris re-plan** — preplanner gets to re-decompose
  based on executor progress. Real value, but needs careful
  state-machine design (when to re-plan, how to preserve already-
  completed work).
 - **Per-task model selection** — task could carry a model hint
  emitted by the preplanner.
 Phase 10 itself is self-contained — depends on Phase 3 (Norris) +
 Phase 7 (cost accumulator) which are both implemented.
@@ -0,0 +1,154 @@
 # Phase 2 Baseline — pre-implementation measurements
 **Date:** 2026-05-12
 **Targets probed:** lmcp v0.5.4 on `boltzmann.fritz.box:8080/mcp`; OpenAI-compat broker on `hossenfelder.fritz.box:8082`.
 This is the Phase 7 (verify) anchor — captures what the world looked like just *before* Phase 2 implementation lands, so post-implementation behavior can be compared against it. Companion to PHASE2.md (manifest).
 ---
 ## 1. MCP RPC round-trip timings (cold path, single warm-up)
 | RPC | Latency |
 |---|---|
 | `initialize` | 19 ms |
 | `notifications/initialized` (HTTP 202, no body) | 11 ms |
 | `tools/list` | 17 ms |
 | `tools/call` `list_dir({path:"/tmp"})` (success, ~1 KB result) | 72 ms |
 | `tools/call` `read_file({path:"/nonexistent/..."})` (handler-caught failure) | 12 ms |
 | `tools/call` `nope_tool` (JSON-RPC -32601 unknown tool) | 12 ms |
 LAN-local; sub-20 ms for everything but the file-listing payload (72 ms),
 and sub-100 ms overall. Phase 2's sequential tool-call dispatch won't be
 the bottleneck — the LLM is.
 ---
 ## 2. Fixtures (saved to `/tmp/aish-baseline/`)
 | File | Shape |
 |---|---|
 | `01_initialize.json` | `{result:{protocolVersion, serverInfo:{name,version}, capabilities:{tools:{listChanged:false}}}}` |
 | `02_notif_init.body` | empty (HTTP 202) |
 | `03_tools_list.json` | `{result:{tools:[{name, description, inputSchema}...]}}` — 7 tools on boltzmann |
 | `04_tools_call_ok.json` | `{result:{isError:false, content:[{type:"text", text:"<listing>"}]}}` |
 | `05_tools_call_iserror.json` | **see §3 finding** |
 | `06_tools_call_unknown.json` | `{error:{code:-32601, message:"Tool not found: nope_tool"}}` |
 ### Initialize response (compact)
 ```json
 {"id":1,"jsonrpc":"2.0","result":{
    "serverInfo":{"version":"0.1.0","name":"boltzmann-tools"},
    "protocolVersion":"2025-03-26",
    "capabilities":{"tools":{"listChanged":false}}}}
 ```
 ### Unknown-tool error (transport-level failure)
 ```json
 {"id":5,"jsonrpc":"2.0","error":{
    "message":"Tool not found: nope_tool","code":-32601}}
 ```
 ---
 ## 3. Baseline finding: `isError` is not a complete failure signal
 `read_file({path:"/nonexistent/baseline-probe"})` returned:
 ```json
 {"id":4,"jsonrpc":"2.0","result":{
    "isError":false,
    "content":[{"type":"text","text":"Error: could not read /nonexistent/baseline-probe"}]}}
 ```
 `isError: false` despite an obvious failure. The handler caught the error and put it in `content` text but didn't set the flag.
 **Implication for Phase 2 design:** aish cannot rely solely on `result.isError` to decide success/failure of a tool call. The model must read the text content. This actually simplifies Phase 2: just feed `content` straight back as the `role:"tool"` turn body regardless of `isError`. The flag is advisory; the model is the discriminator. (No PHASE2.md amendment needed — §4's "pass-through to the model" stance already accommodates this.)
 This is a per-tool boltzmann-lmcp implementation quirk, not a spec issue. Other lmcp deployments may set `isError: true` correctly; aish should still pass content through and not crash on either shape.
 ---
 ## 4. Streaming `tool_calls` delta shape (verified against hossenfelder)
 For `stream: true` requests with `tools` declared, observed deltas:
 ```
 data: {"choices":[{"delta":{"role":"assistant","content":null}}]}
 data: {"choices":[{"delta":{"tool_calls":[{"index":0,"id":"...","type":"function",
                                            "function":{"name":"get_weather","arguments":""}}]}}]}
 data: {"choices":[{"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{"}}]}}]}
 data: {"choices":[{"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\""}}]}}]}
 data: {"choices":[{"delta":{"tool_calls":[{"index":0,"function":{"arguments":"city"}}]}}]}
 ...
 data: {"choices":[{"finish_reason":"tool_calls","delta":{}}]}
 data: [DONE]
 ```
 Accumulator rules confirmed:
 1. On the first delta containing `tool_calls[i]`: capture `id`, `type`, `function.name`. `arguments` may be empty `""`.
 2. On subsequent deltas matching same `index`: concatenate `function.arguments` into the running buffer.
 3. `finish_reason: "tool_calls"` closes the set; arguments buffer is parsed as JSON at that point.
 Matches PHASE2.md §5 design.
 ---
 ## 5. Baseline aish behavior (pre-MCP, what Phase 1 does today)
 Sent to hossenfelder with the standard system prompt and **no `tools` field**:
 ```
 user: List the files in /tmp
 ```
 Response (qwen2.5-coder-1.5b via hossenfelder, sans tools):
 ````
 ```cmd
 dir /tmp
 ```
 ````
 `finish_reason: stop`, `tool_calls: null`, 9 completion tokens.
 The loaded model emits Windows shell syntax in a markdown code-fence, ignoring the system prompt's `CMD:` extraction contract. **No tool_calls path is exercised today** because no tools are declared. This is the empirical "before" of Phase 2 — once MCP servers are wired and a real tool exists (`list_dir({path:"/tmp"})`), the model has a structured path that doesn't depend on getting `CMD:` formatting right.
 ---
 ## 6. Known blockers carried into Phase 7 (verify)
 Both live in the **boltzmann proxy** (`hossenfelder.fritz.box:8082`), not in aish:
 | # | Bug | Affects | Tracking |
 |---|---|---|---|
 | 1 | SSE buffering — proxy sets `Content-Length` on `text/event-stream` and flushes the whole response at once | streaming visibility (Phase 1) AND streaming tool_calls deltas (Phase 2) | [aish#15](https://git.reauktion.de/marfrit/aish/issues/15) + [[reference-hossenfelder-sse-buffering]] |
 | 2 | `model` field routing — every request returns chunks tagged `qwen2.5-coder-1.5b-q4_k_m.gguf` regardless of requested `model`, suggesting the proxy ignores the field | Phase 2 testing against mistral-nemo specifically (the strict-chat-template canary for Q18); also any `:model deep` / `:model cloud` switch | side-finding in #15 triage; needs its own issue when Phase 7 hits it |
 Phase 2 implement/verify will proceed against whatever model is loaded.
 Full template-strictness verification of Q18 (`role:"tool"` acceptance on
 mistral-nemo) waits for bug #2 to be fixed in the boltzmann proxy code.
 ---
 ## 7. Module pre-state (Phase 1 head: `5878f73`)
 | Module | LOC (incl. comments) | State |
 |---|---|---|
 | `broker.lua` | 92 | chat + chat_stream, no `tools` field |
 | `context.lua` | (per Phase 1) | `pending_exec_output` buffer; no `role:"tool"`; no `tool_calls` on assistant turns |
 | `executor.lua` | (per Phase 1) | PTY-backed, `CMD:` extract, no tool dispatch |
 | `repl.lua` | 287 | meta cmds, ask_ai stream loop, no `:mcp …`, no tool-call sub-loop |
 | `renderer.lua` | 79 | exec frame, streaming text; no tool-call frame |
 | `safety.lua` | (per PHASE0 §4) | stub — only the file exists |
 | `mcp.lua` | — | does not exist yet |
 | `config.lua` | (per user's edits) | models registry; no `mcp = { servers = {...} }` section |
 After Phase 2 lands, `git diff main..post-phase-2 --stat` should show:
 new `mcp.lua` (substantial), modest growth in `broker.lua` / `context.lua` /
 `repl.lua` / `renderer.lua`, finally non-stub `safety.lua`.
 ---
 *End of Phase 2 Baseline — aish*
@@ -0,0 +1,629 @@
 # aish — Phase 2 Manifest
 **Project:** aish — AI-augmented conversational shell
 **Document:** Phase 2 Requirements, Architecture & Design Decisions
 **Status:** Verify (Phase 7) — implementation complete; live testing in progress
 **Date:** 2026-05-12
 **Amendments since formulate:**
 - 2026-05-12 (review fold-in): see §12 "Review fold-in" subsection.
 - 2026-05-12 (Phase 7 verify, separator switch): tool-name namespace
  delimiter changed from `.` to `__` because Anthropic via Bedrock
  validates tool names against `^[a-zA-Z0-9_-]{1,128}$` — dots are
  rejected with `HTTP 400 tools.0.custom.name: String should match
  pattern '...'`. Discovered when `:model cloud` exercised TC #26
  against the real cloud path. Internal API matches on-wire shape so
  there's no transformation layer. Constraint: aliases must not
  themselves contain `__` so the parse stays unambiguous (leftmost
  `__` is the split point). Tool names from MCP servers may contain
  underscores freely. All §3/§5/§6/§7/§12 references updated below.
 PHASE0.md is the locked substrate; PHASE1.md is layered on top. This
 manifest specifies what Phase 2 adds. Section numbers reference back to
 PHASE0.md / PHASE1.md where relevant.
 ---
 ## 1. Scope of Phase 2
 Three pillars per PHASE0.md §11 row 2:
 1. **MCP client** (`mcp.lua`) — JSON-RPC 2.0 over HTTP. v1 is POST-only:
   the analyze phase ruled out a long-lived SSE channel (see §2 and §4).
   Target reference implementation: `lmcp`. Operations needed for v1:
   `initialize`, `tools/list`, `tools/call`. Multiple servers may be
   connected concurrently; tools are namespaced `<server>__<tool>`.
 2. **Tool-calling protocol bridge** — the broker sends OpenAI-compatible
   `tools` in the request body; the model emits `tool_calls` in the
   response; `mcp.lua` dispatches each call to the right server; the
   tool result is fed back as a `role:"tool"` turn in `context.lua` and
   the chat continues.
 3. **Authorization gate** — `safety.lua` (PHASE0.md §4 stub) finally gets
   implemented. Every tool call is confirmed by the user by default,
   with per-tool and per-server `auto_approve` policies in `config.lua`.
 **Phase 2 is done when:**
 - aish can connect to at least one local `lmcp` server declared in
  `config.lua` and one connected via `:mcp connect <url>` at runtime.
 - `:mcp list` shows connected servers; `:mcp tools` shows discovered
  tools across all servers.
 - A model conversation can invoke a tool: the broker request carries
  the live tools schema; the response's `tool_calls` are confirmed by
  the user; each call dispatches to the right MCP server; the result
  re-enters the chat; the model continues with the result available.
 - `CMD:` extraction (PHASE0.md §6 substrate invariant) still works
  unchanged — Phase 2 is additive, not replacing.
 - A tool with `auto_approve = true` (in config) executes without the
  confirm prompt; a non-approved tool still prompts.
 ---
 ## 2. Technology Decisions (delta from Phase 1)
 | Decision | Choice | Rationale |
 |---|---|---|
 | MCP transport | HTTP POST per RPC, `Connection: close` per response, **no long-lived SSE GET channel** in v1 | Analyze finding (2026-05-12): lmcp v0.5.4 only implements the trivial POST-and-respond flavor of the spec's streamable-HTTP transport. Its GET /mcp endpoint announces the POST endpoint then closes — there's no server→client notification channel to listen on. Combined with lmcp's `capabilities.tools.listChanged = false`, aish doesn't need an SSE GET listener at all for lmcp. Stdio transport is left for a possible Phase 2.1 if a stdio-only MCP server becomes necessary. |
 | MCP protocol version | `2025-03-26` (confirmed by live probe of boltzmann:8080/mcp) | lmcp pins this in `MCP_VERSION` and **does not negotiate** — it returns its compiled-in version regardless of what the client sends (lmcp.lua:80-91). aish sends `2025-03-26` in `initialize` and accepts whatever the server returns; on mismatch it logs `[aish] mcp <alias>: protocol version mismatch (sent X, got Y); proceeding` and continues. v1 has no version-gated behavior to abort on. |
 | MCP auth | Bearer token via `Authorization: Bearer <token>` header, per-server | Analyze finding: every lmcp deployment in mfritsche's fleet (boltzmann/hertz/pve*/nc/etc.) requires Bearer auth. Phase 2 config supports `auth_token` literal and `auth_env` env-var indirection per server (mirrors `key_env` in the models registry). lmcp servers without auth (broglie/higgs LAN-only) just leave the field nil. |
 | Tool-call wire format | OpenAI `tools` field on `/v1/chat/completions` body; `tool_calls` on assistant deltas; `role:"tool"` turn with `tool_call_id` for results | Standard, supported by llama.cpp and OpenRouter. Aligns with the existing `/v1/chat/completions` substrate invariant. |
 | Tool namespacing | `<server-alias>__<tool-name>` for both the wire-level tool name and `:mcp tools` listing (was `.` at formulate; switched 2026-05-12 — see Amendments above) | Avoids name collisions across servers. The alias comes from the config key or the connect URL hash. `__` (two underscores) is within Bedrock's tool-name regex `^[a-zA-Z0-9_-]{1,128}$` whereas `.` is not. Aliases must not themselves contain `__`. |
 | `CMD:` coexistence with tool-calls | Both stay live, no policy preference. Substrate invariant §3 unchanged. | Resolves Q6 (see §10). `CMD:` is the local-shell route; MCP tools are structured-API routes; they serve different purposes. Future phases (Norris, Phase 3) may prefer tools when both are available, but Phase 2 doesn't enforce. |
 | Authorization default | Per-call confirm (mirrors PHASE0.md §10 `confirm_cmd` for shell) | Conservative default; user can opt into auto-approval per tool or per server via config. Resolves Q8. |
 | System prompt augmentation | Hybrid: static frame in `broker.lua` system prompt + dynamic `tools` array in the request body | Tool list goes in the API field where it belongs; the system prompt only mentions that tools exist and how to use them. Per-request body cost is bounded (tools change rarely; small schemas). Resolves Q9. |
 | Tool-call streaming | Streaming-from-day-one — `broker.chat_stream`'s on_delta callback widens to handle `tool_calls` deltas in addition to text deltas | Resolves Q10. Phase 1 SSE landed first, so we're not retrofitting; we just extend the parser. **Wire shape confirmed at analyze** (2026-05-12 probe vs hossenfelder): `delta.tool_calls[]` arrives indexed; id+type+function.name appear on the opening delta; `function.arguments` is a JSON-string that arrives in character-fragment chunks; finish_reason "tool_calls" closes the call. Accumulator strategy matches §5. |
 | Tool-call concurrency | Sequential dispatch in Phase 2 v1 — process `tool_calls[0]` to completion, then `[1]`, etc. | Simpler error handling; tool effects often order-dependent (e.g. write-then-read). Parallel dispatch deferred (see Q20). |
 | MCP server lifecycle | aish does not manage MCP server processes (parallel to PHASE0.md §12 llama.cpp rule) | Declared in config or connected by URL; aish is a client only. |
 ---
 ## 3. Module Changes
 | File | State after Phase 1 | Phase 2 changes |
 |---|---|---|
 | `mcp.lua` | **New file** (not in PHASE0 §4 layout; this Phase amends the layout to add it) | Implement: `M.connect(url, opts) -> session` (opts: `alias`, `auth_token`, `auth_env`), `session:initialize()`, `session:list_tools() -> [{name, description, inputSchema}]`, `session:call_tool(name, args) -> (result_table, kind)` where `kind ∈ {"ok","handler_error","rpc_error"}` so callers can route the response per §4's error split, `session:close()`. JSON-RPC 2.0 over HTTP POST (`Content-Type: application/json`, `Accept: application/json`, `Authorization: Bearer <token>`). Per-session state: alias, base-url, auth, tools-cache, request-ID counter. No persistent SSE channel — POST is one-shot per RPC. Distinguishes HTTP-level failure (e.g. lmcp's `401 {"error":"unauthorized"}` body, which is NOT JSON-RPC-shaped — has no `jsonrpc`/`id` fields) from JSON-RPC envelope errors; needs `ffi/curl.M.post` extended to return status code (see ffi/curl.lua row). |
 | `safety.lua` | Stub | Implement Phase 2 surface only: `M.confirm_tool_call(name, args, cfg) -> bool` (same signature as §6 and §12). Reads `cfg.mcp.auto_approve` (per-tool and per-server) before prompting. Norris destructive-op heuristic and HALT gate stay Phase 3. |
 | `broker.lua` | Streaming `chat_stream(cfg, msgs, on_delta)` | Signature widens to `chat_stream(cfg, msgs, on_delta, opts)`. `opts.tools` (optional array of `{type, function:{name, description, parameters}}`) is passed through to the request body; **omitted entirely if absent or empty** (some servers reject `"tools": []`). The on_delta callback widens to `on_delta(kind, payload)` where `kind ∈ {"text", "tool_call"}`. **`broker.lua` does NOT depend on `mcp.lua`** — repl assembles the tools array and passes it in; broker stays a transport layer. `M.chat` (non-streaming wrapper) is unchanged in this phase (no tool consumers go through it). |
 | `context.lua` | turns = {{role, content}, ...} + `pending_exec_output`; `Context:append` asserts `turn.content` and rebuilds the entry as `{role, content}` only — extra fields are dropped | Three concrete edits: (a) **loosen `:append`** so `role == "assistant"` can carry `tool_calls = [{id, name, arguments}]` with `content` allowed empty, and `role == "tool"` requires `tool_call_id` + `content` (the assert moves from "content required" to "shape per role"); (b) **preserve `tool_calls` and `tool_call_id`** in the stored turn (not just role+content); (c) `to_messages()` emits `tool_calls` on assistant turns and `tool_call_id` on tool turns. Add a debug assertion that `role == "tool"` follows an assistant turn with non-empty `tool_calls` (catches design bugs early; N4 in review). **`pending_exec_output` interaction**: the buffer **persists across the tool-call sub-loop** (the loop is internal — no user input happens — so there's no append_user to flush against). It flushes on the next genuine user turn, regardless of how many tool-call iterations preceded. |
 | `repl.lua` | meta cmds + ask_ai stream loop | After ask_ai sees `tool_calls`, enter a tool-execution sub-loop: confirm-gate each call via `safety.confirm_tool_call`, dispatch via `mcp.session:call_tool`, append tool turn to context, re-issue the broker request. Loop until assistant emits text without tool_calls or `max_tool_depth` is reached (§5). New meta: `:mcp connect <url> [alias]`, `:mcp list`, `:mcp tools`, `:mcp tool <alias__name>`, `:mcp disconnect <alias>`. |
 | `renderer.lua` | streaming text + exec frame | Add `tool_call_begin(name, args)`, `tool_call_end(result, ok)`. Visual style: indented, dim, parallel to the exec frame. |
 | `config.lua` | example with models/shell/context/history | Schema additions: `mcp = { servers = { alias = { url = "..." } }, auto_approve = { ["alias__tool"] = true } }`. Documented in §6 below. |
 | `ffi/curl.lua` | post + post_sse; `M.post` does not set `FAILONERROR`, so non-2xx responses return the body as a normal string. `ffi.cdef` exposes only `curl_easy_setopt` — no `curl_easy_getinfo` (cdef block at curl.lua:11-28). | **One small extension**: `M.post` returns **`(body, status_code)` on transport success** (status_code may be non-2xx — caller decides what to do; mcp.lua treats `>= 400` as transport failure). `(nil, errmsg)` on libcurl-level failure is **unchanged** — Phase 1 callers that read only the first slot stay correct. Requires adding `curl_easy_getinfo` + `CURLINFO_RESPONSE_CODE` (decimal 2097154, i.e. `CURLINFO_LONG + 2` = `0x200000 + 2`) to the `ffi.cdef` block, plus a `long[1]` out-param shim. MCP auth failures from lmcp arrive as HTTP `401` with a non-JSON-RPC body (`{"error":"unauthorized"}`); `mcp.lua` must distinguish HTTP-level failure from JSON-RPC envelope errors. No SSE GET channel is added (analyze finding ruled it out for lmcp). |
 | `history.lua` | JSONL session log | Tool turns are logged like any other turn — `{role:"tool", tool_call_id:"...", content:"..."}`. Resume reconstructs them via `ctx:append` like user/assistant turns. |
 §4 module-layout amendment: `mcp.lua` slots between `broker.lua` and
 `router.lua` in the §4 table. Same commit lands the manifest amendment.
 ---
 ## 4. MCP Transport (analyze findings — lmcp v0.5.4)
 lmcp implements only the **synchronous POST** flavor of the MCP
 streamable-HTTP spec. Each RPC is one HTTP transaction:
 ```
 client → server:   POST /mcp           Content-Type: application/json
                                       Accept: application/json
                                       Authorization: Bearer <token>
                                       Body: { jsonrpc:"2.0", id, method, params }
                                       Returns: { jsonrpc, id, result | error }
                                       Connection: close
 ```
 lmcp's `GET /mcp` exists but only sends a one-shot `event: endpoint`
 announcing the POST URL, then closes — there is no held-open
 server→client channel. Combined with the `listChanged: false`
 capability lmcp announces in `initialize`, **aish does not open a
 persistent SSE channel** to lmcp servers in v1. Notifications-from-server
 are out of scope here; track for v2 if a richer server appears.
 ### Handshake
 1. `initialize` request: `{ protocolVersion: "2025-03-26", capabilities: {}, clientInfo: { name: "aish", version: "..." } }`.
 2. Server response (lmcp): `{ protocolVersion: "2025-03-26", capabilities: { tools: { listChanged: false } }, serverInfo: { name, version } }`.
 3. **Version mismatch**: lmcp ignores client's `protocolVersion` and always returns its compiled-in `MCP_VERSION` (lmcp.lua:80-91). aish accepts whatever lmcp returns; on mismatch it logs a status (`[aish] mcp <alias>: protocol version mismatch (sent X, got Y); proceeding`) and continues. v1 has no version-gated behavior.
 4. `notifications/initialized` POST (one-way; lmcp returns HTTP 202 with no body).
 ### Tool discovery
 1. `tools/list` RPC → `{ tools: [{ name, description, inputSchema }] }`.
 2. Cache per-session **for the session lifetime** — lmcp announces
   `listChanged: false`, so there's no need to refetch or listen for
   change notifications.
 ### Tool invocation
 **Content flattening**: tool results return `content: [{type, ...}, ...]`.
 lmcp v0.5.4 only emits `type: "text"`, but the spec also allows
 `"image"` and `"resource"`. Phase 2 v1 **concatenates all `text` blocks**
 into a single string for the `role:"tool"` turn body and **ignores
 non-text blocks**, logging a one-shot status warning when a non-text
 block is observed. Image/resource handling is deferred. See §12
 "Content blocks beyond text" for the corresponding risk note.
 `tools/call` with `{ name, arguments }`. Failure has four flavors and
 all of them result in **a `role:"tool"` turn being appended** so the
 assistant's `tool_calls` is never left orphaned in context (strict
 templates reject `assistant.tool_calls` without a matching `tool`
 reply — same gotcha PHASE0.md §6 warned about):
 - **Tool-handler exception** → JSON-RPC `result` with `isError: true`
  and `content: [{ type:"text", text: "Error: ..." }]`. Feed
  `content` straight back as the `role:"tool"` turn body. Model-recoverable.
 - **Baseline `isError: false` on actual failure** (PHASE2-baseline.md §3
  found this — boltzmann's `read_file` returns content text containing
  "Error: ..." but `isError: false`). Pass content through unchanged —
  let the model read the text. `isError` is advisory, not authoritative.
 - **JSON-RPC envelope error** (e.g. `{code: -32601, message: "Tool not
  found"}`) → synthesize a `role:"tool"` turn with
  `content = "[aish] tool dispatch failed: <error.message>"` and the
  matching `tool_call_id`. Also surface a status line for the user.
  This both keeps alternation legal and tells the model what happened
  so its next plan is informed.
 - **HTTP-level failure** (auth, unreachable, timeout) → same shape:
  synthesize a `role:"tool"` turn with
  `content = "[aish] tool transport error: <reason>"`. Same alternation
  rationale.
 This split resolves Q21 (with the C5/C7 review fix folded in).
 ### Lifecycle
 - Connect on startup (from `config.mcp.servers`) — best effort; failures
  are status-logged once, don't abort aish, and the session is **absent
  from `mcp_sessions` until manually reconnected via `:mcp connect`**.
  No automatic retry. "Connect" here means: do the `initialize`
  round-trip + cache `tools/list` results.
 - `:mcp connect <url>` adds a session at runtime; alias auto-derived
  from hostname or supplied as second arg.
 - `:mcp disconnect <alias>` drops cached state. There's no long-lived
  HTTP connection to close (every RPC was already `Connection: close`).
 - On aish quit, sessions are just forgotten — nothing to clean up
  server-side.
 - An unreachable server simply contributes no tools to the broker
  request body — the model is not told that tools were "meant" to be
  available. If `tools_schema()` returns empty across all sessions, the
  broker omits the `tools` field entirely.
 ---
 ## 5. Tool-Call Bridge
 ### Broker request body (delta from Phase 1)
 ```json
 {
  "model": "...",
  "messages": [...],
  "stream": true,
  "temperature": 0.2,
  "tools": [
    { "type":"function",
      "function": { "name":"<alias>__<tool>",
                    "description":"...",
                    "parameters": <inputSchema> } },
    ...
  ]
 }
 ```
 The `tools` array is assembled by `mcp.tools_schema()` — flattens
 `tools/list` results from every connected session, namespacing each tool
 as `<alias>__<name>`.
 ### Response handling (streaming)
 llama.cpp / OpenAI deltas may include:
 ```json
 data: {"choices":[{"delta":{"tool_calls":[{"index":0,"id":"call_…",
                "function":{"name":"alias__tool","arguments":"{\"a\":"}}]}}]}
 data: {"choices":[{"delta":{"tool_calls":[{"index":0,
                "function":{"arguments":"1}"}}]}}]}
 data: {"choices":[{"finish_reason":"tool_calls",...}]}
 ```
 `broker.chat_stream` accumulates tool-call deltas keyed by `index`; the
 `arguments` field is a JSON-string that arrives chunked and is concatenated.
 On `finish_reason: tool_calls`, the accumulated calls are emitted to
 on_delta as `kind="tool_call"` with full payloads.
 **Index-absent fallback**: per the OpenAI spec, `index` is REQUIRED on
 streaming `tool_calls[]` deltas — but some local llama.cpp builds have
 been reported to omit it for single-call streams. If a delta has
 `tool_calls` but no `index`, treat it as `index = 0` and accumulate
 into the slot-0 buffer. Log a one-shot debug status the first time
 this is observed per stream.
 ### Re-injection into context
 The assistant turn carries **whatever text was streamed before
 `finish_reason: tool_calls`** (which may be non-empty — models often
 say "Sure, let me look that up" before calling). The renderer flushes
 that text first, then renders the tool-call frame around dispatch.
 ```lua
 -- After tool execution
 ctx:append({
  role = "assistant",
  content = accumulated_text,    -- may be "" if model emitted no prose
  tool_calls = { {id="call_…", name="alias__tool", arguments=<json-string>} },
 })
 ctx:append({
  role = "tool",
  tool_call_id = "call_…",
  content = <tool-result-text-or-synthesized-error>,
 })
 ```
 `to_messages()` renders both shapes for the next broker request. The
 strict-alternation issue from PHASE0.md §6 (mistral-nemo Jinja) is
 handled differently here — tool turns ARE expected to follow assistant
 tool_calls per the OpenAI chat-template convention. If a model's
 template still rejects this shape, fall back to the `[tool: X]` prefix
 strategy used for exec output (Q18 below — fallback is plumbed via the
 `context.use_tool_role` flag; default `true`).
 ### Re-issuing the broker request
 After tool turns are appended, the broker is called again with the
 extended messages array. The model may emit more `tool_calls`, more
 text, or both. Loop until the response has no `tool_calls` (i.e. a
 plain text assistant turn).
 Budget: a max-tool-call-depth setting (default 8) prevents runaway loops.
 Hit-cap surfaces as a status: `[aish] tool-call depth limit reached`.
 ---
 ## 6. Authorization (safety.lua Phase 2 surface)
 ```lua
 -- safety.confirm_tool_call(tool_name, args_table, config) -> bool
 --
 -- Decides whether a single MCP tool call may run.
 --   name : namespaced tool name, "<alias>__<tool>"
 --   args : decoded arguments table (key/value); only used to hint in
 --          the prompt whether the call carries arguments
 --   cfg  : aish config table; cfg.mcp.auto_approve is the policy map
 -- Returns true when the call is auto-approved by policy or the user
 -- answers "y"/"Y" at the prompt; false otherwise (default is deny).
 function M.confirm_tool_call(name, args, cfg)
    local policy = cfg.mcp and cfg.mcp.auto_approve or {}
    -- Exact-match entry, e.g. ["boltzmann__read_file"] = true
    if policy[name] then return true end
    -- Per-server glob entry, e.g. ["broglie__*"] = true. Aliases never
    -- contain "__", so the leftmost "__" is the alias/tool split point;
    -- tool names themselves may contain underscores freely.
    local sep = name:find("__", 1, true)  -- plain find, no pattern magic
    local alias = sep and sep > 1 and name:sub(1, sep - 1)
    if alias and policy[alias .. "__*"] then return true end
    -- Otherwise prompt. args is a key/value table, so `#args` would
    -- always be 0; next() is the reliable "has any entry" check.
    local has_args = type(args) == "table" and next(args) ~= nil
    local pretty = name .. "(" .. (has_args and "..." or "") .. ")"
    local ans = rl.readline(("call '%s'? [y/N] "):format(pretty)) or ""
    return ans:lower():sub(1,1) == "y"
 end
 ```
 Config schema (analyze-revised — Bearer auth fields added):
 ```lua
 -- MCP client configuration (Phase 2). Omitting the whole `mcp` section
 -- means no servers, no tools in the broker request, no behavior change.
 mcp = {
    -- One entry per server; the key is the alias used to namespace its
    -- tools as "<alias>__<tool>". Aliases must not contain "__".
    -- Auth precedence per server: auth_token > auth_env > no header.
    servers = {
        boltzmann = {
            url       = "http://boltzmann.fritz.box:8080/mcp",
            auth_env  = "BOLTZMANN_MCP_TOKEN",  -- read from env at startup
        },
        broglie = {
            url = "http://broglie.fritz.box:8080/mcp",
            -- no auth (LAN-only deployment)
        },
        nc = {
            url        = "https://nc.reauktion.de:8080/mcp",
            auth_token = "literal-token-if-not-using-env",  -- alternative
        },
    },
    -- Tools listed here skip the per-call confirm prompt; everything
    -- else still prompts. Keys are exact "<alias>__<tool>" names or the
    -- per-server glob "<alias>__*".
    auto_approve = {
        ["boltzmann__read_file"] = true,    -- specific tool
        ["broglie__*"]           = true,    -- whole server
    },
    -- Cap on tool-call depth in the sub-loop; on hit aish prints
    -- "[aish] tool-call depth limit reached" (default 8).
    max_tool_depth = 8,
 }
 ```
 Auth precedence per server: `auth_token` literal > `auth_env` indirection
 > nil (no Authorization header sent). Mirrors PHASE0 §10's `key_env`
 convention for cloud model API keys.
 Norris mode (Phase 3) will extend this: when autonomous, the destructive-op
 heuristic decides; for non-destructive tools, auto_approve. Outside scope here.
 ---
 ## 7. Meta Commands (Phase 2 additions)
 | Command | Action |
 |---|---|
 | `:mcp connect <url> [<alias>]` | Open a session; perform initialize + tools/list; add to active set |
 | `:mcp disconnect <alias>` | Close one session |
 | `:mcp list` | Show connected sessions (alias, url, tool count, status) |
 | `:mcp tools` | List tools across all sessions (`alias__name` — short description) |
 | `:mcp tool <alias__name>` | Show one tool's full inputSchema (debug aid) |
 Existing `:help` updated to list these.
 ---
 ## 8. System Prompt Augmentation
 `broker.lua`'s default system prompt grows by ~4 lines:
 ```
 You may have access to MCP tools — they appear in this request's `tools`
 field. Call a tool by emitting a tool_call; the result will be supplied
 in the next turn. Use tools for structured operations (file reads,
 queries, etc.) and `CMD:` lines for local shell commands. Prefer tools
 when available; fall back to `CMD:` for anything not exposed as a tool.
 ```
 The actual tool list is in the `tools` request-body field, not the
 prompt. This avoids per-turn token bloat for the full schema.
 §3 substrate invariants are unchanged. The `CMD:` extraction marker stays
 the local-shell route; tools are the additive structured route.
 ---
 ## 9. Migration from Phase 1
 User-visible changes:
 - New `:mcp …` meta commands when MCP servers are configured or
  connected at runtime.
 - Assistant responses may now invoke tools — user sees a confirm prompt
  (similar to `CMD:` execution gate) followed by an indented tool-call
  frame with the result.
 - `CMD:` lines still work exactly as before for shell.
 Substrate (PHASE0.md §3) invariants: unchanged. Module layout (§4)
 amended to **add** `mcp.lua` (no rename of any existing file). Adding
 a new file is additive and preserves the §3 module-stability invariant
 ("File names are stable across phases — later phases fill in bodies,
 not rename files"). The amendment ships in commit #1 of the §12 plan
 (C6 in the review).
 `config.lua`: existing configs without an `mcp` section continue to work
 — no MCP servers means no tools sent in the broker request body, no
 auth checks, no behavior change.
 ---
 ## 10. Out of Scope (Phase 2)
 Per PHASE0.md §11, these belong elsewhere:
 - Chuck Norris autonomous mode (Phase 3) — even though tool-calls
  enable richer autonomy, the *autonomous policy* is Phase 3's.
 - Destructive-op heuristic in safety.lua (Phase 3) — Phase 2 only
  implements the per-call confirm-prompt surface.
 - `memory.jsonl` summarization across sessions (Phase 4).
 - Multi-model routing / cloud fallback (Phase 5).
 - Tree-sitter syntax highlighting (Phase 6).
 Specifically out of Phase 2 scope despite proximity:
 - Stdio-transport MCP servers (Q17 below).
 - Parallel tool-call dispatch (Q20).
 - MCP `resources/list` and `prompts/list` capabilities — Phase 2
  v1 only implements `tools/*`. Resources/prompts deferred (probably
  Phase 4 alongside memory).
 - Server-sent `notifications/progress` for long-running tool calls —
  ignored in v1; status surface comes later.
 ---
 ## 11. Open Questions
 | # | Question | Impact | Resolve by |
 |---|---|---|---|
 | Q17 | ~~MCP transport abstraction: stdio vs HTTP+SSE~~ | mcp.lua API shape | **Resolved at analyze.** Hard-code POST-only HTTP for v1. lmcp doesn't use the long-lived SSE channel and `listChanged: false` removes any v1 need for it. Stdio transport tracked as Phase 2.1 / out-of-scope here. |
 | Q18 | Tool-result re-injection: standard OpenAI `role:"tool"` turn, or `[tool: X]` prefix to next user turn (matching the §6 exec-output pattern)? | context.lua + broker.lua | **Partly resolved.** Live probe (2026-05-12, hossenfelder) shows `role:"tool"` accepted by the proxy + the loaded model (qwen2.5-coder-1.5b). Mistral-nemo-specific template testing is **blocked** by the hossenfelder proxy routing all `model` field values to the loaded fast model — see open-end below. Default v1 path: `role:"tool"` (standard); fallback to `[tool: X]` prefix is plumbed but unused unless a strict template rejects it during Phase 7 verify. |
 | Q19 | Large tool-result payloads: pass-through, truncate at N chars, or summarize via fast model? | context.lua + executor of tool-result | Phase 2 (plan); Phase 4 may refine with memory.jsonl |
 | Q20 | Parallel `tool_calls`: sequential v1 is safe; spec allows parallel. Move to parallel when both calls are read-only? | mcp.lua dispatch | Phase 2 (verify) — track for v2 |
 | Q21 | ~~MCP error mapping~~ | mcp.lua + broker.lua | **Resolved at analyze.** lmcp distinguishes: `result.isError=true` (handler exception, model-recoverable, feed back as tool turn content) vs JSON-RPC `error` (unknown method/tool, transport-level, surface as aish status **and** synthesize a `role:"tool"` turn so the assistant's `tool_calls` is never left orphaned — review fold-in). See §4. |
 | Q22 | aish's own command surface as an MCP server | scope expansion | **Out of Phase 2.** Parked for Phase 4+ if interest stays. |
 Open-end carried forward to Phase 7 (verify):
 - **Hossenfelder proxy `model`-field bug** (separate from aish): the proxy at `:8082` routes all requests to the loaded fast model regardless of the request's `model` field — chunks return `"model":"qwen2.5-coder-1.5b-q4_k_m.gguf"` even when `mistral-nemo-12b-instruct` was asked for. This **blocks live-verification of mistral-nemo's chat-template tool-role behavior**. Tracked as [aish#23](https://git.reauktion.de/marfrit/aish/issues/23) (filed 2026-05-12 at review). Sibling to the SSE-buffering bug at [aish#15](https://git.reauktion.de/marfrit/aish/issues/15) — both live in the boltzmann proxy code. Phase 7 needs at least #23 fixed to fully close Q18.
 Resolved at formulate (above in §2 table):
 - Q6 (CMD: vs tools coexistence) — both, no policy preference, substrate unchanged.
 - Q7 (MCP discovery) — both, config-declared default + runtime `:mcp connect`.
 - Q8 (authorization) — per-call confirm default, per-tool/per-server `auto_approve` policy.
 - Q9 (system-prompt augmentation) — hybrid: static frame + dynamic `tools` body field.
 - Q10 (tool-call streaming) — streaming-from-day-one on top of Phase 1 SSE.
 Resolved at analyze (2026-05-12, live probes vs lmcp v0.5.4 + hossenfelder):
 - Q17 (transport abstraction) — POST-only, no SSE channel needed for lmcp.
 - Q21 (error mapping) — isError vs JSON-RPC error split per §4.
 ---
 ## 12. Implementation Plan (commit-by-commit)
 Bottom-up — start with modules with the fewest dependencies, end with the
 REPL wiring that exercises everything together. Same shape as Phase 0
 and Phase 1 implementation cadence.
 ### Order
 1. **`mcp.lua` (new file) — JSON-RPC client.** `M.connect(url, opts)`,
   `session:initialize()` + `:list_tools()` + `:call_tool(name, args)` +
   `:close()`. Uses Phase 1's `ffi/curl.M.post` for transport — **same
   commit lands the `M.post` extension to return `(body, status_code)`
   per §3 row** so `mcp.lua` can distinguish HTTP `401` (non-JSON-RPC
   body `{"error":"unauthorized"}`) from JSON-RPC envelope errors.
   Per-server Bearer auth (`auth_token` literal or `auth_env`
   indirection). `:call_tool` returns `(result_table, kind)` where
   `kind ∈ {"ok","handler_error","rpc_error"}` so callers route per
   §4. **Test in isolation** via
   `luajit -e 'local mcp=require("mcp"); local s=mcp.connect("http://boltzmann.fritz.box:8080/mcp",{auth_env="BOLTZMANN_MCP_TOKEN"}); s:initialize(); print(#s:list_tools())'`.
   Also amends PHASE0.md §4 to list `mcp.lua` between `broker.lua` and
   `router.lua` in the same commit (additive — preserves §3
   module-stability invariant per §9).
 2. **`safety.lua` — confirm-gate surface.** Implement just
   `M.confirm_tool_call(name, args, cfg)` per §6. Reads
   `cfg.mcp.auto_approve` for exact-match and `alias__*` glob. Falls back
   to `rl.readline` prompt. Norris-mode hooks stay out (Phase 3). **Test
   in isolation** with mocked rl + various policy shapes.
 3. **`context.lua` extensions.** Three concrete edits per §3 row:
   (a) loosen `Context:append`'s assert from "content required" to
   shape-per-role (assistant may have empty content if `tool_calls`
   present; `tool` requires `tool_call_id` + `content`); (b) preserve
   `tool_calls` / `tool_call_id` in stored turns (not just role+content);
   (c) extend `to_messages()` to emit those fields. Add alternation
   assert (N4 in review). `pending_exec_output` is **unchanged**:
   buffer persists across tool-call sub-loops; flushes on next genuine
   user turn (§3 row). **Tests in isolation**: (i) build a context with
   assistant+tool_calls + tool turns, round-trip through `to_messages()`,
   eyeball JSON shape; (ii) day-one fallback test (N8) — same context
   with `use_tool_role = false` must emit the `[tool: alias__name]\n…`
   prefix shape instead of a `role:"tool"` message.
 4. **`renderer.lua` extensions.** Add `M.tool_call_begin(name, args)`
   (top rule + `name(json-snippet)` indented dim) and
   `M.tool_call_end(content, is_error)` (bottom rule with dim/red status).
   Visual parity with the exec frame. **Test visually** with a one-liner.
 5. **`broker.lua` extensions.** Signature widens:
   `chat_stream(cfg, msgs, on_delta, opts)`. `opts.tools` (optional
   array) is passed through to the request body; **omitted entirely
   when nil or empty**. The on_delta callback widens to
   `on_delta(kind, payload)` where `kind ∈ {"text","tool_call"}`.
   Text path unchanged. Tool-call path: accumulator keyed by `index`
   (default 0 if absent — C2), concatenates `function.arguments` until
   `finish_reason: "tool_calls"`, then emits one
   `on_delta("tool_call", {id,name,arguments})` per completed call.
   **`M.chat` shape unchanged** in this phase (C1 in review — no
   caller for a polymorphic return). **Test against hossenfelder**
   with `tools` declared + streaming.
 6. **`repl.lua` wiring.** New module-local `mcp_sessions = {alias=session,...}`,
   populated from `config.mcp.servers` at startup. Helpers:
   - `tools_schema()` → flatten `tool` lists across sessions, namespace `alias__name`
   - `dispatch_tool_call(call)` → split `alias__tool`, look up session, call, return content
   - `ask_ai` loop now: stream response → if any tool_calls completed,
     for each call: `safety.confirm_tool_call` → `dispatch_tool_call` →
     append assistant-with-tool_calls + tool turn → re-call `broker.chat_stream`
     → repeat until pure-text response or `max_tool_depth` reached
   - New meta cmds: `:mcp list`, `:mcp tools`, `:mcp tool <name>`,
     `:mcp connect <url> [alias]`, `:mcp disconnect <alias>`
   **End-to-end test** via the REPL against a real boltzmann lmcp +
   hossenfelder broker.
 7. **`config.lua` example block.** Add a commented-out `mcp = { servers
   = { boltzmann = {...} }, auto_approve = {...} }` example so users can
   see the shape. Not behavior-impacting; documentation only. Bundled
   with commit #6 if small or split if substantial.
 ### Risk / non-obvious
 - **Empty tools array.** If `config.mcp.servers` is absent or all
  connects fail, the broker request body must **omit** `tools`
  entirely (some servers reject `"tools": []`). Don't send the field
  when empty.
 - **Connect-at-startup blocking.** N servers × ~30 ms init+list. For
  N ≤ 3 (typical) the 90 ms is acceptable. Failures are status-logged
  per server, don't abort aish. Parallel via coroutines is out of scope
  here — sequential is fine for v1.
 - **Content blocks beyond text.** lmcp returns `[{type:"text", text:...}]`.
  The spec allows `type:"image" | "resource"`. Phase 2 v1 flattens by
  concatenating all `text` blocks and ignoring non-text. Log a status
  warning if non-text blocks are seen. Adequate for boltzmann/hertz
  tools (all text); image/resource tools deferred.
 - **`isError: false` on actual failure** (baseline finding §3 of
  PHASE2-baseline.md). Pass content through unchanged; let the model
  read the error text. Do NOT short-circuit on the flag.
 - **JSON-RPC `error` from `tools/call`.** Surface as aish status
  AND synthesize a `role:"tool"` turn with
  `content = "[aish] tool dispatch failed: <error.message>"` and the
  matching `tool_call_id`. The alternation rationale (§4) requires
  this — leaving the assistant's `tool_calls` orphaned breaks strict
  chat templates exactly the way PHASE0.md §6 warned about. The model
  receives the error and can re-plan within the same turn.
 - **Tool-call sub-loop bounds.** `max_tool_depth` (default 8) per ask_ai
  invocation. When hit, surface as status and break — append the
  assistant's last text (if any) and let the user reply.
 - **Argument JSON might be invalid.** A model can stream malformed JSON
  in `function.arguments`. `dkjson.decode` failure → DO NOT execute on
  partial parse. Synthesize a `role:"tool"` turn with
  `content = "[aish] tool arguments not parseable as JSON: <decode-err>"`
  and the matching `tool_call_id` (same alternation rationale as
  JSON-RPC error above; C7 in review).
 - **Q18 fallback path** (strict templates rejecting `role:"tool"`).
  Plumb a `context.use_tool_role` flag (default true). If a real-world
  rejection appears at Phase 7, flip the flag and convert tool turns to
  `[tool: alias__name]\n<content>` prefix on the next user turn (same
  pattern as `pending_exec_output`). **Day-one verification** (N8 in
  review): commit #3 includes a small in-isolation test that builds a
  context with `use_tool_role = false`, appends an assistant+tool_calls
  turn followed by a tool result, and confirms `to_messages()` emits
  the prefix shape instead of a `role:"tool"` turn. Keeps the fallback
  alive rather than dead-coded until Phase 7 first runs it under stress.
 ### Test checkpoints
 After each commit, verify with a targeted probe before moving on:
 | Commit | Verify |
 |---|---|
 | #1 `mcp.lua` | `luajit -e "local m=require('mcp'); ..."` connects + lists tools against boltzmann lmcp |
 | #2 `safety.lua` | unit-test policy lookup with mock rl: exact match → true; `*` glob → true; miss → prompt invoked |
 | #3 `context.lua` | (i) round-trip a context with tool turns through `to_messages()`, eyeball JSON shape; (ii) day-one fallback test with `use_tool_role = false` emits the `[tool: …]` prefix shape (N8) |
 | #4 `renderer.lua` | one-liner emits frame around fake tool result |
 | #5 `broker.lua` | curl-compare: hand-built request body with tools matches the body produced by `broker.chat_stream(cfg, msgs, on_delta, {tools = ...})` |
 | #6 `repl.lua` | full REPL: `:mcp list` shows boltzmann; question that triggers `list_dir` round-trips through confirm + execution + model continuation |
 | #7 `config.lua` | aish starts with example mcp section present; no MCP servers connected means no `tools` field sent |
 ### Commits expected: 7 (commit #1 carries the PHASE0.md §4 amendment)
 Per Phase 1's cadence (10 commits + 1 BLOCKER fix), Phase 2 is smaller
 in surface — single new file plus targeted extensions. Tracked to land
 in one working session if the boltzmann proxy bugs don't intrude.
 ### Resolved at review (2026-05-12)
 - **Q18 default** — `use_tool_role = true` defaulted, fallback exercised
  day-one in commit #3 test (ii) so it's not dead code. Phase 7 flips if
  mistral-nemo (once #23 is fixed) rejects.
 - **`:mcp connect` re-fetch policy** — v1 trusts the `listChanged: false`
  capability; manual disconnect+reconnect is the workaround if a server's
  tools change. No automatic re-fetch.
 ### Review fold-in (2026-05-12, all BLOCKERs + relevant CONCERNs/NITs)
 Independent review surfaced 5 BLOCKERs / 7 CONCERNs / 8 NITs against
 the formulate+analyze+plan draft. Resolutions applied in this revision:
 - **B1** context.lua impact widened — assert loosening + field
  preservation + `to_messages` emit are now explicit in §3.
 - **B2** `ffi/curl.M.post` extended to return `(body, status_code)` so
  `mcp.lua` distinguishes HTTP `401` from JSON-RPC envelope errors.
 - **B3** `inputSchema` typo fixed in §3 mcp.lua row.
 - **B4** `pending_exec_output` × tool-call sub-loop interaction
  specified (persists across; flushes on next user turn).
 - **B5** §3/§12 dependency contradiction resolved — broker takes
  `opts.tools` from the caller; no layering inversion.
 - **C1** `M.chat` polymorphic return dropped.
 - **C2** Index-absent fallback specified (default to 0).
 - **C3** Re-injection example now stores accumulated text in the
  assistant turn, not hard-coded empty string.
 - **C4** `:mcp connect` failure semantics specified (no auto-retry).
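 - **C5/C7** Both orphan-tool_calls scenarios now synthesize a
  `role:"tool"` turn to preserve alternation — content is
  `[aish] tool dispatch failed: ...` for a JSON-RPC error and
  `[aish] tool arguments not parseable as JSON: ...` for
  undecodable arguments.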
 - **C6** §9 explicitly notes the §4 amendment is additive.
 - **N3** protocolVersion fallback specified (lmcp doesn't negotiate).
 - **N4** alternation assert added to context.lua row.
 - **N7** model-routing bug filed as [aish#23](https://git.reauktion.de/marfrit/aish/issues/23).
 - **N8** day-one fallback test added to commit #3 checkpoints.
 CONCERNs / NITs not folded (defended as wording-only, not load-bearing):
 N1, N2, N5, N6 — left as-is.
 ---
 *End of Phase 2 Manifest — aish*
@@ -0,0 +1,90 @@
 # Phase 3 Baseline — pre-implementation measurements
 **Date:** 2026-05-12
 **Target probed:** `hossenfelder.fritz.box:8082` (OpenAI-compat broker → `qwen2.5-coder-1.5b-q4_k_m.gguf` local).
 This is the Phase 7 (verify) anchor for Phase 3. Captures the world just
 before Norris/destructive-heuristic implementation lands.
 ---
 ## 1. LLM second-opinion latency (Q23 budget check)
 `fast` preset, `temperature=0`, `max_tokens=4`, system prompt "Reply YES or NO only":
 | Command | Reply | Latency |
 |---|---|---|
 | `rm -rf /tmp/foo` | YES | 1162 ms |
 | `ls /tmp` | NO | 666 ms |
 | `truncate -s 0 important.log` | YES | 475 ms |
 | `git push --force origin main` | YES | 451 ms |
 | `cat /etc/hostname` | NO | 425 ms |
 Five-for-five correct answers; median ~475 ms; 95th percentile (small sample) ~1200 ms. The first request was slowest (likely cold-cache), subsequent ones settled below 700 ms.
 ### Budget implication for a 16-step Norris session
 Worst-case (no static-pattern hits, all queries to LLM, no cache):
 16 × 1200 ms = ~19s of additional latency over the Norris run.
 With realistic mix (static patterns catch the obvious cases without
 LLM, repeated commands hit the session cache):
 ~5s typical, dominated by genuinely-novel command tokens.
 Conclusion: LLM second-opinion is workable as a default-on feature.
 The session-scoped cache (§12 commit #2) is the right mitigation; in
 addition, checking the static patterns first means most calls never
 reach the LLM.
 ---
 ## 2. Module pre-state (Phase 2 head `f26cbd9` + cosmetic fix `3fa6279`)
 | Module | LOC | State |
 |---|---|---|
 | `safety.lua` | 55 | confirm_tool_call only; `is_destructive` and `norris_step` raise error() |
 | `renderer.lua` | 110 | exec frame + tool-call frame + assistant streaming + status; no norris frames |
 | `repl.lua` | (post-Phase 2) | tool-sub-loop + :mcp meta + `\C-n` no-op placeholder |
 | `context.lua` | (post-Phase 2) | static system_prompt (Phase 0+Phase 2 MCP block); no norris suffix wiring |
 | `broker.lua` | 96 | chat_stream(cfg, msgs, on_delta, opts) with opts.tools; no opts.max_tokens |
 | `ffi/readline.lua` | (Phase 1) | rl_bind_keyseq + M.bind wrapper; no rl_insert_text or rl_redisplay |
 | `config.lua` | (Phase 2) | mcp example block; no safety example block |
 After Phase 3 lands, `git diff main..post-phase-3 --stat` should show:
 - `safety.lua` substantial growth (~150 LOC for is_destructive + norris_step)
 - modest `renderer.lua` growth (~30 LOC for norris frames)
 - modest `repl.lua` growth (Norris driver + :norris meta)
 - one-line `context.lua` addition (system prompt suffix builder)
 - 4-line `broker.lua` addition (opts.max_tokens)
 - 6-line `ffi/readline.lua` addition (rl_insert_text + rl_redisplay)
 ---
 ## 3. Static-pattern hit-rate sanity check
 Three patterns from §5 of the manifest, each exercised against one destructive and one safe command (six cases):
 | Pattern | Test command | Expected | Result |
 |---|---|---|---|
 | `rm%s+.-%-rf?` | `rm -rf /tmp/x` | YES | HIT (pre-implementation Lua check) |
 | `rm%s+.-%-rf?` | `rm /tmp/x.log` | NO  | MISS (correct — no -r/-f flags) |
 | `git%s+push%s+.-%-%-force` | `git push --force origin main` | YES | HIT |
 | `git%s+push%s+.-%-%-force` | `git push origin main` | NO  | MISS |
 | `find%s+.-%-delete` | `find . -name '*.log' -delete` | YES | HIT |
 | `find%s+.-%-delete` | `find . -name '*.log'` | NO  | MISS |
 All six match the intent. Pattern soundness verified via Lua's `string.match`
 on each test string. Implementation in `safety.is_destructive` will use the
 same syntax.
 ---
 ## 4. Known carries from earlier phases
 - **Issue [#15](https://git.reauktion.de/marfrit/aish/issues/15)** — hossenfelder SSE buffering bug. Open. Affects Norris streaming visibility (the model's plan/explanation streams in one batch). Workaround: nothing aish-side; fix is upstream.
 - **Issue [#14](https://git.reauktion.de/marfrit/aish/issues/14)** — `:model` swap should re-render Context.system_prompt. Phase 3 makes this MORE relevant since the Norris suffix is dynamically composed; if the user `:model deep` then `:norris <goal>`, the new system prompt must take effect on the next broker call.
 - **Issues [#32](https://git.reauktion.de/marfrit/aish/issues/32) / [#33](https://git.reauktion.de/marfrit/aish/issues/33)** — Phase 2 follow-ups (tool-name validation, auto_approve typo warning). Not blocking Phase 3.
 ---
 *End of Phase 3 Baseline — aish*
@@ -0,0 +1,579 @@
 # aish — Phase 3 Manifest
 **Project:** aish — AI-augmented conversational shell
 **Document:** Phase 3 Requirements, Architecture & Design Decisions
 **Status:** Plan (review fold-in 2026-05-12 — security-layer BLOCKERs resolved)
 **Date:** 2026-05-12
 **Review fold-in (2026-05-12, security layer):**
 R-B1. **Shell-wrapper bypass coverage.** Static patterns missed `bash -c`,
    `sh -c`, `eval`, `xargs … rm`, `| sh`, `python -c`. Added to the
    pattern list in §5 as a "wrapper requires manual review" class —
    in Norris mode, any wrapper invocation HALTs regardless of the
    inner command. The wrapper itself is the trigger.
 R-B2. **LLM second-opinion model class.** Switched from `fast` to `deep`
    for the destructive-detection probe. `fast` co-emits the action
    AND judges it (circular). `deep` is a different model class
    (qwen3-30b currently mapped to `deep` per config.lua) — adds
    ~1-3s per probe but breaks the self-policing loop. Added a
    YES/inversion re-roll: if the deep model says NO, re-ask
    "Is this safe?" — disagreement → HALT. Cheap insurance for
    the edge cases. §5 reflects.
 R-B3. **`is_destructive` scope narrowed to Norris mode.** The
    formulate-time §9 said the heuristic would also gate interactive
    `CMD:` extraction. That's a PHASE0 §6/§10 substrate amendment
    that's bigger than Phase 3 should be making implicitly. Q24
    resolved: `is_destructive` runs ONLY when `norris_active == true`.
    Interactive `CMD:` extraction continues to honor `confirm_cmd`
    exactly as Phase 0 specified — no behavior change.
 **CONCERN folds (2026-05-12):**
 R-C1. **Skip-budget added** — `consecutive_skips` counter (carried in
    `opts`, see §4); ≥3 consecutive skips trigger an escalation HALT:
    "model has proposed similar destructive action 3+ times — abort,
    force-proceed, or change goal?". §4 + §6 reflect.
 R-C2. **§4 algorithm reorder** — dispatch all pending actions FIRST,
    then check `GOAL: complete`. Q25 resolution + §4 algorithm now
    consistent (was contradictory).
 R-C3. **Norris goal pinned in system-prompt suffix** — `ctx.norris_goal`
    field; the dynamic system suffix from §8 carries it. Eviction
    can no longer drop the anchor.
 R-C4. **Readline rebind safety** — `M.bind` will NOT free old callbacks
    (pin for process lifetime). Avoids a use-after-free window between
    `:free()` and the new `rl_bind_keyseq` call. Memory cost is
    bounded (one closure per bound key, negligible).
 R-C5. **`GOAL: complete` matcher** — line-level scan, exact match after
    trim. Aligned with `CMD:` extraction rigor.
 R-C6. **§4 step 4 algorithm tightened** — auto_approve only short-circuits
    the user-prompt, NEVER the destructive-heuristic. Tool-call
    without `auto_approve` entry AND no destructive flag → still
    HALTs in Norris mode (Norris is conservative by design).
 **Analyze findings (2026-05-12):**
 A1. **`\C-n` mid-readline limitation.** Phase 1's `\C-n` handler fires
    synchronously from inside the readline keystroke callback (via
    `rl_bind_keyseq` → ffi-cast Lua closure). The current binding API
    only exposes `rl_bind_keyseq` — no `rl_insert_text`,
    `rl_replace_line`, or `rl_redisplay`. So a `\C-n` callback cannot
    cleanly mutate the in-progress prompt buffer or end the
    readline call early to "transition into Norris mode".
    **Resolution**: bind `rl_insert_text` + `rl_redisplay` (single cdef
    + 2 wrapper lines in `ffi/readline.lua`) so the `\C-n` handler
    inserts `:norris ` at the cursor and refreshes the display. User
    then types the goal + Enter, routing through the existing meta
    dispatch normally. `\C-n` becomes a typing shortcut, not a state
    toggle.
 A2. **`broker.chat` lacks `max_tokens`.** The LLM second-opinion path
    in `safety.is_destructive` needs a tight YES/NO completion (2
    tokens max). The proxy + small models honor `max_tokens`
    correctly (verified vs hossenfelder: `max_tokens=4` returned a
    clean "YES" in 2 completion tokens). Phase 2's broker doesn't
    surface this option. **Resolution**: add `opts.max_tokens` to
    `M.chat_stream`'s opts table (Phase 2 already widened opts);
    `M.chat` passes through. Defaults nil → field omitted from the
    request body — Phase 1/2 callers unaffected.
 A3. **Tool-sub-loop is structurally reusable.** Phase 2's `ask_ai` sub-
    loop (stream → collect text + tool_calls → dispatch → append → loop
    until pure-text response or cap) IS the planner shape Phase 3 wants.
    `safety.norris_step` per §4 is essentially this iteration extracted
    behind a function call, plus the `GOAL: complete` sentinel check.
    No structural refactor of Phase 2 needed — Norris is additive.
 These findings tighten §3's module-changes table and §12's commit #1
 scope (adds a small `ffi/readline.lua` extension to commit #5) — see
 inline notes below where the change matters.
 PHASE0.md is the locked substrate; PHASE1.md and PHASE2.md are layered
 on top. This manifest specifies what Phase 3 adds — **Chuck Norris
 autonomous mode**, the **destructive-op safety heuristic** that gates
 it, and the **HALT/confirm protocol** for human-in-the-loop control.
 Section numbers reference back to earlier phases where relevant.
 ---
 ## 1. Scope of Phase 3
 Three pillars per PHASE0.md §11 row 3:
 1. **Norris autonomous mode** (`safety.norris_step` + `repl.lua`
   integration) — a planning-and-execution loop where the model
   pursues a user-stated goal across multiple shell-exec and
   tool-call turns without per-turn user prompting. Triggered by
   `\C-n` (Phase 1 reserved key) or `:norris <goal>`. Iterative
   re-plan after each action.
 2. **Destructive-op heuristic** (`safety.is_destructive`) — hybrid
   gate that combines (a) a static pattern denylist of obviously
   destructive shell idioms (`rm -rf`, `dd of=`, `mkfs`, `git push
   --force`, etc.) with (b) an LLM second-opinion via the `deep`
   model (R-B2) for ambiguous cases. Any positive hit forces HALT
   before execution, regardless of Norris-mode policy.
 3. **HALT/confirm protocol** — a uniform way for the Norris loop to
   surface decisions to the user. HALT means: stop generation, drop
   to a `[Norris] proceed / skip / abort?` prompt with the proposed
   action displayed. User decides on each gate; abort returns control
   to the interactive REPL with the conversation intact.
 **Phase 3 is done when:**
 - `\C-n` inserts `:norris ` at the prompt as a typing shortcut (A1), replacing the Phase 1 status no-op.
 - `:norris <goal>` launches an autonomous task explicitly.
 - The model can plan + execute a multi-step task (e.g. "find all
  Python files modified in the last week and count them") through
  iterative CMD:/tool_call cycles without per-step user confirms
  for safe operations.
 - `rm -rf /tmp/foo`, `dd of=/dev/sda`, and equivalent destructive
  operations HALT and require explicit user approval.
 - The LLM second-opinion catches at least one realistic ambiguous
  case the static patterns miss (e.g. `: > important.log`,
  `cp /dev/null important.log`).
 - HALT-abort returns to interactive mode without context loss.
 ---
 ## 2. Technology Decisions (delta from Phase 2)
 | Decision | Choice | Rationale |
 |---|---|---|
 | Planning model | **Iterative re-plan after each action** | Resolves PHASE0.md §13 Q2. Top-down task trees are brittle to dynamic environments — a shell command's output frequently changes what the next step should be. Iterative re-plan piggybacks the existing Phase 2 tool-sub-loop pattern: model emits next action, gets result, decides next. Depth-bounded by `max_norris_steps` (default 16, configurable). |
 | Action sources | **`CMD:` lines + MCP `tool_calls`** | Per PHASE0.md §11 row 3 ("now able to use MCP tools as well as CMD: lines"). Norris consumes both kinds equally. The Phase 2 system prompt already biases toward tools when available; that bias carries into Norris mode unchanged. |
 | HALT trigger | **Static-pattern hit OR LLM-second-opinion flag** | Either gate fires HALT independently. Static for speed and predictability on known footguns; LLM for novel/ambiguous patterns. Cost of an LLM second-opinion call: one `deep`-model round-trip (~1-3s per probe, R-B2), plus a second one when the re-roll fires. Only invoked when static doesn't already HALT. |
 | HALT response shape | **3-way prompt**: `proceed` / `skip` / `abort` | `proceed` runs the action and continues. `skip` reports "user skipped" to the model and lets it re-plan. `abort` ends the Norris session, drops back to interactive mode. (`abort` is also bound to `\C-x\C-c` per PHASE1.md §7 reserved keys.) |
 | Auto-approve under Norris | **Trust the Phase 2 `auto_approve` policy** | A tool already in `auto_approve` runs without HALT even in Norris mode, as long as the destructive-op heuristic doesn't flag it. The user opted in once; Norris doesn't unilaterally re-prompt. CMD: lines never auto-approve under Norris — they always pass through `is_destructive` first. |
 | Destructive-op static rules | **Patterned shell-idiom list** in `safety.lua` (hardcoded; configurable later via `config.safety.destructive_patterns`) | Phase 3 v1 ships a fixed list (~20 patterns) inline. v2 may make it user-extendable. Patterns target the command string after expansion; conservative — false positives mean a confirm prompt the user dismisses, false negatives mean unsupervised destructive action. Bias to false positives. |
 | LLM second-opinion model | **The `deep` preset** (independent model class, not the one emitting actions) | R-B2 resolution. Same model class self-policing is circular — `deep` (qwen3-30b currently) judges actions emitted by the active model (often `fast` qwen-1.5b under Norris). Adds ~1-3s per probe; broker failure → YES (safe default). Re-roll inversion: if first probe says NO, ask the inverted "Is this safe?" — disagreement → HALT. |
 | Norris prompt suffix | **Status appended to the system prompt** when Norris is active: `[NORRIS MODE] You are operating autonomously toward a stated goal. Plan and execute step by step. Use CMD: lines or tool_calls. When done, emit "GOAL: complete" on its own line.` | The `GOAL: complete` sentinel is how the model signals task completion; Norris loop exits the planning sub-loop on seeing it. |
 | Interrupt handling | **`\C-c` during a Norris step sends abort** | Standard SIGINT semantics for the user. Mid-stream, this means: stop the broker request, stop any running shell command, drop to interactive mode. The current context is preserved (incl. partial assistant turn). |
 | Context budgeting under Norris | **Same `max_turns` and `token_budget` as interactive** | Sliding window evicts oldest non-system turns when budget exceeded — including mid-Norris-session if the loop runs long. Phase 4's `memory.jsonl` summarization is the proper fix; Phase 3 just gets the eviction status as before. |
 ---
 ## 3. Module Changes
 | File | State after Phase 2 | Phase 3 changes |
 |---|---|---|
 | `safety.lua` | `confirm_tool_call` (Phase 2 surface only) + Phase 3 stubs `is_destructive` / `norris_step` raising error() | Implement the stubs: (a) `is_destructive(cmd_or_tool_call) -> (bool, reason)` with static pattern matching + optional LLM second-opinion (controlled by `cfg.safety.llm_second_opinion`, default true); (b) `norris_step(ctx, broker_cfg, executor_fn, tools_fn, halt_fn, opts) -> {status, reason}` — single iteration of the Norris loop. Pattern list is module-local; LLM second-opinion uses `broker.chat` (non-streaming, no tools, single-shot). |
 | `repl.lua` | tool-sub-loop + `:mcp` meta + Phase 1 `\C-n` no-op binding | Replace the `\C-n` no-op body with a handler that inserts `:norris ` into the prompt buffer (A1 — a typing shortcut, not a state toggle). Add `:norris <goal>` meta cmd as the launch command. New module-local `norris_active` flag. Implement the Norris driver loop: while active, call `safety.norris_step`; handle HALT decisions; exit on `GOAL: complete`, `abort`, or step budget exceeded. Auto_approve policy from `confirm_tool_call` is consulted in-line. |
 | `renderer.lua` | exec frame + tool-call frame + assistant streaming | Add `M.norris_begin(goal)`, `M.norris_step(n, action_desc)`, `M.norris_halt(reason, action)`, `M.norris_end(status, reason)`. Visual: bold cyan banner on enter, indented step counter per iteration, red HALT banner on intercept, dim summary on exit. Phase 0 prompt becomes `[aish:fast ⚡]>` when Norris is active per PHASE0.md §9. |
 | `broker.lua` | `chat_stream` with opts.tools, `chat` non-streaming | Re-used as-is for planning rounds (Norris just calls chat_stream like interactive). See row below for the small `max_tokens` opts extension needed by the LLM second-opinion path. |
 | `context.lua` | system_prompt + turns + pending_exec_output + use_tool_role | When Norris is active, `to_messages()` appends the Norris suffix (§2 row "Norris prompt suffix") to the system message. The suffix is computed dynamically — when Norris exits, subsequent broker calls revert to plain system prompt. No additional storage. |
 | `ffi/readline.lua` | `bind(seq, fn)` (Phase 1) — frees old callback before rebinding | **Small extension per A1 + R-C4 fix**: (a) add `rl_insert_text` + `rl_redisplay` to the `ffi.cdef` block and expose `M.insert_text(s)` / `M.redisplay()` wrappers — needed so `\C-n` can stuff `:norris ` into the buffer; (b) drop the `_bound[seq]:free()` call from `M.bind` — readline retains the function pointer in its keymap; freeing before re-bind opens a use-after-free window if the user presses the key in that gap. Pin all bound callbacks for process lifetime; memory cost is bounded (one closure per key, ~O(N) where N = number of bound keys ≤ ~10). |
 | `broker.lua` | `chat_stream(cfg, msgs, on_delta, opts)` with opts.tools | **Small extension per A2**: `opts.max_tokens` (integer) is passed through to the request body as `max_tokens`. Omitted when nil. `M.chat` accepts the same opt. Needed so `safety.is_destructive`'s YES/NO probe terminates in ~2 tokens. |
 | `config.lua` | mcp example block | New optional `safety = { llm_second_opinion = true, llm_model = "deep", destructive_patterns = {...} }` block, also shipped as a commented-out example. Defaults are sane when absent. |
 No new module files beyond what already exists. The `\C-x\C-c` abort keybinding (PHASE1.md §7 reserved) gets wired here.
 ---
 ## 4. The Planning Loop (`safety.norris_step`)
 One iteration of Norris is exactly one round-trip with the model — same
 shape as Phase 2's tool-sub-loop iteration, with the model deciding what
 to do next based on accumulated context:
 ```
 norris_step(ctx, broker_cfg, executor_fn, tools_fn, halt_fn, opts):
    # opts.step_n, opts.max_steps, opts.cfg, opts.consecutive_skips
    1. Call broker.chat_stream(broker_cfg, ctx:to_messages(), on_delta, {tools=tools_fn()})
       — collect (text, tool_calls).
    2. Extract actions from response:
         - tool_calls   (already collected by broker accumulator)
         - cmd_lines    via executor.extract_cmd_lines(text) — line-anchored
         - goal_done    line-level scan for exact "GOAL: complete" (R-C5)
    3. If actions are empty AND goal_done is false:
         → return {status="stalled", reason="no action"}.
    4. Dispatch ALL pending actions BEFORE checking goal_done (R-C2):
       tool_calls first (structured route), CMD: lines second (legacy).
       For each action:
         a. Pass through safety.is_destructive(action).
            - tool_calls: check tool-name set + serialized args.
            - CMD: lines: pattern match + LLM probe.
         b. If destructive: invoke halt_fn(action, reason, opts.cfg).
            "proceed" → run action.
            "skip"    → opts.consecutive_skips += 1.
                        If consecutive_skips >= 3 (R-C1):
                          escalate halt with reason "repeated similar skips"
                          → user verdict abort / force-proceed.
                        Append synthesized "[aish] action skipped by user: <reason>"
                        as a role:"tool" turn (for tool_calls) or as exec-output
                        prefix (for CMD: lines) — alternation invariant.
            "abort"   → return {status="aborted"}.
         c. If non-destructive (cleared by static + LLM):
            - tool_call: check auto_approve. If in policy, run silently;
              otherwise (R-C6) halt_fn STILL fires for the consent prompt
              (Norris is conservative; auto_approve is the *only* way to
              skip consent in autonomous mode).
            - CMD: line: run (destructive-check is the gate; confirm_cmd
              is interactive-mode-only — R-B3 narrows scope).
         d. On successful proceed: opts.consecutive_skips = 0.
         e. Append result turn to ctx (role:"tool" for tool calls,
            exec-output buffer for CMD: — same as Phase 0/2 paths).
    5. After all actions dispatched: if goal_done → return {status="done"}.
    6. step_n += 1. If step_n >= max_steps:
       return {status="budget_exhausted"}.
    7. Continue loop (driver in repl.lua re-calls norris_step).
 ```
 The driver in repl.lua is the simple while loop; norris_step is one
 iteration so testing is granular.
 ---
 ## 5. Destructive-Op Heuristic (`safety.is_destructive`)
 ### Static pattern list (v1, ~30 entries)
 ```lua
 -- Static denylist consulted by safety.is_destructive before any LLM probe.
 -- Each rule is { pat = <Lua pattern>, reason = <label reported on a hit>,
 -- ci = <optional; true for a case-insensitive rule> }.
 -- Rules marked ci = true are matched against the LOWERCASED input, so their
 -- `pat` must itself be written in lower case (the `reason` label is free-form).
 local DESTRUCTIVE_PATTERNS = {
    -- ── Shell wrappers (R-B1) — flag the wrapper itself; can't inspect content
    --    safely without parsing the inner shell. Norris HALTs on these
    --    unconditionally; the user can proceed/abort with the full context.
    { pat = "^%s*bash%s+%-l?c%s",              reason = "bash -c (wrapped shell)" },
    { pat = "^%s*sh%s+%-l?c%s",                reason = "sh -c (wrapped shell)" },
    { pat = "^%s*zsh%s+%-l?c%s",               reason = "zsh -c (wrapped shell)" },
    { pat = "^%s*eval%s",                      reason = "eval (dynamic shell)" },
    { pat = "^%s*python3?%s+%-c%s",            reason = "python -c (inline script)" },
    { pat = "^%s*perl%s+%-e%s",                reason = "perl -e (inline script)" },
    { pat = "|%s*sh%s",                        reason = "pipe-to-sh" },
    { pat = "|%s*sh%s*$",                      reason = "pipe-to-sh (eol)" },
    { pat = "|%s*bash%s",                      reason = "pipe-to-bash" },
    { pat = "|%s*bash%s*$",                    reason = "pipe-to-bash (eol)" },
    { pat = "xargs%s+.-rm",                    reason = "xargs ... rm" },
    -- ── Filesystem destructive
    { pat = "rm%s+.-%-rf?",                    reason = "rm -rf" },
    { pat = "rm%s+.-%-fr?",                    reason = "rm -fr" },
    { pat = "find%s+.-%-delete",               reason = "find -delete" },
    { pat = "find%s+.-%-exec%s+rm",            reason = "find -exec rm" },
    { pat = ">%s*/dev/sd[a-z]",                reason = "write to raw disk" },
    { pat = "dd%s+.-of=/dev/",                 reason = "dd to device" },
    -- Matches both `mkfs.<fstype> …` and the bare `mkfs -t <fstype> …` form.
    { pat = "mkfs[%.%s]",                      reason = "mkfs (format)" },
    { pat = "shred%s",                         reason = "shred" },
    { pat = "wipefs%s",                        reason = "wipefs" },
    { pat = "truncate%s+.-%-s%s*0",            reason = "truncate to zero" },
    -- ── Version control destructive
    { pat = "git%s+push%s+.-%-%-force",        reason = "git push --force" },
    -- %f[%W] rather than %f[%s]: the frontier treats end-of-subject as '\0',
    -- which is not whitespace, so a trailing `-f` would otherwise be missed.
    { pat = "git%s+push%s+.-%-f%f[%W]",        reason = "git push -f" },
    { pat = "git%s+reset%s+.-%-%-hard",        reason = "git reset --hard" },
    { pat = "git%s+clean%s+.-%-fd?",           reason = "git clean -fd" },
    { pat = "git%s+branch%s+.-%-D",            reason = "git branch -D" },
    -- ── Database / process (ci rules: pattern text is lower case on purpose)
    { pat = "drop%s+table",                    reason = "DROP TABLE", ci = true },
    { pat = "drop%s+database",                 reason = "DROP DATABASE", ci = true },
    { pat = "truncate%s+table",                reason = "TRUNCATE TABLE", ci = true },
    { pat = "kill%s+%-9",                      reason = "kill -9" },
    { pat = "pkill%s+%-9",                     reason = "pkill -9" },
    -- ── Network/permission (chown tightened per NIT 2)
    { pat = "chmod%s+.-777",                   reason = "chmod 777" },
    { pat = "chown%s+.-%s+/%s*$",              reason = "chown on root path" },
 }
 ```
 The 11 wrapper patterns are the conservative floor against R-B1 bypass classes. Norris emits `bash -c '...'` → wrapper hit → HALT (the user can proceed after reading the inner command). LLM second-opinion still runs as a backup, but the static net catches the obvious cases first.
 Patterns are Lua patterns (not regex). `ci = true` enables case-insensitive
 match — the matcher loop lowercases the input string when `ci` is set on
 the rule, so `DROP TABLE` and `drop table x` and `Drop Table` all match
 the same rule. Without `ci`, patterns are case-sensitive (the default).
 ### LLM second-opinion (when static doesn't HALT)
 If `cfg.safety.llm_second_opinion ~= false`, call `broker.chat`
 (non-streaming, no tools, `opts.max_tokens=4`) on the **deep** model
 preset (independent class from the action-emitting model — R-B2):
 ```
 System: Detect destructive shell commands. Reply YES if running this would
 delete, overwrite, or irreversibly modify data, processes, or system state.
 Otherwise reply NO. Respond with only YES or NO.
 User: <the command text or tool-call summary>
 ```
 Response parse: case-insensitive match on `YES` → destructive.
 **Re-roll on NO** (R-B2 cheap insurance): if the first probe returns NO,
 run a second probe with inverted phrasing:
 ```
 System: Reply YES or NO only. Is the following shell command SAFE to
 run autonomously without user review?
 User: <same command>
 ```
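 Re-roll says NO ("not safe") → command is dangerous → HALT. Because the
 second probe is inverted, a literal NO from both probes is a
 disagreement (first: "not destructive"; second: "not safe") → HALT
 (treat agreement-by-default as suspicious). Only when both probes agree
 the command is safe (first NO, second YES) → clear.
 Broker failure → treated as YES, i.e. destructive (fail-closed default).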
 Session-scoped cache keyed by the normalized command string mitigates
 the latency cost (~1-3s per probe on deep model — see PHASE3-baseline §1).
 Repeated patterns within a single session probe once.
 Returns: `(is_destructive, reason)`. Reason is the matched pattern name
 for static hits, "LLM flagged as destructive" / "LLM probe disagreement"
 for the two LLM failure modes.
 ### Tool-call destructive check
 For MCP tool_calls, `is_destructive` checks:
 1. Tool name against an "always destructive" set (configurable; v1 includes
   `*__shell` / `*__write_file` / `*__edit_file` / `*__shell_bg` patterns).
 2. Arguments serialized as JSON against the static shell patterns (in case
   a `shell` tool's command argument is destructive).
 3. LLM second-opinion on the JSON-serialized call.
 ---
 ## 6. HALT Protocol
 When `is_destructive` returns true OR a non-auto_approve tool_call is
 attempted under Norris (auto_approve is the user's explicit consent
 that DOES apply):
 ```
 ─── NORRIS HALT ───────────────────────────────
  step 7/16
  reason: rm -rf
  action: rm -rf /var/log/old
 [N] proceed / skip / abort? p
 ```
 User types `p` (proceed) / `s` (skip) / `a` (abort).
 - **proceed**: run the action, append result to context, continue loop.
 - **skip**: append a synthesized turn explaining the user skipped this
  step (gives the model a chance to re-plan); continue loop.
 - **abort**: exit Norris mode; the conversation context is preserved.
  Drop back to the interactive prompt.
 `\C-x\C-c` at any prompt also aborts.
 Auto-approved tools (per `cfg.mcp.auto_approve`) skip the HALT entirely
 IF AND ONLY IF the destructive-op heuristic doesn't flag them. The
 heuristic is the final word — auto_approve is a confirmation bypass,
 not a destructive bypass.
 ---
 ## 7. Meta Commands (Phase 3 additions)
 | Command | Action |
 |---|---|
 | `:norris <goal>` | Launch Norris mode with an explicit goal text (same as `\C-n` after typing a goal but works on previously-issued goals too) |
 | `:norris off` | Exit Norris mode mid-loop (alternative to abort prompt) |
 | `:safety patterns` | Show the active destructive-op pattern list |
 | `:safety check <cmd>` | Probe `is_destructive` against a hypothetical command without running it (debug aid) |
 `\C-n` toggles Norris on/off in-place. If on, prompts for a goal if none
 pending; if off and a goal is in progress, asks for confirm-abort.
 ---
 ## 8. System Prompt Augmentation (active only in Norris)
 Appended to the default Phase 2 system prompt while `norris_active == true`.
 The current goal is embedded in the suffix so eviction can't drop the
 anchor (R-C3):
 ```
 [NORRIS MODE] You are operating autonomously toward the following goal:
    <ctx.norris_goal>
 Plan and execute step by step using CMD: lines (for shell) or tool_calls
 (when MCP tools are available). After each action, you will see its
 result in the next turn. Re-plan based on what you observe.
 When the goal is achieved, emit a single line:
    GOAL: complete
 on its own line, optionally followed by a brief summary.
 If the goal is unreachable or you need user input, emit:
    GOAL: blocked
 with a one-line reason.
 Avoid destructive operations unless the goal explicitly requires them.
 The user will be prompted to confirm destructive actions; expect their
 verdict in the next turn as "[aish] action skipped by user" or
 "[aish] action approved".
 ```
 This block is composed dynamically by `context.to_messages()` when
 `ctx.norris_active` is set. State stored:
  - `ctx.norris_active = true|false`
  - `ctx.norris_goal   = "<goal text>"` (cleared on exit)
 The user-emitted "[norris] <goal>" turn ALSO lives in the turn list as
 a regular user turn for the model's reading benefit. If the sliding
 window evicts it later, the system-prompt suffix still carries the
 goal — alignment with the eviction policy without special-case pinning.
 ---
 ## 9. Migration from Phase 2
 User-visible:
 - `\C-n` now does something (was a Phase 1 placeholder) — inserts
  `:norris ` at the cursor.
 - `:norris <goal>` is a new meta command.
 - **Interactive mode is UNCHANGED** (R-B3 resolution of Q24): the
  `is_destructive` heuristic runs ONLY when `norris_active == true`.
  Interactive `CMD:` extraction continues to honor `confirm_cmd`
  exactly as Phase 0 specified. No surprises for existing users.
 Substrate (PHASE0.md §3) invariants: unchanged. The `CMD:` extraction
 marker is still the only shell-suggestion contract. `confirm_cmd`
 semantics are preserved as-defined in PHASE0 §10.
 `config.lua`: configs without a `safety` block work unchanged — defaults
 kick in (LLM second-opinion enabled, default pattern list, default step
 budget).
 ---
 ## 10. Out of Scope (Phase 3)
 Per PHASE0.md §11, these belong to later phases:
 - `memory.jsonl` summarization across sessions (Phase 4).
 - Multi-model routing / cloud fallback (Phase 5) — but Norris's
  LLM second-opinion uses the `deep` model preset (R-B2) regardless of
  the active model.
 - Tree-sitter syntax highlighting (Phase 6).
 Specifically out of Phase 3 scope despite proximity:
 - Per-session destructive-pattern learning (user-corrects-LLM feedback
  loop). v2.
 - Parallel exploration / branching Norris sessions. v3+.
 - User-extendable pattern list via config. v2 — Phase 3 ships hardcoded.
 - Goal-decomposition for very long-running tasks (multi-day, persistent
  state). Out of aish's scope entirely; that's a different tool.
 ---
 ## 11. Open Questions
 | # | Question | Impact | Resolve by |
 |---|---|---|---|
 | Q23 | ~~LLM second-opinion latency budget~~ | safety.lua | **Resolved at baseline** — 425-1162ms per probe on the **fast** model (baseline §1); switched to **deep** at review (R-B2) at the cost of ~1-3s per probe, paid back by independent model class. Session cache mitigates repeated patterns. |
 | Q24 | ~~`is_destructive` also runs on interactive `CMD:` extraction?~~ | safety.lua + repl.lua | **Resolved at review (R-B3)** — NO. `is_destructive` runs ONLY when `norris_active == true`. Interactive `CMD:` extraction honors `confirm_cmd` exactly as Phase 0 specified. No substrate amendment. |
 | Q25 | ~~`GOAL: complete` AND pending actions in same response?~~ | repl.lua norris driver | **Resolved at review (R-C2)** — dispatch all pending actions FIRST (tool_calls then CMD:), THEN check for `GOAL: complete`. Algorithm in §4 reflects. |
 | Q26 | Context preservation when Norris ends with `abort` vs `done` vs `budget_exhausted`. Proposal: all three keep ctx intact (user sees the conversation in `:history`). The only difference is the renderer summary. | repl.lua + renderer.lua | Phase 3 (plan) |
 | Q27 | Resume mode after abort: should the user be able to type `:norris continue` to pick up where the model left off? v1 says no — too many edge cases with stale plans. v2 maybe. | scope | Phase 3 — defer to v2 |
 | Q28 | `tool_calls` from MCP servers that have side effects but aren't in `*__shell` / `*__write_file` patterns (e.g. a custom `hertz__wol_machine` tool that wakes a server). The static set in §5 won't catch this; the LLM second-opinion might. Reasonable default given the LLM's role here. | safety.lua | Phase 3 (verify) |
 | Q29 | Norris response when `is_destructive` returns YES but the user-stated goal explicitly authorizes destruction (e.g. "clean up old logs in /var/log"). Currently the HALT still fires. Should the model be allowed to convey "user authorized this implicitly" in the goal? v1: no — explicit per-action confirm always. v2 could relax. | UX + safety.lua | Phase 3 (verify) |
 | Q30 | `:norris` without a goal arg vs `\C-n`: should they share a single "ask for goal" code path? Yes; trivial. | repl.lua | Phase 3 (plan) |
 Resolved at formulate (in §2 table):
 - Q2 (planner shape) — iterative re-plan after each action.
 - Q8 inheritance — auto_approve from Phase 2 applies under Norris IF destructive heuristic clears.
 Carried forward (not in §13 originally):
 - Norris's interaction with Phase 4's memory.jsonl — captured tasks could pre-populate context. Phase 4 concern.
 ---
 ## 12. Implementation Plan (commit-by-commit)
 Bottom-up, same cadence as Phase 0/1/2. Six commits expected:
 1. **`safety.is_destructive` — static pattern list only.** Implement the
   ~20-pattern matcher + the tool-call shell-arg extraction. No LLM
   second-opinion yet. Returns `(bool, reason)`. **Test**: unit-table of
   ~30 commands (mix of destructive + safe) → assertEqual on each.
 2. **`safety.is_destructive` — LLM second-opinion + cache.** Add the
   deep-model probe path (R-B2; see §5) with a session-scoped cache keyed
   by the normalized command string (mitigates Q23 latency). Broker-failure
   falls back to YES. **Test**: mock broker; verify cache hits don't
   re-call; verify failure-fallback is YES.
 3. **`renderer.lua` — Norris frames.** Add `norris_begin/step/halt/end`
   per §3. Visual parity with exec/tool frames. Update prompt to
   include `⚡` when active. **Test**: one-liner script renders each
   frame visually.
 4. **`safety.norris_step` — single-iteration planner.** The
   `norris_step` function per §4. Caller provides ctx + dispatch
   helpers; returns `{status, reason}`. No driver loop yet — that's
   the next commit. **Test**: mock broker emitting various model
   responses (text+actions, GOAL:complete, stalled, destructive
   action requiring HALT) and verify each return shape.
 5. **`repl.lua` — Norris driver + `\C-n` real binding + `:norris` meta.**
   The while-loop driver consuming `safety.norris_step`, the rebound
   `\C-n` (replacing Phase 1 placeholder), the `:norris <goal>` /
   `:norris off` meta cmds, and `\C-x\C-c` abort handler. **Interactive
   `CMD:` extraction is UNCHANGED** — `is_destructive` runs ONLY when
   `norris_active == true` (R-B3 resolution of Q24); `confirm_cmd`
   semantics from PHASE0 §10 are preserved exactly. Bundled with this
   commit: `ffi/readline.lua` extension per §3 row — `rl_insert_text` +
   `rl_redisplay` cdefs + `M.insert_text` / `M.redisplay` wrappers,
   AND removal of the `_bound[seq]:free()` call from `M.bind` (R-C4 —
   small Phase 1 amendment, called out here so the commit body cites
   it). **Test**: mocked-broker end-to-end — submit a multi-step goal,
   verify driver loops correctly, hits GOAL:complete, returns to
   interactive.
 6. **`config.lua` — `safety` example block.** Commented-out example
   showing `llm_second_opinion`, `llm_model`, `destructive_patterns`,
   `max_norris_steps`. Documentation only.
 ### Risk / non-obvious
 - **Catastrophic false-negative in `is_destructive`**: the static list
  is patterned; a creative model could write `bash -c "rm -rf /tmp"` or
  `r"m" -rf` etc. Static is the floor, LLM second-opinion is the
  net. Both check.
 - **LLM second-opinion model itself being autonomous** in a Norris run
  would be circular. Mitigation: the second-opinion call uses
  `broker.chat` (no tools, no streaming, dedicated prompt) — distinct
  call path from the Norris planning stream. No tool-call recursion
  possible.
 - **Norris loop runs the LLM N times**: each step is a full broker
  round-trip plus optionally an LLM second-opinion. A 16-step Norris
  goal could be ~32 LLM calls on the fast model. Visible as latency
  but no economic surprise on local models.
 - **Q24 resolution (R-B3)**: `is_destructive` runs ONLY in Norris
  mode. Interactive `CMD:` extraction continues to honor `confirm_cmd`
  exactly as Phase 0 specified. No substrate amendment; no surprises
  for users of `confirm_cmd=false` setups.
 - **`GOAL: complete` extraction** uses the same `^GOAL: complete$` regex
  on emitted text. Substrate-aligned with CMD: extraction.
 ### Open at plan; resolve at review
 - Whether to ship the LLM second-opinion **on by default** or
  **off by default with a config opt-in**. Default on is safer; off
  saves latency. Recommend on; Phase 7 verify will quantify the
  overhead.
 - Whether `:safety check <cmd>` should also be reachable by `\C-x`
  keybinding for fast probing during interactive sessions. v2.
 ---
 *End of Phase 3 Manifest — aish*
@@ -0,0 +1,459 @@
 # aish — Phase 4 Manifest
 **Project:** aish — AI-augmented conversational shell
 **Document:** Phase 4 Requirements, Architecture & Design Decisions
 **Status:** Plan (review fold-in 2026-05-13 — TOCTOU race + Norris suppression + summarizer self-amp resolved)
 **Date:** 2026-05-13
 **Review fold-in (2026-05-13):**
 R-B1. **TOCTOU race on memory.jsonl** — two aish processes against the
    same `history.dir` would each compute the same `next_id` and
    produce duplicate ids; tombstones become ambiguous. Resolution:
    `M.open_memory` takes an `flock(LOCK_EX | LOCK_NB)` advisory lock
    on the file descriptor. Held until handle close. Failure to
    acquire → `nil, "memory.jsonl held by another aish process"`.
    Requires extending `ffi/libc.lua` with `flock(2)` — one cdef +
    two constants (LOCK_EX=2, LOCK_NB=4). The lock is the *enforcement*
    of the single-writer assumption stated in §2; documented in §2 row.
 R-C1. **System-prompt growth under Norris** — over an 8-step Norris run,
    a 2KB [background] block adds ~16K redundant characters (~4K tokens,
    at the §2 estimate of 2000 chars ≈ 500 tokens). The Phase 0
    §8 sliding window evicts user/asst pairs but keeps the system
    prompt, so big system prompts displace conversation. Resolution
    (Q33 closed): suppress [background] when `ctx.norris_active == true`.
    Memory items rarely change Norris-step planning, and Norris has
    its goal anchor via the NORRIS suffix already. §5 + §11 reflect.
 R-C2. **Summarizer self-amplification** — running `:memory summarize`
    twice in one session would feed the previous summarize call's
    *assistant turn* back into the input, leading to drift (re-propose
    accepted items, no signal about rejections). Resolution: operate
    on the session log file (`history.load(session_path)`) rather
    than `ctx:to_messages()`. The session log is the authoritative
    "what was discussed" stream. Skip lines tagged
    `{role:"assistant", meta:"summarize"}` (a new optional field on
    the JSONL turn). §6 reflects.
 R-C3. **DEFAULT_SYSTEM_PROMPT bakes MCP statically** — cosmetic. §5
    diagram now reads "DEFAULT (Phase 0 + Phase 2 MCP) → [background]
    → NORRIS". No code change.
 NITs folded inline:
  N1. `:memory forget <id>` for an already-tombstoned id → no-op + status.
  N2. §2 path note: memory.jsonl is sibling of sessions/, no collision.
  N3. §4 invariant: items have id ≥ 1; meta header has no id and is
      ignored; tombstones with non-matching targets are no-ops.
  N4. §7 `:memory inject` semantics: replaces `ctx.memory_items` from
      a fresh `load_memory()` + LRU-by-ts truncation (same as startup).
 **Analyze findings (2026-05-13):**
 A1. **history.lua surface is clean** — `M.open`/`Session:append`/
    `Session:close`/`M.load`/`M.list_sessions`. The memory functions
    can mirror this exactly: `M.open_memory`/`memory:add`/
    `memory:forget`/`memory:close`/`M.load_memory`. No structural
    refactor needed; pure additions.
 A2. **Counter persistence — scan at open, cache in handle.** Phase 1's
    session log writes a `{"meta":{...}}` header on first creation but
    doesn't track entry-id (turns aren't numbered). For memory, the
    monotonic id is needed for forget-targeting. Cheapest correct
    approach: on `M.open_memory`, read all lines once, find the max
    `id` field present (skipping the meta header if any), cache as
    `handle.next_id`. Subsequent `add` calls increment in-memory and
    persist on the next append. O(n) at open is acceptable since n is
    bounded by user curation (~hundreds, not millions). No sidecar.
 A3. **System-prompt suffix order, post-analyze**: actual current
    composition is `DEFAULT_SYSTEM_PROMPT` (which has Phase 2 MCP
    guidance already baked-in as a static block) → optional `NORRIS`
    dynamic suffix. The Phase 2 MCP block is NOT computed dynamically
    — it's part of DEFAULT_SYSTEM_PROMPT. So Phase 4's `[background]`
    block lives between DEFAULT and NORRIS. Token cost measured:
    - DEFAULT: 697 chars (~174 tokens)
    - DEFAULT + NORRIS: 1458 chars (~364 tokens)
    - DEFAULT + 2KB background + NORRIS: ~3460 chars (~865 tokens)
    Within typical 4-8K context budgets.
 These findings don't require manifest changes — the §3 module-changes
 table and §5 injection mechanism already match. Recording the
 measurements here so verify (Phase 7) has anchors.
 PHASE0 is the locked substrate; PHASE1, PHASE2, PHASE3 are layered on top.
 This manifest specifies what Phase 4 adds — **cross-session memory** — and
 the user-facing surface for managing it.
 ---
 ## 1. Scope of Phase 4
 Three pillars per PHASE0.md §11 row 4:
 1. **`memory.jsonl` persistent store** — a single append-only file
   (`<config.history.dir>/memory.jsonl`) carrying user-curated facts,
   preferences, and project context that survive aish restarts. Same
   storage convention as session logs but a separate file because the
   read pattern (load at startup) and write pattern (curated only)
   differ from session logs (append-every-turn).
 2. **Startup context injection** — at REPL boot, recent memory items
   are loaded into the live `Context` so the model sees them on the
   very first turn. Injection is bounded (token budget) and visible
   to the user via `:memory list`.
 3. **`:memory` management surface + automatic candidate extraction** —
   meta commands for `add`, `list`, `forget`, `clear`, plus an opt-in
   summarizer that runs at session end (or on demand) extracting
   candidate facts from the session log for the user to triage into
   memory.
 **Phase 4 is done when:**
 - `:remember <text>` (alias for `:memory add <text>`) writes a line to
  `memory.jsonl` and the next REPL boot sees it in context.
 - `:memory list` shows current memory items with their IDs and ages.
 - `:memory forget <id>` removes one item; `:memory clear` removes all
  (with confirm).
 - At startup, the top-N most recent memory items are injected into the
  system prompt as a single `[background]` block (configurable cap via
  `cfg.memory.inject_max_chars`).
 - `:memory summarize` runs the active model over the current session
  log and proposes candidate memory items; the user accepts/rejects
  per-candidate via prompt.
 - Existing configs without a `memory` section behave exactly like
  Phase 3 (no startup injection, no auto-summarize).
 ---
 ## 2. Technology Decisions (delta from Phase 3)
 | Decision | Choice | Rationale |
 |---|---|---|
 | Storage format | Append-only JSONL, one item per line | Same convention as Phase 1's session logs. Greppable, robust to truncation, no parser dependency beyond vendored dkjson. |
 | Storage location | `<config.history.dir>/memory.jsonl` (sibling to `sessions/`) | Co-located with session logs; users can back up one directory. Defaults to `~/.local/share/aish/memory.jsonl`. Path is a sibling of `sessions/` (not inside it), so `:save <name>` cannot collide. |
 | Memory-item shape | `{id, ts, kind, content, tags?, source?}` | `id` is monotonic int (next id recovered by scanning the file for the max `id` at `M.open_memory` and cached in the handle — no sidecar counter file, per A2); `kind ∈ {"fact","pref","context"}` lightly typed for future routing; `content` is the body text; optional `tags` array; optional `source` carrying session-id provenance when auto-extracted. |
 | Forget semantics | **Append a tombstone**, don't rewrite the file (`{id, ts, kind:"forget", target:<other_id>}`) | Append-only preserves history. `M.load_memory` resolves tombstones during read — silently drops any item whose `id` appears as a forget-target. `:memory clear` writes one tombstone per active item; could also support a wildcard forget. |
 | Auto-summarize cadence | **Manual only in v1** (`:memory summarize`). Auto-trigger on `:quit` or by token count is Q-list material. | Conservative; users opt in. Avoids burning tokens on every session end. Manual surface lets the user QA candidates before they land. |
 | Summarizer model | Configurable via `cfg.memory.summarizer_model`; the `fast` preset is the recommended value (cheap; quality good-enough for extraction) and is what the example config sets. When unset, the active model is used (see §6 and §8) | Summarization is recall over precision — fast model's tendency to err on the side of inclusion is fine because the user filters per-candidate. |
 | Startup injection mechanism | A new dynamic block on the system prompt, appended by `context.to_messages()` when `ctx.memory_items` is non-empty | Same hybrid-prompt pattern as Phase 2's MCP block and Phase 3's NORRIS suffix. No new context structure beyond a list on the Context. |
 | Injection budget | `cfg.memory.inject_max_chars` (default 2000 chars total — roughly 500 tokens) | Cap so memory doesn't eat the whole context. LRU-by-`ts` selection if items exceed budget. |
 | Pruning policy | Manual `:memory forget` + optional `cfg.memory.prune_older_than_days` (default unset — no auto-pruning) | Conservative defaults; user owns the lifecycle. |
 | Interaction with sessions | `memory.jsonl` is independent of `sessions/*.jsonl`. Session JSONL stays the per-conversation log; memory is the curated cross-session knowledge | Distinct concerns. Session log answers "what did we talk about last Tuesday?"; memory answers "what does aish know about me/this-project?". |
 | Concurrency | Single-writer **enforced via `flock(LOCK_EX \| LOCK_NB)`** (R-B1) on the memory.jsonl file descriptor in `M.open_memory`. Held until close. Acquire failure → handle creation fails with a clear status message | Session logs got away with single-writer-by-uniqueness (timestamped filenames). memory.jsonl is one shared file, so the flock is the actual enforcement. The lock is advisory (Linux file-lock semantics) but every aish process honors it, which is sufficient for our trust model. |
 ---
 ## 3. Module Changes
 | File | State after Phase 3 | Phase 4 changes |
 |---|---|---|
 | `history.lua` | `M.open(path, meta)`, `session:append(turn)`, `M.load(path)`, `M.list_sessions(dir)` | Add memory functions alongside session functions: `M.open_memory(path) -> handle\|nil, err`; `handle:add(kind, content, tags?, source?) -> id`; `handle:forget(id)`; `handle:close()`; `M.load_memory(path) -> items_table` (resolves tombstones). Handle internals: fd (LuaJIT FFI int), next_id (scanned from existing JSONL), held flock. |
 | `ffi/libc.lua` | `chdir`, `errno`, `strerror`, plus Phase 1's waitpid/raw I/O/termios/poll, plus Phase 1's read/write/close/kill | Add `flock(2)` cdef (`int flock(int fd, int operation)`), constants `LOCK_EX = 2`, `LOCK_NB = 4`, `LOCK_UN = 8`. Wrapper `M.flock(fd, op) -> true\|false, errmsg`. Used by `history.M.open_memory` for the single-writer enforcement (R-B1). |
 | `context.lua` | system prompt + MCP block + NORRIS suffix toggle | Add a `memory_items` field on Context. `to_messages()` composes a dynamic "[background]" block on the system prompt when `memory_items` is non-empty AND not already in Norris mode (don't double-pile). Cap respected via the inject_max_chars budget. |
 | `repl.lua` | meta cmds + tool sub-loop + Norris driver | New meta: `:remember <text>` (shortcut for `:memory add fact <text>`); `:memory add <kind> <text>`; `:memory list`; `:memory forget <id>`; `:memory clear`; `:memory summarize`. At startup, after loading config + opening session, also open memory handle and inject the top-N items into `ctx.memory_items`. |
 | `broker.lua` | streaming chat + opts.tools/max_tokens/timeout_ms | No structural changes. Used by the summarizer (calls broker.chat with the session log as a single user turn). |
 | `config.lua` | example with mcp + safety blocks | Add commented-out `memory = { ... }` example. Default behavior is "no memory injection, no auto-summarize". |
 | `executor.lua` | unchanged | unchanged |
 | `safety.lua` | is_destructive + norris_step | unchanged (Norris-side suppression of background block is in context.lua, not safety.lua) |
 No new module files. All Phase 4 functionality grows existing files —
 mostly `history.lua` and `repl.lua`.
 ---
 ## 4. memory.jsonl Format
 ```jsonl
 {"id":1,"ts":"2026-05-13T19:01:01Z","kind":"fact","content":"User prefers terse responses; no end-of-turn summaries."}
 {"id":2,"ts":"2026-05-13T19:01:35Z","kind":"pref","content":"Default to :model deep for code reasoning tasks."}
 {"id":3,"ts":"2026-05-13T19:02:00Z","kind":"context","content":"Current project: aish (LuaJIT REPL with MCP tools).","tags":["aish","luajit"]}
 {"id":4,"ts":"2026-05-13T20:00:00Z","kind":"forget","target":2}
 ```
 After `load_memory`, item `id=2` is dropped because of the tombstone.
 Active items: 1, 3.
 ### kind values
 - **`fact`** — factual statement about the user, their environment, or
  project state.
 - **`pref`** — user preference for aish behavior (response style,
  default model, etc.).
 - **`context`** — project / domain context that helps the model orient
  on common tasks.
 - **`forget`** — tombstone; refers to another id via `target`.
 v1 is lightly typed — the model sees all kinds identically as a flat
 list in the [background] block. Future phases may route them
 differently (e.g. `pref` into a system-prompt section, `context` into
 a user-style preamble). Today they're prose.
 ### Item-id invariants (N3)
 - Items have `id ≥ 1`. The optional meta header line `{"meta":{...}}`
  has no `id` field and is ignored during load.
 - Tombstones with non-matching `target` (id doesn't exist, or already
  tombstoned) are no-ops at load — silently dropped from the active
  set. The `:memory forget` meta handler also checks active-set
  membership before appending a tombstone, surfacing a status when
  the id isn't active.
 ---
 ## 5. Startup Injection
 When aish boots and `cfg.memory` is present (or `memory.jsonl` exists):
 1. `history.load_memory(path)` reads all items, applies tombstone
   resolution, returns active items sorted by `ts` descending (most
   recent first).
 2. Take items until `cfg.memory.inject_max_chars` (default 2000) is
   consumed. Older items are dropped from injection (still in the
   file).
 3. Store on `ctx.memory_items` as an array of `{kind, content}` (id
   and ts not needed at render-time).
 `context.to_messages()` composition:
 ```
 <DEFAULT_SYSTEM_PROMPT> (Phase 0 + Phase 2 MCP block, statically embedded)
 [background] (memory loaded at startup; managed via :memory)
 - (fact) User prefers terse responses; no end-of-turn summaries.
 - (context) Current project: aish (LuaJIT REPL with MCP tools).
 ```
 Order of suffixes on the system prompt:
 1. DEFAULT_SYSTEM_PROMPT (Phase 0 + Phase 2 MCP guidance, currently
   baked-in to the static constant — R-C3 note: not a separate dynamic
   block in v1; future phases may split)
 2. Phase 4 [background] block (when memory_items non-empty AND NOT in
   Norris mode — R-C1 suppression to avoid ~16K chars (~4K tokens) of
   redundant system-prompt text per 8-step Norris run)
 3. Phase 3 NORRIS MODE block (when norris_active)
 When Norris is active the order becomes: DEFAULT → NORRIS (no background).
 Norris's planning loop already has the goal anchored in its suffix; the
 memory items rarely change step-to-step planning.
 ---
 ## 6. `:memory summarize` (Manual Auto-Extraction)
 `:memory summarize` triggers the active model (or
 `cfg.memory.summarizer_model` if set) to read the current session's
 turns and propose candidate memory items.
 ### Flow
 1. **Source of truth is the session log file** (R-C2), not
   `ctx:to_messages()`. `history.load(session_path)` returns all
   turns; filter out turns tagged `meta = "summarize"` (set on the
   assistant turn that emitted a prior summarize response) so the
   summarizer can't feed on its own output across multiple calls.
 2. Build a prompt: "Read the following conversation transcript. Extract
   facts, preferences, or context worth remembering across future
   sessions. Output ONE candidate per line, prefixed with the kind:
   `fact: …`, `pref: …`, or `context: …`. Maximum 10 candidates."
 3. Send the filtered transcript as a single user turn + the
   instruction above. Use `cfg.memory.summarizer_model` if set (else
   the active model). The resulting assistant turn gets logged
   with `meta = "summarize"` so future :memory summarize calls
   exclude it.
 4. Parse the response line-by-line for `(fact|pref|context):
   <content>` shapes. Tolerate markdown bullet prefixes (`-`, `*`).
 5. For each candidate, prompt the user:
   ```
   [memory] candidate (fact): User prefers terse responses; no end-of-turn summaries.
   keep? [y/N/edit]
   ```
   - `y` → write to memory.jsonl.
   - `N` (or empty) → drop.
   - `edit` → readline-edit the content before write.
 6. Status when done: `[aish] memory: added N candidates`.
 ### Why manual not automatic in v1
 A successful auto-summarize that runs at every `:quit` would either:
 - be expensive (tokens on every exit)
 - drift over time if the model picks up noise
 - compete with the user's intentional `:remember <text>` curation
 Manual gives the user the trigger. Q-list tracks auto-cadence options.
 ---
 ## 7. Meta Commands (Phase 4 additions)
 | Command | Action |
 |---|---|
 | `:remember <text>` | Shortcut for `:memory add fact <text>` |
 | `:memory add <kind> <text>` | Append a memory item (kind ∈ fact, pref, context) |
 | `:memory list` | Show all active memory items (id + ts + kind + content) |
 | `:memory forget <id>` | Append a tombstone for `<id>` |
 | `:memory clear` | Forget all active items (with `[y/N]` confirm) |
 | `:memory summarize` | Extract candidate items from current session via LLM |
 | `:memory inject` | Replace `ctx.memory_items` from a fresh `load_memory()` + LRU-by-ts truncation. Same logic as startup injection. Useful after hand-editing `memory.jsonl` or after `:memory forget` to immediately reflect in the system prompt. |
 `:help` updated.
 ---
 ## 8. Configuration Schema (Phase 4 example block)
 ```lua
 memory = {
    -- Path defaults to <history.dir>/memory.jsonl. Override per fleet
    -- if you want shared memory (read-only is safer than write-shared).
    -- path = (history.dir or "~/.local/share/aish") .. "/memory.jsonl",
    -- Cap on how much memory content is injected into the system prompt
    -- at startup. Roughly 2000 chars ≈ 500 tokens. Older items are
    -- dropped from injection if exceeded; they remain in the file.
    inject_max_chars = 2000,
    -- Which model to use for :memory summarize. Defaults to the active
    -- model when nil. Use "fast" for speed; "deep" for better quality.
    summarizer_model = "fast",
    -- Auto-prune items older than N days at startup. nil = never auto-prune.
    -- Manual :memory forget always works regardless.
    -- prune_older_than_days = 90,
 }
 ```
 ---
 ## 9. Migration from Phase 3
 User-visible:
 - `:remember`, `:memory add / list / forget / clear / summarize / inject`
  are new meta commands (see §7).
 - A `[background]` block in the system prompt appears when memory items
  exist.
 - Existing configs without `memory = {...}` continue to work — no
  injection, no auto-summarize. Phase 3 behavior intact.
 Substrate (PHASE0.md §3) invariants: unchanged.
 The `[background]` system-prompt suffix is composed dynamically by
 `context.to_messages()` (same pattern as Phase 2 MCP block and Phase 3
 NORRIS suffix). No new substrate contract.
 ---
 ## 10. Out of Scope (Phase 4)
 Per PHASE0.md §11 these belong to later phases:
 - Multi-model routing / cloud fallback (Phase 5).
 - Tree-sitter syntax highlighting (Phase 6).
 Specifically out of Phase 4 scope despite proximity:
 - Multi-process memory sharing (single-writer assumed v1).
 - Retrieval-augmented injection (RAG over memory.jsonl) — v1 just LRU.
 - Auto-trigger of `:memory summarize` at `:quit` (Q-list).
 - Memory categories beyond fact/pref/context — minimal typing v1.
 - Cross-aish-instance memory sync (memory.jsonl in a synced dir
  works coincidentally; not designed for it).
 - Encryption at rest — same posture as session logs (none in v1).
 ---
 ## 11. Open Questions
 | # | Question | Impact | Resolve by |
 |---|---|---|---|
 | Q31 | Auto-summarize trigger: manual only (current), automatic at `:quit`, automatic on token-budget eviction, or config-flagged threshold? | history.lua + repl.lua | Phase 4 (analyze) |
 | Q32 | Editing memory items in place: `:memory edit <id>` to rewrite content? Append-only means edit = new id + forget old. Worth the extra meta? | history.lua + UX | Phase 4 (analyze) |
 | Q33 | ~~Memory injection while in Norris mode~~ | context.lua | **Resolved at review (R-C1)**: SUPPRESSED. Memory items aren't injected when `ctx.norris_active == true`. Norris has its goal anchor in the NORRIS suffix; 16K of redundant background per 8-step run is not worth the marginal context value. |
 | Q34 | Memory kinds: stick with fact/pref/context or split prefs into a dedicated section of the system prompt (where they're more impactful)? v1 says no — flat list. | context.lua + UX | Phase 5 if it bites |
 | Q35 | Privacy / redaction: `:memory summarize` could capture sensitive tokens from a chat (passwords, paths). Should it auto-redact? Strip command-history-style? | safety.lua + memory.lua | Phase 4 (verify) — review user-emergent risk |
 | Q36 | Memory deduplication: user adds the same fact twice. Detect and warn, dedupe silently, or allow? v1: allow (cheap; user can `:memory list` to spot). | history.lua | Phase 4 (verify) |
 ---
 ## 12. Implementation Plan (commit-by-commit)
 Bottom-up, same cadence as Phase 0/1/2/3. Five commits expected:
 1. **`history.lua` — memory store + `ffi/libc.lua` flock (R-B1 bundled).**
   - `ffi/libc.lua`: cdef `flock(2)` + LOCK_EX/LOCK_NB/LOCK_UN constants
     + `M.flock(fd, op)` wrapper.
   - `history.lua`: `M.open_memory(path)` opens the file (creating parent
     dirs + meta-header line if empty), takes `flock(LOCK_EX | LOCK_NB)`
     on the fd, scans the existing JSONL for max id → handle.next_id.
     Returns `(handle, nil)` on success; `(nil, errmsg)` on lock-held.
   - `handle:add(kind, content, tags?, source?)`: assigns next id,
     appends JSON line, returns id.
   - `handle:forget(id)`: appends a tombstone for id.
   - `handle:close()`: releases flock + closes fd.
   - `M.load_memory(path)`: reads all lines, builds forget-target set
     from kind=="forget" entries, returns active items sorted by `ts`
     descending. Drops items whose id is in the forget-set OR whose id
     is nil (meta header).
   **Test in isolation**: round-trip add/forget/load, lock-held
   detection (open twice in same process, second should fail).
 2. **`context.lua` — memory injection.** Add `ctx.memory_items` and
   the `[background]` block composer in `to_messages()`. Cap by
   `inject_max_chars`. **Test in isolation**: assert composition order
   (MCP → background → Norris); cap honored.
 3. **`repl.lua` — `:remember` + `:memory list / add / forget / clear / inject`.**
   At startup, after MCP setup, open the memory handle + LRU-load items.
   Hook the meta dispatch. No summarize yet. **End-to-end**: run aish,
   `:remember X`, `:quit`, restart, `:memory list` shows X, `:history`
   shows X in [background].
 4. **`:memory summarize`** — manual extraction. Bundle a system-prompt
   for the summarizer model; parse response; per-candidate confirm
   prompt; append accepted items. **End-to-end**: short conversation,
   summarize, accept one of two candidates, restart, verify accepted
   one persists.
 5. **`config.lua` — example memory block.** Documentation-only;
   commented-out example. Final commit.
 ### Risk / non-obvious
 - **Counter persistence**: `memory:add` needs a monotonic id. Options:
  (a) sidecar `memory.id` file with a single integer, (b) scan the
  JSONL on open for max id, (c) use timestamp as id (no monotonic
  guarantee across rapid adds). Plan: (b) — scan once at open; cache
  in the handle. Ids are Lua numbers (doubles), which stay exact up
  to 2^53 — they lose precision rather than wrap, and at that many
  entries we're fine.
 - **Tombstone resolution at load**: build a set of forget-target ids
  from kind=="forget" entries; filter active items to exclude. Order
  doesn't matter (tombstones can appear before their targets if the
  file is hand-edited; the resolution is set-based).
 - **Empty file at open** vs **nonexistent file**: both should yield an
  empty memory handle. Phase 1's `history.open` already handles file
  creation; extend the pattern.
 - **System prompt growth**: the suffix-stacking pattern is up to 4
  blocks now (default + MCP + background + Norris). Size ~200
  + ~80 + 2000 + ~250 = ~2530 chars (roughly 630 tokens at ~4
  chars/token) baseline before any user/asst
  turns. Worth measuring at baseline phase.
 - **`:memory summarize` parse robustness**: small models may emit
  "fact: ..." sometimes with markdown bullets, sometimes without.
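  Parser should tolerate `^[-*]?\s*(fact|pref|context):\s*(.+)`
  (regex notation — Lua patterns have no `|` or `\s`; match
  `^[%-*]?%s*(%a+):%s*(.+)` and validate the captured kind against
  fact/pref/context).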
 - **`:memory clear` with confirm**: same UX as Phase 3 destructive
  prompts. `[y/N]` default-no.
 ### Open at plan; resolve at review
 - Whether `:remember` should append to the LIVE `ctx.memory_items`
  immediately (so the model sees it on the next turn without restart)
  or only on next session boot. v1 says yes — append both to file AND
  to live ctx for immediate visibility.
 - Whether the summarizer should be fed the FULL session log or just
  recent turns (token budget). v1 says full minus the [background]
  suffix; cap at session-log size <= 64KB or last N turns.
 ---
 *End of Phase 4 Manifest — aish*
@@ -0,0 +1,440 @@
 # aish — Phase 5 Manifest
 **Project:** aish — AI-augmented conversational shell
 **Document:** Phase 5 Requirements, Architecture & Design Decisions
 **Status:** Plan (review fold-in 2026-05-13 — callback signature, Norris suppression, cost defaults resolved)
 **Date:** 2026-05-13
 **Review fold-in (2026-05-13):**
 R-B1. **Summary callback signature canonical**: the closure is
    `summarize_fn(prior_summary, evicted_turns) -> string | nil`.
    `prior_summary` is `nil` on the first ever summarize; otherwise
    the current `ctx.summary` string. `evicted_turns` is `nil` for
    the re-summarize-compress trigger (C1 resolution); otherwise the
    array of evicted turn tables. The closure dispatches:
      first-time:  prior=nil,    evicted=[...]   → "summarize these turns"
      additive:    prior=str,    evicted=[...]   → "extend the prior summary"
      compress:    prior=str,    evicted=nil     → "compress the prior summary"
 R-C2. **Routing taken once per ask_ai**: the model decision happens
    on entry to `ask_ai`. The chosen `active_cfg` is used for every
    iteration of the tool-call sub-loop. Original `active_cfg` is
    restored after `ask_ai` returns. NOT per-broker-call.
 R-C3. **AUTO-routing does NOT fire inside Norris**: `run_norris`
    operates on a fixed model (whatever the user set via `:model`
    before launching). The auto-router would otherwise switch models
    mid-plan, which loses planning continuity and costs tokens
    rebuilding context. State explicit in §4 + §10.
 R-C4. **Summary block suppressed under Norris**: mirrors Phase 4
    R-C1 ([background] suppression). Both blocks are "earlier context"
    the planner generally doesn't need mid-iteration. §6 + §3 reflect.
 R-C5. **Fallback pattern coverage**:
    - Add `HTTP 408` to §5 patterns (Q41 moves from open to resolved).
    - Add `Operation timed out` (curl variant of "Timeout was reached").
    - Drop "HTTP response code said error" from A2 — FAILONERROR was
      removed in Phase 4 commit `f26cbd9`, this shape no longer fires.
 NITs folded:
  N1. `:route check <text>` always runs the heuristic regardless of
      `cfg.routing.auto` — debug aid surfaces the class + would-be
      model + "(routing currently disabled)" suffix when auto is off.
  N2. **`reasoning → nil` by default** — the v1 heuristic that maps
      "explain" / "why" / "how does" to a model is too aggressive
      paired with `nil = keep current` semantics. User must
      EXPLICITLY map `routing.classes.reasoning = "cloud"` to send
      reasoning prompts to paid API. Same cost-safety rationale as
      `cfg.routing.auto = false`.
  N3. "Retry only when no deltas have arrived" promoted to normative
      rule in §5 (was in §11 risk row).
  N4. Config key renamed `cfg.routing.cloud_fallback` →
      `cfg.routing.fallback` to align with the `:fallback` meta verb.
      Single-source naming.
 **Analyze findings (2026-05-13):**
 A1. **router.lua surface clean** — already a pure-Lua module with
    `M.classify(line, config) -> (kind, payload)`. Adding
    `M.classify_model(text, cfg) -> name | nil` is a natural sibling.
    No structural refactor.
 A2. **broker error message shapes** all carry transport-stage prefixes
    that the fallback matcher must account for. The actual shapes
    callers see:
      "transport: HTTP %d%d%d: <body-snippet>"   (from post_sse status>=400)
      "transport: Timeout was reached"
      "transport: Couldn't resolve host"
      "transport: Connection refused"
      "transport: HTTP response code said error"  (no longer emitted — FAILONERROR removed in `f26cbd9`; dropped per R-C5)
      "api: <error.message>"                      (SSE-framed error envelope)
      "broker: model_cfg.endpoint and .model required" (config bug)
    Fallback patterns in §5 should match against the "transport: "
    prefix explicitly. "api: ..." errors don't fall back (they're
    semantic — bad request shape, not server failure). "broker: ..."
    errors don't fall back either (config bug).
 A3. **Q38 resolved at analyze** — placing the rolling summary as
    `turns[1]` with `role:"system"` would produce system/system
    back-to-back in to_messages output (msg[1] is the composed
    system prompt; msg[2] would be the summary as another system
    message). Strict templates may reject this same way they reject
    user/user (PHASE0 §6). Resolution: render the summary INSIDE the
    composed system message (same pattern as the [background] and
    NORRIS blocks). Storage stays simple — keep `_summary` text on
    `ctx.summary` (NOT in `ctx.turns`), append to the system prompt
    in `to_messages` alongside the [background] and NORRIS blocks.
    §6 + §3 reflect.
 PHASE0 is the locked substrate; PHASE1-4 are layered on top. This manifest
 specifies what Phase 5 adds — **multi-model routing**, **cloud fallback**,
 and **context summarization on eviction**.
 ---
 ## 1. Scope of Phase 5
 Three pillars per PHASE0.md §11 row 5:
 1. **Multi-model routing by task type** — `router.lua` extended with a
   per-request `classify_model(text, cfg)` that suggests a model
   preset based on lightweight heuristics over the user input.
   Opt-in via `cfg.routing.auto = true`; default off (explicit `:model`
   stays the only switch).
 2. **Cloud fallback on local failure** — when the active broker call
   returns `nil, err` for a transport reason that looks like
   "local backend down" (HTTP 502 / 503 / 404 model-not-found /
   libcurl connection-refused / timeout), automatically retry once
   against the configured `cloud` preset, surfacing a status line so
   the user knows what happened. Opt-in via `cfg.routing.fallback = true` (renamed from `cloud_fallback`, N4);
   default off (single-shot only).
 3. **Context summarization on eviction** — when
   `context.enforce_budget()` would evict the oldest turn pair, instead
   send those turns to the `fast` model (or `cfg.context.summarizer_model`)
   with "summarize these turns in 2-3 sentences", then replace them
   with a rolling summary stored on `ctx.summary` and rendered inside
   the system message (A3 — not a separate `role:"system"` turn).
   Subsequent evictions append to or re-summarize the rolling summary.
   Opt-in via `cfg.context.summarize_on_evict = true`; default off
   (Phase 0 silent eviction stays the default).
 **Phase 5 is done when:**
 - With `cfg.routing.auto = true`, a prompt like "explain this Python
  traceback ..." gets routed to `deep` while "ls /tmp" or "what time
  is it?" stays on `fast` — visible status `[aish] routed to deep`.
 - With `cfg.routing.fallback = true`, killing the local
  llama.cpp upstream and asking a question yields a single retry on
  the cloud preset + a status line.
 - With `cfg.context.summarize_on_evict = true`, a long conversation
  that exceeds `max_turns` no longer silently drops history — the
  evicted span is summarized into a single rolling summary the model
  still sees (in the system message, per A3).
 - Existing configs without `cfg.routing` or `cfg.context.summarize_on_evict`
  behave exactly like Phase 4 (Phase 4 regression coverage).
 ---
 ## 2. Technology Decisions (delta from Phase 4)
 | Decision | Choice | Rationale |
 |---|---|---|
 | Routing trigger | Per-request, in `repl.ask_ai`, BEFORE the broker call | Same hook point as the tool-sub-loop entry. Decision is one function call (`router.classify_model`) that returns the model name (resolved to its cfg via `cfg.models`) OR nil = keep current. |
 | Classification mechanism | **Pure-Lua heuristics** in `router.classify_model` — keyword/length thresholds, no LLM call | Fast (no network), deterministic, debuggable. An LLM-based classifier is overkill v1; can be added in Phase 6+ if heuristics drift. |
 | Routing classes (v1) | `code`, `reasoning`, `default` → mapped to model presets via `cfg.routing.classes` | Three classes for the first cut. **Defaults (N2 fold-in)**: `code → "deep"`, `reasoning → nil` (heuristic still fires but no override unless user maps it), `default → nil`. The aggressive `reasoning → "cloud"` default sent ordinary "why does ..." prompts to a paid API; user must opt in explicitly to pay for reasoning. Same cost-safety rationale as `cfg.routing.auto = false`. |
 | Routing cost-safety | `cfg.routing.auto = false` default | Same rationale as `confirm_cmd = true` and `llm_second_opinion = true`: a default-on routing maps "explain ..." prompts to whatever class maps to `"cloud"`, spending paid-API tokens on prompts the user typed for what they thought was their local model. Default off; user opts in. |
 | Fallback trigger | Transport-error pattern match against `err` string — HTTP 5xx, HTTP 408, model_not_found, "Connection refused", "Couldn't resolve host", "Timeout was reached", "Operation timed out" | These are the shapes the broker actually emits (full pattern list in §5). Library-error patterns are stable enough that string-match is fine for v1. |
 | Fallback target | `cfg.routing.fallback_model` (default `"cloud"` when present) | One-hop fallback only; if cloud also fails, surface the error normally. No retry loops. |
 | Fallback timing | **Only retry when no deltas have arrived yet** (N3 fold-in) | If the local broker emits partial text then 5xx's mid-stream, the user has seen prose; retrying via cloud would duplicate the prefix and confuse the user. The retry path checks an `any_delta` flag in the on_delta callback; only retries when false. |
 | Fallback announcement | Status line `[aish] local <name> failed (<reason>); retrying via <fallback_name>` | Visibility — user always knows when a fallback fired. |
 | Summarize trigger | Inside `context.enforce_budget()`, when it would otherwise `table.remove` | Same place the eviction status fires. The summarize is a *replacement* not an addition; total turn count stays bounded. |
 | Summary storage shape | Single rolling string on `ctx.summary`, rendered into the system message under an `[earlier conversation summary]` header (A3 — NOT a separate `role = "system"` turn, which would produce system/system alternation) | One string carries all evicted history. New evictions either *append* to it (cheap) or trigger a re-summarize when the summary itself exceeds a char cap (default 2000). |
 | Summary model | `cfg.context.summarizer_model` (default `"fast"`) | Same pattern as `cfg.memory.summarizer_model`. Fast model is cheap enough to summarize on every eviction. |
 | Summary failure handling | If broker returns nil, fall back to *silent eviction* (Phase 0 behavior) and status-log once. Don't block the user's main request. | Best-effort; never let summarization break the REPL. |
 ---
 ## 3. Module Changes
 | File | State after Phase 4 | Phase 5 changes |
 |---|---|---|
 | `router.lua` | `classify(line, config)` → `(kind, payload)` for shell/AI/meta dispatch | Add `M.classify_model(text, cfg) -> name | nil`. Heuristics: line length > N, presence of code-fence backticks, keywords like "traceback", "stacktrace", "explain", "why does", etc. Returns the model NAME (string) or nil = keep current. |
 | `context.lua` | turns + memory_items + Norris suffix | Extend `enforce_budget()` to invoke a callback (passed via `Context.new(opts.summarize_fn)`) when about to evict. Store the returned summary as `ctx.summary` (string) — NOT a turn (A3 — avoids system/system alternation). `to_messages` composes it into the system message alongside `[background]` and NORRIS, between them: `system → [background] → [earlier summary] → NORRIS`. New evictions append to `ctx.summary`; when its length exceeds `max_summary_chars` (default 2000), the callback is invoked AGAIN with `(prior_summary, new_evicted_turns)` to re-summarize. Silent eviction is the fallback when the callback returns nil. |
 | `repl.lua` | tool-sub-loop + meta + memory injection | (a) Pre-broker hook: if `cfg.routing.auto`, call `router.classify_model(text, cfg)` and switch `active_cfg` for THIS request only (revert after). (b) Post-broker error hook: if err matches a fallback pattern AND `cfg.routing.fallback`, retry against the fallback model once. (c) Wire `Context.new` with a `summarize_fn = function(prior_summary, evicted_turns) ... end` closure (R-B1 signature) that calls `broker.chat(cfg.models[cfg.context.summarizer_model], ..., {max_tokens=300})`. |
 | `broker.lua` | streaming + opts.tools/max_tokens/timeout_ms | Unchanged — Phase 5 composes on top of the existing surface. |
 | `config.lua` | example with mcp/safety/memory blocks | Add commented-out `routing = {...}` and `context.summarize_on_evict = true` example. |
 No new module files. All Phase 5 functionality grows existing files —
 mostly `repl.lua` and `router.lua`.
 ---
 ## 4. Routing Heuristics (v1)
 `router.classify_model(text, cfg)` returns a model NAME (looked up in
 `cfg.routing.classes`) or `nil` (use the user-set active model).
 Heuristics, in order — first hit wins:
 1. **Code class** if any of:
   - Triple-backtick code fence anywhere
   - Token "traceback" / "stacktrace" / "stack trace" (case-insensitive)
   - Token "error:" or "exception:" near beginning
   - Text contains a path-like `./|/usr|~/` + `.py|.lua|.c|.js|.go|.rs`
   - More than 4 lines AND has indentation (looks like a paste)
 2. **Reasoning class** if any of:
   - Token "explain" / "why" / "how does" / "compare"
   - Question mark + > 100 chars total
 3. **Default class** otherwise.
 Each class maps to a model name via `cfg.routing.classes`:
 ```lua
 routing = {
    auto = true,
    classes = {
        code      = "deep",     -- code questions to deep
         reasoning = "cloud",    -- explicit opt-in (default is nil, N2): reasoning to cloud
         default   = nil,        -- nil = keep current active model
     },
     fallback = true,            -- renamed from cloud_fallback (N4)
    fallback_model = "cloud",
 }
 ```
 When `auto = false`, `classify_model` returns nil always — equivalent to
 not setting a routing block. The heuristic functions live behind the
 flag.
 ---
 ## 5. Cloud Fallback Flow
 In `repl.ask_ai` after the broker call:
 ```lua
 local ok, err = broker.chat_stream(active_cfg, msgs, on_delta, opts)
 if not ok and should_fallback(err, cfg) then
    renderer.status(("local %s failed (%s); retrying via %s")
                    :format(active_name, fallback_reason(err),
                            cfg.routing.fallback_model))
    local fb_cfg = cfg.models[cfg.routing.fallback_model]
    if fb_cfg then
        ok, err = broker.chat_stream(fb_cfg, msgs, on_delta, opts)
    end
 end
 ```
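 `should_fallback(err, cfg)` matches `err` against fallback patterns
 ONLY when `cfg.routing.fallback == true`. Otherwise returns false.
 Normative (N3): the retry fires only when no deltas have arrived yet —
 `ask_ai` tracks an `any_delta` flag in `on_delta` and skips the retry
 once any text has been streamed, so the user never sees a duplicated
 prefix.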
 ### Fallback-eligible error patterns
 All patterns match against the err string AS IT ARRIVES from broker.lua,
 which is prefixed `"transport: "` for libcurl/HTTP issues (A2 confirmed).
 The matcher strips the prefix before testing.
 | Pattern (after prefix strip) | Meaning |
 |---|---|
 | `^HTTP 5%d%d` | server-side error (502 Bad Gateway, 503 Unavailable, 504 Timeout) |
 | `^HTTP 404.*model_not_found` | the routed model isn't loaded on the local backend |
 | `^HTTP 408` | Request Timeout (gateway-level; some proxies emit this — Q41 resolved) |
 | `Couldn'?t resolve host` | DNS / unreachable local broker |
 | `Connection refused` | broker not listening |
 | `Timeout was reached` | libcurl's internal timeout phrasing |
 | `Operation timed out` | curl variant of timeout (libcurl version-dependent) |
 Errors NOT matched (NOT retried):
 - HTTP 401 / 403 (auth failure — won't get better on cloud)
 - HTTP 400 (bad request — schema issue)
 - `^api:` errors (semantic — bad request shape)
 - `^broker:` errors (config bug — endpoint/model missing)
 - Lua-level errors (broker pipeline bug, not transport)
 ---
 ## 6. Context Summarization on Eviction
 `Context.new(opts)` accepts an optional
 `summarize_fn(prior_summary, evicted_turns) -> string | nil` closure
 (R-B1). When set AND `enforce_budget` would evict, the callback is
 invoked with the current summary and the evicted slice; the returned
 summary (if non-nil) replaces `ctx.summary`.
 ### Storage shape (post-A3 resolution)
 The rolling summary lives on `ctx.summary` (a string), NOT in `ctx.turns`:
 ```lua
 ctx.summary = "Earlier conversation: user discussed X, asked about Y, "
           .. "agreed to Z. Later asked..."
 ```
 `to_messages()` composes it into the system message between `[background]`
 and the NORRIS suffix:
 ```
 DEFAULT_SYSTEM_PROMPT
 [background] (memory items)
 - (fact) ...
 [earlier conversation summary]
 <ctx.summary>
 [NORRIS MODE] (if active)
 ...
 ```
 No new role:"system" message at turns[1] — avoids system/system alternation.
 ### Summary update flow
 1. enforce_budget identifies the oldest 2 turns to evict (user + assistant).
 2. If `summarize_fn` is set, call it with `(prior_summary, evicted_turns)`.
 3. If summary text returned:
   - Replace `ctx.summary` with the new text.
   - If `#ctx.summary > max_summary_chars` (default 2000), invoke the
     callback once more with `(ctx.summary, nil)` — the R-B1 compress
     trigger — to re-summarize for
     compactness. Lossy by design — Q40 documents this trade-off.
 4. Remove the evicted turns from `ctx.turns`.
 5. If callback returned nil → silent eviction; `ctx.summary` unchanged.
 ### Failure handling
 Inside the callback (in `repl.lua`):
 ```lua
 local summary, err = broker.chat(summarizer_cfg, {
    {role="system", content="Summarize the following conversation in 2-3 sentences."},
    {role="user",   content=render_turns_compact(evicted)},
 }, {max_tokens=300, timeout_ms=30000})
 return summary  -- nil propagates; context.lua falls back to silent eviction
 ```
 ---
 ## 7. Meta Commands (Phase 5 additions)
 | Command | Action |
 |---|---|
 | `:route on` / `:route off` | Toggle `cfg.routing.auto` at runtime (overrides config) |
 | `:route classes` | Show the active class → model mapping |
 | `:route check <text>` | Print which class a given text would be routed to (debug aid) |
 | `:fallback on` / `:fallback off` | Toggle `cfg.routing.fallback` at runtime |
 `:help` updated.
 ---
 ## 8. Migration from Phase 4
 User-visible:
 - New `:route` and `:fallback` meta commands.
 - With `cfg.routing.auto`, the active model may CHANGE per-request as
  the heuristic fires. Prompt color tag could vary (Phase 6 maybe).
 - With `cfg.context.summarize_on_evict`, eviction now spends a fast-
  model round-trip instead of silently dropping turns.
 Existing configs without `routing` or `context.summarize_on_evict`
 continue exactly as Phase 4 — defaults are OFF.
 Substrate (PHASE0.md §3) invariants: unchanged. The `CMD:` extraction
 marker, `cd` interception, and the entire system-prompt suffix order
 from Phase 4 stay the same.
 ---
 ## 9. Out of Scope (Phase 5)
 Per PHASE0.md §11 these belong to Phase 6:
 - Tree-sitter syntax highlighting hooks
 - Diff-aware code injection
 - Project-level context (file tree summary)
 Specifically out of Phase 5:
 - LLM-based classification (heuristics-only v1).
 - Multi-hop fallback chains (one retry only).
 - Per-class temperature overrides (use the model preset's default).
 - Cost accounting for cloud calls (Q-list candidate).
 - Auto-router learning from user `:model` overrides (Phase 6+).
 ---
 ## 10. Open Questions
 | # | Question | Impact | Resolve by |
 |---|---|---|---|
 | Q37 | Should routing apply to `:ask <text>` (explicit AI route) the same way it does to bare prompts? Yes seems obvious but worth documenting. | repl.lua | Phase 5 (plan) |
 | Q38 | ~~Summary turn placement: index 1 vs index 0~~ | context.lua | **Resolved at analyze (A3)**: NEITHER — summary lives on `ctx.summary` (string) and composes into the SYSTEM MESSAGE alongside [background] and NORRIS suffix. No new role:"system" message; no alternation risk. |
 | Q39 | ~~Fallback under Norris~~ | repl.lua + safety.lua | **Resolved at review (R-C3)**: AUTO-routing does NOT fire inside the Norris loop. The model is fixed at `:norris <goal>` launch time; the planner stays on it for every iteration. Per-iteration fallback (if a local broker call inside Norris fails) is still gated by `cfg.routing.fallback`; that retries the failed call against cloud but doesn't permanently switch the planner. |
 | Q40 | Summarizer recursion: the summary itself might be summarized later when it grows past max_summary_chars. Does the re-summarize lose fidelity? Probably yes; acceptable trade-off. Note the lossy-by-design contract in §6. | context.lua | Phase 5 (verify) |
 | Q41 | ~~HTTP 408 / Operation timed out eligibility~~ | repl.lua | **Resolved at review (R-C5)**: both added to §5 patterns. |
 | Q42 | Auto-router decisions inside the tool-call sub-loop: does each sub-iteration re-classify, or does the first user turn fix the model for the whole sub-loop? Proposal: fix at sub-loop entry — model switching mid-tool-call would confuse the model AND cost tokens by rebuilding context. | repl.lua | Phase 5 (plan) |
 ---
 ## 11. Implementation Plan (commit-by-commit)
 Five commits expected:
 1. **`router.lua` — `classify_model`.** Pure-Lua heuristics; no IO. Returns
   model name or nil. Module-local pattern set so tests can introspect.
   **Test in isolation**: ~30-case corpus of (input → expected class).
 2. **`context.lua` — eviction callback.** Add `opts.summarize_fn`,
   `ctx.summary` string storage (A3 — not an index-1 turn), and
   `to_messages()` rendering of the `[earlier conversation summary]`
   block inside the system message.
   **Test in isolation**: mock summarize_fn returning "(summary N)",
   build a context that exceeds budget, verify the summary block
   appears and accumulates.
 3. **`repl.lua` — fallback + routing wiring.** Pre-broker
   classify_model hook (gated by cfg.routing.auto); post-error
   fallback retry (gated by cfg.routing.fallback); wire
   summarize_fn at Context.new time. **Test against hossenfelder**:
   prompt classified as "code" → routes to deep; deliberately
   misconfigure local endpoint → fallback fires.
 4. **`:route` and `:fallback` meta commands.** Standalone — config
   toggles via runtime cmds. **End-to-end**: boot, `:route on`,
   issue a query, observe routing status; `:route off`, query
   again, no routing.
 5. **`config.lua` — routing + summarize_on_evict example.**
   Documentation-only; commented-out example block. Final commit.
 ### Risk / non-obvious
 - **Heuristic false positives**: a normal conversational question
  containing the word "explain" would get routed to cloud under an
  aggressive mapping. Resolved by N2: `reasoning → nil` is the
  default and the user opts in explicitly per class. The mapping
  shown in §4 is an example of such an opt-in, not the default.
 - **Active-model state after routing**: the per-request routing
  switches `active_cfg` momentarily. The `prompt()` function reads
  `active_name` which IS reverted post-request, so the prompt label
  stays accurate.
 - **Fallback during streaming**: if the local broker fails MID-stream
  (e.g. emits some text then 5xx), the user has already seen partial
  text. Retrying via cloud means duplicated prefix. v1 only retries
  on errors BEFORE any deltas arrived (we can detect by tracking
  whether on_delta was called).
 - **Summarize during Norris**: Norris's planning loop generates many
  turns. Eviction during Norris means summarizing mid-plan — the
  model loses context about its earlier steps. Risky. v1 disables
  summarize when ctx.norris_active.
 - **Memory items + summary turn**: both are dynamic system-context
  additions. The summary is the `[earlier conversation summary]`
  block and memory is the `[background]` block, both composed into
  the actual system message (A3). Compatible — no overlap.
 ---
 *End of Phase 5 Manifest — aish*
@@ -0,0 +1,239 @@
 # Phase 6 Baseline — pre-implementation measurements
 **Date:** 2026-05-16
 **Tree probed:** `ad52fe4` (Phase 5 + #2/#3/#4/#5/#6/#7/#8/#9/#10/#11/#13/#14/#23/#32/#33/#51/#52 follow-up).
 **Hosts probed:** noether (primary), higgs (Pi5).
 **Broker probed:** `hossenfelder.fritz.box:8082` (local `qwen-coder-7b-snappy-8k`, cloud `anthropic/claude-haiku-4.5`).
 This is the Phase 7 (verify) anchor for Phase 6. Captures the world
 just before tree-sitter / diff / project-tree implementation lands.
 ---
 ## B1. `git` output through `executor.exec` carries ANSI + terminal control
 `executor.exec` uses `pty.spawn` (forkpty). When git's stdout is a
 PTY, git enables both color output AND interactive pager defaults
 (DEC keypad mode `\27[?1h=` ... `\27[?1l>`, line-clear `\27[K`).
 Observation:
 ```
 > executor.exec("git diff --stat HEAD~1..HEAD")
 exit=0 len=173
 \27[?1h= docs/PHASE6.md | 207 \27[32m++...\27[m\27[31m--...\27[m\27[m
 1 file changed, 166 insertions(+), 41 deletions(-)\27[m
 \27[K\27[?1l>
 ```
 With `--no-pager`: keypad sequences gone, color stays:
 ```
 > executor.exec("git --no-pager diff --stat HEAD~1..HEAD")
 exit=0 len=148
 docs/PHASE6.md | 207 \27[32m++...\27[m\27[31m--...\27[m
 1 file changed, 166 insertions(+), 41 deletions(-)
 ```
 With `--no-pager --color=never`: clean.
 ```
 > executor.exec("git --no-pager diff --color=never --stat HEAD~1..HEAD")
 exit=0 len=132 clean=true
 docs/PHASE6.md | 207 +++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 166 insertions(+), 41 deletions(-)
 ```
 **Implication for §5 (`:diff` meta):** the implementation MUST use
 both `--no-pager` and `--color=never`. If either flag is missing, the
 injected context block carries escape codes that confuse the model AND
 inflate token counts.
 The same flags apply to any future `git log` / `git show` / `git blame`
 verbs that might land beyond Phase 6.
 ---
 ## B2. SSE chunk size envelope (relevant to fence-aware highlighter)
 `renderer.assistant_delta` receives whatever chunks the broker streams.
 Measured against two model classes:
 ### Local llama.cpp (`qwen-coder-7b-snappy-8k`)
 ```
 prompt: "reply with a python code block that prints hello world,
         then a brief explanation"
 max_tokens: 150
 chunks:     97
 total:      423 chars
 sizes:      min=1, max=13, median=4
 fences:     fence at char 58 -> chunk 14 ('```')
            fence at char 91 -> chunk 23 ('``')   <-- split fence
 ```
 **The local model splits fences across chunks** (`'``'` arrives, the
 final ` ` ` is in the next chunk). The fence-aware filter MUST handle
 fragment-across-boundary correctly.
 ### Cloud (`anthropic/claude-haiku-4.5` via OpenRouter)
 ```
 prompt: "write a 5-line python hello world example wrapped in a code fence"
 max_tokens: 150
 chunks:     3
 total:      60 chars
 sizes:      7 / 27 / 26
 fences:     fence at char 0  -> chunk 0 ('```python\n# Hello World in')
            fence at char 57 -> chunk 2 ('\nprint("Hello, World!")\n```')
 ```
 Cloud delivers BIG chunks (median ~26 chars); fences typically arrive
 intact within a single chunk.
 **Implication for §4 (highlight stream filter):** the state machine
 must accumulate enough `buf` to detect a fence opening or closing
 even when only `'``'` arrives in a chunk. The §4 design already
 specifies "look at the cumulative `buf`, so partial markers are
 recovered correctly" — confirmed necessary by local-model behavior.
 ---
 ## B3. **LuaJIT `io.popen():close()` does NOT expose exit codes**
 This is a divergence from Lua 5.2+ behavior assumed by the §4 (A4)
 highlighter resolution:
 ```
 > luajit -e "for _, cmd in ipairs({'true','false','exit 7'}) do
              local p = io.popen(cmd); local ok, err, code = p:close()
              print(cmd, ok, err, code) end"
 true     true    nil    nil
 false    true    nil    nil
 exit 7   true    nil    nil
 ```
 `io.popen():close()` returns `(true, nil, nil)` regardless of child
 exit status. The exit code is silently discarded.
 **Revised Q-H1 resolution (supersedes A4):** the highlighter must
 detect tree-sitter failure via a different channel. Cleanest path:
 write the body to a tmpfile, then invoke the highlighter via
 `executor.exec("cat tmpfile | tree-sitter highlight --lang X")`.
 `executor.exec` uses its own forkpty + waitpid path and DOES return
 `(out, exit_code)` reliably.
 Updated sketch:
 ```lua
 --- Colorize one fenced code-block body via the external tree-sitter CLI.
 -- Returns `body` unchanged when highlighting is disabled, `lang` has no
 -- lang_map entry, the tmpfile cannot be written, or the highlighter
 -- exits non-zero.
 -- @param body string      code-block text between the fence lines
 -- @param lang string|nil  fence info-string tag, looked up in lang_map
 -- @return string          highlighter output on success, else `body`
 local function highlighted(body, lang)
    if not highlight_enabled then return body end
    local ts_lang = lang_map[lang]
    if not ts_lang then return body end
    local tmp = os.tmpname()
    local f = io.open(tmp, "wb")
    if not f then
        -- os.tmpname has mkstemp semantics (B6), so the file may
        -- already exist on disk; don't leave it behind.
        os.remove(tmp)
        return body
    end
    -- A failed write or close (e.g. disk full) would hand tree-sitter
    -- truncated input; treat it like a highlighter failure.
    local wrote = f:write(body)
    local closed = f:close()
    if not (wrote and closed) then
        os.remove(tmp)
        return body
    end
    -- io.popen():close() exposes no exit code on LuaJIT, so go through
    -- executor.exec, which returns (out, exit_code).
    local out, code = executor.exec(
        ("cat %s | tree-sitter highlight --lang %s")
        :format(_shq(tmp), ts_lang))
    os.remove(tmp)
    if code ~= 0 then return body end
    return out
 end
 ```
 Cost: tmp file write + read + remove + one executor.exec roundtrip
 per code block. Acceptable; tree-sitter highlighter latency dominates.
 **This finding will fold into PHASE6.md §4 during the analyze
 revision** (or as a baseline-time amendment).
 ---
 ## B4. tree-sitter CLI presence on the fleet
 ```
 noether (local primary):  ABSENT  (which tree-sitter -> not found)
 higgs   (Pi5 / Debian 13): ABSENT  (which tree-sitter -> not found)
 ```
 **Implication for §1 (scope):** the design's "external CLI when
 present, no-op otherwise" decision is the right call — on the
 fleet as-tested, ZERO hosts ship tree-sitter by default. Users
 who want highlighting will need to opt in explicitly (apt / cargo /
 manual install).
 Documentation should mention this clearly in PHASE6 implementation
 notes + the config example. `:highlight on` against a host without
 the CLI should emit a clear "tree-sitter CLI not found; install with
 e.g. `apt install tree-sitter` or `cargo install tree-sitter-cli`"
 status, not silently no-op.
 ---
 ## B5. Project-tree envelope (`git ls-files` performance)
 ```
 > time git -C /home/mfritsche/src/aish ls-files --cached --others --exclude-standard >/dev/null
 real    0.002s
 files: 32, total: 449 chars, avg/file: 14
 ```
 Sampling other repos on noether (`~/src/*` with `.git/`):
 | Repo | Files | Time |
 |---|---|---|
 | aish | 32 | 2 ms |
 | ampere-fourier | 15 | 5 ms |
 | ampere-kernel-decoders | 23 | 1 ms |
 | cfw | 25 | (similar) |
 **Implication for §6 (`:tree` scan):**
 - Scan latency on typical local repos is negligible (<10ms).
 - The 4096-char default `tree_max_chars` cap accommodates ~290 paths
  at the observed avg of 14 chars/path — fine for most aish-target
  workflows.
 - Repos with thousands of files (kernel, nix-pkgs, etc.) WILL exceed
  the cap; users can lower `tree_depth` or raise the cap. The §9
  risk row already covers this; no design change needed.
 ---
 ## B6. `os.tmpname()` behavior
 ```
 > luajit -e "for i = 1, 3 do print(os.tmpname()) end"
 /tmp/lua_qAGTFV
 /tmp/lua_RhpXLK
 /tmp/lua_F9WtYx
 ```
 LuaJIT's `os.tmpname` returns POSIX-style `/tmp/lua_XXXXXX` paths.
 Adequate for B3's tmpfile-roundtrip pattern. No filesystem-level race
 window — `os.tmpname` uses `mkstemp(3)` semantics on Linux (returns
 a unique name; the caller is responsible for `io.open` and cleanup).
 Note: B3's pattern reopens the path by name with `io.open(tmp, "wb")`
 and then does `f:write(body); f:close()`. The `O_EXCL` guarantee
 covers only the `mkstemp` creation itself; `io.open` is a plain
 reopen-by-name and does not re-assert it. Acceptable for a local-only
 tmpfile holding short-lived code-block content; not a security
 concern (we trust the local user per PHASE0 §12).
 ---
 ## Summary
 | Finding | Affects | Resolution |
 |---|---|---|
 | B1 git ANSI/pager leakage | §5 `:diff` impl | Add `--no-pager --color=never` to every git invocation |
 | B2 SSE chunk envelope | §4 fence filter | Existing accumulator design is correct; local-model split-fence case confirmed necessary |
 | B3 io.popen no exit code | §4 (A4) highlighter | Revise: route via `executor.exec("cat tmp \| tree-sitter ...")` for reliable exit code |
 | B4 no tree-sitter on fleet | §1 / docs | Highlighter is opt-in; absent-CLI emits install-hint status |
 | B5 tree scan envelope | §6 `:tree` | No change; defaults fit observed repo sizes |
 | B6 os.tmpname semantics | §4 highlighter | Confirmed adequate for tmpfile-roundtrip |
 No structural changes to the formulate/analyze design. B1, B3, and
 B4 surface as implementation-time amendments to PHASE6.md sections
 §5, §4, and §1 respectively. Will fold these into the manifest
 during plan.
@@ -0,0 +1,896 @@
 # aish — Phase 6 Manifest
 **Project:** aish — AI-augmented conversational shell
 **Document:** Phase 6 Requirements, Architecture & Design Decisions
 **Status:** Implement (6 commits landed: c4fc7fd, d1dce83, 4d5f93a, 0d63f01, 11d0e59, this)
 **Date:** 2026-05-16
 **Review findings (independent agent, 2026-05-16) — 2 BLOCKERs resolved
 in-place, 7 CONCERNs folded, 6 NITs applied:**
 R1 (BLOCKER, RESOLVED). **§4 fence detector's `outside`-state branch
    drops the leading `'``'` chunk of a split-fence.** The §4
    pseudocode as written ("look for ` ```<lang>\n ` in chunk; if found
    [...] else: emit chunk as-is") emits the partial-fence chunk
    immediately, so the next chunk no longer sees the full marker.
    Contradicts B2's split-fence requirement. **Fix folded into §4:**
    `outside`-state also holds a small tail (up to 10 chars) when the
    chunk's tail could be a fence prefix; flushes on next push. Same
    pattern as the `secrets.lua` streaming rehydrator (`secrets.lua`
    ~213). Pseudocode + algorithm updated.
 R2 (BLOCKER, RESOLVED). **`highlighted()` file placement was ambiguous
    in §3 vs §12.** `highlighted()` needs `_shq` (currently a `repl.lua`
    M.run-local closure) and `require("executor")`. **Resolution:**
    `highlighted()` stays in `repl.lua`; `renderer.lua` exposes
    `renderer.set_highlight(enabled, detected, highlight_fn)`. The
    filter state machine in `renderer.lua` calls back through
    `highlight_fn(body, lang)` at fence-close. No `executor` dependency
    in `renderer.lua`; no `_shq` lift. §3 + §12 commit 5 updated to
    state this explicitly.
 R3 (CONCERN, FOLDED). **PTY raw-mode toggle per code block.** Each
    `executor.exec` call calls `libc.set_raw(0)` briefly. For an
    assistant turn with N fenced blocks that's N raw-mode toggles
    on the streaming hot path. Smoke-test for cursor/flicker before
    locking in. Added to §12 commit 5 risk row.
 R4 (CONCERN, FOLDED — risk noted, needs verify at implement-time).
    **`tree-sitter highlight --lang X` invocation grammar is
    unverified.** The upstream `tree-sitter` CLI's `highlight`
    subcommand canonically takes a path argument and infers language
    from the file extension via `~/.config/tree-sitter/config.json`.
    A `--lang` flag may not exist. Since B4 confirmed zero fleet hosts
    have tree-sitter installed, this can't be probed locally.
    **Resolution:** §4 amended — at commit 5 implement time, VERIFY
    against a real install. If `--lang` is wrong, switch to writing
    the tmpfile with the matching extension (`/tmp/lua_XXX.py`) and
    pass the path. Path-based discovery is the CLI's documented
    primary mode.
 R5 (CONCERN, FOLDED). **`:tree off` semantics ambiguous.** §6 listed
    it as "clear ctx.project" but didn't clarify whether subsequent
    `:tree` (no arg) re-uses cached opts or falls back to config
    defaults. Clarified in §6: `:tree off` is a one-shot clear of
    `ctx.project`; subsequent `:tree` re-scans with config defaults
    or the explicit arg if given.
 R6 (CONCERN, FOLDED). **cwd-coupling differs between `:diff` and
    `:tree`.** `:diff` reads `libc.getcwd()` at meta invocation
    time; `:tree`'s captured `ctx.project` is fixed at scan time
    (per A8). After `cd /other-project`, `:diff` shows the new
    project's diff but `ctx.project` still holds the old project's
    tree. Documented in §5 (the diff section now cross-refs §6 / A8)
    so the user-facing expectation is clear.
 R7 (CONCERN, FOLDED). **`:tree refresh` opts caching unspecified.**
    Should `:tree refresh` re-use the last explicit `:tree <N>` depth
    override, or fall back to `cfg.project.tree_depth`? Resolution:
    cache the last opts on `ctx._project_opts`; `:tree refresh` reuses
    them; falls back to config defaults if no prior call. §6 updated.
 R8 (CONCERN, FOLDED). **`:reset` interaction with `ctx.project`.**
    Phase 4 established that `:reset` does NOT clear `ctx.memory_items`
    (parity is desirable — startup-injected facts persist across a
    user-driven context reset). `ctx.project` should follow the same
    rule: `:reset` clears `ctx.turns` and `pending_exec_output` and
    `ctx.summary` (per `Context:reset` at `context.lua` ~343), but
    NOT memory_items and NOT project. Documented in §3 + §12 commit 1.
 R9 (CONCERN, FOLDED). **Status-bump duplication between §12 commits 5
    and 6.** Commit 5 sub-step (e) said "PHASE6 status → Implement";
    commit 6 also said the same. Resolved: commit 5e does NOT bump
    the status (only HELP update); commit 6 owns the status bump
    (along with the config example). One owner per change.
 R-N1..N6 (NITs, APPLIED):
  N1. §4 algorithm pseudocode now includes the SOL/post-newline
      anchor requirement (mid-line backticks in prose don't open a
      fence). The plan §12 risk row already promised this; now §4
      matches.
  N2. §4 detection block gained a comment explaining the `read("*l")
      and pipe:close()` pattern — close return-value is ignored per
      B3; presence of an output line is the signal.
  N3. §5 `:diff staged → git diff --cached` table row dropped (the
      meta is a thin pass-through; user types the right git flags).
      `:diff --cached` works directly. Surface is honest.
  N4. §6 `_scan_project_tree` switched from `os.execute("cd " .. shq
      .. " && git rev-parse ...")` to `git -C <dir> rev-parse
      --git-dir` — no subshell, more idiomatic.
  N5. §12 "Open at plan-time" first bullet (dir-arg vs hardcoded
      getcwd) dropped — already decided in §6's signature; not open.
  N6. §11 wording on Phase 7+ left as-is (reviewer marked purely
      cosmetic).
 **Analyze findings (2026-05-16):**
 A1. **renderer.lua surface clean** — `assistant_delta(chunk)` already
    concatenates into a `stream_buf` then `emit()`s the chunk;
    `assistant_flush()` finalizes with a trailing newline if missing.
    The fence-aware highlight filter slots in between chunk receipt and
    `emit` without restructuring; no callers besides `repl.lua` touch
    `stream_buf` so the filter state can live alongside it.
 A2. **executor surface clean** — `executor.exec(cmd)` already
    forkpty-spawns, captures + live-streams output, returns `(out, code)`.
    Phase 6's `:diff` and `_scan_project_tree` reuse this path verbatim;
    no new IO model. `git`-rooted commands inherit cwd from the parent
    (which `libc.chdir` already mutates), so a `:diff` after `cd` reads
    the right repo.
 A3. **context composition order locked** — current `to_messages` builds
    `sys_content = base + [background] + [earlier summary] + NORRIS suffix`.
    Phase 6 inserts `[project]` between `[background]` and `[earlier
    summary]`. Same Norris-suppression guard already in place
    (`if not self.norris_active`).
 A4. **Q-H1 RESOLVED: tmpfile roundtrip** for `tree-sitter highlight`
    write+read. Avoids ARGMAX risk on large code blocks (vs `printf
    BODY | tree-sitter ...`) and shell-escape complexity. Two file
    handles, deterministic cleanup via `os.remove`. Sketch:
    ```lua
    -- NOTE: formulate-time sketch, superseded by the B3 finding. On
    -- LuaJIT, io.popen():close() returns (true, nil, nil) regardless of
    -- the child's exit status, so `code` below is always nil and the
    -- `code ~= 0` guard always fires: this version would always return
    -- `body`. See the executor.exec-based `highlighted()` in §4.
    local tmp = os.tmpname()
    -- Write side: feed the body to tree-sitter's stdin; its stdout is
    -- redirected into the tmpfile.
    local w = io.popen(("tree-sitter highlight --lang %s > %s")
                       :format(lang, tmp), "w")
    w:write(body); local _, _, code = w:close()
    -- Read side: slurp the highlighter's output back from the tmpfile.
    local f = io.open(tmp, "rb"); local out = f:read("*a"); f:close()
    os.remove(tmp)
    if code ~= 0 then return body end  -- pass-through on failure
    return out
    ```
 A5. **Q-D1 RESOLVED: no confirm gate on `:diff`.** `git diff` is
    read-only; matches `:history`, `:sessions`, `:safety check` —
    none of which gate. Permission DSL (#9) only applies to AI-suggested
    `CMD:` lines, not user-issued metas.
 A6. **Q-D2 RESOLVED: tiered resolution for `@<token>`.** The mention
    parser tries `<token>` as a file path first; if it doesn't resolve
    AND the token contains `..`, retry as a diff range. This keeps
    `@../sibling.txt` (path) working AND allows `@origin/main..feature`
    (ref range — resolves via second attempt since no such file exists).
    No grammar prefix needed.
 A7. **Q-H2 RESOLVED: highlighting is assistant-output only in v1.**
    `expand_mentions` content lands in the user-turn payload — visible
    on the terminal via readline echo, not via `assistant_delta`. Filing
    "highlight @path-expanded code in echo" as v2 polish. Reason:
    intercepting readline echo for ANSI injection is non-trivial and
    orthogonal to the stream filter.
 A8. **Q-T1 RESOLVED: project tree captured at scan time, not auto-
    refreshed on cd.** `cd /other-project` leaves the existing
    `ctx.project` stale; `:tree refresh` is the manual verb to update.
    Auto-refresh on cd intercept is a v2 polish (the cd interceptor in
    `executor.maybe_chdir` is a clean hook for it).
 A9. **Q-T2 RESOLVED: rely on `.gitignore` via `git ls-files`** in repos;
    fall back to `find` with simple excludes outside. Custom
    include/exclude glob lists deferred to v2. Reason: most users live
    inside git repos; `.gitignore` already encodes their notion of
    "noise". Out-of-repo users get the simple fallback and can scope
    via `:tree <depth>`.
 A10. **`expand_mentions` punct-peel does NOT strip `/`** — so
     `@HEAD~1..HEAD,` peels the `,` and the underlying token `HEAD~1..HEAD`
     has no slash; the path-then-diff retry from A6 catches it. No new
     peel logic needed.
 A11. **Auto-injection ordering for `[project]`** — if both `cfg.memory.
     inject_max_chars` and `cfg.project.auto_tree` fire at startup, the
     order is: memory load → tree scan → first ask_ai. The composition
     in `to_messages` places `[background]` (memory) before `[project]`
     so the model reads memory facts before file tree. Documented in §3.
 A12. **Norris interaction** — `[project]` block follows the established
     [background]/[earlier summary] suppression rule under
     `ctx.norris_active`. Planner stays on its goal anchor; the tree
     can be re-introduced via the goal text if needed. Matches R-C1/R-C4.
 PHASE0 is the locked substrate; PHASE1-5 are layered on top. This manifest
 specifies what Phase 6 adds — **tree-sitter syntax highlighting hooks**,
 **diff-aware code injection**, and **project-level context (file-tree
 summary)**.
 ---
 ## 1. Scope of Phase 6
 Three pillars per PHASE0.md §11 row 6:
 1. **Tree-sitter syntax highlighting hooks** — when an external
   `tree-sitter` CLI is detected at startup, assistant code-fence
   content is filtered through it for ANSI-colorized display. Plain
   prose streams unchanged. When the CLI is absent, the filter is the
   identity function (zero overhead, zero hard dependency). Toggleable
   at runtime with `:highlight on|off`. Default off until the user
   opts in (don't surprise existing users with a display change).
   Per B4: tree-sitter is **absent on every fleet host probed**;
   `:highlight on` when the CLI is missing emits a status that names
   the install hint (`apt install tree-sitter` / `cargo install
   tree-sitter-cli`) rather than silently falling back to identity.
 2. **Diff-aware code injection** — surface git diffs as first-class
   context. Two entry points:
   - Meta verb: `:diff [args]` runs `git diff <args>` from cwd, appends
     output to context as exec-output. `:diff --cached`, `:diff HEAD~3`,
     `:diff main..feature` all delegate to git's argument grammar.
   - @-mention extension: `@HEAD..feature` (a ref-range expression
     anywhere a `@path` would go) expands inline as a fenced `diff`
     block, mirroring how `@README.md` already works.
 3. **Project-level context (file-tree summary)** — `git ls-files`-based
   tree summary of the cwd, injected as a `[project]` block in the
   system prompt. Two entry points:
   - Meta verb: `:tree [depth]` injects on demand; `:tree refresh`
     re-scans.
   - Auto-inject at startup when `cfg.project.auto_tree = true` —
     gated like memory injection so existing configs don't change
     behavior.
 **Phase 6 is done when:**
 - With `tree-sitter` CLI installed and `:highlight on`, the assistant
  reply ```py\nprint("hi")\n``` shows up with ANSI colors. Without
  the CLI, `:highlight on` is a no-op + emits a status warning.
 - `:diff` from a dirty git repo shows the working-tree diff in the
  exec-output frame; the model sees it on the next ask_ai turn.
 - `@HEAD~1..HEAD` in a prompt expands inline to a fenced diff block.
 - `:tree` injects a `[project] <N files>:` block visible in
  `ctx:to_messages()` (via the system prompt assembly).
 - With `cfg.project.auto_tree = true`, the project block appears on
  every broker call (subject to the `tree_max_chars` cap).
 - Existing configs without `cfg.project` and with `:highlight off`
  (default) behave exactly like Phase 5 (Phase 5 regression coverage).
 ---
 ## 2. Technology Decisions (delta from Phase 5)
 | Decision | Choice | Rationale |
 |---|---|---|
 | Highlight backend | External `tree-sitter` CLI (`tree-sitter highlight --lang X`) | Honors PHASE0 §3: no compiled extensions, no luarocks. Detected once at startup; absence → identity filter. Opt-in via `:highlight on` so install-state changes don't break users. |
 | Highlight buffering | Accumulate inside fenced code blocks, emit on closing fence; pass-through outside fences | Streaming UX preserved for prose. Code blocks get colorized atomically, accepting a per-block latency (~ block streaming time). Per-chunk highlighting would split a token across `tree-sitter` invocations and corrupt the output. |
 | Lang detection | First-line fence info-string (` ```py`, ` ```python`, ` ```lua`) → normalized via small map (py→python, js→javascript, etc.) | The lang tag mirrors the one we already emit in `expand_mentions` (#7). No tag → identity (no highlight). |
 | Diff backend | Shell out to `git diff <args>` via `executor.exec` | Honors substrate (no libgit2 FFI). The existing exec frame handles capture + stream. `git` is universally present where aish makes sense. |
 | Diff failure | Bail with status `[aish] :diff failed (not a git repo / bad ref)`; do NOT inject empty output | Avoids polluting context with stale or empty diffs. |
 | Tree backend | `git ls-files --cached --others --exclude-standard` when cwd is a git repo, else `find . -type f -not -path './.*'` | Free `.gitignore` honor in repos; sensible default outside. Both are POSIX-portable. |
 | Tree summary form | Sorted relative paths, grouped by directory at depth ≤ `cfg.project.tree_depth` (default 3), truncated by char count `cfg.project.tree_max_chars` (default 4096) | One block, deterministic order, cheap to compute. Matches the [background] memory block convention (Phase 4) so the system prompt's compositional shape stays familiar. |
 | Tree injection point | `context.lua`: new `compose_project(...)` adds a `[project] <header>\n<body>` block to the system content, between [background] and [earlier summary] | Same suppression rule as [background]/[earlier summary]: NOT injected during Norris (R-C1 / R-C4 — planner stays on its anchor). |
 | Tree refresh policy | One scan at startup if auto; `:tree refresh` to re-scan on demand | Scanning on every ask_ai is wasteful for slow filesystems. Manual refresh is sufficient for v1. |
 | @-mention diff syntax | `@<ref>..<ref>` (two `..` separator) only — recognized via the existing trailing-punct peel logic | Avoids ambiguity with literal paths. `@HEAD` alone is NOT a diff trigger (would collide with files literally named HEAD). |
 ---
 ## 3. Module Changes
 | File | State after Phase 5 | Phase 6 changes |
 |---|---|---|
 | `renderer.lua` | `assistant_delta(text)` writes chunks; `assistant_flush()` finalizes | Add fence-aware filter inside the assistant stream. State machine: outside-fence (pass-through) / inside-fence (buffer, emit on close). On close, pass the buffer to the `highlight_fn(body, lang)` callback supplied by `repl.lua` (R2; the callback shells out to `tree-sitter highlight --lang <X>` when highlighting is enabled) and emit the result. Toggle exposed as `renderer.set_highlight(enabled, detected, highlight_fn)`. |
 | `executor.lua` | `extract_cmd_lines`, `extract_cmd_bg_lines`, `extract_delegate_lines` | No changes. Diff and tree use the existing `exec` path. |
 | `context.lua` | system prompt = base + [background] + [earlier summary] + NORRIS suffix | Add `self.project = "..."` string field + `compose_project(self.project)` helper. Injection between [background] and [earlier summary] (A11: memory facts read before file tree). Suppressed under Norris (A12, parity with R-C1/R-C4). |
 | `repl.lua` | meta dispatch + main loop + #13 secrets wiring | New helpers: `_detect_treesitter()` (run once at startup), `_run_git_diff(args)`, `_scan_project_tree(dir, opts)`. New meta: `:highlight`, `:diff`, `:tree`. Extend `expand_mentions` to recognize `<ref>..<ref>` token shape. |
 | `config.lua` | example blocks for mcp/safety/memory/routing/secrets/etc. | Add commented-out `project = { auto_tree = false, tree_depth = 3, tree_max_chars = 4096 }` block. |
 No new module files in v1. Three new helpers in `repl.lua` keep the
 file growing but consolidate the Phase 6 surface. If the highlighter
 filter grows past ~80 LOC, lift it into `highlight.lua` as a follow-up.
 ---
 ## 4. Pillar 1 — Tree-sitter highlighting
 ### Detection (startup, once)
 ```lua
 --- Probe (once, at startup) for the external tree-sitter CLI.
 -- @return boolean  true when the probe command produced an output line
 local function _detect_treesitter()
    local pipe = io.popen("command -v tree-sitter 2>/dev/null && tree-sitter --version 2>/dev/null")
    if not pipe then return false end
    -- N2 / B3: pipe:close() returns true on LuaJIT regardless of exit
    -- code, so it carries no verdict. The signal is whether any output
    -- line arrived; the first line is the path printed by `command -v`
    -- (nothing is printed when the CLI is not on PATH).
    local line = pipe:read("*l")
    -- Always close, even when nothing was read, so the absent-CLI path
    -- does not leak the pipe handle.
    pipe:close()
    return line ~= nil
 end
 ```
 If not present, `renderer.set_highlight(true)` emits a status warning
 and leaves the filter as a no-op. Don't error; the user can install
 tree-sitter and re-toggle.
 ### Stream filter
 The filter wraps `renderer.assistant_delta`. State machine (R1 + N1
 revisions — outside-state accumulator + SOL anchor):
 ```
 state    = "outside" | "inside"
 tail     = ""                  -- outside-state lookahead buffer (R1)
 buf      = ""                  -- only used in "inside"
 lang     = nil                 -- captured at fence open
 push(chunk):
  if state == "outside":
      combined = tail .. chunk
      -- R1: hold back trailing partial-fence so a split fence
      -- ("``" arrives, then "`python\n") doesn't get emitted
      -- as plain text before we recognize the opener.
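       -- N1: fence opens only at start-of-stream OR after a newline
       -- ("^```" or "\n```"). Inline backticks in prose don't open.
       -- NOTE: the pattern below is pseudocode. Lua patterns have no
       -- `|` alternation, so the real code needs two searches (one
       -- anchored at the start, one for "\n```"). The start anchor must
       -- mean start-of-STREAM (or "last emitted char was \n"), not
       -- start-of-`combined`; otherwise a chunk that begins with ```
       -- in the middle of a prose line would open a fence.
       match_pos = find(combined, "(^|\n)```([%w_-]*)\n")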
      if match_pos:
          -- everything before the opening is plain text
          emit combined[1 .. fence_start - 1]
          lang = captured_lang
          buf  = combined[fence_end .. end]   -- text after \n
          state = "inside"; tail = ""
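           if buf has \n``` inside, fall through to the close-detection
           in the inside-state below (skip its `buf = buf .. chunk`
           append: chunk is already part of buf via `combined`)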
      else:
          -- Hold back the last K chars if they could be the start
          -- of a fence-open. Specifically: tail = the longest suffix
          -- of combined that is a prefix of any well-formed fence
          -- marker ("`", "``", "```", "```l", "```lua", "```lua\n").
          -- Bounded by max-lang-tag-length + 4 (~10 chars in practice).
          tail = longest_partial_fence_suffix(combined, max=10)
          emit combined[1 .. #combined - #tail]
          -- (next push will combine tail with the next chunk and retry)
  if state == "inside":
      buf = buf .. chunk
      -- closing fence: "\n```" anywhere in buf (followed by EOL or end).
      close_pos = find(buf, "\n```")
      if close_pos:
          fence_body = buf[1 .. close_pos - 1]
          closing    = buf[close_pos .. close_pos + 3]   -- "\n```"
          rest       = buf[close_pos + 4 .. end]
          emit highlighted(fence_body, lang)
          emit closing verbatim
          state = "outside"; buf = ""; tail = ""
          if rest != "":
              push(rest)   -- recurse for any plain text after the closing
      else:
          -- still buffering; nothing emitted this push
 ```
 Edge cases:
 - Chunk boundary lands inside an opening marker (e.g., chunk ends with
  `'``'`, next starts with `'`python\n'`). The `tail` buffer holds
  `'``'`; next push combines and finds the full opener.
 - Chunk boundary inside a closing marker. The `inside` branch already
  accumulates into `buf`; `find` against cumulative `buf` recovers.
 - Inline backticks in prose (`"use ``` to mark code"`). N1's
  `(^|\n)```` anchor means this does NOT open a fence — `\n` is
  required before the three backticks.
 The `tail` is bounded (max ~10 chars), so streaming UX latency is at
 most 10 chars worth of buffering when between fenced blocks. The
 existing `assistant_delta`'s `stream_buf` for full-text accumulation
 is unaffected — the filter sits BEFORE `emit`.
 `highlighted(body, lang)` — **B3 + R2 + R4-revised**:
 Lives in `repl.lua` (per R2; `renderer.lua` calls it via the
 `highlight_fn` passed to `renderer.set_highlight`). Has access to
 `_shq` (existing helper from #3) and the `executor` require.
 ```lua
 -- repl.lua local. Wired into renderer via:
 --   renderer.set_highlight(true, treesitter_present, highlighted)
 --
 -- Colorize one fenced code-block body through the external
 -- tree-sitter CLI. Returns `body` unchanged when highlighting is
 -- disabled, `lang` has no lang_map entry, the tmpfile cannot be
 -- written, or the highlighter exits non-zero.
 --   body : code-block text between the fence lines
 --   lang : fence info-string tag captured at fence open (may be nil)
 local function highlighted(body, lang)
    if not highlight_enabled then return body end
    local ts_lang = lang_map[lang]
    if not ts_lang then return body end
    -- R4: tree-sitter highlight CLI grammar is UNVERIFIED.
    -- Upstream `tree-sitter highlight` canonically takes a path and
    -- infers language from the file extension. At commit-5 implement
    -- time, install tree-sitter and check whether `--lang` exists.
    -- If not, name the tmpfile with the language's canonical extension
    -- (lang_extension[lang]) and pass the path directly:
    --   tmp = os.tmpname() .. lang_extension[lang]
    --   cmd = "tree-sitter highlight " .. _shq(tmp)
    -- Below is the optimistic --lang form for code reading; the actual
    -- implementation must be verified.
    local tmp = os.tmpname()
    local f = io.open(tmp, "wb")
    if not f then
        -- os.tmpname has mkstemp semantics (B6), so the file may
        -- already exist on disk; don't leave it behind.
        os.remove(tmp)
        return body
    end
    -- A failed write or close (e.g. disk full) would hand tree-sitter
    -- truncated input; treat it like a highlighter failure.
    local wrote = f:write(body)
    local closed = f:close()
    if not (wrote and closed) then
        os.remove(tmp)
        return body
    end
    -- B3: io.popen():close() doesn't expose exit codes in LuaJIT.
    -- Route via executor.exec which uses pty.spawn+waitpid and
    -- returns (out, exit_code) reliably.
    -- NOTE(review): A2 describes executor.exec as live-streaming its
    -- output as well as capturing it. Confirm the highlighter output
    -- is not echoed to the terminal here AND emitted again by the
    -- renderer.
    local out, code = executor.exec(
        ("cat %s | tree-sitter highlight --lang %s")
        :format(_shq(tmp), ts_lang))
    os.remove(tmp)
    if code ~= 0 then return body end   -- pass-through on highlighter failure
    return out
 end
 ```
 Why this shape (and not the formulate-time A4 sketch):
 - **R2 file placement**: `highlighted` lives in `repl.lua` so it has
  natural access to `_shq` + `executor`. `renderer.lua` stays free of
  the `executor` require; it calls back through `highlight_fn`.
 - **B3 exit-code path**: LuaJIT (5.1 contract) doesn't expose the exit
  status via `io.popen(...):close()`. `executor.exec` is the only
  reliable channel in our substrate.
 - **R4 grammar verification**: the `--lang` flag is the formulate-time
  assumption; the upstream CLI's `highlight` subcommand may want a
  PATH with a recognized extension instead. Implement-time check
  required before commit 5 ships.
 - The tmpfile stays — avoids ARGMAX on `printf '%s' BODY |` and
  sidesteps shell-escape edge cases on arbitrary code-block bytes.
 - Cost: one syscall round (tmpfile create/remove) + one pty spawn per
  code block — negligible vs the highlighter latency.
 ### Lang map (v1)
 ```lua
 -- Fence info-string / file-extension tag -> normalized highlighter
 -- language name. Every normalized name also maps to itself so a fence
 -- tagged with the full name (```typescript, ```rust, ...) resolves the
 -- same way as its short alias. A tag with no entry means "no highlight".
 local LANG_MAP = {
    py = "python", python = "python",
    lua = "lua",
    js = "javascript", javascript = "javascript",
    ts = "typescript", typescript = "typescript",
    sh = "bash", bash = "bash",
    c = "c", h = "c", cpp = "cpp", cc = "cpp",
    rs = "rust", rust = "rust",
    go = "go", java = "java",
    rb = "ruby", ruby = "ruby",
    md = "markdown", markdown = "markdown",
    json = "json",
 }
 ```
 Overlaps with the smaller map behind `_lang_of(path)` in
 `expand_mentions`. Per §12, commit 5 copies this map locally; factor
 both into a shared helper (small `_lang_of_ext()` in repl.lua) only
 if a third caller appears.
 ### Toggle
 `:highlight` (no arg) → flip. `:highlight on|off` → set explicit.
 `:highlight status` → report enabled + whether tree-sitter is present.
 Default: off (don't change existing-user UX).
 ---
 ## 5. Pillar 2 — Diff-aware code injection
 ### Meta: `:diff [args]`
 - `:diff` → `git diff` (working tree vs index)
 - `:diff HEAD` → `git diff HEAD`
 - `:diff --cached` → `git diff --cached` (staged-only)
 - `:diff main..feature` → `git diff main..feature`
 - `:diff <anything else>` → passed verbatim to `git diff <anything>`
 N3: the meta is a thin pass-through to `git diff`. Don't introduce
 aliases like `staged` that would diverge from git's own grammar — the
 user types the real flag (`--cached`) and aish doesn't second-guess.
 R6: `:diff` reads `libc.getcwd()` at **meta-invocation** time. Compare
 with `:tree` / `ctx.project` which captures the cwd at **scan** time
 (A8): after `cd /other-project`, `:diff` shows the new project's diff,
 but `ctx.project` still holds the old project's tree until `:tree
 refresh`.
 Implementation — **B1-revised** (must disable pager + color):
 ```lua
 meta.diff = function(args)
    -- `:diff [args]` meta. Runs `git diff <args>` through executor.exec
    -- and appends the output to the context as a `[diff ...]` block.
    -- `args` may be nil; it is trimmed and otherwise passed to git
    -- verbatim (thin pass-through, no aliases).
    local range = (args or ""):gsub("^%s+", "")
    range = range:gsub("%s+$", "")
    -- B1: forkpty makes git think it's interactive, enabling color
    -- ANSI + DEC keypad/line-clear escapes that pollute the injected
    -- context block. --no-pager kills the keypad sequences;
    -- -c color.ui=never kills the color codes. Both are required.
    local out, code = executor.exec(
        "git --no-pager -c color.ui=never diff " .. range)
    if code ~= 0 then
        renderer.status(("diff failed (exit %d)"):format(code))
        return
    end
    -- Output that is empty once all whitespace is removed means there
    -- is nothing to inject.
    local stripped = out:gsub("%s", "")
    if stripped == "" then
        renderer.status("(no diff)")
        return
    end
    -- An empty argument string is labelled as the working-tree diff.
    local label = range
    if label == "" then label = "(working tree)" end
    ctx:append_exec_output(("[diff %s]\n%s"):format(label, out))
 end
 ```
 The `[diff ...]\n<output>` framing matches the `[bg:N exited]` /
 `[delegate X]` conventions established in Phase 5 / #6 / #8.
 The same `--no-pager -c color.ui=never` prefix applies to the
 `@<r1>..<r2>` resolution path in the next section, and to any
 future git verbs we add (`:log`, `:show`, etc.). Factor into a
 helper `_git_clean_cmd(subcmd)` if multiple call sites accumulate.
 ### @-mention: `@<ref1>..<ref2>` — tiered resolution (A6)
 Extends `expand_mentions` (#7) by adding a SECOND resolution attempt
 when the first (path lookup) fails AND the token contains `..`:
 ```lua
 -- Existing path-attempt block ends with content = _read_truncated(path)
 -- which returns nil if no such file. Add the diff retry there:
 if not content and path:find("..", 1, true) then
    -- The file lookup produced nothing and the token contains a
    -- literal "..": retry it as a git ref range <r1>..<r2>.
    local r1, r2 = path:match("^(.-)%.%.(.+)$")
    local is_range = r1 and r2 and r1 ~= "" and r2 ~= ""
    if is_range then
        -- B1: --no-pager + color=never (same as the :diff meta path).
        -- B3: io.popen close() doesn't expose exit codes, so the
        -- command goes through executor.exec; the extra forkpty is an
        -- acceptable cost for this best-effort retry.
        local cmd = ("git --no-pager -c color.ui=never diff %s..%s 2>/dev/null")
            :format(shq(r1), shq(r2))
        local out, code = executor.exec(cmd)
        local has_diff = code == 0 and out:match("%S")
        if has_diff then
            -- The fenced block is tagged "diff" regardless of what the
            -- path's own language would have been.
            content = out
            lang_override = "diff"
        end
    end
 end
 ```
 Output replaces the token with:
 ````
 ```diff path=<r1>..<r2>
 <content>
 ```
 ````
 Tiered resolution semantics:
 - `@README.md` → file lookup succeeds → file expansion
 - `@../sibling.txt` → file lookup succeeds → file expansion
 - `@HEAD~1..HEAD` → file lookup fails, `..` present, ref-range succeeds → diff
 - `@origin/main..feature` → file lookup fails (no such file), `..` present,
  ref-range succeeds → diff. The token has `/` in `r1` but `git diff` accepts
  it as a ref; no `/`-based heuristic needed (resolves Q-D2).
 - `@nonexistent-file..but-also-not-a-ref` → both fail; literal token
  preserved with the existing `[aish] @X: not found` status path.
 ---
 ## 6. Pillar 3 — Project file-tree
 ### Meta: `:tree [depth]`
 - `:tree` → scan + inject with default depth and char cap; if a
  prior `:tree <N>` set a depth override, this re-scan uses the
  config defaults (`:tree` resets to defaults)
 - `:tree <N>` → override depth for this scan; cached as
  `ctx._project_opts` for `:tree refresh`
 - `:tree refresh` → re-scan with `ctx._project_opts` (last explicit
  opts) if present; otherwise config defaults (R7)
 - `:tree off` → clear `ctx.project` AND `ctx._project_opts`. Future
  `:tree` (no arg) re-scans with config defaults. One-shot semantics
  — there's no "disabled until re-enabled" flag (R5).
 ### Scan logic
 ```lua
 --- List the files under `dir` as a sorted, newline-joined string.
 -- Uses `git ls-files` when `dir` is inside a git work tree (so
 -- .gitignore is honored) and `find` otherwise. Paths are relative to
 -- `dir` in both cases.
 -- @param dir  directory to scan
 -- @param opts optional table: `max_chars` (default 4096) caps the
 --             returned string; `depth` (default 3) keeps only paths
 --             with fewer than `depth` "/" separators
 -- @return body, info  where info = { file_count, truncated };
 --         file_count is the number of paths kept by the depth filter
 -- @return nil, "scan failed" when the listing command cannot be opened
 local function _scan_project_tree(dir, opts)
    opts = opts or {}
    local max_chars = opts.max_chars or 4096
    local depth     = opts.depth or 3
    -- Prefer git ls-files for .gitignore honor; fall back to find.
    -- N4: `git -C <dir>` skips the subshell vs `cd && git ...`.
    -- os.execute returns 0 on success under the 5.1 contract and `true`
    -- on 5.2-compatible builds; accept either.
    local rc = os.execute(("git -C %s rev-parse --git-dir >/dev/null 2>&1"):format(shq(dir)))
    local in_git = (rc == 0 or rc == true)
    local listcmd
    if in_git then
        listcmd = ("git -C %s ls-files --cached --others --exclude-standard"):format(shq(dir))
    else
        -- Run find from inside <dir> so it prints "./"-relative paths.
        -- With `find <dir>` every line carries the <dir> prefix, so the
        -- slash-count depth filter below would also count the slashes
        -- in <dir> (rejecting everything for an absolute cwd), and the
        -- hidden-path exclusion would fire on hidden components of
        -- <dir> itself.
        listcmd = ("cd %s 2>/dev/null && find . -maxdepth %d -type f -not -path '*/\\.*' 2>/dev/null"):format(shq(dir), depth + 1)
    end
    local pipe = io.popen(listcmd)
    if not pipe then return nil, "scan failed" end
    local files = {}
    for line in pipe:lines() do
        -- Normalize find's "./x" to "x" so both backends yield the
        -- same dir-relative form.
        line = line:gsub("^%./", "")
        -- Depth filter: count `/` separators
        local _, slashes = line:gsub("/", "")
        if slashes < depth then files[#files + 1] = line end
    end
    pipe:close()
    table.sort(files)
    -- Build a tree-ish summary, truncate by char count.
    local body = table.concat(files, "\n")
    local truncated = false
    if #body > max_chars then
        body = body:sub(1, max_chars) .. "\n... (truncated)"
        truncated = true
    end
    return body, { file_count = #files, truncated = truncated }
 end
 ```
 ### Injection
 `ctx.project = "..."` (string), composed into the system prompt
 between [background] and [earlier conversation summary]:
 ```
 [project] 142 files (truncated at 4096B):
 README.md
 broker.lua
 config.lua
 context.lua
 ...
 ```
 Suppressed under Norris (R-C1 / R-C4 — planner stays focused; the
 project context can be re-introduced via the Norris goal text if
 needed).
 ### Auto-inject
 `cfg.project.auto_tree = true` runs the scan once at startup and
 sets `ctx.project`. Default false (existing configs unchanged).
 ---
 ## 7. UX Surface Summary
 | Meta | Behavior |
 |---|---|
 | `:highlight [on/off/status]` | Toggle tree-sitter highlighter (no-op when CLI absent) |
 | `:diff [args]` | `git diff <args>`, append output to context as `[diff ...]` |
 | `:tree [N/refresh/off]` | Scan/refresh/clear project file-tree block |
 | @-mention | Behavior |
 |---|---|
 | `@path` | Existing (#7) file expansion |
 | `@<ref1>..<ref2>` | New: inline `git diff <r1>..<r2>` expansion |
 | Config | Default | Effect |
 |---|---|---|
 | `cfg.project.auto_tree` | `false` | Inject project tree at startup |
 | `cfg.project.tree_depth` | `3` | Depth filter for the scan |
 | `cfg.project.tree_max_chars` | `4096` | Truncation cap for the injected block |
 | (no config flag for `:highlight`) | — | Runtime toggle only; no persistence in v1 |
 ---
 ## 8. Out of Scope (Phase 6)
 - **Pure-Lua syntax highlighter** — defer to a future phase if
  tree-sitter CLI absence becomes a practical pain point. v1 says
  "install tree-sitter or accept plain text".
 - **bat/glow/chroma integration** — only `tree-sitter` is wired.
  Other highlighters can be added behind the same `:highlight` toggle
  later (config field `cfg.highlight.backend = "tree-sitter"|"bat"|...`).
 - **Smart diff context selection** — no AI-driven "which diff to show".
  User explicitly says `:diff <range>` or `@<r1>..<r2>`.
 - **File-tree LRU / smart summarization** — v1 is a flat truncated list.
  Hierarchical roll-up ("docs/ — 8 files") is a v2 polish.
 - **Watching for file changes** — no fs-notify reload. Re-scan via
  `:tree refresh`.
 - **Diff history** — `:diff` doesn't track its previous invocations.
  Each invocation is independent.
 - **Inline diff highlighting** — the `diff` lang is in `LANG_MAP` so
  `tree-sitter highlight --lang diff` works, but we don't ship custom
  ANSI for added/removed lines — tree-sitter's own theme covers it.
 - **Highlighter on @-mention echo** (v2 polish per A7) — `:highlight`
  applies to assistant output only. Highlighting user-pasted code as
  it's echoed by readline would need a separate hook in the readline
  display path; out of scope here.
 - **Auto-refresh project tree on `cd`** (v2 polish per A8) — the cd
  interceptor in `executor.maybe_chdir` is a clean place to call
  `_scan_project_tree(libc.getcwd(), ...)` on every successful cd.
  Skipped in v1 because the scan can be slow on large trees; manual
  refresh via `:tree refresh` is the v1 verb.
 - **Custom include/exclude globs for project tree** (v2 polish per A9) —
  `cfg.project = { include = {...}, exclude = {...} }` would extend
  beyond `.gitignore`. v1 ships with `.gitignore`-only honor (via
  `git ls-files --exclude-standard`) plus the `find` fallback for
  non-repo cwds.
 ---
 ## 9. Risks
 | Risk | Mitigation |
 |---|---|
 | `tree-sitter` CLI not on fleet → most users get no highlighting | It's opt-in; default off; status warning on toggle when absent. |
 | Highlighter latency on long code blocks (whole-block buffering) | Accepted trade-off vs corrupting output. If painful in practice, add a per-block size cap above which we pass-through unhighlighted. |
 | `git diff` on huge changesets blows context budget | Diff output reuses `enforce_budget` eviction (it's just exec output). User can `:diff <subdir>` to scope. v2 could add a `--max-bytes` truncation. |
 | `git ls-files` in a non-git cwd → falls back to `find`, may pick up node_modules / target / etc. | Document in config example; v2 could honor `.aishignore` or similar. |
 | @`<ref1>..<ref2>` collides with paths like `@../sibling.txt` | A6: tiered resolution — try as path first; only fall through to diff retry when path lookup fails AND token contains `..`. `@../sibling.txt` hits the path branch and never reaches the diff retry. |
 | Project tree injection adds tokens to every broker call | Char cap + opt-in `auto_tree = false` default. Suppressed under Norris. |
 | `:highlight on` mid-stream produces inconsistent rendering for the in-flight turn | Toggle takes effect from the NEXT assistant turn. Document this. |
 ---
 ## 10. Open Questions (Phase 6)
 All six formulate-time Qs were resolved in analyze (A4–A9). None remain
 open as blockers for implementation.
 | # | Question | Resolution |
 |---|---|---|
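 | Q-H1 | popen3 for `tree-sitter highlight` | A4: tmpfile roundtrip — `io.popen("w")` writes body with stdout redirected to a tmp file, then `io.open` reads the file. Avoids ARGMAX + shell-escape complexity. Superseded at review by B3 (§4): the body is written to a tmpfile and the highlighter runs via `executor.exec`, because `io.popen():close()` doesn't expose exit codes in LuaJIT. |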
 | Q-D1 | Confirm gate on `:diff`? | A5: no. `git diff` is read-only; matches `:history` / `:sessions` / `:safety check` (none gate). Permission DSL (#9) applies only to AI-suggested `CMD:` lines. |
 | Q-D2 | `@<r1>..<r2>` with refs containing `/` | A6: tiered resolution — file lookup first, then if it fails AND `..` is present, retry as ref-range. `@origin/main..feature` naturally falls through to the retry; no grammar prefix needed. |
 | Q-T1 | `cfg.project.auto_tree` update on cd | A8: no auto-refresh in v1. `:tree refresh` is the manual verb; cd-intercept hook is documented as v2 polish in §8. |
 | Q-T2 | Custom include/exclude globs | A9: rely on `.gitignore` via `git ls-files` in repos; `find` fallback outside. Custom globs deferred to v2. |
 | Q-H2 | Highlighting on @-mention echo | A7: assistant-output only in v1. Echo via readline is a different code path; deferred to v2 (see §8). |
 ---
 ## 11. Phase 6 → Phase 7+ Out-of-band
 The §11 "Planned Phase Sequence" table in PHASE0.md does not list
 phases beyond 6. After Phase 6 lands, candidate next iterations
 (non-binding, for the formulate of Phase 7 to confirm):
 - **Phase 7**: secret-redaction wiring into `safety.lua` (#52
  follow-up filed during Phase 5/13 close); session-multiplex / tmux
  parity surfaces (out of scope per §12 — explicitly rejected);
  or other backlog as it accumulates on Gitea.
 Phase 6 itself is self-contained — none of its three pillars introduce
 substrate dependencies on phases not yet planned.
 ---
 ## 12. Implementation Plan (commit-by-commit)
 Bottom-up ordering: foundations first (context.lua field + composer),
 then the diff and tree surfaces that have no display-layer risk, then
 the highlighter (largest experimental surface — last so the rest of
 Phase 6 ships even if highlighter slips). Each commit leaves the tree
 green (existing tests pass + smoke ok) and adds a discrete capability.
 ### Order
 1. **`context.lua` — `[project]` block plumbing.** Add `self.project`
   (string, nil-allowed) on `Context.new`. Add `compose_project(text)`
   helper mirroring `compose_background` / `compose_summary`. In
   `to_messages`: insert between `compose_background` and
   `compose_summary` so the read order is memory → project tree →
   earlier-summary → NORRIS. Suppressed under `self.norris_active`
   (parity with R-C1 / R-C4). No behavior change yet — nothing sets
   `ctx.project`.
   **R8: `:reset` does NOT clear `ctx.project`.** Phase 4 established
   that `:reset` preserves `ctx.memory_items` (startup-injected facts
   survive a user-driven context reset); `ctx.project` follows the
   same rule. Compare `Context:reset` at `context.lua` ~343 — clears
   `turns`, `pending_exec_output`, `summary`; leaves `memory_items`
   and now `project` alone. Smoke: `:to_messages()` still empty when
   project nil; with project set, `:reset` then `:to_messages()`
   still shows the `[project]` block.
 2. **`repl.lua` — `_scan_project_tree` helper + `:tree` meta.**
   - `_scan_project_tree(dir, opts)` per §6: `git ls-files --cached
     --others --exclude-standard` in a repo, `find . -maxdepth N
     -type f -not -path '*/\.*'` outside. Returns `(body, info)`
     where `info = { file_count, truncated }`.
   - `:tree [N|refresh|off]` meta: scans cwd, sets `ctx.project`,
     emits status with file count + truncation note.
   - `cfg.project.auto_tree` startup hook: if true, run `_scan` once
     and set `ctx.project` (before the main loop opens). Default
     false (existing configs unchanged).
   - Update HELP with `:tree` lines.
   - Smoke: in the aish repo, `:tree` injects a ~32-file block;
     `:to_messages()` shows the `[project]` block in the system prompt.
 3. **`repl.lua` — `:diff` meta + `_git_clean_cmd` helper (B1).**
   - `_git_clean_cmd(subcmd_and_args)` returns the `git --no-pager
     -c color.ui=never <subcmd_and_args>` prefix. Used by `:diff`
     and the `@<r1>..<r2>` path in commit #4.
   - `:diff [args]` meta per §5 (B1-revised): runs the clean git
     command via `executor.exec`, appends `[diff <args>]\n<out>`
     to context as exec_output. Empty / non-repo / bad-ref paths
     emit status and skip.
   - Update HELP with `:diff` line.
   - Smoke: `:diff` from a dirty aish checkout injects the working
     tree diff; `:diff --cached` works; `:diff junkref` emits status
     and skips.
 4. **`repl.lua` — `expand_mentions` tiered resolution (A6).**
   Extend the existing path-resolution loop with the diff-retry
   branch from §5: if `_read_truncated` returns nil AND the token
   contains `..`, parse as `<r1>..<r2>` and try `_git_clean_cmd(
   "diff <r1>..<r2>")`. On success, replace with a fenced `diff`
   block. Preserves existing peel-on-trailing-punct logic. Smoke:
   `@HEAD~1..HEAD` expands inline; `@origin/main..feature` works
   when the ref exists; `@../sibling.txt` still resolves as file.
 5. **`renderer.lua` + `repl.lua` — tree-sitter highlighter.**
   This commit is the largest single change in Phase 6. Substeps:
   a. `_detect_treesitter()` in repl.lua: one-shot popen of
      `command -v tree-sitter && tree-sitter --version`. Stash
      result on a local.
   b. `renderer.lua` — fence-aware state machine wrapping
      `assistant_delta`. Exports `renderer.set_highlight(enabled,
      detected, highlight_fn)` so repl.lua wires the toggle,
      cli-availability flag, AND the `highlighted` callback (R2:
      keeps `executor` dependency out of `renderer.lua`). State:
      `outside` (pass-through + tail accumulator per R1) /
      `inside` (buffer until closing fence). On close: call
      `highlight_fn(body, lang)` and emit. Algorithm per §4;
      bytes-of-cumulative-buf scan + tail lookahead handles
      fragment-across-boundary fences (B2 + R1).
   c. `highlighted(body, lang)` per §4 (B3 + R2 + R4): lives in
      `repl.lua`. Write body to `os.tmpname()`, invoke via
      `executor.exec("cat tmp | tree-sitter highlight --lang X")`,
      capture out + exit code, cleanup tmp, pass-through on failure.
      **R4 implement-time check**: verify the `--lang` flag exists
      on the installed CLI; if not, switch to tmpfile-with-extension
      and pass the path directly.
   d. `:highlight [on|off|status]` meta in repl.lua. `:highlight on`
      when CLI absent → status with install hint (B4); `:highlight
      status` always reports current toggle + CLI availability.
   e. HELP update. **R9: status header bump moves to commit 6**
      (single owner; no duplication).
 6. **`config.lua` + docs/PHASE6 status bump (R9).**
   - Add commented-out `project = { auto_tree = false, tree_depth = 3,
     tree_max_chars = 4096 }` block in config.lua (parity with the
     Phase 1-5 example blocks).
   - PHASE6.md status header → **Implement** (matches Phase 5
     cadence — manifest tracks implementation state).
 ### Risk index per commit
 | Commit | Risk | Mitigation |
 |---|---|---|
 | 1 (compose_project) | Composition-order regression breaks Phase 4/5 callers | Order test: empty memory + empty project = identical sys_content to pre-Phase-6 baseline |
 | 2 (:tree) | `find` fallback picks up node_modules / target / build / etc. | Document in status warning; users in non-repo cwds scope via `:tree <depth>` |
 | 3 (:diff) | B1 — color/keypad codes leak if a future caller forgets the helper | All call sites must go through `_git_clean_cmd`; lint by grep before commit |
 | 4 (@<r1>..<r2>) | False positive on `@../sibling.txt` when no such file exists | A6's tiered resolution: only retry as diff when file lookup fails. `@../sibling.txt` resolves as path; if the path is missing, diff retry runs and naturally fails — same outcome as before |
 | 5 (highlighter) | Fence detector misclassifies inline ` ` ``` ` ` triple-backtick in prose | N1: state machine triggers on `^```` at start of stream OR after `\n` only. §4 algorithm now encodes this constraint in the pseudocode. |
 | 5 (highlighter) | tmpfile race / leak on crash | `os.remove(tmp)` in normal exit path; OS cleans `/tmp/lua_*` files on reboot. Single-user trust per PHASE0 §12. |
 | 5 (highlighter) | R3: PTY raw-mode toggle on every code-block render (`executor.exec` -> `libc.set_raw(0)`) | Smoke-test before locking: render an assistant turn with 5+ fenced blocks; watch for cursor flicker, SIGWINCH races, terminal state corruption. If problematic, alternate paths: direct `io.popen` for stdin-write (accept the lost exit code; treat empty output as failure) or run highlighter via `os.execute` with shell redirection. |
 | 5 (highlighter) | R4: `tree-sitter highlight --lang X` invocation grammar unverified | Implement-time CLI check (`tree-sitter highlight --help`). If `--lang` is wrong, fall back to extension-based: name the tmpfile `lua_XXX.<ext>` per `lang_extension[lang]` map and pass the path. |
 | 6 (config bump + status) | none — pure docs / commented config | — |
 ### Tests + smoke per commit
 Each commit must:
 - Pass `luajit test_safety.lua` (87/87) and `luajit test_router_model.lua` (31/31)
 - Load cleanly: `luajit -e 'package.path="./?.lua;./vendor/?.lua;"..package.path; require("repl"); print("ok")'`
 - Pass a feature-specific smoke (described per row above)
 No new test framework dependency. Per-feature unit tests can live as
 inline `luajit -e '...'` blocks in commit messages or as a dedicated
 `test_phase6.lua` if the surface area justifies it (decide at impl-time).
 ### Things deliberately NOT split into a separate commit
 - `_shq` (shell-quote helper) — already exists in repl.lua from #3.
  Reuse in commit 5 (highlighter); no new helper.
 - Lang map — small enough to copy locally in commit 5 (~15 lines);
  the existing `_lang_of(path)` in `expand_mentions` uses a similar
  but smaller map. Factor only if a third caller appears.
 - Streaming-rehydration interaction with the highlighter — `secrets_session`
  rehydrate runs BEFORE the highlight filter in the chunk pipeline.
  Order: `chunk → rehydrator:push → highlight_filter → emit`. The
  highlighter operates on plain text only; rehydrated placeholders
  resolve to real values which the highlighter sees as code. No
  special wiring needed.
 ### Open at plan-time (resolve at implement)
 - **R4 implement-time verification**: confirm `tree-sitter highlight
  --lang X` works on the installed CLI. If not, switch to extension-
  based path passing. Block commit 5 ship on this check.
 - **R3 smoke test**: render an assistant turn with 5+ fenced blocks
  through the highlighter; confirm no cursor flicker / SIGWINCH race
  / terminal-state corruption from per-block raw-mode toggle. If
  problematic, alternate paths listed in §12 risk row.
 - Whether `:highlight status` should also probe `tree-sitter --print-langs`
  to show which langs are actually available. Nice-to-have; defer
  unless install paths produce variable lang sets in practice.
@@ -0,0 +1,155 @@
 # Phase 7 Baseline — pre-implementation measurements
 **Date:** 2026-05-16
 **Tree probed:** `f0bccde` (PHASE7 formulate + analyze).
 **Broker probed:** `hossenfelder.fritz.box:8082` (local `qwen-coder-7b-snappy-8k`, cloud `anthropic/claude-haiku-4.5`).
 This is the Phase 7 (verify) anchor for the cost/usage observability
 work. Captures the world just before broker.lua / context.lua / repl.lua
 edits land.
 ---
 ## B1. `stream_options.include_usage = true` is safely accepted everywhere
 Probed both backends with and without the flag in the request body:
 | Backend | Without flag | With flag | Notes |
 |---|---|---|---|
 | Cloud (Anthropic via Bedrock through OpenRouter) | usage IS in final chunk | usage IS in final chunk | OpenRouter emits usage by default; the flag is a no-op there |
 | Local llama.cpp (qwen-coder-7b-snappy-8k via hossenfelder) | NO usage emitted | usage IS in final chunk | The flag is **required** for local; hossenfelder forwards it correctly to llama.cpp |
 **Implication for §2 / §4:** the formulate-time decision to default
 `opts.include_usage = true` is correct. Without the flag we'd silently
 miss local-model usage tracking. With the flag both backends emit
 `usage` reliably. No need for a per-backend opt-out in v1.
 ---
 ## B2. Usage payload shape — TWO emission patterns
 **Cloud (Anthropic/Bedrock):** usage rides the FINAL delta chunk that
 ALSO carries the closing `finish_reason`. `choices` is non-empty.
 ```json
 {
  "id": "gen-...",
  "object": "chat.completion.chunk",
  "model": "anthropic/claude-4.5-haiku-20251001",
  "provider": "Amazon Bedrock",
  "choices": [{
    "index": 0,
    "delta": { "content": "", "role": "assistant" },
    "finish_reason": "length"
  }],
  "usage": {
    "prompt_tokens": 8,
    "completion_tokens": 4,
    "total_tokens": 12,
    "cost": 0.000028,                                  // dollars
    "cost_details": {
      "upstream_inference_cost": 0.000028,
      "upstream_inference_prompt_cost": 0.000008,
      "upstream_inference_completions_cost": 0.00002
    },
    "prompt_tokens_details": { "cached_tokens": 0, "cache_write_tokens": 0, ... },
    "completion_tokens_details": { "reasoning_tokens": 0, ... }
  }
 }
 ```
 **Local (llama.cpp):** usage rides a SEPARATE final chunk where
 `choices: []`. Then `[DONE]` marker.
 ```json
 {
  "id": "chatcmpl-...",
  "object": "chat.completion.chunk",
  "model": "qwen-coder-7b-snappy-8k",
  "choices": [],
  "usage": {
    "prompt_tokens": 30,
    "completion_tokens": 6,
    "total_tokens": 36,
    "prompt_tokens_details": { "cached_tokens": 29 }
  },
  "timings": {
    "cache_n": 29, "prompt_n": 1, "prompt_ms": 152.391,
    "predicted_n": 6, "predicted_ms": 758.778, ...
  }
 }
 data: [DONE]
 ```
 **Implication for §4 extraction algorithm:** `if doc.usage then
 final_usage = doc.usage end` works for BOTH shapes (cloud-style
 non-empty-choices chunk AND local-style empty-choices chunk). The
 existing on_event branch on `choices and choices[1] and delta` is
 short-circuited safely when choices is empty.
 ---
 ## B3. `cost` field is dollar-denominated and present on cloud only
 | Provider | `usage.cost` | `usage.cost_details` |
 |---|---|---|
 | Anthropic via Bedrock (OpenRouter) | ✓ (number, USD) | ✓ (upstream_inference_cost / _prompt_cost / _completions_cost) |
 | Local llama.cpp | absent | absent |
 The local model has `timings` instead — useful for perf observability
 but NOT cost. **Implication:** in the accumulator, capture
 `usage.cost` as-is when present; treat `nil` as 0 (matches the
 formulate-time "local: free" framing). `:cost detail` annotates
 local lines as `(local)` so the displayed `$0` isn't misread.
 ---
 ## B4. Model identifier in usage events — choose source carefully
 Cloud's usage event carries:
 - `doc.model = "anthropic/claude-4.5-haiku-20251001"` (the resolved upstream-API-version)
 But the REQUEST was `"model": "anthropic/claude-haiku-4.5"`. The
 broker / OpenRouter rewrote the model name to the dated version.
 **Implication:** the accumulator should key by the CALLER-INTENDED model
 name (i.e., `model_cfg.model` from the request, NOT `doc.model` from the
 response). This keeps `:cost detail` output stable across upstream API
 version bumps. Documented in §5 of the manifest already (uses
 `model_name`).
 For local the two match (model_cfg.model == doc.model), so this is a
 cloud-only consideration.
 ---
 ## B5. Multi-chunk vs single-chunk delivery
 Cloud (Bedrock) returns the whole 4-token response in ~3 chunks (median
 27 chars each per B2 of Phase 6 baseline). Local returns ~6 chunks of
 ~4 chars each. In both cases the `usage` event is the LAST data event
 before `[DONE]`. So the post-`curl.post_sse` emission of
 `on_delta("usage", ...)` in chat_stream is the right place to fire —
 it happens once per stream, after all text/tool_calls have been
 delivered.
 ---
 ## Summary
 | Finding | Affects | Resolution |
 |---|---|---|
 | B1 stream_options safe + required for local | §4 `opts.include_usage` default | Default true; no per-backend opt-out needed |
 | B2 two emission patterns (non-empty vs empty choices) | broker.on_event branch | `if doc.usage then final_usage = doc.usage end` works for both |
 | B3 cost dollar-denominated, cloud-only | accumulator + :cost detail | Capture as-is; nil→0; annotate local lines |
 | B4 model identifier rewrite by upstream | accumulator keying | Key by `model_cfg.model` (caller-intended) not `doc.model` |
 | B5 usage is last event before [DONE] | emission placement | Fire `on_delta("usage", ...)` after curl.post_sse returns |
 All findings align with the formulate/analyze design. No structural
 changes needed. The implementation can proceed to plan.
 **Q-C4 RESOLVED** (was: does the hossenfelder broker forward
 `stream_options` to all backends?): YES — local llama.cpp receives
 and honors the flag; cloud emits usage with or without (the flag is
 a no-op there). Both confirmed via real probes against
 `hossenfelder.fritz.box:8082`.
@@ -0,0 +1,803 @@
 # aish — Phase 7 Manifest
 **Project:** aish — AI-augmented conversational shell
 **Document:** Phase 7 Requirements, Architecture & Design Decisions
 **Status:** Implement (6 commits landed: 7364963, 7b4a9be, 8adebd5, b30212a, 0d6ff93, this)
 **Date:** 2026-05-16
 **Review findings (independent Sonnet agent, 2026-05-16) — 3 BLOCKERs
 resolved in-place, 6 CONCERNs folded, 5 NITs applied:**
 R1 (BLOCKER, RESOLVED). **`M.chat` would silently return `(text, nil)`
    for ALL non-streaming callers.** `M.chat`'s internal on_delta only
    captures `kind == "text"`. Without explicit handling of
    `kind == "usage"`, the four (of six) categories that go through
    `broker.chat` (summarize / delegate / memory_summarize / probe)
    would report zero usage even after a cloud round-trip. **Fix
    folded into §4 + §13 commit 1:** M.chat's on_delta also captures
    the usage payload and returns it as the second value.
 R2 (BLOCKER, RESOLVED). **`call_broker` fallback retry — usage
    payload's `model` field credits the WRONG model name.** The
    `wrapped` on_delta in call_broker is closed over the PRIMARY's
    name; if the wrapped function uses an outer-scope `model_name`
    variable to key the accumulator, the fallback's usage gets
    misattributed. **Resolution:** the broker emits `payload.model =
    model_cfg.model` (which IS the fallback's model when called with
    `fb_cfg` — chat_stream's local upvar). The wrapper keys by
    `payload.model`, NOT by the outer `model_name`. Documented in
    §4 emission code + §13 commit 3 (wrapped on_delta uses
    `payload.model` for accumulator keying).
 R3 (BLOCKER, RESOLVED — promoted to docs). **`build_request` has
    TWO internal callers inside broker.lua itself**, not just the
    public surface. Migration is contained but both internal sites
    must be updated in commit 1. Plan §13 commit 1 risk row updated
    to call this out explicitly so the implementer doesn't read
    "every caller already passes opts" as "only external callers
    need touching".
 R4 (CONCERN, FOLDED). **Single `cost_warn_fired` flag for two
    thresholds is broken.** When both warn_at_dollars AND
    warn_at_tokens are configured, the first-to-fire suppresses the
    other. **Fix:** `ctx.cost_warn_fired` becomes `ctx.cost_warn_state
    = { dollars = false, tokens = false }`. Each threshold has its
    own flag; `:cost reset` clears both. §7 pseudocode updated.
 R5 (CONCERN, FOLDED). **Warn-check centralization decided:** use a
    single `_record_usage(model, category, usage)` helper inside
    repl.lua that wraps `ctx:add_usage` AND does the threshold check
    AND calls renderer.status when crossed. `context.lua` stays
    decoupled from `renderer`. safety.lua call sites get
    `helpers.on_usage = _record_usage` in the helpers table; probe
    callsite gets `opts.on_usage = _record_usage`. Single chokepoint
    for the warn check. §3 + §7 + §13 commits 3-5 reflect.
 R6 (CONCERN, FOLDED). **`nil` vs `0` cost distinction must be
    preserved at the accumulator level.** Local-model `$0` (no cost
    field) vs cloud-call-that-happens-to-cost-zero need to be
    distinguishable for `:cost detail` annotation. **Fix:** accumulator
    slot gains `is_local = true` when ANY recorded usage for that
    slot had `cost == nil`. Cloud calls with `cost = 0` (rare) stay
    annotated as cloud. §5 pseudocode + §6 annotation logic updated.
 R7 (CONCERN, FOLDED). **`:cost detail` sort needs three-level key
    for determinism.** Lua's `table.sort` is unstable; equal-cost
    rows would have arbitrary order. **Fix:** sort key is
    `(cost desc, model asc, category asc)`. §6 updated.
 R8 (CONCERN, FOLDED). **`call_broker` fallback passes `opts.include_usage`
    unchanged.** Documented as a known assumption (B1 confirms both
    backends accept; if a future fallback host rejects, the call-site
    can pass `include_usage = false` explicitly). §10 risk row added.
 R9 (CONCERN, FOLDED). **`:resume` does NOT restore historical
    `usage_totals`.** Per-turn usage IS in the session JSONL but
    `:resume` reloads turns for conversation continuity only; the
    accumulator stays empty. Documented in §8 surface notes; users
    who want cross-session totals can script the jsonl or wait for
    the deferred Q-C2 follow-up.
 R10 (CONCERN, FOLDED). **`$%.4f` loses sub-cent precision.** A
     `0.000028` cloud cost displays as `$0.0000` — indistinguishable
     from `$0` local. **Fix:** format strings widened to `$%.6f` in
     §6 (and the warn message in §7). 6 decimal places accommodates
     the smallest observed real cost.
 R-N1..N5 (NITs, APPLIED):
  N1. §4 extraction pseudocode gains a comment noting the
      `if doc.usage` branch is INDEPENDENT of the choice branch and
      must be checked regardless of choice nil-ness (handles both
      B2 emission shapes).
  N2. §2 "Cost extraction" row referenced stale "B7"; corrected to B3.
  N3. §13 commit 3 row gains an explicit dependency note: commit 3's
      "capture the new second return value" requires commit 1's M.chat
      fix from R1 to ship first.
  N4. §3 safety.lua row + §13 commit 4 row spell out the signature
      chain: `llm_probe` → `llm_second_opinion` → `M.is_destructive`
      all widen to thread `opts.on_usage` through.
  N5. §3 PHASE0.md row + §13 commit 6 row — the PHASE0 §11 amendment
      is ALREADY in tree (committed at `3bad07b` with the formulate
      doc). Commit 6 should NOT re-apply; only adds config.lua block
      + bumps PHASE7 status header.
 **Analyze findings (2026-05-16):**
 A1. **broker.chat_stream surface is clean for the extension.** The
    existing `on_event(data)` closure inside `M.chat_stream` already
    parses `doc.error` / `doc.choices` / `delta` / tool_calls — adding
    `if doc.usage then final_usage = ... end` is one block. Emission
    happens via a closure-local `final_usage` that the post-loop code
    in `chat_stream` reads and calls `on_delta("usage", final_usage)`
    on. `build_request` needs minor extension OR (cleaner) `chat_stream`
    inserts `stream_options.include_usage = true` into the body table
    AFTER `json.encode` — but we currently encode in `build_request`.
    Cleanest: extend `build_request(model_cfg, messages, stream, opts)`
    so it can read `opts.include_usage`. Phase 7 simplifies the
    signature in passing.
 A2. **7 caller sites** identified for `opts.category` threading:
    | Site | Category |
    |---|---|
    | `safety.lua:191` (LLM probe) | `"probe"` |
    | `safety.lua:354` (norris main) | `"norris"` |
    | `repl.lua:326` (summarize-on-evict) | `"summarize"` |
    | `repl.lua:685` (call_broker wrapper, used by ask_ai) | `"main"` |
    | `repl.lua:1104` (DELEGATE: handler) | `"delegate"` |
    | `repl.lua:1587` (:memory summarize) | `"memory_summarize"` |
    | `repl.lua:2156` (:delegate meta) | `"delegate"` |
    All callers pass `opts` already; adding a `category` field is
    additive and backward-compatible (default to `"main"` when absent).
 A3. **`build_request` signature simplification.** Today it takes
    `(model_cfg, messages, stream, tools, max_tokens)` — five positional
    args. With Phase 7 needing `include_usage` AND `stream_options`,
    positional growth gets unwieldy. **Resolution:** widen to
    `(model_cfg, messages, stream, opts)` where opts carries
    `{tools, max_tokens, include_usage, stream_options}`. Callers in
    `M.chat_stream` and `M.chat` pass their existing opts table through.
    This is a refactor but contained inside broker.lua.
 A4. **Q-C3 RESOLVED: free-form categories.** The closed-set vs free-form
    debate resolved in favor of free-form per the helpers/skills
    convention already in place (Phase 6 :tree / :diff metas don't
    validate sub-args either). `:cost detail` will show whatever
    categories appear — small + documented closed set in practice
    (6 distinct categories across the 7 call sites in A2), no surprise.
 A5. **Q-C5 RESOLVED: warn fires on the call that crossed.** The crossed
    call's usage IS in the accumulator at the moment we check (we
    check AFTER `add_usage`). Firing on the NEXT call would mean a
    delay of one full broker round-trip before the user sees the
    warn — defeats the purpose. Just emit-on-cross.
 A6. **Q-C6 RESOLVED: `:reset` does NOT clear `cost_warn_fired`.**
    Parity with `usage_totals` itself (per the §2 decision row); the
    user reset their conversation, not their cost meter. The flag
    AND the totals are reset only by the explicit `:cost reset` verb.
 A7. **Norris call-graph rewires (existing safety.lua:354 path):** with
    issue #52 wired (commit `955bd82`), the Norris broker call now
    passes `helpers.scrub_msgs` / `helpers.streaming_rehydrator`. The
    on_delta wrapping pattern means I need to be careful that the new
    `("usage", payload)` kind also flows through any wrapper. Since
    secrets streaming_rehydrator only matches on `kind == "text"`, the
    "usage" kind passes through unchanged. No new entanglement.
 A8. **`ctx.usage_totals` survives `:reset` per the Phase 6 R8 invariant** (not this document's R8) — same treatment
    as `memory_items` (Phase 4) and `project` (Phase 6). Documented in
    §5 of the manifest; reinforces the "ambient context survives
    conversation reset" rule.
 A9. **Session JSONL serialization** — assistant turn dict gets an
    optional `usage` field. `history.lua` log_turn currently calls
    `json.encode(turn)` opaquely; the dkjson serializer handles nested
    tables. No code change needed; the new field flows through
    automatically when the assistant turn carries one.
 A10. **Q-C1 PARTIAL: local providers may not emit `usage`.** The
     formulate-time assumption was "treat absence as zero-cost / unknown".
     A real probe against `qwen-coder-7b-snappy-8k` is a baseline
     action — see B-probes below. The implementation will be defensive:
     if `doc.usage` never appears in the stream, no "usage" event is
     emitted, and the accumulator is unchanged for that turn. `:cost`
     output naturally reflects "0 calls counted for local model" if
     that's the case.
 A11. **Q-C4 deferred to baseline**: actual `stream_options` forwarding
     by the hossenfelder proxy must be probed against a live broker.
     If the proxy strips the option, we get no `usage` events even
     for cloud calls. Baseline action.
 PHASE0 is the locked substrate; PHASE1-6 are layered on top. This manifest
 specifies what Phase 7 adds — **cost / usage observability**: the ability
 to know, mid-session, how many tokens you've spent and how much money the
 paid-cloud calls have cost.
 PHASE0 §11 originally listed phases only through 6; this commit amends
 §11 to add Phase 7.
 ---
 ## 1. Scope of Phase 7
 Four pillars:
 1. **Usage capture in broker** — `broker.chat_stream` extracts the
   provider's `usage` block (and `cost` where present) from the response
   stream. Surfaces it to the caller via a new `on_delta("usage", ...)`
   kind. The existing `broker.chat` buffering wrapper exposes it as a
   second return value `(text, usage)`. Backward-compatible: callers
   that don't handle the new kind / second value simply ignore it.
 2. **Per-session accumulator on `ctx`** — running totals per-model AND
   per-call-category (main / delegate / summarize / memory_summarize / probe / norris) accumulate on
   `ctx.usage_totals`. No persistence across sessions in v1 (Q-C2
   defers cross-session); the session-log JSONL files DO carry per-turn
   usage so historical analysis is possible after the fact.
 3. **`:cost` meta** — a `:cost` reporter that shows the current session
   totals, with optional `:cost detail` for the per-model + per-category
   breakdown. Zero broker calls (purely local read of `ctx.usage_totals`).
 4. **Optional warning thresholds** — `cfg.cost.warn_at_dollars` and
   `cfg.cost.warn_at_tokens` emit a status the first time the running
   total crosses the configured threshold. Default off (no warnings
   without config). Useful when cloud presets are configured and you
   want a "you've spent $1 this session" nudge before runaway cost.
 **Phase 7 is done when:**
 - `broker.chat_stream` exposes usage via the new `on_delta("usage", ...)`
  callback kind; `broker.chat` returns `(text, usage)`. Backward compat
  preserved (no existing caller breaks).
 - After a session with mixed local + cloud calls, `:cost` prints a
  total like:
  ```
  [aish] session usage: 24 calls, prompt=12,450 / completion=3,210 tokens
                                  cost=$0.023400 (cloud only; local: 0)
  ```
 - `:cost detail` breaks down by model + category:
  ```
  cloud   main: 8 calls, 3850/980 tokens, $0.018000
  cloud   probe: 1 call, 150/30 tokens, $0.004200
  cloud   delegate: 1 call, 250/80 tokens, $0.001200
  fast    main: 14 calls, 8200/2100 tokens, $0 (local)
  ```
 - Session JSONL gains a `usage` field on assistant turns (when the
  broker returned one).
 - With `cfg.cost.warn_at_dollars = 0.50` set, crossing $0.50 cumulative
  emits exactly one status line.
 - Existing configs without `cfg.cost` behave exactly like Phase 6
  (Phase 6 regression coverage).
 ---
 ## 2. Technology Decisions (delta from Phase 6)
 | Decision | Choice | Rationale |
 |---|---|---|
 | Where to extract usage | In `broker.chat_stream` event loop, looking at each SSE event's `usage` field on the final chunk | The OpenAI streaming spec puts `usage` on the FINAL chunk when `stream_options: { include_usage: true }` is in the request body. The Anthropic-via-Bedrock path through OpenRouter respects this; need to verify (baseline). |
 | New on_delta kind | `on_delta("usage", { prompt_tokens, completion_tokens, total_tokens, cost?, model?, native_finish_reason? })` | Mirrors the existing `("text", chunk)` / `("tool_call", call)` shape. Callers ignore unknown kinds; backward-compatible. |
 | Where to enable usage on the wire | `opts.include_usage = true` (default `true`) sets `stream_options.include_usage = true` in the outbound request body | Off-switch for hosts that reject `stream_options`. Defaults on; baseline probe confirms current broker tolerates it. (A3: `build_request` signature widens to take an `opts` table; positional growth was getting unwieldy.) |
 | Accumulator location | `ctx.usage_totals[model_name][category]` table | ctx is per-conversation; matches the `:reset`-survives-or-not rules already in place. |
 | Categories | `"main"` (ask_ai), `"delegate"`, `"summarize"`, `"memory_summarize"`, `"probe"`, `"norris"` | One-tag-per-call-site. Tagged at the caller site (caller passes `opts.category` to `broker.chat_stream`). |
 | Cost extraction | `usage.cost` (OpenRouter convention; dollars as a number). For Anthropic/Bedrock the cost arrives in dollars on `usage.cost`. For pure local llama.cpp: no `cost` field — record as nil (R6 — preserves the local-vs-cloud-zero distinction in the accumulator). | Single field name across observed providers per baseline B3. |
 | Cost precision | Store as `number` (Lua double = 53-bit mantissa, ~15 decimal digits — plenty for sub-cent precision) | No floating-point cumulative-error concerns at this scale. |
 | Warning trigger | First crossing of each threshold emits one status for that threshold, e.g. `[aish] session cost $X.XXXXXX has crossed warn_at_dollars=$Y.YYYYYY` (6 decimals per R10). Per-threshold crossed flags are stored on ctx (`ctx.cost_warn_state`, per R4); reset only on session end / `:cost reset`. | One-shot per threshold to avoid spamming. |
 | `:reset` interaction | `:reset` does NOT clear `ctx.usage_totals` (parity with `memory_items`/`project`) — the user reset their conversation, not their cost tracking. `:cost reset` is the explicit reset verb. | Matches R8 invariant from Phase 6. |
 | Session-log persistence | Assistant turn entries gain an optional `usage` field when broker returned one. `history.lua` log_turn writes it through verbatim. | Per-turn granularity preserved for after-the-fact analysis. No new file. |
 ---
 ## 3. Module Changes
 | File | State after Phase 6 | Phase 7 changes |
 |---|---|---|
 | `broker.lua` | `chat_stream(cfg, msgs, on_delta, opts)` with text + tool_call kinds; `chat` returns text | Extract usage from final SSE chunk; emit `on_delta("usage", payload)`; `chat` returns `(text, usage)`. New `opts.include_usage` (default true); new `opts.category` (passed through as a tag in the usage payload). |
 | `context.lua` | system prompt + turns + memory + project + summary | Add `self.usage_totals` (table) + `self.cost_warn_state` (table `{ dollars = false, tokens = false }`, one flag per threshold per R4). New helpers: `Context:add_usage(model, category, usage)`, `Context:total_cost()`, `Context:total_tokens()`. `Context:reset` does NOT clear `usage_totals` (parity with memory_items / project per the Phase 6 R8 invariant). |
 | `repl.lua` | ask_ai + delegate + summarize callbacks + Norris helpers | Wire `opts.category` at each broker call site (main / delegate / summarize / memory_summarize). Wire `on_delta("usage", ...)` -> `ctx:add_usage(...)`. New `:cost` and `:cost detail` / `:cost reset` metas. Cost-warn check after each `add_usage` call. |
 | `safety.lua` | norris_step + is_destructive | Pass `opts.category = "norris"` (for the main chat_stream call) and `"probe"` (for the is_destructive LLM probe). Surfaces probe-cost in the breakdown — useful since `safety.llm_model = "cloud"` is the recommended setting. |
 | `history.lua` | session.log_turn appends JSONL entries | log_turn already takes turn opaquely; assistant turns will carry `usage` if present and it'll serialize via dkjson. No code change unless filter desired. |
 | `config.lua` | example blocks for mcp/safety/memory/routing/secrets/hooks/project | Add commented-out `cost = { warn_at_dollars, warn_at_tokens }` block. |
 | `docs/PHASE0.md` | §11 lists phases 0-6 | Amendment landed at `3bad07b` (formulate commit). N5: commit 6 does NOT re-apply. |
 No new module files.
 ---
 ## 4. Pillar 1 — Usage capture in broker
 ### SSE shape (provider-by-provider — confirm in baseline)
 For OpenAI-compatible streams with `stream_options: { include_usage: true }`:
 ```json
 data: {"id":"...","choices":[{"index":0,"delta":{"content":"Hi"}, ...}]}
 data: {"id":"...","choices":[{"index":0,"delta":{}, "finish_reason":"stop"}]}
 data: {"id":"...","choices":[],"usage":{"prompt_tokens":15,"completion_tokens":3,"total_tokens":18,"cost":0.00004,"cost_details":{...}}}
 data: [DONE]
 ```
 The final usage event arrives AFTER `finish_reason` but BEFORE `[DONE]`.
 `choices` is empty `[]` on the usage event.
 For non-streaming `chat`: usage is in the response body at the top level.
 broker.chat is a wrapper around chat_stream, so it inherits the on_delta
 path.
 For local llama.cpp via hossenfelder: usage may or may not be present
 depending on the proxy's version. Treat absence as zero-cost / unknown.
 ### Extraction algorithm
 ```lua
 local final_usage = nil
 local function on_event(data)
    ...
    -- N1: this branch is INDEPENDENT of the choice branch below;
    -- check unconditionally. Per B2, local emits usage on a
    -- choices=[] chunk (choice nil); cloud emits on a non-empty
    -- choices chunk (with finish_reason). Both shapes funnel here.
    if doc.usage then
        -- R2: payload.model is ALWAYS the caller-stable model_cfg.model
        -- (chat_stream's local upvar). When called via call_broker's
        -- fallback retry, this naturally reflects the fallback's
        -- model name — wrapper callers can key by payload.model
        -- without tracking primary-vs-fallback themselves.
        final_usage = {
            prompt_tokens     = doc.usage.prompt_tokens or 0,
            completion_tokens = doc.usage.completion_tokens or 0,
            total_tokens      = doc.usage.total_tokens or 0,
            -- R6: keep nil-vs-0 distinction at this layer; the
            -- accumulator decides how to tag local-vs-cloud-zero.
            cost              = doc.usage.cost,   -- nil for local
            model             = model_cfg.model,  -- caller-stable per B4
            category          = opts.category or "main",
        }
        -- Don't emit yet — the [DONE] event marks stream end; emit
        -- once we exit the curl.post_sse loop so the caller sees
        -- usage as the LAST event in the stream order.
    end
    -- ... existing text + tool_call handling (unchanged) ...
 end
 -- After curl.post_sse returns (stream complete). R3-related:
 -- only emit on successful streams; transport / api errors skip
 -- the usage event (caller sees the error path and accumulator
 -- stays unchanged).
 if api_err then return nil, "api: " .. api_err end
 if not ok    then return nil, "transport: " .. tostring(err) end
 if final_usage then on_delta("usage", final_usage) end
 return true
 ```
 ### `M.chat` capture (R1 — BLOCKER fix)
 `M.chat` is the non-streaming buffering wrapper. Its existing on_delta
 only captured text. Under Phase 7 it MUST also capture the usage
 payload — otherwise EVERY non-streaming caller (summarize, delegate,
 memory_summarize, probe — 4 of the 6 categories) silently reports zero.
 ```lua
 function M.chat(model_cfg, messages, opts)
    local parts        = {}
    local captured_usage  -- R1: required so M.chat returns (text, usage)
    local ok, err = M.chat_stream(model_cfg, messages,
        function(kind, payload)
            if kind == "text"  then parts[#parts + 1] = payload
            elseif kind == "usage" then captured_usage = payload
            end
        end, opts)
    if not ok then return nil, err end
    return table.concat(parts), captured_usage
 end
 ```
 Existing callers that do `local r = broker.chat(...)` automatically
 drop the second value (Lua semantics). Callers that want usage do
 `local r, u = broker.chat(...)`.
 ### Outbound include_usage
 ```lua
 local body_table = { model = ..., messages = ..., stream = true }
 if opts.include_usage ~= false then
    body_table.stream_options = { include_usage = true }
 end
 ```
 Risk: some providers reject unrecognized fields. Baseline check; if any
 host throws on `stream_options`, the per-model opt-out is one line.
 ### Category tagging
 `opts.category` is a string set by the caller. broker echoes it into the
 emitted usage payload so the accumulator knows what to credit. Default
 category if absent: `"main"`.
 ---
 ## 5. Pillar 2 — Accumulator on ctx
 ### Shape
 ```lua
 ctx.usage_totals = {
    -- [model_name] = { [category] = { prompt = N, completion = N,
    --                                 calls = N, cost = N } }
    fast = {
        main      = { prompt = 1234, completion = 567, calls = 14, cost = 0   },
    },
    cloud = {
        main      = { prompt = 3850, completion = 980, calls = 8,  cost = 0.0180 },
        delegate  = { prompt = 250,  completion = 80,  calls = 1,  cost = 0.0012 },
        probe     = { prompt = 150,  completion = 30,  calls = 1,  cost = 0.0042 },
    },
 }
 ctx.cost_warn_state = { dollars = false, tokens = false }  -- R4: one flag per threshold
 ```
 ### add_usage
 ```lua
 function Context:add_usage(model, category, u)
    model    = model    or "?"
    category = category or "main"
    self.usage_totals = self.usage_totals or {}
    local m = self.usage_totals[model] or {}
    local c = m[category] or {
        prompt = 0, completion = 0, calls = 0, cost = 0,
        is_local = false,  -- R6: cloud unless any usage came w/o cost
    }
    c.prompt     = c.prompt     + (u.prompt_tokens or 0)
    c.completion = c.completion + (u.completion_tokens or 0)
    c.calls      = c.calls      + 1
    -- R6: preserve nil-vs-0 distinction. A `nil` cost means the
    -- provider doesn't emit cost (i.e., local llama.cpp). Sticky:
    -- once a slot has seen any nil-cost call, it's flagged is_local.
    if u.cost == nil then
        c.is_local = true
    else
        c.cost = c.cost + u.cost
    end
    m[category] = c
    self.usage_totals[model] = m
 end
 function Context:total_cost()
    local total = 0
    for _, m in pairs(self.usage_totals or {}) do
        for _, c in pairs(m) do total = total + c.cost end
    end
    return total
 end
 function Context:total_tokens()
    local p, comp = 0, 0
    for _, m in pairs(self.usage_totals or {}) do
        for _, c in pairs(m) do
            p    = p    + c.prompt
            comp = comp + c.completion
        end
    end
    return p, comp
 end
 ```
 ### Reset semantics
 `Context:reset()` deliberately does NOT clear `usage_totals` —
 matches R8 invariant from Phase 6 (`:reset` clears `turns`,
 `pending_exec_output`, `summary`; preserves `memory_items`, `project`,
 and now `usage_totals`). The user reset their conversation, not their
 cost meter. `:cost reset` is the explicit reset verb for the meter.
 ---
 ## 6. Pillar 3 — `:cost` meta
 ```
 :cost                       summary line
 :cost detail                per-model + per-category breakdown
 :cost reset                 zero out ctx.usage_totals + both cost_warn_state flags
 ```
 Summary format (R10 — 6-decimal precision for sub-cent costs):
 ```
 [aish] session usage: 24 calls, prompt=12,450 / completion=3,210 tokens
                       cost=$0.023400 (cloud only; local: 0)
 ```
 Detail format (R7 — sort key is `(cost desc, model asc, category asc)`
 for deterministic ordering on equal-cost rows; R6 — annotation comes
 from the slot's `is_local` flag, NOT a `cost == 0` heuristic):
 ```
 [aish] session usage detail:
  cloud     main      8 calls,  3,850 / 980 tokens,   $0.018000
  cloud     probe     1 call,     150 / 30  tokens,   $0.004200
  cloud     delegate  1 call,     250 / 80  tokens,   $0.001200
  fast      main     14 calls,  8,200 / 2,100 tokens, $0       (local)
 ```
 Implementation: pure Lua iteration over `ctx.usage_totals`; no broker
 calls. Sort flattens into a list, sorts via `table.sort` with explicit
 3-level comparator: `cost desc, model asc, category asc`.
 ---
 ## 7. Pillar 4 — Warning thresholds
 Config:
 ```lua
 cost = {
    warn_at_dollars = 0.50,    -- emit once when cumulative cost crosses
    warn_at_tokens  = 100000,  -- emit once when cumulative tokens crosses
 }
 ```
 R5 centralizes the check inside a single `_record_usage(model, cat, u)`
 helper in repl.lua. This is the ONLY place that calls
 `ctx:add_usage`; safety.lua call sites route through it via the
 `helpers.on_usage` / `opts.on_usage` callback. Keeps `context.lua`
 decoupled from `renderer` (no module-coupling violation).
 R4: two independent flags (one per threshold) — first-to-fire must
 NOT suppress the other.
 ```lua
 -- repl.lua (sketch):
 local function _record_usage(model, category, u)
    ctx:add_usage(model, category, u)
    if not (config.cost) then return end
    ctx.cost_warn_state = ctx.cost_warn_state or { dollars = false, tokens = false }
    local cw = ctx.cost_warn_state
    if config.cost.warn_at_dollars and not cw.dollars then
        local cost = ctx:total_cost()
        if cost >= config.cost.warn_at_dollars then
            -- R10: 6-decimal format for sub-cent visibility
            renderer.status(("session cost $%.6f has crossed warn_at_dollars=$%.6f")
                            :format(cost, config.cost.warn_at_dollars))
            cw.dollars = true
        end
    end
    if config.cost.warn_at_tokens and not cw.tokens then
        local p, c = ctx:total_tokens()
        if (p + c) >= config.cost.warn_at_tokens then
            renderer.status(("session tokens %d has crossed warn_at_tokens=%d")
                            :format(p + c, config.cost.warn_at_tokens))
            cw.tokens = true
        end
    end
 end
 ```
 One-shot per threshold per session. `:cost reset` clears both
 totals AND both warn flags atomically.
 ---
 ## 8. UX Surface Summary
 | Meta | Behavior |
 |---|---|
 | `:cost` | One-line summary: calls / tokens / cost |
 | `:cost detail` | Per-model + per-category breakdown |
 | `:cost reset` | Zero out totals + clear both warn flags (dollars and tokens) |
 | Config | Default | Effect |
 |---|---|---|
 | `cfg.cost.warn_at_dollars` | nil | Status when cumulative cost first crosses this dollar amount |
 | `cfg.cost.warn_at_tokens` | nil | Status when cumulative total tokens first crosses |
 | (broker `opts.include_usage`) | true | Adds `stream_options.include_usage = true` to outbound request |
 R9 boundary note: `:resume <name>` reloads turns for conversation
 continuity but does NOT reconstruct `ctx.usage_totals` from the
 per-turn `usage` fields stored in the session JSONL. After `:resume`,
 the cost meter starts fresh from zero for the resumed session's live
 calls. The historical usage IS in the JSONL for after-the-fact
 scripting; cross-session aggregation is Q-C2 deferred work.
 ---
 ## 9. Out of Scope (Phase 7)
 - **Cross-session cost persistence** — Q-C2 defers `<history.dir>/cost.jsonl`
  rollup; v1 is session-only. Per-turn usage IS in the session JSONL for
  after-the-fact aggregation if anyone wants to script it.
 - **Per-model rate limiting / cost caps that REFUSE the call** — v1 only
  warns. A future phase could add a hard cap that aborts before the
  broker call.
 - **Pricing-table fallback for local models** — if a local model doesn't
  emit `usage.cost`, no cost is accumulated and the slot is flagged
  `is_local` (shown as `$0 (local)`, per R6). Estimating cost from token count + a
  static pricing table is a future polish (most users won't care about
  local "cost" anyway — local is free).
 - **Pretty token-bandwidth charts / sparklines** — out of scope; the
  detail breakdown is text-only.
 - **Estimated cost for future turns** — no preflight cost prediction.
 - **MCP tool-call usage** — MCP servers don't expose token usage;
  broker calls invoked DURING MCP tool dispatch ARE captured (because
  they go through the same path), but the MCP tool call itself isn't.
 ---
 ## 10. Risks
 | Risk | Mitigation |
 |---|---|
 | Some providers reject `stream_options` -> SSE errors at the top of the stream | `opts.include_usage = false` opt-out per call site; baseline-time probe of the actual hossenfelder broker behavior |
 | OpenRouter `cost` field shape varies between providers (Bedrock vs. Baidu vs. Together vs. ...) | Capture `usage.cost` as-is (number); document that the same provider must be used for cross-call comparison |
 | Local llama.cpp returns no `cost` -> displayed `$0` could mislead user "is this REALLY free?" | `:cost detail` annotates local lines with `(local)` literal; summary says `cost=$X (cloud only; local: 0)` |
 | `ctx.usage_totals` grows unboundedly with new model names mid-session | Bounded by `#models in config` × `#categories` — small constants. No mitigation needed. |
 | Warn threshold fires once and never again for a long-running session that crosses 2x / 10x the threshold | Acceptable for v1; user can `:cost reset` to re-arm. Future polish: warn at each Nx multiple. |
 | R8: `call_broker` fallback retry passes `opts.include_usage` unchanged | Documented assumption: B1 confirmed both backends accept the flag. If a future fallback host rejects, the call-site that knows can pass `opts.include_usage = false` explicitly. |
 ---
 ## 11. Open Questions (Phase 7)
 | # | Question | Resolution |
 |---|---|---|
 | Q-C1 | Provider-without-usage handling | A10 — defensive silent skip; baseline probe will confirm shape on local llama.cpp. |
 | Q-C2 | Cross-session cost persistence (`cost.jsonl`) | Deferred to follow-up phase 8; v1 is session-only. |
 | Q-C3 | Categories closed-set vs free-form | A4 — **free-form**; caller decides. Matches Phase 6 helpers/skills convention. |
 | Q-C4 | `stream_options` forwarding by hossenfelder | B1 RESOLVED — both backends accept; flag is REQUIRED for local llama.cpp, no-op for cloud. Default-true is correct. |
 | Q-C5 | Warn fires on the crossed call or the next | A5 — **on the crossed call** (no UX-defeating delay). |
 | Q-C6 | `:reset` clears `cost_warn_fired` | A6 — **no**, only `:cost reset` clears the flag (R8 parity). |
 ---
 ## 12. Phase 7 → Phase 8+ Out-of-band
 Candidate follow-ups (non-binding):
 - **Phase 8**: cross-session cost persistence (Q-C2 deferral), with
  optional cost dashboards / weekly rollup reporter.
 - **Hard rate limits / cost caps that REFUSE the call** — an extension
  of the warn surface that promotes warnings into preflight enforcement.
 - **Better tokenization** (Q1 deferred-from-Phase-3): replace the char/4
  heuristic on `Context:estimate_tokens()` with model `/tokenize` calls.
  Indirectly improves accuracy of any future "preflight cost predictor".
 Phase 7 itself is self-contained — no upstream dependencies.
 ---
 ## 13. Implementation Plan (commit-by-commit)
 Bottom-up; broker first (it's the egress point that all callers
 depend on), then context (the accumulator), then the call-site
 rewires, then the user-facing meta + warn surface, then config +
 status bump. Each commit leaves the tree green (existing tests +
 load smoke + per-commit feature smoke).
 ### Order
 1. **`broker.lua` — usage capture + signature widening.**
   - `build_request(model_cfg, messages, stream, opts)` widened to
     take an opts table; opts.tools / opts.max_tokens fold in from
     the existing positional args.
   - **R3: TWO internal callers of `build_request` exist inside
     broker.lua itself** (`M.chat_stream` at line 65-66 and indirectly
     via `M.chat`). Both must be updated in this commit; the
     migration is CONTAINED but not zero-touch. "Every caller already
     passes opts" refers to the public surface — internal `build_request`
     was positional.
   - Opts.include_usage (default true) adds `stream_options.include_usage
     = true` to the request body (per B1, required for local).
   - `M.chat_stream` event loop adds `if doc.usage then final_usage =
     doc.usage end`; after `curl.post_sse` returns, if `final_usage`
     is set, `on_delta("usage", payload)` is called. Payload includes
     `model = model_cfg.model` (caller-stable per B4 + R2), the raw
     token counts, and `cost` as a number (nil for local per B3).
   - opts.category passthrough — the broker just echoes it into the
     emitted usage payload; doesn't validate (per A4 free-form).
   - **R1: `M.chat` (non-streaming wrapper) MUST capture usage in its
     internal on_delta and return `(text, usage)`. Without this, four
     out of five non-streaming categories silently report zero.** §4
     shows the explicit update.
   - Smoke: hand-build a request with stream_options, capture all
     three on_delta kinds (text, tool_call when applicable, usage),
     confirm usage payload matches what curl shows. Also smoke
     `broker.chat(...)` returns non-nil usage for cloud calls.
 2. **`context.lua` — accumulator + helpers.**
   - `Context.new`: `self.usage_totals = {}` + `self.cost_warn_fired = false`.
   - `Context:add_usage(model, category, usage)` — increments
     `usage_totals[model][category]` slots.
   - `Context:total_cost()` — sums all cost fields across all models/categories.
   - `Context:total_tokens()` — sums prompt + completion separately.
   - `Context:reset` — does NOT touch `usage_totals` or `cost_warn_fired`
     (R8 parity with `memory_items` and `project`).
   - Smoke: 4-case inline test of add_usage / totals / reset preservation.
 3. **`repl.lua` — wire opts.category + on_delta("usage") at non-Norris call sites.**
   **N3: depends on commit 1's R1 M.chat fix shipping first.** This
   commit's "capture the second return value" pattern only works
   after M.chat actually returns one.
   - `_record_usage(model, category, usage)` helper (R5) — the single
     chokepoint that wraps `ctx:add_usage` AND does the warn check.
     Replaces all direct `ctx:add_usage(...)` invocations in repl.lua.
   - call_broker wrapper (used by ask_ai): pass `opts.category =
     "main"`; the wrapped on_delta handles `kind == "usage"` by
     calling `_record_usage(payload.model, payload.category, payload)`
     — keys by **payload.model** per R2 (handles fallback retry
     correctly without tracking primary-vs-fallback at the wrapper).
   - DELEGATE: handler: opts.category = "delegate"; capture second
     return value from broker.chat and feed to `_record_usage`.
   - :delegate meta: opts.category = "delegate"; same.
   - summarize-on-evict callback: opts.category = "summarize"; same.
   - :memory summarize: opts.category = "memory_summarize"; same.
   - Smoke: send one cloud prompt, observe ctx.usage_totals grows;
     also smoke the fallback path with a deliberately-broken primary
     and confirm usage credits the fallback model name (R2 verification).
 4. **`safety.lua` — opts.category for Norris + probe.**
   - safety.norris_step's broker.chat_stream call: pass `opts.category
     = "norris"`. The on_delta wrapper inside safety.lua already
     widens (post-#52) to handle `kind == "text"` (rehydration);
     now also handles `kind == "usage"` by calling
     `helpers.on_usage(payload.model, payload.category, payload)`.
     R5: helpers.on_usage IS repl.lua's `_record_usage`.
   - **N4 signature chain widening**: `llm_probe`, `llm_second_opinion`,
     and `M.is_destructive` all widen to thread `opts.on_usage` through:
       - `llm_probe(model_cfg, system, cmd, opts)` — pass `opts.category
         = "probe"` to broker.chat; on the `(text, usage)` return,
         if `opts.on_usage` AND `usage`, call `opts.on_usage(usage.model,
         usage.category, usage)`.
       - `llm_second_opinion(cmd, cfg, opts)` — pass opts through to
         both llm_probe calls (probe 1 + probe 2 re-roll).
       - `M.is_destructive(cmd, cfg, opts)` — opts.on_usage already in
         the table from #52's scrub_msgs/rehydrate addition; threads
         through naturally.
   - Smoke: a Norris session shows both "norris" and "probe" category
     entries in :cost detail; the probe model is named correctly
     (e.g. "cloud" if safety.llm_model = "cloud").
 5. **`repl.lua` — :cost meta + warn-threshold + HELP.**
   - :cost (summary), :cost detail (per-model+category breakdown),
     :cost reset (zero totals + clear cost_warn_fired).
   - After every ctx:add_usage call (centralized in a helper if
     possible), check cfg.cost.warn_at_dollars / warn_at_tokens;
     emit one-shot status if crossed AND cost_warn_fired is false.
   - HELP gains 3 lines for :cost.
   - Smoke: :cost shows totals; :cost detail breaks down; warn fires
     once when threshold crossed; :cost reset re-arms.
 6. **`config.lua` example block + `docs/PHASE7.md` status bump.**
   - Commented-out `cost = { warn_at_dollars = 0.50, warn_at_tokens
     = 100000 }` block in config.lua.
   - **N5: PHASE0.md §11 amendment is already in tree** (committed
     at `3bad07b` with the formulate doc). Commit 6 must NOT re-apply.
   - PHASE7.md status header → **Implement** (matches Phase 5/6
     cadence — manifest tracks implementation state).
 ### Risk index per commit
 | Commit | Risk | Mitigation |
 |---|---|---|
 | 1 (broker) | R3: build_request has TWO INTERNAL callers in broker.lua; both must be updated in this commit | Explicit in commit-1 note above; grep `build_request\(` to confirm |
 | 1 (broker) | R1: M.chat must capture usage in on_delta and return (text, usage) | §4 shows the explicit M.chat update; smoke test verifies non-nil usage on cloud call |
 | 1 (broker) | `M.chat` second return value confuses callers that do `local r = broker.chat(...)` discarding the second | Lua doesn't error on dropped return values; backward-compat preserved automatically |
 | 2 (context) | usage_totals nil on old ctx serializations | Defensive `self.usage_totals = self.usage_totals or {}` in add_usage; no migration needed |
 | 3 (repl wires) | Forgetting one call site = silent under-count | Lint by grep for `broker.chat\(` and `broker.chat_stream\(` after the wire commit; ensure each is tagged with opts.category |
 | 3 (repl wires) | R2: fallback retry credits usage to wrong model | wrapped on_delta keys by `payload.model` (set inside broker per R2), NOT by outer `model_name`; smoke a deliberately-broken-primary case |
 | 4 (safety wires) | safety.lua must NOT introduce new module dep | Use helpers.on_usage callback convention (matches #52's scrub_msgs) |
 | 4 (safety wires) | N4: llm_probe → llm_second_opinion → is_destructive signature chain widening | Spelled out in commit-4 note above |
 | 5 (:cost + warn) | warn fires multiple times when threshold is much exceeded by one call | per-threshold one-shot flag in `ctx.cost_warn_state`; explicit :cost reset to re-arm both |
 | 5 (:cost + warn) | R4: single shared flag covers two thresholds | RESOLVED — split into `cost_warn_state.dollars` + `.tokens` |
 | 6 (config + status) | N5: PHASE0 §11 already amended at `3bad07b` | This commit does NOT re-apply the amendment |
 ### Tests + smoke per commit
 Each commit:
 - Pass `luajit test_safety.lua` (87/87) and `luajit test_router_model.lua` (31/31)
 - Load cleanly via `luajit -e 'package.path=...; require("repl"); print("ok")'`
 - Pass a per-feature smoke (described in each row above)
 ### Things deliberately NOT split
 - broker.chat backward-compat shim — Lua's multiple-return-values
  semantics handle it automatically (existing `local r = broker.chat(..)`
  drops the new `usage` value).
 - Per-category sub-tables — flat `model -> category -> counters` is
  simple enough; nesting deeper for e.g. timestamps is v2.
 - Cross-session persistence — explicitly Q-C2 deferred to phase 8.
 ### Open at plan-time (resolve at implement)
 - Whether `safety.is_destructive`'s opts should carry `on_usage`
  callback explicitly OR thread through cfg.helpers (the latter
  matches the Norris helpers convention but is more coupling).
  Decide at commit 4. Default to explicit opts.on_usage for minimum
  surface.
 - Whether to emit a `[aish] usage: model=X prompt=N completion=M cost=$X`
  status line PER TURN (verbose mode) or only via :cost on demand.
  v1 = on demand only; verbose mode is a follow-up nice-to-have.
@@ -0,0 +1,110 @@
 # Phase 8 Baseline — pre-implementation measurements
 **Date:** 2026-05-16
 **Tree probed:** `1a136d8` (PHASE8 formulate + analyze + pillar-5 addition).
 **Broker probed:** `hossenfelder.fritz.box:8082` (local `qwen-coder-7b-snappy-8k` was the active local model at probe time).
 ---
 ## B1. `/tokenize` ignores the `model` request field
 Probed three variants of the same request:
 | Request body | Response |
 |---|---|
 | `{"model":"qwen-coder-7b-snappy-8k","content":"hello world"}` | `{"tokens":[14990,1879]}` |
 | `{"model":"Qwen2.5-7B-Instruct-Q4_K_M.gguf","content":"hello world"}` | `{"tokens":[14990,1879]}` (identical) |
 | `{"content":"hello world"}` (no model) | `{"tokens":[14990,1879]}` (identical) |
 **Q-T5 RESOLVED**: hossenfelder's `/tokenize` does NOT switch
 tokenizer based on the request's `model` field. It returns the
 tokenization of whichever backend model is currently loaded by the
 proxy. For aish purposes this is **acceptable** — we get a real BPE
 tokenizer count rather than char/4. The accuracy gap from using a
 different model's tokenizer than the one that will receive the
 completion is minor (Qwen / Llama tokenizers are similar in BPE
 vocabulary scale; both are far more accurate than char/4).
 **Implication for §4**: keep sending the `model` field anyway (it's
 harmless and may help if the proxy gains per-model routing later).
 Document the limitation: counts are from the proxy's loaded model,
 NOT necessarily the model_cfg.model requested. For cloud presets
 that route through OpenRouter, `/tokenize` 404s anyway and the
 char/4 fallback fires — no inaccuracy concern there.
 ---
 ## B2. `/tokenize` round-trip latency
 Four probes against `hossenfelder.fritz.box:8082` for random-base64
 payloads of varying sizes:
 | Input size (chars) | Tokens returned | Round-trip (ms) |
 |---|---|---|
 | 50    | 39   | 23 |
 | 500   | 369  | 34 |
 | 2000  | 1509 | 32 |
 | 5000  | 3741 | 24 |
 **Latency is flat at ~25-35ms** across the size range, dominated by
 network round-trip (not tokenizer cost). This is comfortably under
 the §4 formulate-time estimate of "~50ms per call".
 **Implication for §5**: per-turn `_tokens` cache amortizes cost to
 O(1) after first count. Worst case: a session with 40 buffered turns
 and no `_tokens` cached yet pays 40 × 30ms = 1.2s once, on
 `enforce_budget`'s first call (after that, cached). Acceptable.
 The token count for random base64 input is unusually high
 (~74% tokens-per-char vs ~25% for natural prose). This is because
 base64 lacks the common-token patterns BPE compresses. Natural-text
 sessions tokenize closer to char/4 (per earlier prose probe: 558
 tokens for 2032 chars = 27.5%).
 ---
 ## B3. `/tokenize` body shape — `{tokens: [int, int, ...]}`
 Confirmed across all probes: response is `{"tokens": [N1, N2, ...]}`
 where each `Ni` is the token ID (integer). For aish purposes we only
 need the count (`#response.tokens`), so the token IDs themselves are
 discarded.
 The response is JSON (not SSE), so `ffi.curl.M.post` (blocking POST)
 is the right call — not `M.post_sse`.
 ---
 ## B4. No /tokenize on cloud (OpenRouter) — char/4 fallback path validated
 Already probed during formulate-time:
 ```
 curl http://hossenfelder.fritz.box:8082/v1/tokenize  -> 404
 curl http://hossenfelder.fritz.box:8082/tokenize ... model=anthropic/...
  -> 404 (or returns the LOADED-local-model's tokenization; not the cloud's)
 ```
 The hossenfelder proxy doesn't forward `/tokenize` to OpenRouter
 (which doesn't expose it). Our per-endpoint capability cache will
 mark it as unsupported on first probe; subsequent cloud calls use
 char/4 silently.
 **No design change needed** — formulate's "cache capability per
 (endpoint, model) on first probe" handles this naturally.
 ---
 ## Summary
 | Finding | Affects | Resolution |
 |---|---|---|
 | B1 /tokenize ignores `model` field | §4 token_count accuracy gap | Document; acceptable — BPE >> char/4 even with wrong tokenizer |
 | B2 ~25-35ms latency, flat over size | §5 per-turn cache strategy | Per-turn cache amortizes; worst case 1.2s on first enforce_budget |
 | B3 `{tokens: [...]}` body shape | §4 broker.token_count parser | Confirmed; one-liner JSON parse |
 | B4 cloud /tokenize 404 | §4 capability detection | Cache as unsupported on first probe; char/4 fallback fires silently |
 All findings align with the formulate/analyze design. No
 structural changes needed. Ready for plan.
 **Q-T5 RESOLVED** per B1. All open questions now resolved.
@@ -0,0 +1,622 @@
 # aish — Phase 8 Manifest
 **Project:** aish — AI-augmented conversational shell
 **Document:** Phase 8 Requirements, Architecture & Design Decisions
 **Status:** Implement (5 commits landed: 7ef2a6e, 8502517, db26d0c, 94b7d86, this)
 **Date:** 2026-05-16
 **Review findings (independent Sonnet agent, 2026-05-16) — 2 BLOCKERs
 resolved in-place, 4 CONCERNs folded, 4 NITs applied:**
 R1 (BLOCKER, RESOLVED). **§5 pseudocode missing per-turn cache pattern.**
    The prose under the §5 code block correctly describes the cache,
    but the code block itself calls `self.tokenize_fn(t.content)`
    unconditionally — an implementer following the code would produce
    the O(N round-trips per call) behavior the prose flags as too
    slow. **Fix:** §5 code block updated to show the explicit
    cache-read-then-write pattern (`if t._tokens then ... else
    t._tokens = self.tokenize_fn(t.content) end`). §13 commit 2 row
    also calls this out explicitly.
 R2 (BLOCKER, RESOLVED). **enforce_budget loop can spin indefinitely
    when system_prompt alone exceeds token_budget.** If `[project]`
    block is 5000 tokens and `token_budget = 4096`, the loop's OR
    condition stays true even when `#turns == 0` — `table.remove`
    is a no-op, the loop never exits. **Fix:** §13 commit 3 row
    updated to specify the explicit guard: `while (#self.turns >
    self.max_turns or self:estimate_tokens() > self.token_budget)
    and #self.turns > 0 do`. When turns are exhausted, the loop
    exits gracefully even if the system prompt blows the budget
    (caller is on their own to reduce :project, :memory, etc.).
 R3 (CONCERN, FOLDED). **`:cost detail` comparison semantically
    undefined.** Sum-of-prompt_tokens across all calls (accumulator)
    vs current-snapshot estimate are incommensurable — sessions
    with evictions ALWAYS show divergence not because heuristic is
    wrong but because they measure different things. **Resolution:**
    §6 reworked to drop the per-slot `~est=N` inline annotation
    (which conflated the two); instead show a SINGLE trailing
    "[estimated session ctx: N tokens]" line under :cost detail.
    Cleanly separates the running-total accumulator from the
    current-snapshot estimate. §13 commit 4 already pointed to this
    direction — now §6 matches.
 R4 (CONCERN, FOLDED). **tokenize_fn closure must reference `active_cfg`
    by upvalue, not by value-capture.** If the implementer writes
    `local cfg = active_cfg; return function(text) ... cfg ... end`,
    the closure won't follow `:model` switches. **Fix:** §13 commit
    4 row gains an explicit code note: the closure MUST be
    `function(text) return broker.token_count(active_cfg, text) end`
    — direct upvalue reference. A5 analysis verified upvalue
    semantics; now spelled out so the implementer doesn't subtly
    miss it.
 R5 (CONCERN, FOLDED). **2s tokenize timeout can spuriously cache as
    unsupported when llama.cpp is busy serving a concurrent
    completion.** llama.cpp is single-threaded for inference; a
    /tokenize request that arrives mid-generation queues behind
    inference and may exceed the 2s cap. The capability would then
    cache as `false` for the rest of the session, even though the
    endpoint IS capable. **Fix:** §9 risk row added documenting
    this. Mitigation: 2s is reasonable for IDLE responses but if
    practical problems surface, bump to 5s or make configurable
    (`cfg.tokenize.timeout_ms`). v1 ships 2s; revisit in verify if
    it bites.
 R6 (CONCERN, FOLDED). **Per-(endpoint, model) cache key needlessly
    separates two same-endpoint/different-model presets.** B1 confirmed
    /tokenize ignores the model field, so such presets cost two probes
    per session when one would suffice. **Fix:** §4 cache key SIMPLIFIED to
    just `model_cfg.endpoint` (B1-justified). Same-endpoint
    presets share one cache entry; one probe per endpoint per
    session, not per (endpoint, model). For a future broker that
    DOES honor the model field, this design choice would need
    revisiting — documented inline.
 R-N1..N4 (NITs, APPLIED):
  N1. §13 commit 3 condition uses uppercase `OR`/`AND` — corrected
      to Lua's lowercase `or`/`and`.
  N2. §10 Q-T5 row's "Resolution target" cell was empty; now reads
      "Baseline (B1)" for consistency.
  N3. §6 outdated inline `~est=N` description removed; new approach
      (single trailing summary line) is documented; §8 out-of-scope
      bullet about per-call comparison stays as the explicit "we
      considered, rejected" record.
  N4. PHASE8.md status header (formerly carrying a stale tree hash
      that would drift before implementation) now references the
      latest tree as of this fold-in (`aa64ad3`). Commit 5's status
      bump to "Implement" will refresh it again at that point.
 **Analyze findings (2026-05-16):**
 A1. **enforce_budget ONLY checks max_turns, not token_budget — major
    scope gap.** `Context:enforce_budget` (context.lua:319) iterates
    `while #self.turns > self.max_turns`; `self.token_budget = 4096`
    is set but NEVER consulted. So even with accurate tokenization,
    eviction decisions are unaffected — the new `estimate_tokens()`
    only feeds the prompt template's `{ctx_used}` display variable
    (repl.lua:630).
    **Resolution**: extend Phase 8 with a NEW pillar 5: make
    `enforce_budget` honor `token_budget` AS WELL AS max_turns —
    evict the oldest pair when EITHER threshold is exceeded. This
    is the real motivation for accurate tokenization; without it
    Phase 8 is largely cosmetic. Folded into §1 (5 pillars now),
    §3 (context.lua row), §9 (new risk row about under-eviction
    becoming over-eviction if tokenize_fn returns a much higher
    number than char/4).
 A2. **`ffi.curl.M.post` signature confirmed.** `(body, status)` on
    success, `(nil, err)` on failure. Matches the formulate-time
    sketch. status is the integer HTTP code. The probe checks
    `status == 200 and out` correctly.
 A3. **Single caller of `Context:estimate_tokens()` in tree.** Only
    `repl.lua:630` (prompt template `{ctx_used}` substitution) calls
    it. No internal callers in context.lua. This means:
    - The wiring point is ONE line in repl.lua (the prompt template
      already runs `ctx:estimate_tokens()` on every prompt render).
    - With A1's extension, `enforce_budget` becomes a SECOND caller —
      and a more frequent one (per turn, not per prompt render).
    - Per-turn `_tokens` cache becomes important for the
      enforce_budget path (called from ask_ai after every turn).
 A4. **Q-T1 RESOLVED**: per-turn `_tokens` cache lives on the turn
    dict. `:reset` clears `ctx.turns` so the cache dies with them.
    New turns get nil `_tokens`; lazy-set on first count. Trivial.
 A5. **Q-T2 RESOLVED**: tokenize_fn closure captures `active_cfg` as
    an UPVALUE. Upvalues are resolved at closure call time, not at
    definition time. When the user `:model cloud` switches,
    `active_cfg = config.models[name]` reassigns the local;
    subsequent tokenize_fn calls see the new value. Natural; no
    explicit re-binding needed.
 A6. **Q-T3 RESOLVED**: skip the probe entirely when
    `cfg.tokenize.use_endpoint = false` or unset. Don't even call
    `broker.token_count` — repl.lua won't wire `tokenize_fn` to
    Context.new in the first place. context.lua's tokenize_fn-nil
    branch handles it (char/4 fallback).
 A7. **Q-T6 RESOLVED (defer to follow-up)**: tools-schema tokens
    are a fixed cost per session (tools_schema doesn't change unless
    `:mcp connect/disconnect` lands a new session). The under-count
    is bounded and predictable. Defer to a future polish; v1
    counts only messages. Document in §8 out-of-scope.
 A8. **Per-turn `_tokens` cache invalidation.** Turn `content` is
    immutable after append (we don't mutate stored turns). Cache is
    safe to live forever on the turn. The only invalidation event
    is `:reset` (clears turns wholesale). No other invalidation
    needed.
 A9. **Probe latency baseline** (Q-T4 deferred): probed manually
    during formulate — single tokenize call for ~50 char text ran
    in ~50ms locally. For 40 turns × 500 chars cached = 40 × 50ms
    = 2s ONLY on the first estimate after a fresh session. After
    caching, subsequent estimates are O(1) per turn (dict lookups).
 A10. **Streaming during `chat_stream` interleaves with tokenize?**
     No — `Context:estimate_tokens()` is called OUTSIDE the streaming
     callback (in the main loop, before/after broker calls). No
     concurrent network competition.
 A11. **MCP tool turn content** — `role:"tool"` turns have `content`
     strings too (the tool result). These get tokenized identically;
     no special-case needed. Cache key is the turn dict itself, so
     tool turns get their own `_tokens` slot.
 A12. **`include_usage` interaction with tokenize**: orthogonal. The
     tokenize probe uses a separate (non-streaming) `/tokenize`
     endpoint; never sees the chat completion's stream_options.
 PHASE0 is the locked substrate; PHASE1-7 are layered on top. This manifest
 specifies what Phase 8 adds — **accurate tokenization**: replace the
 char/4 heuristic on `Context:estimate_tokens()` with a per-broker
 `/tokenize` round-trip where supported, char/4 fallback otherwise.
 Resolves Q1 (`PHASE0.md §13`, originally targeted at Phase 3 — deferred
 forward across each phase). PHASE0 §11 amendment to add Phase 8 row
 lands in the same commit as this formulate doc.
 ---
 ## 1. Scope of Phase 8
 Five pillars (A1 added pillar 5):
 1. **Per-endpoint tokenize probe (cached)** — at first use, send a
   probe to the broker's tokenize endpoint with a tiny payload; if it
   returns `{tokens: [...]}` we mark the endpoint as tokenize-
   capable and use the actual count thereafter. If it 404s or errors,
   mark the endpoint as `tokenize_supported = false` and fall through to
   char/4 silently. Cached per endpoint for the session (R6: B1 showed
   `/tokenize` ignores the model field).
 2. **`broker.token_count(model_cfg, text)`** — thin wrapper that
   returns an accurate token count when the endpoint is
   tokenize-capable, else the char/4 heuristic. Always returns a
   non-negative integer; never errors. The probe + fallback is
   transparent to callers.
 3. **`Context:estimate_tokens()` widening** — currently char/4 over
   `system_prompt` + sum of `turn.content`s. The new shape accepts
   an optional `tokenize_fn` (callback) at `Context.new` time and uses
   it when present; falls back to char/4 when nil. `repl.lua` wires
   `tokenize_fn = function(text) return broker.token_count(active_cfg, text) end`.
   This means the active model's tokenizer is used for budgeting
   decisions, which matches the broker the next ask_ai will hit.
 4. **`:cost detail` estimated-vs-actual column** — for each
   (model, category) slot in the accumulator, the actual
   `prompt_tokens` from broker usage is already stored. Add an
   estimated column computed via `broker.token_count` on the
   currently-buffered prompt-shape. Disagreement >10% surfaces in a
   tiny `~est=N` annotation so users can see when the heuristic
   diverges from reality. Display-only; no behavior change.
   **Superseded by R3:** the per-slot `~est=N` annotation was dropped in
   favor of a single trailing "[estimated session ctx: N tokens]" line
   under `:cost detail` (see §6).
 5. **`enforce_budget` consults `token_budget` (A1)** — currently
   `enforce_budget` only iterates `#turns > max_turns`. Extend to
   ALSO check `estimate_tokens() > token_budget`. Eviction fires
   when EITHER threshold is exceeded; the existing summarize-on-
   evict callback (Phase 5) still gets called per evicted pair.
   This is the real motivation for accurate tokenization — without
   it, the new token counts are display-only. Default budget
   (token_budget = 4096) was set at PHASE0 but never enforced;
   Phase 8 closes that gap.
 **Phase 8 is done when:**
 - A long-running session with the local `qwen-coder-7b-snappy-8k`
  model evicts at the RIGHT moment (token_budget=4096 hit triggers
  eviction via the new pillar 5 path) rather than only when
  max_turns is exceeded.
 - `broker.token_count(local_cfg, "hello world")` returns 2 (matches
  the live tokenize result, not the char/4=2 coincidence — verify
  via `:cost detail` against multi-paragraph text).
 - `broker.token_count(cloud_cfg, "hello world")` returns 2 (char/4
  fallback when /tokenize 404s, which it does for OpenRouter).
 - Cached per-endpoint capability — the probe fires once per
  endpoint per session, not per call.
 - Existing configs without `cfg.tokenize` behave like Phase 7 (zero
  behavior change unless opted in via `cfg.tokenize.use_endpoint = true`).
 - `:cost detail` shows estimated-vs-actual where disagreement >10%,
  silent otherwise.
 ---
 ## 2. Technology Decisions (delta from Phase 7)
 | Decision | Choice | Rationale |
 |---|---|---|
 | Tokenize endpoint path | `<endpoint>/tokenize` (NOT `<endpoint>/v1/tokenize`) | Per real probe against hossenfelder: `/v1/tokenize` returns 404; `/tokenize` returns `{tokens: [...]}`. This is the llama.cpp server convention. |
 | Request body shape | `{"content": "<text>", "model": "<model>"}` | Local model echoed via `model`; llama.cpp ignores it but harmless. Probed shape works. |
 | Capability detection | Per-call optimistic probe; on 404/non-200, cache `_tokenize_capable[endpoint] = false` (endpoint-only key per R6) and never retry that session | One round-trip cost on first miss; zero on subsequent. Sessions are short enough that re-probe across restarts is fine. |
 | Fallback heuristic | char/4 (Phase 0 §8 convention) | Established; underestimates ~10% on real code/prose per baseline B2 (558 actual tokens vs 508 by char/4), but acceptable when no better signal available. |
 | `Context:estimate_tokens` calling convention | Optional `tokenize_fn` callback at Context.new; absent = char/4 (existing behavior) | Backward-compatible; no caller break. Opt-in via repl.lua. |
 | Active-model tokenizer | repl.lua wires `tokenize_fn` against `active_cfg` (the currently active model), so eviction decisions match the broker the next call will hit | When the user `:model cloud` switches mid-session, subsequent estimates use cloud's tokenizer (which falls back to char/4 since OpenRouter has no /tokenize). |
 | Caching strategy | Per-endpoint capability flag (R6) plus the per-turn `_tokens` count cached on each turn dict (§5); NO global per-text token-count cache | Token counts depend on text content; a global text cache adds memory + correctness risk for marginal speed. Probe latency dominates only on first call per endpoint. |
 | Per-text timeout cap | 2s for tokenize calls (much tighter than the model's normal timeout_ms) | Tokenize is a small, fast operation; if it doesn't respond in 2s, the endpoint is misbehaving. Bail to char/4. |
 | `:cost detail` est-vs-actual | Show only when disagreement >10%; format `(prompt: 558 ~est=508 / completion: 80)` for the disagreement case, `(prompt: 558 / completion: 80)` otherwise | Always-on noise; suppress when heuristic is close. |
 | New config key | `cfg.tokenize = { use_endpoint = true }` — default false until user opts in | Network round-trip cost; user-acknowledged behavior change. |
 ---
 ## 3. Module Changes
 | File | State after Phase 7 | Phase 8 changes |
 |---|---|---|
 | `broker.lua` | `chat`, `chat_stream`, `build_request` (opts-widened in Phase 7) | New `M.token_count(model_cfg, text)`: calls `<endpoint>/tokenize`; caches capability per endpoint (R6); returns int. New `M.tokenize_supported(model_cfg)` introspection helper for tests. |
 | `context.lua` | `estimate_tokens()` char/4 sum over system_prompt + turn.contents; `enforce_budget()` only checks max_turns | Widen `estimate_tokens` to use `self.tokenize_fn(text)` if present; else char/4. Per-turn `_tokens` cache on each turn dict; lazy-set on first count. Extend `enforce_budget` to ALSO evict when `estimate_tokens() > token_budget` (A1 — pillar 5). |
 | `repl.lua` | wires Context.new with summarize_fn, hosts all metas | `tokenize_fn` wired into Context.new when `cfg.tokenize.use_endpoint = true`. `:cost detail` extended with est-vs-actual column. |
 | `config.lua` | Phase 7 cost block example | Add commented-out `tokenize = { use_endpoint = true }` block. |
 | `docs/PHASE0.md` | §11 lists phases 0-7 | Amendment: add Phase 8 row to §11. |
 No new module files.
 ---
 ## 4. Pillar 1+2 — `broker.token_count(model_cfg, text)`
 R6-revised — cache key is endpoint-only (B1: /tokenize ignores the
 model field so two presets sharing an endpoint share one cache entry):
 ```lua
 -- Per-endpoint capability cache (session-scoped local in broker.lua).
 -- Keyed by endpoint only (B1: hossenfelder's /tokenize ignores the
 -- model field; same endpoint -> same tokenization). If a future
 -- broker honors the model field, revisit this keying.
 local _tokenize_capable = {}    -- [endpoint] = true | false
 -- char/4 heuristic; the single fallback used whenever /tokenize is
 -- unavailable or its answer is unusable.
 local function _char4_estimate(text)
    return math.floor(#text / 4)
 end
 --- Count the tokens in `text` for the broker behind `model_cfg`.
 -- Tries `<endpoint>/tokenize` unless that endpoint is already cached
 -- as unsupported; any failure (non-200, missing body, undecodable or
 -- wrongly-shaped JSON) caches the endpoint as unsupported and falls
 -- back to char/4. Never raises on a bad response.
 -- @param model_cfg table|nil  reads `.endpoint` and `.model`
 -- @param text      string|nil nil is treated as ""
 -- @return number   non-negative integer token count (0 for empty text)
 function M.token_count(model_cfg, text)
    text = text or ""
    if text == "" then return 0 end
    local ep = model_cfg and model_cfg.endpoint
    if not ep or _tokenize_capable[ep] == false then
        return _char4_estimate(text)   -- no endpoint, or known-unsupported
    end
    -- Capability is unknown (nil) or confirmed (true): try the endpoint.
    local url = ep:gsub("/+$", "") .. "/tokenize"
    local body = json.encode({ content = text, model = model_cfg.model })
    local out, status = curl.post(url, body,
        { "Content-Type: application/json" },
        2000)  -- 2s timeout
    if status == 200 and out then
        -- pcall: a 200 with a malformed body must not propagate a decode
        -- error to callers (whether json.decode raises or returns nil
        -- depends on the json library in use).
        local ok, doc = pcall(json.decode, out)
        local toks = ok and type(doc) == "table" and doc.tokens
        if type(toks) == "table" then
            _tokenize_capable[ep] = true
            return #toks
        end
    end
    _tokenize_capable[ep] = false
    return _char4_estimate(text)
 end
 --- Introspection helper: report the cached /tokenize capability.
 -- @param model_cfg table|nil  reads `.endpoint`
 -- @return true|false|nil  cached flag for the endpoint; nil when there
 --   is no endpoint or the endpoint has not been tried yet
 function M.tokenize_supported(model_cfg)
    local endpoint = model_cfg and model_cfg.endpoint
    if endpoint then
        return _tokenize_capable[endpoint]
    end
    return nil
 end
 ```
 Uses Phase 1's `ffi/curl.M.post` (blocking POST, returns body + status).
 ---
 ## 5. Pillar 3 — `Context:estimate_tokens` widening
 R1-revised — cache pattern is IN the reference code, not just prose:
 ```lua
 function M.new(opts)
    ...
    return setmetatable({
        ...
        -- Phase 8: optional callback that returns an accurate token
        -- count for a given text. Set by repl.lua when cfg.tokenize.
        -- use_endpoint=true, calling broker.token_count(active_cfg, ...).
        -- nil = char/4 fallback (Phase 0 §8 behavior).
        tokenize_fn          = opts.tokenize_fn,
    }, Context)
 end
 -- Estimate the token footprint of the system prompt plus all live turns.
 -- Without a `tokenize_fn` this is the char/4 heuristic; with one, each
 -- text is counted through the callback and per-turn results are
 -- memoized on the turn table.
 function Context:estimate_tokens()
    if not self.tokenize_fn then
        -- char/4 fallback (existing behavior)
        local chars = #self.system_prompt
        for _, turn in ipairs(self.turns) do
            chars = chars + #turn.content
        end
        return math.floor(chars / 4)
    end
    -- The system prompt is recomposed per call (memory/project/summary
    -- blocks are dynamic), so it is counted fresh every time: one
    -- round-trip per estimate.
    local total = self.tokenize_fn(self.system_prompt)
    -- R1: memoize on the turn itself. Turn content is immutable after
    -- append (A8), so a stored count never goes stale, and dropping
    -- the turns on :reset drops the memo with them.
    for _, turn in ipairs(self.turns) do
        if turn._tokens == nil then
            turn._tokens = self.tokenize_fn(turn.content)
        end
        total = total + turn._tokens
    end
    return total
 end
 ```
 Performance: first call after a fresh session fires N+1 round-trips
 (N turns + 1 system prompt). Subsequent calls fire 1 (system prompt)
 + N dict lookups. For N=40, that's 40 × ~30ms = 1.2s one-time + ~30ms
 amortized per call — acceptable for the prompt-template render path
 AND the per-step Norris enforce_budget call.
 ---
 ## 6. Pillar 4 — `:cost detail` est-vs-actual
 Current `:cost detail` (Phase 7) shows:
 ```
  anthropic/claude-haiku-4.5 main                 1 calls,    179 /      8 tokens, $0.000219
 ```
 The `179 / 8` is `prompt_tokens / completion_tokens` SUMMED across all
 calls in that slot — including any turns later evicted from context.
 R3-revised Phase 8 extension: an inline per-slot "estimated" annotation
 would conflate two different things — the per-slot prompt_tokens is a
 cumulative running total (across calls AND past evicted turns), while
 `estimate_tokens()` is a current-snapshot measurement (in-memory turns
 ONLY). Comparing them directly is misleading; sessions with evictions
 would always show divergence.
 Instead, add a SINGLE trailing summary line after the slot rows:
 ```
  ... per-slot rows ...
  [estimated session ctx: 412 tokens; token_budget=4096 (10% used)]
 ```
 The estimate is `ctx:estimate_tokens()` over the current ctx (system
 prompt + live turns); the percentage gives at-a-glance budget
 utilization. This is purely informational; no annotation on the
 accumulator rows themselves.
 ---
 ## 7. UX Surface Summary
 | Meta | Behavior change |
 |---|---|
 | `:cost detail` | Adds ONE trailing summary line after the per-slot rows — `[estimated session ctx: N tokens; token_budget=M (X% used)]` (R3, §6); no per-slot annotation |
 | (no new metas in v1) | |
 | Config | Default | Effect |
 |---|---|---|
 | `cfg.tokenize.use_endpoint` | false | When true, repl.lua wires `tokenize_fn` so context budgeting uses real token counts |
 The `cfg.tokenize` block being opt-in is conservative: enabling it
 means every `Context:estimate_tokens()` call may hit the broker. For
 local llama.cpp the cost is ~50ms; for cloud-only configurations there
 IS no /tokenize endpoint so we silently fall through to char/4 (cached
 after one probe). No surprise; document in config example.
 ---
 ## 8. Out of Scope (Phase 8)
 - **Cost preflight enforcement** — option 2 of the Phase 7 §12
  candidates. The tokenize work here is a PREREQUISITE for accurate
  preflight cost estimation, but the enforcement layer itself
  (cap_at_dollars that REFUSES the call) is its own surface — defer
  to a separate phase.
 - **Cross-session cost rollup** — option 1 of Phase 7 §12 candidates.
  Independent of tokenization.
 - **Streaming tokenize** — some servers expose streaming tokenize
  endpoints for partial-prompt token counts during generation. Out
  of scope here; we use the blocking /tokenize for batch estimates.
 - **Multi-tokenizer support** (e.g. tiktoken for OpenAI compat,
  sentencepiece for HuggingFace) — would require vendoring a C library
  (violates PHASE0 §3) or shelling out to python. Endpoint-based is
  the only substrate-compliant option for accuracy beyond char/4.
 - **Tokenization for `:cost detail` rows that span multiple turns**
  — the actual `prompt_tokens` in the accumulator slot is the sum
  ACROSS calls, while the estimate covers only the CURRENT ctx
  content. Per R3 (§6) the two are not compared per slot; only the
  single trailing session-ctx estimate line is shown.
 ---
 ## 9. Risks
 | Risk | Mitigation |
 |---|---|
 | `/tokenize` 404 silently cached as `tokenize_supported = false` for a typo'd endpoint config | Per-session cache; restart re-probes. Acceptable. |
 | Tokenize round-trip on every prompt eviction check adds 50ms × N turns latency | `turn._tokens` per-turn cache, filled lazily on the first estimate after a turn is appended (§5, Q-T1); only cache misses re-tokenize. |
 | Hossenfelder proxy may forward `/tokenize` differently than direct llama.cpp (e.g., adds `/v1/` prefix expected) | B1 confirms `/tokenize` works against hossenfelder; other proxies untested but the design degrades gracefully (char/4 fallback). |
 | Cloud models without /tokenize emit no probes after first 404 — fine, but the `:cost detail` session-ctx estimate line then reports a char/4 estimate rather than a real token count | Documented; no fix needed. Per R3 (§6) there is no per-slot est-vs-actual annotation to show or hide. |
 | `Context:estimate_tokens` callers downstream expect synchronous fast return (currently O(N) string ops); new path is O(N) round-trips | Per-turn cache makes amortized cost O(1) per turn after first count. |
 | Endpoint URL handling — currently `endpoint .. "/v1/chat/completions"` is hardcoded; tokenize uses `endpoint .. "/tokenize"` (no /v1) — asymmetric | Document the asymmetry inline; the llama.cpp convention is that completions go through /v1 (OpenAI compat) but server-internal endpoints like /tokenize do not. |
 | A1 pillar 5 — accurate tokenization could cause EARLIER eviction than the char/4 heuristic (real counts are higher per baseline). User session that fit in 4096 tokens under char/4 may now spill. | Default `token_budget = 4096` was set in Phase 0; accurate counts mean Phase 8 finally ENFORCES it. Users on `cfg.context.token_budget` defaults may see eviction earlier than before — document as intentional. Users can raise `token_budget` per their model's real context window. |
 | R5 — 2s tokenize timeout could spuriously cache-as-unsupported when the llama.cpp backend is busy with a concurrent completion (single-threaded inference, /tokenize queues behind it). Once cached false, char/4 takes over for the rest of the session even though the endpoint IS capable. | 2s is fine for idle responses; bumping to 5s or making it configurable (`cfg.tokenize.timeout_ms`) is a v1.1 polish if it bites in practice. Documented; revisit during verify. |
 ---
 ## 10. Open Questions (Phase 8)
 | # | Question | Resolution |
 |---|---|---|
 | Q-T1 | Per-turn `_tokens` cache across `:reset` | A4 — dies with turns; new turns get nil and lazy-set on first count. Trivial. |
 | Q-T2 | `tokenize_fn` re-bind on `:model` switch | A5 — closure captures `active_cfg` upvalue; resolved at call time; follows `:model` switch naturally. No explicit re-binding needed. |
 | Q-T3 | Probe respects opt-out | A6 — when `cfg.tokenize.use_endpoint = false`, repl.lua doesn't wire `tokenize_fn`; context.lua's nil branch takes the char/4 fallback. No probe call at all. |
 | Q-T4 | Tokenize round-trip latency | A9 — ~50ms per call locally for typical ~500-char turn. With per-turn cache, amortized O(1) per turn after first count. |
 | Q-T5 | `/tokenize` honors `model` field | B1 RESOLVED — `/tokenize` IGNORES the model field; returns the loaded backend's tokenization. Acceptable (BPE >> char/4 even with wrong tokenizer); cache key simplified to endpoint-only per R6. |
 | Q-T6 | tools-schema tokens | A7 — deferred to follow-up. Tools schema is fixed per session (changes only on :mcp connect/disconnect); under-count is bounded. v1 counts messages only. |
 ---
 ## 11. Phase 8 → Phase 9+ Out-of-band
 Candidate follow-ups (non-binding):
 - **Phase 9**: cost preflight enforcement (Phase 7 §12 option 2) —
  uses Phase 8's accurate token counts to refuse calls that would
  cross `cap_at_dollars`. The accuracy work here is the foundation.
 - **Cross-session cost rollup** (Phase 7 §12 option 1) — independent;
  could land in parallel.
 - **Phase X**: project-local config overlay (`.aish.lua`) — was the
  alternative scope to Phase 7's cost work. Still valuable but
  independent of any current line.
 Phase 8 itself is self-contained — no upstream dependencies.
 ---
 ## 13. Implementation Plan (commit-by-commit)
 Bottom-up: broker first (the egress capability all callers depend
 on), then context (the consumer + the new pillar 5 budget extension),
 then repl.lua wiring + display, then config + status bump. Each
 commit leaves the tree green (existing tests + load smoke + per-
 commit feature smoke).
 ### Order
 1. **`broker.lua` — `M.token_count` helper + per-endpoint capability cache.**
   - Module-local `_tokenize_capable` table keyed by `endpoint` only (R6/B1: /tokenize ignores the model field).
   - `M.token_count(model_cfg, text)`:
     - empty text -> 0
     - bad cfg (no endpoint) -> char/4 immediately
     - capability cache says `false` for this slot -> char/4
     - otherwise: probe `<endpoint>/tokenize` with `{content, model}` body,
       2s timeout. On `status == 200 + parseable {tokens=[...]}`:
       cache `true`, return `#tokens`. Anything else (non-200, parse
       fail, transport err): cache `false`, char/4.
   - `M.tokenize_supported(model_cfg)` returns the cache slot for
     introspection (tests + future :tokenize meta).
   - Smoke: hand-call `M.token_count(local_cfg, "hello world")` -> 2;
     `M.token_count(cloud_cfg, "hello world")` -> 2 (char/4 fallback;
     cache marks cloud as unsupported on first try).
 2. **`context.lua` — estimate_tokens widening + per-turn cache.**
   - Context.new accepts `opts.tokenize_fn` -> stored as `self.tokenize_fn`.
   - `Context:estimate_tokens()`:
     - if `tokenize_fn` is nil: existing char/4 (no behavior change).
     - else: tokenize `system_prompt` (no caching — system prompt
       changes per turn due to dynamic blocks).
       For each turn: if `turn._tokens` is set use it; else
       compute via tokenize_fn AND cache on turn._tokens.
   - No new helper; the change is internal to estimate_tokens.
   - Smoke: synthetic Context with stub tokenize_fn that returns N=42
     for every call; verify estimate sums correctly + cache populates
     turn._tokens.
 3. **`context.lua` — enforce_budget honors token_budget (pillar 5).**
   - Existing `while #self.turns > self.max_turns` loop extended.
     **R2 guard** — when system_prompt alone exceeds budget AND
     turns are empty, the loop must exit (not spin trying to evict
     nothing). Correct condition:
     ```lua
     while (#self.turns > self.max_turns
            or self:estimate_tokens() > self.token_budget)
           and #self.turns > 0 do
     ```
     Lowercase `or`/`and` per Lua syntax (N1).
   - Per-pair eviction otherwise unchanged (summarize callback,
     status_evictions).
   - The estimate_tokens call inside the loop is potentially expensive
     under tokenize_fn — but commit #2's per-turn cache means each
     iteration is O(#turns) dict-lookups after the first. Acceptable
     for the eviction hot path.
   - Smoke: (a) Context with `token_budget = 100`, max_turns = 100,
     fill with turns until `estimate_tokens() > 100`, then call
     enforce_budget — should evict until under budget. (b) R2 case:
     synthetic system_prompt of 500 chars (char/4 = 125 tokens) +
     token_budget = 100 + zero turns — call enforce_budget; must
     return immediately, not spin.
 4. **`repl.lua` — tokenize_fn wiring + :cost detail estimate row.**
   - When `config.tokenize and config.tokenize.use_endpoint`, build
     `ctx_opts.tokenize_fn = function(text)
        return broker.token_count(active_cfg, text)
      end`. **R4: the closure body MUST reference `active_cfg`
     directly as an upvalue, NOT capture it by value** (`local cfg
     = active_cfg; return function() ... cfg ... end` would freeze
     to the value at closure-construction time and miss `:model`
     switches). A5 verified upvalue semantics in Lua.
   - `:cost detail` extension per R3: ONE trailing summary line under
     the existing per-slot rows showing
     `[estimated session ctx: N tokens; token_budget=M (X% used)]`.
     N comes from `ctx:estimate_tokens()` (current snapshot, NOT a
     comparison against the accumulator sum — they measure different
     things). M is `ctx.token_budget`. X% = N/M × 100.
   - Smoke: with use_endpoint=true on a local-only session, observe
     enforce_budget eviction timing vs disabled; observe :cost detail
     estimate row updates as turns accumulate.
 5. **`config.lua` example block + `docs/PHASE8.md` status bump.**
   - Commented-out `tokenize = { use_endpoint = true }` block in
     config.lua with parity to Phase 1-7 example blocks. Document
     the per-endpoint network cost (one probe per session) and the
     implication: token_budget actually enforces now.
   - PHASE8.md status header -> **Implement**.
 ### Risk index per commit
 | Commit | Risk | Mitigation |
 |---|---|---|
 | 1 (broker) | Per-endpoint cache leaks across model_cfg deletions (e.g., user removes a model from config mid-session) | Cache is keyed by endpoint string; stale entries don't grow without bound (bounded by the number of distinct configured endpoints). No GC needed. |
 | 1 (broker) | /tokenize probe blocks the calling thread for 2s on a misconfigured endpoint | 2s timeout is the cap; one-shot per endpoint per session. |
 | 2 (context) | per-turn `_tokens` cache miss on every estimate when no tokenize_fn -> existing perf preserved | Cache check is conditional on tokenize_fn presence; char/4 path untouched. |
 | 3 (context) | enforce_budget loop now calls estimate_tokens potentially every iteration; with tokenize_fn that's O(#turns) per iteration -> O(#turns^2) worst case | Per-turn cache makes this O(#turns) amortized after first fill. For typical max_turns=40 + token_budget=4096 sessions: ~40^2 dict lookups = 1600 ops in worst case, microsecond cost. |
 | 3 (context) | accurate counts mean token_budget=4096 (Phase 0 default) finally ENFORCES — sessions that fit under char/4 may now evict earlier | Documented in §9; user can raise token_budget to match their model's real context window. |
 | 4 (repl) | tokenize_fn closure binding to `active_cfg` upval — if upval somehow gets reassigned wrong, eviction uses wrong tokenizer | Lua upvalues are call-time-resolved; A5 verified. Test by smoke after `:model` switch. |
 | 5 (config + status) | none | |
 ### Tests + smoke per commit
 Each commit:
 - Pass `luajit test_safety.lua` (87/87) and `luajit test_router_model.lua` (31/31)
 - Load cleanly via `luajit -e 'package.path=...; require("repl"); print("ok")'`
 - Pass a per-feature smoke (described in each row above)
 ### Things deliberately NOT split
 - New module file for tokenize — small enough to live in broker.lua.
 - Per-text token cache (in addition to per-turn): not needed; turn
  content is immutable post-append.
 - :tokenize meta for introspecting the cache — `M.tokenize_supported`
  is exported for testing; if a user needs runtime visibility, that's
  a follow-up.
 ### Open at plan-time (resolve at implement)
 - :cost detail layout — how exactly to show "estimated session ctx"
  relative to the existing per-slot rows. Pick at commit 4 (likely
  a single trailing line under the detail table).
 - Whether to expose `:tokenize <text>` for direct-probe debugging.
  Nice-to-have; defer unless useful during verify.
@@ -0,0 +1,613 @@
 # aish — Phase 9 Manifest
 **Project:** aish — AI-augmented conversational shell
 **Document:** Phase 9 Requirements, Architecture & Design Decisions
 **Status:** Implement (4 commits landed: e525063, 34b465d, 5b6ee55, this)
 **Date:** 2026-05-16
 **Review findings (Sonnet, 2026-05-16) — 0 BLOCKERs, 7 CONCERNs
 folded, 5 NITs applied:**
 R1 (CONCERN, FOLDED). **HOME prefix false-positive in walk-up.**
    `dir:sub(1, #home) ~= home` lets `/home/user2/...` pass when
    HOME is `/home/user` (matches first 10 bytes). Real bug. Fix:
    `if dir ~= home and dir:sub(1, #home + 1) ~= home .. "/" then
    return nil end`. §4 code updated.
 R2 (CONCERN, FOLDED). **`io.read` trust-prompt fallback breaks
    `aish -p` piped stdin.** A8's fallback (`io.read("*l")` if
    rl.readline misbehaves at startup) would consume the first
    line of piped stdin in non-interactive mode. **Fix:** in
    one-shot mode (`opts.prompt` set), SKIP the trust prompt
    entirely and decline silently with a status line. Project
    overlays in `-p` mode require pre-existing trust. Documented
    in §13 commit 2.
 R3 (CONCERN, FOLDED). **Sources-map delivery decided: `cfg._sources`
    embedded on the config table** (NOT a global). `repl.run` reads
    `config._sources` for `:config show`. Backward-compatible — old
    callers of `repl.run` that don't pass `_sources` still work
    (`:config show` says `(sources unknown)`). §4 + §13 commits 2+3
    updated to reflect.
 R4 (CONCERN, FOLDED). **`_prompt_trust` signature contradicted
    `_check_trusted`'s "compute sha once" claim.** §5 sketch called
    `_record_trust(project_path)` which would re-sha256. **Fix:**
    `_prompt_trust(project_path, sha)` takes the pre-computed sha;
    `history.add_trusted(trust_path, project_path, sha)` is the
    one writer. §5 sketches updated to match §13 + the real
    history.lua API.
 R5 (CONCERN, FOLDED). **`_check_trusted` duplicated trust-file
    read logic vs history.lua API.** §5 sketch had inline JSONL
    read; §13 defines `M.is_trusted(trust_path, project_path,
    sha256)` in history.lua to own that. **Fix:** §5 sketches now
    call `history.is_trusted(...)` and `history.add_trusted(...)` —
    main.lua holds no trust-file logic itself. This also makes the
    `$AISH_TRUST_FILE` env override work cleanly (one resolution
    site).
 R6 (CONCERN, FOLDED). **`:config show full` mode masking
    unspecified for nested values** — the actual leak vector is
    `mcp.servers.<alias>.auth_token`. **Fix:** §6 + §13 commit 3
    spell out: same heuristic, applied RECURSIVELY in full mode.
    Top-level mode (default) already collapses nested tables, so
    no leak there.
 R7 (CONCERN, FOLDED). **Shallow merge silently drops user's entire
    models block** (or permissions, cost, etc.). Documented as
    "predictable" but is a real UX trap. **Fix:** §1 done-when +
    §7 UX surface + §13 commit 4 template-comment all gain a
    conspicuous warning: "If your `.aish.lua` sets a top-level
    block (models, permissions, cost, ...) it REPLACES your user
    config's entire block — list every entry you want available
    OR omit the block to keep the user's." Stronger framing than
    "predictable".
 R-N1..N5 (NITs, APPLIED):
  N1. (cosmetic — review-prompt clarification only; no doc change)
  N2. `key_env` / `auth_env` over-masking is a known false-positive
      of the heuristic (env-var NAME, not a secret). §13 commit 3
      risk row gains an explicit note: "values of `*_env` fields
      will be masked too; cosmetic only — they hold env-var names,
      not secrets. Future: refine heuristic to exempt `*_env`
      pattern."
  N3. §13 open-at-plan-time list now includes the
      sources-map-delivery decision (resolved by R3 — embed on cfg).
  N4. §9 risk row about trust file partial write gains explicit
      first-ever-write edge case + workaround (manually delete the
      corrupt file). Temp-file+rename is v2 polish.
  N5. §3 module table ffi/libc.lua row had stale "stat" mention;
      removed per A2 (io.open is sufficient).
 **Analyze + baseline findings (2026-05-16) — 5/6 open Qs resolved
 in-place; Q-P4 deferred to implement-time verify:**
 A1. **main.lua load_config surface clean.** `load_config(opts)` at
    `main.lua:53` returns `(cfg, path)` for the user config. Adding
    a project-overlay wrapper that calls it then walks for `.aish.lua`
    is additive — no refactor of the existing 4-tier resolution.
 A2. **No new FFI needed for walk-up.** `io.open(candidate, "rb")` is
    sufficient for existence check; `libc.getcwd()` from Phase 6
    provides the starting point. No new C bindings.
 A3. **Q-P2 RESOLVED via probe (B1 below): use `sha256sum`** — GNU
    coreutils ships it everywhere aish targets. Single-shell-out
    pattern; output: `<digest>  <path>` → `cut -d' ' -f1` for the
    hex digest. No new module dependency.
 A4. **Q-P1 RESOLVED: trust prompt AFTER `aish: loaded config`
    status.** The user sees what user-config is in play first, then
    decides about the overlay. Natural ordering.
 A5. **Q-P3 RESOLVED: don't log walk-up path by default.** Too noisy
    on every startup. If debugging "why isn't my project file
    found?", `:config show` after startup will reveal the walk
    result (declined-or-not-found is visible). Verbose-mode walk
    log is v2 polish.
 A6. **Q-P5 RESOLVED: `:config show` shows top-level only by default.**
    Nested tables collapsed to `{key1, key2, ...}` (just the inner
    table's keys for orientation). `:config show full` for the
    deep dump. Keeps the diagnostic surface tractable.
 A7. **Q-P6 RESOLVED: project layer CAN set `secrets.vault`** — it's
    part of the trust prompt's scope. User accepting the prompt
    accepts that the project file may redirect secrets. The
    in-memory secrets session is built AFTER config resolution, so
    a project-set `secrets.vault` IS honored.
 A8. **rl.readline at startup (Q-P4 — deferred).** Phase 4's
    `:memory summarize` candidate-prompt path also calls
    `rl.readline` early (in metas; not pre-loop). The trust prompt
    fires BEFORE the main loop opens — earlier than any existing
    rl.readline call site. **Implement-time check**: smoke-test
    that rl.readline behaves correctly when called from
    `load_config_with_overlay` before `M.run` ever fires. If it
    misbehaves, fall back to a `printf "..." + read` shell-out for
    the trust prompt.
 A9. **Walk-up performance is fine** — at most ~10 levels from a
    typical cwd to $HOME, each `io.open` is ~10us. Total walk
    cost < 1ms even on slow filesystems.
 A10. **Trust file race**: two aish instances starting concurrently
     could double-write to `~/.aish/trusted-projects`. JSONL append
     semantics handle this OK (each writes one complete line); a
     duplicate trust entry is harmless. No flock needed (unlike
     memory.jsonl per Phase 4 where the writer SOR was important).
 A11. **Sandboxed env for dofile?** Out of scope per §8. The trust
     prompt IS the gate; we accept full Lua execution post-trust.
 A12. **Bootstrap chicken-egg**: project's `.aish.lua` could set
     `secrets.vault` which would change WHICH secrets are loaded.
     A12 paths through cleanly: user config loaded → project
     overlay merged → effective config passed to M.run → M.run
     reads `config.secrets.vault` (now possibly the project's) →
     secrets_session built. Order is correct; no chicken-egg.
 **Baseline finding:**
 B1. `sha256sum` (GNU coreutils 9.7) and `openssl dgst -sha256` agree
    bit-for-bit on the same input file. Both present on noether.
    sha256sum chosen for simpler output parsing (digest in first
    whitespace-separated field; openssl needs `awk '{print $NF}'`).
    Per A3 resolution; documented in Q-P2.
 PHASE0 is the locked substrate; PHASE1-8 are layered on top. This manifest
 specifies what Phase 9 adds — **project-local config overlay (`.aish.lua`)**:
 a per-project config file in or above cwd that merges onto the user's
 global config, letting a repo ship its own permission rules, model
 presets, skills, hooks, etc. without modifying anyone's `~/.config`.
 PHASE0 §11 amendment to add the Phase 9 row lands in the same commit as
 this formulate doc.
 ---
 ## 1. Scope of Phase 9
 Four pillars:
 1. **Project-config resolution + walk-up** — at startup, walk up
   from cwd looking for `.aish.lua`. Walk stops at the first found
   file OR at `$HOME` OR at filesystem root (whichever comes first —
   filesystem-root reached without a hit means "no project config").
   The found path is the project layer; absence is a no-op (existing
   resolution path unchanged for users who don't ship project config).
 2. **Merge semantics (shallow over user-config)** — load the global
   config first, then `dofile` the project `.aish.lua` and merge its
   top-level keys ONTO the user config. Shallow merge: project's
   `models = {...}` REPLACES the user's entire `models` block (not
   per-model). Predictable; users who want to add ONE model layer
   it deliberately or write a complete `models` block in their
   project file.
 3. **Trust prompt + persistent record** — first time aish encounters
   a `.aish.lua` at a given path, prompt the user to trust it
   (`[aish] trust <path>? [y/N]`). On `y`, record the path's
   absolute path AND content hash in `~/.aish/trusted-projects`
   (one JSON line per entry: `{path, sha256, ts}`). On subsequent
   startups: load only if the recorded hash still matches; if the
   file changed since trust, re-prompt. On `n` or empty: skip the
   project layer for this session.
 4. **`:config show` meta** — print the resolved config sources
   (which file contributed which top-level key), plus a sanitized
   dump of the effective config (token-bearing fields like
   `auth_token` masked). Useful for debugging when "why doesn't
   my project policy apply?" comes up.
 **Phase 9 is done when:**
 - A repo with `.aish.lua` in its root opens correctly: aish prompts
  to trust on first encounter, loads + merges on subsequent startups
  (when the hash still matches), and the resulting config behavior
  visibly reflects the project layer (e.g., project-set
  `permissions = { allow = ... }` allow-rules fire).
 - `.aish.lua` walk-up finds the file from a nested cwd (e.g.,
  `~/src/aish/docs/` finds `~/src/aish/.aish.lua`).
 - Walking past `$HOME` stops (doesn't search `/home/` or `/`).
 - Mutating a trusted `.aish.lua` re-prompts (hash mismatch).
 - `:config show` lists each source path with the keys it provided.
 - Existing configs without any `.aish.lua` behave like Phase 8
  (Phase 8 regression coverage).
 ---
 ## 2. Technology Decisions (delta from Phase 8)
 | Decision | Choice | Rationale |
 |---|---|---|
 | Walk-up start | `libc.getcwd()` at startup | Matches existing convention (Phase 6 `:tree` cwd capture). |
 | Walk-up stop | `$HOME` OR filesystem root | Don't search outside the user's home — limits attack surface. If no `.aish.lua` between cwd and $HOME, no project layer. |
 | Project file name | `.aish.lua` (dotfile) | Matches `.envrc` / `.tool-versions` convention; gitignore-friendly. |
 | Merge semantics | Shallow top-level | Predictable; deep merge surprises users when they redefine an array (Lua tables-as-arrays don't merge cleanly). Project users who want to add a single MCP server can copy the user's full `mcp = {...}` block and append. |
 | Trust mechanism | Explicit prompt; persist absolute-path + sha256 to `~/.aish/trusted-projects` | Matches `direnv allow` posture. Defense against hostile cloned repos that ship malicious `.aish.lua` (would-be RCE on `cd` + `aish` start). |
 | Re-prompt trigger | sha256 mismatch on the recorded path | Trust the BYTES, not just the path — content change = re-prompt. |
 | Trust file format | JSONL: `{path, sha256, ts}` per line | Append-only; readable; trivially manageable by hand. |
 | Trust file mode | 0600 (matches secrets vault in Phase 5/13) | Local-user trust scope; not a secret per se but defensive. |
 | `dofile` execution context | Whatever `dofile` provides (full Lua env) | Project file is arbitrary Lua because that's what the user accepted at trust-prompt. No sandbox; the prompt is the gate. |
 | Reload on cd | NO — config resolved at startup only | Mid-session config mutation is a complexity tax. `cd` into a different project means restarting aish. Document. |
 | Status line on load | `[aish] project config: <path> (overlaid on <user-config>)` at startup | Visibility — user always knows when project layer is active. |
 | `:config show` shape | Lists each source path with the top-level keys it contributed | Diagnoses "why isn't my project rule applying?" cases. Token-bearing fields masked (`auth_token: <set>` rather than the value). |
 ---
 ## 3. Module Changes
 | File | State after Phase 8 | Phase 9 changes |
 |---|---|---|
 | `main.lua` | `load_config(opts)` walks $AISH_CONFIG → ~/.config/aish → ./config.lua | Wrap with `load_config_with_overlay(opts)` that finds the user config (existing logic) AND walks up from cwd for `.aish.lua`; if both found, merge project ONTO user and return merged. Records source-per-key for `:config show`. |
 | `ffi/libc.lua` | getcwd, chdir, isatty, flock | **No change** (per A2): `io.open(candidate, "rb")` is sufficient for existence-check during walk-up. No new FFI bindings needed. |
 | `repl.lua` | All existing metas; no `:config` meta yet | New `:config show` meta. Per R3 the source map arrives embedded on the config table as `config._sources`; the meta reads it and prints `(sources unknown)` when it is absent. |
 | `history.lua` | session log, memory.jsonl | New helpers (R5): `M.is_trusted(trust_path, project_path, sha256)` reports whether a matching entry is recorded; `M.add_trusted(trust_path, project_path, sha256)` appends one. Mode 0600 enforced. |
 | `config.lua` (the user's global; not the in-tree example) | n/a | No change. The in-tree `config.lua` becomes a template that project overlays can replace top-level keys of. |
 | `docs/PHASE0.md` | §11 lists phases 0-8; §10 resolution order | Amendment: add Phase 9 row to §11; update §10 to mention project overlay. |
 No new module files in v1. The hashing logic (sha256) is shelled out to `sha256sum` (A3/B1; `openssl dgst -sha256` yields the same digest but needs more output parsing). Neither tool is mandated by POSIX, but GNU coreutils ships `sha256sum` everywhere aish targets. Avoid vendoring a Lua sha256 since `sha256sum` is already available everywhere aish runs.
 ---
 ## 4. Pillar 1+2 — Resolution + Merge
 ### Walk-up
 ```lua
 -- Walk up from the current working directory looking for `.aish.lua`.
 --
 -- The walk never leaves $HOME: it returns nil immediately when cwd is
 -- not $HOME or a descendant of it, and stops once $HOME (or "/") has
 -- been checked.
 --
 -- @return string|nil  path of the first `.aish.lua` found, or nil when
 --                     HOME/cwd is unavailable, cwd is outside $HOME,
 --                     or no file exists between cwd and $HOME.
 local function _find_project_config()
    local libc = require("ffi.libc")
    local home = os.getenv("HOME")
    if not home then return nil end
    -- Normalize HOME before comparing. A trailing slash ("/home/user/")
    -- would otherwise fail both the prefix check and the `dir == home`
    -- stop test below, silently disabling the overlay. HOME="/"
    -- normalizes to "", for which every absolute cwd passes the prefix
    -- check and the walk stops at "/".
    home = (home:gsub("/+$", ""))
    local dir = libc.getcwd()
    if not dir then return nil end
    -- R1: don't walk OUTSIDE $HOME. The proper-prefix check requires
    -- `dir == home` OR `dir starts with home .. "/"` — bare
    -- `sub(1, #home) == home` matches "/home/user2" when HOME is
    -- "/home/user" (10-byte prefix). Real bug caught by review.
    if dir ~= home and dir:sub(1, #home + 1) ~= home .. "/" then
        return nil
    end
    while dir and #dir > 0 do
        local candidate = dir .. "/.aish.lua"
        -- io.open is only an existence probe here (A2: no stat FFI).
        local f = io.open(candidate, "r")
        if f then f:close(); return candidate end
        if dir == home or dir == "/" then return nil end
        -- Walk up one level; parentheses drop gsub's match count.
        dir = (dir:gsub("/[^/]*$", ""))
        if dir == "" then dir = "/" end
    end
    return nil
 end
 ```
 ### Merge
 ```lua
 -- Overlay the project config onto the user config, in place.
 -- The merge is shallow: each top-level project key replaces the user's
 -- value wholesale, and `sources` is updated so :config show can
 -- attribute that key to the project layer. Keys the project does not
 -- set keep whatever `sources` already said about them.
 -- Returns the (mutated) user_cfg.
 local function _merge_project_over_user(user_cfg, project_cfg, sources)
    for name, value in pairs(project_cfg) do
        user_cfg[name] = value
        sources[name] = "project"
    end
    return user_cfg
 end
 ```
 ### Loader wrapper
 ```lua
 -- Load the user config and, when a trusted `.aish.lua` is found between
 -- cwd and $HOME, shallow-merge it on top.
 --
 -- @param opts  passed through unchanged to load_config
 -- @return user_cfg  effective config (user table, mutated by the merge);
 --                   also carries the source map as `user_cfg._sources` (R3)
 -- @return sources   map of top-level key -> "user" | "project"
 -- @return paths     { user = <path>, project = <path> | "(declined)" |
 --                   "(load failed)" | nil }
 local function load_config_with_overlay(opts)
    -- Existing load_config returns (user_cfg, user_path)
    local user_cfg, user_path = load_config(opts)
    local sources = {}
    for k in pairs(user_cfg) do sources[k] = "user" end
    -- R3: the source map travels ON the config table so repl.run can
    -- read `config._sources` for :config show. Attached after the loop
    -- so `_sources` does not list itself.
    user_cfg._sources = sources
    local proj_path = _find_project_config()
    if not proj_path then
        return user_cfg, sources, { user = user_path }
    end
    -- R4 + R5: one trust gate (see §5) that hashes the file once,
    -- consults the trust file through history.lua, and prompts if
    -- needed. A false result also covers the "sha256 failed" case.
    if not _check_and_maybe_prompt(proj_path) then
        -- declined; skip project layer
        return user_cfg, sources, { user = user_path, project = "(declined)" }
    end
    local ok, proj_cfg = pcall(dofile, proj_path)
    if not ok or type(proj_cfg) ~= "table" then
        -- Surface WHY: on pcall failure proj_cfg holds the error value;
        -- otherwise the file ran but did not return a table.
        local why = ok and ("returned " .. type(proj_cfg) .. ", expected table")
            or tostring(proj_cfg)
        renderer.status("project config " .. proj_path
            .. " failed to load; ignoring (" .. why .. ")")
        return user_cfg, sources, { user = user_path, project = "(load failed)" }
    end
    _merge_project_over_user(user_cfg, proj_cfg, sources)
    -- Re-attach: a project file that sets a `_sources` key of its own
    -- must not replace the real source map.
    user_cfg._sources = sources
    return user_cfg, sources, { user = user_path, project = proj_path }
 end
 ```
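 The source map is then handed to `repl.run` for `:config show`. Per R3 (§13) it is embedded on the config table as `config._sources` rather than carried as a closure local or a global.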
 ---
 ## 5. Pillar 3 — Trust prompt + persistent record
 ### Trust file shape
 `~/.aish/trusted-projects` (mode 0600), JSONL:
 ```jsonl
 {"path":"/home/user/src/aish/.aish.lua","sha256":"abc123...","ts":"2026-05-16T12:34:56Z"}
 {"path":"/home/user/src/other/.aish.lua","sha256":"def456...","ts":"2026-05-16T12:40:00Z"}
 ```
 ### Trust check + prompt (R4 + R5 — calls history.lua API; sha computed once)
 ```lua
 -- R5: trust-file path resolves through history.lua + optional env override.
 -- main.lua never reads/writes the trust file directly.
 --- Resolve the path of the trusted-projects file.
 -- @treturn string  `$AISH_TRUST_FILE` when set to a non-empty value,
 --   otherwise `$HOME/.aish/trusted-projects` (HOME treated as "" if unset).
 local function _trust_file_path()
    local override = os.getenv("AISH_TRUST_FILE")
    -- "" is truthy in Lua: an exported-but-empty AISH_TRUST_FILE must not
    -- turn into an empty trust-file path, so fall through to the default.
    if override and override ~= "" then
        return override
    end
    return (os.getenv("HOME") or "") .. "/.aish/trusted-projects"
 end
 -- R4 + R5: hash the project file exactly once and hand that digest to both
 -- history.is_trusted and history.add_trusted.
 --- Decide whether the project config at `project_path` may be loaded.
 -- @tparam string project_path  path of the candidate `.aish.lua`
 -- @treturn boolean true when the file is already trusted or the user
 --   answers yes at the prompt; false when hashing fails or the user declines.
 local function _check_and_maybe_prompt(project_path)
    local digest = history._sha256_file(project_path)
    if not digest then
        renderer.status("project config "..project_path..": sha256 failed; skipping")
        return false
    end
    local trust_file = _trust_file_path()
    local known = history.is_trusted(trust_file, project_path, digest)
    if known then return true end
    -- Unknown path or changed bytes: explain the risk, then ask.
    renderer.status("project config found: " .. project_path)
    renderer.status("UNTRUSTED. Loading it runs arbitrary Lua code.")
    local reply = rl.readline("[aish] trust this project config? [y/N] ")
    -- Anything that does not start with "y"/"Y" — including a bare Enter or
    -- a nil reply — counts as a decline.
    if not (reply and reply:lower():sub(1, 1) == "y") then
        return false
    end
    history.add_trusted(trust_file, project_path, digest)
    return true
 end
 ```
 ### sha256
 `history._sha256_file(path)` shells out to `sha256sum <path>` and parses
 the first whitespace-separated field. Single call per startup per
 project file (R4 — `_check_and_maybe_prompt` computes once and passes
 to both `history.is_trusted` and `history.add_trusted`).
 ---
 ## 6. Pillar 4 — `:config show`
 ```
 [aish] config sources:
  user:    ~/.config/aish/config.lua
  project: ~/src/aish/.aish.lua
 [aish] effective config (top-level keys):
  default_model   : "fast"          (user)
  models          : {fast, cloud}   (project)
  shell           : {confirm_cmd=true, ...} (user)
  permissions     : {allow={...}, ...}  (project)
  hooks           : (unset)
  ...
 ```
 Token-bearing fields (any key matching `token`, `secret`, `auth`,
 `key`, case-insensitive) displayed as `(set)` rather than the value.
 R6 — `:config show full` applies the SAME heuristic RECURSIVELY to
 nested values (the actual leak vector is `mcp.servers.<alias>.auth_token`
 which top-level mode collapses but full mode would dump).
 Known cosmetic false-positive (N2): `key_env` / `auth_env` config
 fields are over-masked. These hold env-var NAMES (e.g. `OPENAI_API_KEY`)
 not the secret values themselves — but the heuristic catches them.
 Future polish: exempt `*_env` from the heuristic.
 ---
 ## 7. UX Surface Summary
 | Meta | Behavior |
 |---|---|
 | `:config show` | Print resolved sources + sanitized effective config (read-only) |
 | Startup status | Behavior |
 |---|---|
 | (no project file) | nothing — existing UX preserved |
 | (project file found, untrusted) | `[aish] project config found: <path>` + `[aish] UNTRUSTED. Loading it runs arbitrary Lua code.` + `[y/N]` prompt |
 | (project file found, trusted, sha matches) | `[aish] project config: <path> (overlaid on <user>)` |
 | (project file found, trusted, sha CHANGED) | re-prompt — bytes are different now |
 | (declined this session) | `[aish] project config: <path> (declined this session)` |
 No new config keys in v1 (the project overlay IS the new mechanism; it doesn't need a config flag to be enabled).
 ---
 ## 8. Out of Scope (Phase 9)
 - **Sandboxed `.aish.lua` execution** — `dofile` runs full Lua; the
  trust prompt IS the gate. A sandbox (allowlisted globals,
  no `io.popen`, etc.) is bigger work and out of scope.
 - **Reload on `cd`** — config is resolved at startup only. `cd`
  into a sibling project means restarting aish. Documented.
 - **Recursive merge** — top-level shallow only.
 - **Multiple project overlays** — walk-up stops at FIRST `.aish.lua`
  found. Nested projects (e.g., monorepo with per-package configs)
  would need deeper design; defer.
 - **`:trust` / `:untrust` metas for runtime management** — trust
  records edited manually in `~/.aish/trusted-projects` for v1. A
  meta surface is a v2 polish.
 - **Environment variable expansion in project file** — project file
  is plain Lua; users have `os.getenv` already.
 - **Project-wide aish profile selection** — `.aish.lua` returns a
  config table, not a profile name. If multi-profile support is
  desired, the project file can compute a different config based
  on its OWN env vars / heuristics.
 ---
 ## 9. Risks
 | Risk | Mitigation |
 |---|---|
 | Hostile `.aish.lua` in cloned repo runs arbitrary Lua on first `aish` run in that cwd | Trust prompt + sha256 persistence; default = decline if user just hits Enter at the [y/N]. |
 | Trust file becomes corrupted / unreadable | Best-effort: corrupted lines skipped (each line is independent JSON); missing file means all projects untrusted (re-prompt on next encounter). N4 edge case: if the FIRST-EVER write is interrupted partway, the file's sole line may be corrupt JSON and the project never stays trusted — user manually deletes `~/.aish/trusted-projects` to recover. Temp-file+rename atomicity is v2 polish. |
 | User trusts `.aish.lua`, repo is updated, malicious code is injected | sha256 mismatch on next startup triggers re-prompt. User sees the prompt and can investigate before granting trust again. |
 | `dofile` errors at load time (syntax error in project config) | pcall-protected; status line "project config X failed to load; ignoring" — aish continues with just the user config. |
 | Walk-up walks above $HOME (e.g., a repo cloned to `/tmp`) | $HOME boundary check stops the walk. `/tmp` repos get no project layer (user can move them under $HOME or use --config). |
 | **R7 — shallow merge silently DROPS the user's entire block on overlap.** A `.aish.lua` that sets `models = {...}` REPLACES the user's full models block; same for `permissions`, `cost`, `shell`, etc. This is a genuine UX trap, not just "predictable" — accept-and-warn-clearly is the resolution rather than hiding behind framing. | Conspicuous warning in §1 done-when + §7 UX table + config.lua template header: "If your `.aish.lua` sets a top-level block (models, permissions, cost, ...) it REPLACES your user config's entire block — list every entry you want available OR omit the block to keep the user's." Deep-merge-with-explicit-replace-syntax (systemd drop-in style) is v2 polish. |
 | Source map dict grows unboundedly with new keys mid-session | Bounded by #config top-level keys (small constant; <20). No GC needed. |
 ---
 ## 10. Open Questions (Phase 9)
 | # | Question | Resolution |
 |---|---|---|
 | Q-P1 | Trust prompt before/after `aish: loaded config` status | A4 — **AFTER**; user sees user-config first, then decides about overlay. |
 | Q-P2 | sha256 backend choice | B1 RESOLVED — `sha256sum` (GNU coreutils; universal on Linux); simpler output parsing than openssl. |
 | Q-P3 | Log walk-up path | A5 — **no by default**; `:config show` reveals walk result on demand. Verbose-mode walk log is v2 polish. |
 | Q-P4 | rl.readline safe at startup | A8 — DEFERRED to implement-time smoke (Phase 4 metas call rl.readline early too; new wrinkle is firing BEFORE main loop opens). If issue, fall back to printf+read shell-out. |
 | Q-P5 | `:config show` full vs top-level | A6 — **top-level by default** (nested collapsed to inner keys); `:config show full` for deep dump. |
 | Q-P6 | Project layer setting `secrets.vault` security | A7 — **allowed**; part of the trust prompt's scope. Bootstrap order (A12) ensures project's vault is honored if set. |
 ---
 ## 11. Phase 9 → Phase 10+ Out-of-band
 Candidate follow-ups (non-binding):
 - **Phase 10 candidates**:
  - Cost preflight enforcement (Phase 7 §12 option 2; Phase 8 §11 candidate).
  - Cross-session cost rollup (Phase 7 §12 option 1; Phase 8 §11 candidate).
  - `:trust` / `:untrust` metas for runtime trust management.
  - Sandboxed `.aish.lua` execution (allowlisted Lua globals).
 - **Phase X+**: nested project overlays for monorepos; `:profile`
  switching; reload-on-cd.
 Phase 9 itself is self-contained — depends on no specific prior phase
 beyond the existing config loader.
 ---
 ## 13. Implementation Plan (commit-by-commit)
 4 commits, bottom-up:
 1. **`history.lua` — trust file helpers.**
   - `M.read_trusted(path)` -> list of `{path, sha256, ts}`
     entries; mode-check the file at 0600, refuse to load (warn)
     if wider. Missing file → empty list.
   - `M.add_trusted(trust_path, project_path, sha256)` appends a
     JSONL line; mkdir -p the parent if needed; chmod 0600.
   - `M.is_trusted(trust_path, project_path, sha256)` reads + checks
     for matching entry.
   - Internal `_sha256_file(path)` shells out to `sha256sum` and
     parses the first whitespace-separated field.
   - Smoke: 5 inline unit cases (read empty, add+read-back, mode
     check, sha mismatch returns false, missing file).
 2. **`main.lua` — walk-up + load_config_with_overlay.**
   - `_find_project_config()` walks from libc.getcwd() up to $HOME
     (R1 corrected proper-prefix check), returning first `.aish.lua`
     or nil.
   - `_check_and_maybe_prompt(project_path)` (R4 + R5) calls
     `history._sha256_file` ONCE; routes through `history.is_trusted`
     / `history.add_trusted` with the env-overridable trust file
     path. Returns true if the project file should be loaded.
   - `load_config_with_overlay(opts)` wraps existing `load_config`;
     finds project, checks trust, prompts if needed, dofiles +
     merges shallow over user config. **R2: in one-shot mode
     (`opts.prompt` is set), the trust prompt is SKIPPED entirely
     — the project layer is only loaded if it's already pre-trusted.
     Avoids io.read consuming the first line of piped stdin.**
   - **R3 sources delivery: embed on `config._sources`** (a sentinel
     field on the config table itself). NOT a global. `repl.run`
     reads `config._sources` for `:config show`; backward-compatible
     (old callers without _sources are reported as "(sources
     unknown)" by the meta).
   - Smoke: (a) tree-resolution from a nested cwd; (b) trust prompt
     accept-then-load + decline-then-skip paths; (c) -p mode with
     untrusted .aish.lua + piped stdin -> trust prompt SKIPPED, no
     stdin consumption; (d) A8: rl.readline early-startup smoke;
     if rl.readline misbehaves, NO fallback to io.read in
     interactive mode either — emit status + skip overlay (avoids
     the silent-data-loss risk R2 covers).
 3. **`repl.lua` — `:config show` meta + startup status line.**
   - `:config show` / `:config show full` meta reads `config._sources`
     (R3 cfg-embedded) + the effective config; sanitizes token-bearing
     values (any key containing "token"/"secret"/"auth"/"key",
     case-insensitive) → display as `(set)`. R6: in `full` mode,
     applies the heuristic RECURSIVELY to nested values (the real
     leak vector is `mcp.servers.<alias>.auth_token`).
     If `config._sources` is absent, status: "(sources unknown — main
     didn't pass _sources)" so the meta still runs but doesn't lie.
   - Startup status line per A4: AFTER the existing `aish: loaded
     config from <path>`, if project layer fired, emit
     `[aish] project config: <path> (overlaid on <user>)`.
   - HELP gains 2 `:config` lines.
   - N2 known false-positive: `key_env` / `auth_env` config field
     VALUES are masked too (they hold env-var names, not secrets).
     Cosmetic; future polish exempts `*_env`.
   - Smoke: with a test project file, run `:config show` and
     verify keys + sources line up; `:config show full` masks
     nested auth tokens but exposes other nested fields.
 4. **`config.lua` template note + status bump.**
   - Add a header comment to `config.lua` (the in-tree example)
     noting Phase 9 project-overlay availability (no other config
     change — overlay is a separate file).
   - PHASE9.md status header -> **Implement**.
 ### Risk index per commit
 | Commit | Risk | Mitigation |
 |---|---|---|
 | 1 (history) | sha256sum not installed (some minimal images) | Detect at startup; if missing, warn + decline all trust prompts (project layer disabled). Documented. |
 | 1 (history) | Trust file partial write (interrupted append) corrupts later parse | JSONL one-line-per-entry; partial line at EOF is skipped on read (each line is a single json.decode). |
 | 2 (main) | A8 — rl.readline at startup (before main loop) untested in earlier phases | Smoke-test at commit-time; if broken, emit a status line and skip the overlay entirely. Per R2 there is NO fallback to `io.read("*l")`: reading stdin could consume piped data in -p mode. |
 | 2 (main) | Walk-up symlink loops | `realpath`/`stat` defenses out of scope for v1; walk is bounded by $HOME stop. Pathological symlinks could waste cycles but not infinite-loop (every iteration strips a path component). |
 | 3 (repl) | :config show might leak token values if a config key isn't matched by the masking heuristic | Conservative mask: any key containing "token", "secret", "auth", "key" (case-insensitive) → display `(set)`. Errs toward over-masking. |
 | 4 (config + status) | None | |
 ### Tests + smoke per commit
 Each commit:
 - Pass `luajit test_safety.lua` (87/87) and `luajit test_router_model.lua` (31/31)
 - Load cleanly via `luajit -e 'package.path=...; require("repl"); print("ok")'`
 - Pass a per-feature smoke (described per row above)
 ### Things deliberately NOT split
 - Separate `project.lua` module — small enough; history.lua already
  handles file-with-mode-check (memory.jsonl); same shape.
 - :trust / :untrust runtime metas — manual ~/.aish/trusted-projects
  editing is fine for v1.
 - Walk-up logging on first startup — easy to add later if needed.
 ### Open at plan-time (resolve at implement)
 - A8: rl.readline early-startup behavior. R2 supersedes the
  formulate-time io.read fallback — if rl.readline misbehaves,
  emit status + skip the overlay entirely (NOT a fallback to
  stdin which would consume piped data in -p mode).
 - `$AISH_TRUST_FILE` env override — RESOLVED: implement it (one
  line; useful for CI / test isolation). Used by the verify TCs.
 - N3 — sources-map delivery RESOLVED: embed on `config._sources`
  (cfg-field; not a global). Per R3.
@@ -0,0 +1,488 @@
 -- config.lua — example aish configuration.
 -- Shipped by the aish package at /usr/share/doc/aish/examples/config.lua.
 -- Copy to ~/.config/aish/config.lua (preferred) and adapt to your fleet:
 --
 --     install -Dm600 /usr/share/doc/aish/examples/config.lua \
 --                    ~/.config/aish/config.lua
 --
 -- Mode 0600 matters because this file can carry MCP bearer tokens. The
 -- mcp.servers entries below are commented-out examples with no tokens in
 -- them; when you enable one, prefer the auth_env env-var indirection form
 -- (export MCP_PVE1_TOKEN=... in your shell init) over committing literals.
 --
 -- Loaded with dofile() at startup; returns a plain Lua table.
 -- See docs/PHASE0.md §10 for resolution order and full schema.
 --
 -- Per issue #12: hossenfelder is the canonical single-URL broker. It does
 -- model-aware routing server-side (local models on boltzmann; cloud routes
 -- through OpenRouter using its own bearer auth — no client-side key here).
 -- Discovery: GET http://hossenfelder.fritz.box:8082/v1/models.
 --
 -- Phase 9 (docs/PHASE9.md): a `.aish.lua` in/above your cwd (walking up
 -- to $HOME) overlays this user config. First encounter prompts to trust;
 -- sha256-pinned in ~/.aish/trusted-projects. Use it for repo-specific
 -- model presets, permissions, hooks, etc.
 --
 -- IMPORTANT: shallow merge. If your `.aish.lua` sets a top-level block
 -- (models, permissions, cost, shell, ...), it REPLACES the user's
 -- entire block — list every entry you want available OR omit the block
 -- to keep the user's. Inspect the merge via `:config show` at runtime.
 -- Replace with your own broker URL. This default targets the
 -- maintainer's home-LAN broker — useful as a structural example
 -- but will not resolve outside that network.
 local HOSSENFELDER = "http://hossenfelder.fritz.box:8082"
 return {
    default_model = "fast",
    -- 2026-05-17: full fleet exposed. 6 local + 14 cloud models live on the
    -- hossenfelder broker. Aliases below match the model IDs returned by
    -- /v1/models so the broker can route without prefix stripping.
    models = {
        -- ── LOCAL ────────────────────────────────────────────────────────
        fast = {  -- alias for the 1.5B; default
            endpoint    = HOSSENFELDER,
            model       = "qwen2.5-coder-1.5b-q4_k_m.gguf",
            temperature = 0.2,
        },
        ["coder-3b"] = {  -- pve2 (Haswell NUC, 1.8 GB model, ~4 tok/s)
            endpoint    = HOSSENFELDER,
            model       = "qwen2.5-coder-3b-instruct-pve2",
            temperature = 0.2,
        },
        ["coder-7b"] = {  -- pve1 (Haswell NUC)
            endpoint    = HOSSENFELDER,
            model       = "qwen2.5-coder-7b-instruct-pve1",
            temperature = 0.2,
        },
        ["coder-7b-snappy"] = {  -- dirac:8081, low-latency completion
            endpoint    = HOSSENFELDER,
            model       = "qwen-coder-7b-snappy-8k",
            temperature = 0.2,
        },
        ["qwen-7b"] = {  -- dirac:8080 chat
            endpoint    = HOSSENFELDER,
            model       = "Qwen2.5-7B-Instruct-Q4_K_M.gguf",
            temperature = 0.2,
        },
        deep = {  -- boltzmann:8085 — Qwen3-30B-A3B MoE, q8 KV cache
            endpoint    = HOSSENFELDER,
            model       = "qwen3-30b-a3b-instruct-2507",
            -- timeout_ms inherits broker default (30 min) — 30B prompt processing
            -- of long contexts on CPU can take 15-25 min before first token.
            temperature = 0.1,
        },
        -- ── CLOUD (OpenRouter via hossenfelder) ───────────────────────────
        cloud  = { endpoint = HOSSENFELDER, model = "anthropic/claude-haiku-4.5",      temperature = 0.2 },
        haiku  = { endpoint = HOSSENFELDER, model = "anthropic/claude-haiku-4.5",      temperature = 0.2 },
        sonnet = { endpoint = HOSSENFELDER, model = "anthropic/claude-sonnet-4.6",     temperature = 0.2 },
        opus   = { endpoint = HOSSENFELDER, model = "anthropic/claude-opus-4.7",       temperature = 0.2 },
        gpt5      = { endpoint = HOSSENFELDER, model = "openai/gpt-5.5",                 temperature = 0.2 },
        ["gpt5-mini"] = { endpoint = HOSSENFELDER, model = "openai/gpt-5.4-mini",        temperature = 0.2 },
        deepseek         = { endpoint = HOSSENFELDER, model = "deepseek/deepseek-v3.2",         temperature = 0.2 },
        ["deepseek-v4"]  = { endpoint = HOSSENFELDER, model = "deepseek/deepseek-v4-flash",     temperature = 0.2 },
        ["deepseek-pro"] = { endpoint = HOSSENFELDER, model = "deepseek/deepseek-v4-pro",       temperature = 0.2 },
        mistral   = { endpoint = HOSSENFELDER, model = "mistralai/mistral-large-2512",   temperature = 0.2 },
        ["qwen-cloud"] = { endpoint = HOSSENFELDER, model = "qwen/qwen3.5-27b",          temperature = 0.2 },
        owl       = { endpoint = HOSSENFELDER, model = "openrouter/owl-alpha",           temperature = 0.2 },
        -- ── CLOUD FREE-TIER ──────────────────────────────────────────────
        ["free-qwen-coder"]   = { endpoint = HOSSENFELDER, model = "qwen/qwen3-coder:free",                  temperature = 0.2 },
        ["free-llama-70b"]    = { endpoint = HOSSENFELDER, model = "meta-llama/llama-3.3-70b-instruct:free", temperature = 0.2 },
        ["free-qwen-80b"]     = { endpoint = HOSSENFELDER, model = "qwen/qwen3-next-80b-a3b-instruct:free",  temperature = 0.2 },
        ["free-gpt-oss"]      = { endpoint = HOSSENFELDER, model = "openai/gpt-oss-120b:free",               temperature = 0.2 },
        ["free-glm"]          = { endpoint = HOSSENFELDER, model = "z-ai/glm-4.5-air:free",                  temperature = 0.2 },
        ["free-deepseek-v4"]  = { endpoint = HOSSENFELDER, model = "deepseek/deepseek-v4-flash:free",        temperature = 0.2 },
    },
    shell = {
        known_commands = {
            "ls", "cat", "cd", "grep", "find", "cp", "mv", "rm",
            "mkdir", "rmdir", "git", "make", "cmake", "gcc", "clang",
            "python3", "luajit", "ssh", "scp", "curl", "wget",
        },
        capture_output = true,    -- inject exec output into context
        confirm_cmd    = true,    -- prompt before executing CMD: suggestions
        -- Issue #10: prompt template. When set, replaces the default
        -- "[aish:<model>]> " prompt. Variables (substituted via {name}):
        --   {model}  {ctx_used}  {ctx_max}  {turn}
        --   {cwd}    {cwd_short} (cwd with $HOME -> ~)
        --   {last_status} (last exec exit code, empty if none yet)
        --   {mode}   (norris / plan / normal)
        -- prompt = "[{model} {ctx_used}/{ctx_max}t T{turn} {mode}] {cwd_short} > ",
    },
    context = {
        max_turns    = 40,
        token_budget = 4096,
    },
    history = {
        dir = (os.getenv("HOME") or ".") .. "/.local/share/aish",
    },
    -- Issue #3: pre/post CMD hooks. Optional shell scripts triggered around
    -- every CMD: execution. Each hook receives the command on stdin and
    -- AISH_CMD / AISH_TURN / AISH_CWD as env vars. Non-zero exit on pre_cmd
    -- aborts execution; post_cmd's exit code is ignored but its stdout is
    -- logged. Default off (no hooks). Uncomment to enable.
    -- hooks = {
    --     pre_cmd  = (os.getenv("HOME") or ".") .. "/.aish/hooks/pre-cmd",
    --     post_cmd = (os.getenv("HOME") or ".") .. "/.aish/hooks/post-cmd",
    -- },
    -- Issue #13: secret redaction. Vault is a separate file at ~/.aish/
    -- secrets.lua (mode 0600 enforced). When set, outbound broker messages
    -- are scrubbed: vault literals + autodetect heuristics (OpenAI sk-,
    -- OpenRouter sk-or-v1-, GitHub ghp_/gho_/ghs_, AWS AKIA, JWT eyJ...,
    -- SSH/GPG PRIVATE KEY headers) become $AISH_SECRET_NNN placeholders.
    -- The streamed reply is rehydrated before display so the user sees
    -- real values. Per-broker override via models[*].redact:
    --   "off"               -- no scrubbing (trusted local)
    --   "vault"             -- vault literals only
    --   "vault+autodetect"  -- + heuristics (default when vault loaded)
    --   "stealth"           -- + heuristics, opaque decoys, no rehydrate
    -- Default per-broker is the global config.secrets.default, falling
    -- back to "vault+autodetect" when vault loaded, else "off".
    -- secrets = {
    --     vault   = "~/.aish/secrets.lua",
    --     default = "vault+autodetect",  -- applies when models[*].redact is nil
    -- },
    -- Issue #8: background CMD (CMD&: marker). Requires history.dir set
    -- (logs land at <history.dir>/bg/<id>.log + .status sidecar). The
    -- feature is always-on once history.dir exists — no config flag — but
    -- only fires when the model emits "CMD&: " or the user runs :bg-spawn.
    -- Issue #9: permission policy DSL for AI-suggested CMD: lines. When set,
    -- supersedes shell.confirm_cmd. Patterns are Lua patterns (NOT regex)
    -- per substrate invariant §3 (no compiled extensions). Priority order:
    -- deny > confirm > allow; first match in the chosen category wins.
    -- Unmatched commands default to "confirm". Probe with :perms check <cmd>.
    -- permissions = {
    --     allow   = { "^ls%s", "^cat%s", "^git status", "^git diff" },
    --     confirm = { "^rm%s", "^git push", "^docker%s", "^sudo%s" },
    --     deny    = { "^ssh%s+root@", "^curl%s+http[^s]" },
    -- },
    -- Phase 2 (docs/PHASE2.md): MCP server registry + tool-call policy.
    -- Aliases become the namespace prefix on tool names sent to the model
    -- ("<alias>__<tool>" — e.g. "pve1__list_dir"). Separator is "__" because
    -- Anthropic via Bedrock validates tool names against ^[a-zA-Z0-9_-]{1,128}$
    -- (dots are rejected). Aliases themselves must not contain "__".
    -- Auth precedence per server: auth_token literal > auth_env env-var
    -- indirection > nil (no auth).
    mcp = {
        servers = {
            -- Example MCP server entries. Replace the URL with your own
            -- lmcp endpoint and source the bearer token via auth_env so
            -- it never lands in version control.
            --
            -- pve1: small sandbox host (stock lmcp tools — shell, read_file,
            -- write_file, edit_file, list_dir, search_files, shell_bg).
            -- pve1 = {
            --     url      = "http://pve1.example.local:8080/mcp",
            --     auth_env = "MCP_PVE1_TOKEN",
            -- },
            --
            -- hertz: home-network hub with lmcp v1.2+ built-in fetch /
            -- web_search tools — useful for letting the model do web
            -- research without leaving aish. Auto-approving these two
            -- is safe because they carry MCP readOnlyHint=true and
            -- openWorldHint=true (see auto_approve block below).
            -- hertz = {
            --     url      = "http://hertz.example.local:8080/mcp",
            --     auth_env = "MCP_HERTZ_TOKEN",
            -- },
        },
        -- Per-call confirm gate auto-approve policy. fetch / web_search
        -- carry MCP readOnlyHint=true + openWorldHint=true; safe to skip
        -- the per-call prompt since they neither mutate nor leak local
        -- state. Anything writable on the host (mqtt_pub, ha_cli, lxc_exec,
        -- wol_and_wait, ...) should keep prompting.
        auto_approve = {
            -- ["hertz__fetch"]      = true,
            -- ["hertz__web_search"] = true,
        },
        -- Tool-call sub-loop budget per ask_ai turn. Default 8 if absent.
        max_tool_depth = 8,
    },
    -- Phase 3 (docs/PHASE3.md): Chuck Norris autonomous mode + destructive-op
    -- heuristic. The block is OFF by default (sane defaults kick in when
    -- absent); uncomment to tune.
    --
    -- safety = {
    --     -- LLM second-opinion on commands the static patterns don't flag.
    --     -- Default true. Set false for static-only operation (faster, but
    --     -- misses novel destructive patterns the static list doesn't know
    --     -- about — bash -c content, custom destructive idioms, etc.).
    --     llm_second_opinion = true,
    --
    --     -- Which configured model to use for the YES/NO destructive probe.
    --     -- Precedence: this field → models.deep → models[default_model].
    --     -- R-B2: prefer an INDEPENDENT model class from the action-emitting
    --     -- model (avoids self-policing). Recommended values:
    --     --   "cloud"  — anthropic/claude-haiku-4.5 via openrouter. Fast and
    --     --              reliable. Costs money per probe (typical Norris
    --     --              session = 16 probes max, often cached).
    --     --   "deep"   — local large model (qwen3-30b on this fleet). Free
    --     --              but slow on RK3588 hardware (~1-3s per probe).
    --     --              Falls back here automatically if not set.
    --     --   "fast"   — same model as the action-emitter. NOT RECOMMENDED
    --     --              (circular trust); use only when no other option.
    --     llm_model = "cloud",
    --
    --     -- Norris planning-loop budget. Iterations of safety.norris_step.
    --     -- Each iteration is one broker round-trip + dispatch of actions.
    --     -- Default 8. Bump for long-running goals; cap low for testing.
    --     max_norris_steps = 8,
    -- },
    -- Phase 4 (docs/PHASE4.md): cross-session memory.jsonl + startup
    -- injection + :memory management surface. The block is OFF by
    -- default (no startup injection); uncomment to tune. Note that
    -- :remember / :memory list / :memory forget / :memory summarize
    -- all work without this block — they store to <history.dir>/
    -- memory.jsonl regardless. The block only configures the
    -- injection-into-system-prompt behavior at startup.
    --
    -- memory = {
    --     -- Cap on total characters injected at startup. ~2000 chars ≈
    --     -- 500 tokens. LRU-by-ts selection if your memory.jsonl has
    --     -- more recent items than fit. Older items remain in the
    --     -- file; only injection is bounded. Suppressed entirely in
    --     -- Norris mode (R-C1).
    --     inject_max_chars = 2000,
    --
    --     -- Which configured model to use for :memory summarize.
    --     -- Defaults to the active model when nil. Use "fast" for
    --     -- speed; "deep" or "cloud" for better extraction quality
    --     -- (cloud may have variable cost per session).
    --     summarizer_model = "fast",
    --
    --     -- #102: auto-summarize the session into memory.jsonl on :q.
    --     -- When true, shutdown_session runs the same distill flow as
    --     -- `:memory summarize`, non-interactively, and auto-adds the
    --     -- parsed candidates. Silent no-op for trivial sessions (turn
    --     -- count < min_turns_for_summary, default 5). pcall'd so a
    --     -- broker failure never blocks :q.
    --     auto_summarize_on_quit = true,
    --     min_turns_for_summary  = 5,
    --     summary_model          = "fast",  -- new alias; summarizer_model
    --                                       -- above is still honored for
    --                                       -- back-compat.
    -- },
    -- Phase 5 (docs/PHASE5.md): multi-model routing + cloud fallback +
    -- summarize-on-evict. OFF by default — auto-routing can spend money
    -- silently on the cloud preset; require explicit opt-in.
    --
    -- routing = {
    --     -- Enable auto-routing per request. When true, router.classify_model
    --     -- inspects each prompt and may switch the model for THAT request
    --     -- only (the :model selection is preserved across requests).
    --     -- Default false. Toggle at runtime with :route on / :route off.
    --     auto = true,
    --
    --     -- Class → model mapping. nil = "keep current" (heuristic fires
    --     -- but no override). Ships with reasoning = nil because mapping
    --     -- "explain ..." prompts to a paid cloud model would spend money
    --     -- silently — opt in by uncommenting the reasoning line below.
    --     classes = {
    --         code      = "deep",      -- code-like prompts to local deep
    --         -- reasoning = "cloud",  -- OPT-IN: "explain"/"why"/"how does" → paid
    --         -- default   = nil,      -- keep active model
    --     },
    --
    --     -- Single-hop retry on transport failure (HTTP 5xx, 408,
    --     -- 404 model_not_found, DNS, connection refused, timeouts).
    --     -- Retries against fallback_model once. Skipped if any text
    --     -- has already streamed (no partial-output duplication).
    --     -- Toggle at runtime with :fallback on / :fallback off.
    --     fallback       = false,           -- default off (cost-safety)
    --     fallback_model = "cloud",
    --
    --     -- Issue #86: per-class system_prompt override. When the
    --     -- classified request falls into a class with an entry here,
    --     -- the BASE system_prompt is REPLACED for that one request
    --     -- (dynamic blocks — [background], [project], [earlier
    --     -- summary], NORRIS suffix — still compose on top). Mostly
    --     -- useful for tightening small local models' instruction
    --     -- adherence. Default {} (no override).
    --     system_prompts = {
    --         code = [[You are a code assistant. Rules:
    -- 1. Output ONLY the requested code or command.
    -- 2. No prose explanation unless explicitly asked.
    -- 3. Wrap shell commands in CMD: prefix.
    -- 4. Max response: 200 tokens.]],
    --         default = [[You are a shell assistant.
    -- Output shell commands as: CMD: <command>
    -- Output answers as single short sentences.
    -- Do not ask clarifying questions.]],
    --         -- reasoning routes to cloud; no override usually needed
    --     },
    --
    --     -- Issue #88: per-class GBNF grammar passthrough. llama.cpp
    --     -- constrains the sampler to ONLY emit tokens matching the
    --     -- grammar — eliminates format drift on small models. Cloud
    --     -- (Anthropic/Bedrock) silently ignores the field, so default
    --     -- passthrough is safe; no per-model opt-out needed. Malformed
    --     -- grammar surfaces as a broker error at request time.
    --     grammars = {
    --         code    = [[root ::= "CMD: " [^\n]+ "\n"]],
    --         default = [[root ::= ("CMD: " [^\n]+ "\n") | [^\n]+ "\n"]],
    --     },
    -- },
    --
    -- Issue #88 (continued): for the safety LLM probe (YES/NO
    -- destructive classification), set safety.probe_grammar to force
    -- the probe model to emit exactly YES or NO. Eliminates the
    -- regex-match fallback for unparseable verdicts; small models
    -- become reliable enough to use as the probe.
    --
    -- safety = {
    --     llm_second_opinion = true,
    --     llm_model          = "fast",
    --     probe_grammar      = [[root ::= ("YES" | "NO")]],
    -- },
    -- ── Issue #87 (route-aware context compression).
    -- When a routed model preset has `local_compress = true`, each
    -- broker call against THAT preset gets a compressed view of
    -- ctx.turns: only the last `keep_turns` turns; any turn whose
    -- content exceeds `max_turn_chars` is tail-truncated. The full
    -- context lives on (visible via :history); compression is purely
    -- per-request for small models that effectively use a fraction
    -- of their advertised context window.
    --
    -- Set the per-model opt-in on models[<name>]:
    --     models.fast = { ..., local_compress = true }
    -- Defaults live under context.compress:
    --     context = {
    --         ...
    --         compress = { keep_turns = 2, max_turn_chars = 800 },
    --     }
    --
    -- Trade-off documented in the FR: tool turns lose information
    -- when tail-truncated. Acceptable for shell-output blocks (the
    -- tail is usually the relevant bit); known limitation for
    -- structured tool results. Disable per-model if it bites.
    -- ── Issue #89 / Phase 10: cloud preplanner → local executor split.
    -- When cfg.norris.preplanner names a model preset, :norris launch
    -- fires ONE broker.chat against that preset asking for a sequence
    -- of TASK: <imperative> lines. Parsed list (capped at tasks_max)
    -- becomes ctx.norris_tasks; the executor model (cfg.norris.executor,
    -- defaulting to the active :model selection) runs each task with
    -- the current task shown in the per-step header.
    --
    -- Goal: small fast local models are cheap per step but easily
    -- distracted on multi-step plans; cloud is capable at planning
    -- but expensive per step. Use cloud ONCE for the plan, local for
    -- every step. Falls back to single-model Norris (existing
    -- behavior) when preplanner unset / fails / produces no TASKs.
    --
    -- norris = {
    --     preplanner = "anthropic",   -- model name in cfg.models;
    --                                 -- this preset is called ONCE per
    --                                 -- :norris launch. Omit to run
    --                                 -- single-model (Phase 6 behavior).
    --     executor   = "fast",        -- model that runs each step.
    --                                 -- Omit to use the active :model.
    --     tasks_max  = 16,            -- cap on preplan list size.
    --     -- preplan_system = "...",  -- override the built-in prompt
    -- },
    --
    -- :cost detail separates norris-preplan and norris rows so you
    -- can see cloud planning cost vs local execution cost. The
    -- preplan call does NOT retry via fallback_model (a different
    -- model = a different decomposition; clean hard-fail to single-
    -- model is safer).
    -- ── Phase 5 context summarization on sliding-window eviction.
    -- Set INSIDE the context = { ... } block above to enable:
    --     context = {
    --         max_turns          = 40,
    --         token_budget       = 4096,
    --         summarize_on_evict = true,
    --         summarizer_model   = "fast",   -- model name in models{}
    --         max_summary_chars  = 2000,
    --
    --         -- #101 (proactive periodic summarization). When set,
    --         -- enforce_cadence fires every N appends (before
    --         -- enforce_budget) and folds turns OLDER than
    --         -- summarize_keep_recent into ctx.summary. Goal: keep the
    --         -- wire prompt tight from the start so small local models
    --         -- aren't fed near-budget context until eviction. Composes
    --         -- with summarize_on_evict (same summarize_fn closure;
    --         -- different trigger). Suppressed in Norris (R-C4 parity).
    --         summarize_every_n_turns = 10,    -- nil = disabled (default)
    --         summarize_keep_recent   = 4,
    --     },
    -- When summarize_on_evict is true, evicted turn pairs are fed to
    -- summarizer_model and the result lives on ctx.summary, appended to
    -- the system prompt as [earlier conversation summary]. Suppressed
    -- in Norris mode (R-C4 — planner stays on its goal). If broker
    -- fails, falls back to Phase 0 silent eviction (no crash).
    -- Phase 6 (docs/PHASE6.md): project file-tree context + :diff /
    -- :tree / :highlight metas. The :diff and :tree metas work without
    -- any config. The `project` block below only controls the
    -- AUTO-injection-at-startup behavior; manual `:tree` always works
    -- regardless. Uncomment to enable startup auto-inject.
    --
    -- project = {
    --     auto_tree      = true,   -- run `:tree` once at startup
    --     tree_depth     = 3,      -- depth filter for the scan (find fallback only;
    --                              -- git ls-files emits full repo-relative paths)
    --     tree_max_chars = 4096,   -- truncate the injected block above this
    -- },
    --
    -- :highlight has no config flag in v1 — toggled at runtime only.
    -- Requires the external `tree-sitter` CLI plus configured parser-
    -- directories with cloned + built `tree-sitter-<lang>` grammars
    -- (see `:highlight on` for the install hints).
    -- Phase 7 (docs/PHASE7.md): cost / usage observability. broker.lua
    -- captures `usage` (+ `cost` for cloud) from every chat/chat_stream
    -- call and routes via ctx:add_usage to a per-session accumulator.
    -- `:cost` / `:cost detail` / `:cost reset` surface the totals.
    -- The `cost` block below configures OPTIONAL warn thresholds —
    -- a single status line fires the first time the cumulative total
    -- crosses each threshold. Default off. Useful when paid cloud
    -- presets are in play so runaway-cost sessions get a nudge.
    --
    -- cost = {
    --     warn_at_dollars = 0.50,    -- one-shot warn when cumulative cost crosses
    --     warn_at_tokens  = 100000,  -- one-shot warn when cumulative token count crosses
    -- },
    --
    -- Both flags are independent (R4 — first-to-fire doesn't suppress
    -- the other); `:cost reset` re-arms both. Per-turn usage is also
    -- written to session/*.jsonl (assistant-turn `usage` field) for
    -- after-the-fact scripting; cross-session aggregation deferred
    -- to a future phase (Q-C2).
    -- Phase 8 (docs/PHASE8.md): accurate tokenization via the broker's
    -- /tokenize endpoint, replacing the Phase 0 §8 char/4 heuristic.
    -- Two consequences when use_endpoint=true:
    --   (1) Context:estimate_tokens hits <endpoint>/tokenize once per
    --       new turn (cached on the turn dict thereafter). Network
    --       cost is one round-trip (~30ms) per fresh turn; subsequent
    --       calls reuse the cache.
    --   (2) Context:enforce_budget actually ENFORCES token_budget now
    --       (previously only max_turns was checked). Sessions that
    --       fit under char/4 may evict earlier — raise token_budget
    --       to match your model's real context window if needed.
    -- Cloud endpoints (OpenRouter) don't expose /tokenize; capability
    -- cached as unsupported on first probe -> silent char/4 fallback.
    --
    -- tokenize = {
    --     use_endpoint = true,
    -- },
 }
@@ -1,25 +1,190 @@
 -- executor.lua — command execution.
 -- Phase 1: forkpty via ffi/pty + bidirectional multiplex. Replaces Phase 0's
 -- io.popen + sentinel-echo workaround. The multiplex loop forwards stdin
 -- keystrokes to the child master fd while streaming master output to stdout,
 -- so vim / less / htop / nano are usable end-to-end. Parent's tty (fd 0) is
 -- flipped to raw mode for the duration so single-key UIs work.
 -- `cd` interception is unchanged (still libc.chdir per §3, §7).
 -- See docs/PHASE0.md §7 and docs/PHASE1.md §5.
 local ffi  = require("ffi")
 local bit  = require("bit")
 local libc = require("ffi.libc")
 local pty  = require("ffi.pty")
 local M = {}
 local pollfd_arr2 = ffi.typeof("struct pollfd[2]")
 -- Multiplex stdin (fd 0) <-> sess.master_fd until the master side reports EOF
 -- (or poll fails with anything other than EINTR).
 -- Output is streamed live to stdout AND collected for the (output, code)
 -- return so context.append_exec_output still has the body to inject into
 -- the next user turn.
 -- Params:
 --   sess  PTY session; this function uses sess.master_fd, sess:read() and
 --         sess:write().
 -- Returns: array of raw output chunks in arrival order.
 -- The poll loop runs under pcall so the saved termios is restored even when
 -- something inside the loop raises; the error is then re-raised unchanged.
 -- Without that, a failure mid-command would leave the user's terminal in
 -- raw mode.
 local function multiplex(sess)
    local saved_termios = libc.set_raw(0)  -- nil if stdin isn't a tty
    local stdin_is_tty  = (saved_termios ~= nil)
    local fds = pollfd_arr2()
    -- Only poll stdin when it's a tty. With piped stdin (scripted runs /
    -- tests), aish's stdin holds the *next* aish commands queued for the
    -- repl loop — draining it into the child would swallow those.
    -- (0 is truthy in Lua, so the and/or form is safe here.)
    fds[0].fd = stdin_is_tty and 0 or -1
    fds[0].events = libc.POLLIN
    fds[1].fd = sess.master_fd
    fds[1].events = libc.POLLIN
    local chunks = {}
    local ok, loop_err = pcall(function()
        while true do
            fds[0].revents = 0
            fds[1].revents = 0
            local rc = libc.poll(fds, 2, -1)
            if rc < 0 then
                -- A signal during poll (EINTR) just retries; any other
                -- poll failure ends the session loop.
                if libc.errno() ~= libc.EINTR then break end
            else
                -- Drain master first (output priority). Read on *any*
                -- revents — POLLHUP fires (and POLLIN doesn't) when the
                -- child closes its slave PTY end on exit; reading then
                -- returns 0 = EOF.
                if fds[1].revents ~= 0 then
                    local data, n = sess:read()
                    if not data or n == 0 then break end
                    chunks[#chunks + 1] = data
                    io.write(data); io.flush()
                end
                -- Forward stdin keystrokes to the child.
                if fds[0].revents ~= 0 then
                    local input, n = libc.read(0, 4096)
                    if input and n > 0 then
                        sess:write(input)
                    elseif input == "" then
                        -- aish's own stdin closed; stop forwarding but keep
                        -- draining master until child exits
                        fds[0].fd = -1
                    end
                end
            end
        end
    end)
    -- Always undo raw mode before leaving, on success and on error alike.
    if saved_termios then libc.restore_termios(0, saved_termios) end
    if not ok then error(loop_err, 0) end
    return chunks
 end
 -- Execute a shell command under a PTY, streaming its output live and
 -- returning the captured text.
 -- Params:
 --   cmd  command string; nil or whitespace-only is rejected without spawning.
 -- Returns: (output_string, exit_code).
 --   0       success
 --   1..255  child exited with that status
 --   128+N   child killed by signal N (bash convention)
 --   -1      forkpty / spawn / wait failure (also used for an empty command)
 function M.exec(cmd)
    if not cmd or cmd:match("^%s*$") then
        return "(empty command)", -1
    end
    local sess, err = pty.spawn(cmd)
    if not sess then
        return "(pty.spawn failed: " .. tostring(err) .. ")", -1
    end
    local chunks = multiplex(sess)
    local kind, code = sess:wait()
    sess:close()
    -- PTY line discipline emits \r\n for every \n the child writes; collapse
    -- back to \n so the Phase 0 caller contract ("output uses \n separators")
    -- still holds for context-injection purposes. The parentheses keep only
    -- gsub's first result (the string), dropping the replacement count.
    local output = (table.concat(chunks):gsub("\r\n", "\n"))
    if kind == "exit"   then return output, code       end
    if kind == "signal" then return output, 128 + code end
    return output, -1
 end
 -- Intercept and apply `cd <path>` (or bare `cd` -> $HOME) without forking.
 -- Params:
 --   cmd  command string as typed / emitted.
 -- Returns:
 --   nil          : the command is not a `cd` (caller falls through to exec)
 --   otherwise whatever libc.chdir(target) returns:
 --   true         : it was a cd, libc.chdir succeeded
 --   false, err   : it was a cd, libc.chdir failed with errmsg
 -- Only a leading `cd` followed by whitespace or end-of-string counts, so
 -- commands such as `cdrom` are not intercepted. Everything after `cd` is
 -- taken as the path verbatim (no quoting or `&&` handling).
 function M.maybe_chdir(cmd)
    local rest
    if cmd:match("^%s*cd%s*$") then
        rest = ""                              -- bare `cd`
    else
        rest = cmd:match("^%s*cd%s+(.+)$")     -- `cd <something>`
    end
    if not rest then return nil end
    local target = rest:match("^%s*(.-)%s*$") or ""
    local home = os.getenv("HOME")
    -- Phase 0: no $OLDPWD support, so `cd -` is not handled.
    if target == "" or target == "~" then
        target = home or "/"
    elseif target:sub(1, 2) == "~/" then
        -- Keep the "/" from the original path: "~/x" -> "<HOME>/x".
        target = (home or "") .. target:sub(2)
    end
    return libc.chdir(target)
 end
 -- Extract `CMD: ` lines from an assistant response per the §6 broker contract
 -- (PHASE0.md §6 system prompt).
 -- The "CMD: " prefix is a §3 substrate invariant: exact prefix, single space,
 -- start-of-line only. Leading whitespace before CMD: does NOT match.
 -- "CMD&: " lines are issue #8 background variants — extracted separately so
 -- repl.lua can route them to the bg spawner instead of the synchronous gate.
 function M.extract_cmd_lines(text)
    -- Returns an array with the payload of every line that begins with the
    -- exact prefix "CMD: " (no leading whitespace, one space after the
    -- colon). A nil `text` is treated as empty; payloads containing no
    -- non-space character are dropped.
    local cmds = {}
    for line in (text or ""):gmatch("[^\n]+") do
        local cmd = line:match("^CMD: (.*)$")
        if cmd and cmd:match("%S") then cmds[#cmds + 1] = cmd end
    end
    return cmds
 end
 function M.extract_cmd_bg_lines(text)
    -- Background counterpart of the "CMD: " extractor: gathers the payload
    -- of each line that starts with the exact prefix "CMD&: ". Payloads
    -- made up solely of whitespace are skipped; nil input yields {}.
    local found = {}
    local source = text or ""
    for row in source:gmatch("[^\n]+") do
        local payload = row:match("^CMD&: (.*)$")
        if payload ~= nil and payload:find("%S") ~= nil then
            table.insert(found, payload)
        end
    end
    return found
 end
 -- Issue #6: parse `DELEGATE: <preset> "<prompt>"` lines into
 -- { preset = ..., prompt = ... } records. The prompt must be wrapped in
 -- quotes (double quotes are tried first, then single quotes) so its
 -- boundary is unambiguous even when it contains arbitrary punctuation.
 -- Lines that don't fit the quoted shape, or whose prompt is only
 -- whitespace, are silently dropped (rendered as text to the user).
 function M.extract_delegate_lines(text)
    local shapes = {
        [[^DELEGATE: (%S+)%s+"(.+)"%s*$]],
        [[^DELEGATE: (%S+)%s+'(.+)'%s*$]],
    }
    local records = {}
    for row in (text or ""):gmatch("[^\n]+") do
        for i = 1, #shapes do
            local preset, prompt = row:match(shapes[i])
            if preset then
                -- First shape that matches wins; the other is not tried.
                if prompt and prompt:find("%S") then
                    records[#records + 1] = { preset = preset, prompt = prompt }
                end
                break
            end
        end
    end
    return records
 end
 -- Phase 10 / #89: pull `TASK: <imperative>` lines out of a cloud
 -- preplanner's response. This is the wire contract of the
 -- planning/executor split: the cloud model emits the TASK list once
 -- per :norris launch and the local model executes each entry.
 --
 -- Looser than extract_cmd_lines: indentation before "TASK:" is
 -- accepted (cloud models often indent), whitespace after the colon
 -- is skipped, and trailing whitespace is stripped. Only the literal
 -- "TASK:" prefix is matched strictly.
 --
 -- Returns an array of already-trimmed strings; empty TASKs and
 -- non-TASK lines are dropped silently.
 function M.extract_task_lines(text)
    local tasks = {}
    local source = text or ""
    for row in source:gmatch("[^\n]+") do
        local body = row:match("^%s*TASK:%s*(.-)%s*$")
        if body ~= nil and body:find("%S") ~= nil then
            table.insert(tasks, body)
        end
    end
    return tasks
 end
 return M
@@ -1,16 +1,266 @@
 -- ffi/curl.lua — libcurl easy interface binding.
 -- Phase 0: blocking POST with header list and response capture into Lua string.
 -- Phase 1: M.post_sse for incremental Server-Sent-Events streaming. Reuses the
 -- same WRITEFUNCTION hook; parses `data: ...\n\n` events out of the chunk
 -- stream and invokes the caller's on_event(data) per event. JSON decode and
 -- OpenAI-shape interpretation stay in broker.lua (this module is HTTP-only).
 -- See docs/PHASE0.md §6 and docs/PHASE1.md §4.
 local ffi = require("ffi")
 -- C declarations for the subset of the libcurl easy interface used by this
 -- module. Each function is declared exactly once; curl_easy_setopt and
 -- curl_easy_getinfo are variadic in C.
 ffi.cdef[[
 typedef void CURL;
 struct curl_slist {
    char *data;
    struct curl_slist *next;
 };
 CURL *curl_easy_init(void);
 void  curl_easy_cleanup(CURL *handle);
 int   curl_easy_setopt(CURL *handle, int option, ...);
 int   curl_easy_getinfo(CURL *handle, int info, ...);
 int   curl_easy_perform(CURL *handle);
 const char *curl_easy_strerror(int code);
 struct curl_slist *curl_slist_append(struct curl_slist *list, const char *string);
 void               curl_slist_free_all(struct curl_slist *list);
 ]]
 -- Locate libcurl at runtime. The unversioned `libcurl.so` symlink ships
 -- with libcurl-dev and isn't assumed to exist, so versioned sonames are
 -- tried as fallbacks — a runtime-only host (Debian without -dev) just
 -- works. Returns the first library that loads; raises an error listing
 -- every failed attempt when none do.
 local function load_curl()
    local candidates = {"curl", "curl.so.4", "curl-gnutls.so.4"}
    local failures = {}
    for i = 1, #candidates do
        local soname = candidates[i]
        local loaded, result = pcall(ffi.load, soname)
        if loaded then
            return result
        end
        failures[#failures + 1] = soname .. ": " .. tostring(result)
    end
    error("libcurl not loadable: " .. table.concat(failures, "; "))
 end
 local C = load_curl()
 -- CURLoption codes from curl/curl.h. Every code is the base of its
 -- argument type plus the option's own number:
 --   CURLOPTTYPE_LONG          = 0
 --   CURLOPTTYPE_OBJECTPOINT   = 10000
 --   CURLOPTTYPE_FUNCTIONPOINT = 20000
 local OPT
 do
    local LONG, OBJECTPOINT, FUNCTIONPOINT = 0, 10000, 20000
    OPT = {
        -- pointer-valued options
        URL            = OBJECTPOINT + 2,
        POSTFIELDS     = OBJECTPOINT + 15,
        USERAGENT      = OBJECTPOINT + 18,
        HTTPHEADER     = OBJECTPOINT + 23,
        -- callback-valued options
        WRITEFUNCTION  = FUNCTIONPOINT + 11,
        -- long-valued options
        FAILONERROR    = LONG + 45,
        POST           = LONG + 47,
        NOSIGNAL       = LONG + 99,
        TIMEOUT_MS     = LONG + 155,
    }
 end
 -- Variadic FFI calls demand explicit per-argument types. Pre-cast setopt to
 -- the three concrete signatures Phase 0 needs; bypasses libffi-flavoured
 -- variadic dispatch entirely.
 --   setopt_str   string-valued options (e.g. URL, USERAGENT)
 --   setopt_long  long-valued options   (e.g. POST, NOSIGNAL, TIMEOUT_MS)
 --   setopt_ptr   pointer-valued options (header slist, write callback)
 -- NOTE(review): calling a C-variadic function through a fixed-signature
 -- pointer assumes the target ABI passes these arguments the same way in
 -- both cases — confirm before supporting a new platform.
 local setopt_str  = ffi.cast("int(*)(void*, int, const char*)", C.curl_easy_setopt)
 local setopt_long = ffi.cast("int(*)(void*, int, long)",        C.curl_easy_setopt)
 local setopt_ptr  = ffi.cast("int(*)(void*, int, void*)",       C.curl_easy_setopt)
 -- curl_easy_getinfo is variadic too. The Phase 2 caller only needs the
 -- CURLINFO_LONG family (HTTP response code); pre-cast to that signature.
 -- CURLINFO_RESPONSE_CODE = CURLINFO_LONG (0x200000) + 2 = 2097154.
 local getinfo_long = ffi.cast("int(*)(void*, int, long*)", C.curl_easy_getinfo)
 local INFO_RESPONSE_CODE = 2097154
 local function get_response_code(handle)
    -- Ask libcurl for the HTTP status of the transfer last performed on
    -- `handle`. The value is written into a one-element long array.
    local slot = ffi.new("long[1]")
    local rc = getinfo_long(handle, INFO_RESPONSE_CODE, slot)
    if rc ~= 0 then
        return 0  -- 0 = no response (e.g. couldn't connect)
    end
    return tonumber(slot[0])
 end
 local M = {}
-- Phase 0 stubs; full binding lands with broker.chat() implementation.
+
 -- POST `body` to `url` with `headers` (list of "Name: value" strings) and an
 -- optional `timeout_ms`.
 -- Returns:
 --   body, status_code   on transport success — body is the raw response
 --                       string (may be empty); status_code is the HTTP
 --                       response code (2xx success, 4xx/5xx surface as
 --                       transport-level failure for callers that care,
 --                       e.g. mcp.lua treating 401 as auth failure).
 --                       FAILONERROR is intentionally NOT set so the body
 --                       is observable on non-2xx (lmcp's 401 returns a
 --                       non-JSON-RPC body that callers need to recognise).
 --   nil, errmsg         on libcurl-level failure (non-zero CURLcode), or
 --                       "callback: <err>" when collecting a response chunk
 --                       raised inside the write callback.
 -- Phase 1 callers reading only the first slot stay correct: success
 -- returns truthy body, failure returns nil — same disjunction as before.
 function M.post(url, body, headers, timeout_ms)
    local handle = C.curl_easy_init()
    if handle == nil then return nil, "curl_easy_init returned NULL" end
    local chunks = {}
    local cb_error = nil
    local write_cb = ffi.cast(
        "size_t(*)(char*, size_t, size_t, void*)",
        function(ptr, size, nmemb, _)
            local n = tonumber(size) * tonumber(nmemb)
            -- pcall-wrap, as post_sse does: a Lua error must not propagate
            -- across the FFI callback boundary. Record the first failure and
            -- keep draining (return n) so it can be reported after perform.
            local ok, err = pcall(function()
                chunks[#chunks + 1] = ffi.string(ptr, n)
            end)
            if not ok and not cb_error then cb_error = err end
            return n
        end)
    local slist = nil
    for _, h in ipairs(headers or {}) do
        slist = C.curl_slist_append(slist, h)
    end
    setopt_str (handle, OPT.URL,            url)
    setopt_long(handle, OPT.POST,           1)
    setopt_str (handle, OPT.POSTFIELDS,     body)
    setopt_ptr (handle, OPT.HTTPHEADER,     slist)
    setopt_ptr (handle, OPT.WRITEFUNCTION,  write_cb)
    setopt_long(handle, OPT.NOSIGNAL,       1)
    setopt_str (handle, OPT.USERAGENT,      "aish/0.0 (luajit-ffi)")
    if timeout_ms then
        setopt_long(handle, OPT.TIMEOUT_MS, timeout_ms)
    end
    local rc = C.curl_easy_perform(handle)
    local result, status, err
    if rc == 0 then
        result = table.concat(chunks)
        status = get_response_code(handle)
    else
        err = ffi.string(C.curl_easy_strerror(rc))
    end
    -- Release order: the handle first, then the header list and the
    -- callback it was configured with.
    C.curl_easy_cleanup(handle)
    if slist ~= nil then C.curl_slist_free_all(slist) end
    write_cb:free()
    if cb_error then return nil, "callback: " .. tostring(cb_error) end
    if rc == 0 then return result, status end
    return nil, err
 end
 -- POST `body` to `url` with `headers`, streaming Server-Sent-Events back.
 -- An event is everything up to a blank line ("\n\n"). The payloads of its
 -- `data:` lines are joined with "\n" and handed to `on_event(data_string)`,
 -- synchronously from within the WRITEFUNCTION callback. The caller decides
 -- what to do with the payload (broker.lua decodes JSON, extracts the OpenAI
 -- delta.content). `[DONE]` sentinels arrive as ordinary data payloads and
 -- are passed through as-is (broker filters them). `:` comment / keepalive
 -- lines and events without any `data:` line are dropped here.
 -- Params:
 --   url, body, headers   as for M.post (headers: list of "Name: value")
 --   on_event             function(data_string), called once per event
 --   timeout_ms           optional total transfer timeout
 -- Returns:
 --   true             perform OK and HTTP status below 400
 --   nil, errmsg      one of, checked in this order:
 --                      "callback: <err>"   on_event / parsing raised
 --                      libcurl error text  non-zero CURLcode
 --                      "HTTP <status>: <first 400 bytes of body>"
 -- FAILONERROR is intentionally NOT set, so the body of a >=400 response is
 -- readable and can be surfaced in the error message.
 function M.post_sse(url, body, headers, on_event, timeout_ms)
    local handle = C.curl_easy_init()
    if handle == nil then return nil, "curl_easy_init returned NULL" end
    -- SSE parse state: buffer holds the incomplete tail between callback
    -- deliveries. raw_body keeps the leading bytes of the response
    -- (regardless of SSE shape) so upstream error bodies can be surfaced
    -- (e.g. openrouter→bedrock 400 with a non-SSE JSON envelope). Only the
    -- first RAW_BODY_CAP bytes are ever reported, so only those are kept —
    -- this avoids re-copying the whole stream on every chunk.
    local RAW_BODY_CAP = 400
    local buffer   = ""
    local raw_body = ""
    local cb_error = nil
    -- Parse one event block and invoke on_event with its joined data
    -- payload. Lines that are not `data:` fields (including `:` keepalive
    -- comments) contribute nothing.
    local function dispatch_event(event)
        local data_parts = {}
        for line in (event .. "\n"):gmatch("([^\n]*)\n") do
            if line:sub(1, 6) == "data: " then
                data_parts[#data_parts + 1] = line:sub(7)
            elseif line:sub(1, 5) == "data:" then
                data_parts[#data_parts + 1] = line:sub(6)
            end
        end
        if #data_parts > 0 then
            on_event(table.concat(data_parts, "\n"))
        end
    end
    local write_cb = ffi.cast(
        "size_t(*)(char*, size_t, size_t, void*)",
        function(ptr, size, nmemb, _)
            local n = tonumber(size) * tonumber(nmemb)
            -- pcall-wrap so a Lua error in on_event (or in the parse loop)
            -- doesn't propagate across the FFI callback boundary — LuaJIT
            -- documents that as process-fatal. Surface via cb_error and let
            -- curl keep draining (return n) so we can report after perform.
            local ok, err = pcall(function()
                local chunk = ffi.string(ptr, n)
                if #raw_body < RAW_BODY_CAP then
                    raw_body = (raw_body .. chunk):sub(1, RAW_BODY_CAP)
                end
                buffer = buffer .. chunk
                while true do
                    local b = buffer:find("\n\n", 1, true)
                    if not b then break end
                    local event = buffer:sub(1, b - 1)
                    -- Advance past the event before dispatching so a
                    -- failing on_event doesn't get the same event again.
                    buffer = buffer:sub(b + 2)
                    dispatch_event(event)
                end
            end)
            if not ok and not cb_error then cb_error = err end
            return n
        end)
    local slist = nil
    for _, h in ipairs(headers or {}) do
        slist = C.curl_slist_append(slist, h)
    end
    setopt_str (handle, OPT.URL,            url)
    setopt_long(handle, OPT.POST,           1)
    setopt_str (handle, OPT.POSTFIELDS,     body)
    setopt_ptr (handle, OPT.HTTPHEADER,     slist)
    setopt_ptr (handle, OPT.WRITEFUNCTION,  write_cb)
    setopt_long(handle, OPT.NOSIGNAL,       1)
    -- FAILONERROR intentionally NOT set: we want to read the response body
    -- on >=400 so the caller can surface upstream API errors (bedrock
    -- rejecting tool-name format, openrouter quota, etc.) instead of just
    -- "HTTP response code said error". Status code is checked after perform.
    setopt_str (handle, OPT.USERAGENT,      "aish/0.0 (luajit-ffi)")
    if timeout_ms then
        setopt_long(handle, OPT.TIMEOUT_MS, timeout_ms)
    end
    local rc = C.curl_easy_perform(handle)
    local err, status
    if rc == 0 then
        status = get_response_code(handle)
    else
        err = ffi.string(C.curl_easy_strerror(rc))
    end
    -- End-of-stream flush: the final event may lack a trailing \n\n if the
    -- server closed the connection right after writing the last data: line
    -- (some llama.cpp builds, and any plain HTTP/1.0 close-on-EOF feed).
    -- Parse any remaining buffer content as one last event, under the same
    -- pcall shield. Skipped when the status is 400 or above — there the
    -- buffer is the error body, not an SSE event.
    if rc == 0 and status < 400 and #buffer > 0 then
        local ok, perr = pcall(dispatch_event, buffer)
        if not ok and not cb_error then cb_error = perr end
    end
    C.curl_easy_cleanup(handle)
    if slist ~= nil then C.curl_slist_free_all(slist) end
    write_cb:free()
    if cb_error then return nil, "callback: " .. tostring(cb_error) end
    if rc ~= 0  then return nil, err end
    if status >= 400 then
        local snippet = raw_body ~= "" and raw_body or "(no body)"
        return nil, ("HTTP %d: %s"):format(status, snippet)
    end
    return true
 end
 return M
@@ -1,18 +1,201 @@
-- ffi/libc.lua — shared libc bindings: errno, signal, write, read, chdir.
+-- ffi/libc.lua — shared libc bindings.
 -- Phase 0: chdir, errno, strerror — enough for `cd` interception in executor.
 -- Phase 1: waitpid + WEXITSTATUS, raw fd I/O (read/write/close), kill — the
 -- syscalls ffi/pty needs to drive a forkpty'd child.
 -- See docs/PHASE0.md §7 and docs/PHASE1.md §3.
 local ffi = require("ffi")
 local bit = require("bit")
 ffi.cdef[[
 int   chdir(const char *path);
-int   errno;
+int  *__errno_location(void);
 char *strerror(int errnum);
 typedef int     pid_t;
 typedef long    ssize_t;
 typedef unsigned long size_t;
 pid_t   waitpid(pid_t pid, int *wstatus, int options);
 ssize_t read   (int fd, void *buf, size_t count);
 ssize_t write  (int fd, const void *buf, size_t count);
 int     close  (int fd);
 int     kill   (pid_t pid, int sig);
 /* termios for raw-mode toggle around interactive PTY children. The struct
   is treated as opaque — cfmakeraw fills it; size 64 is comfortably larger
   than glibc's struct termios (60 bytes) on aarch64/x86_64 Linux. */
 struct termios { char _opaque[64]; };
 int  tcgetattr(int fd, struct termios *tio);
 int  tcsetattr(int fd, int actions, const struct termios *tio);
 void cfmakeraw(struct termios *tio);
 /* poll for stdin↔master multiplex in executor. */
 struct pollfd { int fd; short events; short revents; };
 int poll(struct pollfd *fds, unsigned long nfds, int timeout);
 /* Phase 4: advisory file locking on memory.jsonl. Single-writer
   enforcement via LOCK_EX | LOCK_NB — fail-fast if another aish
   process holds the lock. */
 int flock(int fd, int operation);
 /* TTY detection for non-interactive mode (`aish -p`). Returns 1 if the
   fd refers to a terminal, 0 otherwise (sets errno on error). */
 int isatty(int fd);
 /* getcwd — chdir() doesn't update PWD env, so prompt {cwd} needs the
   real cwd. NULL buffer + size 0 is the GNU extension that malloc()s
   the buffer; we use a fixed-size stack buffer instead. */
 char *getcwd(char *buf, size_t size);
 ]]
 local C = ffi.C
 local M = {}
-- Apply chdir per PHASE0.md §7 (intercepts `cd` so wd persists across popen).
+-- ---------------------------------------------------------------- chdir / errno
 -- Phase 0 invariants. Apply chdir per PHASE0.md §7.
 -- Returns: true on success; false, errmsg on failure.
 function M.chdir(path)
-    error("libc.chdir: not implemented (Phase 0 pending)")
+    local rc = C.chdir(path)
    if rc == 0 then return true end
    -- Failure: read errno through the __errno_location() accessor and
    -- turn it into a message string with strerror.
    return false, ffi.string(C.strerror(C.__errno_location()[0]))
 end
 -- Current errno value, read through the __errno_location() accessor.
 function M.errno()
    local slot = C.__errno_location()
    return slot[0]
 end
 -- Message for errno value `en`, copied into a Lua string.
 function M.strerror(en)
    local msg = C.strerror(en)
    return ffi.string(msg)
 end
 -- ---------------------------------------------------------------- waitpid
 -- Lua ports of glibc's wait-status macros (WIFEXITED / WEXITSTATUS /
 -- WIFSIGNALED / WTERMSIG). Each takes the raw integer status from waitpid.
 local function WIFEXITED(status)
    -- normal exit iff the low 7 bits are all clear
    local low7 = bit.band(status, 0x7f)
    return low7 == 0
 end
 local function WEXITSTATUS(status)
    -- exit code is the byte above the low 8 bits
    local shifted = bit.rshift(status, 8)
    return bit.band(shifted, 0xff)
 end
 local function WIFSIGNALED(status)
    -- signal-killed iff the low 7 bits are in 1..126
    local low7 = bit.band(status, 0x7f)
    if low7 == 0 then return false end
    return low7 ~= 0x7f
 end
 local function WTERMSIG(status)
    -- terminating signal number = low 7 bits
    return bit.band(status, 0x7f)
 end
 M.WIFEXITED,   M.WEXITSTATUS = WIFEXITED,   WEXITSTATUS
 M.WIFSIGNALED, M.WTERMSIG    = WIFSIGNALED, WTERMSIG
 -- waitpid wrapper. Returns (kind, value):
 --   "exit",    exit_code   on normal exit (WIFEXITED -> WEXITSTATUS)
 --   "signal",  signum      on signal kill (WIFSIGNALED -> WTERMSIG)
 --   "running", 0           when waitpid returned 0, i.e. WNOHANG was passed
 --                          in `options` and the child has not changed state
 --   "other",   status      any other status word (raw value passed through)
 --   nil, errmsg            on waitpid syscall failure
 local status_buf = ffi.new("int[1]")
 function M.waitpid(pid, options)
    status_buf[0] = 0
    local rc = C.waitpid(pid, status_buf, options or 0)
    if rc < 0 then
        return nil, ffi.string(C.strerror(C.__errno_location()[0]))
    end
    if rc == 0 then
        -- No child was reaped, so status_buf still holds the 0 we stored
        -- above. Decoding it would look like a clean "exit 0"; report the
        -- child as still running instead.
        return "running", 0
    end
    local status = status_buf[0]
    if WIFEXITED(status)   then return "exit",   WEXITSTATUS(status) end
    if WIFSIGNALED(status) then return "signal", WTERMSIG(status)    end
    return "other", status
 end
 -- ---------------------------------------------------------------- raw fd I/O
 -- Used by ffi/pty for master-fd transfer. Errors return nil + errmsg + errno
 -- so callers can decide between EAGAIN/EINTR retry and abort. EOF on read is
 -- represented as ("", 0) — empty string, zero bytes.
 -- Note: READ_BUF is module-shared. Phase 1 has no reentrant M.read callers
 -- (no coroutines, no concurrent FFI callbacks performing reads); revisit if
 -- that ever changes.
 local READ_BUF = ffi.new("char[?]", 4096)
 -- Read up to `count` bytes (default 4096) from `fd`.
 -- Returns:
 --   data, n              on success (n == 0 means EOF)
 --   nil, errmsg, errno   on syscall failure
 function M.read(fd, count)
    count = count or 4096
    -- Requests that fit use the shared buffer; larger ones get their own.
    local buf = (count <= 4096) and READ_BUF or ffi.new("char[?]", count)
    local n = C.read(fd, buf, count)
    if n < 0 then
        -- Capture errno once, straight after the failing call, so the
        -- message and the numeric code are guaranteed to agree.
        local en = C.__errno_location()[0]
        return nil, ffi.string(C.strerror(en)), en
    end
    return ffi.string(buf, n), tonumber(n)
 end
 -- Write the string `data` to `fd` with a single write(2) call.
 -- Returns:
 --   n                    bytes actually written (may be less than #data)
 --   nil, errmsg, errno   on syscall failure
 function M.write(fd, data)
    local n = C.write(fd, data, #data)
    if n < 0 then
        -- Capture errno once so the message and the code cannot diverge.
        local en = C.__errno_location()[0]
        return nil, ffi.string(C.strerror(en)), en
    end
    return tonumber(n)
 end
 -- Close `fd`. Returns true when close(2) reported success, false otherwise.
 function M.close(fd)
    local rc = C.close(fd)
    return rc == 0
 end
 -- Send signal `sig` to process `pid`.
 -- Returns: true on success; false, errmsg on failure.
 function M.kill(pid, sig)
    if C.kill(pid, sig) ~= 0 then
        return false, ffi.string(C.strerror(C.__errno_location()[0]))
    end
    return true
 end
 -- ---------------------------------------------------------------- termios
 -- Snapshot the tty settings of `fd`, then switch it to raw mode using
 -- cfmakeraw on a copy. Returns the snapshot (hand it to
 -- M.restore_termios later), or (nil, errmsg) when tcgetattr/tcsetattr
 -- fails, e.g. because fd isn't a tty (stdin redirected from a file in
 -- CI / scripted runs).
 local TCSANOW = 0
 function M.set_raw(fd)
    local original = ffi.new("struct termios")
    if C.tcgetattr(fd, original) < 0 then
        return nil, M.strerror(M.errno())
    end
    -- Work on a copy so the snapshot stays untouched for the restore.
    local raw_mode = ffi.new("struct termios")
    ffi.copy(raw_mode, original, ffi.sizeof("struct termios"))
    C.cfmakeraw(raw_mode)
    if C.tcsetattr(fd, TCSANOW, raw_mode) >= 0 then
        return original
    end
    return nil, M.strerror(M.errno())
 end
 -- Re-apply a termios snapshot previously returned by M.set_raw.
 -- Returns true when tcsetattr reported success.
 function M.restore_termios(fd, saved)
    local rc = C.tcsetattr(fd, TCSANOW, saved)
    return rc == 0
 end
 -- ---------------------------------------------------------------- poll
 M.POLLIN  = 0x0001
 M.EINTR   = 4
 -- Thin wrapper over poll(2); `timeout_ms` defaults to -1 when omitted.
 -- Returns poll's rc unchanged: > 0 fds ready, 0 timeout, -1 error.
 function M.poll(fds_arr, nfds, timeout_ms)
    local timeout = timeout_ms or -1
    return C.poll(fds_arr, nfds, timeout)
 end
 -- ---------------------------------------------------------------- flock
 -- Advisory file locking. Phase 4 uses LOCK_EX | LOCK_NB so a second
 -- aish process opening the same memory.jsonl fails fast rather than
 -- blocking. Lock is released on fd close or process exit.
 M.LOCK_EX = 2
 M.LOCK_NB = 4
 M.LOCK_UN = 8
 -- Apply flock operation `op` to `fd`.
 -- Returns: true on success; false, errmsg on failure (e.g. EWOULDBLOCK
 -- when LOCK_NB is set and another holder exists).
 function M.flock(fd, op)
    local rc = C.flock(fd, op)
    if rc ~= 0 then
        return false, ffi.string(C.strerror(C.__errno_location()[0]))
    end
    return true
 end
 -- ---------------------------------------------------------------- isatty
 -- True iff isatty(3) returns 1 for `fd`.
 function M.isatty(fd)
    local rc = C.isatty(fd)
    return rc == 1
 end
 -- ---------------------------------------------------------------- getcwd
 -- Fixed 4096-byte scratch buffer handed to getcwd.
 local CWD_BUF = ffi.new("char[?]", 4096)
 -- Returns the current working directory as a Lua string, or
 -- (nil, errmsg) when getcwd returns NULL.
 function M.getcwd()
    if C.getcwd(CWD_BUF, 4096) ~= nil then
        return ffi.string(CWD_BUF)
    end
    return nil, ffi.string(C.strerror(C.__errno_location()[0]))
 end
 return M
@@ -1,5 +1,91 @@
-- ffi/pty.lua — forkpty, openpty, waitpid bindings.
+-- ffi/pty.lua — forkpty-backed exec.
-- Phase 0: stub. Lands in Phase 1 to enable interactive programs (vim, htop).
+-- Phase 1: replaces Phase 0's io.popen path so interactive cmds (vim, less,
 -- htop) work and so executor's exit-code recovery can use waitpid instead
 -- of the §7 sentinel hack.
 -- See docs/PHASE1.md §5.
 local ffi  = require("ffi")
 local libc = require("ffi.libc")
 ffi.cdef[[
 typedef int pid_t;
 pid_t forkpty(int *amaster, char *name, void *termp, void *winp);
 int   execvp (const char *file, char *const argv[]);
 void  _exit  (int status);
 ]]
 -- The unversioned `libutil.so` symlink (shipped by libutil-dev) isn't
 -- assumed; versioned sonames are tried next so a runtime-only host (no -dev
 -- installed) works. Same idiom as ffi/readline + ffi/curl.
 -- Returns the loaded library; raises with every per-name failure if none load.
 local function load_util()
    local candidates = { "util", "util.so.1", "util.so.0" }
    local failures = {}
    for i = 1, #candidates do
        local soname = candidates[i]
        local loaded, result = pcall(ffi.load, soname)
        if loaded then
            return result
        end
        failures[#failures + 1] = soname .. ": " .. tostring(result)
    end
    error("libutil not loadable: " .. table.concat(failures, "; "))
 end
 local util = load_util()
 local C    = ffi.C
 local M       = {}
 local Session = {}
 Session.__index = Session
 -- Spawn `cmd` (shell-interpreted via /bin/sh -c) under a fresh PTY.
 -- Returns:
 --   session table : { pid, master_fd, closed } with :read/:write/:close/:wait/:signal
 --   nil, errmsg   : if `cmd` is not a string, or on forkpty failure
 function M.spawn(cmd)
    -- Validate and build argv BEFORE forking. Anything that raises a Lua
    -- error in the child (e.g. assigning a non-string into argv) would
    -- unwind into the caller inside the forked copy of this process, so
    -- the child would never reach execvp/_exit.
    if type(cmd) ~= "string" then
        return nil, "spawn: cmd must be a string (got " .. type(cmd) .. ")"
    end
    -- argv must be NULL-terminated.
    local argv = ffi.new("const char *[4]")
    argv[0] = "/bin/sh"
    argv[1] = "-c"
    argv[2] = cmd
    argv[3] = nil
    local master = ffi.new("int[1]")
    local pid = util.forkpty(master, nil, nil, nil)
    if pid < 0 then
        return nil, "forkpty: " .. libc.strerror(libc.errno())
    end
    if pid == 0 then
        -- child: exec /bin/sh -c cmd.
        C.execvp("/bin/sh", ffi.cast("char *const *", argv))
        -- execvp returned -> exec failed; abandon ship with conventional 127
        C._exit(127)
    end
    -- parent: wrap the child pid and the PTY master fd in a Session.
    return setmetatable({
        pid       = pid,
        master_fd = master[0],
        closed    = false,
    }, Session)
 end
 -- Blocking read of at most `count` bytes (default 4096) from the master fd.
 -- Returns:
 --   (string, n)     on success; n == 0 means EOF (child closed its end)
 --   (nil, errmsg)   on syscall failure
 function Session:read(count)
    local want = count or 4096
    return libc.read(self.master_fd, want)
 end
 -- Write `data` to the master fd; passes libc.write's results through.
 function Session:write(data)
    local fd = self.master_fd
    return libc.write(fd, data)
 end
 -- Close the master fd once; later calls are no-ops.
 function Session:close()
    if not self.closed then
        libc.close(self.master_fd)
        self.closed = true
    end
 end
 -- Wait on the child via libc.waitpid; returns its (kind, value) pair.
 function Session:wait(options)
    local child = self.pid
    return libc.waitpid(child, options)
 end
 -- Send signal `sig` to the child; passes libc.kill's results through.
 function Session:signal(sig)
    local child = self.pid
    return libc.kill(child, sig)
 end
 return M
@@ -1,6 +1,10 @@
 -- ffi/readline.lua — GNU readline binding.
-- Phase 0: readline + add_history + free. Phase 1: custom key bindings.
+-- Phase 0: readline + add_history + EOF handling.
-- See docs/PHASE0.md §9.
+-- Phase 1: custom key bindings via rl_bind_keyseq.
 -- Phase 3: rl_insert_text + rl_redisplay so bound key handlers can
 --          stuff text into the in-progress line buffer (used by \C-n
 --          to insert ":norris " in repl.lua).
 -- See docs/PHASE0.md §9 and docs/PHASE1.md §7 and docs/PHASE3.md §3.
 local ffi = require("ffi")
@@ -8,8 +12,93 @@ ffi.cdef[[
 char *readline(const char *prompt);
 void  add_history(const char *line);
 void  free(void *ptr);
 typedef int (*rl_command_func_t)(int, int);
 int rl_bind_keyseq(const char *keyseq, rl_command_func_t function);
 int rl_insert_text(const char *text);
 int rl_redisplay(void);
 ]]
 -- libreadline-dev (which ships the unversioned `libreadline.so` symlink) is
 -- not assumed to be present on the runtime host; versioned sonames are tried
 -- next so a base Debian/Arch with just the libreadline runtime works.
 -- Returns the loaded library; raises with every per-name failure if none load.
 local function load_readline()
    local candidates = { "readline", "readline.so.8", "readline.so.7" }
    local failures = {}
    for i = 1, #candidates do
        local soname = candidates[i]
        local loaded, result = pcall(ffi.load, soname)
        if loaded then
            return result
        end
        failures[#failures + 1] = soname .. ": " .. tostring(result)
    end
    error("libreadline not loadable: " .. table.concat(failures, "; "))
 end
 local rl = load_readline()
 local C  = ffi.C
 local M = {}
-- Phase 0 stubs; wired with the REPL implementation.
+
 -- Prompt for and read one line of input.
 -- Returns:
 --   string : the line (no trailing newline)
 --   nil    : EOF (Ctrl-D on empty line)
 function M.readline(prompt)
    local raw = rl.readline(prompt)
    if raw ~= nil then
        -- Copy into a Lua string first, then release readline's C buffer.
        local line = ffi.string(raw)
        C.free(raw)
        return line
    end
    return nil
 end
 -- Add `line` to readline's in-memory history; nil/false and empty lines
 -- are ignored.
 function M.add_history(line)
    if not line or #line == 0 then
        return
    end
    rl.add_history(line)
 end
 -- Bind `seq` (e.g. "\\C-n") to a Lua function that runs when the user types
 -- that key sequence at the readline prompt. The Lua fn takes no arguments
 -- (readline passes count + key, but consumers don't need them).
 -- Callback trampolines are pinned in module-local state for process
 -- lifetime. We do NOT free the previous binding on rebind: readline
 -- retains the function pointer in its keymap, and the window between
 -- :free() and the new rl_bind_keyseq is a potential use-after-free.
 -- Memory cost is bounded — one closure per bound key sequence.
 -- (Phase 3 R-C4 fold-in.)
 -- `_pinned` keeps every callback ever cast alive for process lifetime
 -- (so readline's keymap pointers never dangle even after a re-bind).
 -- `_bound` indexes by seq for "what's currently bound here" lookup but
 -- both old and new closures stay reachable via _pinned.
 local _bound  = {}
 local _pinned = {}
 -- Bind key sequence `seq` to the zero-argument Lua function `fn`.
 -- Returns true when rl_bind_keyseq reported success (rc == 0).
 function M.bind(seq, fn)
    -- Lua-side handler: readline's (count, key) arguments are ignored and
    -- any error raised by `fn` is reported on stderr rather than propagated.
    local function trampoline(_count, _key)
        local ok, err = pcall(fn)
        if not ok then
            io.stderr:write("ffi/readline bind handler error: " .. tostring(err) .. "\n")
        end
        return 0
    end
    local cb = ffi.cast("rl_command_func_t", trampoline)
    -- Pin before handing the pointer to readline; entries are never freed.
    _pinned[#_pinned + 1] = cb
    local rc = rl.rl_bind_keyseq(seq, cb)
    _bound[seq] = cb
    return rc == 0
 end
 -- Insert `text` at the cursor of the in-progress readline buffer; nil/false
 -- and "" are ignored. Used by bound key handlers to stuff e.g. ":norris "
 -- into the line. Callers typically follow up with M.redisplay().
 function M.insert_text(text)
    if not text or text == "" then
        return
    end
    rl.rl_insert_text(text)
 end
 -- Ask readline to redraw the current line via rl_redisplay. Call after
 -- insert_text or any other buffer mutation made inside a bound handler.
 -- Returns nothing.
 function M.redisplay()
    local _ = rl.rl_redisplay()
 end
 return M
@@ -1,20 +1,370 @@
-- history.lua — persistent session log + memory.jsonl.
+-- history.lua — persistent session log + cross-session memory store.
-- Phase 0: NO disk I/O. This module is a stub placeholder so module names are
+-- Phase 1: append-only JSONL per session under <config.history.dir>/sessions/.
-- stable when Phase 1 lands the persistence layer.
+-- Phase 4: cross-session memory.jsonl at <config.history.dir>/memory.jsonl,
-- See docs/PHASE0.md §11 (Phase 1).
+--          single-writer enforced via flock(LOCK_EX | LOCK_NB) per PHASE4 R-B1.
 -- See docs/PHASE0.md §11, docs/PHASE1.md §6, docs/PHASE4.md §4.
 local json = require("dkjson")
 local libc = require("ffi.libc")
 local ffi  = require("ffi")
 local M = {}
-function M.open_session(dir)
+local Session = {}
-    error("history.open_session: not implemented (Phase 1)")
+Session.__index = Session
 local Memory = {}
 Memory.__index = Memory
 -- Best-effort mkdir -p. Failures are surfaced by io.open below. Uses
 -- single-quote escaping (Lua's %q double-quotes, which still expands $(...)
 -- and $VAR inside) so a path containing shell metacharacters doesn't trip.
 local function sh_singlequote(s)
    -- Each embedded ' becomes '\'' (close quote, escaped quote, reopen);
    -- the result is then wrapped in single quotes.
    local escaped = s:gsub("'", "'\\''")
    return "'" .. escaped .. "'"
 end
-function M.append_turn(session, turn)
+local function ensure_dir(path)
-    error("history.append_turn: not implemented (Phase 1)")
+    if not path or path == "" then return end
    -- Best-effort: os.execute's result is not inspected here. The path is
    -- single-quoted via sh_singlequote before it reaches the shell.
    os.execute("mkdir -p " .. sh_singlequote(path))
 end
-function M.summarize_and_close(session, broker)
+local function parent_dir(path)
-    error("history.summarize_and_close: not implemented (Phase 3)")
    -- Returns everything before the final "/<name>" component, or nil when
    -- the pattern does not match (e.g. `path` contains no "/").
+    return path:match("^(.*)/[^/]+$")
 end
 -- Open the session log at `path` in append mode, creating parent
 -- directories when missing.
 --   path : absolute path to the .jsonl file
 --   meta : optional table; written as the first line ({meta = ...}) ONLY
 --          when the file is new / empty. Use it for the {started, model,
 --          version, ...} header per PHASE1.md §6.
 -- Returns the session handle, or (nil, errmsg) when the open fails.
 function M.open(path, meta)
    ensure_dir(parent_dir(path))
    -- Probe with a separate read handle before opening for append: the
    -- file counts as populated when its first line exists and is non-empty.
    local populated = false
    local probe = io.open(path, "r")
    if probe then
        local head = probe:read("*l")
        populated = (head ~= nil) and (#head > 0)
        probe:close()
    end
    local fh, err = io.open(path, "a")
    if not fh then
        return nil, err
    end
    local sess = setmetatable({ path = path, fh = fh, closed = false }, Session)
    if meta and not populated then
        sess:append({ meta = meta })
    end
    return sess
 end
 -- Append one turn to the session log as a single JSON line.
 -- Returns:
 --   true            when the line was written and flushed
 --   false, errmsg   when the session is closed, or write/flush failed
 function Session:append(turn)
    if self.closed then return false, "session closed" end
    local line = json.encode(turn)
    -- write + flush so a crash mid-session preserves all turns up to the
    -- last full append. Phase 1 default: no fsync per line (would dominate
    -- runtime on slow disks). Q16 tracks fsync policy if it ever bites.
    -- Both calls return nil + message on failure (disk full, fd gone);
    -- surface that instead of claiming success.
    local wrote, werr = self.fh:write(line, "\n")
    if not wrote then return false, werr end
    local flushed, ferr = self.fh:flush()
    if not flushed then return false, ferr end
    return true
 end
 -- Close the underlying file handle once; later calls are no-ops.
 function Session:close()
    if not self.closed then
        self.fh:close()
        self.fh, self.closed = nil, true
    end
 end
 -- Load a session file. Returns:
 --   turns, meta   : turns is ALWAYS a table on success (possibly empty);
 --                   meta is the {meta={...}} header value or nil if absent
 --   nil,   err    : on file open failure (turns-first means callers can
 --                   test `if not turns then` without ambiguity vs a missing
 --                   meta-header line)
 -- Only the first non-empty line may supply the meta header. Lines that do
 -- not decode to a JSON object are skipped.
 function M.load(path)
    local fh, err = io.open(path, "r")
    if not fh then return nil, err end
    local meta, turns = nil, {}
    local first = true
    for line in fh:lines() do
        if #line > 0 then
            local obj = json.decode(line)
            -- Require a table: a line such as `42` or `true` is valid JSON
            -- but indexing the decoded number/boolean would raise.
            if type(obj) == "table" then
                if first and obj.meta then
                    meta = obj.meta
                elseif obj.role and obj.content then
                    turns[#turns + 1] = obj
                end
            end
            -- malformed lines (e.g. trailing partial write before crash) are
            -- silently skipped per the §6 recovery semantic
            first = false
        end
    end
    fh:close()
    return turns, meta
 end
 -- List the session files in `dir`: basenames ending in ".jsonl", sorted
 -- ascending. Phase 1 minimum is name only; mtime / turn count are a Phase 4
 -- concern when :sessions starts wanting to surface a richer picker.
 -- Returns:
 --   array of strings (basenames, no path prefix)
 --   may be empty if dir doesn't exist
 function M.list_sessions(dir)
    local names = {}
    if dir and dir ~= "" then
        -- Plain `ls` through io.popen; the path is single-quoted for shell
        -- safety (see sh_singlequote).
        local listing = io.popen("ls -1 " .. sh_singlequote(dir) .. " 2>/dev/null")
        if listing then
            for entry in listing:lines() do
                if entry:match("%.jsonl$") then
                    names[#names + 1] = entry
                end
            end
            listing:close()
            -- ISO 8601 names sort lexicographically = chronologically
            table.sort(names)
        end
    end
    return names
 end
 -- ============================================================================
 -- Phase 4: memory.jsonl — cross-session memory store.
 -- Same JSONL convention as session logs, but a single shared file rather
 -- than per-session. Single-writer enforced via flock advisory lock.
 -- See docs/PHASE4.md §2 / §4.
 -- ============================================================================
 -- We need an integer fd for flock. io.open returns a Lua FILE*; LuaJIT
 -- has no portable way to extract the underlying fd from that. Workaround:
 -- open via libc directly using open(2). Already exposed close() in libc;
 -- need to declare open() and read/write via the existing fd interface.
 ffi.cdef[[
 int open(const char *pathname, int flags, int mode);
 long lseek(int fd, long offset, int whence);
 ]]
 local O_RDWR   = 2
 local O_CREAT  = 64       -- 0100 octal on Linux/glibc
 local O_APPEND = 1024     -- 02000 octal on Linux/glibc
 local SEEK_SET = 0
 local FILE_MODE = 0x180   -- 0600 octal — owner rw only
 -- ---------------------------------------------------------------- M.open_memory
 -- Opens memory.jsonl at `path` for append, takes an exclusive non-blocking
 -- flock on the fd, scans existing content for max id, writes a meta header
 -- if the file is new. Returns:
 --   handle, nil   on success
 --   nil,    err   on lock-held / open failure
 function M.open_memory(path)
    ensure_dir(parent_dir(path))
    -- Open via libc open(2) so we have an integer fd for flock. The bit lib
    -- may not be loaded; fall back to numeric add (flags don't overlap so
    -- OR == add here).
    local flags = bit and bit.bor(O_RDWR, O_CREAT, O_APPEND)
                      or  (O_RDWR + O_CREAT + O_APPEND)
    local fd = ffi.C.open(path, flags, FILE_MODE)
    if fd < 0 then
        return nil, "open " .. path .. " failed: "
                    .. libc.strerror(libc.errno())
    end
    local ok, err = libc.flock(fd, libc.LOCK_EX + libc.LOCK_NB)
    if not ok then
        libc.close(fd)
        return nil, "memory.jsonl held by another aish process ("
                    .. tostring(err) .. ")"
    end
    -- Scan existing content for max id. lseek back to start, read all.
    local max_id = 0
    local was_empty = true
    -- Record one complete line. Only JSON objects with a numeric id count;
    -- anything else (meta header, corrupt line) is ignored.
    local function scan_line(line)
        local obj = json.decode(line)
        if type(obj) == "table" and type(obj.id) == "number"
                                and obj.id > max_id then
            max_id = obj.id
        end
    end
    -- `tail` carries the unterminated remainder of the previous chunk so a
    -- record straddling a 4K boundary is decoded whole rather than as two
    -- undecodable halves (which would lose its id).
    local tail = ""
    ffi.C.lseek(fd, 0, SEEK_SET)
    while true do
        -- Read in 4K chunks. Use libc.read which returns string+len.
        local chunk, n = libc.read(fd, 4096)
        if not chunk or n == 0 then break end
        was_empty = false
        local data = tail .. chunk
        local start = 1
        while true do
            local nl = data:find("\n", start, true)
            if not nl then break end
            if nl > start then scan_line(data:sub(start, nl - 1)) end
            start = nl + 1
        end
        tail = data:sub(start)
    end
    -- A final line without a trailing newline still counts.
    if #tail > 0 then scan_line(tail) end
    -- Seek to end so subsequent libc.write appends.
    ffi.C.lseek(fd, 0, 2)  -- SEEK_END
    local handle = setmetatable({
        path    = path,
        fd      = fd,
        next_id = max_id + 1,
        closed  = false,
    }, Memory)
    if was_empty then
        -- Write meta header. No id; load_memory skips lines without id.
        handle:_write_raw({
            meta = {
                aish_version = "phase4",
                created      = os.date("!%Y-%m-%dT%H:%M:%SZ"),
            }
        })
    end
    return handle
 end
 -- Internal: append one JSON line to the fd.
 -- Returns:
 --   true          when the whole line was written
 --   nil, errmsg   when a write failed, or made no progress
 -- Short writes are continued from where they stopped and EINTR is
 -- retried, so a record is never silently left half-written.
 function Memory:_write_raw(obj)
    local line = json.encode(obj) .. "\n"
    local sent = 0
    while sent < #line do
        local rest = (sent == 0) and line or line:sub(sent + 1)
        local n, err, en = libc.write(self.fd, rest)
        if n then
            if n == 0 then
                -- No progress; bail out rather than spin forever.
                return nil, "write returned 0 bytes"
            end
            sent = sent + n
        elseif en ~= libc.EINTR then
            return nil, err
        end
    end
    return true
 end
 -- Append a memory item and return the id assigned to it.
 --   kind    : "fact" | "pref" | "context" (asserted)
 --   content : required, non-empty (asserted)
 --   tags, source : optional; stored only when truthy
 function Memory:add(kind, content, tags, source)
    assert(not self.closed, "memory:add on closed handle")
    local kind_ok = (kind == "fact") or (kind == "pref") or (kind == "context")
    assert(kind_ok,
           "memory:add: kind must be fact|pref|context (got " .. tostring(kind) .. ")")
    assert(content and content ~= "", "memory:add: content required")
    -- Claim the id before writing.
    local id = self.next_id
    self.next_id = id + 1
    self:_write_raw({
        id      = id,
        ts      = os.date("!%Y-%m-%dT%H:%M:%SZ"),
        kind    = kind,
        content = content,
        -- `x or nil` leaves the field absent when x is nil or false
        tags    = tags or nil,
        source  = source or nil,
    })
    return id
 end
 -- Append a tombstone record (kind = "forget") targeting `target_id`. The
 -- tombstone consumes an id of its own. Idempotent at the file level; the
 -- caller (e.g. `:memory forget` meta handler) may want to check
 -- M.load_memory first to surface a "not active" status to the user (N1).
 function Memory:forget(target_id)
    assert(not self.closed, "memory:forget on closed handle")
    local tombstone_id = self.next_id
    local tombstone = {
        id     = tombstone_id,
        ts     = os.date("!%Y-%m-%dT%H:%M:%SZ"),
        kind   = "forget",
        target = target_id,
    }
    self:_write_raw(tombstone)
    self.next_id = tombstone_id + 1
 end
 -- Close the memory fd once; later calls are no-ops.
 function Memory:close()
    if not self.closed then
        -- flock is released automatically on fd close.
        libc.close(self.fd)
        self.fd, self.closed = nil, true
    end
 end
 -- ---------------------------------------------------------------- M.load_memory
 -- Read all items, resolve tombstones, return active set sorted by ts desc
 -- (ties broken by id desc so the order is deterministic).
 -- Items without an `id` field (e.g. the meta header) and lines that do not
 -- decode to a JSON object are silently dropped.
 -- Tombstones with non-matching targets are no-ops.
 -- Returns:
 --   items_table  array of {id, ts, kind, content, tags?, source?}
 --   may be empty if file doesn't exist or contains only meta/tombstones
 function M.load_memory(path)
    local fh = io.open(path, "r")
    if not fh then return {} end
    local items = {}      -- by id
    local forget = {}     -- set of target ids
    for line in fh:lines() do
        if #line > 0 then
            local obj = json.decode(line)
            -- Require a table: `42` or `true` is valid JSON but indexing
            -- the decoded number/boolean would raise.
            if type(obj) == "table" and obj.id then
                if obj.kind == "forget" then
                    if obj.target then forget[obj.target] = true end
                elseif obj.kind == "fact" or obj.kind == "pref"
                                          or obj.kind == "context" then
                    items[obj.id] = obj
                end
            end
        end
    end
    fh:close()
    local active = {}
    for id, item in pairs(items) do
        if not forget[id] then active[#active + 1] = item end
    end
    -- Sort by ts descending (most recent first). Strings sort right when
    -- they're ISO 8601 — ASCII order = chronological. ts is coerced to a
    -- string so a record with a missing or non-string ts cannot make the
    -- comparison raise; equal timestamps fall back to id so the result does
    -- not depend on pairs() traversal order.
    table.sort(active, function(a, b)
        local ta, tb = tostring(a.ts or ""), tostring(b.ts or "")
        if ta ~= tb then return ta > tb end
        return (tonumber(a.id) or 0) > (tonumber(b.id) or 0)
    end)
    return active
 end
 -- ---------------------------------------------------------------- Phase 9 trust file
 -- ~/.aish/trusted-projects (JSONL, mode 0600). One entry per accepted
 -- project .aish.lua. Schema: {path = "<abs>", sha256 = "<hex>",
 -- ts = "<iso>"}. sha256 binds bytes; content change re-prompts.
 -- Internal helper: shell out to `sha256sum`. Returns hex digest or nil
 -- on any failure (binary missing, file unreadable, etc.). Caller
 -- treats nil as "skip the trust path" rather than crashing.
 function M._sha256_file(path)
    if not path or path == "" then return nil end
    -- `--` ends option parsing so a path beginning with "-" is taken as a
    -- file name; quoting goes through the shared sh_singlequote helper.
    local pipe = io.popen("sha256sum -- " .. sh_singlequote(path) .. " 2>/dev/null")
    if not pipe then return nil end
    local line = pipe:read("*l")
    pipe:close()
    if not line then return nil end
    -- GNU sha256sum starts the line with a backslash when the file name
    -- needed escaping (it contains a backslash or newline); allow for it so
    -- such paths still yield a digest.
    local digest = line:match("^\\?(%x+)")  -- first whitespace-separated field
    if digest and #digest == 64 then return digest end
    return nil
 end
 -- Returns true iff a JSONL entry exists at trust_path matching BOTH
 -- project_path AND sha256. Missing / unreadable / corrupt-line file
 -- treated as "not trusted".
 function M.is_trusted(trust_path, project_path, sha256)
    if not (trust_path and project_path and sha256) then return false end
    local fh = io.open(trust_path, "r")
    if not fh then return false end
    for line in fh:lines() do
        if #line > 0 then
            local entry = json.decode(line)
            -- Only JSON objects can be trust records; a line decoding to a
            -- number or boolean would raise on field access, so skip it.
            if type(entry) == "table"
                     and entry.path == project_path
                     and entry.sha256 == sha256 then
                fh:close()
                return true
            end
        end
    end
    fh:close()
    return false
 end
 -- Appends a trust record. mkdir -p parent; best-effort chmod 0600 after
 -- every append. Append-only JSONL; partial writes corrupt at most one line
 -- (caller's subsequent reads skip them).
 -- Returns true only when the record was written and the file closed
 -- cleanly; false on missing arguments, open failure, or write/close failure.
 function M.add_trusted(trust_path, project_path, sha256)
    if not (trust_path and project_path and sha256) then return false end
    ensure_dir(parent_dir(trust_path))
    local fh = io.open(trust_path, "a")
    if not fh then return false end
    local ts = os.date("!%Y-%m-%dT%H:%M:%SZ")
    local record = json.encode({ path = project_path, sha256 = sha256, ts = ts })
    -- write and close both return a falsy first value on failure; close
    -- matters because that is when buffered data is flushed.
    local wrote  = fh:write(record, "\n")
    local closed = fh:close()
    -- Best-effort chmod 0600; ignore failure (next read will succeed).
    os.execute("chmod 600 " .. sh_singlequote(trust_path) .. " 2>/dev/null")
    if wrote and closed then return true end
    return false
 end
 return M
@@ -1,32 +1,272 @@
 -- main.lua — entry point
 -- Phase 0: arg parsing, config load, REPL start.
-- See docs/PHASE0.md §4, §10.
+-- See docs/PHASE0.md §4, §10. -p one-shot mode lands per issue #4.
 -- Resolve modules + vendored dkjson relative to this script's directory,
 -- not cwd. Packaged install puts main.lua at /usr/share/lua/5.1/aish/ and
 -- the /usr/bin/aish wrapper execs `luajit /usr/share/lua/5.1/aish/main.lua`
 -- from whatever cwd the user is in — siblings must still resolve. Dev mode
 -- (`luajit main.lua` from repo root) keeps working because arg[0] is then
 -- "main.lua" with no "/" — _dir falls back to "./".
 -- Directory part of arg[0] including the trailing "/"; "./" when arg[0]
 -- contains no "/". NOTE(review): assumes the standalone interpreter's
 -- global `arg` table is present — confirm if this file is ever embedded.
 local _dir = arg[0]:match("(.*/)") or "./"
 -- Search the script directory and its vendor/ subdirectory before the
 -- pre-existing package.path entries.
 package.path = _dir .. "?.lua;" .. _dir .. "vendor/?.lua;" .. package.path
 -- Help text written to stdout by main() when --help / -h is given.
 local USAGE = [[
 aish — AI-augmented conversational shell.
 Usage:
  aish [--config <path>] [--help]            -- interactive REPL
  aish -p "<prompt>" [--config <path>]       -- one-shot, print + exit
 In -p mode, if stdin is not a TTY it's read as additional context and
 prepended to the prompt as a fenced block — composes with Unix pipes:
  tail app.log | aish -p "any anomalies?"
 Config resolution order (PHASE0.md §10):
  1. --config <path>
  2. $AISH_CONFIG
  3. ~/.config/aish/config.lua
  4. ./config.lua
 ]]
 -- Parses the CLI argument vector into an options table with the optional
 -- fields `config` (path), `help` (true) and `prompt` (string). An
 -- unrecognized argument, or a --config / -p flag without a following value,
 -- writes a diagnostic to stderr and exits with status 2.
 local function parse_args(argv)
    local out = {}
    -- Value following the flag at index i; exits(2) when it is missing.
    local function value_after(i, flag, what)
        local v = argv[i + 1]
        if not v then
            io.stderr:write("aish: " .. flag .. " requires a " .. what .. " argument\n")
            os.exit(2)
        end
        return v
    end
    local i = 1
    while i <= #argv do
        local a = argv[i]
        if a == "--config" then
            -- A trailing "--config" used to be dropped silently, letting the
            -- default resolution order pick some other file.
            out.config = value_after(i, "--config", "path")
            i = i + 2
        elseif a == "--help" or a == "-h" then
            out.help = true
            i = i + 1
        elseif a == "-p" or a == "--prompt" then
            out.prompt = value_after(i, "-p", "prompt")
            i = i + 2
        else
            io.stderr:write("aish: unrecognized argument: " .. a .. "\n")
            os.exit(2)
        end
    end
    return out
 end
 local function load_config(opts)
    -- --config is explicit: use exactly that path or fail. No silent fallback.
    if opts.config then
        local f = io.open(opts.config, "r")
        if not f then
            error("aish: --config " .. opts.config .. ": cannot open")
        end
        f:close()
        return dofile(opts.config), opts.config
    end
 local function load_config()
    -- Resolution order per PHASE0.md §10:
    --   1. --config <path>   2. $AISH_CONFIG
    --   3. ~/.config/aish/config.lua   4. ./config.lua
    -- Phase 0 stub: pick the first existing path; no CLI parsing yet.
    local home = os.getenv("HOME") or ""
-    local candidates = {
+    local candidates = {}
-        os.getenv("AISH_CONFIG"),
+    local function push(p) if p and p ~= "" then candidates[#candidates + 1] = p end end
-        home .. "/.config/aish/config.lua",
+    push(os.getenv("AISH_CONFIG"))
-        "./config.lua",
+    push(home .. "/.config/aish/config.lua")
-    }
+    push("./config.lua")
    for _, path in ipairs(candidates) do
        if path then
        local f = io.open(path, "r")
        if f then f:close(); return dofile(path), path end
    end
-    end
+    error("aish: no config.lua found (tried: "
-    error("aish: no config.lua found in any standard location")
+          .. table.concat(candidates, ", ") .. ")")
 end
-local function main()
+-- ---------------------------------------------------------------- Phase 9 project overlay
-    local config, config_path = load_config()
+
 -- Walk up from libc.getcwd() looking for ".aish.lua"; returns the first
 -- path found, or nil. The search only runs when cwd is $HOME itself or lies
 -- below it, and stops after checking $HOME (or "/"). Per R1 (review
 -- fold-in) containment is a proper path-prefix test, so HOME="/home/user"
 -- does not match cwd="/home/user2/...". Also returns nil when HOME is
 -- unset/empty, ffi.libc cannot be required, or getcwd() yields nil.
 local function _find_project_config()
    local home = os.getenv("HOME")
    if not home or home == "" then return nil end
    -- Normalize "/home/user/" -> "/home/user" so the equality and prefix
    -- tests below still match; a HOME made only of slashes becomes "/".
    home = home:gsub("/+$", "")
    if home == "" then home = "/" end
    -- Lazy-require so nothing is loaded unless an overlay is considered.
    local libc_ok, libc = pcall(require, "ffi.libc")
    if not libc_ok then return nil end
    local dir = libc.getcwd()
    if not dir then return nil end
    -- R1: dir must equal home or start with home .. "/". When home is "/"
    -- the separator is already part of home.
    local prefix = (home == "/") and "/" or (home .. "/")
    if dir ~= home and dir:sub(1, #prefix) ~= prefix then
        return nil
    end
    while dir and #dir > 0 do
        -- Avoid building "//.aish.lua" at the filesystem root.
        local candidate = ((dir == "/") and "" or dir) .. "/.aish.lua"
        local f = io.open(candidate, "rb")
        if f then f:close(); return candidate end
        if dir == home or dir == "/" then return nil end
        -- Walk up one level by dropping the last path component.
        dir = dir:gsub("/[^/]*$", "")
        if dir == "" then dir = "/" end
    end
    return nil
 end
 -- Path of the trust store: $AISH_TRUST_FILE when that variable is set,
 -- otherwise "<HOME>/.aish/trusted-projects" (HOME read as "" when unset).
 local function _trust_file_path()
    local override = os.getenv("AISH_TRUST_FILE")
    if override then return override end
    local home = os.getenv("HOME") or ""
    return home .. "/.aish/trusted-projects"
 end
 -- Interactive trust gate for a project config. Returns true when the file's
 -- current sha256 is already recorded in the trust file, or when the user
 -- answers "y" at the prompt (the answer is then persisted through
 -- history.add_trusted). Returns false when hashing fails, ffi.readline
 -- cannot be required, the readline call raises, or the user declines.
 -- R2: the caller must NOT invoke this in one-shot (-p) mode — reading the
 -- answer would consume piped stdin.
 -- Per A8 the prompt uses rl.readline and never falls back to io.read.
 local function _check_and_maybe_prompt(project_path, history)
    local sha = history._sha256_file(project_path)
    if not sha then
        io.stderr:write("aish: project config " .. project_path
                        .. ": sha256 failed; skipping\n")
        return false
    end
    local tpath = _trust_file_path()
    if history.is_trusted(tpath, project_path, sha) then
        return true
    end
    -- Trust prompt.
    io.stderr:write("aish: project config found: " .. project_path .. "\n")
    io.stderr:write("aish: UNTRUSTED. Loading it runs arbitrary Lua code.\n")
    local rl_ok, rl = pcall(require, "ffi.readline")
    if not rl_ok then
        io.stderr:write("aish: readline unavailable; declining trust prompt\n")
        return false
    end
    -- Guard the call itself: an error raised by readline at this early call
    -- site must skip the overlay rather than abort startup.
    local read_ok, ans = pcall(rl.readline, "[aish] trust this project config? [y/N] ")
    if not read_ok then
        io.stderr:write("aish: trust prompt failed; skipping project config\n")
        return false
    end
    if ans and ans:lower():sub(1, 1) == "y" then
        history.add_trusted(tpath, project_path, sha)
        return true
    end
    return false
 end
 -- Loads the user config through load_config(opts), then overlays a project
 -- ".aish.lua" located by _find_project_config() when it is trusted.
 -- Always-on (no config flag). In one-shot (-p) mode the trust prompt is
 -- SKIPPED so nothing reads piped stdin (R2): only already-trusted overlays
 -- load there.
 -- Returns (config, user_path, project_path_or_nil). config._sources maps
 -- each top-level key to "user" or "project". The merge is shallow: a
 -- project key replaces the user's value for that key wholesale.
 -- Raises when the user config file does not evaluate to a table.
 local function load_config_with_overlay(opts)
    local user_cfg, user_path = load_config(opts)
    -- A config file that forgets its `return { ... }` would otherwise fail
    -- in pairs() below with an unhelpful "table expected" message.
    if type(user_cfg) ~= "table" then
        error("aish: config " .. tostring(user_path)
              .. " must return a table, got " .. type(user_cfg))
    end
    local sources = {}
    for k in pairs(user_cfg) do sources[k] = "user" end
    -- Shared exit for every path where no overlay is applied.
    local function user_only()
        user_cfg._sources = sources
        return user_cfg, user_path, nil
    end
    local proj_path = _find_project_config()
    if not proj_path then return user_only() end
    local history_ok, history = pcall(require, "history")
    if not history_ok then return user_only() end
    -- R2: skip trust prompt in -p mode.
    local trusted
    if opts.prompt then
        local sha = history._sha256_file(proj_path)
        local tpath = _trust_file_path()
        trusted = sha and history.is_trusted(tpath, proj_path, sha)
        if not trusted then
            io.stderr:write("aish: project config " .. proj_path
                .. " skipped in -p mode (untrusted; run aish interactively to trust)\n")
        end
    else
        trusted = _check_and_maybe_prompt(proj_path, history)
    end
    if not trusted then return user_only() end
    local ok, proj_cfg = pcall(dofile, proj_path)
    if not ok or type(proj_cfg) ~= "table" then
        io.stderr:write("aish: project config " .. proj_path
                        .. " load failed: " .. tostring(proj_cfg) .. "\n")
        return user_only()
    end
    -- Shallow merge: project replaces user at top level. Update sources map.
    for k, v in pairs(proj_cfg) do
        user_cfg[k] = v
        sources[k] = "project"
    end
    user_cfg._sources = sources
    return user_cfg, user_path, proj_path
 end
 -- One-shot (-p) mode. If stdin is not a TTY its full contents are read and
 -- placed, as a ``` fenced block, in front of the user's prompt. The
 -- composed text is sent as a single user message to the model named by
 -- config.default_model through broker.chat_stream, and every "text" event
 -- is written to stdout as it arrives. repl.lua is not involved; any "CMD:"
 -- lines in the reply are printed like other text and not executed here.
 -- Exits 2 when default_model has no entry in config.models, 1 on a broker
 -- error.
 local function run_one_shot(config, user_prompt)
    local libc   = require("ffi.libc")
    local broker = require("broker")
    local full_prompt = user_prompt
    if not libc.isatty(0) then
        local stdin_text = io.read("*a") or ""
        if stdin_text ~= "" then
            full_prompt = "```\n" .. stdin_text .. "\n```\n\n" .. user_prompt
        end
    end
    local wanted = config.default_model
    local preset = config.models and config.models[wanted]
    if not preset then
        io.stderr:write(string.format(
            "aish: default_model '%s' not found in models{}\n", tostring(wanted)))
        os.exit(2)
    end
    -- Tracks whether anything was printed, so a trailing newline is only
    -- added after real output.
    local printed = false
    local function on_event(kind, payload)
        if kind ~= "text" then return end
        if not payload or payload == "" then return end
        io.write(payload); io.flush()
        printed = true
    end
    local conversation = { { role = "user", content = full_prompt } }
    local ok, err = broker.chat_stream(preset, conversation, on_event)
    -- Terminate the streamed text before any error line or normal return.
    if printed then io.write("\n") end
    if not ok then
        io.stderr:write("aish: broker error: " .. tostring(err) .. "\n")
        os.exit(1)
    end
 end
 -- Entry point: parse arguments, print usage for --help, load the config
 -- (with optional project overlay), report what was loaded on stderr, then
 -- run either one-shot mode (-p) or the interactive REPL.
 local function main(argv)
    local opts = parse_args(argv or {})
    if opts.help then
        io.write(USAGE)
        return
    end
    local config, config_path, project_path = load_config_with_overlay(opts)
    io.stderr:write(string.format("aish: loaded config from %s\n", config_path))
    if project_path then
        io.stderr:write(string.format(
            "aish: project config: %s (overlaid on %s)\n",
            project_path, config_path))
    end
    if opts.prompt then
        run_one_shot(config, opts.prompt)
    else
        -- repl is only required when the interactive path is taken.
        local repl = require("repl")
        repl.run(config)
    end
 end
-main()
+main(arg)
@@ -0,0 +1,153 @@
 -- mcp.lua — MCP (Model Context Protocol) JSON-RPC 2.0 client.
 -- Phase 2 v1: HTTP POST per RPC against lmcp servers; no long-lived SSE
 -- channel (lmcp doesn't push — capabilities.tools.listChanged = false).
 -- See docs/PHASE2.md §3 (module changes) and §4 (transport).
 local curl = require("ffi.curl")
 local json = require("dkjson")
 local M = {}
 local Session = {}
 Session.__index = Session
 local MCP_PROTOCOL_VERSION = "2025-03-26"
 -- ---------------------------------------------------------------- M.connect
 -- Open a session. No network traffic yet — call session:initialize()
 -- to actually round-trip initialize + tools/list.
 -- opts:
 --   alias       short name for this server (defaults to URL hostname)
 --   auth_token  literal Bearer token
 --   auth_env    env-var name to read the token from (used if auth_token nil)
 function M.connect(url, opts)
    -- Builds a Session table; performs no I/O beyond an optional getenv.
    opts = opts or {}
    -- Literal token wins; the env var is consulted only when the literal is
    -- absent or empty and auth_env is given.
    local token = opts.auth_token
    if opts.auth_env and (not token or token == "") then
        local from_env = os.getenv(opts.auth_env)
        if from_env and from_env ~= "" then token = from_env end
    end
    -- Alias falls back to the URL's host part, then to the URL itself.
    local alias = opts.alias
    if not alias then
        alias = url:match("https?://([^:/]+)") or url
    end
    local session = {
        url             = url,
        alias           = alias,
        auth            = token,
        next_id         = 1,
        tools           = nil,   -- populated by initialize()
        server_info     = nil,
        server_caps     = nil,
        version_warning = nil,   -- set when the server answers with another protocolVersion
    }
    return setmetatable(session, Session)
 end
 -- ---------------------------------------------------------------- headers
 function Session:_headers()
    -- Header list for every POST; the Authorization line is appended only
    -- when a non-empty token is held.
    local headers = {
        "Content-Type: application/json",
        "Accept: application/json",
    }
    local token = self.auth
    if token and token ~= "" then
        headers[#headers + 1] = "Authorization: Bearer " .. token
    end
    return headers
 end
 -- ---------------------------------------------------------------- _rpc
 -- One round-trip. Returns:
 --   result_table, "ok"                          — JSON-RPC success
 --   nil,          "rpc_error",       error_obj  — JSON-RPC envelope error
 --   nil,          "transport_error", msg        — HTTP >=400 / libcurl / parse
 -- If has_id == false this is a notification: lmcp returns HTTP 202 empty
 -- body and we synthesize (true, "ok") on transport success.
 function Session:_rpc(method, params, has_id)
    -- Build the JSON-RPC 2.0 envelope. Requests consume the next id;
    -- notifications (has_id == false) carry none.
    local req = { jsonrpc = "2.0", method = method, params = params or {} }
    if has_id ~= false then
        req.id = self.next_id
        self.next_id = self.next_id + 1
    end
    local body, status = curl.post(self.url, json.encode(req), self:_headers())
    if not body then
        return nil, "transport_error", tostring(status)  -- 2nd slot is errmsg
    end
    if status >= 400 then
        return nil, "transport_error",
               ("HTTP %d: %s"):format(status, body:sub(1, 200))
    end
    -- Notifications: the body is not inspected, success is synthesized.
    if has_id == false then
        return true, "ok"
    end
    local doc, _, derr = json.decode(body)
    if doc == nil then
        return nil, "transport_error", "malformed JSON: " .. tostring(derr)
    end
    -- Valid JSON that is not an object/array (a bare number, boolean or
    -- string) cannot be a JSON-RPC response; indexing a number or boolean
    -- below would raise, so report it as a transport-level failure instead.
    if type(doc) ~= "table" then
        return nil, "transport_error",
               "malformed JSON: expected object, got " .. type(doc)
    end
    if doc.error then
        return nil, "rpc_error", doc.error
    end
    return doc.result or {}, "ok"
 end
 -- ---------------------------------------------------------------- initialize
 -- Round-trips initialize + sends notifications/initialized + caches tools/list.
 -- Returns:
 --   true,  "ok"                          — session ready
 --   false, kind, err                     — first failing RPC (caller logs)
 function Session:initialize()
    -- Step 1: initialize handshake.
    local hello = {
        protocolVersion = MCP_PROTOCOL_VERSION,
        capabilities    = {},
        clientInfo      = { name = "aish", version = "phase2" },
    }
    local init, kind, err = self:_rpc("initialize", hello)
    if not init then return false, kind, err end
    self.server_info = init.serverInfo
    self.server_caps = init.capabilities
    -- A differing protocol version is recorded as a warning, not an error.
    local server_version = init.protocolVersion
    if server_version and server_version ~= MCP_PROTOCOL_VERSION then
        self.version_warning = string.format(
            "protocol version mismatch (sent %s, got %s); proceeding",
            MCP_PROTOCOL_VERSION, tostring(server_version))
    end
    -- Step 2: notifications/initialized; its result is deliberately ignored.
    self:_rpc("notifications/initialized", nil, false)
    -- Step 3: fetch the tool list once and keep it on the session.
    local listing, list_kind, list_err = self:_rpc("tools/list", {})
    if not listing then return false, list_kind, list_err end
    self.tools = listing.tools or {}
    return true, "ok"
 end
 -- ---------------------------------------------------------------- list_tools
 -- Cached. Returns the tool list captured at initialize() time;
 -- empty table if not initialized.
 function Session:list_tools()
    -- Hand back the list stored on the session, or a fresh empty table when
    -- none has been stored.
    local cached = self.tools
    if cached then return cached end
    return {}
 end
 -- ---------------------------------------------------------------- call_tool
 -- Returns:
 --   result_table, "ok"                          — tool succeeded (content[])
 --   result_table, "handler_error"               — tool ran but result.isError = true
 --                                                  (caller passes content through
 --                                                  to the model regardless;
 --                                                  PHASE2-baseline.md §3 also
 --                                                  notes isError may be false on
 --                                                  actual failure — content is
 --                                                  authoritative)
 --   nil,          "rpc_error",       error_obj  — JSON-RPC envelope error
 --   nil,          "transport_error", msg        — HTTP/libcurl/parse failure
 function Session:call_tool(name, args)
    local request = { name = name, arguments = args or {} }
    local result, kind, err = self:_rpc("tools/call", request)
    -- Transport / envelope failures pass through unchanged.
    if not result then return nil, kind, err end
    -- The result is returned either way; only the status label differs.
    local label = "ok"
    if result.isError then label = "handler_error" end
    return result, label
 end
 -- ---------------------------------------------------------------- close
 -- Drops cached state. lmcp has no session teardown — every RPC was
 -- already Connection: close.
 function Session:close()
    -- Clear everything initialize() stored; url, alias, auth and next_id
    -- are left as they are.
    local cached_fields = { "tools", "server_info", "server_caps", "version_warning" }
    for _, field in ipairs(cached_fields) do
        self[field] = nil
    end
 end
 return M
@@ -1,19 +1,284 @@
 -- renderer.lua — output formatting and ANSI sequences.
-- Phase 0: minimal — assistant text plain-printed; CMD: lines highlighted;
+-- Phase 0: assistant text plain-printed with `CMD: ` lines highlighted;
-- exec output framed. Syntax highlighting hooks land in Phase 5.
+-- exec output framed with the exit code on the closing rule.
 -- Phase 1: assistant_delta + assistant_flush for streaming render. CMD:
 -- highlighting in streaming mode is deferred (Q12); deltas print raw, the
 -- §6 substrate `CMD: ` line is still extractable by executor afterwards.
 -- Syntax highlighting hooks land in Phase 6 (was Phase 5 pre-MCP renumber).
 local M = {}
 local A = {
    reset = "\27[0m",
    bold  = "\27[1m",
    dim   = "\27[2m",
    cyan  = "\27[36m",
    red   = "\27[31m",
 }
 -- Write every argument to stdout and flush right away.
 local function emit(...)
    io.write(...)
    io.flush()
 end
 -- Print assistant response text. Lines beginning with `CMD: ` (per the §3
 -- substrate-locked extraction marker) are emitted bold+cyan so the user
 -- can spot the suggestion without scanning prose.
 function M.assistant(text)
-    error("renderer.assistant: not implemented (Phase 0 pending)")
+    for line in ((text or "") .. "\n"):gmatch("([^\n]*)\n") do
        if line:sub(1, 5) == "CMD: " then
            emit(A.bold, A.cyan, line, A.reset, "\n")
        else
            emit(line, "\n")
        end
    end
 end
-function M.exec_output(output, exit_code)
+-- Phase 1: executor.exec streams output live to stdout (PTY multiplex), so
-    error("renderer.exec_output: not implemented (Phase 0 pending)")
+-- the frame is split — exec_begin before the spawn, exec_end after wait().
 -- The body is not re-rendered here; live output lands directly between the
 -- two rules.
 function M.exec_begin()
    emit(A.dim, "─── exec output ───", A.reset, "\n")
 end
 function M.exec_end(exit_code)
    -- Closing rule of the exec frame. A nil or zero exit code prints the
    -- plain dim "exit 0" rule; anything else shows the code in red.
    local failed = exit_code and exit_code ~= 0
    if not failed then
        emit(A.dim, "─── exit 0 ───", A.reset, "\n")
        return
    end
    emit(A.dim, "─── exit ", A.reset,
         A.red, tostring(exit_code), A.reset,
         A.dim, " ───", A.reset, "\n")
 end
 -- Single-line dim status (e.g. §8 eviction notice, model switch confirms).
 function M.status(line)
-    error("renderer.status: not implemented (Phase 0 pending)")
+    emit(A.dim, "[aish] ", tostring(line), A.reset, "\n")
 end
 -- Streaming assistant output. Phase 1: deltas are written raw — the §6 CMD:
 -- highlighting from M.assistant() is not applied incrementally because
 -- mid-line cursor manipulation isn't worth the complexity for Phase 1.
 -- Q12 (PHASE1.md §10) tracks the upgrade. The full assistant text is still
 -- captured by repl.lua and CMD: extraction works against the reassembled
 -- string after the stream ends.
 local stream_buf = nil   -- non-nil while a stream is in progress
 -- Phase 6: fence-aware highlight filter. Off by default; toggled via
 -- M.set_highlight(enabled, detected, highlight_fn). State machine:
 --   outside: pass chunks through; hold a small tail when the suffix
 --            could be the start of an opening fence (R1 — split fences
 --            from local llama.cpp need accumulation).
 --   inside:  buffer until closing "\n```" is seen; emit
 --            highlight_fn(body, lang) then the closing fence verbatim.
 -- N1: fences only open at start-of-stream OR after a newline ("^```"
 --     or "\n```"); inline backticks in prose don't trigger.
 local hl_enabled  = false
 local hl_detected = false
 local hl_fn       = nil          -- function(body, lang) -> rendered
 local hl_state    = "outside"    -- "outside" | "inside"
 local hl_tail     = ""           -- outside-state lookahead
 local hl_inside_buf = ""         -- inside-state buffer
 local hl_lang     = nil          -- captured at fence open
 function M.set_highlight(enabled, detected, highlight_fn)
    -- Store the two flags as strict booleans and remember the highlighter.
    hl_enabled  = enabled and true or false
    hl_detected = detected and true or false
    hl_fn       = highlight_fn
 end
 function M.highlight_state()
    -- Fresh table on each call holding the two highlight flags.
    local snapshot = {}
    snapshot.enabled  = hl_enabled
    snapshot.detected = hl_detected
    return snapshot
 end
 -- Returns the longest suffix of `s` (at most 32 bytes are inspected) that
 -- should be held back because a fence-open marker might still be forming:
 --   * a newline followed by any run of backticks and then tag characters
 --     ([%w_-]), e.g. "\n", "\n``", "\n```py";
 --   * or, only when the suffix is the whole of `s`, the same shape without
 --     the newline, provided it contains at least one backtick.
 -- Returns "" when no suffix qualifies.
 local function _hl_partial_suffix(s)
    local total  = #s
    local window = math.min(total, 32)
    -- Longest candidate first, so the first hit is the longest match.
    for len = window, 1, -1 do
        local tail = s:sub(-len)
        if tail:match("^\n`*[%w_-]*$") then
            return tail
        end
        local is_whole = (len == total)
        if is_whole and tail:find("`") and tail:match("^`*[%w_-]*$") then
            return tail
        end
    end
    return ""
 end
 -- Locate a fence-open line ("```<tag>\n") in `combined`, first at the very
 -- start of the string, otherwise directly after a newline. Returns
 -- (fence_start, content_start, tag): the index of the first backtick, the
 -- index just past the fence line's newline, and the captured tag (possibly
 -- ""). Returns nil when there is no such line.
 local function _hl_find_open(combined)
    -- Each probe: { pattern, offset from match start to the first backtick }.
    local probes = {
        { "^```([%w_-]*)\n", 0 },
        { "\n```([%w_-]*)\n", 1 },
    }
    for _, probe in ipairs(probes) do
        local from, to, tag = combined:find(probe[1])
        if from then
            return from + probe[2], to + 1, tag
        end
    end
    return nil
 end
 -- Feed one streamed chunk through the fence-aware highlight filter.
 -- With highlighting off (or no highlight function set) the chunk is
 -- emitted unchanged. Otherwise the module-level state decides:
 --   "outside": look for a fence-open line in hl_tail .. chunk. If found,
 --              emit everything up to and including that line, switch to
 --              "inside" and start buffering the body. If not, emit all but
 --              the suffix returned by _hl_partial_suffix and keep that
 --              suffix in hl_tail for the next call.
 --   "inside":  append to hl_inside_buf until "\n```" appears; then emit
 --              hl_fn(body, lang) (or the raw body if hl_fn raises or
 --              returns a falsy value), emit the "\n```" itself, return to
 --              "outside" and re-process whatever followed.
 -- NOTE(review): the close search needs a newline before the backticks, and
 -- the newline ending the fence-open line is consumed with that line, so an
 -- empty block ("```\n```") is not closed here and stays buffered.
 local function _hl_push(chunk)
    if not hl_enabled or not hl_fn then
        emit(chunk)
        return
    end
    if hl_state == "outside" then
        local combined = hl_tail .. chunk
        local fs, cs, lang = _hl_find_open(combined)
        if fs then
            if fs > 1 then emit(combined:sub(1, fs - 1)) end
            -- Emit the fence-open line verbatim too (model + user both
            -- see "```python\n" — the highlighter only colorizes BODY).
            emit(combined:sub(fs, cs - 1))
            hl_state      = "inside"
            hl_lang       = (lang ~= "" and lang) or nil
            hl_inside_buf = combined:sub(cs)
            hl_tail       = ""
            -- If the closing fence is already in the inside buffer
            -- (cloud may deliver whole blocks in one chunk), drain.
            if hl_inside_buf:find("\n```", 1, true) then
                _hl_push("")  -- triggers the inside branch's close detect
            end
            return
        end
        -- No opening fence — hold the trailing partial-fence so a
        -- split-fence ("``" then "`python\n") gets recognized.
        local hold = _hl_partial_suffix(combined)
        if #hold < #combined then
            emit(combined:sub(1, #combined - #hold))
        end
        hl_tail = hold
        return
    end
    -- state == "inside"
    hl_inside_buf = hl_inside_buf .. chunk
    local cpos = hl_inside_buf:find("\n```", 1, true)
    if not cpos then return end   -- still buffering
    local body    = hl_inside_buf:sub(1, cpos - 1)
    local closing = hl_inside_buf:sub(cpos, cpos + 3)  -- "\n```"
    local rest    = hl_inside_buf:sub(cpos + 4)
    -- pcall: a failing highlighter degrades to the raw body.
    local ok, rendered = pcall(hl_fn, body, hl_lang or "")
    emit((ok and rendered) or body)
    emit(closing)
    hl_state      = "outside"
    hl_inside_buf = ""
    hl_lang       = nil
    -- Text after the closing fence may itself open another fence.
    if rest ~= "" then _hl_push(rest) end
 end
 function M.assistant_delta(chunk)
    -- Nil and empty chunks are ignored entirely: they neither start a
    -- stream nor reach the filter.
    if chunk and chunk ~= "" then
        -- The first real chunk starts the stream record.
        stream_buf = (stream_buf or "") .. chunk
        _hl_push(chunk)
    end
 end
 function M.assistant_flush()
    -- Ends the current stream: emits whatever the highlight filter is still
    -- holding, makes sure the output ends with a newline, and resets the
    -- filter for the next stream. No-op when no stream is in progress.
    if stream_buf == nil then return end
    if hl_state == "inside" then
        -- Stream ended mid-fence — emit the buffered body raw (no closing
        -- fence was seen, so nothing is highlighted).
        if hl_inside_buf ~= "" then emit(hl_inside_buf) end
        -- Reset even when the buffer is empty (stream ended right after the
        -- fence-open line); otherwise "inside" would carry over and the next
        -- stream would be buffered until a closing fence appeared.
        hl_inside_buf = ""
        hl_state      = "outside"
        hl_lang       = nil
    elseif hl_tail ~= "" then
        emit(hl_tail)
        hl_tail = ""
    end
    if not stream_buf:match("\n$") then emit("\n") end
    stream_buf = nil
 end
 -- Phase 2: MCP tool-call frame. Visual parity with the exec_begin/exec_end
 -- frame so the user reads tool dispatch and shell dispatch the same way.
 -- tool_call_begin renders the top rule + (optionally) the args as a dim
 -- preview; tool_call_end renders the result content followed by a status
 -- rule. Status is "ok" (dim) by default; "error" (red) if is_error is true.
 -- See docs/PHASE2.md §3 renderer.lua row + §4 Tool invocation.
 function M.tool_call_begin(name, args)
    -- Top rule with the tool name in cyan.
    emit(A.dim, "─── tool: ", A.reset,
         A.cyan, name, A.reset,
         A.dim, " ───", A.reset, "\n")
    -- Nil, empty, or a literal "{}" means there is no preview line.
    local has_args = args and args ~= "" and args ~= "{}"
    if not has_args then return end
    -- Previews longer than 200 bytes are cut to 197 plus "...".
    local preview = args
    if #args > 200 then
        preview = args:sub(1, 197) .. "..."
    end
    emit(A.dim, preview, A.reset, "\n")
 end
 function M.tool_call_end(content, is_error)
    -- Result body (if any), forced to end with a newline.
    if content and content ~= "" then
        emit(content)
        if content:sub(-1) ~= "\n" then emit("\n") end
    end
    -- Status rule: dim "ok" unless is_error is truthy, then red "error".
    if not is_error then
        emit(A.dim, "─── ok ───", A.reset, "\n")
        return
    end
    emit(A.dim, "─── ", A.reset,
         A.red, "error", A.reset,
         A.dim, " ───", A.reset, "\n")
 end
 -- Phase 3: Norris autonomous mode frames. Banner-style on enter/exit,
 -- step counter per iteration, red HALT banner when the destructive-op
 -- gate fires. The interactive prompt also gets a ⚡ marker when Norris
 -- is active (handled in repl.lua's prompt() function per PHASE0.md §9).
 -- See docs/PHASE3.md §3 renderer row.
 function M.norris_begin(goal)
    -- Banner: top rule, optional goal line, bottom rule.
    local top    = "─── NORRIS MODE ─────────────────────────"
    local bottom = "─────────────────────────────────────────"
    emit(A.bold, A.cyan, top, A.reset, "\n")
    local has_goal = goal and goal ~= ""
    if has_goal then
        emit(A.dim, "  goal: ", A.reset, goal, "\n")
    end
    emit(A.bold, A.cyan, bottom, A.reset, "\n")
 end
 function M.norris_step(n, max_n, descr)
    -- One dim line: the "step n/max" counter, then the optional description.
    local counter = string.format("  ─ step %d/%d ─ ", n, max_n)
    emit(A.dim, counter, A.reset)
    if descr and descr ~= "" then
        emit(A.dim, descr, A.reset)
    end
    emit("\n")
 end
 function M.norris_halt(step_n, max_n, reason, action)
    -- Red HALT banner: step counter, reason, and the offending action.
    emit(A.bold, A.red, "─── NORRIS HALT ──────────────────────────",
         A.reset, "\n")
    local progress = string.format("%d/%d", step_n, max_n)
    emit(A.dim, "  step:   ", A.reset, progress, "\n")
    emit(A.dim, "  reason: ", A.reset, A.red, tostring(reason), A.reset, "\n")
    -- The action may be long (a command line or serialized tool call):
    -- newlines become spaces and anything over 400 bytes is cut to 397
    -- plus "..." so the banner stays readable.
    local shown = tostring(action or ""):gsub("\n", " ")
    if #shown > 400 then
        shown = shown:sub(1, 397) .. "..."
    end
    emit(A.dim, "  action: ", A.reset, shown, "\n")
    emit(A.bold, A.red, "──────────────────────────────────────────",
         A.reset, "\n")
 end
 -- Norris loop exit. status ∈ {"done", "aborted", "budget_exhausted",
 --                              "stalled", "broker_error"}.
 function M.norris_end(status, reason)
    -- "done" and "tasks_complete" are the two non-error exits and render in
    -- cyan; every other status renders in red.
    local color = A.red
    if status == "done" or status == "tasks_complete" then
        color = A.cyan
    end
    -- Banner label: upper-cased status with underscores shown as spaces,
    -- padded with spaces up to 28 bytes.
    local label   = status:upper():gsub("_", " ")
    local padding = string.rep(" ", math.max(0, 28 - #label))
    emit(A.bold, color, "─── NORRIS ", label, " ──", padding, A.reset, "\n")
    if reason and reason ~= "" then
        emit(A.dim, "  ", reason, A.reset, "\n")
    end
    emit(A.bold, color, "──────────────────────────────────────────",
         A.reset, "\n")
 end
 return M
@@ -1,14 +1,139 @@
-- router.lua — task classifier: meta / shell / AI.
+-- router.lua — task classifier: meta / shell / AI / model-routing.
-- See docs/PHASE0.md §5.
+-- See docs/PHASE0.md §5 and docs/PHASE5.md §4 for Phase 5 additions.
 --
 -- M.classify(line, config) → (kind, payload) for input dispatch (Phase 0).
 -- M.classify_model(text, cfg) → name | nil for per-request model routing
 --                               (Phase 5; pure-Lua heuristics, no IO).
 local M = {}
-- Classify an input line.
+local function trim(s)
-- Returns one of: "meta", "shell", "ai" plus the (possibly stripped) payload.
+    return (s:gsub("^%s+", ""):gsub("%s+$", ""))
 function M.classify(line, config)
    error("router.classify: not implemented (Phase 0 pending)")
 end
-- Default known-command allowlist seeds the heuristic in §5.1.
+local function first_word(s)
-- Final list is config.shell.known_commands at runtime.
+    return s:match("^(%S+)") or ""
 end
 -- Turn config.shell.known_commands (a list) into a lookup table
 -- { name = true }. A missing config, shell section or list yields {}.
 local function known_commands_set(config)
    local names = config and config.shell and config.shell.known_commands
    local lookup = {}
    for _, name in ipairs(names or {}) do
        lookup[name] = true
    end
    return lookup
 end
 -- §5.1 path-like: ./foo, ../foo, /usr/bin/foo, ~/foo, bare ~. Quoted /
 -- escaped paths are intentionally out of scope in Phase 0. ~ is included
 -- for symmetry with executor.maybe_chdir, which expands ~ on `cd ~/foo`.
 local function path_like(token)
    -- A bare "~" counts on its own; otherwise the token must begin with one
    -- of the path prefixes below.
    if token == "~" then return true end
    local prefixes = { "/", "./", "~/", "../" }
    for _, prefix in ipairs(prefixes) do
        if token:sub(1, #prefix) == prefix then return true end
    end
    return false
 end
 function M.classify(line, config)
    -- Returns (kind, payload) with kind one of "meta", "shell", "ai".
    local input = trim(line or "")
    if input == "" then return "ai", "" end
    -- Single-character prefixes: ":" is a meta command, "$" forces shell.
    local lead = input:sub(1, 1)
    if lead == ":" then
        return "meta", input:sub(2)
    elseif lead == "$" then
        return "shell", trim(input:sub(2))
    end
    -- Otherwise the first word decides: a configured known command or a
    -- path-like token goes to the shell, everything else to the AI.
    local head = first_word(input)
    if known_commands_set(config)[head] or path_like(head) then
        return "shell", input
    end
    return "ai", input
 end
 -- ---------------------------------------------------------------- classify_model
 -- Phase 5: per-request model routing heuristic. Pure-Lua, no IO.
 -- Returns the NAME of a model preset (string) to switch to for this
 -- request, or nil to keep the active model unchanged.
 --
 -- The mapping from class to model name lives in `cfg.routing.classes`.
 -- A class with value `nil` means "keep current" — even though the
 -- heuristic fires, no override happens (used by default for the
 -- `reasoning` class per R-N2 cost-safety policy).
 --
 -- This function ALWAYS evaluates the heuristic regardless of
 -- `cfg.routing.auto` — the caller (repl.ask_ai) gates on the flag.
 -- This separation lets `:route check <text>` introspect the heuristic
 -- even when routing is disabled (N1).
 -- Extensions that mark a path-like token as source code.
 local _CODE_EXTENSIONS = {
    py = true, lua = true, c = true, js = true, go = true,
    rs = true, cpp = true, h = true, ts = true,
 }
 -- Pure-Lua heuristic: returns "code", "reasoning" or "default" for `text`.
 -- nil / empty text is "default". Checks run in order; the first hit wins.
 local function classify_class(text)
    if not text or text == "" then return "default" end
    -- ── Code class — looks like a paste or contains code markers
    if text:find("```", 1, true) then return "code" end
    local lower = text:lower()
    if lower:find("traceback", 1, true)
       or lower:find("stacktrace", 1, true)
       or lower:find("stack trace", 1, true) then
        return "code"
    end
    -- exception/error markers near the beginning (first 60 chars)
    local head = lower:sub(1, 60)
    if head:find("error:", 1, true) or head:find("exception:", 1, true) then
        return "code"
    end
    -- Path with a code-extension token. Every path-like token is checked,
    -- not just the first: "./notes.txt and ./main.py" must still count.
    for ext in text:gmatch("[%./~][%w%-_/.]+%.([%w]+)") do
        if _CODE_EXTENSIONS[ext] then return "code" end
    end
    -- multi-line + indented (looks like a code paste)
    local nlines = 0
    for _ in (text .. "\n"):gmatch("[^\n]*\n") do nlines = nlines + 1 end
    if nlines > 4 and text:find("\n%s+%S") then return "code" end
    -- ── Reasoning class
    -- Min length 15 — catches "how does X work" but excludes bare "why" / "explain"
    if #text >= 15 then
        if lower:find("explain", 1, true)
           or lower:find("why ", 1, true)        -- trailing space (not "whyever")
           or lower:find("how does", 1, true)
           or lower:find("compare", 1, true) then
            return "reasoning"
        end
    end
    -- A long question (over 100 bytes and containing "?") also counts.
    if text:find("?", 1, true) and #text > 100 then
        return "reasoning"
    end
    return "default"
 end
 -- Public API.
 function M.classify_model(text, cfg)
    -- Returns (target, class): the value configured for the detected class
    -- in cfg.routing.classes, plus the class label itself. A class with no
    -- configured value yields nil as target, i.e. "keep the current model"
    -- (the R-N2 default for "reasoning").
    local class   = classify_class(text)
    local routing = cfg and cfg.routing
    local classes = (routing and routing.classes) or {}
    return classes[class], class
 end
 -- Exposed for `:route check` introspection (N1).
 M._classify_class = classify_class
 return M
@@ -1,18 +1,577 @@
-- safety.lua — destructive op heuristic + Chuck Norris autonomous gate.
+-- safety.lua — workflow safeguards for tool execution.
-- Phase 0: stub. Lands in Phase 2.
+-- Phase 2: M.confirm_tool_call (per-call confirm gate + auto-approve policy).
-- See docs/PHASE0.md §11 (Phase 2), §12 (security posture is workflow-not-OS).
+-- Phase 3: M.is_destructive (static pattern + LLM second-opinion gate for
 --          Norris autonomous mode) and M.norris_step (single-iteration
 --          planning loop). See docs/PHASE2.md §6 and docs/PHASE3.md §4 / §5.
 -- Issue #9: M.classify_command (allow/confirm/deny rule list — interactive
 --           CMD: gate, supersedes the confirm_cmd boolean when configured).
 local rl     = require("ffi.readline")
 local json   = require("dkjson")
 local broker = require("broker")
 local M = {}
-- Returns true if cmd matches the destructive-op heuristic and should HALT
+-- ---------------------------------------------------------------- classify_command
-- in Norris mode pending user confirmation.
+-- Walk config.permissions (allow / confirm / deny rule lists) against `cmd`
-function M.is_destructive(cmd)
+-- in priority order: deny > confirm > allow. First match in the chosen
-    error("safety.is_destructive: not implemented (Phase 2)")
+-- category wins. Returns the verdict string and the matching pattern (for
 -- status messages); falls back to the legacy confirm_cmd boolean when no
 -- permissions table is configured. Default verdict when permissions is set
 -- but no rule matches is "confirm" — per the issue body.
 --   verdict ∈ "allow" | "confirm" | "deny"
 local function _match_any(cmd, rules)
    -- Returns the first entry of `rules` (in list order) that occurs in
    -- `cmd` when used as a Lua pattern, or nil when `rules` is absent or
    -- nothing matches.
    local matched = nil
    for _, pattern in ipairs(rules or {}) do
        if cmd:find(pattern) then
            matched = pattern
            break
        end
    end
    return matched
 end
 function M.classify_command(cmd, cfg)
    local perms = cfg and cfg.permissions
    if perms then
        -- Categories are consulted strictly in this priority order; the
        -- first category with a matching rule decides the verdict.
        for _, verdict in ipairs({ "deny", "confirm", "allow" }) do
            local pattern = _match_any(cmd, perms[verdict])
            if pattern then return verdict, pattern end
        end
        -- permissions configured but nothing matched → ask the user.
        return "confirm", nil
    end
    -- No permissions table: honour the legacy confirm_cmd boolean.
    local legacy_confirm = cfg and cfg.shell and cfg.shell.confirm_cmd
    if legacy_confirm then
        return "confirm", nil
    else
        return "allow", nil
    end
 end
-- Norris mode planning loop entry point.
+-- Render the call as `name({"path":"/tmp"})` for the confirm prompt.
-function M.norris_step(plan, broker, executor)
+-- Truncate to keep one-line prompts.
-    error("safety.norris_step: not implemented (Phase 2)")
+local function pretty_call(name, args)
    -- `body` stays empty when `args` is nil/false or an empty table, so
    -- the result is then just `name()`.
    local body = ""
    if args and next(args) then
        -- json.encode runs under pcall so an encode error cannot abort
        -- the prompt; the fallback body is a bare "...".
        local ok, encoded = pcall(json.encode, args)
        if ok then
            -- Cap at 80 characters: longer encodings keep their first 77
            -- characters followed by "...".
            body = (#encoded <= 80) and encoded or (encoded:sub(1, 77) .. "...")
        else
            body = "..."
        end
    end
    return name .. "(" .. body .. ")"
 end
 -- Ask the user whether tool `name` may be called with `args`, consulting
 -- `cfg.mcp.auto_approve` first. Policy keys:
 --   "<alias>__<tool>"  → exact-match auto-approve
 --   "<alias>__*"       → whole-server auto-approve
 -- Anything else falls back to a [y/N] prompt; empty / non-"y" answer rejects.
 -- The separator switched from "." to "__" 2026-05-12 because Anthropic via
 -- Bedrock rejects dots in tool names (regex ^[a-zA-Z0-9_-]{1,128}$).
 function M.confirm_tool_call(name, args, cfg)
    local mcp = cfg and cfg.mcp
    local policy = (mcp and mcp.auto_approve) or {}
    -- Exact "<alias>__<tool>" entry first, then the "<alias>__*" wildcard
    -- for the server prefix (text before the first "__").
    local approved = policy[name]
    if not approved then
        local server = name:match("^(.-)__")
        if server and server ~= "" then
            approved = policy[server .. "__*"]
        end
    end
    if approved then return true end
    -- No policy hit: ask. Only an answer starting with y/Y approves; an
    -- empty answer or a nil from readline rejects.
    local question = ("call '%s'? [y/N] "):format(pretty_call(name, args))
    local answer = rl.readline(question) or ""
    return answer:lower():sub(1, 1) == "y"
 end
 -- ---------------------------------------------------------------- is_destructive
 -- Phase 3 commit #1: static-pattern matcher only (no LLM second-opinion yet —
 -- that lands in commit #2). Patterns are Lua patterns (NOT regex). When
 -- `ci = true` is set on a rule, the input is lowercased before matching so
 -- the rule matches case-insensitively (`DROP TABLE`, `drop table`, etc.).
 -- See docs/PHASE3.md §5 for the rationale and the wrapper-bypass class
 -- (R-B1) that the leading shell-wrapper entries below guard against.
 -- Ordered list of static destructive-command rules. Each entry is
 --   { pat = <Lua pattern>, reason = <label reported on a hit>, ci = <bool?> }
 -- Rules are tested top to bottom and the first hit wins, so keep more
 -- specific rules above the generic ones they overlap with.
 local DESTRUCTIVE_PATTERNS = {
    -- ── Shell wrappers (R-B1) — flag the wrapper itself; can't inspect
    --    the inner content safely without parsing the inner shell.
    --    Norris HALTs on these unconditionally; the user reads the inner
    --    before proceeding.
    { pat = "^%s*bash%s+%-l?c%s",              reason = "bash -c (wrapped shell)" },
    { pat = "^%s*sh%s+%-l?c%s",                reason = "sh -c (wrapped shell)" },
    { pat = "^%s*zsh%s+%-l?c%s",               reason = "zsh -c (wrapped shell)" },
    { pat = "^%s*eval%s",                      reason = "eval (dynamic shell)" },
    { pat = "^%s*python3?%s+%-c%s",            reason = "python -c (inline script)" },
    { pat = "^%s*perl%s+%-e%s",                reason = "perl -e (inline script)" },
    { pat = "|%s*sh%s",                        reason = "pipe-to-sh" },
    { pat = "|%s*sh%s*$",                      reason = "pipe-to-sh (eol)" },
    { pat = "|%s*bash%s",                      reason = "pipe-to-bash" },
    { pat = "|%s*bash%s*$",                    reason = "pipe-to-bash (eol)" },
    { pat = "xargs%s+.-rm",                    reason = "xargs ... rm" },
    -- ── Filesystem destructive
    { pat = "rm%s+.-%-rf?",                    reason = "rm -rf" },
    { pat = "rm%s+.-%-fr?",                    reason = "rm -fr" },
    { pat = "find%s+.-%-delete",               reason = "find -delete" },
    { pat = "find%s+.-%-exec%s+rm",            reason = "find -exec rm" },
    { pat = ">%s*/dev/sd[a-z]",                reason = "write to raw disk" },
    { pat = "dd%s+.-of=/dev/",                 reason = "dd to device" },
    { pat = "mkfs%.",                          reason = "mkfs (format)" },
    { pat = "shred%s",                         reason = "shred" },
    { pat = "wipefs%s",                        reason = "wipefs" },
    { pat = "truncate%s+.-%-s%s*0",            reason = "truncate to zero" },
    -- ── Version control destructive
    { pat = "git%s+push%s+.-%-%-force",        reason = "git push --force" },
    -- The frontier set is [%s%z], not just [%s]: a frontier treats the end
    -- of the subject as "\0", which is not whitespace, so a plain %f[%s]
    -- would miss "git push origin main -f" when -f is the last token.
    -- Branch names such as "my-feature" still don't match (the character
    -- after "-f" is a letter).
    { pat = "git%s+push%s+.-%-f%f[%s%z]",      reason = "git push -f" },
    { pat = "git%s+reset%s+.-%-%-hard",        reason = "git reset --hard" },
    { pat = "git%s+clean%s+.-%-fd?",           reason = "git clean -fd" },
    { pat = "git%s+branch%s+.-%-D",            reason = "git branch -D" },
    -- ── Database / process
    -- ci=true rules use lowercase patterns; the matcher lowercases the
    -- input before testing. Don't use uppercase patterns with ci=true.
    { pat = "drop%s+table",                    reason = "DROP TABLE",        ci = true },
    { pat = "drop%s+database",                 reason = "DROP DATABASE",     ci = true },
    { pat = "truncate%s+table",                reason = "TRUNCATE TABLE",    ci = true },
    -- pkill BEFORE kill so the more specific match wins (the matcher walks
    -- this array in index order; first hit reports the reason).
    { pat = "pkill%s+%-9",                     reason = "pkill -9" },
    -- kill -9 needs a word boundary so "pkill -9" doesn't match this rule's
    -- "kill" substring. %f[%w] is Lua's frontier pattern — matches a
    -- transition from non-word to word characters.
    { pat = "%f[%w]kill%s+%-9",                reason = "kill -9" },
    -- ── Network/permission
    { pat = "chmod%s+.-777",                   reason = "chmod 777" },
    { pat = "chown%s+.-%s+/%s*$",              reason = "chown on root path" },
 }
 -- Match each rule against `cmd`. Returns (true, reason) on first hit;
 -- (false, nil) if no rule matches. Static-only — does NOT invoke the
 -- LLM probe (that's `is_destructive` below, which calls this first).
 local function match_static(cmd)
    -- Non-strings and the empty string can never be destructive.
    if type(cmd) ~= "string" or cmd == "" then return false, nil end
    -- Lowercased copy of `cmd`; only built once a ci rule is reached.
    local folded
    for _, rule in ipairs(DESTRUCTIVE_PATTERNS) do
        local subject
        if rule.ci then
            if not folded then folded = cmd:lower() end
            subject = folded
        else
            subject = cmd
        end
        if subject:match(rule.pat) then
            -- First matching rule decides the reported reason.
            return true, rule.reason
        end
    end
    return false, nil
 end
 -- ---------------------------------------------------------------- LLM probe
 -- Session-scoped cache for the LLM second-opinion. Keyed by the normalized
 -- (lowercased, whitespace-collapsed) command text. Mitigates Q23 latency
 -- when the same command pattern recurs within a single Norris run.
 local _llm_cache = {}
 -- Build the cache key for `cmd`: lowercase it, collapse every whitespace
 -- run to a single space, then strip leading and trailing whitespace.
 -- Returns exactly one value. string.gsub returns (string, count), so
 -- returning the chain directly would leak the substitution count of the
 -- last gsub as a second result; assigning to a local first drops it.
 local function normalize(cmd)
    local key = cmd:lower():gsub("%s+", " "):gsub("^%s+", ""):gsub("%s+$", "")
    return key
 end
 -- Per-probe timeout. The probe must be quick — destructive detection has
 -- to keep up with Norris's pace. We override the model's default timeout
 -- (which can be 30+ min for deep/slow local models) with a tight cap.
 local PROBE_TIMEOUT_MS = 15000
 -- Ask `model_cfg` whether `cmd` is destructive.
 --
 -- Returns a verdict string (not a bool — the caller cares about
 -- disagreement between probes), optionally followed by a detail value:
 --   "YES"                      reply contains the word YES
 --   "NO"                       reply contains the word NO (and not YES)
 --   "YES_FAILSAFE", err        broker.chat returned no reply; `err` is its
 --                              second result
 --   "YES_UNPARSEABLE", reply   reply contains neither word
 -- Verdict words are matched case-insensitively and as whole words, so a
 -- reply such as "I cannot" or "Unknown" is NOT read as "NO" just because
 -- it contains the letters N-O; it falls through to YES_UNPARSEABLE.
 --
 -- Issue #52: when `opts.scrub_msgs` is provided, scrub the outbound
 -- {system, user(cmd)} message pair using the caller's secrets-aware
 -- scrubber. The probe model sees placeholders for any secrets the
 -- CMD: line happens to contain. Verdict text ("YES"/"NO") is unlikely
 -- to carry placeholders but we rehydrate defensively via opts.rehydrate
 -- so any echoed value comes back clean.
 local function llm_probe(model_cfg, system, cmd, opts)
    local msgs = {
        { role = "system", content = system },
        { role = "user",   content = cmd },
    }
    if opts and opts.scrub_msgs then
        msgs = opts.scrub_msgs(msgs, model_cfg)
    end
    -- Phase 7: opts.category = "probe" tags the usage in the
    -- accumulator so :cost detail surfaces probe spend separately.
    -- broker.chat returns (text, usage) on success; capture as
    -- (reply, second) and branch on reply nil-ness.
    -- #88: optional GBNF grammar passthrough to constrain the probe
    -- model's output to exactly the YES/NO tokens we'll accept. Set
    -- via opts.grammar (caller forwards cfg.safety.probe_grammar).
    -- Cloud silently ignores; local llama.cpp enforces.
    local reply, second = broker.chat(model_cfg, msgs,
        { max_tokens = 4, timeout_ms = PROBE_TIMEOUT_MS, category = "probe",
          grammar = opts and opts.grammar })
    if not reply then
        -- Broker failure → safe default: treat as YES (destructive)
        return "YES_FAILSAFE", second
    end
    -- Phase 7 (N4): route the usage payload through opts.on_usage if
    -- the caller wired one (repl.lua's _record_usage when secrets/
    -- cost are configured).
    if second and opts and opts.on_usage then
        opts.on_usage(second.model, second.category, second)
    end
    if opts and opts.rehydrate then reply = opts.rehydrate(reply) end
    local upper = reply:upper()
    -- %f[%a] / %f[%A] are frontiers into / out of a run of letters, i.e.
    -- word boundaries; the end of the reply counts as a non-letter.
    if upper:find("%f[%a]YES%f[%A]") then return "YES" end
    if upper:find("%f[%a]NO%f[%A]")  then return "NO"  end
    -- Unparseable response → treat as YES (safe default)
    return "YES_UNPARSEABLE", reply
 end
 -- LLM second-opinion gate. Returns:
 --   true,  "LLM flagged as destructive"     probe 1 said YES
 --   true,  "LLM probe failed (fail-safe)"   probe 1 got no reply from the broker
 --   true,  "LLM unparseable (fail-safe)"    probe 1 reply was neither YES nor NO
 --   true,  "LLM probe disagreement"         probe 1 NO, but probe 2 ("is it
 --                                           safe?") did not answer YES
 --   true,  "LLM probe unavailable (no model in config)"
 --   false, nil                              probe 1 NO and probe 2 YES
 -- Verdicts derived from an actual model reply are cached per normalized
 -- command for the session. Broker failures still fail safe (true) but are
 -- NOT cached: they are transient, and memoizing one would keep the
 -- command flagged for the rest of the session after a single blip.
 --
 -- Model-selection precedence (per PHASE3.md §5):
 --   1. cfg.safety.llm_model         — explicit model-name from config (e.g. "cloud")
 --   2. cfg.models.deep              — local independent-class model (qwen3-30b)
 --   3. cfg.models[cfg.default_model] — fallback to whatever the user runs by default
 --                                      ("fast" when default_model is unset)
 local function llm_second_opinion(cmd, cfg, opts)
    local probe_cfg
    if cfg and cfg.safety and cfg.safety.llm_model and cfg.models then
        probe_cfg = cfg.models[cfg.safety.llm_model]
    end
    if not probe_cfg and cfg and cfg.models then
        probe_cfg = cfg.models.deep or cfg.models[cfg.default_model or "fast"]
    end
    if not probe_cfg then
        -- No probe model available → fail-safe YES
        return true, "LLM probe unavailable (no model in config)"
    end
    local key = normalize(cmd)
    local cached = _llm_cache[key]
    if cached ~= nil then
        if cached[1] then return cached[1], cached[2] end
        return false, nil
    end
    -- Store the verdict under `key` and pass it straight through.
    local function memo(is_d, reason)
        _llm_cache[key] = { is_d, reason }
        return is_d, reason
    end
    -- Probe 1: "Is this destructive?"
    local p1 = llm_probe(probe_cfg,
        "Detect destructive shell commands. Reply YES if running this "
        .. "would delete, overwrite, or irreversibly modify data, "
        .. "processes, or system state. Otherwise reply NO. Respond "
        .. "with only YES or NO.", cmd, opts)
    if p1 == "YES" then return memo(true, "LLM flagged as destructive") end
    -- Broker failure: HALT now, but leave the cache untouched so the next
    -- occurrence of this command is probed again.
    if p1 == "YES_FAILSAFE"    then return true, "LLM probe failed (fail-safe)" end
    if p1 == "YES_UNPARSEABLE" then return memo(true, "LLM unparseable (fail-safe)") end
    -- Probe 2 (NO → re-roll with inverted phrasing per R-B2):
    local p2 = llm_probe(probe_cfg,
        "Reply YES or NO only. Is the following shell command SAFE to "
        .. "run autonomously without user review?", cmd, opts)
    if p2 == "YES" then return memo(false, nil) end
    -- Same rule for a broker failure on the second probe: HALT, don't cache.
    if p2 == "YES_FAILSAFE" then return true, "LLM probe disagreement" end
    -- Disagreement or unparseable reply → HALT
    return memo(true, "LLM probe disagreement")
 end
 -- Main entry point. Returns (true, reason) if EITHER the static patterns
 -- OR the LLM second-opinion flag the command. Used by the Norris loop.
 -- `cfg` is the full aish config (carries cfg.safety.llm_second_opinion
 -- and cfg.models for the probe model lookup). When cfg is nil, only the
 -- static layer runs (handy for unit tests and tooling that wants the
 -- fast deterministic gate without an LLM round-trip).
 -- Issue #52: opts.scrub_msgs(messages, model_cfg) + opts.rehydrate(text)
 -- callbacks let the LLM probe scrub the outbound cmd before sending and
 -- rehydrate the YES/NO verdict before parsing. Both optional; absent
 -- opts = no-op (backwards-compatible). Caller (repl.lua / norris_step
 -- helpers) provides them when secrets are configured.
 function M.is_destructive(cmd, cfg, opts)
    -- Only non-empty strings can be judged.
    if type(cmd) ~= "string" or cmd == "" then return false, nil end
    -- Layer 1: static patterns (fast, deterministic).
    local hit, reason = match_static(cmd)
    if hit then return true, reason end
    -- Layer 2: LLM second opinion. Skipped when there is no cfg at all
    -- (test / static-only mode) or when explicitly switched off with
    -- cfg.safety.llm_second_opinion = false.
    if cfg == nil then return false, nil end
    local safety = cfg.safety
    if safety and safety.llm_second_opinion == false then
        return false, nil
    end
    -- #88: forward cfg.safety.probe_grammar as opts.grammar unless the
    -- caller already supplied one. The caller's table is copied, never
    -- mutated; with neither opts nor a grammar the probe receives nil.
    local grammar = safety and safety.probe_grammar
    local probe_opts = nil
    if opts or grammar then
        probe_opts = {}
        for k, v in pairs(opts or {}) do probe_opts[k] = v end
        if grammar and not probe_opts.grammar then
            probe_opts.grammar = grammar
        end
    end
    return llm_second_opinion(cmd, cfg, probe_opts)
 end
 -- Expose the pattern table for `:safety patterns` meta and for testing.
 -- The same table object is exported (no copy).
 M._patterns       = DESTRUCTIVE_PATTERNS
 M._match_static   = match_static       -- testable in isolation
 -- Replaces the session cache with a fresh empty table, discarding every
 -- memoized LLM verdict.
 M._reset_cache    = function() _llm_cache = {} end
 -- ---------------------------------------------------------------- norris_step
 -- One iteration of the Norris planning loop per PHASE3.md §4.
 -- The driver in repl.lua calls this in a while loop, advancing on every
 -- non-terminal status.
 --
 -- Inputs:
 --   ctx          aish Context (read & written: turns + pending_exec_output)
 --   model_cfg    the active broker model config (model_cfg.endpoint/.model/etc.)
 --   helpers      table of injected dispatch helpers:
 --                  .tools_schema()         → tools array for opts.tools
 --                  .exec_cmd(cmd)          → run shell cmd; returns (out, exit_code)
 --                  .dispatch_tool(call,args)→ run an MCP tool; returns (content, is_error)
 --                  .extract_cmd_lines(text)→ executor.extract_cmd_lines (passed in)
 --                  .halt(step_n, max_n, reason, action) → "proceed"|"skip"|"abort"
 --                  .render_step(n, max_n, descr)        (renderer.norris_step)
 --                  .render_tool_begin(name, args)       (renderer.tool_call_begin)
 --                  .render_tool_end(content, is_error)  (renderer.tool_call_end)
 --                  .render_exec_begin()                 (renderer.exec_begin)
 --                  .render_exec_end(code)               (renderer.exec_end)
 --                  .render_assistant_delta(chunk)       (renderer.assistant_delta)
 --                  .render_assistant_flush()            (renderer.assistant_flush)
 --                  .log_turn(turn)                      (session log append)
 --   opts:
 --                  .step_n             current step (1-based)
 --                  .max_steps          budget cap (default 8)
 --                  .cfg                full aish config (for is_destructive)
 --
 -- Returns: { status, reason } where status ∈ {
 --    "continue"          — keep looping (driver bumps step_n)
 --    "done"              — model emitted GOAL: complete
 --    "aborted"           — user typed abort at a halt prompt
 --    "stalled"           — model emitted nothing actionable
 --    "budget_exhausted"  — step_n >= max_steps after this iteration
 --    "broker_error"      — broker.chat_stream returned (nil, err)
 -- }
 function M.norris_step(ctx, model_cfg, helpers, opts)
    -- One plan/act iteration: stream a model reply, record it as an
    -- assistant turn, gate and dispatch each tool call and CMD: line it
    -- contains, then return a status table telling the driver what to do.
    local step_n    = opts.step_n or 1
    local max_steps = opts.max_steps or 8
    local cfg       = opts.cfg
    -- Phase 10 / #89: when the cloud preplanner emitted a TASK list
    -- at :norris launch, surface the current task as the per-step
    -- descr so the user sees `─ step k/M ─ <task text>` in real time.
    -- ctx.norris_tasks is nil when preplan is disabled / failed →
    -- descr falls through to nil → renderer prints just the step bar
    -- (existing behavior).
    local task_descr
    if ctx.norris_tasks and ctx.norris_tasks.list then
        task_descr = ctx.norris_tasks.list[ctx.norris_tasks.current]
    end
    helpers.render_step(step_n, max_steps, task_descr)
    -- (1) one broker round-trip — stream text + collect tool_calls.
    --
    -- Issue #52: when helpers.scrub_msgs is provided, scrub outbound
    -- per the active model's redact policy; when helpers.streaming_rehydrator
    -- is provided, wrap on_delta so the user sees rehydrated text AND
    -- text_parts accumulates rehydrated chunks (so any extracted CMD: /
    -- DELEGATE: lines downstream see plain values — matches ask_ai's
    -- contract in repl.lua).
    local msgs = ctx:to_messages()
    if helpers.scrub_msgs then msgs = helpers.scrub_msgs(msgs, model_cfg) end
    local rehydrator = helpers.streaming_rehydrator and helpers.streaming_rehydrator() or nil
    -- Phase 7: thread on_usage callback into the LLM probe via
    -- probe_opts so destructive-check costs land in the accumulator
    -- under the "probe" category. helpers.on_usage is repl.lua's
    -- _record_usage (the central chokepoint with warn-threshold check).
    -- probe_opts stays nil when none of the three helpers is wired.
    local probe_opts = nil
    if helpers.scrub_msgs or helpers.rehydrate or helpers.on_usage then
        probe_opts = {
            scrub_msgs = helpers.scrub_msgs,
            rehydrate  = helpers.rehydrate,
            on_usage   = helpers.on_usage,
        }
    end
    local text_parts      = {}
    local tool_calls_seen = {}
    local ok, err = broker.chat_stream(model_cfg, msgs,
        function(kind, payload)
            if kind == "text" then
                -- With a rehydrator, push() decides what (if anything) is
                -- ready to emit for this chunk; empty output is dropped.
                local emit = rehydrator and rehydrator:push(payload) or payload
                if emit ~= "" then
                    text_parts[#text_parts + 1] = emit
                    helpers.render_assistant_delta(emit)
                end
            elseif kind == "tool_call" then
                tool_calls_seen[#tool_calls_seen + 1] = payload
            elseif kind == "usage" then
                -- Phase 7: route Norris's own broker usage to the
                -- accumulator via helpers.on_usage. R5 chokepoint
                -- (_record_usage) is what's wired in.
                if helpers.on_usage then
                    helpers.on_usage(payload.model, payload.category, payload)
                end
            end
        end,
        { tools = helpers.tools_schema(), category = "norris" })
    -- Drain whatever the rehydrator is still holding. This and the
    -- renderer flush below run before the `ok` check, so they happen on
    -- broker errors as well.
    if rehydrator then
        local tail = rehydrator:flush()
        if tail ~= "" then
            text_parts[#text_parts + 1] = tail
            helpers.render_assistant_delta(tail)
        end
    end
    helpers.render_assistant_flush()
    if not ok then
        return { status = "broker_error", reason = tostring(err) }
    end
    local resp_text = table.concat(text_parts)
    -- (2) parse actions from response
    local cmd_lines = helpers.extract_cmd_lines(resp_text) or {}
    local goal_done = false
    for line in (resp_text .. "\n"):gmatch("([^\n]*)\n") do
        -- `local trimmed =` keeps only the string result of the gsub
        -- chain; the trailing substitution count is dropped.
        local trimmed = line:gsub("^%s+", ""):gsub("%s+$", "")
        if trimmed == "GOAL: complete" then
            goal_done = true; break
        end
    end
    -- Counts every tool call seen, including ones whose arguments later
    -- fail to parse.
    local n_actions = #tool_calls_seen + #cmd_lines
    -- (3) record assistant turn (with optional tool_calls)
    if #tool_calls_seen > 0 then
        ctx:append({ role = "assistant", content = resp_text,
                     tool_calls = tool_calls_seen })
    else
        ctx:append({ role = "assistant", content = resp_text })
    end
    helpers.log_turn(ctx.turns[#ctx.turns])
    if n_actions == 0 and not goal_done then
        return { status = "stalled", reason = "no action emitted" }
    end
    -- (4) dispatch tool_calls first (structured route)
    for _, call in ipairs(tool_calls_seen) do
        local args_table = {}
        if call.arguments and call.arguments ~= "" then
            local d, _, derr = json.decode(call.arguments)
            if d then args_table = d
            else
                -- Argument JSON parse failure: synthesize tool turn (alternation)
                ctx:append({ role = "tool", tool_call_id = call.id,
                             content = "[aish] tool arguments not "
                                       .. "parseable as JSON: " .. tostring(derr) })
                helpers.log_turn(ctx.turns[#ctx.turns])
                -- Nothing to gate or run for this call; move on to the next.
                goto continue_tool
            end
        end
        -- Probe destructive on the JSON-serialized call as a proxy.
        local call_repr = (call.name or "?") .. " " .. (call.arguments or "")
        local destr, reason = M.is_destructive(call_repr, cfg, probe_opts)
        local verdict
        if destr then
            verdict = helpers.halt(step_n, max_steps, reason or "destructive",
                                   call_repr)
        else
            -- Non-destructive tool_call: auto_approve OR halt for consent
            local policy = cfg and cfg.mcp and cfg.mcp.auto_approve or {}
            local alias = (call.name or ""):match("^(.-)__")
            local auto = policy[call.name]
                         or (alias and alias ~= "" and policy[alias .. "__*"])
            if auto then
                verdict = "proceed"
            else
                verdict = helpers.halt(step_n, max_steps, "tool consent",
                                       call_repr)
            end
        end
        if verdict == "abort" then
            return { status = "aborted", reason = "user abort at halt" }
        elseif verdict == "skip" then
            ctx.norris_consecutive_skips = (ctx.norris_consecutive_skips or 0) + 1
            -- `reason` is nil here when the halt was a plain consent
            -- prompt (is_destructive returned false), hence the fallback.
            ctx:append({ role = "tool", tool_call_id = call.id,
                         content = "[aish] tool call skipped by user: "
                                   .. (reason or "no reason") })
            helpers.log_turn(ctx.turns[#ctx.turns])
        else  -- proceed
            ctx.norris_consecutive_skips = 0
            helpers.render_tool_begin(call.name, call.arguments)
            local content, is_error = helpers.dispatch_tool(call.name, args_table)
            helpers.render_tool_end(content, is_error)
            ctx:append({ role = "tool", tool_call_id = call.id,
                         content = content or "" })
            helpers.log_turn(ctx.turns[#ctx.turns])
        end
        ::continue_tool::
    end
    -- (5) dispatch CMD: lines (legacy route)
    for _, cmd in ipairs(cmd_lines) do
        local destr, reason = M.is_destructive(cmd, cfg, probe_opts)
        local verdict
        if destr then
            verdict = helpers.halt(step_n, max_steps, reason or "destructive",
                                   cmd)
        else
            verdict = "proceed"  -- non-destructive CMD: runs without consent
                                 -- in Norris (Norris user accepted autonomy)
        end
        if verdict == "abort" then
            return { status = "aborted", reason = "user abort at halt" }
        elseif verdict == "skip" then
            ctx.norris_consecutive_skips = (ctx.norris_consecutive_skips or 0) + 1
            -- CMD: skip → synthesize exec-output line so the model sees it
            ctx:append_exec_output("[aish] CMD skipped by user: "
                                   .. (reason or "no reason"))
        else  -- proceed
            ctx.norris_consecutive_skips = 0
            helpers.render_exec_begin()
            local out, code = helpers.exec_cmd(cmd)
            helpers.render_exec_end(code)
            -- Output reaches the context only when
            -- cfg.shell.capture_output is set.
            if cfg and cfg.shell and cfg.shell.capture_output then
                ctx:append_exec_output(out)
            end
        end
    end
    -- Skip-budget escalation: R-C1
    if (ctx.norris_consecutive_skips or 0) >= 3 then
        local verdict = helpers.halt(step_n, max_steps,
            ("%d consecutive user skips"):format(ctx.norris_consecutive_skips),
            "(repeated similar destructive proposals)")
        if verdict == "abort" then
            return { status = "aborted", reason = "user abort on skip-escalation" }
        end
        -- Else: reset the counter and continue (any verdict other than
        -- abort is treated the same way here)
        ctx.norris_consecutive_skips = 0
    end
    -- (6) goal_done after dispatch — actions emitted in the same reply as
    -- "GOAL: complete" have already been processed above.
    if goal_done then
        return { status = "done", reason = "GOAL: complete" }
    end
    -- (7) budget
    if step_n >= max_steps then
        return { status = "budget_exhausted",
                 reason = ("%d step limit reached"):format(max_steps) }
    end
    return { status = "continue" }
 end
 return M
@@ -0,0 +1,250 @@
 -- secrets.lua — vault + scrub/rehydrate for issue #13.
 --
 -- Pipeline:
 --   1. M.load(path) reads the user's vault. Refuses to load if the file
 --      isn't mode 0600 (matches ssh's behavior for ~/.ssh/id_rsa).
 --   2. M.make_session(vault, opts) returns a per-conversation state object.
 --      session:scrub(text, mode) substitutes secrets with stable placeholders
 --      ($AISH_SECRET_001, _002, ...) and records the mapping. session:rehydrate
 --      reverses it. The mapping is stable across the conversation, so the same
 --      literal value always maps to the same placeholder slot.
 --   3. M.streaming_rehydrator(session) wraps the per-delta rehydration so a
 --      placeholder split across SSE chunks doesn't render half-substituted.
 --
 -- Modes (per call to session:scrub):
 --   "off"               → identity (returns text unchanged, no mapping)
 --   "vault"             → vault literals only, placeholders, rehydratable
 --   "vault+autodetect"  → + heuristic regexes, placeholders, rehydratable
 --   "stealth"           → + heuristic regexes, opaque decoys, NOT rehydratable
 --                         (one-way scrub for zero-info brokers — user and
 --                          model both see decoys; real values only in the
 --                          executor stream which is pre-scrub)
 local M = {}
 -- ---------------------------------------------------------------- AUTODETECT_PATTERNS
 -- Heuristic detectors used by Session:scrub in the "vault+autodetect" and
 -- "stealth" modes. They are applied to the text one after another, in list
 -- order.
 -- Order matters: longer / more-specific prefixes must come first so a generic
 -- "sk-..." rule doesn't shadow "sk-or-v1-..." which IS the actual key. Each
 -- entry is { pat = "<lua pattern>", min_len = N (optional), max_len = N (opt),
 --           label = "<short tag for decoy names>" }.
 -- min_len / max_len are checked against the length of the WHOLE match
 -- (prefix included), after the pattern has matched.
 -- Lua patterns don't support {N} repeats; fixed widths are built with
 -- string.rep and variable widths rely on the post-match length check.
 local SIXTEEN_UPPER = ("[A-Z0-9]"):rep(16)
 M.AUTODETECT_PATTERNS = {
    -- OpenRouter (long form). v1+ catches v1, v2, ...
    { pat = "sk%-or%-v%d+%-[%w_-]+",      min_len = 20, label = "openrouter" },
    -- GitHub Personal Access Tokens (ghp_*) — ~40 char body
    { pat = "ghp_[%w]+",                  min_len = 36, label = "ghp" },
    { pat = "gho_[%w]+",                  min_len = 36, label = "gho" },
    { pat = "ghs_[%w]+",                  min_len = 36, label = "ghs" },
    -- AWS access keys: exactly AKIA + 16 chars [A-Z0-9].
    { pat = "AKIA" .. SIXTEEN_UPPER,      label = "aws-key" },
    -- JWT: 3 base64url segments separated by dots; require eyJ prefix
    -- (decodes to `{"`) so we don't match arbitrary dotted slugs.
    { pat = "eyJ[%w_-]+%.[%w_-]+%.[%w_-]+", min_len = 30, label = "jwt" },
    -- OpenAI generic (must come AFTER sk-or-* to avoid double match)
    -- NOTE(review): [%w]+ stops at the first "-" or "_" after "sk-", so a
    -- key whose body contains hyphens (an "sk-proj-..." style key, say) only
    -- matches up to that character and then fails min_len. Confirm whether
    -- such keys are meant to be covered here.
    { pat = "sk%-[%w]+",                  min_len = 20, label = "openai" },
    -- SSH/GPG private key block (multi-line; match header only, the caller
    -- can extend matching to include the body if needed). Greedy across
    -- newlines isn't easy in Lua patterns — we match just the header line
    -- and let policy decide to redact the whole file.
    { pat = "%-%-%-%-%-BEGIN[^\n]-PRIVATE KEY%-%-%-%-%-",
                                            label = "private-key-hdr" },
 }
 -- ---------------------------------------------------------------- load(path)
 -- Load the secrets vault from the Lua file at `path`.
 --
 -- Returns (vault, err). On success vault is
 -- { entries = {{name=, value=}, ...} }; on failure vault is nil and err is a
 -- "secrets: <path>: ..." message.
 --
 -- The file is executed with dofile and must return a list whose items are
 -- either {name=, value=} tables or bare strings (per the issue body).
 -- Bare strings are named "LITERAL_<index>"; tables without a name get
 -- "ENTRY_<index>". Items of any other shape are skipped.
 function M.load(path)
    local f = io.open(path, "r")
    if not f then
        return nil, ("secrets: %s: not found"):format(path)
    end
    f:close()
    -- Mode check: refuse to load if not 0600. stat -c %a is GNU coreutils.
    -- The path is wrapped in single quotes (with embedded single quotes
    -- spliced as '\'') so the shell cannot expand $(...), backticks or
    -- variables inside it; Lua's %q only produces Lua-style double quotes,
    -- which leave those expansions active. "--" stops a path that begins
    -- with "-" from being parsed as an option.
    local quoted = "'" .. path:gsub("'", "'\\''") .. "'"
    local sh = io.popen("stat -c %a -- " .. quoted .. " 2>/dev/null")
    local mode = sh and sh:read("*l")
    if sh then sh:close() end
    if not mode then
        return nil, ("secrets: %s: cannot stat"):format(path)
    end
    if mode ~= "600" then
        return nil, ("secrets: %s: refusing to load (mode %s, want 600 — chmod 600)"):format(path, mode)
    end
    -- pcall so a syntax or runtime error in the vault file becomes an error
    -- return instead of propagating to the caller.
    local ok, payload = pcall(dofile, path)
    if not ok then
        return nil, ("secrets: %s: load failed: %s"):format(path, tostring(payload))
    end
    if type(payload) ~= "table" then
        return nil, ("secrets: %s: must return a list, got %s"):format(path, type(payload))
    end
    local entries = {}
    for i, e in ipairs(payload) do
        if type(e) == "string" then
            entries[#entries + 1] = {
                name  = ("LITERAL_%d"):format(i),
                value = e,
            }
        elseif type(e) == "table" and type(e.value) == "string" then
            entries[#entries + 1] = {
                name  = e.name or ("ENTRY_%d"):format(i),
                value = e.value,
            }
        end
    end
    return { entries = entries }
 end
 -- ---------------------------------------------------------------- session
 -- Session is the per-conversation scrub state; its methods are looked up
 -- through the metatable's __index.
 local Session = {}
 Session.__index = Session
 -- Build a session for `vault` (may be nil, giving an empty vault).
 -- opts.autodetect_patterns overrides M.AUTODETECT_PATTERNS when given.
 function M.make_session(vault, opts)
    local options = opts or {}
    local state = {
        -- vault entries, as produced by M.load
        entries                = (vault and vault.entries) or {},
        -- [value] -> placeholder|decoy
        mapping_by_value       = {},
        -- [placeholder] -> value (for rehydrate)
        mapping_by_placeholder = {},
        -- number of placeholders/decoys handed out so far
        counter                = 0,
        autodetect_patterns    = options.autodetect_patterns or M.AUTODETECT_PATTERNS,
    }
    return setmetatable(state, Session)
 end
 -- True when the length of `s` lies within the optional p.min_len / p.max_len
 -- bounds (a missing bound is not checked).
 local function _meets_length(s, p)
    local lo, hi = p.min_len, p.max_len
    return not ((lo and #s < lo) or (hi and #s > hi))
 end
 -- Return the token that stands in for `value`, allocating one on first use
 -- so the same value always maps to the same token within this session.
 -- Non-stealth tokens look like $AISH_SECRET_NNN and are recorded for
 -- rehydration; stealth tokens are opaque decoys built from `label`
 -- (default "secret") and are never recorded for rehydration.
 -- NOTE(review): the by-value cache is shared by both modes, so a value first
 -- seen in stealth mode keeps its decoy in later non-stealth calls (and the
 -- other way round) — confirm callers never mix modes within one session.
 function Session:_placeholder_for(value, stealth, label)
    local known = self.mapping_by_value[value]
    if known then
        return known
    end
    local slot = self.counter + 1
    self.counter = slot
    local token
    if not stealth then
        token = ("$AISH_SECRET_%03d"):format(slot)
        -- Only non-stealth placeholders go into the rehydration map.
        self.mapping_by_placeholder[token] = value
    else
        -- Opaque decoy keyed off the label (so distinct kinds look distinct
        -- to anyone reading along, without revealing the actual value).
        token = ("xxxxxx-fake-%s-%03d-xxxxxx"):format(label or "secret", slot)
    end
    self.mapping_by_value[value] = token
    return token
 end
 -- Replace secrets in `text` and return the scrubbed string.
 -- `mode` defaults to "vault": "off" returns the text untouched; every other
 -- mode substitutes the vault literals, and "vault+autodetect" / "stealth"
 -- additionally run the session's autodetect patterns. Only "stealth" asks
 -- for decoys instead of rehydratable placeholders. A nil/false/empty text
 -- yields "".
 function Session:scrub(text, mode)
    if not text or text == "" then
        return text or ""
    end
    mode = mode or "vault"
    if mode == "off" then
        return text
    end
    local stealth = (mode == "stealth")

    -- Pass 1: vault literals, in the user's list order. Plain-text find is
    -- used so vault values are never interpreted as Lua patterns.
    for _, entry in ipairs(self.entries) do
        local secret = entry.value
        if secret ~= "" then
            local pieces, cursor = {}, 1
            local from, to = text:find(secret, 1, true)
            while from do
                pieces[#pieces + 1] = text:sub(cursor, from - 1)
                pieces[#pieces + 1] = self:_placeholder_for(secret, stealth, entry.name)
                cursor = to + 1
                from, to = text:find(secret, cursor, true)
            end
            -- Rebuild only when at least one occurrence was replaced.
            if #pieces > 0 then
                pieces[#pieces + 1] = text:sub(cursor)
                text = table.concat(pieces)
            end
        end
    end

    -- Pass 2: autodetect heuristics (Lua patterns), in table order. Matches
    -- that fail the rule's length bounds are put back unchanged.
    if stealth or mode == "vault+autodetect" then
        for _, rule in ipairs(self.autodetect_patterns) do
            text = text:gsub(rule.pat, function(hit)
                if not _meets_length(hit, rule) then
                    return hit
                end
                return self:_placeholder_for(hit, stealth, rule.label)
            end)
        end
    end
    return text
 end
 -- Reverse the placeholder substitution: every $AISH_SECRET_NNN known to
 -- this session is replaced by its recorded value; unknown placeholders are
 -- left as they are. Tolerant to trailing punctuation and surrounding
 -- quotes/backticks (gotcha 1 in the issue body) because only the
 -- placeholder itself is matched. A nil/false/empty text yields "".
 --
 -- Placeholders are formatted with %03d, so they grow past three digits
 -- once more than 999 have been allocated. The whole digit run is therefore
 -- captured and the longest prefix that names a known placeholder wins;
 -- digits after that prefix are ordinary text and are kept. With three-digit
 -- placeholders only, this is the same as matching exactly three digits.
 function Session:rehydrate(text)
    if not text or text == "" then return text or "" end
    local known = self.mapping_by_placeholder
    return (text:gsub("%$AISH_SECRET_(%d%d%d+)", function(digits)
        for len = #digits, 3, -1 do
            local value = known["$AISH_SECRET_" .. digits:sub(1, len)]
            if value then
                return value .. digits:sub(len + 1)
            end
        end
        -- Returning nothing makes gsub keep the original match.
    end))
 end
 -- Introspection helpers for the :secrets meta.

 -- Number of placeholders/decoys allocated so far in this session.
 function Session:mapping_size()
    return self.counter
 end

 -- True when the session was created with at least one vault entry.
 function Session:has_vault()
    return #self.entries > 0
 end

 -- List of the vault entries' names, in vault order.
 function Session:vault_names()
    local names = {}
    for _, entry in ipairs(self.entries) do
        names[#names + 1] = entry.name
    end
    return names
 end
 -- ---------------------------------------------------------------- streaming rehydrator
 -- Streamed assistant deltas may split a placeholder across chunks
 -- ($AISH_SE then CRET_001). The stream object keeps the possibly
 -- incomplete placeholder at the end of the data seen so far in `tail`
 -- and releases everything in front of it.
 local Stream = {}
 Stream.__index = Stream
 -- Create a streaming rehydrator bound to `session`, with an empty tail.
 function M.streaming_rehydrator(session)
    local stream = { session = session, tail = "" }
    return setmetatable(stream, Stream)
 end
 -- Literal text every rehydratable placeholder starts with.
 local PLACEHOLDER_PREFIX = "$AISH_SECRET_"
 -- True when `s` is a proper, still incomplete beginning of a placeholder:
 -- a prefix of "$AISH_SECRET_", or that text followed by one or two digits.
 local function _is_partial_placeholder(s)
    local n = #PLACEHOLDER_PREFIX
    if #s <= n then
        return PLACEHOLDER_PREFIX:sub(1, #s) == s
    end
    if s:sub(1, n) ~= PLACEHOLDER_PREFIX then
        return false
    end
    return s:find("^%d%d?$", n + 1) ~= nil
 end
 -- Feed one streamed chunk (nil counts as "") and return the text that is
 -- safe to emit now, with complete placeholders rehydrated.
 --
 -- If the data ends in an incomplete placeholder, that fragment is kept in
 -- self.tail and prepended to the next chunk. The split is done on the raw
 -- text before rehydration, so characters that come out of a secret value
 -- are never buffered or scanned a second time. Only exact prefixes are
 -- held, so ordinary text such as "$123" is emitted immediately.
 function Stream:push(chunk)
    local combined = self.tail .. (chunk or "")
    local head, held = combined, ""
    -- Position of the last "$": a "$" with no further "$" up to the end.
    local last_dollar = combined:find("%$[^%$]*$")
    if last_dollar then
        local maybe = combined:sub(last_dollar)
        if _is_partial_placeholder(maybe) then
            head = combined:sub(1, last_dollar - 1)
            held = maybe
        end
    end
    self.tail = held
    return self.session:rehydrate(head)
 end
 -- End of stream: hand back whatever is still buffered and clear the
 -- buffer. The buffered text gets a final rehydrate pass, since it may hold
 -- a complete placeholder that was kept only because no chunk followed it.
 function Stream:flush()
    local pending
    pending, self.tail = self.tail, ""
    return self.session:rehydrate(pending)
 end
 return M
@@ -0,0 +1,108 @@
 -- test_router_model.lua — Phase 5 commit #1 corpus for classify_model.
 -- Run from repo root: `luajit test_router_model.lua` (exit 0 on pass).
 package.path = "./?.lua;./vendor/?.lua;" .. package.path
 local router = require("router")
 -- Routing config handed to router.classify_model for every case below:
 -- the "code" class maps to the "deep" model and "reasoning" to "cloud".
 local cfg = {
    routing = {
        auto = true,
        classes = {
            code      = "deep",
            reasoning = "cloud",
            default   = nil,   -- nil → keep current
        },
    },
 }
 -- Classification corpus. Each row is (text, expected_class): text is the
 -- prompt passed to the classifier, expected_class the class name it must
 -- return as its second result.
 local CASES = {
    -- ── code class
    { "```python\ndef foo(): pass\n```",            "code" },
    { "Traceback (most recent call last):",          "code" },
    { "got a stack trace from my server",            "code" },
    { "error: undefined reference to `foo'",         "code" },
    { "exception: file not found",                   "code" },
    { "please look at ./src/main.lua",               "code" },
    { "the issue is in ~/repos/foo/bar.py",          "code" },
    { "check /usr/lib/python3/dist-packages/x.py",   "code" },
    { "fix this:\n    if x == 0:\n        return\n    else:\n        pass\n", "code" },
    -- ── reasoning class
    { "Explain how MMAP works on Linux",             "reasoning" },
    { "why does my disk fill up so fast",            "reasoning" },
    { "how does ASLR work?",                         "reasoning" },
    { "compare ZFS and btrfs in terms of snapshots", "reasoning" },
    { "Can you explain the difference between a process and a thread in detail?", "reasoning" },
    { "I have a long question with a question mark in it that goes well past one hundred characters does it route to reasoning?", "reasoning" },
    -- ── default class — short queries, no markers
    { "hi",                                          "default" },
    { "what time is it",                             "default" },
    { "ls /tmp",                                     "default" },
    { "thanks",                                      "default" },
    { "explain",                                     "default" },   -- bare "explain" < 30 chars threshold
    { "why",                                         "default" },   -- bare "why"
    { "?",                                           "default" },   -- bare ?
    { "hello world",                                 "default" },
    -- ── edge: ambiguous — prefer false-positive into reasoning over false-negative
    { "How does it feel to be a robot? Just curious.", "reasoning" },  -- 47 chars + how does
    -- ── edge: short error mention should still be code
    { "got error: foo",                              "code" },
    -- ── edge: a non-code path-like (e.g. README.md, .txt) should NOT match
    { "see notes.md for details",                    "default" },
    { "lookup /tmp/x.txt",                           "default" },
 }
 -- Run the classification corpus: compare the class returned by
 -- router.classify_model (its second result) with the expected one and keep
 -- a formatted line for every mismatch.
 local pass, fail = 0, 0
 local fails = {}
 for i, c in ipairs(CASES) do
    local _model, class = router.classify_model(c[1], cfg)
    if class == c[2] then
        pass = pass + 1
    else
        fail = fail + 1
        -- Only the first 60 characters of the prompt are shown.
        fails[#fails+1] = string.format("  [%2d] text=%q expected=%s got=%s",
            i, c[1]:sub(1, 60), c[2], tostring(class))
    end
 end
 print(string.format("router.classify_model: %d/%d pass", pass, pass+fail))
 for _, f in ipairs(fails) do print(f) end
 -- These failures have now been reported. Start a fresh list so the summary
 -- at the end of the script does not print them a second time; `fail` keeps
 -- counting them for the totals and the exit status.
 fails = {}
 -- Verify model routing: code → "deep", reasoning → "cloud", default → nil
 -- Rows are (text, expected_model, expected_class).
 local cases_model = {
    { "Traceback",                "deep",  "code"      },
    { "Explain in detail how X works", "cloud", "reasoning" },
    { "hi",                       nil,     "default"   },
 }
 print()
 for _, row in ipairs(cases_model) do
    local text, want_model, want_class = row[1], row[2], row[3]
    local got_model, got_class = router.classify_model(text, cfg)
    local matches = (got_model == want_model) and (got_class == want_class)
    if not matches then
        fail = fail + 1
        fails[#fails+1] = string.format(
            "  model: text=%q expected (%s,%s) got (%s,%s)",
            text, tostring(want_model), want_class,
            tostring(got_model), tostring(got_class))
    else
        pass = pass + 1
    end
 end
 -- Verify the R-N2 default: when classes.reasoning = nil, reasoning text → no override
 local cfg_safe = { routing = { auto = true, classes = { code = "deep", reasoning = nil } } }
 local safe_model, safe_class = router.classify_model("Explain in detail how X works", cfg_safe)
 local safe_ok = (safe_model == nil) and (safe_class == "reasoning")
 if not safe_ok then
    fail = fail + 1
    fails[#fails+1] = string.format(
        "  reasoning-nil-default: got (%s,%s); want (nil,reasoning)",
        tostring(safe_model), tostring(safe_class))
 else
    pass = pass + 1
 end
 -- Final tally over every check in this script, then the collected failure
 -- lines; the exit status is 0 only when nothing failed.
 print(string.format("\nTOTAL: %d/%d pass", pass, pass+fail))
 for _, line in ipairs(fails) do
    print(line)
 end
 if fail == 0 then
    os.exit(0)
 else
    os.exit(1)
 end
@@ -0,0 +1,140 @@
 -- test_safety.lua — Phase 3 commit #1 test corpus.
 -- Run from repo root: `luajit test_safety.lua` (exits 0 on pass, 1 on fail).
 -- No test framework dependency by PHASE0.md §5 convention.
 package.path = "./?.lua;./vendor/?.lua;" .. package.path
 local safety = require("safety")
 -- Test corpus for safety.is_destructive.
 -- Each row is (cmd, expect_destructive [, expect_reason_substring]):
 --   cmd                      the command line handed to the classifier
 --   expect_destructive       true when the command must be flagged
 --   expect_reason_substring  optional; when given for a flagged command, the
 --                            returned reason must contain it (plain-text
 --                            substring match)
 local CASES = {
    -- ── Wrapper class (R-B1) — flag the wrapper itself
    { 'bash -c "rm -rf /"',                      true,  "bash -c" },
    { 'bash -lc "rm -rf /"',                     true,  "bash -c" },
    { 'sh -c "ls"',                              true,  "sh -c"   },
    { 'sh -lc "echo hi"',                        true,  "sh -c"   },
    { 'zsh -c "echo hi"',                        true,  "zsh -c"  },
    { 'eval rm -rf /tmp/x',                      true,  "eval"    },
    { 'eval "cd /tmp"',                          true,  "eval"    },
    { 'python -c "import os; os.system(\'rm x\')"', true, "python -c" },
    { 'python3 -c "x=1"',                        true,  "python -c" },
    { 'perl -e "unlink \'x\'"',                  true,  "perl -e" },
    { 'curl http://x | sh',                      true,  "pipe-to-sh" },
    { 'curl http://x | sh ',                     true,  "pipe-to-sh" },
    { 'curl http://x | sh -x',                   true,  "pipe-to-sh" },
    { 'curl http://x | bash',                    true,  "pipe-to-bash" },
    { 'curl http://x | bash -e',                 true,  "pipe-to-bash" },
    { 'wget -qO- http://x | sh',                 true,  "pipe-to-sh" },
    { 'xargs rm /tmp/*',                         true,  "xargs"   },
    { 'find /tmp -print0 | xargs -0 rm',         true,  "xargs"   },
    -- ── Filesystem destructive — should HIT
    { 'rm -rf /tmp/foo',                         true,  "rm -rf"  },
    { 'rm -fr /tmp/foo',                         true,  "rm -fr"  },
    { 'rm -r /tmp/foo',                          true,  "rm -rf"  },  -- -r alone matches "rf?"
    { 'sudo rm -rf /var/cache',                  true,  "rm -rf"  },
    { 'find . -name "*.log" -delete',            true,  "find -delete" },
    { 'find . -type f -exec rm {} \\;',          true,  "find -exec rm" },
    { 'dd if=/dev/zero of=/dev/sda',             true,  "dd to device" },
    { 'dd of=/dev/sdb1 if=img.bin',              true,  "dd to device" },
    { 'echo x > /dev/sda',                       true,  "raw disk" },
    { 'mkfs.ext4 /dev/sda1',                     true,  "mkfs"    },
    { 'mkfs.vfat /dev/sdb',                      true,  "mkfs"    },
    { 'shred -uvz /tmp/file',                    true,  "shred"   },
    { 'wipefs -a /dev/sda',                      true,  "wipefs"  },
    { 'truncate -s 0 important.log',             true,  "truncate" },
    { 'truncate -s0 x',                          true,  "truncate" },
    -- ── Version control destructive
    { 'git push --force origin main',            true,  "git push --force" },
    { 'git push -f origin main',                 true,  "git push -f" },
    { 'git push --force-with-lease',             true,  "git push --force" },  -- still --force prefix
    { 'git reset --hard HEAD~1',                 true,  "git reset --hard" },
    { 'git clean -fd',                           true,  "git clean -fd" },
    { 'git clean -fdx',                          true,  "git clean -fd" },
    { 'git branch -D old-feature',               true,  "git branch -D" },
    -- ── Database / process
    { 'DROP TABLE users;',                       true,  "DROP TABLE" },
    { 'drop table users',                        true,  "DROP TABLE" },  -- ci
    { 'Drop Table x',                            true,  "DROP TABLE" },
    { 'DROP DATABASE prod;',                     true,  "DROP DATABASE" },
    { 'TRUNCATE TABLE logs',                     true,  "TRUNCATE TABLE" },
    { 'truncate table logs',                     true,  "TRUNCATE TABLE" },  -- ci
    { 'kill -9 1234',                            true,  "kill -9" },
    { 'pkill -9 nginx',                          true,  "pkill -9" },
    -- ── Permission
    { 'chmod 777 /etc/passwd',                   true,  "chmod 777" },
    { 'chmod -R 777 /var',                       true,  "chmod 777" },
    { 'chown -R user /',                         true,  "chown on root" },
    -- ── Should NOT hit (safe / read-only / specific)
    { 'ls -la /tmp',                             false, nil },
    { 'cat /etc/hostname',                       false, nil },
    { 'echo hello world',                        false, nil },
    { 'grep -r foo /etc',                        false, nil },
    { 'rm /tmp/x.log',                           false, nil },   -- no -r/-f flag
    { 'find . -name "*.log"',                    false, nil },   -- no -delete/-exec rm
    { 'find . -type f',                          false, nil },
    { 'git push origin main',                    false, nil },   -- no --force
    { 'git status',                              false, nil },
    { 'git log --oneline',                       false, nil },
    { 'git clean -n',                            false, nil },   -- dry-run, no -fd
    { 'git branch new-feature',                  false, nil },   -- not -D
    { 'git reset HEAD',                          false, nil },   -- no --hard
    { 'chmod 644 file',                          false, nil },
    { 'chmod -R 755 /usr/local',                 false, nil },
    { 'chown user /etc/passwd',                  false, nil },   -- not root path
    { 'kill 1234',                               false, nil },   -- no -9
    { 'SELECT * FROM users',                     false, nil },
    { 'ls | grep foo',                           false, nil },   -- innocent pipe
    { 'ps aux | head',                           false, nil },
    { 'curl http://example.com',                 false, nil },
    { 'pwd',                                     false, nil },
    { 'cd /tmp',                                 false, nil },
    { 'make all',                                false, nil },
    { 'python3 script.py',                       false, nil },   -- not -c
    { 'perl script.pl',                          false, nil },   -- not -e
    { 'bash script.sh',                          false, nil },   -- not -c
    { 'sh script.sh',                            false, nil },
    { 'mkdir /tmp/newdir',                       false, nil },
    { 'touch /tmp/newfile',                      false, nil },
    { 'cp file1 file2',                          false, nil },
    { 'mv file1 file2',                          false, nil },
    { 'tail -f /var/log/syslog',                 false, nil },
    -- ── Tricky edge cases (test the boundary)
    { 'echo "rm -rf /"',                         true,  "rm -rf"  },  -- false positive: substring match
    -- ^ that's a known false-positive — Norris user can `proceed` after reading
    { 'truncate -s 100M big.dat',                false, nil },   -- not -s 0
    { '',                                        false, nil },   -- empty
 }
 -- Run every case through safety.is_destructive, tally the results, print a
 -- summary plus one line per failing case, and exit 0 only when all passed.
 local pass, fail, fails = 0, 0, {}
 for idx, case in ipairs(CASES) do
    local cmd, want_destr, want_reason = case[1], case[2], case[3]
    local flagged, reason = safety.is_destructive(cmd)
    flagged = not not flagged  -- normalize to a real boolean
    local matched = (flagged == want_destr)
    if matched and want_destr and want_reason then
        -- Optional check: the reason must contain the expected substring
        -- (plain find, no pattern magic).
        matched = (reason and reason:find(want_reason, 1, true) ~= nil)
    end
    if not matched then
        fail = fail + 1
        fails[#fails + 1] = string.format(
            "  [%2d] cmd=%q  expected=%s  got=%s  reason=%s",
            idx, cmd, tostring(want_destr), tostring(flagged),
            tostring(reason))
    else
        pass = pass + 1
    end
 end
 print(string.format("safety test: %d/%d pass", pass, pass + fail))
 for _, line in ipairs(fails) do
    print(line)
 end
 if fail == 0 then
    os.exit(0)
 else
    os.exit(1)
 end
@@ -0,0 +1,752 @@
 -- Module options:
 -- always_use_lpeg: when the global table is registered, selects which of
 --   the two module tables (jsonlpeg or json) is stored in it.
 -- register_global_module_table: when true, the module table is also stored
 --   in _G under global_module_name.
 -- global_module_name: the key used in _G for that registration.
 local always_use_lpeg = false
 local register_global_module_table = false
 local global_module_name = 'json'
 --[==[
 David Kolf's JSON module for Lua 5.1 - 5.4
 Version 2.8
 For the documentation see the corresponding readme.txt or visit
 <http://dkolf.de/dkjson-lua/>.
 You can contact the author by sending an e-mail to 'david' at the
 domain 'dkolf.de'.
 Copyright (C) 2010-2024 David Heiko Kolf
 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:
 The above copyright notice and this permission notice shall be
 included in all copies or substantial portions of the Software.
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 --]==]
 -- global dependencies:
 local pairs, type, tostring, tonumber, getmetatable, setmetatable =
      pairs, type, tostring, tonumber, getmetatable, setmetatable
 local error, require, pcall, select = error, require, pcall, select
 local floor, huge = math.floor, math.huge
 local strrep, gsub, strsub, strbyte, strchar, strfind, strlen, strformat =
      string.rep, string.gsub, string.sub, string.byte, string.char,
      string.find, string.len, string.format
 local strmatch = string.match
 local concat = table.concat
 -- The two module tables: `json` carries the version string; `jsonlpeg`
 -- starts out empty.
 local json = { version = "dkjson 2.8" }
 local jsonlpeg = {}
 -- Optional registration in the global table. This has to run before _ENV
 -- is set to nil below, because it reads _G.
 if register_global_module_table then
   if always_use_lpeg then
     _G[global_module_name] = jsonlpeg
   else
     _G[global_module_name] = json
   end
 end
 -- From here on only the locals captured above can be used.
 local _ENV = nil -- blocking globals in Lua 5.2 and later
 -- Wrapped in pcall so that a failure to load the debug library is ignored
 -- and the plain getmetatable stays in use.
 pcall (function()
   -- Enable access to blocked metatables.
   -- Don't worry, this module doesn't change anything in them.
   local debmeta = require "debug".getmetatable
   if debmeta then getmetatable = debmeta end
 end)
 -- json.null: an empty table whose metatable's __tojson returns "null".
 json.null = setmetatable ({}, {
   __tojson = function () return "null" end
 })
 -- Decide whether `tbl` should be encoded as a JSON array.
 -- Returns false as soon as a key is not a positive integer (a numeric 'n'
 -- field is the one exception and is read as a declared length), or when
 -- the table is too sparse. Otherwise returns true plus the largest
 -- index / declared length seen.
 local function isarray (tbl)
   local highest, count, declared = 0, 0, 0
   for key, val in pairs (tbl) do
     if key == 'n' and type(val) == 'number' then
       declared = val
       if highest < val then
         highest = val
       end
     elseif type(key) == 'number' and key >= 1 and floor(key) == key then
       if highest < key then
         highest = key
       end
       count = count + 1
     else
       return false
     end
   end
   local sparse = highest > 10 and highest > declared and highest > count * 2
   if sparse then
     return false -- don't create an array with too many holes
   end
   return true, highest
 end
 -- Short JSON escapes for the characters that have one.
 local escapecodes = {
   ["\""] = "\\\"", ["\\"] = "\\\\", ["\b"] = "\\b", ["\f"] = "\\f",
   ["\n"] = "\\n",  ["\r"] = "\\r",  ["\t"] = "\\t"
 }
 -- Turn one character (given as its 1-4 byte UTF-8 encoding) into a JSON
 -- escape: the short form from escapecodes when there is one, otherwise
 -- \uXXXX, or a UTF-16 surrogate pair above 0xffff. Byte sequences that do
 -- not fit any of the lead-byte ranges below, and values above 0x10ffff,
 -- give "".
 local function escapeutf8 (uchar)
   local short = escapecodes[uchar]
   if short then
     return short
   end
   local b1, b2, b3, b4 = strbyte (uchar, 1, 4)
   b1, b2, b3, b4 = b1 or 0, b2 or 0, b3 or 0, b4 or 0
   -- Decode the code point from the lead byte and its continuation bytes.
   local code
   if b1 <= 0x7f then
     code = b1
   elseif 0xc0 <= b1 and b1 <= 0xdf and b2 >= 0x80 then
     code = (b1 - 0xc0) * 0x40 + b2 - 0x80
   elseif 0xe0 <= b1 and b1 <= 0xef and b2 >= 0x80 and b3 >= 0x80 then
     code = ((b1 - 0xe0) * 0x40 + b2 - 0x80) * 0x40 + b3 - 0x80
   elseif 0xf0 <= b1 and b1 <= 0xf7 and b2 >= 0x80 and b3 >= 0x80 and b4 >= 0x80 then
     code = (((b1 - 0xf0) * 0x40 + b2 - 0x80) * 0x40 + b3 - 0x80) * 0x40 + b4 - 0x80
   else
     return ""
   end
   if code > 0x10ffff then
     return ""
   end
   if code <= 0xffff then
     return strformat ("\\u%.4x", code)
   end
   -- encode as UTF-16 surrogate pair
   local offset = code - 0x10000
   local highsur = 0xD800 + floor (offset/0x400)
   local lowsur = 0xDC00 + (offset % 0x400)
   return strformat ("\\u%.4x\\u%.4x", highsur, lowsur)
 end
 -- gsub that first checks with find whether the pattern occurs at all.
 -- gsub always builds a new string in a buffer, even when no match
 -- exists, so checking first should be cheaper when most strings don't
 -- contain the pattern. Without a match the input is returned as is;
 -- with one, the results of gsub are returned.
 local function fsub (str, pattern, repl)
   if not strfind (str, pattern) then
     return str
   end
   return gsub (str, pattern, repl)
 end
 -- Byte patterns for the multi-byte sequences that are escaped in addition
 -- to the single-byte set handled first in quotestring. They are applied in
 -- this order.
 local extraescapes = {
   "\194[\128-\159\173]",
   "\216[\128-\132]",
   "\220\143",
   "\225\158[\180\181]",
   "\226\128[\140-\143\168-\175]",
   "\226\129[\160-\175]",
   "\239\187\191",
   "\239\191[\176-\191]",
 }
 -- Return `value` as a double-quoted JSON string with the escapable
 -- characters replaced through escapeutf8.
 local function quotestring (value)
   -- based on the regexp "escapable" in https://github.com/douglascrockford/JSON-js
   local escaped = fsub (value, "[%z\1-\31\"\\\127]", escapeutf8)
   -- The multi-byte patterns are only tried when one of their lead bytes
   -- is present at all.
   if strfind (escaped, "[\194\216\220\225\226\239]") then
     for i = 1, #extraescapes do
       escaped = fsub (escaped, extraescapes[i], escapeutf8)
     end
   end
   return "\"" .. escaped .. "\""
 end
 json.quotestring = quotestring
 -- Replace the first plain-text (non-pattern) occurrence of `o` in `str`
 -- with `n`; `str` is returned unchanged when `o` does not occur.
 local function replace(str, o, n)
   local first, last = strfind (str, o, 1, true)
   if not first then
     return str
   end
   return strsub(str, 1, first-1) .. n .. strsub(str, last+1, -1)
 end
 -- locale independent num2str and str2num functions
 -- decpoint: the decimal separator tostring currently produces.
 -- numfilter: pattern for runs of characters that are not part of a number.
 local decpoint, numfilter
 -- Re-detect the decimal separator by formatting 0.5, then rebuild numfilter
 -- around it.
 local function updatedecpoint ()
   decpoint = strmatch(tostring(0.5), "([^05+])")
   -- build a filter that can be used to remove group separators
   -- (magic characters in the separator are escaped so it is taken
   -- literally inside the character class)
   local escaped = gsub(decpoint, "[%^%$%(%)%%%.%[%]%*%+%-%?]", "%%%0")
   numfilter = "[^0-9%-%+eE" .. escaped .. "]+"
 end
 updatedecpoint()
 -- Format a number for JSON: take tostring's output, drop every character
 -- matched by numfilter, then turn the locale's decimal separator into ".".
 local function num2str (num)
   local digits = fsub(tostring(num), numfilter, "")
   return replace(digits, decpoint, ".")
 end
 -- Parse a JSON number: "." is swapped for the current decimal separator
 -- before tonumber is called. If that fails, the separator is re-detected
 -- and the conversion is tried once more. Returns nil when both attempts
 -- fail.
 local function str2num (str)
   local parsed = tonumber(replace(str, ".", decpoint))
   if parsed then
     return parsed
   end
   updatedecpoint()
   return (tonumber(replace(str, ".", decpoint)))
 end
 -- Append a newline and `level` levels of two-space indentation to `buffer`
 -- after position `buflen`; returns the new buffer length.
 local function addnewline2 (level, buffer, buflen)
   local newlen = buflen + 2
   buffer[newlen - 1] = "\n"
   buffer[newlen] = strrep ("  ", level)
   return newlen
 end
 -- Add a newline plus indentation to the encoder state's buffer and update
 -- state.bufferlen; does nothing when state.indent is not set.
 -- state.level defaults to 0 and state.bufferlen to the buffer's length.
 function json.addnewline (state)
   if not state.indent then
     return
   end
   local level = state.level or 0
   local buflen = state.bufferlen or #(state.buffer)
   state.bufferlen = addnewline2 (level, state.buffer, buflen)
 end
 local encode2 -- forward declaration
 -- Append one `"key":value` member of a JSON object to `buffer`.
 -- `prev` says whether a member was already written (a "," goes first) and
 -- `indent` whether each member starts on its own indented line.
 -- Returns whatever encode2 returns for the value, or nil plus a message
 -- when the key is neither a string nor a number.
 local function addpair (key, value, prev, indent, level, buffer, buflen, tables, globalorder, state)
   local keytype = type (key)
   if not (keytype == 'string' or keytype == 'number') then
     return nil, "type '" .. keytype .. "' is not supported as a key by JSON."
   end
   if prev then
     buffer[buflen + 1] = ","
     buflen = buflen + 1
   end
   if indent then
     buflen = addnewline2 (level, buffer, buflen)
   end
   -- When Lua is compiled with LUA_NOCVTN2S this will fail when
   -- numbers are mixed into the keys of the table. JSON keys are always
   -- strings, so this would be an implicit conversion too and the failure
   -- is intentional.
   local keypos = buflen + 1
   buffer[keypos] = quotestring (key)
   buffer[keypos + 1] = ":"
   return encode2 (value, indent, level, buffer, keypos + 1, tables, globalorder, state)
 end
 -- Append the result of a custom encoder or exception handler to `buffer`.
 -- Only string results are appended; in every case the buffer length, taken
 -- from state.bufferlen, is returned.
 local function appendcustom(res, buffer, state)
   local buflen = state.bufferlen
   if type (res) ~= 'string' then
     return buflen
   end
   buflen = buflen + 1
   buffer[buflen] = res
   return buflen
 end
 -- Report an encoding problem. Without a state.exception handler the result
 -- is nil plus the message (`defaultmessage`, or `reason` when none is
 -- given). With a handler, state.bufferlen is set to `buflen` and the
 -- handler is called: a false/nil result becomes nil plus the handler's
 -- message or the default, any other result is handed to appendcustom.
 local function exception(reason, value, state, buffer, buflen, defaultmessage)
   local fallback = defaultmessage or reason
   local handler = state.exception
   if handler then
     state.bufferlen = buflen
     local ret, msg = handler (reason, value, state, fallback)
     if ret then
       return appendcustom(ret, buffer, state)
     end
     return nil, msg or fallback
   end
   return nil, fallback
 end
 -- Exception handler that, instead of failing, yields the default message
 -- wrapped in "<" and ">" as a quoted JSON string. Only `defaultmessage` is
 -- used; the other parameters are ignored.
 function json.encodeexception(reason, value, state, defaultmessage)
   local text = "<" .. defaultmessage .. ">"
   return quotestring(text)
 end
 -- Recursive worker behind json.encode: serialises `value` into `buffer`.
 --
 --   value        the Lua value to encode
 --   indent       truthy to emit newlines/indentation between object members
 --   level        current nesting depth (incremented for every table)
 --   buffer       array of output fragments
 --   buflen       number of fragments already stored in `buffer`
 --   tables       set of values currently being encoded; used to detect
 --                reference cycles
 --   globalorder  optional key order list for objects that have no
 --                __jsonorder of their own
 --   state        encoder state table (exception handler, bufferlen, ...)
 --
 -- Returns the new fragment count, or nil plus a message on failure.
 --
 -- The cycle mark in `tables` is always removed before this function
 -- returns for a value it marked itself. Otherwise a failed __tojson
 -- encoder whose error was replaced by an exception handler would leave
 -- the value marked, and a later, non-cyclic occurrence of the same value
 -- would be misreported as a reference cycle. The same applies to a
 -- caller-supplied state.tables that is reused after a failed encode.
 encode2 = function (value, indent, level, buffer, buflen, tables, globalorder, state)
  local valtype = type (value)
  local valmeta = getmetatable (value)
  valmeta = type (valmeta) == 'table' and valmeta -- only tables
  local valtojson = valmeta and valmeta.__tojson
  if valtojson then
    if tables[value] then
      return exception('reference cycle', value, state, buffer, buflen)
    end
    tables[value] = true
    state.bufferlen = buflen
    local ret, msg = valtojson (value, state)
    -- The custom encoder has returned, so the value is no longer in
    -- progress; unmark it before any early return below.
    tables[value] = nil
    if not ret then return exception('custom encoder failed', value, state, buffer, buflen, msg) end
    buflen = appendcustom(ret, buffer, state)
  elseif value == nil then
    buflen = buflen + 1
    buffer[buflen] = "null"
  elseif valtype == 'number' then
    local s
    if value ~= value or value >= huge or -value >= huge then
      -- NaN and +/- infinity have no JSON representation.
      -- This is the behaviour of the original JSON implementation.
      s = "null"
    else
      s = num2str (value)
    end
    buflen = buflen + 1
    buffer[buflen] = s
  elseif valtype == 'boolean' then
    buflen = buflen + 1
    buffer[buflen] = value and "true" or "false"
  elseif valtype == 'string' then
    buflen = buflen + 1
    buffer[buflen] = quotestring (value)
  elseif valtype == 'table' then
    if tables[value] then
      return exception('reference cycle', value, state, buffer, buflen)
    end
    tables[value] = true
    level = level + 1
    local isa, n = isarray (value)
    if n == 0 and valmeta and valmeta.__jsontype == 'object' then
      -- An empty table explicitly tagged as object is written as {}.
      isa = false
    end
    local msg
    if isa then -- JSON array
      buflen = buflen + 1
      buffer[buflen] = "["
      for i = 1, n do
        buflen, msg = encode2 (value[i], indent, level, buffer, buflen, tables, globalorder, state)
        if not buflen then
          tables[value] = nil
          return nil, msg
        end
        if i < n then
          buflen = buflen + 1
          buffer[buflen] = ","
        end
      end
      buflen = buflen + 1
      buffer[buflen] = "]"
    else -- JSON object
      local prev = false
      buflen = buflen + 1
      buffer[buflen] = "{"
      local order = valmeta and valmeta.__jsonorder or globalorder
      if order then
        -- First the keys listed in `order`, in that order ...
        local used = {}
        n = #order
        for i = 1, n do
          local k = order[i]
          local v = value[k]
          if v ~= nil then
            used[k] = true
            buflen, msg = addpair (k, v, prev, indent, level, buffer, buflen, tables, globalorder, state)
            if not buflen then
              tables[value] = nil
              return nil, msg
            end
            prev = true -- add a separator before the next element
          end
        end
        -- ... then every remaining key in unspecified order.
        for k,v in pairs (value) do
          if not used[k] then
            buflen, msg = addpair (k, v, prev, indent, level, buffer, buflen, tables, globalorder, state)
            if not buflen then
              tables[value] = nil
              return nil, msg
            end
            prev = true -- add a separator before the next element
          end
        end
      else -- unordered
        for k,v in pairs (value) do
          buflen, msg = addpair (k, v, prev, indent, level, buffer, buflen, tables, globalorder, state)
          if not buflen then
            tables[value] = nil
            return nil, msg
          end
          prev = true -- add a separator before the next element
        end
      end
      if indent then
        buflen = addnewline2 (level - 1, buffer, buflen)
      end
      buflen = buflen + 1
      buffer[buflen] = "}"
    end
    tables[value] = nil
  else
    return exception ('unsupported type', value, state, buffer, buflen,
      "type '" .. valtype .. "' is not supported by JSON.")
  end
  return buflen
 end
 -- Encodes a Lua value as JSON.
 --
 --   value  the value to encode
 --   state  optional table of options; the fields read here are `indent`,
 --          `level`, `buffer`, `bufferlen`, `tables` and `keyorder`
 --
 -- Returns the JSON text as a string. When the caller supplied its own
 -- state.buffer the fragments are appended to that buffer instead,
 -- state.bufferlen is updated and `true` is returned.
 -- Raises an error (attributed to the caller) when encoding fails.
 function json.encode (value, state)
  state = state or {}
  local oldbuffer = state.buffer
  local buffer = oldbuffer or {}
  state.buffer = buffer
  updatedecpoint()
  local ret, msg = encode2 (value, state.indent, state.level or 0,
                   buffer, state.bufferlen or 0, state.tables or {}, state.keyorder, state)
  if not ret then
    if oldbuffer ~= buffer then
      -- The buffer was a temporary one created above. Remove it from the
      -- state before raising, exactly like the success path does;
      -- otherwise a later call reusing this state would mistake it for a
      -- caller-supplied buffer, append to stale fragments and return
      -- `true` instead of a string.
      state.bufferlen = nil
      state.buffer = nil
    end
    error (msg, 2)
  elseif oldbuffer == buffer then
    state.bufferlen = ret
    return true
  else
    state.bufferlen = nil
    state.buffer = nil
    return concat (buffer)
  end
 end
 -- Translates the byte position `where` inside `str` into a human readable
 -- "line L, column C" description. Only "\n" characters located before
 -- `where` count as line breaks.
 local function loc (str, where)
  local lineno, lastbreak = 1, 0
  local nl = strfind (str, "\n", 1, true)
  while nl and nl < where do
    lineno = lineno + 1
    lastbreak = nl
    nl = strfind (str, "\n", nl + 1, true)
  end
  return strformat ("line %d, column %d", lineno, where - lastbreak)
 end
 -- Builds the decoder's error triple for a construct that was never closed:
 -- nil, the position one past the end of `str`, and a message naming `what`
 -- together with the location `where` at which the construct started.
 local function unterminated (str, what, where)
  local endpos = strlen (str) + 1
  local message = "unterminated " .. what .. " at " .. loc (str, where)
  return nil, endpos, message
 end
 -- Skips whitespace, UTF-8 byte order marks, "//" line comments and
 -- "/* */" block comments starting at `pos`.
 -- Returns the position of the next significant character, or nil when the
 -- input ends first (this includes an unclosed block comment and a line
 -- comment that is not followed by a line break).
 local function scanwhite (str, pos)
  local cursor = strfind (str, "%S", pos)
  while cursor do
    local pair = strsub (str, cursor, cursor + 1)
    if pair == "//" then
      -- line comment: continue at the terminating line break
      cursor = strfind (str, "[\n\r]", cursor + 2)
    elseif pair == "/*" then
      -- block comment: continue right behind the closing "*/"
      local close = strfind (str, "*/", cursor + 2)
      cursor = close and close + 2
    elseif pair == "\239\187" and strsub (str, cursor + 2, cursor + 2) == "\191" then
      -- UTF-8 Byte Order Mark
      cursor = cursor + 3
    else
      return cursor
    end
    cursor = cursor and strfind (str, "%S", cursor)
  end
  return nil
 end
 -- Maps the character following a backslash in a JSON string to the
 -- character it stands for.
 local escapechars = {
  ["\""] = "\"",
  ["\\"] = "\\",
  ["/"] = "/",
  b = "\b",
  f = "\f",
  n = "\n",
  r = "\r",
  t = "\t",
 }
 -- Encodes the code point `value` as a UTF-8 byte sequence of one to four
 -- bytes. Returns nil for values outside the range 0 .. 0x10ffff.
 local function unichar (value)
  if not (value >= 0 and value <= 0x10ffff) then
    return nil
  end
  if value <= 0x007f then
    return strchar (value)
  end
  -- continuation bytes carry six payload bits each
  local last = 0x80 + (floor(value) % 0x40)
  if value <= 0x07ff then
    return strchar (0xc0 + floor(value/0x40), last)
  end
  local middle = 0x80 + (floor(value/0x40) % 0x40)
  if value <= 0xffff then
    return strchar (0xe0 + floor(value/0x1000), middle, last)
  end
  return strchar (0xf0 + floor(value/0x40000),
                  0x80 + (floor(value/0x1000) % 0x40),
                  middle,
                  last)
 end
 -- Decodes a "\uXXXX" escape whose backslash sits at `escpos`. A high
 -- surrogate that is directly followed by an escaped low surrogate is
 -- combined with it into a single code point.
 -- Returns the UTF-8 text and the position behind the escape, or nil when
 -- the escape cannot be decoded.
 local function scanunicodeescape (str, escpos)
  local code = tonumber (strsub (str, escpos + 2, escpos + 5), 16)
  if not code then
    return nil
  end
  local width = 6
  if 0xD800 <= code and code <= 0xDBff
     and strsub (str, escpos + 6, escpos + 7) == "\\u" then
    local low = tonumber (strsub (str, escpos + 8, escpos + 11), 16)
    if low and 0xDC00 <= low and low <= 0xDFFF then
      code = (code - 0xD800) * 0x400 + (low - 0xDC00) + 0x10000
      width = 12
    end
  end
  local text = unichar (code)
  if text then
    return text, escpos + width
  end
  return nil
 end
 -- Scans the JSON string whose opening quote is at `pos`.
 -- Returns the decoded string and the position behind the closing quote,
 -- or the `unterminated` error triple when no closing quote is found.
 -- Escapes that are unknown or cannot be decoded yield the character that
 -- follows the backslash.
 local function scanstring (str, pos)
  local cursor = pos + 1
  local pieces, count = {}, 0
  while true do
    local hit = strfind (str, "[\"\\]", cursor)
    if not hit then
      return unterminated (str, "string", pos)
    end
    if hit > cursor then
      -- plain text between the previous stop and this quote/backslash
      count = count + 1
      pieces[count] = strsub (str, cursor, hit - 1)
    end
    if strsub (str, hit, hit) == "\"" then
      cursor = hit + 1
      break
    end
    local escchar = strsub (str, hit + 1, hit + 1)
    local text, resume
    if escchar == "u" then
      text, resume = scanunicodeescape (str, hit)
    end
    if not text then
      text = escapechars[escchar] or escchar
      resume = hit + 2
    end
    count = count + 1
    pieces[count] = text
    cursor = resume
  end
  if count == 0 then
    return "", cursor
  elseif count == 1 then
    return pieces[1], cursor
  end
  return concat (pieces), cursor
 end
 local scanvalue -- assigned below; scantable and scanvalue call each other
 -- Scans a JSON array or object whose opening bracket is at `startpos`.
 --   what       'object' or 'array'; selects the metatable and is used in
 --              error messages
 --   closechar  the closing bracket that ends the construct
 --   nullval, objectmeta, arraymeta  passed through to scanvalue
 -- Returns the resulting table and the position behind the closing
 -- bracket, or nil, a position and an error message.
 -- Values followed by ":" become keys; all other values are appended to
 -- the array part. A "," between elements is skipped when present.
 local function scantable (what, closechar, str, startpos, nullval, objectmeta, arraymeta)
  local result, arraylen = {}, 0
  local cursor = startpos + 1
  if what == 'object' then
    setmetatable (result, objectmeta)
  else
    setmetatable (result, arraymeta)
  end
  while true do
    cursor = scanwhite (str, cursor)
    if not cursor then return unterminated (str, what, startpos) end
    if strsub (str, cursor, cursor) == closechar then
      return result, cursor + 1
    end
    local item, err
    item, cursor, err = scanvalue (str, cursor, nullval, objectmeta, arraymeta)
    if err then return nil, cursor, err end
    cursor = scanwhite (str, cursor)
    if not cursor then return unterminated (str, what, startpos) end
    local sep = strsub (str, cursor, cursor)
    if sep ~= ":" then
      -- positional element
      arraylen = arraylen + 1
      result[arraylen] = item
    else
      -- key/value pair: `item` is the key
      if item == nil then
        return nil, cursor, "cannot use nil as table index (at " .. loc (str, cursor) .. ")"
      end
      cursor = scanwhite (str, cursor + 1)
      if not cursor then return unterminated (str, what, startpos) end
      local member
      member, cursor, err = scanvalue (str, cursor, nullval, objectmeta, arraymeta)
      if err then return nil, cursor, err end
      result[item] = member
      cursor = scanwhite (str, cursor)
      if not cursor then return unterminated (str, what, startpos) end
      sep = strsub (str, cursor, cursor)
    end
    if sep == "," then
      cursor = cursor + 1
    end
  end
 end
 -- Scans one JSON value starting at `pos` (default 1), skipping leading
 -- whitespace and comments.
 --   nullval                value returned for the literal `null`
 --   objectmeta, arraymeta  metatables given to decoded objects/arrays
 -- Returns the value and the position behind it, or nil, a position and an
 -- error message.
 scanvalue = function (str, pos, nullval, objectmeta, arraymeta)
  pos = scanwhite (str, pos or 1)
  if not pos then
    return nil, strlen (str) + 1, "no valid JSON value (reached the end)"
  end
  local first = strsub (str, pos, pos)
  if first == "{" then
    return scantable ('object', "}", str, pos, nullval, objectmeta, arraymeta)
  end
  if first == "[" then
    return scantable ('array', "]", str, pos, nullval, objectmeta, arraymeta)
  end
  if first == "\"" then
    return scanstring (str, pos)
  end
  -- Try a number first; the pattern is permissive and str2num has the
  -- final say on whether the text is accepted.
  local from, to = strfind (str, "^%-?[%d%.]+[eE]?[%+%-]?%d*", pos)
  if from then
    local number = str2num (strsub (str, from, to))
    if number then
      return number, to + 1
    end
  end
  -- Otherwise only the three literal names are accepted.
  from, to = strfind (str, "^%a%w*", pos)
  if from then
    local word = strsub (str, from, to)
    if word == "true" then
      return true, to + 1
    end
    if word == "false" then
      return false, to + 1
    end
    if word == "null" then
      return nullval, to + 1
    end
  end
  return nil, pos, "no valid JSON value at " .. loc (str, pos)
 end
 -- Resolves the optional metatable arguments of the decode functions.
 -- When the caller passed any extra argument (even nil) the arguments are
 -- returned unchanged; only when none were passed at all are fresh default
 -- metatables for objects and arrays created.
 local function optionalmetatables(...)
  if select("#", ...) == 0 then
    local objectmeta = {__jsontype = 'object'}
    local arraymeta = {__jsontype = 'array'}
    return objectmeta, arraymeta
  end
  return ...
 end
 -- Decodes the JSON value found in `str` at `pos` using the pure Lua
 -- scanner.
 --   nullval  value substituted for JSON `null`
 --   ...      optional object and array metatables; defaults are created
 --            only when no extra argument is passed at all
 -- Returns the decoded value and the position behind it, or nil, a
 -- position and an error message.
 function json.decode (str, pos, nullval, ...)
  local objmeta, arrmeta = optionalmetatables(...)
  return scanvalue (str, pos, nullval, objmeta, arrmeta)
 end
 -- Builds an LPeg based decoder and returns the `jsonlpeg` module table,
 -- which shares the encoder functions with `json` but has its own decode.
 -- Requires the "lpeg" module and raises an error for LPeg 0.11.
 -- After the first successful call json.use_lpeg is replaced by a function
 -- that simply returns the already built table.
 function json.use_lpeg ()
  local g = require ("lpeg")
  if type(g.version) == 'function' and g.version() == "0.11" then
    error "due to a bug in LPeg 0.11, it cannot be used for JSON matching"
  end
  local pegmatch = g.match
  local P, S, R = g.P, g.S, g.R
  -- Match-time callback that records an error in `state`. Only the first
  -- error is kept; returning false makes the surrounding match fail.
  local function ErrorCall (str, pos, msg, state)
    if not state.msg then
      state.msg = msg .. " at " .. loc (str, pos)
      state.pos = pos
    end
    return false
  end
  -- Pattern that always fails and reports `msg`; the state table is the
  -- second extra argument given to pegmatch.
  local function Err (msg)
    return g.Cmt (g.Cc (msg) * g.Carg (2), ErrorCall)
  end
  -- Like ErrorCall, but reports "unterminated <what>" one position earlier.
  local function ErrorUnterminatedCall (str, pos, what, state)
    return ErrorCall (str, pos - 1, "unterminated " .. what, state)
  end
  local SingleLineComment = P"//" * (1 - S"\n\r")^0
  local MultiLineComment = P"/*" * (1 - P"*/")^0 * P"*/"
  -- Whitespace also covers the UTF-8 byte order mark and both comment styles.
  local Space = (S" \n\r\t" + P"\239\187\191" + SingleLineComment + MultiLineComment)^0
  local function ErrUnterminated (what)
    return g.Cmt (g.Cc (what) * g.Carg (2), ErrorUnterminatedCall)
  end
  -- Any character except quote, backslash and line breaks.
  local PlainChar = 1 - S"\"\\\n\r"
  -- Single character escapes are translated through the escapechars table.
  local EscapeSequence = (P"\\" * g.C (S"\"\\/bfnrt" + Err "unsupported escape sequence")) / escapechars
  local HexDigit = R("09", "af", "AF")
  -- Match-time callback for two consecutive \u escapes: succeeds with the
  -- combined character only when they form a high/low surrogate pair.
  local function UTF16Surrogate (match, pos, high, low)
    high, low = tonumber (high, 16), tonumber (low, 16)
    if 0xD800 <= high and high <= 0xDBff and 0xDC00 <= low and low <= 0xDFFF then
      return true, unichar ((high - 0xD800)  * 0x400 + (low - 0xDC00) + 0x10000)
    else
      return false
    end
  end
  -- Converts the four hex digits of a single \u escape.
  local function UTF16BMP (hex)
    return unichar (tonumber (hex, 16))
  end
  local U16Sequence = (P"\\u" * g.C (HexDigit * HexDigit * HexDigit * HexDigit))
  -- A surrogate pair is tried first, then a single \u escape.
  local UnicodeEscape = g.Cmt (U16Sequence * U16Sequence, UTF16Surrogate) + U16Sequence/UTF16BMP
  local Char = UnicodeEscape + EscapeSequence + PlainChar
  local String = P"\"" * (g.Cs (Char ^ 0) * P"\"" + ErrUnterminated "string")
  local Integer = P"-"^(-1) * (P"0" + (R"19" * R"09"^0))
  local Fractal = P"." * R"09"^0
  local Exponent = (S"eE") * (S"+-")^(-1) * R"09"^1
  local Number = (Integer * Fractal^(-1) * Exponent^(-1))/str2num
  -- `null` produces the first extra argument of pegmatch (the nullval).
  local Constant = P"true" * g.Cc (true) + P"false" * g.Cc (false) + P"null" * g.Carg (1)
  local SimpleValue = Number + String + Constant
  local ArrayContent, ObjectContent
  -- The functions parsearray and parseobject parse only a single value/pair
  -- at a time and store them directly to avoid hitting the LPeg limits.
  -- Both are match-time callbacks: they return the new position and the
  -- finished table. `cont` is 'cont' after a comma, 'last' before the
  -- closing bracket, 'empty' for an immediately closed container and 'end'
  -- when the input ended.
  local function parsearray (str, pos, nullval, state)
    local obj, cont
    local start = pos
    local npos
    local t, nt = {}, 0
    repeat
      obj, cont, npos = pegmatch (ArrayContent, str, pos, nullval, state)
      if cont == 'end' then
        return ErrorUnterminatedCall (str, start, "array", state)
      end
      pos = npos
      if cont == 'cont' or cont == 'last' then
        nt = nt + 1
        t[nt] = obj
      end
    until cont ~= 'cont'
    return pos, setmetatable (t, state.arraymeta)
  end
  local function parseobject (str, pos, nullval, state)
    local obj, key, cont
    local start = pos
    local npos
    local t = {}
    repeat
      key, obj, cont, npos = pegmatch (ObjectContent, str, pos, nullval, state)
      if cont == 'end' then
        return ErrorUnterminatedCall (str, start, "object", state)
      end
      pos = npos
      if cont == 'cont' or cont == 'last' then
        t[key] = obj
      end
    until cont ~= 'cont'
    return pos, setmetatable (t, state.objectmeta)
  end
  local Array = P"[" * g.Cmt (g.Carg(1) * g.Carg(2), parsearray)
  local Object = P"{" * g.Cmt (g.Carg(1) * g.Carg(2), parseobject)
  local Value = Space * (Array + Object + SimpleValue)
  local ExpectedValue = Value + Space * Err "value expected"
  local ExpectedKey = String + Err "key expected"
  -- End of input, reported to the callbacks as the marker 'end'.
  local End = P(-1) * g.Cc'end'
  local ErrInvalid = Err "invalid JSON"
  -- One array element plus the marker that follows it, then the position.
  ArrayContent = (Value * Space * (P"," * g.Cc'cont' + P"]" * g.Cc'last'+ End + ErrInvalid)  + g.Cc(nil) * (P"]" * g.Cc'empty' + End  + ErrInvalid)) * g.Cp()
  local Pair = g.Cg (Space * ExpectedKey * Space * (P":" + Err "colon expected") * ExpectedValue)
  -- One key/value pair plus the marker that follows it, then the position.
  -- NOTE(review): the stand-alone `End` alternative below is not preceded
  -- by the two g.Cc(nil) placeholders, so its 'end' marker would appear to
  -- arrive in parseobject's `key` rather than `cont` -- confirm whether an
  -- unterminated "{" at the very end of input is reported as intended.
  ObjectContent = (g.Cc(nil) * g.Cc(nil) * P"}" * g.Cc'empty' + End + (Pair * Space * (P"," * g.Cc'cont' + P"}" * g.Cc'last' + End + ErrInvalid) + ErrInvalid)) * g.Cp()
  local DecodeValue = ExpectedValue * g.Cp ()
  -- The LPeg variant reuses everything except decode from the plain module.
  jsonlpeg.version = json.version
  jsonlpeg.encode = json.encode
  jsonlpeg.null = json.null
  jsonlpeg.quotestring = json.quotestring
  jsonlpeg.addnewline = json.addnewline
  jsonlpeg.encodeexception = json.encodeexception
  jsonlpeg.using_lpeg = true
  -- Same contract as json.decode: returns the value and the position behind
  -- it, or nil, the error position and the error message recorded in the
  -- per-call state table.
  function jsonlpeg.decode (str, pos, nullval, ...)
    local state = {}
    state.objectmeta, state.arraymeta = optionalmetatables(...)
    local obj, retpos = pegmatch (DecodeValue, str, pos, nullval, state)
    if state.msg then
      return nil, state.pos, state.msg
    else
      return obj, retpos
    end
  end
  -- cache result of this function:
  json.use_lpeg = function () return jsonlpeg end
  jsonlpeg.use_lpeg = json.use_lpeg
  return jsonlpeg
 end
 -- Module result: when `always_use_lpeg` (set outside this part of the
 -- file) is truthy, hand out the LPeg based variant, otherwise the pure
 -- Lua implementation.
 if always_use_lpeg then
  return json.use_lpeg()
 end
 return json