diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..53b4768
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2026 Markus Fritsche
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
index 4c70367..ade8b75 100644
--- a/README.md
+++ b/README.md
@@ -111,7 +111,7 @@ Replace these with your own llama.cpp endpoints if you're not on that LAN.
 
 ## License
 
-Not yet selected. Default-private until decided.
+MIT — see [`LICENSE`](LICENSE).
 
 ## Project conventions
 
diff --git a/bin/aish b/bin/aish
new file mode 100755
index 0000000..48cd7ca
--- /dev/null
+++ b/bin/aish
@@ -0,0 +1,23 @@
+#!/bin/sh
+# aish — AI-augmented conversational shell launcher.
+# Source of truth: git.reauktion.de/marfrit/aish
+#
+# Installed by the aish package at /usr/bin/aish; execs LuaJIT against
+# the packaged main.lua under $AISH_LIB (default /usr/share/lua/5.1/aish).
+# Exits 2 without launching if main.lua is unreadable or luajit is missing.
+# Dev mode: AISH_LIB=$HOME/src/aish aish ...
+
+AISH_LIB="${AISH_LIB:-/usr/share/lua/5.1/aish}"
+
+if [ ! -r "$AISH_LIB/main.lua" ]; then
+    echo "aish: $AISH_LIB/main.lua not found or not readable." >&2
+    echo "aish: set AISH_LIB to the directory containing main.lua." >&2
+    exit 2
+fi
+
+if ! command -v luajit >/dev/null 2>&1; then
+    echo "aish: luajit not found in PATH. Install luajit." >&2
+    exit 2
+fi
+
+exec luajit "$AISH_LIB/main.lua" "$@"
diff --git a/examples/config.lua b/examples/config.lua
new file mode 100644
index 0000000..bac5084
--- /dev/null
+++ b/examples/config.lua
@@ -0,0 +1,488 @@
+-- config.lua — example aish configuration.
+-- Shipped by the aish package at /usr/share/doc/aish/examples/config.lua.
+-- Copy to ~/.config/aish/config.lua (preferred) and adapt to your fleet:
+--
+--     install -Dm600 /usr/share/doc/aish/examples/config.lua \
+--                    ~/.config/aish/config.lua
+--
+-- Mode 0600 matters because this file can carry MCP bearer tokens. The
+-- two commented-out mcp.servers examples below use PLACEHOLDER URLs that
+-- must be replaced, and the auth_env env-var indirection form (export
+-- MCP_PVE1_TOKEN=... in your shell init) — prefer it over token literals.
+--
+-- Loaded with dofile() at startup; returns a plain Lua table.
+-- See docs/PHASE0.md §10 for resolution order and full schema.
+--
+-- Per issue #12: hossenfelder is the canonical single-URL broker. It does
+-- model-aware routing server-side (local models on boltzmann; cloud routes
+-- through OpenRouter using its own bearer auth — no client-side key here).
+-- Discovery: GET http://hossenfelder.fritz.box:8082/v1/models.
+--
+-- Phase 9 (docs/PHASE9.md): a `.aish.lua` in/above your cwd (walking up
+-- to $HOME) overlays this user config. First encounter prompts to trust;
+-- sha256-pinned in ~/.aish/trusted-projects. Use it for repo-specific
+-- model presets, permissions, hooks, etc.
+--
+-- IMPORTANT: shallow merge. If your `.aish.lua` sets a top-level block
+-- (models, permissions, cost, shell, ...), it REPLACES the user's
+-- entire block — list every entry you want available OR omit the block
+-- to keep the user's. Inspect the merge via `:config show` at runtime.
+
+-- Replace with your own broker URL. This default targets the
+-- maintainer's home-LAN broker — useful as a structural example
+-- but will not resolve outside that network.
+local HOSSENFELDER = "http://hossenfelder.fritz.box:8082"
+
+return {
+    default_model = "fast",
+
+    -- 2026-05-17: full fleet exposed. 6 local + 14 cloud models live on the
+    -- hossenfelder broker. Aliases below match the model IDs returned by
+    -- /v1/models so the broker can route without prefix stripping.
+    models = {
+        -- ── LOCAL ────────────────────────────────────────────────────────
+        fast = {  -- alias for the 1.5B; default
+            endpoint    = HOSSENFELDER,
+            model       = "qwen2.5-coder-1.5b-q4_k_m.gguf",
+            temperature = 0.2,
+        },
+        ["coder-3b"] = {  -- pve2 (Haswell NUC, 1.8 GB model, ~4 tok/s)
+            endpoint    = HOSSENFELDER,
+            model       = "qwen2.5-coder-3b-instruct-pve2",
+            temperature = 0.2,
+        },
+        ["coder-7b"] = {  -- pve1 (Haswell NUC)
+            endpoint    = HOSSENFELDER,
+            model       = "qwen2.5-coder-7b-instruct-pve1",
+            temperature = 0.2,
+        },
+        ["coder-7b-snappy"] = {  -- dirac:8081, low-latency completion
+            endpoint    = HOSSENFELDER,
+            model       = "qwen-coder-7b-snappy-8k",
+            temperature = 0.2,
+        },
+        ["qwen-7b"] = {  -- dirac:8080 chat
+            endpoint    = HOSSENFELDER,
+            model       = "Qwen2.5-7B-Instruct-Q4_K_M.gguf",
+            temperature = 0.2,
+        },
+        deep = {  -- boltzmann:8085 — Qwen3-30B-A3B MoE, q8 KV cache
+            endpoint    = HOSSENFELDER,
+            model       = "qwen3-30b-a3b-instruct-2507",
+            -- timeout_ms inherits broker default (30 min) — 30B prompt processing
+            -- of long contexts on CPU can take 15-25 min before first token.
+            temperature = 0.1,
+        },
+
+        -- ── CLOUD (OpenRouter via hossenfelder) ───────────────────────────
+        cloud  = { endpoint = HOSSENFELDER, model = "anthropic/claude-haiku-4.5",      temperature = 0.2 },
+        haiku  = { endpoint = HOSSENFELDER, model = "anthropic/claude-haiku-4.5",      temperature = 0.2 },
+        sonnet = { endpoint = HOSSENFELDER, model = "anthropic/claude-sonnet-4.6",     temperature = 0.2 },
+        opus   = { endpoint = HOSSENFELDER, model = "anthropic/claude-opus-4.7",       temperature = 0.2 },
+        gpt5      = { endpoint = HOSSENFELDER, model = "openai/gpt-5.5",                 temperature = 0.2 },
+        ["gpt5-mini"] = { endpoint = HOSSENFELDER, model = "openai/gpt-5.4-mini",        temperature = 0.2 },
+        deepseek         = { endpoint = HOSSENFELDER, model = "deepseek/deepseek-v3.2",         temperature = 0.2 },
+        ["deepseek-v4"]  = { endpoint = HOSSENFELDER, model = "deepseek/deepseek-v4-flash",     temperature = 0.2 },
+        ["deepseek-pro"] = { endpoint = HOSSENFELDER, model = "deepseek/deepseek-v4-pro",       temperature = 0.2 },
+        mistral   = { endpoint = HOSSENFELDER, model = "mistralai/mistral-large-2512",   temperature = 0.2 },
+        ["qwen-cloud"] = { endpoint = HOSSENFELDER, model = "qwen/qwen3.5-27b",          temperature = 0.2 },
+        owl       = { endpoint = HOSSENFELDER, model = "openrouter/owl-alpha",           temperature = 0.2 },
+
+        -- ── CLOUD FREE-TIER ──────────────────────────────────────────────
+        ["free-qwen-coder"]   = { endpoint = HOSSENFELDER, model = "qwen/qwen3-coder:free",                  temperature = 0.2 },
+        ["free-llama-70b"]    = { endpoint = HOSSENFELDER, model = "meta-llama/llama-3.3-70b-instruct:free", temperature = 0.2 },
+        ["free-qwen-80b"]     = { endpoint = HOSSENFELDER, model = "qwen/qwen3-next-80b-a3b-instruct:free",  temperature = 0.2 },
+        ["free-gpt-oss"]      = { endpoint = HOSSENFELDER, model = "openai/gpt-oss-120b:free",               temperature = 0.2 },
+        ["free-glm"]          = { endpoint = HOSSENFELDER, model = "z-ai/glm-4.5-air:free",                  temperature = 0.2 },
+        ["free-deepseek-v4"]  = { endpoint = HOSSENFELDER, model = "deepseek/deepseek-v4-flash:free",        temperature = 0.2 },
+    },
+
+    shell = {
+        known_commands = {
+            "ls", "cat", "cd", "grep", "find", "cp", "mv", "rm",
+            "mkdir", "rmdir", "git", "make", "cmake", "gcc", "clang",
+            "python3", "luajit", "ssh", "scp", "curl", "wget",
+        },
+        capture_output = true,    -- inject exec output into context
+        confirm_cmd    = true,    -- prompt before executing CMD: suggestions
+
+        -- Issue #10: prompt template. When set, replaces the default
+        -- "[aish:<model>]> " prompt. Variables (substituted via {name}):
+        --   {model}  {ctx_used}  {ctx_max}  {turn}
+        --   {cwd}    {cwd_short} (cwd with $HOME -> ~)
+        --   {last_status} (last exec exit code, empty if none yet)
+        --   {mode}   (norris / plan / normal)
+        -- prompt = "[{model} {ctx_used}/{ctx_max}t T{turn} {mode}] {cwd_short} > ",
+    },
+
+    context = {
+        max_turns    = 40,
+        token_budget = 4096,
+    },
+
+    history = {
+        dir = (os.getenv("HOME") or ".") .. "/.local/share/aish",
+    },
+
+    -- Issue #3: pre/post CMD hooks. Optional shell scripts triggered around
+    -- every CMD: execution. Each hook receives the command on stdin and
+    -- AISH_CMD / AISH_TURN / AISH_CWD as env vars. Non-zero exit on pre_cmd
+    -- aborts execution; post_cmd's exit code is ignored but its stdout is
+    -- logged. Default off (no hooks). Uncomment to enable.
+    -- hooks = {
+    --     pre_cmd  = (os.getenv("HOME") or ".") .. "/.aish/hooks/pre-cmd",
+    --     post_cmd = (os.getenv("HOME") or ".") .. "/.aish/hooks/post-cmd",
+    -- },
+
+    -- Issue #13: secret redaction. Vault is a separate file at ~/.aish/
+    -- secrets.lua (mode 0600 enforced). When set, outbound broker messages
+    -- are scrubbed: vault literals + autodetect heuristics (OpenAI sk-,
+    -- OpenRouter sk-or-v1-, GitHub ghp_/gho_/ghs_, AWS AKIA, JWT eyJ...,
+    -- SSH/GPG PRIVATE KEY headers) become $AISH_SECRET_NNN placeholders.
+    -- The streamed reply is rehydrated before display so the user sees
+    -- real values. Per-broker override via models[*].redact:
+    --   "off"               -- no scrubbing (trusted local)
+    --   "vault"             -- vault literals only
+    --   "vault+autodetect"  -- + heuristics (default when vault loaded)
+    --   "stealth"           -- + heuristics, opaque decoys, no rehydrate
+    -- Default per-broker is the global config.secrets.default, falling
+    -- back to "vault+autodetect" when vault loaded, else "off".
+    -- secrets = {
+    --     vault   = "~/.aish/secrets.lua",
+    --     default = "vault+autodetect",  -- applies when models[*].redact is nil
+    -- },
+
+    -- Issue #8: background CMD (CMD&: marker). Requires history.dir set
+    -- (logs land at <history.dir>/bg/<id>.log + .status sidecar). The
+    -- feature is always-on once history.dir exists — no config flag — but
+    -- only fires when the model emits "CMD&: " or the user runs :bg-spawn.
+
+    -- Issue #9: permission policy DSL for AI-suggested CMD: lines. When set,
+    -- supersedes shell.confirm_cmd. Patterns are Lua patterns (NOT regex)
+    -- per substrate invariant §3 (no compiled extensions). Priority order:
+    -- deny > confirm > allow; first match in the chosen category wins.
+    -- Unmatched commands default to "confirm". Probe with :perms check <cmd>.
+    -- permissions = {
+    --     allow   = { "^ls%s", "^cat%s", "^git status", "^git diff" },
+    --     confirm = { "^rm%s", "^git push", "^docker%s", "^sudo%s" },
+    --     deny    = { "^ssh%s+root@", "^curl%s+http[^s]" },
+    -- },
+
+    -- Phase 2 (docs/PHASE2.md): MCP server registry + tool-call policy.
+    -- Aliases become the namespace prefix on tool names sent to the model
+    -- ("<alias>__<tool>" — e.g. "pve1__list_dir"). Separator is "__" because
+    -- Anthropic via Bedrock validates tool names against ^[a-zA-Z0-9_-]{1,128}$
+    -- (dots are rejected). Aliases themselves must not contain "__".
+    -- auth_token literal > auth_env env-var indirection > nil (no auth).
+    mcp = {
+        servers = {
+            -- Example MCP server entries. Replace the URL with your own
+            -- lmcp endpoint and source the bearer token via auth_env so
+            -- it never lands in version control.
+            --
+            -- pve1: small sandbox host (stock lmcp tools — shell, read_file,
+            -- write_file, edit_file, list_dir, search_files, shell_bg).
+            -- pve1 = {
+            --     url      = "http://pve1.example.local:8080/mcp",
+            --     auth_env = "MCP_PVE1_TOKEN",
+            -- },
+            --
+            -- hertz: home-network hub with lmcp v1.2+ built-in fetch /
+            -- web_search tools — useful for letting the model do web
+            -- research without leaving aish. Auto-approving these two
+            -- is safe because they carry MCP readOnlyHint=true and
+            -- openWorldHint=true (see auto_approve block below).
+            -- hertz = {
+            --     url      = "http://hertz.example.local:8080/mcp",
+            --     auth_env = "MCP_HERTZ_TOKEN",
+            -- },
+        },
+
+        -- Per-call confirm gate auto-approve policy. fetch / web_search
+        -- carry MCP readOnlyHint=true + openWorldHint=true; safe to skip
+        -- the per-call prompt since they neither mutate nor leak local
+        -- state. Anything writable on the host (mqtt_pub, ha_cli, lxc_exec,
+        -- wol_and_wait, ...) should keep prompting.
+        auto_approve = {
+            -- ["hertz__fetch"]      = true,
+            -- ["hertz__web_search"] = true,
+        },
+
+        -- Tool-call sub-loop budget per ask_ai turn. Default 8 if absent.
+        max_tool_depth = 8,
+    },
+
+    -- Phase 3 (docs/PHASE3.md): Chuck Norris autonomous mode + destructive-op
+    -- heuristic. The block is OFF by default (sane defaults kick in when
+    -- absent); uncomment to tune.
+    --
+    -- safety = {
+    --     -- LLM second-opinion on commands the static patterns don't flag.
+    --     -- Default true. Set false for static-only operation (faster, but
+    --     -- misses novel destructive patterns the static list doesn't know
+    --     -- about — bash -c content, custom destructive idioms, etc.).
+    --     llm_second_opinion = true,
+    --
+    --     -- Which configured model to use for the YES/NO destructive probe.
+    --     -- Precedence: this field → models.deep → models[default_model].
+    --     -- R-B2: prefer an INDEPENDENT model class from the action-emitting
+    --     -- model (avoids self-policing). Recommended values:
+    --     --   "cloud"  — anthropic/claude-haiku-4.5 via openrouter. Fast and
+    --     --              reliable. Costs money per probe (typical Norris
+    --     --              session = 16 probes max, often cached).
+    --     --   "deep"   — local large model (qwen3-30b on this fleet). Free
+    --     --              but slow on RK3588 hardware (~1-3s per probe).
+    --     --              Falls back here automatically if not set.
+    --     --   "fast"   — same model as the action-emitter. NOT RECOMMENDED
+    --     --              (circular trust); use only when no other option.
+    --     llm_model = "cloud",
+    --
+    --     -- Norris planning-loop budget. Iterations of safety.norris_step.
+    --     -- Each iteration is one broker round-trip + dispatch of actions.
+    --     -- Default 8. Bump for long-running goals; cap low for testing.
+    --     max_norris_steps = 8,
+    -- },
+
+    -- Phase 4 (docs/PHASE4.md): cross-session memory.jsonl + startup
+    -- injection + :memory management surface. The block is OFF by
+    -- default (no startup injection); uncomment to tune. Note that
+    -- :remember / :memory list / :memory forget / :memory summarize
+    -- all work without this block — they store to <history.dir>/
+    -- memory.jsonl regardless. The block only configures the
+    -- injection-into-system-prompt behavior at startup.
+    --
+    -- memory = {
+    --     -- Cap on total characters injected at startup. ~2000 chars ≈
+    --     -- 500 tokens. LRU-by-ts selection if your memory.jsonl has
+    --     -- more recent items than fit. Older items remain in the
+    --     -- file; only injection is bounded. Suppressed entirely in
+    --     -- Norris mode (R-C1).
+    --     inject_max_chars = 2000,
+    --
+    --     -- Which configured model to use for :memory summarize.
+    --     -- Defaults to the active model when nil. Use "fast" for
+    --     -- speed; "deep" or "cloud" for better extraction quality
+    --     -- (cloud may have variable cost per session).
+    --     summarizer_model = "fast",
+    --
+    --     -- #102: auto-summarize the session into memory.jsonl on :q.
+    --     -- When true, shutdown_session runs the same distill flow as
+    --     -- `:memory summarize`, non-interactively, and auto-adds the
+    --     -- parsed candidates. Silent no-op for trivial sessions (turn
+    --     -- count < min_turns_for_summary, default 5). pcall'd so a
+    --     -- broker failure never blocks :q.
+    --     auto_summarize_on_quit = true,
+    --     min_turns_for_summary  = 5,
+    --     summary_model          = "fast",  -- new alias; summarizer_model
+    --                                       -- above is still honored for
+    --                                       -- back-compat.
+    -- },
+
+    -- Phase 5 (docs/PHASE5.md): multi-model routing + cloud fallback +
+    -- summarize-on-evict. OFF by default — auto-routing can spend money
+    -- silently on the cloud preset; require explicit opt-in.
+    --
+    -- routing = {
+    --     -- Enable auto-routing per request. When true, router.classify_model
+    --     -- inspects each prompt and may switch the model for THAT request
+    --     -- only (the :model selection is preserved across requests).
+    --     -- Default false. Toggle at runtime with :route on / :route off.
+    --     auto = true,
+    --
+    --     -- Class → model mapping. nil = "keep current" (heuristic fires
+    --     -- but no override). Ships with reasoning = nil because mapping
+    --     -- "explain ..." prompts to a paid cloud model would spend money
+    --     -- silently — opt in by uncommenting the reasoning line below.
+    --     classes = {
+    --         code      = "deep",      -- code-like prompts to local deep
+    --         -- reasoning = "cloud",  -- OPT-IN: "explain"/"why"/"how does" → paid
+    --         -- default   = nil,      -- keep active model
+    --     },
+    --
+    --     -- Single-hop retry on transport failure (HTTP 5xx, 408,
+    --     -- 404 model_not_found, DNS, connection refused, timeouts).
+    --     -- Retries against fallback_model once. Skipped if any text
+    --     -- has already streamed (no partial-output duplication).
+    --     -- Toggle at runtime with :fallback on / :fallback off.
+    --     fallback       = false,           -- default off (cost-safety)
+    --     fallback_model = "cloud",
+    --
+    --     -- Issue #86: per-class system_prompt override. When the
+    --     -- classified request falls into a class with an entry here,
+    --     -- the BASE system_prompt is REPLACED for that one request
+    --     -- (dynamic blocks — [background], [project], [earlier
+    --     -- summary], NORRIS suffix — still compose on top). Mostly
+    --     -- useful for tightening small local models' instruction
+    --     -- adherence. Default {} (no override).
+    --     system_prompts = {
+    --         code = [[You are a code assistant. Rules:
+    -- 1. Output ONLY the requested code or command.
+    -- 2. No prose explanation unless explicitly asked.
+    -- 3. Wrap shell commands in CMD: prefix.
+    -- 4. Max response: 200 tokens.]],
+    --         default = [[You are a shell assistant.
+    -- Output shell commands as: CMD: <command>
+    -- Output answers as single short sentences.
+    -- Do not ask clarifying questions.]],
+    --         -- reasoning routes to cloud; no override usually needed
+    --     },
+    --
+    --     -- Issue #88: per-class GBNF grammar passthrough. llama.cpp
+    --     -- constrains the sampler to ONLY emit tokens matching the
+    --     -- grammar — eliminates format drift on small models. Cloud
+    --     -- (Anthropic/Bedrock) silently ignores the field, so default
+    --     -- passthrough is safe; no per-model opt-out needed. Misformed
+    --     -- grammar surfaces as a broker error at request time.
+    --     grammars = {
+    --         code    = [[root ::= "CMD: " [^\n]+ "\n"]],
+    --         default = [[root ::= ("CMD: " [^\n]+ "\n") | [^\n]+ "\n"]],
+    --     },
+    -- },
+    --
+    -- Issue #88 (continued): for the safety LLM probe (YES/NO destructive
+    -- classification), set safety.probe_grammar to force the probe model to
+    -- emit exactly YES or NO. Eliminates the regex-match fallback for
+    -- unparseable verdicts; small models become reliable enough to probe.
+    -- Add the field to the Phase 3 `safety` block; don't enable both blocks.
+    --
+    -- safety = {
+    --     llm_second_opinion = true,
+    --     llm_model          = "fast",
+    --     probe_grammar      = [[root ::= ("YES" | "NO")]],
+    -- },
+
+    -- ── Issue #87 (route-aware context compression).
+    -- When a routed model preset has `local_compress = true`, each
+    -- broker call against THAT preset gets a compressed view of
+    -- ctx.turns: only the last `keep_turns` turns; any turn whose
+    -- content exceeds `max_turn_chars` is tail-truncated. The full
+    -- context lives on (visible via :history); compression is purely
+    -- per-request for small models that effectively use a fraction
+    -- of their advertised context window.
+    --
+    -- Set the per-model opt-in on models[<name>]:
+    --     models.fast = { ..., local_compress = true }
+    -- Defaults live under context.compress:
+    --     context = {
+    --         ...
+    --         compress = { keep_turns = 2, max_turn_chars = 800 },
+    --     }
+    --
+    -- Trade-off documented in the FR: tool turns lose information
+    -- when tail-truncated. Acceptable for shell-output blocks (the
+    -- tail is usually the relevant bit); known limitation for
+    -- structured tool results. Disable per-model if it bites.
+
+    -- ── Issue #89 / Phase 10: cloud preplanner → local executor split.
+    -- When cfg.norris.preplanner names a model preset, :norris launch
+    -- fires ONE broker.chat against that preset asking for a sequence
+    -- of TASK: <imperative> lines. Parsed list (capped at tasks_max)
+    -- becomes ctx.norris_tasks; the executor model (cfg.norris.executor,
+    -- defaulting to the active :model selection) runs each task with
+    -- the current task shown in the per-step header.
+    --
+    -- Goal: small fast local models are cheap per step but easily
+    -- distracted on multi-step plans; cloud is capable at planning
+    -- but expensive per step. Use cloud ONCE for the plan, local for
+    -- every step. Falls back to single-model Norris (existing
+    -- behavior) when preplanner unset / fails / produces no TASKs.
+    --
+    -- norris = {
+    --     preplanner = "sonnet",      -- model name in cfg.models;
+    --                                 -- this preset is called ONCE per
+    --                                 -- :norris launch. Omit to run
+    --                                 -- single-model (Phase 6 behavior).
+    --     executor   = "fast",        -- model that runs each step.
+    --                                 -- Omit to use the active :model.
+    --     tasks_max  = 16,            -- cap on preplan list size.
+    --     -- preplan_system = "...",  -- override the built-in prompt
+    -- },
+    --
+    -- :cost detail separates norris-preplan and norris rows so you
+    -- can see cloud planning cost vs local execution cost. The
+    -- preplan call does NOT retry via fallback_model (a different
+    -- model = a different decomposition; clean hard-fail to single-
+    -- model is safer).
+
+    -- ── Phase 5 context summarization on sliding-window eviction.
+    -- Set INSIDE the context = { ... } block above to enable:
+    --     context = {
+    --         max_turns          = 40,
+    --         token_budget       = 4096,
+    --         summarize_on_evict = true,
+    --         summarizer_model   = "fast",   -- model name in models{}
+    --         max_summary_chars  = 2000,
+    --
+    --         -- #101 (proactive periodic summarization). When set,
+    --         -- enforce_cadence fires every N appends (before
+    --         -- enforce_budget) and folds turns OLDER than
+    --         -- summarize_keep_recent into ctx.summary. Goal: keep the
+    --         -- wire prompt tight from the start so small local models
+    --         -- aren't fed near-budget context until eviction. Composes
+    --         -- with summarize_on_evict (same summarize_fn closure;
+    --         -- different trigger). Suppressed in Norris (R-C4 parity).
+    --         summarize_every_n_turns = 10,    -- nil = disabled (default)
+    --         summarize_keep_recent   = 4,
+    --     },
+    -- When summarize_on_evict is true, evicted turn pairs are fed to
+    -- summarizer_model and the result lives on ctx.summary, appended to
+    -- the system prompt as [earlier conversation summary]. Suppressed
+    -- in Norris mode (R-C4 — planner stays on its goal). If broker
+    -- fails, falls back to Phase 0 silent eviction (no crash).
+
+    -- Phase 6 (docs/PHASE6.md): project file-tree context + :diff /
+    -- :tree / :highlight metas. The :diff and :tree metas work without
+    -- any config. The `project` block below only controls the
+    -- AUTO-injection-at-startup behavior; manual `:tree` always works
+    -- regardless. Uncomment to enable startup auto-inject.
+    --
+    -- project = {
+    --     auto_tree      = true,   -- run `:tree` once at startup
+    --     tree_depth     = 3,      -- depth filter for the scan (find fallback only;
+    --                              -- git ls-files emits full repo-relative paths)
+    --     tree_max_chars = 4096,   -- truncate the injected block above this
+    -- },
+    --
+    -- :highlight has no config flag in v1 — toggled at runtime only.
+    -- Requires the external `tree-sitter` CLI plus configured parser-
+    -- directories with cloned + built `tree-sitter-<lang>` grammars
+    -- (see `:highlight on` for the install hints).
+
+    -- Phase 7 (docs/PHASE7.md): cost / usage observability. broker.lua
+    -- captures `usage` (+ `cost` for cloud) from every chat/chat_stream
+    -- call and routes via ctx:add_usage to a per-session accumulator.
+    -- `:cost` / `:cost detail` / `:cost reset` surface the totals.
+    -- The `cost` block below configures OPTIONAL warn thresholds —
+    -- a single status line fires the first time the cumulative
+    -- crosses each threshold. Default off. Useful when paid cloud
+    -- presets are in play so runaway-cost sessions get a nudge.
+    --
+    -- cost = {
+    --     warn_at_dollars = 0.50,    -- one-shot warn when cumulative cost crosses
+    --     warn_at_tokens  = 100000,  -- one-shot warn when cumulative tokens crosses
+    -- },
+    --
+    -- Both flags are independent (R4 — first-to-fire doesn't suppress
+    -- the other); `:cost reset` re-arms both. Per-turn usage is also
+    -- written to session/*.jsonl (assistant-turn `usage` field) for
+    -- after-the-fact scripting; cross-session aggregation deferred
+    -- to a future phase (Q-C2).
+
+    -- Phase 8 (docs/PHASE8.md): accurate tokenization via the broker's
+    -- /tokenize endpoint, replacing the Phase 0 §8 char/4 heuristic.
+    -- Two consequences when use_endpoint=true:
+    --   (1) Context:estimate_tokens hits <endpoint>/tokenize once per
+    --       new turn (cached on the turn dict thereafter). Network
+    --       cost is one round-trip (~30ms) per fresh turn; subsequent
+    --       calls reuse the cache.
+    --   (2) Context:enforce_budget actually ENFORCES token_budget now
+    --       (previously only max_turns was checked). Sessions that
+    --       fit under char/4 may evict earlier — raise token_budget
+    --       to match your model's real context window if needed.
+    -- Cloud endpoints (OpenRouter) don't expose /tokenize; capability
+    -- cached as unsupported on first probe -> silent char/4 fallback.
+    --
+    -- tokenize = {
+    --     use_endpoint = true,
+    -- },
+}
diff --git a/main.lua b/main.lua
index 403f934..d037c24 100644
--- a/main.lua
+++ b/main.lua
@@ -2,16 +2,21 @@
 -- Phase 0: arg parsing, config load, REPL start.
 -- See docs/PHASE0.md §4, §10. -p one-shot mode lands per issue #4.
 
--- Make project modules and the vendored dkjson resolvable from the repo root.
--- Run aish with the repo root as cwd; PTY-relative resolution lands later.
-package.path = "./?.lua;./vendor/?.lua;" .. package.path
+-- Resolve modules + vendored dkjson relative to this script's directory,
+-- not cwd. Packaged install puts main.lua at /usr/share/lua/5.1/aish/ and
+-- the /usr/bin/aish wrapper execs `luajit /usr/share/lua/5.1/aish/main.lua`
+-- from whatever cwd the user is in — siblings must still resolve. Dev mode
+-- (`luajit main.lua`: arg[0] has no "/") and hosts that leave `arg` unset
+-- (embedding, some test runners) both fall back to "./".
+local _dir = (arg and arg[0] or ""):match("(.*/)") or "./"
+package.path = _dir .. "?.lua;" .. _dir .. "vendor/?.lua;" .. package.path
 
 local USAGE = [[
 aish — AI-augmented conversational shell.
 
 Usage:
-  luajit main.lua [--config <path>] [--help]            -- interactive REPL
-  luajit main.lua -p "<prompt>" [--config <path>]       -- one-shot, print + exit
+  aish [--config <path>] [--help]            -- interactive REPL
+  aish -p "<prompt>" [--config <path>]       -- one-shot, print + exit
 
 In -p mode, if stdin is not a TTY it's read as additional context and
 prepended to the prompt as a fenced block — composes with Unix pipes: