-- config.lua — model registry, routing rules, user preferences.
-- Loaded with dofile() at startup; returns a plain Lua table.
-- See docs/PHASE0.md §10 for resolution order and full schema.
--
-- Formatting note: `--` comments run to the end of the line, so this file
-- must keep its line breaks. If it is ever reflowed onto long lines, the
-- first `--` comments out everything after it and dofile() fails.
--
-- Per issue #12: hossenfelder is the canonical single-URL broker. It does
-- model-aware routing server-side (local models on boltzmann; cloud routes
-- through OpenRouter using its own bearer auth — no client-side key here).
-- Discovery: GET http://hossenfelder.fritz.box:8082/v1/models.
--
-- Phase 9 (docs/PHASE9.md): a `.aish.lua` in/above your cwd (walking up
-- to $HOME) overlays this user config. First encounter prompts to trust;
-- sha256-pinned in ~/.aish/trusted-projects. Use it for repo-specific
-- model presets, permissions, hooks, etc.
--
-- IMPORTANT: shallow merge. If your `.aish.lua` sets a top-level block
-- (models, permissions, cost, shell, ...), it REPLACES the user's
-- entire block — list every entry you want available OR omit the block
-- to keep the user's. Inspect the merge via `:config show` at runtime.

-- Single broker URL shared by every model preset below.
local HOSSENFELDER = "http://hossenfelder.fritz.box:8082"

-- Base directory for per-user paths; falls back to the current directory
-- when $HOME is unset.
local HOME = os.getenv("HOME") or "."

return {
    -- Key into `models` used when no model has been selected explicitly.
    default_model = "fast",

    models = {
        fast = {
            endpoint    = HOSSENFELDER,
            model       = "qwen2.5-coder-1.5b-q4_k_m.gguf",
            temperature = 0.2,
        },
        deep = {
            endpoint    = HOSSENFELDER,
            -- 2026-05-13: qwen3-30b not loaded on hossenfelder right now;
            -- using deepseek-coder-v2-lite (16B MoE, ~2.4B active) for the
            -- time being. Restore qwen3-30b when the slot is back up.
            model       = "deepseek-coder-v2-lite",
            timeout_ms  = 300000,  -- 5 min; MoE inference is faster than dense 30B
            temperature = 0.1,
        },
        cloud = {
            endpoint    = HOSSENFELDER,
            model       = "anthropic/claude-haiku-4.5",
            temperature = 0.2,
        },
    },

    shell = {
        known_commands = {
            "ls", "cat", "cd", "grep", "find", "cp", "mv", "rm", "mkdir", "rmdir",
            "git", "make", "cmake", "gcc", "clang", "python3", "luajit",
            "ssh", "scp", "curl", "wget",
        },
        capture_output = true,  -- inject exec output into context
        confirm_cmd    = true,  -- prompt before executing CMD: suggestions

        -- Issue #10: prompt template. When set, replaces the default
        -- "[aish:]> " prompt. Variables (substituted via {name}):
        --   {model}  {ctx_used}  {ctx_max}  {turn}
        --   {cwd}  {cwd_short}  (cwd with $HOME -> ~)
        --   {last_status}       (last exec exit code, empty if none yet)
        --   {mode}              (norris / plan / normal)
        -- prompt = "[{model} {ctx_used}/{ctx_max}t T{turn} {mode}] {cwd_short} > ",
    },

    context = {
        max_turns    = 40,
        token_budget = 4096,
    },

    history = {
        dir = HOME .. "/.local/share/aish",
    },

    -- Issue #3: pre/post CMD hooks. Optional shell scripts triggered around
    -- every CMD: execution. Each hook receives the command on stdin and
    -- AISH_CMD / AISH_TURN / AISH_CWD as env vars. Non-zero exit on pre_cmd
    -- aborts execution; post_cmd's exit code is ignored but its stdout is
    -- logged. Default off (no hooks). Uncomment to enable.
    -- hooks = {
    --     pre_cmd  = HOME .. "/.aish/hooks/pre-cmd",
    --     post_cmd = HOME .. "/.aish/hooks/post-cmd",
    -- },

    -- Issue #13: secret redaction. Vault is a separate file at
    -- ~/.aish/secrets.lua (mode 0600 enforced). When set, outbound broker
    -- messages are scrubbed: vault literals + autodetect heuristics
    -- (OpenAI sk-, OpenRouter sk-or-v1-, GitHub ghp_/gho_/ghs_, AWS AKIA,
    -- JWT eyJ..., SSH/GPG PRIVATE KEY headers) become $AISH_SECRET_NNN
    -- placeholders. The streamed reply is rehydrated before display so the
    -- user sees real values. Per-broker override via models[*].redact:
    --   "off"              -- no scrubbing (trusted local)
    --   "vault"            -- vault literals only
    --   "vault+autodetect" -- + heuristics (default when vault loaded)
    --   "stealth"          -- + heuristics, opaque decoys, no rehydrate
    -- Default per-broker is the global config.secrets.default, falling
    -- back to "vault+autodetect" when vault loaded, else "off".
    -- secrets = {
    --     vault   = "~/.aish/secrets.lua",
    --     default = "vault+autodetect",  -- applies when models[*].redact is nil
    -- },

    -- Issue #8: background CMD (CMD&: marker). Requires history.dir set
    -- (logs land at <history.dir>/bg/<id>.log + .status sidecar). The
    -- feature is always-on once history.dir exists — no config flag — but
    -- only fires when the model emits "CMD&: <command>" or the user runs
    -- :bg-spawn.
    -- NOTE(review): the <history.dir>, <id> and <command> placeholders were
    -- reconstructed from garbled text — confirm against the Issue #8 docs.

    -- Issue #9: permission policy DSL for AI-suggested CMD: lines. When set,
    -- supersedes shell.confirm_cmd. Patterns are Lua patterns (NOT regex)
    -- per substrate invariant §3 (no compiled extensions). Priority order:
    -- deny > confirm > allow; first match in the chosen category wins.
    -- Unmatched commands default to "confirm". Probe with
    -- :perms check <command>.
    -- Note: "%s" requires a whitespace character, so "^ls%s" does not match
    -- a bare "ls" — such a command falls through to the "confirm" default.
    -- permissions = {
    --     allow   = { "^ls%s", "^cat%s", "^git status", "^git diff" },
    --     confirm = { "^rm%s", "^git push", "^docker%s", "^sudo%s" },
    --     deny    = { "^ssh%s+root@", "^curl%s+http[^s]" },
    -- },

    -- Phase 2 (docs/PHASE2.md): MCP server registry + tool-call policy.
    -- The block is OFF by default — connect-at-startup happens only when
    -- `servers` is non-empty. Uncomment + adjust per your fleet.
    --
    -- mcp = {
    --     servers = {
    --         -- Each entry: alias = { url = "...", auth_token = "..." | auth_env = "..." }
    --         -- auth_token literal > auth_env env-var indirection > nil (no auth).
    --         -- Aliases become the namespace prefix on tool names sent to the model
    --         -- ("<alias>__<tool>" — e.g. "boltzmann__list_dir"). The separator is
    --         -- "__" (two underscores) because Anthropic via Bedrock validates tool
    --         -- names against ^[a-zA-Z0-9_-]{1,128}$ — dots are rejected.
    --         -- Aliases themselves must not contain "__".
    --         boltzmann = {
    --             url      = "http://boltzmann.fritz.box:8080/mcp",
    --             auth_env = "BOLTZMANN_MCP_TOKEN",
    --         },
    --         hertz = {
    --             url      = "http://hertz.fritz.box:8080/mcp",
    --             auth_env = "HERTZ_MCP_TOKEN",
    --         },
    --         broglie = {
    --             url = "http://broglie.fritz.box:8080/mcp",  -- LAN-only, no auth
    --         },
    --     },
    --
    --     -- Per-call confirm gate auto-approve policy.
    --     -- Key forms:
    --     --   "<alias>__<tool>" — auto-approve one specific tool
    --     --   "<alias>__*"      — auto-approve every tool on that server
    --     -- Anything not matched falls back to the [y/N] prompt.
    --     auto_approve = {
    --         ["boltzmann__read_file"]    = true,
    --         ["boltzmann__list_dir"]     = true,
    --         ["boltzmann__search_files"] = true,
    --         ["hertz__*"]                = true,  -- trust the hub fully
    --     },
    --
    --     -- Tool-call sub-loop budget per ask_ai turn. Hitting the cap surfaces
    --     -- a status and breaks; default 8 if absent.
    --     max_tool_depth = 8,
    -- },

    -- Phase 3 (docs/PHASE3.md): Chuck Norris autonomous mode + destructive-op
    -- heuristic. The block is OFF by default (sane defaults kick in when
    -- absent); uncomment to tune.
    --
    -- safety = {
    --     -- LLM second-opinion on commands the static patterns don't flag.
    --     -- Default true. Set false for static-only operation (faster, but
    --     -- misses novel destructive patterns the static list doesn't know
    --     -- about — bash -c content, custom destructive idioms, etc.).
    --     llm_second_opinion = true,
    --
    --     -- Which configured model to use for the YES/NO destructive probe.
    --     -- Precedence: this field → models.deep → models[default_model].
    --     -- R-B2: prefer an INDEPENDENT model class from the action-emitting
    --     -- model (avoids self-policing). Recommended values:
    --     --   "cloud" — anthropic/claude-haiku-4.5 via openrouter. Fast and
    --     --             reliable. Costs money per probe (typical Norris
    --     --             session = 16 probes max, often cached).
    --     --   "deep"  — local large model (normally qwen3-30b on this fleet;
    --     --             models.deep above is temporarily pinned to
    --     --             deepseek-coder-v2-lite). Free but slow on RK3588
    --     --             hardware (~1-3s per probe).
    --     --             Falls back here automatically if not set.
    --     --   "fast"  — same model as the action-emitter. NOT RECOMMENDED
    --     --             (circular trust); use only when no other option.
    --     llm_model = "cloud",
    --
    --     -- Norris planning-loop budget. Iterations of safety.norris_step.
    --     -- Each iteration is one broker round-trip + dispatch of actions.
    --     -- Default 8. Bump for long-running goals; cap low for testing.
    --     max_norris_steps = 8,
    -- },

    -- Phase 4 (docs/PHASE4.md): cross-session memory.jsonl + startup
    -- injection + :memory management surface. The block is OFF by
    -- default (no startup injection); uncomment to tune. Note that
    -- :remember / :memory list / :memory forget / :memory summarize
    -- all work without this block — they store to
    -- <history.dir>/memory.jsonl regardless. The block only configures
    -- the injection-into-system-prompt behavior at startup.
    -- NOTE(review): the <history.dir> prefix was reconstructed from garbled
    -- text — confirm the storage location against docs/PHASE4.md.
    --
    -- memory = {
    --     -- Cap on total characters injected at startup. ~2000 chars ≈
    --     -- 500 tokens. LRU-by-ts selection if your memory.jsonl has
    --     -- more recent items than fit. Older items remain in the
    --     -- file; only injection is bounded. Suppressed entirely in
    --     -- Norris mode (R-C1).
    --     inject_max_chars = 2000,
    --
    --     -- Which configured model to use for :memory summarize.
    --     -- Defaults to the active model when nil. Use "fast" for
    --     -- speed; "deep" or "cloud" for better extraction quality
    --     -- (cloud may have variable cost per session).
    --     summarizer_model = "fast",
    -- },

    -- Phase 5 (docs/PHASE5.md): multi-model routing + cloud fallback +
    -- summarize-on-evict. OFF by default — auto-routing can spend money
    -- silently on the cloud preset; require explicit opt-in.
    --
    -- routing = {
    --     -- Enable auto-routing per request. When true, router.classify_model
    --     -- inspects each prompt and may switch the model for THAT request
    --     -- only (the :model selection is preserved across requests).
    --     -- Default false. Toggle at runtime with :route on / :route off.
    --     auto = true,
    --
    --     -- Class → model mapping. nil = "keep current" (heuristic fires
    --     -- but no override). Ships with reasoning = nil because mapping
    --     -- "explain ..." prompts to a paid cloud model would spend money
    --     -- silently — opt in by uncommenting the reasoning line below.
    --     classes = {
    --         code = "deep",           -- code-like prompts to local deep
    --         -- reasoning = "cloud",  -- OPT-IN: "explain"/"why"/"how does" → paid
    --         -- default = nil,        -- keep active model
    --     },
    --
    --     -- Single-hop retry on transport failure (HTTP 5xx, 408,
    --     -- 404 model_not_found, DNS, connection refused, timeouts).
    --     -- Retries against fallback_model once. Skipped if any text
    --     -- has already streamed (no partial-output duplication).
    --     -- Toggle at runtime with :fallback on / :fallback off.
    --     fallback       = false,  -- default off (cost-safety)
    --     fallback_model = "cloud",
    --
    --     -- Issue #86: per-class system_prompt override. When the
    --     -- classified request falls into a class with an entry here,
    --     -- the BASE system_prompt is REPLACED for that one request
    --     -- (dynamic blocks — [background], [project], [earlier
    --     -- summary], NORRIS suffix — still compose on top). Mostly
    --     -- useful for tightening small local models' instruction
    --     -- adherence. Default {} (no override).
    --     system_prompts = {
    --         code = [[You are a code assistant. Rules:
    -- 1. Output ONLY the requested code or command.
    -- 2. No prose explanation unless explicitly asked.
    -- 3. Wrap shell commands in CMD: prefix.
    -- 4. Max response: 200 tokens.]],
    --         default = [[You are a shell assistant.
    -- Output shell commands as: CMD: <command>
    -- Output answers as single short sentences.
    -- Do not ask clarifying questions.]],
    --         -- reasoning routes to cloud; no override usually needed
    --     },
    --
    --     -- Issue #88: per-class GBNF grammar passthrough. llama.cpp
    --     -- constrains the sampler to ONLY emit tokens matching the
    --     -- grammar — eliminates format drift on small models. Cloud
    --     -- (Anthropic/Bedrock) silently ignores the field, so default
    --     -- passthrough is safe; no per-model opt-out needed. Misformed
    --     -- grammar surfaces as a broker error at request time.
    --     grammars = {
    --         code    = [[root ::= "CMD: " [^\n]+ "\n"]],
    --         default = [[root ::= ("CMD: " [^\n]+ "\n") | [^\n]+ "\n"]],
    --     },
    -- },
    --
    -- Issue #88 (continued): for the safety LLM probe (YES/NO
    -- destructive classification), set safety.probe_grammar to force
    -- the probe model to emit exactly YES or NO. Eliminates the
    -- regex-match fallback for unparseable verdicts; small models
    -- become reliable enough to use as the probe.
    --
    -- NOTE: this is the SAME `safety` key as the Phase 3 example above.
    -- Do not uncomment both: with a repeated key in one table constructor
    -- only one value survives (Lua leaves the assignment order undefined).
    -- Merge probe_grammar into a single safety block instead.
    --
    -- safety = {
    --     llm_second_opinion = true,
    --     llm_model          = "fast",
    --     probe_grammar      = [[root ::= ("YES" | "NO")]],
    -- },

    -- ── Phase 5 context summarization on sliding-window eviction.
    -- Set INSIDE the context = { ... } block above to enable:
    --   context = {
    --       max_turns          = 40,
    --       token_budget       = 4096,
    --       summarize_on_evict = true,
    --       summarizer_model   = "fast",  -- model name in models{}
    --       max_summary_chars  = 2000,
    --   },
    -- When summarize_on_evict is true, evicted turn pairs are fed to
    -- summarizer_model and the result lives on ctx.summary, appended to
    -- the system prompt as [earlier conversation summary]. Suppressed
    -- in Norris mode (R-C4 — planner stays on its goal). If broker
    -- fails, falls back to Phase 0 silent eviction (no crash).

    -- Phase 6 (docs/PHASE6.md): project file-tree context + :diff /
    -- :tree / :highlight metas. The :diff and :tree metas work without
    -- any config. The `project` block below only controls the
    -- AUTO-injection-at-startup behavior; manual `:tree` always works
    -- regardless. Uncomment to enable startup auto-inject.
    --
    -- project = {
    --     auto_tree      = true,  -- run `:tree` once at startup
    --     tree_depth     = 3,     -- depth filter for the scan (find fallback only;
    --                             -- git ls-files emits full repo-relative paths)
    --     tree_max_chars = 4096,  -- truncate the injected block above this
    -- },
    --
    -- :highlight has no config flag in v1 — toggled at runtime only.
    -- Requires the external `tree-sitter` CLI plus configured parser-
    -- directories with cloned + built `tree-sitter-<lang>` grammars
    -- (see `:highlight on` for the install hints).

    -- Phase 7 (docs/PHASE7.md): cost / usage observability. broker.lua
    -- captures `usage` (+ `cost` for cloud) from every chat/chat_stream
    -- call and routes via ctx:add_usage to a per-session accumulator.
    -- `:cost` / `:cost detail` / `:cost reset` surface the totals.
    -- The `cost` block below configures OPTIONAL warn thresholds —
    -- a single status line fires the first time the cumulative total
    -- crosses each threshold. Default off. Useful when paid cloud
    -- presets are in play so runaway-cost sessions get a nudge.
    --
    -- cost = {
    --     warn_at_dollars = 0.50,    -- one-shot warn when cumulative cost crosses
    --     warn_at_tokens  = 100000,  -- one-shot warn when cumulative tokens crosses
    -- },
    --
    -- Both flags are independent (R4 — first-to-fire doesn't suppress
    -- the other); `:cost reset` re-arms both. Per-turn usage is also
    -- written to session/*.jsonl (assistant-turn `usage` field) for
    -- after-the-fact scripting; cross-session aggregation deferred
    -- to a future phase (Q-C2).

    -- Phase 8 (docs/PHASE8.md): accurate tokenization via the broker's
    -- /tokenize endpoint, replacing the Phase 0 §8 char/4 heuristic.
    -- Two consequences when use_endpoint=true:
    --   (1) Context:estimate_tokens hits /tokenize once per
    --       new turn (cached on the turn dict thereafter). Network
    --       cost is one round-trip (~30ms) per fresh turn; subsequent
    --       calls reuse the cache.
    --   (2) Context:enforce_budget actually ENFORCES token_budget now
    --       (previously only max_turns was checked). Sessions that
    --       fit under char/4 may evict earlier — raise token_budget
    --       to match your model's real context window if needed.
    -- Cloud endpoints (OpenRouter) don't expose /tokenize; capability
    -- cached as unsupported on first probe -> silent char/4 fallback.
    --
    -- tokenize = {
    --     use_endpoint = true,
    -- },
}