packaging: bin/aish wrapper + examples/config.lua + LICENSE for v0.1.0 release
main.lua now resolves package.path relative to its own script directory rather than cwd, so the packaged install at /usr/share/lua/5.1/aish/ finds its siblings regardless of where the user invokes aish from. Dev mode (luajit main.lua from the repo root) is preserved: arg[0] is "main.lua" with no "/" so the regex returns nil and _dir falls back to "./" — identical to the previous behavior. bin/aish is a POSIX-sh wrapper that execs luajit against $AISH_LIB/main.lua (default /usr/share/lua/5.1/aish). The AISH_LIB env override lets users point at a dev checkout without uninstalling the package. Wrapper emits distinct errors when AISH_LIB is missing or when luajit isn't on PATH so broken installs surface clearly instead of through a bare sh: not found. examples/config.lua is the canonical commented reference, shipped at /usr/share/doc/aish/examples/config.lua. Stripped of the two live MCP bearer tokens carried by the in-tree config.lua and switched to the auth_env env-var indirection form; mcp.servers entries are commented out so a copy-to-~/.config/aish/config.lua produces a working starting point on first uncomment. HOSSENFELDER URL flagged as maintainer-LAN. LICENSE: MIT, copyright 2026 Markus Fritsche. README updated to match. Sonnet review of the changeset (per feedback_reviews_use_sonnet.md + bugfix-process step 4): no blockers; the two Important findings (USAGE text still said "luajit main.lua", bin/aish didn't pre-check luajit) and one Nit (unredacted HOSSENFELDER URL) were folded in before commit. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,488 @@
|
||||
-- config.lua — example aish configuration.
|
||||
-- Shipped by the aish package at /usr/share/doc/aish/examples/config.lua.
|
||||
-- Copy to ~/.config/aish/config.lua (preferred) and adapt to your fleet:
|
||||
--
|
||||
-- install -Dm600 /usr/share/doc/aish/examples/config.lua \
|
||||
-- ~/.config/aish/config.lua
|
||||
--
|
||||
-- Mode 0600 matters because this file can carry MCP bearer tokens. The
|
||||
-- mcp.servers entries below ship commented out and carry no literal
|
||||
-- tokens: they use the auth_env env-var indirection form (export
|
||||
-- MCP_PVE1_TOKEN=... in your shell init). Prefer that over committing literals.
|
||||
--
|
||||
-- Loaded with dofile() at startup; returns a plain Lua table.
|
||||
-- See docs/PHASE0.md §10 for resolution order and full schema.
|
||||
--
|
||||
-- Per issue #12: hossenfelder is the canonical single-URL broker. It does
|
||||
-- model-aware routing server-side (local models on boltzmann; cloud routes
|
||||
-- through OpenRouter using its own bearer auth — no client-side key here).
|
||||
-- Discovery: GET <broker URL>/v1/models (on the maintainer's LAN: http://hossenfelder.fritz.box:8082/v1/models).
|
||||
--
|
||||
-- Phase 9 (docs/PHASE9.md): a `.aish.lua` in/above your cwd (walking up
|
||||
-- to $HOME) overlays this user config. First encounter prompts to trust;
|
||||
-- sha256-pinned in ~/.aish/trusted-projects. Use it for repo-specific
|
||||
-- model presets, permissions, hooks, etc.
|
||||
--
|
||||
-- IMPORTANT: shallow merge. If your `.aish.lua` sets a top-level block
|
||||
-- (models, permissions, cost, shell, ...), it REPLACES the user's
|
||||
-- entire block — list every entry you want available OR omit the block
|
||||
-- to keep the user's. Inspect the merge via `:config show` at runtime.
|
||||
|
||||
-- Replace with your own broker URL. This default targets the
|
||||
-- maintainer's home-LAN broker — useful as a structural example
|
||||
-- but will not resolve outside that network.
|
||||
local HOSSENFELDER = "http://hossenfelder.fritz.box:8082"
|
||||
|
||||
return {
|
||||
default_model = "fast",
|
||||
|
||||
-- 2026-05-17: full fleet exposed. 6 local + 14 cloud models live on the
|
||||
-- hossenfelder broker. Aliases below match the model IDs returned by
|
||||
-- /v1/models so the broker can route without prefix stripping.
|
||||
models = {
|
||||
-- ── LOCAL ────────────────────────────────────────────────────────
|
||||
fast = { -- alias for the 1.5B; default
|
||||
endpoint = HOSSENFELDER,
|
||||
model = "qwen2.5-coder-1.5b-q4_k_m.gguf",
|
||||
temperature = 0.2,
|
||||
},
|
||||
["coder-3b"] = { -- pve2 (Haswell NUC, 1.8 GB model, ~4 tok/s)
|
||||
endpoint = HOSSENFELDER,
|
||||
model = "qwen2.5-coder-3b-instruct-pve2",
|
||||
temperature = 0.2,
|
||||
},
|
||||
["coder-7b"] = { -- pve1 (Haswell NUC)
|
||||
endpoint = HOSSENFELDER,
|
||||
model = "qwen2.5-coder-7b-instruct-pve1",
|
||||
temperature = 0.2,
|
||||
},
|
||||
["coder-7b-snappy"] = { -- dirac:8081, low-latency completion
|
||||
endpoint = HOSSENFELDER,
|
||||
model = "qwen-coder-7b-snappy-8k",
|
||||
temperature = 0.2,
|
||||
},
|
||||
["qwen-7b"] = { -- dirac:8080 chat
|
||||
endpoint = HOSSENFELDER,
|
||||
model = "Qwen2.5-7B-Instruct-Q4_K_M.gguf",
|
||||
temperature = 0.2,
|
||||
},
|
||||
deep = { -- boltzmann:8085 — Qwen3-30B-A3B MoE, q8 KV cache
|
||||
endpoint = HOSSENFELDER,
|
||||
model = "qwen3-30b-a3b-instruct-2507",
|
||||
-- timeout_ms inherits broker default (30 min) — 30B prompt processing
|
||||
-- of long contexts on CPU can take 15-25 min before first token.
|
||||
temperature = 0.1,
|
||||
},
|
||||
|
||||
-- ── CLOUD (OpenRouter via hossenfelder) ───────────────────────────
|
||||
cloud = { endpoint = HOSSENFELDER, model = "anthropic/claude-haiku-4.5", temperature = 0.2 },
|
||||
haiku = { endpoint = HOSSENFELDER, model = "anthropic/claude-haiku-4.5", temperature = 0.2 },
|
||||
sonnet = { endpoint = HOSSENFELDER, model = "anthropic/claude-sonnet-4.6", temperature = 0.2 },
|
||||
opus = { endpoint = HOSSENFELDER, model = "anthropic/claude-opus-4.7", temperature = 0.2 },
|
||||
gpt5 = { endpoint = HOSSENFELDER, model = "openai/gpt-5.5", temperature = 0.2 },
|
||||
["gpt5-mini"] = { endpoint = HOSSENFELDER, model = "openai/gpt-5.4-mini", temperature = 0.2 },
|
||||
deepseek = { endpoint = HOSSENFELDER, model = "deepseek/deepseek-v3.2", temperature = 0.2 },
|
||||
["deepseek-v4"] = { endpoint = HOSSENFELDER, model = "deepseek/deepseek-v4-flash", temperature = 0.2 },
|
||||
["deepseek-pro"] = { endpoint = HOSSENFELDER, model = "deepseek/deepseek-v4-pro", temperature = 0.2 },
|
||||
mistral = { endpoint = HOSSENFELDER, model = "mistralai/mistral-large-2512", temperature = 0.2 },
|
||||
["qwen-cloud"] = { endpoint = HOSSENFELDER, model = "qwen/qwen3.5-27b", temperature = 0.2 },
|
||||
owl = { endpoint = HOSSENFELDER, model = "openrouter/owl-alpha", temperature = 0.2 },
|
||||
|
||||
-- ── CLOUD FREE-TIER ──────────────────────────────────────────────
|
||||
["free-qwen-coder"] = { endpoint = HOSSENFELDER, model = "qwen/qwen3-coder:free", temperature = 0.2 },
|
||||
["free-llama-70b"] = { endpoint = HOSSENFELDER, model = "meta-llama/llama-3.3-70b-instruct:free", temperature = 0.2 },
|
||||
["free-qwen-80b"] = { endpoint = HOSSENFELDER, model = "qwen/qwen3-next-80b-a3b-instruct:free", temperature = 0.2 },
|
||||
["free-gpt-oss"] = { endpoint = HOSSENFELDER, model = "openai/gpt-oss-120b:free", temperature = 0.2 },
|
||||
["free-glm"] = { endpoint = HOSSENFELDER, model = "z-ai/glm-4.5-air:free", temperature = 0.2 },
|
||||
["free-deepseek-v4"] = { endpoint = HOSSENFELDER, model = "deepseek/deepseek-v4-flash:free", temperature = 0.2 },
|
||||
},
|
||||
|
||||
shell = {
|
||||
known_commands = {
|
||||
"ls", "cat", "cd", "grep", "find", "cp", "mv", "rm",
|
||||
"mkdir", "rmdir", "git", "make", "cmake", "gcc", "clang",
|
||||
"python3", "luajit", "ssh", "scp", "curl", "wget",
|
||||
},
|
||||
capture_output = true, -- inject exec output into context
|
||||
confirm_cmd = true, -- prompt before executing CMD: suggestions
|
||||
|
||||
-- Issue #10: prompt template. When set, replaces the default
|
||||
-- "[aish:<model>]> " prompt. Variables (substituted via {name}):
|
||||
-- {model} {ctx_used} {ctx_max} {turn}
|
||||
-- {cwd} {cwd_short} (cwd with $HOME -> ~)
|
||||
-- {last_status} (last exec exit code, empty if none yet)
|
||||
-- {mode} (norris / plan / normal)
|
||||
-- prompt = "[{model} {ctx_used}/{ctx_max}t T{turn} {mode}] {cwd_short} > ",
|
||||
},
|
||||
|
||||
context = {
|
||||
max_turns = 40,
|
||||
token_budget = 4096,
|
||||
},
|
||||
|
||||
history = {
|
||||
dir = (os.getenv("HOME") or ".") .. "/.local/share/aish",
|
||||
},
|
||||
|
||||
-- Issue #3: pre/post CMD hooks. Optional shell scripts triggered around
|
||||
-- every CMD: execution. Each hook receives the command on stdin and
|
||||
-- AISH_CMD / AISH_TURN / AISH_CWD as env vars. Non-zero exit on pre_cmd
|
||||
-- aborts execution; post_cmd's exit code is ignored but its stdout is
|
||||
-- logged. Default off (no hooks). Uncomment to enable.
|
||||
-- hooks = {
|
||||
-- pre_cmd = (os.getenv("HOME") or ".") .. "/.aish/hooks/pre-cmd",
|
||||
-- post_cmd = (os.getenv("HOME") or ".") .. "/.aish/hooks/post-cmd",
|
||||
-- },
|
||||
|
||||
-- Issue #13: secret redaction. Vault is a separate file at ~/.aish/
|
||||
-- secrets.lua (mode 0600 enforced). When set, outbound broker messages
|
||||
-- are scrubbed: vault literals + autodetect heuristics (OpenAI sk-,
|
||||
-- OpenRouter sk-or-v1-, GitHub ghp_/gho_/ghs_, AWS AKIA, JWT eyJ...,
|
||||
-- SSH/GPG PRIVATE KEY headers) become $AISH_SECRET_NNN placeholders.
|
||||
-- The streamed reply is rehydrated before display so the user sees
|
||||
-- real values. Per-broker override via models[*].redact:
|
||||
-- "off" -- no scrubbing (trusted local)
|
||||
-- "vault" -- vault literals only
|
||||
-- "vault+autodetect" -- + heuristics (default when vault loaded)
|
||||
-- "stealth" -- + heuristics, opaque decoys, no rehydrate
|
||||
-- Default per-broker is the global config.secrets.default, falling
|
||||
-- back to "vault+autodetect" when vault loaded, else "off".
|
||||
-- secrets = {
|
||||
-- vault = "~/.aish/secrets.lua",
|
||||
-- default = "vault+autodetect", -- applies when models[*].redact is nil
|
||||
-- },
|
||||
|
||||
-- Issue #8: background CMD (CMD&: marker). Requires history.dir set
|
||||
-- (logs land at <history.dir>/bg/<id>.log + .status sidecar). The
|
||||
-- feature is always-on once history.dir exists — no config flag — but
|
||||
-- only fires when the model emits "CMD&: " or the user runs :bg-spawn.
|
||||
|
||||
-- Issue #9: permission policy DSL for AI-suggested CMD: lines. When set,
|
||||
-- supersedes shell.confirm_cmd. Patterns are Lua patterns (NOT regex)
|
||||
-- per substrate invariant §3 (no compiled extensions). Priority order:
|
||||
-- deny > confirm > allow; first match in the chosen category wins.
|
||||
-- Unmatched commands default to "confirm". Probe with :perms check <cmd>.
|
||||
-- permissions = {
|
||||
-- allow = { "^ls%s", "^cat%s", "^git status", "^git diff" },
|
||||
-- confirm = { "^rm%s", "^git push", "^docker%s", "^sudo%s" },
|
||||
-- deny = { "^ssh%s+root@", "^curl%s+http[^s]" },
|
||||
-- },
|
||||
|
||||
-- Phase 2 (docs/PHASE2.md): MCP server registry + tool-call policy.
|
||||
-- Aliases become the namespace prefix on tool names sent to the model
|
||||
-- ("<alias>__<tool>" — e.g. "pve1__list_dir"). Separator is "__" because
|
||||
-- Anthropic via Bedrock validates tool names against ^[a-zA-Z0-9_-]{1,128}$
|
||||
-- (dots are rejected). Aliases themselves must not contain "__".
|
||||
-- Credential precedence per server: auth_token literal > auth_env env-var indirection > nil (no auth).
|
||||
mcp = {
|
||||
servers = {
|
||||
-- Example MCP server entries. Replace the URL with your own
|
||||
-- lmcp endpoint and source the bearer token via auth_env so
|
||||
-- it never lands in version control.
|
||||
--
|
||||
-- pve1: small sandbox host (stock lmcp tools — shell, read_file,
|
||||
-- write_file, edit_file, list_dir, search_files, shell_bg).
|
||||
-- pve1 = {
|
||||
-- url = "http://pve1.example.local:8080/mcp",
|
||||
-- auth_env = "MCP_PVE1_TOKEN",
|
||||
-- },
|
||||
--
|
||||
-- hertz: home-network hub with lmcp v1.2+ built-in fetch /
|
||||
-- web_search tools — useful for letting the model do web
|
||||
-- research without leaving aish. Auto-approving these two
|
||||
-- is safe because they carry MCP readOnlyHint=true and
|
||||
-- openWorldHint=true (see auto_approve block below).
|
||||
-- hertz = {
|
||||
-- url = "http://hertz.example.local:8080/mcp",
|
||||
-- auth_env = "MCP_HERTZ_TOKEN",
|
||||
-- },
|
||||
},
|
||||
|
||||
-- Per-call confirm gate auto-approve policy. fetch / web_search
|
||||
-- carry MCP readOnlyHint=true + openWorldHint=true; safe to skip
|
||||
-- the per-call prompt since they neither mutate nor leak local
|
||||
-- state. Anything writable on the host (mqtt_pub, ha_cli, lxc_exec,
|
||||
-- wol_and_wait, ...) should keep prompting.
|
||||
auto_approve = {
|
||||
-- ["hertz__fetch"] = true,
|
||||
-- ["hertz__web_search"] = true,
|
||||
},
|
||||
|
||||
-- Tool-call sub-loop budget per ask_ai turn. Default 8 if absent.
|
||||
max_tool_depth = 8,
|
||||
},
|
||||
|
||||
-- Phase 3 (docs/PHASE3.md): Chuck Norris autonomous mode + destructive-op
|
||||
-- heuristic. The block is OFF by default (sane defaults kick in when
|
||||
-- absent); uncomment to tune.
|
||||
--
|
||||
-- safety = {
|
||||
-- -- LLM second-opinion on commands the static patterns don't flag.
|
||||
-- -- Default true. Set false for static-only operation (faster, but
|
||||
-- -- misses novel destructive patterns the static list doesn't know
|
||||
-- -- about — bash -c content, custom destructive idioms, etc.).
|
||||
-- llm_second_opinion = true,
|
||||
--
|
||||
-- -- Which configured model to use for the YES/NO destructive probe.
|
||||
-- -- Precedence: this field → models.deep → models[default_model].
|
||||
-- -- R-B2: prefer an INDEPENDENT model class from the action-emitting
|
||||
-- -- model (avoids self-policing). Recommended values:
|
||||
-- -- "cloud" — anthropic/claude-haiku-4.5 via openrouter. Fast and
|
||||
-- -- reliable. Costs money per probe (typical Norris
|
||||
-- -- session = 16 probes max, often cached).
|
||||
-- -- "deep" — local large model (qwen3-30b on this fleet). Free
|
||||
-- -- but slow on RK3588 hardware (~1-3s per probe).
|
||||
-- -- Falls back here automatically if not set.
|
||||
-- -- "fast" — same model as the action-emitter. NOT RECOMMENDED
|
||||
-- -- (circular trust); use only when no other option.
|
||||
-- llm_model = "cloud",
|
||||
--
|
||||
-- -- Norris planning-loop budget. Iterations of safety.norris_step.
|
||||
-- -- Each iteration is one broker round-trip + dispatch of actions.
|
||||
-- -- Default 8. Bump for long-running goals; cap low for testing.
|
||||
-- max_norris_steps = 8,
|
||||
-- },
|
||||
|
||||
-- Phase 4 (docs/PHASE4.md): cross-session memory.jsonl + startup
|
||||
-- injection + :memory management surface. The block is OFF by
|
||||
-- default (no startup injection); uncomment to tune. Note that
|
||||
-- :remember / :memory list / :memory forget / :memory summarize
|
||||
-- all work without this block — they store to <history.dir>/
|
||||
-- memory.jsonl regardless. The block only configures the
|
||||
-- injection-into-system-prompt behavior at startup.
|
||||
--
|
||||
-- memory = {
|
||||
-- -- Cap on total characters injected at startup. ~2000 chars ≈
|
||||
-- -- 500 tokens. LRU-by-ts selection if your memory.jsonl has
|
||||
-- -- more recent items than fit. Older items remain in the
|
||||
-- -- file; only injection is bounded. Suppressed entirely in
|
||||
-- -- Norris mode (R-C1).
|
||||
-- inject_max_chars = 2000,
|
||||
--
|
||||
-- -- Which configured model to use for :memory summarize.
|
||||
-- -- Defaults to the active model when nil. Use "fast" for
|
||||
-- -- speed; "deep" or "cloud" for better extraction quality
|
||||
-- -- (cloud may have variable cost per session).
|
||||
-- summarizer_model = "fast",
|
||||
--
|
||||
-- -- #102: auto-summarize the session into memory.jsonl on :q.
|
||||
-- -- When true, shutdown_session runs the same distill flow as
|
||||
-- -- `:memory summarize`, non-interactively, and auto-adds the
|
||||
-- -- parsed candidates. Silent no-op for trivial sessions (turn
|
||||
-- -- count < min_turns_for_summary, default 5). pcall'd so a
|
||||
-- -- broker failure never blocks :q.
|
||||
-- auto_summarize_on_quit = true,
|
||||
-- min_turns_for_summary = 5,
|
||||
-- summary_model = "fast", -- new alias; summarizer_model
|
||||
-- -- above is still honored for
|
||||
-- -- back-compat.
|
||||
-- },
|
||||
|
||||
-- Phase 5 (docs/PHASE5.md): multi-model routing + cloud fallback +
|
||||
-- summarize-on-evict. OFF by default — auto-routing can spend money
|
||||
-- silently on the cloud preset; require explicit opt-in.
|
||||
--
|
||||
-- routing = {
|
||||
-- -- Enable auto-routing per request. When true, router.classify_model
|
||||
-- -- inspects each prompt and may switch the model for THAT request
|
||||
-- -- only (the :model selection is preserved across requests).
|
||||
-- -- Default false. Toggle at runtime with :route on / :route off.
|
||||
-- auto = true,
|
||||
--
|
||||
-- -- Class → model mapping. nil = "keep current" (heuristic fires
|
||||
-- -- but no override). Ships with reasoning = nil because mapping
|
||||
-- -- "explain ..." prompts to a paid cloud model would spend money
|
||||
-- -- silently — opt in by uncommenting the reasoning line below.
|
||||
-- classes = {
|
||||
-- code = "deep", -- code-like prompts to local deep
|
||||
-- -- reasoning = "cloud", -- OPT-IN: "explain"/"why"/"how does" → paid
|
||||
-- -- default = nil, -- keep active model
|
||||
-- },
|
||||
--
|
||||
-- -- Single-hop retry on transport failure (HTTP 5xx, 408,
|
||||
-- -- 404 model_not_found, DNS, connection refused, timeouts).
|
||||
-- -- Retries against fallback_model once. Skipped if any text
|
||||
-- -- has already streamed (no partial-output duplication).
|
||||
-- -- Toggle at runtime with :fallback on / :fallback off.
|
||||
-- fallback = false, -- default off (cost-safety)
|
||||
-- fallback_model = "cloud",
|
||||
--
|
||||
-- -- Issue #86: per-class system_prompt override. When the
|
||||
-- -- classified request falls into a class with an entry here,
|
||||
-- -- the BASE system_prompt is REPLACED for that one request
|
||||
-- -- (dynamic blocks — [background], [project], [earlier
|
||||
-- -- summary], NORRIS suffix — still compose on top). Mostly
|
||||
-- -- useful for tightening small local models' instruction
|
||||
-- -- adherence. Default {} (no override).
|
||||
-- system_prompts = {
|
||||
-- code = [[You are a code assistant. Rules:
|
||||
-- 1. Output ONLY the requested code or command.
|
||||
-- 2. No prose explanation unless explicitly asked.
|
||||
-- 3. Wrap shell commands in CMD: prefix.
|
||||
-- 4. Max response: 200 tokens.]],
|
||||
-- default = [[You are a shell assistant.
|
||||
-- Output shell commands as: CMD: <command>
|
||||
-- Output answers as single short sentences.
|
||||
-- Do not ask clarifying questions.]],
|
||||
-- -- reasoning routes to cloud; no override usually needed
|
||||
-- },
|
||||
--
|
||||
-- -- Issue #88: per-class GBNF grammar passthrough. llama.cpp
|
||||
-- -- constrains the sampler to ONLY emit tokens matching the
|
||||
-- -- grammar — eliminates format drift on small models. Cloud
|
||||
-- -- (Anthropic/Bedrock) silently ignores the field, so default
|
||||
-- -- passthrough is safe; no per-model opt-out needed. Malformed
|
||||
-- -- grammar surfaces as a broker error at request time.
|
||||
-- grammars = {
|
||||
-- code = [[root ::= "CMD: " [^\n]+ "\n"]],
|
||||
-- default = [[root ::= ("CMD: " [^\n]+ "\n") | [^\n]+ "\n"]],
|
||||
-- },
|
||||
-- },
|
||||
--
|
||||
-- Issue #88 (continued): for the safety LLM probe (YES/NO
|
||||
-- destructive classification), set safety.probe_grammar to force
|
||||
-- the probe model to emit exactly YES or NO. Eliminates the
|
||||
-- regex-match fallback for unparseable verdicts; small models
|
||||
-- become reliable enough to use as the probe.
|
||||
--
|
||||
-- safety = {
|
||||
-- llm_second_opinion = true,
|
||||
-- llm_model = "fast",
|
||||
-- probe_grammar = [[root ::= ("YES" | "NO")]],
|
||||
-- },
|
||||
|
||||
-- ── Issue #87 (route-aware context compression).
|
||||
-- When a routed model preset has `local_compress = true`, each
|
||||
-- broker call against THAT preset gets a compressed view of
|
||||
-- ctx.turns: only the last `keep_turns` turns; any turn whose
|
||||
-- content exceeds `max_turn_chars` is tail-truncated. The full
|
||||
-- context lives on (visible via :history); compression is purely
|
||||
-- per-request for small models that effectively use a fraction
|
||||
-- of their advertised context window.
|
||||
--
|
||||
-- Set the per-model opt-in on models[<name>]:
|
||||
-- models.fast = { ..., local_compress = true }
|
||||
-- Defaults live under context.compress:
|
||||
-- context = {
|
||||
-- ...
|
||||
-- compress = { keep_turns = 2, max_turn_chars = 800 },
|
||||
-- }
|
||||
--
|
||||
-- Trade-off documented in the FR: tool turns lose information
|
||||
-- when tail-truncated. Acceptable for shell-output blocks (the
|
||||
-- tail is usually the relevant bit); known limitation for
|
||||
-- structured tool results. Disable per-model if it bites.
|
||||
|
||||
-- ── Issue #89 / Phase 10: cloud preplanner → local executor split.
|
||||
-- When cfg.norris.preplanner names a model preset, :norris launch
|
||||
-- fires ONE broker.chat against that preset asking for a sequence
|
||||
-- of TASK: <imperative> lines. Parsed list (capped at tasks_max)
|
||||
-- becomes ctx.norris_tasks; the executor model (cfg.norris.executor,
|
||||
-- defaulting to the active :model selection) runs each task with
|
||||
-- the current task shown in the per-step header.
|
||||
--
|
||||
-- Goal: small fast local models are cheap per step but easily
|
||||
-- distracted on multi-step plans; cloud is capable at planning
|
||||
-- but expensive per step. Use cloud ONCE for the plan, local for
|
||||
-- every step. Falls back to single-model Norris (existing
|
||||
-- behavior) when preplanner unset / fails / produces no TASKs.
|
||||
--
|
||||
-- norris = {
|
||||
-- preplanner = "cloud", -- model name in cfg.models;
|
||||
-- -- this preset is called ONCE per
|
||||
-- -- :norris launch. Omit to run
|
||||
-- -- single-model (Phase 6 behavior).
|
||||
-- executor = "fast", -- model that runs each step.
|
||||
-- -- Omit to use the active :model.
|
||||
-- tasks_max = 16, -- cap on preplan list size.
|
||||
-- -- preplan_system = "...", -- override the built-in prompt
|
||||
-- },
|
||||
--
|
||||
-- :cost detail separates norris-preplan and norris rows so you
|
||||
-- can see cloud planning cost vs local execution cost. The
|
||||
-- preplan call does NOT retry via fallback_model (a different
|
||||
-- model = a different decomposition; clean hard-fail to single-
|
||||
-- model is safer).
|
||||
|
||||
-- ── Phase 5 context summarization on sliding-window eviction.
|
||||
-- Set INSIDE the context = { ... } block above to enable:
|
||||
-- context = {
|
||||
-- max_turns = 40,
|
||||
-- token_budget = 4096,
|
||||
-- summarize_on_evict = true,
|
||||
-- summarizer_model = "fast", -- model name in models{}
|
||||
-- max_summary_chars = 2000,
|
||||
--
|
||||
-- -- #101 (proactive periodic summarization). When set,
|
||||
-- -- enforce_cadence fires every N appends (before
|
||||
-- -- enforce_budget) and folds turns OLDER than
|
||||
-- -- summarize_keep_recent into ctx.summary. Goal: keep the
|
||||
-- -- wire prompt tight from the start so small local models
|
||||
-- -- aren't fed near-budget context until eviction. Composes
|
||||
-- -- with summarize_on_evict (same summarize_fn closure;
|
||||
-- -- different trigger). Suppressed in Norris (R-C4 parity).
|
||||
-- summarize_every_n_turns = 10, -- nil = disabled (default)
|
||||
-- summarize_keep_recent = 4,
|
||||
-- },
|
||||
-- When summarize_on_evict is true, evicted turn pairs are fed to
|
||||
-- summarizer_model and the result lives on ctx.summary, appended to
|
||||
-- the system prompt as [earlier conversation summary]. Suppressed
|
||||
-- in Norris mode (R-C4 — planner stays on its goal). If broker
|
||||
-- fails, falls back to Phase 0 silent eviction (no crash).
|
||||
|
||||
-- Phase 6 (docs/PHASE6.md): project file-tree context + :diff /
|
||||
-- :tree / :highlight metas. The :diff and :tree metas work without
|
||||
-- any config. The `project` block below only controls the
|
||||
-- AUTO-injection-at-startup behavior; manual `:tree` always works
|
||||
-- regardless. Uncomment to enable startup auto-inject.
|
||||
--
|
||||
-- project = {
|
||||
-- auto_tree = true, -- run `:tree` once at startup
|
||||
-- tree_depth = 3, -- depth filter for the scan (find fallback only;
|
||||
-- -- git ls-files emits full repo-relative paths)
|
||||
-- tree_max_chars = 4096, -- truncate the injected block above this
|
||||
-- },
|
||||
--
|
||||
-- :highlight has no config flag in v1 — toggled at runtime only.
|
||||
-- Requires the external `tree-sitter` CLI plus configured parser-
|
||||
-- directories with cloned + built `tree-sitter-<lang>` grammars
|
||||
-- (see `:highlight on` for the install hints).
|
||||
|
||||
-- Phase 7 (docs/PHASE7.md): cost / usage observability. broker.lua
|
||||
-- captures `usage` (+ `cost` for cloud) from every chat/chat_stream
|
||||
-- call and routes via ctx:add_usage to a per-session accumulator.
|
||||
-- `:cost` / `:cost detail` / `:cost reset` surface the totals.
|
||||
-- The `cost` block below configures OPTIONAL warn thresholds —
|
||||
-- a single status line fires the first time the cumulative
|
||||
-- crosses each threshold. Default off. Useful when paid cloud
|
||||
-- presets are in play so runaway-cost sessions get a nudge.
|
||||
--
|
||||
-- cost = {
|
||||
-- warn_at_dollars = 0.50, -- one-shot warn when cumulative cost crosses this amount
|
||||
-- warn_at_tokens = 100000, -- one-shot warn when cumulative token count crosses this value
|
||||
-- },
|
||||
--
|
||||
-- Both flags are independent (R4 — first-to-fire doesn't suppress
|
||||
-- the other); `:cost reset` re-arms both. Per-turn usage is also
|
||||
-- written to session/*.jsonl (assistant-turn `usage` field) for
|
||||
-- after-the-fact scripting; cross-session aggregation deferred
|
||||
-- to a future phase (Q-C2).
|
||||
|
||||
-- Phase 8 (docs/PHASE8.md): accurate tokenization via the broker's
|
||||
-- /tokenize endpoint, replacing the Phase 0 §8 char/4 heuristic.
|
||||
-- Two consequences when use_endpoint=true:
|
||||
-- (1) Context:estimate_tokens hits <endpoint>/tokenize once per
|
||||
-- new turn (cached on the turn dict thereafter). Network
|
||||
-- cost is one round-trip (~30ms) per fresh turn; subsequent
|
||||
-- calls reuse the cache.
|
||||
-- (2) Context:enforce_budget actually ENFORCES token_budget now
|
||||
-- (previously only max_turns was checked). Sessions that
|
||||
-- fit under char/4 may evict earlier — raise token_budget
|
||||
-- to match your model's real context window if needed.
|
||||
-- Cloud endpoints (OpenRouter) don't expose /tokenize; capability
|
||||
-- cached as unsupported on first probe -> silent char/4 fallback.
|
||||
--
|
||||
-- tokenize = {
|
||||
-- use_endpoint = true,
|
||||
-- },
|
||||
}
|
||||
Reference in New Issue
Block a user